infoboxer 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: d1eca6a2e6e025b77b1eeed915629b24338380a8
4
- data.tar.gz: a852e3d6cefa55b04b2a8ffe530478e8199a884f
2
+ SHA256:
3
+ metadata.gz: 36fa882706af5f4b483c3723a179934eb906842f4de8c5118a9cf5b0dd91fec1
4
+ data.tar.gz: 0e11e6635fab15e01a2b4cf53eb8fa209938121f582bf1e925cbeeb841dd7478
5
5
  SHA512:
6
- metadata.gz: 0cb20f539dcb4fecaf1f3c57a842d407481e37a2763ee35018bd436b058e704382493fd0a0a6a008101a70e9d9c283cc9fa3b7dbd8f48deef894e8d857ea7c42
7
- data.tar.gz: b4f9ab8d39bc3c5fca6f7f247fd8c7f54bf76de8b5ed878a9fb935c6ccf3d99f7ed768c16cd1b1529ba56742a0bb1b6a09dafd0fc2900c12f2c0a39264d938fb
6
+ metadata.gz: 35825da3980f77f9fbaaa91e3cde9d3c2e40c02249577a6a2f337dd5077c247cfbeef97c6e714d1c529cd109f91bf7555e4d8e67f7c4d32a226508d004a2e86b
7
+ data.tar.gz: df6abe08b07cf597bc0c196900ddffbeff023cbdfa8ec5b185c343c6fa4dd2e9cd924c92ed1deb793e96f285b287360a6c27c32489d3a970e046dbedc481529b
@@ -1,5 +1,11 @@
1
1
  # Infoboxer's change log
2
2
 
3
+ ## 0.3.3 (2020-02-09)
4
+
5
+ * Fixed table captions handling (thanks @robfors for reporting)
6
+
7
+ PS: Funny that this small bugfix release is exactly two years after the previous one :(
8
+
3
9
  ## 0.3.2 (2018-02-09)
4
10
 
5
11
  * Updated MediaWiktory to finally turn on gzip encoding of responses;
@@ -0,0 +1,150 @@
1
+ GIT
2
+ remote: https://github.com/zverok/dokaz.git
3
+ revision: a8a6f0bbeab5589326fe2714cf89842b5f32b850
4
+ specs:
5
+ dokaz (0.0.4)
6
+ ansi
7
+ rouge
8
+ slop (~> 3)
9
+
10
+ PATH
11
+ remote: .
12
+ specs:
13
+ infoboxer (0.3.3)
14
+ addressable
15
+ htmlentities
16
+ mediawiktory (= 0.1.3)
17
+ terminal-table
18
+
19
+ GEM
20
+ remote: https://rubygems.org/
21
+ specs:
22
+ addressable (2.5.1)
23
+ public_suffix (~> 2.0, >= 2.0.2)
24
+ ansi (1.5.0)
25
+ ast (2.4.0)
26
+ backports (3.10.3)
27
+ byebug (9.0.6)
28
+ coveralls (0.8.21)
29
+ json (>= 1.8, < 3)
30
+ simplecov (~> 0.14.1)
31
+ term-ansicolor (~> 1.3)
32
+ thor (~> 0.19.4)
33
+ tins (~> 1.6)
34
+ crack (0.4.3)
35
+ safe_yaml (~> 1.0.0)
36
+ diff-lcs (1.3)
37
+ docile (1.1.5)
38
+ faraday (0.15.4)
39
+ multipart-post (>= 1.2, < 3)
40
+ faraday_middleware (0.13.0)
41
+ faraday (>= 0.7.4, < 1.0)
42
+ hashdiff (0.3.4)
43
+ hashie (3.6.0)
44
+ htmlentities (4.3.4)
45
+ json (2.1.0)
46
+ json (2.1.0-java)
47
+ mediawiktory (0.1.3)
48
+ addressable
49
+ faraday
50
+ faraday_middleware
51
+ hashie
52
+ naught
53
+ nokogiri
54
+ mini_portile2 (2.4.0)
55
+ multipart-post (2.1.1)
56
+ naught (1.1.0)
57
+ nokogiri (1.10.4)
58
+ mini_portile2 (~> 2.4.0)
59
+ parallel (1.19.1)
60
+ parser (2.7.0.2)
61
+ ast (~> 2.4.0)
62
+ powerpack (0.1.2)
63
+ public_suffix (2.0.5)
64
+ rainbow (3.0.0)
65
+ rake (12.3.0)
66
+ redcarpet (3.3.4)
67
+ rouge (2.1.1)
68
+ rspec (3.6.0)
69
+ rspec-core (~> 3.6.0)
70
+ rspec-expectations (~> 3.6.0)
71
+ rspec-mocks (~> 3.6.0)
72
+ rspec-core (3.6.0)
73
+ rspec-support (~> 3.6.0)
74
+ rspec-expectations (3.6.0)
75
+ diff-lcs (>= 1.2.0, < 2.0)
76
+ rspec-support (~> 3.6.0)
77
+ rspec-its (1.2.0)
78
+ rspec-core (>= 3.0.0)
79
+ rspec-expectations (>= 3.0.0)
80
+ rspec-mocks (3.6.0)
81
+ diff-lcs (>= 1.2.0, < 2.0)
82
+ rspec-support (~> 3.6.0)
83
+ rspec-support (3.6.0)
84
+ rubocop (0.52.1)
85
+ parallel (~> 1.10)
86
+ parser (>= 2.4.0.2, < 3.0)
87
+ powerpack (~> 0.1)
88
+ rainbow (>= 2.2.2, < 4.0)
89
+ ruby-progressbar (~> 1.7)
90
+ unicode-display_width (~> 1.0, >= 1.0.1)
91
+ rubocop-rspec (1.20.1)
92
+ rubocop (>= 0.51.0)
93
+ ruby-prof (0.16.2)
94
+ ruby-progressbar (1.10.1)
95
+ rubygems-tasks (0.2.4)
96
+ safe_yaml (1.0.4)
97
+ saharspec (0.0.4)
98
+ simplecov (0.14.1)
99
+ docile (~> 1.1.0)
100
+ json (>= 1.8, < 3)
101
+ simplecov-html (~> 0.10.0)
102
+ simplecov-html (0.10.1)
103
+ slop (3.6.0)
104
+ term-ansicolor (1.6.0)
105
+ tins (~> 1.0)
106
+ terminal-table (1.8.0)
107
+ unicode-display_width (~> 1.1, >= 1.1.1)
108
+ thor (0.19.4)
109
+ timecop (0.9.0)
110
+ tins (1.14.0)
111
+ tty-color (0.4.2)
112
+ unicode-display_width (1.6.1)
113
+ vcr (3.0.3)
114
+ webmock (3.0.1)
115
+ addressable (>= 2.3.6)
116
+ crack (>= 0.3.2)
117
+ hashdiff
118
+ yard (0.9.12)
119
+ yard-junk (0.0.7)
120
+ backports
121
+ rainbow
122
+ tty-color
123
+ yard
124
+
125
+ PLATFORMS
126
+ java
127
+ ruby
128
+
129
+ DEPENDENCIES
130
+ byebug
131
+ coveralls
132
+ dokaz!
133
+ infoboxer!
134
+ rake
135
+ redcarpet
136
+ rspec (~> 3)
137
+ rspec-its (~> 1)
138
+ rubocop (~> 0.52.1)
139
+ rubocop-rspec (~> 1.20)
140
+ ruby-prof
141
+ rubygems-tasks
142
+ saharspec (= 0.0.4)
143
+ timecop
144
+ vcr
145
+ webmock
146
+ yard (~> 0.9)
147
+ yard-junk (~> 0.0.7)
148
+
149
+ BUNDLED WITH
150
+ 1.17.2
@@ -2,7 +2,7 @@ require 'strscan'
2
2
 
3
3
  module Infoboxer
4
4
  class Parser
5
- class Context
5
+ class Context # rubocop:disable Metrics/ClassLength
6
6
  attr_reader :lineno
7
7
  attr_reader :traits
8
8
 
@@ -128,7 +128,13 @@ module Infoboxer
128
128
 
129
129
  # state inspection
130
130
  def matched_inline?(re)
131
- re.nil? ? (matched.empty? && eol?) : matched =~ re
131
+ if re.nil?
132
+ matched.empty? && eol?
133
+ elsif re.inspect.start_with?('/^') # was it REALLY at the beginning of the line?..
134
+ @scanner.pos == matched.length && matched =~ re
135
+ else
136
+ matched =~ re
137
+ end
132
138
  end
133
139
 
134
140
  def matched?(re)
@@ -144,6 +150,12 @@ module Infoboxer
144
150
  fail(ParsingError, "#{text} at line #{@lineno}:\n\t#{current}")
145
151
  end
146
152
 
153
+ def unscan_matched!
154
+ return unless @matched
155
+ @scanner.pos -= @matched.size
156
+ @rest = nil
157
+ end
158
+
147
159
  private
148
160
 
149
161
  # we do hard use of #matched and #rest, its wiser to memoize them
@@ -16,8 +16,9 @@ module Infoboxer
16
16
 
17
17
  @context.next!
18
18
 
19
- loop do
19
+ guarded_loop do
20
20
  table_next_line(table) or break
21
+ log 'Next table row'
21
22
  @context.next!
22
23
  end
23
24
 
@@ -66,9 +67,18 @@ module Infoboxer
66
67
  log 'Table caption found'
67
68
  @context.skip(/^\s*\|\+\s*/)
68
69
 
70
+ params = if @context.check(/[^|{|\[]+\|([^\|]|$)/)
71
+ parse_params(@context.scan_until(/\|/))
72
+ else
73
+ {}
74
+ end
75
+
69
76
  children = inline(/^\s*([|!]|{\|)/)
70
- @context.prev! if @context.eol? # compensate next! which will be done in table()
71
- table.push_children(TableCaption.new(children.strip))
77
+ if @context.matched
78
+ @context.unscan_matched!
79
+ @context.prev! # compensate next! which will be done in table()
80
+ end
81
+ table.push_children(TableCaption.new(children.strip, params))
72
82
  end
73
83
 
74
84
  def table_cells(table, cell_class = TableCell)
@@ -1,7 +1,7 @@
1
1
  module Infoboxer
2
2
  MAJOR = 0
3
3
  MINOR = 3
4
- PATCH = 2
4
+ PATCH = 3
5
5
  PRE = nil
6
6
  VERSION = [MAJOR, MINOR, PATCH, PRE].compact.join('.')
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: infoboxer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Shepelev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-02-09 00:00:00.000000000 Z
11
+ date: 2020-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities
@@ -81,6 +81,7 @@ files:
81
81
  - ".yardopts"
82
82
  - CHANGELOG.md
83
83
  - CONTRIBUTING.md
84
+ - Gemfile.lock
84
85
  - LICENSE.txt
85
86
  - Parsing.md
86
87
  - README.md
@@ -170,8 +171,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
170
171
  - !ruby/object:Gem::Version
171
172
  version: '0'
172
173
  requirements: []
173
- rubyforge_project:
174
- rubygems_version: 2.6.14
174
+ rubygems_version: 3.0.3
175
175
  signing_key:
176
176
  specification_version: 4
177
177
  summary: MediaWiki client and parser, targeting information extraction.