infoboxer 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +6 -0
- data/Gemfile.lock +150 -0
- data/lib/infoboxer/parser/context.rb +14 -2
- data/lib/infoboxer/parser/table.rb +13 -3
- data/lib/infoboxer/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 36fa882706af5f4b483c3723a179934eb906842f4de8c5118a9cf5b0dd91fec1
|
4
|
+
data.tar.gz: 0e11e6635fab15e01a2b4cf53eb8fa209938121f582bf1e925cbeeb841dd7478
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 35825da3980f77f9fbaaa91e3cde9d3c2e40c02249577a6a2f337dd5077c247cfbeef97c6e714d1c529cd109f91bf7555e4d8e67f7c4d32a226508d004a2e86b
|
7
|
+
data.tar.gz: df6abe08b07cf597bc0c196900ddffbeff023cbdfa8ec5b185c343c6fa4dd2e9cd924c92ed1deb793e96f285b287360a6c27c32489d3a970e046dbedc481529b
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
+
## 0.3.3 (2020-02-09)
|
4
|
+
|
5
|
+
* Fixed table captions handling (thanks @robfors for reporting)
|
6
|
+
|
7
|
+
PS: Funny that this small bugfix release is exactly two years after the previous one :(
|
8
|
+
|
3
9
|
## 0.3.2 (2018-02-09)
|
4
10
|
|
5
11
|
* Updated MediaWiktory to finally turn on gzip encoding of responses;
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
GIT
|
2
|
+
remote: https://github.com/zverok/dokaz.git
|
3
|
+
revision: a8a6f0bbeab5589326fe2714cf89842b5f32b850
|
4
|
+
specs:
|
5
|
+
dokaz (0.0.4)
|
6
|
+
ansi
|
7
|
+
rouge
|
8
|
+
slop (~> 3)
|
9
|
+
|
10
|
+
PATH
|
11
|
+
remote: .
|
12
|
+
specs:
|
13
|
+
infoboxer (0.3.3)
|
14
|
+
addressable
|
15
|
+
htmlentities
|
16
|
+
mediawiktory (= 0.1.3)
|
17
|
+
terminal-table
|
18
|
+
|
19
|
+
GEM
|
20
|
+
remote: https://rubygems.org/
|
21
|
+
specs:
|
22
|
+
addressable (2.5.1)
|
23
|
+
public_suffix (~> 2.0, >= 2.0.2)
|
24
|
+
ansi (1.5.0)
|
25
|
+
ast (2.4.0)
|
26
|
+
backports (3.10.3)
|
27
|
+
byebug (9.0.6)
|
28
|
+
coveralls (0.8.21)
|
29
|
+
json (>= 1.8, < 3)
|
30
|
+
simplecov (~> 0.14.1)
|
31
|
+
term-ansicolor (~> 1.3)
|
32
|
+
thor (~> 0.19.4)
|
33
|
+
tins (~> 1.6)
|
34
|
+
crack (0.4.3)
|
35
|
+
safe_yaml (~> 1.0.0)
|
36
|
+
diff-lcs (1.3)
|
37
|
+
docile (1.1.5)
|
38
|
+
faraday (0.15.4)
|
39
|
+
multipart-post (>= 1.2, < 3)
|
40
|
+
faraday_middleware (0.13.0)
|
41
|
+
faraday (>= 0.7.4, < 1.0)
|
42
|
+
hashdiff (0.3.4)
|
43
|
+
hashie (3.6.0)
|
44
|
+
htmlentities (4.3.4)
|
45
|
+
json (2.1.0)
|
46
|
+
json (2.1.0-java)
|
47
|
+
mediawiktory (0.1.3)
|
48
|
+
addressable
|
49
|
+
faraday
|
50
|
+
faraday_middleware
|
51
|
+
hashie
|
52
|
+
naught
|
53
|
+
nokogiri
|
54
|
+
mini_portile2 (2.4.0)
|
55
|
+
multipart-post (2.1.1)
|
56
|
+
naught (1.1.0)
|
57
|
+
nokogiri (1.10.4)
|
58
|
+
mini_portile2 (~> 2.4.0)
|
59
|
+
parallel (1.19.1)
|
60
|
+
parser (2.7.0.2)
|
61
|
+
ast (~> 2.4.0)
|
62
|
+
powerpack (0.1.2)
|
63
|
+
public_suffix (2.0.5)
|
64
|
+
rainbow (3.0.0)
|
65
|
+
rake (12.3.0)
|
66
|
+
redcarpet (3.3.4)
|
67
|
+
rouge (2.1.1)
|
68
|
+
rspec (3.6.0)
|
69
|
+
rspec-core (~> 3.6.0)
|
70
|
+
rspec-expectations (~> 3.6.0)
|
71
|
+
rspec-mocks (~> 3.6.0)
|
72
|
+
rspec-core (3.6.0)
|
73
|
+
rspec-support (~> 3.6.0)
|
74
|
+
rspec-expectations (3.6.0)
|
75
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
76
|
+
rspec-support (~> 3.6.0)
|
77
|
+
rspec-its (1.2.0)
|
78
|
+
rspec-core (>= 3.0.0)
|
79
|
+
rspec-expectations (>= 3.0.0)
|
80
|
+
rspec-mocks (3.6.0)
|
81
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
82
|
+
rspec-support (~> 3.6.0)
|
83
|
+
rspec-support (3.6.0)
|
84
|
+
rubocop (0.52.1)
|
85
|
+
parallel (~> 1.10)
|
86
|
+
parser (>= 2.4.0.2, < 3.0)
|
87
|
+
powerpack (~> 0.1)
|
88
|
+
rainbow (>= 2.2.2, < 4.0)
|
89
|
+
ruby-progressbar (~> 1.7)
|
90
|
+
unicode-display_width (~> 1.0, >= 1.0.1)
|
91
|
+
rubocop-rspec (1.20.1)
|
92
|
+
rubocop (>= 0.51.0)
|
93
|
+
ruby-prof (0.16.2)
|
94
|
+
ruby-progressbar (1.10.1)
|
95
|
+
rubygems-tasks (0.2.4)
|
96
|
+
safe_yaml (1.0.4)
|
97
|
+
saharspec (0.0.4)
|
98
|
+
simplecov (0.14.1)
|
99
|
+
docile (~> 1.1.0)
|
100
|
+
json (>= 1.8, < 3)
|
101
|
+
simplecov-html (~> 0.10.0)
|
102
|
+
simplecov-html (0.10.1)
|
103
|
+
slop (3.6.0)
|
104
|
+
term-ansicolor (1.6.0)
|
105
|
+
tins (~> 1.0)
|
106
|
+
terminal-table (1.8.0)
|
107
|
+
unicode-display_width (~> 1.1, >= 1.1.1)
|
108
|
+
thor (0.19.4)
|
109
|
+
timecop (0.9.0)
|
110
|
+
tins (1.14.0)
|
111
|
+
tty-color (0.4.2)
|
112
|
+
unicode-display_width (1.6.1)
|
113
|
+
vcr (3.0.3)
|
114
|
+
webmock (3.0.1)
|
115
|
+
addressable (>= 2.3.6)
|
116
|
+
crack (>= 0.3.2)
|
117
|
+
hashdiff
|
118
|
+
yard (0.9.12)
|
119
|
+
yard-junk (0.0.7)
|
120
|
+
backports
|
121
|
+
rainbow
|
122
|
+
tty-color
|
123
|
+
yard
|
124
|
+
|
125
|
+
PLATFORMS
|
126
|
+
java
|
127
|
+
ruby
|
128
|
+
|
129
|
+
DEPENDENCIES
|
130
|
+
byebug
|
131
|
+
coveralls
|
132
|
+
dokaz!
|
133
|
+
infoboxer!
|
134
|
+
rake
|
135
|
+
redcarpet
|
136
|
+
rspec (~> 3)
|
137
|
+
rspec-its (~> 1)
|
138
|
+
rubocop (~> 0.52.1)
|
139
|
+
rubocop-rspec (~> 1.20)
|
140
|
+
ruby-prof
|
141
|
+
rubygems-tasks
|
142
|
+
saharspec (= 0.0.4)
|
143
|
+
timecop
|
144
|
+
vcr
|
145
|
+
webmock
|
146
|
+
yard (~> 0.9)
|
147
|
+
yard-junk (~> 0.0.7)
|
148
|
+
|
149
|
+
BUNDLED WITH
|
150
|
+
1.17.2
|
data/lib/infoboxer/parser/context.rb
CHANGED
@@ -2,7 +2,7 @@ require 'strscan'
|
|
2
2
|
|
3
3
|
module Infoboxer
|
4
4
|
class Parser
|
5
|
-
class Context
|
5
|
+
class Context # rubocop:disable Metrics/ClassLength
|
6
6
|
attr_reader :lineno
|
7
7
|
attr_reader :traits
|
8
8
|
|
@@ -128,7 +128,13 @@ module Infoboxer
|
|
128
128
|
|
129
129
|
# state inspection
|
130
130
|
def matched_inline?(re)
|
131
|
-
re.nil?
|
131
|
+
if re.nil?
|
132
|
+
matched.empty? && eol?
|
133
|
+
elsif re.inspect.start_with?('/^') # was it REALLY at the beginning of the line?..
|
134
|
+
@scanner.pos == matched.length && matched =~ re
|
135
|
+
else
|
136
|
+
matched =~ re
|
137
|
+
end
|
132
138
|
end
|
133
139
|
|
134
140
|
def matched?(re)
|
@@ -144,6 +150,12 @@ module Infoboxer
|
|
144
150
|
fail(ParsingError, "#{text} at line #{@lineno}:\n\t#{current}")
|
145
151
|
end
|
146
152
|
|
153
|
+
def unscan_matched!
|
154
|
+
return unless @matched
|
155
|
+
@scanner.pos -= @matched.size
|
156
|
+
@rest = nil
|
157
|
+
end
|
158
|
+
|
147
159
|
private
|
148
160
|
|
149
161
|
# we make heavy use of #matched and #rest, so it's wiser to memoize them
|
data/lib/infoboxer/parser/table.rb
CHANGED
@@ -16,8 +16,9 @@ module Infoboxer
|
|
16
16
|
|
17
17
|
@context.next!
|
18
18
|
|
19
|
-
|
19
|
+
guarded_loop do
|
20
20
|
table_next_line(table) or break
|
21
|
+
log 'Next table row'
|
21
22
|
@context.next!
|
22
23
|
end
|
23
24
|
|
@@ -66,9 +67,18 @@ module Infoboxer
|
|
66
67
|
log 'Table caption found'
|
67
68
|
@context.skip(/^\s*\|\+\s*/)
|
68
69
|
|
70
|
+
params = if @context.check(/[^|{|\[]+\|([^\|]|$)/)
|
71
|
+
parse_params(@context.scan_until(/\|/))
|
72
|
+
else
|
73
|
+
{}
|
74
|
+
end
|
75
|
+
|
69
76
|
children = inline(/^\s*([|!]|{\|)/)
|
70
|
-
|
71
|
-
|
77
|
+
if @context.matched
|
78
|
+
@context.unscan_matched!
|
79
|
+
@context.prev! # compensate next! which will be done in table()
|
80
|
+
end
|
81
|
+
table.push_children(TableCaption.new(children.strip, params))
|
72
82
|
end
|
73
83
|
|
74
84
|
def table_cells(table, cell_class = TableCell)
|
data/lib/infoboxer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: infoboxer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Shepelev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-02-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -81,6 +81,7 @@ files:
|
|
81
81
|
- ".yardopts"
|
82
82
|
- CHANGELOG.md
|
83
83
|
- CONTRIBUTING.md
|
84
|
+
- Gemfile.lock
|
84
85
|
- LICENSE.txt
|
85
86
|
- Parsing.md
|
86
87
|
- README.md
|
@@ -170,8 +171,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
170
171
|
- !ruby/object:Gem::Version
|
171
172
|
version: '0'
|
172
173
|
requirements: []
|
173
|
-
|
174
|
-
rubygems_version: 2.6.14
|
174
|
+
rubygems_version: 3.0.3
|
175
175
|
signing_key:
|
176
176
|
specification_version: 4
|
177
177
|
summary: MediaWiki client and parser, targeting information extraction.
|