infoboxer 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/ci.yml +32 -0
- data/.rubocop_todo.yml +0 -15
- data/CHANGELOG.md +43 -0
- data/Gemfile.lock +172 -0
- data/README.md +1 -1
- data/infoboxer.gemspec +1 -1
- data/lib/infoboxer.rb +23 -11
- data/lib/infoboxer/core_ext.rb +1 -1
- data/lib/infoboxer/definitions/en.wikipedia.org.rb +3 -1
- data/lib/infoboxer/media_wiki.rb +83 -65
- data/lib/infoboxer/media_wiki/page.rb +10 -1
- data/lib/infoboxer/media_wiki/traits.rb +69 -22
- data/lib/infoboxer/navigation.rb +7 -1
- data/lib/infoboxer/navigation/lookup.rb +15 -7
- data/lib/infoboxer/navigation/sections.rb +27 -9
- data/lib/infoboxer/navigation/selector.rb +14 -6
- data/lib/infoboxer/navigation/shortcuts.rb +1 -1
- data/lib/infoboxer/navigation/wikipath.rb +1 -1
- data/lib/infoboxer/parser.rb +2 -2
- data/lib/infoboxer/parser/context.rb +23 -9
- data/lib/infoboxer/parser/html.rb +1 -1
- data/lib/infoboxer/parser/image.rb +2 -2
- data/lib/infoboxer/parser/inline.rb +50 -7
- data/lib/infoboxer/parser/paragraphs.rb +3 -3
- data/lib/infoboxer/parser/table.rb +33 -17
- data/lib/infoboxer/parser/template.rb +5 -4
- data/lib/infoboxer/parser/util.rb +2 -1
- data/lib/infoboxer/templates.rb +2 -0
- data/lib/infoboxer/templates/base.rb +2 -0
- data/lib/infoboxer/templates/set.rb +1 -1
- data/lib/infoboxer/tree.rb +2 -2
- data/lib/infoboxer/tree/compound.rb +3 -3
- data/lib/infoboxer/tree/document.rb +1 -1
- data/lib/infoboxer/tree/gallery.rb +12 -0
- data/lib/infoboxer/tree/html.rb +3 -3
- data/lib/infoboxer/tree/image.rb +4 -4
- data/lib/infoboxer/tree/inline.rb +3 -3
- data/lib/infoboxer/tree/linkable.rb +6 -1
- data/lib/infoboxer/tree/list.rb +4 -5
- data/lib/infoboxer/tree/math.rb +2 -3
- data/lib/infoboxer/tree/node.rb +4 -4
- data/lib/infoboxer/tree/nodes.rb +51 -7
- data/lib/infoboxer/tree/paragraphs.rb +1 -1
- data/lib/infoboxer/tree/ref.rb +1 -1
- data/lib/infoboxer/tree/table.rb +4 -4
- data/lib/infoboxer/tree/template.rb +18 -5
- data/lib/infoboxer/tree/text.rb +11 -11
- data/lib/infoboxer/tree/wikilink.rb +16 -8
- data/lib/infoboxer/version.rb +4 -3
- data/lib/infoboxer/wiki_path.rb +12 -1
- data/regression/pages/2012_bdo_world_darts_championship.wiki +941 -0
- data/regression/pages/progress_wrestling.wiki +1308 -0
- metadata +12 -8
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Infoboxer
|
4
4
|
class Parser
|
@@ -11,7 +11,7 @@ module Infoboxer
|
|
11
11
|
|
12
12
|
path = @context.scan_until(/\||\]\]/)
|
13
13
|
attrs = @context.matched == '|' ? image_attrs : {}
|
14
|
-
Tree::Image.new(path, attrs)
|
14
|
+
Tree::Image.new(path, **attrs)
|
15
15
|
end
|
16
16
|
|
17
17
|
def image_attrs
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Infoboxer
|
4
4
|
class Parser
|
@@ -18,7 +18,8 @@ module Infoboxer
|
|
18
18
|
|
19
19
|
if @context.eof?
|
20
20
|
break unless until_pattern
|
21
|
-
|
21
|
+
|
22
|
+
@context.fail!("#{until_pattern.source} not found, starting from #{start}")
|
22
23
|
end
|
23
24
|
|
24
25
|
if @context.eol?
|
@@ -35,7 +36,7 @@ module Infoboxer
|
|
35
36
|
guarded_loop do
|
36
37
|
# FIXME: quick and UGLY IS HELL JUST TRYING TO MAKE THE SHIT WORK
|
37
38
|
chunk =
|
38
|
-
if @context.inline_eol_sign == /^\]/
|
39
|
+
if @context.inline_eol_sign == /^\]/ # rubocop:disable Style/CaseLikeIf
|
39
40
|
@context.scan_until(re.short_inline_until_cache_brackets[until_pattern])
|
40
41
|
elsif @context.inline_eol_sign == /^\]\]/
|
41
42
|
@context.scan_until(re.short_inline_until_cache_brackets2[until_pattern])
|
@@ -66,7 +67,8 @@ module Infoboxer
|
|
66
67
|
|
67
68
|
if @context.eof?
|
68
69
|
break unless until_pattern
|
69
|
-
|
70
|
+
|
71
|
+
@context.fail!("#{until_pattern.source} not found")
|
70
72
|
end
|
71
73
|
|
72
74
|
if @context.eol?
|
@@ -83,7 +85,7 @@ module Infoboxer
|
|
83
85
|
|
84
86
|
private
|
85
87
|
|
86
|
-
def inline_formatting(match)
|
88
|
+
def inline_formatting(match) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/AbcSize
|
87
89
|
case match
|
88
90
|
when "'''''"
|
89
91
|
BoldItalic.new(short_inline(/'''''/))
|
@@ -109,6 +111,8 @@ module Infoboxer
|
|
109
111
|
reference(Regexp.last_match(1))
|
110
112
|
when /<math>/
|
111
113
|
math
|
114
|
+
when /<gallery([^>]*)>/
|
115
|
+
gallery(Regexp.last_match(1))
|
112
116
|
when '<'
|
113
117
|
html || Text.new(match) # it was not HTML, just accidental <
|
114
118
|
else
|
@@ -126,8 +130,18 @@ module Infoboxer
|
|
126
130
|
caption = inline(/\]\]/)
|
127
131
|
@context.pop_eol_sign
|
128
132
|
end
|
133
|
+
name, namespace = link.split(':', 2).reverse
|
134
|
+
lnk, params =
|
135
|
+
if @context.traits.namespace?(namespace)
|
136
|
+
[link, {namespace: namespace}]
|
137
|
+
elsif @context.traits.interwiki?(namespace)
|
138
|
+
[name, {interwiki: namespace}]
|
139
|
+
else
|
140
|
+
[link, {}]
|
141
|
+
end
|
129
142
|
|
130
|
-
|
143
|
+
puts @context.rest if lnk.nil?
|
144
|
+
Wikilink.new(lnk, caption, **params)
|
131
145
|
end
|
132
146
|
|
133
147
|
# http://en.wikipedia.org/wiki/Help:Link#External_links
|
@@ -145,7 +159,7 @@ module Infoboxer
|
|
145
159
|
|
146
160
|
def reference(param_str, closed = false)
|
147
161
|
children = closed ? Nodes[] : long_inline(%r{</ref>})
|
148
|
-
Ref.new(children, parse_params(param_str))
|
162
|
+
Ref.new(children, **parse_params(param_str))
|
149
163
|
end
|
150
164
|
|
151
165
|
def math
|
@@ -159,6 +173,35 @@ module Infoboxer
|
|
159
173
|
Text.new(@context.scan_continued_until(%r{</nowiki>}))
|
160
174
|
end
|
161
175
|
end
|
176
|
+
|
177
|
+
def gallery(tag_rest)
|
178
|
+
params = parse_params(tag_rest)
|
179
|
+
images = []
|
180
|
+
guarded_loop do
|
181
|
+
@context.next! if @context.eol?
|
182
|
+
path = @context.scan_until(%r{</gallery>|\||$})
|
183
|
+
attrs = @context.matched == '|' ? gallery_image_attrs : {}
|
184
|
+
unless path.empty?
|
185
|
+
# FIXME: what if path NOT matches the namespace?
|
186
|
+
images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), **attrs)
|
187
|
+
end
|
188
|
+
break if @context.matched == '</gallery>'
|
189
|
+
end
|
190
|
+
Gallery.new(images, **params)
|
191
|
+
end
|
192
|
+
|
193
|
+
def gallery_image_attrs
|
194
|
+
nodes = []
|
195
|
+
|
196
|
+
guarded_loop do
|
197
|
+
nodes << short_inline(%r{\||</gallery>})
|
198
|
+
break if @context.eol? || @context.matched?(%r{</gallery>})
|
199
|
+
end
|
200
|
+
|
201
|
+
nodes.map(&method(:image_attr))
|
202
|
+
.inject(&:merge)
|
203
|
+
.reject { |_k, v| v.nil? || v.empty? }
|
204
|
+
end
|
162
205
|
end
|
163
206
|
|
164
207
|
require_relative 'image'
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Infoboxer
|
4
4
|
class Parser
|
@@ -14,7 +14,7 @@ module Infoboxer
|
|
14
14
|
|
15
15
|
@context.next!
|
16
16
|
end
|
17
|
-
nodes
|
17
|
+
nodes
|
18
18
|
end
|
19
19
|
|
20
20
|
private
|
@@ -25,7 +25,7 @@ module Infoboxer
|
|
25
25
|
heading(Regexp.last_match[:text], Regexp.last_match[:level])
|
26
26
|
when /^\s*{\|/
|
27
27
|
table
|
28
|
-
when /^[
|
28
|
+
when /^[*\#:;]./
|
29
29
|
list(until_pattern)
|
30
30
|
when /^-{4,}/
|
31
31
|
HR.new
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Infoboxer
|
4
4
|
class Parser
|
@@ -14,12 +14,13 @@ module Infoboxer
|
|
14
14
|
|
15
15
|
prms = table_params
|
16
16
|
log "Table params found #{prms}"
|
17
|
-
table = Tree::Table.new(Nodes[], prms)
|
17
|
+
table = Tree::Table.new(Nodes[], **prms)
|
18
18
|
|
19
19
|
@context.next!
|
20
20
|
|
21
|
-
|
21
|
+
guarded_loop do
|
22
22
|
table_next_line(table) or break
|
23
|
+
log 'Next table row'
|
23
24
|
@context.next!
|
24
25
|
end
|
25
26
|
|
@@ -52,6 +53,9 @@ module Infoboxer
|
|
52
53
|
table_template(table)
|
53
54
|
when nil
|
54
55
|
return false
|
56
|
+
when /^(?<level>={2,})\s*(?<text>.+?)\s*\k<level>$/ # heading implicitly closes the table
|
57
|
+
@context.prev!
|
58
|
+
return false
|
55
59
|
else
|
56
60
|
return table_cell_cont(table)
|
57
61
|
end
|
@@ -61,16 +65,25 @@ module Infoboxer
|
|
61
65
|
|
62
66
|
def table_row(table, param_str)
|
63
67
|
log 'Table row found'
|
64
|
-
table.push_children(TableRow.new(Nodes[], parse_params(param_str)))
|
68
|
+
table.push_children(TableRow.new(Nodes[], **parse_params(param_str)))
|
65
69
|
end
|
66
70
|
|
67
71
|
def table_caption(table)
|
68
72
|
log 'Table caption found'
|
69
73
|
@context.skip(/^\s*\|\+\s*/)
|
70
74
|
|
75
|
+
params = if @context.check(/[^|{\[]+\|([^|]|$)/)
|
76
|
+
parse_params(@context.scan_until(/\|/))
|
77
|
+
else
|
78
|
+
{}
|
79
|
+
end
|
80
|
+
|
71
81
|
children = inline(/^\s*([|!]|{\|)/)
|
72
|
-
|
73
|
-
|
82
|
+
if @context.matched
|
83
|
+
@context.unscan_matched!
|
84
|
+
@context.prev! # compensate next! which will be done in table()
|
85
|
+
end
|
86
|
+
table.push_children(TableCaption.new(children.strip, **params))
|
74
87
|
end
|
75
88
|
|
76
89
|
def table_cells(table, cell_class = TableCell)
|
@@ -80,13 +93,13 @@ module Infoboxer
|
|
80
93
|
|
81
94
|
@context.skip(/\s*[!|]\s*/)
|
82
95
|
guarded_loop do
|
83
|
-
params = if @context.check(/[^|{
|
96
|
+
params = if @context.check(/[^|{\[]+\|([^|]|$)/)
|
84
97
|
parse_params(@context.scan_until(/\|/))
|
85
98
|
else
|
86
99
|
{}
|
87
100
|
end
|
88
101
|
content = short_inline(/(\|\||!!)/)
|
89
|
-
row.push_children(cell_class.new(content, params))
|
102
|
+
row.push_children(cell_class.new(content, **params))
|
90
103
|
break if @context.eol?
|
91
104
|
end
|
92
105
|
end
|
@@ -94,15 +107,17 @@ module Infoboxer
|
|
94
107
|
def table_template(table)
|
95
108
|
contents = paragraph(/^\s*([|!]|{\|)/).to_templates?
|
96
109
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
110
|
+
# Note: in fact, without full template parsing, we CAN'T know what level to insert it:
|
111
|
+
# Template can be something like <tr><td>Foo</td></tr>
|
112
|
+
# But for consistency, we insert all templates inside the <td>, forcing this <td>
|
113
|
+
# to exist.
|
114
|
+
|
115
|
+
table.push_children(TableRow.new) unless table.children.last.is_a?(TableRow)
|
116
|
+
row = table.children.last
|
117
|
+
row.push_children(TableCell.new) unless row.children.last.is_a?(BaseCell)
|
118
|
+
cell = row.children.last
|
119
|
+
|
120
|
+
cell.push_children(*contents)
|
106
121
|
end
|
107
122
|
|
108
123
|
# Good news, everyone! Table can be IMPLICITLY closed when it's
|
@@ -122,6 +137,7 @@ module Infoboxer
|
|
122
137
|
unless container
|
123
138
|
# return "table not continued" unless row is empty
|
124
139
|
return true if @context.current.empty?
|
140
|
+
|
125
141
|
@context.prev!
|
126
142
|
return false
|
127
143
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Infoboxer
|
4
4
|
class Parser
|
@@ -29,8 +29,8 @@ module Infoboxer
|
|
29
29
|
|
30
30
|
guarded_loop do
|
31
31
|
@context.next! while @context.eol?
|
32
|
-
if @context.check(/\s*([
|
33
|
-
name = @context.scan(/\s*([
|
32
|
+
if @context.check(/\s*([^=}|<]+)\s*=\s*/)
|
33
|
+
name = @context.scan(/\s*([^=]+)/).strip
|
34
34
|
@context.skip(/\s*=\s*/)
|
35
35
|
else
|
36
36
|
name = num
|
@@ -46,13 +46,14 @@ module Infoboxer
|
|
46
46
|
log 'Variable value found'
|
47
47
|
|
48
48
|
break if @context.eat_matched?('}}')
|
49
|
+
|
49
50
|
@context.eof? and @context.fail!("Unexpected break of template variables: #{res}")
|
50
51
|
end
|
51
52
|
res
|
52
53
|
end
|
53
54
|
|
54
55
|
def sanitize_value(nodes)
|
55
|
-
nodes.pop if nodes.last.is_a?(Pre) && nodes.last.text =~ /^\s*$/ # FIXME: dirty!
|
56
|
+
nodes.pop if (nodes.last.is_a?(Pre) || nodes.last.is_a?(Text)) && nodes.last.text =~ /^\s*$/ # FIXME: dirty!
|
56
57
|
nodes
|
57
58
|
end
|
58
59
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Infoboxer
|
4
4
|
class Parser
|
@@ -12,6 +12,7 @@ module Infoboxer
|
|
12
12
|
\[[a-z]+:// | # external link
|
13
13
|
<nowiki[^>]*> | # nowiki
|
14
14
|
<ref[^>]*> | # reference
|
15
|
+
<gallery[^>]*>| # gallery
|
15
16
|
<math> | # math
|
16
17
|
< # HTML tag
|
17
18
|
))x
|
data/lib/infoboxer/templates.rb
CHANGED
data/lib/infoboxer/tree.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Infoboxer
|
4
4
|
# Infoboxer provides you with tree structure of the Wikipedia page,
|
@@ -63,7 +63,7 @@ module Infoboxer
|
|
63
63
|
require_relative 'tree/nodes'
|
64
64
|
|
65
65
|
%w[text compound inline
|
66
|
-
image html paragraphs list template table ref math
|
66
|
+
image gallery html paragraphs list template table ref math
|
67
67
|
document].each do |type|
|
68
68
|
require_relative "tree/#{type}"
|
69
69
|
end
|
@@ -1,11 +1,11 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Infoboxer
|
4
4
|
module Tree
|
5
5
|
# Base class for all nodes with children.
|
6
6
|
class Compound < Node
|
7
|
-
def initialize(children = Nodes.new, params
|
8
|
-
super(params)
|
7
|
+
def initialize(children = Nodes.new, **params)
|
8
|
+
super(**params)
|
9
9
|
@children = Nodes[*children]
|
10
10
|
@children.each { |c| c.parent = self }
|
11
11
|
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Infoboxer
|
4
|
+
module Tree
|
5
|
+
# Represents gallery of images (contents of `<gallery>` special tag).
|
6
|
+
#
|
7
|
+
# See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Help:Gallery_tag)
|
8
|
+
# for explanation of attributes.
|
9
|
+
class Gallery < Compound
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
data/lib/infoboxer/tree/html.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Infoboxer
|
4
4
|
module Tree
|
@@ -13,7 +13,7 @@ module Infoboxer
|
|
13
13
|
# Represents HTML tag, surrounding some contents.
|
14
14
|
class HTMLTag < Compound
|
15
15
|
def initialize(tag, attrs, children = Nodes.new)
|
16
|
-
super(children, attrs)
|
16
|
+
super(children, **attrs)
|
17
17
|
@tag = tag
|
18
18
|
end
|
19
19
|
|
@@ -45,7 +45,7 @@ module Infoboxer
|
|
45
45
|
#
|
46
46
|
class HTMLOpeningTag < Node
|
47
47
|
def initialize(tag, attrs)
|
48
|
-
super(attrs)
|
48
|
+
super(**attrs)
|
49
49
|
@tag = tag
|
50
50
|
end
|
51
51
|
|
data/lib/infoboxer/tree/image.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Infoboxer
|
4
4
|
module Tree
|
@@ -7,9 +7,9 @@ module Infoboxer
|
|
7
7
|
# See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Wikipedia:Extended_image_syntax)
|
8
8
|
# for explanation of attributes.
|
9
9
|
class Image < Node
|
10
|
-
def initialize(path,
|
11
|
-
@caption =
|
12
|
-
super(
|
10
|
+
def initialize(path, caption: nil, **params)
|
11
|
+
@caption = caption
|
12
|
+
super(path: path, **params)
|
13
13
|
end
|
14
14
|
|
15
15
|
# Image caption. Can have (sometimes many) other nodes inside.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Infoboxer
|
4
4
|
module Tree
|
@@ -17,8 +17,8 @@ module Infoboxer
|
|
17
17
|
|
18
18
|
# Base class for internal/external links,
|
19
19
|
class Link < Compound
|
20
|
-
def initialize(link, label = nil)
|
21
|
-
super(label || Nodes.new([Text.new(link)]), link: link)
|
20
|
+
def initialize(link, label = nil, **attr)
|
21
|
+
super(label || Nodes.new([Text.new(link)]), link: link, **attr)
|
22
22
|
end
|
23
23
|
|
24
24
|
# @!attribute [r] link
|