infoboxer 0.3.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +32 -0
- data/CHANGELOG.md +13 -0
- data/Gemfile.lock +97 -75
- data/README.md +1 -1
- data/lib/infoboxer.rb +7 -5
- data/lib/infoboxer/core_ext.rb +2 -0
- data/lib/infoboxer/definitions/en.wikipedia.org.rb +2 -0
- data/lib/infoboxer/media_wiki.rb +3 -1
- data/lib/infoboxer/media_wiki/page.rb +2 -0
- data/lib/infoboxer/media_wiki/traits.rb +4 -1
- data/lib/infoboxer/navigation.rb +2 -0
- data/lib/infoboxer/navigation/lookup.rb +5 -5
- data/lib/infoboxer/navigation/sections.rb +5 -1
- data/lib/infoboxer/navigation/selector.rb +3 -1
- data/lib/infoboxer/navigation/shortcuts.rb +2 -0
- data/lib/infoboxer/navigation/wikipath.rb +2 -0
- data/lib/infoboxer/parser.rb +3 -1
- data/lib/infoboxer/parser/context.rb +10 -6
- data/lib/infoboxer/parser/html.rb +2 -0
- data/lib/infoboxer/parser/image.rb +3 -1
- data/lib/infoboxer/parser/inline.rb +8 -4
- data/lib/infoboxer/parser/paragraphs.rb +3 -1
- data/lib/infoboxer/parser/table.rb +23 -15
- data/lib/infoboxer/parser/template.rb +3 -0
- data/lib/infoboxer/parser/util.rb +2 -0
- data/lib/infoboxer/templates.rb +2 -0
- data/lib/infoboxer/templates/base.rb +2 -0
- data/lib/infoboxer/templates/set.rb +2 -0
- data/lib/infoboxer/tree.rb +2 -0
- data/lib/infoboxer/tree/compound.rb +3 -1
- data/lib/infoboxer/tree/document.rb +2 -0
- data/lib/infoboxer/tree/gallery.rb +2 -0
- data/lib/infoboxer/tree/html.rb +4 -2
- data/lib/infoboxer/tree/image.rb +3 -1
- data/lib/infoboxer/tree/inline.rb +2 -0
- data/lib/infoboxer/tree/linkable.rb +2 -0
- data/lib/infoboxer/tree/list.rb +4 -2
- data/lib/infoboxer/tree/math.rb +2 -0
- data/lib/infoboxer/tree/node.rb +3 -1
- data/lib/infoboxer/tree/nodes.rb +16 -4
- data/lib/infoboxer/tree/paragraphs.rb +2 -0
- data/lib/infoboxer/tree/ref.rb +2 -0
- data/lib/infoboxer/tree/table.rb +5 -3
- data/lib/infoboxer/tree/template.rb +3 -1
- data/lib/infoboxer/tree/text.rb +11 -9
- data/lib/infoboxer/tree/wikilink.rb +3 -0
- data/lib/infoboxer/version.rb +4 -2
- data/lib/infoboxer/wiki_path.rb +2 -0
- data/regression/pages/2012_bdo_world_darts_championship.wiki +941 -0
- data/regression/pages/progress_wrestling.wiki +1308 -0
- metadata +6 -3
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
module Navigation
|
3
5
|
# `Sections` module provides logical view on document structure.
|
@@ -75,7 +77,7 @@ module Infoboxer
|
|
75
77
|
when 1
|
76
78
|
@sections.select { |s| names.first === s.heading.text_ }
|
77
79
|
else
|
78
|
-
@sections.select { |s| names.first === s.heading.text_ }.sections(*names[1
|
80
|
+
@sections.select { |s| names.first === s.heading.text_ }.sections(*names[1..])
|
79
81
|
end
|
80
82
|
end
|
81
83
|
|
@@ -83,6 +85,7 @@ module Infoboxer
|
|
83
85
|
sections = names.map { |name|
|
84
86
|
heading = lookup_children(:Heading, text_: name).first
|
85
87
|
next unless heading
|
88
|
+
|
86
89
|
body = heading.next_siblings
|
87
90
|
.take_while { |n| !n.is_a?(Tree::Heading) || n.level > heading.level }
|
88
91
|
|
@@ -104,6 +107,7 @@ module Infoboxer
|
|
104
107
|
def make_sections
|
105
108
|
res = Tree::Nodes[]
|
106
109
|
return res if headings.empty?
|
110
|
+
|
107
111
|
level = headings.first.level
|
108
112
|
|
109
113
|
children
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
module Navigation
|
3
5
|
module Lookup
|
@@ -8,7 +10,7 @@ module Infoboxer
|
|
8
10
|
def initialize(*arg, &block)
|
9
11
|
@arg = [arg, block].flatten.compact.map(&method(:sym_to_class))
|
10
12
|
@arg.each do |a|
|
11
|
-
a.
|
13
|
+
a.compact! if a.is_a?(Hash)
|
12
14
|
end
|
13
15
|
end
|
14
16
|
|
data/lib/infoboxer/parser.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'ostruct'
|
2
4
|
require 'logger'
|
3
5
|
|
@@ -52,7 +54,7 @@ module Infoboxer
|
|
52
54
|
def initialize(context)
|
53
55
|
@context = context
|
54
56
|
@re = OpenStruct.new(make_regexps)
|
55
|
-
@logger = Logger.new(
|
57
|
+
@logger = Logger.new($stdout).tap { |l| l.level = Logger::FATAL }
|
56
58
|
end
|
57
59
|
|
58
60
|
require_relative 'parser/inline'
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'strscan'
|
2
4
|
|
3
5
|
module Infoboxer
|
@@ -8,7 +10,7 @@ module Infoboxer
|
|
8
10
|
|
9
11
|
def initialize(text, traits = nil)
|
10
12
|
@lines = text
|
11
|
-
.gsub(
|
13
|
+
.gsub(/<!--.*?-->/m, '') # FIXME: will also kill comments inside <nowiki> tag
|
12
14
|
.split(/[\r\n]/)
|
13
15
|
@lineno = -1
|
14
16
|
@traits = traits || MediaWiki::Traits.default
|
@@ -19,22 +21,23 @@ module Infoboxer
|
|
19
21
|
attr_reader :next_lines
|
20
22
|
|
21
23
|
def colno
|
22
|
-
@scanner
|
24
|
+
@scanner&.pos || 0
|
23
25
|
end
|
24
26
|
|
25
27
|
def matched
|
26
|
-
@matched ||= @scanner
|
28
|
+
@matched ||= @scanner&.matched
|
27
29
|
end
|
28
30
|
|
29
31
|
# check which works only once
|
30
32
|
def eat_matched?(str)
|
31
33
|
return false unless matched == str
|
34
|
+
|
32
35
|
@matched = 'DUMMY'
|
33
36
|
true
|
34
37
|
end
|
35
38
|
|
36
39
|
def rest
|
37
|
-
@rest ||= @scanner
|
40
|
+
@rest ||= @scanner&.rest
|
38
41
|
end
|
39
42
|
|
40
43
|
alias_method :current, :rest
|
@@ -107,7 +110,7 @@ module Infoboxer
|
|
107
110
|
end
|
108
111
|
|
109
112
|
def scan_continued_until(re, leave_pattern = false)
|
110
|
-
res = ''
|
113
|
+
res = +''
|
111
114
|
|
112
115
|
loop do
|
113
116
|
chunk = _scan_until(re)
|
@@ -152,6 +155,7 @@ module Infoboxer
|
|
152
155
|
|
153
156
|
def unscan_matched!
|
154
157
|
return unless @matched
|
158
|
+
|
155
159
|
@scanner.pos -= @matched.size
|
156
160
|
@rest = nil
|
157
161
|
end
|
@@ -173,7 +177,7 @@ module Infoboxer
|
|
173
177
|
def shift(amount)
|
174
178
|
@lineno += amount
|
175
179
|
current = @lines[lineno]
|
176
|
-
@next_lines = @lines[(lineno + 1)
|
180
|
+
@next_lines = @lines[(lineno + 1)..]
|
177
181
|
if current
|
178
182
|
@scanner.string = current
|
179
183
|
@rest = current
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
class Parser
|
3
5
|
module Image
|
@@ -9,7 +11,7 @@ module Infoboxer
|
|
9
11
|
|
10
12
|
path = @context.scan_until(/\||\]\]/)
|
11
13
|
attrs = @context.matched == '|' ? image_attrs : {}
|
12
|
-
Tree::Image.new(path, attrs)
|
14
|
+
Tree::Image.new(path, **attrs)
|
13
15
|
end
|
14
16
|
|
15
17
|
def image_attrs
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
class Parser
|
3
5
|
module Inline
|
@@ -16,6 +18,7 @@ module Infoboxer
|
|
16
18
|
|
17
19
|
if @context.eof?
|
18
20
|
break unless until_pattern
|
21
|
+
|
19
22
|
@context.fail!("#{until_pattern.source} not found, starting from #{start}")
|
20
23
|
end
|
21
24
|
|
@@ -33,7 +36,7 @@ module Infoboxer
|
|
33
36
|
guarded_loop do
|
34
37
|
# FIXME: quick and UGLY IS HELL JUST TRYING TO MAKE THE SHIT WORK
|
35
38
|
chunk =
|
36
|
-
if @context.inline_eol_sign == /^\]/
|
39
|
+
if @context.inline_eol_sign == /^\]/ # rubocop:disable Style/CaseLikeIf
|
37
40
|
@context.scan_until(re.short_inline_until_cache_brackets[until_pattern])
|
38
41
|
elsif @context.inline_eol_sign == /^\]\]/
|
39
42
|
@context.scan_until(re.short_inline_until_cache_brackets2[until_pattern])
|
@@ -64,6 +67,7 @@ module Infoboxer
|
|
64
67
|
|
65
68
|
if @context.eof?
|
66
69
|
break unless until_pattern
|
70
|
+
|
67
71
|
@context.fail!("#{until_pattern.source} not found")
|
68
72
|
end
|
69
73
|
|
@@ -155,7 +159,7 @@ module Infoboxer
|
|
155
159
|
|
156
160
|
def reference(param_str, closed = false)
|
157
161
|
children = closed ? Nodes[] : long_inline(%r{</ref>})
|
158
|
-
Ref.new(children, parse_params(param_str))
|
162
|
+
Ref.new(children, **parse_params(param_str))
|
159
163
|
end
|
160
164
|
|
161
165
|
def math
|
@@ -179,11 +183,11 @@ module Infoboxer
|
|
179
183
|
attrs = @context.matched == '|' ? gallery_image_attrs : {}
|
180
184
|
unless path.empty?
|
181
185
|
# FIXME: what if path NOT matches the namespace?
|
182
|
-
images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), attrs)
|
186
|
+
images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), **attrs)
|
183
187
|
end
|
184
188
|
break if @context.matched == '</gallery>'
|
185
189
|
end
|
186
|
-
Gallery.new(images, params)
|
190
|
+
Gallery.new(images, **params)
|
187
191
|
end
|
188
192
|
|
189
193
|
def gallery_image_attrs
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
class Parser
|
3
5
|
module Paragraphs
|
@@ -23,7 +25,7 @@ module Infoboxer
|
|
23
25
|
heading(Regexp.last_match[:text], Regexp.last_match[:level])
|
24
26
|
when /^\s*{\|/
|
25
27
|
table
|
26
|
-
when /^[
|
28
|
+
when /^[*\#:;]./
|
27
29
|
list(until_pattern)
|
28
30
|
when /^-{4,}/
|
29
31
|
HR.new
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
class Parser
|
3
5
|
# http://en.wikipedia.org/wiki/Help:Table
|
@@ -12,7 +14,7 @@ module Infoboxer
|
|
12
14
|
|
13
15
|
prms = table_params
|
14
16
|
log "Table params found #{prms}"
|
15
|
-
table = Tree::Table.new(Nodes[], prms)
|
17
|
+
table = Tree::Table.new(Nodes[], **prms)
|
16
18
|
|
17
19
|
@context.next!
|
18
20
|
|
@@ -51,6 +53,9 @@ module Infoboxer
|
|
51
53
|
table_template(table)
|
52
54
|
when nil
|
53
55
|
return false
|
56
|
+
when /^(?<level>={2,})\s*(?<text>.+?)\s*\k<level>$/ # heading implicitly closes the table
|
57
|
+
@context.prev!
|
58
|
+
return false
|
54
59
|
else
|
55
60
|
return table_cell_cont(table)
|
56
61
|
end
|
@@ -60,14 +65,14 @@ module Infoboxer
|
|
60
65
|
|
61
66
|
def table_row(table, param_str)
|
62
67
|
log 'Table row found'
|
63
|
-
table.push_children(TableRow.new(Nodes[], parse_params(param_str)))
|
68
|
+
table.push_children(TableRow.new(Nodes[], **parse_params(param_str)))
|
64
69
|
end
|
65
70
|
|
66
71
|
def table_caption(table)
|
67
72
|
log 'Table caption found'
|
68
73
|
@context.skip(/^\s*\|\+\s*/)
|
69
74
|
|
70
|
-
params = if @context.check(/[^|{
|
75
|
+
params = if @context.check(/[^|{\[]+\|([^|]|$)/)
|
71
76
|
parse_params(@context.scan_until(/\|/))
|
72
77
|
else
|
73
78
|
{}
|
@@ -78,7 +83,7 @@ module Infoboxer
|
|
78
83
|
@context.unscan_matched!
|
79
84
|
@context.prev! # compensate next! which will be done in table()
|
80
85
|
end
|
81
|
-
table.push_children(TableCaption.new(children.strip, params))
|
86
|
+
table.push_children(TableCaption.new(children.strip, **params))
|
82
87
|
end
|
83
88
|
|
84
89
|
def table_cells(table, cell_class = TableCell)
|
@@ -88,13 +93,13 @@ module Infoboxer
|
|
88
93
|
|
89
94
|
@context.skip(/\s*[!|]\s*/)
|
90
95
|
guarded_loop do
|
91
|
-
params = if @context.check(/[^|{
|
96
|
+
params = if @context.check(/[^|{\[]+\|([^|]|$)/)
|
92
97
|
parse_params(@context.scan_until(/\|/))
|
93
98
|
else
|
94
99
|
{}
|
95
100
|
end
|
96
101
|
content = short_inline(/(\|\||!!)/)
|
97
|
-
row.push_children(cell_class.new(content, params))
|
102
|
+
row.push_children(cell_class.new(content, **params))
|
98
103
|
break if @context.eol?
|
99
104
|
end
|
100
105
|
end
|
@@ -102,15 +107,17 @@ module Infoboxer
|
|
102
107
|
def table_template(table)
|
103
108
|
contents = paragraph(/^\s*([|!]|{\|)/).to_templates?
|
104
109
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
110
|
+
# Note: in fact, without full template parsing, we CAN'T know at which level to insert it:
|
111
|
+
# Template can be something like <tr><td>Foo</td></tr>
|
112
|
+
# But for consistency, we insert all templates inside the <td>, forcing this <td>
|
113
|
+
# to exist.
|
114
|
+
|
115
|
+
table.push_children(TableRow.new) unless table.children.last.is_a?(TableRow)
|
116
|
+
row = table.children.last
|
117
|
+
row.push_children(TableCell.new) unless row.children.last.is_a?(BaseCell)
|
118
|
+
cell = row.children.last
|
119
|
+
|
120
|
+
cell.push_children(*contents)
|
114
121
|
end
|
115
122
|
|
116
123
|
# Good news, everyone! Table can be IMPLICITLY closed when it's
|
@@ -130,6 +137,7 @@ module Infoboxer
|
|
130
137
|
unless container
|
131
138
|
# return "table not continued" unless row is empty
|
132
139
|
return true if @context.current.empty?
|
140
|
+
|
133
141
|
@context.prev!
|
134
142
|
return false
|
135
143
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
class Parser
|
3
5
|
module Template
|
@@ -44,6 +46,7 @@ module Infoboxer
|
|
44
46
|
log 'Variable value found'
|
45
47
|
|
46
48
|
break if @context.eat_matched?('}}')
|
49
|
+
|
47
50
|
@context.eof? and @context.fail!("Unexpected break of template variables: #{res}")
|
48
51
|
end
|
49
52
|
res
|
data/lib/infoboxer/templates.rb
CHANGED
data/lib/infoboxer/tree.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
module Tree
|
3
5
|
# Base class for all nodes with children.
|
4
6
|
class Compound < Node
|
5
7
|
def initialize(children = Nodes.new, **params)
|
6
|
-
super(params)
|
8
|
+
super(**params)
|
7
9
|
@children = Nodes[*children]
|
8
10
|
@children.each { |c| c.parent = self }
|
9
11
|
end
|
data/lib/infoboxer/tree/html.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
module Tree
|
3
5
|
module HTMLTagCommons
|
@@ -11,7 +13,7 @@ module Infoboxer
|
|
11
13
|
# Represents HTML tag, surrounding some contents.
|
12
14
|
class HTMLTag < Compound
|
13
15
|
def initialize(tag, attrs, children = Nodes.new)
|
14
|
-
super(children, attrs)
|
16
|
+
super(children, **attrs)
|
15
17
|
@tag = tag
|
16
18
|
end
|
17
19
|
|
@@ -43,7 +45,7 @@ module Infoboxer
|
|
43
45
|
#
|
44
46
|
class HTMLOpeningTag < Node
|
45
47
|
def initialize(tag, attrs)
|
46
|
-
super(attrs)
|
48
|
+
super(**attrs)
|
47
49
|
@tag = tag
|
48
50
|
end
|
49
51
|
|
data/lib/infoboxer/tree/image.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
module Tree
|
3
5
|
# Represents image (or other media file).
|
@@ -7,7 +9,7 @@ module Infoboxer
|
|
7
9
|
class Image < Node
|
8
10
|
def initialize(path, caption: nil, **params)
|
9
11
|
@caption = caption
|
10
|
-
super(
|
12
|
+
super(path: path, **params)
|
11
13
|
end
|
12
14
|
|
13
15
|
# Image caption. Can have (sometimes many) other nodes inside.
|