infoboxer 0.3.3 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +32 -0
- data/CHANGELOG.md +13 -0
- data/Gemfile.lock +97 -75
- data/README.md +1 -1
- data/lib/infoboxer.rb +7 -5
- data/lib/infoboxer/core_ext.rb +2 -0
- data/lib/infoboxer/definitions/en.wikipedia.org.rb +2 -0
- data/lib/infoboxer/media_wiki.rb +3 -1
- data/lib/infoboxer/media_wiki/page.rb +2 -0
- data/lib/infoboxer/media_wiki/traits.rb +4 -1
- data/lib/infoboxer/navigation.rb +2 -0
- data/lib/infoboxer/navigation/lookup.rb +5 -5
- data/lib/infoboxer/navigation/sections.rb +5 -1
- data/lib/infoboxer/navigation/selector.rb +3 -1
- data/lib/infoboxer/navigation/shortcuts.rb +2 -0
- data/lib/infoboxer/navigation/wikipath.rb +2 -0
- data/lib/infoboxer/parser.rb +3 -1
- data/lib/infoboxer/parser/context.rb +10 -6
- data/lib/infoboxer/parser/html.rb +2 -0
- data/lib/infoboxer/parser/image.rb +3 -1
- data/lib/infoboxer/parser/inline.rb +8 -4
- data/lib/infoboxer/parser/paragraphs.rb +3 -1
- data/lib/infoboxer/parser/table.rb +23 -15
- data/lib/infoboxer/parser/template.rb +3 -0
- data/lib/infoboxer/parser/util.rb +2 -0
- data/lib/infoboxer/templates.rb +2 -0
- data/lib/infoboxer/templates/base.rb +2 -0
- data/lib/infoboxer/templates/set.rb +2 -0
- data/lib/infoboxer/tree.rb +2 -0
- data/lib/infoboxer/tree/compound.rb +3 -1
- data/lib/infoboxer/tree/document.rb +2 -0
- data/lib/infoboxer/tree/gallery.rb +2 -0
- data/lib/infoboxer/tree/html.rb +4 -2
- data/lib/infoboxer/tree/image.rb +3 -1
- data/lib/infoboxer/tree/inline.rb +2 -0
- data/lib/infoboxer/tree/linkable.rb +2 -0
- data/lib/infoboxer/tree/list.rb +4 -2
- data/lib/infoboxer/tree/math.rb +2 -0
- data/lib/infoboxer/tree/node.rb +3 -1
- data/lib/infoboxer/tree/nodes.rb +16 -4
- data/lib/infoboxer/tree/paragraphs.rb +2 -0
- data/lib/infoboxer/tree/ref.rb +2 -0
- data/lib/infoboxer/tree/table.rb +5 -3
- data/lib/infoboxer/tree/template.rb +3 -1
- data/lib/infoboxer/tree/text.rb +11 -9
- data/lib/infoboxer/tree/wikilink.rb +3 -0
- data/lib/infoboxer/version.rb +4 -2
- data/lib/infoboxer/wiki_path.rb +2 -0
- data/regression/pages/2012_bdo_world_darts_championship.wiki +941 -0
- data/regression/pages/progress_wrestling.wiki +1308 -0
- metadata +6 -3
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
module Navigation
|
3
5
|
# `Sections` module provides logical view on document structure.
|
@@ -75,7 +77,7 @@ module Infoboxer
|
|
75
77
|
when 1
|
76
78
|
@sections.select { |s| names.first === s.heading.text_ }
|
77
79
|
else
|
78
|
-
@sections.select { |s| names.first === s.heading.text_ }.sections(*names[1
|
80
|
+
@sections.select { |s| names.first === s.heading.text_ }.sections(*names[1..])
|
79
81
|
end
|
80
82
|
end
|
81
83
|
|
@@ -83,6 +85,7 @@ module Infoboxer
|
|
83
85
|
sections = names.map { |name|
|
84
86
|
heading = lookup_children(:Heading, text_: name).first
|
85
87
|
next unless heading
|
88
|
+
|
86
89
|
body = heading.next_siblings
|
87
90
|
.take_while { |n| !n.is_a?(Tree::Heading) || n.level > heading.level }
|
88
91
|
|
@@ -104,6 +107,7 @@ module Infoboxer
|
|
104
107
|
def make_sections
|
105
108
|
res = Tree::Nodes[]
|
106
109
|
return res if headings.empty?
|
110
|
+
|
107
111
|
level = headings.first.level
|
108
112
|
|
109
113
|
children
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
module Navigation
|
3
5
|
module Lookup
|
@@ -8,7 +10,7 @@ module Infoboxer
|
|
8
10
|
def initialize(*arg, &block)
|
9
11
|
@arg = [arg, block].flatten.compact.map(&method(:sym_to_class))
|
10
12
|
@arg.each do |a|
|
11
|
-
a.
|
13
|
+
a.compact! if a.is_a?(Hash)
|
12
14
|
end
|
13
15
|
end
|
14
16
|
|
data/lib/infoboxer/parser.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'ostruct'
|
2
4
|
require 'logger'
|
3
5
|
|
@@ -52,7 +54,7 @@ module Infoboxer
|
|
52
54
|
def initialize(context)
|
53
55
|
@context = context
|
54
56
|
@re = OpenStruct.new(make_regexps)
|
55
|
-
@logger = Logger.new(
|
57
|
+
@logger = Logger.new($stdout).tap { |l| l.level = Logger::FATAL }
|
56
58
|
end
|
57
59
|
|
58
60
|
require_relative 'parser/inline'
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'strscan'
|
2
4
|
|
3
5
|
module Infoboxer
|
@@ -8,7 +10,7 @@ module Infoboxer
|
|
8
10
|
|
9
11
|
def initialize(text, traits = nil)
|
10
12
|
@lines = text
|
11
|
-
.gsub(
|
13
|
+
.gsub(/<!--.*?-->/m, '') # FIXME: will also kill comments inside <nowiki> tag
|
12
14
|
.split(/[\r\n]/)
|
13
15
|
@lineno = -1
|
14
16
|
@traits = traits || MediaWiki::Traits.default
|
@@ -19,22 +21,23 @@ module Infoboxer
|
|
19
21
|
attr_reader :next_lines
|
20
22
|
|
21
23
|
def colno
|
22
|
-
@scanner
|
24
|
+
@scanner&.pos || 0
|
23
25
|
end
|
24
26
|
|
25
27
|
def matched
|
26
|
-
@matched ||= @scanner
|
28
|
+
@matched ||= @scanner&.matched
|
27
29
|
end
|
28
30
|
|
29
31
|
# check which works only once
|
30
32
|
def eat_matched?(str)
|
31
33
|
return false unless matched == str
|
34
|
+
|
32
35
|
@matched = 'DUMMY'
|
33
36
|
true
|
34
37
|
end
|
35
38
|
|
36
39
|
def rest
|
37
|
-
@rest ||= @scanner
|
40
|
+
@rest ||= @scanner&.rest
|
38
41
|
end
|
39
42
|
|
40
43
|
alias_method :current, :rest
|
@@ -107,7 +110,7 @@ module Infoboxer
|
|
107
110
|
end
|
108
111
|
|
109
112
|
def scan_continued_until(re, leave_pattern = false)
|
110
|
-
res = ''
|
113
|
+
res = +''
|
111
114
|
|
112
115
|
loop do
|
113
116
|
chunk = _scan_until(re)
|
@@ -152,6 +155,7 @@ module Infoboxer
|
|
152
155
|
|
153
156
|
def unscan_matched!
|
154
157
|
return unless @matched
|
158
|
+
|
155
159
|
@scanner.pos -= @matched.size
|
156
160
|
@rest = nil
|
157
161
|
end
|
@@ -173,7 +177,7 @@ module Infoboxer
|
|
173
177
|
def shift(amount)
|
174
178
|
@lineno += amount
|
175
179
|
current = @lines[lineno]
|
176
|
-
@next_lines = @lines[(lineno + 1)
|
180
|
+
@next_lines = @lines[(lineno + 1)..]
|
177
181
|
if current
|
178
182
|
@scanner.string = current
|
179
183
|
@rest = current
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
class Parser
|
3
5
|
module Image
|
@@ -9,7 +11,7 @@ module Infoboxer
|
|
9
11
|
|
10
12
|
path = @context.scan_until(/\||\]\]/)
|
11
13
|
attrs = @context.matched == '|' ? image_attrs : {}
|
12
|
-
Tree::Image.new(path, attrs)
|
14
|
+
Tree::Image.new(path, **attrs)
|
13
15
|
end
|
14
16
|
|
15
17
|
def image_attrs
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
class Parser
|
3
5
|
module Inline
|
@@ -16,6 +18,7 @@ module Infoboxer
|
|
16
18
|
|
17
19
|
if @context.eof?
|
18
20
|
break unless until_pattern
|
21
|
+
|
19
22
|
@context.fail!("#{until_pattern.source} not found, starting from #{start}")
|
20
23
|
end
|
21
24
|
|
@@ -33,7 +36,7 @@ module Infoboxer
|
|
33
36
|
guarded_loop do
|
34
37
|
# FIXME: quick and UGLY IS HELL JUST TRYING TO MAKE THE SHIT WORK
|
35
38
|
chunk =
|
36
|
-
if @context.inline_eol_sign == /^\]/
|
39
|
+
if @context.inline_eol_sign == /^\]/ # rubocop:disable Style/CaseLikeIf
|
37
40
|
@context.scan_until(re.short_inline_until_cache_brackets[until_pattern])
|
38
41
|
elsif @context.inline_eol_sign == /^\]\]/
|
39
42
|
@context.scan_until(re.short_inline_until_cache_brackets2[until_pattern])
|
@@ -64,6 +67,7 @@ module Infoboxer
|
|
64
67
|
|
65
68
|
if @context.eof?
|
66
69
|
break unless until_pattern
|
70
|
+
|
67
71
|
@context.fail!("#{until_pattern.source} not found")
|
68
72
|
end
|
69
73
|
|
@@ -155,7 +159,7 @@ module Infoboxer
|
|
155
159
|
|
156
160
|
def reference(param_str, closed = false)
|
157
161
|
children = closed ? Nodes[] : long_inline(%r{</ref>})
|
158
|
-
Ref.new(children, parse_params(param_str))
|
162
|
+
Ref.new(children, **parse_params(param_str))
|
159
163
|
end
|
160
164
|
|
161
165
|
def math
|
@@ -179,11 +183,11 @@ module Infoboxer
|
|
179
183
|
attrs = @context.matched == '|' ? gallery_image_attrs : {}
|
180
184
|
unless path.empty?
|
181
185
|
# FIXME: what if path NOT matches the namespace?
|
182
|
-
images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), attrs)
|
186
|
+
images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), **attrs)
|
183
187
|
end
|
184
188
|
break if @context.matched == '</gallery>'
|
185
189
|
end
|
186
|
-
Gallery.new(images, params)
|
190
|
+
Gallery.new(images, **params)
|
187
191
|
end
|
188
192
|
|
189
193
|
def gallery_image_attrs
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
class Parser
|
3
5
|
module Paragraphs
|
@@ -23,7 +25,7 @@ module Infoboxer
|
|
23
25
|
heading(Regexp.last_match[:text], Regexp.last_match[:level])
|
24
26
|
when /^\s*{\|/
|
25
27
|
table
|
26
|
-
when /^[
|
28
|
+
when /^[*\#:;]./
|
27
29
|
list(until_pattern)
|
28
30
|
when /^-{4,}/
|
29
31
|
HR.new
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
class Parser
|
3
5
|
# http://en.wikipedia.org/wiki/Help:Table
|
@@ -12,7 +14,7 @@ module Infoboxer
|
|
12
14
|
|
13
15
|
prms = table_params
|
14
16
|
log "Table params found #{prms}"
|
15
|
-
table = Tree::Table.new(Nodes[], prms)
|
17
|
+
table = Tree::Table.new(Nodes[], **prms)
|
16
18
|
|
17
19
|
@context.next!
|
18
20
|
|
@@ -51,6 +53,9 @@ module Infoboxer
|
|
51
53
|
table_template(table)
|
52
54
|
when nil
|
53
55
|
return false
|
56
|
+
when /^(?<level>={2,})\s*(?<text>.+?)\s*\k<level>$/ # heading implicitly closes the table
|
57
|
+
@context.prev!
|
58
|
+
return false
|
54
59
|
else
|
55
60
|
return table_cell_cont(table)
|
56
61
|
end
|
@@ -60,14 +65,14 @@ module Infoboxer
|
|
60
65
|
|
61
66
|
def table_row(table, param_str)
|
62
67
|
log 'Table row found'
|
63
|
-
table.push_children(TableRow.new(Nodes[], parse_params(param_str)))
|
68
|
+
table.push_children(TableRow.new(Nodes[], **parse_params(param_str)))
|
64
69
|
end
|
65
70
|
|
66
71
|
def table_caption(table)
|
67
72
|
log 'Table caption found'
|
68
73
|
@context.skip(/^\s*\|\+\s*/)
|
69
74
|
|
70
|
-
params = if @context.check(/[^|{
|
75
|
+
params = if @context.check(/[^|{\[]+\|([^|]|$)/)
|
71
76
|
parse_params(@context.scan_until(/\|/))
|
72
77
|
else
|
73
78
|
{}
|
@@ -78,7 +83,7 @@ module Infoboxer
|
|
78
83
|
@context.unscan_matched!
|
79
84
|
@context.prev! # compensate next! which will be done in table()
|
80
85
|
end
|
81
|
-
table.push_children(TableCaption.new(children.strip, params))
|
86
|
+
table.push_children(TableCaption.new(children.strip, **params))
|
82
87
|
end
|
83
88
|
|
84
89
|
def table_cells(table, cell_class = TableCell)
|
@@ -88,13 +93,13 @@ module Infoboxer
|
|
88
93
|
|
89
94
|
@context.skip(/\s*[!|]\s*/)
|
90
95
|
guarded_loop do
|
91
|
-
params = if @context.check(/[^|{
|
96
|
+
params = if @context.check(/[^|{\[]+\|([^|]|$)/)
|
92
97
|
parse_params(@context.scan_until(/\|/))
|
93
98
|
else
|
94
99
|
{}
|
95
100
|
end
|
96
101
|
content = short_inline(/(\|\||!!)/)
|
97
|
-
row.push_children(cell_class.new(content, params))
|
102
|
+
row.push_children(cell_class.new(content, **params))
|
98
103
|
break if @context.eol?
|
99
104
|
end
|
100
105
|
end
|
@@ -102,15 +107,17 @@ module Infoboxer
|
|
102
107
|
def table_template(table)
|
103
108
|
contents = paragraph(/^\s*([|!]|{\|)/).to_templates?
|
104
109
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
110
|
+
# Note: in fact, without full template parsing, we CAN'T know at what level to insert it:
|
111
|
+
# Template can be something like <tr><td>Foo</td></tr>
|
112
|
+
# But for consistency, we insert all templates inside the <td>, forcing this <td>
|
113
|
+
# to exist.
|
114
|
+
|
115
|
+
table.push_children(TableRow.new) unless table.children.last.is_a?(TableRow)
|
116
|
+
row = table.children.last
|
117
|
+
row.push_children(TableCell.new) unless row.children.last.is_a?(BaseCell)
|
118
|
+
cell = row.children.last
|
119
|
+
|
120
|
+
cell.push_children(*contents)
|
114
121
|
end
|
115
122
|
|
116
123
|
# Good news, everyone! Table can be IMPLICITLY closed when it's
|
@@ -130,6 +137,7 @@ module Infoboxer
|
|
130
137
|
unless container
|
131
138
|
# return "table not continued" unless row is empty
|
132
139
|
return true if @context.current.empty?
|
140
|
+
|
133
141
|
@context.prev!
|
134
142
|
return false
|
135
143
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
class Parser
|
3
5
|
module Template
|
@@ -44,6 +46,7 @@ module Infoboxer
|
|
44
46
|
log 'Variable value found'
|
45
47
|
|
46
48
|
break if @context.eat_matched?('}}')
|
49
|
+
|
47
50
|
@context.eof? and @context.fail!("Unexpected break of template variables: #{res}")
|
48
51
|
end
|
49
52
|
res
|
data/lib/infoboxer/templates.rb
CHANGED
data/lib/infoboxer/tree.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
module Tree
|
3
5
|
# Base class for all nodes with children.
|
4
6
|
class Compound < Node
|
5
7
|
def initialize(children = Nodes.new, **params)
|
6
|
-
super(params)
|
8
|
+
super(**params)
|
7
9
|
@children = Nodes[*children]
|
8
10
|
@children.each { |c| c.parent = self }
|
9
11
|
end
|
data/lib/infoboxer/tree/html.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
module Tree
|
3
5
|
module HTMLTagCommons
|
@@ -11,7 +13,7 @@ module Infoboxer
|
|
11
13
|
# Represents HTML tag, surrounding some contents.
|
12
14
|
class HTMLTag < Compound
|
13
15
|
def initialize(tag, attrs, children = Nodes.new)
|
14
|
-
super(children, attrs)
|
16
|
+
super(children, **attrs)
|
15
17
|
@tag = tag
|
16
18
|
end
|
17
19
|
|
@@ -43,7 +45,7 @@ module Infoboxer
|
|
43
45
|
#
|
44
46
|
class HTMLOpeningTag < Node
|
45
47
|
def initialize(tag, attrs)
|
46
|
-
super(attrs)
|
48
|
+
super(**attrs)
|
47
49
|
@tag = tag
|
48
50
|
end
|
49
51
|
|
data/lib/infoboxer/tree/image.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Infoboxer
|
2
4
|
module Tree
|
3
5
|
# Represents image (or other media file).
|
@@ -7,7 +9,7 @@ module Infoboxer
|
|
7
9
|
class Image < Node
|
8
10
|
def initialize(path, caption: nil, **params)
|
9
11
|
@caption = caption
|
10
|
-
super(
|
12
|
+
super(path: path, **params)
|
11
13
|
end
|
12
14
|
|
13
15
|
# Image caption. Can have (sometimes many) other nodes inside.
|