infoboxer 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yml +32 -0
  3. data/.rubocop_todo.yml +0 -15
  4. data/CHANGELOG.md +43 -0
  5. data/Gemfile.lock +172 -0
  6. data/README.md +1 -1
  7. data/infoboxer.gemspec +1 -1
  8. data/lib/infoboxer.rb +23 -11
  9. data/lib/infoboxer/core_ext.rb +1 -1
  10. data/lib/infoboxer/definitions/en.wikipedia.org.rb +3 -1
  11. data/lib/infoboxer/media_wiki.rb +83 -65
  12. data/lib/infoboxer/media_wiki/page.rb +10 -1
  13. data/lib/infoboxer/media_wiki/traits.rb +69 -22
  14. data/lib/infoboxer/navigation.rb +7 -1
  15. data/lib/infoboxer/navigation/lookup.rb +15 -7
  16. data/lib/infoboxer/navigation/sections.rb +27 -9
  17. data/lib/infoboxer/navigation/selector.rb +14 -6
  18. data/lib/infoboxer/navigation/shortcuts.rb +1 -1
  19. data/lib/infoboxer/navigation/wikipath.rb +1 -1
  20. data/lib/infoboxer/parser.rb +2 -2
  21. data/lib/infoboxer/parser/context.rb +23 -9
  22. data/lib/infoboxer/parser/html.rb +1 -1
  23. data/lib/infoboxer/parser/image.rb +2 -2
  24. data/lib/infoboxer/parser/inline.rb +50 -7
  25. data/lib/infoboxer/parser/paragraphs.rb +3 -3
  26. data/lib/infoboxer/parser/table.rb +33 -17
  27. data/lib/infoboxer/parser/template.rb +5 -4
  28. data/lib/infoboxer/parser/util.rb +2 -1
  29. data/lib/infoboxer/templates.rb +2 -0
  30. data/lib/infoboxer/templates/base.rb +2 -0
  31. data/lib/infoboxer/templates/set.rb +1 -1
  32. data/lib/infoboxer/tree.rb +2 -2
  33. data/lib/infoboxer/tree/compound.rb +3 -3
  34. data/lib/infoboxer/tree/document.rb +1 -1
  35. data/lib/infoboxer/tree/gallery.rb +12 -0
  36. data/lib/infoboxer/tree/html.rb +3 -3
  37. data/lib/infoboxer/tree/image.rb +4 -4
  38. data/lib/infoboxer/tree/inline.rb +3 -3
  39. data/lib/infoboxer/tree/linkable.rb +6 -1
  40. data/lib/infoboxer/tree/list.rb +4 -5
  41. data/lib/infoboxer/tree/math.rb +2 -3
  42. data/lib/infoboxer/tree/node.rb +4 -4
  43. data/lib/infoboxer/tree/nodes.rb +51 -7
  44. data/lib/infoboxer/tree/paragraphs.rb +1 -1
  45. data/lib/infoboxer/tree/ref.rb +1 -1
  46. data/lib/infoboxer/tree/table.rb +4 -4
  47. data/lib/infoboxer/tree/template.rb +18 -5
  48. data/lib/infoboxer/tree/text.rb +11 -11
  49. data/lib/infoboxer/tree/wikilink.rb +16 -8
  50. data/lib/infoboxer/version.rb +4 -3
  51. data/lib/infoboxer/wiki_path.rb +12 -1
  52. data/regression/pages/2012_bdo_world_darts_championship.wiki +941 -0
  53. data/regression/pages/progress_wrestling.wiki +1308 -0
  54. metadata +12 -8
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  class Parser
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  class Parser
@@ -11,7 +11,7 @@ module Infoboxer
11
11
 
12
12
  path = @context.scan_until(/\||\]\]/)
13
13
  attrs = @context.matched == '|' ? image_attrs : {}
14
- Tree::Image.new(path, attrs)
14
+ Tree::Image.new(path, **attrs)
15
15
  end
16
16
 
17
17
  def image_attrs
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  class Parser
@@ -18,7 +18,8 @@ module Infoboxer
18
18
 
19
19
  if @context.eof?
20
20
  break unless until_pattern
21
- @context.fail!("#{until_pattern} not found, starting from #{start}")
21
+
22
+ @context.fail!("#{until_pattern.source} not found, starting from #{start}")
22
23
  end
23
24
 
24
25
  if @context.eol?
@@ -35,7 +36,7 @@ module Infoboxer
35
36
  guarded_loop do
36
37
  # FIXME: quick and UGLY IS HELL JUST TRYING TO MAKE THE SHIT WORK
37
38
  chunk =
38
- if @context.inline_eol_sign == /^\]/
39
+ if @context.inline_eol_sign == /^\]/ # rubocop:disable Style/CaseLikeIf
39
40
  @context.scan_until(re.short_inline_until_cache_brackets[until_pattern])
40
41
  elsif @context.inline_eol_sign == /^\]\]/
41
42
  @context.scan_until(re.short_inline_until_cache_brackets2[until_pattern])
@@ -66,7 +67,8 @@ module Infoboxer
66
67
 
67
68
  if @context.eof?
68
69
  break unless until_pattern
69
- @context.fail!("#{until_pattern} not found")
70
+
71
+ @context.fail!("#{until_pattern.source} not found")
70
72
  end
71
73
 
72
74
  if @context.eol?
@@ -83,7 +85,7 @@ module Infoboxer
83
85
 
84
86
  private
85
87
 
86
- def inline_formatting(match)
88
+ def inline_formatting(match) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/AbcSize
87
89
  case match
88
90
  when "'''''"
89
91
  BoldItalic.new(short_inline(/'''''/))
@@ -109,6 +111,8 @@ module Infoboxer
109
111
  reference(Regexp.last_match(1))
110
112
  when /<math>/
111
113
  math
114
+ when /<gallery([^>]*)>/
115
+ gallery(Regexp.last_match(1))
112
116
  when '<'
113
117
  html || Text.new(match) # it was not HTML, just accidental <
114
118
  else
@@ -126,8 +130,18 @@ module Infoboxer
126
130
  caption = inline(/\]\]/)
127
131
  @context.pop_eol_sign
128
132
  end
133
+ name, namespace = link.split(':', 2).reverse
134
+ lnk, params =
135
+ if @context.traits.namespace?(namespace)
136
+ [link, {namespace: namespace}]
137
+ elsif @context.traits.interwiki?(namespace)
138
+ [name, {interwiki: namespace}]
139
+ else
140
+ [link, {}]
141
+ end
129
142
 
130
- Wikilink.new(link, caption)
143
+ puts @context.rest if lnk.nil?
144
+ Wikilink.new(lnk, caption, **params)
131
145
  end
132
146
 
133
147
  # http://en.wikipedia.org/wiki/Help:Link#External_links
@@ -145,7 +159,7 @@ module Infoboxer
145
159
 
146
160
  def reference(param_str, closed = false)
147
161
  children = closed ? Nodes[] : long_inline(%r{</ref>})
148
- Ref.new(children, parse_params(param_str))
162
+ Ref.new(children, **parse_params(param_str))
149
163
  end
150
164
 
151
165
  def math
@@ -159,6 +173,35 @@ module Infoboxer
159
173
  Text.new(@context.scan_continued_until(%r{</nowiki>}))
160
174
  end
161
175
  end
176
+
177
+ def gallery(tag_rest)
178
+ params = parse_params(tag_rest)
179
+ images = []
180
+ guarded_loop do
181
+ @context.next! if @context.eol?
182
+ path = @context.scan_until(%r{</gallery>|\||$})
183
+ attrs = @context.matched == '|' ? gallery_image_attrs : {}
184
+ unless path.empty?
185
+ # FIXME: what if path NOT matches the namespace?
186
+ images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), **attrs)
187
+ end
188
+ break if @context.matched == '</gallery>'
189
+ end
190
+ Gallery.new(images, **params)
191
+ end
192
+
193
+ def gallery_image_attrs
194
+ nodes = []
195
+
196
+ guarded_loop do
197
+ nodes << short_inline(%r{\||</gallery>})
198
+ break if @context.eol? || @context.matched?(%r{</gallery>})
199
+ end
200
+
201
+ nodes.map(&method(:image_attr))
202
+ .inject(&:merge)
203
+ .reject { |_k, v| v.nil? || v.empty? }
204
+ end
162
205
  end
163
206
 
164
207
  require_relative 'image'
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  class Parser
@@ -14,7 +14,7 @@ module Infoboxer
14
14
 
15
15
  @context.next!
16
16
  end
17
- nodes.flow_templates
17
+ nodes
18
18
  end
19
19
 
20
20
  private
@@ -25,7 +25,7 @@ module Infoboxer
25
25
  heading(Regexp.last_match[:text], Regexp.last_match[:level])
26
26
  when /^\s*{\|/
27
27
  table
28
- when /^[\*\#:;]./
28
+ when /^[*\#:;]./
29
29
  list(until_pattern)
30
30
  when /^-{4,}/
31
31
  HR.new
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  class Parser
@@ -14,12 +14,13 @@ module Infoboxer
14
14
 
15
15
  prms = table_params
16
16
  log "Table params found #{prms}"
17
- table = Tree::Table.new(Nodes[], prms)
17
+ table = Tree::Table.new(Nodes[], **prms)
18
18
 
19
19
  @context.next!
20
20
 
21
- loop do
21
+ guarded_loop do
22
22
  table_next_line(table) or break
23
+ log 'Next table row'
23
24
  @context.next!
24
25
  end
25
26
 
@@ -52,6 +53,9 @@ module Infoboxer
52
53
  table_template(table)
53
54
  when nil
54
55
  return false
56
+ when /^(?<level>={2,})\s*(?<text>.+?)\s*\k<level>$/ # heading implicitly closes the table
57
+ @context.prev!
58
+ return false
55
59
  else
56
60
  return table_cell_cont(table)
57
61
  end
@@ -61,16 +65,25 @@ module Infoboxer
61
65
 
62
66
  def table_row(table, param_str)
63
67
  log 'Table row found'
64
- table.push_children(TableRow.new(Nodes[], parse_params(param_str)))
68
+ table.push_children(TableRow.new(Nodes[], **parse_params(param_str)))
65
69
  end
66
70
 
67
71
  def table_caption(table)
68
72
  log 'Table caption found'
69
73
  @context.skip(/^\s*\|\+\s*/)
70
74
 
75
+ params = if @context.check(/[^|{\[]+\|([^|]|$)/)
76
+ parse_params(@context.scan_until(/\|/))
77
+ else
78
+ {}
79
+ end
80
+
71
81
  children = inline(/^\s*([|!]|{\|)/)
72
- @context.prev! if @context.eol? # compensate next! which will be done in table()
73
- table.push_children(TableCaption.new(children.strip))
82
+ if @context.matched
83
+ @context.unscan_matched!
84
+ @context.prev! # compensate next! which will be done in table()
85
+ end
86
+ table.push_children(TableCaption.new(children.strip, **params))
74
87
  end
75
88
 
76
89
  def table_cells(table, cell_class = TableCell)
@@ -80,13 +93,13 @@ module Infoboxer
80
93
 
81
94
  @context.skip(/\s*[!|]\s*/)
82
95
  guarded_loop do
83
- params = if @context.check(/[^|{|\[]+\|([^\|]|$)/)
96
+ params = if @context.check(/[^|{\[]+\|([^|]|$)/)
84
97
  parse_params(@context.scan_until(/\|/))
85
98
  else
86
99
  {}
87
100
  end
88
101
  content = short_inline(/(\|\||!!)/)
89
- row.push_children(cell_class.new(content, params))
102
+ row.push_children(cell_class.new(content, **params))
90
103
  break if @context.eol?
91
104
  end
92
105
  end
@@ -94,15 +107,17 @@ module Infoboxer
94
107
  def table_template(table)
95
108
  contents = paragraph(/^\s*([|!]|{\|)/).to_templates?
96
109
 
97
- if (row = table.children.last).is_a?(TableRow)
98
- if (cell = row.children.last).is_a?(BaseCell)
99
- cell.push_children(*contents)
100
- else
101
- row.push_children(*contents)
102
- end
103
- else
104
- table.push_children(*contents)
105
- end
110
+ # Note: in fact, without full template parsing, we CAN'T know what level to insert it:
111
+ # Template can be something like <tr><td>Foo</td></tr>
112
+ # But for consistency, we insert all templates inside the <td>, forcing this <td>
113
+ # to exist.
114
+
115
+ table.push_children(TableRow.new) unless table.children.last.is_a?(TableRow)
116
+ row = table.children.last
117
+ row.push_children(TableCell.new) unless row.children.last.is_a?(BaseCell)
118
+ cell = row.children.last
119
+
120
+ cell.push_children(*contents)
106
121
  end
107
122
 
108
123
  # Good news, everyone! Table can be IMPLICITLY closed when it's
@@ -122,6 +137,7 @@ module Infoboxer
122
137
  unless container
123
138
  # return "table not continued" unless row is empty
124
139
  return true if @context.current.empty?
140
+
125
141
  @context.prev!
126
142
  return false
127
143
  end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  class Parser
@@ -29,8 +29,8 @@ module Infoboxer
29
29
 
30
30
  guarded_loop do
31
31
  @context.next! while @context.eol?
32
- if @context.check(/\s*([^ =}|<]+)\s*=\s*/)
33
- name = @context.scan(/\s*([^ =]+)/).strip
32
+ if @context.check(/\s*([^=}|<]+)\s*=\s*/)
33
+ name = @context.scan(/\s*([^=]+)/).strip
34
34
  @context.skip(/\s*=\s*/)
35
35
  else
36
36
  name = num
@@ -46,13 +46,14 @@ module Infoboxer
46
46
  log 'Variable value found'
47
47
 
48
48
  break if @context.eat_matched?('}}')
49
+
49
50
  @context.eof? and @context.fail!("Unexpected break of template variables: #{res}")
50
51
  end
51
52
  res
52
53
  end
53
54
 
54
55
  def sanitize_value(nodes)
55
- nodes.pop if nodes.last.is_a?(Pre) && nodes.last.text =~ /^\s*$/ # FIXME: dirty!
56
+ nodes.pop if (nodes.last.is_a?(Pre) || nodes.last.is_a?(Text)) && nodes.last.text =~ /^\s*$/ # FIXME: dirty!
56
57
  nodes
57
58
  end
58
59
  end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  class Parser
@@ -12,6 +12,7 @@ module Infoboxer
12
12
  \[[a-z]+:// | # external link
13
13
  <nowiki[^>]*> | # nowiki
14
14
  <ref[^>]*> | # reference
15
+ <gallery[^>]*>| # gallery
15
16
  <math> | # math
16
17
  < # HTML tag
17
18
  ))x
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  # This module covers advanced MediaWiki templates usage.
3
5
  #
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Templates
3
5
  class Base < Tree::Template
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  module Templates
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  # Infoboxer provides you with tree structure of the Wikipedia page,
@@ -63,7 +63,7 @@ module Infoboxer
63
63
  require_relative 'tree/nodes'
64
64
 
65
65
  %w[text compound inline
66
- image html paragraphs list template table ref math
66
+ image gallery html paragraphs list template table ref math
67
67
  document].each do |type|
68
68
  require_relative "tree/#{type}"
69
69
  end
@@ -1,11 +1,11 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  module Tree
5
5
  # Base class for all nodes with children.
6
6
  class Compound < Node
7
- def initialize(children = Nodes.new, params = {})
8
- super(params)
7
+ def initialize(children = Nodes.new, **params)
8
+ super(**params)
9
9
  @children = Nodes[*children]
10
10
  @children.each { |c| c.parent = self }
11
11
  end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  module Tree
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Infoboxer
4
+ module Tree
5
+ # Represents gallery of images (contents of `<gallery>` special tag).
6
+ #
7
+ # See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Help:Gallery_tag)
8
+ # for explanation of attributes.
9
+ class Gallery < Compound
10
+ end
11
+ end
12
+ end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  module Tree
@@ -13,7 +13,7 @@ module Infoboxer
13
13
  # Represents HTML tag, surrounding some contents.
14
14
  class HTMLTag < Compound
15
15
  def initialize(tag, attrs, children = Nodes.new)
16
- super(children, attrs)
16
+ super(children, **attrs)
17
17
  @tag = tag
18
18
  end
19
19
 
@@ -45,7 +45,7 @@ module Infoboxer
45
45
  #
46
46
  class HTMLOpeningTag < Node
47
47
  def initialize(tag, attrs)
48
- super(attrs)
48
+ super(**attrs)
49
49
  @tag = tag
50
50
  end
51
51
 
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  module Tree
@@ -7,9 +7,9 @@ module Infoboxer
7
7
  # See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Wikipedia:Extended_image_syntax)
8
8
  # for explanation of attributes.
9
9
  class Image < Node
10
- def initialize(path, params = {})
11
- @caption = params.delete(:caption)
12
- super({path: path}.merge(params))
10
+ def initialize(path, caption: nil, **params)
11
+ @caption = caption
12
+ super(path: path, **params)
13
13
  end
14
14
 
15
15
  # Image caption. Can have (sometimes many) other nodes inside.
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  module Tree
@@ -17,8 +17,8 @@ module Infoboxer
17
17
 
18
18
  # Base class for internal/external links,
19
19
  class Link < Compound
20
- def initialize(link, label = nil)
21
- super(label || Nodes.new([Text.new(link)]), link: link)
20
+ def initialize(link, label = nil, **attr)
21
+ super(label || Nodes.new([Text.new(link)]), link: link, **attr)
22
22
  end
23
23
 
24
24
  # @!attribute [r] link