infoboxer 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +32 -0
  3. data/CHANGELOG.md +13 -0
  4. data/Gemfile.lock +97 -75
  5. data/README.md +1 -1
  6. data/lib/infoboxer.rb +7 -5
  7. data/lib/infoboxer/core_ext.rb +2 -0
  8. data/lib/infoboxer/definitions/en.wikipedia.org.rb +2 -0
  9. data/lib/infoboxer/media_wiki.rb +3 -1
  10. data/lib/infoboxer/media_wiki/page.rb +2 -0
  11. data/lib/infoboxer/media_wiki/traits.rb +4 -1
  12. data/lib/infoboxer/navigation.rb +2 -0
  13. data/lib/infoboxer/navigation/lookup.rb +5 -5
  14. data/lib/infoboxer/navigation/sections.rb +5 -1
  15. data/lib/infoboxer/navigation/selector.rb +3 -1
  16. data/lib/infoboxer/navigation/shortcuts.rb +2 -0
  17. data/lib/infoboxer/navigation/wikipath.rb +2 -0
  18. data/lib/infoboxer/parser.rb +3 -1
  19. data/lib/infoboxer/parser/context.rb +10 -6
  20. data/lib/infoboxer/parser/html.rb +2 -0
  21. data/lib/infoboxer/parser/image.rb +3 -1
  22. data/lib/infoboxer/parser/inline.rb +8 -4
  23. data/lib/infoboxer/parser/paragraphs.rb +3 -1
  24. data/lib/infoboxer/parser/table.rb +23 -15
  25. data/lib/infoboxer/parser/template.rb +3 -0
  26. data/lib/infoboxer/parser/util.rb +2 -0
  27. data/lib/infoboxer/templates.rb +2 -0
  28. data/lib/infoboxer/templates/base.rb +2 -0
  29. data/lib/infoboxer/templates/set.rb +2 -0
  30. data/lib/infoboxer/tree.rb +2 -0
  31. data/lib/infoboxer/tree/compound.rb +3 -1
  32. data/lib/infoboxer/tree/document.rb +2 -0
  33. data/lib/infoboxer/tree/gallery.rb +2 -0
  34. data/lib/infoboxer/tree/html.rb +4 -2
  35. data/lib/infoboxer/tree/image.rb +3 -1
  36. data/lib/infoboxer/tree/inline.rb +2 -0
  37. data/lib/infoboxer/tree/linkable.rb +2 -0
  38. data/lib/infoboxer/tree/list.rb +4 -2
  39. data/lib/infoboxer/tree/math.rb +2 -0
  40. data/lib/infoboxer/tree/node.rb +3 -1
  41. data/lib/infoboxer/tree/nodes.rb +16 -4
  42. data/lib/infoboxer/tree/paragraphs.rb +2 -0
  43. data/lib/infoboxer/tree/ref.rb +2 -0
  44. data/lib/infoboxer/tree/table.rb +5 -3
  45. data/lib/infoboxer/tree/template.rb +3 -1
  46. data/lib/infoboxer/tree/text.rb +11 -9
  47. data/lib/infoboxer/tree/wikilink.rb +3 -0
  48. data/lib/infoboxer/version.rb +4 -2
  49. data/lib/infoboxer/wiki_path.rb +2 -0
  50. data/regression/pages/2012_bdo_world_darts_championship.wiki +941 -0
  51. data/regression/pages/progress_wrestling.wiki +1308 -0
  52. metadata +6 -3
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Navigation
3
5
  # `Sections` module provides logical view on document structure.
@@ -75,7 +77,7 @@ module Infoboxer
75
77
  when 1
76
78
  @sections.select { |s| names.first === s.heading.text_ }
77
79
  else
78
- @sections.select { |s| names.first === s.heading.text_ }.sections(*names[1..-1])
80
+ @sections.select { |s| names.first === s.heading.text_ }.sections(*names[1..])
79
81
  end
80
82
  end
81
83
 
@@ -83,6 +85,7 @@ module Infoboxer
83
85
  sections = names.map { |name|
84
86
  heading = lookup_children(:Heading, text_: name).first
85
87
  next unless heading
88
+
86
89
  body = heading.next_siblings
87
90
  .take_while { |n| !n.is_a?(Tree::Heading) || n.level > heading.level }
88
91
 
@@ -104,6 +107,7 @@ module Infoboxer
104
107
  def make_sections
105
108
  res = Tree::Nodes[]
106
109
  return res if headings.empty?
110
+
107
111
  level = headings.first.level
108
112
 
109
113
  children
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Navigation
3
5
  module Lookup
@@ -8,7 +10,7 @@ module Infoboxer
8
10
  def initialize(*arg, &block)
9
11
  @arg = [arg, block].flatten.compact.map(&method(:sym_to_class))
10
12
  @arg.each do |a|
11
- a.reject! { |_k, v| v.nil? } if a.is_a?(Hash)
13
+ a.compact! if a.is_a?(Hash)
12
14
  end
13
15
  end
14
16
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Navigation
3
5
  # See {Shortcuts::Node Shortcuts::Node} for everything!
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../wiki_path'
2
4
 
3
5
  module Infoboxer
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'ostruct'
2
4
  require 'logger'
3
5
 
@@ -52,7 +54,7 @@ module Infoboxer
52
54
  def initialize(context)
53
55
  @context = context
54
56
  @re = OpenStruct.new(make_regexps)
55
- @logger = Logger.new(STDOUT).tap { |l| l.level = Logger::FATAL }
57
+ @logger = Logger.new($stdout).tap { |l| l.level = Logger::FATAL }
56
58
  end
57
59
 
58
60
  require_relative 'parser/inline'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'strscan'
2
4
 
3
5
  module Infoboxer
@@ -8,7 +10,7 @@ module Infoboxer
8
10
 
9
11
  def initialize(text, traits = nil)
10
12
  @lines = text
11
- .gsub(/<!--.+?-->/m, '') # FIXME: will also kill comments inside <nowiki> tag
13
+ .gsub(/<!--.*?-->/m, '') # FIXME: will also kill comments inside <nowiki> tag
12
14
  .split(/[\r\n]/)
13
15
  @lineno = -1
14
16
  @traits = traits || MediaWiki::Traits.default
@@ -19,22 +21,23 @@ module Infoboxer
19
21
  attr_reader :next_lines
20
22
 
21
23
  def colno
22
- @scanner && @scanner.pos || 0
24
+ @scanner&.pos || 0
23
25
  end
24
26
 
25
27
  def matched
26
- @matched ||= @scanner && @scanner.matched
28
+ @matched ||= @scanner&.matched
27
29
  end
28
30
 
29
31
  # check which works only once
30
32
  def eat_matched?(str)
31
33
  return false unless matched == str
34
+
32
35
  @matched = 'DUMMY'
33
36
  true
34
37
  end
35
38
 
36
39
  def rest
37
- @rest ||= @scanner && @scanner.rest
40
+ @rest ||= @scanner&.rest
38
41
  end
39
42
 
40
43
  alias_method :current, :rest
@@ -107,7 +110,7 @@ module Infoboxer
107
110
  end
108
111
 
109
112
  def scan_continued_until(re, leave_pattern = false)
110
- res = ''
113
+ res = +''
111
114
 
112
115
  loop do
113
116
  chunk = _scan_until(re)
@@ -152,6 +155,7 @@ module Infoboxer
152
155
 
153
156
  def unscan_matched!
154
157
  return unless @matched
158
+
155
159
  @scanner.pos -= @matched.size
156
160
  @rest = nil
157
161
  end
@@ -173,7 +177,7 @@ module Infoboxer
173
177
  def shift(amount)
174
178
  @lineno += amount
175
179
  current = @lines[lineno]
176
- @next_lines = @lines[(lineno + 1)..-1]
180
+ @next_lines = @lines[(lineno + 1)..]
177
181
  if current
178
182
  @scanner.string = current
179
183
  @rest = current
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  class Parser
3
5
  module HTML
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  class Parser
3
5
  module Image
@@ -9,7 +11,7 @@ module Infoboxer
9
11
 
10
12
  path = @context.scan_until(/\||\]\]/)
11
13
  attrs = @context.matched == '|' ? image_attrs : {}
12
- Tree::Image.new(path, attrs)
14
+ Tree::Image.new(path, **attrs)
13
15
  end
14
16
 
15
17
  def image_attrs
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  class Parser
3
5
  module Inline
@@ -16,6 +18,7 @@ module Infoboxer
16
18
 
17
19
  if @context.eof?
18
20
  break unless until_pattern
21
+
19
22
  @context.fail!("#{until_pattern.source} not found, starting from #{start}")
20
23
  end
21
24
 
@@ -33,7 +36,7 @@ module Infoboxer
33
36
  guarded_loop do
34
37
  # FIXME: quick and UGLY IS HELL JUST TRYING TO MAKE THE SHIT WORK
35
38
  chunk =
36
- if @context.inline_eol_sign == /^\]/
39
+ if @context.inline_eol_sign == /^\]/ # rubocop:disable Style/CaseLikeIf
37
40
  @context.scan_until(re.short_inline_until_cache_brackets[until_pattern])
38
41
  elsif @context.inline_eol_sign == /^\]\]/
39
42
  @context.scan_until(re.short_inline_until_cache_brackets2[until_pattern])
@@ -64,6 +67,7 @@ module Infoboxer
64
67
 
65
68
  if @context.eof?
66
69
  break unless until_pattern
70
+
67
71
  @context.fail!("#{until_pattern.source} not found")
68
72
  end
69
73
 
@@ -155,7 +159,7 @@ module Infoboxer
155
159
 
156
160
  def reference(param_str, closed = false)
157
161
  children = closed ? Nodes[] : long_inline(%r{</ref>})
158
- Ref.new(children, parse_params(param_str))
162
+ Ref.new(children, **parse_params(param_str))
159
163
  end
160
164
 
161
165
  def math
@@ -179,11 +183,11 @@ module Infoboxer
179
183
  attrs = @context.matched == '|' ? gallery_image_attrs : {}
180
184
  unless path.empty?
181
185
  # FIXME: what if path NOT matches the namespace?
182
- images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), attrs)
186
+ images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), **attrs)
183
187
  end
184
188
  break if @context.matched == '</gallery>'
185
189
  end
186
- Gallery.new(images, params)
190
+ Gallery.new(images, **params)
187
191
  end
188
192
 
189
193
  def gallery_image_attrs
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  class Parser
3
5
  module Paragraphs
@@ -23,7 +25,7 @@ module Infoboxer
23
25
  heading(Regexp.last_match[:text], Regexp.last_match[:level])
24
26
  when /^\s*{\|/
25
27
  table
26
- when /^[\*\#:;]./
28
+ when /^[*\#:;]./
27
29
  list(until_pattern)
28
30
  when /^-{4,}/
29
31
  HR.new
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  class Parser
3
5
  # http://en.wikipedia.org/wiki/Help:Table
@@ -12,7 +14,7 @@ module Infoboxer
12
14
 
13
15
  prms = table_params
14
16
  log "Table params found #{prms}"
15
- table = Tree::Table.new(Nodes[], prms)
17
+ table = Tree::Table.new(Nodes[], **prms)
16
18
 
17
19
  @context.next!
18
20
 
@@ -51,6 +53,9 @@ module Infoboxer
51
53
  table_template(table)
52
54
  when nil
53
55
  return false
56
+ when /^(?<level>={2,})\s*(?<text>.+?)\s*\k<level>$/ # heading implicitly closes the table
57
+ @context.prev!
58
+ return false
54
59
  else
55
60
  return table_cell_cont(table)
56
61
  end
@@ -60,14 +65,14 @@ module Infoboxer
60
65
 
61
66
  def table_row(table, param_str)
62
67
  log 'Table row found'
63
- table.push_children(TableRow.new(Nodes[], parse_params(param_str)))
68
+ table.push_children(TableRow.new(Nodes[], **parse_params(param_str)))
64
69
  end
65
70
 
66
71
  def table_caption(table)
67
72
  log 'Table caption found'
68
73
  @context.skip(/^\s*\|\+\s*/)
69
74
 
70
- params = if @context.check(/[^|{|\[]+\|([^\|]|$)/)
75
+ params = if @context.check(/[^|{\[]+\|([^|]|$)/)
71
76
  parse_params(@context.scan_until(/\|/))
72
77
  else
73
78
  {}
@@ -78,7 +83,7 @@ module Infoboxer
78
83
  @context.unscan_matched!
79
84
  @context.prev! # compensate next! which will be done in table()
80
85
  end
81
- table.push_children(TableCaption.new(children.strip, params))
86
+ table.push_children(TableCaption.new(children.strip, **params))
82
87
  end
83
88
 
84
89
  def table_cells(table, cell_class = TableCell)
@@ -88,13 +93,13 @@ module Infoboxer
88
93
 
89
94
  @context.skip(/\s*[!|]\s*/)
90
95
  guarded_loop do
91
- params = if @context.check(/[^|{|\[]+\|([^\|]|$)/)
96
+ params = if @context.check(/[^|{\[]+\|([^|]|$)/)
92
97
  parse_params(@context.scan_until(/\|/))
93
98
  else
94
99
  {}
95
100
  end
96
101
  content = short_inline(/(\|\||!!)/)
97
- row.push_children(cell_class.new(content, params))
102
+ row.push_children(cell_class.new(content, **params))
98
103
  break if @context.eol?
99
104
  end
100
105
  end
@@ -102,15 +107,17 @@ module Infoboxer
102
107
  def table_template(table)
103
108
  contents = paragraph(/^\s*([|!]|{\|)/).to_templates?
104
109
 
105
- if (row = table.children.last).is_a?(TableRow)
106
- if (cell = row.children.last).is_a?(BaseCell)
107
- cell.push_children(*contents)
108
- else
109
- row.push_children(*contents)
110
- end
111
- else
112
- table.push_children(*contents)
113
- end
110
+ # Note: in fact, without full template parsing, we CAN'T know what level to insert it:
111
+ # Template can be something like <tr><td>Foo</td></tr>
112
+ # But for consistency, we insert all templates inside the <td>, forcing this <td>
113
+ # to exist.
114
+
115
+ table.push_children(TableRow.new) unless table.children.last.is_a?(TableRow)
116
+ row = table.children.last
117
+ row.push_children(TableCell.new) unless row.children.last.is_a?(BaseCell)
118
+ cell = row.children.last
119
+
120
+ cell.push_children(*contents)
114
121
  end
115
122
 
116
123
  # Good news, everyone! Table can be IMPLICITLY closed when it's
@@ -130,6 +137,7 @@ module Infoboxer
130
137
  unless container
131
138
  # return "table not continued" unless row is empty
132
139
  return true if @context.current.empty?
140
+
133
141
  @context.prev!
134
142
  return false
135
143
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  class Parser
3
5
  module Template
@@ -44,6 +46,7 @@ module Infoboxer
44
46
  log 'Variable value found'
45
47
 
46
48
  break if @context.eat_matched?('}}')
49
+
47
50
  @context.eof? and @context.fail!("Unexpected break of template variables: #{res}")
48
51
  end
49
52
  res
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  class Parser
3
5
  module Util
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  # This module covers advanced MediaWiki templates usage.
3
5
  #
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Templates
3
5
  class Base < Tree::Template
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Templates
3
5
  # Base class for defining set of templates, used for some site/domain.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  # Infoboxer provides you with tree structure of the Wikipedia page,
3
5
  # which you can introspect and navigate with ease. This tree structure
@@ -1,9 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Tree
3
5
  # Base class for all nodes with children.
4
6
  class Compound < Node
5
7
  def initialize(children = Nodes.new, **params)
6
- super(params)
8
+ super(**params)
7
9
  @children = Nodes[*children]
8
10
  @children.each { |c| c.parent = self }
9
11
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Tree
3
5
  # Represents entire document.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Tree
3
5
  # Represents gallery of images (contents of `<gallery>` special tag).
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Tree
3
5
  module HTMLTagCommons
@@ -11,7 +13,7 @@ module Infoboxer
11
13
  # Represents HTML tag, surrounding some contents.
12
14
  class HTMLTag < Compound
13
15
  def initialize(tag, attrs, children = Nodes.new)
14
- super(children, attrs)
16
+ super(children, **attrs)
15
17
  @tag = tag
16
18
  end
17
19
 
@@ -43,7 +45,7 @@ module Infoboxer
43
45
  #
44
46
  class HTMLOpeningTag < Node
45
47
  def initialize(tag, attrs)
46
- super(attrs)
48
+ super(**attrs)
47
49
  @tag = tag
48
50
  end
49
51
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Tree
3
5
  # Represents image (or other media file).
@@ -7,7 +9,7 @@ module Infoboxer
7
9
  class Image < Node
8
10
  def initialize(path, caption: nil, **params)
9
11
  @caption = caption
10
- super({path: path}.merge(params))
12
+ super(path: path, **params)
11
13
  end
12
14
 
13
15
  # Image caption. Can have (sometimes many) other nodes inside.