infoboxer 0.3.3 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +32 -0
  3. data/CHANGELOG.md +13 -0
  4. data/Gemfile.lock +97 -75
  5. data/README.md +1 -1
  6. data/lib/infoboxer.rb +7 -5
  7. data/lib/infoboxer/core_ext.rb +2 -0
  8. data/lib/infoboxer/definitions/en.wikipedia.org.rb +2 -0
  9. data/lib/infoboxer/media_wiki.rb +3 -1
  10. data/lib/infoboxer/media_wiki/page.rb +2 -0
  11. data/lib/infoboxer/media_wiki/traits.rb +4 -1
  12. data/lib/infoboxer/navigation.rb +2 -0
  13. data/lib/infoboxer/navigation/lookup.rb +5 -5
  14. data/lib/infoboxer/navigation/sections.rb +5 -1
  15. data/lib/infoboxer/navigation/selector.rb +3 -1
  16. data/lib/infoboxer/navigation/shortcuts.rb +2 -0
  17. data/lib/infoboxer/navigation/wikipath.rb +2 -0
  18. data/lib/infoboxer/parser.rb +3 -1
  19. data/lib/infoboxer/parser/context.rb +10 -6
  20. data/lib/infoboxer/parser/html.rb +2 -0
  21. data/lib/infoboxer/parser/image.rb +3 -1
  22. data/lib/infoboxer/parser/inline.rb +8 -4
  23. data/lib/infoboxer/parser/paragraphs.rb +3 -1
  24. data/lib/infoboxer/parser/table.rb +23 -15
  25. data/lib/infoboxer/parser/template.rb +3 -0
  26. data/lib/infoboxer/parser/util.rb +2 -0
  27. data/lib/infoboxer/templates.rb +2 -0
  28. data/lib/infoboxer/templates/base.rb +2 -0
  29. data/lib/infoboxer/templates/set.rb +2 -0
  30. data/lib/infoboxer/tree.rb +2 -0
  31. data/lib/infoboxer/tree/compound.rb +3 -1
  32. data/lib/infoboxer/tree/document.rb +2 -0
  33. data/lib/infoboxer/tree/gallery.rb +2 -0
  34. data/lib/infoboxer/tree/html.rb +4 -2
  35. data/lib/infoboxer/tree/image.rb +3 -1
  36. data/lib/infoboxer/tree/inline.rb +2 -0
  37. data/lib/infoboxer/tree/linkable.rb +2 -0
  38. data/lib/infoboxer/tree/list.rb +4 -2
  39. data/lib/infoboxer/tree/math.rb +2 -0
  40. data/lib/infoboxer/tree/node.rb +3 -1
  41. data/lib/infoboxer/tree/nodes.rb +16 -4
  42. data/lib/infoboxer/tree/paragraphs.rb +2 -0
  43. data/lib/infoboxer/tree/ref.rb +2 -0
  44. data/lib/infoboxer/tree/table.rb +5 -3
  45. data/lib/infoboxer/tree/template.rb +3 -1
  46. data/lib/infoboxer/tree/text.rb +11 -9
  47. data/lib/infoboxer/tree/wikilink.rb +3 -0
  48. data/lib/infoboxer/version.rb +4 -2
  49. data/lib/infoboxer/wiki_path.rb +2 -0
  50. data/regression/pages/2012_bdo_world_darts_championship.wiki +941 -0
  51. data/regression/pages/progress_wrestling.wiki +1308 -0
  52. metadata +6 -3
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Navigation
3
5
  # `Sections` module provides a logical view on document structure.
@@ -75,7 +77,7 @@ module Infoboxer
75
77
  when 1
76
78
  @sections.select { |s| names.first === s.heading.text_ }
77
79
  else
78
- @sections.select { |s| names.first === s.heading.text_ }.sections(*names[1..-1])
80
+ @sections.select { |s| names.first === s.heading.text_ }.sections(*names[1..])
79
81
  end
80
82
  end
81
83
 
@@ -83,6 +85,7 @@ module Infoboxer
83
85
  sections = names.map { |name|
84
86
  heading = lookup_children(:Heading, text_: name).first
85
87
  next unless heading
88
+
86
89
  body = heading.next_siblings
87
90
  .take_while { |n| !n.is_a?(Tree::Heading) || n.level > heading.level }
88
91
 
@@ -104,6 +107,7 @@ module Infoboxer
104
107
  def make_sections
105
108
  res = Tree::Nodes[]
106
109
  return res if headings.empty?
110
+
107
111
  level = headings.first.level
108
112
 
109
113
  children
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Navigation
3
5
  module Lookup
@@ -8,7 +10,7 @@ module Infoboxer
8
10
  def initialize(*arg, &block)
9
11
  @arg = [arg, block].flatten.compact.map(&method(:sym_to_class))
10
12
  @arg.each do |a|
11
- a.reject! { |_k, v| v.nil? } if a.is_a?(Hash)
13
+ a.compact! if a.is_a?(Hash)
12
14
  end
13
15
  end
14
16
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Navigation
3
5
  # See {Shortcuts::Node Shortcuts::Node} for everything!
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../wiki_path'
2
4
 
3
5
  module Infoboxer
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'ostruct'
2
4
  require 'logger'
3
5
 
@@ -52,7 +54,7 @@ module Infoboxer
52
54
  def initialize(context)
53
55
  @context = context
54
56
  @re = OpenStruct.new(make_regexps)
55
- @logger = Logger.new(STDOUT).tap { |l| l.level = Logger::FATAL }
57
+ @logger = Logger.new($stdout).tap { |l| l.level = Logger::FATAL }
56
58
  end
57
59
 
58
60
  require_relative 'parser/inline'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'strscan'
2
4
 
3
5
  module Infoboxer
@@ -8,7 +10,7 @@ module Infoboxer
8
10
 
9
11
  def initialize(text, traits = nil)
10
12
  @lines = text
11
- .gsub(/<!--.+?-->/m, '') # FIXME: will also kill comments inside <nowiki> tag
13
+ .gsub(/<!--.*?-->/m, '') # FIXME: will also kill comments inside <nowiki> tag
12
14
  .split(/[\r\n]/)
13
15
  @lineno = -1
14
16
  @traits = traits || MediaWiki::Traits.default
@@ -19,22 +21,23 @@ module Infoboxer
19
21
  attr_reader :next_lines
20
22
 
21
23
  def colno
22
- @scanner && @scanner.pos || 0
24
+ @scanner&.pos || 0
23
25
  end
24
26
 
25
27
  def matched
26
- @matched ||= @scanner && @scanner.matched
28
+ @matched ||= @scanner&.matched
27
29
  end
28
30
 
29
31
  # check which works only once
30
32
  def eat_matched?(str)
31
33
  return false unless matched == str
34
+
32
35
  @matched = 'DUMMY'
33
36
  true
34
37
  end
35
38
 
36
39
  def rest
37
- @rest ||= @scanner && @scanner.rest
40
+ @rest ||= @scanner&.rest
38
41
  end
39
42
 
40
43
  alias_method :current, :rest
@@ -107,7 +110,7 @@ module Infoboxer
107
110
  end
108
111
 
109
112
  def scan_continued_until(re, leave_pattern = false)
110
- res = ''
113
+ res = +''
111
114
 
112
115
  loop do
113
116
  chunk = _scan_until(re)
@@ -152,6 +155,7 @@ module Infoboxer
152
155
 
153
156
  def unscan_matched!
154
157
  return unless @matched
158
+
155
159
  @scanner.pos -= @matched.size
156
160
  @rest = nil
157
161
  end
@@ -173,7 +177,7 @@ module Infoboxer
173
177
  def shift(amount)
174
178
  @lineno += amount
175
179
  current = @lines[lineno]
176
- @next_lines = @lines[(lineno + 1)..-1]
180
+ @next_lines = @lines[(lineno + 1)..]
177
181
  if current
178
182
  @scanner.string = current
179
183
  @rest = current
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  class Parser
3
5
  module HTML
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  class Parser
3
5
  module Image
@@ -9,7 +11,7 @@ module Infoboxer
9
11
 
10
12
  path = @context.scan_until(/\||\]\]/)
11
13
  attrs = @context.matched == '|' ? image_attrs : {}
12
- Tree::Image.new(path, attrs)
14
+ Tree::Image.new(path, **attrs)
13
15
  end
14
16
 
15
17
  def image_attrs
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  class Parser
3
5
  module Inline
@@ -16,6 +18,7 @@ module Infoboxer
16
18
 
17
19
  if @context.eof?
18
20
  break unless until_pattern
21
+
19
22
  @context.fail!("#{until_pattern.source} not found, starting from #{start}")
20
23
  end
21
24
 
@@ -33,7 +36,7 @@ module Infoboxer
33
36
  guarded_loop do
34
37
  # FIXME: quick and UGLY IS HELL JUST TRYING TO MAKE THE SHIT WORK
35
38
  chunk =
36
- if @context.inline_eol_sign == /^\]/
39
+ if @context.inline_eol_sign == /^\]/ # rubocop:disable Style/CaseLikeIf
37
40
  @context.scan_until(re.short_inline_until_cache_brackets[until_pattern])
38
41
  elsif @context.inline_eol_sign == /^\]\]/
39
42
  @context.scan_until(re.short_inline_until_cache_brackets2[until_pattern])
@@ -64,6 +67,7 @@ module Infoboxer
64
67
 
65
68
  if @context.eof?
66
69
  break unless until_pattern
70
+
67
71
  @context.fail!("#{until_pattern.source} not found")
68
72
  end
69
73
 
@@ -155,7 +159,7 @@ module Infoboxer
155
159
 
156
160
  def reference(param_str, closed = false)
157
161
  children = closed ? Nodes[] : long_inline(%r{</ref>})
158
- Ref.new(children, parse_params(param_str))
162
+ Ref.new(children, **parse_params(param_str))
159
163
  end
160
164
 
161
165
  def math
@@ -179,11 +183,11 @@ module Infoboxer
179
183
  attrs = @context.matched == '|' ? gallery_image_attrs : {}
180
184
  unless path.empty?
181
185
  # FIXME: what if path NOT matches the namespace?
182
- images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), attrs)
186
+ images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), **attrs)
183
187
  end
184
188
  break if @context.matched == '</gallery>'
185
189
  end
186
- Gallery.new(images, params)
190
+ Gallery.new(images, **params)
187
191
  end
188
192
 
189
193
  def gallery_image_attrs
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  class Parser
3
5
  module Paragraphs
@@ -23,7 +25,7 @@ module Infoboxer
23
25
  heading(Regexp.last_match[:text], Regexp.last_match[:level])
24
26
  when /^\s*{\|/
25
27
  table
26
- when /^[\*\#:;]./
28
+ when /^[*\#:;]./
27
29
  list(until_pattern)
28
30
  when /^-{4,}/
29
31
  HR.new
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  class Parser
3
5
  # http://en.wikipedia.org/wiki/Help:Table
@@ -12,7 +14,7 @@ module Infoboxer
12
14
 
13
15
  prms = table_params
14
16
  log "Table params found #{prms}"
15
- table = Tree::Table.new(Nodes[], prms)
17
+ table = Tree::Table.new(Nodes[], **prms)
16
18
 
17
19
  @context.next!
18
20
 
@@ -51,6 +53,9 @@ module Infoboxer
51
53
  table_template(table)
52
54
  when nil
53
55
  return false
56
+ when /^(?<level>={2,})\s*(?<text>.+?)\s*\k<level>$/ # heading implicitly closes the table
57
+ @context.prev!
58
+ return false
54
59
  else
55
60
  return table_cell_cont(table)
56
61
  end
@@ -60,14 +65,14 @@ module Infoboxer
60
65
 
61
66
  def table_row(table, param_str)
62
67
  log 'Table row found'
63
- table.push_children(TableRow.new(Nodes[], parse_params(param_str)))
68
+ table.push_children(TableRow.new(Nodes[], **parse_params(param_str)))
64
69
  end
65
70
 
66
71
  def table_caption(table)
67
72
  log 'Table caption found'
68
73
  @context.skip(/^\s*\|\+\s*/)
69
74
 
70
- params = if @context.check(/[^|{|\[]+\|([^\|]|$)/)
75
+ params = if @context.check(/[^|{\[]+\|([^|]|$)/)
71
76
  parse_params(@context.scan_until(/\|/))
72
77
  else
73
78
  {}
@@ -78,7 +83,7 @@ module Infoboxer
78
83
  @context.unscan_matched!
79
84
  @context.prev! # compensate next! which will be done in table()
80
85
  end
81
- table.push_children(TableCaption.new(children.strip, params))
86
+ table.push_children(TableCaption.new(children.strip, **params))
82
87
  end
83
88
 
84
89
  def table_cells(table, cell_class = TableCell)
@@ -88,13 +93,13 @@ module Infoboxer
88
93
 
89
94
  @context.skip(/\s*[!|]\s*/)
90
95
  guarded_loop do
91
- params = if @context.check(/[^|{|\[]+\|([^\|]|$)/)
96
+ params = if @context.check(/[^|{\[]+\|([^|]|$)/)
92
97
  parse_params(@context.scan_until(/\|/))
93
98
  else
94
99
  {}
95
100
  end
96
101
  content = short_inline(/(\|\||!!)/)
97
- row.push_children(cell_class.new(content, params))
102
+ row.push_children(cell_class.new(content, **params))
98
103
  break if @context.eol?
99
104
  end
100
105
  end
@@ -102,15 +107,17 @@ module Infoboxer
102
107
  def table_template(table)
103
108
  contents = paragraph(/^\s*([|!]|{\|)/).to_templates?
104
109
 
105
- if (row = table.children.last).is_a?(TableRow)
106
- if (cell = row.children.last).is_a?(BaseCell)
107
- cell.push_children(*contents)
108
- else
109
- row.push_children(*contents)
110
- end
111
- else
112
- table.push_children(*contents)
113
- end
110
+ # Note: in fact, without full template parsing, we CAN'T know at what level to insert it:
111
+ # Template can be something like <tr><td>Foo</td></tr>
112
+ # But for consistency, we insert all templates inside the <td>, forcing this <td>
113
+ # to exist.
114
+
115
+ table.push_children(TableRow.new) unless table.children.last.is_a?(TableRow)
116
+ row = table.children.last
117
+ row.push_children(TableCell.new) unless row.children.last.is_a?(BaseCell)
118
+ cell = row.children.last
119
+
120
+ cell.push_children(*contents)
114
121
  end
115
122
 
116
123
  # Good news, everyone! Table can be IMPLICITLY closed when it's
@@ -130,6 +137,7 @@ module Infoboxer
130
137
  unless container
131
138
  # return "table not continued" unless row is empty
132
139
  return true if @context.current.empty?
140
+
133
141
  @context.prev!
134
142
  return false
135
143
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  class Parser
3
5
  module Template
@@ -44,6 +46,7 @@ module Infoboxer
44
46
  log 'Variable value found'
45
47
 
46
48
  break if @context.eat_matched?('}}')
49
+
47
50
  @context.eof? and @context.fail!("Unexpected break of template variables: #{res}")
48
51
  end
49
52
  res
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  class Parser
3
5
  module Util
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  # This module covers advanced MediaWiki templates usage.
3
5
  #
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Templates
3
5
  class Base < Tree::Template
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Templates
3
5
  # Base class for defining set of templates, used for some site/domain.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  # Infoboxer provides you with tree structure of the Wikipedia page,
3
5
  # which you can introspect and navigate with ease. This tree structure
@@ -1,9 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Tree
3
5
  # Base class for all nodes with children.
4
6
  class Compound < Node
5
7
  def initialize(children = Nodes.new, **params)
6
- super(params)
8
+ super(**params)
7
9
  @children = Nodes[*children]
8
10
  @children.each { |c| c.parent = self }
9
11
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Tree
3
5
  # Represents entire document.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Tree
3
5
  # Represents gallery of images (contents of `<gallery>` special tag).
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Tree
3
5
  module HTMLTagCommons
@@ -11,7 +13,7 @@ module Infoboxer
11
13
  # Represents HTML tag, surrounding some contents.
12
14
  class HTMLTag < Compound
13
15
  def initialize(tag, attrs, children = Nodes.new)
14
- super(children, attrs)
16
+ super(children, **attrs)
15
17
  @tag = tag
16
18
  end
17
19
 
@@ -43,7 +45,7 @@ module Infoboxer
43
45
  #
44
46
  class HTMLOpeningTag < Node
45
47
  def initialize(tag, attrs)
46
- super(attrs)
48
+ super(**attrs)
47
49
  @tag = tag
48
50
  end
49
51
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Tree
3
5
  # Represents image (or other media file).
@@ -7,7 +9,7 @@ module Infoboxer
7
9
  class Image < Node
8
10
  def initialize(path, caption: nil, **params)
9
11
  @caption = caption
10
- super({path: path}.merge(params))
12
+ super(path: path, **params)
11
13
  end
12
14
 
13
15
  # Image caption. Can have (sometimes many) other nodes inside.