infoboxer 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yml +32 -0
  3. data/.rubocop_todo.yml +0 -15
  4. data/CHANGELOG.md +43 -0
  5. data/Gemfile.lock +172 -0
  6. data/README.md +1 -1
  7. data/infoboxer.gemspec +1 -1
  8. data/lib/infoboxer.rb +23 -11
  9. data/lib/infoboxer/core_ext.rb +1 -1
  10. data/lib/infoboxer/definitions/en.wikipedia.org.rb +3 -1
  11. data/lib/infoboxer/media_wiki.rb +83 -65
  12. data/lib/infoboxer/media_wiki/page.rb +10 -1
  13. data/lib/infoboxer/media_wiki/traits.rb +69 -22
  14. data/lib/infoboxer/navigation.rb +7 -1
  15. data/lib/infoboxer/navigation/lookup.rb +15 -7
  16. data/lib/infoboxer/navigation/sections.rb +27 -9
  17. data/lib/infoboxer/navigation/selector.rb +14 -6
  18. data/lib/infoboxer/navigation/shortcuts.rb +1 -1
  19. data/lib/infoboxer/navigation/wikipath.rb +1 -1
  20. data/lib/infoboxer/parser.rb +2 -2
  21. data/lib/infoboxer/parser/context.rb +23 -9
  22. data/lib/infoboxer/parser/html.rb +1 -1
  23. data/lib/infoboxer/parser/image.rb +2 -2
  24. data/lib/infoboxer/parser/inline.rb +50 -7
  25. data/lib/infoboxer/parser/paragraphs.rb +3 -3
  26. data/lib/infoboxer/parser/table.rb +33 -17
  27. data/lib/infoboxer/parser/template.rb +5 -4
  28. data/lib/infoboxer/parser/util.rb +2 -1
  29. data/lib/infoboxer/templates.rb +2 -0
  30. data/lib/infoboxer/templates/base.rb +2 -0
  31. data/lib/infoboxer/templates/set.rb +1 -1
  32. data/lib/infoboxer/tree.rb +2 -2
  33. data/lib/infoboxer/tree/compound.rb +3 -3
  34. data/lib/infoboxer/tree/document.rb +1 -1
  35. data/lib/infoboxer/tree/gallery.rb +12 -0
  36. data/lib/infoboxer/tree/html.rb +3 -3
  37. data/lib/infoboxer/tree/image.rb +4 -4
  38. data/lib/infoboxer/tree/inline.rb +3 -3
  39. data/lib/infoboxer/tree/linkable.rb +6 -1
  40. data/lib/infoboxer/tree/list.rb +4 -5
  41. data/lib/infoboxer/tree/math.rb +2 -3
  42. data/lib/infoboxer/tree/node.rb +4 -4
  43. data/lib/infoboxer/tree/nodes.rb +51 -7
  44. data/lib/infoboxer/tree/paragraphs.rb +1 -1
  45. data/lib/infoboxer/tree/ref.rb +1 -1
  46. data/lib/infoboxer/tree/table.rb +4 -4
  47. data/lib/infoboxer/tree/template.rb +18 -5
  48. data/lib/infoboxer/tree/text.rb +11 -11
  49. data/lib/infoboxer/tree/wikilink.rb +16 -8
  50. data/lib/infoboxer/version.rb +4 -3
  51. data/lib/infoboxer/wiki_path.rb +12 -1
  52. data/regression/pages/2012_bdo_world_darts_championship.wiki +941 -0
  53. data/regression/pages/progress_wrestling.wiki +1308 -0
  54. metadata +12 -8
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Tree
3
5
  # Module included into everything, that can be treated as
@@ -15,7 +17,7 @@ module Infoboxer
15
17
  # * {Tree::Nodes#follow} for extracting multiple links at once;
16
18
  # * {MediaWiki#get} for basic information on page extraction.
17
19
  def follow
18
- client.get(link)
20
+ client.get(link, interwiki: interwiki)
19
21
  end
20
22
 
21
23
  # Human-readable page URL
@@ -28,6 +30,9 @@ module Infoboxer
28
30
 
29
31
  protected
30
32
 
33
+ # redefined in {Wikilink}
34
+ def interwiki; end
35
+
31
36
  def page
32
37
  lookup_parents(MediaWiki::Page).first or fail('Not in a page from real source')
33
38
  end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  module Tree
@@ -15,9 +15,8 @@ module Infoboxer
15
15
  # Internal, used by {Parser}
16
16
  def merge!(other)
17
17
  ochildren = other.children.dup
18
- if children.last && children.last.can_merge?(ochildren.first)
19
- children.last.merge!(ochildren.shift)
20
- end
18
+ children.last.merge!(ochildren.shift) \
19
+ if children.last&.can_merge?(ochildren.first)
21
20
  push_children(*ochildren)
22
21
  end
23
22
 
@@ -81,7 +80,7 @@ module Infoboxer
81
80
  # Represents ordered list (list with numbers).
82
81
  class OrderedList < List
83
82
  def make_marker(item)
84
- list_text_indent + "#{(item.index + 1)}. "
83
+ list_text_indent + "#{item.index + 1}. "
85
84
  end
86
85
  end
87
86
 
@@ -1,12 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  module Tree
3
5
  # Represents node of math formulae marked with TeX
4
6
  #
5
7
  # See also: https://en.wikipedia.org/wiki/Help:Displaying_a_formula
6
8
  class Math < Text
7
- def text
8
- "<math>#{super}</math>"
9
- end
10
9
  end
11
10
  end
12
11
  end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  require 'htmlentities'
4
4
 
@@ -11,7 +11,7 @@ module Infoboxer
11
11
  # you will receive it from tree and use for navigations.
12
12
  #
13
13
  class Node
14
- def initialize(params = {})
14
+ def initialize(**params)
15
15
  @params = params
16
16
  end
17
17
 
@@ -154,7 +154,7 @@ module Infoboxer
154
154
  end
155
155
 
156
156
  def show_params(prms = nil)
157
- (prms || params).map { |k, v| "#{k}: #{v.inspect}" }.join(', ')
157
+ (prms || params).compact.map { |k, v| "#{k}: #{v.inspect}" }.join(', ')
158
158
  end
159
159
 
160
160
  def indent(level)
@@ -162,7 +162,7 @@ module Infoboxer
162
162
  end
163
163
 
164
164
  def _eq(_other)
165
- fail(NotImplementedError, "#_eq should be defined in subclasses (called for #{self.class})")
165
+ false
166
166
  end
167
167
 
168
168
  def decode(str)
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  module Tree
@@ -38,15 +38,32 @@ module Infoboxer
38
38
  # @!method compact
39
39
  # Just like Array#compact, but returns Nodes
40
40
 
41
+ # @!method grep(pattern)
42
+ # Just like Array#grep, but returns Nodes
43
+
44
+ # @!method grep_v(pattern)
45
+ # Just like Array#grep_v, but returns Nodes
46
+
41
47
  # @!method -(other)
42
48
  # Just like Array#-, but returns Nodes
43
49
 
44
- %i[select reject sort_by flatten compact -].each do |sym|
50
+ # @!method +(other)
51
+ # Just like Array#+, but returns Nodes
52
+
53
+ # NB: Since Ruby 3.0, we need to redefine all Enumerable methods (otherwise they return Array).
54
+ # TODO: Check those lacking overrides!
55
+
56
+ %i[
57
+ select reject sort_by flatten compact grep grep_v - +
58
+ take_while drop_while
59
+ ].each do |sym|
45
60
  define_method(sym) do |*args, &block|
46
61
  Nodes[*super(*args, &block)]
47
62
  end
48
63
  end
49
64
 
65
+ alias_method :filter, :select
66
+
50
67
  # Just like Array#first, but returns Nodes, if provided with `n` of elements.
51
68
  def first(n = nil)
52
69
  if n.nil?
@@ -75,6 +92,21 @@ module Infoboxer
75
92
  end
76
93
  end
77
94
 
95
+ # Just like Array#flat_map, but returns Nodes **if** every map result is a Node or Nodes
96
+ def flat_map
97
+ res = super
98
+ if res.all? { |n| n.is_a?(Node) || n.is_a?(Nodes) }
99
+ Nodes[*res]
100
+ else
101
+ res
102
+ end
103
+ end
104
+
105
+ # Just like Array#group_by, but returns a hash of `{<grouping variable> => Nodes}`
106
+ def group_by
107
+ super.transform_values { |group| Nodes[*group] }
108
+ end
109
+
78
110
  # @!method prev_siblings
79
111
  # Previous siblings (flat list) of all nodes inside.
80
112
 
@@ -129,6 +161,12 @@ module Infoboxer
129
161
  map(&:text).join
130
162
  end
131
163
 
164
+ alias_method :to_s, :text
165
+
166
+ def unwrap
167
+ map { |n| n.respond_to?(:unwrap) ? n.unwrap : n }
168
+ end
169
+
132
170
  # Fetches pages by ALL wikilinks inside in ONE query to MediaWiki
133
171
  # API.
134
172
  #
@@ -139,23 +177,27 @@ module Infoboxer
139
177
  # @return [Nodes<MediaWiki::Page>] It is still `Nodes`, so you
140
178
  # still can process them uniformly.
141
179
  def follow
142
- links = select { |n| n.respond_to?(:link) }.map(&:link)
180
+ links = grep(Linkable)
143
181
  return Nodes[] if links.empty?
182
+
144
183
  page = first.lookup_parents(MediaWiki::Page).first or
145
184
  fail('Not in a page from real source')
146
185
  page.client or fail('MediaWiki client not set')
147
- page.client.get(*links)
186
+ pages = links.group_by(&:interwiki)
187
+ .flat_map { |iw, ls| page.client.get(*ls.map(&:link), interwiki: iw) }
188
+ pages.count == 1 ? pages.first : Nodes[*pages]
148
189
  end
149
190
 
150
191
  # @private
151
192
  # Internal, used by {Parser}
152
- def <<(node)
193
+ def <<(node) # rubocop:disable Metrics/PerceivedComplexity
153
194
  if node.is_a?(Array)
154
195
  node.each { |n| self << n }
155
- elsif last && last.can_merge?(node)
196
+ elsif last&.can_merge?(node)
156
197
  last.merge!(node)
157
198
  else
158
199
  return if !node || node.empty?
200
+
159
201
  node = Text.new(node) if node.is_a?(String)
160
202
  super
161
203
  end
@@ -173,7 +215,9 @@ module Infoboxer
173
215
  # @private
174
216
  # Internal, used by {Parser}
175
217
  def flow_templates
176
- make_nodes(map { |n| n.is_a?(Paragraph) ? n.to_templates? : n })
218
+ # TODO: will it be better?..
219
+ # make_nodes(map { |n| n.is_a?(Paragraph) ? n.to_templates? : n })
220
+ self
177
221
  end
178
222
 
179
223
  private
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  module Tree
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  module Tree
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  require 'terminal-table'
4
4
 
@@ -26,13 +26,13 @@ module Infoboxer
26
26
  #
27
27
  # FIXME: it can easily be several table heading rows
28
28
  def heading_row
29
- rows.first if rows.first && rows.first.children.all? { |c| c.is_a?(TableHeading) }
29
+ rows.first if rows.first&.children&.all? { |c| c.is_a?(TableHeading) }
30
30
  end
31
31
 
32
32
  # For now, returns all table rows except {#heading_row}
33
33
  def body_rows
34
- if rows.first && rows.first.children.all? { |c| c.is_a?(TableHeading) }
35
- rows[1..-1]
34
+ if rows.first&.children&.all? { |c| c.is_a?(TableHeading) }
35
+ rows[1..]
36
36
  else
37
37
  rows
38
38
  end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  require_relative 'linkable'
4
4
 
@@ -22,6 +22,10 @@ module Infoboxer
22
22
  false
23
23
  end
24
24
 
25
+ def named?
26
+ name !~ /^\d+$/
27
+ end
28
+
25
29
  protected
26
30
 
27
31
  def descr
@@ -110,12 +114,17 @@ module Infoboxer
110
114
  alias_method :variables, :children
111
115
 
112
116
  def initialize(name, variables = Nodes[])
113
- super(variables, extract_params(variables))
117
+ super(variables, **extract_params(variables))
114
118
  @name = name
115
119
  end
116
120
 
117
121
  def text
118
- ''
122
+ res = unnamed_variables.map(&:text).join('|')
123
+ res.empty? ? '' : "{#{name}:#{res}}"
124
+ end
125
+
126
+ def unwrap
127
+ unnamed_variables.flat_map(&:children).unwrap
119
128
  end
120
129
 
121
130
  # See {Node#to_tree}
@@ -139,7 +148,11 @@ module Infoboxer
139
148
  #
140
149
  # @return [Nodes<Var>]
141
150
  def unnamed_variables
142
- variables.find(name: /^\d+$/)
151
+ variables.reject(&:named?)
152
+ end
153
+
154
+ def named_variables
155
+ variables.select(&:named?)
143
156
  end
144
157
 
145
158
  # Fetches template variable(s) by name(s) or patterns.
@@ -242,7 +255,7 @@ module Infoboxer
242
255
  def extract_params(vars)
243
256
  vars
244
257
  .select { |v| v.children.count == 1 && v.children.first.is_a?(Text) }
245
- .map { |v| [v.name, v.children.first.raw_text] }.to_h
258
+ .map { |v| [v.name.to_sym, v.children.first.raw_text] }.to_h
246
259
  end
247
260
 
248
261
  def inspect_variables(depth)
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  module Tree
@@ -15,9 +15,9 @@ module Infoboxer
15
15
  # Text fragment without decoding of HTML entities.
16
16
  attr_accessor :raw_text
17
17
 
18
- def initialize(text, params = {})
19
- super(params)
20
- @raw_text = text
18
+ def initialize(text, **params)
19
+ super(**params)
20
+ @raw_text = +text
21
21
  end
22
22
 
23
23
  # See {Node#text}
@@ -39,13 +39,13 @@ module Infoboxer
39
39
  # @private
40
40
  # Internal, used by {Parser}
41
41
  def merge!(other)
42
- if other.is_a?(String)
43
- @raw_text << other
44
- elsif other.is_a?(Text)
45
- @raw_text << other.raw_text
46
- else
47
- fail("Not mergeable into text: #{other.inspect}")
48
- end
42
+ @raw_text <<
43
+ case other
44
+ when String then other
45
+ when Text then other.raw_text
46
+ else
47
+ fail("Not mergeable into text: #{other.inspect}")
48
+ end
49
49
  end
50
50
 
51
51
  # @private
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  require_relative 'linkable'
4
4
 
@@ -12,14 +12,23 @@ module Infoboxer
12
12
  # Note, that Wikilink is {Linkable}, so you can {Linkable#follow #follow}
13
13
  # it to obtain linked pages.
14
14
  class Wikilink < Link
15
- def initialize(*)
16
- super
17
- parse_link!
15
+ def initialize(link, label = nil, namespace: nil, interwiki: nil)
16
+ super(link, label, namespace: namespace, interwiki: interwiki)
17
+ @namespace = namespace || ''
18
+ @interwiki = interwiki
19
+ parse_name!
18
20
  end
19
21
 
20
22
  # "Clean" wikilink name, for ex., `Cities` for `[Category:Cities]`
21
23
  attr_reader :name
22
24
 
25
+ # Interwiki identifier. For example, `[[wikt:Argentina]]`
26
+ # will have `"Argentina"` as its {#name} and `"wikt"` (wiktionary) as an
27
+ # interwiki. TODO: how to use it.
28
+ #
29
+ # See [Wikipedia docs](https://en.wikipedia.org/wiki/Help:Interwiki_linking) for details.
30
+ attr_reader :interwiki
31
+
23
32
  # Wikilink namespace, `Category` for `[Category:Cities]`, empty
24
33
  # string (not `nil`!) for just `[Cities]`
25
34
  attr_reader :namespace
@@ -46,10 +55,8 @@ module Infoboxer
46
55
 
47
56
  private
48
57
 
49
- def parse_link!
50
- @name, @namespace = link.split(':', 2).reverse
51
- @namespace ||= ''
52
-
58
+ def parse_name!
59
+ @name = namespace.empty? ? link : link.sub(/^#{namespace}:/, '')
53
60
  @name, @anchor = @name.split('#', 2)
54
61
  @anchor ||= ''
55
62
 
@@ -68,6 +75,7 @@ module Infoboxer
68
75
 
69
76
  return unless children.count == 1 &&
70
77
  children.first.is_a?(Text) && children.first.raw_text.empty?
78
+
71
79
  children.first.raw_text = @topic
72
80
  end
73
81
  end
@@ -1,8 +1,9 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  MAJOR = 0
5
- MINOR = 3
5
+ MINOR = 4
6
6
  PATCH = 0
7
- VERSION = [MAJOR, MINOR, PATCH].join('.')
7
+ PRE = nil
8
+ VERSION = [MAJOR, MINOR, PATCH, PRE].compact.join('.')
8
9
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Infoboxer
2
4
  # @private
3
5
  class WikiPath
@@ -36,7 +38,7 @@ module Infoboxer
36
38
  attrs[attr.to_sym] = process_value(value)
37
39
  end
38
40
  res = op == '//' ? {op: :lookup} : {}
39
- res[:type] = type.gsub(/(?:^|_)([a-z])/, &:upcase).tr('_', '').to_sym unless type.empty?
41
+ res[:type] = process_type(type) unless type.empty?
40
42
  res.merge(attrs) # TODO: raise if empty selector
41
43
  end
42
44
 
@@ -51,6 +53,15 @@ module Infoboxer
51
53
  end
52
54
  end
53
55
 
56
+ def process_type(type)
57
+ type.gsub(/(?:^|_)([a-z])/, &:upcase).tr('_', '').to_sym
58
+ .tap { |t| valid_type?(t) or fail(ParseError, "Unrecognized node type: #{type}") }
59
+ end
60
+
61
+ def valid_type?(t)
62
+ t == :Section || Infoboxer::Tree.const_defined?(t)
63
+ end
64
+
54
65
  def unexpected(scanner, expected)
55
66
  place = scanner.eos? ? 'end of pattern' : scanner.rest.inspect
56
67
  fail ParseError, "Unexpected #{place}, expecting #{expected}"