infoboxer 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yml +32 -0
  3. data/.rubocop_todo.yml +0 -15
  4. data/CHANGELOG.md +43 -0
  5. data/Gemfile.lock +172 -0
  6. data/README.md +1 -1
  7. data/infoboxer.gemspec +1 -1
  8. data/lib/infoboxer.rb +23 -11
  9. data/lib/infoboxer/core_ext.rb +1 -1
  10. data/lib/infoboxer/definitions/en.wikipedia.org.rb +3 -1
  11. data/lib/infoboxer/media_wiki.rb +83 -65
  12. data/lib/infoboxer/media_wiki/page.rb +10 -1
  13. data/lib/infoboxer/media_wiki/traits.rb +69 -22
  14. data/lib/infoboxer/navigation.rb +7 -1
  15. data/lib/infoboxer/navigation/lookup.rb +15 -7
  16. data/lib/infoboxer/navigation/sections.rb +27 -9
  17. data/lib/infoboxer/navigation/selector.rb +14 -6
  18. data/lib/infoboxer/navigation/shortcuts.rb +1 -1
  19. data/lib/infoboxer/navigation/wikipath.rb +1 -1
  20. data/lib/infoboxer/parser.rb +2 -2
  21. data/lib/infoboxer/parser/context.rb +23 -9
  22. data/lib/infoboxer/parser/html.rb +1 -1
  23. data/lib/infoboxer/parser/image.rb +2 -2
  24. data/lib/infoboxer/parser/inline.rb +50 -7
  25. data/lib/infoboxer/parser/paragraphs.rb +3 -3
  26. data/lib/infoboxer/parser/table.rb +33 -17
  27. data/lib/infoboxer/parser/template.rb +5 -4
  28. data/lib/infoboxer/parser/util.rb +2 -1
  29. data/lib/infoboxer/templates.rb +2 -0
  30. data/lib/infoboxer/templates/base.rb +2 -0
  31. data/lib/infoboxer/templates/set.rb +1 -1
  32. data/lib/infoboxer/tree.rb +2 -2
  33. data/lib/infoboxer/tree/compound.rb +3 -3
  34. data/lib/infoboxer/tree/document.rb +1 -1
  35. data/lib/infoboxer/tree/gallery.rb +12 -0
  36. data/lib/infoboxer/tree/html.rb +3 -3
  37. data/lib/infoboxer/tree/image.rb +4 -4
  38. data/lib/infoboxer/tree/inline.rb +3 -3
  39. data/lib/infoboxer/tree/linkable.rb +6 -1
  40. data/lib/infoboxer/tree/list.rb +4 -5
  41. data/lib/infoboxer/tree/math.rb +2 -3
  42. data/lib/infoboxer/tree/node.rb +4 -4
  43. data/lib/infoboxer/tree/nodes.rb +51 -7
  44. data/lib/infoboxer/tree/paragraphs.rb +1 -1
  45. data/lib/infoboxer/tree/ref.rb +1 -1
  46. data/lib/infoboxer/tree/table.rb +4 -4
  47. data/lib/infoboxer/tree/template.rb +18 -5
  48. data/lib/infoboxer/tree/text.rb +11 -11
  49. data/lib/infoboxer/tree/wikilink.rb +16 -8
  50. data/lib/infoboxer/version.rb +4 -3
  51. data/lib/infoboxer/wiki_path.rb +12 -1
  52. data/regression/pages/2012_bdo_world_darts_championship.wiki +941 -0
  53. data/regression/pages/progress_wrestling.wiki +1308 -0
  54. metadata +12 -8
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  class MediaWiki
@@ -35,6 +35,15 @@ module Infoboxer
35
35
  client.traits
36
36
  end
37
37
 
38
+ # FIXME: take from siteinfo!
39
+ def namespace
40
+ Traits::STANDARD_NAMESPACES[source.fetch('ns') + 2] # Media = -2, Special = -1, Main = 0
41
+ end
42
+
43
+ def category?
44
+ namespace == 'Category'
45
+ end
46
+
38
47
  private
39
48
 
40
49
  PARAMS_TO_INSPECT = %i[url title].freeze
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  class MediaWiki
@@ -34,9 +34,8 @@ module Infoboxer
34
34
  end
35
35
 
36
36
  # @private
37
- def get(domain, options = {})
38
- cls = Traits.domains[domain]
39
- cls ? cls.new(options) : Traits.new(options)
37
+ def get(domain, site_info = {})
38
+ (Traits.domains[domain] || Traits).new(site_info)
40
39
  end
41
40
 
42
41
  # @private
@@ -60,7 +59,7 @@ module Infoboxer
60
59
  # [English Wikipedia traits](https://github.com/molybdenum-99/infoboxer/blob/master/lib/infoboxer/definitions/en.wikipedia.org.rb)
61
60
  # for example implementation.
62
61
  def for(domain, &block)
63
- Traits.domains[domain].tap { |c| c && c.instance_eval(&block) } ||
62
+ Traits.domains[domain]&.instance_eval(&block) ||
64
63
  Class.new(self, &block).domain(domain)
65
64
  end
66
65
 
@@ -68,18 +67,27 @@ module Infoboxer
68
67
  alias_method :default, :new
69
68
  end
70
69
 
71
- def initialize(options = {})
72
- @options = options
73
- @file_namespace =
74
- [DEFAULTS[:file_namespace], namespace_aliases(options, 'File')]
75
- .flatten.compact.uniq
76
- @category_namespace =
77
- [DEFAULTS[:category_namespace], namespace_aliases(options, 'Category')]
78
- .flatten.compact.uniq
70
+ def initialize(site_info = {})
71
+ @site_info = site_info
72
+ end
73
+
74
+ def namespace?(prefix)
75
+ known_namespaces.include?(prefix)
76
+ end
77
+
78
+ def interwiki?(prefix)
79
+ known_interwikis.key?(prefix)
79
80
  end
80
81
 
81
82
  # @private
82
- attr_reader :file_namespace, :category_namespace
83
+ def file_namespace
84
+ @file_namespace ||= ns_aliases('File')
85
+ end
86
+
87
+ # @private
88
+ def category_namespace
89
+ @category_namespace ||= ns_aliases('Category')
90
+ end
83
91
 
84
92
  # @private
85
93
  def templates
@@ -88,16 +96,55 @@ module Infoboxer
88
96
 
89
97
  private
90
98
 
91
- def namespace_aliases(options, canonical)
92
- namespace = (options[:namespaces] || []).detect { |v| v['canonical'] == canonical }
93
- return nil unless namespace
94
- [namespace['*'], *namespace['aliases']]
99
+ def known_namespaces
100
+ @known_namespaces ||=
101
+ if @site_info.empty?
102
+ STANDARD_NAMESPACES
103
+ else
104
+ (@site_info['namespaces'].values + @site_info['namespacealiases']).map { |n| n['*'] }
105
+ end
106
+ end
107
+
108
+ def known_interwikis
109
+ @known_interwikis ||=
110
+ if @site_info.empty?
111
+ {}
112
+ else
113
+ @site_info['interwikimap'].map { |iw| [iw['prefix'], iw] }.to_h
114
+ end
115
+ end
116
+
117
+ def ns_aliases(base)
118
+ return [base] if @site_info.empty?
119
+
120
+ main = @site_info['namespaces'].values.detect { |n| n['canonical'] == base }
121
+ [base, main['*']] +
122
+ @site_info['namespacealiases']
123
+ .select { |a| a['id'] == main['id'] }.flat_map { |n| n['*'] }
124
+ .compact.uniq
95
125
  end
96
126
 
97
- DEFAULTS = {
98
- file_namespace: 'File',
99
- category_namespace: 'Category'
100
- }.freeze
127
+ # See https://www.mediawiki.org/wiki/Help:Namespaces#Standard_namespaces
128
+ STANDARD_NAMESPACES = [
129
+ 'Media', # Direct linking to media files.
130
+ 'Special', # Special (non-editable) pages.
131
+ '', # (Main)
132
+ 'Talk', # Article discussion.
133
+ 'User', #
134
+ 'User talk', #
135
+ 'Project', # Meta-discussions related to the operation and development of the wiki.
136
+ 'Project talk', #
137
+ 'File', # Metadata for images, videos, sound files and other media.
138
+ 'File talk', #
139
+ 'MediaWiki', # System messages and other important content.
140
+ 'MediaWiki talk', #
141
+ 'Template', # Templates: blocks of text or wikicode that are intended to be transcluded.
142
+ 'Template talk', #
143
+ 'Help', # Help files, instructions and "how-to" guides.
144
+ 'Help talk', #
145
+ 'Category', # Categories: dynamic lists of other pages.
146
+ 'Category talk', #
147
+ ].freeze
101
148
  end
102
149
  end
103
150
  end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  # Navigation is one of the things Infoboxer is proud about. It tries
@@ -96,5 +96,11 @@ module Infoboxer
96
96
  class Tree::Document
97
97
  include Navigation::Sections::Container
98
98
  end
99
+
100
+ module Helpers
101
+ def W(*arg, &block) # rubocop:disable Naming/MethodName
102
+ Lookup::Selector.new(*arg, &block)
103
+ end
104
+ end
99
105
  end
100
106
  end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  require_relative 'selector'
4
4
 
@@ -98,9 +98,13 @@ module Infoboxer
98
98
  # Selects matching nodes from current node's siblings, which
99
99
  # are above current node in parents children list.
100
100
 
101
+ # @!method lookup_prev_sibling(*selectors, &block)
102
+ # Selects the first matching node from current node's siblings, which
103
+ # are above the current node in the parent's children list.
104
+
101
105
  # Underscored version of {#matches?}
102
106
  def _matches?(selector)
103
- selector.matches?(self)
107
+ selector === self
104
108
  end
105
109
 
106
110
  # Underscored version of {#lookup}
@@ -136,6 +140,11 @@ module Infoboxer
136
140
  prev_siblings._find(selector)
137
141
  end
138
142
 
143
+ # Underscored version of {#lookup_prev_sibling}
144
+ def _lookup_prev_sibling(selector)
145
+ prev_siblings.reverse.detect { |n| selector === n }
146
+ end
147
+
139
148
  # Underscored version of {#lookup_next_siblings}
140
149
  def _lookup_next_siblings(selector)
141
150
  next_siblings._find(selector)
@@ -146,14 +155,14 @@ module Infoboxer
146
155
  lookup lookup_children lookup_parents
147
156
  lookup_siblings
148
157
  lookup_next_siblings lookup_prev_siblings
158
+ lookup_prev_sibling
149
159
  ]
150
160
  .map { |sym| [sym, :"_#{sym}"] }
151
161
  .each do |sym, underscored|
152
-
153
- define_method(sym) do |*args, &block|
154
- send(underscored, Selector.new(*args, &block))
162
+ define_method(sym) do |*args, &block|
163
+ send(underscored, Selector.new(*args, &block))
164
+ end
155
165
  end
156
- end
157
166
 
158
167
  # Checks if node has any parent matching selectors.
159
168
  def parent?(*selectors, &block)
@@ -209,7 +218,6 @@ module Infoboxer
209
218
  lookup_siblings
210
219
  lookup_next_siblings lookup_prev_siblings
211
220
  ].map { |sym| [sym, :"_#{sym}"] }.each do |sym, underscored|
212
-
213
221
  define_method(sym) do |*args, &block|
214
222
  send(underscored, Selector.new(*args, &block))
215
223
  end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  module Navigation
@@ -77,10 +77,23 @@ module Infoboxer
77
77
  when 1
78
78
  @sections.select { |s| names.first === s.heading.text_ }
79
79
  else
80
- @sections.select { |s| names.first === s.heading.text_ }.sections(*names[1..-1])
80
+ @sections.select { |s| names.first === s.heading.text_ }.sections(*names[1..])
81
81
  end
82
82
  end
83
83
 
84
+ def subsections(*names)
85
+ sections = names.map { |name|
86
+ heading = lookup_children(:Heading, text_: name).first
87
+ next unless heading
88
+
89
+ body = heading.next_siblings
90
+ .take_while { |n| !n.is_a?(Tree::Heading) || n.level > heading.level }
91
+
92
+ Section.new(heading, body)
93
+ }.compact
94
+ Tree::Nodes.new(sections)
95
+ end
96
+
84
97
  def lookup_children(*arg)
85
98
  if arg.include?(:Section)
86
99
  sections.find(*(arg - [:Section]))
@@ -94,6 +107,7 @@ module Infoboxer
94
107
  def make_sections
95
108
  res = Tree::Nodes[]
96
109
  return res if headings.empty?
110
+
97
111
  level = headings.first.level
98
112
 
99
113
  children
@@ -123,21 +137,25 @@ module Infoboxer
123
137
  #
124
138
  # @return {Tree::Nodes<Section>}
125
139
  def in_sections
126
- main_node = parent.is_a?(Tree::Document) ? self : lookup_parents[-2]
140
+ return parent.in_sections unless parent.is_a?(Tree::Document)
141
+ return @in_sections if @in_sections
127
142
 
128
143
  heading =
129
- if main_node.is_a?(Tree::Heading)
130
- main_node.lookup_prev_siblings(Tree::Heading, level: main_node.level - 1).last
144
+ if is_a?(Tree::Heading)
145
+ lookup_prev_sibling(Tree::Heading, level: level - 1)
131
146
  else
132
- main_node.lookup_prev_siblings(Tree::Heading).last
147
+ lookup_prev_sibling(Tree::Heading)
133
148
  end
134
- return Tree::Nodes[] unless heading
149
+ unless heading
150
+ @in_sections = Tree::Nodes[]
151
+ return @in_sections
152
+ end
135
153
 
136
154
  body = heading.next_siblings
137
- .take_while { |n| !n.is_a?(Tree::Heading) || n.level < heading.level }
155
+ .take_while { |n| !n.is_a?(Tree::Heading) || n.level > heading.level }
138
156
 
139
157
  section = Section.new(heading, body)
140
- Tree::Nodes[section, *heading.in_sections]
158
+ @in_sections = Tree::Nodes[section, *heading.in_sections]
141
159
  end
142
160
  end
143
161
 
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  module Navigation
@@ -10,7 +10,7 @@ module Infoboxer
10
10
  def initialize(*arg, &block)
11
11
  @arg = [arg, block].flatten.compact.map(&method(:sym_to_class))
12
12
  @arg.each do |a|
13
- a.reject! { |_k, v| v.nil? } if a.is_a?(Hash)
13
+ a.compact! if a.is_a?(Hash)
14
14
  end
15
15
  end
16
16
 
@@ -24,8 +24,8 @@ module Infoboxer
24
24
  "#<Selector(#{@arg.map(&:to_s).join(', ')})>"
25
25
  end
26
26
 
27
- def matches?(node)
28
- @arg.all? { |a| arg_matches?(a, node) }
27
+ def ===(other)
28
+ @arg.all? { |a| arg_matches?(a, other) }
29
29
  end
30
30
 
31
31
  private
@@ -44,8 +44,8 @@ module Infoboxer
44
44
  check.call(node)
45
45
  when Hash
46
46
  check.all? { |attr, value|
47
- node.respond_to?(attr) && value === node.send(attr) ||
48
- node.params.key?(attr) && value === node.params[attr]
47
+ node.respond_to?(attr) && value_matches?(value, node.send(attr)) ||
48
+ node.params.key?(attr) && value_matches?(value, node.params[attr])
49
49
  }
50
50
  when Symbol
51
51
  node.respond_to?(check) && node.send(check)
@@ -53,6 +53,14 @@ module Infoboxer
53
53
  check === node
54
54
  end
55
55
  end
56
+
57
+ def value_matches?(matcher, value)
58
+ if matcher.is_a?(String) && value.is_a?(String)
59
+ matcher.casecmp(value).zero?
60
+ else
61
+ matcher === value
62
+ end
63
+ end
56
64
  end
57
65
  end
58
66
  end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Infoboxer
4
4
  module Navigation
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  require_relative '../wiki_path'
4
4
 
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  require 'ostruct'
4
4
  require 'logger'
@@ -54,7 +54,7 @@ module Infoboxer
54
54
  def initialize(context)
55
55
  @context = context
56
56
  @re = OpenStruct.new(make_regexps)
57
- @logger = Logger.new(STDOUT).tap { |l| l.level = Logger::FATAL }
57
+ @logger = Logger.new($stdout).tap { |l| l.level = Logger::FATAL }
58
58
  end
59
59
 
60
60
  require_relative 'parser/inline'
@@ -1,16 +1,16 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  require 'strscan'
4
4
 
5
5
  module Infoboxer
6
6
  class Parser
7
- class Context
7
+ class Context # rubocop:disable Metrics/ClassLength
8
8
  attr_reader :lineno
9
9
  attr_reader :traits
10
10
 
11
11
  def initialize(text, traits = nil)
12
12
  @lines = text
13
- .gsub(/<!--.+?-->/m, '') # FIXME: will also kill comments inside <nowiki> tag
13
+ .gsub(/<!--.*?-->/m, '') # FIXME: will also kill comments inside <nowiki> tag
14
14
  .split(/[\r\n]/)
15
15
  @lineno = -1
16
16
  @traits = traits || MediaWiki::Traits.default
@@ -21,22 +21,23 @@ module Infoboxer
21
21
  attr_reader :next_lines
22
22
 
23
23
  def colno
24
- @scanner && @scanner.pos || 0
24
+ @scanner&.pos || 0
25
25
  end
26
26
 
27
27
  def matched
28
- @matched ||= @scanner && @scanner.matched
28
+ @matched ||= @scanner&.matched
29
29
  end
30
30
 
31
31
  # check which works only once
32
32
  def eat_matched?(str)
33
33
  return false unless matched == str
34
+
34
35
  @matched = 'DUMMY'
35
36
  true
36
37
  end
37
38
 
38
39
  def rest
39
- @rest ||= @scanner && @scanner.rest
40
+ @rest ||= @scanner&.rest
40
41
  end
41
42
 
42
43
  alias_method :current, :rest
@@ -109,7 +110,7 @@ module Infoboxer
109
110
  end
110
111
 
111
112
  def scan_continued_until(re, leave_pattern = false)
112
- res = ''
113
+ res = +''
113
114
 
114
115
  loop do
115
116
  chunk = _scan_until(re)
@@ -130,7 +131,13 @@ module Infoboxer
130
131
 
131
132
  # state inspection
132
133
  def matched_inline?(re)
133
- re.nil? ? (matched.empty? && eol?) : matched =~ re
134
+ if re.nil?
135
+ matched.empty? && eol?
136
+ elsif re.inspect.start_with?('/^') # was it REALLY at the beginning of the line?..
137
+ @scanner.pos == matched.length && matched =~ re
138
+ else
139
+ matched =~ re
140
+ end
134
141
  end
135
142
 
136
143
  def matched?(re)
@@ -146,6 +153,13 @@ module Infoboxer
146
153
  fail(ParsingError, "#{text} at line #{@lineno}:\n\t#{current}")
147
154
  end
148
155
 
156
+ def unscan_matched!
157
+ return unless @matched
158
+
159
+ @scanner.pos -= @matched.size
160
+ @rest = nil
161
+ end
162
+
149
163
  private
150
164
 
151
165
  # we make heavy use of #matched and #rest, so it's wiser to memoize them
@@ -163,7 +177,7 @@ module Infoboxer
163
177
  def shift(amount)
164
178
  @lineno += amount
165
179
  current = @lines[lineno]
166
- @next_lines = @lines[(lineno + 1)..-1]
180
+ @next_lines = @lines[(lineno + 1)..]
167
181
  if current
168
182
  @scanner.string = current
169
183
  @rest = current