infoboxer 0.2.8 → 0.3.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ef407b6160d6d0ff8cfb874338320532d1b18906
4
- data.tar.gz: fdc4ccf4b051c50958e6cc9720619ea5110b5ea5
3
+ metadata.gz: 67b07bcee67c15aeb316e2eccf75e85a26cb43ae
4
+ data.tar.gz: 91f3253151ff816a83b4ffb8f7937c878a5a3ed8
5
5
  SHA512:
6
- metadata.gz: d5224104b02115c47f27c69926128023da3765d1d7089241190a6b3f5c8c3de79be91f65276c82536c671424d10000e2f5198413b3aa21e9802ccc9ca41fbdef
7
- data.tar.gz: 0f1f8d9ee3a4e94d6529208a9678bcb0915a884525433d2397249afada141d96c88c74e0546ddd0576adffcc2785f96ad1da57d61284b84d96661fb49c0fd46d
6
+ metadata.gz: f7dd9aa6545853dbf827c1b0b7857b2b5654caf33dcbfcaf4c2194709379f7e73d0978776675fd6265cbf83d88a814332626945db1b45c9562e54a01fa486a55
7
+ data.tar.gz: fa70f671da0d9acd7a0a131d8119a775c1ff762d7d2b640096f448678b7547477fd0fe93df74c438cd846fa550e33e530999c811bb4b0d4afe347bf058c3321b
data/.yardopts CHANGED
@@ -1,2 +1,3 @@
1
1
  --markup=markdown
2
+ --markup-provider=redcarpet
2
3
  --no-private
@@ -1,5 +1,23 @@
1
1
  # Infoboxer's change log
2
2
 
3
+ ## 0.3.1.pre (2017-09-16)
4
+
5
+ * Introduce interwiki links following (and proper handling of interwikis, in general);
6
+ * Add `<gallery>` tag support;
7
+ * Introduce `Navigation::Selector#===`;
8
+ * Many more `Enumerable` methods supported by `Nodes`;
9
+ * Lots of small simplifications, cleanups and bugfixes.
10
+
11
+ TBH, it should be 0.4.0 or more, but it would be a shame to change versions so fast :) So, at least
12
+ until it is `-pre`, let it be 0.3.1.
13
+
14
+ ## 0.3.0 (2017-07-23)
15
+
16
+ * Change logic of navigation through templates; now templates' contents aren't hidden from global
17
+ lookups. While sometimes leading to less impressive demos, this approach proved itself to be more
18
+ useful for production.
19
+ * Introduce WikiPath query language as an alternative to series of lookups.
20
+
3
21
  ## 0.2.8 (2017-05-11)
4
22
 
5
23
  * Switch to MediaWiktory 0.1.0 + some subsequent cleanup of internal logic;
@@ -32,7 +32,6 @@ Gem::Specification.new do |s|
32
32
  s.executables << 'infoboxer'
33
33
 
34
34
  s.add_dependency 'htmlentities'
35
- s.add_dependency 'procme'
36
35
  s.add_dependency 'mediawiktory', '>= 0.1.0'
37
36
  s.add_dependency 'addressable'
38
37
  s.add_dependency 'terminal-table'
@@ -1,7 +1,5 @@
1
1
  # encoding: utf-8
2
2
 
3
- require 'procme'
4
-
5
3
  # Main client module for entire infoboxer functionality. If you're lucky,
6
4
  # there's no other classes/modules you need to instantiate or call
7
5
  # directly. You just do:
@@ -74,8 +72,8 @@ module Infoboxer
74
72
  end
75
73
 
76
74
  # Includeable version of {Infoboxer.wiki}
77
- def wiki(api_url, options = {})
78
- wikis[api_url] ||= MediaWiki.new(api_url, options || {})
75
+ def wiki(api_url, **options)
76
+ wikis[api_url] ||= MediaWiki.new(api_url, options)
79
77
  end
80
78
 
81
79
  class << self
@@ -170,7 +168,7 @@ module Infoboxer
170
168
  end
171
169
 
172
170
  WIKIMEDIA_PROJECTS.each do |name, domain|
173
- define_method name do |lang = 'en', options = {}|
171
+ define_method name do |lang = 'en', **options|
174
172
  lang, options = 'en', lang if lang.is_a?(Hash)
175
173
 
176
174
  wiki("https://#{lang}.#{domain}/w/api.php", options)
@@ -180,7 +178,7 @@ module Infoboxer
180
178
  alias_method :wp, :wikipedia
181
179
 
182
180
  WIKIMEDIA_COMMONS.each do |name, domain|
183
- define_method name do |options = {}|
181
+ define_method name do |**options|
184
182
  wiki("https://#{domain}/w/api.php", options)
185
183
  end
186
184
  end
@@ -47,15 +47,14 @@ module Infoboxer
47
47
  # for it, as well as shortcuts for some well-known wikis, like
48
48
  # {Infoboxer.wikipedia}.
49
49
  #
50
- # @param api_base_url URL of `api.php` file in your MediaWiki
50
+ # @param api_base_url [String] URL of `api.php` file in your MediaWiki
51
51
  # installation. Typically, it's `<domain>/w/api.php`, but can vary
52
52
  # in different wikis.
53
- # @param options Only one option is currently supported:
54
- # * `:user_agent` (also aliased as `:ua`) -- custom User-Agent header.
55
- def initialize(api_base_url, options = {})
53
+ # @param user_agent [String] (also aliased as `:ua`) Custom User-Agent header.
54
+ def initialize(api_base_url, ua: nil, user_agent: ua)
56
55
  @api_base_url = Addressable::URI.parse(api_base_url)
57
- @client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(options))
58
- @traits = Traits.get(@api_base_url.host, namespaces: extract_namespaces)
56
+ @client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
57
+ @traits = Traits.get(@api_base_url.host, siteinfo)
59
58
  end
60
59
 
61
60
  # Receive "raw" data from Wikipedia (without parsing or wrapping in
@@ -123,7 +122,9 @@ module Infoboxer
123
122
  # and obtain meaningful results instead of `NoMethodError` or
124
123
  # `SomethingNotFound`.
125
124
  #
126
- def get(*titles, prop: [])
125
+ def get(*titles, prop: [], interwiki: nil)
126
+ return interwikis(interwiki).get(*titles, prop: prop) if interwiki
127
+
127
128
  pages = get_h(*titles, prop: prop).values.compact
128
129
  titles.count == 1 ? pages.first : Tree::Nodes[*pages]
129
130
  end
@@ -251,17 +252,26 @@ module Infoboxer
251
252
  [namespace, titl].join(':')
252
253
  end
253
254
 
254
- def user_agent(options)
255
- options[:user_agent] || options[:ua] || self.class.user_agent || UA
255
+ def user_agent(custom)
256
+ custom || self.class.user_agent || UA
257
+ end
258
+
259
+ def siteinfo
260
+ @siteinfo ||= @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
256
261
  end
257
262
 
258
- def extract_namespaces
259
- siteinfo = @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases).response
260
- siteinfo['namespaces'].map do |_, namespace|
261
- aliases =
262
- siteinfo['namespacealiases'].select { |a| a['id'] == namespace['id'] }.map { |a| a['*'] }
263
- namespace.merge('aliases' => aliases)
264
- end
263
+ def interwikis(prefix)
264
+ @interwikis ||= Hash.new { |h, pre|
265
+ interwiki = siteinfo['interwikimap'].detect { |iw| iw['prefix'] == prefix } or
266
+ fail ArgumentError, "Undefined interwiki: #{prefix}"
267
+
268
+ # FIXME: fragile, but what can we do?..
269
+ m = interwiki['url'].match(%r{^(.+)/wiki/\$1$}) or
270
+ fail ArgumentError, "Interwiki #{interwiki} seems not to be a MediaWiki instance"
271
+ h[pre] = self.class.new("#{m[1]}/w/api.php") # TODO: copy useragent
272
+ }
273
+
274
+ @interwikis[prefix]
265
275
  end
266
276
  end
267
277
  end
@@ -34,9 +34,8 @@ module Infoboxer
34
34
  end
35
35
 
36
36
  # @private
37
- def get(domain, options = {})
38
- cls = Traits.domains[domain]
39
- cls ? cls.new(options) : Traits.new(options)
37
+ def get(domain, site_info = {})
38
+ (Traits.domains[domain] || Traits).new(site_info)
40
39
  end
41
40
 
42
41
  # @private
@@ -68,18 +67,27 @@ module Infoboxer
68
67
  alias_method :default, :new
69
68
  end
70
69
 
71
- def initialize(options = {})
72
- @options = options
73
- @file_namespace =
74
- [DEFAULTS[:file_namespace], namespace_aliases(options, 'File')]
75
- .flatten.compact.uniq
76
- @category_namespace =
77
- [DEFAULTS[:category_namespace], namespace_aliases(options, 'Category')]
78
- .flatten.compact.uniq
70
+ def initialize(site_info = {})
71
+ @site_info = site_info
72
+ end
73
+
74
+ def namespace?(prefix)
75
+ known_namespaces.include?(prefix)
76
+ end
77
+
78
+ def interwiki?(prefix)
79
+ known_interwikis.key?(prefix)
80
+ end
81
+
82
+ # @private
83
+ def file_namespace
84
+ @file_namespace ||= ns_aliases('File')
79
85
  end
80
86
 
81
87
  # @private
82
- attr_reader :file_namespace, :category_namespace
88
+ def category_namespace
89
+ @category_namespace ||= ns_aliases('Category')
90
+ end
83
91
 
84
92
  # @private
85
93
  def templates
@@ -88,16 +96,54 @@ module Infoboxer
88
96
 
89
97
  private
90
98
 
91
- def namespace_aliases(options, canonical)
92
- namespace = (options[:namespaces] || []).detect { |v| v['canonical'] == canonical }
93
- return nil unless namespace
94
- [namespace['*'], *namespace['aliases']]
99
+ def known_namespaces
100
+ @known_namespaces ||=
101
+ if @site_info.empty?
102
+ STANDARD_NAMESPACES
103
+ else
104
+ (@site_info['namespaces'].values + @site_info['namespacealiases']).map { |n| n['*'] }
105
+ end
106
+ end
107
+
108
+ def known_interwikis
109
+ @known_interwikis ||=
110
+ if @site_info.empty?
111
+ {}
112
+ else
113
+ @site_info['interwikimap'].map { |iw| [iw['prefix'], iw] }.to_h
114
+ end
115
+ end
116
+
117
+ def ns_aliases(base)
118
+ return [base] if @site_info.empty?
119
+ main = @site_info['namespaces'].values.detect { |n| n['canonical'] == base }
120
+ [base, main['*']] +
121
+ @site_info['namespacealiases']
122
+ .select { |a| a['id'] == main['id'] }.flat_map { |n| n['*'] }
123
+ .compact.uniq
95
124
  end
96
125
 
97
- DEFAULTS = {
98
- file_namespace: 'File',
99
- category_namespace: 'Category'
100
- }.freeze
126
+ # See https://www.mediawiki.org/wiki/Help:Namespaces#Standard_namespaces
127
+ STANDARD_NAMESPACES = [
128
+ 'Media', # Direct linking to media files.
129
+ 'Special', # Special (non-editable) pages.
130
+ '', # (Main)
131
+ 'Talk', # Article discussion.
132
+ 'User', #
133
+ 'User talk', #
134
+ 'Project', # Meta-discussions related to the operation and development of the wiki.
135
+ 'Project talk', #
136
+ 'File', # Metadata for images, videos, sound files and other media.
137
+ 'File talk', #
138
+ 'MediaWiki', # System messages and other important content.
139
+ 'MediaWiki talk', #
140
+ 'Template', # Templates: blocks of text or wikicode that are intended to be transcluded.
141
+ 'Template talk', #
142
+ 'Help', # Help files, instructions and "how-to" guides.
143
+ 'Help talk', #
144
+ 'Category', # Categories: dynamic lists of other pages.
145
+ 'Category talk', #
146
+ ].freeze
101
147
  end
102
148
  end
103
149
  end
@@ -38,6 +38,19 @@ module Infoboxer
38
38
  #
39
39
  # Look into {Shortcuts::Node} documentation for list of shortcuts.
40
40
  #
41
+ # ## Wikipath
42
+ #
43
+ # WikiPath is an XPath-like query language you can use to navigate the tree:
44
+ #
45
+ # ```ruby
46
+ # document.wikipath('//paragraph//wikilink[namespace=Category]')
47
+ # ```
48
+ #
49
+ # It can look more or less verbose than pure-ruby navigation, but the big advantage of WikiPath
50
+ # is that it is pure data: you can store some paths in a YAML file, for example.
51
+ #
52
+ # Look at {Wikipath#wikipath #wikipath} method docs for full reference.
53
+ #
41
54
  # ## Logical structure navigation
42
55
  #
43
56
  # MediaWiki page structure is flat, like HTML's (there's just sequence
@@ -62,7 +75,7 @@ module Infoboxer
62
75
  # {Sections::Node} for upwards.
63
76
  #
64
77
  module Navigation
65
- %w[lookup shortcuts sections].each do |nav|
78
+ %w[lookup shortcuts sections wikipath].each do |nav|
66
79
  require_relative "navigation/#{nav}"
67
80
  end
68
81
 
@@ -70,12 +83,14 @@ module Infoboxer
70
83
  include Navigation::Lookup::Node
71
84
  include Navigation::Shortcuts::Node
72
85
  include Navigation::Sections::Node
86
+ include Navigation::Wikipath
73
87
  end
74
88
 
75
89
  class Tree::Nodes
76
90
  include Navigation::Lookup::Nodes
77
91
  include Navigation::Shortcuts::Nodes
78
92
  include Navigation::Sections::Nodes
93
+ include Navigation::Wikipath
79
94
  end
80
95
 
81
96
  class Tree::Document
@@ -98,9 +98,13 @@ module Infoboxer
98
98
  # Selects matching nodes from current node's siblings, which
99
99
  # are above current node in parents children list.
100
100
 
101
+ # @!method lookup_prev_sibling(*selectors, &block)
102
+ # Selects the nearest matching node from current node's siblings, which
103
+ # are above current node in parents children list.
104
+
101
105
  # Underscored version of {#matches?}
102
106
  def _matches?(selector)
103
- selector.matches?(self)
107
+ selector === self
104
108
  end
105
109
 
106
110
  # Underscored version of {#lookup}
@@ -136,6 +140,11 @@ module Infoboxer
136
140
  prev_siblings._find(selector)
137
141
  end
138
142
 
143
+ # Underscored version of {#lookup_prev_sibling}
144
+ def _lookup_prev_sibling(selector)
145
+ prev_siblings.reverse.detect { |n| selector === n }
146
+ end
147
+
139
148
  # Underscored version of {#lookup_next_siblings}
140
149
  def _lookup_next_siblings(selector)
141
150
  next_siblings._find(selector)
@@ -146,6 +155,7 @@ module Infoboxer
146
155
  lookup lookup_children lookup_parents
147
156
  lookup_siblings
148
157
  lookup_next_siblings lookup_prev_siblings
158
+ lookup_prev_sibling
149
159
  ]
150
160
  .map { |sym| [sym, :"_#{sym}"] }
151
161
  .each do |sym, underscored|
@@ -81,6 +81,14 @@ module Infoboxer
81
81
  end
82
82
  end
83
83
 
84
+ def lookup_children(*arg)
85
+ if arg.include?(:Section)
86
+ sections.find(*(arg - [:Section]))
87
+ else
88
+ super
89
+ end
90
+ end
91
+
84
92
  private
85
93
 
86
94
  def make_sections
@@ -115,21 +123,25 @@ module Infoboxer
115
123
  #
116
124
  # @return {Tree::Nodes<Section>}
117
125
  def in_sections
118
- main_node = parent.is_a?(Tree::Document) ? self : lookup_parents[-2]
126
+ return parent.in_sections unless parent.is_a?(Tree::Document)
127
+ return @in_sections if @in_sections
119
128
 
120
129
  heading =
121
- if main_node.is_a?(Tree::Heading)
122
- main_node.lookup_prev_siblings(Tree::Heading, level: main_node.level - 1).last
130
+ if is_a?(Tree::Heading)
131
+ lookup_prev_sibling(Tree::Heading, level: level - 1)
123
132
  else
124
- main_node.lookup_prev_siblings(Tree::Heading).last
133
+ lookup_prev_sibling(Tree::Heading)
125
134
  end
126
- return Tree::Nodes[] unless heading
135
+ unless heading
136
+ @in_sections = Tree::Nodes[]
137
+ return @in_sections
138
+ end
127
139
 
128
140
  body = heading.next_siblings
129
141
  .take_while { |n| !n.is_a?(Tree::Heading) || n.level < heading.level }
130
142
 
131
143
  section = Section.new(heading, body)
132
- Tree::Nodes[section, *heading.in_sections]
144
+ @in_sections = Tree::Nodes[section, *heading.in_sections]
133
145
  end
134
146
  end
135
147
 
@@ -145,6 +157,14 @@ module Infoboxer
145
157
  make_nodes(map { |n| n.send(sym, *args) })
146
158
  end
147
159
  end
160
+
161
+ def lookup_children(*arg)
162
+ if arg.include?(:Section)
163
+ sections.find(*(arg - [:Section]))
164
+ else
165
+ super
166
+ end
167
+ end
148
168
  end
149
169
 
150
170
  # Virtual node, representing logical section of the document.
@@ -175,6 +195,10 @@ module Infoboxer
175
195
  false
176
196
  end
177
197
 
198
+ def inspect
199
+ "#<#{descr}: #{children.count} nodes>"
200
+ end
201
+
178
202
  include Container
179
203
  end
180
204
  end
@@ -7,8 +7,6 @@ module Infoboxer
7
7
  #
8
8
  # See {Lookup::Node Lookup::Node} for detailed explanation of available selectors.
9
9
  class Selector
10
- include ProcMe
11
-
12
10
  def initialize(*arg, &block)
13
11
  @arg = [arg, block].flatten.compact.map(&method(:sym_to_class))
14
12
  @arg.each do |a|
@@ -26,8 +24,8 @@ module Infoboxer
26
24
  "#<Selector(#{@arg.map(&:to_s).join(', ')})>"
27
25
  end
28
26
 
29
- def matches?(node)
30
- @arg.all? { |a| arg_matches?(a, node) }
27
+ def ===(other)
28
+ @arg.all? { |a| arg_matches?(a, other) }
31
29
  end
32
30
 
33
31
  private
@@ -45,13 +43,24 @@ module Infoboxer
45
43
  when Proc
46
44
  check.call(node)
47
45
  when Hash
48
- check.all? { |attr, value| node.respond_to?(attr) && value === node.send(attr) }
46
+ check.all? { |attr, value|
47
+ node.respond_to?(attr) && value_matches?(value, node.send(attr)) ||
48
+ node.params.key?(attr) && value_matches?(value, node.params[attr])
49
+ }
49
50
  when Symbol
50
51
  node.respond_to?(check) && node.send(check)
51
52
  else
52
53
  check === node
53
54
  end
54
55
  end
56
+
57
+ def value_matches?(matcher, value)
58
+ if matcher.is_a?(String) && value.is_a?(String)
59
+ matcher.casecmp(value).zero?
60
+ else
61
+ matcher === value
62
+ end
63
+ end
55
64
  end
56
65
  end
57
66
  end
@@ -0,0 +1,32 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative '../wiki_path'
4
+
5
+ module Infoboxer
6
+ module Navigation
7
+ module Wikipath
8
+ # Search nodes inside the current one using an XPath-like query language.
9
+ #
10
+ # This feature is experimental, but should work for most of the useful cases.
11
+ #
12
+ # Examples of WikiPath:
13
+ #
14
+ # ```
15
+ # /paragraph # direct child of current node, being paragraph
16
+ # //paragraph # any node in current node's subtree, being paragraph
17
+ # //template[name=Infobox] # template node in subtree, with name attribute equal to Infobox
18
+ # //template[name="Infobox country"] # optional quotes are allowed
19
+ # //template[name=/^Infobox/] # regexes are supported
20
+ # //wikilink[italic] # node predicates are supported (the same as `lookup(:Wikilink, :italic?)`)
21
+ # //*[italic] # type wildcards are supported
22
+ # //template[name=/^Infobox/]/var[name=birthday] # series of lookups work
23
+ # ```
24
+ #
25
+ # @param string [String] WikiPath to lookup
26
+ # @return [Nodes]
27
+ def wikipath(string)
28
+ Infoboxer::WikiPath.parse(string).call(self)
29
+ end
30
+ end
31
+ end
32
+ end
@@ -1,7 +1,6 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require 'ostruct'
4
- require 'procme'
5
4
  require 'logger'
6
5
 
7
6
  module Infoboxer
@@ -83,7 +83,7 @@ module Infoboxer
83
83
 
84
84
  private
85
85
 
86
- def inline_formatting(match)
86
+ def inline_formatting(match) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/AbcSize
87
87
  case match
88
88
  when "'''''"
89
89
  BoldItalic.new(short_inline(/'''''/))
@@ -109,6 +109,8 @@ module Infoboxer
109
109
  reference(Regexp.last_match(1))
110
110
  when /<math>/
111
111
  math
112
+ when /<gallery([^>]*)>/
113
+ gallery(Regexp.last_match(1))
112
114
  when '<'
113
115
  html || Text.new(match) # it was not HTML, just accidental <
114
116
  else
@@ -126,8 +128,18 @@ module Infoboxer
126
128
  caption = inline(/\]\]/)
127
129
  @context.pop_eol_sign
128
130
  end
131
+ name, namespace = link.split(':', 2).reverse
132
+ lnk, params =
133
+ if @context.traits.namespace?(namespace)
134
+ [link, {namespace: namespace}]
135
+ elsif @context.traits.interwiki?(namespace)
136
+ [name, {interwiki: namespace}]
137
+ else
138
+ [link, {}]
139
+ end
129
140
 
130
- Wikilink.new(link, caption)
141
+ puts @context.rest if lnk.nil?
142
+ Wikilink.new(lnk, caption, **params)
131
143
  end
132
144
 
133
145
  # http://en.wikipedia.org/wiki/Help:Link#External_links
@@ -159,6 +171,34 @@ module Infoboxer
159
171
  Text.new(@context.scan_continued_until(%r{</nowiki>}))
160
172
  end
161
173
  end
174
+
175
+ def gallery(tag_rest)
176
+ params = parse_params(tag_rest)
177
+ images = []
178
+ guarded_loop do
179
+ @context.next! if @context.eol?
180
+ path = @context.scan_until(%r{</gallery>|\||$})
181
+ attrs = @context.matched == '|' ? gallery_image_attrs : {}
182
+ unless path.empty?
183
+ images << Tree::Image.new(path.sub(/^#{re.file_namespace}/, ''), attrs)
184
+ end
185
+ break if @context.matched == '</gallery>'
186
+ end
187
+ Gallery.new(images, params)
188
+ end
189
+
190
+ def gallery_image_attrs
191
+ nodes = []
192
+
193
+ guarded_loop do
194
+ nodes << short_inline(%r{\||</gallery>})
195
+ break if @context.eol? || @context.matched?(%r{</gallery>})
196
+ end
197
+
198
+ nodes.map(&method(:image_attr))
199
+ .inject(&:merge)
200
+ .reject { |_k, v| v.nil? || v.empty? }
201
+ end
162
202
  end
163
203
 
164
204
  require_relative 'image'
@@ -14,7 +14,7 @@ module Infoboxer
14
14
 
15
15
  @context.next!
16
16
  end
17
- nodes.flow_templates
17
+ nodes
18
18
  end
19
19
 
20
20
  private
@@ -29,8 +29,8 @@ module Infoboxer
29
29
 
30
30
  guarded_loop do
31
31
  @context.next! while @context.eol?
32
- if @context.check(/\s*([^ =}|<]+)\s*=\s*/)
33
- name = @context.scan(/\s*([^ =]+)/).strip
32
+ if @context.check(/\s*([^=}|<]+)\s*=\s*/)
33
+ name = @context.scan(/\s*([^=]+)/).strip
34
34
  @context.skip(/\s*=\s*/)
35
35
  else
36
36
  name = num
@@ -38,7 +38,7 @@ module Infoboxer
38
38
  end
39
39
  log "Variable #{name} found"
40
40
 
41
- value = long_inline(/\||}}/)
41
+ value = sanitize_value(long_inline(/\||}}/))
42
42
 
43
43
  # it was just empty line otherwise
44
44
  res << Var.new(name.to_s, value) unless value.empty? && name.is_a?(Numeric)
@@ -50,6 +50,11 @@ module Infoboxer
50
50
  end
51
51
  res
52
52
  end
53
+
54
+ def sanitize_value(nodes)
55
+ nodes.pop if (nodes.last.is_a?(Pre) || nodes.last.is_a?(Text)) && nodes.last.text =~ /^\s*$/ # FIXME: dirty!
56
+ nodes
57
+ end
53
58
  end
54
59
  end
55
60
  end
@@ -12,6 +12,7 @@ module Infoboxer
12
12
  \[[a-z]+:// | # external link
13
13
  <nowiki[^>]*> | # nowiki
14
14
  <ref[^>]*> | # reference
15
+ <gallery[^>]*>| # gallery
15
16
  <math> | # math
16
17
  < # HTML tag
17
18
  ))x
@@ -35,9 +35,11 @@ module Infoboxer
35
35
  #
36
36
  # Used for {Set} definitions.
37
37
  class Show < Base
38
- alias_method :children, :unnamed_variables
38
+ def text
39
+ unnamed_variables.map(&:text).join(children_separator)
40
+ end
39
41
 
40
- protected
42
+ private
41
43
 
42
44
  def children_separator
43
45
  ' '
@@ -63,7 +63,7 @@ module Infoboxer
63
63
  require_relative 'tree/nodes'
64
64
 
65
65
  %w[text compound inline
66
- image html paragraphs list template table ref math
66
+ image gallery html paragraphs list template table ref math
67
67
  document].each do |type|
68
68
  require_relative "tree/#{type}"
69
69
  end
@@ -4,7 +4,7 @@ module Infoboxer
4
4
  module Tree
5
5
  # Base class for all nodes with children.
6
6
  class Compound < Node
7
- def initialize(children = Nodes.new, params = {})
7
+ def initialize(children = Nodes.new, **params)
8
8
  super(params)
9
9
  @children = Nodes[*children]
10
10
  @children.each { |c| c.parent = self }
@@ -40,8 +40,7 @@ module Infoboxer
40
40
  if children.count == 1 && children.first.is_a?(Text)
41
41
  "#{indent(level)}#{children.first.text} <#{descr}>\n"
42
42
  else
43
- "#{indent(level)}<#{descr}>\n" +
44
- children.map(&call(to_tree: level + 1)).join
43
+ "#{indent(level)}<#{descr}>\n" + children.map { |c| c.to_tree(level + 1) }.join
45
44
  end
46
45
  end
47
46
 
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
+ module Infoboxer
4
+ module Tree
5
+ # Represents gallery of images (contents of `<gallery>` special tag).
6
+ #
7
+ # See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Help:Gallery_tag)
8
+ # for explanation of attributes.
9
+ class Gallery < Compound
10
+ end
11
+ end
12
+ end
@@ -7,8 +7,8 @@ module Infoboxer
7
7
  # See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Wikipedia:Extended_image_syntax)
8
8
  # for explanation of attributes.
9
9
  class Image < Node
10
- def initialize(path, params = {})
11
- @caption = params.delete(:caption)
10
+ def initialize(path, caption: nil, **params)
11
+ @caption = caption
12
12
  super({path: path}.merge(params))
13
13
  end
14
14
 
@@ -44,7 +44,7 @@ module Infoboxer
44
44
  super(level) +
45
45
  if caption && !caption.empty?
46
46
  indent(level + 1) + "caption:\n" +
47
- caption.children.map(&call(to_tree: level + 2)).join
47
+ caption.children.map { |c| c.to_tree(level + 2) }.join
48
48
  else
49
49
  ''
50
50
  end
@@ -17,8 +17,8 @@ module Infoboxer
17
17
 
18
18
  # Base class for internal/external links,
19
19
  class Link < Compound
20
- def initialize(link, label = nil)
21
- super(label || Nodes.new([Text.new(link)]), link: link)
20
+ def initialize(link, label = nil, **attr)
21
+ super(label || Nodes.new([Text.new(link)]), link: link, **attr)
22
22
  end
23
23
 
24
24
  # @!attribute [r] link
@@ -15,7 +15,7 @@ module Infoboxer
15
15
  # * {Tree::Nodes#follow} for extracting multiple links at once;
16
16
  # * {MediaWiki#get} for basic information on page extraction.
17
17
  def follow
18
- client.get(link)
18
+ client.get(link, interwiki: interwiki)
19
19
  end
20
20
 
21
21
  # Human-readable page URL
@@ -28,6 +28,9 @@ module Infoboxer
28
28
 
29
29
  protected
30
30
 
31
+ # redefined in {Wikilink}
32
+ def interwiki; end
33
+
31
34
  def page
32
35
  lookup_parents(MediaWiki::Page).first or fail('Not in a page from real source')
33
36
  end
@@ -4,9 +4,6 @@ module Infoboxer
4
4
  #
5
5
  # See also: https://en.wikipedia.org/wiki/Help:Displaying_a_formula
6
6
  class Math < Text
7
- def text
8
- "<math>#{super}</math>"
9
- end
10
7
  end
11
8
  end
12
9
  end
@@ -11,9 +11,7 @@ module Infoboxer
11
11
  # you will receive it from tree and use for navigations.
12
12
  #
13
13
  class Node
14
- include ProcMe
15
-
16
- def initialize(params = {})
14
+ def initialize(**params)
17
15
  @params = params
18
16
  end
19
17
 
@@ -42,6 +40,10 @@ module Infoboxer
42
40
  parent ? parent.index_of(self) : 0
43
41
  end
44
42
 
43
+ def first?
44
+ index.zero?
45
+ end
46
+
45
47
  # List of all sibling nodes (children of same parent)
46
48
  def siblings
47
49
  parent ? parent.children - [self] : Nodes[]
@@ -152,7 +154,7 @@ module Infoboxer
152
154
  end
153
155
 
154
156
  def show_params(prms = nil)
155
- (prms || params).map { |k, v| "#{k}: #{v.inspect}" }.join(', ')
157
+ (prms || params).reject { |_, v| v.nil? }.map { |k, v| "#{k}: #{v.inspect}" }.join(', ')
156
158
  end
157
159
 
158
160
  def indent(level)
@@ -38,10 +38,19 @@ module Infoboxer
38
38
  # @!method compact
39
39
  # Just like Array#compact, but returns Nodes
40
40
 
41
+ # @!method grep(pattern)
42
+ # Just like Array#grep, but returns Nodes
43
+
44
+ # @!method grep_v(pattern)
45
+ # Just like Array#grep_v, but returns Nodes
46
+
41
47
  # @!method -(other)
42
48
  # Just like Array#-, but returns Nodes
43
49
 
44
- %i[select reject sort_by flatten compact -].each do |sym|
50
+ # @!method +(other)
51
+ # Just like Array#+, but returns Nodes
52
+
53
+ %i[select reject sort_by flatten compact grep grep_v - +].each do |sym|
45
54
  define_method(sym) do |*args, &block|
46
55
  Nodes[*super(*args, &block)]
47
56
  end
@@ -75,6 +84,21 @@ module Infoboxer
75
84
  end
76
85
  end
77
86
 
87
+ # Just like Array#flat_map, but returns Nodes, **if** all map results are Node
88
+ def flat_map
89
+ res = super
90
+ if res.all? { |n| n.is_a?(Node) || n.is_a?(Nodes) }
91
+ Nodes[*res]
92
+ else
93
+ res
94
+ end
95
+ end
96
+
97
+ # Just like Array#group_by, but returns hash with `{<grouping variable> => Nodes}`
98
+ def group_by
99
+ super.map { |title, group| [title, Nodes[*group]] }.to_h
100
+ end
101
+
78
102
  # @!method prev_siblings
79
103
  # Previous siblings (flat list) of all nodes inside.
80
104
 
@@ -139,12 +163,14 @@ module Infoboxer
139
163
  # @return [Nodes<MediaWiki::Page>] It is still `Nodes`, so you
140
164
  # still can process them uniformly.
141
165
  def follow
142
- links = select { |n| n.respond_to?(:link) }.map(&:link)
166
+ links = grep(Linkable)
143
167
  return Nodes[] if links.empty?
144
168
  page = first.lookup_parents(MediaWiki::Page).first or
145
169
  fail('Not in a page from real source')
146
170
  page.client or fail('MediaWiki client not set')
147
- page.client.get(*links)
171
+ pages = links.group_by(&:interwiki)
172
+ .flat_map { |iw, ls| page.client.get(*ls.map(&:link), interwiki: iw) }
173
+ pages.count == 1 ? pages.first : Nodes[*pages]
148
174
  end
149
175
 
150
176
  # @private
@@ -173,7 +199,9 @@ module Infoboxer
173
199
  # @private
174
200
  # Internal, used by {Parser}
175
201
  def flow_templates
176
- make_nodes(map { |n| n.is_a?(Paragraph) ? n.to_templates? : n })
202
+ # TODO: will it be better?..
203
+ # make_nodes(map { |n| n.is_a?(Paragraph) ? n.to_templates? : n })
204
+ self
177
205
  end
178
206
 
179
207
  private
@@ -75,7 +75,7 @@ module Infoboxer
75
75
  # @private
76
76
  # Internal, used by {Parser}
77
77
  def to_templates
78
- children.select(&filter(itself: Template))
78
+ children.grep(Template)
79
79
  end
80
80
 
81
81
  # @private
@@ -13,12 +13,12 @@ module Infoboxer
13
13
 
14
14
  # All table rows.
15
15
  def rows
16
- children.select(&fltr(itself: TableRow))
16
+ children.grep(TableRow)
17
17
  end
18
18
 
19
19
  # Table caption, if exists.
20
20
  def caption
21
- children.detect(&fltr(itself: TableCaption))
21
+ children.grep(TableCaption).first
22
22
  end
23
23
 
24
24
  # For now, returns first table row, if it consists only of
@@ -26,12 +26,12 @@ module Infoboxer
26
26
  #
27
27
  # FIXME: it can easily be several table heading rows
28
28
  def heading_row
29
- rows.first if rows.first && rows.first.children.all?(&call(matches?: TableHeading))
29
+ rows.first if rows.first && rows.first.children.all? { |c| c.is_a?(TableHeading) }
30
30
  end
31
31
 
32
32
  # For now, returns all table rows except {#heading_row}
33
33
  def body_rows
34
- if rows.first && rows.first.children.all?(&call(matches?: TableHeading))
34
+ if rows.first && rows.first.children.all? { |c| c.is_a?(TableHeading) }
35
35
  rows[1..-1]
36
36
  else
37
37
  rows
@@ -39,19 +39,11 @@ module Infoboxer
39
39
  end
40
40
 
41
41
  def text
42
- table = Terminal::Table.new
43
- table.title = caption.text.sub(/\n+\Z/, '') if caption
44
-
45
- if heading_row
46
- table.headings = heading_row.children.map(&:text)
47
- .map(&call(sub: [/\n+\Z/, '']))
48
- end
49
-
50
- table.rows = body_rows.map { |r|
51
- r.children.map(&:text)
52
- .map(&call(sub: [/\n+\Z/, '']))
53
- }
54
- table.to_s + "\n\n"
42
+ Terminal::Table.new.tap { |table|
43
+ table.title = caption.text.sub(/\n+\Z/, '') if caption
44
+ table.headings = heading_row.children.map(&:text_) if heading_row
45
+ table.rows = body_rows.map { |r| r.children.map(&:text_) }
46
+ }.to_s + "\n\n"
55
47
  end
56
48
  end
57
49
 
@@ -17,10 +17,15 @@ module Infoboxer
17
17
  end
18
18
 
19
19
  # Internal, used by {Parser}
20
+ # Means even children-less Var should not be removed from parser tree.
20
21
  def empty?
21
22
  false
22
23
  end
23
24
 
25
+ def named?
26
+ name !~ /^\d+$/
27
+ end
28
+
24
29
  protected
25
30
 
26
31
  def descr
@@ -54,6 +59,7 @@ module Infoboxer
54
59
  # values.
55
60
  #
56
61
  # ### On variables naming
62
+ #
57
63
  # MediaWiki templates can contain _named_ and _unnamed_ variables.
58
64
  # Example:
59
65
  #
@@ -104,12 +110,16 @@ module Infoboxer
104
110
  # See {Var} class to understand what you can do with them.
105
111
  #
106
112
  # @return [Nodes<Var>]
107
- attr_reader :variables
113
+ # attr_reader :variables
114
+ alias_method :variables, :children
108
115
 
109
116
  def initialize(name, variables = Nodes[])
110
- super(Nodes[], extract_params(variables))
117
+ super(variables, extract_params(variables))
111
118
  @name = name
112
- @variables = Nodes[*variables].each { |v| v.parent = self }
119
+ end
120
+
121
+ def text
122
+ ''
113
123
  end
114
124
 
115
125
  # See {Node#to_tree}
@@ -133,7 +143,7 @@ module Infoboxer
133
143
  #
134
144
  # @return [Nodes<Var>]
135
145
  def unnamed_variables
136
- variables.find(name: /^\d+$/)
146
+ variables.reject(&:named?)
137
147
  end
138
148
 
139
149
  # Fetches template variable(s) by name(s) or patterns.
@@ -236,7 +246,7 @@ module Infoboxer
236
246
  def extract_params(vars)
237
247
  vars
238
248
  .select { |v| v.children.count == 1 && v.children.first.is_a?(Text) }
239
- .map { |v| [v.name, v.children.first.raw_text] }.to_h
249
+ .map { |v| [v.name.to_sym, v.children.first.raw_text] }.to_h
240
250
  end
241
251
 
242
252
  def inspect_variables(depth)
@@ -15,7 +15,7 @@ module Infoboxer
15
15
  # Text fragment without decoding of HTML entities.
16
16
  attr_accessor :raw_text
17
17
 
18
- def initialize(text, params = {})
18
+ def initialize(text, **params)
19
19
  super(params)
20
20
  @raw_text = text
21
21
  end
@@ -12,14 +12,23 @@ module Infoboxer
12
12
  # Note, that Wikilink is {Linkable}, so you can {Linkable#follow #follow}
13
13
  # it to obtain linked pages.
14
14
  class Wikilink < Link
15
- def initialize(*)
16
- super
17
- parse_link!
15
+ def initialize(link, label = nil, namespace: nil, interwiki: nil)
16
+ super(link, label, namespace: namespace, interwiki: interwiki)
17
+ @namespace = namespace || ''
18
+ @interwiki = interwiki
19
+ parse_name!
18
20
  end
19
21
 
20
22
  # "Clean" wikilink name, for ex., `Cities` for `[Category:Cities]`
21
23
  attr_reader :name
22
24
 
25
+ # Interwiki identifier. For example, `[[wikt:Argentina]]`
26
+ # will have `"Argentina"` as its {#name} and `"wikt"` (wiktionary) as an
27
+ # interwiki. TODO: how to use it.
28
+ #
29
+ # See [Wikipedia docs](https://en.wikipedia.org/wiki/Help:Interwiki_linking) for details.
30
+ attr_reader :interwiki
31
+
23
32
  # Wikilink namespace, `Category` for `[Category:Cities]`, empty
24
33
  # string (not `nil`!) for just `[Cities]`
25
34
  attr_reader :namespace
@@ -46,10 +55,8 @@ module Infoboxer
46
55
 
47
56
  private
48
57
 
49
- def parse_link!
50
- @name, @namespace = link.split(':', 2).reverse
51
- @namespace ||= ''
52
-
58
+ def parse_name!
59
+ @name = namespace.empty? ? link : link.sub(/^#{namespace}:/, '')
53
60
  @name, @anchor = @name.split('#', 2)
54
61
  @anchor ||= ''
55
62
 
@@ -2,7 +2,8 @@
2
2
 
3
3
  module Infoboxer
4
4
  MAJOR = 0
5
- MINOR = 2
6
- PATCH = 8
7
- VERSION = [MAJOR, MINOR, PATCH].join('.')
5
+ MINOR = 3
6
+ PATCH = 0
7
+ PRE = 'pre'.freeze # set to `nil` for normal releases
8
+ VERSION = [MAJOR, MINOR, PATCH, PRE].compact.join('.')
8
9
  end
@@ -0,0 +1,94 @@
1
require 'strscan'

module Infoboxer
  # Parses a textual path into a list of navigation steps and applies
  # them to a node.
  #
  # A path is a sequence of steps. Each step is:
  #
  # * `/` or `//` -- `//` produces a `:lookup` call, `/` a
  #   `:lookup_children` call on the current node;
  # * an optional node type (`[A-Za-z_]*`), converted from `snake_case`
  #   to a `CamelCase` symbol;
  # * any number of bracketed conditions: `[attr]` becomes the predicate
  #   symbol `:attr?`, `[attr=value]` becomes an `attr: value` pair.
  #
  # @private
  class WikiPath
    # Raised on any malformed path or unknown node type.
    ParseError = Class.new(ArgumentError)

    class << self
      # Parses a path into an array of step hashes, without wrapping
      # them into a WikiPath instance.
      #
      # @param string [String] path text
      # @return [Array<Hash>] one hash per step; possible keys are `:op`
      #   (only present, as `:lookup`, for `//`), `:type`, `:predicates`
      #   and one key per `[attr=value]` condition.
      # @raise [ParseError] when the path is malformed
      def _parse(string)
        scanner = StringScanner.new(string)
        steps = []
        # At least one step is always scanned, so an empty string is
        # reported as a parse error rather than producing an empty path.
        loop do
          steps << scan_step(scanner)
          break if scanner.eos?
        end
        steps
      end

      # @param string [String] path text
      # @return [WikiPath]
      # @raise [ParseError] when the path is malformed
      def parse(string)
        new(_parse(string))
      end

      private

      # Scans one `/type[cond]...` step from the scanner's current position.
      def scan_step(scanner)
        op = scanner.scan(%r{//?}) || unexpected(scanner, '/')
        type = scanner.scan(/[A-Za-z_]*/)
        attrs = scan_attrs(scanner)

        step = op == '//' ? {op: :lookup} : {}
        step[:type] = process_type(type) unless type.empty?
        step.merge(attrs) # TODO: raise if empty selector
      end

      # Scans zero or more `[attr]` / `[attr=value]` conditions.
      #
      # Note: `[attr=]` (nothing before the closing bracket) is stored
      # with a `nil` value, because the value pattern does not match.
      def scan_attrs(scanner)
        attrs = {}
        while scanner.scan(/\[/)
          attr = scanner.scan(/[-a-z_0-9]+/) || unexpected(scanner, 'attribute name')
          if scanner.scan(/\]/)
            (attrs[:predicates] ||= []) << "#{attr}?".to_sym
            next
          end
          scanner.scan(/\s*=\s*/) || unexpected(scanner, '= or ]')
          value = scanner.scan(/[^\]]+/) # TODO: probably, should do a proper [] counting?..
          scanner.scan(/\]/) || unexpected(scanner, ']')
          attrs[attr.to_sym] = process_value(value)
        end
        attrs
      end

      # Converts a raw condition value: quoted text is unquoted,
      # `/.../` becomes a Regexp, anything else is returned unchanged.
      # Patterns are anchored to the whole value, not to single lines.
      def process_value(value)
        case value
        when /\A'(.*)'\z/, /\A"(.*)"\z/
          Regexp.last_match(1)
        when %r{\A/(.+)/\z}
          Regexp.new(Regexp.last_match(1))
        else
          value
        end
      end

      # Converts `snake_case` type text into a `CamelCase` symbol and
      # validates it.
      #
      # @raise [ParseError] when the type is not recognized
      def process_type(type)
        type.gsub(/(?:^|_)([a-z])/, &:upcase).tr('_', '').to_sym
            .tap { |t| valid_type?(t) || fail(ParseError, "Unrecognized node type: #{type}") }
      end

      # `:Section` is accepted explicitly; any other type must be a
      # constant defined directly in Infoboxer::Tree.
      def valid_type?(t)
        return true if t == :Section
        # A type made only of underscores collapses to an empty symbol,
        # which `const_defined?` would reject with NameError.
        return false if t.to_s.empty?
        # `false` disables inherited lookup, so top-level constants
        # (String, Object, ...) are not mistaken for node types.
        Infoboxer::Tree.const_defined?(t, false)
      end

      def unexpected(scanner, expected)
        place = scanner.eos? ? 'end of pattern' : scanner.rest.inspect
        fail ParseError, "Unexpected #{place}, expecting #{expected}"
      end
    end

    # @param path [Array<Hash>] steps, as produced by {WikiPath._parse}
    def initialize(path)
      @path = path
    end

    # Applies every step in order, feeding each step's result into the
    # next one.
    #
    # @param node object responding to `lookup` / `lookup_children`
    # @return result of the last step
    def call(node)
      @path.inject(node) { |res, step| apply_step(res, step) }
    end

    private

    # Sends one step to +node+. Arguments are passed in this order:
    # type (if any), predicate symbols (if any), then a hash of the
    # remaining conditions (if any).
    def apply_step(node, step)
      # TODO: "compile" the op/args sequences at WikiPath initialization
      step = step.dup # the deletes below must not mutate the stored path
      op = step.delete(:op) || :lookup_children
      args = []
      if (t = step.delete(:type))
        args << t
      end
      if (pred = step.delete(:predicates))
        args.concat(pred)
      end
      args << step unless step.empty?
      node.send(op, *args)
    end
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: infoboxer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.3.0.pre
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Shepelev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-11 00:00:00.000000000 Z
11
+ date: 2017-09-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities
@@ -24,20 +24,6 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
- - !ruby/object:Gem::Dependency
28
- name: procme
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: mediawiktory
43
29
  requirement: !ruby/object:Gem::Requirement
@@ -115,6 +101,7 @@ files:
115
101
  - lib/infoboxer/navigation/sections.rb
116
102
  - lib/infoboxer/navigation/selector.rb
117
103
  - lib/infoboxer/navigation/shortcuts.rb
104
+ - lib/infoboxer/navigation/wikipath.rb
118
105
  - lib/infoboxer/parser.rb
119
106
  - lib/infoboxer/parser/context.rb
120
107
  - lib/infoboxer/parser/html.rb
@@ -130,6 +117,7 @@ files:
130
117
  - lib/infoboxer/tree.rb
131
118
  - lib/infoboxer/tree/compound.rb
132
119
  - lib/infoboxer/tree/document.rb
120
+ - lib/infoboxer/tree/gallery.rb
133
121
  - lib/infoboxer/tree/html.rb
134
122
  - lib/infoboxer/tree/image.rb
135
123
  - lib/infoboxer/tree/inline.rb
@@ -145,6 +133,7 @@ files:
145
133
  - lib/infoboxer/tree/text.rb
146
134
  - lib/infoboxer/tree/wikilink.rb
147
135
  - lib/infoboxer/version.rb
136
+ - lib/infoboxer/wiki_path.rb
148
137
  - profile/out/.gitkeep
149
138
  - profile/pages/argentina.txt
150
139
  - profile/pages/canada.wiki
@@ -177,9 +166,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
177
166
  version: 2.1.0
178
167
  required_rubygems_version: !ruby/object:Gem::Requirement
179
168
  requirements:
180
- - - ">="
169
+ - - ">"
181
170
  - !ruby/object:Gem::Version
182
- version: '0'
171
+ version: 1.3.1
183
172
  requirements: []
184
173
  rubyforge_project:
185
174
  rubygems_version: 2.6.10