infoboxer 0.2.8 → 0.3.0.pre

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ef407b6160d6d0ff8cfb874338320532d1b18906
4
- data.tar.gz: fdc4ccf4b051c50958e6cc9720619ea5110b5ea5
3
+ metadata.gz: 67b07bcee67c15aeb316e2eccf75e85a26cb43ae
4
+ data.tar.gz: 91f3253151ff816a83b4ffb8f7937c878a5a3ed8
5
5
  SHA512:
6
- metadata.gz: d5224104b02115c47f27c69926128023da3765d1d7089241190a6b3f5c8c3de79be91f65276c82536c671424d10000e2f5198413b3aa21e9802ccc9ca41fbdef
7
- data.tar.gz: 0f1f8d9ee3a4e94d6529208a9678bcb0915a884525433d2397249afada141d96c88c74e0546ddd0576adffcc2785f96ad1da57d61284b84d96661fb49c0fd46d
6
+ metadata.gz: f7dd9aa6545853dbf827c1b0b7857b2b5654caf33dcbfcaf4c2194709379f7e73d0978776675fd6265cbf83d88a814332626945db1b45c9562e54a01fa486a55
7
+ data.tar.gz: fa70f671da0d9acd7a0a131d8119a775c1ff762d7d2b640096f448678b7547477fd0fe93df74c438cd846fa550e33e530999c811bb4b0d4afe347bf058c3321b
data/.yardopts CHANGED
@@ -1,2 +1,3 @@
1
1
  --markup=markdown
2
+ --markup-provider=redcarpet
2
3
  --no-private
@@ -1,5 +1,23 @@
1
1
  # Infoboxer's change log
2
2
 
3
+ ## 0.3.1.pre (2017-09-16)
4
+
5
+ * Introduce interwiki links following (and proper handling of interwikis, in general);
6
+ * Add `<gallery>` tag support;
7
+ * Introduce `Navigation::Selector#===`;
8
+ * Many more `Enumerable` methods supported by `Nodes`;
9
+ * A lot of small simplifications, cleanups and bugfixes.
10
+
11
+ TBH, it should be 0.4.0 or more, but it would be a shame to change versions so fast :) So, at least
12
+ until it is `-pre`, let it be 0.3.1.
13
+
14
+ ## 0.3.0 (2017-07-23)
15
+
16
+ * Change logic of navigation through templates; now templates contents aren't hidden from global
17
+ lookups. While sometimes leading to less impressive demos, this approach proved itself to be more
18
+ useful for production.
19
+ * Introduce WikiPath query language as an alternative to series of lookups.
20
+
3
21
  ## 0.2.8 (2017-05-11)
4
22
 
5
23
  * Switch to MediaWiktory 0.1.0 + some subsequent cleanup of internal logic;
@@ -32,7 +32,6 @@ Gem::Specification.new do |s|
32
32
  s.executables << 'infoboxer'
33
33
 
34
34
  s.add_dependency 'htmlentities'
35
- s.add_dependency 'procme'
36
35
  s.add_dependency 'mediawiktory', '>= 0.1.0'
37
36
  s.add_dependency 'addressable'
38
37
  s.add_dependency 'terminal-table'
@@ -1,7 +1,5 @@
1
1
  # encoding: utf-8
2
2
 
3
- require 'procme'
4
-
5
3
  # Main client module for entire infoboxer functionality. If you're lucky,
6
4
  # there's no other classes/modules you need to instantiate or call
7
5
  # directly. You just do:
@@ -74,8 +72,8 @@ module Infoboxer
74
72
  end
75
73
 
76
74
  # Includeable version of {Infoboxer.wiki}
77
- def wiki(api_url, options = {})
78
- wikis[api_url] ||= MediaWiki.new(api_url, options || {})
75
+ def wiki(api_url, **options)
76
+ wikis[api_url] ||= MediaWiki.new(api_url, options)
79
77
  end
80
78
 
81
79
  class << self
@@ -170,7 +168,7 @@ module Infoboxer
170
168
  end
171
169
 
172
170
  WIKIMEDIA_PROJECTS.each do |name, domain|
173
- define_method name do |lang = 'en', options = {}|
171
+ define_method name do |lang = 'en', **options|
174
172
  lang, options = 'en', lang if lang.is_a?(Hash)
175
173
 
176
174
  wiki("https://#{lang}.#{domain}/w/api.php", options)
@@ -180,7 +178,7 @@ module Infoboxer
180
178
  alias_method :wp, :wikipedia
181
179
 
182
180
  WIKIMEDIA_COMMONS.each do |name, domain|
183
- define_method name do |options = {}|
181
+ define_method name do |**options|
184
182
  wiki("https://#{domain}/w/api.php", options)
185
183
  end
186
184
  end
@@ -47,15 +47,14 @@ module Infoboxer
47
47
  # for it, as well as shortcuts for some well-known wikis, like
48
48
  # {Infoboxer.wikipedia}.
49
49
  #
50
- # @param api_base_url URL of `api.php` file in your MediaWiki
50
+ # @param api_base_url [String] URL of `api.php` file in your MediaWiki
51
51
  # installation. Typically, it's `<domain>/w/api.php`, but can vary
52
52
  # in different wikis.
53
- # @param options Only one option is currently supported:
54
- # * `:user_agent` (also aliased as `:ua`) -- custom User-Agent header.
55
- def initialize(api_base_url, options = {})
53
+ # @param user_agent [String] (also aliased as `:ua`) Custom User-Agent header.
54
+ def initialize(api_base_url, ua: nil, user_agent: ua)
56
55
  @api_base_url = Addressable::URI.parse(api_base_url)
57
- @client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(options))
58
- @traits = Traits.get(@api_base_url.host, namespaces: extract_namespaces)
56
+ @client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
57
+ @traits = Traits.get(@api_base_url.host, siteinfo)
59
58
  end
60
59
 
61
60
  # Receive "raw" data from Wikipedia (without parsing or wrapping in
@@ -123,7 +122,9 @@ module Infoboxer
123
122
  # and obtain meaningful results instead of `NoMethodError` or
124
123
  # `SomethingNotFound`.
125
124
  #
126
- def get(*titles, prop: [])
125
+ def get(*titles, prop: [], interwiki: nil)
126
+ return interwikis(interwiki).get(*titles, prop: prop) if interwiki
127
+
127
128
  pages = get_h(*titles, prop: prop).values.compact
128
129
  titles.count == 1 ? pages.first : Tree::Nodes[*pages]
129
130
  end
@@ -251,17 +252,26 @@ module Infoboxer
251
252
  [namespace, titl].join(':')
252
253
  end
253
254
 
254
- def user_agent(options)
255
- options[:user_agent] || options[:ua] || self.class.user_agent || UA
255
+ def user_agent(custom)
256
+ custom || self.class.user_agent || UA
257
+ end
258
+
259
+ def siteinfo
260
+ @siteinfo ||= @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
256
261
  end
257
262
 
258
- def extract_namespaces
259
- siteinfo = @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases).response
260
- siteinfo['namespaces'].map do |_, namespace|
261
- aliases =
262
- siteinfo['namespacealiases'].select { |a| a['id'] == namespace['id'] }.map { |a| a['*'] }
263
- namespace.merge('aliases' => aliases)
264
- end
263
+ def interwikis(prefix)
264
+ @interwikis ||= Hash.new { |h, pre|
265
+ interwiki = siteinfo['interwikimap'].detect { |iw| iw['prefix'] == prefix } or
266
+ fail ArgumentError, "Undefined interwiki: #{prefix}"
267
+
268
+ # FIXME: fragile, but what can we do?..
269
+ m = interwiki['url'].match(%r{^(.+)/wiki/\$1$}) or
270
+ fail ArgumentError, "Interwiki #{interwiki} seems not to be a MediaWiki instance"
271
+ h[pre] = self.class.new("#{m[1]}/w/api.php") # TODO: copy useragent
272
+ }
273
+
274
+ @interwikis[prefix]
265
275
  end
266
276
  end
267
277
  end
@@ -34,9 +34,8 @@ module Infoboxer
34
34
  end
35
35
 
36
36
  # @private
37
- def get(domain, options = {})
38
- cls = Traits.domains[domain]
39
- cls ? cls.new(options) : Traits.new(options)
37
+ def get(domain, site_info = {})
38
+ (Traits.domains[domain] || Traits).new(site_info)
40
39
  end
41
40
 
42
41
  # @private
@@ -68,18 +67,27 @@ module Infoboxer
68
67
  alias_method :default, :new
69
68
  end
70
69
 
71
- def initialize(options = {})
72
- @options = options
73
- @file_namespace =
74
- [DEFAULTS[:file_namespace], namespace_aliases(options, 'File')]
75
- .flatten.compact.uniq
76
- @category_namespace =
77
- [DEFAULTS[:category_namespace], namespace_aliases(options, 'Category')]
78
- .flatten.compact.uniq
70
+ def initialize(site_info = {})
71
+ @site_info = site_info
72
+ end
73
+
74
+ def namespace?(prefix)
75
+ known_namespaces.include?(prefix)
76
+ end
77
+
78
+ def interwiki?(prefix)
79
+ known_interwikis.key?(prefix)
80
+ end
81
+
82
+ # @private
83
+ def file_namespace
84
+ @file_namespace ||= ns_aliases('File')
79
85
  end
80
86
 
81
87
  # @private
82
- attr_reader :file_namespace, :category_namespace
88
+ def category_namespace
89
+ @category_namespace ||= ns_aliases('Category')
90
+ end
83
91
 
84
92
  # @private
85
93
  def templates
@@ -88,16 +96,54 @@ module Infoboxer
88
96
 
89
97
  private
90
98
 
91
- def namespace_aliases(options, canonical)
92
- namespace = (options[:namespaces] || []).detect { |v| v['canonical'] == canonical }
93
- return nil unless namespace
94
- [namespace['*'], *namespace['aliases']]
99
+ def known_namespaces
100
+ @known_namespaces ||=
101
+ if @site_info.empty?
102
+ STANDARD_NAMESPACES
103
+ else
104
+ (@site_info['namespaces'].values + @site_info['namespacealiases']).map { |n| n['*'] }
105
+ end
106
+ end
107
+
108
+ def known_interwikis
109
+ @known_interwikis ||=
110
+ if @site_info.empty?
111
+ {}
112
+ else
113
+ @site_info['interwikimap'].map { |iw| [iw['prefix'], iw] }.to_h
114
+ end
115
+ end
116
+
117
+ def ns_aliases(base)
118
+ return [base] if @site_info.empty?
119
+ main = @site_info['namespaces'].values.detect { |n| n['canonical'] == base }
120
+ [base, main['*']] +
121
+ @site_info['namespacealiases']
122
+ .select { |a| a['id'] == main['id'] }.flat_map { |n| n['*'] }
123
+ .compact.uniq
95
124
  end
96
125
 
97
- DEFAULTS = {
98
- file_namespace: 'File',
99
- category_namespace: 'Category'
100
- }.freeze
126
+ # See https://www.mediawiki.org/wiki/Help:Namespaces#Standard_namespaces
127
+ STANDARD_NAMESPACES = [
128
+ 'Media', # Direct linking to media files.
129
+ 'Special', # Special (non-editable) pages.
130
+ '', # (Main)
131
+ 'Talk', # Article discussion.
132
+ 'User', #
133
+ 'User talk', #
134
+ 'Project', # Meta-discussions related to the operation and development of the wiki.
135
+ 'Project talk', #
136
+ 'File', # Metadata for images, videos, sound files and other media.
137
+ 'File talk', #
138
+ 'MediaWiki', # System messages and other important content.
139
+ 'MediaWiki talk', #
140
+ 'Template', # Templates: blocks of text or wikicode that are intended to be transcluded.
141
+ 'Template talk', #
142
+ 'Help', # Help files, instructions and "how-to" guides.
143
+ 'Help talk', #
144
+ 'Category', # Categories: dynamic lists of other pages.
145
+ 'Category talk', #
146
+ ].freeze
101
147
  end
102
148
  end
103
149
  end
@@ -38,6 +38,19 @@ module Infoboxer
38
38
  #
39
39
  # Look into {Shortcuts::Node} documentation for list of shortcuts.
40
40
  #
41
+ # ## Wikipath
42
+ #
43
+ # WikiPath is an XPath-like query language you can use to navigate the tree:
44
+ #
45
+ # ```ruby
46
+ # document.wikipath('//paragraph//wikilink[namespace=Category]')
47
+ # ```
48
+ #
49
+ # It can look more or less verbose than pure-ruby navigation, but the big advantage of WikiPath
50
+ # is it is pure data: you can store some paths in YAML file, for example.
51
+ #
52
+ # Look at {Wikipath#wikipath #wikipath} method docs for full reference.
53
+ #
41
54
  # ## Logical structure navigation
42
55
  #
43
56
  # MediaWiki page structure is flat, like HTML's (there's just sequence
@@ -62,7 +75,7 @@ module Infoboxer
62
75
  # {Sections::Node} for upwards.
63
76
  #
64
77
  module Navigation
65
- %w[lookup shortcuts sections].each do |nav|
78
+ %w[lookup shortcuts sections wikipath].each do |nav|
66
79
  require_relative "navigation/#{nav}"
67
80
  end
68
81
 
@@ -70,12 +83,14 @@ module Infoboxer
70
83
  include Navigation::Lookup::Node
71
84
  include Navigation::Shortcuts::Node
72
85
  include Navigation::Sections::Node
86
+ include Navigation::Wikipath
73
87
  end
74
88
 
75
89
  class Tree::Nodes
76
90
  include Navigation::Lookup::Nodes
77
91
  include Navigation::Shortcuts::Nodes
78
92
  include Navigation::Sections::Nodes
93
+ include Navigation::Wikipath
79
94
  end
80
95
 
81
96
  class Tree::Document
@@ -98,9 +98,13 @@ module Infoboxer
98
98
  # Selects matching nodes from current node's siblings, which
99
99
  # are above current node in parents children list.
100
100
 
101
+ # @!method lookup_prev_sibling(*selectors, &block)
102
+ # Selects the first matching node from current node's siblings, which
103
+ # are above current node in parents children list.
104
+
101
105
  # Underscored version of {#matches?}
102
106
  def _matches?(selector)
103
- selector.matches?(self)
107
+ selector === self
104
108
  end
105
109
 
106
110
  # Underscored version of {#lookup}
@@ -136,6 +140,11 @@ module Infoboxer
136
140
  prev_siblings._find(selector)
137
141
  end
138
142
 
143
+ # Underscored version of {#lookup_prev_sibling}
144
+ def _lookup_prev_sibling(selector)
145
+ prev_siblings.reverse.detect { |n| selector === n }
146
+ end
147
+
139
148
  # Underscored version of {#lookup_next_siblings}
140
149
  def _lookup_next_siblings(selector)
141
150
  next_siblings._find(selector)
@@ -146,6 +155,7 @@ module Infoboxer
146
155
  lookup lookup_children lookup_parents
147
156
  lookup_siblings
148
157
  lookup_next_siblings lookup_prev_siblings
158
+ lookup_prev_sibling
149
159
  ]
150
160
  .map { |sym| [sym, :"_#{sym}"] }
151
161
  .each do |sym, underscored|
@@ -81,6 +81,14 @@ module Infoboxer
81
81
  end
82
82
  end
83
83
 
84
+ def lookup_children(*arg)
85
+ if arg.include?(:Section)
86
+ sections.find(*(arg - [:Section]))
87
+ else
88
+ super
89
+ end
90
+ end
91
+
84
92
  private
85
93
 
86
94
  def make_sections
@@ -115,21 +123,25 @@ module Infoboxer
115
123
  #
116
124
  # @return {Tree::Nodes<Section>}
117
125
  def in_sections
118
- main_node = parent.is_a?(Tree::Document) ? self : lookup_parents[-2]
126
+ return parent.in_sections unless parent.is_a?(Tree::Document)
127
+ return @in_sections if @in_sections
119
128
 
120
129
  heading =
121
- if main_node.is_a?(Tree::Heading)
122
- main_node.lookup_prev_siblings(Tree::Heading, level: main_node.level - 1).last
130
+ if is_a?(Tree::Heading)
131
+ lookup_prev_sibling(Tree::Heading, level: level - 1)
123
132
  else
124
- main_node.lookup_prev_siblings(Tree::Heading).last
133
+ lookup_prev_sibling(Tree::Heading)
125
134
  end
126
- return Tree::Nodes[] unless heading
135
+ unless heading
136
+ @in_sections = Tree::Nodes[]
137
+ return @in_sections
138
+ end
127
139
 
128
140
  body = heading.next_siblings
129
141
  .take_while { |n| !n.is_a?(Tree::Heading) || n.level < heading.level }
130
142
 
131
143
  section = Section.new(heading, body)
132
- Tree::Nodes[section, *heading.in_sections]
144
+ @in_sections = Tree::Nodes[section, *heading.in_sections]
133
145
  end
134
146
  end
135
147
 
@@ -145,6 +157,14 @@ module Infoboxer
145
157
  make_nodes(map { |n| n.send(sym, *args) })
146
158
  end
147
159
  end
160
+
161
+ def lookup_children(*arg)
162
+ if arg.include?(:Section)
163
+ sections.find(*(arg - [:Section]))
164
+ else
165
+ super
166
+ end
167
+ end
148
168
  end
149
169
 
150
170
  # Virtual node, representing logical section of the document.
@@ -175,6 +195,10 @@ module Infoboxer
175
195
  false
176
196
  end
177
197
 
198
+ def inspect
199
+ "#<#{descr}: #{children.count} nodes>"
200
+ end
201
+
178
202
  include Container
179
203
  end
180
204
  end
@@ -7,8 +7,6 @@ module Infoboxer
7
7
  #
8
8
  # See {Lookup::Node Lookup::Node} for detailed explanation of available selectors.
9
9
  class Selector
10
- include ProcMe
11
-
12
10
  def initialize(*arg, &block)
13
11
  @arg = [arg, block].flatten.compact.map(&method(:sym_to_class))
14
12
  @arg.each do |a|
@@ -26,8 +24,8 @@ module Infoboxer
26
24
  "#<Selector(#{@arg.map(&:to_s).join(', ')})>"
27
25
  end
28
26
 
29
- def matches?(node)
30
- @arg.all? { |a| arg_matches?(a, node) }
27
+ def ===(other)
28
+ @arg.all? { |a| arg_matches?(a, other) }
31
29
  end
32
30
 
33
31
  private
@@ -45,13 +43,24 @@ module Infoboxer
45
43
  when Proc
46
44
  check.call(node)
47
45
  when Hash
48
- check.all? { |attr, value| node.respond_to?(attr) && value === node.send(attr) }
46
+ check.all? { |attr, value|
47
+ node.respond_to?(attr) && value_matches?(value, node.send(attr)) ||
48
+ node.params.key?(attr) && value_matches?(value, node.params[attr])
49
+ }
49
50
  when Symbol
50
51
  node.respond_to?(check) && node.send(check)
51
52
  else
52
53
  check === node
53
54
  end
54
55
  end
56
+
57
+ def value_matches?(matcher, value)
58
+ if matcher.is_a?(String) && value.is_a?(String)
59
+ matcher.casecmp(value).zero?
60
+ else
61
+ matcher === value
62
+ end
63
+ end
55
64
  end
56
65
  end
57
66
  end
@@ -0,0 +1,32 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative '../wiki_path'
4
+
5
+ module Infoboxer
6
+ module Navigation
7
+ module Wikipath
8
+ # Search nodes inside the current one using an XPath-like query language.
9
+ #
10
+ # This feature is experimental, but should work for most of the useful cases.
11
+ #
12
+ # Examples of WikiPath:
13
+ #
14
+ # ```
15
+ # /paragraph # direct child of current node, being paragraph
16
+ # //paragraph # any node in current node's subtree, being paragraph
17
+ # //template[name=Infobox] # template node in subtree, with name attribute equal to Infobox
18
+ # //template[name="Infobox country"] # optional quotes are allowed
19
+ # //template[name=/^Infobox/] # regexes are supported
20
+ # //wikilink[italic] # node predicates are supported (the same as `lookup(:Wikilink, :italic?)`)
21
+ # //*[italic] # type wildcards are supported
22
+ # //template[name=/^Infobox/]/var[name=birthday] # series of lookups work
23
+ # ```
24
+ #
25
+ # @param string [String] WikiPath to lookup
26
+ # @return [Nodes]
27
+ def wikipath(string)
28
+ Infoboxer::WikiPath.parse(string).call(self)
29
+ end
30
+ end
31
+ end
32
+ end
@@ -1,7 +1,6 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require 'ostruct'
4
- require 'procme'
5
4
  require 'logger'
6
5
 
7
6
  module Infoboxer
@@ -83,7 +83,7 @@ module Infoboxer
83
83
 
84
84
  private
85
85
 
86
- def inline_formatting(match)
86
+ def inline_formatting(match) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/AbcSize
87
87
  case match
88
88
  when "'''''"
89
89
  BoldItalic.new(short_inline(/'''''/))
@@ -109,6 +109,8 @@ module Infoboxer
109
109
  reference(Regexp.last_match(1))
110
110
  when /<math>/
111
111
  math
112
+ when /<gallery([^>]*)>/
113
+ gallery(Regexp.last_match(1))
112
114
  when '<'
113
115
  html || Text.new(match) # it was not HTML, just accidental <
114
116
  else
@@ -126,8 +128,18 @@ module Infoboxer
126
128
  caption = inline(/\]\]/)
127
129
  @context.pop_eol_sign
128
130
  end
131
+ name, namespace = link.split(':', 2).reverse
132
+ lnk, params =
133
+ if @context.traits.namespace?(namespace)
134
+ [link, {namespace: namespace}]
135
+ elsif @context.traits.interwiki?(namespace)
136
+ [name, {interwiki: namespace}]
137
+ else
138
+ [link, {}]
139
+ end
129
140
 
130
- Wikilink.new(link, caption)
141
+ puts @context.rest if lnk.nil?
142
+ Wikilink.new(lnk, caption, **params)
131
143
  end
132
144
 
133
145
  # http://en.wikipedia.org/wiki/Help:Link#External_links
@@ -159,6 +171,34 @@ module Infoboxer
159
171
  Text.new(@context.scan_continued_until(%r{</nowiki>}))
160
172
  end
161
173
  end
174
+
175
+ def gallery(tag_rest)
176
+ params = parse_params(tag_rest)
177
+ images = []
178
+ guarded_loop do
179
+ @context.next! if @context.eol?
180
+ path = @context.scan_until(%r{</gallery>|\||$})
181
+ attrs = @context.matched == '|' ? gallery_image_attrs : {}
182
+ unless path.empty?
183
+ images << Tree::Image.new(path.sub(/^#{re.file_namespace}/, ''), attrs)
184
+ end
185
+ break if @context.matched == '</gallery>'
186
+ end
187
+ Gallery.new(images, params)
188
+ end
189
+
190
+ def gallery_image_attrs
191
+ nodes = []
192
+
193
+ guarded_loop do
194
+ nodes << short_inline(%r{\||</gallery>})
195
+ break if @context.eol? || @context.matched?(%r{</gallery>})
196
+ end
197
+
198
+ nodes.map(&method(:image_attr))
199
+ .inject(&:merge)
200
+ .reject { |_k, v| v.nil? || v.empty? }
201
+ end
162
202
  end
163
203
 
164
204
  require_relative 'image'
@@ -14,7 +14,7 @@ module Infoboxer
14
14
 
15
15
  @context.next!
16
16
  end
17
- nodes.flow_templates
17
+ nodes
18
18
  end
19
19
 
20
20
  private
@@ -29,8 +29,8 @@ module Infoboxer
29
29
 
30
30
  guarded_loop do
31
31
  @context.next! while @context.eol?
32
- if @context.check(/\s*([^ =}|<]+)\s*=\s*/)
33
- name = @context.scan(/\s*([^ =]+)/).strip
32
+ if @context.check(/\s*([^=}|<]+)\s*=\s*/)
33
+ name = @context.scan(/\s*([^=]+)/).strip
34
34
  @context.skip(/\s*=\s*/)
35
35
  else
36
36
  name = num
@@ -38,7 +38,7 @@ module Infoboxer
38
38
  end
39
39
  log "Variable #{name} found"
40
40
 
41
- value = long_inline(/\||}}/)
41
+ value = sanitize_value(long_inline(/\||}}/))
42
42
 
43
43
  # it was just empty line otherwise
44
44
  res << Var.new(name.to_s, value) unless value.empty? && name.is_a?(Numeric)
@@ -50,6 +50,11 @@ module Infoboxer
50
50
  end
51
51
  res
52
52
  end
53
+
54
+ def sanitize_value(nodes)
55
+ nodes.pop if (nodes.last.is_a?(Pre) || nodes.last.is_a?(Text)) && nodes.last.text =~ /^\s*$/ # FIXME: dirty!
56
+ nodes
57
+ end
53
58
  end
54
59
  end
55
60
  end
@@ -12,6 +12,7 @@ module Infoboxer
12
12
  \[[a-z]+:// | # external link
13
13
  <nowiki[^>]*> | # nowiki
14
14
  <ref[^>]*> | # reference
15
+ <gallery[^>]*>| # gallery
15
16
  <math> | # math
16
17
  < # HTML tag
17
18
  ))x
@@ -35,9 +35,11 @@ module Infoboxer
35
35
  #
36
36
  # Used for {Set} definitions.
37
37
  class Show < Base
38
- alias_method :children, :unnamed_variables
38
+ def text
39
+ unnamed_variables.map(&:text).join(children_separator)
40
+ end
39
41
 
40
- protected
42
+ private
41
43
 
42
44
  def children_separator
43
45
  ' '
@@ -63,7 +63,7 @@ module Infoboxer
63
63
  require_relative 'tree/nodes'
64
64
 
65
65
  %w[text compound inline
66
- image html paragraphs list template table ref math
66
+ image gallery html paragraphs list template table ref math
67
67
  document].each do |type|
68
68
  require_relative "tree/#{type}"
69
69
  end
@@ -4,7 +4,7 @@ module Infoboxer
4
4
  module Tree
5
5
  # Base class for all nodes with children.
6
6
  class Compound < Node
7
- def initialize(children = Nodes.new, params = {})
7
+ def initialize(children = Nodes.new, **params)
8
8
  super(params)
9
9
  @children = Nodes[*children]
10
10
  @children.each { |c| c.parent = self }
@@ -40,8 +40,7 @@ module Infoboxer
40
40
  if children.count == 1 && children.first.is_a?(Text)
41
41
  "#{indent(level)}#{children.first.text} <#{descr}>\n"
42
42
  else
43
- "#{indent(level)}<#{descr}>\n" +
44
- children.map(&call(to_tree: level + 1)).join
43
+ "#{indent(level)}<#{descr}>\n" + children.map { |c| c.to_tree(level + 1) }.join
45
44
  end
46
45
  end
47
46
 
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
+ module Infoboxer
4
+ module Tree
5
+ # Represents gallery of images (contents of `<gallery>` special tag).
6
+ #
7
+ # See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Help:Gallery_tag)
8
+ # for explanation of attributes.
9
+ class Gallery < Compound
10
+ end
11
+ end
12
+ end
@@ -7,8 +7,8 @@ module Infoboxer
7
7
  # See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Wikipedia:Extended_image_syntax)
8
8
  # for explanation of attributes.
9
9
  class Image < Node
10
- def initialize(path, params = {})
11
- @caption = params.delete(:caption)
10
+ def initialize(path, caption: nil, **params)
11
+ @caption = caption
12
12
  super({path: path}.merge(params))
13
13
  end
14
14
 
@@ -44,7 +44,7 @@ module Infoboxer
44
44
  super(level) +
45
45
  if caption && !caption.empty?
46
46
  indent(level + 1) + "caption:\n" +
47
- caption.children.map(&call(to_tree: level + 2)).join
47
+ caption.children.map { |c| c.to_tree(level + 2) }.join
48
48
  else
49
49
  ''
50
50
  end
@@ -17,8 +17,8 @@ module Infoboxer
17
17
 
18
18
  # Base class for internal/external links,
19
19
  class Link < Compound
20
- def initialize(link, label = nil)
21
- super(label || Nodes.new([Text.new(link)]), link: link)
20
+ def initialize(link, label = nil, **attr)
21
+ super(label || Nodes.new([Text.new(link)]), link: link, **attr)
22
22
  end
23
23
 
24
24
  # @!attribute [r] link
@@ -15,7 +15,7 @@ module Infoboxer
15
15
  # * {Tree::Nodes#follow} for extracting multiple links at once;
16
16
  # * {MediaWiki#get} for basic information on page extraction.
17
17
  def follow
18
- client.get(link)
18
+ client.get(link, interwiki: interwiki)
19
19
  end
20
20
 
21
21
  # Human-readable page URL
@@ -28,6 +28,9 @@ module Infoboxer
28
28
 
29
29
  protected
30
30
 
31
+ # redefined in {Wikilink}
32
+ def interwiki; end
33
+
31
34
  def page
32
35
  lookup_parents(MediaWiki::Page).first or fail('Not in a page from real source')
33
36
  end
@@ -4,9 +4,6 @@ module Infoboxer
4
4
  #
5
5
  # See also: https://en.wikipedia.org/wiki/Help:Displaying_a_formula
6
6
  class Math < Text
7
- def text
8
- "<math>#{super}</math>"
9
- end
10
7
  end
11
8
  end
12
9
  end
@@ -11,9 +11,7 @@ module Infoboxer
11
11
  # you will receive it from tree and use for navigations.
12
12
  #
13
13
  class Node
14
- include ProcMe
15
-
16
- def initialize(params = {})
14
+ def initialize(**params)
17
15
  @params = params
18
16
  end
19
17
 
@@ -42,6 +40,10 @@ module Infoboxer
42
40
  parent ? parent.index_of(self) : 0
43
41
  end
44
42
 
43
+ def first?
44
+ index.zero?
45
+ end
46
+
45
47
  # List of all sibling nodes (children of same parent)
46
48
  def siblings
47
49
  parent ? parent.children - [self] : Nodes[]
@@ -152,7 +154,7 @@ module Infoboxer
152
154
  end
153
155
 
154
156
  def show_params(prms = nil)
155
- (prms || params).map { |k, v| "#{k}: #{v.inspect}" }.join(', ')
157
+ (prms || params).reject { |_, v| v.nil? }.map { |k, v| "#{k}: #{v.inspect}" }.join(', ')
156
158
  end
157
159
 
158
160
  def indent(level)
@@ -38,10 +38,19 @@ module Infoboxer
38
38
  # @!method compact
39
39
  # Just like Array#compact, but returns Nodes
40
40
 
41
+ # @!method grep(pattern)
42
+ # Just like Array#grep, but returns Nodes
43
+
44
+ # @!method grep_v(pattern)
45
+ # Just like Array#grep_v, but returns Nodes
46
+
41
47
  # @!method -(other)
42
48
  # Just like Array#-, but returns Nodes
43
49
 
44
- %i[select reject sort_by flatten compact -].each do |sym|
50
+ # @!method +(other)
51
+ # Just like Array#+, but returns Nodes
52
+
53
+ %i[select reject sort_by flatten compact grep grep_v - +].each do |sym|
45
54
  define_method(sym) do |*args, &block|
46
55
  Nodes[*super(*args, &block)]
47
56
  end
@@ -75,6 +84,21 @@ module Infoboxer
75
84
  end
76
85
  end
77
86
 
87
+ # Just like Array#flat_map, but returns Nodes, **if** all map results are Node
88
+ def flat_map
89
+ res = super
90
+ if res.all? { |n| n.is_a?(Node) || n.is_a?(Nodes) }
91
+ Nodes[*res]
92
+ else
93
+ res
94
+ end
95
+ end
96
+
97
+ # Just like Array#group_by, but returns hash with `{<grouping variable> => Nodes}`
98
+ def group_by
99
+ super.map { |title, group| [title, Nodes[*group]] }.to_h
100
+ end
101
+
78
102
  # @!method prev_siblings
79
103
  # Previous siblings (flat list) of all nodes inside.
80
104
 
@@ -139,12 +163,14 @@ module Infoboxer
139
163
  # @return [Nodes<MediaWiki::Page>] It is still `Nodes`, so you
140
164
  # still can process them uniformly.
141
165
  def follow
142
- links = select { |n| n.respond_to?(:link) }.map(&:link)
166
+ links = grep(Linkable)
143
167
  return Nodes[] if links.empty?
144
168
  page = first.lookup_parents(MediaWiki::Page).first or
145
169
  fail('Not in a page from real source')
146
170
  page.client or fail('MediaWiki client not set')
147
- page.client.get(*links)
171
+ pages = links.group_by(&:interwiki)
172
+ .flat_map { |iw, ls| page.client.get(*ls.map(&:link), interwiki: iw) }
173
+ pages.count == 1 ? pages.first : Nodes[*pages]
148
174
  end
149
175
 
150
176
  # @private
@@ -173,7 +199,9 @@ module Infoboxer
173
199
  # @private
174
200
  # Internal, used by {Parser}
175
201
  def flow_templates
176
- make_nodes(map { |n| n.is_a?(Paragraph) ? n.to_templates? : n })
202
+ # TODO: will it be better?..
203
+ # make_nodes(map { |n| n.is_a?(Paragraph) ? n.to_templates? : n })
204
+ self
177
205
  end
178
206
 
179
207
  private
@@ -75,7 +75,7 @@ module Infoboxer
75
75
  # @private
76
76
  # Internal, used by {Parser}
77
77
  def to_templates
78
- children.select(&filter(itself: Template))
78
+ children.grep(Template)
79
79
  end
80
80
 
81
81
  # @private
@@ -13,12 +13,12 @@ module Infoboxer
13
13
 
14
14
  # All table rows.
15
15
  def rows
16
- children.select(&fltr(itself: TableRow))
16
+ children.grep(TableRow)
17
17
  end
18
18
 
19
19
  # Table caption, if exists.
20
20
  def caption
21
- children.detect(&fltr(itself: TableCaption))
21
+ children.grep(TableCaption).first
22
22
  end
23
23
 
24
24
  # For now, returns first table row, if it consists only of
@@ -26,12 +26,12 @@ module Infoboxer
26
26
  #
27
27
  # FIXME: it can easily be several table heading rows
28
28
  def heading_row
29
- rows.first if rows.first && rows.first.children.all?(&call(matches?: TableHeading))
29
+ rows.first if rows.first && rows.first.children.all? { |c| c.is_a?(TableHeading) }
30
30
  end
31
31
 
32
32
  # For now, returns all table rows except {#heading_row}
33
33
  def body_rows
34
- if rows.first && rows.first.children.all?(&call(matches?: TableHeading))
34
+ if rows.first && rows.first.children.all? { |c| c.is_a?(TableHeading) }
35
35
  rows[1..-1]
36
36
  else
37
37
  rows
@@ -39,19 +39,11 @@ module Infoboxer
39
39
  end
40
40
 
41
41
  def text
42
- table = Terminal::Table.new
43
- table.title = caption.text.sub(/\n+\Z/, '') if caption
44
-
45
- if heading_row
46
- table.headings = heading_row.children.map(&:text)
47
- .map(&call(sub: [/\n+\Z/, '']))
48
- end
49
-
50
- table.rows = body_rows.map { |r|
51
- r.children.map(&:text)
52
- .map(&call(sub: [/\n+\Z/, '']))
53
- }
54
- table.to_s + "\n\n"
42
+ Terminal::Table.new.tap { |table|
43
+ table.title = caption.text.sub(/\n+\Z/, '') if caption
44
+ table.headings = heading_row.children.map(&:text_) if heading_row
45
+ table.rows = body_rows.map { |r| r.children.map(&:text_) }
46
+ }.to_s + "\n\n"
55
47
  end
56
48
  end
57
49
 
@@ -17,10 +17,15 @@ module Infoboxer
17
17
  end
18
18
 
19
19
  # Internal, used by {Parser}
20
+ # Means even children-less Var should not be removed from parser tree.
20
21
  def empty?
21
22
  false
22
23
  end
23
24
 
25
+ def named?
26
+ name !~ /^\d+$/
27
+ end
28
+
24
29
  protected
25
30
 
26
31
  def descr
@@ -54,6 +59,7 @@ module Infoboxer
54
59
  # values.
55
60
  #
56
61
  # ### On variables naming
62
+ #
57
63
  # MediaWiki templates can contain _named_ and _unnamed_ variables.
58
64
  # Example:
59
65
  #
@@ -104,12 +110,16 @@ module Infoboxer
104
110
  # See {Var} class to understand what you can do with them.
105
111
  #
106
112
  # @return [Nodes<Var>]
107
- attr_reader :variables
113
+ # attr_reader :variables
114
+ alias_method :variables, :children
108
115
 
109
116
  def initialize(name, variables = Nodes[])
110
- super(Nodes[], extract_params(variables))
117
+ super(variables, extract_params(variables))
111
118
  @name = name
112
- @variables = Nodes[*variables].each { |v| v.parent = self }
119
+ end
120
+
121
+ def text
122
+ ''
113
123
  end
114
124
 
115
125
  # See {Node#to_tree}
@@ -133,7 +143,7 @@ module Infoboxer
133
143
  #
134
144
  # @return [Nodes<Var>]
135
145
  def unnamed_variables
136
- variables.find(name: /^\d+$/)
146
+ variables.reject(&:named?)
137
147
  end
138
148
 
139
149
  # Fetches template variable(s) by name(s) or patterns.
@@ -236,7 +246,7 @@ module Infoboxer
236
246
  def extract_params(vars)
237
247
  vars
238
248
  .select { |v| v.children.count == 1 && v.children.first.is_a?(Text) }
239
- .map { |v| [v.name, v.children.first.raw_text] }.to_h
249
+ .map { |v| [v.name.to_sym, v.children.first.raw_text] }.to_h
240
250
  end
241
251
 
242
252
  def inspect_variables(depth)
@@ -15,7 +15,7 @@ module Infoboxer
15
15
  # Text fragment without decoding of HTML entities.
16
16
  attr_accessor :raw_text
17
17
 
18
- def initialize(text, params = {})
18
+ def initialize(text, **params)
19
19
  super(params)
20
20
  @raw_text = text
21
21
  end
@@ -12,14 +12,23 @@ module Infoboxer
12
12
  # Note that Wikilink is {Linkable}, so you can {Linkable#follow #follow}
13
13
  # it to obtain linked pages.
14
14
  class Wikilink < Link
15
- def initialize(*)
16
- super
17
- parse_link!
15
+ def initialize(link, label = nil, namespace: nil, interwiki: nil)
16
+ super(link, label, namespace: namespace, interwiki: interwiki)
17
+ @namespace = namespace || ''
18
+ @interwiki = interwiki
19
+ parse_name!
18
20
  end
19
21
 
20
22
  # "Clean" wikilink name, for ex., `Cities` for `[Category:Cities]`
21
23
  attr_reader :name
22
24
 
25
+ # Interwiki identifier. For example, `[[wikt:Argentina]]`
26
+ # will have `"Argentina"` as its {#name} and `"wikt"` (wiktionary) as an
27
+ # interwiki. TODO: how to use it.
28
+ #
29
+ # See [Wikipedia docs](https://en.wikipedia.org/wiki/Help:Interwiki_linking) for details.
30
+ attr_reader :interwiki
31
+
23
32
  # Wikilink namespace, `Category` for `[Category:Cities]`, empty
24
33
  # string (not `nil`!) for just `[Cities]`
25
34
  attr_reader :namespace
@@ -46,10 +55,8 @@ module Infoboxer
46
55
 
47
56
  private
48
57
 
49
- def parse_link!
50
- @name, @namespace = link.split(':', 2).reverse
51
- @namespace ||= ''
52
-
58
+ def parse_name!
59
+ @name = namespace.empty? ? link : link.sub(/^#{namespace}:/, '')
53
60
  @name, @anchor = @name.split('#', 2)
54
61
  @anchor ||= ''
55
62
 
@@ -2,7 +2,8 @@
2
2
 
3
3
  module Infoboxer
4
4
  MAJOR = 0
5
- MINOR = 2
6
- PATCH = 8
7
- VERSION = [MAJOR, MINOR, PATCH].join('.')
5
+ MINOR = 3
6
+ PATCH = 0
7
+ PRE = 'pre'.freeze # set to `nil` for normal releases
8
+ VERSION = [MAJOR, MINOR, PATCH, PRE].compact.join('.')
8
9
  end
@@ -0,0 +1,94 @@
1
+ module Infoboxer
2
+ # @private
3
+ class WikiPath
4
+ ParseError = Class.new(ArgumentError)
5
+
6
+ class << self
7
+ def _parse(string)
8
+ scanner = StringScanner.new(string)
9
+ res = []
10
+ loop do
11
+ res << scan_step(scanner)
12
+ break if scanner.eos?
13
+ end
14
+ res
15
+ end
16
+
17
+ def parse(string)
18
+ new(_parse(string))
19
+ end
20
+
21
+ private
22
+
23
+ def scan_step(scanner) # rubocop:disable Metrics/PerceivedComplexity
24
+ op = scanner.scan(%r{//?}) or unexpected(scanner, '/')
25
+ type = scanner.scan(/[A-Za-z_]*/)
26
+ attrs = {}
27
+ while scanner.scan(/\[/)
28
+ attr = scanner.scan(/[-a-z_0-9]+/) or unexpected(scanner, 'attribute name')
29
+ if scanner.scan(/\]/)
30
+ (attrs[:predicates] ||= []) << "#{attr}?".to_sym
31
+ next
32
+ end
33
+ scanner.scan(/\s*=\s*/) or unexpected(scanner, '= or ]')
34
+ value = scanner.scan(/[^\]]+/) # TODO: probably, should do a proper [] counting?..
35
+ scanner.scan(/\]/) or unexpected(scanner, ']')
36
+ attrs[attr.to_sym] = process_value(value)
37
+ end
38
+ res = op == '//' ? {op: :lookup} : {}
39
+ res[:type] = process_type(type) unless type.empty?
40
+ res.merge(attrs) # TODO: raise if empty selector
41
+ end
42
+
43
+ def process_value(value)
44
+ case value
45
+ when /^'(.*)'$/, /^"(.*)"$/
46
+ Regexp.last_match(1)
47
+ when %r{^/(.+)/$}
48
+ Regexp.new(Regexp.last_match(1))
49
+ else
50
+ value
51
+ end
52
+ end
53
+
54
+ def process_type(type)
55
+ type.gsub(/(?:^|_)([a-z])/, &:upcase).tr('_', '').to_sym
56
+ .tap { |t| valid_type?(t) or fail(ParseError, "Unrecognized node type: #{type}") }
57
+ end
58
+
59
+ def valid_type?(t)
60
+ t == :Section || Infoboxer::Tree.const_defined?(t)
61
+ end
62
+
63
+ def unexpected(scanner, expected)
64
+ place = scanner.eos? ? 'end of pattern' : scanner.rest.inspect
65
+ fail ParseError, "Unexpected #{place}, expecting #{expected}"
66
+ end
67
+ end
68
+
69
+ def initialize(path)
70
+ @path = path
71
+ end
72
+
73
+ def call(node)
74
+ @path.inject(node) { |res, step| apply_step(res, step) }
75
+ end
76
+
77
+ private
78
+
79
+ def apply_step(node, step)
80
+ # TODO: "compile" the op/args sequences at WikiPath initialization
81
+ step = step.dup
82
+ op = step.delete(:op) || :lookup_children
83
+ args = []
84
+ if (t = step.delete(:type))
85
+ args << t
86
+ end
87
+ if (pred = step.delete(:predicates))
88
+ args.concat(pred)
89
+ end
90
+ args << step unless step.empty?
91
+ node.send(op, *args)
92
+ end
93
+ end
94
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: infoboxer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.3.0.pre
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Shepelev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-11 00:00:00.000000000 Z
11
+ date: 2017-09-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities
@@ -24,20 +24,6 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
- - !ruby/object:Gem::Dependency
28
- name: procme
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: mediawiktory
43
29
  requirement: !ruby/object:Gem::Requirement
@@ -115,6 +101,7 @@ files:
115
101
  - lib/infoboxer/navigation/sections.rb
116
102
  - lib/infoboxer/navigation/selector.rb
117
103
  - lib/infoboxer/navigation/shortcuts.rb
104
+ - lib/infoboxer/navigation/wikipath.rb
118
105
  - lib/infoboxer/parser.rb
119
106
  - lib/infoboxer/parser/context.rb
120
107
  - lib/infoboxer/parser/html.rb
@@ -130,6 +117,7 @@ files:
130
117
  - lib/infoboxer/tree.rb
131
118
  - lib/infoboxer/tree/compound.rb
132
119
  - lib/infoboxer/tree/document.rb
120
+ - lib/infoboxer/tree/gallery.rb
133
121
  - lib/infoboxer/tree/html.rb
134
122
  - lib/infoboxer/tree/image.rb
135
123
  - lib/infoboxer/tree/inline.rb
@@ -145,6 +133,7 @@ files:
145
133
  - lib/infoboxer/tree/text.rb
146
134
  - lib/infoboxer/tree/wikilink.rb
147
135
  - lib/infoboxer/version.rb
136
+ - lib/infoboxer/wiki_path.rb
148
137
  - profile/out/.gitkeep
149
138
  - profile/pages/argentina.txt
150
139
  - profile/pages/canada.wiki
@@ -177,9 +166,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
177
166
  version: 2.1.0
178
167
  required_rubygems_version: !ruby/object:Gem::Requirement
179
168
  requirements:
180
- - - ">="
169
+ - - ">"
181
170
  - !ruby/object:Gem::Version
182
- version: '0'
171
+ version: 1.3.1
183
172
  requirements: []
184
173
  rubyforge_project:
185
174
  rubygems_version: 2.6.10