infoboxer 0.3.0 → 0.3.1.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +0 -15
- data/CHANGELOG.md +11 -0
- data/lib/infoboxer.rb +4 -4
- data/lib/infoboxer/media_wiki.rb +26 -16
- data/lib/infoboxer/media_wiki/traits.rb +66 -20
- data/lib/infoboxer/navigation/lookup.rb +11 -1
- data/lib/infoboxer/navigation/sections.rb +10 -6
- data/lib/infoboxer/navigation/selector.rb +12 -4
- data/lib/infoboxer/parser/inline.rb +42 -2
- data/lib/infoboxer/parser/paragraphs.rb +1 -1
- data/lib/infoboxer/parser/template.rb +3 -3
- data/lib/infoboxer/parser/util.rb +1 -0
- data/lib/infoboxer/tree.rb +1 -1
- data/lib/infoboxer/tree/compound.rb +1 -1
- data/lib/infoboxer/tree/gallery.rb +12 -0
- data/lib/infoboxer/tree/image.rb +2 -2
- data/lib/infoboxer/tree/inline.rb +2 -2
- data/lib/infoboxer/tree/linkable.rb +4 -1
- data/lib/infoboxer/tree/math.rb +0 -3
- data/lib/infoboxer/tree/node.rb +2 -2
- data/lib/infoboxer/tree/nodes.rb +32 -4
- data/lib/infoboxer/tree/template.rb +6 -2
- data/lib/infoboxer/tree/text.rb +1 -1
- data/lib/infoboxer/tree/wikilink.rb +14 -7
- data/lib/infoboxer/version.rb +3 -2
- data/lib/infoboxer/wiki_path.rb +10 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ebc38c153d481aca588625caf2bcb576046afa9
|
4
|
+
data.tar.gz: 4f0ccc3b3130403a7a1e35adfc3977d954ac5a66
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4593f04e93b2714f13f9abb68cb42cec84ad258d786c815a99ca95d150ba55d597106d1e9e71d3802177cce4a4ad5bf1de0b1f5fa4cc733af19d673fbc600945
|
7
|
+
data.tar.gz: 1dc93e4fc257a4cee3fd2c9eb263b24f1a9d8b755a3765bfdf71d1617f7ba9a4ee7d9323502dbee23e48dc85bb6899facb7eadcc62ccc386d958dfca6f28fe5e
|
data/.rubocop_todo.yml
CHANGED
@@ -1,16 +1 @@
|
|
1
|
-
# This configuration was generated by
|
2
|
-
# `rubocop --auto-gen-config`
|
3
|
-
# on 2017-06-23 13:52:16 +0300 using RuboCop version 0.49.1.
|
4
|
-
# The point is for the user to remove these configuration records
|
5
|
-
# one by one as the offenses are removed from the code base.
|
6
|
-
# Note that changes in the inspected code, or installation of new
|
7
|
-
# versions of RuboCop, may require this file to be generated again.
|
8
|
-
|
9
|
-
# Offense count: 1
|
10
|
-
Metrics/AbcSize:
|
11
|
-
Max: 29
|
12
|
-
|
13
|
-
# Offense count: 1
|
14
|
-
Metrics/PerceivedComplexity:
|
15
|
-
Max: 10
|
16
1
|
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,16 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
+
## 0.3.1.pre (2017-09-16)
|
4
|
+
|
5
|
+
* Introduce following of interwiki links (and proper handling of interwikis in general);
|
6
|
+
* Add `<gallery>` tag support;
|
7
|
+
* Introduce `Navigation::Selector#===`;
|
8
|
+
* Many more `Enumerable` methods supported by `Nodes`;
|
9
|
+
* Lots of small simplifications, cleanups and bugfixes.
|
10
|
+
|
11
|
+
TBH, it should be 0.4.0 or more, but it would be a shame to change versions so fast :) So, at least
|
12
|
+
until it is `-pre`, let it be 0.3.1.
|
13
|
+
|
3
14
|
## 0.3.0 (2017-07-23)
|
4
15
|
|
5
16
|
* Change logic of navigation through templates; now templates contents aren't hidden from global
|
data/lib/infoboxer.rb
CHANGED
@@ -72,8 +72,8 @@ module Infoboxer
|
|
72
72
|
end
|
73
73
|
|
74
74
|
# Includeable version of {Infoboxer.wiki}
|
75
|
-
def wiki(api_url, options
|
76
|
-
wikis[api_url] ||= MediaWiki.new(api_url, options
|
75
|
+
def wiki(api_url, **options)
|
76
|
+
wikis[api_url] ||= MediaWiki.new(api_url, options)
|
77
77
|
end
|
78
78
|
|
79
79
|
class << self
|
@@ -168,7 +168,7 @@ module Infoboxer
|
|
168
168
|
end
|
169
169
|
|
170
170
|
WIKIMEDIA_PROJECTS.each do |name, domain|
|
171
|
-
define_method name do |lang = 'en', options
|
171
|
+
define_method name do |lang = 'en', **options|
|
172
172
|
lang, options = 'en', lang if lang.is_a?(Hash)
|
173
173
|
|
174
174
|
wiki("https://#{lang}.#{domain}/w/api.php", options)
|
@@ -178,7 +178,7 @@ module Infoboxer
|
|
178
178
|
alias_method :wp, :wikipedia
|
179
179
|
|
180
180
|
WIKIMEDIA_COMMONS.each do |name, domain|
|
181
|
-
define_method name do
|
181
|
+
define_method name do |**options|
|
182
182
|
wiki("https://#{domain}/w/api.php", options)
|
183
183
|
end
|
184
184
|
end
|
data/lib/infoboxer/media_wiki.rb
CHANGED
@@ -47,15 +47,14 @@ module Infoboxer
|
|
47
47
|
# for it, as well as shortcuts for some well-known wikis, like
|
48
48
|
# {Infoboxer.wikipedia}.
|
49
49
|
#
|
50
|
-
# @param api_base_url URL of `api.php` file in your MediaWiki
|
50
|
+
# @param api_base_url [String] URL of `api.php` file in your MediaWiki
|
51
51
|
# installation. Typically, it's `<domain>/w/api.php`, but can vary
|
52
52
|
# in different wikis.
|
53
|
-
# @param
|
54
|
-
|
55
|
-
def initialize(api_base_url, options = {})
|
53
|
+
# @param user_agent [String] (also aliased as `:ua`) Custom User-Agent header.
|
54
|
+
def initialize(api_base_url, ua: nil, user_agent: ua)
|
56
55
|
@api_base_url = Addressable::URI.parse(api_base_url)
|
57
|
-
@client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(
|
58
|
-
@traits = Traits.get(@api_base_url.host,
|
56
|
+
@client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
|
57
|
+
@traits = Traits.get(@api_base_url.host, siteinfo)
|
59
58
|
end
|
60
59
|
|
61
60
|
# Receive "raw" data from Wikipedia (without parsing or wrapping in
|
@@ -123,7 +122,9 @@ module Infoboxer
|
|
123
122
|
# and obtain meaningful results instead of `NoMethodError` or
|
124
123
|
# `SomethingNotFound`.
|
125
124
|
#
|
126
|
-
def get(*titles, prop: [])
|
125
|
+
def get(*titles, prop: [], interwiki: nil)
|
126
|
+
return interwikis(interwiki).get(*titles, prop: prop) if interwiki
|
127
|
+
|
127
128
|
pages = get_h(*titles, prop: prop).values.compact
|
128
129
|
titles.count == 1 ? pages.first : Tree::Nodes[*pages]
|
129
130
|
end
|
@@ -251,17 +252,26 @@ module Infoboxer
|
|
251
252
|
[namespace, titl].join(':')
|
252
253
|
end
|
253
254
|
|
254
|
-
def user_agent(
|
255
|
-
|
255
|
+
def user_agent(custom)
|
256
|
+
custom || self.class.user_agent || UA
|
257
|
+
end
|
258
|
+
|
259
|
+
def siteinfo
|
260
|
+
@siteinfo ||= @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
|
256
261
|
end
|
257
262
|
|
258
|
-
def
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
263
|
+
def interwikis(prefix)
|
264
|
+
@interwikis ||= Hash.new { |h, pre|
|
265
|
+
interwiki = siteinfo['interwikimap'].detect { |iw| iw['prefix'] == prefix } or
|
266
|
+
fail ArgumentError, "Undefined interwiki: #{prefix}"
|
267
|
+
|
268
|
+
# FIXME: fragile, but what can we do?..
|
269
|
+
m = interwiki['url'].match(%r{^(.+)/wiki/\$1$}) or
|
270
|
+
fail ArgumentError, "Interwiki #{interwiki} seems not to be a MediaWiki instance"
|
271
|
+
h[pre] = self.class.new("#{m[1]}/w/api.php") # TODO: copy useragent
|
272
|
+
}
|
273
|
+
|
274
|
+
@interwikis[prefix]
|
265
275
|
end
|
266
276
|
end
|
267
277
|
end
|
@@ -34,9 +34,8 @@ module Infoboxer
|
|
34
34
|
end
|
35
35
|
|
36
36
|
# @private
|
37
|
-
def get(domain,
|
38
|
-
|
39
|
-
cls ? cls.new(options) : Traits.new(options)
|
37
|
+
def get(domain, site_info = {})
|
38
|
+
(Traits.domains[domain] || Traits).new(site_info)
|
40
39
|
end
|
41
40
|
|
42
41
|
# @private
|
@@ -68,18 +67,27 @@ module Infoboxer
|
|
68
67
|
alias_method :default, :new
|
69
68
|
end
|
70
69
|
|
71
|
-
def initialize(
|
72
|
-
@
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
70
|
+
def initialize(site_info = {})
|
71
|
+
@site_info = site_info
|
72
|
+
end
|
73
|
+
|
74
|
+
def namespace?(prefix)
|
75
|
+
known_namespaces.include?(prefix)
|
76
|
+
end
|
77
|
+
|
78
|
+
def interwiki?(prefix)
|
79
|
+
known_interwikis.key?(prefix)
|
80
|
+
end
|
81
|
+
|
82
|
+
# @private
|
83
|
+
def file_namespace
|
84
|
+
@file_namespace ||= ns_aliases('File')
|
79
85
|
end
|
80
86
|
|
81
87
|
# @private
|
82
|
-
|
88
|
+
def category_namespace
|
89
|
+
@category_namespace ||= ns_aliases('Category')
|
90
|
+
end
|
83
91
|
|
84
92
|
# @private
|
85
93
|
def templates
|
@@ -88,16 +96,54 @@ module Infoboxer
|
|
88
96
|
|
89
97
|
private
|
90
98
|
|
91
|
-
def
|
92
|
-
|
93
|
-
|
94
|
-
|
99
|
+
def known_namespaces
|
100
|
+
@known_namespaces ||=
|
101
|
+
if @site_info.empty?
|
102
|
+
STANDARD_NAMESPACES
|
103
|
+
else
|
104
|
+
(@site_info['namespaces'].values + @site_info['namespacealiases']).map { |n| n['*'] }
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
def known_interwikis
|
109
|
+
@known_interwikis ||=
|
110
|
+
if @site_info.empty?
|
111
|
+
{}
|
112
|
+
else
|
113
|
+
@site_info['interwikimap'].map { |iw| [iw['prefix'], iw] }.to_h
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
def ns_aliases(base)
|
118
|
+
return [base] if @site_info.empty?
|
119
|
+
main = @site_info['namespaces'].values.detect { |n| n['canonical'] == base }
|
120
|
+
[base, main['*']] +
|
121
|
+
@site_info['namespacealiases']
|
122
|
+
.select { |a| a['id'] == main['id'] }.flat_map { |n| n['*'] }
|
123
|
+
.compact.uniq
|
95
124
|
end
|
96
125
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
126
|
+
# See https://www.mediawiki.org/wiki/Help:Namespaces#Standard_namespaces
|
127
|
+
STANDARD_NAMESPACES = [
|
128
|
+
'Media', # Direct linking to media files.
|
129
|
+
'Special', # Special (non-editable) pages.
|
130
|
+
'', # (Main)
|
131
|
+
'Talk', # Article discussion.
|
132
|
+
'User', #
|
133
|
+
'User talk', #
|
134
|
+
'Project', # Meta-discussions related to the operation and development of the wiki.
|
135
|
+
'Project talk', #
|
136
|
+
'File', # Metadata for images, videos, sound files and other media.
|
137
|
+
'File talk', #
|
138
|
+
'MediaWiki', # System messages and other important content.
|
139
|
+
'MediaWiki talk', #
|
140
|
+
'Template', # Templates: blocks of text or wikicode that are intended to be transcluded.
|
141
|
+
'Template talk', #
|
142
|
+
'Help', # Help files, instructions and "how-to" guides.
|
143
|
+
'Help talk', #
|
144
|
+
'Category', # Categories: dynamic lists of other pages.
|
145
|
+
'Category talk', #
|
146
|
+
].freeze
|
101
147
|
end
|
102
148
|
end
|
103
149
|
end
|
@@ -98,9 +98,13 @@ module Infoboxer
|
|
98
98
|
# Selects matching nodes from current node's siblings, which
|
99
99
|
# are above current node in parents children list.
|
100
100
|
|
101
|
+
# @!method lookup_prev_sibling(*selectors, &block)
|
102
|
+
# Selects the first matching node from current node's siblings, which
|
103
|
+
# are above current node in parents children list.
|
104
|
+
|
101
105
|
# Underscored version of {#matches?}
|
102
106
|
def _matches?(selector)
|
103
|
-
selector
|
107
|
+
selector === self
|
104
108
|
end
|
105
109
|
|
106
110
|
# Underscored version of {#lookup}
|
@@ -136,6 +140,11 @@ module Infoboxer
|
|
136
140
|
prev_siblings._find(selector)
|
137
141
|
end
|
138
142
|
|
143
|
+
# Underscored version of {#lookup_prev_sibling}
|
144
|
+
def _lookup_prev_sibling(selector)
|
145
|
+
prev_siblings.reverse.detect { |n| selector === n }
|
146
|
+
end
|
147
|
+
|
139
148
|
# Underscored version of {#lookup_next_siblings}
|
140
149
|
def _lookup_next_siblings(selector)
|
141
150
|
next_siblings._find(selector)
|
@@ -146,6 +155,7 @@ module Infoboxer
|
|
146
155
|
lookup lookup_children lookup_parents
|
147
156
|
lookup_siblings
|
148
157
|
lookup_next_siblings lookup_prev_siblings
|
158
|
+
lookup_prev_sibling
|
149
159
|
]
|
150
160
|
.map { |sym| [sym, :"_#{sym}"] }
|
151
161
|
.each do |sym, underscored|
|
@@ -123,21 +123,25 @@ module Infoboxer
|
|
123
123
|
#
|
124
124
|
# @return {Tree::Nodes<Section>}
|
125
125
|
def in_sections
|
126
|
-
|
126
|
+
return parent.in_sections unless parent.is_a?(Tree::Document)
|
127
|
+
return @in_sections if @in_sections
|
127
128
|
|
128
129
|
heading =
|
129
|
-
if
|
130
|
-
|
130
|
+
if is_a?(Tree::Heading)
|
131
|
+
lookup_prev_sibling(Tree::Heading, level: level - 1)
|
131
132
|
else
|
132
|
-
|
133
|
+
lookup_prev_sibling(Tree::Heading)
|
133
134
|
end
|
134
|
-
|
135
|
+
unless heading
|
136
|
+
@in_sections = Tree::Nodes[]
|
137
|
+
return @in_sections
|
138
|
+
end
|
135
139
|
|
136
140
|
body = heading.next_siblings
|
137
141
|
.take_while { |n| !n.is_a?(Tree::Heading) || n.level < heading.level }
|
138
142
|
|
139
143
|
section = Section.new(heading, body)
|
140
|
-
Tree::Nodes[section, *heading.in_sections]
|
144
|
+
@in_sections = Tree::Nodes[section, *heading.in_sections]
|
141
145
|
end
|
142
146
|
end
|
143
147
|
|
@@ -24,8 +24,8 @@ module Infoboxer
|
|
24
24
|
"#<Selector(#{@arg.map(&:to_s).join(', ')})>"
|
25
25
|
end
|
26
26
|
|
27
|
-
def
|
28
|
-
@arg.all? { |a| arg_matches?(a,
|
27
|
+
def ===(other)
|
28
|
+
@arg.all? { |a| arg_matches?(a, other) }
|
29
29
|
end
|
30
30
|
|
31
31
|
private
|
@@ -44,8 +44,8 @@ module Infoboxer
|
|
44
44
|
check.call(node)
|
45
45
|
when Hash
|
46
46
|
check.all? { |attr, value|
|
47
|
-
node.respond_to?(attr) && value
|
48
|
-
node.params.key?(attr) && value
|
47
|
+
node.respond_to?(attr) && value_matches?(value, node.send(attr)) ||
|
48
|
+
node.params.key?(attr) && value_matches?(value, node.params[attr])
|
49
49
|
}
|
50
50
|
when Symbol
|
51
51
|
node.respond_to?(check) && node.send(check)
|
@@ -53,6 +53,14 @@ module Infoboxer
|
|
53
53
|
check === node
|
54
54
|
end
|
55
55
|
end
|
56
|
+
|
57
|
+
def value_matches?(matcher, value)
|
58
|
+
if matcher.is_a?(String) && value.is_a?(String)
|
59
|
+
matcher.casecmp(value).zero?
|
60
|
+
else
|
61
|
+
matcher === value
|
62
|
+
end
|
63
|
+
end
|
56
64
|
end
|
57
65
|
end
|
58
66
|
end
|
@@ -83,7 +83,7 @@ module Infoboxer
|
|
83
83
|
|
84
84
|
private
|
85
85
|
|
86
|
-
def inline_formatting(match)
|
86
|
+
def inline_formatting(match) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/AbcSize
|
87
87
|
case match
|
88
88
|
when "'''''"
|
89
89
|
BoldItalic.new(short_inline(/'''''/))
|
@@ -109,6 +109,8 @@ module Infoboxer
|
|
109
109
|
reference(Regexp.last_match(1))
|
110
110
|
when /<math>/
|
111
111
|
math
|
112
|
+
when /<gallery([^>]*)>/
|
113
|
+
gallery(Regexp.last_match(1))
|
112
114
|
when '<'
|
113
115
|
html || Text.new(match) # it was not HTML, just accidental <
|
114
116
|
else
|
@@ -126,8 +128,18 @@ module Infoboxer
|
|
126
128
|
caption = inline(/\]\]/)
|
127
129
|
@context.pop_eol_sign
|
128
130
|
end
|
131
|
+
name, namespace = link.split(':', 2).reverse
|
132
|
+
lnk, params =
|
133
|
+
if @context.traits.namespace?(namespace)
|
134
|
+
[link, {namespace: namespace}]
|
135
|
+
elsif @context.traits.interwiki?(namespace)
|
136
|
+
[name, {interwiki: namespace}]
|
137
|
+
else
|
138
|
+
[link, {}]
|
139
|
+
end
|
129
140
|
|
130
|
-
|
141
|
+
puts @context.rest if lnk.nil?
|
142
|
+
Wikilink.new(lnk, caption, **params)
|
131
143
|
end
|
132
144
|
|
133
145
|
# http://en.wikipedia.org/wiki/Help:Link#External_links
|
@@ -159,6 +171,34 @@ module Infoboxer
|
|
159
171
|
Text.new(@context.scan_continued_until(%r{</nowiki>}))
|
160
172
|
end
|
161
173
|
end
|
174
|
+
|
175
|
+
def gallery(tag_rest)
|
176
|
+
params = parse_params(tag_rest)
|
177
|
+
images = []
|
178
|
+
guarded_loop do
|
179
|
+
@context.next! if @context.eol?
|
180
|
+
path = @context.scan_until(%r{</gallery>|\||$})
|
181
|
+
attrs = @context.matched == '|' ? gallery_image_attrs : {}
|
182
|
+
unless path.empty?
|
183
|
+
images << Tree::Image.new(path.sub(/^#{re.file_namespace}/, ''), attrs)
|
184
|
+
end
|
185
|
+
break if @context.matched == '</gallery>'
|
186
|
+
end
|
187
|
+
Gallery.new(images, params)
|
188
|
+
end
|
189
|
+
|
190
|
+
def gallery_image_attrs
|
191
|
+
nodes = []
|
192
|
+
|
193
|
+
guarded_loop do
|
194
|
+
nodes << short_inline(%r{\||</gallery>})
|
195
|
+
break if @context.eol? || @context.matched?(%r{</gallery>})
|
196
|
+
end
|
197
|
+
|
198
|
+
nodes.map(&method(:image_attr))
|
199
|
+
.inject(&:merge)
|
200
|
+
.reject { |_k, v| v.nil? || v.empty? }
|
201
|
+
end
|
162
202
|
end
|
163
203
|
|
164
204
|
require_relative 'image'
|
@@ -29,8 +29,8 @@ module Infoboxer
|
|
29
29
|
|
30
30
|
guarded_loop do
|
31
31
|
@context.next! while @context.eol?
|
32
|
-
if @context.check(/\s*([
|
33
|
-
name = @context.scan(/\s*([
|
32
|
+
if @context.check(/\s*([^=}|<]+)\s*=\s*/)
|
33
|
+
name = @context.scan(/\s*([^=]+)/).strip
|
34
34
|
@context.skip(/\s*=\s*/)
|
35
35
|
else
|
36
36
|
name = num
|
@@ -52,7 +52,7 @@ module Infoboxer
|
|
52
52
|
end
|
53
53
|
|
54
54
|
def sanitize_value(nodes)
|
55
|
-
nodes.pop if nodes.last.is_a?(Pre) && nodes.last.text =~ /^\s*$/ # FIXME: dirty!
|
55
|
+
nodes.pop if (nodes.last.is_a?(Pre) || nodes.last.is_a?(Text)) && nodes.last.text =~ /^\s*$/ # FIXME: dirty!
|
56
56
|
nodes
|
57
57
|
end
|
58
58
|
end
|
data/lib/infoboxer/tree.rb
CHANGED
@@ -63,7 +63,7 @@ module Infoboxer
|
|
63
63
|
require_relative 'tree/nodes'
|
64
64
|
|
65
65
|
%w[text compound inline
|
66
|
-
image html paragraphs list template table ref math
|
66
|
+
image gallery html paragraphs list template table ref math
|
67
67
|
document].each do |type|
|
68
68
|
require_relative "tree/#{type}"
|
69
69
|
end
|
@@ -4,7 +4,7 @@ module Infoboxer
|
|
4
4
|
module Tree
|
5
5
|
# Base class for all nodes with children.
|
6
6
|
class Compound < Node
|
7
|
-
def initialize(children = Nodes.new, params
|
7
|
+
def initialize(children = Nodes.new, **params)
|
8
8
|
super(params)
|
9
9
|
@children = Nodes[*children]
|
10
10
|
@children.each { |c| c.parent = self }
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Infoboxer
|
4
|
+
module Tree
|
5
|
+
# Represents gallery of images (contents of `<gallery>` special tag).
|
6
|
+
#
|
7
|
+
# See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Help:Gallery_tag)
|
8
|
+
# for explanation of attributes.
|
9
|
+
class Gallery < Compound
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
data/lib/infoboxer/tree/image.rb
CHANGED
@@ -7,8 +7,8 @@ module Infoboxer
|
|
7
7
|
# See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Wikipedia:Extended_image_syntax)
|
8
8
|
# for explanation of attributes.
|
9
9
|
class Image < Node
|
10
|
-
def initialize(path,
|
11
|
-
@caption =
|
10
|
+
def initialize(path, caption: nil, **params)
|
11
|
+
@caption = caption
|
12
12
|
super({path: path}.merge(params))
|
13
13
|
end
|
14
14
|
|
@@ -17,8 +17,8 @@ module Infoboxer
|
|
17
17
|
|
18
18
|
# Base class for internal/external links,
|
19
19
|
class Link < Compound
|
20
|
-
def initialize(link, label = nil)
|
21
|
-
super(label || Nodes.new([Text.new(link)]), link: link)
|
20
|
+
def initialize(link, label = nil, **attr)
|
21
|
+
super(label || Nodes.new([Text.new(link)]), link: link, **attr)
|
22
22
|
end
|
23
23
|
|
24
24
|
# @!attribute [r] link
|
@@ -15,7 +15,7 @@ module Infoboxer
|
|
15
15
|
# * {Tree::Nodes#follow} for extracting multiple links at once;
|
16
16
|
# * {MediaWiki#get} for basic information on page extraction.
|
17
17
|
def follow
|
18
|
-
client.get(link)
|
18
|
+
client.get(link, interwiki: interwiki)
|
19
19
|
end
|
20
20
|
|
21
21
|
# Human-readable page URL
|
@@ -28,6 +28,9 @@ module Infoboxer
|
|
28
28
|
|
29
29
|
protected
|
30
30
|
|
31
|
+
# redefined in {Wikilink}
|
32
|
+
def interwiki; end
|
33
|
+
|
31
34
|
def page
|
32
35
|
lookup_parents(MediaWiki::Page).first or fail('Not in a page from real source')
|
33
36
|
end
|
data/lib/infoboxer/tree/math.rb
CHANGED
data/lib/infoboxer/tree/node.rb
CHANGED
@@ -11,7 +11,7 @@ module Infoboxer
|
|
11
11
|
# you will receive it from tree and use for navigations.
|
12
12
|
#
|
13
13
|
class Node
|
14
|
-
def initialize(params
|
14
|
+
def initialize(**params)
|
15
15
|
@params = params
|
16
16
|
end
|
17
17
|
|
@@ -154,7 +154,7 @@ module Infoboxer
|
|
154
154
|
end
|
155
155
|
|
156
156
|
def show_params(prms = nil)
|
157
|
-
(prms || params).map { |k, v| "#{k}: #{v.inspect}" }.join(', ')
|
157
|
+
(prms || params).reject { |_, v| v.nil? }.map { |k, v| "#{k}: #{v.inspect}" }.join(', ')
|
158
158
|
end
|
159
159
|
|
160
160
|
def indent(level)
|
data/lib/infoboxer/tree/nodes.rb
CHANGED
@@ -38,10 +38,19 @@ module Infoboxer
|
|
38
38
|
# @!method compact
|
39
39
|
# Just like Array#compact, but returns Nodes
|
40
40
|
|
41
|
+
# @!method grep(pattern)
|
42
|
+
# Just like Array#grep, but returns Nodes
|
43
|
+
|
44
|
+
# @!method grep_v(pattern)
|
45
|
+
# Just like Array#grep_v, but returns Nodes
|
46
|
+
|
41
47
|
# @!method -(other)
|
42
48
|
# Just like Array#-, but returns Nodes
|
43
49
|
|
44
|
-
|
50
|
+
# @!method +(other)
|
51
|
+
# Just like Array#+, but returns Nodes
|
52
|
+
|
53
|
+
%i[select reject sort_by flatten compact grep grep_v - +].each do |sym|
|
45
54
|
define_method(sym) do |*args, &block|
|
46
55
|
Nodes[*super(*args, &block)]
|
47
56
|
end
|
@@ -75,6 +84,21 @@ module Infoboxer
|
|
75
84
|
end
|
76
85
|
end
|
77
86
|
|
87
|
+
# Just like Array#flat_map, but returns Nodes, **if** all map results are Node
|
88
|
+
def flat_map
|
89
|
+
res = super
|
90
|
+
if res.all? { |n| n.is_a?(Node) || n.is_a?(Nodes) }
|
91
|
+
Nodes[*res]
|
92
|
+
else
|
93
|
+
res
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
# Just like Array#group_by, but returns hash with `{<grouping variable> => Nodes}`
|
98
|
+
def group_by
|
99
|
+
super.map { |title, group| [title, Nodes[*group]] }.to_h
|
100
|
+
end
|
101
|
+
|
78
102
|
# @!method prev_siblings
|
79
103
|
# Previous siblings (flat list) of all nodes inside.
|
80
104
|
|
@@ -139,12 +163,14 @@ module Infoboxer
|
|
139
163
|
# @return [Nodes<MediaWiki::Page>] It is still `Nodes`, so you
|
140
164
|
# still can process them uniformly.
|
141
165
|
def follow
|
142
|
-
links =
|
166
|
+
links = grep(Linkable)
|
143
167
|
return Nodes[] if links.empty?
|
144
168
|
page = first.lookup_parents(MediaWiki::Page).first or
|
145
169
|
fail('Not in a page from real source')
|
146
170
|
page.client or fail('MediaWiki client not set')
|
147
|
-
|
171
|
+
pages = links.group_by(&:interwiki)
|
172
|
+
.flat_map { |iw, ls| page.client.get(*ls.map(&:link), interwiki: iw) }
|
173
|
+
pages.count == 1 ? pages.first : Nodes[*pages]
|
148
174
|
end
|
149
175
|
|
150
176
|
# @private
|
@@ -173,7 +199,9 @@ module Infoboxer
|
|
173
199
|
# @private
|
174
200
|
# Internal, used by {Parser}
|
175
201
|
def flow_templates
|
176
|
-
|
202
|
+
# TODO: will it be better?..
|
203
|
+
# make_nodes(map { |n| n.is_a?(Paragraph) ? n.to_templates? : n })
|
204
|
+
self
|
177
205
|
end
|
178
206
|
|
179
207
|
private
|
@@ -22,6 +22,10 @@ module Infoboxer
|
|
22
22
|
false
|
23
23
|
end
|
24
24
|
|
25
|
+
def named?
|
26
|
+
name !~ /^\d+$/
|
27
|
+
end
|
28
|
+
|
25
29
|
protected
|
26
30
|
|
27
31
|
def descr
|
@@ -139,7 +143,7 @@ module Infoboxer
|
|
139
143
|
#
|
140
144
|
# @return [Nodes<Var>]
|
141
145
|
def unnamed_variables
|
142
|
-
variables.
|
146
|
+
variables.reject(&:named?)
|
143
147
|
end
|
144
148
|
|
145
149
|
# Fetches template variable(s) by name(s) or patterns.
|
@@ -242,7 +246,7 @@ module Infoboxer
|
|
242
246
|
def extract_params(vars)
|
243
247
|
vars
|
244
248
|
.select { |v| v.children.count == 1 && v.children.first.is_a?(Text) }
|
245
|
-
.map { |v| [v.name, v.children.first.raw_text] }.to_h
|
249
|
+
.map { |v| [v.name.to_sym, v.children.first.raw_text] }.to_h
|
246
250
|
end
|
247
251
|
|
248
252
|
def inspect_variables(depth)
|
data/lib/infoboxer/tree/text.rb
CHANGED
@@ -12,14 +12,23 @@ module Infoboxer
|
|
12
12
|
# Note, that Wikilink is {Linkable}, so you can {Linkable#follow #follow}
|
13
13
|
# it to obtain linked pages.
|
14
14
|
class Wikilink < Link
|
15
|
-
def initialize(
|
16
|
-
super
|
17
|
-
|
15
|
+
def initialize(link, label = nil, namespace: nil, interwiki: nil)
|
16
|
+
super(link, label, namespace: namespace, interwiki: interwiki)
|
17
|
+
@namespace = namespace || ''
|
18
|
+
@interwiki = interwiki
|
19
|
+
parse_name!
|
18
20
|
end
|
19
21
|
|
20
22
|
# "Clean" wikilink name, e.g. `Cities` for `[[Category:Cities]]`
|
21
23
|
attr_reader :name
|
22
24
|
|
25
|
+
# Interwiki identifier. For example, `[[wikt:Argentina]]`
|
26
|
+
# will have `"Argentina"` as its {#name} and `"wikt"` (wiktionary) as an
|
27
|
+
# interwiki. TODO: how to use it.
|
28
|
+
#
|
29
|
+
# See [Wikipedia docs](https://en.wikipedia.org/wiki/Help:Interwiki_linking) for details.
|
30
|
+
attr_reader :interwiki
|
31
|
+
|
23
32
|
# Wikilink namespace, `Category` for `[[Category:Cities]]`, empty
|
24
33
|
# string (not `nil`!) for just `[[Cities]]`
|
25
34
|
attr_reader :namespace
|
@@ -46,10 +55,8 @@ module Infoboxer
|
|
46
55
|
|
47
56
|
private
|
48
57
|
|
49
|
-
def
|
50
|
-
@name
|
51
|
-
@namespace ||= ''
|
52
|
-
|
58
|
+
def parse_name!
|
59
|
+
@name = namespace.empty? ? link : link.sub(/^#{namespace}:/, '')
|
53
60
|
@name, @anchor = @name.split('#', 2)
|
54
61
|
@anchor ||= ''
|
55
62
|
|
data/lib/infoboxer/version.rb
CHANGED
data/lib/infoboxer/wiki_path.rb
CHANGED
@@ -36,7 +36,7 @@ module Infoboxer
|
|
36
36
|
attrs[attr.to_sym] = process_value(value)
|
37
37
|
end
|
38
38
|
res = op == '//' ? {op: :lookup} : {}
|
39
|
-
res[:type] = type
|
39
|
+
res[:type] = process_type(type) unless type.empty?
|
40
40
|
res.merge(attrs) # TODO: raise if empty selector
|
41
41
|
end
|
42
42
|
|
@@ -51,6 +51,15 @@ module Infoboxer
|
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
|
+
def process_type(type)
|
55
|
+
type.gsub(/(?:^|_)([a-z])/, &:upcase).tr('_', '').to_sym
|
56
|
+
.tap { |t| valid_type?(t) or fail(ParseError, "Unrecognized node type: #{type}") }
|
57
|
+
end
|
58
|
+
|
59
|
+
def valid_type?(t)
|
60
|
+
t == :Section || Infoboxer::Tree.const_defined?(t)
|
61
|
+
end
|
62
|
+
|
54
63
|
def unexpected(scanner, expected)
|
55
64
|
place = scanner.eos? ? 'end of pattern' : scanner.rest.inspect
|
56
65
|
fail ParseError, "Unexpected #{place}, expecting #{expected}"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: infoboxer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.1.pre
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Shepelev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-09-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -117,6 +117,7 @@ files:
|
|
117
117
|
- lib/infoboxer/tree.rb
|
118
118
|
- lib/infoboxer/tree/compound.rb
|
119
119
|
- lib/infoboxer/tree/document.rb
|
120
|
+
- lib/infoboxer/tree/gallery.rb
|
120
121
|
- lib/infoboxer/tree/html.rb
|
121
122
|
- lib/infoboxer/tree/image.rb
|
122
123
|
- lib/infoboxer/tree/inline.rb
|
@@ -165,9 +166,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
165
166
|
version: 2.1.0
|
166
167
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
168
|
requirements:
|
168
|
-
- - "
|
169
|
+
- - ">"
|
169
170
|
- !ruby/object:Gem::Version
|
170
|
-
version:
|
171
|
+
version: 1.3.1
|
171
172
|
requirements: []
|
172
173
|
rubyforge_project:
|
173
174
|
rubygems_version: 2.6.10
|