infoboxer 0.3.0 → 0.3.1.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +0 -15
- data/CHANGELOG.md +11 -0
- data/lib/infoboxer.rb +4 -4
- data/lib/infoboxer/media_wiki.rb +26 -16
- data/lib/infoboxer/media_wiki/traits.rb +66 -20
- data/lib/infoboxer/navigation/lookup.rb +11 -1
- data/lib/infoboxer/navigation/sections.rb +10 -6
- data/lib/infoboxer/navigation/selector.rb +12 -4
- data/lib/infoboxer/parser/inline.rb +42 -2
- data/lib/infoboxer/parser/paragraphs.rb +1 -1
- data/lib/infoboxer/parser/template.rb +3 -3
- data/lib/infoboxer/parser/util.rb +1 -0
- data/lib/infoboxer/tree.rb +1 -1
- data/lib/infoboxer/tree/compound.rb +1 -1
- data/lib/infoboxer/tree/gallery.rb +12 -0
- data/lib/infoboxer/tree/image.rb +2 -2
- data/lib/infoboxer/tree/inline.rb +2 -2
- data/lib/infoboxer/tree/linkable.rb +4 -1
- data/lib/infoboxer/tree/math.rb +0 -3
- data/lib/infoboxer/tree/node.rb +2 -2
- data/lib/infoboxer/tree/nodes.rb +32 -4
- data/lib/infoboxer/tree/template.rb +6 -2
- data/lib/infoboxer/tree/text.rb +1 -1
- data/lib/infoboxer/tree/wikilink.rb +14 -7
- data/lib/infoboxer/version.rb +3 -2
- data/lib/infoboxer/wiki_path.rb +10 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ebc38c153d481aca588625caf2bcb576046afa9
|
4
|
+
data.tar.gz: 4f0ccc3b3130403a7a1e35adfc3977d954ac5a66
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4593f04e93b2714f13f9abb68cb42cec84ad258d786c815a99ca95d150ba55d597106d1e9e71d3802177cce4a4ad5bf1de0b1f5fa4cc733af19d673fbc600945
|
7
|
+
data.tar.gz: 1dc93e4fc257a4cee3fd2c9eb263b24f1a9d8b755a3765bfdf71d1617f7ba9a4ee7d9323502dbee23e48dc85bb6899facb7eadcc62ccc386d958dfca6f28fe5e
|
data/.rubocop_todo.yml
CHANGED
@@ -1,16 +1 @@
|
|
1
|
-
# This configuration was generated by
|
2
|
-
# `rubocop --auto-gen-config`
|
3
|
-
# on 2017-06-23 13:52:16 +0300 using RuboCop version 0.49.1.
|
4
|
-
# The point is for the user to remove these configuration records
|
5
|
-
# one by one as the offenses are removed from the code base.
|
6
|
-
# Note that changes in the inspected code, or installation of new
|
7
|
-
# versions of RuboCop, may require this file to be generated again.
|
8
|
-
|
9
|
-
# Offense count: 1
|
10
|
-
Metrics/AbcSize:
|
11
|
-
Max: 29
|
12
|
-
|
13
|
-
# Offense count: 1
|
14
|
-
Metrics/PerceivedComplexity:
|
15
|
-
Max: 10
|
16
1
|
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,16 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
+
## 0.3.1.pre (2017-09-16)
|
4
|
+
|
5
|
+
* Introduce following of interwiki links (and proper handling of interwikis in general);
|
6
|
+
* Add `<gallery>` tag support;
|
7
|
+
* Introduce `Navigation::Selector#===`;
|
8
|
+
* Many more `Enumerable` methods are supported by `Nodes`;
|
9
|
+
* Lots of small simplifications, cleanups and bugfixes.
|
10
|
+
|
11
|
+
TBH, it should be 0.4.0 or more, but it would be a shame to change versions so fast :) So, at least
|
12
|
+
until it is `-pre`, let it be 0.3.1.
|
13
|
+
|
3
14
|
## 0.3.0 (2017-07-23)
|
4
15
|
|
5
16
|
* Change logic of navigation through templates; now templates contents aren't hidden from global
|
data/lib/infoboxer.rb
CHANGED
@@ -72,8 +72,8 @@ module Infoboxer
|
|
72
72
|
end
|
73
73
|
|
74
74
|
# Includeable version of {Infoboxer.wiki}
|
75
|
-
def wiki(api_url, options
|
76
|
-
wikis[api_url] ||= MediaWiki.new(api_url, options
|
75
|
+
def wiki(api_url, **options)
|
76
|
+
wikis[api_url] ||= MediaWiki.new(api_url, options)
|
77
77
|
end
|
78
78
|
|
79
79
|
class << self
|
@@ -168,7 +168,7 @@ module Infoboxer
|
|
168
168
|
end
|
169
169
|
|
170
170
|
WIKIMEDIA_PROJECTS.each do |name, domain|
|
171
|
-
define_method name do |lang = 'en', options
|
171
|
+
define_method name do |lang = 'en', **options|
|
172
172
|
lang, options = 'en', lang if lang.is_a?(Hash)
|
173
173
|
|
174
174
|
wiki("https://#{lang}.#{domain}/w/api.php", options)
|
@@ -178,7 +178,7 @@ module Infoboxer
|
|
178
178
|
alias_method :wp, :wikipedia
|
179
179
|
|
180
180
|
WIKIMEDIA_COMMONS.each do |name, domain|
|
181
|
-
define_method name do
|
181
|
+
define_method name do |**options|
|
182
182
|
wiki("https://#{domain}/w/api.php", options)
|
183
183
|
end
|
184
184
|
end
|
data/lib/infoboxer/media_wiki.rb
CHANGED
@@ -47,15 +47,14 @@ module Infoboxer
|
|
47
47
|
# for it, as well as shortcuts for some well-known wikis, like
|
48
48
|
# {Infoboxer.wikipedia}.
|
49
49
|
#
|
50
|
-
# @param api_base_url URL of `api.php` file in your MediaWiki
|
50
|
+
# @param api_base_url [String] URL of `api.php` file in your MediaWiki
|
51
51
|
# installation. Typically, it's `<domain>/w/api.php`, but it can vary
|
52
52
|
# in different wikis.
|
53
|
-
# @param
|
54
|
-
|
55
|
-
def initialize(api_base_url, options = {})
|
53
|
+
# @param user_agent [String] (also aliased as `:ua`) Custom User-Agent header.
|
54
|
+
def initialize(api_base_url, ua: nil, user_agent: ua)
|
56
55
|
@api_base_url = Addressable::URI.parse(api_base_url)
|
57
|
-
@client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(
|
58
|
-
@traits = Traits.get(@api_base_url.host,
|
56
|
+
@client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
|
57
|
+
@traits = Traits.get(@api_base_url.host, siteinfo)
|
59
58
|
end
|
60
59
|
|
61
60
|
# Receive "raw" data from Wikipedia (without parsing or wrapping in
|
@@ -123,7 +122,9 @@ module Infoboxer
|
|
123
122
|
# and obtain meaningful results instead of `NoMethodError` or
|
124
123
|
# `SomethingNotFound`.
|
125
124
|
#
|
126
|
-
def get(*titles, prop: [])
|
125
|
+
def get(*titles, prop: [], interwiki: nil)
|
126
|
+
return interwikis(interwiki).get(*titles, prop: prop) if interwiki
|
127
|
+
|
127
128
|
pages = get_h(*titles, prop: prop).values.compact
|
128
129
|
titles.count == 1 ? pages.first : Tree::Nodes[*pages]
|
129
130
|
end
|
@@ -251,17 +252,26 @@ module Infoboxer
|
|
251
252
|
[namespace, titl].join(':')
|
252
253
|
end
|
253
254
|
|
254
|
-
def user_agent(
|
255
|
-
|
255
|
+
def user_agent(custom)
|
256
|
+
custom || self.class.user_agent || UA
|
257
|
+
end
|
258
|
+
|
259
|
+
def siteinfo
|
260
|
+
@siteinfo ||= @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
|
256
261
|
end
|
257
262
|
|
258
|
-
def
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
263
|
+
def interwikis(prefix)
|
264
|
+
@interwikis ||= Hash.new { |h, pre|
|
265
|
+
interwiki = siteinfo['interwikimap'].detect { |iw| iw['prefix'] == prefix } or
|
266
|
+
fail ArgumentError, "Undefined interwiki: #{prefix}"
|
267
|
+
|
268
|
+
# FIXME: fragile, but what can we do?..
|
269
|
+
m = interwiki['url'].match(%r{^(.+)/wiki/\$1$}) or
|
270
|
+
fail ArgumentError, "Interwiki #{interwiki} seems not to be a MediaWiki instance"
|
271
|
+
h[pre] = self.class.new("#{m[1]}/w/api.php") # TODO: copy useragent
|
272
|
+
}
|
273
|
+
|
274
|
+
@interwikis[prefix]
|
265
275
|
end
|
266
276
|
end
|
267
277
|
end
|
@@ -34,9 +34,8 @@ module Infoboxer
|
|
34
34
|
end
|
35
35
|
|
36
36
|
# @private
|
37
|
-
def get(domain,
|
38
|
-
|
39
|
-
cls ? cls.new(options) : Traits.new(options)
|
37
|
+
def get(domain, site_info = {})
|
38
|
+
(Traits.domains[domain] || Traits).new(site_info)
|
40
39
|
end
|
41
40
|
|
42
41
|
# @private
|
@@ -68,18 +67,27 @@ module Infoboxer
|
|
68
67
|
alias_method :default, :new
|
69
68
|
end
|
70
69
|
|
71
|
-
def initialize(
|
72
|
-
@
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
70
|
+
def initialize(site_info = {})
|
71
|
+
@site_info = site_info
|
72
|
+
end
|
73
|
+
|
74
|
+
def namespace?(prefix)
|
75
|
+
known_namespaces.include?(prefix)
|
76
|
+
end
|
77
|
+
|
78
|
+
def interwiki?(prefix)
|
79
|
+
known_interwikis.key?(prefix)
|
80
|
+
end
|
81
|
+
|
82
|
+
# @private
|
83
|
+
def file_namespace
|
84
|
+
@file_namespace ||= ns_aliases('File')
|
79
85
|
end
|
80
86
|
|
81
87
|
# @private
|
82
|
-
|
88
|
+
def category_namespace
|
89
|
+
@category_namespace ||= ns_aliases('Category')
|
90
|
+
end
|
83
91
|
|
84
92
|
# @private
|
85
93
|
def templates
|
@@ -88,16 +96,54 @@ module Infoboxer
|
|
88
96
|
|
89
97
|
private
|
90
98
|
|
91
|
-
def
|
92
|
-
|
93
|
-
|
94
|
-
|
99
|
+
def known_namespaces
|
100
|
+
@known_namespaces ||=
|
101
|
+
if @site_info.empty?
|
102
|
+
STANDARD_NAMESPACES
|
103
|
+
else
|
104
|
+
(@site_info['namespaces'].values + @site_info['namespacealiases']).map { |n| n['*'] }
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
def known_interwikis
|
109
|
+
@known_interwikis ||=
|
110
|
+
if @site_info.empty?
|
111
|
+
{}
|
112
|
+
else
|
113
|
+
@site_info['interwikimap'].map { |iw| [iw['prefix'], iw] }.to_h
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
def ns_aliases(base)
|
118
|
+
return [base] if @site_info.empty?
|
119
|
+
main = @site_info['namespaces'].values.detect { |n| n['canonical'] == base }
|
120
|
+
[base, main['*']] +
|
121
|
+
@site_info['namespacealiases']
|
122
|
+
.select { |a| a['id'] == main['id'] }.flat_map { |n| n['*'] }
|
123
|
+
.compact.uniq
|
95
124
|
end
|
96
125
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
126
|
+
# See https://www.mediawiki.org/wiki/Help:Namespaces#Standard_namespaces
|
127
|
+
STANDARD_NAMESPACES = [
|
128
|
+
'Media', # Direct linking to media files.
|
129
|
+
'Special', # Special (non-editable) pages.
|
130
|
+
'', # (Main)
|
131
|
+
'Talk', # Article discussion.
|
132
|
+
'User', #
|
133
|
+
'User talk', #
|
134
|
+
'Project', # Meta-discussions related to the operation and development of the wiki.
|
135
|
+
'Project talk', #
|
136
|
+
'File', # Metadata for images, videos, sound files and other media.
|
137
|
+
'File talk', #
|
138
|
+
'MediaWiki', # System messages and other important content.
|
139
|
+
'MediaWiki talk', #
|
140
|
+
'Template', # Templates: blocks of text or wikicode that are intended to be transcluded.
|
141
|
+
'Template talk', #
|
142
|
+
'Help', # Help files, instructions and "how-to" guides.
|
143
|
+
'Help talk', #
|
144
|
+
'Category', # Categories: dynamic lists of other pages.
|
145
|
+
'Category talk', #
|
146
|
+
].freeze
|
101
147
|
end
|
102
148
|
end
|
103
149
|
end
|
@@ -98,9 +98,13 @@ module Infoboxer
|
|
98
98
|
# Selects matching nodes from current node's siblings, which
|
99
99
|
# are above current node in parents children list.
|
100
100
|
|
101
|
+
# @!method lookup_prev_sibling(*selectors, &block)
|
102
|
+
# Selects the first matching node from current node's siblings, which
|
103
|
+
# are above current node in parent's children list.
|
104
|
+
|
101
105
|
# Underscored version of {#matches?}
|
102
106
|
def _matches?(selector)
|
103
|
-
selector
|
107
|
+
selector === self
|
104
108
|
end
|
105
109
|
|
106
110
|
# Underscored version of {#lookup}
|
@@ -136,6 +140,11 @@ module Infoboxer
|
|
136
140
|
prev_siblings._find(selector)
|
137
141
|
end
|
138
142
|
|
143
|
+
# Underscored version of {#lookup_prev_sibling}
|
144
|
+
def _lookup_prev_sibling(selector)
|
145
|
+
prev_siblings.reverse.detect { |n| selector === n }
|
146
|
+
end
|
147
|
+
|
139
148
|
# Underscored version of {#lookup_next_siblings}
|
140
149
|
def _lookup_next_siblings(selector)
|
141
150
|
next_siblings._find(selector)
|
@@ -146,6 +155,7 @@ module Infoboxer
|
|
146
155
|
lookup lookup_children lookup_parents
|
147
156
|
lookup_siblings
|
148
157
|
lookup_next_siblings lookup_prev_siblings
|
158
|
+
lookup_prev_sibling
|
149
159
|
]
|
150
160
|
.map { |sym| [sym, :"_#{sym}"] }
|
151
161
|
.each do |sym, underscored|
|
@@ -123,21 +123,25 @@ module Infoboxer
|
|
123
123
|
#
|
124
124
|
# @return {Tree::Nodes<Section>}
|
125
125
|
def in_sections
|
126
|
-
|
126
|
+
return parent.in_sections unless parent.is_a?(Tree::Document)
|
127
|
+
return @in_sections if @in_sections
|
127
128
|
|
128
129
|
heading =
|
129
|
-
if
|
130
|
-
|
130
|
+
if is_a?(Tree::Heading)
|
131
|
+
lookup_prev_sibling(Tree::Heading, level: level - 1)
|
131
132
|
else
|
132
|
-
|
133
|
+
lookup_prev_sibling(Tree::Heading)
|
133
134
|
end
|
134
|
-
|
135
|
+
unless heading
|
136
|
+
@in_sections = Tree::Nodes[]
|
137
|
+
return @in_sections
|
138
|
+
end
|
135
139
|
|
136
140
|
body = heading.next_siblings
|
137
141
|
.take_while { |n| !n.is_a?(Tree::Heading) || n.level < heading.level }
|
138
142
|
|
139
143
|
section = Section.new(heading, body)
|
140
|
-
Tree::Nodes[section, *heading.in_sections]
|
144
|
+
@in_sections = Tree::Nodes[section, *heading.in_sections]
|
141
145
|
end
|
142
146
|
end
|
143
147
|
|
@@ -24,8 +24,8 @@ module Infoboxer
|
|
24
24
|
"#<Selector(#{@arg.map(&:to_s).join(', ')})>"
|
25
25
|
end
|
26
26
|
|
27
|
-
def
|
28
|
-
@arg.all? { |a| arg_matches?(a,
|
27
|
+
def ===(other)
|
28
|
+
@arg.all? { |a| arg_matches?(a, other) }
|
29
29
|
end
|
30
30
|
|
31
31
|
private
|
@@ -44,8 +44,8 @@ module Infoboxer
|
|
44
44
|
check.call(node)
|
45
45
|
when Hash
|
46
46
|
check.all? { |attr, value|
|
47
|
-
node.respond_to?(attr) && value
|
48
|
-
node.params.key?(attr) && value
|
47
|
+
node.respond_to?(attr) && value_matches?(value, node.send(attr)) ||
|
48
|
+
node.params.key?(attr) && value_matches?(value, node.params[attr])
|
49
49
|
}
|
50
50
|
when Symbol
|
51
51
|
node.respond_to?(check) && node.send(check)
|
@@ -53,6 +53,14 @@ module Infoboxer
|
|
53
53
|
check === node
|
54
54
|
end
|
55
55
|
end
|
56
|
+
|
57
|
+
def value_matches?(matcher, value)
|
58
|
+
if matcher.is_a?(String) && value.is_a?(String)
|
59
|
+
matcher.casecmp(value).zero?
|
60
|
+
else
|
61
|
+
matcher === value
|
62
|
+
end
|
63
|
+
end
|
56
64
|
end
|
57
65
|
end
|
58
66
|
end
|
@@ -83,7 +83,7 @@ module Infoboxer
|
|
83
83
|
|
84
84
|
private
|
85
85
|
|
86
|
-
def inline_formatting(match)
|
86
|
+
def inline_formatting(match) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/AbcSize
|
87
87
|
case match
|
88
88
|
when "'''''"
|
89
89
|
BoldItalic.new(short_inline(/'''''/))
|
@@ -109,6 +109,8 @@ module Infoboxer
|
|
109
109
|
reference(Regexp.last_match(1))
|
110
110
|
when /<math>/
|
111
111
|
math
|
112
|
+
when /<gallery([^>]*)>/
|
113
|
+
gallery(Regexp.last_match(1))
|
112
114
|
when '<'
|
113
115
|
html || Text.new(match) # it was not HTML, just accidental <
|
114
116
|
else
|
@@ -126,8 +128,18 @@ module Infoboxer
|
|
126
128
|
caption = inline(/\]\]/)
|
127
129
|
@context.pop_eol_sign
|
128
130
|
end
|
131
|
+
name, namespace = link.split(':', 2).reverse
|
132
|
+
lnk, params =
|
133
|
+
if @context.traits.namespace?(namespace)
|
134
|
+
[link, {namespace: namespace}]
|
135
|
+
elsif @context.traits.interwiki?(namespace)
|
136
|
+
[name, {interwiki: namespace}]
|
137
|
+
else
|
138
|
+
[link, {}]
|
139
|
+
end
|
129
140
|
|
130
|
-
|
141
|
+
puts @context.rest if lnk.nil?
|
142
|
+
Wikilink.new(lnk, caption, **params)
|
131
143
|
end
|
132
144
|
|
133
145
|
# http://en.wikipedia.org/wiki/Help:Link#External_links
|
@@ -159,6 +171,34 @@ module Infoboxer
|
|
159
171
|
Text.new(@context.scan_continued_until(%r{</nowiki>}))
|
160
172
|
end
|
161
173
|
end
|
174
|
+
|
175
|
+
def gallery(tag_rest)
|
176
|
+
params = parse_params(tag_rest)
|
177
|
+
images = []
|
178
|
+
guarded_loop do
|
179
|
+
@context.next! if @context.eol?
|
180
|
+
path = @context.scan_until(%r{</gallery>|\||$})
|
181
|
+
attrs = @context.matched == '|' ? gallery_image_attrs : {}
|
182
|
+
unless path.empty?
|
183
|
+
images << Tree::Image.new(path.sub(/^#{re.file_namespace}/, ''), attrs)
|
184
|
+
end
|
185
|
+
break if @context.matched == '</gallery>'
|
186
|
+
end
|
187
|
+
Gallery.new(images, params)
|
188
|
+
end
|
189
|
+
|
190
|
+
def gallery_image_attrs
|
191
|
+
nodes = []
|
192
|
+
|
193
|
+
guarded_loop do
|
194
|
+
nodes << short_inline(%r{\||</gallery>})
|
195
|
+
break if @context.eol? || @context.matched?(%r{</gallery>})
|
196
|
+
end
|
197
|
+
|
198
|
+
nodes.map(&method(:image_attr))
|
199
|
+
.inject(&:merge)
|
200
|
+
.reject { |_k, v| v.nil? || v.empty? }
|
201
|
+
end
|
162
202
|
end
|
163
203
|
|
164
204
|
require_relative 'image'
|
@@ -29,8 +29,8 @@ module Infoboxer
|
|
29
29
|
|
30
30
|
guarded_loop do
|
31
31
|
@context.next! while @context.eol?
|
32
|
-
if @context.check(/\s*([
|
33
|
-
name = @context.scan(/\s*([
|
32
|
+
if @context.check(/\s*([^=}|<]+)\s*=\s*/)
|
33
|
+
name = @context.scan(/\s*([^=]+)/).strip
|
34
34
|
@context.skip(/\s*=\s*/)
|
35
35
|
else
|
36
36
|
name = num
|
@@ -52,7 +52,7 @@ module Infoboxer
|
|
52
52
|
end
|
53
53
|
|
54
54
|
def sanitize_value(nodes)
|
55
|
-
nodes.pop if nodes.last.is_a?(Pre) && nodes.last.text =~ /^\s*$/ # FIXME: dirty!
|
55
|
+
nodes.pop if (nodes.last.is_a?(Pre) || nodes.last.is_a?(Text)) && nodes.last.text =~ /^\s*$/ # FIXME: dirty!
|
56
56
|
nodes
|
57
57
|
end
|
58
58
|
end
|
data/lib/infoboxer/tree.rb
CHANGED
@@ -63,7 +63,7 @@ module Infoboxer
|
|
63
63
|
require_relative 'tree/nodes'
|
64
64
|
|
65
65
|
%w[text compound inline
|
66
|
-
image html paragraphs list template table ref math
|
66
|
+
image gallery html paragraphs list template table ref math
|
67
67
|
document].each do |type|
|
68
68
|
require_relative "tree/#{type}"
|
69
69
|
end
|
@@ -4,7 +4,7 @@ module Infoboxer
|
|
4
4
|
module Tree
|
5
5
|
# Base class for all nodes with children.
|
6
6
|
class Compound < Node
|
7
|
-
def initialize(children = Nodes.new, params
|
7
|
+
def initialize(children = Nodes.new, **params)
|
8
8
|
super(params)
|
9
9
|
@children = Nodes[*children]
|
10
10
|
@children.each { |c| c.parent = self }
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Infoboxer
|
4
|
+
module Tree
|
5
|
+
# Represents gallery of images (contents of `<gallery>` special tag).
|
6
|
+
#
|
7
|
+
# See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Help:Gallery_tag)
|
8
|
+
# for explanation of attributes.
|
9
|
+
class Gallery < Compound
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
data/lib/infoboxer/tree/image.rb
CHANGED
@@ -7,8 +7,8 @@ module Infoboxer
|
|
7
7
|
# See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Wikipedia:Extended_image_syntax)
|
8
8
|
# for explanation of attributes.
|
9
9
|
class Image < Node
|
10
|
-
def initialize(path,
|
11
|
-
@caption =
|
10
|
+
def initialize(path, caption: nil, **params)
|
11
|
+
@caption = caption
|
12
12
|
super({path: path}.merge(params))
|
13
13
|
end
|
14
14
|
|
@@ -17,8 +17,8 @@ module Infoboxer
|
|
17
17
|
|
18
18
|
# Base class for internal/external links,
|
19
19
|
class Link < Compound
|
20
|
-
def initialize(link, label = nil)
|
21
|
-
super(label || Nodes.new([Text.new(link)]), link: link)
|
20
|
+
def initialize(link, label = nil, **attr)
|
21
|
+
super(label || Nodes.new([Text.new(link)]), link: link, **attr)
|
22
22
|
end
|
23
23
|
|
24
24
|
# @!attribute [r] link
|
@@ -15,7 +15,7 @@ module Infoboxer
|
|
15
15
|
# * {Tree::Nodes#follow} for extracting multiple links at once;
|
16
16
|
# * {MediaWiki#get} for basic information on page extraction.
|
17
17
|
def follow
|
18
|
-
client.get(link)
|
18
|
+
client.get(link, interwiki: interwiki)
|
19
19
|
end
|
20
20
|
|
21
21
|
# Human-readable page URL
|
@@ -28,6 +28,9 @@ module Infoboxer
|
|
28
28
|
|
29
29
|
protected
|
30
30
|
|
31
|
+
# redefined in {Wikilink}
|
32
|
+
def interwiki; end
|
33
|
+
|
31
34
|
def page
|
32
35
|
lookup_parents(MediaWiki::Page).first or fail('Not in a page from real source')
|
33
36
|
end
|
data/lib/infoboxer/tree/math.rb
CHANGED
data/lib/infoboxer/tree/node.rb
CHANGED
@@ -11,7 +11,7 @@ module Infoboxer
|
|
11
11
|
# you will receive it from tree and use for navigations.
|
12
12
|
#
|
13
13
|
class Node
|
14
|
-
def initialize(params
|
14
|
+
def initialize(**params)
|
15
15
|
@params = params
|
16
16
|
end
|
17
17
|
|
@@ -154,7 +154,7 @@ module Infoboxer
|
|
154
154
|
end
|
155
155
|
|
156
156
|
def show_params(prms = nil)
|
157
|
-
(prms || params).map { |k, v| "#{k}: #{v.inspect}" }.join(', ')
|
157
|
+
(prms || params).reject { |_, v| v.nil? }.map { |k, v| "#{k}: #{v.inspect}" }.join(', ')
|
158
158
|
end
|
159
159
|
|
160
160
|
def indent(level)
|
data/lib/infoboxer/tree/nodes.rb
CHANGED
@@ -38,10 +38,19 @@ module Infoboxer
|
|
38
38
|
# @!method compact
|
39
39
|
# Just like Array#compact, but returns Nodes
|
40
40
|
|
41
|
+
# @!method grep(pattern)
|
42
|
+
# Just like Array#grep, but returns Nodes
|
43
|
+
|
44
|
+
# @!method grep_v(pattern)
|
45
|
+
# Just like Array#grep_v, but returns Nodes
|
46
|
+
|
41
47
|
# @!method -(other)
|
42
48
|
# Just like Array#-, but returns Nodes
|
43
49
|
|
44
|
-
|
50
|
+
# @!method +(other)
|
51
|
+
# Just like Array#+, but returns Nodes
|
52
|
+
|
53
|
+
%i[select reject sort_by flatten compact grep grep_v - +].each do |sym|
|
45
54
|
define_method(sym) do |*args, &block|
|
46
55
|
Nodes[*super(*args, &block)]
|
47
56
|
end
|
@@ -75,6 +84,21 @@ module Infoboxer
|
|
75
84
|
end
|
76
85
|
end
|
77
86
|
|
87
|
+
# Just like Array#flat_map, but returns Nodes, **if** all map results are Node or Nodes
|
88
|
+
def flat_map
|
89
|
+
res = super
|
90
|
+
if res.all? { |n| n.is_a?(Node) || n.is_a?(Nodes) }
|
91
|
+
Nodes[*res]
|
92
|
+
else
|
93
|
+
res
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
# Just like Array#group_by, but returns hash with `{<grouping variable> => Nodes}`
|
98
|
+
def group_by
|
99
|
+
super.map { |title, group| [title, Nodes[*group]] }.to_h
|
100
|
+
end
|
101
|
+
|
78
102
|
# @!method prev_siblings
|
79
103
|
# Previous siblings (flat list) of all nodes inside.
|
80
104
|
|
@@ -139,12 +163,14 @@ module Infoboxer
|
|
139
163
|
# @return [Nodes<MediaWiki::Page>] It is still `Nodes`, so you
|
140
164
|
# can still process them uniformly.
|
141
165
|
def follow
|
142
|
-
links =
|
166
|
+
links = grep(Linkable)
|
143
167
|
return Nodes[] if links.empty?
|
144
168
|
page = first.lookup_parents(MediaWiki::Page).first or
|
145
169
|
fail('Not in a page from real source')
|
146
170
|
page.client or fail('MediaWiki client not set')
|
147
|
-
|
171
|
+
pages = links.group_by(&:interwiki)
|
172
|
+
.flat_map { |iw, ls| page.client.get(*ls.map(&:link), interwiki: iw) }
|
173
|
+
pages.count == 1 ? pages.first : Nodes[*pages]
|
148
174
|
end
|
149
175
|
|
150
176
|
# @private
|
@@ -173,7 +199,9 @@ module Infoboxer
|
|
173
199
|
# @private
|
174
200
|
# Internal, used by {Parser}
|
175
201
|
def flow_templates
|
176
|
-
|
202
|
+
# TODO: will it be better?..
|
203
|
+
# make_nodes(map { |n| n.is_a?(Paragraph) ? n.to_templates? : n })
|
204
|
+
self
|
177
205
|
end
|
178
206
|
|
179
207
|
private
|
@@ -22,6 +22,10 @@ module Infoboxer
|
|
22
22
|
false
|
23
23
|
end
|
24
24
|
|
25
|
+
def named?
|
26
|
+
name !~ /^\d+$/
|
27
|
+
end
|
28
|
+
|
25
29
|
protected
|
26
30
|
|
27
31
|
def descr
|
@@ -139,7 +143,7 @@ module Infoboxer
|
|
139
143
|
#
|
140
144
|
# @return [Nodes<Var>]
|
141
145
|
def unnamed_variables
|
142
|
-
variables.
|
146
|
+
variables.reject(&:named?)
|
143
147
|
end
|
144
148
|
|
145
149
|
# Fetches template variable(s) by name(s) or patterns.
|
@@ -242,7 +246,7 @@ module Infoboxer
|
|
242
246
|
def extract_params(vars)
|
243
247
|
vars
|
244
248
|
.select { |v| v.children.count == 1 && v.children.first.is_a?(Text) }
|
245
|
-
.map { |v| [v.name, v.children.first.raw_text] }.to_h
|
249
|
+
.map { |v| [v.name.to_sym, v.children.first.raw_text] }.to_h
|
246
250
|
end
|
247
251
|
|
248
252
|
def inspect_variables(depth)
|
data/lib/infoboxer/tree/text.rb
CHANGED
@@ -12,14 +12,23 @@ module Infoboxer
|
|
12
12
|
# Note, that Wikilink is {Linkable}, so you can {Linkable#follow #follow}
|
13
13
|
# it to obtain linked pages.
|
14
14
|
class Wikilink < Link
|
15
|
-
def initialize(
|
16
|
-
super
|
17
|
-
|
15
|
+
def initialize(link, label = nil, namespace: nil, interwiki: nil)
|
16
|
+
super(link, label, namespace: namespace, interwiki: interwiki)
|
17
|
+
@namespace = namespace || ''
|
18
|
+
@interwiki = interwiki
|
19
|
+
parse_name!
|
18
20
|
end
|
19
21
|
|
20
22
|
# "Clean" wikilink name, for ex., `Cities` for `[Category:Cities]`
|
21
23
|
attr_reader :name
|
22
24
|
|
25
|
+
# Interwiki identifier. For example, `[[wikt:Argentina]]`
|
26
|
+
# will have `"Argentina"` as its {#name} and `"wikt"` (wiktionary) as an
|
27
|
+
# interwiki. TODO: how to use it.
|
28
|
+
#
|
29
|
+
# See [Wikipedia docs](https://en.wikipedia.org/wiki/Help:Interwiki_linking) for details.
|
30
|
+
attr_reader :interwiki
|
31
|
+
|
23
32
|
# Wikilink namespace, `Category` for `[Category:Cities]`, empty
|
24
33
|
# string (not `nil`!) for just `[Cities]`
|
25
34
|
attr_reader :namespace
|
@@ -46,10 +55,8 @@ module Infoboxer
|
|
46
55
|
|
47
56
|
private
|
48
57
|
|
49
|
-
def
|
50
|
-
@name
|
51
|
-
@namespace ||= ''
|
52
|
-
|
58
|
+
def parse_name!
|
59
|
+
@name = namespace.empty? ? link : link.sub(/^#{namespace}:/, '')
|
53
60
|
@name, @anchor = @name.split('#', 2)
|
54
61
|
@anchor ||= ''
|
55
62
|
|
data/lib/infoboxer/version.rb
CHANGED
data/lib/infoboxer/wiki_path.rb
CHANGED
@@ -36,7 +36,7 @@ module Infoboxer
|
|
36
36
|
attrs[attr.to_sym] = process_value(value)
|
37
37
|
end
|
38
38
|
res = op == '//' ? {op: :lookup} : {}
|
39
|
-
res[:type] = type
|
39
|
+
res[:type] = process_type(type) unless type.empty?
|
40
40
|
res.merge(attrs) # TODO: raise if empty selector
|
41
41
|
end
|
42
42
|
|
@@ -51,6 +51,15 @@ module Infoboxer
|
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
|
+
def process_type(type)
|
55
|
+
type.gsub(/(?:^|_)([a-z])/, &:upcase).tr('_', '').to_sym
|
56
|
+
.tap { |t| valid_type?(t) or fail(ParseError, "Unrecognized node type: #{type}") }
|
57
|
+
end
|
58
|
+
|
59
|
+
def valid_type?(t)
|
60
|
+
t == :Section || Infoboxer::Tree.const_defined?(t)
|
61
|
+
end
|
62
|
+
|
54
63
|
def unexpected(scanner, expected)
|
55
64
|
place = scanner.eos? ? 'end of pattern' : scanner.rest.inspect
|
56
65
|
fail ParseError, "Unexpected #{place}, expecting #{expected}"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: infoboxer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.1.pre
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Shepelev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-09-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -117,6 +117,7 @@ files:
|
|
117
117
|
- lib/infoboxer/tree.rb
|
118
118
|
- lib/infoboxer/tree/compound.rb
|
119
119
|
- lib/infoboxer/tree/document.rb
|
120
|
+
- lib/infoboxer/tree/gallery.rb
|
120
121
|
- lib/infoboxer/tree/html.rb
|
121
122
|
- lib/infoboxer/tree/image.rb
|
122
123
|
- lib/infoboxer/tree/inline.rb
|
@@ -165,9 +166,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
165
166
|
version: 2.1.0
|
166
167
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
168
|
requirements:
|
168
|
-
- - "
|
169
|
+
- - ">"
|
169
170
|
- !ruby/object:Gem::Version
|
170
|
-
version:
|
171
|
+
version: 1.3.1
|
171
172
|
requirements: []
|
172
173
|
rubyforge_project:
|
173
174
|
rubygems_version: 2.6.10
|