infoboxer 0.3.0.pre → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +15 -0
- data/CHANGELOG.md +0 -11
- data/lib/infoboxer.rb +4 -4
- data/lib/infoboxer/media_wiki.rb +16 -26
- data/lib/infoboxer/media_wiki/traits.rb +20 -66
- data/lib/infoboxer/navigation/lookup.rb +1 -11
- data/lib/infoboxer/navigation/sections.rb +6 -10
- data/lib/infoboxer/navigation/selector.rb +4 -12
- data/lib/infoboxer/parser/inline.rb +2 -42
- data/lib/infoboxer/parser/paragraphs.rb +1 -1
- data/lib/infoboxer/parser/template.rb +3 -3
- data/lib/infoboxer/parser/util.rb +0 -1
- data/lib/infoboxer/tree.rb +1 -1
- data/lib/infoboxer/tree/compound.rb +1 -1
- data/lib/infoboxer/tree/image.rb +2 -2
- data/lib/infoboxer/tree/inline.rb +2 -2
- data/lib/infoboxer/tree/linkable.rb +1 -4
- data/lib/infoboxer/tree/math.rb +3 -0
- data/lib/infoboxer/tree/node.rb +2 -2
- data/lib/infoboxer/tree/nodes.rb +4 -32
- data/lib/infoboxer/tree/template.rb +2 -6
- data/lib/infoboxer/tree/text.rb +1 -1
- data/lib/infoboxer/tree/wikilink.rb +7 -14
- data/lib/infoboxer/version.rb +1 -2
- data/lib/infoboxer/wiki_path.rb +1 -10
- metadata +4 -5
- data/lib/infoboxer/tree/gallery.rb +0 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: be65bc91a5370bc24553e500754f413196caed76
|
4
|
+
data.tar.gz: d24dca5a13a64d563ddc473197a732a9f95884fd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 661c06d6703db103035f61f55ebee5f0bc8a5f9ad182fcaf2a22be9f91063a9abd80bac1793fabae436b28bed274fcb7908219ba07453f46271a25d1cba0367a
|
7
|
+
data.tar.gz: 25029633e6516c30a7de21433db1903ab923af2c74082b3a3c9322b50b170cdb104652b95d261b3033a070629ee478a8b5510a7298fbef830d1445dfe56157d0
|
data/.rubocop_todo.yml
CHANGED
@@ -1 +1,16 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2017-06-23 13:52:16 +0300 using RuboCop version 0.49.1.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
Metrics/AbcSize:
|
11
|
+
Max: 29
|
12
|
+
|
13
|
+
# Offense count: 1
|
14
|
+
Metrics/PerceivedComplexity:
|
15
|
+
Max: 10
|
1
16
|
|
data/CHANGELOG.md
CHANGED
@@ -1,16 +1,5 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
-
## 0.3.1.pre (2017-09-16)
|
4
|
-
|
5
|
-
* Introduce interwiki links following (and proper handling of interwikis, in general);
|
6
|
-
* Add `<gallery>` tag support;
|
7
|
-
* Introduce `Navigation::Selector#===`;
|
8
|
-
* Much more `Enumerable`'s methods supported by `Nodes`;
|
9
|
-
* Lot of small simplifications, cleanups and bugfixes.
|
10
|
-
|
11
|
-
TBH, it should be 0.4.0 or more, but it would be a shame to change versions so fast :) So, at least
|
12
|
-
until it is `-pre`, let it be 0.3.1.
|
13
|
-
|
14
3
|
## 0.3.0 (2017-07-23)
|
15
4
|
|
16
5
|
* Change logic of navigation through templates; now templates contents aren't hidden from global
|
data/lib/infoboxer.rb
CHANGED
@@ -72,8 +72,8 @@ module Infoboxer
|
|
72
72
|
end
|
73
73
|
|
74
74
|
# Includeable version of {Infoboxer.wiki}
|
75
|
-
def wiki(api_url,
|
76
|
-
wikis[api_url] ||= MediaWiki.new(api_url, options)
|
75
|
+
def wiki(api_url, options = {})
|
76
|
+
wikis[api_url] ||= MediaWiki.new(api_url, options || {})
|
77
77
|
end
|
78
78
|
|
79
79
|
class << self
|
@@ -168,7 +168,7 @@ module Infoboxer
|
|
168
168
|
end
|
169
169
|
|
170
170
|
WIKIMEDIA_PROJECTS.each do |name, domain|
|
171
|
-
define_method name do |lang = 'en',
|
171
|
+
define_method name do |lang = 'en', options = {}|
|
172
172
|
lang, options = 'en', lang if lang.is_a?(Hash)
|
173
173
|
|
174
174
|
wiki("https://#{lang}.#{domain}/w/api.php", options)
|
@@ -178,7 +178,7 @@ module Infoboxer
|
|
178
178
|
alias_method :wp, :wikipedia
|
179
179
|
|
180
180
|
WIKIMEDIA_COMMONS.each do |name, domain|
|
181
|
-
define_method name do
|
181
|
+
define_method name do |options = {}|
|
182
182
|
wiki("https://#{domain}/w/api.php", options)
|
183
183
|
end
|
184
184
|
end
|
data/lib/infoboxer/media_wiki.rb
CHANGED
@@ -47,14 +47,15 @@ module Infoboxer
|
|
47
47
|
# for it, as well as shortcuts for some well-known wikis, like
|
48
48
|
# {Infoboxer.wikipedia}.
|
49
49
|
#
|
50
|
-
# @param api_base_url
|
50
|
+
# @param api_base_url URL of `api.php` file in your MediaWiki
|
51
51
|
# installation. Typically, its `<domain>/w/api.php`, but can vary
|
52
52
|
# in different wikis.
|
53
|
-
# @param
|
54
|
-
|
53
|
+
# @param options Only one option is currently supported:
|
54
|
+
# * `:user_agent` (also aliased as `:ua`) -- custom User-Agent header.
|
55
|
+
def initialize(api_base_url, options = {})
|
55
56
|
@api_base_url = Addressable::URI.parse(api_base_url)
|
56
|
-
@client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(
|
57
|
-
@traits = Traits.get(@api_base_url.host,
|
57
|
+
@client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(options))
|
58
|
+
@traits = Traits.get(@api_base_url.host, namespaces: extract_namespaces)
|
58
59
|
end
|
59
60
|
|
60
61
|
# Receive "raw" data from Wikipedia (without parsing or wrapping in
|
@@ -122,9 +123,7 @@ module Infoboxer
|
|
122
123
|
# and obtain meaningful results instead of `NoMethodError` or
|
123
124
|
# `SomethingNotFound`.
|
124
125
|
#
|
125
|
-
def get(*titles, prop: []
|
126
|
-
return interwikis(interwiki).get(*titles, prop: prop) if interwiki
|
127
|
-
|
126
|
+
def get(*titles, prop: [])
|
128
127
|
pages = get_h(*titles, prop: prop).values.compact
|
129
128
|
titles.count == 1 ? pages.first : Tree::Nodes[*pages]
|
130
129
|
end
|
@@ -252,26 +251,17 @@ module Infoboxer
|
|
252
251
|
[namespace, titl].join(':')
|
253
252
|
end
|
254
253
|
|
255
|
-
def user_agent(
|
256
|
-
|
257
|
-
end
|
258
|
-
|
259
|
-
def siteinfo
|
260
|
-
@siteinfo ||= @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
|
254
|
+
def user_agent(options)
|
255
|
+
options[:user_agent] || options[:ua] || self.class.user_agent || UA
|
261
256
|
end
|
262
257
|
|
263
|
-
def
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
fail ArgumentError, "Interwiki #{interwiki} seems not to be a MediaWiki instance"
|
271
|
-
h[pre] = self.class.new("#{m[1]}/w/api.php") # TODO: copy useragent
|
272
|
-
}
|
273
|
-
|
274
|
-
@interwikis[prefix]
|
258
|
+
def extract_namespaces
|
259
|
+
siteinfo = @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases).response
|
260
|
+
siteinfo['namespaces'].map do |_, namespace|
|
261
|
+
aliases =
|
262
|
+
siteinfo['namespacealiases'].select { |a| a['id'] == namespace['id'] }.map { |a| a['*'] }
|
263
|
+
namespace.merge('aliases' => aliases)
|
264
|
+
end
|
275
265
|
end
|
276
266
|
end
|
277
267
|
end
|
@@ -34,8 +34,9 @@ module Infoboxer
|
|
34
34
|
end
|
35
35
|
|
36
36
|
# @private
|
37
|
-
def get(domain,
|
38
|
-
|
37
|
+
def get(domain, options = {})
|
38
|
+
cls = Traits.domains[domain]
|
39
|
+
cls ? cls.new(options) : Traits.new(options)
|
39
40
|
end
|
40
41
|
|
41
42
|
# @private
|
@@ -67,27 +68,18 @@ module Infoboxer
|
|
67
68
|
alias_method :default, :new
|
68
69
|
end
|
69
70
|
|
70
|
-
def initialize(
|
71
|
-
@
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
def interwiki?(prefix)
|
79
|
-
known_interwikis.key?(prefix)
|
80
|
-
end
|
81
|
-
|
82
|
-
# @private
|
83
|
-
def file_namespace
|
84
|
-
@file_namespace ||= ns_aliases('File')
|
71
|
+
def initialize(options = {})
|
72
|
+
@options = options
|
73
|
+
@file_namespace =
|
74
|
+
[DEFAULTS[:file_namespace], namespace_aliases(options, 'File')]
|
75
|
+
.flatten.compact.uniq
|
76
|
+
@category_namespace =
|
77
|
+
[DEFAULTS[:category_namespace], namespace_aliases(options, 'Category')]
|
78
|
+
.flatten.compact.uniq
|
85
79
|
end
|
86
80
|
|
87
81
|
# @private
|
88
|
-
|
89
|
-
@category_namespace ||= ns_aliases('Category')
|
90
|
-
end
|
82
|
+
attr_reader :file_namespace, :category_namespace
|
91
83
|
|
92
84
|
# @private
|
93
85
|
def templates
|
@@ -96,54 +88,16 @@ module Infoboxer
|
|
96
88
|
|
97
89
|
private
|
98
90
|
|
99
|
-
def
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
else
|
104
|
-
(@site_info['namespaces'].values + @site_info['namespacealiases']).map { |n| n['*'] }
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
def known_interwikis
|
109
|
-
@known_interwikis ||=
|
110
|
-
if @site_info.empty?
|
111
|
-
{}
|
112
|
-
else
|
113
|
-
@site_info['interwikimap'].map { |iw| [iw['prefix'], iw] }.to_h
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
def ns_aliases(base)
|
118
|
-
return [base] if @site_info.empty?
|
119
|
-
main = @site_info['namespaces'].values.detect { |n| n['canonical'] == base }
|
120
|
-
[base, main['*']] +
|
121
|
-
@site_info['namespacealiases']
|
122
|
-
.select { |a| a['id'] == main['id'] }.flat_map { |n| n['*'] }
|
123
|
-
.compact.uniq
|
91
|
+
def namespace_aliases(options, canonical)
|
92
|
+
namespace = (options[:namespaces] || []).detect { |v| v['canonical'] == canonical }
|
93
|
+
return nil unless namespace
|
94
|
+
[namespace['*'], *namespace['aliases']]
|
124
95
|
end
|
125
96
|
|
126
|
-
|
127
|
-
|
128
|
-
'
|
129
|
-
|
130
|
-
'', # (Main)
|
131
|
-
'Talk', # Article discussion.
|
132
|
-
'User', #
|
133
|
-
'User talk', #
|
134
|
-
'Project', # Meta-discussions related to the operation and development of the wiki.
|
135
|
-
'Project talk', #
|
136
|
-
'File', # Metadata for images, videos, sound files and other media.
|
137
|
-
'File talk', #
|
138
|
-
'MediaWiki', # System messages and other important content.
|
139
|
-
'MediaWiki talk', #
|
140
|
-
'Template', # Templates: blocks of text or wikicode that are intended to be transcluded.
|
141
|
-
'Template talk', #
|
142
|
-
'Help', # Help files, instructions and "how-to" guides.
|
143
|
-
'Help talk', #
|
144
|
-
'Category', # Categories: dynamic lists of other pages.
|
145
|
-
'Category talk', #
|
146
|
-
].freeze
|
97
|
+
DEFAULTS = {
|
98
|
+
file_namespace: 'File',
|
99
|
+
category_namespace: 'Category'
|
100
|
+
}.freeze
|
147
101
|
end
|
148
102
|
end
|
149
103
|
end
|
@@ -98,13 +98,9 @@ module Infoboxer
|
|
98
98
|
# Selects matching nodes from current node's siblings, which
|
99
99
|
# are above current node in parents children list.
|
100
100
|
|
101
|
-
# @!method lookup_prev_sibling(*selectors, &block)
|
102
|
-
# Selects first matching nodes from current node's siblings, which
|
103
|
-
# are above current node in parents children list.
|
104
|
-
|
105
101
|
# Underscored version of {#matches?}
|
106
102
|
def _matches?(selector)
|
107
|
-
selector
|
103
|
+
selector.matches?(self)
|
108
104
|
end
|
109
105
|
|
110
106
|
# Underscored version of {#lookup}
|
@@ -140,11 +136,6 @@ module Infoboxer
|
|
140
136
|
prev_siblings._find(selector)
|
141
137
|
end
|
142
138
|
|
143
|
-
# Underscored version of {#lookup_prev_sibling}
|
144
|
-
def _lookup_prev_sibling(selector)
|
145
|
-
prev_siblings.reverse.detect { |n| selector === n }
|
146
|
-
end
|
147
|
-
|
148
139
|
# Underscored version of {#lookup_next_siblings}
|
149
140
|
def _lookup_next_siblings(selector)
|
150
141
|
next_siblings._find(selector)
|
@@ -155,7 +146,6 @@ module Infoboxer
|
|
155
146
|
lookup lookup_children lookup_parents
|
156
147
|
lookup_siblings
|
157
148
|
lookup_next_siblings lookup_prev_siblings
|
158
|
-
lookup_prev_sibling
|
159
149
|
]
|
160
150
|
.map { |sym| [sym, :"_#{sym}"] }
|
161
151
|
.each do |sym, underscored|
|
@@ -123,25 +123,21 @@ module Infoboxer
|
|
123
123
|
#
|
124
124
|
# @return {Tree::Nodes<Section>}
|
125
125
|
def in_sections
|
126
|
-
|
127
|
-
return @in_sections if @in_sections
|
126
|
+
main_node = parent.is_a?(Tree::Document) ? self : lookup_parents[-2]
|
128
127
|
|
129
128
|
heading =
|
130
|
-
if is_a?(Tree::Heading)
|
131
|
-
|
129
|
+
if main_node.is_a?(Tree::Heading)
|
130
|
+
main_node.lookup_prev_siblings(Tree::Heading, level: main_node.level - 1).last
|
132
131
|
else
|
133
|
-
|
132
|
+
main_node.lookup_prev_siblings(Tree::Heading).last
|
134
133
|
end
|
135
|
-
unless heading
|
136
|
-
@in_sections = Tree::Nodes[]
|
137
|
-
return @in_sections
|
138
|
-
end
|
134
|
+
return Tree::Nodes[] unless heading
|
139
135
|
|
140
136
|
body = heading.next_siblings
|
141
137
|
.take_while { |n| !n.is_a?(Tree::Heading) || n.level < heading.level }
|
142
138
|
|
143
139
|
section = Section.new(heading, body)
|
144
|
-
|
140
|
+
Tree::Nodes[section, *heading.in_sections]
|
145
141
|
end
|
146
142
|
end
|
147
143
|
|
@@ -24,8 +24,8 @@ module Infoboxer
|
|
24
24
|
"#<Selector(#{@arg.map(&:to_s).join(', ')})>"
|
25
25
|
end
|
26
26
|
|
27
|
-
def
|
28
|
-
@arg.all? { |a| arg_matches?(a,
|
27
|
+
def matches?(node)
|
28
|
+
@arg.all? { |a| arg_matches?(a, node) }
|
29
29
|
end
|
30
30
|
|
31
31
|
private
|
@@ -44,8 +44,8 @@ module Infoboxer
|
|
44
44
|
check.call(node)
|
45
45
|
when Hash
|
46
46
|
check.all? { |attr, value|
|
47
|
-
node.respond_to?(attr) &&
|
48
|
-
node.params.key?(attr) &&
|
47
|
+
node.respond_to?(attr) && value === node.send(attr) ||
|
48
|
+
node.params.key?(attr) && value === node.params[attr]
|
49
49
|
}
|
50
50
|
when Symbol
|
51
51
|
node.respond_to?(check) && node.send(check)
|
@@ -53,14 +53,6 @@ module Infoboxer
|
|
53
53
|
check === node
|
54
54
|
end
|
55
55
|
end
|
56
|
-
|
57
|
-
def value_matches?(matcher, value)
|
58
|
-
if matcher.is_a?(String) && value.is_a?(String)
|
59
|
-
matcher.casecmp(value).zero?
|
60
|
-
else
|
61
|
-
matcher === value
|
62
|
-
end
|
63
|
-
end
|
64
56
|
end
|
65
57
|
end
|
66
58
|
end
|
@@ -83,7 +83,7 @@ module Infoboxer
|
|
83
83
|
|
84
84
|
private
|
85
85
|
|
86
|
-
def inline_formatting(match)
|
86
|
+
def inline_formatting(match)
|
87
87
|
case match
|
88
88
|
when "'''''"
|
89
89
|
BoldItalic.new(short_inline(/'''''/))
|
@@ -109,8 +109,6 @@ module Infoboxer
|
|
109
109
|
reference(Regexp.last_match(1))
|
110
110
|
when /<math>/
|
111
111
|
math
|
112
|
-
when /<gallery([^>]*)>/
|
113
|
-
gallery(Regexp.last_match(1))
|
114
112
|
when '<'
|
115
113
|
html || Text.new(match) # it was not HTML, just accidental <
|
116
114
|
else
|
@@ -128,18 +126,8 @@ module Infoboxer
|
|
128
126
|
caption = inline(/\]\]/)
|
129
127
|
@context.pop_eol_sign
|
130
128
|
end
|
131
|
-
name, namespace = link.split(':', 2).reverse
|
132
|
-
lnk, params =
|
133
|
-
if @context.traits.namespace?(namespace)
|
134
|
-
[link, {namespace: namespace}]
|
135
|
-
elsif @context.traits.interwiki?(namespace)
|
136
|
-
[name, {interwiki: namespace}]
|
137
|
-
else
|
138
|
-
[link, {}]
|
139
|
-
end
|
140
129
|
|
141
|
-
|
142
|
-
Wikilink.new(lnk, caption, **params)
|
130
|
+
Wikilink.new(link, caption)
|
143
131
|
end
|
144
132
|
|
145
133
|
# http://en.wikipedia.org/wiki/Help:Link#External_links
|
@@ -171,34 +159,6 @@ module Infoboxer
|
|
171
159
|
Text.new(@context.scan_continued_until(%r{</nowiki>}))
|
172
160
|
end
|
173
161
|
end
|
174
|
-
|
175
|
-
def gallery(tag_rest)
|
176
|
-
params = parse_params(tag_rest)
|
177
|
-
images = []
|
178
|
-
guarded_loop do
|
179
|
-
@context.next! if @context.eol?
|
180
|
-
path = @context.scan_until(%r{</gallery>|\||$})
|
181
|
-
attrs = @context.matched == '|' ? gallery_image_attrs : {}
|
182
|
-
unless path.empty?
|
183
|
-
images << Tree::Image.new(path.sub(/^#{re.file_namespace}/, ''), attrs)
|
184
|
-
end
|
185
|
-
break if @context.matched == '</gallery>'
|
186
|
-
end
|
187
|
-
Gallery.new(images, params)
|
188
|
-
end
|
189
|
-
|
190
|
-
def gallery_image_attrs
|
191
|
-
nodes = []
|
192
|
-
|
193
|
-
guarded_loop do
|
194
|
-
nodes << short_inline(%r{\||</gallery>})
|
195
|
-
break if @context.eol? || @context.matched?(%r{</gallery>})
|
196
|
-
end
|
197
|
-
|
198
|
-
nodes.map(&method(:image_attr))
|
199
|
-
.inject(&:merge)
|
200
|
-
.reject { |_k, v| v.nil? || v.empty? }
|
201
|
-
end
|
202
162
|
end
|
203
163
|
|
204
164
|
require_relative 'image'
|
@@ -29,8 +29,8 @@ module Infoboxer
|
|
29
29
|
|
30
30
|
guarded_loop do
|
31
31
|
@context.next! while @context.eol?
|
32
|
-
if @context.check(/\s*([
|
33
|
-
name = @context.scan(/\s*([
|
32
|
+
if @context.check(/\s*([^ =}|<]+)\s*=\s*/)
|
33
|
+
name = @context.scan(/\s*([^ =]+)/).strip
|
34
34
|
@context.skip(/\s*=\s*/)
|
35
35
|
else
|
36
36
|
name = num
|
@@ -52,7 +52,7 @@ module Infoboxer
|
|
52
52
|
end
|
53
53
|
|
54
54
|
def sanitize_value(nodes)
|
55
|
-
nodes.pop if
|
55
|
+
nodes.pop if nodes.last.is_a?(Pre) && nodes.last.text =~ /^\s*$/ # FIXME: dirty!
|
56
56
|
nodes
|
57
57
|
end
|
58
58
|
end
|
data/lib/infoboxer/tree.rb
CHANGED
@@ -63,7 +63,7 @@ module Infoboxer
|
|
63
63
|
require_relative 'tree/nodes'
|
64
64
|
|
65
65
|
%w[text compound inline
|
66
|
-
image
|
66
|
+
image html paragraphs list template table ref math
|
67
67
|
document].each do |type|
|
68
68
|
require_relative "tree/#{type}"
|
69
69
|
end
|
@@ -4,7 +4,7 @@ module Infoboxer
|
|
4
4
|
module Tree
|
5
5
|
# Base class for all nodes with children.
|
6
6
|
class Compound < Node
|
7
|
-
def initialize(children = Nodes.new,
|
7
|
+
def initialize(children = Nodes.new, params = {})
|
8
8
|
super(params)
|
9
9
|
@children = Nodes[*children]
|
10
10
|
@children.each { |c| c.parent = self }
|
data/lib/infoboxer/tree/image.rb
CHANGED
@@ -7,8 +7,8 @@ module Infoboxer
|
|
7
7
|
# See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Wikipedia:Extended_image_syntax)
|
8
8
|
# for explanation of attributes.
|
9
9
|
class Image < Node
|
10
|
-
def initialize(path,
|
11
|
-
@caption = caption
|
10
|
+
def initialize(path, params = {})
|
11
|
+
@caption = params.delete(:caption)
|
12
12
|
super({path: path}.merge(params))
|
13
13
|
end
|
14
14
|
|
@@ -17,8 +17,8 @@ module Infoboxer
|
|
17
17
|
|
18
18
|
# Base class for internal/external links,
|
19
19
|
class Link < Compound
|
20
|
-
def initialize(link, label = nil
|
21
|
-
super(label || Nodes.new([Text.new(link)]), link: link
|
20
|
+
def initialize(link, label = nil)
|
21
|
+
super(label || Nodes.new([Text.new(link)]), link: link)
|
22
22
|
end
|
23
23
|
|
24
24
|
# @!attribute [r] link
|
@@ -15,7 +15,7 @@ module Infoboxer
|
|
15
15
|
# * {Tree::Nodes#follow} for extracting multiple links at once;
|
16
16
|
# * {MediaWiki#get} for basic information on page extraction.
|
17
17
|
def follow
|
18
|
-
client.get(link
|
18
|
+
client.get(link)
|
19
19
|
end
|
20
20
|
|
21
21
|
# Human-readable page URL
|
@@ -28,9 +28,6 @@ module Infoboxer
|
|
28
28
|
|
29
29
|
protected
|
30
30
|
|
31
|
-
# redefined in {Wikilink}
|
32
|
-
def interwiki; end
|
33
|
-
|
34
31
|
def page
|
35
32
|
lookup_parents(MediaWiki::Page).first or fail('Not in a page from real source')
|
36
33
|
end
|
data/lib/infoboxer/tree/math.rb
CHANGED
data/lib/infoboxer/tree/node.rb
CHANGED
@@ -11,7 +11,7 @@ module Infoboxer
|
|
11
11
|
# you will receive it from tree and use for navigations.
|
12
12
|
#
|
13
13
|
class Node
|
14
|
-
def initialize(
|
14
|
+
def initialize(params = {})
|
15
15
|
@params = params
|
16
16
|
end
|
17
17
|
|
@@ -154,7 +154,7 @@ module Infoboxer
|
|
154
154
|
end
|
155
155
|
|
156
156
|
def show_params(prms = nil)
|
157
|
-
(prms || params).
|
157
|
+
(prms || params).map { |k, v| "#{k}: #{v.inspect}" }.join(', ')
|
158
158
|
end
|
159
159
|
|
160
160
|
def indent(level)
|
data/lib/infoboxer/tree/nodes.rb
CHANGED
@@ -38,19 +38,10 @@ module Infoboxer
|
|
38
38
|
# @!method compact
|
39
39
|
# Just like Array#compact, but returns Nodes
|
40
40
|
|
41
|
-
# @!method grep(pattern)
|
42
|
-
# Just like Array#grep, but returns Nodes
|
43
|
-
|
44
|
-
# @!method grep_v(pattern)
|
45
|
-
# Just like Array#grep_v, but returns Nodes
|
46
|
-
|
47
41
|
# @!method -(other)
|
48
42
|
# Just like Array#-, but returns Nodes
|
49
43
|
|
50
|
-
|
51
|
-
# Just like Array#+, but returns Nodes
|
52
|
-
|
53
|
-
%i[select reject sort_by flatten compact grep grep_v - +].each do |sym|
|
44
|
+
%i[select reject sort_by flatten compact -].each do |sym|
|
54
45
|
define_method(sym) do |*args, &block|
|
55
46
|
Nodes[*super(*args, &block)]
|
56
47
|
end
|
@@ -84,21 +75,6 @@ module Infoboxer
|
|
84
75
|
end
|
85
76
|
end
|
86
77
|
|
87
|
-
# Just like Array#flat_map, but returns Nodes, **if** all map results are Node
|
88
|
-
def flat_map
|
89
|
-
res = super
|
90
|
-
if res.all? { |n| n.is_a?(Node) || n.is_a?(Nodes) }
|
91
|
-
Nodes[*res]
|
92
|
-
else
|
93
|
-
res
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
# Just like Array#group, but returns hash with `{<grouping variable> => Nodes}`
|
98
|
-
def group_by
|
99
|
-
super.map { |title, group| [title, Nodes[*group]] }.to_h
|
100
|
-
end
|
101
|
-
|
102
78
|
# @!method prev_siblings
|
103
79
|
# Previous siblings (flat list) of all nodes inside.
|
104
80
|
|
@@ -163,14 +139,12 @@ module Infoboxer
|
|
163
139
|
# @return [Nodes<MediaWiki::Page>] It is still `Nodes`, so you
|
164
140
|
# still can process them uniformely.
|
165
141
|
def follow
|
166
|
-
links =
|
142
|
+
links = select { |n| n.respond_to?(:link) }.map(&:link)
|
167
143
|
return Nodes[] if links.empty?
|
168
144
|
page = first.lookup_parents(MediaWiki::Page).first or
|
169
145
|
fail('Not in a page from real source')
|
170
146
|
page.client or fail('MediaWiki client not set')
|
171
|
-
|
172
|
-
.flat_map { |iw, ls| page.client.get(*ls.map(&:link), interwiki: iw) }
|
173
|
-
pages.count == 1 ? pages.first : Nodes[*pages]
|
147
|
+
page.client.get(*links)
|
174
148
|
end
|
175
149
|
|
176
150
|
# @private
|
@@ -199,9 +173,7 @@ module Infoboxer
|
|
199
173
|
# @private
|
200
174
|
# Internal, used by {Parser}
|
201
175
|
def flow_templates
|
202
|
-
|
203
|
-
# make_nodes(map { |n| n.is_a?(Paragraph) ? n.to_templates? : n })
|
204
|
-
self
|
176
|
+
make_nodes(map { |n| n.is_a?(Paragraph) ? n.to_templates? : n })
|
205
177
|
end
|
206
178
|
|
207
179
|
private
|
@@ -22,10 +22,6 @@ module Infoboxer
|
|
22
22
|
false
|
23
23
|
end
|
24
24
|
|
25
|
-
def named?
|
26
|
-
name !~ /^\d+$/
|
27
|
-
end
|
28
|
-
|
29
25
|
protected
|
30
26
|
|
31
27
|
def descr
|
@@ -143,7 +139,7 @@ module Infoboxer
|
|
143
139
|
#
|
144
140
|
# @return [Nodes<Var>]
|
145
141
|
def unnamed_variables
|
146
|
-
variables.
|
142
|
+
variables.find(name: /^\d+$/)
|
147
143
|
end
|
148
144
|
|
149
145
|
# Fetches template variable(s) by name(s) or patterns.
|
@@ -246,7 +242,7 @@ module Infoboxer
|
|
246
242
|
def extract_params(vars)
|
247
243
|
vars
|
248
244
|
.select { |v| v.children.count == 1 && v.children.first.is_a?(Text) }
|
249
|
-
.map { |v| [v.name
|
245
|
+
.map { |v| [v.name, v.children.first.raw_text] }.to_h
|
250
246
|
end
|
251
247
|
|
252
248
|
def inspect_variables(depth)
|
data/lib/infoboxer/tree/text.rb
CHANGED
@@ -12,23 +12,14 @@ module Infoboxer
|
|
12
12
|
# Note, that Wikilink is {Linkable}, so you can {Linkable#follow #follow}
|
13
13
|
# it to obtain linked pages.
|
14
14
|
class Wikilink < Link
|
15
|
-
def initialize(
|
16
|
-
super
|
17
|
-
|
18
|
-
@interwiki = interwiki
|
19
|
-
parse_name!
|
15
|
+
def initialize(*)
|
16
|
+
super
|
17
|
+
parse_link!
|
20
18
|
end
|
21
19
|
|
22
20
|
# "Clean" wikilink name, for ex., `Cities` for `[Category:Cities]`
|
23
21
|
attr_reader :name
|
24
22
|
|
25
|
-
# Interwiki identifier. For example, `[[wikt:Argentina]]`
|
26
|
-
# will have `"Argentina"` as its {#name} and `"wikt"` (wiktionary) as an
|
27
|
-
# interwiki. TODO: how to use it.
|
28
|
-
#
|
29
|
-
# See [Wikipedia docs](https://en.wikipedia.org/wiki/Help:Interwiki_linking) for details.
|
30
|
-
attr_reader :interwiki
|
31
|
-
|
32
23
|
# Wikilink namespace, `Category` for `[Category:Cities]`, empty
|
33
24
|
# string (not `nil`!) for just `[Cities]`
|
34
25
|
attr_reader :namespace
|
@@ -55,8 +46,10 @@ module Infoboxer
|
|
55
46
|
|
56
47
|
private
|
57
48
|
|
58
|
-
def
|
59
|
-
@name
|
49
|
+
def parse_link!
|
50
|
+
@name, @namespace = link.split(':', 2).reverse
|
51
|
+
@namespace ||= ''
|
52
|
+
|
60
53
|
@name, @anchor = @name.split('#', 2)
|
61
54
|
@anchor ||= ''
|
62
55
|
|
data/lib/infoboxer/version.rb
CHANGED
data/lib/infoboxer/wiki_path.rb
CHANGED
@@ -36,7 +36,7 @@ module Infoboxer
|
|
36
36
|
attrs[attr.to_sym] = process_value(value)
|
37
37
|
end
|
38
38
|
res = op == '//' ? {op: :lookup} : {}
|
39
|
-
res[:type] =
|
39
|
+
res[:type] = type.gsub(/(?:^|_)([a-z])/, &:upcase).tr('_', '').to_sym unless type.empty?
|
40
40
|
res.merge(attrs) # TODO: raise if empty selector
|
41
41
|
end
|
42
42
|
|
@@ -51,15 +51,6 @@ module Infoboxer
|
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
|
-
def process_type(type)
|
55
|
-
type.gsub(/(?:^|_)([a-z])/, &:upcase).tr('_', '').to_sym
|
56
|
-
.tap { |t| valid_type?(t) or fail(ParseError, "Unrecognized node type: #{type}") }
|
57
|
-
end
|
58
|
-
|
59
|
-
def valid_type?(t)
|
60
|
-
t == :Section || Infoboxer::Tree.const_defined?(t)
|
61
|
-
end
|
62
|
-
|
63
54
|
def unexpected(scanner, expected)
|
64
55
|
place = scanner.eos? ? 'end of pattern' : scanner.rest.inspect
|
65
56
|
fail ParseError, "Unexpected #{place}, expecting #{expected}"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: infoboxer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Shepelev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -117,7 +117,6 @@ files:
|
|
117
117
|
- lib/infoboxer/tree.rb
|
118
118
|
- lib/infoboxer/tree/compound.rb
|
119
119
|
- lib/infoboxer/tree/document.rb
|
120
|
-
- lib/infoboxer/tree/gallery.rb
|
121
120
|
- lib/infoboxer/tree/html.rb
|
122
121
|
- lib/infoboxer/tree/image.rb
|
123
122
|
- lib/infoboxer/tree/inline.rb
|
@@ -166,9 +165,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
166
165
|
version: 2.1.0
|
167
166
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
168
167
|
requirements:
|
169
|
-
- - "
|
168
|
+
- - ">="
|
170
169
|
- !ruby/object:Gem::Version
|
171
|
-
version:
|
170
|
+
version: '0'
|
172
171
|
requirements: []
|
173
172
|
rubyforge_project:
|
174
173
|
rubygems_version: 2.6.10
|
@@ -1,12 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module Infoboxer
|
4
|
-
module Tree
|
5
|
-
# Represents gallery of images (contents of `<gallery>` special tag).
|
6
|
-
#
|
7
|
-
# See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Help:Gallery_tag)
|
8
|
-
# for explanation of attributes.
|
9
|
-
class Gallery < Compound
|
10
|
-
end
|
11
|
-
end
|
12
|
-
end
|