infoboxer 0.3.0.pre → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +15 -0
- data/CHANGELOG.md +0 -11
- data/lib/infoboxer.rb +4 -4
- data/lib/infoboxer/media_wiki.rb +16 -26
- data/lib/infoboxer/media_wiki/traits.rb +20 -66
- data/lib/infoboxer/navigation/lookup.rb +1 -11
- data/lib/infoboxer/navigation/sections.rb +6 -10
- data/lib/infoboxer/navigation/selector.rb +4 -12
- data/lib/infoboxer/parser/inline.rb +2 -42
- data/lib/infoboxer/parser/paragraphs.rb +1 -1
- data/lib/infoboxer/parser/template.rb +3 -3
- data/lib/infoboxer/parser/util.rb +0 -1
- data/lib/infoboxer/tree.rb +1 -1
- data/lib/infoboxer/tree/compound.rb +1 -1
- data/lib/infoboxer/tree/image.rb +2 -2
- data/lib/infoboxer/tree/inline.rb +2 -2
- data/lib/infoboxer/tree/linkable.rb +1 -4
- data/lib/infoboxer/tree/math.rb +3 -0
- data/lib/infoboxer/tree/node.rb +2 -2
- data/lib/infoboxer/tree/nodes.rb +4 -32
- data/lib/infoboxer/tree/template.rb +2 -6
- data/lib/infoboxer/tree/text.rb +1 -1
- data/lib/infoboxer/tree/wikilink.rb +7 -14
- data/lib/infoboxer/version.rb +1 -2
- data/lib/infoboxer/wiki_path.rb +1 -10
- metadata +4 -5
- data/lib/infoboxer/tree/gallery.rb +0 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: be65bc91a5370bc24553e500754f413196caed76
|
4
|
+
data.tar.gz: d24dca5a13a64d563ddc473197a732a9f95884fd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 661c06d6703db103035f61f55ebee5f0bc8a5f9ad182fcaf2a22be9f91063a9abd80bac1793fabae436b28bed274fcb7908219ba07453f46271a25d1cba0367a
|
7
|
+
data.tar.gz: 25029633e6516c30a7de21433db1903ab923af2c74082b3a3c9322b50b170cdb104652b95d261b3033a070629ee478a8b5510a7298fbef830d1445dfe56157d0
|
data/.rubocop_todo.yml
CHANGED
@@ -1 +1,16 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2017-06-23 13:52:16 +0300 using RuboCop version 0.49.1.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
Metrics/AbcSize:
|
11
|
+
Max: 29
|
12
|
+
|
13
|
+
# Offense count: 1
|
14
|
+
Metrics/PerceivedComplexity:
|
15
|
+
Max: 10
|
1
16
|
|
data/CHANGELOG.md
CHANGED
@@ -1,16 +1,5 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
-
## 0.3.1.pre (2017-09-16)
|
4
|
-
|
5
|
-
* Introduce interwiki links following (and proper handling of interwikis, in general);
|
6
|
-
* Add `<gallery>` tag support;
|
7
|
-
* Introduce `Navigation::Selector#===`;
|
8
|
-
* Much more `Enumerable`'s methods supported by `Nodes`;
|
9
|
-
* Lot of small simplifications, cleanups and bugfixes.
|
10
|
-
|
11
|
-
TBH, it should be 0.4.0 or more, but it would be a shame to change versions so fast :) So, at least
|
12
|
-
until it is `-pre`, let it be 0.3.1.
|
13
|
-
|
14
3
|
## 0.3.0 (2017-07-23)
|
15
4
|
|
16
5
|
* Change logic of navigation through templates; now templates contents aren't hidden from global
|
data/lib/infoboxer.rb
CHANGED
@@ -72,8 +72,8 @@ module Infoboxer
|
|
72
72
|
end
|
73
73
|
|
74
74
|
# Includeable version of {Infoboxer.wiki}
|
75
|
-
def wiki(api_url,
|
76
|
-
wikis[api_url] ||= MediaWiki.new(api_url, options)
|
75
|
+
def wiki(api_url, options = {})
|
76
|
+
wikis[api_url] ||= MediaWiki.new(api_url, options || {})
|
77
77
|
end
|
78
78
|
|
79
79
|
class << self
|
@@ -168,7 +168,7 @@ module Infoboxer
|
|
168
168
|
end
|
169
169
|
|
170
170
|
WIKIMEDIA_PROJECTS.each do |name, domain|
|
171
|
-
define_method name do |lang = 'en',
|
171
|
+
define_method name do |lang = 'en', options = {}|
|
172
172
|
lang, options = 'en', lang if lang.is_a?(Hash)
|
173
173
|
|
174
174
|
wiki("https://#{lang}.#{domain}/w/api.php", options)
|
@@ -178,7 +178,7 @@ module Infoboxer
|
|
178
178
|
alias_method :wp, :wikipedia
|
179
179
|
|
180
180
|
WIKIMEDIA_COMMONS.each do |name, domain|
|
181
|
-
define_method name do
|
181
|
+
define_method name do |options = {}|
|
182
182
|
wiki("https://#{domain}/w/api.php", options)
|
183
183
|
end
|
184
184
|
end
|
data/lib/infoboxer/media_wiki.rb
CHANGED
@@ -47,14 +47,15 @@ module Infoboxer
|
|
47
47
|
# for it, as well as shortcuts for some well-known wikis, like
|
48
48
|
# {Infoboxer.wikipedia}.
|
49
49
|
#
|
50
|
-
# @param api_base_url
|
50
|
+
# @param api_base_url URL of `api.php` file in your MediaWiki
|
51
51
|
# installation. Typically, its `<domain>/w/api.php`, but can vary
|
52
52
|
# in different wikis.
|
53
|
-
# @param
|
54
|
-
|
53
|
+
# @param options Only one option is currently supported:
|
54
|
+
# * `:user_agent` (also aliased as `:ua`) -- custom User-Agent header.
|
55
|
+
def initialize(api_base_url, options = {})
|
55
56
|
@api_base_url = Addressable::URI.parse(api_base_url)
|
56
|
-
@client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(
|
57
|
-
@traits = Traits.get(@api_base_url.host,
|
57
|
+
@client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(options))
|
58
|
+
@traits = Traits.get(@api_base_url.host, namespaces: extract_namespaces)
|
58
59
|
end
|
59
60
|
|
60
61
|
# Receive "raw" data from Wikipedia (without parsing or wrapping in
|
@@ -122,9 +123,7 @@ module Infoboxer
|
|
122
123
|
# and obtain meaningful results instead of `NoMethodError` or
|
123
124
|
# `SomethingNotFound`.
|
124
125
|
#
|
125
|
-
def get(*titles, prop: []
|
126
|
-
return interwikis(interwiki).get(*titles, prop: prop) if interwiki
|
127
|
-
|
126
|
+
def get(*titles, prop: [])
|
128
127
|
pages = get_h(*titles, prop: prop).values.compact
|
129
128
|
titles.count == 1 ? pages.first : Tree::Nodes[*pages]
|
130
129
|
end
|
@@ -252,26 +251,17 @@ module Infoboxer
|
|
252
251
|
[namespace, titl].join(':')
|
253
252
|
end
|
254
253
|
|
255
|
-
def user_agent(
|
256
|
-
|
257
|
-
end
|
258
|
-
|
259
|
-
def siteinfo
|
260
|
-
@siteinfo ||= @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
|
254
|
+
def user_agent(options)
|
255
|
+
options[:user_agent] || options[:ua] || self.class.user_agent || UA
|
261
256
|
end
|
262
257
|
|
263
|
-
def
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
fail ArgumentError, "Interwiki #{interwiki} seems not to be a MediaWiki instance"
|
271
|
-
h[pre] = self.class.new("#{m[1]}/w/api.php") # TODO: copy useragent
|
272
|
-
}
|
273
|
-
|
274
|
-
@interwikis[prefix]
|
258
|
+
def extract_namespaces
|
259
|
+
siteinfo = @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases).response
|
260
|
+
siteinfo['namespaces'].map do |_, namespace|
|
261
|
+
aliases =
|
262
|
+
siteinfo['namespacealiases'].select { |a| a['id'] == namespace['id'] }.map { |a| a['*'] }
|
263
|
+
namespace.merge('aliases' => aliases)
|
264
|
+
end
|
275
265
|
end
|
276
266
|
end
|
277
267
|
end
|
@@ -34,8 +34,9 @@ module Infoboxer
|
|
34
34
|
end
|
35
35
|
|
36
36
|
# @private
|
37
|
-
def get(domain,
|
38
|
-
|
37
|
+
def get(domain, options = {})
|
38
|
+
cls = Traits.domains[domain]
|
39
|
+
cls ? cls.new(options) : Traits.new(options)
|
39
40
|
end
|
40
41
|
|
41
42
|
# @private
|
@@ -67,27 +68,18 @@ module Infoboxer
|
|
67
68
|
alias_method :default, :new
|
68
69
|
end
|
69
70
|
|
70
|
-
def initialize(
|
71
|
-
@
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
def interwiki?(prefix)
|
79
|
-
known_interwikis.key?(prefix)
|
80
|
-
end
|
81
|
-
|
82
|
-
# @private
|
83
|
-
def file_namespace
|
84
|
-
@file_namespace ||= ns_aliases('File')
|
71
|
+
def initialize(options = {})
|
72
|
+
@options = options
|
73
|
+
@file_namespace =
|
74
|
+
[DEFAULTS[:file_namespace], namespace_aliases(options, 'File')]
|
75
|
+
.flatten.compact.uniq
|
76
|
+
@category_namespace =
|
77
|
+
[DEFAULTS[:category_namespace], namespace_aliases(options, 'Category')]
|
78
|
+
.flatten.compact.uniq
|
85
79
|
end
|
86
80
|
|
87
81
|
# @private
|
88
|
-
|
89
|
-
@category_namespace ||= ns_aliases('Category')
|
90
|
-
end
|
82
|
+
attr_reader :file_namespace, :category_namespace
|
91
83
|
|
92
84
|
# @private
|
93
85
|
def templates
|
@@ -96,54 +88,16 @@ module Infoboxer
|
|
96
88
|
|
97
89
|
private
|
98
90
|
|
99
|
-
def
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
else
|
104
|
-
(@site_info['namespaces'].values + @site_info['namespacealiases']).map { |n| n['*'] }
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
def known_interwikis
|
109
|
-
@known_interwikis ||=
|
110
|
-
if @site_info.empty?
|
111
|
-
{}
|
112
|
-
else
|
113
|
-
@site_info['interwikimap'].map { |iw| [iw['prefix'], iw] }.to_h
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
def ns_aliases(base)
|
118
|
-
return [base] if @site_info.empty?
|
119
|
-
main = @site_info['namespaces'].values.detect { |n| n['canonical'] == base }
|
120
|
-
[base, main['*']] +
|
121
|
-
@site_info['namespacealiases']
|
122
|
-
.select { |a| a['id'] == main['id'] }.flat_map { |n| n['*'] }
|
123
|
-
.compact.uniq
|
91
|
+
def namespace_aliases(options, canonical)
|
92
|
+
namespace = (options[:namespaces] || []).detect { |v| v['canonical'] == canonical }
|
93
|
+
return nil unless namespace
|
94
|
+
[namespace['*'], *namespace['aliases']]
|
124
95
|
end
|
125
96
|
|
126
|
-
|
127
|
-
|
128
|
-
'
|
129
|
-
|
130
|
-
'', # (Main)
|
131
|
-
'Talk', # Article discussion.
|
132
|
-
'User', #
|
133
|
-
'User talk', #
|
134
|
-
'Project', # Meta-discussions related to the operation and development of the wiki.
|
135
|
-
'Project talk', #
|
136
|
-
'File', # Metadata for images, videos, sound files and other media.
|
137
|
-
'File talk', #
|
138
|
-
'MediaWiki', # System messages and other important content.
|
139
|
-
'MediaWiki talk', #
|
140
|
-
'Template', # Templates: blocks of text or wikicode that are intended to be transcluded.
|
141
|
-
'Template talk', #
|
142
|
-
'Help', # Help files, instructions and "how-to" guides.
|
143
|
-
'Help talk', #
|
144
|
-
'Category', # Categories: dynamic lists of other pages.
|
145
|
-
'Category talk', #
|
146
|
-
].freeze
|
97
|
+
DEFAULTS = {
|
98
|
+
file_namespace: 'File',
|
99
|
+
category_namespace: 'Category'
|
100
|
+
}.freeze
|
147
101
|
end
|
148
102
|
end
|
149
103
|
end
|
@@ -98,13 +98,9 @@ module Infoboxer
|
|
98
98
|
# Selects matching nodes from current node's siblings, which
|
99
99
|
# are above current node in parents children list.
|
100
100
|
|
101
|
-
# @!method lookup_prev_sibling(*selectors, &block)
|
102
|
-
# Selects first matching nodes from current node's siblings, which
|
103
|
-
# are above current node in parents children list.
|
104
|
-
|
105
101
|
# Underscored version of {#matches?}
|
106
102
|
def _matches?(selector)
|
107
|
-
selector
|
103
|
+
selector.matches?(self)
|
108
104
|
end
|
109
105
|
|
110
106
|
# Underscored version of {#lookup}
|
@@ -140,11 +136,6 @@ module Infoboxer
|
|
140
136
|
prev_siblings._find(selector)
|
141
137
|
end
|
142
138
|
|
143
|
-
# Underscored version of {#lookup_prev_sibling}
|
144
|
-
def _lookup_prev_sibling(selector)
|
145
|
-
prev_siblings.reverse.detect { |n| selector === n }
|
146
|
-
end
|
147
|
-
|
148
139
|
# Underscored version of {#lookup_next_siblings}
|
149
140
|
def _lookup_next_siblings(selector)
|
150
141
|
next_siblings._find(selector)
|
@@ -155,7 +146,6 @@ module Infoboxer
|
|
155
146
|
lookup lookup_children lookup_parents
|
156
147
|
lookup_siblings
|
157
148
|
lookup_next_siblings lookup_prev_siblings
|
158
|
-
lookup_prev_sibling
|
159
149
|
]
|
160
150
|
.map { |sym| [sym, :"_#{sym}"] }
|
161
151
|
.each do |sym, underscored|
|
@@ -123,25 +123,21 @@ module Infoboxer
|
|
123
123
|
#
|
124
124
|
# @return {Tree::Nodes<Section>}
|
125
125
|
def in_sections
|
126
|
-
|
127
|
-
return @in_sections if @in_sections
|
126
|
+
main_node = parent.is_a?(Tree::Document) ? self : lookup_parents[-2]
|
128
127
|
|
129
128
|
heading =
|
130
|
-
if is_a?(Tree::Heading)
|
131
|
-
|
129
|
+
if main_node.is_a?(Tree::Heading)
|
130
|
+
main_node.lookup_prev_siblings(Tree::Heading, level: main_node.level - 1).last
|
132
131
|
else
|
133
|
-
|
132
|
+
main_node.lookup_prev_siblings(Tree::Heading).last
|
134
133
|
end
|
135
|
-
unless heading
|
136
|
-
@in_sections = Tree::Nodes[]
|
137
|
-
return @in_sections
|
138
|
-
end
|
134
|
+
return Tree::Nodes[] unless heading
|
139
135
|
|
140
136
|
body = heading.next_siblings
|
141
137
|
.take_while { |n| !n.is_a?(Tree::Heading) || n.level < heading.level }
|
142
138
|
|
143
139
|
section = Section.new(heading, body)
|
144
|
-
|
140
|
+
Tree::Nodes[section, *heading.in_sections]
|
145
141
|
end
|
146
142
|
end
|
147
143
|
|
@@ -24,8 +24,8 @@ module Infoboxer
|
|
24
24
|
"#<Selector(#{@arg.map(&:to_s).join(', ')})>"
|
25
25
|
end
|
26
26
|
|
27
|
-
def
|
28
|
-
@arg.all? { |a| arg_matches?(a,
|
27
|
+
def matches?(node)
|
28
|
+
@arg.all? { |a| arg_matches?(a, node) }
|
29
29
|
end
|
30
30
|
|
31
31
|
private
|
@@ -44,8 +44,8 @@ module Infoboxer
|
|
44
44
|
check.call(node)
|
45
45
|
when Hash
|
46
46
|
check.all? { |attr, value|
|
47
|
-
node.respond_to?(attr) &&
|
48
|
-
node.params.key?(attr) &&
|
47
|
+
node.respond_to?(attr) && value === node.send(attr) ||
|
48
|
+
node.params.key?(attr) && value === node.params[attr]
|
49
49
|
}
|
50
50
|
when Symbol
|
51
51
|
node.respond_to?(check) && node.send(check)
|
@@ -53,14 +53,6 @@ module Infoboxer
|
|
53
53
|
check === node
|
54
54
|
end
|
55
55
|
end
|
56
|
-
|
57
|
-
def value_matches?(matcher, value)
|
58
|
-
if matcher.is_a?(String) && value.is_a?(String)
|
59
|
-
matcher.casecmp(value).zero?
|
60
|
-
else
|
61
|
-
matcher === value
|
62
|
-
end
|
63
|
-
end
|
64
56
|
end
|
65
57
|
end
|
66
58
|
end
|
@@ -83,7 +83,7 @@ module Infoboxer
|
|
83
83
|
|
84
84
|
private
|
85
85
|
|
86
|
-
def inline_formatting(match)
|
86
|
+
def inline_formatting(match)
|
87
87
|
case match
|
88
88
|
when "'''''"
|
89
89
|
BoldItalic.new(short_inline(/'''''/))
|
@@ -109,8 +109,6 @@ module Infoboxer
|
|
109
109
|
reference(Regexp.last_match(1))
|
110
110
|
when /<math>/
|
111
111
|
math
|
112
|
-
when /<gallery([^>]*)>/
|
113
|
-
gallery(Regexp.last_match(1))
|
114
112
|
when '<'
|
115
113
|
html || Text.new(match) # it was not HTML, just accidental <
|
116
114
|
else
|
@@ -128,18 +126,8 @@ module Infoboxer
|
|
128
126
|
caption = inline(/\]\]/)
|
129
127
|
@context.pop_eol_sign
|
130
128
|
end
|
131
|
-
name, namespace = link.split(':', 2).reverse
|
132
|
-
lnk, params =
|
133
|
-
if @context.traits.namespace?(namespace)
|
134
|
-
[link, {namespace: namespace}]
|
135
|
-
elsif @context.traits.interwiki?(namespace)
|
136
|
-
[name, {interwiki: namespace}]
|
137
|
-
else
|
138
|
-
[link, {}]
|
139
|
-
end
|
140
129
|
|
141
|
-
|
142
|
-
Wikilink.new(lnk, caption, **params)
|
130
|
+
Wikilink.new(link, caption)
|
143
131
|
end
|
144
132
|
|
145
133
|
# http://en.wikipedia.org/wiki/Help:Link#External_links
|
@@ -171,34 +159,6 @@ module Infoboxer
|
|
171
159
|
Text.new(@context.scan_continued_until(%r{</nowiki>}))
|
172
160
|
end
|
173
161
|
end
|
174
|
-
|
175
|
-
def gallery(tag_rest)
|
176
|
-
params = parse_params(tag_rest)
|
177
|
-
images = []
|
178
|
-
guarded_loop do
|
179
|
-
@context.next! if @context.eol?
|
180
|
-
path = @context.scan_until(%r{</gallery>|\||$})
|
181
|
-
attrs = @context.matched == '|' ? gallery_image_attrs : {}
|
182
|
-
unless path.empty?
|
183
|
-
images << Tree::Image.new(path.sub(/^#{re.file_namespace}/, ''), attrs)
|
184
|
-
end
|
185
|
-
break if @context.matched == '</gallery>'
|
186
|
-
end
|
187
|
-
Gallery.new(images, params)
|
188
|
-
end
|
189
|
-
|
190
|
-
def gallery_image_attrs
|
191
|
-
nodes = []
|
192
|
-
|
193
|
-
guarded_loop do
|
194
|
-
nodes << short_inline(%r{\||</gallery>})
|
195
|
-
break if @context.eol? || @context.matched?(%r{</gallery>})
|
196
|
-
end
|
197
|
-
|
198
|
-
nodes.map(&method(:image_attr))
|
199
|
-
.inject(&:merge)
|
200
|
-
.reject { |_k, v| v.nil? || v.empty? }
|
201
|
-
end
|
202
162
|
end
|
203
163
|
|
204
164
|
require_relative 'image'
|
@@ -29,8 +29,8 @@ module Infoboxer
|
|
29
29
|
|
30
30
|
guarded_loop do
|
31
31
|
@context.next! while @context.eol?
|
32
|
-
if @context.check(/\s*([
|
33
|
-
name = @context.scan(/\s*([
|
32
|
+
if @context.check(/\s*([^ =}|<]+)\s*=\s*/)
|
33
|
+
name = @context.scan(/\s*([^ =]+)/).strip
|
34
34
|
@context.skip(/\s*=\s*/)
|
35
35
|
else
|
36
36
|
name = num
|
@@ -52,7 +52,7 @@ module Infoboxer
|
|
52
52
|
end
|
53
53
|
|
54
54
|
def sanitize_value(nodes)
|
55
|
-
nodes.pop if
|
55
|
+
nodes.pop if nodes.last.is_a?(Pre) && nodes.last.text =~ /^\s*$/ # FIXME: dirty!
|
56
56
|
nodes
|
57
57
|
end
|
58
58
|
end
|
data/lib/infoboxer/tree.rb
CHANGED
@@ -63,7 +63,7 @@ module Infoboxer
|
|
63
63
|
require_relative 'tree/nodes'
|
64
64
|
|
65
65
|
%w[text compound inline
|
66
|
-
image
|
66
|
+
image html paragraphs list template table ref math
|
67
67
|
document].each do |type|
|
68
68
|
require_relative "tree/#{type}"
|
69
69
|
end
|
@@ -4,7 +4,7 @@ module Infoboxer
|
|
4
4
|
module Tree
|
5
5
|
# Base class for all nodes with children.
|
6
6
|
class Compound < Node
|
7
|
-
def initialize(children = Nodes.new,
|
7
|
+
def initialize(children = Nodes.new, params = {})
|
8
8
|
super(params)
|
9
9
|
@children = Nodes[*children]
|
10
10
|
@children.each { |c| c.parent = self }
|
data/lib/infoboxer/tree/image.rb
CHANGED
@@ -7,8 +7,8 @@ module Infoboxer
|
|
7
7
|
# See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Wikipedia:Extended_image_syntax)
|
8
8
|
# for explanation of attributes.
|
9
9
|
class Image < Node
|
10
|
-
def initialize(path,
|
11
|
-
@caption = caption
|
10
|
+
def initialize(path, params = {})
|
11
|
+
@caption = params.delete(:caption)
|
12
12
|
super({path: path}.merge(params))
|
13
13
|
end
|
14
14
|
|
@@ -17,8 +17,8 @@ module Infoboxer
|
|
17
17
|
|
18
18
|
# Base class for internal/external links,
|
19
19
|
class Link < Compound
|
20
|
-
def initialize(link, label = nil
|
21
|
-
super(label || Nodes.new([Text.new(link)]), link: link
|
20
|
+
def initialize(link, label = nil)
|
21
|
+
super(label || Nodes.new([Text.new(link)]), link: link)
|
22
22
|
end
|
23
23
|
|
24
24
|
# @!attribute [r] link
|
@@ -15,7 +15,7 @@ module Infoboxer
|
|
15
15
|
# * {Tree::Nodes#follow} for extracting multiple links at once;
|
16
16
|
# * {MediaWiki#get} for basic information on page extraction.
|
17
17
|
def follow
|
18
|
-
client.get(link
|
18
|
+
client.get(link)
|
19
19
|
end
|
20
20
|
|
21
21
|
# Human-readable page URL
|
@@ -28,9 +28,6 @@ module Infoboxer
|
|
28
28
|
|
29
29
|
protected
|
30
30
|
|
31
|
-
# redefined in {Wikilink}
|
32
|
-
def interwiki; end
|
33
|
-
|
34
31
|
def page
|
35
32
|
lookup_parents(MediaWiki::Page).first or fail('Not in a page from real source')
|
36
33
|
end
|
data/lib/infoboxer/tree/math.rb
CHANGED
data/lib/infoboxer/tree/node.rb
CHANGED
@@ -11,7 +11,7 @@ module Infoboxer
|
|
11
11
|
# you will receive it from tree and use for navigations.
|
12
12
|
#
|
13
13
|
class Node
|
14
|
-
def initialize(
|
14
|
+
def initialize(params = {})
|
15
15
|
@params = params
|
16
16
|
end
|
17
17
|
|
@@ -154,7 +154,7 @@ module Infoboxer
|
|
154
154
|
end
|
155
155
|
|
156
156
|
def show_params(prms = nil)
|
157
|
-
(prms || params).
|
157
|
+
(prms || params).map { |k, v| "#{k}: #{v.inspect}" }.join(', ')
|
158
158
|
end
|
159
159
|
|
160
160
|
def indent(level)
|
data/lib/infoboxer/tree/nodes.rb
CHANGED
@@ -38,19 +38,10 @@ module Infoboxer
|
|
38
38
|
# @!method compact
|
39
39
|
# Just like Array#compact, but returns Nodes
|
40
40
|
|
41
|
-
# @!method grep(pattern)
|
42
|
-
# Just like Array#grep, but returns Nodes
|
43
|
-
|
44
|
-
# @!method grep_v(pattern)
|
45
|
-
# Just like Array#grep_v, but returns Nodes
|
46
|
-
|
47
41
|
# @!method -(other)
|
48
42
|
# Just like Array#-, but returns Nodes
|
49
43
|
|
50
|
-
|
51
|
-
# Just like Array#+, but returns Nodes
|
52
|
-
|
53
|
-
%i[select reject sort_by flatten compact grep grep_v - +].each do |sym|
|
44
|
+
%i[select reject sort_by flatten compact -].each do |sym|
|
54
45
|
define_method(sym) do |*args, &block|
|
55
46
|
Nodes[*super(*args, &block)]
|
56
47
|
end
|
@@ -84,21 +75,6 @@ module Infoboxer
|
|
84
75
|
end
|
85
76
|
end
|
86
77
|
|
87
|
-
# Just like Array#flat_map, but returns Nodes, **if** all map results are Node
|
88
|
-
def flat_map
|
89
|
-
res = super
|
90
|
-
if res.all? { |n| n.is_a?(Node) || n.is_a?(Nodes) }
|
91
|
-
Nodes[*res]
|
92
|
-
else
|
93
|
-
res
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
# Just like Array#group, but returns hash with `{<grouping variable> => Nodes}`
|
98
|
-
def group_by
|
99
|
-
super.map { |title, group| [title, Nodes[*group]] }.to_h
|
100
|
-
end
|
101
|
-
|
102
78
|
# @!method prev_siblings
|
103
79
|
# Previous siblings (flat list) of all nodes inside.
|
104
80
|
|
@@ -163,14 +139,12 @@ module Infoboxer
|
|
163
139
|
# @return [Nodes<MediaWiki::Page>] It is still `Nodes`, so you
|
164
140
|
# still can process them uniformely.
|
165
141
|
def follow
|
166
|
-
links =
|
142
|
+
links = select { |n| n.respond_to?(:link) }.map(&:link)
|
167
143
|
return Nodes[] if links.empty?
|
168
144
|
page = first.lookup_parents(MediaWiki::Page).first or
|
169
145
|
fail('Not in a page from real source')
|
170
146
|
page.client or fail('MediaWiki client not set')
|
171
|
-
|
172
|
-
.flat_map { |iw, ls| page.client.get(*ls.map(&:link), interwiki: iw) }
|
173
|
-
pages.count == 1 ? pages.first : Nodes[*pages]
|
147
|
+
page.client.get(*links)
|
174
148
|
end
|
175
149
|
|
176
150
|
# @private
|
@@ -199,9 +173,7 @@ module Infoboxer
|
|
199
173
|
# @private
|
200
174
|
# Internal, used by {Parser}
|
201
175
|
def flow_templates
|
202
|
-
|
203
|
-
# make_nodes(map { |n| n.is_a?(Paragraph) ? n.to_templates? : n })
|
204
|
-
self
|
176
|
+
make_nodes(map { |n| n.is_a?(Paragraph) ? n.to_templates? : n })
|
205
177
|
end
|
206
178
|
|
207
179
|
private
|
@@ -22,10 +22,6 @@ module Infoboxer
|
|
22
22
|
false
|
23
23
|
end
|
24
24
|
|
25
|
-
def named?
|
26
|
-
name !~ /^\d+$/
|
27
|
-
end
|
28
|
-
|
29
25
|
protected
|
30
26
|
|
31
27
|
def descr
|
@@ -143,7 +139,7 @@ module Infoboxer
|
|
143
139
|
#
|
144
140
|
# @return [Nodes<Var>]
|
145
141
|
def unnamed_variables
|
146
|
-
variables.
|
142
|
+
variables.find(name: /^\d+$/)
|
147
143
|
end
|
148
144
|
|
149
145
|
# Fetches template variable(s) by name(s) or patterns.
|
@@ -246,7 +242,7 @@ module Infoboxer
|
|
246
242
|
def extract_params(vars)
|
247
243
|
vars
|
248
244
|
.select { |v| v.children.count == 1 && v.children.first.is_a?(Text) }
|
249
|
-
.map { |v| [v.name
|
245
|
+
.map { |v| [v.name, v.children.first.raw_text] }.to_h
|
250
246
|
end
|
251
247
|
|
252
248
|
def inspect_variables(depth)
|
data/lib/infoboxer/tree/text.rb
CHANGED
@@ -12,23 +12,14 @@ module Infoboxer
|
|
12
12
|
# Note, that Wikilink is {Linkable}, so you can {Linkable#follow #follow}
|
13
13
|
# it to obtain linked pages.
|
14
14
|
class Wikilink < Link
|
15
|
-
def initialize(
|
16
|
-
super
|
17
|
-
|
18
|
-
@interwiki = interwiki
|
19
|
-
parse_name!
|
15
|
+
def initialize(*)
|
16
|
+
super
|
17
|
+
parse_link!
|
20
18
|
end
|
21
19
|
|
22
20
|
# "Clean" wikilink name, for ex., `Cities` for `[Category:Cities]`
|
23
21
|
attr_reader :name
|
24
22
|
|
25
|
-
# Interwiki identifier. For example, `[[wikt:Argentina]]`
|
26
|
-
# will have `"Argentina"` as its {#name} and `"wikt"` (wiktionary) as an
|
27
|
-
# interwiki. TODO: how to use it.
|
28
|
-
#
|
29
|
-
# See [Wikipedia docs](https://en.wikipedia.org/wiki/Help:Interwiki_linking) for details.
|
30
|
-
attr_reader :interwiki
|
31
|
-
|
32
23
|
# Wikilink namespace, `Category` for `[Category:Cities]`, empty
|
33
24
|
# string (not `nil`!) for just `[Cities]`
|
34
25
|
attr_reader :namespace
|
@@ -55,8 +46,10 @@ module Infoboxer
|
|
55
46
|
|
56
47
|
private
|
57
48
|
|
58
|
-
def
|
59
|
-
@name
|
49
|
+
def parse_link!
|
50
|
+
@name, @namespace = link.split(':', 2).reverse
|
51
|
+
@namespace ||= ''
|
52
|
+
|
60
53
|
@name, @anchor = @name.split('#', 2)
|
61
54
|
@anchor ||= ''
|
62
55
|
|
data/lib/infoboxer/version.rb
CHANGED
data/lib/infoboxer/wiki_path.rb
CHANGED
@@ -36,7 +36,7 @@ module Infoboxer
|
|
36
36
|
attrs[attr.to_sym] = process_value(value)
|
37
37
|
end
|
38
38
|
res = op == '//' ? {op: :lookup} : {}
|
39
|
-
res[:type] =
|
39
|
+
res[:type] = type.gsub(/(?:^|_)([a-z])/, &:upcase).tr('_', '').to_sym unless type.empty?
|
40
40
|
res.merge(attrs) # TODO: raise if empty selector
|
41
41
|
end
|
42
42
|
|
@@ -51,15 +51,6 @@ module Infoboxer
|
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
|
-
def process_type(type)
|
55
|
-
type.gsub(/(?:^|_)([a-z])/, &:upcase).tr('_', '').to_sym
|
56
|
-
.tap { |t| valid_type?(t) or fail(ParseError, "Unrecognized node type: #{type}") }
|
57
|
-
end
|
58
|
-
|
59
|
-
def valid_type?(t)
|
60
|
-
t == :Section || Infoboxer::Tree.const_defined?(t)
|
61
|
-
end
|
62
|
-
|
63
54
|
def unexpected(scanner, expected)
|
64
55
|
place = scanner.eos? ? 'end of pattern' : scanner.rest.inspect
|
65
56
|
fail ParseError, "Unexpected #{place}, expecting #{expected}"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: infoboxer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Shepelev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -117,7 +117,6 @@ files:
|
|
117
117
|
- lib/infoboxer/tree.rb
|
118
118
|
- lib/infoboxer/tree/compound.rb
|
119
119
|
- lib/infoboxer/tree/document.rb
|
120
|
-
- lib/infoboxer/tree/gallery.rb
|
121
120
|
- lib/infoboxer/tree/html.rb
|
122
121
|
- lib/infoboxer/tree/image.rb
|
123
122
|
- lib/infoboxer/tree/inline.rb
|
@@ -166,9 +165,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
166
165
|
version: 2.1.0
|
167
166
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
168
167
|
requirements:
|
169
|
-
- - "
|
168
|
+
- - ">="
|
170
169
|
- !ruby/object:Gem::Version
|
171
|
-
version:
|
170
|
+
version: '0'
|
172
171
|
requirements: []
|
173
172
|
rubyforge_project:
|
174
173
|
rubygems_version: 2.6.10
|
@@ -1,12 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module Infoboxer
|
4
|
-
module Tree
|
5
|
-
# Represents gallery of images (contents of `<gallery>` special tag).
|
6
|
-
#
|
7
|
-
# See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Help:Gallery_tag)
|
8
|
-
# for explanation of attributes.
|
9
|
-
class Gallery < Compound
|
10
|
-
end
|
11
|
-
end
|
12
|
-
end
|