infoboxer 0.2.7 → 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +1 -0
- data/CHANGELOG.md +6 -0
- data/bin/infoboxer +11 -12
- data/infoboxer.gemspec +3 -2
- data/lib/infoboxer/core_ext.rb +1 -0
- data/lib/infoboxer/definitions/en.wikipedia.org.rb +13 -13
- data/lib/infoboxer/media_wiki/page.rb +4 -3
- data/lib/infoboxer/media_wiki/traits.rb +12 -10
- data/lib/infoboxer/media_wiki.rb +97 -68
- data/lib/infoboxer/navigation/lookup.rb +30 -26
- data/lib/infoboxer/navigation/sections.rb +33 -37
- data/lib/infoboxer/navigation/selector.rb +5 -6
- data/lib/infoboxer/navigation/shortcuts.rb +12 -11
- data/lib/infoboxer/navigation.rb +2 -1
- data/lib/infoboxer/parser/context.rb +12 -13
- data/lib/infoboxer/parser/html.rb +7 -6
- data/lib/infoboxer/parser/image.rb +25 -29
- data/lib/infoboxer/parser/inline.rb +82 -79
- data/lib/infoboxer/parser/paragraphs.rb +34 -37
- data/lib/infoboxer/parser/table.rb +26 -27
- data/lib/infoboxer/parser/template.rb +12 -4
- data/lib/infoboxer/parser/util.rb +11 -16
- data/lib/infoboxer/parser.rb +8 -1
- data/lib/infoboxer/templates/base.rb +3 -3
- data/lib/infoboxer/templates/set.rb +11 -10
- data/lib/infoboxer/tree/compound.rb +7 -6
- data/lib/infoboxer/tree/document.rb +1 -0
- data/lib/infoboxer/tree/html.rb +5 -4
- data/lib/infoboxer/tree/image.rb +8 -7
- data/lib/infoboxer/tree/inline.rb +4 -5
- data/lib/infoboxer/tree/linkable.rb +3 -5
- data/lib/infoboxer/tree/list.rb +15 -16
- data/lib/infoboxer/tree/node.rb +11 -10
- data/lib/infoboxer/tree/nodes.rb +24 -23
- data/lib/infoboxer/tree/paragraphs.rb +3 -2
- data/lib/infoboxer/tree/ref.rb +6 -3
- data/lib/infoboxer/tree/table.rb +13 -13
- data/lib/infoboxer/tree/template.rb +15 -15
- data/lib/infoboxer/tree/text.rb +2 -1
- data/lib/infoboxer/tree/wikilink.rb +9 -8
- data/lib/infoboxer/tree.rb +3 -2
- data/lib/infoboxer/version.rb +2 -1
- data/lib/infoboxer.rb +24 -26
- data/regression/pages/wyoming.wiki +1085 -0
- metadata +8 -21
- data/lib/infoboxer/media_wiki/mediawiktory_patch.rb +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef407b6160d6d0ff8cfb874338320532d1b18906
|
4
|
+
data.tar.gz: fdc4ccf4b051c50958e6cc9720619ea5110b5ea5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d5224104b02115c47f27c69926128023da3765d1d7089241190a6b3f5c8c3de79be91f65276c82536c671424d10000e2f5198413b3aa21e9802ccc9ca41fbdef
|
7
|
+
data.tar.gz: 0f1f8d9ee3a4e94d6529208a9678bcb0915a884525433d2397249afada141d96c88c74e0546ddd0576adffcc2785f96ad1da57d61284b84d96661fb49c0fd46d
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
+
## 0.2.8 (2017-05-11)
|
4
|
+
|
5
|
+
* Switch to MediaWiktory 0.1.0 + some subsequent cleanup of internal logic;
|
6
|
+
* Additional `prop:` param for `MediaWiki#get`, `#get_h`, and `#raw`, which allows fetching arbitrary
|
7
|
+
page properties.
|
8
|
+
|
3
9
|
## 0.2.7 (2016-09-18)
|
4
10
|
|
5
11
|
* Fix `Math` node rendering to text (#68);
|
data/bin/infoboxer
CHANGED
@@ -9,9 +9,9 @@ require 'optparse'
|
|
9
9
|
wiki_url = nil
|
10
10
|
|
11
11
|
OptionParser.new do |opts|
|
12
|
-
opts.banner =
|
12
|
+
opts.banner = 'Usage: infoboxer [-w wiki_api_url]'
|
13
13
|
|
14
|
-
opts.on(
|
14
|
+
opts.on('-w', '--wiki WIKI_API_URL',
|
15
15
|
"Make wiki by WIKI_API_URL a default wiki, and use it with just get('Pagename')") do |w|
|
16
16
|
wiki_url = w
|
17
17
|
end
|
@@ -20,19 +20,19 @@ end.parse!
|
|
20
20
|
if wiki_url
|
21
21
|
if wiki_url =~ /^[a-z]+$/
|
22
22
|
wiki_url = case
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
23
|
+
when domain = Infoboxer::WIKIMEDIA_PROJECTS[wiki_url.to_sym]
|
24
|
+
"https://en.#{domain}/w/api.php"
|
25
|
+
when domain = Infoboxer::WIKIMEDIA_PROJECTS[('w' + wiki_url).to_sym]
|
26
|
+
"https://en.#{domain}/w/api.php"
|
27
|
+
else
|
28
|
+
fail("Unidentified wiki: #{wiki_url}")
|
29
|
+
end
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
DEFAULT_WIKI = Infoboxer.wiki(wiki_url)
|
33
33
|
puts "Default Wiki selected: #{wiki_url}.\nNow you can use `get('Pagename')`, `category('Categoryname')` and so on.\n\n"
|
34
34
|
[:raw, :get, :get_h, :category, :search, :prefixsearch].each do |m|
|
35
|
-
define_method(m){|*arg|
|
35
|
+
define_method(m) { |*arg|
|
36
36
|
DEFAULT_WIKI.send(m, *arg)
|
37
37
|
}
|
38
38
|
end
|
@@ -41,4 +41,3 @@ end
|
|
41
41
|
require 'irb'
|
42
42
|
ARGV.shift until ARGV.empty?
|
43
43
|
IRB.start
|
44
|
-
|
data/infoboxer.gemspec
CHANGED
@@ -14,6 +14,8 @@ Gem::Specification.new do |s|
|
|
14
14
|
EOF
|
15
15
|
s.licenses = ['MIT']
|
16
16
|
|
17
|
+
s.required_ruby_version = '>= 2.1.0'
|
18
|
+
|
17
19
|
s.files = `git ls-files`.split($RS).reject do |file|
|
18
20
|
file =~ /^(?:
|
19
21
|
spec\/.*
|
@@ -31,8 +33,7 @@ Gem::Specification.new do |s|
|
|
31
33
|
|
32
34
|
s.add_dependency 'htmlentities'
|
33
35
|
s.add_dependency 'procme'
|
34
|
-
s.add_dependency 'mediawiktory', '>= 0.0
|
36
|
+
s.add_dependency 'mediawiktory', '>= 0.1.0'
|
35
37
|
s.add_dependency 'addressable'
|
36
38
|
s.add_dependency 'terminal-table'
|
37
|
-
s.add_dependency 'backports'
|
38
39
|
end
|
data/lib/infoboxer/core_ext.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
+
|
2
3
|
module Infoboxer
|
3
4
|
MediaWiki::Traits.for('en.wikipedia.org') do
|
4
5
|
templates do
|
@@ -38,7 +39,7 @@ module Infoboxer
|
|
38
39
|
'Asterisk' => '*',
|
39
40
|
'Colon' => ':',
|
40
41
|
'Em dash' => '—',
|
41
|
-
'Gc' =>
|
42
|
+
'Gc' => '†',
|
42
43
|
'Ibeam' => 'I',
|
43
44
|
'Long dash' => ' ——— ',
|
44
45
|
'Nbhyph' => '‑',
|
@@ -64,12 +65,12 @@ module Infoboxer
|
|
64
65
|
'Break' => "\n", # FIXME: in fact, break has optional parameter "how many breaks"
|
65
66
|
'Crlf' => "\n", # FIXME: in fact, alias for break, should have DSL syntax for it!
|
66
67
|
'Crlf2' => "\n",
|
67
|
-
|
68
|
+
|
68
69
|
)
|
69
70
|
show(
|
70
71
|
'Allow wrap',
|
71
72
|
'Nowrap',
|
72
|
-
|
73
|
+
'j', 'nobr', 'nobreak', # aliases for Nowrap
|
73
74
|
'nowraplinks',
|
74
75
|
)
|
75
76
|
# inflow_template('Normalwraplink') # TODO: tricky
|
@@ -168,7 +169,7 @@ module Infoboxer
|
|
168
169
|
'lime', 'green', 'aqua (color)', 'cyan', 'teal', 'blue', 'navy (color)',
|
169
170
|
'purple', 'fuchsia', 'magenta'
|
170
171
|
)
|
171
|
-
|
172
|
+
|
172
173
|
# Some most popular templates, without categorical splitting
|
173
174
|
# https://en.wikipedia.org/wiki/Wikipedia:Database_reports/Templates_transcluded_on_the_most_pages
|
174
175
|
# ------------------------------------------------------------------------------------------------
|
@@ -190,7 +191,7 @@ module Infoboxer
|
|
190
191
|
template 'Coord' do
|
191
192
|
def model
|
192
193
|
@model ||= begin
|
193
|
-
npos = lookup_children(text: /^N|S$/).first.index rescue nil
|
194
|
+
npos = lookup_children(text: /^N|S$/).first.index rescue nil # rubocop:disable Style/RescueModifier
|
194
195
|
case npos
|
195
196
|
when 1
|
196
197
|
:decimal
|
@@ -235,12 +236,11 @@ module Infoboxer
|
|
235
236
|
end
|
236
237
|
|
237
238
|
ALLOW_BETWEEN = ['-;', '–',
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
]
|
239
|
+
'and', '&', 'and(-)', ', and',
|
240
|
+
'or', ', or',
|
241
|
+
'to', 'to(-)', 'to about',
|
242
|
+
'+/-', '±', '+',
|
243
|
+
'by', 'x', '×', 'x',].freeze
|
244
244
|
|
245
245
|
def between
|
246
246
|
ALLOW_BETWEEN.include?(fetch('2').text) ? fetch('2').text : nil
|
@@ -253,11 +253,11 @@ module Infoboxer
|
|
253
253
|
def measure_from
|
254
254
|
between ? fetch('4').text : fetch('2').text
|
255
255
|
end
|
256
|
-
|
256
|
+
|
257
257
|
def measure_to
|
258
258
|
between ? fetch('5').text : fetch('3').text
|
259
259
|
end
|
260
|
-
|
260
|
+
|
261
261
|
def text
|
262
262
|
[value1, between, value2, measure_from].compact.join(' ')
|
263
263
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
+
|
2
3
|
module Infoboxer
|
3
4
|
class MediaWiki
|
4
5
|
# A descendant of {Tree::Document Document}, representing page,
|
@@ -9,7 +10,7 @@ module Infoboxer
|
|
9
10
|
class Page < Tree::Document
|
10
11
|
def initialize(client, children, source)
|
11
12
|
@client, @source = client, source
|
12
|
-
super(children, title: source
|
13
|
+
super(children, title: source['title'], url: source['fullurl'])
|
13
14
|
end
|
14
15
|
|
15
16
|
# Instance of {MediaWiki} which this page was received from
|
@@ -36,10 +37,10 @@ module Infoboxer
|
|
36
37
|
|
37
38
|
private
|
38
39
|
|
39
|
-
PARAMS_TO_INSPECT = [
|
40
|
+
PARAMS_TO_INSPECT = %i[url title].freeze
|
40
41
|
|
41
42
|
def show_params
|
42
|
-
super(params.select{|k,
|
43
|
+
super(params.select { |k, _v| PARAMS_TO_INSPECT.include?(k) })
|
43
44
|
end
|
44
45
|
end
|
45
46
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
+
|
2
3
|
module Infoboxer
|
3
4
|
class MediaWiki
|
4
5
|
# DSL for defining "traits" for some site.
|
@@ -19,7 +20,7 @@ module Infoboxer
|
|
19
20
|
@templates ||= Templates::Set.new
|
20
21
|
|
21
22
|
return @templates unless definition
|
22
|
-
|
23
|
+
|
23
24
|
@templates.define(&definition)
|
24
25
|
end
|
25
26
|
|
@@ -59,7 +60,7 @@ module Infoboxer
|
|
59
60
|
# [English Wikipedia traits](https://github.com/molybdenum-99/infoboxer/blob/master/lib/infoboxer/definitions/en.wikipedia.org.rb)
|
60
61
|
# for example implementation.
|
61
62
|
def for(domain, &block)
|
62
|
-
Traits.domains[domain].tap{|c| c && c.instance_eval(&block)} ||
|
63
|
+
Traits.domains[domain].tap { |c| c && c.instance_eval(&block) } ||
|
63
64
|
Class.new(self, &block).domain(domain)
|
64
65
|
end
|
65
66
|
|
@@ -69,10 +70,12 @@ module Infoboxer
|
|
69
70
|
|
70
71
|
def initialize(options = {})
|
71
72
|
@options = options
|
72
|
-
@file_namespace =
|
73
|
-
|
74
|
-
|
75
|
-
|
73
|
+
@file_namespace =
|
74
|
+
[DEFAULTS[:file_namespace], namespace_aliases(options, 'File')]
|
75
|
+
.flatten.compact.uniq
|
76
|
+
@category_namespace =
|
77
|
+
[DEFAULTS[:category_namespace], namespace_aliases(options, 'Category')]
|
78
|
+
.flatten.compact.uniq
|
76
79
|
end
|
77
80
|
|
78
81
|
# @private
|
@@ -86,16 +89,15 @@ module Infoboxer
|
|
86
89
|
private
|
87
90
|
|
88
91
|
def namespace_aliases(options, canonical)
|
89
|
-
namespace = (options[:namespaces] || []).detect{|v| v
|
92
|
+
namespace = (options[:namespaces] || []).detect { |v| v['canonical'] == canonical }
|
90
93
|
return nil unless namespace
|
91
|
-
[namespace['*'], *namespace
|
94
|
+
[namespace['*'], *namespace['aliases']]
|
92
95
|
end
|
93
96
|
|
94
97
|
DEFAULTS = {
|
95
98
|
file_namespace: 'File',
|
96
99
|
category_namespace: 'Category'
|
97
|
-
}
|
98
|
-
|
100
|
+
}.freeze
|
99
101
|
end
|
100
102
|
end
|
101
103
|
end
|
data/lib/infoboxer/media_wiki.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
|
3
|
-
#require 'json'
|
2
|
+
|
4
3
|
require 'mediawiktory'
|
5
4
|
require 'addressable/uri'
|
6
5
|
|
7
|
-
require_relative 'media_wiki/mediawiktory_patch'
|
8
6
|
require_relative 'media_wiki/traits'
|
9
7
|
require_relative 'media_wiki/page'
|
10
8
|
|
@@ -14,7 +12,8 @@ module Infoboxer
|
|
14
12
|
# Usage:
|
15
13
|
#
|
16
14
|
# ```ruby
|
17
|
-
# client = Infoboxer::MediaWiki
|
15
|
+
# client = Infoboxer::MediaWiki
|
16
|
+
# .new('http://en.wikipedia.org/w/api.php', user_agent: 'My Own Project')
|
18
17
|
# page = client.get('Argentina')
|
19
18
|
# ```
|
20
19
|
#
|
@@ -27,7 +26,8 @@ module Infoboxer
|
|
27
26
|
#
|
28
27
|
# You can set yours as an option to {Infoboxer.wiki} and its shortcuts,
|
29
28
|
# or to {#initialize}
|
30
|
-
UA = "Infoboxer/#{Infoboxer::VERSION}
|
29
|
+
UA = "Infoboxer/#{Infoboxer::VERSION} "\
|
30
|
+
'(https://github.com/molybdenum-99/infoboxer; zverok.offline@gmail.com)'.freeze
|
31
31
|
|
32
32
|
class << self
|
33
33
|
# User agent getter/setter.
|
@@ -35,9 +35,12 @@ module Infoboxer
|
|
35
35
|
# Default value is {UA}.
|
36
36
|
#
|
37
37
|
# You can also use per-instance option, see {#initialize}
|
38
|
+
#
|
39
|
+
# @return [String]
|
38
40
|
attr_accessor :user_agent
|
39
41
|
end
|
40
42
|
|
43
|
+
# @private
|
41
44
|
attr_reader :api_base_url, :traits
|
42
45
|
|
43
46
|
# Creating new MediaWiki client. {Infoboxer.wiki} provides shortcut
|
@@ -51,28 +54,44 @@ module Infoboxer
|
|
51
54
|
# * `:user_agent` (also aliased as `:ua`) -- custom User-Agent header.
|
52
55
|
def initialize(api_base_url, options = {})
|
53
56
|
@api_base_url = Addressable::URI.parse(api_base_url)
|
54
|
-
@client = MediaWiktory::
|
57
|
+
@client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(options))
|
55
58
|
@traits = Traits.get(@api_base_url.host, namespaces: extract_namespaces)
|
56
59
|
end
|
57
60
|
|
58
61
|
# Receive "raw" data from Wikipedia (without parsing or wrapping in
|
59
62
|
# classes).
|
60
63
|
#
|
61
|
-
# @
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
}
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
64
|
+
# @param titles [Array<String>] List of page titles to get.
|
65
|
+
# @param prop [Array<Symbol>] List of additional page properties to get, refer to
|
66
|
+
# [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
|
67
|
+
# for the list of available properties.
|
68
|
+
#
|
69
|
+
# @return [Hash{String => Hash}] Hash of `{requested title => raw MediaWiki object}`. Note that
|
70
|
+
# even missing (does not exist in current Wiki) or invalid (impossible title) pages will still be present
|
71
|
+
# in response; they will just have a `"missing"` or `"invalid"` key, just like MediaWiki returns them.
|
72
|
+
def raw(*titles, prop: [])
|
73
|
+
# an empty titles list can emerge from "automatically" created page lists; it should still work
|
74
|
+
return {} if titles.empty?
|
75
|
+
|
76
|
+
titles.each_slice(50).map do |part|
|
77
|
+
response = @client
|
78
|
+
.query
|
79
|
+
.titles(*part)
|
80
|
+
.prop(:revisions, :info, *prop).prop(:content, :timestamp, :url)
|
81
|
+
.redirects
|
82
|
+
.response
|
83
|
+
|
84
|
+
sources = response['pages'].values.map { |page| [page['title'], page] }.to_h
|
85
|
+
redirects =
|
86
|
+
if response['redirects']
|
87
|
+
response['redirects'].map { |r| [r['from'], sources[r['to']]] }.to_h
|
88
|
+
else
|
89
|
+
{}
|
90
|
+
end
|
91
|
+
|
92
|
+
# This way for 'Einstein' query we'll have {'Albert Einstein' => page, 'Einstein' => same page}
|
93
|
+
sources.merge(redirects)
|
94
|
+
end.inject(:merge)
|
76
95
|
end
|
77
96
|
|
78
97
|
# Receive list of parsed MediaWiki pages for list of titles provided.
|
@@ -83,7 +102,12 @@ module Infoboxer
|
|
83
102
|
# many queries as necessary to extract them all (it will be like
|
84
103
|
# `(titles.count / 50.0).ceil` requests)
|
85
104
|
#
|
86
|
-
# @
|
105
|
+
# @param titles [Array<String>] List of page titles to get.
|
106
|
+
# @param prop [Array<Symbol>] List of additional page properties to get, refer to
|
107
|
+
# [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
|
108
|
+
# for the list of available properties.
|
109
|
+
#
|
110
|
+
# @return [Page, Tree::Nodes<Page>] array of parsed pages. Notes:
|
87
111
|
# * if you call `get` with only one title, one page will be
|
88
112
|
# returned instead of an array
|
89
113
|
# * if some of the pages are not in the wiki, they will not be returned,
|
@@ -96,22 +120,15 @@ module Infoboxer
|
|
96
120
|
# Infoboxer.wp.get('Argentina', 'Chile', 'Something non-existing').
|
97
121
|
# infobox.fetch('some value')
|
98
122
|
# ```
|
99
|
-
# and obtain meaningful results instead of NoMethodError or
|
100
|
-
#
|
101
|
-
#
|
102
|
-
def get(*titles)
|
103
|
-
pages =
|
104
|
-
tap{|pages| pages.detect(&:invalid?).tap{|i| i && fail(i.raw.invalidreason)}}.
|
105
|
-
select(&:exists?).
|
106
|
-
map{|raw|
|
107
|
-
Page.new(self,
|
108
|
-
Parser.paragraphs(raw.content, traits),
|
109
|
-
raw)
|
110
|
-
}
|
123
|
+
# and obtain meaningful results instead of `NoMethodError` or
|
124
|
+
# `SomethingNotFound`.
|
125
|
+
#
|
126
|
+
def get(*titles, prop: [])
|
127
|
+
pages = get_h(*titles, prop: prop).values.compact
|
111
128
|
titles.count == 1 ? pages.first : Tree::Nodes[*pages]
|
112
129
|
end
|
113
130
|
|
114
|
-
# Same as {#get}, but returns hash of {requested title => page}
|
131
|
+
# Same as {#get}, but returns hash of `{requested title => page}`.
|
115
132
|
#
|
116
133
|
# Useful quirks:
|
117
134
|
# * when a requested page does not exist, its key will still be present in
|
@@ -123,13 +140,18 @@ module Infoboxer
|
|
123
140
|
# This allows you to be in full control of what pages of large list
|
124
141
|
# you've received.
|
125
142
|
#
|
143
|
+
# @param titles [Array<String>] List of page titles to get.
|
144
|
+
# @param prop [Array<Symbol>] List of additional page properties to get, refer to
|
145
|
+
# [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
|
146
|
+
# for the list of available properties.
|
147
|
+
#
|
126
148
|
# @return [Hash<String, Page>]
|
127
149
|
#
|
128
|
-
def get_h(*titles)
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
}.to_h
|
150
|
+
def get_h(*titles, prop: [])
|
151
|
+
raw_pages = raw(*titles, prop: prop)
|
152
|
+
.tap { |ps| ps.detect { |_, p| p['invalid'] }.tap { |_, i| i && fail(i['invalidreason']) } }
|
153
|
+
.reject { |_, p| p.key?('missing') }
|
154
|
+
titles.map { |title| [title, make_page(raw_pages, title)] }.to_h
|
133
155
|
end
|
134
156
|
|
135
157
|
# Receive list of parsed MediaWiki pages from specified category.
|
@@ -139,8 +161,8 @@ module Infoboxer
|
|
139
161
|
# fetched in 50-page batches, then parsed. So, for large category
|
140
162
|
# it can really take a while to fetch all pages.
|
141
163
|
#
|
142
|
-
# @param title Category title. You can use namespaceless title (like
|
143
|
-
# `"Countries in South America"`), title with namespace (like
|
164
|
+
# @param title [String] Category title. You can use namespaceless title (like
|
165
|
+
# `"Countries in South America"`), title with namespace (like
|
144
166
|
# `"Category:Countries in South America"`) or title with local
|
145
167
|
# namespace (like `"Catégorie:Argentine"` for French Wikipedia)
|
146
168
|
#
|
@@ -148,8 +170,8 @@ module Infoboxer
|
|
148
170
|
#
|
149
171
|
def category(title)
|
150
172
|
title = normalize_category_title(title)
|
151
|
-
|
152
|
-
list(categorymembers
|
173
|
+
|
174
|
+
list(@client.query.generator(:categorymembers).title(title).limit('max'))
|
153
175
|
end
|
154
176
|
|
155
177
|
# Receive list of parsed MediaWiki pages for provided search query.
|
@@ -158,10 +180,10 @@ module Infoboxer
|
|
158
180
|
#
|
159
181
|
# **NB**: currently, this API **always** fetches all pages from
|
160
182
|
# search results, there is no option to "take first 20 pages". Pages are
|
161
|
-
# fetched in 50-page batches, then parsed. So, for large
|
183
|
+
# fetched in 50-page batches, then parsed. So, for large search query
|
162
184
|
# it can really take a while to fetch all pages.
|
163
185
|
#
|
164
|
-
# @param query Search query. For old installations, look at
|
186
|
+
# @param query [String] Search query. For old installations, look at
|
165
187
|
# https://www.mediawiki.org/wiki/Help:Searching
|
166
188
|
# for search syntax. For new ones (including Wikipedia), see
|
167
189
|
# https://www.mediawiki.org/wiki/Help:CirrusSearch.
|
@@ -169,7 +191,7 @@ module Infoboxer
|
|
169
191
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
170
192
|
#
|
171
193
|
def search(query)
|
172
|
-
list(
|
194
|
+
list(@client.query.generator(:search).search(query).limit('max'))
|
173
195
|
end
|
174
196
|
|
175
197
|
# Receive list of parsed MediaWiki pages with titles starting from prefix.
|
@@ -178,38 +200,44 @@ module Infoboxer
|
|
178
200
|
#
|
179
201
|
# **NB**: currently, this API **always** fetches all pages from
|
180
202
|
# prefix search results, there is no option to "take first 20 pages". Pages are
|
181
|
-
# fetched in 50-page batches, then parsed. So, for large
|
203
|
+
# fetched in 50-page batches, then parsed. So, for large search query
|
182
204
|
# it can really take a while to fetch all pages.
|
183
205
|
#
|
184
|
-
# @param prefix
|
206
|
+
# @param prefix [String] Page title prefix.
|
185
207
|
#
|
186
208
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
187
209
|
#
|
188
210
|
def prefixsearch(prefix)
|
189
|
-
list(prefixsearch
|
211
|
+
list(@client.query.generator(:prefixsearch).search(prefix).limit('max'))
|
190
212
|
end
|
191
213
|
|
214
|
+
# @return [String]
|
192
215
|
def inspect
|
193
216
|
"#<#{self.class}(#{@api_base_url.host})>"
|
194
217
|
end
|
195
218
|
|
196
219
|
private
|
197
220
|
|
221
|
+
def make_page(raw_pages, title)
|
222
|
+
_, source = raw_pages.detect { |ptitle, _| ptitle.casecmp(title).zero? }
|
223
|
+
source or return nil
|
224
|
+
Page.new(self, Parser.paragraphs(source['revisions'].first['*'], traits), source)
|
225
|
+
end
|
226
|
+
|
198
227
|
def list(query)
|
199
|
-
response =
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
228
|
+
response = query
|
229
|
+
.prop(:revisions, :info)
|
230
|
+
.prop(:content, :timestamp, :url)
|
231
|
+
.redirects
|
232
|
+
.response
|
204
233
|
|
205
|
-
response.continue
|
234
|
+
response = response.continue while response.continue?
|
206
235
|
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
}
|
236
|
+
return Tree::Nodes[] if response['pages'].nil?
|
237
|
+
|
238
|
+
pages = response['pages']
|
239
|
+
.values.select { |p| p['missing'].nil? }
|
240
|
+
.map { |raw| Page.new(self, Parser.paragraphs(raw['revisions'].first['*'], traits), raw) }
|
213
241
|
|
214
242
|
Tree::Nodes[*pages]
|
215
243
|
end
|
@@ -218,7 +246,7 @@ module Infoboxer
|
|
218
246
|
# FIXME: shouldn't it go to MediaWiktory?..
|
219
247
|
namespace, titl = title.include?(':') ? title.split(':', 2) : [nil, title]
|
220
248
|
namespace, titl = nil, title unless traits.category_namespace.include?(namespace)
|
221
|
-
|
249
|
+
|
222
250
|
namespace ||= traits.category_namespace.first
|
223
251
|
[namespace, titl].join(':')
|
224
252
|
end
|
@@ -228,11 +256,12 @@ module Infoboxer
|
|
228
256
|
end
|
229
257
|
|
230
258
|
def extract_namespaces
|
231
|
-
siteinfo = @client.query.meta(siteinfo
|
232
|
-
siteinfo
|
233
|
-
aliases =
|
234
|
-
|
235
|
-
|
259
|
+
siteinfo = @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases).response
|
260
|
+
siteinfo['namespaces'].map do |_, namespace|
|
261
|
+
aliases =
|
262
|
+
siteinfo['namespacealiases'].select { |a| a['id'] == namespace['id'] }.map { |a| a['*'] }
|
263
|
+
namespace.merge('aliases' => aliases)
|
264
|
+
end
|
236
265
|
end
|
237
266
|
end
|
238
267
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
+
|
2
3
|
require_relative 'selector'
|
3
4
|
|
4
5
|
module Infoboxer
|
@@ -104,8 +105,8 @@ module Infoboxer
|
|
104
105
|
|
105
106
|
# Underscored version of {#lookup}
|
106
107
|
def _lookup(selector)
|
107
|
-
Tree::Nodes[_matches?(selector) ? self : nil, *children._lookup(selector)]
|
108
|
-
flatten.compact
|
108
|
+
Tree::Nodes[_matches?(selector) ? self : nil, *children._lookup(selector)]
|
109
|
+
.flatten.compact
|
109
110
|
end
|
110
111
|
|
111
112
|
# Underscored version of {#lookup_children}
|
@@ -139,20 +140,23 @@ module Infoboxer
|
|
139
140
|
def _lookup_next_siblings(selector)
|
140
141
|
next_siblings._find(selector)
|
141
142
|
end
|
142
|
-
|
143
|
-
[
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
143
|
+
|
144
|
+
%i[
|
145
|
+
matches?
|
146
|
+
lookup lookup_children lookup_parents
|
147
|
+
lookup_siblings
|
148
|
+
lookup_next_siblings lookup_prev_siblings
|
149
|
+
]
|
150
|
+
.map { |sym| [sym, :"_#{sym}"] }
|
151
|
+
.each do |sym, underscored|
|
152
|
+
|
153
|
+
define_method(sym) do |*args, &block|
|
150
154
|
send(underscored, Selector.new(*args, &block))
|
151
|
-
|
155
|
+
end
|
152
156
|
end
|
153
157
|
|
154
158
|
# Checks if node has any parent matching selectors.
|
155
|
-
def
|
159
|
+
def parent?(*selectors, &block)
|
156
160
|
!lookup_parents(*selectors, &block).empty?
|
157
161
|
end
|
158
162
|
end
|
@@ -181,7 +185,7 @@ module Infoboxer
|
|
181
185
|
|
182
186
|
# Underscored version of {#find}.
|
183
187
|
def _find(selector)
|
184
|
-
select{|n| n._matches?(selector)}
|
188
|
+
select { |n| n._matches?(selector) }
|
185
189
|
end
|
186
190
|
|
187
191
|
# Selects nodes of current list (and only it, no children checks),
|
@@ -190,25 +194,25 @@ module Infoboxer
|
|
190
194
|
_find(Selector.new(*selectors, &block))
|
191
195
|
end
|
192
196
|
|
193
|
-
[
|
194
|
-
|
195
|
-
|
197
|
+
%i[
|
198
|
+
_lookup _lookup_children _lookup_parents
|
199
|
+
_lookup_siblings _lookup_prev_siblings _lookup_next_siblings
|
196
200
|
].each do |sym|
|
197
|
-
define_method(sym)
|
198
|
-
make_nodes
|
199
|
-
|
201
|
+
define_method(sym) do |*args|
|
202
|
+
make_nodes(map { |n| n.send(sym, *args) })
|
203
|
+
end
|
200
204
|
end
|
201
205
|
|
202
206
|
# not delegate, but redefine: Selector should be constructed only once
|
203
|
-
[
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
].map{|sym| [sym, :"_#{sym}"]}.each do |sym, underscored|
|
207
|
+
%i[
|
208
|
+
lookup lookup_children lookup_parents
|
209
|
+
lookup_siblings
|
210
|
+
lookup_next_siblings lookup_prev_siblings
|
211
|
+
].map { |sym| [sym, :"_#{sym}"] }.each do |sym, underscored|
|
208
212
|
|
209
|
-
define_method(sym)
|
213
|
+
define_method(sym) do |*args, &block|
|
210
214
|
send(underscored, Selector.new(*args, &block))
|
211
|
-
|
215
|
+
end
|
212
216
|
end
|
213
217
|
end
|
214
218
|
end
|