infoboxer 0.2.7 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +1 -0
- data/CHANGELOG.md +6 -0
- data/bin/infoboxer +11 -12
- data/infoboxer.gemspec +3 -2
- data/lib/infoboxer/core_ext.rb +1 -0
- data/lib/infoboxer/definitions/en.wikipedia.org.rb +13 -13
- data/lib/infoboxer/media_wiki/page.rb +4 -3
- data/lib/infoboxer/media_wiki/traits.rb +12 -10
- data/lib/infoboxer/media_wiki.rb +97 -68
- data/lib/infoboxer/navigation/lookup.rb +30 -26
- data/lib/infoboxer/navigation/sections.rb +33 -37
- data/lib/infoboxer/navigation/selector.rb +5 -6
- data/lib/infoboxer/navigation/shortcuts.rb +12 -11
- data/lib/infoboxer/navigation.rb +2 -1
- data/lib/infoboxer/parser/context.rb +12 -13
- data/lib/infoboxer/parser/html.rb +7 -6
- data/lib/infoboxer/parser/image.rb +25 -29
- data/lib/infoboxer/parser/inline.rb +82 -79
- data/lib/infoboxer/parser/paragraphs.rb +34 -37
- data/lib/infoboxer/parser/table.rb +26 -27
- data/lib/infoboxer/parser/template.rb +12 -4
- data/lib/infoboxer/parser/util.rb +11 -16
- data/lib/infoboxer/parser.rb +8 -1
- data/lib/infoboxer/templates/base.rb +3 -3
- data/lib/infoboxer/templates/set.rb +11 -10
- data/lib/infoboxer/tree/compound.rb +7 -6
- data/lib/infoboxer/tree/document.rb +1 -0
- data/lib/infoboxer/tree/html.rb +5 -4
- data/lib/infoboxer/tree/image.rb +8 -7
- data/lib/infoboxer/tree/inline.rb +4 -5
- data/lib/infoboxer/tree/linkable.rb +3 -5
- data/lib/infoboxer/tree/list.rb +15 -16
- data/lib/infoboxer/tree/node.rb +11 -10
- data/lib/infoboxer/tree/nodes.rb +24 -23
- data/lib/infoboxer/tree/paragraphs.rb +3 -2
- data/lib/infoboxer/tree/ref.rb +6 -3
- data/lib/infoboxer/tree/table.rb +13 -13
- data/lib/infoboxer/tree/template.rb +15 -15
- data/lib/infoboxer/tree/text.rb +2 -1
- data/lib/infoboxer/tree/wikilink.rb +9 -8
- data/lib/infoboxer/tree.rb +3 -2
- data/lib/infoboxer/version.rb +2 -1
- data/lib/infoboxer.rb +24 -26
- data/regression/pages/wyoming.wiki +1085 -0
- metadata +8 -21
- data/lib/infoboxer/media_wiki/mediawiktory_patch.rb +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef407b6160d6d0ff8cfb874338320532d1b18906
|
4
|
+
data.tar.gz: fdc4ccf4b051c50958e6cc9720619ea5110b5ea5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d5224104b02115c47f27c69926128023da3765d1d7089241190a6b3f5c8c3de79be91f65276c82536c671424d10000e2f5198413b3aa21e9802ccc9ca41fbdef
|
7
|
+
data.tar.gz: 0f1f8d9ee3a4e94d6529208a9678bcb0915a884525433d2397249afada141d96c88c74e0546ddd0576adffcc2785f96ad1da57d61284b84d96661fb49c0fd46d
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
+
## 0.2.8 (2017-05-11)
|
4
|
+
|
5
|
+
* Switch to MediaWiktory 0.1.0 + some subsequent cleanup of internal logic;
|
6
|
+
* Additional `prop:` param for `MediaWiki#get`, `#get_h`, and `#raw`, allowing you to fetch arbitrary
|
7
|
+
page properties.
|
8
|
+
|
3
9
|
## 0.2.7 (2016-09-18)
|
4
10
|
|
5
11
|
* Fix `Math` node rendering to text (#68);
|
data/bin/infoboxer
CHANGED
@@ -9,9 +9,9 @@ require 'optparse'
|
|
9
9
|
wiki_url = nil
|
10
10
|
|
11
11
|
OptionParser.new do |opts|
|
12
|
-
opts.banner =
|
12
|
+
opts.banner = 'Usage: infoboxer [-w wiki_api_url]'
|
13
13
|
|
14
|
-
opts.on(
|
14
|
+
opts.on('-w', '--wiki WIKI_API_URL',
|
15
15
|
"Make wiki by WIKI_API_URL a default wiki, and use it with just get('Pagename')") do |w|
|
16
16
|
wiki_url = w
|
17
17
|
end
|
@@ -20,19 +20,19 @@ end.parse!
|
|
20
20
|
if wiki_url
|
21
21
|
if wiki_url =~ /^[a-z]+$/
|
22
22
|
wiki_url = case
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
23
|
+
when domain = Infoboxer::WIKIMEDIA_PROJECTS[wiki_url.to_sym]
|
24
|
+
"https://en.#{domain}/w/api.php"
|
25
|
+
when domain = Infoboxer::WIKIMEDIA_PROJECTS[('w' + wiki_url).to_sym]
|
26
|
+
"https://en.#{domain}/w/api.php"
|
27
|
+
else
|
28
|
+
fail("Unidentified wiki: #{wiki_url}")
|
29
|
+
end
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
DEFAULT_WIKI = Infoboxer.wiki(wiki_url)
|
33
33
|
puts "Default Wiki selected: #{wiki_url}.\nNow you can use `get('Pagename')`, `category('Categoryname')` and so on.\n\n"
|
34
34
|
[:raw, :get, :get_h, :category, :search, :prefixsearch].each do |m|
|
35
|
-
define_method(m){|*arg|
|
35
|
+
define_method(m) { |*arg|
|
36
36
|
DEFAULT_WIKI.send(m, *arg)
|
37
37
|
}
|
38
38
|
end
|
@@ -41,4 +41,3 @@ end
|
|
41
41
|
require 'irb'
|
42
42
|
ARGV.shift until ARGV.empty?
|
43
43
|
IRB.start
|
44
|
-
|
data/infoboxer.gemspec
CHANGED
@@ -14,6 +14,8 @@ Gem::Specification.new do |s|
|
|
14
14
|
EOF
|
15
15
|
s.licenses = ['MIT']
|
16
16
|
|
17
|
+
s.required_ruby_version = '>= 2.1.0'
|
18
|
+
|
17
19
|
s.files = `git ls-files`.split($RS).reject do |file|
|
18
20
|
file =~ /^(?:
|
19
21
|
spec\/.*
|
@@ -31,8 +33,7 @@ Gem::Specification.new do |s|
|
|
31
33
|
|
32
34
|
s.add_dependency 'htmlentities'
|
33
35
|
s.add_dependency 'procme'
|
34
|
-
s.add_dependency 'mediawiktory', '>= 0.0
|
36
|
+
s.add_dependency 'mediawiktory', '>= 0.1.0'
|
35
37
|
s.add_dependency 'addressable'
|
36
38
|
s.add_dependency 'terminal-table'
|
37
|
-
s.add_dependency 'backports'
|
38
39
|
end
|
data/lib/infoboxer/core_ext.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
+
|
2
3
|
module Infoboxer
|
3
4
|
MediaWiki::Traits.for('en.wikipedia.org') do
|
4
5
|
templates do
|
@@ -38,7 +39,7 @@ module Infoboxer
|
|
38
39
|
'Asterisk' => '*',
|
39
40
|
'Colon' => ':',
|
40
41
|
'Em dash' => '—',
|
41
|
-
'Gc' =>
|
42
|
+
'Gc' => '†',
|
42
43
|
'Ibeam' => 'I',
|
43
44
|
'Long dash' => ' ——— ',
|
44
45
|
'Nbhyph' => '‑',
|
@@ -64,12 +65,12 @@ module Infoboxer
|
|
64
65
|
'Break' => "\n", # FIXME: in fact, break has optional parameter "how many breaks"
|
65
66
|
'Crlf' => "\n", # FIXME: in fact, alias for break, should have DSL syntax for it!
|
66
67
|
'Crlf2' => "\n",
|
67
|
-
|
68
|
+
|
68
69
|
)
|
69
70
|
show(
|
70
71
|
'Allow wrap',
|
71
72
|
'Nowrap',
|
72
|
-
|
73
|
+
'j', 'nobr', 'nobreak', # aliases for Nowrap
|
73
74
|
'nowraplinks',
|
74
75
|
)
|
75
76
|
# inflow_template('Normalwraplink') # TODO: tricky
|
@@ -168,7 +169,7 @@ module Infoboxer
|
|
168
169
|
'lime', 'green', 'aqua (color)', 'cyan', 'teal', 'blue', 'navy (color)',
|
169
170
|
'purple', 'fuchsia', 'magenta'
|
170
171
|
)
|
171
|
-
|
172
|
+
|
172
173
|
# Some most popular templates, without categorical splitting
|
173
174
|
# https://en.wikipedia.org/wiki/Wikipedia:Database_reports/Templates_transcluded_on_the_most_pages
|
174
175
|
# ------------------------------------------------------------------------------------------------
|
@@ -190,7 +191,7 @@ module Infoboxer
|
|
190
191
|
template 'Coord' do
|
191
192
|
def model
|
192
193
|
@model ||= begin
|
193
|
-
npos = lookup_children(text: /^N|S$/).first.index rescue nil
|
194
|
+
npos = lookup_children(text: /^N|S$/).first.index rescue nil # rubocop:disable Style/RescueModifier
|
194
195
|
case npos
|
195
196
|
when 1
|
196
197
|
:decimal
|
@@ -235,12 +236,11 @@ module Infoboxer
|
|
235
236
|
end
|
236
237
|
|
237
238
|
ALLOW_BETWEEN = ['-;', '–',
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
]
|
239
|
+
'and', '&', 'and(-)', ', and',
|
240
|
+
'or', ', or',
|
241
|
+
'to', 'to(-)', 'to about',
|
242
|
+
'+/-', '±', '+',
|
243
|
+
'by', 'x', '×', 'x',].freeze
|
244
244
|
|
245
245
|
def between
|
246
246
|
ALLOW_BETWEEN.include?(fetch('2').text) ? fetch('2').text : nil
|
@@ -253,11 +253,11 @@ module Infoboxer
|
|
253
253
|
def measure_from
|
254
254
|
between ? fetch('4').text : fetch('2').text
|
255
255
|
end
|
256
|
-
|
256
|
+
|
257
257
|
def measure_to
|
258
258
|
between ? fetch('5').text : fetch('3').text
|
259
259
|
end
|
260
|
-
|
260
|
+
|
261
261
|
def text
|
262
262
|
[value1, between, value2, measure_from].compact.join(' ')
|
263
263
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
+
|
2
3
|
module Infoboxer
|
3
4
|
class MediaWiki
|
4
5
|
# A descendant of {Tree::Document Document}, representing page,
|
@@ -9,7 +10,7 @@ module Infoboxer
|
|
9
10
|
class Page < Tree::Document
|
10
11
|
def initialize(client, children, source)
|
11
12
|
@client, @source = client, source
|
12
|
-
super(children, title: source
|
13
|
+
super(children, title: source['title'], url: source['fullurl'])
|
13
14
|
end
|
14
15
|
|
15
16
|
# Instance of {MediaWiki} which this page was received from
|
@@ -36,10 +37,10 @@ module Infoboxer
|
|
36
37
|
|
37
38
|
private
|
38
39
|
|
39
|
-
PARAMS_TO_INSPECT = [
|
40
|
+
PARAMS_TO_INSPECT = %i[url title].freeze
|
40
41
|
|
41
42
|
def show_params
|
42
|
-
super(params.select{|k,
|
43
|
+
super(params.select { |k, _v| PARAMS_TO_INSPECT.include?(k) })
|
43
44
|
end
|
44
45
|
end
|
45
46
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
+
|
2
3
|
module Infoboxer
|
3
4
|
class MediaWiki
|
4
5
|
# DSL for defining "traits" for some site.
|
@@ -19,7 +20,7 @@ module Infoboxer
|
|
19
20
|
@templates ||= Templates::Set.new
|
20
21
|
|
21
22
|
return @templates unless definition
|
22
|
-
|
23
|
+
|
23
24
|
@templates.define(&definition)
|
24
25
|
end
|
25
26
|
|
@@ -59,7 +60,7 @@ module Infoboxer
|
|
59
60
|
# [English Wikipedia traits](https://github.com/molybdenum-99/infoboxer/blob/master/lib/infoboxer/definitions/en.wikipedia.org.rb)
|
60
61
|
# for example implementation.
|
61
62
|
def for(domain, &block)
|
62
|
-
Traits.domains[domain].tap{|c| c && c.instance_eval(&block)} ||
|
63
|
+
Traits.domains[domain].tap { |c| c && c.instance_eval(&block) } ||
|
63
64
|
Class.new(self, &block).domain(domain)
|
64
65
|
end
|
65
66
|
|
@@ -69,10 +70,12 @@ module Infoboxer
|
|
69
70
|
|
70
71
|
def initialize(options = {})
|
71
72
|
@options = options
|
72
|
-
@file_namespace =
|
73
|
-
|
74
|
-
|
75
|
-
|
73
|
+
@file_namespace =
|
74
|
+
[DEFAULTS[:file_namespace], namespace_aliases(options, 'File')]
|
75
|
+
.flatten.compact.uniq
|
76
|
+
@category_namespace =
|
77
|
+
[DEFAULTS[:category_namespace], namespace_aliases(options, 'Category')]
|
78
|
+
.flatten.compact.uniq
|
76
79
|
end
|
77
80
|
|
78
81
|
# @private
|
@@ -86,16 +89,15 @@ module Infoboxer
|
|
86
89
|
private
|
87
90
|
|
88
91
|
def namespace_aliases(options, canonical)
|
89
|
-
namespace = (options[:namespaces] || []).detect{|v| v
|
92
|
+
namespace = (options[:namespaces] || []).detect { |v| v['canonical'] == canonical }
|
90
93
|
return nil unless namespace
|
91
|
-
[namespace['*'], *namespace
|
94
|
+
[namespace['*'], *namespace['aliases']]
|
92
95
|
end
|
93
96
|
|
94
97
|
DEFAULTS = {
|
95
98
|
file_namespace: 'File',
|
96
99
|
category_namespace: 'Category'
|
97
|
-
}
|
98
|
-
|
100
|
+
}.freeze
|
99
101
|
end
|
100
102
|
end
|
101
103
|
end
|
data/lib/infoboxer/media_wiki.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
|
3
|
-
#require 'json'
|
2
|
+
|
4
3
|
require 'mediawiktory'
|
5
4
|
require 'addressable/uri'
|
6
5
|
|
7
|
-
require_relative 'media_wiki/mediawiktory_patch'
|
8
6
|
require_relative 'media_wiki/traits'
|
9
7
|
require_relative 'media_wiki/page'
|
10
8
|
|
@@ -14,7 +12,8 @@ module Infoboxer
|
|
14
12
|
# Usage:
|
15
13
|
#
|
16
14
|
# ```ruby
|
17
|
-
# client = Infoboxer::MediaWiki
|
15
|
+
# client = Infoboxer::MediaWiki
|
16
|
+
# .new('http://en.wikipedia.org/w/api.php', user_agent: 'My Own Project')
|
18
17
|
# page = client.get('Argentina')
|
19
18
|
# ```
|
20
19
|
#
|
@@ -27,7 +26,8 @@ module Infoboxer
|
|
27
26
|
#
|
28
27
|
# You can set yours as an option to {Infoboxer.wiki} and its shortcuts,
|
29
28
|
# or to {#initialize}
|
30
|
-
UA = "Infoboxer/#{Infoboxer::VERSION}
|
29
|
+
UA = "Infoboxer/#{Infoboxer::VERSION} "\
|
30
|
+
'(https://github.com/molybdenum-99/infoboxer; zverok.offline@gmail.com)'.freeze
|
31
31
|
|
32
32
|
class << self
|
33
33
|
# User agent getter/setter.
|
@@ -35,9 +35,12 @@ module Infoboxer
|
|
35
35
|
# Default value is {UA}.
|
36
36
|
#
|
37
37
|
# You can also use per-instance option, see {#initialize}
|
38
|
+
#
|
39
|
+
# @return [String]
|
38
40
|
attr_accessor :user_agent
|
39
41
|
end
|
40
42
|
|
43
|
+
# @private
|
41
44
|
attr_reader :api_base_url, :traits
|
42
45
|
|
43
46
|
# Creating new MediaWiki client. {Infoboxer.wiki} provides shortcut
|
@@ -51,28 +54,44 @@ module Infoboxer
|
|
51
54
|
# * `:user_agent` (also aliased as `:ua`) -- custom User-Agent header.
|
52
55
|
def initialize(api_base_url, options = {})
|
53
56
|
@api_base_url = Addressable::URI.parse(api_base_url)
|
54
|
-
@client = MediaWiktory::
|
57
|
+
@client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(options))
|
55
58
|
@traits = Traits.get(@api_base_url.host, namespaces: extract_namespaces)
|
56
59
|
end
|
57
60
|
|
58
61
|
# Receive "raw" data from Wikipedia (without parsing or wrapping in
|
59
62
|
# classes).
|
60
63
|
#
|
61
|
-
# @
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
}
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
64
|
+
# @param titles [Array<String>] List of page titles to get.
|
65
|
+
# @param prop [Array<Symbol>] List of additional page properties to get, refer to
|
66
|
+
# [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
|
67
|
+
# for the list of available properties.
|
68
|
+
#
|
69
|
+
# @return [Hash{String => Hash}] Hash of `{requested title => raw MediaWiki object}`. Note that
|
70
|
+
# even missing (does not exist in current Wiki) or invalid (impossible title) pages will still be present
|
71
|
+
# in response; they will just have a `"missing"` or `"invalid"` key, just like MediaWiki returns them.
|
72
|
+
def raw(*titles, prop: [])
|
73
|
+
# could emerge on "automatically" created page lists, should work
|
74
|
+
return {} if titles.empty?
|
75
|
+
|
76
|
+
titles.each_slice(50).map do |part|
|
77
|
+
response = @client
|
78
|
+
.query
|
79
|
+
.titles(*part)
|
80
|
+
.prop(:revisions, :info, *prop).prop(:content, :timestamp, :url)
|
81
|
+
.redirects
|
82
|
+
.response
|
83
|
+
|
84
|
+
sources = response['pages'].values.map { |page| [page['title'], page] }.to_h
|
85
|
+
redirects =
|
86
|
+
if response['redirects']
|
87
|
+
response['redirects'].map { |r| [r['from'], sources[r['to']]] }.to_h
|
88
|
+
else
|
89
|
+
{}
|
90
|
+
end
|
91
|
+
|
92
|
+
# This way for 'Einstein' query we'll have {'Albert Einstein' => page, 'Einstein' => same page}
|
93
|
+
sources.merge(redirects)
|
94
|
+
end.inject(:merge)
|
76
95
|
end
|
77
96
|
|
78
97
|
# Receive list of parsed MediaWiki pages for list of titles provided.
|
@@ -83,7 +102,12 @@ module Infoboxer
|
|
83
102
|
# many queries as necessary to extract them all (it will be like
|
84
103
|
# `(titles.count / 50.0).ceil` requests)
|
85
104
|
#
|
86
|
-
# @
|
105
|
+
# @param titles [Array<String>] List of page titles to get.
|
106
|
+
# @param prop [Array<Symbol>] List of additional page properties to get, refer to
|
107
|
+
# [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
|
108
|
+
# for the list of available properties.
|
109
|
+
#
|
110
|
+
# @return [Page, Tree::Nodes<Page>] array of parsed pages. Notes:
|
87
111
|
# * if you call `get` with only one title, one page will be
|
88
112
|
# returned instead of an array
|
89
113
|
# * if some of pages are not in wiki, they will not be returned,
|
@@ -96,22 +120,15 @@ module Infoboxer
|
|
96
120
|
# Infoboxer.wp.get('Argentina', 'Chile', 'Something non-existing').
|
97
121
|
# infobox.fetch('some value')
|
98
122
|
# ```
|
99
|
-
# and obtain meaningful results instead of NoMethodError or
|
100
|
-
#
|
101
|
-
#
|
102
|
-
def get(*titles)
|
103
|
-
pages =
|
104
|
-
tap{|pages| pages.detect(&:invalid?).tap{|i| i && fail(i.raw.invalidreason)}}.
|
105
|
-
select(&:exists?).
|
106
|
-
map{|raw|
|
107
|
-
Page.new(self,
|
108
|
-
Parser.paragraphs(raw.content, traits),
|
109
|
-
raw)
|
110
|
-
}
|
123
|
+
# and obtain meaningful results instead of `NoMethodError` or
|
124
|
+
# `SomethingNotFound`.
|
125
|
+
#
|
126
|
+
def get(*titles, prop: [])
|
127
|
+
pages = get_h(*titles, prop: prop).values.compact
|
111
128
|
titles.count == 1 ? pages.first : Tree::Nodes[*pages]
|
112
129
|
end
|
113
130
|
|
114
|
-
# Same as {#get}, but returns hash of {requested title => page}
|
131
|
+
# Same as {#get}, but returns hash of `{requested title => page}`.
|
115
132
|
#
|
116
133
|
# Useful quirks:
|
117
134
|
# * when requested page not existing, key will be still present in
|
@@ -123,13 +140,18 @@ module Infoboxer
|
|
123
140
|
# This allows you to be in full control of what pages of large list
|
124
141
|
# you've received.
|
125
142
|
#
|
143
|
+
# @param titles [Array<String>] List of page titles to get.
|
144
|
+
# @param prop [Array<Symbol>] List of additional page properties to get, refer to
|
145
|
+
# [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
|
146
|
+
# for the list of available properties.
|
147
|
+
#
|
126
148
|
# @return [Hash<String, Page>]
|
127
149
|
#
|
128
|
-
def get_h(*titles)
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
}.to_h
|
150
|
+
def get_h(*titles, prop: [])
|
151
|
+
raw_pages = raw(*titles, prop: prop)
|
152
|
+
.tap { |ps| ps.detect { |_, p| p['invalid'] }.tap { |_, i| i && fail(i['invalidreason']) } }
|
153
|
+
.reject { |_, p| p.key?('missing') }
|
154
|
+
titles.map { |title| [title, make_page(raw_pages, title)] }.to_h
|
133
155
|
end
|
134
156
|
|
135
157
|
# Receive list of parsed MediaWiki pages from specified category.
|
@@ -139,8 +161,8 @@ module Infoboxer
|
|
139
161
|
# fetched in 50-page batches, then parsed. So, for large category
|
140
162
|
# it can really take a while to fetch all pages.
|
141
163
|
#
|
142
|
-
# @param title Category title. You can use namespaceless title (like
|
143
|
-
# `"Countries in South America"`), title with namespace (like
|
164
|
+
# @param title [String] Category title. You can use namespaceless title (like
|
165
|
+
# `"Countries in South America"`), title with namespace (like
|
144
166
|
# `"Category:Countries in South America"`) or title with local
|
145
167
|
# namespace (like `"Catégorie:Argentine"` for French Wikipedia)
|
146
168
|
#
|
@@ -148,8 +170,8 @@ module Infoboxer
|
|
148
170
|
#
|
149
171
|
def category(title)
|
150
172
|
title = normalize_category_title(title)
|
151
|
-
|
152
|
-
list(categorymembers
|
173
|
+
|
174
|
+
list(@client.query.generator(:categorymembers).title(title).limit('max'))
|
153
175
|
end
|
154
176
|
|
155
177
|
# Receive list of parsed MediaWiki pages for provided search query.
|
@@ -158,10 +180,10 @@ module Infoboxer
|
|
158
180
|
#
|
159
181
|
# **NB**: currently, this API **always** fetches all pages from
|
160
182
|
# the search results, there is no option to "take first 20 pages". Pages are
|
161
|
-
# fetched in 50-page batches, then parsed. So, for large
|
183
|
+
# fetched in 50-page batches, then parsed. So, for large search query
|
162
184
|
# it can really take a while to fetch all pages.
|
163
185
|
#
|
164
|
-
# @param query Search query. For old installations, look at
|
186
|
+
# @param query [String] Search query. For old installations, look at
|
165
187
|
# https://www.mediawiki.org/wiki/Help:Searching
|
166
188
|
# for search syntax. For new ones (including Wikipedia), see at
|
167
189
|
# https://www.mediawiki.org/wiki/Help:CirrusSearch.
|
@@ -169,7 +191,7 @@ module Infoboxer
|
|
169
191
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
170
192
|
#
|
171
193
|
def search(query)
|
172
|
-
list(
|
194
|
+
list(@client.query.generator(:search).search(query).limit('max'))
|
173
195
|
end
|
174
196
|
|
175
197
|
# Receive list of parsed MediaWiki pages with titles starting from prefix.
|
@@ -178,38 +200,44 @@ module Infoboxer
|
|
178
200
|
#
|
179
201
|
# **NB**: currently, this API **always** fetches all pages from
|
180
202
|
# the prefix search results, there is no option to "take first 20 pages". Pages are
|
181
|
-
# fetched in 50-page batches, then parsed. So, for large
|
203
|
+
# fetched in 50-page batches, then parsed. So, for large search query
|
182
204
|
# it can really take a while to fetch all pages.
|
183
205
|
#
|
184
|
-
# @param prefix
|
206
|
+
# @param prefix [String] Page title prefix.
|
185
207
|
#
|
186
208
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
187
209
|
#
|
188
210
|
def prefixsearch(prefix)
|
189
|
-
list(prefixsearch
|
211
|
+
list(@client.query.generator(:prefixsearch).search(prefix).limit('max'))
|
190
212
|
end
|
191
213
|
|
214
|
+
# @return [String]
|
192
215
|
def inspect
|
193
216
|
"#<#{self.class}(#{@api_base_url.host})>"
|
194
217
|
end
|
195
218
|
|
196
219
|
private
|
197
220
|
|
221
|
+
def make_page(raw_pages, title)
|
222
|
+
_, source = raw_pages.detect { |ptitle, _| ptitle.casecmp(title).zero? }
|
223
|
+
source or return nil
|
224
|
+
Page.new(self, Parser.paragraphs(source['revisions'].first['*'], traits), source)
|
225
|
+
end
|
226
|
+
|
198
227
|
def list(query)
|
199
|
-
response =
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
228
|
+
response = query
|
229
|
+
.prop(:revisions, :info)
|
230
|
+
.prop(:content, :timestamp, :url)
|
231
|
+
.redirects
|
232
|
+
.response
|
204
233
|
|
205
|
-
response.continue
|
234
|
+
response = response.continue while response.continue?
|
206
235
|
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
}
|
236
|
+
return Tree::Nodes[] if response['pages'].nil?
|
237
|
+
|
238
|
+
pages = response['pages']
|
239
|
+
.values.select { |p| p['missing'].nil? }
|
240
|
+
.map { |raw| Page.new(self, Parser.paragraphs(raw['revisions'].first['*'], traits), raw) }
|
213
241
|
|
214
242
|
Tree::Nodes[*pages]
|
215
243
|
end
|
@@ -218,7 +246,7 @@ module Infoboxer
|
|
218
246
|
# FIXME: shouldn't it go to MediaWiktory?..
|
219
247
|
namespace, titl = title.include?(':') ? title.split(':', 2) : [nil, title]
|
220
248
|
namespace, titl = nil, title unless traits.category_namespace.include?(namespace)
|
221
|
-
|
249
|
+
|
222
250
|
namespace ||= traits.category_namespace.first
|
223
251
|
[namespace, titl].join(':')
|
224
252
|
end
|
@@ -228,11 +256,12 @@ module Infoboxer
|
|
228
256
|
end
|
229
257
|
|
230
258
|
def extract_namespaces
|
231
|
-
siteinfo = @client.query.meta(siteinfo
|
232
|
-
siteinfo
|
233
|
-
aliases =
|
234
|
-
|
235
|
-
|
259
|
+
siteinfo = @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases).response
|
260
|
+
siteinfo['namespaces'].map do |_, namespace|
|
261
|
+
aliases =
|
262
|
+
siteinfo['namespacealiases'].select { |a| a['id'] == namespace['id'] }.map { |a| a['*'] }
|
263
|
+
namespace.merge('aliases' => aliases)
|
264
|
+
end
|
236
265
|
end
|
237
266
|
end
|
238
267
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
+
|
2
3
|
require_relative 'selector'
|
3
4
|
|
4
5
|
module Infoboxer
|
@@ -104,8 +105,8 @@ module Infoboxer
|
|
104
105
|
|
105
106
|
# Underscored version of {#lookup}
|
106
107
|
def _lookup(selector)
|
107
|
-
Tree::Nodes[_matches?(selector) ? self : nil, *children._lookup(selector)]
|
108
|
-
flatten.compact
|
108
|
+
Tree::Nodes[_matches?(selector) ? self : nil, *children._lookup(selector)]
|
109
|
+
.flatten.compact
|
109
110
|
end
|
110
111
|
|
111
112
|
# Underscored version of {#lookup_children}
|
@@ -139,20 +140,23 @@ module Infoboxer
|
|
139
140
|
def _lookup_next_siblings(selector)
|
140
141
|
next_siblings._find(selector)
|
141
142
|
end
|
142
|
-
|
143
|
-
[
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
143
|
+
|
144
|
+
%i[
|
145
|
+
matches?
|
146
|
+
lookup lookup_children lookup_parents
|
147
|
+
lookup_siblings
|
148
|
+
lookup_next_siblings lookup_prev_siblings
|
149
|
+
]
|
150
|
+
.map { |sym| [sym, :"_#{sym}"] }
|
151
|
+
.each do |sym, underscored|
|
152
|
+
|
153
|
+
define_method(sym) do |*args, &block|
|
150
154
|
send(underscored, Selector.new(*args, &block))
|
151
|
-
|
155
|
+
end
|
152
156
|
end
|
153
157
|
|
154
158
|
# Checks if node has any parent matching selectors.
|
155
|
-
def
|
159
|
+
def parent?(*selectors, &block)
|
156
160
|
!lookup_parents(*selectors, &block).empty?
|
157
161
|
end
|
158
162
|
end
|
@@ -181,7 +185,7 @@ module Infoboxer
|
|
181
185
|
|
182
186
|
# Underscored version of {#find}.
|
183
187
|
def _find(selector)
|
184
|
-
select{|n| n._matches?(selector)}
|
188
|
+
select { |n| n._matches?(selector) }
|
185
189
|
end
|
186
190
|
|
187
191
|
# Selects nodes of current list (and only it, no children checks),
|
@@ -190,25 +194,25 @@ module Infoboxer
|
|
190
194
|
_find(Selector.new(*selectors, &block))
|
191
195
|
end
|
192
196
|
|
193
|
-
[
|
194
|
-
|
195
|
-
|
197
|
+
%i[
|
198
|
+
_lookup _lookup_children _lookup_parents
|
199
|
+
_lookup_siblings _lookup_prev_siblings _lookup_next_siblings
|
196
200
|
].each do |sym|
|
197
|
-
define_method(sym)
|
198
|
-
make_nodes
|
199
|
-
|
201
|
+
define_method(sym) do |*args|
|
202
|
+
make_nodes(map { |n| n.send(sym, *args) })
|
203
|
+
end
|
200
204
|
end
|
201
205
|
|
202
206
|
# not delegate, but redefine: Selector should be constructed only once
|
203
|
-
[
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
].map{|sym| [sym, :"_#{sym}"]}.each do |sym, underscored|
|
207
|
+
%i[
|
208
|
+
lookup lookup_children lookup_parents
|
209
|
+
lookup_siblings
|
210
|
+
lookup_next_siblings lookup_prev_siblings
|
211
|
+
].map { |sym| [sym, :"_#{sym}"] }.each do |sym, underscored|
|
208
212
|
|
209
|
-
define_method(sym)
|
213
|
+
define_method(sym) do |*args, &block|
|
210
214
|
send(underscored, Selector.new(*args, &block))
|
211
|
-
|
215
|
+
end
|
212
216
|
end
|
213
217
|
end
|
214
218
|
end
|