infoboxer 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/ci.yml +32 -0
- data/.rubocop_todo.yml +0 -15
- data/CHANGELOG.md +43 -0
- data/Gemfile.lock +172 -0
- data/README.md +1 -1
- data/infoboxer.gemspec +1 -1
- data/lib/infoboxer.rb +23 -11
- data/lib/infoboxer/core_ext.rb +1 -1
- data/lib/infoboxer/definitions/en.wikipedia.org.rb +3 -1
- data/lib/infoboxer/media_wiki.rb +83 -65
- data/lib/infoboxer/media_wiki/page.rb +10 -1
- data/lib/infoboxer/media_wiki/traits.rb +69 -22
- data/lib/infoboxer/navigation.rb +7 -1
- data/lib/infoboxer/navigation/lookup.rb +15 -7
- data/lib/infoboxer/navigation/sections.rb +27 -9
- data/lib/infoboxer/navigation/selector.rb +14 -6
- data/lib/infoboxer/navigation/shortcuts.rb +1 -1
- data/lib/infoboxer/navigation/wikipath.rb +1 -1
- data/lib/infoboxer/parser.rb +2 -2
- data/lib/infoboxer/parser/context.rb +23 -9
- data/lib/infoboxer/parser/html.rb +1 -1
- data/lib/infoboxer/parser/image.rb +2 -2
- data/lib/infoboxer/parser/inline.rb +50 -7
- data/lib/infoboxer/parser/paragraphs.rb +3 -3
- data/lib/infoboxer/parser/table.rb +33 -17
- data/lib/infoboxer/parser/template.rb +5 -4
- data/lib/infoboxer/parser/util.rb +2 -1
- data/lib/infoboxer/templates.rb +2 -0
- data/lib/infoboxer/templates/base.rb +2 -0
- data/lib/infoboxer/templates/set.rb +1 -1
- data/lib/infoboxer/tree.rb +2 -2
- data/lib/infoboxer/tree/compound.rb +3 -3
- data/lib/infoboxer/tree/document.rb +1 -1
- data/lib/infoboxer/tree/gallery.rb +12 -0
- data/lib/infoboxer/tree/html.rb +3 -3
- data/lib/infoboxer/tree/image.rb +4 -4
- data/lib/infoboxer/tree/inline.rb +3 -3
- data/lib/infoboxer/tree/linkable.rb +6 -1
- data/lib/infoboxer/tree/list.rb +4 -5
- data/lib/infoboxer/tree/math.rb +2 -3
- data/lib/infoboxer/tree/node.rb +4 -4
- data/lib/infoboxer/tree/nodes.rb +51 -7
- data/lib/infoboxer/tree/paragraphs.rb +1 -1
- data/lib/infoboxer/tree/ref.rb +1 -1
- data/lib/infoboxer/tree/table.rb +4 -4
- data/lib/infoboxer/tree/template.rb +18 -5
- data/lib/infoboxer/tree/text.rb +11 -11
- data/lib/infoboxer/tree/wikilink.rb +16 -8
- data/lib/infoboxer/version.rb +4 -3
- data/lib/infoboxer/wiki_path.rb +12 -1
- data/regression/pages/2012_bdo_world_darts_championship.wiki +941 -0
- data/regression/pages/progress_wrestling.wiki +1308 -0
- metadata +12 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 755e1283e896d2c2b363983a9b04ac92cb14870cfa2ab67fd62777684bac1352
|
4
|
+
data.tar.gz: c5443d788fc06a2310c65b80bc16531b820bb7142f611f5d063af8992cee7525
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1443ecf7dbb485555a275a4d5390417ee48fabb2321a23fb59c208d32c1424259515924e903872b762afebcaaf5574afd713e4eade6c346aeb8e71719c051436
|
7
|
+
data.tar.gz: 71c007554240c40a7e7f9bee81d66d637b8643f1ccf18a5900c24087545d3c430830d8a30d9c4463244b13419d4edb349aee585885b6aecf03dc8d8b37d701a7
|
@@ -0,0 +1,32 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ master ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ master ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
main:
|
11
|
+
name: >-
|
12
|
+
${{ matrix.ruby }}
|
13
|
+
runs-on: ubuntu-latest
|
14
|
+
strategy:
|
15
|
+
fail-fast: false
|
16
|
+
matrix:
|
17
|
+
ruby: [ 2.6, 2.7, 3.0, head ]
|
18
|
+
|
19
|
+
steps:
|
20
|
+
- name: checkout
|
21
|
+
uses: actions/checkout@v2
|
22
|
+
- name: set up Ruby
|
23
|
+
uses: ruby/setup-ruby@v1
|
24
|
+
with:
|
25
|
+
ruby-version: ${{ matrix.ruby }}
|
26
|
+
|
27
|
+
- name: install dependencies
|
28
|
+
run: bundle install --jobs 3 --retry 3
|
29
|
+
- name: spec
|
30
|
+
run: bundle exec rake spec
|
31
|
+
- name: rubocop
|
32
|
+
run: bundle exec rake rubocop
|
data/.rubocop_todo.yml
CHANGED
@@ -1,16 +1 @@
|
|
1
|
-
# This configuration was generated by
|
2
|
-
# `rubocop --auto-gen-config`
|
3
|
-
# on 2017-06-23 13:52:16 +0300 using RuboCop version 0.49.1.
|
4
|
-
# The point is for the user to remove these configuration records
|
5
|
-
# one by one as the offenses are removed from the code base.
|
6
|
-
# Note that changes in the inspected code, or installation of new
|
7
|
-
# versions of RuboCop, may require this file to be generated again.
|
8
|
-
|
9
|
-
# Offense count: 1
|
10
|
-
Metrics/AbcSize:
|
11
|
-
Max: 29
|
12
|
-
|
13
|
-
# Offense count: 1
|
14
|
-
Metrics/PerceivedComplexity:
|
15
|
-
Max: 10
|
16
1
|
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,48 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
+
## 0.4.0 (2021-05-30)
|
4
|
+
|
5
|
+
* A cluster of bugs found in #81 fixed:
|
6
|
+
* Empty comment (`<!---->`) now processed properly;
|
7
|
+
* Templates that are implicitly inside tables (put on a separate row) now always create
|
8
|
+
an implicit `<TableCell>`
|
9
|
+
* Heading after non-closed table closes the table implicitly instead of being inserted
|
10
|
+
into the last cell.
|
11
|
+
* Drop Ruby < 2.6, and support 3.0 instead.
|
12
|
+
|
13
|
+
PS: Yeah, year-and-almost-half is much better than 2 years between releases, I guess.. And let's call
|
14
|
+
it non-patch version then.
|
15
|
+
|
16
|
+
## 0.3.3 (2020-02-09)
|
17
|
+
|
18
|
+
* Fixed table captions handling (thanks @robfors for reporting)
|
19
|
+
|
20
|
+
PS: Funny that this small bugfix release is exactly two years after the previous one :(
|
21
|
+
|
22
|
+
## 0.3.2 (2018-02-09)
|
23
|
+
|
24
|
+
* Updated MediaWiktory to finally turn on gzip encoding of responses;
|
25
|
+
* Utility methods to expose some internals (`MediaWiki#api`, `Infoboxer#url_for(:wikipedia)`,
|
26
|
+
`Page#namespaces`, `Template#named_variables` and so on);
|
27
|
+
* Fix parsing of lowercase `file:` links in `<gallery>`.
|
28
|
+
|
29
|
+
## 0.3.1 (2017-12-04)
|
30
|
+
|
31
|
+
* (Experimental) new representation of templates, much more readable;
|
32
|
+
* More access to querying process and underlying `MediaWiktory::Wikipedia::Query`;
|
33
|
+
* Finally, `limit` parameter for multi-page queries (category, search, prefixsearch).
|
34
|
+
|
35
|
+
## 0.3.1.pre (2017-09-16)
|
36
|
+
|
37
|
+
* Introduce interwiki links following (and proper handling of interwikis, in general);
|
38
|
+
* Add `<gallery>` tag support;
|
39
|
+
* Introduce `Navigation::Selector#===`;
|
40
|
+
* Many more `Enumerable` methods supported by `Nodes`;
|
41
|
+
* Lots of small simplifications, cleanups and bugfixes.
|
42
|
+
|
43
|
+
TBH, it should be 0.4.0 or more, but it would be a shame to change versions so fast :) So, at least
|
44
|
+
until it is `-pre`, let it be 0.3.1.
|
45
|
+
|
3
46
|
## 0.3.0 (2017-07-23)
|
4
47
|
|
5
48
|
* Change logic of navigation through templates; now templates contents aren't hidden from global
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
GIT
|
2
|
+
remote: https://github.com/zverok/dokaz.git
|
3
|
+
revision: a8a6f0bbeab5589326fe2714cf89842b5f32b850
|
4
|
+
specs:
|
5
|
+
dokaz (0.0.4)
|
6
|
+
ansi
|
7
|
+
rouge
|
8
|
+
slop (~> 3)
|
9
|
+
|
10
|
+
PATH
|
11
|
+
remote: .
|
12
|
+
specs:
|
13
|
+
infoboxer (0.4.0)
|
14
|
+
addressable
|
15
|
+
htmlentities
|
16
|
+
mediawiktory (= 0.1.3)
|
17
|
+
terminal-table
|
18
|
+
|
19
|
+
GEM
|
20
|
+
remote: https://rubygems.org/
|
21
|
+
specs:
|
22
|
+
addressable (2.7.0)
|
23
|
+
public_suffix (>= 2.0.2, < 5.0)
|
24
|
+
ansi (1.5.0)
|
25
|
+
ast (2.4.2)
|
26
|
+
backports (3.21.0)
|
27
|
+
byebug (11.1.3)
|
28
|
+
coveralls (0.8.23)
|
29
|
+
json (>= 1.8, < 3)
|
30
|
+
simplecov (~> 0.16.1)
|
31
|
+
term-ansicolor (~> 1.3)
|
32
|
+
thor (>= 0.19.4, < 2.0)
|
33
|
+
tins (~> 1.6)
|
34
|
+
crack (0.4.5)
|
35
|
+
rexml
|
36
|
+
diff-lcs (1.4.4)
|
37
|
+
docile (1.4.0)
|
38
|
+
faraday (1.4.2)
|
39
|
+
faraday-em_http (~> 1.0)
|
40
|
+
faraday-em_synchrony (~> 1.0)
|
41
|
+
faraday-excon (~> 1.1)
|
42
|
+
faraday-net_http (~> 1.0)
|
43
|
+
faraday-net_http_persistent (~> 1.1)
|
44
|
+
multipart-post (>= 1.2, < 3)
|
45
|
+
ruby2_keywords (>= 0.0.4)
|
46
|
+
faraday-em_http (1.0.0)
|
47
|
+
faraday-em_synchrony (1.0.0)
|
48
|
+
faraday-excon (1.1.0)
|
49
|
+
faraday-net_http (1.0.1)
|
50
|
+
faraday-net_http_persistent (1.1.0)
|
51
|
+
faraday_middleware (1.0.0)
|
52
|
+
faraday (~> 1.0)
|
53
|
+
hashdiff (1.0.1)
|
54
|
+
hashie (4.1.0)
|
55
|
+
htmlentities (4.3.4)
|
56
|
+
io-console (0.5.9)
|
57
|
+
irb (1.3.5)
|
58
|
+
reline (>= 0.1.5)
|
59
|
+
json (2.5.1)
|
60
|
+
mediawiktory (0.1.3)
|
61
|
+
addressable
|
62
|
+
faraday
|
63
|
+
faraday_middleware
|
64
|
+
hashie
|
65
|
+
naught
|
66
|
+
nokogiri
|
67
|
+
multipart-post (2.1.1)
|
68
|
+
naught (1.1.0)
|
69
|
+
nokogiri (1.11.6-x86_64-linux)
|
70
|
+
racc (~> 1.4)
|
71
|
+
parallel (1.20.1)
|
72
|
+
parser (3.0.1.1)
|
73
|
+
ast (~> 2.4.1)
|
74
|
+
public_suffix (4.0.6)
|
75
|
+
racc (1.5.2)
|
76
|
+
rainbow (3.0.0)
|
77
|
+
rake (13.0.3)
|
78
|
+
redcarpet (3.5.1)
|
79
|
+
regexp_parser (2.1.1)
|
80
|
+
reline (0.2.5)
|
81
|
+
io-console (~> 0.5)
|
82
|
+
rexml (3.2.5)
|
83
|
+
rouge (3.26.0)
|
84
|
+
rspec (3.10.0)
|
85
|
+
rspec-core (~> 3.10.0)
|
86
|
+
rspec-expectations (~> 3.10.0)
|
87
|
+
rspec-mocks (~> 3.10.0)
|
88
|
+
rspec-core (3.10.1)
|
89
|
+
rspec-support (~> 3.10.0)
|
90
|
+
rspec-expectations (3.10.1)
|
91
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
92
|
+
rspec-support (~> 3.10.0)
|
93
|
+
rspec-its (1.3.0)
|
94
|
+
rspec-core (>= 3.0.0)
|
95
|
+
rspec-expectations (>= 3.0.0)
|
96
|
+
rspec-mocks (3.10.2)
|
97
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
98
|
+
rspec-support (~> 3.10.0)
|
99
|
+
rspec-support (3.10.2)
|
100
|
+
rubocop (1.15.0)
|
101
|
+
parallel (~> 1.10)
|
102
|
+
parser (>= 3.0.0.0)
|
103
|
+
rainbow (>= 2.2.2, < 4.0)
|
104
|
+
regexp_parser (>= 1.8, < 3.0)
|
105
|
+
rexml
|
106
|
+
rubocop-ast (>= 1.5.0, < 2.0)
|
107
|
+
ruby-progressbar (~> 1.7)
|
108
|
+
unicode-display_width (>= 1.4.0, < 3.0)
|
109
|
+
rubocop-ast (1.7.0)
|
110
|
+
parser (>= 3.0.1.1)
|
111
|
+
rubocop-rspec (2.3.0)
|
112
|
+
rubocop (~> 1.0)
|
113
|
+
rubocop-ast (>= 1.1.0)
|
114
|
+
ruby-prof (1.4.3)
|
115
|
+
ruby-progressbar (1.11.0)
|
116
|
+
ruby2_keywords (0.0.4)
|
117
|
+
rubygems-tasks (0.2.5)
|
118
|
+
irb (~> 1.0)
|
119
|
+
saharspec (0.0.4)
|
120
|
+
simplecov (0.16.1)
|
121
|
+
docile (~> 1.1)
|
122
|
+
json (>= 1.8, < 3)
|
123
|
+
simplecov-html (~> 0.10.0)
|
124
|
+
simplecov-html (0.10.2)
|
125
|
+
slop (3.6.0)
|
126
|
+
sync (0.5.0)
|
127
|
+
term-ansicolor (1.7.1)
|
128
|
+
tins (~> 1.0)
|
129
|
+
terminal-table (3.0.1)
|
130
|
+
unicode-display_width (>= 1.1.1, < 3)
|
131
|
+
thor (1.1.0)
|
132
|
+
timecop (0.9.4)
|
133
|
+
tins (1.29.1)
|
134
|
+
sync
|
135
|
+
unicode-display_width (2.0.0)
|
136
|
+
vcr (6.0.0)
|
137
|
+
webmock (3.13.0)
|
138
|
+
addressable (>= 2.3.6)
|
139
|
+
crack (>= 0.3.2)
|
140
|
+
hashdiff (>= 0.4.0, < 2.0.0)
|
141
|
+
yard (0.9.26)
|
142
|
+
yard-junk (0.0.9)
|
143
|
+
backports (>= 3.18)
|
144
|
+
rainbow
|
145
|
+
yard
|
146
|
+
|
147
|
+
PLATFORMS
|
148
|
+
ruby
|
149
|
+
x86_64-linux
|
150
|
+
|
151
|
+
DEPENDENCIES
|
152
|
+
byebug
|
153
|
+
coveralls
|
154
|
+
dokaz!
|
155
|
+
infoboxer!
|
156
|
+
rake
|
157
|
+
redcarpet
|
158
|
+
rspec (~> 3)
|
159
|
+
rspec-its (~> 1)
|
160
|
+
rubocop (~> 1.15.0)
|
161
|
+
rubocop-rspec (~> 2.3.0)
|
162
|
+
ruby-prof
|
163
|
+
rubygems-tasks
|
164
|
+
saharspec (= 0.0.4)
|
165
|
+
timecop
|
166
|
+
vcr
|
167
|
+
webmock
|
168
|
+
yard (~> 0.9)
|
169
|
+
yard-junk (~> 0.0.7)
|
170
|
+
|
171
|
+
BUNDLED WITH
|
172
|
+
2.2.0
|
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# Infoboxer
|
2
2
|
|
3
3
|
[](http://badge.fury.io/rb/infoboxer)
|
4
|
-
|
4
|
+

|
5
5
|
[](https://coveralls.io/github/molybdenum-99/infoboxer?branch=master)
|
6
6
|
[](https://codeclimate.com/github/molybdenum-99/infoboxer)
|
7
7
|
[](https://gitter.im/molybdenum-99/infoboxer)
|
data/infoboxer.gemspec
CHANGED
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
|
|
32
32
|
s.executables << 'infoboxer'
|
33
33
|
|
34
34
|
s.add_dependency 'htmlentities'
|
35
|
-
s.add_dependency 'mediawiktory', '
|
35
|
+
s.add_dependency 'mediawiktory', '= 0.1.3'
|
36
36
|
s.add_dependency 'addressable'
|
37
37
|
s.add_dependency 'terminal-table'
|
38
38
|
end
|
data/lib/infoboxer.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# Main client module for entire infoboxer functionality. If you're lucky,
|
4
4
|
# there's no other classes/modules you need to instantiate or call
|
@@ -47,9 +47,8 @@
|
|
47
47
|
#
|
48
48
|
module Infoboxer
|
49
49
|
# @private
|
50
|
-
WIKIA_API_URL = 'http://%s.wikia.com/api.php'
|
50
|
+
WIKIA_API_URL = 'http://%s.wikia.com/api.php'
|
51
51
|
|
52
|
-
# @private
|
53
52
|
WIKIMEDIA_PROJECTS = {
|
54
53
|
wikipedia: 'wikipedia.org',
|
55
54
|
wikivoyage: 'wikivoyage.org',
|
@@ -61,7 +60,6 @@ module Infoboxer
|
|
61
60
|
wikisource: 'wikisource.org'
|
62
61
|
}.freeze
|
63
62
|
|
64
|
-
# @private
|
65
63
|
WIKIMEDIA_COMMONS = {
|
66
64
|
commons: 'commons.wikimedia.org',
|
67
65
|
species: 'species.wikimedia.org',
|
@@ -72,11 +70,11 @@ module Infoboxer
|
|
72
70
|
end
|
73
71
|
|
74
72
|
# Includeable version of {Infoboxer.wiki}
|
75
|
-
def wiki(api_url, options
|
76
|
-
wikis[api_url] ||= MediaWiki.new(api_url, options
|
73
|
+
def wiki(api_url, **options)
|
74
|
+
wikis[api_url] ||= MediaWiki.new(api_url, **options)
|
77
75
|
end
|
78
76
|
|
79
|
-
class << self
|
77
|
+
class << self # rubocop:disable Lint/EmptyClass -- that's for YARD!
|
80
78
|
# @!method wiki(api_url, options = {})
|
81
79
|
# Default method for creating MediaWiki API client.
|
82
80
|
#
|
@@ -168,21 +166,35 @@ module Infoboxer
|
|
168
166
|
end
|
169
167
|
|
170
168
|
WIKIMEDIA_PROJECTS.each do |name, domain|
|
171
|
-
define_method name do |lang = 'en', options
|
169
|
+
define_method name do |lang = 'en', **options|
|
172
170
|
lang, options = 'en', lang if lang.is_a?(Hash)
|
173
171
|
|
174
|
-
wiki("https://#{lang}.#{domain}/w/api.php", options)
|
172
|
+
wiki("https://#{lang}.#{domain}/w/api.php", **options)
|
175
173
|
end
|
176
174
|
end
|
177
175
|
|
178
176
|
alias_method :wp, :wikipedia
|
179
177
|
|
180
178
|
WIKIMEDIA_COMMONS.each do |name, domain|
|
181
|
-
define_method name do
|
179
|
+
define_method name do |**options|
|
182
180
|
wiki("https://#{domain}/w/api.php", options)
|
183
181
|
end
|
184
182
|
end
|
185
183
|
|
184
|
+
# Returns URL of API entry-point for a well-known Wiki-project (wikipedia, wikivoyage etc.)
|
185
|
+
# by project's name.
|
186
|
+
#
|
187
|
+
# @param symbol [Symbol] One of {WIKIMEDIA_PROJECTS} or {WIKIMEDIA_COMMONS} keys.
|
188
|
+
# @param lang [String, Symbol] Language of the project, if applicable.
|
189
|
+
# @return [String]
|
190
|
+
def url_for(symbol, lang = 'en')
|
191
|
+
if (domain = WIKIMEDIA_PROJECTS[symbol])
|
192
|
+
"https://#{lang}.#{domain}/w/api.php"
|
193
|
+
elsif (domain = WIKIMEDIA_COMMONS[symbol])
|
194
|
+
"https://#{domain}/w/api.php"
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
186
198
|
# @!method wikipedia(lang = 'en', options = {})
|
187
199
|
# Includeable version of {Infoboxer.wikipedia}
|
188
200
|
|
@@ -216,7 +228,7 @@ module Infoboxer
|
|
216
228
|
# Includeable version of {Infoboxer.wikia}
|
217
229
|
def wikia(*domains)
|
218
230
|
options = domains.last.is_a?(Hash) ? domains.pop : {}
|
219
|
-
wiki(WIKIA_API_URL % domains.reverse.join('.'), options)
|
231
|
+
wiki(WIKIA_API_URL % domains.reverse.join('.'), **options)
|
220
232
|
end
|
221
233
|
|
222
234
|
# Sets user agent string globally. Default user agent is
|
data/lib/infoboxer/core_ext.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
+
# rubocop:disable Layout/EmptyLinesAroundArguments
|
3
4
|
module Infoboxer
|
4
5
|
MediaWiki::Traits.for('en.wikipedia.org') do
|
5
6
|
templates do
|
@@ -372,3 +373,4 @@ module Infoboxer
|
|
372
373
|
end
|
373
374
|
end
|
374
375
|
end
|
376
|
+
# rubocop:enable Layout/EmptyLinesAroundArguments
|
data/lib/infoboxer/media_wiki.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'mediawiktory'
|
4
4
|
require 'addressable/uri'
|
@@ -27,7 +27,7 @@ module Infoboxer
|
|
27
27
|
# You can set yours as an option to {Infoboxer.wiki} and its shortcuts,
|
28
28
|
# or to {#initialize}
|
29
29
|
UA = "Infoboxer/#{Infoboxer::VERSION} "\
|
30
|
-
'(https://github.com/molybdenum-99/infoboxer; zverok.offline@gmail.com)'
|
30
|
+
'(https://github.com/molybdenum-99/infoboxer; zverok.offline@gmail.com)'
|
31
31
|
|
32
32
|
class << self
|
33
33
|
# User agent getter/setter.
|
@@ -43,43 +43,45 @@ module Infoboxer
|
|
43
43
|
# @private
|
44
44
|
attr_reader :api_base_url, :traits
|
45
45
|
|
46
|
+
# @return [MediaWiktory::Wikipedia::Client]
|
47
|
+
attr_reader :api
|
48
|
+
|
46
49
|
# Creating new MediaWiki client. {Infoboxer.wiki} provides shortcut
|
47
50
|
# for it, as well as shortcuts for some well-known wikis, like
|
48
51
|
# {Infoboxer.wikipedia}.
|
49
52
|
#
|
50
|
-
# @param api_base_url URL of `api.php` file in your MediaWiki
|
53
|
+
# @param api_base_url [String] URL of `api.php` file in your MediaWiki
|
51
54
|
# installation. Typically, it's `<domain>/w/api.php`, but can vary
|
52
55
|
# in different wikis.
|
53
|
-
# @param
|
54
|
-
|
55
|
-
def initialize(api_base_url, options = {})
|
56
|
+
# @param user_agent [String] (also aliased as `:ua`) Custom User-Agent header.
|
57
|
+
def initialize(api_base_url, ua: nil, user_agent: ua)
|
56
58
|
@api_base_url = Addressable::URI.parse(api_base_url)
|
57
|
-
@
|
58
|
-
@traits = Traits.get(@api_base_url.host,
|
59
|
+
@api = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
|
60
|
+
@traits = Traits.get(@api_base_url.host, siteinfo)
|
59
61
|
end
|
60
62
|
|
61
63
|
# Receive "raw" data from Wikipedia (without parsing or wrapping in
|
62
64
|
# classes).
|
63
65
|
#
|
64
66
|
# @param titles [Array<String>] List of page titles to get.
|
65
|
-
# @param
|
66
|
-
# [MediaWiktory::Actions::Query
|
67
|
-
# for the
|
67
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
68
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
69
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
70
|
+
# while using it.
|
68
71
|
#
|
69
72
|
# @return [Hash{String => Hash}] Hash of `{requested title => raw MediaWiki object}`. Note that
|
70
73
|
# even missing (does not exist in current Wiki) or invalid (impossible title) pages will still be present
|
71
74
|
# in response, just will have `"missing"` or `"invalid"` key, just like MediaWiki returns them.
|
72
|
-
def raw(*titles,
|
75
|
+
def raw(*titles, &processor)
|
73
76
|
# could emerge on "automatically" created page lists, should work
|
74
77
|
return {} if titles.empty?
|
75
78
|
|
76
79
|
titles.each_slice(50).map do |part|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
.response
|
80
|
+
request = prepare_request(@api.query.titles(*part), &processor)
|
81
|
+
response = request.response
|
82
|
+
|
83
|
+
# If additional props are required, there may be additional pages, even despite each_slice(50)
|
84
|
+
response = response.continue while response.continue?
|
83
85
|
|
84
86
|
sources = response['pages'].values.map { |page| [page['title'], page] }.to_h
|
85
87
|
redirects =
|
@@ -103,9 +105,11 @@ module Infoboxer
|
|
103
105
|
# `(titles.count / 50.0).ceil` requests)
|
104
106
|
#
|
105
107
|
# @param titles [Array<String>] List of page titles to get.
|
106
|
-
# @param
|
107
|
-
#
|
108
|
-
#
|
108
|
+
# @param interwiki [Symbol] Identifier of other wiki, related to current, to fetch pages from.
|
109
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
110
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
111
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
112
|
+
# while using it.
|
109
113
|
#
|
110
114
|
# @return [Page, Tree::Nodes<Page>] array of parsed pages. Notes:
|
111
115
|
# * if you call `get` with only one title, one page will be
|
@@ -123,8 +127,10 @@ module Infoboxer
|
|
123
127
|
# and obtain meaningful results instead of `NoMethodError` or
|
124
128
|
# `SomethingNotFound`.
|
125
129
|
#
|
126
|
-
def get(*titles,
|
127
|
-
|
130
|
+
def get(*titles, interwiki: nil, &processor)
|
131
|
+
return interwikis(interwiki).get(*titles, &processor) if interwiki
|
132
|
+
|
133
|
+
pages = get_h(*titles, &processor).values.compact
|
128
134
|
titles.count == 1 ? pages.first : Tree::Nodes[*pages]
|
129
135
|
end
|
130
136
|
|
@@ -141,14 +147,15 @@ module Infoboxer
|
|
141
147
|
# you've received.
|
142
148
|
#
|
143
149
|
# @param titles [Array<String>] List of page titles to get.
|
144
|
-
# @param
|
145
|
-
# [MediaWiktory::Actions::Query
|
146
|
-
# for the
|
150
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
151
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
152
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
153
|
+
# while using it.
|
147
154
|
#
|
148
155
|
# @return [Hash<String, Page>]
|
149
156
|
#
|
150
|
-
def get_h(*titles,
|
151
|
-
raw_pages = raw(*titles,
|
157
|
+
def get_h(*titles, &processor)
|
158
|
+
raw_pages = raw(*titles, &processor)
|
152
159
|
.tap { |ps| ps.detect { |_, p| p['invalid'] }.tap { |_, i| i && fail(i['invalidreason']) } }
|
153
160
|
.reject { |_, p| p.key?('missing') }
|
154
161
|
titles.map { |title| [title, make_page(raw_pages, title)] }.to_h
|
@@ -156,59 +163,59 @@ module Infoboxer
|
|
156
163
|
|
157
164
|
# Receive list of parsed MediaWiki pages from specified category.
|
158
165
|
#
|
159
|
-
# **NB**: currently, this API **always** fetches all pages from
|
160
|
-
# category, there is no option to "take first 20 pages". Pages are
|
161
|
-
# fetched in 50-page batches, then parsed. So, for large category
|
162
|
-
# it can really take a while to fetch all pages.
|
163
|
-
#
|
164
166
|
# @param title [String] Category title. You can use namespaceless title (like
|
165
167
|
# `"Countries in South America"`), title with namespace (like
|
166
168
|
# `"Category:Countries in South America"`) or title with local
|
167
169
|
# namespace (like `"Catégorie:Argentine"` for French Wikipedia)
|
170
|
+
# @param limit [Integer, "max"]
|
171
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
172
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
173
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
174
|
+
# while using it.
|
168
175
|
#
|
169
176
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
170
177
|
#
|
171
|
-
def category(title)
|
178
|
+
def category(title, limit: 'max', &processor)
|
172
179
|
title = normalize_category_title(title)
|
173
180
|
|
174
|
-
list(@
|
181
|
+
list(@api.query.generator(:categorymembers).title(title), limit, &processor)
|
175
182
|
end
|
176
183
|
|
177
184
|
# Receive list of parsed MediaWiki pages for provided search query.
|
178
185
|
# See [MediaWiki API docs](https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bsearch)
|
179
186
|
# for details.
|
180
187
|
#
|
181
|
-
# **NB**: currently, this API **always** fetches all pages from
|
182
|
-
# category, there is no option to "take first 20 pages". Pages are
|
183
|
-
# fetched in 50-page batches, then parsed. So, for large search query
|
184
|
-
# it can really take a while to fetch all pages.
|
185
|
-
#
|
186
188
|
# @param query [String] Search query. For old installations, look at
|
187
189
|
# https://www.mediawiki.org/wiki/Help:Searching
|
188
190
|
# for search syntax. For new ones (including Wikipedia), see at
|
189
191
|
# https://www.mediawiki.org/wiki/Help:CirrusSearch.
|
192
|
+
# @param limit [Integer, "max"]
|
193
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
194
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
195
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
196
|
+
# while using it.
|
190
197
|
#
|
191
198
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
192
199
|
#
|
193
|
-
def search(query)
|
194
|
-
list(@
|
200
|
+
def search(query, limit: 'max', &processor)
|
201
|
+
list(@api.query.generator(:search).search(query), limit, &processor)
|
195
202
|
end
|
196
203
|
|
197
204
|
# Receive list of parsed MediaWiki pages with titles starting from prefix.
|
198
205
|
# See [MediaWiki API docs](https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bprefixsearch)
|
199
206
|
# for details.
|
200
207
|
#
|
201
|
-
# **NB**: currently, this API **always** fetches all pages from
|
202
|
-
# category, there is no option to "take first 20 pages". Pages are
|
203
|
-
# fetched in 50-page batches, then parsed. So, for large search query
|
204
|
-
# it can really take a while to fetch all pages.
|
205
|
-
#
|
206
208
|
# @param prefix [String] Page title prefix.
|
209
|
+
# @param limit [Integer, "max"]
|
210
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
211
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
212
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
213
|
+
# while using it.
|
207
214
|
#
|
208
215
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
209
216
|
#
|
210
|
-
def prefixsearch(prefix)
|
211
|
-
list(@
|
217
|
+
def prefixsearch(prefix, limit: 'max', &processor)
|
218
|
+
list(@api.query.generator(:prefixsearch).search(prefix), limit, &processor)
|
212
219
|
end
|
213
220
|
|
214
221
|
# @return [String]
|
@@ -224,14 +231,11 @@ module Infoboxer
|
|
224
231
|
Page.new(self, Parser.paragraphs(source['revisions'].first['*'], traits), source)
|
225
232
|
end
|
226
233
|
|
227
|
-
def list(query)
|
228
|
-
|
229
|
-
|
230
|
-
.prop(:content, :timestamp, :url)
|
231
|
-
.redirects
|
232
|
-
.response
|
234
|
+
def list(query, limit, &processor)
|
235
|
+
request = prepare_request(query.limit(limit), &processor)
|
236
|
+
response = request.response
|
233
237
|
|
234
|
-
response = response.continue while response.continue?
|
238
|
+
response = response.continue while response.continue? && (limit == 'max' || response['pages'].count < limit)
|
235
239
|
|
236
240
|
return Tree::Nodes[] if response['pages'].nil?
|
237
241
|
|
@@ -242,6 +246,11 @@ module Infoboxer
|
|
242
246
|
Tree::Nodes[*pages]
|
243
247
|
end
|
244
248
|
|
249
|
+
def prepare_request(request)
|
250
|
+
request = request.prop(:revisions, :info).prop(:content, :timestamp, :url).redirects
|
251
|
+
block_given? ? yield(request) : request
|
252
|
+
end
|
253
|
+
|
245
254
|
def normalize_category_title(title)
|
246
255
|
# FIXME: shouldn't it go to MediaWiktory?..
|
247
256
|
namespace, titl = title.include?(':') ? title.split(':', 2) : [nil, title]
|
@@ -251,17 +260,26 @@ module Infoboxer
|
|
251
260
|
[namespace, titl].join(':')
|
252
261
|
end
|
253
262
|
|
254
|
-
def user_agent(
|
255
|
-
|
263
|
+
def user_agent(custom)
|
264
|
+
custom || self.class.user_agent || UA
|
265
|
+
end
|
266
|
+
|
267
|
+
def siteinfo
|
268
|
+
@siteinfo ||= @api.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
|
256
269
|
end
|
257
270
|
|
258
|
-
def
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
271
|
+
def interwikis(prefix)
|
272
|
+
@interwikis ||= Hash.new { |h, pre|
|
273
|
+
interwiki = siteinfo['interwikimap'].detect { |iw| iw['prefix'] == prefix } or
|
274
|
+
fail ArgumentError, "Undefined interwiki: #{prefix}"
|
275
|
+
|
276
|
+
# FIXME: fragile, but what can we do?..
|
277
|
+
m = interwiki['url'].match(%r{^(.+)/wiki/\$1$}) or
|
278
|
+
fail ArgumentError, "Interwiki #{interwiki} seems not to be a MediaWiki instance"
|
279
|
+
h[pre] = self.class.new("#{m[1]}/w/api.php") # TODO: copy useragent
|
280
|
+
}
|
281
|
+
|
282
|
+
@interwikis[prefix]
|
265
283
|
end
|
266
284
|
end
|
267
285
|
end
|