infoboxer 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/ci.yml +32 -0
- data/.rubocop_todo.yml +0 -15
- data/CHANGELOG.md +43 -0
- data/Gemfile.lock +172 -0
- data/README.md +1 -1
- data/infoboxer.gemspec +1 -1
- data/lib/infoboxer.rb +23 -11
- data/lib/infoboxer/core_ext.rb +1 -1
- data/lib/infoboxer/definitions/en.wikipedia.org.rb +3 -1
- data/lib/infoboxer/media_wiki.rb +83 -65
- data/lib/infoboxer/media_wiki/page.rb +10 -1
- data/lib/infoboxer/media_wiki/traits.rb +69 -22
- data/lib/infoboxer/navigation.rb +7 -1
- data/lib/infoboxer/navigation/lookup.rb +15 -7
- data/lib/infoboxer/navigation/sections.rb +27 -9
- data/lib/infoboxer/navigation/selector.rb +14 -6
- data/lib/infoboxer/navigation/shortcuts.rb +1 -1
- data/lib/infoboxer/navigation/wikipath.rb +1 -1
- data/lib/infoboxer/parser.rb +2 -2
- data/lib/infoboxer/parser/context.rb +23 -9
- data/lib/infoboxer/parser/html.rb +1 -1
- data/lib/infoboxer/parser/image.rb +2 -2
- data/lib/infoboxer/parser/inline.rb +50 -7
- data/lib/infoboxer/parser/paragraphs.rb +3 -3
- data/lib/infoboxer/parser/table.rb +33 -17
- data/lib/infoboxer/parser/template.rb +5 -4
- data/lib/infoboxer/parser/util.rb +2 -1
- data/lib/infoboxer/templates.rb +2 -0
- data/lib/infoboxer/templates/base.rb +2 -0
- data/lib/infoboxer/templates/set.rb +1 -1
- data/lib/infoboxer/tree.rb +2 -2
- data/lib/infoboxer/tree/compound.rb +3 -3
- data/lib/infoboxer/tree/document.rb +1 -1
- data/lib/infoboxer/tree/gallery.rb +12 -0
- data/lib/infoboxer/tree/html.rb +3 -3
- data/lib/infoboxer/tree/image.rb +4 -4
- data/lib/infoboxer/tree/inline.rb +3 -3
- data/lib/infoboxer/tree/linkable.rb +6 -1
- data/lib/infoboxer/tree/list.rb +4 -5
- data/lib/infoboxer/tree/math.rb +2 -3
- data/lib/infoboxer/tree/node.rb +4 -4
- data/lib/infoboxer/tree/nodes.rb +51 -7
- data/lib/infoboxer/tree/paragraphs.rb +1 -1
- data/lib/infoboxer/tree/ref.rb +1 -1
- data/lib/infoboxer/tree/table.rb +4 -4
- data/lib/infoboxer/tree/template.rb +18 -5
- data/lib/infoboxer/tree/text.rb +11 -11
- data/lib/infoboxer/tree/wikilink.rb +16 -8
- data/lib/infoboxer/version.rb +4 -3
- data/lib/infoboxer/wiki_path.rb +12 -1
- data/regression/pages/2012_bdo_world_darts_championship.wiki +941 -0
- data/regression/pages/progress_wrestling.wiki +1308 -0
- metadata +12 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 755e1283e896d2c2b363983a9b04ac92cb14870cfa2ab67fd62777684bac1352
|
4
|
+
data.tar.gz: c5443d788fc06a2310c65b80bc16531b820bb7142f611f5d063af8992cee7525
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1443ecf7dbb485555a275a4d5390417ee48fabb2321a23fb59c208d32c1424259515924e903872b762afebcaaf5574afd713e4eade6c346aeb8e71719c051436
|
7
|
+
data.tar.gz: 71c007554240c40a7e7f9bee81d66d637b8643f1ccf18a5900c24087545d3c430830d8a30d9c4463244b13419d4edb349aee585885b6aecf03dc8d8b37d701a7
|
@@ -0,0 +1,32 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ master ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ master ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
main:
|
11
|
+
name: >-
|
12
|
+
${{ matrix.ruby }}
|
13
|
+
runs-on: ubuntu-latest
|
14
|
+
strategy:
|
15
|
+
fail-fast: false
|
16
|
+
matrix:
|
17
|
+
ruby: [ 2.6, 2.7, 3.0, head ]
|
18
|
+
|
19
|
+
steps:
|
20
|
+
- name: checkout
|
21
|
+
uses: actions/checkout@v2
|
22
|
+
- name: set up Ruby
|
23
|
+
uses: ruby/setup-ruby@v1
|
24
|
+
with:
|
25
|
+
ruby-version: ${{ matrix.ruby }}
|
26
|
+
|
27
|
+
- name: install dependencies
|
28
|
+
run: bundle install --jobs 3 --retry 3
|
29
|
+
- name: spec
|
30
|
+
run: bundle exec rake spec
|
31
|
+
- name: rubocop
|
32
|
+
run: bundle exec rake rubocop
|
data/.rubocop_todo.yml
CHANGED
@@ -1,16 +1 @@
|
|
1
|
-
# This configuration was generated by
|
2
|
-
# `rubocop --auto-gen-config`
|
3
|
-
# on 2017-06-23 13:52:16 +0300 using RuboCop version 0.49.1.
|
4
|
-
# The point is for the user to remove these configuration records
|
5
|
-
# one by one as the offenses are removed from the code base.
|
6
|
-
# Note that changes in the inspected code, or installation of new
|
7
|
-
# versions of RuboCop, may require this file to be generated again.
|
8
|
-
|
9
|
-
# Offense count: 1
|
10
|
-
Metrics/AbcSize:
|
11
|
-
Max: 29
|
12
|
-
|
13
|
-
# Offense count: 1
|
14
|
-
Metrics/PerceivedComplexity:
|
15
|
-
Max: 10
|
16
1
|
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,48 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
+
## 0.4.0 (2021-05-30)
|
4
|
+
|
5
|
+
* A cluster of bugs found in #81 fixed:
|
6
|
+
* Empty comment (`<!---->`) now processed properly;
|
7
|
+
* Templates that are implicitly inside tables (put on a separate row) now always create
|
8
|
+
an implicit `<TableCell>`
|
9
|
+
* Heading after non-closed table closes the table implicitly instead of being inserted
|
10
|
+
into the last cell.
|
11
|
+
* Drop Ruby < 2.6, and support 3.0 instead.
|
12
|
+
|
13
|
+
PS: Yeah, year-and-almost-half is much better than 2 years between releases, I guess.. And let's call
|
14
|
+
it non-patch version then.
|
15
|
+
|
16
|
+
## 0.3.3 (2020-02-09)
|
17
|
+
|
18
|
+
* Fixed table captions handling (thanks @robfors for reporting)
|
19
|
+
|
20
|
+
PS: Funny that this small bugfix release is exactly two years after the previous one :(
|
21
|
+
|
22
|
+
## 0.3.2 (2018-02-09)
|
23
|
+
|
24
|
+
* Updated MediaWiktory to finally turn on gzip encoding of responses;
|
25
|
+
* Utility methods to expose some internals (`MediaWiki#api`, `Infoboxer#url_for(:wikipedia)`,
|
26
|
+
`Page#namespaces`, `Template#named_variables` and so on);
|
27
|
+
* Fix parsing of lowercase `file:` links in `<gallery>`.
|
28
|
+
|
29
|
+
## 0.3.1 (2017-12-04)
|
30
|
+
|
31
|
+
* (Experimental) new representation of templates, much more readable;
|
32
|
+
* More access to querying process and underlying `MediaWiktory::Wikipedia::Query`;
|
33
|
+
* Finally, `limit` parameter for multi-page queries (category, search, prefixsearch).
|
34
|
+
|
35
|
+
## 0.3.1.pre (2017-09-16)
|
36
|
+
|
37
|
+
* Introduce interwiki links following (and proper handling of interwikis, in general);
|
38
|
+
* Add `<gallery>` tag support;
|
39
|
+
* Introduce `Navigation::Selector#===`;
|
40
|
+
* Many more `Enumerable` methods supported by `Nodes`;
|
41
|
+
* A lot of small simplifications, cleanups and bugfixes.
|
42
|
+
|
43
|
+
TBH, it should be 0.4.0 or more, but it would be a shame to change versions so fast :) So, at least
|
44
|
+
until it is `-pre`, let it be 0.3.1.
|
45
|
+
|
3
46
|
## 0.3.0 (2017-07-23)
|
4
47
|
|
5
48
|
* Change logic of navigation through templates; now templates contents aren't hidden from global
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
GIT
|
2
|
+
remote: https://github.com/zverok/dokaz.git
|
3
|
+
revision: a8a6f0bbeab5589326fe2714cf89842b5f32b850
|
4
|
+
specs:
|
5
|
+
dokaz (0.0.4)
|
6
|
+
ansi
|
7
|
+
rouge
|
8
|
+
slop (~> 3)
|
9
|
+
|
10
|
+
PATH
|
11
|
+
remote: .
|
12
|
+
specs:
|
13
|
+
infoboxer (0.4.0)
|
14
|
+
addressable
|
15
|
+
htmlentities
|
16
|
+
mediawiktory (= 0.1.3)
|
17
|
+
terminal-table
|
18
|
+
|
19
|
+
GEM
|
20
|
+
remote: https://rubygems.org/
|
21
|
+
specs:
|
22
|
+
addressable (2.7.0)
|
23
|
+
public_suffix (>= 2.0.2, < 5.0)
|
24
|
+
ansi (1.5.0)
|
25
|
+
ast (2.4.2)
|
26
|
+
backports (3.21.0)
|
27
|
+
byebug (11.1.3)
|
28
|
+
coveralls (0.8.23)
|
29
|
+
json (>= 1.8, < 3)
|
30
|
+
simplecov (~> 0.16.1)
|
31
|
+
term-ansicolor (~> 1.3)
|
32
|
+
thor (>= 0.19.4, < 2.0)
|
33
|
+
tins (~> 1.6)
|
34
|
+
crack (0.4.5)
|
35
|
+
rexml
|
36
|
+
diff-lcs (1.4.4)
|
37
|
+
docile (1.4.0)
|
38
|
+
faraday (1.4.2)
|
39
|
+
faraday-em_http (~> 1.0)
|
40
|
+
faraday-em_synchrony (~> 1.0)
|
41
|
+
faraday-excon (~> 1.1)
|
42
|
+
faraday-net_http (~> 1.0)
|
43
|
+
faraday-net_http_persistent (~> 1.1)
|
44
|
+
multipart-post (>= 1.2, < 3)
|
45
|
+
ruby2_keywords (>= 0.0.4)
|
46
|
+
faraday-em_http (1.0.0)
|
47
|
+
faraday-em_synchrony (1.0.0)
|
48
|
+
faraday-excon (1.1.0)
|
49
|
+
faraday-net_http (1.0.1)
|
50
|
+
faraday-net_http_persistent (1.1.0)
|
51
|
+
faraday_middleware (1.0.0)
|
52
|
+
faraday (~> 1.0)
|
53
|
+
hashdiff (1.0.1)
|
54
|
+
hashie (4.1.0)
|
55
|
+
htmlentities (4.3.4)
|
56
|
+
io-console (0.5.9)
|
57
|
+
irb (1.3.5)
|
58
|
+
reline (>= 0.1.5)
|
59
|
+
json (2.5.1)
|
60
|
+
mediawiktory (0.1.3)
|
61
|
+
addressable
|
62
|
+
faraday
|
63
|
+
faraday_middleware
|
64
|
+
hashie
|
65
|
+
naught
|
66
|
+
nokogiri
|
67
|
+
multipart-post (2.1.1)
|
68
|
+
naught (1.1.0)
|
69
|
+
nokogiri (1.11.6-x86_64-linux)
|
70
|
+
racc (~> 1.4)
|
71
|
+
parallel (1.20.1)
|
72
|
+
parser (3.0.1.1)
|
73
|
+
ast (~> 2.4.1)
|
74
|
+
public_suffix (4.0.6)
|
75
|
+
racc (1.5.2)
|
76
|
+
rainbow (3.0.0)
|
77
|
+
rake (13.0.3)
|
78
|
+
redcarpet (3.5.1)
|
79
|
+
regexp_parser (2.1.1)
|
80
|
+
reline (0.2.5)
|
81
|
+
io-console (~> 0.5)
|
82
|
+
rexml (3.2.5)
|
83
|
+
rouge (3.26.0)
|
84
|
+
rspec (3.10.0)
|
85
|
+
rspec-core (~> 3.10.0)
|
86
|
+
rspec-expectations (~> 3.10.0)
|
87
|
+
rspec-mocks (~> 3.10.0)
|
88
|
+
rspec-core (3.10.1)
|
89
|
+
rspec-support (~> 3.10.0)
|
90
|
+
rspec-expectations (3.10.1)
|
91
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
92
|
+
rspec-support (~> 3.10.0)
|
93
|
+
rspec-its (1.3.0)
|
94
|
+
rspec-core (>= 3.0.0)
|
95
|
+
rspec-expectations (>= 3.0.0)
|
96
|
+
rspec-mocks (3.10.2)
|
97
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
98
|
+
rspec-support (~> 3.10.0)
|
99
|
+
rspec-support (3.10.2)
|
100
|
+
rubocop (1.15.0)
|
101
|
+
parallel (~> 1.10)
|
102
|
+
parser (>= 3.0.0.0)
|
103
|
+
rainbow (>= 2.2.2, < 4.0)
|
104
|
+
regexp_parser (>= 1.8, < 3.0)
|
105
|
+
rexml
|
106
|
+
rubocop-ast (>= 1.5.0, < 2.0)
|
107
|
+
ruby-progressbar (~> 1.7)
|
108
|
+
unicode-display_width (>= 1.4.0, < 3.0)
|
109
|
+
rubocop-ast (1.7.0)
|
110
|
+
parser (>= 3.0.1.1)
|
111
|
+
rubocop-rspec (2.3.0)
|
112
|
+
rubocop (~> 1.0)
|
113
|
+
rubocop-ast (>= 1.1.0)
|
114
|
+
ruby-prof (1.4.3)
|
115
|
+
ruby-progressbar (1.11.0)
|
116
|
+
ruby2_keywords (0.0.4)
|
117
|
+
rubygems-tasks (0.2.5)
|
118
|
+
irb (~> 1.0)
|
119
|
+
saharspec (0.0.4)
|
120
|
+
simplecov (0.16.1)
|
121
|
+
docile (~> 1.1)
|
122
|
+
json (>= 1.8, < 3)
|
123
|
+
simplecov-html (~> 0.10.0)
|
124
|
+
simplecov-html (0.10.2)
|
125
|
+
slop (3.6.0)
|
126
|
+
sync (0.5.0)
|
127
|
+
term-ansicolor (1.7.1)
|
128
|
+
tins (~> 1.0)
|
129
|
+
terminal-table (3.0.1)
|
130
|
+
unicode-display_width (>= 1.1.1, < 3)
|
131
|
+
thor (1.1.0)
|
132
|
+
timecop (0.9.4)
|
133
|
+
tins (1.29.1)
|
134
|
+
sync
|
135
|
+
unicode-display_width (2.0.0)
|
136
|
+
vcr (6.0.0)
|
137
|
+
webmock (3.13.0)
|
138
|
+
addressable (>= 2.3.6)
|
139
|
+
crack (>= 0.3.2)
|
140
|
+
hashdiff (>= 0.4.0, < 2.0.0)
|
141
|
+
yard (0.9.26)
|
142
|
+
yard-junk (0.0.9)
|
143
|
+
backports (>= 3.18)
|
144
|
+
rainbow
|
145
|
+
yard
|
146
|
+
|
147
|
+
PLATFORMS
|
148
|
+
ruby
|
149
|
+
x86_64-linux
|
150
|
+
|
151
|
+
DEPENDENCIES
|
152
|
+
byebug
|
153
|
+
coveralls
|
154
|
+
dokaz!
|
155
|
+
infoboxer!
|
156
|
+
rake
|
157
|
+
redcarpet
|
158
|
+
rspec (~> 3)
|
159
|
+
rspec-its (~> 1)
|
160
|
+
rubocop (~> 1.15.0)
|
161
|
+
rubocop-rspec (~> 2.3.0)
|
162
|
+
ruby-prof
|
163
|
+
rubygems-tasks
|
164
|
+
saharspec (= 0.0.4)
|
165
|
+
timecop
|
166
|
+
vcr
|
167
|
+
webmock
|
168
|
+
yard (~> 0.9)
|
169
|
+
yard-junk (~> 0.0.7)
|
170
|
+
|
171
|
+
BUNDLED WITH
|
172
|
+
2.2.0
|
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# Infoboxer
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/infoboxer.svg)](http://badge.fury.io/rb/infoboxer)
|
4
|
-
|
4
|
+
![Build Status](https://github.com/molybdenum-99/infoboxer/workflows/CI/badge.svg?branch=master)
|
5
5
|
[![Coverage Status](https://coveralls.io/repos/molybdenum-99/infoboxer/badge.svg?branch=master&service=github)](https://coveralls.io/github/molybdenum-99/infoboxer?branch=master)
|
6
6
|
[![Code Climate](https://codeclimate.com/github/molybdenum-99/infoboxer/badges/gpa.svg)](https://codeclimate.com/github/molybdenum-99/infoboxer)
|
7
7
|
[![Infoboxer Gitter](https://badges.gitter.im/molybdenum-99/infoboxer.svg)](https://gitter.im/molybdenum-99/infoboxer)
|
data/infoboxer.gemspec
CHANGED
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
|
|
32
32
|
s.executables << 'infoboxer'
|
33
33
|
|
34
34
|
s.add_dependency 'htmlentities'
|
35
|
-
s.add_dependency 'mediawiktory', '
|
35
|
+
s.add_dependency 'mediawiktory', '= 0.1.3'
|
36
36
|
s.add_dependency 'addressable'
|
37
37
|
s.add_dependency 'terminal-table'
|
38
38
|
end
|
data/lib/infoboxer.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# Main client module for entire infoboxer functionality. If you're lucky,
|
4
4
|
# there's no other classes/modules you need to instantiate or call
|
@@ -47,9 +47,8 @@
|
|
47
47
|
#
|
48
48
|
module Infoboxer
|
49
49
|
# @private
|
50
|
-
WIKIA_API_URL = 'http://%s.wikia.com/api.php'
|
50
|
+
WIKIA_API_URL = 'http://%s.wikia.com/api.php'
|
51
51
|
|
52
|
-
# @private
|
53
52
|
WIKIMEDIA_PROJECTS = {
|
54
53
|
wikipedia: 'wikipedia.org',
|
55
54
|
wikivoyage: 'wikivoyage.org',
|
@@ -61,7 +60,6 @@ module Infoboxer
|
|
61
60
|
wikisource: 'wikisource.org'
|
62
61
|
}.freeze
|
63
62
|
|
64
|
-
# @private
|
65
63
|
WIKIMEDIA_COMMONS = {
|
66
64
|
commons: 'commons.wikimedia.org',
|
67
65
|
species: 'species.wikimedia.org',
|
@@ -72,11 +70,11 @@ module Infoboxer
|
|
72
70
|
end
|
73
71
|
|
74
72
|
# Includeable version of {Infoboxer.wiki}
|
75
|
-
def wiki(api_url, options
|
76
|
-
wikis[api_url] ||= MediaWiki.new(api_url, options
|
73
|
+
def wiki(api_url, **options)
|
74
|
+
wikis[api_url] ||= MediaWiki.new(api_url, **options)
|
77
75
|
end
|
78
76
|
|
79
|
-
class << self
|
77
|
+
class << self # rubocop:disable Lint/EmptyClass -- that's for YARD!
|
80
78
|
# @!method wiki(api_url, options = {})
|
81
79
|
# Default method for creating MediaWiki API client.
|
82
80
|
#
|
@@ -168,21 +166,35 @@ module Infoboxer
|
|
168
166
|
end
|
169
167
|
|
170
168
|
WIKIMEDIA_PROJECTS.each do |name, domain|
|
171
|
-
define_method name do |lang = 'en', options
|
169
|
+
define_method name do |lang = 'en', **options|
|
172
170
|
lang, options = 'en', lang if lang.is_a?(Hash)
|
173
171
|
|
174
|
-
wiki("https://#{lang}.#{domain}/w/api.php", options)
|
172
|
+
wiki("https://#{lang}.#{domain}/w/api.php", **options)
|
175
173
|
end
|
176
174
|
end
|
177
175
|
|
178
176
|
alias_method :wp, :wikipedia
|
179
177
|
|
180
178
|
# Defines one shortcut method per Wikimedia "commons-like" project (those without a
# language subdomain). Each generated method accepts only client options and returns
# whatever #wiki returns for the project's API URL.
WIKIMEDIA_COMMONS.each do |name, domain|
  define_method name do |**options|
    # Options are splatted as keywords: #wiki is declared as `wiki(api_url, **options)`,
    # and since Ruby 3.0 a positional Hash is no longer converted to keyword arguments
    # (it would raise ArgumentError: wrong number of arguments).
    wiki("https://#{domain}/w/api.php", **options)
  end
end
|
185
183
|
|
184
|
+
# Returns URL of API entry-point for a well-known Wiki-project (wikipedia, wikivoyage etc.)
|
185
|
+
# by project's name.
|
186
|
+
#
|
187
|
+
# @param symbol [Symbol] One of {WIKIMEDIA_PROJECTS} or {WIKIMEDIA_COMMONS} keys.
|
188
|
+
# @param lang [String, Symbol] Language of the project, if applicable.
|
189
|
+
# @return [String]
|
190
|
+
def url_for(symbol, lang = 'en')
|
191
|
+
if (domain = WIKIMEDIA_PROJECTS[symbol])
|
192
|
+
"https://#{lang}.#{domain}/w/api.php"
|
193
|
+
elsif (domain = WIKIMEDIA_COMMONS[symbol])
|
194
|
+
"https://#{domain}/w/api.php"
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
186
198
|
# @!method wikipedia(lang = 'en', options = {})
|
187
199
|
# Includeable version of {Infoboxer.wikipedia}
|
188
200
|
|
@@ -216,7 +228,7 @@ module Infoboxer
|
|
216
228
|
# Includeable version of {Infoboxer.wikia}
|
217
229
|
def wikia(*domains)
|
218
230
|
options = domains.last.is_a?(Hash) ? domains.pop : {}
|
219
|
-
wiki(WIKIA_API_URL % domains.reverse.join('.'), options)
|
231
|
+
wiki(WIKIA_API_URL % domains.reverse.join('.'), **options)
|
220
232
|
end
|
221
233
|
|
222
234
|
# Sets user agent string globally. Default user agent is
|
data/lib/infoboxer/core_ext.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
+
# rubocop:disable Layout/EmptyLinesAroundArguments
|
3
4
|
module Infoboxer
|
4
5
|
MediaWiki::Traits.for('en.wikipedia.org') do
|
5
6
|
templates do
|
@@ -372,3 +373,4 @@ module Infoboxer
|
|
372
373
|
end
|
373
374
|
end
|
374
375
|
end
|
376
|
+
# rubocop:enable Layout/EmptyLinesAroundArguments
|
data/lib/infoboxer/media_wiki.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'mediawiktory'
|
4
4
|
require 'addressable/uri'
|
@@ -27,7 +27,7 @@ module Infoboxer
|
|
27
27
|
# You can set yours as an option to {Infoboxer.wiki} and its shortcuts,
|
28
28
|
# or to {#initialize}
|
29
29
|
UA = "Infoboxer/#{Infoboxer::VERSION} "\
|
30
|
-
'(https://github.com/molybdenum-99/infoboxer; zverok.offline@gmail.com)'
|
30
|
+
'(https://github.com/molybdenum-99/infoboxer; zverok.offline@gmail.com)'
|
31
31
|
|
32
32
|
class << self
|
33
33
|
# User agent getter/setter.
|
@@ -43,43 +43,45 @@ module Infoboxer
|
|
43
43
|
# @private
|
44
44
|
attr_reader :api_base_url, :traits
|
45
45
|
|
46
|
+
# @return [MediaWiktory::Wikipedia::Client]
|
47
|
+
attr_reader :api
|
48
|
+
|
46
49
|
# Creating new MediaWiki client. {Infoboxer.wiki} provides shortcut
|
47
50
|
# for it, as well as shortcuts for some well-known wikis, like
|
48
51
|
# {Infoboxer.wikipedia}.
|
49
52
|
#
|
50
|
-
# @param api_base_url URL of `api.php` file in your MediaWiki
|
53
|
+
# @param api_base_url [String] URL of `api.php` file in your MediaWiki
|
51
54
|
# installation. Typically, it's `<domain>/w/api.php`, but can vary
|
52
55
|
# in different wikis.
|
53
|
-
# @param
|
54
|
-
|
55
|
-
def initialize(api_base_url, options = {})
|
56
|
+
# @param user_agent [String] (also aliased as `:ua`) Custom User-Agent header.
|
57
|
+
def initialize(api_base_url, ua: nil, user_agent: ua)
|
56
58
|
@api_base_url = Addressable::URI.parse(api_base_url)
|
57
|
-
@
|
58
|
-
@traits = Traits.get(@api_base_url.host,
|
59
|
+
@api = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
|
60
|
+
@traits = Traits.get(@api_base_url.host, siteinfo)
|
59
61
|
end
|
60
62
|
|
61
63
|
# Receive "raw" data from Wikipedia (without parsing or wrapping in
|
62
64
|
# classes).
|
63
65
|
#
|
64
66
|
# @param titles [Array<String>] List of page titles to get.
|
65
|
-
# @param
|
66
|
-
# [MediaWiktory::Actions::Query
|
67
|
-
# for the
|
67
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
68
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
69
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
70
|
+
# while using it.
|
68
71
|
#
|
69
72
|
# @return [Hash{String => Hash}] Hash of `{requested title => raw MediaWiki object}`. Note that
|
70
73
|
# even missing (not existing in current Wiki) or invalid (impossible title) pages will still be present
|
71
74
|
# in response; they will just have a `"missing"` or `"invalid"` key, just like MediaWiki returns them.
|
72
|
-
def raw(*titles,
|
75
|
+
def raw(*titles, &processor)
|
73
76
|
# could emerge on "automatically" created page lists, should work
|
74
77
|
return {} if titles.empty?
|
75
78
|
|
76
79
|
titles.each_slice(50).map do |part|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
.response
|
80
|
+
request = prepare_request(@api.query.titles(*part), &processor)
|
81
|
+
response = request.response
|
82
|
+
|
83
|
+
# If additional props are required, there may be additional pages, even despite each_slice(50)
|
84
|
+
response = response.continue while response.continue?
|
83
85
|
|
84
86
|
sources = response['pages'].values.map { |page| [page['title'], page] }.to_h
|
85
87
|
redirects =
|
@@ -103,9 +105,11 @@ module Infoboxer
|
|
103
105
|
# `(titles.count / 50.0).ceil` requests)
|
104
106
|
#
|
105
107
|
# @param titles [Array<String>] List of page titles to get.
|
106
|
-
# @param
|
107
|
-
#
|
108
|
-
#
|
108
|
+
# @param interwiki [Symbol] Identifier of other wiki, related to current, to fetch pages from.
|
109
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
110
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
111
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
112
|
+
# while using it.
|
109
113
|
#
|
110
114
|
# @return [Page, Tree::Nodes<Page>] array of parsed pages. Notes:
|
111
115
|
# * if you call `get` with only one title, one page will be
|
@@ -123,8 +127,10 @@ module Infoboxer
|
|
123
127
|
# and obtain meaningful results instead of `NoMethodError` or
|
124
128
|
# `SomethingNotFound`.
|
125
129
|
#
|
126
|
-
def get(*titles,
|
127
|
-
|
130
|
+
def get(*titles, interwiki: nil, &processor)
|
131
|
+
return interwikis(interwiki).get(*titles, &processor) if interwiki
|
132
|
+
|
133
|
+
pages = get_h(*titles, &processor).values.compact
|
128
134
|
titles.count == 1 ? pages.first : Tree::Nodes[*pages]
|
129
135
|
end
|
130
136
|
|
@@ -141,14 +147,15 @@ module Infoboxer
|
|
141
147
|
# you've received.
|
142
148
|
#
|
143
149
|
# @param titles [Array<String>] List of page titles to get.
|
144
|
-
# @param
|
145
|
-
# [MediaWiktory::Actions::Query
|
146
|
-
# for the
|
150
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
151
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
152
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
153
|
+
# while using it.
|
147
154
|
#
|
148
155
|
# @return [Hash<String, Page>]
|
149
156
|
#
|
150
|
-
def get_h(*titles,
|
151
|
-
raw_pages = raw(*titles,
|
157
|
+
def get_h(*titles, &processor)
|
158
|
+
raw_pages = raw(*titles, &processor)
|
152
159
|
.tap { |ps| ps.detect { |_, p| p['invalid'] }.tap { |_, i| i && fail(i['invalidreason']) } }
|
153
160
|
.reject { |_, p| p.key?('missing') }
|
154
161
|
titles.map { |title| [title, make_page(raw_pages, title)] }.to_h
|
@@ -156,59 +163,59 @@ module Infoboxer
|
|
156
163
|
|
157
164
|
# Receive list of parsed MediaWiki pages from specified category.
|
158
165
|
#
|
159
|
-
# **NB**: currently, this API **always** fetches all pages from
|
160
|
-
# category, there is no option to "take first 20 pages". Pages are
|
161
|
-
# fetched in 50-page batches, then parsed. So, for large category
|
162
|
-
# it can really take a while to fetch all pages.
|
163
|
-
#
|
164
166
|
# @param title [String] Category title. You can use namespaceless title (like
|
165
167
|
# `"Countries in South America"`), title with namespace (like
|
166
168
|
# `"Category:Countries in South America"`) or title with local
|
167
169
|
# namespace (like `"Catégorie:Argentine"` for French Wikipedia)
|
170
|
+
# @param limit [Integer, "max"]
|
171
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
172
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
173
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
174
|
+
# while using it.
|
168
175
|
#
|
169
176
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
170
177
|
#
|
171
|
-
def category(title)
|
178
|
+
def category(title, limit: 'max', &processor)
|
172
179
|
title = normalize_category_title(title)
|
173
180
|
|
174
|
-
list(@
|
181
|
+
list(@api.query.generator(:categorymembers).title(title), limit, &processor)
|
175
182
|
end
|
176
183
|
|
177
184
|
# Receive list of parsed MediaWiki pages for provided search query.
|
178
185
|
# See [MediaWiki API docs](https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bsearch)
|
179
186
|
# for details.
|
180
187
|
#
|
181
|
-
# **NB**: currently, this API **always** fetches all pages from
|
182
|
-
# category, there is no option to "take first 20 pages". Pages are
|
183
|
-
# fetched in 50-page batches, then parsed. So, for large search query
|
184
|
-
# it can really take a while to fetch all pages.
|
185
|
-
#
|
186
188
|
# @param query [String] Search query. For old installations, look at
|
187
189
|
# https://www.mediawiki.org/wiki/Help:Searching
|
188
190
|
# for search syntax. For new ones (including Wikipedia), see at
|
189
191
|
# https://www.mediawiki.org/wiki/Help:CirrusSearch.
|
192
|
+
# @param limit [Integer, "max"]
|
193
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
194
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
195
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
196
|
+
# while using it.
|
190
197
|
#
|
191
198
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
192
199
|
#
|
193
|
-
def search(query)
|
194
|
-
list(@
|
200
|
+
def search(query, limit: 'max', &processor)
|
201
|
+
list(@api.query.generator(:search).search(query), limit, &processor)
|
195
202
|
end
|
196
203
|
|
197
204
|
# Receive list of parsed MediaWiki pages with titles starting with prefix.
|
198
205
|
# See [MediaWiki API docs](https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bprefixsearch)
|
199
206
|
# for details.
|
200
207
|
#
|
201
|
-
# **NB**: currently, this API **always** fetches all pages from
|
202
|
-
# category, there is no option to "take first 20 pages". Pages are
|
203
|
-
# fetched in 50-page batches, then parsed. So, for large search query
|
204
|
-
# it can really take a while to fetch all pages.
|
205
|
-
#
|
206
208
|
# @param prefix [String] Page title prefix.
|
209
|
+
# @param limit [Integer, "max"]
|
210
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
211
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
212
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
213
|
+
# while using it.
|
207
214
|
#
|
208
215
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
209
216
|
#
|
210
|
-
def prefixsearch(prefix)
|
211
|
-
list(@
|
217
|
+
def prefixsearch(prefix, limit: 'max', &processor)
|
218
|
+
list(@api.query.generator(:prefixsearch).search(prefix), limit, &processor)
|
212
219
|
end
|
213
220
|
|
214
221
|
# @return [String]
|
@@ -224,14 +231,11 @@ module Infoboxer
|
|
224
231
|
Page.new(self, Parser.paragraphs(source['revisions'].first['*'], traits), source)
|
225
232
|
end
|
226
233
|
|
227
|
-
def list(query)
|
228
|
-
|
229
|
-
|
230
|
-
.prop(:content, :timestamp, :url)
|
231
|
-
.redirects
|
232
|
-
.response
|
234
|
+
def list(query, limit, &processor)
|
235
|
+
request = prepare_request(query.limit(limit), &processor)
|
236
|
+
response = request.response
|
233
237
|
|
234
|
-
response = response.continue while response.continue?
|
238
|
+
response = response.continue while response.continue? && (limit == 'max' || response['pages'].count < limit)
|
235
239
|
|
236
240
|
return Tree::Nodes[] if response['pages'].nil?
|
237
241
|
|
@@ -242,6 +246,11 @@ module Infoboxer
|
|
242
246
|
Tree::Nodes[*pages]
|
243
247
|
end
|
244
248
|
|
249
|
+
def prepare_request(request)
|
250
|
+
request = request.prop(:revisions, :info).prop(:content, :timestamp, :url).redirects
|
251
|
+
block_given? ? yield(request) : request
|
252
|
+
end
|
253
|
+
|
245
254
|
def normalize_category_title(title)
|
246
255
|
# FIXME: shouldn't it go to MediaWiktory?..
|
247
256
|
namespace, titl = title.include?(':') ? title.split(':', 2) : [nil, title]
|
@@ -251,17 +260,26 @@ module Infoboxer
|
|
251
260
|
[namespace, titl].join(':')
|
252
261
|
end
|
253
262
|
|
254
|
-
def user_agent(
|
255
|
-
|
263
|
+
def user_agent(custom)
|
264
|
+
custom || self.class.user_agent || UA
|
265
|
+
end
|
266
|
+
|
267
|
+
def siteinfo
|
268
|
+
@siteinfo ||= @api.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
|
256
269
|
end
|
257
270
|
|
258
|
-
def
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
271
|
+
# Returns (and memoizes per prefix) a client for another wiki that this wiki knows
# under the given interwiki prefix.
#
# @param prefix [String, Symbol] interwiki prefix; matched against the `'prefix'` values of
#   the `'interwikimap'` list in #siteinfo (presumably plain strings, as they come from the
#   API response -- hence the `to_s` normalization below).
# @return [Object] new instance of the receiver's own class, pointed at `<base>/w/api.php`
#   of the target wiki.
# @raise [ArgumentError] if the prefix is not listed in the interwiki map, or if its URL
#   does not look like `<base>/wiki/$1`.
def interwikis(prefix)
  @interwikis ||= Hash.new { |cache, pre|
    # NB: everything here must use the block's own key (`pre`). The block is created once,
    # on the first call, and reused for all later lookups -- referring to the method's
    # `prefix` would silently resolve every other prefix to the first one requested.
    interwiki = siteinfo['interwikimap'].detect { |iw| iw['prefix'] == pre } or
      fail ArgumentError, "Undefined interwiki: #{pre}"

    # FIXME: fragile, but what can we do?..
    m = interwiki['url'].match(%r{^(.+)/wiki/\$1$}) or
      fail ArgumentError, "Interwiki #{interwiki} seems not to be a MediaWiki instance"
    cache[pre] = self.class.new("#{m[1]}/w/api.php") # TODO: copy useragent
  }

  # Normalize the key so that `:de` and `'de'` share one cached client.
  @interwikis[prefix.to_s]
end
|
266
284
|
end
|
267
285
|
end
|