infoboxer 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yml +32 -0
  3. data/.rubocop_todo.yml +0 -15
  4. data/CHANGELOG.md +43 -0
  5. data/Gemfile.lock +172 -0
  6. data/README.md +1 -1
  7. data/infoboxer.gemspec +1 -1
  8. data/lib/infoboxer.rb +23 -11
  9. data/lib/infoboxer/core_ext.rb +1 -1
  10. data/lib/infoboxer/definitions/en.wikipedia.org.rb +3 -1
  11. data/lib/infoboxer/media_wiki.rb +83 -65
  12. data/lib/infoboxer/media_wiki/page.rb +10 -1
  13. data/lib/infoboxer/media_wiki/traits.rb +69 -22
  14. data/lib/infoboxer/navigation.rb +7 -1
  15. data/lib/infoboxer/navigation/lookup.rb +15 -7
  16. data/lib/infoboxer/navigation/sections.rb +27 -9
  17. data/lib/infoboxer/navigation/selector.rb +14 -6
  18. data/lib/infoboxer/navigation/shortcuts.rb +1 -1
  19. data/lib/infoboxer/navigation/wikipath.rb +1 -1
  20. data/lib/infoboxer/parser.rb +2 -2
  21. data/lib/infoboxer/parser/context.rb +23 -9
  22. data/lib/infoboxer/parser/html.rb +1 -1
  23. data/lib/infoboxer/parser/image.rb +2 -2
  24. data/lib/infoboxer/parser/inline.rb +50 -7
  25. data/lib/infoboxer/parser/paragraphs.rb +3 -3
  26. data/lib/infoboxer/parser/table.rb +33 -17
  27. data/lib/infoboxer/parser/template.rb +5 -4
  28. data/lib/infoboxer/parser/util.rb +2 -1
  29. data/lib/infoboxer/templates.rb +2 -0
  30. data/lib/infoboxer/templates/base.rb +2 -0
  31. data/lib/infoboxer/templates/set.rb +1 -1
  32. data/lib/infoboxer/tree.rb +2 -2
  33. data/lib/infoboxer/tree/compound.rb +3 -3
  34. data/lib/infoboxer/tree/document.rb +1 -1
  35. data/lib/infoboxer/tree/gallery.rb +12 -0
  36. data/lib/infoboxer/tree/html.rb +3 -3
  37. data/lib/infoboxer/tree/image.rb +4 -4
  38. data/lib/infoboxer/tree/inline.rb +3 -3
  39. data/lib/infoboxer/tree/linkable.rb +6 -1
  40. data/lib/infoboxer/tree/list.rb +4 -5
  41. data/lib/infoboxer/tree/math.rb +2 -3
  42. data/lib/infoboxer/tree/node.rb +4 -4
  43. data/lib/infoboxer/tree/nodes.rb +51 -7
  44. data/lib/infoboxer/tree/paragraphs.rb +1 -1
  45. data/lib/infoboxer/tree/ref.rb +1 -1
  46. data/lib/infoboxer/tree/table.rb +4 -4
  47. data/lib/infoboxer/tree/template.rb +18 -5
  48. data/lib/infoboxer/tree/text.rb +11 -11
  49. data/lib/infoboxer/tree/wikilink.rb +16 -8
  50. data/lib/infoboxer/version.rb +4 -3
  51. data/lib/infoboxer/wiki_path.rb +12 -1
  52. data/regression/pages/2012_bdo_world_darts_championship.wiki +941 -0
  53. data/regression/pages/progress_wrestling.wiki +1308 -0
  54. metadata +12 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: be65bc91a5370bc24553e500754f413196caed76
4
- data.tar.gz: d24dca5a13a64d563ddc473197a732a9f95884fd
2
+ SHA256:
3
+ metadata.gz: 755e1283e896d2c2b363983a9b04ac92cb14870cfa2ab67fd62777684bac1352
4
+ data.tar.gz: c5443d788fc06a2310c65b80bc16531b820bb7142f611f5d063af8992cee7525
5
5
  SHA512:
6
- metadata.gz: 661c06d6703db103035f61f55ebee5f0bc8a5f9ad182fcaf2a22be9f91063a9abd80bac1793fabae436b28bed274fcb7908219ba07453f46271a25d1cba0367a
7
- data.tar.gz: 25029633e6516c30a7de21433db1903ab923af2c74082b3a3c9322b50b170cdb104652b95d261b3033a070629ee478a8b5510a7298fbef830d1445dfe56157d0
6
+ metadata.gz: 1443ecf7dbb485555a275a4d5390417ee48fabb2321a23fb59c208d32c1424259515924e903872b762afebcaaf5574afd713e4eade6c346aeb8e71719c051436
7
+ data.tar.gz: 71c007554240c40a7e7f9bee81d66d637b8643f1ccf18a5900c24087545d3c430830d8a30d9c4463244b13419d4edb349aee585885b6aecf03dc8d8b37d701a7
@@ -0,0 +1,32 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ master ]
6
+ pull_request:
7
+ branches: [ master ]
8
+
9
+ jobs:
10
+ main:
11
+ name: >-
12
+ ${{ matrix.ruby }}
13
+ runs-on: ubuntu-latest
14
+ strategy:
15
+ fail-fast: false
16
+ matrix:
17
+ ruby: [ 2.6, 2.7, 3.0, head ]
18
+
19
+ steps:
20
+ - name: checkout
21
+ uses: actions/checkout@v2
22
+ - name: set up Ruby
23
+ uses: ruby/setup-ruby@v1
24
+ with:
25
+ ruby-version: ${{ matrix.ruby }}
26
+
27
+ - name: install dependencies
28
+ run: bundle install --jobs 3 --retry 3
29
+ - name: spec
30
+ run: bundle exec rake spec
31
+ - name: rubocop
32
+ run: bundle exec rake rubocop
data/.rubocop_todo.yml CHANGED
@@ -1,16 +1 @@
1
- # This configuration was generated by
2
- # `rubocop --auto-gen-config`
3
- # on 2017-06-23 13:52:16 +0300 using RuboCop version 0.49.1.
4
- # The point is for the user to remove these configuration records
5
- # one by one as the offenses are removed from the code base.
6
- # Note that changes in the inspected code, or installation of new
7
- # versions of RuboCop, may require this file to be generated again.
8
-
9
- # Offense count: 1
10
- Metrics/AbcSize:
11
- Max: 29
12
-
13
- # Offense count: 1
14
- Metrics/PerceivedComplexity:
15
- Max: 10
16
1
 
data/CHANGELOG.md CHANGED
@@ -1,5 +1,48 @@
1
1
  # Infoboxer's change log
2
2
 
3
+ ## 0.4.0 (2021-05-30)
4
+
5
+ * A cluster of bugs found in #81 fixed:
6
+ * Empty comment (`<!---->`) now processed properly;
7
+ * Templates that are implicitly inside tables (put on a separate row) now always create
8
+ an implicit `<TableCell>`
9
+ * Heading after non-closed table closes the table implicitly instead of being inserted
10
+ into the last cell.
11
+ * Drop Ruby < 2.6, and support 3.0 instead.
12
+
13
+ PS: Yeah, year-and-almost-half is much better than 2 years between releases, I guess.. And let's call
14
+ it non-patch version then.
15
+
16
+ ## 0.3.3 (2020-02-09)
17
+
18
+ * Fixed table captions handling (thanks @robfors for reporting)
19
+
20
+ PS: Funny that this small bugfix release is exactly two years after the previous one :(
21
+
22
+ ## 0.3.2 (2018-02-09)
23
+
24
+ * Updated MediaWiktory to finally turn on gzip encoding of responses;
25
+ * Utility methods to expose some internals (`MediaWiki#api`, `Infoboxer#url_for(:wikipedia)`,
26
+ `Page#namespaces`, `Template#named_variables` and so on);
27
+ * Fix parsing of lowercase `file:` links in `<gallery>`.
28
+
29
+ ## 0.3.1 (2017-12-04)
30
+
31
+ * (Experimental) new representation of templates, much more readable;
32
+ * More access to querying process and underlying `MediaWiktory::Wikipedia::Query`;
33
+ * Finally, `limit` parameter for multi-page queries (category, search, prefixsearch).
34
+
35
+ ## 0.3.1.pre (2017-09-16)
36
+
37
+ * Introduce interwiki links following (and proper handling of interwikis, in general);
38
+ * Add `<gallery>` tag support;
39
+ * Introduce `Navigation::Selector#===`;
40
+ * Many more `Enumerable` methods supported by `Nodes`;
41
+ * Lots of small simplifications, cleanups and bugfixes.
42
+
43
+ TBH, it should be 0.4.0 or more, but it would be a shame to change versions so fast :) So, at least
44
+ until it is `-pre`, let it be 0.3.1.
45
+
3
46
  ## 0.3.0 (2017-07-23)
4
47
 
5
48
  * Change logic of navigation through templates; now templates contents aren't hidden from global
data/Gemfile.lock ADDED
@@ -0,0 +1,172 @@
1
+ GIT
2
+ remote: https://github.com/zverok/dokaz.git
3
+ revision: a8a6f0bbeab5589326fe2714cf89842b5f32b850
4
+ specs:
5
+ dokaz (0.0.4)
6
+ ansi
7
+ rouge
8
+ slop (~> 3)
9
+
10
+ PATH
11
+ remote: .
12
+ specs:
13
+ infoboxer (0.4.0)
14
+ addressable
15
+ htmlentities
16
+ mediawiktory (= 0.1.3)
17
+ terminal-table
18
+
19
+ GEM
20
+ remote: https://rubygems.org/
21
+ specs:
22
+ addressable (2.7.0)
23
+ public_suffix (>= 2.0.2, < 5.0)
24
+ ansi (1.5.0)
25
+ ast (2.4.2)
26
+ backports (3.21.0)
27
+ byebug (11.1.3)
28
+ coveralls (0.8.23)
29
+ json (>= 1.8, < 3)
30
+ simplecov (~> 0.16.1)
31
+ term-ansicolor (~> 1.3)
32
+ thor (>= 0.19.4, < 2.0)
33
+ tins (~> 1.6)
34
+ crack (0.4.5)
35
+ rexml
36
+ diff-lcs (1.4.4)
37
+ docile (1.4.0)
38
+ faraday (1.4.2)
39
+ faraday-em_http (~> 1.0)
40
+ faraday-em_synchrony (~> 1.0)
41
+ faraday-excon (~> 1.1)
42
+ faraday-net_http (~> 1.0)
43
+ faraday-net_http_persistent (~> 1.1)
44
+ multipart-post (>= 1.2, < 3)
45
+ ruby2_keywords (>= 0.0.4)
46
+ faraday-em_http (1.0.0)
47
+ faraday-em_synchrony (1.0.0)
48
+ faraday-excon (1.1.0)
49
+ faraday-net_http (1.0.1)
50
+ faraday-net_http_persistent (1.1.0)
51
+ faraday_middleware (1.0.0)
52
+ faraday (~> 1.0)
53
+ hashdiff (1.0.1)
54
+ hashie (4.1.0)
55
+ htmlentities (4.3.4)
56
+ io-console (0.5.9)
57
+ irb (1.3.5)
58
+ reline (>= 0.1.5)
59
+ json (2.5.1)
60
+ mediawiktory (0.1.3)
61
+ addressable
62
+ faraday
63
+ faraday_middleware
64
+ hashie
65
+ naught
66
+ nokogiri
67
+ multipart-post (2.1.1)
68
+ naught (1.1.0)
69
+ nokogiri (1.11.6-x86_64-linux)
70
+ racc (~> 1.4)
71
+ parallel (1.20.1)
72
+ parser (3.0.1.1)
73
+ ast (~> 2.4.1)
74
+ public_suffix (4.0.6)
75
+ racc (1.5.2)
76
+ rainbow (3.0.0)
77
+ rake (13.0.3)
78
+ redcarpet (3.5.1)
79
+ regexp_parser (2.1.1)
80
+ reline (0.2.5)
81
+ io-console (~> 0.5)
82
+ rexml (3.2.5)
83
+ rouge (3.26.0)
84
+ rspec (3.10.0)
85
+ rspec-core (~> 3.10.0)
86
+ rspec-expectations (~> 3.10.0)
87
+ rspec-mocks (~> 3.10.0)
88
+ rspec-core (3.10.1)
89
+ rspec-support (~> 3.10.0)
90
+ rspec-expectations (3.10.1)
91
+ diff-lcs (>= 1.2.0, < 2.0)
92
+ rspec-support (~> 3.10.0)
93
+ rspec-its (1.3.0)
94
+ rspec-core (>= 3.0.0)
95
+ rspec-expectations (>= 3.0.0)
96
+ rspec-mocks (3.10.2)
97
+ diff-lcs (>= 1.2.0, < 2.0)
98
+ rspec-support (~> 3.10.0)
99
+ rspec-support (3.10.2)
100
+ rubocop (1.15.0)
101
+ parallel (~> 1.10)
102
+ parser (>= 3.0.0.0)
103
+ rainbow (>= 2.2.2, < 4.0)
104
+ regexp_parser (>= 1.8, < 3.0)
105
+ rexml
106
+ rubocop-ast (>= 1.5.0, < 2.0)
107
+ ruby-progressbar (~> 1.7)
108
+ unicode-display_width (>= 1.4.0, < 3.0)
109
+ rubocop-ast (1.7.0)
110
+ parser (>= 3.0.1.1)
111
+ rubocop-rspec (2.3.0)
112
+ rubocop (~> 1.0)
113
+ rubocop-ast (>= 1.1.0)
114
+ ruby-prof (1.4.3)
115
+ ruby-progressbar (1.11.0)
116
+ ruby2_keywords (0.0.4)
117
+ rubygems-tasks (0.2.5)
118
+ irb (~> 1.0)
119
+ saharspec (0.0.4)
120
+ simplecov (0.16.1)
121
+ docile (~> 1.1)
122
+ json (>= 1.8, < 3)
123
+ simplecov-html (~> 0.10.0)
124
+ simplecov-html (0.10.2)
125
+ slop (3.6.0)
126
+ sync (0.5.0)
127
+ term-ansicolor (1.7.1)
128
+ tins (~> 1.0)
129
+ terminal-table (3.0.1)
130
+ unicode-display_width (>= 1.1.1, < 3)
131
+ thor (1.1.0)
132
+ timecop (0.9.4)
133
+ tins (1.29.1)
134
+ sync
135
+ unicode-display_width (2.0.0)
136
+ vcr (6.0.0)
137
+ webmock (3.13.0)
138
+ addressable (>= 2.3.6)
139
+ crack (>= 0.3.2)
140
+ hashdiff (>= 0.4.0, < 2.0.0)
141
+ yard (0.9.26)
142
+ yard-junk (0.0.9)
143
+ backports (>= 3.18)
144
+ rainbow
145
+ yard
146
+
147
+ PLATFORMS
148
+ ruby
149
+ x86_64-linux
150
+
151
+ DEPENDENCIES
152
+ byebug
153
+ coveralls
154
+ dokaz!
155
+ infoboxer!
156
+ rake
157
+ redcarpet
158
+ rspec (~> 3)
159
+ rspec-its (~> 1)
160
+ rubocop (~> 1.15.0)
161
+ rubocop-rspec (~> 2.3.0)
162
+ ruby-prof
163
+ rubygems-tasks
164
+ saharspec (= 0.0.4)
165
+ timecop
166
+ vcr
167
+ webmock
168
+ yard (~> 0.9)
169
+ yard-junk (~> 0.0.7)
170
+
171
+ BUNDLED WITH
172
+ 2.2.0
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # Infoboxer
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/infoboxer.svg)](http://badge.fury.io/rb/infoboxer)
4
- [![Build Status](https://travis-ci.org/molybdenum-99/infoboxer.svg?branch=master)](https://travis-ci.org/molybdenum-99/infoboxer)
4
+ ![Build Status](https://github.com/molybdenum-99/infoboxer/workflows/CI/badge.svg?branch=master)
5
5
  [![Coverage Status](https://coveralls.io/repos/molybdenum-99/infoboxer/badge.svg?branch=master&service=github)](https://coveralls.io/github/molybdenum-99/infoboxer?branch=master)
6
6
  [![Code Climate](https://codeclimate.com/github/molybdenum-99/infoboxer/badges/gpa.svg)](https://codeclimate.com/github/molybdenum-99/infoboxer)
7
7
  [![Infoboxer Gitter](https://badges.gitter.im/molybdenum-99/infoboxer.svg)](https://gitter.im/molybdenum-99/infoboxer)
data/infoboxer.gemspec CHANGED
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
32
32
  s.executables << 'infoboxer'
33
33
 
34
34
  s.add_dependency 'htmlentities'
35
- s.add_dependency 'mediawiktory', '>= 0.1.0'
35
+ s.add_dependency 'mediawiktory', '= 0.1.3'
36
36
  s.add_dependency 'addressable'
37
37
  s.add_dependency 'terminal-table'
38
38
  end
data/lib/infoboxer.rb CHANGED
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  # Main client module for entire infoboxer functionality. If you're lucky,
4
4
  # there's no other classes/modules you need to instantiate or call
@@ -47,9 +47,8 @@
47
47
  #
48
48
  module Infoboxer
49
49
  # @private
50
- WIKIA_API_URL = 'http://%s.wikia.com/api.php'.freeze
50
+ WIKIA_API_URL = 'http://%s.wikia.com/api.php'
51
51
 
52
- # @private
53
52
  WIKIMEDIA_PROJECTS = {
54
53
  wikipedia: 'wikipedia.org',
55
54
  wikivoyage: 'wikivoyage.org',
@@ -61,7 +60,6 @@ module Infoboxer
61
60
  wikisource: 'wikisource.org'
62
61
  }.freeze
63
62
 
64
- # @private
65
63
  WIKIMEDIA_COMMONS = {
66
64
  commons: 'commons.wikimedia.org',
67
65
  species: 'species.wikimedia.org',
@@ -72,11 +70,11 @@ module Infoboxer
72
70
  end
73
71
 
74
72
  # Includeable version of {Infoboxer.wiki}
75
- def wiki(api_url, options = {})
76
- wikis[api_url] ||= MediaWiki.new(api_url, options || {})
73
+ def wiki(api_url, **options)
74
+ wikis[api_url] ||= MediaWiki.new(api_url, **options)
77
75
  end
78
76
 
79
- class << self
77
+ class << self # rubocop:disable Lint/EmptyClass -- that's for YARD!
80
78
  # @!method wiki(api_url, options = {})
81
79
  # Default method for creating MediaWiki API client.
82
80
  #
@@ -168,21 +166,35 @@ module Infoboxer
168
166
  end
169
167
 
170
168
  WIKIMEDIA_PROJECTS.each do |name, domain|
171
- define_method name do |lang = 'en', options = {}|
169
+ define_method name do |lang = 'en', **options|
172
170
  lang, options = 'en', lang if lang.is_a?(Hash)
173
171
 
174
- wiki("https://#{lang}.#{domain}/w/api.php", options)
172
+ wiki("https://#{lang}.#{domain}/w/api.php", **options)
175
173
  end
176
174
  end
177
175
 
178
176
  alias_method :wp, :wikipedia
179
177
 
180
178
  WIKIMEDIA_COMMONS.each do |name, domain|
181
- define_method name do |options = {}|
179
+ define_method name do |**options|
182
180
  wiki("https://#{domain}/w/api.php", options)
183
181
  end
184
182
  end
185
183
 
184
+ # Returns URL of API entry-point for a well-known Wiki-project (wikipedia, wikivoyage etc.)
185
+ # by project's name.
186
+ #
187
+ # @param symbol [Symbol] One of {WIKIMEDIA_PROJECTS} or {WIKIMEDIA_COMMONS} keys.
188
+ # @param lang [String, Symbol] Language of the project, if applicable.
189
+ # @return [String]
190
+ def url_for(symbol, lang = 'en')
191
+ if (domain = WIKIMEDIA_PROJECTS[symbol])
192
+ "https://#{lang}.#{domain}/w/api.php"
193
+ elsif (domain = WIKIMEDIA_COMMONS[symbol])
194
+ "https://#{domain}/w/api.php"
195
+ end
196
+ end
197
+
186
198
  # @!method wikipedia(lang = 'en', options = {})
187
199
  # Includeable version of {Infoboxer.wikipedia}
188
200
 
@@ -216,7 +228,7 @@ module Infoboxer
216
228
  # Includeable version of {Infoboxer.wikia}
217
229
  def wikia(*domains)
218
230
  options = domains.last.is_a?(Hash) ? domains.pop : {}
219
- wiki(WIKIA_API_URL % domains.reverse.join('.'), options)
231
+ wiki(WIKIA_API_URL % domains.reverse.join('.'), **options)
220
232
  end
221
233
 
222
234
  # Sets user agent string globally. Default user agent is
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  # @private
4
4
  class Object
@@ -1,5 +1,6 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
+ # rubocop:disable Layout/EmptyLinesAroundArguments
3
4
  module Infoboxer
4
5
  MediaWiki::Traits.for('en.wikipedia.org') do
5
6
  templates do
@@ -372,3 +373,4 @@ module Infoboxer
372
373
  end
373
374
  end
374
375
  end
376
+ # rubocop:enable Layout/EmptyLinesAroundArguments
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  require 'mediawiktory'
4
4
  require 'addressable/uri'
@@ -27,7 +27,7 @@ module Infoboxer
27
27
  # You can set yours as an option to {Infoboxer.wiki} and its shortcuts,
28
28
  # or to {#initialize}
29
29
  UA = "Infoboxer/#{Infoboxer::VERSION} "\
30
- '(https://github.com/molybdenum-99/infoboxer; zverok.offline@gmail.com)'.freeze
30
+ '(https://github.com/molybdenum-99/infoboxer; zverok.offline@gmail.com)'
31
31
 
32
32
  class << self
33
33
  # User agent getter/setter.
@@ -43,43 +43,45 @@ module Infoboxer
43
43
  # @private
44
44
  attr_reader :api_base_url, :traits
45
45
 
46
+ # @return [MediaWiktory::Wikipedia::Client]
47
+ attr_reader :api
48
+
46
49
  # Creating new MediaWiki client. {Infoboxer.wiki} provides shortcut
47
50
  # for it, as well as shortcuts for some well-known wikis, like
48
51
  # {Infoboxer.wikipedia}.
49
52
  #
50
- # @param api_base_url URL of `api.php` file in your MediaWiki
53
+ # @param api_base_url [String] URL of `api.php` file in your MediaWiki
51
54
  # installation. Typically, it's `<domain>/w/api.php`, but can vary
52
55
  # in different wikis.
53
- # @param options Only one option is currently supported:
54
- # * `:user_agent` (also aliased as `:ua`) -- custom User-Agent header.
55
- def initialize(api_base_url, options = {})
56
+ # @param user_agent [String] (also aliased as `:ua`) Custom User-Agent header.
57
+ def initialize(api_base_url, ua: nil, user_agent: ua)
56
58
  @api_base_url = Addressable::URI.parse(api_base_url)
57
- @client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(options))
58
- @traits = Traits.get(@api_base_url.host, namespaces: extract_namespaces)
59
+ @api = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
60
+ @traits = Traits.get(@api_base_url.host, siteinfo)
59
61
  end
60
62
 
61
63
  # Receive "raw" data from Wikipedia (without parsing or wrapping in
62
64
  # classes).
63
65
  #
64
66
  # @param titles [Array<String>] List of page titles to get.
65
- # @param prop [Array<Symbol>] List of additional page properties to get, refer to
66
- # [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
67
- # for the list of available properties.
67
+ # @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
68
+ # [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
69
+ # for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
70
+ # while using it.
68
71
  #
69
72
  # @return [Hash{String => Hash}] Hash of `{requested title => raw MediaWiki object}`. Note that
70
73
  # even missing (does not exist in current Wiki) or invalid (impossible title) pages will still be present
71
74
  # in the response; they will just have a `"missing"` or `"invalid"` key, exactly as MediaWiki returns them.
72
- def raw(*titles, prop: [])
75
+ def raw(*titles, &processor)
73
76
  # could emerge on "automatically" created page lists, should work
74
77
  return {} if titles.empty?
75
78
 
76
79
  titles.each_slice(50).map do |part|
77
- response = @client
78
- .query
79
- .titles(*part)
80
- .prop(:revisions, :info, *prop).prop(:content, :timestamp, :url)
81
- .redirects
82
- .response
80
+ request = prepare_request(@api.query.titles(*part), &processor)
81
+ response = request.response
82
+
83
+ # If additional props are required, there may be additional pages, even despite each_slice(50)
84
+ response = response.continue while response.continue?
83
85
 
84
86
  sources = response['pages'].values.map { |page| [page['title'], page] }.to_h
85
87
  redirects =
@@ -103,9 +105,11 @@ module Infoboxer
103
105
  # `(titles.count / 50.0).ceil` requests)
104
106
  #
105
107
  # @param titles [Array<String>] List of page titles to get.
106
- # @param prop [Array<Symbol>] List of additional page properties to get, refer to
107
- # [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
108
- # for the list of available properties.
108
+ # @param interwiki [Symbol] Identifier of other wiki, related to current, to fetch pages from.
109
+ # @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
110
+ # [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
111
+ # for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
112
+ # while using it.
109
113
  #
110
114
  # @return [Page, Tree::Nodes<Page>] array of parsed pages. Notes:
111
115
  # * if you call `get` with only one title, one page will be
@@ -123,8 +127,10 @@ module Infoboxer
123
127
  # and obtain meaningful results instead of `NoMethodError` or
124
128
  # `SomethingNotFound`.
125
129
  #
126
- def get(*titles, prop: [])
127
- pages = get_h(*titles, prop: prop).values.compact
130
+ def get(*titles, interwiki: nil, &processor)
131
+ return interwikis(interwiki).get(*titles, &processor) if interwiki
132
+
133
+ pages = get_h(*titles, &processor).values.compact
128
134
  titles.count == 1 ? pages.first : Tree::Nodes[*pages]
129
135
  end
130
136
 
@@ -141,14 +147,15 @@ module Infoboxer
141
147
  # you've received.
142
148
  #
143
149
  # @param titles [Array<String>] List of page titles to get.
144
- # @param prop [Array<Symbol>] List of additional page properties to get, refer to
145
- # [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
146
- # for the list of available properties.
150
+ # @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
151
+ # [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
152
+ # for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
153
+ # while using it.
147
154
  #
148
155
  # @return [Hash<String, Page>]
149
156
  #
150
- def get_h(*titles, prop: [])
151
- raw_pages = raw(*titles, prop: prop)
157
+ def get_h(*titles, &processor)
158
+ raw_pages = raw(*titles, &processor)
152
159
  .tap { |ps| ps.detect { |_, p| p['invalid'] }.tap { |_, i| i && fail(i['invalidreason']) } }
153
160
  .reject { |_, p| p.key?('missing') }
154
161
  titles.map { |title| [title, make_page(raw_pages, title)] }.to_h
@@ -156,59 +163,59 @@ module Infoboxer
156
163
 
157
164
  # Receive list of parsed MediaWiki pages from specified category.
158
165
  #
159
- # **NB**: currently, this API **always** fetches all pages from
160
- # category, there is no option to "take first 20 pages". Pages are
161
- # fetched in 50-page batches, then parsed. So, for large category
162
- # it can really take a while to fetch all pages.
163
- #
164
166
  # @param title [String] Category title. You can use namespaceless title (like
165
167
  # `"Countries in South America"`), title with namespace (like
166
168
  # `"Category:Countries in South America"`) or title with local
167
169
  # namespace (like `"Catégorie:Argentine"` for French Wikipedia)
170
+ # @param limit [Integer, "max"] Upper bound on the number of pages to fetch; `"max"` fetches all of them.
171
+ # @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
172
+ # [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
173
+ # for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
174
+ # while using it.
168
175
  #
169
176
  # @return [Tree::Nodes<Page>] array of parsed pages.
170
177
  #
171
- def category(title)
178
+ def category(title, limit: 'max', &processor)
172
179
  title = normalize_category_title(title)
173
180
 
174
- list(@client.query.generator(:categorymembers).title(title).limit('max'))
181
+ list(@api.query.generator(:categorymembers).title(title), limit, &processor)
175
182
  end
176
183
 
177
184
  # Receive list of parsed MediaWiki pages for provided search query.
178
185
  # See [MediaWiki API docs](https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bsearch)
179
186
  # for details.
180
187
  #
181
- # **NB**: currently, this API **always** fetches all pages from
182
- # category, there is no option to "take first 20 pages". Pages are
183
- # fetched in 50-page batches, then parsed. So, for large search query
184
- # it can really take a while to fetch all pages.
185
- #
186
188
  # @param query [String] Search query. For old installations, look at
187
189
  # https://www.mediawiki.org/wiki/Help:Searching
188
190
  # for search syntax. For new ones (including Wikipedia), see at
189
191
  # https://www.mediawiki.org/wiki/Help:CirrusSearch.
192
+ # @param limit [Integer, "max"] Upper bound on the number of pages to fetch; `"max"` fetches all of them.
193
+ # @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
194
+ # [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
195
+ # for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
196
+ # while using it.
190
197
  #
191
198
  # @return [Tree::Nodes<Page>] array of parsed pages.
192
199
  #
193
- def search(query)
194
- list(@client.query.generator(:search).search(query).limit('max'))
200
+ def search(query, limit: 'max', &processor)
201
+ list(@api.query.generator(:search).search(query), limit, &processor)
195
202
  end
196
203
 
197
204
  # Receive list of parsed MediaWiki pages with titles starting with prefix.
198
205
  # See [MediaWiki API docs](https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bprefixsearch)
199
206
  # for details.
200
207
  #
201
- # **NB**: currently, this API **always** fetches all pages from
202
- # category, there is no option to "take first 20 pages". Pages are
203
- # fetched in 50-page batches, then parsed. So, for large search query
204
- # it can really take a while to fetch all pages.
205
- #
206
208
  # @param prefix [String] Page title prefix.
209
+ # @param limit [Integer, "max"] Upper bound on the number of pages to fetch; `"max"` fetches all of them.
210
+ # @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
211
+ # [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
212
+ # for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
213
+ # while using it.
207
214
  #
208
215
  # @return [Tree::Nodes<Page>] array of parsed pages.
209
216
  #
210
- def prefixsearch(prefix)
211
- list(@client.query.generator(:prefixsearch).search(prefix).limit('max'))
217
+ def prefixsearch(prefix, limit: 'max', &processor)
218
+ list(@api.query.generator(:prefixsearch).search(prefix), limit, &processor)
212
219
  end
213
220
 
214
221
  # @return [String]
@@ -224,14 +231,11 @@ module Infoboxer
224
231
  Page.new(self, Parser.paragraphs(source['revisions'].first['*'], traits), source)
225
232
  end
226
233
 
227
- def list(query)
228
- response = query
229
- .prop(:revisions, :info)
230
- .prop(:content, :timestamp, :url)
231
- .redirects
232
- .response
234
+ def list(query, limit, &processor)
235
+ request = prepare_request(query.limit(limit), &processor)
236
+ response = request.response
233
237
 
234
- response = response.continue while response.continue?
238
+ response = response.continue while response.continue? && (limit == 'max' || response['pages'].count < limit)
235
239
 
236
240
  return Tree::Nodes[] if response['pages'].nil?
237
241
 
@@ -242,6 +246,11 @@ module Infoboxer
242
246
  Tree::Nodes[*pages]
243
247
  end
244
248
 
249
+ def prepare_request(request)
250
+ request = request.prop(:revisions, :info).prop(:content, :timestamp, :url).redirects
251
+ block_given? ? yield(request) : request
252
+ end
253
+
245
254
  def normalize_category_title(title)
246
255
  # FIXME: shouldn't it go to MediaWiktory?..
247
256
  namespace, titl = title.include?(':') ? title.split(':', 2) : [nil, title]
@@ -251,17 +260,26 @@ module Infoboxer
251
260
  [namespace, titl].join(':')
252
261
  end
253
262
 
254
- def user_agent(options)
255
- options[:user_agent] || options[:ua] || self.class.user_agent || UA
263
+ def user_agent(custom)
264
+ custom || self.class.user_agent || UA
265
+ end
266
+
267
+ def siteinfo
268
+ @siteinfo ||= @api.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
256
269
  end
257
270
 
258
- def extract_namespaces
259
- siteinfo = @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases).response
260
- siteinfo['namespaces'].map do |_, namespace|
261
- aliases =
262
- siteinfo['namespacealiases'].select { |a| a['id'] == namespace['id'] }.map { |a| a['*'] }
263
- namespace.merge('aliases' => aliases)
264
- end
271
+ def interwikis(prefix)
272
+ @interwikis ||= Hash.new { |h, pre|
273
+ interwiki = siteinfo['interwikimap'].detect { |iw| iw['prefix'] == prefix } or
274
+ fail ArgumentError, "Undefined interwiki: #{prefix}"
275
+
276
+ # FIXME: fragile, but what can we do?..
277
+ m = interwiki['url'].match(%r{^(.+)/wiki/\$1$}) or
278
+ fail ArgumentError, "Interwiki #{interwiki} seems not to be a MediaWiki instance"
279
+ h[pre] = self.class.new("#{m[1]}/w/api.php") # TODO: copy useragent
280
+ }
281
+
282
+ @interwikis[prefix]
265
283
  end
266
284
  end
267
285
  end