infoboxer 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yml +32 -0
  3. data/.rubocop_todo.yml +0 -15
  4. data/CHANGELOG.md +43 -0
  5. data/Gemfile.lock +172 -0
  6. data/README.md +1 -1
  7. data/infoboxer.gemspec +1 -1
  8. data/lib/infoboxer.rb +23 -11
  9. data/lib/infoboxer/core_ext.rb +1 -1
  10. data/lib/infoboxer/definitions/en.wikipedia.org.rb +3 -1
  11. data/lib/infoboxer/media_wiki.rb +83 -65
  12. data/lib/infoboxer/media_wiki/page.rb +10 -1
  13. data/lib/infoboxer/media_wiki/traits.rb +69 -22
  14. data/lib/infoboxer/navigation.rb +7 -1
  15. data/lib/infoboxer/navigation/lookup.rb +15 -7
  16. data/lib/infoboxer/navigation/sections.rb +27 -9
  17. data/lib/infoboxer/navigation/selector.rb +14 -6
  18. data/lib/infoboxer/navigation/shortcuts.rb +1 -1
  19. data/lib/infoboxer/navigation/wikipath.rb +1 -1
  20. data/lib/infoboxer/parser.rb +2 -2
  21. data/lib/infoboxer/parser/context.rb +23 -9
  22. data/lib/infoboxer/parser/html.rb +1 -1
  23. data/lib/infoboxer/parser/image.rb +2 -2
  24. data/lib/infoboxer/parser/inline.rb +50 -7
  25. data/lib/infoboxer/parser/paragraphs.rb +3 -3
  26. data/lib/infoboxer/parser/table.rb +33 -17
  27. data/lib/infoboxer/parser/template.rb +5 -4
  28. data/lib/infoboxer/parser/util.rb +2 -1
  29. data/lib/infoboxer/templates.rb +2 -0
  30. data/lib/infoboxer/templates/base.rb +2 -0
  31. data/lib/infoboxer/templates/set.rb +1 -1
  32. data/lib/infoboxer/tree.rb +2 -2
  33. data/lib/infoboxer/tree/compound.rb +3 -3
  34. data/lib/infoboxer/tree/document.rb +1 -1
  35. data/lib/infoboxer/tree/gallery.rb +12 -0
  36. data/lib/infoboxer/tree/html.rb +3 -3
  37. data/lib/infoboxer/tree/image.rb +4 -4
  38. data/lib/infoboxer/tree/inline.rb +3 -3
  39. data/lib/infoboxer/tree/linkable.rb +6 -1
  40. data/lib/infoboxer/tree/list.rb +4 -5
  41. data/lib/infoboxer/tree/math.rb +2 -3
  42. data/lib/infoboxer/tree/node.rb +4 -4
  43. data/lib/infoboxer/tree/nodes.rb +51 -7
  44. data/lib/infoboxer/tree/paragraphs.rb +1 -1
  45. data/lib/infoboxer/tree/ref.rb +1 -1
  46. data/lib/infoboxer/tree/table.rb +4 -4
  47. data/lib/infoboxer/tree/template.rb +18 -5
  48. data/lib/infoboxer/tree/text.rb +11 -11
  49. data/lib/infoboxer/tree/wikilink.rb +16 -8
  50. data/lib/infoboxer/version.rb +4 -3
  51. data/lib/infoboxer/wiki_path.rb +12 -1
  52. data/regression/pages/2012_bdo_world_darts_championship.wiki +941 -0
  53. data/regression/pages/progress_wrestling.wiki +1308 -0
  54. metadata +12 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: be65bc91a5370bc24553e500754f413196caed76
4
- data.tar.gz: d24dca5a13a64d563ddc473197a732a9f95884fd
2
+ SHA256:
3
+ metadata.gz: 755e1283e896d2c2b363983a9b04ac92cb14870cfa2ab67fd62777684bac1352
4
+ data.tar.gz: c5443d788fc06a2310c65b80bc16531b820bb7142f611f5d063af8992cee7525
5
5
  SHA512:
6
- metadata.gz: 661c06d6703db103035f61f55ebee5f0bc8a5f9ad182fcaf2a22be9f91063a9abd80bac1793fabae436b28bed274fcb7908219ba07453f46271a25d1cba0367a
7
- data.tar.gz: 25029633e6516c30a7de21433db1903ab923af2c74082b3a3c9322b50b170cdb104652b95d261b3033a070629ee478a8b5510a7298fbef830d1445dfe56157d0
6
+ metadata.gz: 1443ecf7dbb485555a275a4d5390417ee48fabb2321a23fb59c208d32c1424259515924e903872b762afebcaaf5574afd713e4eade6c346aeb8e71719c051436
7
+ data.tar.gz: 71c007554240c40a7e7f9bee81d66d637b8643f1ccf18a5900c24087545d3c430830d8a30d9c4463244b13419d4edb349aee585885b6aecf03dc8d8b37d701a7
@@ -0,0 +1,32 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ master ]
6
+ pull_request:
7
+ branches: [ master ]
8
+
9
+ jobs:
10
+ main:
11
+ name: >-
12
+ ${{ matrix.ruby }}
13
+ runs-on: ubuntu-latest
14
+ strategy:
15
+ fail-fast: false
16
+ matrix:
17
+ ruby: [ 2.6, 2.7, 3.0, head ]
18
+
19
+ steps:
20
+ - name: checkout
21
+ uses: actions/checkout@v2
22
+ - name: set up Ruby
23
+ uses: ruby/setup-ruby@v1
24
+ with:
25
+ ruby-version: ${{ matrix.ruby }}
26
+
27
+ - name: install dependencies
28
+ run: bundle install --jobs 3 --retry 3
29
+ - name: spec
30
+ run: bundle exec rake spec
31
+ - name: rubocop
32
+ run: bundle exec rake rubocop
data/.rubocop_todo.yml CHANGED
@@ -1,16 +1 @@
1
- # This configuration was generated by
2
- # `rubocop --auto-gen-config`
3
- # on 2017-06-23 13:52:16 +0300 using RuboCop version 0.49.1.
4
- # The point is for the user to remove these configuration records
5
- # one by one as the offenses are removed from the code base.
6
- # Note that changes in the inspected code, or installation of new
7
- # versions of RuboCop, may require this file to be generated again.
8
-
9
- # Offense count: 1
10
- Metrics/AbcSize:
11
- Max: 29
12
-
13
- # Offense count: 1
14
- Metrics/PerceivedComplexity:
15
- Max: 10
16
1
 
data/CHANGELOG.md CHANGED
@@ -1,5 +1,48 @@
1
1
  # Infoboxer's change log
2
2
 
3
+ ## 0.4.0 (2021-05-30)
4
+
5
+ * A cluster of bugs found in #81 fixed:
6
+ * Empty comment (`<!---->`) now processed properly;
7
+ * Templates that are implicitly inside tables (put on a separate row) now always create
8
+ an implicit `<TableCell>`
9
+ * Heading after non-closed table closes the table implicitly instead of being inserted
10
+ into the last cell.
11
+ * Drop Ruby < 2.6, and support 3.0 instead.
12
+
13
+ PS: Yeah, year-and-almost-half is much better than 2 years between releases, I guess.. And let's call
14
+ it non-patch version then.
15
+
16
+ ## 0.3.3 (2020-02-09)
17
+
18
+ * Fixed table captions handling (thanks @robfors for reporting)
19
+
20
+ PS: Funny that this small bugfix release is exactly two years after the previous one :(
21
+
22
+ ## 0.3.2 (2018-02-09)
23
+
24
+ * Updated MediaWiktory to finally turn on gzip encoding of responses;
25
+ * Utility methods to expose some internals (`MediaWiki#api`, `Infoboxer#url_for(:wikipedia)`,
26
+ `Page#namespaces`, `Template#named_variables` and so on);
27
+ * Fix parsing of lowercase `file:` links in `<gallery>`.
28
+
29
+ ## 0.3.1 (2017-12-04)
30
+
31
+ * (Experimental) new representation of templates, much more readable;
32
+ * More access to querying process and underlying `MediaWiktory::Wikipedia::Query`;
33
+ * Finally, `limit` parameter for multi-page queries (category, search, prefixsearch).
34
+
35
+ ## 0.3.1.pre (2017-09-16)
36
+
37
+ * Introduce interwiki links following (and proper handling of interwikis, in general);
38
+ * Add `<gallery>` tag support;
39
+ * Introduce `Navigation::Selector#===`;
40
+ * Many more `Enumerable` methods supported by `Nodes`;
41
+ * Lots of small simplifications, cleanups and bugfixes.
42
+
43
+ TBH, it should be 0.4.0 or more, but it would be a shame to change versions so fast :) So, at least
44
+ until it is `-pre`, let it be 0.3.1.
45
+
3
46
  ## 0.3.0 (2017-07-23)
4
47
 
5
48
  * Change logic of navigation through templates; now templates contents aren't hidden from global
data/Gemfile.lock ADDED
@@ -0,0 +1,172 @@
1
+ GIT
2
+ remote: https://github.com/zverok/dokaz.git
3
+ revision: a8a6f0bbeab5589326fe2714cf89842b5f32b850
4
+ specs:
5
+ dokaz (0.0.4)
6
+ ansi
7
+ rouge
8
+ slop (~> 3)
9
+
10
+ PATH
11
+ remote: .
12
+ specs:
13
+ infoboxer (0.4.0)
14
+ addressable
15
+ htmlentities
16
+ mediawiktory (= 0.1.3)
17
+ terminal-table
18
+
19
+ GEM
20
+ remote: https://rubygems.org/
21
+ specs:
22
+ addressable (2.7.0)
23
+ public_suffix (>= 2.0.2, < 5.0)
24
+ ansi (1.5.0)
25
+ ast (2.4.2)
26
+ backports (3.21.0)
27
+ byebug (11.1.3)
28
+ coveralls (0.8.23)
29
+ json (>= 1.8, < 3)
30
+ simplecov (~> 0.16.1)
31
+ term-ansicolor (~> 1.3)
32
+ thor (>= 0.19.4, < 2.0)
33
+ tins (~> 1.6)
34
+ crack (0.4.5)
35
+ rexml
36
+ diff-lcs (1.4.4)
37
+ docile (1.4.0)
38
+ faraday (1.4.2)
39
+ faraday-em_http (~> 1.0)
40
+ faraday-em_synchrony (~> 1.0)
41
+ faraday-excon (~> 1.1)
42
+ faraday-net_http (~> 1.0)
43
+ faraday-net_http_persistent (~> 1.1)
44
+ multipart-post (>= 1.2, < 3)
45
+ ruby2_keywords (>= 0.0.4)
46
+ faraday-em_http (1.0.0)
47
+ faraday-em_synchrony (1.0.0)
48
+ faraday-excon (1.1.0)
49
+ faraday-net_http (1.0.1)
50
+ faraday-net_http_persistent (1.1.0)
51
+ faraday_middleware (1.0.0)
52
+ faraday (~> 1.0)
53
+ hashdiff (1.0.1)
54
+ hashie (4.1.0)
55
+ htmlentities (4.3.4)
56
+ io-console (0.5.9)
57
+ irb (1.3.5)
58
+ reline (>= 0.1.5)
59
+ json (2.5.1)
60
+ mediawiktory (0.1.3)
61
+ addressable
62
+ faraday
63
+ faraday_middleware
64
+ hashie
65
+ naught
66
+ nokogiri
67
+ multipart-post (2.1.1)
68
+ naught (1.1.0)
69
+ nokogiri (1.11.6-x86_64-linux)
70
+ racc (~> 1.4)
71
+ parallel (1.20.1)
72
+ parser (3.0.1.1)
73
+ ast (~> 2.4.1)
74
+ public_suffix (4.0.6)
75
+ racc (1.5.2)
76
+ rainbow (3.0.0)
77
+ rake (13.0.3)
78
+ redcarpet (3.5.1)
79
+ regexp_parser (2.1.1)
80
+ reline (0.2.5)
81
+ io-console (~> 0.5)
82
+ rexml (3.2.5)
83
+ rouge (3.26.0)
84
+ rspec (3.10.0)
85
+ rspec-core (~> 3.10.0)
86
+ rspec-expectations (~> 3.10.0)
87
+ rspec-mocks (~> 3.10.0)
88
+ rspec-core (3.10.1)
89
+ rspec-support (~> 3.10.0)
90
+ rspec-expectations (3.10.1)
91
+ diff-lcs (>= 1.2.0, < 2.0)
92
+ rspec-support (~> 3.10.0)
93
+ rspec-its (1.3.0)
94
+ rspec-core (>= 3.0.0)
95
+ rspec-expectations (>= 3.0.0)
96
+ rspec-mocks (3.10.2)
97
+ diff-lcs (>= 1.2.0, < 2.0)
98
+ rspec-support (~> 3.10.0)
99
+ rspec-support (3.10.2)
100
+ rubocop (1.15.0)
101
+ parallel (~> 1.10)
102
+ parser (>= 3.0.0.0)
103
+ rainbow (>= 2.2.2, < 4.0)
104
+ regexp_parser (>= 1.8, < 3.0)
105
+ rexml
106
+ rubocop-ast (>= 1.5.0, < 2.0)
107
+ ruby-progressbar (~> 1.7)
108
+ unicode-display_width (>= 1.4.0, < 3.0)
109
+ rubocop-ast (1.7.0)
110
+ parser (>= 3.0.1.1)
111
+ rubocop-rspec (2.3.0)
112
+ rubocop (~> 1.0)
113
+ rubocop-ast (>= 1.1.0)
114
+ ruby-prof (1.4.3)
115
+ ruby-progressbar (1.11.0)
116
+ ruby2_keywords (0.0.4)
117
+ rubygems-tasks (0.2.5)
118
+ irb (~> 1.0)
119
+ saharspec (0.0.4)
120
+ simplecov (0.16.1)
121
+ docile (~> 1.1)
122
+ json (>= 1.8, < 3)
123
+ simplecov-html (~> 0.10.0)
124
+ simplecov-html (0.10.2)
125
+ slop (3.6.0)
126
+ sync (0.5.0)
127
+ term-ansicolor (1.7.1)
128
+ tins (~> 1.0)
129
+ terminal-table (3.0.1)
130
+ unicode-display_width (>= 1.1.1, < 3)
131
+ thor (1.1.0)
132
+ timecop (0.9.4)
133
+ tins (1.29.1)
134
+ sync
135
+ unicode-display_width (2.0.0)
136
+ vcr (6.0.0)
137
+ webmock (3.13.0)
138
+ addressable (>= 2.3.6)
139
+ crack (>= 0.3.2)
140
+ hashdiff (>= 0.4.0, < 2.0.0)
141
+ yard (0.9.26)
142
+ yard-junk (0.0.9)
143
+ backports (>= 3.18)
144
+ rainbow
145
+ yard
146
+
147
+ PLATFORMS
148
+ ruby
149
+ x86_64-linux
150
+
151
+ DEPENDENCIES
152
+ byebug
153
+ coveralls
154
+ dokaz!
155
+ infoboxer!
156
+ rake
157
+ redcarpet
158
+ rspec (~> 3)
159
+ rspec-its (~> 1)
160
+ rubocop (~> 1.15.0)
161
+ rubocop-rspec (~> 2.3.0)
162
+ ruby-prof
163
+ rubygems-tasks
164
+ saharspec (= 0.0.4)
165
+ timecop
166
+ vcr
167
+ webmock
168
+ yard (~> 0.9)
169
+ yard-junk (~> 0.0.7)
170
+
171
+ BUNDLED WITH
172
+ 2.2.0
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # Infoboxer
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/infoboxer.svg)](http://badge.fury.io/rb/infoboxer)
4
- [![Build Status](https://travis-ci.org/molybdenum-99/infoboxer.svg?branch=master)](https://travis-ci.org/molybdenum-99/infoboxer)
4
+ ![Build Status](https://github.com/molybdenum-99/infoboxer/workflows/CI/badge.svg?branch=master)
5
5
  [![Coverage Status](https://coveralls.io/repos/molybdenum-99/infoboxer/badge.svg?branch=master&service=github)](https://coveralls.io/github/molybdenum-99/infoboxer?branch=master)
6
6
  [![Code Climate](https://codeclimate.com/github/molybdenum-99/infoboxer/badges/gpa.svg)](https://codeclimate.com/github/molybdenum-99/infoboxer)
7
7
  [![Infoboxer Gitter](https://badges.gitter.im/molybdenum-99/infoboxer.svg)](https://gitter.im/molybdenum-99/infoboxer)
data/infoboxer.gemspec CHANGED
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
32
32
  s.executables << 'infoboxer'
33
33
 
34
34
  s.add_dependency 'htmlentities'
35
- s.add_dependency 'mediawiktory', '>= 0.1.0'
35
+ s.add_dependency 'mediawiktory', '= 0.1.3'
36
36
  s.add_dependency 'addressable'
37
37
  s.add_dependency 'terminal-table'
38
38
  end
data/lib/infoboxer.rb CHANGED
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  # Main client module for entire infoboxer functionality. If you're lucky,
4
4
  # there's no other classes/modules you need to instantiate or call
@@ -47,9 +47,8 @@
47
47
  #
48
48
  module Infoboxer
49
49
  # @private
50
- WIKIA_API_URL = 'http://%s.wikia.com/api.php'.freeze
50
+ WIKIA_API_URL = 'http://%s.wikia.com/api.php'
51
51
 
52
- # @private
53
52
  WIKIMEDIA_PROJECTS = {
54
53
  wikipedia: 'wikipedia.org',
55
54
  wikivoyage: 'wikivoyage.org',
@@ -61,7 +60,6 @@ module Infoboxer
61
60
  wikisource: 'wikisource.org'
62
61
  }.freeze
63
62
 
64
- # @private
65
63
  WIKIMEDIA_COMMONS = {
66
64
  commons: 'commons.wikimedia.org',
67
65
  species: 'species.wikimedia.org',
@@ -72,11 +70,11 @@ module Infoboxer
72
70
  end
73
71
 
74
72
  # Includeable version of {Infoboxer.wiki}
75
- def wiki(api_url, options = {})
76
- wikis[api_url] ||= MediaWiki.new(api_url, options || {})
73
+ def wiki(api_url, **options)
74
+ wikis[api_url] ||= MediaWiki.new(api_url, **options)
77
75
  end
78
76
 
79
- class << self
77
+ class << self # rubocop:disable Lint/EmptyClass -- that's for YARD!
80
78
  # @!method wiki(api_url, options = {})
81
79
  # Default method for creating MediaWiki API client.
82
80
  #
@@ -168,21 +166,35 @@ module Infoboxer
168
166
  end
169
167
 
170
168
  WIKIMEDIA_PROJECTS.each do |name, domain|
171
- define_method name do |lang = 'en', options = {}|
169
+ define_method name do |lang = 'en', **options|
172
170
  lang, options = 'en', lang if lang.is_a?(Hash)
173
171
 
174
- wiki("https://#{lang}.#{domain}/w/api.php", options)
172
+ wiki("https://#{lang}.#{domain}/w/api.php", **options)
175
173
  end
176
174
  end
177
175
 
178
176
  alias_method :wp, :wikipedia
179
177
 
180
178
  WIKIMEDIA_COMMONS.each do |name, domain|
181
- define_method name do |options = {}|
179
+ define_method name do |**options|
182
180
  wiki("https://#{domain}/w/api.php", options)
183
181
  end
184
182
  end
185
183
 
184
+ # Returns URL of API entry-point for a well-known Wiki-project (wikipedia, wikivoyage etc.)
185
+ # by project's name.
186
+ #
187
+ # @param symbol [Symbol] One of {WIKIMEDIA_PROJECTS} or {WIKIMEDIA_COMMONS} keys.
188
+ # @param lang [String, Symbol] Language of the project, if applicable.
189
+ # @return [String]
190
+ def url_for(symbol, lang = 'en')
191
+ if (domain = WIKIMEDIA_PROJECTS[symbol])
192
+ "https://#{lang}.#{domain}/w/api.php"
193
+ elsif (domain = WIKIMEDIA_COMMONS[symbol])
194
+ "https://#{domain}/w/api.php"
195
+ end
196
+ end
197
+
186
198
  # @!method wikipedia(lang = 'en', options = {})
187
199
  # Includeable version of {Infoboxer.wikipedia}
188
200
 
@@ -216,7 +228,7 @@ module Infoboxer
216
228
  # Includeable version of {Infoboxer.wikia}
217
229
  def wikia(*domains)
218
230
  options = domains.last.is_a?(Hash) ? domains.pop : {}
219
- wiki(WIKIA_API_URL % domains.reverse.join('.'), options)
231
+ wiki(WIKIA_API_URL % domains.reverse.join('.'), **options)
220
232
  end
221
233
 
222
234
  # Sets user agent string globally. Default user agent is
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  # @private
4
4
  class Object
@@ -1,5 +1,6 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
+ # rubocop:disable Layout/EmptyLinesAroundArguments
3
4
  module Infoboxer
4
5
  MediaWiki::Traits.for('en.wikipedia.org') do
5
6
  templates do
@@ -372,3 +373,4 @@ module Infoboxer
372
373
  end
373
374
  end
374
375
  end
376
+ # rubocop:enable Layout/EmptyLinesAroundArguments
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  require 'mediawiktory'
4
4
  require 'addressable/uri'
@@ -27,7 +27,7 @@ module Infoboxer
27
27
  # You can set yours as an option to {Infoboxer.wiki} and its shortcuts,
28
28
  # or to {#initialize}
29
29
  UA = "Infoboxer/#{Infoboxer::VERSION} "\
30
- '(https://github.com/molybdenum-99/infoboxer; zverok.offline@gmail.com)'.freeze
30
+ '(https://github.com/molybdenum-99/infoboxer; zverok.offline@gmail.com)'
31
31
 
32
32
  class << self
33
33
  # User agent getter/setter.
@@ -43,43 +43,45 @@ module Infoboxer
43
43
  # @private
44
44
  attr_reader :api_base_url, :traits
45
45
 
46
+ # @return [MediaWiktory::Wikipedia::Client]
47
+ attr_reader :api
48
+
46
49
  # Creating new MediaWiki client. {Infoboxer.wiki} provides shortcut
47
50
  # for it, as well as shortcuts for some well-known wikis, like
48
51
  # {Infoboxer.wikipedia}.
49
52
  #
50
- # @param api_base_url URL of `api.php` file in your MediaWiki
53
+ # @param api_base_url [String] URL of `api.php` file in your MediaWiki
51
54
  # installation. Typically, it's `<domain>/w/api.php`, but can vary
52
55
  # in different wikis.
53
- # @param options Only one option is currently supported:
54
- # * `:user_agent` (also aliased as `:ua`) -- custom User-Agent header.
55
- def initialize(api_base_url, options = {})
56
+ # @param user_agent [String] (also aliased as `:ua`) Custom User-Agent header.
57
+ def initialize(api_base_url, ua: nil, user_agent: ua)
56
58
  @api_base_url = Addressable::URI.parse(api_base_url)
57
- @client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(options))
58
- @traits = Traits.get(@api_base_url.host, namespaces: extract_namespaces)
59
+ @api = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
60
+ @traits = Traits.get(@api_base_url.host, siteinfo)
59
61
  end
60
62
 
61
63
  # Receive "raw" data from Wikipedia (without parsing or wrapping in
62
64
  # classes).
63
65
  #
64
66
  # @param titles [Array<String>] List of page titles to get.
65
- # @param prop [Array<Symbol>] List of additional page properties to get, refer to
66
- # [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
67
- # for the list of available properties.
67
+ # @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
68
+ # [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
69
+ # for its API. Infoboxer assumes that the block returns a new instance of `Query`, so be careful
70
+ # while using it.
68
71
  #
69
72
  # @return [Hash{String => Hash}] Hash of `{requested title => raw MediaWiki object}`. Note that
70
73
  # even missing (does not exist in current Wiki) or invalid (impossible title) pages will still be present
71
74
  # in the response; they will just have a `"missing"` or `"invalid"` key, exactly as MediaWiki returns them.
72
- def raw(*titles, prop: [])
75
+ def raw(*titles, &processor)
73
76
  # could emerge on "automatically" created page lists, should work
74
77
  return {} if titles.empty?
75
78
 
76
79
  titles.each_slice(50).map do |part|
77
- response = @client
78
- .query
79
- .titles(*part)
80
- .prop(:revisions, :info, *prop).prop(:content, :timestamp, :url)
81
- .redirects
82
- .response
80
+ request = prepare_request(@api.query.titles(*part), &processor)
81
+ response = request.response
82
+
83
+ # If additional props are requested, the response may be split into continuation batches despite each_slice(50)
84
+ response = response.continue while response.continue?
83
85
 
84
86
  sources = response['pages'].values.map { |page| [page['title'], page] }.to_h
85
87
  redirects =
@@ -103,9 +105,11 @@ module Infoboxer
103
105
  # `(titles.count / 50.0).ceil` requests)
104
106
  #
105
107
  # @param titles [Array<String>] List of page titles to get.
106
- # @param prop [Array<Symbol>] List of additional page properties to get, refer to
107
- # [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
108
- # for the list of available properties.
108
+ # @param interwiki [Symbol] Identifier of other wiki, related to current, to fetch pages from.
109
+ # @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
110
+ # [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
111
+ # for its API. Infoboxer assumes that the block returns a new instance of `Query`, so be careful
112
+ # while using it.
109
113
  #
110
114
  # @return [Page, Tree::Nodes<Page>] array of parsed pages. Notes:
111
115
  # * if you call `get` with only one title, one page will be
@@ -123,8 +127,10 @@ module Infoboxer
123
127
  # and obtain meaningful results instead of `NoMethodError` or
124
128
  # `SomethingNotFound`.
125
129
  #
126
- def get(*titles, prop: [])
127
- pages = get_h(*titles, prop: prop).values.compact
130
+ def get(*titles, interwiki: nil, &processor)
131
+ return interwikis(interwiki).get(*titles, &processor) if interwiki
132
+
133
+ pages = get_h(*titles, &processor).values.compact
128
134
  titles.count == 1 ? pages.first : Tree::Nodes[*pages]
129
135
  end
130
136
 
@@ -141,14 +147,15 @@ module Infoboxer
141
147
  # you've received.
142
148
  #
143
149
  # @param titles [Array<String>] List of page titles to get.
144
- # @param prop [Array<Symbol>] List of additional page properties to get, refer to
145
- # [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
146
- # for the list of available properties.
150
+ # @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
151
+ # [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
152
+ # for its API. Infoboxer assumes that the block returns a new instance of `Query`, so be careful
153
+ # while using it.
147
154
  #
148
155
  # @return [Hash<String, Page>]
149
156
  #
150
- def get_h(*titles, prop: [])
151
- raw_pages = raw(*titles, prop: prop)
157
+ def get_h(*titles, &processor)
158
+ raw_pages = raw(*titles, &processor)
152
159
  .tap { |ps| ps.detect { |_, p| p['invalid'] }.tap { |_, i| i && fail(i['invalidreason']) } }
153
160
  .reject { |_, p| p.key?('missing') }
154
161
  titles.map { |title| [title, make_page(raw_pages, title)] }.to_h
@@ -156,59 +163,59 @@ module Infoboxer
156
163
 
157
164
  # Receive list of parsed MediaWiki pages from specified category.
158
165
  #
159
- # **NB**: currently, this API **always** fetches all pages from
160
- # category, there is no option to "take first 20 pages". Pages are
161
- # fetched in 50-page batches, then parsed. So, for large category
162
- # it can really take a while to fetch all pages.
163
- #
164
166
  # @param title [String] Category title. You can use namespaceless title (like
165
167
  # `"Countries in South America"`), title with namespace (like
166
168
  # `"Category:Countries in South America"`) or title with local
167
169
  # namespace (like `"Catégorie:Argentine"` for French Wikipedia)
170
+ # @param limit [Integer, "max"] Fetching stops once this many pages are received; `"max"` (default) fetches all.
171
+ # @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
172
+ # [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
173
+ # for its API. Infoboxer assumes that the block returns a new instance of `Query`, so be careful
174
+ # while using it.
168
175
  #
169
176
  # @return [Tree::Nodes<Page>] array of parsed pages.
170
177
  #
171
- def category(title)
178
+ def category(title, limit: 'max', &processor)
172
179
  title = normalize_category_title(title)
173
180
 
174
- list(@client.query.generator(:categorymembers).title(title).limit('max'))
181
+ list(@api.query.generator(:categorymembers).title(title), limit, &processor)
175
182
  end
176
183
 
177
184
  # Receive list of parsed MediaWiki pages for provided search query.
178
185
  # See [MediaWiki API docs](https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bsearch)
179
186
  # for details.
180
187
  #
181
- # **NB**: currently, this API **always** fetches all pages from
182
- # category, there is no option to "take first 20 pages". Pages are
183
- # fetched in 50-page batches, then parsed. So, for large search query
184
- # it can really take a while to fetch all pages.
185
- #
186
188
  # @param query [String] Search query. For old installations, look at
187
189
  # https://www.mediawiki.org/wiki/Help:Searching
188
190
  # for search syntax. For new ones (including Wikipedia), see at
189
191
  # https://www.mediawiki.org/wiki/Help:CirrusSearch.
192
+ # @param limit [Integer, "max"] Fetching stops once this many pages are received; `"max"` (default) fetches all.
193
+ # @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
194
+ # [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
195
+ # for its API. Infoboxer assumes that the block returns a new instance of `Query`, so be careful
196
+ # while using it.
190
197
  #
191
198
  # @return [Tree::Nodes<Page>] array of parsed pages.
192
199
  #
193
- def search(query)
194
- list(@client.query.generator(:search).search(query).limit('max'))
200
+ def search(query, limit: 'max', &processor)
201
+ list(@api.query.generator(:search).search(query), limit, &processor)
195
202
  end
196
203
 
197
204
  # Receive list of parsed MediaWiki pages with titles starting from prefix.
198
205
  # See [MediaWiki API docs](https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bprefixsearch)
199
206
  # for details.
200
207
  #
201
- # **NB**: currently, this API **always** fetches all pages from
202
- # category, there is no option to "take first 20 pages". Pages are
203
- # fetched in 50-page batches, then parsed. So, for large search query
204
- # it can really take a while to fetch all pages.
205
- #
206
208
  # @param prefix [String] Page title prefix.
209
+ # @param limit [Integer, "max"] Fetching stops once this many pages are received; `"max"` (default) fetches all.
210
+ # @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
211
+ # [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
212
+ # for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
213
+ # while using it.
207
214
  #
208
215
  # @return [Tree::Nodes<Page>] array of parsed pages.
209
216
  #
210
- def prefixsearch(prefix)
211
- list(@client.query.generator(:prefixsearch).search(prefix).limit('max'))
217
+ def prefixsearch(prefix, limit: 'max', &processor)
218
+ list(@api.query.generator(:prefixsearch).search(prefix), limit, &processor)
212
219
  end
213
220
 
214
221
  # @return [String]
@@ -224,14 +231,11 @@ module Infoboxer
224
231
  Page.new(self, Parser.paragraphs(source['revisions'].first['*'], traits), source)
225
232
  end
226
233
 
227
- def list(query)
228
- response = query
229
- .prop(:revisions, :info)
230
- .prop(:content, :timestamp, :url)
231
- .redirects
232
- .response
234
+ def list(query, limit, &processor)
235
+ request = prepare_request(query.limit(limit), &processor)
236
+ response = request.response
233
237
 
234
- response = response.continue while response.continue?
238
+ response = response.continue while response.continue? && (limit == 'max' || response['pages'].count < limit)
235
239
 
236
240
  return Tree::Nodes[] if response['pages'].nil?
237
241
 
@@ -242,6 +246,11 @@ module Infoboxer
242
246
  Tree::Nodes[*pages]
243
247
  end
244
248
 
249
+ def prepare_request(request)
250
+ request = request.prop(:revisions, :info).prop(:content, :timestamp, :url).redirects
251
+ block_given? ? yield(request) : request
252
+ end
253
+
245
254
  def normalize_category_title(title)
246
255
  # FIXME: shouldn't it go to MediaWiktory?..
247
256
  namespace, titl = title.include?(':') ? title.split(':', 2) : [nil, title]
@@ -251,17 +260,26 @@ module Infoboxer
251
260
  [namespace, titl].join(':')
252
261
  end
253
262
 
254
- def user_agent(options)
255
- options[:user_agent] || options[:ua] || self.class.user_agent || UA
263
+ def user_agent(custom)
264
+ custom || self.class.user_agent || UA
265
+ end
266
+
267
+ def siteinfo
268
+ @siteinfo ||= @api.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
256
269
  end
257
270
 
258
- def extract_namespaces
259
- siteinfo = @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases).response
260
- siteinfo['namespaces'].map do |_, namespace|
261
- aliases =
262
- siteinfo['namespacealiases'].select { |a| a['id'] == namespace['id'] }.map { |a| a['*'] }
263
- namespace.merge('aliases' => aliases)
264
- end
271
+ def interwikis(prefix)
272
+ @interwikis ||= Hash.new { |h, pre|
273
+ interwiki = siteinfo['interwikimap'].detect { |iw| iw['prefix'] == prefix } or
274
+ fail ArgumentError, "Undefined interwiki: #{prefix}"
275
+
276
+ # FIXME: fragile, but what can we do?..
277
+ m = interwiki['url'].match(%r{^(.+)/wiki/\$1$}) or
278
+ fail ArgumentError, "Interwiki #{interwiki} seems not to be a MediaWiki instance"
279
+ h[pre] = self.class.new("#{m[1]}/w/api.php") # TODO: copy useragent
280
+ }
281
+
282
+ @interwikis[prefix]
265
283
  end
266
284
  end
267
285
  end