infoboxer 0.2.7 → 0.2.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +1 -0
  3. data/CHANGELOG.md +6 -0
  4. data/bin/infoboxer +11 -12
  5. data/infoboxer.gemspec +3 -2
  6. data/lib/infoboxer/core_ext.rb +1 -0
  7. data/lib/infoboxer/definitions/en.wikipedia.org.rb +13 -13
  8. data/lib/infoboxer/media_wiki/page.rb +4 -3
  9. data/lib/infoboxer/media_wiki/traits.rb +12 -10
  10. data/lib/infoboxer/media_wiki.rb +97 -68
  11. data/lib/infoboxer/navigation/lookup.rb +30 -26
  12. data/lib/infoboxer/navigation/sections.rb +33 -37
  13. data/lib/infoboxer/navigation/selector.rb +5 -6
  14. data/lib/infoboxer/navigation/shortcuts.rb +12 -11
  15. data/lib/infoboxer/navigation.rb +2 -1
  16. data/lib/infoboxer/parser/context.rb +12 -13
  17. data/lib/infoboxer/parser/html.rb +7 -6
  18. data/lib/infoboxer/parser/image.rb +25 -29
  19. data/lib/infoboxer/parser/inline.rb +82 -79
  20. data/lib/infoboxer/parser/paragraphs.rb +34 -37
  21. data/lib/infoboxer/parser/table.rb +26 -27
  22. data/lib/infoboxer/parser/template.rb +12 -4
  23. data/lib/infoboxer/parser/util.rb +11 -16
  24. data/lib/infoboxer/parser.rb +8 -1
  25. data/lib/infoboxer/templates/base.rb +3 -3
  26. data/lib/infoboxer/templates/set.rb +11 -10
  27. data/lib/infoboxer/tree/compound.rb +7 -6
  28. data/lib/infoboxer/tree/document.rb +1 -0
  29. data/lib/infoboxer/tree/html.rb +5 -4
  30. data/lib/infoboxer/tree/image.rb +8 -7
  31. data/lib/infoboxer/tree/inline.rb +4 -5
  32. data/lib/infoboxer/tree/linkable.rb +3 -5
  33. data/lib/infoboxer/tree/list.rb +15 -16
  34. data/lib/infoboxer/tree/node.rb +11 -10
  35. data/lib/infoboxer/tree/nodes.rb +24 -23
  36. data/lib/infoboxer/tree/paragraphs.rb +3 -2
  37. data/lib/infoboxer/tree/ref.rb +6 -3
  38. data/lib/infoboxer/tree/table.rb +13 -13
  39. data/lib/infoboxer/tree/template.rb +15 -15
  40. data/lib/infoboxer/tree/text.rb +2 -1
  41. data/lib/infoboxer/tree/wikilink.rb +9 -8
  42. data/lib/infoboxer/tree.rb +3 -2
  43. data/lib/infoboxer/version.rb +2 -1
  44. data/lib/infoboxer.rb +24 -26
  45. data/regression/pages/wyoming.wiki +1085 -0
  46. metadata +8 -21
  47. data/lib/infoboxer/media_wiki/mediawiktory_patch.rb +0 -23
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3dc65cf33309c84191cac67a24377302a220a899
4
- data.tar.gz: 64d1a9f50089fa0b676ffa02db045efce5445100
3
+ metadata.gz: ef407b6160d6d0ff8cfb874338320532d1b18906
4
+ data.tar.gz: fdc4ccf4b051c50958e6cc9720619ea5110b5ea5
5
5
  SHA512:
6
- metadata.gz: b7850a54a7aa864dd0d5d4c8e741877b3bde859d3bd19b0dde1303a411f6948c58144e6fd2389df7626d3b3e967063b2784f91a34bd69b7c6375598e6051044f
7
- data.tar.gz: 59cd28da34d3e66c6ecb971985d7fbfecb3f649911f45a8d205df156899d4a265a887f30566a9825f67ca3a4457638c92b4966f75e559c36ea310eb94d05e212
6
+ metadata.gz: d5224104b02115c47f27c69926128023da3765d1d7089241190a6b3f5c8c3de79be91f65276c82536c671424d10000e2f5198413b3aa21e9802ccc9ca41fbdef
7
+ data.tar.gz: 0f1f8d9ee3a4e94d6529208a9678bcb0915a884525433d2397249afada141d96c88c74e0546ddd0576adffcc2785f96ad1da57d61284b84d96661fb49c0fd46d
data/.rubocop_todo.yml ADDED
@@ -0,0 +1 @@
1
+
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # Infoboxer's change log
2
2
 
3
+ ## 0.2.8 (2017-05-11)
4
+
5
+ * Switch to MediaWiktory 0.1.0 + some subsequent cleanup of internal logic;
6
+ * Additional `prop:` param for `MediaWiki#get`, `#get_h`, and `#raw`, allowing you to fetch arbitrary
7
+ page properties.
8
+
3
9
  ## 0.2.7 (2016-09-18)
4
10
 
5
11
  * Fix `Math` node rendering to text (#68);
data/bin/infoboxer CHANGED
@@ -9,9 +9,9 @@ require 'optparse'
9
9
  wiki_url = nil
10
10
 
11
11
  OptionParser.new do |opts|
12
- opts.banner = "Usage: infoboxer [-w wiki_api_url]"
12
+ opts.banner = 'Usage: infoboxer [-w wiki_api_url]'
13
13
 
14
- opts.on("-w", "--wiki WIKI_API_URL",
14
+ opts.on('-w', '--wiki WIKI_API_URL',
15
15
  "Make wiki by WIKI_API_URL a default wiki, and use it with just get('Pagename')") do |w|
16
16
  wiki_url = w
17
17
  end
@@ -20,19 +20,19 @@ end.parse!
20
20
  if wiki_url
21
21
  if wiki_url =~ /^[a-z]+$/
22
22
  wiki_url = case
23
- when domain = Infoboxer::WIKIMEDIA_PROJECTS[wiki_url.to_sym]
24
- "https://en.#{domain}/w/api.php"
25
- when domain = Infoboxer::WIKIMEDIA_PROJECTS[('w' + wiki_url).to_sym]
26
- "https://en.#{domain}/w/api.php"
27
- else
28
- fail("Unidentified wiki: #{wiki_url}")
29
- end
23
+ when domain = Infoboxer::WIKIMEDIA_PROJECTS[wiki_url.to_sym]
24
+ "https://en.#{domain}/w/api.php"
25
+ when domain = Infoboxer::WIKIMEDIA_PROJECTS[('w' + wiki_url).to_sym]
26
+ "https://en.#{domain}/w/api.php"
27
+ else
28
+ fail("Unidentified wiki: #{wiki_url}")
29
+ end
30
30
  end
31
-
31
+
32
32
  DEFAULT_WIKI = Infoboxer.wiki(wiki_url)
33
33
  puts "Default Wiki selected: #{wiki_url}.\nNow you can use `get('Pagename')`, `category('Categoryname')` and so on.\n\n"
34
34
  [:raw, :get, :get_h, :category, :search, :prefixsearch].each do |m|
35
- define_method(m){|*arg|
35
+ define_method(m) { |*arg|
36
36
  DEFAULT_WIKI.send(m, *arg)
37
37
  }
38
38
  end
@@ -41,4 +41,3 @@ end
41
41
  require 'irb'
42
42
  ARGV.shift until ARGV.empty?
43
43
  IRB.start
44
-
data/infoboxer.gemspec CHANGED
@@ -14,6 +14,8 @@ Gem::Specification.new do |s|
14
14
  EOF
15
15
  s.licenses = ['MIT']
16
16
 
17
+ s.required_ruby_version = '>= 2.1.0'
18
+
17
19
  s.files = `git ls-files`.split($RS).reject do |file|
18
20
  file =~ /^(?:
19
21
  spec\/.*
@@ -31,8 +33,7 @@ Gem::Specification.new do |s|
31
33
 
32
34
  s.add_dependency 'htmlentities'
33
35
  s.add_dependency 'procme'
34
- s.add_dependency 'mediawiktory', '>= 0.0.2'
36
+ s.add_dependency 'mediawiktory', '>= 0.1.0'
35
37
  s.add_dependency 'addressable'
36
38
  s.add_dependency 'terminal-table'
37
- s.add_dependency 'backports'
38
39
  end
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+
2
3
  # @private
3
4
  class Object
4
5
  # Unfortunately, not in backports gem still :(
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+
2
3
  module Infoboxer
3
4
  MediaWiki::Traits.for('en.wikipedia.org') do
4
5
  templates do
@@ -38,7 +39,7 @@ module Infoboxer
38
39
  'Asterisk' => '*',
39
40
  'Colon' => ':',
40
41
  'Em dash' => '—',
41
- 'Gc' => "",
42
+ 'Gc' => '',
42
43
  'Ibeam' => 'I',
43
44
  'Long dash' => ' ——— ',
44
45
  'Nbhyph' => '‑',
@@ -64,12 +65,12 @@ module Infoboxer
64
65
  'Break' => "\n", # FIXME: in fact, break has optional parameter "how many breaks"
65
66
  'Crlf' => "\n", # FIXME: in fact, alias for break, should have DSL syntax for it!
66
67
  'Crlf2' => "\n",
67
-
68
+
68
69
  )
69
70
  show(
70
71
  'Allow wrap',
71
72
  'Nowrap',
72
- 'j', 'nobr', 'nobreak', # aliases for Nowrap
73
+ 'j', 'nobr', 'nobreak', # aliases for Nowrap
73
74
  'nowraplinks',
74
75
  )
75
76
  # inflow_template('Normalwraplink') # TODO: tricky
@@ -168,7 +169,7 @@ module Infoboxer
168
169
  'lime', 'green', 'aqua (color)', 'cyan', 'teal', 'blue', 'navy (color)',
169
170
  'purple', 'fuchsia', 'magenta'
170
171
  )
171
-
172
+
172
173
  # Some most popular templates, without categorical splitting
173
174
  # https://en.wikipedia.org/wiki/Wikipedia:Database_reports/Templates_transcluded_on_the_most_pages
174
175
  # ------------------------------------------------------------------------------------------------
@@ -190,7 +191,7 @@ module Infoboxer
190
191
  template 'Coord' do
191
192
  def model
192
193
  @model ||= begin
193
- npos = lookup_children(text: /^N|S$/).first.index rescue nil
194
+ npos = lookup_children(text: /^N|S$/).first.index rescue nil # rubocop:disable Style/RescueModifier
194
195
  case npos
195
196
  when 1
196
197
  :decimal
@@ -235,12 +236,11 @@ module Infoboxer
235
236
  end
236
237
 
237
238
  ALLOW_BETWEEN = ['-;', '–',
238
- 'and', '&', 'and(-)', ', and',
239
- 'or', ', or',
240
- 'to', 'to(-)', 'to about',
241
- '+/-', '±', '+',
242
- 'by', 'x', '×', 'x',
243
- ]
239
+ 'and', '&', 'and(-)', ', and',
240
+ 'or', ', or',
241
+ 'to', 'to(-)', 'to about',
242
+ '+/-', '±', '+',
243
+ 'by', 'x', '×', 'x',].freeze
244
244
 
245
245
  def between
246
246
  ALLOW_BETWEEN.include?(fetch('2').text) ? fetch('2').text : nil
@@ -253,11 +253,11 @@ module Infoboxer
253
253
  def measure_from
254
254
  between ? fetch('4').text : fetch('2').text
255
255
  end
256
-
256
+
257
257
  def measure_to
258
258
  between ? fetch('5').text : fetch('3').text
259
259
  end
260
-
260
+
261
261
  def text
262
262
  [value1, between, value2, measure_from].compact.join(' ')
263
263
  end
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+
2
3
  module Infoboxer
3
4
  class MediaWiki
4
5
  # A descendant of {Tree::Document Document}, representing page,
@@ -9,7 +10,7 @@ module Infoboxer
9
10
  class Page < Tree::Document
10
11
  def initialize(client, children, source)
11
12
  @client, @source = client, source
12
- super(children, title: source.title, url: source.fullurl)
13
+ super(children, title: source['title'], url: source['fullurl'])
13
14
  end
14
15
 
15
16
  # Instance of {MediaWiki} which this page was received from
@@ -36,10 +37,10 @@ module Infoboxer
36
37
 
37
38
  private
38
39
 
39
- PARAMS_TO_INSPECT = [:url, :title] #, :domain]
40
+ PARAMS_TO_INSPECT = %i[url title].freeze
40
41
 
41
42
  def show_params
42
- super(params.select{|k, v| PARAMS_TO_INSPECT.include?(k)})
43
+ super(params.select { |k, _v| PARAMS_TO_INSPECT.include?(k) })
43
44
  end
44
45
  end
45
46
  end
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+
2
3
  module Infoboxer
3
4
  class MediaWiki
4
5
  # DSL for defining "traits" for some site.
@@ -19,7 +20,7 @@ module Infoboxer
19
20
  @templates ||= Templates::Set.new
20
21
 
21
22
  return @templates unless definition
22
-
23
+
23
24
  @templates.define(&definition)
24
25
  end
25
26
 
@@ -59,7 +60,7 @@ module Infoboxer
59
60
  # [English Wikipedia traits](https://github.com/molybdenum-99/infoboxer/blob/master/lib/infoboxer/definitions/en.wikipedia.org.rb)
60
61
  # for example implementation.
61
62
  def for(domain, &block)
62
- Traits.domains[domain].tap{|c| c && c.instance_eval(&block)} ||
63
+ Traits.domains[domain].tap { |c| c && c.instance_eval(&block) } ||
63
64
  Class.new(self, &block).domain(domain)
64
65
  end
65
66
 
@@ -69,10 +70,12 @@ module Infoboxer
69
70
 
70
71
  def initialize(options = {})
71
72
  @options = options
72
- @file_namespace = [DEFAULTS[:file_namespace], namespace_aliases(options, 'File')].
73
- flatten.compact.uniq
74
- @category_namespace = [DEFAULTS[:category_namespace], namespace_aliases(options, 'Category')].
75
- flatten.compact.uniq
73
+ @file_namespace =
74
+ [DEFAULTS[:file_namespace], namespace_aliases(options, 'File')]
75
+ .flatten.compact.uniq
76
+ @category_namespace =
77
+ [DEFAULTS[:category_namespace], namespace_aliases(options, 'Category')]
78
+ .flatten.compact.uniq
76
79
  end
77
80
 
78
81
  # @private
@@ -86,16 +89,15 @@ module Infoboxer
86
89
  private
87
90
 
88
91
  def namespace_aliases(options, canonical)
89
- namespace = (options[:namespaces] || []).detect{|v| v.canonical == canonical}
92
+ namespace = (options[:namespaces] || []).detect { |v| v['canonical'] == canonical }
90
93
  return nil unless namespace
91
- [namespace['*'], *namespace.aliases]
94
+ [namespace['*'], *namespace['aliases']]
92
95
  end
93
96
 
94
97
  DEFAULTS = {
95
98
  file_namespace: 'File',
96
99
  category_namespace: 'Category'
97
- }
98
-
100
+ }.freeze
99
101
  end
100
102
  end
101
103
  end
@@ -1,10 +1,8 @@
1
1
  # encoding: utf-8
2
- #require 'rest-client'
3
- #require 'json'
2
+
4
3
  require 'mediawiktory'
5
4
  require 'addressable/uri'
6
5
 
7
- require_relative 'media_wiki/mediawiktory_patch'
8
6
  require_relative 'media_wiki/traits'
9
7
  require_relative 'media_wiki/page'
10
8
 
@@ -14,7 +12,8 @@ module Infoboxer
14
12
  # Usage:
15
13
  #
16
14
  # ```ruby
17
- # client = Infoboxer::MediaWiki.new('http://en.wikipedia.org/w/api.php', user_agent: 'My Own Project')
15
+ # client = Infoboxer::MediaWiki
16
+ # .new('http://en.wikipedia.org/w/api.php', user_agent: 'My Own Project')
18
17
  # page = client.get('Argentina')
19
18
  # ```
20
19
  #
@@ -27,7 +26,8 @@ module Infoboxer
27
26
  #
28
27
  # You can set yours as an option to {Infoboxer.wiki} and its shortcuts,
29
28
  # or to {#initialize}
30
- UA = "Infoboxer/#{Infoboxer::VERSION} (https://github.com/molybdenum-99/infoboxer; zverok.offline@gmail.com)"
29
+ UA = "Infoboxer/#{Infoboxer::VERSION} "\
30
+ '(https://github.com/molybdenum-99/infoboxer; zverok.offline@gmail.com)'.freeze
31
31
 
32
32
  class << self
33
33
  # User agent getter/setter.
@@ -35,9 +35,12 @@ module Infoboxer
35
35
  # Default value is {UA}.
36
36
  #
37
37
  # You can also use per-instance option, see {#initialize}
38
+ #
39
+ # @return [String]
38
40
  attr_accessor :user_agent
39
41
  end
40
42
 
43
+ # @private
41
44
  attr_reader :api_base_url, :traits
42
45
 
43
46
  # Creating new MediaWiki client. {Infoboxer.wiki} provides shortcut
@@ -51,28 +54,44 @@ module Infoboxer
51
54
  # * `:user_agent` (also aliased as `:ua`) -- custom User-Agent header.
52
55
  def initialize(api_base_url, options = {})
53
56
  @api_base_url = Addressable::URI.parse(api_base_url)
54
- @client = MediaWiktory::Client.new(api_base_url, user_agent: user_agent(options))
57
+ @client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(options))
55
58
  @traits = Traits.get(@api_base_url.host, namespaces: extract_namespaces)
56
59
  end
57
60
 
58
61
  # Receive "raw" data from Wikipedia (without parsing or wrapping in
59
62
  # classes).
60
63
  #
61
- # @return [Array<Hash>]
62
- def raw(*titles)
63
- return [] if titles.empty? # could emerge on "automatically" created page lists, should work
64
-
65
- titles.each_slice(50).map{|part|
66
- @client.query.
67
- titles(*part).
68
- prop(revisions: {prop: :content}, info: {prop: :url}).
69
- redirects(true). # FIXME: should be done transparently by MediaWiktory?
70
- perform.pages
71
- }.inject(:concat). # somehow flatten(1) fails!
72
- sort_by{|page|
73
- res_title = page.alt_titles.detect{|t| titles.map(&:downcase).include?(t.downcase)} # FIXME?..
74
- titles.index(res_title) || 1_000
75
- }
64
+ # @param titles [Array<String>] List of page titles to get.
65
+ # @param prop [Array<Symbol>] List of additional page properties to get, refer to
66
+ # [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
67
+ # for the list of available properties.
68
+ #
69
+ # @return [Hash{String => Hash}] Hash of `{requested title => raw MediaWiki object}`. Note that
70
+ # even missing (does not exist in current Wiki) or invalid (impossible title) pages will still be present
71
+ # in the response; they will just have a `"missing"` or `"invalid"` key, exactly as MediaWiki returns them.
72
+ def raw(*titles, prop: [])
73
+ # could emerge on "automatically" created page lists, should work
74
+ return {} if titles.empty?
75
+
76
+ titles.each_slice(50).map do |part|
77
+ response = @client
78
+ .query
79
+ .titles(*part)
80
+ .prop(:revisions, :info, *prop).prop(:content, :timestamp, :url)
81
+ .redirects
82
+ .response
83
+
84
+ sources = response['pages'].values.map { |page| [page['title'], page] }.to_h
85
+ redirects =
86
+ if response['redirects']
87
+ response['redirects'].map { |r| [r['from'], sources[r['to']]] }.to_h
88
+ else
89
+ {}
90
+ end
91
+
92
+ # This way for 'Einstein' query we'll have {'Albert Einstein' => page, 'Einstein' => same page}
93
+ sources.merge(redirects)
94
+ end.inject(:merge)
76
95
  end
77
96
 
78
97
  # Receive list of parsed MediaWiki pages for list of titles provided.
@@ -83,7 +102,12 @@ module Infoboxer
83
102
  # many queries as necessary to extract them all (it will be like
84
103
  # `(titles.count / 50.0).ceil` requests)
85
104
  #
86
- # @return [Tree::Nodes<Page>] array of parsed pages. Notes:
105
+ # @param titles [Array<String>] List of page titles to get.
106
+ # @param prop [Array<Symbol>] List of additional page properties to get, refer to
107
+ # [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
108
+ # for the list of available properties.
109
+ #
110
+ # @return [Page, Tree::Nodes<Page>] array of parsed pages. Notes:
87
111
  # * if you call `get` with only one title, one page will be
88
112
  # returned instead of an array
89
113
  # * if some of pages are not in wiki, they will not be returned,
@@ -96,22 +120,15 @@ module Infoboxer
96
120
  # Infoboxer.wp.get('Argentina', 'Chile', 'Something non-existing').
97
121
  # infobox.fetch('some value')
98
122
  # ```
99
- # and obtain meaningful results instead of NoMethodError or some
100
- # NotFound.
101
- #
102
- def get(*titles)
103
- pages = raw(*titles).
104
- tap{|pages| pages.detect(&:invalid?).tap{|i| i && fail(i.raw.invalidreason)}}.
105
- select(&:exists?).
106
- map{|raw|
107
- Page.new(self,
108
- Parser.paragraphs(raw.content, traits),
109
- raw)
110
- }
123
+ # and obtain meaningful results instead of `NoMethodError` or
124
+ # `SomethingNotFound`.
125
+ #
126
+ def get(*titles, prop: [])
127
+ pages = get_h(*titles, prop: prop).values.compact
111
128
  titles.count == 1 ? pages.first : Tree::Nodes[*pages]
112
129
  end
113
130
 
114
- # Same as {#get}, but returns hash of {requested title => page}.
131
+ # Same as {#get}, but returns hash of `{requested title => page}`.
115
132
  #
116
133
  # Useful quirks:
117
134
  # * when requested page not existing, key will be still present in
@@ -123,13 +140,18 @@ module Infoboxer
123
140
  # This allows you to be in full control of what pages of large list
124
141
  # you've received.
125
142
  #
143
+ # @param titles [Array<String>] List of page titles to get.
144
+ # @param prop [Array<Symbol>] List of additional page properties to get, refer to
145
+ # [MediaWiktory::Actions::Query#prop](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query#prop-instance_method)
146
+ # for the list of available properties.
147
+ #
126
148
  # @return [Hash<String, Page>]
127
149
  #
128
- def get_h(*titles)
129
- pages = [*get(*titles)]
130
- titles.map{|t|
131
- [t, pages.detect{|p| p.source.alt_titles.map(&:downcase).include?(t.downcase)}]
132
- }.to_h
150
+ def get_h(*titles, prop: [])
151
+ raw_pages = raw(*titles, prop: prop)
152
+ .tap { |ps| ps.detect { |_, p| p['invalid'] }.tap { |_, i| i && fail(i['invalidreason']) } }
153
+ .reject { |_, p| p.key?('missing') }
154
+ titles.map { |title| [title, make_page(raw_pages, title)] }.to_h
133
155
  end
134
156
 
135
157
  # Receive list of parsed MediaWiki pages from specified category.
@@ -139,8 +161,8 @@ module Infoboxer
139
161
  # fetched in 50-page batches, then parsed. So, for large category
140
162
  # it can really take a while to fetch all pages.
141
163
  #
142
- # @param title Category title. You can use namespaceless title (like
143
- # `"Countries in South America"`), title with namespace (like
164
+ # @param title [String] Category title. You can use namespaceless title (like
165
+ # `"Countries in South America"`), title with namespace (like
144
166
  # `"Category:Countries in South America"`) or title with local
145
167
  # namespace (like `"Catégorie:Argentine"` for French Wikipedia)
146
168
  #
@@ -148,8 +170,8 @@ module Infoboxer
148
170
  #
149
171
  def category(title)
150
172
  title = normalize_category_title(title)
151
-
152
- list(categorymembers: {title: title, limit: 50})
173
+
174
+ list(@client.query.generator(:categorymembers).title(title).limit('max'))
153
175
  end
154
176
 
155
177
  # Receive list of parsed MediaWiki pages for provided search query.
@@ -158,10 +180,10 @@ module Infoboxer
158
180
  #
159
181
  # **NB**: currently, this API **always** fetches all pages matching
160
182
  # the query, there is no option to "take first 20 pages". Pages are
161
- # fetched in 50-page batches, then parsed. So, for large category
183
+ # fetched in 50-page batches, then parsed. So, for large search query
162
184
  # it can really take a while to fetch all pages.
163
185
  #
164
- # @param query Search query. For old installations, look at
186
+ # @param query [String] Search query. For old installations, look at
165
187
  # https://www.mediawiki.org/wiki/Help:Searching
166
188
  # for search syntax. For new ones (including Wikipedia), see at
167
189
  # https://www.mediawiki.org/wiki/Help:CirrusSearch.
@@ -169,7 +191,7 @@ module Infoboxer
169
191
  # @return [Tree::Nodes<Page>] array of parsed pages.
170
192
  #
171
193
  def search(query)
172
- list(search: {search: query, limit: 50})
194
+ list(@client.query.generator(:search).search(query).limit('max'))
173
195
  end
174
196
 
175
197
  # Receive list of parsed MediaWiki pages with titles starting from prefix.
@@ -178,38 +200,44 @@ module Infoboxer
178
200
  #
179
201
  # **NB**: currently, this API **always** fetches all pages matching
180
202
  # the prefix, there is no option to "take first 20 pages". Pages are
181
- # fetched in 50-page batches, then parsed. So, for large category
203
+ # fetched in 50-page batches, then parsed. So, for large search query
182
204
  # it can really take a while to fetch all pages.
183
205
  #
184
- # @param prefix page title prefix.
206
+ # @param prefix [String] Page title prefix.
185
207
  #
186
208
  # @return [Tree::Nodes<Page>] array of parsed pages.
187
209
  #
188
210
  def prefixsearch(prefix)
189
- list(prefixsearch: {search: prefix, limit: 100})
211
+ list(@client.query.generator(:prefixsearch).search(prefix).limit('max'))
190
212
  end
191
213
 
214
+ # @return [String]
192
215
  def inspect
193
216
  "#<#{self.class}(#{@api_base_url.host})>"
194
217
  end
195
218
 
196
219
  private
197
220
 
221
+ def make_page(raw_pages, title)
222
+ _, source = raw_pages.detect { |ptitle, _| ptitle.casecmp(title).zero? }
223
+ source or return nil
224
+ Page.new(self, Parser.paragraphs(source['revisions'].first['*'], traits), source)
225
+ end
226
+
198
227
  def list(query)
199
- response = @client.query.
200
- generator(query).
201
- prop(revisions: {prop: :content}, info: {prop: :url}).
202
- redirects(true). # FIXME: should be done transparently by MediaWiktory?
203
- perform
228
+ response = query
229
+ .prop(:revisions, :info)
230
+ .prop(:content, :timestamp, :url)
231
+ .redirects
232
+ .response
204
233
 
205
- response.continue! while response.continue?
234
+ response = response.continue while response.continue?
206
235
 
207
- pages = response.pages.select(&:exists?).
208
- map{|raw|
209
- Page.new(self,
210
- Parser.paragraphs(raw.content, traits),
211
- raw)
212
- }
236
+ return Tree::Nodes[] if response['pages'].nil?
237
+
238
+ pages = response['pages']
239
+ .values.select { |p| p['missing'].nil? }
240
+ .map { |raw| Page.new(self, Parser.paragraphs(raw['revisions'].first['*'], traits), raw) }
213
241
 
214
242
  Tree::Nodes[*pages]
215
243
  end
@@ -218,7 +246,7 @@ module Infoboxer
218
246
  # FIXME: shouldn't it go to MediaWiktory?..
219
247
  namespace, titl = title.include?(':') ? title.split(':', 2) : [nil, title]
220
248
  namespace, titl = nil, title unless traits.category_namespace.include?(namespace)
221
-
249
+
222
250
  namespace ||= traits.category_namespace.first
223
251
  [namespace, titl].join(':')
224
252
  end
@@ -228,11 +256,12 @@ module Infoboxer
228
256
  end
229
257
 
230
258
  def extract_namespaces
231
- siteinfo = @client.query.meta(siteinfo: {prop: [:namespaces, :namespacealiases]}).perform
232
- siteinfo.raw.query.namespaces.map{|_, namespace|
233
- aliases = siteinfo.raw.query.namespacealiases.select{|a| a.id == namespace.id}.map{|a| a['*']}
234
- namespace.merge(aliases: aliases)
235
- }
259
+ siteinfo = @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases).response
260
+ siteinfo['namespaces'].map do |_, namespace|
261
+ aliases =
262
+ siteinfo['namespacealiases'].select { |a| a['id'] == namespace['id'] }.map { |a| a['*'] }
263
+ namespace.merge('aliases' => aliases)
264
+ end
236
265
  end
237
266
  end
238
267
  end
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+
2
3
  require_relative 'selector'
3
4
 
4
5
  module Infoboxer
@@ -104,8 +105,8 @@ module Infoboxer
104
105
 
105
106
  # Underscored version of {#lookup}
106
107
  def _lookup(selector)
107
- Tree::Nodes[_matches?(selector) ? self : nil, *children._lookup(selector)].
108
- flatten.compact
108
+ Tree::Nodes[_matches?(selector) ? self : nil, *children._lookup(selector)]
109
+ .flatten.compact
109
110
  end
110
111
 
111
112
  # Underscored version of {#lookup_children}
@@ -139,20 +140,23 @@ module Infoboxer
139
140
  def _lookup_next_siblings(selector)
140
141
  next_siblings._find(selector)
141
142
  end
142
-
143
- [:matches?,
144
- :lookup, :lookup_children, :lookup_parents,
145
- :lookup_siblings,
146
- :lookup_next_siblings, :lookup_prev_siblings
147
- ].map{|sym| [sym, :"_#{sym}"]}.each do |sym, underscored|
148
-
149
- define_method(sym){|*args, &block|
143
+
144
+ %i[
145
+ matches?
146
+ lookup lookup_children lookup_parents
147
+ lookup_siblings
148
+ lookup_next_siblings lookup_prev_siblings
149
+ ]
150
+ .map { |sym| [sym, :"_#{sym}"] }
151
+ .each do |sym, underscored|
152
+
153
+ define_method(sym) do |*args, &block|
150
154
  send(underscored, Selector.new(*args, &block))
151
- }
155
+ end
152
156
  end
153
157
 
154
158
  # Checks if node has any parent matching selectors.
155
- def has_parent?(*selectors, &block)
159
+ def parent?(*selectors, &block)
156
160
  !lookup_parents(*selectors, &block).empty?
157
161
  end
158
162
  end
@@ -181,7 +185,7 @@ module Infoboxer
181
185
 
182
186
  # Underscored version of {#find}.
183
187
  def _find(selector)
184
- select{|n| n._matches?(selector)}
188
+ select { |n| n._matches?(selector) }
185
189
  end
186
190
 
187
191
  # Selects nodes of current list (and only it, no children checks),
@@ -190,25 +194,25 @@ module Infoboxer
190
194
  _find(Selector.new(*selectors, &block))
191
195
  end
192
196
 
193
- [
194
- :_lookup, :_lookup_children, :_lookup_parents,
195
- :_lookup_siblings, :_lookup_prev_siblings, :_lookup_next_siblings
197
+ %i[
198
+ _lookup _lookup_children _lookup_parents
199
+ _lookup_siblings _lookup_prev_siblings _lookup_next_siblings
196
200
  ].each do |sym|
197
- define_method(sym){|*args|
198
- make_nodes map{|n| n.send(sym, *args)}
199
- }
201
+ define_method(sym) do |*args|
202
+ make_nodes(map { |n| n.send(sym, *args) })
203
+ end
200
204
  end
201
205
 
202
206
  # not delegate, but redefine: Selector should be constructed only once
203
- [
204
- :lookup, :lookup_children, :lookup_parents,
205
- :lookup_siblings,
206
- :lookup_next_siblings, :lookup_prev_siblings
207
- ].map{|sym| [sym, :"_#{sym}"]}.each do |sym, underscored|
207
+ %i[
208
+ lookup lookup_children lookup_parents
209
+ lookup_siblings
210
+ lookup_next_siblings lookup_prev_siblings
211
+ ].map { |sym| [sym, :"_#{sym}"] }.each do |sym, underscored|
208
212
 
209
- define_method(sym){|*args, &block|
213
+ define_method(sym) do |*args, &block|
210
214
  send(underscored, Selector.new(*args, &block))
211
- }
215
+ end
212
216
  end
213
217
  end
214
218
  end