infoboxer 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 67bb7aed02bc7048e3508902a5b921be691b8bac
4
- data.tar.gz: 0c1b8de5e72f824f29802d5a1e202e914ad3c241
3
+ metadata.gz: d1eca6a2e6e025b77b1eeed915629b24338380a8
4
+ data.tar.gz: a852e3d6cefa55b04b2a8ffe530478e8199a884f
5
5
  SHA512:
6
- metadata.gz: ee0babd55c7fe433dc3b55aa6988abf17ddee81424f56bd83b31dcb076dcdcfd22c966b50a26b979e5a917e096847bdaf9390063807694133734671201cb5003
7
- data.tar.gz: b70854401485cb7981110c3da6a5f7d4b4623545b3b1af4b05c4b541514efa9727818a09c25d89dbe55b31a79c2e772adedf06f2b1211386f17242ddb11cde1a
6
+ metadata.gz: 0cb20f539dcb4fecaf1f3c57a842d407481e37a2763ee35018bd436b058e704382493fd0a0a6a008101a70e9d9c283cc9fa3b7dbd8f48deef894e8d857ea7c42
7
+ data.tar.gz: b4f9ab8d39bc3c5fca6f7f247fd8c7f54bf76de8b5ed878a9fb935c6ccf3d99f7ed768c16cd1b1529ba56742a0bb1b6a09dafd0fc2900c12f2c0a39264d938fb
@@ -1,5 +1,12 @@
1
1
  # Infoboxer's change log
2
2
 
3
+ ## 0.3.2 (2018-02-09)
4
+
5
+ * Updated MediaWiktory to finally turn on gzip encoding of responses;
6
+ * Utility methods to expose some internals (`MediaWiki#api`, `Infoboxer#url_for(:wikipedia)`,
7
+ `Page#namespaces`, `Template#named_variables` and so on);
8
+ * Fix parsing of lowercase `file:` links in `<gallery>`.
9
+
3
10
  ## 0.3.1 (2017-12-04)
4
11
 
5
12
  * (Experimental) new representation of templates, much more readable;
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
32
32
  s.executables << 'infoboxer'
33
33
 
34
34
  s.add_dependency 'htmlentities'
35
- s.add_dependency 'mediawiktory', '= 0.1.2'
35
+ s.add_dependency 'mediawiktory', '= 0.1.3'
36
36
  s.add_dependency 'addressable'
37
37
  s.add_dependency 'terminal-table'
38
38
  end
@@ -47,7 +47,6 @@ module Infoboxer
47
47
  # @private
48
48
  WIKIA_API_URL = 'http://%s.wikia.com/api.php'.freeze
49
49
 
50
- # @private
51
50
  WIKIMEDIA_PROJECTS = {
52
51
  wikipedia: 'wikipedia.org',
53
52
  wikivoyage: 'wikivoyage.org',
@@ -59,7 +58,6 @@ module Infoboxer
59
58
  wikisource: 'wikisource.org'
60
59
  }.freeze
61
60
 
62
- # @private
63
61
  WIKIMEDIA_COMMONS = {
64
62
  commons: 'commons.wikimedia.org',
65
63
  species: 'species.wikimedia.org',
@@ -181,6 +179,20 @@ module Infoboxer
181
179
  end
182
180
  end
183
181
 
182
+ # Returns URL of API entry-point for a well-known Wiki-project (wikipedia, wikivoyage etc.)
183
+ # by project's name.
184
+ #
185
+ # @param symbol [Symbol] One of {WIKIMEDIA_PROJECTS} or {WIKIMEDIA_COMMONS} keys.
186
+ # @param lang [String, Symbol] Language of the project, if applicable.
187
+ # @return [String]
188
+ def url_for(symbol, lang = 'en')
189
+ if (domain = WIKIMEDIA_PROJECTS[symbol])
190
+ "https://#{lang}.#{domain}/w/api.php"
191
+ elsif (domain = WIKIMEDIA_COMMONS[symbol])
192
+ "https://#{domain}/w/api.php"
193
+ end
194
+ end
195
+
184
196
  # @!method wikipedia(lang = 'en', options = {})
185
197
  # Includeable version of {Infoboxer.wikipedia}
186
198
 
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Layout/EmptyLinesAroundArguments
1
2
  module Infoboxer
2
3
  MediaWiki::Traits.for('en.wikipedia.org') do
3
4
  templates do
@@ -370,3 +371,4 @@ module Infoboxer
370
371
  end
371
372
  end
372
373
  end
374
+ # rubocop:enable Layout/EmptyLinesAroundArguments
@@ -41,6 +41,9 @@ module Infoboxer
41
41
  # @private
42
42
  attr_reader :api_base_url, :traits
43
43
 
44
+ # @return [MediaWiktory::Wikipedia::Client]
45
+ attr_reader :api
46
+
44
47
  # Creating new MediaWiki client. {Infoboxer.wiki} provides shortcut
45
48
  # for it, as well as shortcuts for some well-known wikis, like
46
49
  # {Infoboxer.wikipedia}.
@@ -51,7 +54,7 @@ module Infoboxer
51
54
  # @param user_agent [String] (also aliased as `:ua`) Custom User-Agent header.
52
55
  def initialize(api_base_url, ua: nil, user_agent: ua)
53
56
  @api_base_url = Addressable::URI.parse(api_base_url)
54
- @client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
57
+ @api = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
55
58
  @traits = Traits.get(@api_base_url.host, siteinfo)
56
59
  end
57
60
 
@@ -72,7 +75,7 @@ module Infoboxer
72
75
  return {} if titles.empty?
73
76
 
74
77
  titles.each_slice(50).map do |part|
75
- request = prepare_request(@client.query.titles(*part), &processor)
78
+ request = prepare_request(@api.query.titles(*part), &processor)
76
79
  response = request.response
77
80
 
78
81
  # If additional props are required, there may be additional pages, even despite each_slice(50)
@@ -173,7 +176,7 @@ module Infoboxer
173
176
  def category(title, limit: 'max', &processor)
174
177
  title = normalize_category_title(title)
175
178
 
176
- list(@client.query.generator(:categorymembers).title(title), limit, &processor)
179
+ list(@api.query.generator(:categorymembers).title(title), limit, &processor)
177
180
  end
178
181
 
179
182
  # Receive list of parsed MediaWiki pages for provided search query.
@@ -193,7 +196,7 @@ module Infoboxer
193
196
  # @return [Tree::Nodes<Page>] array of parsed pages.
194
197
  #
195
198
  def search(query, limit: 'max', &processor)
196
- list(@client.query.generator(:search).search(query), limit, &processor)
199
+ list(@api.query.generator(:search).search(query), limit, &processor)
197
200
  end
198
201
 
199
202
  # Receive list of parsed MediaWiki pages with titles starting from prefix.
@@ -210,7 +213,7 @@ module Infoboxer
210
213
  # @return [Tree::Nodes<Page>] array of parsed pages.
211
214
  #
212
215
  def prefixsearch(prefix, limit: 'max', &processor)
213
- list(@client.query.generator(:prefixsearch).search(prefix), limit, &processor)
216
+ list(@api.query.generator(:prefixsearch).search(prefix), limit, &processor)
214
217
  end
215
218
 
216
219
  # @return [String]
@@ -260,7 +263,7 @@ module Infoboxer
260
263
  end
261
264
 
262
265
  def siteinfo
263
- @siteinfo ||= @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
266
+ @siteinfo ||= @api.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
264
267
  end
265
268
 
266
269
  def interwikis(prefix)
@@ -33,6 +33,15 @@ module Infoboxer
33
33
  client.traits
34
34
  end
35
35
 
36
+ # FIXME: take from siteinfo!
37
+ def namespace
38
+ Traits::STANDARD_NAMESPACES[source.fetch('ns') + 2] # Media = -2, Special = -1, Main = 0
39
+ end
40
+
41
+ def category?
42
+ namespace == 'Category'
43
+ end
44
+
36
45
  private
37
46
 
38
47
  PARAMS_TO_INSPECT = %i[url title].freeze
@@ -94,5 +94,11 @@ module Infoboxer
94
94
  class Tree::Document
95
95
  include Navigation::Sections::Container
96
96
  end
97
+
98
+ module Helpers
99
+ def W(*arg, &block) # rubocop:disable Naming/MethodName
100
+ Lookup::Selector.new(*arg, &block)
101
+ end
102
+ end
97
103
  end
98
104
  end
@@ -16,7 +16,7 @@ module Infoboxer
16
16
 
17
17
  if @context.eof?
18
18
  break unless until_pattern
19
- @context.fail!("#{until_pattern} not found, starting from #{start}")
19
+ @context.fail!("#{until_pattern.source} not found, starting from #{start}")
20
20
  end
21
21
 
22
22
  if @context.eol?
@@ -64,7 +64,7 @@ module Infoboxer
64
64
 
65
65
  if @context.eof?
66
66
  break unless until_pattern
67
- @context.fail!("#{until_pattern} not found")
67
+ @context.fail!("#{until_pattern.source} not found")
68
68
  end
69
69
 
70
70
  if @context.eol?
@@ -178,7 +178,8 @@ module Infoboxer
178
178
  path = @context.scan_until(%r{</gallery>|\||$})
179
179
  attrs = @context.matched == '|' ? gallery_image_attrs : {}
180
180
  unless path.empty?
181
- images << Tree::Image.new(path.sub(/^#{re.file_namespace}/, ''), attrs)
181
+ # FIXME: what if path does NOT match the namespace?
182
+ images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), attrs)
182
183
  end
183
184
  break if @context.matched == '</gallery>'
184
185
  end
@@ -13,9 +13,8 @@ module Infoboxer
13
13
  # Internal, used by {Parser}
14
14
  def merge!(other)
15
15
  ochildren = other.children.dup
16
- if children.last && children.last.can_merge?(ochildren.first)
17
- children.last.merge!(ochildren.shift)
18
- end
16
+ children.last.merge!(ochildren.shift) \
17
+ if children.last && children.last.can_merge?(ochildren.first)
19
18
  push_children(*ochildren)
20
19
  end
21
20
 
@@ -149,6 +149,10 @@ module Infoboxer
149
149
  variables.reject(&:named?)
150
150
  end
151
151
 
152
+ def named_variables
153
+ variables.select(&:named?)
154
+ end
155
+
152
156
  # Fetches template variable(s) by name(s) or patterns.
153
157
  #
154
158
  # Usage:
@@ -1,7 +1,7 @@
1
1
  module Infoboxer
2
2
  MAJOR = 0
3
3
  MINOR = 3
4
- PATCH = 1
4
+ PATCH = 2
5
5
  PRE = nil
6
6
  VERSION = [MAJOR, MINOR, PATCH, PRE].compact.join('.')
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: infoboxer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Shepelev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-04 00:00:00.000000000 Z
11
+ date: 2018-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - '='
32
32
  - !ruby/object:Gem::Version
33
- version: 0.1.2
33
+ version: 0.1.3
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - '='
39
39
  - !ruby/object:Gem::Version
40
- version: 0.1.2
40
+ version: 0.1.3
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: addressable
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -171,7 +171,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
171
171
  version: '0'
172
172
  requirements: []
173
173
  rubyforge_project:
174
- rubygems_version: 2.6.10
174
+ rubygems_version: 2.6.14
175
175
  signing_key:
176
176
  specification_version: 4
177
177
  summary: MediaWiki client and parser, targeting information extraction.