infoboxer 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 67bb7aed02bc7048e3508902a5b921be691b8bac
4
- data.tar.gz: 0c1b8de5e72f824f29802d5a1e202e914ad3c241
3
+ metadata.gz: d1eca6a2e6e025b77b1eeed915629b24338380a8
4
+ data.tar.gz: a852e3d6cefa55b04b2a8ffe530478e8199a884f
5
5
  SHA512:
6
- metadata.gz: ee0babd55c7fe433dc3b55aa6988abf17ddee81424f56bd83b31dcb076dcdcfd22c966b50a26b979e5a917e096847bdaf9390063807694133734671201cb5003
7
- data.tar.gz: b70854401485cb7981110c3da6a5f7d4b4623545b3b1af4b05c4b541514efa9727818a09c25d89dbe55b31a79c2e772adedf06f2b1211386f17242ddb11cde1a
6
+ metadata.gz: 0cb20f539dcb4fecaf1f3c57a842d407481e37a2763ee35018bd436b058e704382493fd0a0a6a008101a70e9d9c283cc9fa3b7dbd8f48deef894e8d857ea7c42
7
+ data.tar.gz: b4f9ab8d39bc3c5fca6f7f247fd8c7f54bf76de8b5ed878a9fb935c6ccf3d99f7ed768c16cd1b1529ba56742a0bb1b6a09dafd0fc2900c12f2c0a39264d938fb
@@ -1,5 +1,12 @@
1
1
  # Infoboxer's change log
2
2
 
3
+ ## 0.3.2 (2018-02-09)
4
+
5
+ * Updated MediaWiktory to finally turn on gzip encoding of responses;
6
+ * Utility methods to expose some internals (`MediaWiki#api`, `Infoboxer#url_for(:wikipedia)`,
7
+ `Page#namespaces`, `Template#named_variables` and so on);
8
+ * Fix parsing of lowercase `file:` links in `<gallery>`.
9
+
3
10
  ## 0.3.1 (2017-12-04)
4
11
 
5
12
  * (Experimental) new representation of templates, much more readable;
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
32
32
  s.executables << 'infoboxer'
33
33
 
34
34
  s.add_dependency 'htmlentities'
35
- s.add_dependency 'mediawiktory', '= 0.1.2'
35
+ s.add_dependency 'mediawiktory', '= 0.1.3'
36
36
  s.add_dependency 'addressable'
37
37
  s.add_dependency 'terminal-table'
38
38
  end
@@ -47,7 +47,6 @@ module Infoboxer
47
47
  # @private
48
48
  WIKIA_API_URL = 'http://%s.wikia.com/api.php'.freeze
49
49
 
50
- # @private
51
50
  WIKIMEDIA_PROJECTS = {
52
51
  wikipedia: 'wikipedia.org',
53
52
  wikivoyage: 'wikivoyage.org',
@@ -59,7 +58,6 @@ module Infoboxer
59
58
  wikisource: 'wikisource.org'
60
59
  }.freeze
61
60
 
62
- # @private
63
61
  WIKIMEDIA_COMMONS = {
64
62
  commons: 'commons.wikimedia.org',
65
63
  species: 'species.wikimedia.org',
@@ -181,6 +179,20 @@ module Infoboxer
181
179
  end
182
180
  end
183
181
 
182
+ # Returns URL of API entry-point for a well-known Wiki-project (wikipedia, wikivoyage etc.)
183
+ # by project's name.
184
+ #
185
+ # @param symbol [Symbol] One of {WIKIMEDIA_PROJECTS} or {WIKIMEDIA_COMMONS} keys.
186
+ # @param lang [String, Symbol] Language of the project, if applicable.
187
+ # @return [String]
188
+ def url_for(symbol, lang = 'en')
189
+ if (domain = WIKIMEDIA_PROJECTS[symbol])
190
+ "https://#{lang}.#{domain}/w/api.php"
191
+ elsif (domain = WIKIMEDIA_COMMONS[symbol])
192
+ "https://#{domain}/w/api.php"
193
+ end
194
+ end
195
+
184
196
  # @!method wikipedia(lang = 'en', options = {})
185
197
  # Includeable version of {Infoboxer.wikipedia}
186
198
 
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Layout/EmptyLinesAroundArguments
1
2
  module Infoboxer
2
3
  MediaWiki::Traits.for('en.wikipedia.org') do
3
4
  templates do
@@ -370,3 +371,4 @@ module Infoboxer
370
371
  end
371
372
  end
372
373
  end
374
+ # rubocop:enable Layout/EmptyLinesAroundArguments
@@ -41,6 +41,9 @@ module Infoboxer
41
41
  # @private
42
42
  attr_reader :api_base_url, :traits
43
43
 
44
+ # @return [MediaWiktory::Wikipedia::Client]
45
+ attr_reader :api
46
+
44
47
  # Creating new MediaWiki client. {Infoboxer.wiki} provides shortcut
45
48
  # for it, as well as shortcuts for some well-known wikis, like
46
49
  # {Infoboxer.wikipedia}.
@@ -51,7 +54,7 @@ module Infoboxer
51
54
  # @param user_agent [String] (also aliased as `:ua`) Custom User-Agent header.
52
55
  def initialize(api_base_url, ua: nil, user_agent: ua)
53
56
  @api_base_url = Addressable::URI.parse(api_base_url)
54
- @client = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
57
+ @api = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
55
58
  @traits = Traits.get(@api_base_url.host, siteinfo)
56
59
  end
57
60
 
@@ -72,7 +75,7 @@ module Infoboxer
72
75
  return {} if titles.empty?
73
76
 
74
77
  titles.each_slice(50).map do |part|
75
- request = prepare_request(@client.query.titles(*part), &processor)
78
+ request = prepare_request(@api.query.titles(*part), &processor)
76
79
  response = request.response
77
80
 
78
81
  # If additional props are required, there may be additional pages, even despite each_slice(50)
@@ -173,7 +176,7 @@ module Infoboxer
173
176
  def category(title, limit: 'max', &processor)
174
177
  title = normalize_category_title(title)
175
178
 
176
- list(@client.query.generator(:categorymembers).title(title), limit, &processor)
179
+ list(@api.query.generator(:categorymembers).title(title), limit, &processor)
177
180
  end
178
181
 
179
182
  # Receive list of parsed MediaWiki pages for provided search query.
@@ -193,7 +196,7 @@ module Infoboxer
193
196
  # @return [Tree::Nodes<Page>] array of parsed pages.
194
197
  #
195
198
  def search(query, limit: 'max', &processor)
196
- list(@client.query.generator(:search).search(query), limit, &processor)
199
+ list(@api.query.generator(:search).search(query), limit, &processor)
197
200
  end
198
201
 
199
202
  # Receive list of parsed MediaWiki pages with titles starting from prefix.
@@ -210,7 +213,7 @@ module Infoboxer
210
213
  # @return [Tree::Nodes<Page>] array of parsed pages.
211
214
  #
212
215
  def prefixsearch(prefix, limit: 'max', &processor)
213
- list(@client.query.generator(:prefixsearch).search(prefix), limit, &processor)
216
+ list(@api.query.generator(:prefixsearch).search(prefix), limit, &processor)
214
217
  end
215
218
 
216
219
  # @return [String]
@@ -260,7 +263,7 @@ module Infoboxer
260
263
  end
261
264
 
262
265
  def siteinfo
263
- @siteinfo ||= @client.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
266
+ @siteinfo ||= @api.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
264
267
  end
265
268
 
266
269
  def interwikis(prefix)
@@ -33,6 +33,15 @@ module Infoboxer
33
33
  client.traits
34
34
  end
35
35
 
36
+ # FIXME: take from siteinfo!
37
+ def namespace
38
+ Traits::STANDARD_NAMESPACES[source.fetch('ns') + 2] # Media = -2, Special = -1, Main = 0
39
+ end
40
+
41
+ def category?
42
+ namespace == 'Category'
43
+ end
44
+
36
45
  private
37
46
 
38
47
  PARAMS_TO_INSPECT = %i[url title].freeze
@@ -94,5 +94,11 @@ module Infoboxer
94
94
  class Tree::Document
95
95
  include Navigation::Sections::Container
96
96
  end
97
+
98
+ module Helpers
99
+ def W(*arg, &block) # rubocop:disable Naming/MethodName
100
+ Lookup::Selector.new(*arg, &block)
101
+ end
102
+ end
97
103
  end
98
104
  end
@@ -16,7 +16,7 @@ module Infoboxer
16
16
 
17
17
  if @context.eof?
18
18
  break unless until_pattern
19
- @context.fail!("#{until_pattern} not found, starting from #{start}")
19
+ @context.fail!("#{until_pattern.source} not found, starting from #{start}")
20
20
  end
21
21
 
22
22
  if @context.eol?
@@ -64,7 +64,7 @@ module Infoboxer
64
64
 
65
65
  if @context.eof?
66
66
  break unless until_pattern
67
- @context.fail!("#{until_pattern} not found")
67
+ @context.fail!("#{until_pattern.source} not found")
68
68
  end
69
69
 
70
70
  if @context.eol?
@@ -178,7 +178,8 @@ module Infoboxer
178
178
  path = @context.scan_until(%r{</gallery>|\||$})
179
179
  attrs = @context.matched == '|' ? gallery_image_attrs : {}
180
180
  unless path.empty?
181
- images << Tree::Image.new(path.sub(/^#{re.file_namespace}/, ''), attrs)
181
+ # FIXME: what if path does NOT match the namespace?
182
+ images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), attrs)
182
183
  end
183
184
  break if @context.matched == '</gallery>'
184
185
  end
@@ -13,9 +13,8 @@ module Infoboxer
13
13
  # Internal, used by {Parser}
14
14
  def merge!(other)
15
15
  ochildren = other.children.dup
16
- if children.last && children.last.can_merge?(ochildren.first)
17
- children.last.merge!(ochildren.shift)
18
- end
16
+ children.last.merge!(ochildren.shift) \
17
+ if children.last && children.last.can_merge?(ochildren.first)
19
18
  push_children(*ochildren)
20
19
  end
21
20
 
@@ -149,6 +149,10 @@ module Infoboxer
149
149
  variables.reject(&:named?)
150
150
  end
151
151
 
152
+ def named_variables
153
+ variables.select(&:named?)
154
+ end
155
+
152
156
  # Fetches template variable(s) by name(s) or patterns.
153
157
  #
154
158
  # Usage:
@@ -1,7 +1,7 @@
1
1
  module Infoboxer
2
2
  MAJOR = 0
3
3
  MINOR = 3
4
- PATCH = 1
4
+ PATCH = 2
5
5
  PRE = nil
6
6
  VERSION = [MAJOR, MINOR, PATCH, PRE].compact.join('.')
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: infoboxer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Shepelev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-04 00:00:00.000000000 Z
11
+ date: 2018-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - '='
32
32
  - !ruby/object:Gem::Version
33
- version: 0.1.2
33
+ version: 0.1.3
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - '='
39
39
  - !ruby/object:Gem::Version
40
- version: 0.1.2
40
+ version: 0.1.3
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: addressable
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -171,7 +171,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
171
171
  version: '0'
172
172
  requirements: []
173
173
  rubyforge_project:
174
- rubygems_version: 2.6.10
174
+ rubygems_version: 2.6.14
175
175
  signing_key:
176
176
  specification_version: 4
177
177
  summary: MediaWiki client and parser, targeting information extraction.