infoboxer 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/infoboxer.gemspec +1 -1
- data/lib/infoboxer.rb +14 -2
- data/lib/infoboxer/definitions/en.wikipedia.org.rb +2 -0
- data/lib/infoboxer/media_wiki.rb +9 -6
- data/lib/infoboxer/media_wiki/page.rb +9 -0
- data/lib/infoboxer/navigation.rb +6 -0
- data/lib/infoboxer/parser/inline.rb +4 -3
- data/lib/infoboxer/tree/list.rb +2 -3
- data/lib/infoboxer/tree/template.rb +4 -0
- data/lib/infoboxer/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d1eca6a2e6e025b77b1eeed915629b24338380a8
|
4
|
+
data.tar.gz: a852e3d6cefa55b04b2a8ffe530478e8199a884f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0cb20f539dcb4fecaf1f3c57a842d407481e37a2763ee35018bd436b058e704382493fd0a0a6a008101a70e9d9c283cc9fa3b7dbd8f48deef894e8d857ea7c42
|
7
|
+
data.tar.gz: b4f9ab8d39bc3c5fca6f7f247fd8c7f54bf76de8b5ed878a9fb935c6ccf3d99f7ed768c16cd1b1529ba56742a0bb1b6a09dafd0fc2900c12f2c0a39264d938fb
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
+
## 0.3.2 (2018-02-09)
|
4
|
+
|
5
|
+
* Updated MediaWiktory to finally turn on gzip encoding of responses;
|
6
|
+
* Utility methods to expose some internals (`MediaWiki#api`, `Infoboxer#url_for(:wikipedia)`,
|
7
|
+
`Page#namespaces`, `Template#named_variables` and so on);
|
8
|
+
* Fix parsing of lowercase `file:` links in `<gallery>`.
|
9
|
+
|
3
10
|
## 0.3.1 (2017-12-04)
|
4
11
|
|
5
12
|
* (Experimental) new representation of templates, much more readable;
|
data/infoboxer.gemspec
CHANGED
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
|
|
32
32
|
s.executables << 'infoboxer'
|
33
33
|
|
34
34
|
s.add_dependency 'htmlentities'
|
35
|
-
s.add_dependency 'mediawiktory', '= 0.1.
|
35
|
+
s.add_dependency 'mediawiktory', '= 0.1.3'
|
36
36
|
s.add_dependency 'addressable'
|
37
37
|
s.add_dependency 'terminal-table'
|
38
38
|
end
|
data/lib/infoboxer.rb
CHANGED
@@ -47,7 +47,6 @@ module Infoboxer
|
|
47
47
|
# @private
|
48
48
|
WIKIA_API_URL = 'http://%s.wikia.com/api.php'.freeze
|
49
49
|
|
50
|
-
# @private
|
51
50
|
WIKIMEDIA_PROJECTS = {
|
52
51
|
wikipedia: 'wikipedia.org',
|
53
52
|
wikivoyage: 'wikivoyage.org',
|
@@ -59,7 +58,6 @@ module Infoboxer
|
|
59
58
|
wikisource: 'wikisource.org'
|
60
59
|
}.freeze
|
61
60
|
|
62
|
-
# @private
|
63
61
|
WIKIMEDIA_COMMONS = {
|
64
62
|
commons: 'commons.wikimedia.org',
|
65
63
|
species: 'species.wikimedia.org',
|
@@ -181,6 +179,20 @@ module Infoboxer
|
|
181
179
|
end
|
182
180
|
end
|
183
181
|
|
182
|
+
# Returns URL of API entry-point for a well-known Wiki-project (wikipedia, wikivoyage etc.)
|
183
|
+
# by project's name.
|
184
|
+
#
|
185
|
+
# @param symbol [Symbol] One of {WIKIMEDIA_PROJECTS} or {WIKIMEDIA_COMMONS} keys.
|
186
|
+
# @param lang [String, Symbol] Language of the project, if applicable.
|
187
|
+
# @return [String]
|
188
|
+
def url_for(symbol, lang = 'en')
|
189
|
+
if (domain = WIKIMEDIA_PROJECTS[symbol])
|
190
|
+
"https://#{lang}.#{domain}/w/api.php"
|
191
|
+
elsif (domain = WIKIMEDIA_COMMONS[symbol])
|
192
|
+
"https://#{domain}/w/api.php"
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
184
196
|
# @!method wikipedia(lang = 'en', options = {})
|
185
197
|
# Includeable version of {Infoboxer.wikipedia}
|
186
198
|
|
data/lib/infoboxer/media_wiki.rb
CHANGED
@@ -41,6 +41,9 @@ module Infoboxer
|
|
41
41
|
# @private
|
42
42
|
attr_reader :api_base_url, :traits
|
43
43
|
|
44
|
+
# @return [MediaWiktory::Wikipedia::Client]
|
45
|
+
attr_reader :api
|
46
|
+
|
44
47
|
# Creating new MediaWiki client. {Infoboxer.wiki} provides shortcut
|
45
48
|
# for it, as well as shortcuts for some well-known wikis, like
|
46
49
|
# {Infoboxer.wikipedia}.
|
@@ -51,7 +54,7 @@ module Infoboxer
|
|
51
54
|
# @param user_agent [String] (also aliased as `:ua`) Custom User-Agent header.
|
52
55
|
def initialize(api_base_url, ua: nil, user_agent: ua)
|
53
56
|
@api_base_url = Addressable::URI.parse(api_base_url)
|
54
|
-
@
|
57
|
+
@api = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
|
55
58
|
@traits = Traits.get(@api_base_url.host, siteinfo)
|
56
59
|
end
|
57
60
|
|
@@ -72,7 +75,7 @@ module Infoboxer
|
|
72
75
|
return {} if titles.empty?
|
73
76
|
|
74
77
|
titles.each_slice(50).map do |part|
|
75
|
-
request = prepare_request(@
|
78
|
+
request = prepare_request(@api.query.titles(*part), &processor)
|
76
79
|
response = request.response
|
77
80
|
|
78
81
|
# If additional props are required, there may be additional pages, even despite each_slice(50)
|
@@ -173,7 +176,7 @@ module Infoboxer
|
|
173
176
|
def category(title, limit: 'max', &processor)
|
174
177
|
title = normalize_category_title(title)
|
175
178
|
|
176
|
-
list(@
|
179
|
+
list(@api.query.generator(:categorymembers).title(title), limit, &processor)
|
177
180
|
end
|
178
181
|
|
179
182
|
# Receive list of parsed MediaWiki pages for provided search query.
|
@@ -193,7 +196,7 @@ module Infoboxer
|
|
193
196
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
194
197
|
#
|
195
198
|
def search(query, limit: 'max', &processor)
|
196
|
-
list(@
|
199
|
+
list(@api.query.generator(:search).search(query), limit, &processor)
|
197
200
|
end
|
198
201
|
|
199
202
|
# Receive list of parsed MediaWiki pages with titles starting with prefix.
|
@@ -210,7 +213,7 @@ module Infoboxer
|
|
210
213
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
211
214
|
#
|
212
215
|
def prefixsearch(prefix, limit: 'max', &processor)
|
213
|
-
list(@
|
216
|
+
list(@api.query.generator(:prefixsearch).search(prefix), limit, &processor)
|
214
217
|
end
|
215
218
|
|
216
219
|
# @return [String]
|
@@ -260,7 +263,7 @@ module Infoboxer
|
|
260
263
|
end
|
261
264
|
|
262
265
|
def siteinfo
|
263
|
-
@siteinfo ||= @
|
266
|
+
@siteinfo ||= @api.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
|
264
267
|
end
|
265
268
|
|
266
269
|
def interwikis(prefix)
|
data/lib/infoboxer/media_wiki/page.rb
CHANGED
@@ -33,6 +33,15 @@ module Infoboxer
|
|
33
33
|
client.traits
|
34
34
|
end
|
35
35
|
|
36
|
+
# FIXME: take from siteinfo!
|
37
|
+
def namespace
|
38
|
+
Traits::STANDARD_NAMESPACES[source.fetch('ns') + 2] # Media = -2, Special = -1, Main = 0
|
39
|
+
end
|
40
|
+
|
41
|
+
def category?
|
42
|
+
namespace == 'Category'
|
43
|
+
end
|
44
|
+
|
36
45
|
private
|
37
46
|
|
38
47
|
PARAMS_TO_INSPECT = %i[url title].freeze
|
data/lib/infoboxer/navigation.rb
CHANGED
@@ -16,7 +16,7 @@ module Infoboxer
|
|
16
16
|
|
17
17
|
if @context.eof?
|
18
18
|
break unless until_pattern
|
19
|
-
@context.fail!("#{until_pattern} not found, starting from #{start}")
|
19
|
+
@context.fail!("#{until_pattern.source} not found, starting from #{start}")
|
20
20
|
end
|
21
21
|
|
22
22
|
if @context.eol?
|
@@ -64,7 +64,7 @@ module Infoboxer
|
|
64
64
|
|
65
65
|
if @context.eof?
|
66
66
|
break unless until_pattern
|
67
|
-
@context.fail!("#{until_pattern} not found")
|
67
|
+
@context.fail!("#{until_pattern.source} not found")
|
68
68
|
end
|
69
69
|
|
70
70
|
if @context.eol?
|
@@ -178,7 +178,8 @@ module Infoboxer
|
|
178
178
|
path = @context.scan_until(%r{</gallery>|\||$})
|
179
179
|
attrs = @context.matched == '|' ? gallery_image_attrs : {}
|
180
180
|
unless path.empty?
|
181
|
-
|
181
|
+
# FIXME: what if path does NOT match the namespace?
|
182
|
+
images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), attrs)
|
182
183
|
end
|
183
184
|
break if @context.matched == '</gallery>'
|
184
185
|
end
|
data/lib/infoboxer/tree/list.rb
CHANGED
@@ -13,9 +13,8 @@ module Infoboxer
|
|
13
13
|
# Internal, used by {Parser}
|
14
14
|
def merge!(other)
|
15
15
|
ochildren = other.children.dup
|
16
|
-
|
17
|
-
children.last.
|
18
|
-
end
|
16
|
+
children.last.merge!(ochildren.shift) \
|
17
|
+
if children.last && children.last.can_merge?(ochildren.first)
|
19
18
|
push_children(*ochildren)
|
20
19
|
end
|
21
20
|
|
data/lib/infoboxer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: infoboxer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Shepelev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - '='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.1.
|
33
|
+
version: 0.1.3
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.1.
|
40
|
+
version: 0.1.3
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: addressable
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -171,7 +171,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
171
171
|
version: '0'
|
172
172
|
requirements: []
|
173
173
|
rubyforge_project:
|
174
|
-
rubygems_version: 2.6.
|
174
|
+
rubygems_version: 2.6.14
|
175
175
|
signing_key:
|
176
176
|
specification_version: 4
|
177
177
|
summary: MediaWiki client and parser, targeting information extraction.
|