infoboxer 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/infoboxer.gemspec +1 -1
- data/lib/infoboxer.rb +14 -2
- data/lib/infoboxer/definitions/en.wikipedia.org.rb +2 -0
- data/lib/infoboxer/media_wiki.rb +9 -6
- data/lib/infoboxer/media_wiki/page.rb +9 -0
- data/lib/infoboxer/navigation.rb +6 -0
- data/lib/infoboxer/parser/inline.rb +4 -3
- data/lib/infoboxer/tree/list.rb +2 -3
- data/lib/infoboxer/tree/template.rb +4 -0
- data/lib/infoboxer/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d1eca6a2e6e025b77b1eeed915629b24338380a8
|
4
|
+
data.tar.gz: a852e3d6cefa55b04b2a8ffe530478e8199a884f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0cb20f539dcb4fecaf1f3c57a842d407481e37a2763ee35018bd436b058e704382493fd0a0a6a008101a70e9d9c283cc9fa3b7dbd8f48deef894e8d857ea7c42
|
7
|
+
data.tar.gz: b4f9ab8d39bc3c5fca6f7f247fd8c7f54bf76de8b5ed878a9fb935c6ccf3d99f7ed768c16cd1b1529ba56742a0bb1b6a09dafd0fc2900c12f2c0a39264d938fb
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
+
## 0.3.2 (2018-02-09)
|
4
|
+
|
5
|
+
* Updated MediaWiktory to finally turn on gzip encoding of responses;
|
6
|
+
* Utility methods to expose some internals (`MediaWiki#api`, `Infoboxer#url_for(:wikipedia)`,
|
7
|
+
`Page#namespace`, `Template#named_variables` and so on);
|
8
|
+
* Fix parsing of lowercase `file:` links in `<gallery>`.
|
9
|
+
|
3
10
|
## 0.3.1 (2017-12-04)
|
4
11
|
|
5
12
|
* (Experimental) new representation of templates, much more readable;
|
data/infoboxer.gemspec
CHANGED
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
|
|
32
32
|
s.executables << 'infoboxer'
|
33
33
|
|
34
34
|
s.add_dependency 'htmlentities'
|
35
|
-
s.add_dependency 'mediawiktory', '= 0.1.
|
35
|
+
s.add_dependency 'mediawiktory', '= 0.1.3'
|
36
36
|
s.add_dependency 'addressable'
|
37
37
|
s.add_dependency 'terminal-table'
|
38
38
|
end
|
data/lib/infoboxer.rb
CHANGED
@@ -47,7 +47,6 @@ module Infoboxer
|
|
47
47
|
# @private
|
48
48
|
WIKIA_API_URL = 'http://%s.wikia.com/api.php'.freeze
|
49
49
|
|
50
|
-
# @private
|
51
50
|
WIKIMEDIA_PROJECTS = {
|
52
51
|
wikipedia: 'wikipedia.org',
|
53
52
|
wikivoyage: 'wikivoyage.org',
|
@@ -59,7 +58,6 @@ module Infoboxer
|
|
59
58
|
wikisource: 'wikisource.org'
|
60
59
|
}.freeze
|
61
60
|
|
62
|
-
# @private
|
63
61
|
WIKIMEDIA_COMMONS = {
|
64
62
|
commons: 'commons.wikimedia.org',
|
65
63
|
species: 'species.wikimedia.org',
|
@@ -181,6 +179,20 @@ module Infoboxer
|
|
181
179
|
end
|
182
180
|
end
|
183
181
|
|
182
|
+
# Returns URL of API entry-point for a well-known Wiki-project (wikipedia, wikivoyage etc.)
|
183
|
+
# by project's name.
|
184
|
+
#
|
185
|
+
# @param symbol [Symbol] One of {WIKIMEDIA_PROJECTS} or {WIKIMEDIA_COMMONS} keys.
|
186
|
+
# @param lang [String, Symbol] Language of the project, if applicable.
|
187
|
+
# @return [String]
|
188
|
+
def url_for(symbol, lang = 'en')
|
189
|
+
if (domain = WIKIMEDIA_PROJECTS[symbol])
|
190
|
+
"https://#{lang}.#{domain}/w/api.php"
|
191
|
+
elsif (domain = WIKIMEDIA_COMMONS[symbol])
|
192
|
+
"https://#{domain}/w/api.php"
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
184
196
|
# @!method wikipedia(lang = 'en', options = {})
|
185
197
|
# Includeable version of {Infoboxer.wikipedia}
|
186
198
|
|
data/lib/infoboxer/media_wiki.rb
CHANGED
@@ -41,6 +41,9 @@ module Infoboxer
|
|
41
41
|
# @private
|
42
42
|
attr_reader :api_base_url, :traits
|
43
43
|
|
44
|
+
# @return [MediaWiktory::Wikipedia::Client]
|
45
|
+
attr_reader :api
|
46
|
+
|
44
47
|
# Creating new MediaWiki client. {Infoboxer.wiki} provides shortcut
|
45
48
|
# for it, as well as shortcuts for some well-known wikis, like
|
46
49
|
# {Infoboxer.wikipedia}.
|
@@ -51,7 +54,7 @@ module Infoboxer
|
|
51
54
|
# @param user_agent [String] (also aliased as `:ua`) Custom User-Agent header.
|
52
55
|
def initialize(api_base_url, ua: nil, user_agent: ua)
|
53
56
|
@api_base_url = Addressable::URI.parse(api_base_url)
|
54
|
-
@
|
57
|
+
@api = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
|
55
58
|
@traits = Traits.get(@api_base_url.host, siteinfo)
|
56
59
|
end
|
57
60
|
|
@@ -72,7 +75,7 @@ module Infoboxer
|
|
72
75
|
return {} if titles.empty?
|
73
76
|
|
74
77
|
titles.each_slice(50).map do |part|
|
75
|
-
request = prepare_request(@
|
78
|
+
request = prepare_request(@api.query.titles(*part), &processor)
|
76
79
|
response = request.response
|
77
80
|
|
78
81
|
# If additional props are required, there may be additional pages, even despite each_slice(50)
|
@@ -173,7 +176,7 @@ module Infoboxer
|
|
173
176
|
def category(title, limit: 'max', &processor)
|
174
177
|
title = normalize_category_title(title)
|
175
178
|
|
176
|
-
list(@
|
179
|
+
list(@api.query.generator(:categorymembers).title(title), limit, &processor)
|
177
180
|
end
|
178
181
|
|
179
182
|
# Receive list of parsed MediaWiki pages for provided search query.
|
@@ -193,7 +196,7 @@ module Infoboxer
|
|
193
196
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
194
197
|
#
|
195
198
|
def search(query, limit: 'max', &processor)
|
196
|
-
list(@
|
199
|
+
list(@api.query.generator(:search).search(query), limit, &processor)
|
197
200
|
end
|
198
201
|
|
199
202
|
# Receive list of parsed MediaWiki pages with titles starting from prefix.
|
@@ -210,7 +213,7 @@ module Infoboxer
|
|
210
213
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
211
214
|
#
|
212
215
|
def prefixsearch(prefix, limit: 'max', &processor)
|
213
|
-
list(@
|
216
|
+
list(@api.query.generator(:prefixsearch).search(prefix), limit, &processor)
|
214
217
|
end
|
215
218
|
|
216
219
|
# @return [String]
|
@@ -260,7 +263,7 @@ module Infoboxer
|
|
260
263
|
end
|
261
264
|
|
262
265
|
def siteinfo
|
263
|
-
@siteinfo ||= @
|
266
|
+
@siteinfo ||= @api.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
|
264
267
|
end
|
265
268
|
|
266
269
|
def interwikis(prefix)
|
data/lib/infoboxer/media_wiki/page.rb
CHANGED
@@ -33,6 +33,15 @@ module Infoboxer
|
|
33
33
|
client.traits
|
34
34
|
end
|
35
35
|
|
36
|
+
# FIXME: take from siteinfo!
|
37
|
+
def namespace
|
38
|
+
Traits::STANDARD_NAMESPACES[source.fetch('ns') + 2] # Media = -2, Special = -1, Main = 0
|
39
|
+
end
|
40
|
+
|
41
|
+
def category?
|
42
|
+
namespace == 'Category'
|
43
|
+
end
|
44
|
+
|
36
45
|
private
|
37
46
|
|
38
47
|
PARAMS_TO_INSPECT = %i[url title].freeze
|
data/lib/infoboxer/navigation.rb
CHANGED
data/lib/infoboxer/parser/inline.rb
CHANGED
@@ -16,7 +16,7 @@ module Infoboxer
|
|
16
16
|
|
17
17
|
if @context.eof?
|
18
18
|
break unless until_pattern
|
19
|
-
@context.fail!("#{until_pattern} not found, starting from #{start}")
|
19
|
+
@context.fail!("#{until_pattern.source} not found, starting from #{start}")
|
20
20
|
end
|
21
21
|
|
22
22
|
if @context.eol?
|
@@ -64,7 +64,7 @@ module Infoboxer
|
|
64
64
|
|
65
65
|
if @context.eof?
|
66
66
|
break unless until_pattern
|
67
|
-
@context.fail!("#{until_pattern} not found")
|
67
|
+
@context.fail!("#{until_pattern.source} not found")
|
68
68
|
end
|
69
69
|
|
70
70
|
if @context.eol?
|
@@ -178,7 +178,8 @@ module Infoboxer
|
|
178
178
|
path = @context.scan_until(%r{</gallery>|\||$})
|
179
179
|
attrs = @context.matched == '|' ? gallery_image_attrs : {}
|
180
180
|
unless path.empty?
|
181
|
-
|
181
|
+
# FIXME: what if path does NOT match the namespace?
|
182
|
+
images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), attrs)
|
182
183
|
end
|
183
184
|
break if @context.matched == '</gallery>'
|
184
185
|
end
|
data/lib/infoboxer/tree/list.rb
CHANGED
@@ -13,9 +13,8 @@ module Infoboxer
|
|
13
13
|
# Internal, used by {Parser}
|
14
14
|
def merge!(other)
|
15
15
|
ochildren = other.children.dup
|
16
|
-
|
17
|
-
children.last.
|
18
|
-
end
|
16
|
+
children.last.merge!(ochildren.shift) \
|
17
|
+
if children.last && children.last.can_merge?(ochildren.first)
|
19
18
|
push_children(*ochildren)
|
20
19
|
end
|
21
20
|
|
data/lib/infoboxer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: infoboxer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Shepelev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - '='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.1.
|
33
|
+
version: 0.1.3
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.1.
|
40
|
+
version: 0.1.3
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: addressable
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -171,7 +171,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
171
171
|
version: '0'
|
172
172
|
requirements: []
|
173
173
|
rubyforge_project:
|
174
|
-
rubygems_version: 2.6.
|
174
|
+
rubygems_version: 2.6.14
|
175
175
|
signing_key:
|
176
176
|
specification_version: 4
|
177
177
|
summary: MediaWiki client and parser, targeting information extraction.
|