infoboxer 0.3.1.pre → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/infoboxer.gemspec +1 -1
- data/lib/infoboxer.rb +0 -2
- data/lib/infoboxer/core_ext.rb +0 -2
- data/lib/infoboxer/definitions/en.wikipedia.org.rb +0 -2
- data/lib/infoboxer/media_wiki.rb +54 -51
- data/lib/infoboxer/media_wiki/page.rb +0 -2
- data/lib/infoboxer/media_wiki/traits.rb +0 -2
- data/lib/infoboxer/navigation.rb +0 -2
- data/lib/infoboxer/navigation/lookup.rb +0 -2
- data/lib/infoboxer/navigation/sections.rb +13 -3
- data/lib/infoboxer/navigation/selector.rb +0 -2
- data/lib/infoboxer/navigation/shortcuts.rb +0 -2
- data/lib/infoboxer/navigation/wikipath.rb +0 -2
- data/lib/infoboxer/parser.rb +0 -2
- data/lib/infoboxer/parser/context.rb +0 -2
- data/lib/infoboxer/parser/html.rb +0 -2
- data/lib/infoboxer/parser/image.rb +0 -2
- data/lib/infoboxer/parser/inline.rb +0 -2
- data/lib/infoboxer/parser/paragraphs.rb +0 -2
- data/lib/infoboxer/parser/table.rb +0 -2
- data/lib/infoboxer/parser/template.rb +0 -2
- data/lib/infoboxer/parser/util.rb +0 -2
- data/lib/infoboxer/templates/set.rb +0 -2
- data/lib/infoboxer/tree.rb +0 -2
- data/lib/infoboxer/tree/compound.rb +0 -2
- data/lib/infoboxer/tree/document.rb +0 -2
- data/lib/infoboxer/tree/gallery.rb +0 -2
- data/lib/infoboxer/tree/html.rb +0 -2
- data/lib/infoboxer/tree/image.rb +0 -2
- data/lib/infoboxer/tree/inline.rb +0 -2
- data/lib/infoboxer/tree/list.rb +0 -2
- data/lib/infoboxer/tree/node.rb +1 -3
- data/lib/infoboxer/tree/nodes.rb +6 -2
- data/lib/infoboxer/tree/paragraphs.rb +0 -2
- data/lib/infoboxer/tree/ref.rb +0 -2
- data/lib/infoboxer/tree/table.rb +0 -2
- data/lib/infoboxer/tree/template.rb +6 -3
- data/lib/infoboxer/tree/text.rb +0 -2
- data/lib/infoboxer/tree/wikilink.rb +0 -2
- data/lib/infoboxer/version.rb +1 -3
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67bb7aed02bc7048e3508902a5b921be691b8bac
|
4
|
+
data.tar.gz: 0c1b8de5e72f824f29802d5a1e202e914ad3c241
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee0babd55c7fe433dc3b55aa6988abf17ddee81424f56bd83b31dcb076dcdcfd22c966b50a26b979e5a917e096847bdaf9390063807694133734671201cb5003
|
7
|
+
data.tar.gz: b70854401485cb7981110c3da6a5f7d4b4623545b3b1af4b05c4b541514efa9727818a09c25d89dbe55b31a79c2e772adedf06f2b1211386f17242ddb11cde1a
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
+
## 0.3.1 (2017-12-04)
|
4
|
+
|
5
|
+
* (Experimental) new representation of templates, much more readable;
|
6
|
+
* More access to querying process and underlying `MediaWiktory::Wikipedia::Query`;
|
7
|
+
* Finally, `limit` parameter for multi-page queries (category, search, prefixsearch).
|
8
|
+
|
3
9
|
## 0.3.1.pre (2017-09-16)
|
4
10
|
|
5
11
|
* Introduce interwiki links following (and proper handling of interwikis, in general);
|
data/infoboxer.gemspec
CHANGED
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
|
|
32
32
|
s.executables << 'infoboxer'
|
33
33
|
|
34
34
|
s.add_dependency 'htmlentities'
|
35
|
-
s.add_dependency 'mediawiktory', '
|
35
|
+
s.add_dependency 'mediawiktory', '= 0.1.2'
|
36
36
|
s.add_dependency 'addressable'
|
37
37
|
s.add_dependency 'terminal-table'
|
38
38
|
end
|
data/lib/infoboxer.rb
CHANGED
data/lib/infoboxer/core_ext.rb
CHANGED
data/lib/infoboxer/media_wiki.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
require 'mediawiktory'
|
4
2
|
require 'addressable/uri'
|
5
3
|
|
@@ -61,24 +59,24 @@ module Infoboxer
|
|
61
59
|
# classes).
|
62
60
|
#
|
63
61
|
# @param titles [Array<String>] List of page titles to get.
|
64
|
-
# @param
|
65
|
-
# [MediaWiktory::Actions::Query
|
66
|
-
# for the
|
62
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
63
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
64
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
65
|
+
# while using it.
|
67
66
|
#
|
68
67
|
# @return [Hash{String => Hash}] Hash of `{requested title => raw MediaWiki object}`. Note that
|
69
68
|
# even missing (does not exist in current Wiki) or invalid (impossible title) still be present
|
70
69
|
# in response, just will have `"missing"` or `"invalid"` key, just like MediaWiki returns them.
|
71
|
-
def raw(*titles,
|
70
|
+
def raw(*titles, &processor)
|
72
71
|
# could emerge on "automatically" created page lists, should work
|
73
72
|
return {} if titles.empty?
|
74
73
|
|
75
74
|
titles.each_slice(50).map do |part|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
.response
|
75
|
+
request = prepare_request(@client.query.titles(*part), &processor)
|
76
|
+
response = request.response
|
77
|
+
|
78
|
+
# If additional props are required, there may be additional pages, even despite each_slice(50)
|
79
|
+
response = response.continue while response.continue?
|
82
80
|
|
83
81
|
sources = response['pages'].values.map { |page| [page['title'], page] }.to_h
|
84
82
|
redirects =
|
@@ -102,9 +100,11 @@ module Infoboxer
|
|
102
100
|
# `(titles.count / 50.0).ceil` requests)
|
103
101
|
#
|
104
102
|
# @param titles [Array<String>] List of page titles to get.
|
105
|
-
# @param
|
106
|
-
#
|
107
|
-
#
|
103
|
+
# @param interwiki [Symbol] Identifier of other wiki, related to current, to fetch pages from.
|
104
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
105
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
106
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
107
|
+
# while using it.
|
108
108
|
#
|
109
109
|
# @return [Page, Tree::Nodes<Page>] array of parsed pages. Notes:
|
110
110
|
# * if you call `get` with only one title, one page will be
|
@@ -122,10 +122,10 @@ module Infoboxer
|
|
122
122
|
# and obtain meaningful results instead of `NoMethodError` or
|
123
123
|
# `SomethingNotFound`.
|
124
124
|
#
|
125
|
-
def get(*titles,
|
126
|
-
return interwikis(interwiki).get(*titles,
|
125
|
+
def get(*titles, interwiki: nil, &processor)
|
126
|
+
return interwikis(interwiki).get(*titles, &processor) if interwiki
|
127
127
|
|
128
|
-
pages = get_h(*titles,
|
128
|
+
pages = get_h(*titles, &processor).values.compact
|
129
129
|
titles.count == 1 ? pages.first : Tree::Nodes[*pages]
|
130
130
|
end
|
131
131
|
|
@@ -142,14 +142,15 @@ module Infoboxer
|
|
142
142
|
# you've received.
|
143
143
|
#
|
144
144
|
# @param titles [Array<String>] List of page titles to get.
|
145
|
-
# @param
|
146
|
-
# [MediaWiktory::Actions::Query
|
147
|
-
# for the
|
145
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
146
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
147
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
148
|
+
# while using it.
|
148
149
|
#
|
149
150
|
# @return [Hash<String, Page>]
|
150
151
|
#
|
151
|
-
def get_h(*titles,
|
152
|
-
raw_pages = raw(*titles,
|
152
|
+
def get_h(*titles, &processor)
|
153
|
+
raw_pages = raw(*titles, &processor)
|
153
154
|
.tap { |ps| ps.detect { |_, p| p['invalid'] }.tap { |_, i| i && fail(i['invalidreason']) } }
|
154
155
|
.reject { |_, p| p.key?('missing') }
|
155
156
|
titles.map { |title| [title, make_page(raw_pages, title)] }.to_h
|
@@ -157,59 +158,59 @@ module Infoboxer
|
|
157
158
|
|
158
159
|
# Receive list of parsed MediaWiki pages from specified category.
|
159
160
|
#
|
160
|
-
# **NB**: currently, this API **always** fetches all pages from
|
161
|
-
# category, there is no option to "take first 20 pages". Pages are
|
162
|
-
# fetched in 50-page batches, then parsed. So, for large category
|
163
|
-
# it can really take a while to fetch all pages.
|
164
|
-
#
|
165
161
|
# @param title [String] Category title. You can use namespaceless title (like
|
166
162
|
# `"Countries in South America"`), title with namespace (like
|
167
163
|
# `"Category:Countries in South America"`) or title with local
|
168
164
|
# namespace (like `"Catégorie:Argentine"` for French Wikipedia)
|
165
|
+
# @param limit [Integer, "max"]
|
166
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
167
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
168
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
169
|
+
# while using it.
|
169
170
|
#
|
170
171
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
171
172
|
#
|
172
|
-
def category(title)
|
173
|
+
def category(title, limit: 'max', &processor)
|
173
174
|
title = normalize_category_title(title)
|
174
175
|
|
175
|
-
list(@client.query.generator(:categorymembers).title(title)
|
176
|
+
list(@client.query.generator(:categorymembers).title(title), limit, &processor)
|
176
177
|
end
|
177
178
|
|
178
179
|
# Receive list of parsed MediaWiki pages for provided search query.
|
179
180
|
# See [MediaWiki API docs](https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bsearch)
|
180
181
|
# for details.
|
181
182
|
#
|
182
|
-
# **NB**: currently, this API **always** fetches all pages from
|
183
|
-
# category, there is no option to "take first 20 pages". Pages are
|
184
|
-
# fetched in 50-page batches, then parsed. So, for large search query
|
185
|
-
# it can really take a while to fetch all pages.
|
186
|
-
#
|
187
183
|
# @param query [String] Search query. For old installations, look at
|
188
184
|
# https://www.mediawiki.org/wiki/Help:Searching
|
189
185
|
# for search syntax. For new ones (including Wikipedia), see at
|
190
186
|
# https://www.mediawiki.org/wiki/Help:CirrusSearch.
|
187
|
+
# @param limit [Integer, "max"]
|
188
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
189
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
190
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
191
|
+
# while using it.
|
191
192
|
#
|
192
193
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
193
194
|
#
|
194
|
-
def search(query)
|
195
|
-
list(@client.query.generator(:search).search(query)
|
195
|
+
def search(query, limit: 'max', &processor)
|
196
|
+
list(@client.query.generator(:search).search(query), limit, &processor)
|
196
197
|
end
|
197
198
|
|
198
199
|
# Receive list of parsed MediaWiki pages with titles startin from prefix.
|
199
200
|
# See [MediaWiki API docs](https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bprefixsearch)
|
200
201
|
# for details.
|
201
202
|
#
|
202
|
-
# **NB**: currently, this API **always** fetches all pages from
|
203
|
-
# category, there is no option to "take first 20 pages". Pages are
|
204
|
-
# fetched in 50-page batches, then parsed. So, for large search query
|
205
|
-
# it can really take a while to fetch all pages.
|
206
|
-
#
|
207
203
|
# @param prefix [String] Page title prefix.
|
204
|
+
# @param limit [Integer, "max"]
|
205
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
206
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
207
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
208
|
+
# while using it.
|
208
209
|
#
|
209
210
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
210
211
|
#
|
211
|
-
def prefixsearch(prefix)
|
212
|
-
list(@client.query.generator(:prefixsearch).search(prefix)
|
212
|
+
def prefixsearch(prefix, limit: 'max', &processor)
|
213
|
+
list(@client.query.generator(:prefixsearch).search(prefix), limit, &processor)
|
213
214
|
end
|
214
215
|
|
215
216
|
# @return [String]
|
@@ -225,14 +226,11 @@ module Infoboxer
|
|
225
226
|
Page.new(self, Parser.paragraphs(source['revisions'].first['*'], traits), source)
|
226
227
|
end
|
227
228
|
|
228
|
-
def list(query)
|
229
|
-
|
230
|
-
|
231
|
-
.prop(:content, :timestamp, :url)
|
232
|
-
.redirects
|
233
|
-
.response
|
229
|
+
def list(query, limit, &processor)
|
230
|
+
request = prepare_request(query.limit(limit), &processor)
|
231
|
+
response = request.response
|
234
232
|
|
235
|
-
response = response.continue while response.continue?
|
233
|
+
response = response.continue while response.continue? && (limit == 'max' || response['pages'].count < limit)
|
236
234
|
|
237
235
|
return Tree::Nodes[] if response['pages'].nil?
|
238
236
|
|
@@ -243,6 +241,11 @@ module Infoboxer
|
|
243
241
|
Tree::Nodes[*pages]
|
244
242
|
end
|
245
243
|
|
244
|
+
def prepare_request(request)
|
245
|
+
request = request.prop(:revisions, :info).prop(:content, :timestamp, :url).redirects
|
246
|
+
block_given? ? yield(request) : request
|
247
|
+
end
|
248
|
+
|
246
249
|
def normalize_category_title(title)
|
247
250
|
# FIXME: shouldn't it go to MediaWiktory?..
|
248
251
|
namespace, titl = title.include?(':') ? title.split(':', 2) : [nil, title]
|
data/lib/infoboxer/navigation.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
module Infoboxer
|
4
2
|
module Navigation
|
5
3
|
# `Sections` module provides logical view on document strcture.
|
@@ -81,6 +79,18 @@ module Infoboxer
|
|
81
79
|
end
|
82
80
|
end
|
83
81
|
|
82
|
+
def subsections(*names)
|
83
|
+
sections = names.map { |name|
|
84
|
+
heading = lookup_children(:Heading, text_: name).first
|
85
|
+
next unless heading
|
86
|
+
body = heading.next_siblings
|
87
|
+
.take_while { |n| !n.is_a?(Tree::Heading) || n.level > heading.level }
|
88
|
+
|
89
|
+
Section.new(heading, body)
|
90
|
+
}.compact
|
91
|
+
Tree::Nodes.new(sections)
|
92
|
+
end
|
93
|
+
|
84
94
|
def lookup_children(*arg)
|
85
95
|
if arg.include?(:Section)
|
86
96
|
sections.find(*(arg - [:Section]))
|
@@ -138,7 +148,7 @@ module Infoboxer
|
|
138
148
|
end
|
139
149
|
|
140
150
|
body = heading.next_siblings
|
141
|
-
.take_while { |n| !n.is_a?(Tree::Heading) || n.level
|
151
|
+
.take_while { |n| !n.is_a?(Tree::Heading) || n.level > heading.level }
|
142
152
|
|
143
153
|
section = Section.new(heading, body)
|
144
154
|
@in_sections = Tree::Nodes[section, *heading.in_sections]
|
data/lib/infoboxer/parser.rb
CHANGED
data/lib/infoboxer/tree.rb
CHANGED
data/lib/infoboxer/tree/html.rb
CHANGED
data/lib/infoboxer/tree/image.rb
CHANGED
data/lib/infoboxer/tree/list.rb
CHANGED
data/lib/infoboxer/tree/node.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
require 'htmlentities'
|
4
2
|
|
5
3
|
module Infoboxer
|
@@ -162,7 +160,7 @@ module Infoboxer
|
|
162
160
|
end
|
163
161
|
|
164
162
|
def _eq(_other)
|
165
|
-
|
163
|
+
false
|
166
164
|
end
|
167
165
|
|
168
166
|
def decode(str)
|
data/lib/infoboxer/tree/nodes.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
module Infoboxer
|
4
2
|
module Tree
|
5
3
|
# List of nodes, which tries to be useful both as array, and as proxy
|
@@ -153,6 +151,12 @@ module Infoboxer
|
|
153
151
|
map(&:text).join
|
154
152
|
end
|
155
153
|
|
154
|
+
alias_method :to_s, :text
|
155
|
+
|
156
|
+
def unwrap
|
157
|
+
map { |n| n.respond_to?(:unwrap) ? n.unwrap : n }
|
158
|
+
end
|
159
|
+
|
156
160
|
# Fetches pages by ALL wikilinks inside in ONE query to MediaWiki
|
157
161
|
# API.
|
158
162
|
#
|
data/lib/infoboxer/tree/ref.rb
CHANGED
data/lib/infoboxer/tree/table.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
require_relative 'linkable'
|
4
2
|
|
5
3
|
module Infoboxer
|
@@ -119,7 +117,12 @@ module Infoboxer
|
|
119
117
|
end
|
120
118
|
|
121
119
|
def text
|
122
|
-
''
|
120
|
+
res = unnamed_variables.map(&:text).join('|')
|
121
|
+
res.empty? ? '' : "{#{name}:#{res}}"
|
122
|
+
end
|
123
|
+
|
124
|
+
def unwrap
|
125
|
+
unnamed_variables.flat_map(&:children).unwrap
|
123
126
|
end
|
124
127
|
|
125
128
|
# See {Node#to_tree}
|
data/lib/infoboxer/tree/text.rb
CHANGED
data/lib/infoboxer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: infoboxer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Shepelev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-12-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -28,16 +28,16 @@ dependencies:
|
|
28
28
|
name: mediawiktory
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - '='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.1.
|
33
|
+
version: 0.1.2
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.1.
|
40
|
+
version: 0.1.2
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: addressable
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -166,9 +166,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
166
166
|
version: 2.1.0
|
167
167
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
168
168
|
requirements:
|
169
|
-
- - "
|
169
|
+
- - ">="
|
170
170
|
- !ruby/object:Gem::Version
|
171
|
-
version:
|
171
|
+
version: '0'
|
172
172
|
requirements: []
|
173
173
|
rubyforge_project:
|
174
174
|
rubygems_version: 2.6.10
|