infoboxer 0.3.1.pre → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/infoboxer.gemspec +1 -1
- data/lib/infoboxer.rb +0 -2
- data/lib/infoboxer/core_ext.rb +0 -2
- data/lib/infoboxer/definitions/en.wikipedia.org.rb +0 -2
- data/lib/infoboxer/media_wiki.rb +54 -51
- data/lib/infoboxer/media_wiki/page.rb +0 -2
- data/lib/infoboxer/media_wiki/traits.rb +0 -2
- data/lib/infoboxer/navigation.rb +0 -2
- data/lib/infoboxer/navigation/lookup.rb +0 -2
- data/lib/infoboxer/navigation/sections.rb +13 -3
- data/lib/infoboxer/navigation/selector.rb +0 -2
- data/lib/infoboxer/navigation/shortcuts.rb +0 -2
- data/lib/infoboxer/navigation/wikipath.rb +0 -2
- data/lib/infoboxer/parser.rb +0 -2
- data/lib/infoboxer/parser/context.rb +0 -2
- data/lib/infoboxer/parser/html.rb +0 -2
- data/lib/infoboxer/parser/image.rb +0 -2
- data/lib/infoboxer/parser/inline.rb +0 -2
- data/lib/infoboxer/parser/paragraphs.rb +0 -2
- data/lib/infoboxer/parser/table.rb +0 -2
- data/lib/infoboxer/parser/template.rb +0 -2
- data/lib/infoboxer/parser/util.rb +0 -2
- data/lib/infoboxer/templates/set.rb +0 -2
- data/lib/infoboxer/tree.rb +0 -2
- data/lib/infoboxer/tree/compound.rb +0 -2
- data/lib/infoboxer/tree/document.rb +0 -2
- data/lib/infoboxer/tree/gallery.rb +0 -2
- data/lib/infoboxer/tree/html.rb +0 -2
- data/lib/infoboxer/tree/image.rb +0 -2
- data/lib/infoboxer/tree/inline.rb +0 -2
- data/lib/infoboxer/tree/list.rb +0 -2
- data/lib/infoboxer/tree/node.rb +1 -3
- data/lib/infoboxer/tree/nodes.rb +6 -2
- data/lib/infoboxer/tree/paragraphs.rb +0 -2
- data/lib/infoboxer/tree/ref.rb +0 -2
- data/lib/infoboxer/tree/table.rb +0 -2
- data/lib/infoboxer/tree/template.rb +6 -3
- data/lib/infoboxer/tree/text.rb +0 -2
- data/lib/infoboxer/tree/wikilink.rb +0 -2
- data/lib/infoboxer/version.rb +1 -3
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67bb7aed02bc7048e3508902a5b921be691b8bac
|
4
|
+
data.tar.gz: 0c1b8de5e72f824f29802d5a1e202e914ad3c241
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee0babd55c7fe433dc3b55aa6988abf17ddee81424f56bd83b31dcb076dcdcfd22c966b50a26b979e5a917e096847bdaf9390063807694133734671201cb5003
|
7
|
+
data.tar.gz: b70854401485cb7981110c3da6a5f7d4b4623545b3b1af4b05c4b541514efa9727818a09c25d89dbe55b31a79c2e772adedf06f2b1211386f17242ddb11cde1a
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
+
## 0.3.1 (2017-12-04)
|
4
|
+
|
5
|
+
* (Experimental) new representation of templates, much more readable;
|
6
|
+
* More access to querying process and underlying `MediaWiktory::Wikipedia::Query`;
|
7
|
+
* Finally, `limit` parameter for multi-page queries (category, search, prefixsearch).
|
8
|
+
|
3
9
|
## 0.3.1.pre (2017-09-16)
|
4
10
|
|
5
11
|
* Introduce interwiki links following (and proper handling of interwikis, in general);
|
data/infoboxer.gemspec
CHANGED
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
|
|
32
32
|
s.executables << 'infoboxer'
|
33
33
|
|
34
34
|
s.add_dependency 'htmlentities'
|
35
|
-
s.add_dependency 'mediawiktory', '
|
35
|
+
s.add_dependency 'mediawiktory', '= 0.1.2'
|
36
36
|
s.add_dependency 'addressable'
|
37
37
|
s.add_dependency 'terminal-table'
|
38
38
|
end
|
data/lib/infoboxer.rb
CHANGED
data/lib/infoboxer/core_ext.rb
CHANGED
data/lib/infoboxer/media_wiki.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
require 'mediawiktory'
|
4
2
|
require 'addressable/uri'
|
5
3
|
|
@@ -61,24 +59,24 @@ module Infoboxer
|
|
61
59
|
# classes).
|
62
60
|
#
|
63
61
|
# @param titles [Array<String>] List of page titles to get.
|
64
|
-
# @param
|
65
|
-
# [MediaWiktory::Actions::Query
|
66
|
-
# for the
|
62
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
63
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
64
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
65
|
+
# while using it.
|
67
66
|
#
|
68
67
|
# @return [Hash{String => Hash}] Hash of `{requested title => raw MediaWiki object}`. Note that
|
69
68
|
# even missing (does not exist in current Wiki) or invalid (impossible title) pages will still be present
|
70
69
|
# in response, just will have `"missing"` or `"invalid"` key, just like MediaWiki returns them.
|
71
|
-
def raw(*titles,
|
70
|
+
def raw(*titles, &processor)
|
72
71
|
# could emerge on "automatically" created page lists, should work
|
73
72
|
return {} if titles.empty?
|
74
73
|
|
75
74
|
titles.each_slice(50).map do |part|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
.response
|
75
|
+
request = prepare_request(@client.query.titles(*part), &processor)
|
76
|
+
response = request.response
|
77
|
+
|
78
|
+
# If additional props are required, there may be additional pages, even despite each_slice(50)
|
79
|
+
response = response.continue while response.continue?
|
82
80
|
|
83
81
|
sources = response['pages'].values.map { |page| [page['title'], page] }.to_h
|
84
82
|
redirects =
|
@@ -102,9 +100,11 @@ module Infoboxer
|
|
102
100
|
# `(titles.count / 50.0).ceil` requests)
|
103
101
|
#
|
104
102
|
# @param titles [Array<String>] List of page titles to get.
|
105
|
-
# @param
|
106
|
-
#
|
107
|
-
#
|
103
|
+
# @param interwiki [Symbol] Identifier of other wiki, related to current, to fetch pages from.
|
104
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
105
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
106
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
107
|
+
# while using it.
|
108
108
|
#
|
109
109
|
# @return [Page, Tree::Nodes<Page>] array of parsed pages. Notes:
|
110
110
|
# * if you call `get` with only one title, one page will be
|
@@ -122,10 +122,10 @@ module Infoboxer
|
|
122
122
|
# and obtain meaningful results instead of `NoMethodError` or
|
123
123
|
# `SomethingNotFound`.
|
124
124
|
#
|
125
|
-
def get(*titles,
|
126
|
-
return interwikis(interwiki).get(*titles,
|
125
|
+
def get(*titles, interwiki: nil, &processor)
|
126
|
+
return interwikis(interwiki).get(*titles, &processor) if interwiki
|
127
127
|
|
128
|
-
pages = get_h(*titles,
|
128
|
+
pages = get_h(*titles, &processor).values.compact
|
129
129
|
titles.count == 1 ? pages.first : Tree::Nodes[*pages]
|
130
130
|
end
|
131
131
|
|
@@ -142,14 +142,15 @@ module Infoboxer
|
|
142
142
|
# you've received.
|
143
143
|
#
|
144
144
|
# @param titles [Array<String>] List of page titles to get.
|
145
|
-
# @param
|
146
|
-
# [MediaWiktory::Actions::Query
|
147
|
-
# for the
|
145
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
146
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
147
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
148
|
+
# while using it.
|
148
149
|
#
|
149
150
|
# @return [Hash<String, Page>]
|
150
151
|
#
|
151
|
-
def get_h(*titles,
|
152
|
-
raw_pages = raw(*titles,
|
152
|
+
def get_h(*titles, &processor)
|
153
|
+
raw_pages = raw(*titles, &processor)
|
153
154
|
.tap { |ps| ps.detect { |_, p| p['invalid'] }.tap { |_, i| i && fail(i['invalidreason']) } }
|
154
155
|
.reject { |_, p| p.key?('missing') }
|
155
156
|
titles.map { |title| [title, make_page(raw_pages, title)] }.to_h
|
@@ -157,59 +158,59 @@ module Infoboxer
|
|
157
158
|
|
158
159
|
# Receive list of parsed MediaWiki pages from specified category.
|
159
160
|
#
|
160
|
-
# **NB**: currently, this API **always** fetches all pages from
|
161
|
-
# category, there is no option to "take first 20 pages". Pages are
|
162
|
-
# fetched in 50-page batches, then parsed. So, for large category
|
163
|
-
# it can really take a while to fetch all pages.
|
164
|
-
#
|
165
161
|
# @param title [String] Category title. You can use namespaceless title (like
|
166
162
|
# `"Countries in South America"`), title with namespace (like
|
167
163
|
# `"Category:Countries in South America"`) or title with local
|
168
164
|
# namespace (like `"Catégorie:Argentine"` for French Wikipedia)
|
165
|
+
# @param limit [Integer, "max"]
|
166
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
167
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
168
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
169
|
+
# while using it.
|
169
170
|
#
|
170
171
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
171
172
|
#
|
172
|
-
def category(title)
|
173
|
+
def category(title, limit: 'max', &processor)
|
173
174
|
title = normalize_category_title(title)
|
174
175
|
|
175
|
-
list(@client.query.generator(:categorymembers).title(title)
|
176
|
+
list(@client.query.generator(:categorymembers).title(title), limit, &processor)
|
176
177
|
end
|
177
178
|
|
178
179
|
# Receive list of parsed MediaWiki pages for provided search query.
|
179
180
|
# See [MediaWiki API docs](https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bsearch)
|
180
181
|
# for details.
|
181
182
|
#
|
182
|
-
# **NB**: currently, this API **always** fetches all pages from
|
183
|
-
# category, there is no option to "take first 20 pages". Pages are
|
184
|
-
# fetched in 50-page batches, then parsed. So, for large search query
|
185
|
-
# it can really take a while to fetch all pages.
|
186
|
-
#
|
187
183
|
# @param query [String] Search query. For old installations, look at
|
188
184
|
# https://www.mediawiki.org/wiki/Help:Searching
|
189
185
|
# for search syntax. For new ones (including Wikipedia), see at
|
190
186
|
# https://www.mediawiki.org/wiki/Help:CirrusSearch.
|
187
|
+
# @param limit [Integer, "max"]
|
188
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
189
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
190
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
191
|
+
# while using it.
|
191
192
|
#
|
192
193
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
193
194
|
#
|
194
|
-
def search(query)
|
195
|
-
list(@client.query.generator(:search).search(query)
|
195
|
+
def search(query, limit: 'max', &processor)
|
196
|
+
list(@client.query.generator(:search).search(query), limit, &processor)
|
196
197
|
end
|
197
198
|
|
198
199
|
# Receive list of parsed MediaWiki pages with titles starting from prefix.
|
199
200
|
# See [MediaWiki API docs](https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bprefixsearch)
|
200
201
|
# for details.
|
201
202
|
#
|
202
|
-
# **NB**: currently, this API **always** fetches all pages from
|
203
|
-
# category, there is no option to "take first 20 pages". Pages are
|
204
|
-
# fetched in 50-page batches, then parsed. So, for large search query
|
205
|
-
# it can really take a while to fetch all pages.
|
206
|
-
#
|
207
203
|
# @param prefix [String] Page title prefix.
|
204
|
+
# @param limit [Integer, "max"]
|
205
|
+
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
|
206
|
+
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
|
207
|
+
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
|
208
|
+
# while using it.
|
208
209
|
#
|
209
210
|
# @return [Tree::Nodes<Page>] array of parsed pages.
|
210
211
|
#
|
211
|
-
def prefixsearch(prefix)
|
212
|
-
list(@client.query.generator(:prefixsearch).search(prefix)
|
212
|
+
def prefixsearch(prefix, limit: 'max', &processor)
|
213
|
+
list(@client.query.generator(:prefixsearch).search(prefix), limit, &processor)
|
213
214
|
end
|
214
215
|
|
215
216
|
# @return [String]
|
@@ -225,14 +226,11 @@ module Infoboxer
|
|
225
226
|
Page.new(self, Parser.paragraphs(source['revisions'].first['*'], traits), source)
|
226
227
|
end
|
227
228
|
|
228
|
-
def list(query)
|
229
|
-
|
230
|
-
|
231
|
-
.prop(:content, :timestamp, :url)
|
232
|
-
.redirects
|
233
|
-
.response
|
229
|
+
def list(query, limit, &processor)
|
230
|
+
request = prepare_request(query.limit(limit), &processor)
|
231
|
+
response = request.response
|
234
232
|
|
235
|
-
response = response.continue while response.continue?
|
233
|
+
response = response.continue while response.continue? && (limit == 'max' || response['pages'].count < limit)
|
236
234
|
|
237
235
|
return Tree::Nodes[] if response['pages'].nil?
|
238
236
|
|
@@ -243,6 +241,11 @@ module Infoboxer
|
|
243
241
|
Tree::Nodes[*pages]
|
244
242
|
end
|
245
243
|
|
244
|
+
def prepare_request(request)
|
245
|
+
request = request.prop(:revisions, :info).prop(:content, :timestamp, :url).redirects
|
246
|
+
block_given? ? yield(request) : request
|
247
|
+
end
|
248
|
+
|
246
249
|
def normalize_category_title(title)
|
247
250
|
# FIXME: shouldn't it go to MediaWiktory?..
|
248
251
|
namespace, titl = title.include?(':') ? title.split(':', 2) : [nil, title]
|
data/lib/infoboxer/navigation.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
module Infoboxer
|
4
2
|
module Navigation
|
5
3
|
# `Sections` module provides logical view on document structure.
|
@@ -81,6 +79,18 @@ module Infoboxer
|
|
81
79
|
end
|
82
80
|
end
|
83
81
|
|
82
|
+
def subsections(*names)
|
83
|
+
sections = names.map { |name|
|
84
|
+
heading = lookup_children(:Heading, text_: name).first
|
85
|
+
next unless heading
|
86
|
+
body = heading.next_siblings
|
87
|
+
.take_while { |n| !n.is_a?(Tree::Heading) || n.level > heading.level }
|
88
|
+
|
89
|
+
Section.new(heading, body)
|
90
|
+
}.compact
|
91
|
+
Tree::Nodes.new(sections)
|
92
|
+
end
|
93
|
+
|
84
94
|
def lookup_children(*arg)
|
85
95
|
if arg.include?(:Section)
|
86
96
|
sections.find(*(arg - [:Section]))
|
@@ -138,7 +148,7 @@ module Infoboxer
|
|
138
148
|
end
|
139
149
|
|
140
150
|
body = heading.next_siblings
|
141
|
-
.take_while { |n| !n.is_a?(Tree::Heading) || n.level
|
151
|
+
.take_while { |n| !n.is_a?(Tree::Heading) || n.level > heading.level }
|
142
152
|
|
143
153
|
section = Section.new(heading, body)
|
144
154
|
@in_sections = Tree::Nodes[section, *heading.in_sections]
|
data/lib/infoboxer/parser.rb
CHANGED
data/lib/infoboxer/tree.rb
CHANGED
data/lib/infoboxer/tree/html.rb
CHANGED
data/lib/infoboxer/tree/image.rb
CHANGED
data/lib/infoboxer/tree/list.rb
CHANGED
data/lib/infoboxer/tree/node.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
require 'htmlentities'
|
4
2
|
|
5
3
|
module Infoboxer
|
@@ -162,7 +160,7 @@ module Infoboxer
|
|
162
160
|
end
|
163
161
|
|
164
162
|
def _eq(_other)
|
165
|
-
|
163
|
+
false
|
166
164
|
end
|
167
165
|
|
168
166
|
def decode(str)
|
data/lib/infoboxer/tree/nodes.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
module Infoboxer
|
4
2
|
module Tree
|
5
3
|
# List of nodes, which tries to be useful both as array, and as proxy
|
@@ -153,6 +151,12 @@ module Infoboxer
|
|
153
151
|
map(&:text).join
|
154
152
|
end
|
155
153
|
|
154
|
+
alias_method :to_s, :text
|
155
|
+
|
156
|
+
def unwrap
|
157
|
+
map { |n| n.respond_to?(:unwrap) ? n.unwrap : n }
|
158
|
+
end
|
159
|
+
|
156
160
|
# Fetches pages by ALL wikilinks inside in ONE query to MediaWiki
|
157
161
|
# API.
|
158
162
|
#
|
data/lib/infoboxer/tree/ref.rb
CHANGED
data/lib/infoboxer/tree/table.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
require_relative 'linkable'
|
4
2
|
|
5
3
|
module Infoboxer
|
@@ -119,7 +117,12 @@ module Infoboxer
|
|
119
117
|
end
|
120
118
|
|
121
119
|
def text
|
122
|
-
''
|
120
|
+
res = unnamed_variables.map(&:text).join('|')
|
121
|
+
res.empty? ? '' : "{#{name}:#{res}}"
|
122
|
+
end
|
123
|
+
|
124
|
+
def unwrap
|
125
|
+
unnamed_variables.flat_map(&:children).unwrap
|
123
126
|
end
|
124
127
|
|
125
128
|
# See {Node#to_tree}
|
data/lib/infoboxer/tree/text.rb
CHANGED
data/lib/infoboxer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: infoboxer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Shepelev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-12-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -28,16 +28,16 @@ dependencies:
|
|
28
28
|
name: mediawiktory
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - '='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.1.
|
33
|
+
version: 0.1.2
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.1.
|
40
|
+
version: 0.1.2
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: addressable
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -166,9 +166,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
166
166
|
version: 2.1.0
|
167
167
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
168
168
|
requirements:
|
169
|
-
- - "
|
169
|
+
- - ">="
|
170
170
|
- !ruby/object:Gem::Version
|
171
|
-
version:
|
171
|
+
version: '0'
|
172
172
|
requirements: []
|
173
173
|
rubyforge_project:
|
174
174
|
rubygems_version: 2.6.10
|