infoboxer 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -1
- data/Parsing.md +1 -1
- data/README.md +0 -4
- data/bin/infoboxer +1 -1
- data/lib/infoboxer/definitions/en.wikipedia.org.rb +20 -0
- data/lib/infoboxer/media_wiki/mediawiktory_patch.rb +25 -0
- data/lib/infoboxer/media_wiki.rb +24 -1
- data/lib/infoboxer/navigation/sections.rb +8 -1
- data/lib/infoboxer/tree/image.rb +6 -0
- data/lib/infoboxer/tree/node.rb +3 -2
- data/lib/infoboxer/version.rb +1 -1
- data/lib/infoboxer.rb +3 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fcf940ddedd92a04eb2e555bc16c5ef765a3e8d3
|
4
|
+
data.tar.gz: c8e8c4976ea2e8e023c79c26bd221ff5f22b5a32
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d2e54fe4fe9d1a7714f6aa5b0d47b2957a0b0a00b1d65ca9ea4f925efa2dafb364d354235f3c19990f812293ee6bfe6e86602b019fb71eff4b6a67b96d6bf82a
|
7
|
+
data.tar.gz: d3ea47fcedb10473abc5083b750b4c3804532f8f6d0278a51a2d270913a899f3897c35fee9099a5aa08bf1dea8dfaa7292fc5b06cc350671cbde24625936d956
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,21 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
+
## 0.2.3 (2016-03-02)
|
4
|
+
|
5
|
+
New and enhanced features:
|
6
|
+
* more useful templates (quick-n-dirty sometimes, but prettier output for
|
7
|
+
typical cases);
|
8
|
+
* Caching of wikiobjects, so for several calls to `Infoboxer.wp` it would
|
9
|
+
be only one API call for wiki metainformation;
|
10
|
+
* `MediaWiki#get` now preserves order of pages (page list would be in
|
11
|
+
the same order as requested titles);
|
12
|
+
* `MediaWiki#get_h` to receive hash of `title => page object` (useful
|
13
|
+
to know which titles no pages were found for, and for better control over
|
14
|
+
redirects).
|
15
|
+
|
16
|
+
Fixes:
|
17
|
+
* `Image` node equality fixed.
|
18
|
+
|
3
19
|
## 0.2.2 (2016-01-03)
|
4
20
|
|
5
21
|
Fixes:
|
@@ -22,7 +38,7 @@ Fixes:
|
|
22
38
|
50 in previous versions);
|
23
39
|
* `bin/infoboxer` console added for quick experimenting;
|
24
40
|
* `Template#to_h` added for quick information extraction;
|
25
|
-
* many small bugfixes and
|
41
|
+
* many small bugfixes and enhancements.
|
26
42
|
|
27
43
|
## 0.1.2.1 (2015-12-04)
|
28
44
|
|
data/Parsing.md
CHANGED
@@ -19,7 +19,7 @@ Here's what I've came with:
|
|
19
19
|
* Long formatting like templates can span several lines, so we continue
|
20
20
|
scan through next lines, till template end (it means we are still in
|
21
21
|
same paragraph!), it's "normal inline scan", or just "inline scan"
|
22
|
-
* Some __inline__ formatting (like
|
22
|
+
* Some __inline__ formatting (like `<ref>`'s) and special formatting,
|
23
23
|
like table cells, can have other paragraphs inside! (But it's still
|
24
24
|
"inline" formatting, because when <ref> is ended, the same paragraph
|
25
25
|
is continued -- while showing it in Wikipedia, ref will leave a small
|
data/README.md
CHANGED
@@ -147,10 +147,6 @@ they may still work for you.
|
|
147
147
|
|
148
148
|
* [Wiki](https://github.com/molybdenum-99/infoboxer/wiki)
|
149
149
|
* [API Docs](http://www.rubydoc.info/gems/infoboxer)
|
150
|
-
* **NB**: ↑ this is "current version" link, but RubyDoc.info unfortunately
|
151
|
-
sometimes fails to update it to really _current_; in case you feel
|
152
|
-
something seriously underdocumented, please-please look at
|
153
|
-
[0.2.0 docs](http://www.rubydoc.info/gems/infoboxer/0.2.0).
|
154
150
|
* [Contributing](https://github.com/molybdenum-99/infoboxer/wiki/Contributing)
|
155
151
|
* [Roadmap](https://github.com/molybdenum-99/infoboxer/wiki/Roadmap)
|
156
152
|
|
data/bin/infoboxer
CHANGED
@@ -9,7 +9,7 @@ require 'optparse'
|
|
9
9
|
wiki_url = nil
|
10
10
|
|
11
11
|
OptionParser.new do |opts|
|
12
|
-
opts.banner = "Usage:
|
12
|
+
opts.banner = "Usage: infoboxer [-w wiki_api_url]"
|
13
13
|
|
14
14
|
opts.on("-w", "--wiki WIKI_API_URL",
|
15
15
|
"Make wiki by WIKI_API_URL a default wiki, and use it with just get('Pagename')") do |w|
|
@@ -335,6 +335,26 @@ module Infoboxer
|
|
335
335
|
end
|
336
336
|
end
|
337
337
|
|
338
|
+
# Pronunciation/lang templates are frequent in article abstracts
|
339
|
+
# Doing it dirty, but useful, for now:
|
340
|
+
template 'Lang', match: /^lang-(\w{2,3})$/i do
|
341
|
+
def children
|
342
|
+
fetch('1')
|
343
|
+
end
|
344
|
+
end
|
345
|
+
|
346
|
+
template 'IPAc', match: /^IPAc[12]?-(\w{2,3})$/i do
|
347
|
+
def text
|
348
|
+
unnamed_variables.text
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
352
|
+
template 'IPA', match: /^IPA-(\w{2,3})$/i do
|
353
|
+
def text
|
354
|
+
fetch('1').text
|
355
|
+
end
|
356
|
+
end
|
357
|
+
|
338
358
|
# TODO: extremely popular:
|
339
359
|
# Str left - https://en.wikipedia.org/wiki/Category:String_manipulation_templates
|
340
360
|
# Rnd - https://en.wikipedia.org/wiki/Category:Mathematical_function_templates
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'mediawiktory'
|
2
|
+
|
3
|
+
# FIXME: looks like pretty "core" functionality and should be moved to mediawiktory itself
|
4
|
+
|
5
|
+
class MediaWiktory::Page
|
6
|
+
attr_writer :queried_title
|
7
|
+
|
8
|
+
def queried_title
|
9
|
+
@queried_title || title
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
class MediaWiktory::Query::Response
|
14
|
+
alias_method :old_initialize, :initialize
|
15
|
+
def initialize(*arg)
|
16
|
+
old_initialize(*arg)
|
17
|
+
|
18
|
+
if raw.query.redirects
|
19
|
+
raw.query.redirects.each do |redirect|
|
20
|
+
pg = @pages.detect{|p| p.title == redirect.to} or next
|
21
|
+
pg.queried_title = redirect.from
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/lib/infoboxer/media_wiki.rb
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
require 'mediawiktory'
|
5
5
|
require 'addressable/uri'
|
6
6
|
|
7
|
+
require_relative 'media_wiki/mediawiktory_patch'
|
7
8
|
require_relative 'media_wiki/traits'
|
8
9
|
require_relative 'media_wiki/page'
|
9
10
|
|
@@ -65,7 +66,8 @@ module Infoboxer
|
|
65
66
|
prop(revisions: {prop: :content}, info: {prop: :url}).
|
66
67
|
redirects(true). # FIXME: should be done transparently by MediaWiktory?
|
67
68
|
perform.pages
|
68
|
-
}.inject(:concat) # somehow flatten(1) fails!
|
69
|
+
}.inject(:concat). # somehow flatten(1) fails!
|
70
|
+
sort_by{|page| titles.index(page.queried_title) || 1_000}
|
69
71
|
end
|
70
72
|
|
71
73
|
# Receive list of parsed MediaWiki pages for list of titles provided.
|
@@ -104,6 +106,27 @@ module Infoboxer
|
|
104
106
|
titles.count == 1 ? pages.first : Tree::Nodes[*pages]
|
105
107
|
end
|
106
108
|
|
109
|
+
# Same as {#get}, but returns hash of {requested title => page}.
|
110
|
+
#
|
111
|
+
# Useful quirks:
|
112
|
+
# * when a requested page does not exist, its key will still be present in
|
113
|
+
# resulting hash (value will be `nil`);
|
114
|
+
# * when requested page redirects to another, key will still be the
|
115
|
+
# requested title. For ex., `get_h('Einstein')` will return hash
|
116
|
+
# with key 'Einstein' and page titled 'Albert Einstein'.
|
117
|
+
#
|
118
|
+
# This allows you to be in full control of what pages of large list
|
119
|
+
# you've received.
|
120
|
+
#
|
121
|
+
# @return [Hash<String, Page>]
|
122
|
+
#
|
123
|
+
def get_h(*titles)
|
124
|
+
pages = [*get(*titles)]
|
125
|
+
titles.map{|t|
|
126
|
+
[t, pages.detect{|p| p.source.queried_title == t}]
|
127
|
+
}.to_h
|
128
|
+
end
|
129
|
+
|
107
130
|
# Receive list of parsed MediaWiki pages from specified category.
|
108
131
|
#
|
109
132
|
# **NB**: currently, this API **always** fetches all pages from
|
@@ -151,9 +151,10 @@ module Infoboxer
|
|
151
151
|
# See {Sections parent module} documentation for details.
|
152
152
|
class Section < Tree::Compound
|
153
153
|
def initialize(heading, children = Tree::Nodes[])
|
154
|
-
# no super: we don't wont to
|
154
|
+
# no super: we don't want to rewrite children's parent
|
155
155
|
@children = Tree::Nodes[*children]
|
156
156
|
@heading = heading
|
157
|
+
@params = {level: heading.level, heading: heading.text.strip}
|
157
158
|
end
|
158
159
|
|
159
160
|
# Section's heading.
|
@@ -173,6 +174,12 @@ module Infoboxer
|
|
173
174
|
end
|
174
175
|
|
175
176
|
include Container
|
177
|
+
|
178
|
+
private
|
179
|
+
|
180
|
+
#def show_params
|
181
|
+
#super(level: heading.level, heading: heading.text)
|
182
|
+
#end
|
176
183
|
end
|
177
184
|
end
|
178
185
|
end
|
data/lib/infoboxer/tree/image.rb
CHANGED
data/lib/infoboxer/tree/node.rb
CHANGED
@@ -134,7 +134,8 @@ module Infoboxer
|
|
134
134
|
MAX_CHARS = 30
|
135
135
|
|
136
136
|
def shorten_text
|
137
|
-
|
137
|
+
txt = text_.sub(/^([^\n]+)\n.+$/m, '\1...')
|
138
|
+
txt.length > MAX_CHARS ? txt[0..MAX_CHARS] + '...' : txt
|
138
139
|
end
|
139
140
|
|
140
141
|
def clean_class
|
@@ -158,7 +159,7 @@ module Infoboxer
|
|
158
159
|
end
|
159
160
|
|
160
161
|
def _eq(other)
|
161
|
-
fail(NotImplementedError, "#_eq should be defined in subclasses")
|
162
|
+
fail(NotImplementedError, "#_eq should be defined in subclasses (called for #{self.class})")
|
162
163
|
end
|
163
164
|
|
164
165
|
def decode(str)
|
data/lib/infoboxer/version.rb
CHANGED
data/lib/infoboxer.rb
CHANGED
@@ -69,11 +69,13 @@ module Infoboxer
|
|
69
69
|
species: 'species.wikimedia.org',
|
70
70
|
}
|
71
71
|
|
72
|
+
WIKIS = {}
|
73
|
+
|
72
74
|
public
|
73
75
|
|
74
76
|
# Includeable version of {Infoboxer.wiki}
|
75
77
|
def wiki(api_url, options = {})
|
76
|
-
MediaWiki.new(api_url, options || {})
|
78
|
+
WIKIS[api_url] ||= MediaWiki.new(api_url, options || {})
|
77
79
|
end
|
78
80
|
|
79
81
|
class << self
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: infoboxer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Shepelev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-03-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -121,6 +121,7 @@ files:
|
|
121
121
|
- lib/infoboxer/core_ext.rb
|
122
122
|
- lib/infoboxer/definitions/en.wikipedia.org.rb
|
123
123
|
- lib/infoboxer/media_wiki.rb
|
124
|
+
- lib/infoboxer/media_wiki/mediawiktory_patch.rb
|
124
125
|
- lib/infoboxer/media_wiki/page.rb
|
125
126
|
- lib/infoboxer/media_wiki/traits.rb
|
126
127
|
- lib/infoboxer/navigation.rb
|