infoboxer 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -1
- data/Parsing.md +1 -1
- data/README.md +0 -4
- data/bin/infoboxer +1 -1
- data/lib/infoboxer/definitions/en.wikipedia.org.rb +20 -0
- data/lib/infoboxer/media_wiki/mediawiktory_patch.rb +25 -0
- data/lib/infoboxer/media_wiki.rb +24 -1
- data/lib/infoboxer/navigation/sections.rb +8 -1
- data/lib/infoboxer/tree/image.rb +6 -0
- data/lib/infoboxer/tree/node.rb +3 -2
- data/lib/infoboxer/version.rb +1 -1
- data/lib/infoboxer.rb +3 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fcf940ddedd92a04eb2e555bc16c5ef765a3e8d3
|
4
|
+
data.tar.gz: c8e8c4976ea2e8e023c79c26bd221ff5f22b5a32
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d2e54fe4fe9d1a7714f6aa5b0d47b2957a0b0a00b1d65ca9ea4f925efa2dafb364d354235f3c19990f812293ee6bfe6e86602b019fb71eff4b6a67b96d6bf82a
|
7
|
+
data.tar.gz: d3ea47fcedb10473abc5083b750b4c3804532f8f6d0278a51a2d270913a899f3897c35fee9099a5aa08bf1dea8dfaa7292fc5b06cc350671cbde24625936d956
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,21 @@
|
|
1
1
|
# Infoboxer's change log
|
2
2
|
|
3
|
+
## 0.2.3 (2016-03-02)
|
4
|
+
|
5
|
+
New and enhanced features:
|
6
|
+
* more useful templates (quick-n-dirty sometimes, but prettier output for
|
7
|
+
typical cases);
|
8
|
+
* Caching of wikiobjects, so for several calls to `Infoboxer.wp` it would
|
9
|
+
be only one API call for wiki metainformation;
|
10
|
+
* `MediaWiki#get` now preserves order of pages (page list would be in
|
11
|
+
the same order as requested titles);
|
12
|
+
* `MediaWiki#get_h` to receive hash of `title => page object` (useful
|
13
|
+
to know which titles have no pages, and for better control over
|
14
|
+
redirects).
|
15
|
+
|
16
|
+
Fixes:
|
17
|
+
* `Image` node equality fixed.
|
18
|
+
|
3
19
|
## 0.2.2 (2016-01-03)
|
4
20
|
|
5
21
|
Fixes:
|
@@ -22,7 +38,7 @@ Fixes:
|
|
22
38
|
50 in previous versions);
|
23
39
|
* `bin/infoboxer` console added for quick experimenting;
|
24
40
|
* `Template#to_h` added for quick information extraction;
|
25
|
-
* many small bugfixes and
|
41
|
+
* many small bugfixes and enhancements.
|
26
42
|
|
27
43
|
## 0.1.2.1 (2015-12-04)
|
28
44
|
|
data/Parsing.md
CHANGED
@@ -19,7 +19,7 @@ Here's what I've came with:
|
|
19
19
|
* Long formatting like templates can span several lines, so we continue
|
20
20
|
scan through next lines, till template end (it means we are still in
|
21
21
|
same paragraph!), it's "normal inline scan", or just "inline scan"
|
22
|
-
* Some __inline__ formatting (like
|
22
|
+
* Some __inline__ formatting (like `<ref>`'s) and special formatting,
|
23
23
|
like table cells, can have other paragraphs inside! (But it's still
|
24
24
|
"inline" formatting, because when <ref> is ended, the same paragraph
|
25
25
|
is continued -- while showing it in Wikipedia, ref will leave a small
|
data/README.md
CHANGED
@@ -147,10 +147,6 @@ they may still work for you.
|
|
147
147
|
|
148
148
|
* [Wiki](https://github.com/molybdenum-99/infoboxer/wiki)
|
149
149
|
* [API Docs](http://www.rubydoc.info/gems/infoboxer)
|
150
|
-
* **NB**: ↑ this is "current version" link, but RubyDoc.info unfortunately
|
151
|
-
sometimes fails to update it to really _current_; in case you feel
|
152
|
-
something seriously underdocumented, please-please look at
|
153
|
-
[0.2.0 docs](http://www.rubydoc.info/gems/infoboxer/0.2.0).
|
154
150
|
* [Contributing](https://github.com/molybdenum-99/infoboxer/wiki/Contributing)
|
155
151
|
* [Roadmap](https://github.com/molybdenum-99/infoboxer/wiki/Roadmap)
|
156
152
|
|
data/bin/infoboxer
CHANGED
@@ -9,7 +9,7 @@ require 'optparse'
|
|
9
9
|
wiki_url = nil
|
10
10
|
|
11
11
|
OptionParser.new do |opts|
|
12
|
-
opts.banner = "Usage:
|
12
|
+
opts.banner = "Usage: infoboxer [-w wiki_api_url]"
|
13
13
|
|
14
14
|
opts.on("-w", "--wiki WIKI_API_URL",
|
15
15
|
"Make wiki by WIKI_API_URL a default wiki, and use it with just get('Pagename')") do |w|
|
@@ -335,6 +335,26 @@ module Infoboxer
|
|
335
335
|
end
|
336
336
|
end
|
337
337
|
|
338
|
+
# Pronunciation/lang templates are frequent in article abstracts
|
339
|
+
# Doing it dirty, but useful, for now:
|
340
|
+
template 'Lang', match: /^lang-(\w{2,3})$/i do
|
341
|
+
def children
|
342
|
+
fetch('1')
|
343
|
+
end
|
344
|
+
end
|
345
|
+
|
346
|
+
template 'IPAc', match: /^IPAc[12]?-(\w{2,3})$/i do
|
347
|
+
def text
|
348
|
+
unnamed_variables.text
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
352
|
+
template 'IPA', match: /^IPA-(\w{2,3})$/i do
|
353
|
+
def text
|
354
|
+
fetch('1').text
|
355
|
+
end
|
356
|
+
end
|
357
|
+
|
338
358
|
# TODO: extremely popular:
|
339
359
|
# Str left - https://en.wikipedia.org/wiki/Category:String_manipulation_templates
|
340
360
|
# Rnd - https://en.wikipedia.org/wiki/Category:Mathematical_function_templates
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'mediawiktory'
|
2
|
+
|
3
|
+
# FIXME: looks like pretty "core" functionality and should be moved to mediawiktory itself
|
4
|
+
|
5
|
+
class MediaWiktory::Page
|
6
|
+
attr_writer :queried_title
|
7
|
+
|
8
|
+
def queried_title
|
9
|
+
@queried_title || title
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
class MediaWiktory::Query::Response
|
14
|
+
alias_method :old_initialize, :initialize
|
15
|
+
def initialize(*arg)
|
16
|
+
old_initialize(*arg)
|
17
|
+
|
18
|
+
if raw.query.redirects
|
19
|
+
raw.query.redirects.each do |redirect|
|
20
|
+
pg = @pages.detect{|p| p.title == redirect.to} or next
|
21
|
+
pg.queried_title = redirect.from
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/lib/infoboxer/media_wiki.rb
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
require 'mediawiktory'
|
5
5
|
require 'addressable/uri'
|
6
6
|
|
7
|
+
require_relative 'media_wiki/mediawiktory_patch'
|
7
8
|
require_relative 'media_wiki/traits'
|
8
9
|
require_relative 'media_wiki/page'
|
9
10
|
|
@@ -65,7 +66,8 @@ module Infoboxer
|
|
65
66
|
prop(revisions: {prop: :content}, info: {prop: :url}).
|
66
67
|
redirects(true). # FIXME: should be done transparently by MediaWiktory?
|
67
68
|
perform.pages
|
68
|
-
}.inject(:concat) # somehow flatten(1) fails!
|
69
|
+
}.inject(:concat). # somehow flatten(1) fails!
|
70
|
+
sort_by{|page| titles.index(page.queried_title) || 1_000}
|
69
71
|
end
|
70
72
|
|
71
73
|
# Receive list of parsed MediaWiki pages for list of titles provided.
|
@@ -104,6 +106,27 @@ module Infoboxer
|
|
104
106
|
titles.count == 1 ? pages.first : Tree::Nodes[*pages]
|
105
107
|
end
|
106
108
|
|
109
|
+
# Same as {#get}, but returns hash of {requested title => page}.
|
110
|
+
#
|
111
|
+
# Useful quirks:
|
112
|
+
# * when the requested page does not exist, its key will still be present in
|
113
|
+
# resulting hash (value will be `nil`);
|
114
|
+
# * when requested page redirects to another, key will still be the
|
115
|
+
# requested title. For ex., `get_h('Einstein')` will return hash
|
116
|
+
# with key 'Einstein' and page titled 'Albert Einstein'.
|
117
|
+
#
|
118
|
+
# This allows you to be in full control of what pages of large list
|
119
|
+
# you've received.
|
120
|
+
#
|
121
|
+
# @return [Hash<String, Page>]
|
122
|
+
#
|
123
|
+
def get_h(*titles)
|
124
|
+
pages = [*get(*titles)]
|
125
|
+
titles.map{|t|
|
126
|
+
[t, pages.detect{|p| p.source.queried_title == t}]
|
127
|
+
}.to_h
|
128
|
+
end
|
129
|
+
|
107
130
|
# Receive list of parsed MediaWiki pages from specified category.
|
108
131
|
#
|
109
132
|
# **NB**: currently, this API **always** fetches all pages from
|
@@ -151,9 +151,10 @@ module Infoboxer
|
|
151
151
|
# See {Sections parent module} documentation for details.
|
152
152
|
class Section < Tree::Compound
|
153
153
|
def initialize(heading, children = Tree::Nodes[])
|
154
|
-
# no super: we don't wont to
|
154
|
+
# no super: we don't want to rewrite children's parent
|
155
155
|
@children = Tree::Nodes[*children]
|
156
156
|
@heading = heading
|
157
|
+
@params = {level: heading.level, heading: heading.text.strip}
|
157
158
|
end
|
158
159
|
|
159
160
|
# Section's heading.
|
@@ -173,6 +174,12 @@ module Infoboxer
|
|
173
174
|
end
|
174
175
|
|
175
176
|
include Container
|
177
|
+
|
178
|
+
private
|
179
|
+
|
180
|
+
#def show_params
|
181
|
+
#super(level: heading.level, heading: heading.text)
|
182
|
+
#end
|
176
183
|
end
|
177
184
|
end
|
178
185
|
end
|
data/lib/infoboxer/tree/image.rb
CHANGED
data/lib/infoboxer/tree/node.rb
CHANGED
@@ -134,7 +134,8 @@ module Infoboxer
|
|
134
134
|
MAX_CHARS = 30
|
135
135
|
|
136
136
|
def shorten_text
|
137
|
-
|
137
|
+
txt = text_.sub(/^([^\n]+)\n.+$/m, '\1...')
|
138
|
+
txt.length > MAX_CHARS ? txt[0..MAX_CHARS] + '...' : txt
|
138
139
|
end
|
139
140
|
|
140
141
|
def clean_class
|
@@ -158,7 +159,7 @@ module Infoboxer
|
|
158
159
|
end
|
159
160
|
|
160
161
|
def _eq(other)
|
161
|
-
fail(NotImplementedError, "#_eq should be defined in subclasses")
|
162
|
+
fail(NotImplementedError, "#_eq should be defined in subclasses (called for #{self.class})")
|
162
163
|
end
|
163
164
|
|
164
165
|
def decode(str)
|
data/lib/infoboxer/version.rb
CHANGED
data/lib/infoboxer.rb
CHANGED
@@ -69,11 +69,13 @@ module Infoboxer
|
|
69
69
|
species: 'species.wikimedia.org',
|
70
70
|
}
|
71
71
|
|
72
|
+
WIKIS = {}
|
73
|
+
|
72
74
|
public
|
73
75
|
|
74
76
|
# Includeable version of {Infoboxer.wiki}
|
75
77
|
def wiki(api_url, options = {})
|
76
|
-
MediaWiki.new(api_url, options || {})
|
78
|
+
WIKIS[api_url] ||= MediaWiki.new(api_url, options || {})
|
77
79
|
end
|
78
80
|
|
79
81
|
class << self
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: infoboxer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Shepelev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-03-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -121,6 +121,7 @@ files:
|
|
121
121
|
- lib/infoboxer/core_ext.rb
|
122
122
|
- lib/infoboxer/definitions/en.wikipedia.org.rb
|
123
123
|
- lib/infoboxer/media_wiki.rb
|
124
|
+
- lib/infoboxer/media_wiki/mediawiktory_patch.rb
|
124
125
|
- lib/infoboxer/media_wiki/page.rb
|
125
126
|
- lib/infoboxer/media_wiki/traits.rb
|
126
127
|
- lib/infoboxer/navigation.rb
|