reality 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.dokaz +1 -0
- data/LICENSE.txt +22 -0
- data/README.md +538 -66
- data/bin/reality +9 -0
- data/config/demo.yml +3 -0
- data/data/wikidata-predicates.json +1 -0
- data/data/wikidata-predicates.yaml +2089 -0
- data/lib/reality.rb +26 -7
- data/lib/reality/config.rb +46 -0
- data/lib/reality/definitions/dictionaries.rb +67 -0
- data/lib/reality/definitions/helpers.rb +34 -0
- data/lib/reality/definitions/wikidata.rb +105 -0
- data/lib/reality/definitions/wikipedia_character.rb +17 -0
- data/lib/reality/definitions/wikipedia_city.rb +19 -0
- data/lib/reality/definitions/wikipedia_continent.rb +21 -0
- data/lib/reality/definitions/wikipedia_country.rb +23 -0
- data/lib/reality/definitions/wikipedia_musical_artist.rb +15 -0
- data/lib/reality/definitions/wikipedia_person.rb +17 -0
- data/lib/reality/entity.rb +152 -0
- data/lib/reality/entity/coercion.rb +76 -0
- data/lib/reality/entity/wikidata_predicates.rb +31 -0
- data/lib/reality/entity/wikipedia_type.rb +73 -0
- data/lib/reality/extras/geonames.rb +29 -0
- data/lib/reality/extras/open_weather_map.rb +63 -0
- data/lib/reality/geo.rb +122 -0
- data/lib/reality/infoboxer_templates.rb +8 -0
- data/lib/reality/list.rb +95 -0
- data/lib/reality/measure.rb +18 -12
- data/lib/reality/measure/unit.rb +5 -1
- data/lib/reality/methods.rb +16 -0
- data/lib/reality/pretty_inspect.rb +11 -0
- data/lib/reality/refinements.rb +26 -0
- data/lib/reality/shortcuts.rb +11 -0
- data/lib/reality/tz_offset.rb +64 -0
- data/lib/reality/util/formatters.rb +35 -0
- data/lib/reality/util/parsers.rb +53 -0
- data/lib/reality/version.rb +6 -0
- data/lib/reality/wikidata.rb +310 -0
- data/reality.gemspec +12 -3
- data/script/extract_wikidata_properties.rb +23 -0
- data/script/lib/nokogiri_more.rb +175 -0
- metadata +137 -7
- data/examples/all_countries.rb +0 -16
- data/lib/reality/country.rb +0 -283
data/reality.gemspec
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
+
require_relative 'lib/reality/version'
|
2
|
+
|
1
3
|
Gem::Specification.new do |s|
|
2
4
|
s.name = 'reality'
|
3
|
-
s.version = '0.0.2'
|
4
|
-
s.authors = ['Victor Shepelev']
|
5
|
+
s.version = Reality::VERSION
|
6
|
+
s.authors = ['Victor Shepelev and contributors']
|
5
7
|
s.email = 'zverok.offline@gmail.com'
|
6
8
|
s.homepage = 'https://github.com/molybdenum-99/reality'
|
7
9
|
|
@@ -28,7 +30,14 @@ Gem::Specification.new do |s|
|
|
28
30
|
s.bindir = 'bin'
|
29
31
|
s.executables << 'reality'
|
30
32
|
|
31
|
-
s.add_dependency 'infoboxer', '>= 0.2.
|
33
|
+
s.add_dependency 'infoboxer', '>= 0.2.3'
|
34
|
+
s.add_dependency 'hashie'
|
35
|
+
s.add_dependency 'open-weather'
|
36
|
+
s.add_dependency 'timezone'
|
37
|
+
s.add_dependency 'geokit'
|
38
|
+
s.add_dependency 'tzinfo'
|
39
|
+
s.add_dependency 'ruby-sun-times'
|
40
|
+
s.add_dependency 'time_boots'
|
32
41
|
|
33
42
|
s.add_development_dependency 'rspec', '~> 3'
|
34
43
|
s.add_development_dependency 'rspec-its', '~> 1'
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'bundler/setup'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'progress_bar/core_ext/enumerable_with_progress'
|
5
|
+
require 'nokogiri'
|
6
|
+
require 'json'
|
7
|
+
require_relative 'lib/nokogiri_more'
|
8
|
+
|
9
|
+
start = Nokogiri::HTML(open('https://www.wikidata.org/wiki/Wikidata:List_of_properties'))
|
10
|
+
res = start.
|
11
|
+
search('th:contains("By number")').first.parent.search('td > a').
|
12
|
+
with_progress.map{|a|
|
13
|
+
name = a.text
|
14
|
+
data = Nokogiri::HTML(open('https://www.wikidata.org' + a.href).read).
|
15
|
+
search('tr').
|
16
|
+
map{|tr| tr.search('td').map(&:text)}.
|
17
|
+
map{|tds| [tds[0], tds[1]]}.
|
18
|
+
map(&:reverse).
|
19
|
+
reject{|id, name| id.nil? || name.nil?}.
|
20
|
+
to_h
|
21
|
+
}.inject(&:merge)
|
22
|
+
|
23
|
+
File.write 'data/wikidata-predicates.json', res.to_json
|
@@ -0,0 +1,175 @@
|
|
1
|
+
require 'naught'
|
2
|
+
require 'addressable/uri'
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module More
|
6
|
+
module NodeOnlyFor
|
7
|
+
def only_for!(selector)
|
8
|
+
matches?(selector) or fail(ArgumentError, "Doesn't works for nodes other than '#{selector}'")
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
module NodeHref
|
13
|
+
include NodeOnlyFor
|
14
|
+
|
15
|
+
def href
|
16
|
+
#only_for!('a[href]')
|
17
|
+
document.absolute(self['href'])
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
module DocumentURI
|
22
|
+
def uri
|
23
|
+
url ? Addressable::URI.parse(url) : nil
|
24
|
+
end
|
25
|
+
|
26
|
+
def absolute(link)
|
27
|
+
if uri
|
28
|
+
(uri + link).to_s
|
29
|
+
else
|
30
|
+
Addressable::URI.parse(link).to_s # double check it's really a link
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
module NodeChildrenGroups
|
36
|
+
def children_groups(*selectors)
|
37
|
+
groups = []
|
38
|
+
flat = children.select{|node| selectors.any?{|s| node.matches?(s)}}
|
39
|
+
while !flat.empty?
|
40
|
+
groups << make_group(flat, selectors)
|
41
|
+
end
|
42
|
+
groups
|
43
|
+
end
|
44
|
+
|
45
|
+
include NodeOnlyFor
|
46
|
+
|
47
|
+
def each_term
|
48
|
+
only_for!('dl')
|
49
|
+
children_groups('dt', 'dd')
|
50
|
+
end
|
51
|
+
|
52
|
+
private
|
53
|
+
|
54
|
+
def make_group(flat, selectors)
|
55
|
+
sel = selectors.dup
|
56
|
+
group = [[]]
|
57
|
+
while !flat.empty?
|
58
|
+
if flat.first.matches?(sel.first)
|
59
|
+
group.last << flat.shift
|
60
|
+
elsif sel.size > 1 && flat.first.matches?(sel[1])
|
61
|
+
sel.shift
|
62
|
+
group << []
|
63
|
+
group.last << flat.shift
|
64
|
+
else
|
65
|
+
break
|
66
|
+
end
|
67
|
+
end
|
68
|
+
group
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
module NodeText
|
73
|
+
def text_
|
74
|
+
text.strip
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
NodeNaught = Naught.build do |config|
|
79
|
+
config.black_hole
|
80
|
+
|
81
|
+
def tap # so you can just at?(selector).tap{|node| - and never be here, if it's not found
|
82
|
+
self
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
class NodeNotFound < RuntimeError
|
87
|
+
end
|
88
|
+
|
89
|
+
module NodeBangMethods
|
90
|
+
def at!(selector)
|
91
|
+
bang!(at(selector), selector)
|
92
|
+
end
|
93
|
+
|
94
|
+
def at_css!(selector)
|
95
|
+
bang!(at_css(selector), selector)
|
96
|
+
end
|
97
|
+
|
98
|
+
def at_xpath!(selector)
|
99
|
+
bang!(at_xpath(selector), selector)
|
100
|
+
end
|
101
|
+
|
102
|
+
def find_child!(selector)
|
103
|
+
bang!(find_child(selector), selector)
|
104
|
+
end
|
105
|
+
|
106
|
+
private
|
107
|
+
|
108
|
+
def bang!(node, selector)
|
109
|
+
if node
|
110
|
+
node
|
111
|
+
else
|
112
|
+
no_node!(selector)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
def no_node!(selector)
|
117
|
+
#case Nokogiri::More::Config.bang_mode
|
118
|
+
#when :fail
|
119
|
+
fail NodeNotFound, "#{name} have no node at #{selector}"
|
120
|
+
#when :naught
|
121
|
+
#NodeNaught.new
|
122
|
+
#when :log
|
123
|
+
#NodeNaught.new
|
124
|
+
#end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
module NodeQuestMethods
|
129
|
+
def at?(selector)
|
130
|
+
at(selector) || NodeNaught.new
|
131
|
+
end
|
132
|
+
|
133
|
+
def at_css?(selector)
|
134
|
+
at_css(selector) || NodeNaught.new
|
135
|
+
end
|
136
|
+
|
137
|
+
def at_xpath?(selector)
|
138
|
+
at_xpath(selector) || NodeNaught.new
|
139
|
+
end
|
140
|
+
|
141
|
+
def find_child?(selector)
|
142
|
+
find_child(selector) || NodeNaught.new
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
module NodeFindChildren
|
147
|
+
def find_child(selector)
|
148
|
+
children.filter(selector).first
|
149
|
+
end
|
150
|
+
|
151
|
+
def find_children(selector)
|
152
|
+
children.filter(selector)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
# now let's do evil
|
158
|
+
class ::Class
|
159
|
+
public :include
|
160
|
+
end
|
161
|
+
|
162
|
+
Nokogiri::XML::Document.include More::DocumentURI
|
163
|
+
Nokogiri::XML::Node.include More::NodeText
|
164
|
+
Nokogiri::XML::Node.include More::NodeHref
|
165
|
+
Nokogiri::XML::Node.include More::NodeChildrenGroups
|
166
|
+
|
167
|
+
Nokogiri::XML::Node.include More::NodeBangMethods
|
168
|
+
Nokogiri::XML::Node.include More::NodeQuestMethods
|
169
|
+
Nokogiri::XML::Node.include More::NodeFindChildren
|
170
|
+
|
171
|
+
Nokogiri::XML::NodeSet.include More::NodeBangMethods
|
172
|
+
Nokogiri::XML::NodeSet.include More::NodeQuestMethods
|
173
|
+
Nokogiri::XML::NodeSet.include More::NodeFindChildren
|
174
|
+
end
|
175
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: reality
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
- Victor Shepelev
|
7
|
+
- Victor Shepelev and contributors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-03-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: infoboxer
|
@@ -16,14 +16,112 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.2.
|
19
|
+
version: 0.2.3
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.2.
|
26
|
+
version: 0.2.3
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: hashie
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: open-weather
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: timezone
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: geokit
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: tzinfo
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: ruby-sun-times
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: time_boots
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
27
125
|
- !ruby/object:Gem::Dependency
|
28
126
|
name: rspec
|
29
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -90,26 +188,58 @@ executables:
|
|
90
188
|
extensions: []
|
91
189
|
extra_rdoc_files: []
|
92
190
|
files:
|
191
|
+
- ".dokaz"
|
192
|
+
- LICENSE.txt
|
93
193
|
- README.md
|
94
194
|
- bin/reality
|
195
|
+
- config/demo.yml
|
95
196
|
- data/countries.yaml
|
96
197
|
- data/country_lists.txt
|
97
198
|
- data/country_orgs.yaml
|
98
199
|
- data/infoboxes.txt
|
99
200
|
- data/infoboxes_freq.txt
|
100
201
|
- data/infoboxes_freq_sorted.txt
|
101
|
-
- examples/all_countries.rb
|
202
|
+
- data/wikidata-predicates.json
|
203
|
+
- data/wikidata-predicates.yaml
|
102
204
|
- lib/reality.rb
|
103
|
-
- lib/reality/country.rb
|
205
|
+
- lib/reality/config.rb
|
206
|
+
- lib/reality/definitions/dictionaries.rb
|
207
|
+
- lib/reality/definitions/helpers.rb
|
208
|
+
- lib/reality/definitions/wikidata.rb
|
209
|
+
- lib/reality/definitions/wikipedia_character.rb
|
210
|
+
- lib/reality/definitions/wikipedia_city.rb
|
211
|
+
- lib/reality/definitions/wikipedia_continent.rb
|
212
|
+
- lib/reality/definitions/wikipedia_country.rb
|
213
|
+
- lib/reality/definitions/wikipedia_musical_artist.rb
|
214
|
+
- lib/reality/definitions/wikipedia_person.rb
|
215
|
+
- lib/reality/entity.rb
|
216
|
+
- lib/reality/entity/coercion.rb
|
217
|
+
- lib/reality/entity/wikidata_predicates.rb
|
218
|
+
- lib/reality/entity/wikipedia_type.rb
|
219
|
+
- lib/reality/extras/geonames.rb
|
220
|
+
- lib/reality/extras/open_weather_map.rb
|
221
|
+
- lib/reality/geo.rb
|
104
222
|
- lib/reality/infoboxer_templates.rb
|
223
|
+
- lib/reality/list.rb
|
105
224
|
- lib/reality/measure.rb
|
106
225
|
- lib/reality/measure/unit.rb
|
226
|
+
- lib/reality/methods.rb
|
227
|
+
- lib/reality/pretty_inspect.rb
|
228
|
+
- lib/reality/refinements.rb
|
229
|
+
- lib/reality/shortcuts.rb
|
230
|
+
- lib/reality/tz_offset.rb
|
231
|
+
- lib/reality/util/formatters.rb
|
232
|
+
- lib/reality/util/parsers.rb
|
233
|
+
- lib/reality/version.rb
|
234
|
+
- lib/reality/wikidata.rb
|
107
235
|
- reality.gemspec
|
108
236
|
- script/extract_all_infoboxes.rb
|
109
237
|
- script/extract_countries.rb
|
110
238
|
- script/extract_country_categories.rb
|
111
239
|
- script/extract_infobox_frequency.rb
|
240
|
+
- script/extract_wikidata_properties.rb
|
112
241
|
- script/lib/faraday_naive_cache.rb
|
242
|
+
- script/lib/nokogiri_more.rb
|
113
243
|
- script/out/categories.txt
|
114
244
|
homepage: https://github.com/molybdenum-99/reality
|
115
245
|
licenses:
|
data/examples/all_countries.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'rubygems'
|
3
|
-
require 'bundler/setup'
|
4
|
-
$:.unshift 'lib'
|
5
|
-
require 'reality'
|
6
|
-
require 'fileutils'
|
7
|
-
require 'yaml'
|
8
|
-
|
9
|
-
FileUtils.mkdir_p 'examples/output'
|
10
|
-
|
11
|
-
start = Time.now
|
12
|
-
|
13
|
-
File.write 'examples/output/countries.yaml',
|
14
|
-
Reality.countries.to_a.sort_by(&:name).map(&:to_h).to_yaml
|
15
|
-
|
16
|
-
puts "Finished in %i seconds" % (Time.now - start)
|
data/lib/reality/country.rb
DELETED
@@ -1,283 +0,0 @@
|
|
1
|
-
# NB: all of this is early drafts, so may look naive and sub-optimal.
|
2
|
-
# Just stay tuned!
|
3
|
-
|
4
|
-
module Reality
|
5
|
-
class Country
|
6
|
-
class List
|
7
|
-
def initialize(*names)
|
8
|
-
@names = names
|
9
|
-
end
|
10
|
-
|
11
|
-
def count
|
12
|
-
@names.count
|
13
|
-
end
|
14
|
-
|
15
|
-
def first(n = nil)
|
16
|
-
res = get(*@names.first(n || 1))
|
17
|
-
n ? res : res.first
|
18
|
-
end
|
19
|
-
|
20
|
-
def last(n = nil)
|
21
|
-
res = get(*@names.last(n || 1))
|
22
|
-
n ? res : res.first
|
23
|
-
end
|
24
|
-
|
25
|
-
def sample(n = nil)
|
26
|
-
res = get(*@names.sample(n || 1))
|
27
|
-
n ? res : res.first
|
28
|
-
end
|
29
|
-
|
30
|
-
def each(&block)
|
31
|
-
@pages = get(*@names)
|
32
|
-
@pages.each(&block)
|
33
|
-
end
|
34
|
-
|
35
|
-
include Enumerable
|
36
|
-
|
37
|
-
def to_a
|
38
|
-
get(*@names)
|
39
|
-
end
|
40
|
-
|
41
|
-
def where(**filters)
|
42
|
-
names = @names & Reality::Country.
|
43
|
-
by_continents.
|
44
|
-
select{|k, v| v == filters[:continent]}.
|
45
|
-
map(&:first)
|
46
|
-
|
47
|
-
self.class.new(*names)
|
48
|
-
end
|
49
|
-
|
50
|
-
private
|
51
|
-
|
52
|
-
def get(*names)
|
53
|
-
Reality.wp.get(*names).map{|page| Country.new(page)}
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
def initialize(page)
|
58
|
-
@page = page
|
59
|
-
end
|
60
|
-
|
61
|
-
def name
|
62
|
-
page.title
|
63
|
-
end
|
64
|
-
|
65
|
-
def long_name
|
66
|
-
infobox.fetch('conventional_long_name').text.strip
|
67
|
-
end
|
68
|
-
|
69
|
-
def capital
|
70
|
-
infobox.fetch('capital').lookup(:Wikilink).first
|
71
|
-
end
|
72
|
-
|
73
|
-
def languages
|
74
|
-
[
|
75
|
-
['Official', infobox_links('official_languages')],
|
76
|
-
[infobox.fetch('languages_type').text.sub(/ languages?$/, ''), infobox_links('languages')]
|
77
|
-
].reject{|k, v| k.empty? || v.empty?}.to_h
|
78
|
-
end
|
79
|
-
|
80
|
-
def tld
|
81
|
-
tlds.first
|
82
|
-
end
|
83
|
-
|
84
|
-
def tlds
|
85
|
-
infobox_links('cctld').map(&:link)
|
86
|
-
end
|
87
|
-
|
88
|
-
def calling_code
|
89
|
-
infobox.fetch('calling_code').text.strip
|
90
|
-
end
|
91
|
-
|
92
|
-
def utc_offset
|
93
|
-
infobox.fetch('utc_offset').text.sub('−', '-').to_i
|
94
|
-
end
|
95
|
-
|
96
|
-
def currency
|
97
|
-
currencies.first
|
98
|
-
end
|
99
|
-
|
100
|
-
def currencies
|
101
|
-
infobox_links('currency').reject{|l| l.link == 'ISO 4217'}
|
102
|
-
end
|
103
|
-
|
104
|
-
def area
|
105
|
-
Reality::Measure(infobox.fetch('area_km2').text.gsub(',', '').to_i, 'km²')
|
106
|
-
end
|
107
|
-
|
108
|
-
def population
|
109
|
-
val = %w[population_estimate population_census].map{|var|
|
110
|
-
infobox.fetch(var).text.strip
|
111
|
-
}.reject(&:empty?).first
|
112
|
-
val && Reality::Measure(parse_maybe_scaled(val), 'person')
|
113
|
-
end
|
114
|
-
|
115
|
-
def gdp_ppp
|
116
|
-
val = infobox.fetch('GDP_PPP').text.strip.sub(/^((Int|US)?\$|USD)/, '')
|
117
|
-
val.empty? ? nil : Reality::Measure(parse_scaled(val), '$')
|
118
|
-
end
|
119
|
-
|
120
|
-
def gdp_nominal
|
121
|
-
val = infobox.fetch('GDP_nominal').text.strip.sub(/^((Int|US)?\$|USD)/, '')
|
122
|
-
val.empty? ? nil : Reality::Measure(parse_scaled(val), '$')
|
123
|
-
end
|
124
|
-
|
125
|
-
alias_method :gdp, :gdp_nominal
|
126
|
-
|
127
|
-
def leaders
|
128
|
-
titles = infobox.fetch(/^leader_title\d/).map(&:text_)
|
129
|
-
names = infobox.fetch(/^leader_name\d/).map{|v| v.lookup(:Wikilink).first}
|
130
|
-
titles.zip(names).to_h
|
131
|
-
end
|
132
|
-
|
133
|
-
def continent
|
134
|
-
self.class.by_continents[page.title]
|
135
|
-
end
|
136
|
-
|
137
|
-
def organizations
|
138
|
-
organizations_list.map{|o| o[:name]}
|
139
|
-
end
|
140
|
-
|
141
|
-
def member_of?(org)
|
142
|
-
organizations_list.any?{|o| o[:name] == org || o[:abbr] == org}
|
143
|
-
end
|
144
|
-
|
145
|
-
def to_s
|
146
|
-
name
|
147
|
-
end
|
148
|
-
|
149
|
-
def inspect
|
150
|
-
"#<#{self.class}(#{name})>"
|
151
|
-
end
|
152
|
-
|
153
|
-
PROPERTIES = %i[
|
154
|
-
continent name long_name
|
155
|
-
tld tlds calling_code utc_offset
|
156
|
-
capital languages currency
|
157
|
-
leaders area population
|
158
|
-
gdp_ppp gdp_nominal
|
159
|
-
]
|
160
|
-
|
161
|
-
def to_h
|
162
|
-
#p self
|
163
|
-
PROPERTIES.
|
164
|
-
map{|prop| [prop, to_simple_type(send(prop))] }.
|
165
|
-
#reject{|prop, val| !val || val.respond_to?(:empty?) && val.empty?}.
|
166
|
-
to_h
|
167
|
-
end
|
168
|
-
|
169
|
-
class << self
|
170
|
-
def by_continents
|
171
|
-
@by_continents ||= Reality.wp.
|
172
|
-
get('List of countries by continent').
|
173
|
-
sections.first.
|
174
|
-
sections.map{|s|
|
175
|
-
continent = s.heading.text_
|
176
|
-
s.tables.first.
|
177
|
-
lookup(:Wikilink, :bold?).map(&:link).
|
178
|
-
map{|country| [country, continent]}
|
179
|
-
}.flatten(1).
|
180
|
-
to_h
|
181
|
-
end
|
182
|
-
|
183
|
-
def organizations
|
184
|
-
@organizations ||= YAML.load(File.read(File.expand_path('../../../data/country_orgs.yaml', __FILE__)))
|
185
|
-
end
|
186
|
-
end
|
187
|
-
|
188
|
-
private
|
189
|
-
|
190
|
-
attr_reader :page
|
191
|
-
|
192
|
-
def infobox
|
193
|
-
page.infobox
|
194
|
-
end
|
195
|
-
|
196
|
-
def organizations_list
|
197
|
-
catnames = page.categories.map(&:name)
|
198
|
-
self.class.organizations.select{|o| catnames.include?(o[:category])}
|
199
|
-
end
|
200
|
-
|
201
|
-
def infobox_links(varname)
|
202
|
-
src = infobox.fetch(varname)
|
203
|
-
if tmpl = src.lookup(:Template, name: /list$/).first
|
204
|
-
# values could be both inside and outside list, see India's cctld value
|
205
|
-
src = Infoboxer::Tree::Nodes[src, tmpl.variables]
|
206
|
-
end
|
207
|
-
src.lookup(:Wikilink).uniq
|
208
|
-
end
|
209
|
-
|
210
|
-
# See "Short scale": https://en.wikipedia.org/wiki/Long_and_short_scales#Comparison
|
211
|
-
SCALES = {
|
212
|
-
'million' => 1_000_000,
|
213
|
-
'billion' => 1_000_000_000,
|
214
|
-
'trillion' => 1_000_000_000_000,
|
215
|
-
'quadrillion' => 1_000_000_000_000_000,
|
216
|
-
'quintillion' => 1_000_000_000_000_000_000,
|
217
|
-
'sextillion' => 1_000_000_000_000_000_000_000,
|
218
|
-
'septillion' => 1_000_000_000_000_000_000_000_000,
|
219
|
-
}
|
220
|
-
SCALES_REGEXP = Regexp.union(*SCALES.keys)
|
221
|
-
|
222
|
-
def parse_scaled(str)
|
223
|
-
match, amount, scale = */^([0-9.,]+)[[:space:]]*(#{SCALES_REGEXP})/.match(str)
|
224
|
-
match or
|
225
|
-
fail(ArgumentError, "Unparseable scaled value #{str} for #{self}")
|
226
|
-
|
227
|
-
(amount.gsub(/[,]/, '').to_f * fetch_scale(scale)).to_i
|
228
|
-
end
|
229
|
-
|
230
|
-
def parse_maybe_scaled(str)
|
231
|
-
match, amount, scale = */^([0-9.,]+)[[:space:]]*(#{SCALES_REGEXP})?/.match(str)
|
232
|
-
match or
|
233
|
-
fail(ArgumentError, "Unparseable scaled value #{str} for #{self}")
|
234
|
-
|
235
|
-
if scale
|
236
|
-
(amount.gsub(/[,]/, '').to_f * fetch_scale(scale)).to_i
|
237
|
-
else
|
238
|
-
amount.gsub(/[,]/, '').to_i
|
239
|
-
end
|
240
|
-
end
|
241
|
-
|
242
|
-
def fetch_scale(str)
|
243
|
-
_, res = SCALES.detect{|key, val| str.start_with?(key)}
|
244
|
-
|
245
|
-
res or fail("Scale not found: #{str} for #{self}")
|
246
|
-
end
|
247
|
-
|
248
|
-
def to_simple_type(val)
|
249
|
-
case val
|
250
|
-
when nil, Numeric, String, Symbol
|
251
|
-
val
|
252
|
-
when Array
|
253
|
-
val.map{|v| to_simple_type(v)}
|
254
|
-
when Hash
|
255
|
-
val.map{|k, v| [to_simple_type(k), to_simple_type(v)]}.to_h
|
256
|
-
when Infoboxer::Tree::Wikilink
|
257
|
-
val.link
|
258
|
-
when Infoboxer::Tree::Node
|
259
|
-
val.text_
|
260
|
-
when Reality::Measure
|
261
|
-
val.amount
|
262
|
-
else
|
263
|
-
fail ArgumentError, "Non-coercible value #{val.class}"
|
264
|
-
end
|
265
|
-
end
|
266
|
-
end
|
267
|
-
|
268
|
-
def Reality.country(name)
|
269
|
-
page = wp.get(name) or return nil
|
270
|
-
# FIXME: not very reliable, as some fictional countries, aliances
|
271
|
-
# and country groups also have this infobox. Or maybe it is acceptable?..
|
272
|
-
page.templates(name: 'Infobox country').empty? ? nil : Country.new(page)
|
273
|
-
end
|
274
|
-
|
275
|
-
def Reality.countries(*names)
|
276
|
-
names = Country.by_continents.keys.sort if names.empty?
|
277
|
-
Country::List.new(*names)
|
278
|
-
end
|
279
|
-
|
280
|
-
def Reality.wp
|
281
|
-
@wp ||= Infoboxer.wp # while Infoboxer recreates wp for each request
|
282
|
-
end
|
283
|
-
end
|