reality 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.dokaz +1 -0
- data/LICENSE.txt +22 -0
- data/README.md +538 -66
- data/bin/reality +9 -0
- data/config/demo.yml +3 -0
- data/data/wikidata-predicates.json +1 -0
- data/data/wikidata-predicates.yaml +2089 -0
- data/lib/reality.rb +26 -7
- data/lib/reality/config.rb +46 -0
- data/lib/reality/definitions/dictionaries.rb +67 -0
- data/lib/reality/definitions/helpers.rb +34 -0
- data/lib/reality/definitions/wikidata.rb +105 -0
- data/lib/reality/definitions/wikipedia_character.rb +17 -0
- data/lib/reality/definitions/wikipedia_city.rb +19 -0
- data/lib/reality/definitions/wikipedia_continent.rb +21 -0
- data/lib/reality/definitions/wikipedia_country.rb +23 -0
- data/lib/reality/definitions/wikipedia_musical_artist.rb +15 -0
- data/lib/reality/definitions/wikipedia_person.rb +17 -0
- data/lib/reality/entity.rb +152 -0
- data/lib/reality/entity/coercion.rb +76 -0
- data/lib/reality/entity/wikidata_predicates.rb +31 -0
- data/lib/reality/entity/wikipedia_type.rb +73 -0
- data/lib/reality/extras/geonames.rb +29 -0
- data/lib/reality/extras/open_weather_map.rb +63 -0
- data/lib/reality/geo.rb +122 -0
- data/lib/reality/infoboxer_templates.rb +8 -0
- data/lib/reality/list.rb +95 -0
- data/lib/reality/measure.rb +18 -12
- data/lib/reality/measure/unit.rb +5 -1
- data/lib/reality/methods.rb +16 -0
- data/lib/reality/pretty_inspect.rb +11 -0
- data/lib/reality/refinements.rb +26 -0
- data/lib/reality/shortcuts.rb +11 -0
- data/lib/reality/tz_offset.rb +64 -0
- data/lib/reality/util/formatters.rb +35 -0
- data/lib/reality/util/parsers.rb +53 -0
- data/lib/reality/version.rb +6 -0
- data/lib/reality/wikidata.rb +310 -0
- data/reality.gemspec +12 -3
- data/script/extract_wikidata_properties.rb +23 -0
- data/script/lib/nokogiri_more.rb +175 -0
- metadata +137 -7
- data/examples/all_countries.rb +0 -16
- data/lib/reality/country.rb +0 -283
data/reality.gemspec
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
+
require_relative 'lib/reality/version'
|
2
|
+
|
1
3
|
Gem::Specification.new do |s|
|
2
4
|
s.name = 'reality'
|
3
|
-
s.version =
|
4
|
-
s.authors = ['Victor Shepelev']
|
5
|
+
s.version = Reality::VERSION
|
6
|
+
s.authors = ['Victor Shepelev and contributors']
|
5
7
|
s.email = 'zverok.offline@gmail.com'
|
6
8
|
s.homepage = 'https://github.com/molybdenum-99/reality'
|
7
9
|
|
@@ -28,7 +30,14 @@ Gem::Specification.new do |s|
|
|
28
30
|
s.bindir = 'bin'
|
29
31
|
s.executables << 'reality'
|
30
32
|
|
31
|
-
s.add_dependency 'infoboxer', '>= 0.2.
|
33
|
+
s.add_dependency 'infoboxer', '>= 0.2.3'
|
34
|
+
s.add_dependency 'hashie'
|
35
|
+
s.add_dependency 'open-weather'
|
36
|
+
s.add_dependency 'timezone'
|
37
|
+
s.add_dependency 'geokit'
|
38
|
+
s.add_dependency 'tzinfo'
|
39
|
+
s.add_dependency 'ruby-sun-times'
|
40
|
+
s.add_dependency 'time_boots'
|
32
41
|
|
33
42
|
s.add_development_dependency 'rspec', '~> 3'
|
34
43
|
s.add_development_dependency 'rspec-its', '~> 1'
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'bundler/setup'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'progress_bar/core_ext/enumerable_with_progress'
|
5
|
+
require 'nokogiri'
|
6
|
+
require 'json'
|
7
|
+
require_relative 'lib/nokogiri_more'
|
8
|
+
|
9
|
+
# Scrapes Wikidata's "List of properties" index, follows every link in the
# "By number" row, and collects an { id => label } hash from the table rows of
# each linked page (second cell is used as the key, first cell as the value).
# The merged hash is written to data/wikidata-predicates.json.
wikidata = 'https://www.wikidata.org'

# Kernel#open no longer accepts URLs on Ruby 3+, so go through URI.open
# (provided by open-uri, required above).
start = Nokogiri::HTML(URI.open("#{wikidata}/wiki/Wikidata:List_of_properties").read)

res = start.
  search('th:contains("By number")').first.parent.search('td > a').
  with_progress.map { |a|
    Nokogiri::HTML(URI.open(wikidata + a.href).read).
      search('tr').
      map { |tr| tr.search('td').map(&:text) }.
      map { |tds| [tds[1], tds[0]] }.
      reject { |id, label| id.nil? || label.nil? }.
      to_h
  }.reduce({}, :merge) # seeded with {} so an empty link list yields {} rather than nil

File.write 'data/wikidata-predicates.json', res.to_json
|
@@ -0,0 +1,175 @@
|
|
1
|
+
require 'naught'
|
2
|
+
require 'addressable/uri'
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module More
|
6
|
+
# Guard shared by helpers that only make sense for particular nodes.
module NodeOnlyFor
  # Ensures the receiver matches +selector+.
  #
  # @param selector [String] selector handed to the receiver's `matches?`
  # @return the (truthy) result of `matches?(selector)`
  # @raise [ArgumentError] when the receiver does not match +selector+
  def only_for!(selector)
    matches?(selector) ||
      raise(ArgumentError, "Doesn't work for nodes other than '#{selector}'")
  end
end
|
11
|
+
|
12
|
+
module NodeHref
|
13
|
+
include NodeOnlyFor
|
14
|
+
|
15
|
+
# Link target of the node, resolved through `document.absolute`.
#
# Nodes without an `href` attribute yield nil instead of handing nil to
# `document.absolute` (the stricter `only_for!('a[href]')` guard was
# disabled in the original and stays disabled).
#
# @return [String, nil]
def href
  link = self['href']
  link && document.absolute(link)
end
|
19
|
+
end
|
20
|
+
|
21
|
+
# URL helpers for a document that exposes `url`.
module DocumentURI
  # @return [Addressable::URI, nil] the document's `url` parsed, or nil when
  #   the document has no url
  def uri
    url ? Addressable::URI.parse(url) : nil
  end

  # Resolves +link+ against the document URL.
  #
  # @param link [String] absolute or relative link
  # @return [String] +link+ joined onto the document URL when there is one;
  #   otherwise +link+ passed through Addressable's parser
  def absolute(link)
    base = uri # parse the document URL once, not once per branch
    return (base + link).to_s if base

    Addressable::URI.parse(link).to_s # double check it's really a link
  end
end
|
34
|
+
|
35
|
+
module NodeChildrenGroups
|
36
|
+
# Splits the children that match any of +selectors+ into consecutive groups.
#
# Non-matching children are dropped first; the remaining ones are handed to
# `make_group`, which consumes them from the front, until none are left.
#
# @param selectors [Array<String>] selectors, in the order they are expected
# @return [Array] one entry per `make_group` call
def children_groups(*selectors)
  pending = children.select { |child| selectors.any? { |sel| child.matches?(sel) } }
  [].tap do |groups|
    groups << make_group(pending, selectors) until pending.empty?
  end
end
|
44
|
+
|
45
|
+
include NodeOnlyFor
|
46
|
+
|
47
|
+
# Groups the `dt`/`dd` children of a `dl` node.
#
# Fails via `only_for!` unless the receiver matches 'dl'. When a block is
# given, every group is also yielded to it (previously a block was silently
# ignored); the return value is the same either way.
#
# @yield [group] each group built by `children_groups('dt', 'dd')`
# @return [Array] all groups
def each_term(&block)
  only_for!('dl')
  groups = children_groups('dt', 'dd')
  groups.each(&block) if block
  groups
end
|
51
|
+
|
52
|
+
private
|
53
|
+
|
54
|
+
# Consumes one group of nodes from the front of +flat+.
#
# A group holds one array per selector position, in selector order. Nodes are
# shifted off +flat+ while they match the current selector or any later one;
# selectors that are skipped over get an empty array. The group ends at the
# first node that would require going back to an earlier selector.
#
# The original only looked one selector ahead, so with three or more
# selectors a node matching a later selector was never consumed and the
# caller looped forever. Behavior for one or two selectors is unchanged.
#
# @param flat [Array] nodes responding to `matches?`; mutated (consumed nodes are removed)
# @param selectors [Array<String>] selectors in expected order
# @return [Array<Array>] the group, e.g. [[dt, dt], [dd]]
def make_group(flat, selectors)
  group = [[]]
  position = 0
  until flat.empty?
    node = flat.first
    offset = selectors[position..-1].index { |sel| node.matches?(sel) }
    break unless offset

    offset.times { group << [] } # one slot per selector we advance past/into
    position += offset
    group.last << flat.shift
  end
  group
end
|
70
|
+
end
|
71
|
+
|
72
|
+
# Text convenience for nodes.
module NodeText
  # @return [String] the node's `text` without leading/trailing whitespace
  def text_
    raw = text
    raw.strip
  end
end
|
77
|
+
|
78
|
+
# Null object returned by the `?` finders when nothing matches; built with
# Naught in black-hole mode.
NodeNaught = Naught.build do |builder|
  builder.black_hole

  # Ignores the block and returns the null object itself, so that
  # `at?(selector).tap { |node| ... }` never runs the block when the node
  # was not found.
  def tap
    self
  end
end
|
85
|
+
|
86
|
+
# Raised by the bang finders (`at!` and friends) when no node is found.
class NodeNotFound < RuntimeError; end
|
88
|
+
|
89
|
+
# Strict variants of the finder methods: each returns what the plain finder
# returns, or raises NodeNotFound when the finder comes back empty-handed.
module NodeBangMethods
  # @param selector [String]
  # @return the result of `at(selector)`
  # @raise [NodeNotFound] when `at` finds nothing
  def at!(selector)
    bang!(at(selector), selector)
  end

  # Strict `at_css`; see {#at!}.
  def at_css!(selector)
    bang!(at_css(selector), selector)
  end

  # Strict `at_xpath`; see {#at!}.
  def at_xpath!(selector)
    bang!(at_xpath(selector), selector)
  end

  # Strict `find_child`; see {#at!}.
  def find_child!(selector)
    bang!(find_child(selector), selector)
  end

  private

  # Passes a found node through, otherwise reports the failed lookup.
  def bang!(node, selector)
    node || no_node!(selector)
  end

  # Always raises. Other failure modes (:naught, :log) were sketched in
  # commented-out code but never wired up, so failing is the only behavior.
  #
  # The receiver's class name is used when it has no `name` of its own
  # (presumably the case for node sets — TODO confirm), so the lookup
  # failure is not masked by a NoMethodError.
  def no_node!(selector)
    owner = respond_to?(:name) ? name : self.class.name
    fail NodeNotFound, "#{owner} has no node at #{selector}"
  end
end
|
127
|
+
|
128
|
+
# Lenient variants of the finder methods: `at?`, `at_css?`, `at_xpath?` and
# `find_child?` each return what the plain finder returns, or a fresh
# NodeNaught when the finder returns nothing.
module NodeQuestMethods
  %i[at at_css at_xpath find_child].each do |finder|
    define_method(:"#{finder}?") do |selector|
      send(finder, selector) || NodeNaught.new
    end
  end
end
|
145
|
+
|
146
|
+
# Selector lookups restricted to the receiver's `children`.
module NodeFindChildren
  # @param selector [String]
  # @return the children kept by `children.filter(selector)`
  def find_children(selector)
    children.filter(selector)
  end

  # @param selector [String]
  # @return the first child kept by the filter, or nil when there is none
  def find_child(selector)
    find_children(selector).first
  end
end
|
155
|
+
end
|
156
|
+
|
157
|
+
# Mix the helpers into Nokogiri's classes.
#
# `send(:include, ...)` works whether or not Module#include is public (it has
# been public since Ruby 2.1), so there is no need to globally re-publish
# Class#include for every class in the process.
Nokogiri::XML::Document.send(:include, More::DocumentURI)

[More::NodeText, More::NodeHref, More::NodeChildrenGroups].each do |mixin|
  Nokogiri::XML::Node.send(:include, mixin)
end

# Finder helpers are shared by single nodes and node sets.
[Nokogiri::XML::Node, Nokogiri::XML::NodeSet].each do |target|
  [More::NodeBangMethods, More::NodeQuestMethods, More::NodeFindChildren].each do |mixin|
    target.send(:include, mixin)
  end
end
|
174
|
+
end
|
175
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: reality
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
- Victor Shepelev
|
7
|
+
- Victor Shepelev and contributors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-03-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: infoboxer
|
@@ -16,14 +16,112 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.2.
|
19
|
+
version: 0.2.3
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.2.
|
26
|
+
version: 0.2.3
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: hashie
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: open-weather
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: timezone
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: geokit
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: tzinfo
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: ruby-sun-times
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: time_boots
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
27
125
|
- !ruby/object:Gem::Dependency
|
28
126
|
name: rspec
|
29
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -90,26 +188,58 @@ executables:
|
|
90
188
|
extensions: []
|
91
189
|
extra_rdoc_files: []
|
92
190
|
files:
|
191
|
+
- ".dokaz"
|
192
|
+
- LICENSE.txt
|
93
193
|
- README.md
|
94
194
|
- bin/reality
|
195
|
+
- config/demo.yml
|
95
196
|
- data/countries.yaml
|
96
197
|
- data/country_lists.txt
|
97
198
|
- data/country_orgs.yaml
|
98
199
|
- data/infoboxes.txt
|
99
200
|
- data/infoboxes_freq.txt
|
100
201
|
- data/infoboxes_freq_sorted.txt
|
101
|
-
- examples/all_countries.rb
|
202
|
+
- data/wikidata-predicates.json
|
203
|
+
- data/wikidata-predicates.yaml
|
102
204
|
- lib/reality.rb
|
103
|
-
- lib/reality/country.rb
|
205
|
+
- lib/reality/config.rb
|
206
|
+
- lib/reality/definitions/dictionaries.rb
|
207
|
+
- lib/reality/definitions/helpers.rb
|
208
|
+
- lib/reality/definitions/wikidata.rb
|
209
|
+
- lib/reality/definitions/wikipedia_character.rb
|
210
|
+
- lib/reality/definitions/wikipedia_city.rb
|
211
|
+
- lib/reality/definitions/wikipedia_continent.rb
|
212
|
+
- lib/reality/definitions/wikipedia_country.rb
|
213
|
+
- lib/reality/definitions/wikipedia_musical_artist.rb
|
214
|
+
- lib/reality/definitions/wikipedia_person.rb
|
215
|
+
- lib/reality/entity.rb
|
216
|
+
- lib/reality/entity/coercion.rb
|
217
|
+
- lib/reality/entity/wikidata_predicates.rb
|
218
|
+
- lib/reality/entity/wikipedia_type.rb
|
219
|
+
- lib/reality/extras/geonames.rb
|
220
|
+
- lib/reality/extras/open_weather_map.rb
|
221
|
+
- lib/reality/geo.rb
|
104
222
|
- lib/reality/infoboxer_templates.rb
|
223
|
+
- lib/reality/list.rb
|
105
224
|
- lib/reality/measure.rb
|
106
225
|
- lib/reality/measure/unit.rb
|
226
|
+
- lib/reality/methods.rb
|
227
|
+
- lib/reality/pretty_inspect.rb
|
228
|
+
- lib/reality/refinements.rb
|
229
|
+
- lib/reality/shortcuts.rb
|
230
|
+
- lib/reality/tz_offset.rb
|
231
|
+
- lib/reality/util/formatters.rb
|
232
|
+
- lib/reality/util/parsers.rb
|
233
|
+
- lib/reality/version.rb
|
234
|
+
- lib/reality/wikidata.rb
|
107
235
|
- reality.gemspec
|
108
236
|
- script/extract_all_infoboxes.rb
|
109
237
|
- script/extract_countries.rb
|
110
238
|
- script/extract_country_categories.rb
|
111
239
|
- script/extract_infobox_frequency.rb
|
240
|
+
- script/extract_wikidata_properties.rb
|
112
241
|
- script/lib/faraday_naive_cache.rb
|
242
|
+
- script/lib/nokogiri_more.rb
|
113
243
|
- script/out/categories.txt
|
114
244
|
homepage: https://github.com/molybdenum-99/reality
|
115
245
|
licenses:
|
data/examples/all_countries.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'rubygems'
|
3
|
-
require 'bundler/setup'
|
4
|
-
$:.unshift 'lib'
|
5
|
-
require 'reality'
|
6
|
-
require 'fileutils'
|
7
|
-
require 'yaml'
|
8
|
-
|
9
|
-
# Dumps every country as YAML into examples/output/countries.yaml and reports
# how long the run took.
FileUtils.mkdir_p 'examples/output'

# Wall-clock time can jump (NTP, DST); the monotonic clock is the reliable
# way to measure an elapsed duration.
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)

File.write 'examples/output/countries.yaml',
  Reality.countries.to_a.sort_by(&:name).map(&:to_h).to_yaml

puts "Finished in %i seconds" % (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
|
data/lib/reality/country.rb
DELETED
@@ -1,283 +0,0 @@
|
|
1
|
-
# NB: all of this is early drafts, so may look naive and sub-optimal.
|
2
|
-
# Just stay tuned!
|
3
|
-
|
4
|
-
module Reality
|
5
|
-
# A country described by the infobox and categories of its Wikipedia page.
#
# All readers pull their data from `page.infobox`; the page object itself is
# whatever `Reality.wp.get` returned and stays private.
class Country
  # A list of countries known only by page title. Pages are fetched through
  # `Reality.wp` each time elements are requested.
  class List
    include Enumerable

    # @param names [Array<String>] page titles
    def initialize(*names)
      @names = names
    end

    # @return [Integer] number of titles in the list (nothing is fetched)
    def count
      @names.count
    end

    # @param n [Integer, nil]
    # @return [Country, Array<Country>] one country without +n+, else the first +n+
    def first(n = nil)
      fetch_some(@names.first(n || 1), n)
    end

    # @param n [Integer, nil]
    # @return [Country, Array<Country>] one country without +n+, else the last +n+
    def last(n = nil)
      fetch_some(@names.last(n || 1), n)
    end

    # @param n [Integer, nil]
    # @return [Country, Array<Country>] one random country without +n+, else +n+ of them
    def sample(n = nil)
      fetch_some(@names.sample(n || 1), n)
    end

    # Fetches every page and iterates over the resulting countries.
    def each(&block)
      to_a.each(&block)
    end

    # @return [Array<Country>] all countries of the list, freshly fetched
    def to_a
      get(*@names)
    end

    # Narrows the list. Only the `continent:` filter is implemented; any
    # other keys are ignored. Without `continent:` the list comes back
    # unfiltered (it used to come back empty, because every continent was
    # compared against nil).
    #
    # @return [List] a new list
    def where(**filters)
      return self.class.new(*@names) unless filters.key?(:continent)

      on_continent = Country.by_continents.
        select { |_country, continent| continent == filters[:continent] }.
        keys
      self.class.new(*(@names & on_continent))
    end

    private

    # Fetches +names+; unwraps the single result when no count was requested.
    def fetch_some(names, n)
      found = get(*names)
      n ? found : found.first
    end

    def get(*names)
      pages = Reality.wp.get(*names)
      # NOTE(review): the client appears to return a bare page (or nil)
      # rather than a collection for a single title — confirm. Collections
      # pass through untouched.
      pages = [pages].compact unless pages.respond_to?(:map)
      pages.map { |page| Country.new(page) }
    end
  end

  # @param page the Wikipedia page object this country reads from
  def initialize(page)
    @page = page
  end

  # @return [String] title of the page
  def name
    page.title
  end

  def long_name
    infobox.fetch('conventional_long_name').text.strip
  end

  # @return first wikilink of the infobox's 'capital' variable
  def capital
    infobox.fetch('capital').lookup(:Wikilink).first
  end

  # @return [Hash] language kind => links; kinds with an empty name or no
  #   links are left out
  def languages
    other_kind = infobox.fetch('languages_type').text.sub(/ languages?$/, '')
    [
      ['Official', infobox_links('official_languages')],
      [other_kind, infobox_links('languages')]
    ].reject { |kind, links| kind.empty? || links.empty? }.to_h
  end

  def tld
    tlds.first
  end

  def tlds
    infobox_links('cctld').map(&:link)
  end

  def calling_code
    infobox.fetch('calling_code').text.strip
  end

  # @return [Integer] leading integer of 'utc_offset' (typographic minus is
  #   mapped to ASCII '-' first)
  def utc_offset
    infobox.fetch('utc_offset').text.sub('−', '-').to_i
  end

  def currency
    currencies.first
  end

  # @return currency links, without the link to the 'ISO 4217' article
  def currencies
    infobox_links('currency').reject { |l| l.link == 'ISO 4217' }
  end

  # @return [Reality::Measure] 'area_km2' as a measure in km²
  def area
    Reality::Measure(infobox.fetch('area_km2').text.gsub(',', '').to_i, 'km²')
  end

  # @return [Reality::Measure, nil] first non-empty of 'population_estimate'
  #   and 'population_census'; nil when both are empty
  def population
    val = %w[population_estimate population_census].
      map { |var| infobox.fetch(var).text.strip }.
      reject(&:empty?).first
    val && Reality::Measure(parse_maybe_scaled(val), 'person')
  end

  def gdp_ppp
    money('GDP_PPP')
  end

  def gdp_nominal
    money('GDP_nominal')
  end

  alias_method :gdp, :gdp_nominal

  # @return [Hash] leader title text => first wikilink of the matching name variable
  def leaders
    titles = infobox.fetch(/^leader_title\d/).map(&:text_)
    names = infobox.fetch(/^leader_name\d/).map { |v| v.lookup(:Wikilink).first }
    titles.zip(names).to_h
  end

  def continent
    self.class.by_continents[page.title]
  end

  def organizations
    organizations_list.map { |o| o[:name] }
  end

  # @param org [String] organization name or abbreviation
  def member_of?(org)
    organizations_list.any? { |o| o[:name] == org || o[:abbr] == org }
  end

  def to_s
    name
  end

  def inspect
    "#<#{self.class}(#{name})>"
  end

  PROPERTIES = %i[
    continent name long_name
    tld tlds calling_code utc_offset
    capital languages currency
    leaders area population
    gdp_ppp gdp_nominal
  ].freeze

  # @return [Hash] every property in PROPERTIES reduced to plain Ruby values
  def to_h
    PROPERTIES.map { |prop| [prop, to_simple_type(send(prop))] }.to_h
  end

  class << self
    # Builds (once) a country title => continent name hash from the
    # 'List of countries by continent' page: bold wikilinks of the first
    # table of each subsection, keyed under the subsection heading.
    def by_continents
      @by_continents ||= Reality.wp.
        get('List of countries by continent').
        sections.first.
        sections.map { |s|
          continent = s.heading.text_
          s.tables.first.
            lookup(:Wikilink, :bold?).map(&:link).
            map { |country| [country, continent] }
        }.flatten(1).
        to_h
    end

    # Organization records loaded (once) from data/country_orgs.yaml.
    # NOTE(review): plain YAML.load — the file ships with the gem; confirm it
    # still loads under Psych versions where `load` is safe by default.
    def organizations
      @organizations ||= YAML.load(File.read(File.expand_path('../../../data/country_orgs.yaml', __FILE__)))
    end
  end

  private

  attr_reader :page

  def infobox
    page.infobox
  end

  # Organizations whose category is among the page's categories.
  def organizations_list
    catnames = page.categories.map(&:name)
    self.class.organizations.select { |o| catnames.include?(o[:category]) }
  end

  def infobox_links(varname)
    src = infobox.fetch(varname)
    if (tmpl = src.lookup(:Template, name: /list$/).first)
      # values could be both inside and outside list, see India's cctld value
      src = Infoboxer::Tree::Nodes[src, tmpl.variables]
    end
    src.lookup(:Wikilink).uniq
  end

  # Shared reader for the GDP variables: strips a leading currency marker and
  # parses the scaled amount; nil when the variable is empty.
  def money(varname)
    val = infobox.fetch(varname).text.strip.sub(/^((Int|US)?\$|USD)/, '')
    val.empty? ? nil : Reality::Measure(parse_scaled(val), '$')
  end

  # See "Short scale": https://en.wikipedia.org/wiki/Long_and_short_scales#Comparison
  SCALES = {
    'million' => 1_000_000,
    'billion' => 1_000_000_000,
    'trillion' => 1_000_000_000_000,
    'quadrillion' => 1_000_000_000_000_000,
    'quintillion' => 1_000_000_000_000_000_000,
    'sextillion' => 1_000_000_000_000_000_000_000,
    'septillion' => 1_000_000_000_000_000_000_000_000,
  }.freeze
  SCALES_REGEXP = Regexp.union(*SCALES.keys)

  # Parses "<number> <scale word>", e.g. "1.5 million".
  #
  # @raise [ArgumentError] when the number or the scale word is missing
  def parse_scaled(str)
    parse_amount(str, scale_required: true)
  end

  # Like {#parse_scaled}, but the scale word is optional ("1,234" is fine).
  def parse_maybe_scaled(str)
    parse_amount(str, scale_required: false)
  end

  def parse_amount(str, scale_required:)
    match = /^([0-9.,]+)[[:space:]]*(#{SCALES_REGEXP})?/.match(str)
    unless match && (match[2] || !scale_required)
      fail(ArgumentError, "Unparseable scaled value #{str} for #{self}")
    end

    amount = match[1].delete(',')
    return amount.to_i unless match[2]

    # Exact rational arithmetic: Float multiplication followed by to_i can
    # land just below the intended integer and get truncated.
    (amount.to_r * fetch_scale(match[2])).to_i
  end

  def fetch_scale(str)
    _, res = SCALES.find { |key, _val| str.start_with?(key) }

    res || fail("Scale not found: #{str} for #{self}")
  end

  # Reduces property values to nil/numbers/strings/symbols and arrays/hashes
  # of those.
  #
  # @raise [ArgumentError] for a value of any other class
  def to_simple_type(val)
    case val
    when nil, Numeric, String, Symbol
      val
    when Array
      val.map { |v| to_simple_type(v) }
    when Hash
      val.map { |k, v| [to_simple_type(k), to_simple_type(v)] }.to_h
    when Infoboxer::Tree::Wikilink
      val.link
    when Infoboxer::Tree::Node
      val.text_
    when Reality::Measure
      val.amount
    else
      fail ArgumentError, "Non-coercible value #{val.class}"
    end
  end
end
|
267
|
-
|
268
|
-
# Looks up a single country by Wikipedia page title.
#
# @param name [String] page title
# @return [Country, nil] nil when the page is missing or carries no
#   'Infobox country' template
def Reality.country(name)
  page = wp.get(name)
  return nil unless page

  # FIXME: not very reliable, as some fictional countries, alliances
  # and country groups also have this infobox. Or maybe it is acceptable?..
  return nil if page.templates(name: 'Infobox country').empty?

  Country.new(page)
end
|
274
|
-
|
275
|
-
# Builds a country list.
#
# @param names [Array<String>] page titles; when none are given, every title
#   known to `Country.by_continents` is used, sorted
# @return [Country::List]
def Reality.countries(*names)
  selected = names.empty? ? Country.by_continents.keys.sort : names
  Country::List.new(*selected)
end
|
279
|
-
|
280
|
-
# Memoized `Infoboxer.wp` client. Cached here because, per the original note,
# Infoboxer recreates wp for each request.
def Reality.wp
  return @wp if @wp

  @wp = Infoboxer.wp
end
|
283
|
-
end
|