reality 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
# Example script: dumps every known country (sorted by name) into
# examples/output/countries.yaml and reports how long that took.
# Paths are relative to the current working directory.
require 'rubygems'
require 'bundler/setup'
$:.unshift 'lib'
require 'reality'
require 'fileutils'
require 'yaml'

output_dir = 'examples/output'
FileUtils.mkdir_p output_dir

# Monotonic clock: unlike Time.now it is not affected by system clock
# adjustments, so the reported duration cannot jump or go negative.
started = Process.clock_gettime(Process::CLOCK_MONOTONIC)

countries = Reality.countries.to_a.sort_by(&:name).map(&:to_h)
File.write File.join(output_dir, 'countries.yaml'), countries.to_yaml

elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
puts "Finished in %i seconds" % elapsed
@@ -0,0 +1,15 @@
1
+ require 'infoboxer'
2
+
3
# Top-level namespace of the gem; loading this file pulls in every component.
module Reality
  # Wikipedia-specific template definitions for Infoboxer.
  require_relative 'reality/infoboxer_templates'

  # basic functionality
  require_relative 'reality/measure'

  # entities
  require_relative 'reality/country'
end
@@ -0,0 +1,283 @@
1
+ # NB: all of this is an early draft, so it may look naive and sub-optimal.
2
+ # Just stay tuned!
3
+
4
+ module Reality
5
# A country, backed by the Wikipedia page it was loaded from.
#
# Most properties are read from the page's infobox; continent membership
# comes from the "List of countries by continent" page, and organization
# membership from the page's categories matched against
# data/country_orgs.yaml.
class Country
  # A collection of countries addressed by page name. Pages are fetched
  # through Reality.wp only when elements are actually requested.
  class List
    include Enumerable

    # @param names [Array<String>] page names of the countries
    def initialize(*names)
      @names = names
    end

    # Number of names in the list (no pages are fetched).
    def count
      @names.count
    end

    # @param n [Integer, nil]
    # @return [Country, Array<Country>] a single country when +n+ is
    #   omitted, otherwise an array built from the first +n+ names
    def first(n = nil)
      res = get(*@names.first(n || 1))
      n ? res : res.first
    end

    # Same contract as #first, taken from the end of the list.
    def last(n = nil)
      res = get(*@names.last(n || 1))
      n ? res : res.first
    end

    # Same contract as #first, with randomly chosen names.
    def sample(n = nil)
      res = get(*@names.sample(n || 1))
      n ? res : res.first
    end

    # Fetches all countries and yields them one by one.
    # Returns an Enumerator when called without a block, so that nothing
    # is fetched until the enumerator is consumed.
    def each(&block)
      return enum_for(:each) unless block

      get(*@names).each(&block)
    end

    # @return [Array<Country>] all countries of the list
    def to_a
      get(*@names)
    end

    # Narrows the list. Only the +continent:+ filter is supported; when it
    # is not given the list is returned unfiltered (comparing against a
    # missing filter used to silently drop every country).
    #
    # @return [List] a new list
    def where(**filters)
      names = @names
      if filters.key?(:continent)
        names &= Country.by_continents.
          select { |_name, continent| continent == filters[:continent] }.
          keys
      end

      self.class.new(*names)
    end

    private

    def get(*names)
      Reality.wp.get(*names).map { |page| Country.new(page) }
    end
  end

  # @param page the page object the properties are read from
  #   (as returned by Reality.wp.get)
  def initialize(page)
    @page = page
  end

  # Title of the underlying page.
  def name
    page.title
  end

  def long_name
    infobox.fetch('conventional_long_name').text.strip
  end

  # First wikilink found in the infobox's "capital" variable.
  def capital
    infobox.fetch('capital').lookup(:Wikilink).first
  end

  # @return [Hash] language links keyed by 'Official' and by the infobox's
  #   own "languages_type" label (with a trailing " language(s)" removed);
  #   entries with an empty label or without links are omitted
  def languages
    other_label = infobox.fetch('languages_type').text.sub(/ languages?$/, '')
    [
      ['Official', infobox_links('official_languages')],
      [other_label, infobox_links('languages')]
    ].reject { |label, links| label.empty? || links.empty? }.to_h
  end

  def tld
    tlds.first
  end

  def tlds
    infobox_links('cctld').map(&:link)
  end

  def calling_code
    infobox.fetch('calling_code').text.strip
  end

  # The typographic minus sign is replaced with an ASCII one so that
  # String#to_i can read negative offsets.
  def utc_offset
    infobox.fetch('utc_offset').text.sub('−', '-').to_i
  end

  def currency
    currencies.first
  end

  # Currency links, excluding the link to the "ISO 4217" page itself.
  def currencies
    infobox_links('currency').reject { |l| l.link == 'ISO 4217' }
  end

  # @return [Reality::Measure] area in km²
  def area
    Reality::Measure(infobox.fetch('area_km2').text.gsub(',', '').to_i, 'km²')
  end

  # @return [Reality::Measure, nil] the population estimate, falling back
  #   to the census value; nil when both infobox variables are empty
  def population
    val = %w[population_estimate population_census].
      map { |var| infobox.fetch(var).text.strip }.
      reject(&:empty?).
      first
    val && Reality::Measure(parse_maybe_scaled(val), 'person')
  end

  # @return [Reality::Measure, nil] nil when the infobox value is empty
  def gdp_ppp
    money('GDP_PPP')
  end

  # @return [Reality::Measure, nil] nil when the infobox value is empty
  def gdp_nominal
    money('GDP_nominal')
  end

  alias_method :gdp, :gdp_nominal

  # @return [Hash] leader title text => first wikilink of the matching
  #   "leader_nameN" variable (pairs are matched by position)
  def leaders
    titles = infobox.fetch(/^leader_title\d/).map(&:text_)
    names = infobox.fetch(/^leader_name\d/).map { |v| v.lookup(:Wikilink).first }
    titles.zip(names).to_h
  end

  def continent
    self.class.by_continents[page.title]
  end

  # Names of the known organizations the country belongs to.
  def organizations
    organizations_list.map { |o| o[:name] }
  end

  # @param org [String] organization name or abbreviation
  def member_of?(org)
    organizations_list.any? { |o| o[:name] == org || o[:abbr] == org }
  end

  def to_s
    name
  end

  def inspect
    "#<#{self.class}(#{name})>"
  end

  # Properties exported by #to_h, in output order.
  PROPERTIES = %i[
    continent name long_name
    tld tlds calling_code utc_offset
    capital languages currency
    leaders area population
    gdp_ppp gdp_nominal
  ].freeze

  # @return [Hash{Symbol => Object}] all PROPERTIES converted to plain
  #   Ruby values; empty and nil values are kept
  def to_h
    PROPERTIES.map { |prop| [prop, to_simple_type(send(prop))] }.to_h
  end

  class << self
    # @return [Hash{String => String}] country page name => continent
    #   name, parsed once from "List of countries by continent" and
    #   memoized
    def by_continents
      @by_continents ||= Reality.wp.
        get('List of countries by continent').
        sections.first.
        sections.map { |s|
          continent = s.heading.text_
          s.tables.first.
            lookup(:Wikilink, :bold?).map(&:link).
            map { |country| [country, continent] }
        }.flatten(1).
        to_h
    end

    # Organization descriptions loaded once from data/country_orgs.yaml.
    def organizations
      @organizations ||= YAML.load(File.read(File.expand_path('../../../data/country_orgs.yaml', __FILE__)))
    end
  end

  private

  attr_reader :page

  def infobox
    page.infobox
  end

  # Known organizations whose category is among the page's categories.
  def organizations_list
    catnames = page.categories.map(&:name)
    self.class.organizations.select { |o| catnames.include?(o[:category]) }
  end

  def infobox_links(varname)
    src = infobox.fetch(varname)
    if tmpl = src.lookup(:Template, name: /list$/).first
      # values could be both inside and outside list, see India's cctld value
      src = Infoboxer::Tree::Nodes[src, tmpl.variables]
    end
    src.lookup(:Wikilink).uniq
  end

  # Reads a monetary infobox variable, dropping a leading "$", "Int$",
  # "US$" or "USD" marker before parsing the scaled number.
  def money(varname)
    val = infobox.fetch(varname).text.strip.sub(/^((Int|US)?\$|USD)/, '')
    val.empty? ? nil : Reality::Measure(parse_scaled(val), '$')
  end

  # See "Short scale": https://en.wikipedia.org/wiki/Long_and_short_scales#Comparison
  SCALES = {
    'million' => 1_000_000,
    'billion' => 1_000_000_000,
    'trillion' => 1_000_000_000_000,
    'quadrillion' => 1_000_000_000_000_000,
    'quintillion' => 1_000_000_000_000_000_000,
    'sextillion' => 1_000_000_000_000_000_000_000,
    'septillion' => 1_000_000_000_000_000_000_000_000,
  }.freeze
  SCALES_REGEXP = Regexp.union(*SCALES.keys)

  # Compiled once instead of on every call.
  SCALED_REGEXP = /^([0-9.,]+)[[:space:]]*(#{SCALES_REGEXP})/
  MAYBE_SCALED_REGEXP = /^([0-9.,]+)[[:space:]]*(#{SCALES_REGEXP})?/

  # Parses "<number> <scale word>", e.g. "1.5 trillion".
  #
  # @return [Integer]
  # @raise [ArgumentError] when the string has no number followed by a scale
  def parse_scaled(str)
    parse_number(str, SCALED_REGEXP)
  end

  # Like #parse_scaled, but the scale word is optional ("1,234,567").
  #
  # @return [Integer]
  # @raise [ArgumentError] when the string does not start with a number
  def parse_maybe_scaled(str)
    parse_number(str, MAYBE_SCALED_REGEXP)
  end

  # Shared implementation of both parsers. Thousands separators are
  # dropped. Scaled values are multiplied as exact rationals: going
  # through Float and truncating could lose a unit to rounding.
  def parse_number(str, regexp)
    match = regexp.match(str)
    raise ArgumentError, "Unparseable scaled value #{str} for #{self}" unless match

    digits = match[1].delete(',')
    scale = match[2]
    scale ? (digits.to_r * fetch_scale(scale)).to_i : digits.to_i
  end

  def fetch_scale(str)
    _, res = SCALES.find { |key, _val| str.start_with?(key) }

    res or fail("Scale not found: #{str} for #{self}")
  end

  # Recursively converts property values into plain Ruby values:
  # wikilinks become their link, other nodes their text, measures
  # their amount.
  #
  # @raise [ArgumentError] for any other kind of value
  def to_simple_type(val)
    case val
    when nil, Numeric, String, Symbol
      val
    when Array
      val.map { |v| to_simple_type(v) }
    when Hash
      val.map { |k, v| [to_simple_type(k), to_simple_type(v)] }.to_h
    when Infoboxer::Tree::Wikilink
      val.link
    when Infoboxer::Tree::Node
      val.text_
    when Reality::Measure
      val.amount
    else
      fail ArgumentError, "Non-coercible value #{val.class}"
    end
  end
end
267
+
268
# Looks up a single country by page name.
# Returns nil when the page is missing or has no "Infobox country" template.
def Reality.country(name)
  page = wp.get(name)
  return nil unless page

  # FIXME: not very reliable, as some fictional countries, alliances
  # and country groups also have this infobox. Or maybe it is acceptable?..
  return nil if page.templates(name: 'Infobox country').empty?

  Country.new(page)
end
274
+
275
# Builds a Country::List for the given page names; with no names given,
# the list covers every country known to Country.by_continents, sorted by name.
def Reality.countries(*names)
  selected = names.empty? ? Country.by_continents.keys.sort : names
  Country::List.new(*selected)
end
279
+
280
# Shared, memoized Wikipedia client
# (Infoboxer itself recreates wp for each request).
def Reality.wp
  return @wp if @wp

  @wp = Infoboxer.wp
end
283
+ end
@@ -0,0 +1,11 @@
1
# Template definitions registered for en.wikipedia.org.
Infoboxer::MediaWiki::Traits.for('en.wikipedia.org') do
  templates do
    # "lang" template: its children are taken from the variable named '2'
    # (presumably the text itself, with '1' being the language code -- TODO confirm).
    template('lang') do
      def children
        fetch('2')
      end
    end

    show('US$') # TODO: in fact, has second option (year)
  end
end
@@ -0,0 +1,92 @@
1
+ module Reality
2
# An amount with a unit of measurement, e.g. 1,000 km².
#
# The amount is stored as a Rational; the unit is a Measure::Unit
# (unit strings are parsed with Unit.parse).
class Measure
  require_relative 'measure/unit'

  include Comparable

  attr_reader :amount, :unit

  # @param amount [Numeric, String] anything Kernel#Rational accepts
  # @param unit [String, Unit]
  def initialize(amount, unit)
    @amount = Rational(amount)
    @unit = Unit.parse(unit)
  end

  # Compares amounts of measures that share the same unit.
  #
  # Returns nil for anything else, which is what Comparable expects from
  # incomparable operands: `==` then answers false instead of raising,
  # while `<`, `>` and friends still raise ArgumentError.
  def <=>(other)
    return nil unless compatible?(other)

    amount <=> other.amount
  end

  def -@
    self.class.new(-amount, unit)
  end

  # @raise [ArgumentError] unless +other+ is a measure of the same unit
  def +(other)
    check_compatibility!(other)

    self.class.new(amount + other.amount, unit)
  end

  # @raise [ArgumentError] unless +other+ is a measure of the same unit
  def -(other)
    self + (-other)
  end

  # Multiplies by a number (unit is kept) or by another measure
  # (units are multiplied as well).
  #
  # @raise [ArgumentError] for any other operand
  def *(other)
    case other
    when Numeric
      self.class.new(amount * other, unit)
    when self.class
      self.class.new(amount * other.amount, unit * other.unit)
    else
      fail ArgumentError, "Can't multiply by #{other.class}"
    end
  end

  # Divides by a number or by another measure. When the units cancel
  # out completely, a bare number is returned instead of a measure.
  #
  # @raise [ArgumentError] for any other operand
  def /(other)
    case other
    when Numeric
      self.class.new(amount / other, unit)
    when self.class
      un = unit / other.unit
      un.scalar? ?
        amount / other.amount :
        self.class.new(amount / other.amount, un)
    else
      fail ArgumentError, "Can't divide by #{other.class}"
    end
  end

  # Raises the measure to an integer power; each unit component's power
  # is multiplied by +num+. For positive +num+ this equals multiplying
  # the measure by itself +num+ times. Zero and negative powers are
  # computed too (they used to silently return the measure unchanged).
  #
  # @param num [Integer]
  # @return [Measure]
  # @raise [ArgumentError] when +num+ is not an Integer
  def **(num)
    unless num.is_a?(Integer)
      fail ArgumentError, "Can't raise to non-integer power #{num.inspect}"
    end

    powered_unit = Unit.new(*unit.components.map { |sig, pow| [sig, pow * num] })
    self.class.new(amount**num, powered_unit)
  end

  def to_s
    '%s%s' % [formatted_amount, unit]
  end

  def inspect
    "#<%s(%s %s)>" % [self.class, formatted_amount, unit]
  end

  private

  def formatted_amount
    # FIXME: really naive
    if amount.abs < 1
      amount.to_f.to_s
    else
      # Integer part only, with thousands separators;
      # see http://stackoverflow.com/a/6460145/3683228
      amount.to_i.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
    end
  end

  # True when +other+ is a measure of exactly the same unit.
  def compatible?(other)
    other.kind_of?(self.class) && other.unit == unit
  end

  def check_compatibility!(other)
    return if compatible?(other)

    fail ArgumentError, "#{self} incompatible with #{other}"
  end
end
88
+
89
# Conversion-style shortcut: Reality::Measure(10, 'km') builds a
# Measure, forwarding all arguments to Measure.new.
def Reality.Measure(*args)
  Measure.new(*args)
end
92
+ end
@@ -0,0 +1,120 @@
1
+ require 'strscan'
2
+
3
module Reality
  class Measure
    # Unit of measurement: a product of named components, each raised to
    # a non-zero integer power (negative powers form the denominator),
    # e.g. "m/s²" is [["m", 1], ["s", -2]].
    class Unit
      # Whether units are rendered with Unicode signs ("·", "²", "³")
      # or with plain ASCII ("*", "^2", "^3").
      @unicode = true

      class << self
        attr_accessor :unicode

        UNIT_REGEX = /[a-zA-Z\$]+/.freeze # FIXME: there are many non-ASCII units, especially in money
        POWER_REGEX = /[²³]|\^(\d+)/.freeze
        OP_REGEX = /[\/*·]/.freeze

        # Parses strings like "km²", "m/s^2" or "kg·m". Everything after
        # the (single allowed) "/" goes into the denominator.
        #
        # @param str [String, Unit] a Unit is returned as is
        # @return [Unit]
        # @raise [RuntimeError] on malformed input
        def parse(str)
          return str if str.kind_of?(Unit)

          scanner = StringScanner.new(str)
          denom = false
          units = []

          loop do
            # (variable [power] operator) ....
            unit = scanner.scan(UNIT_REGEX) or fail("Variable expected at #{scanner.rest}")
            pow = scanner.scan(POWER_REGEX)
            units << [unit, parse_pow(pow, denom)]
            break if scanner.eos?

            op = scanner.scan(OP_REGEX) or fail("Operator expected at #{scanner.rest}")
            if op == '/'
              denom and fail("Second division at #{scanner.rest}")
              denom = true
            end
          end
          new(*units)
        end

        # Converts a scanned power token (nil, "²", "³" or "^N") into an
        # Integer, negated for components of the denominator.
        #
        # @raise [ArgumentError] for any other token
        def parse_pow(p, denom)
          res = case p
                when nil then 1
                when '²' then 2
                when '³' then 3
                when /^\^(\d+)$/ then $1.to_i
                else fail(ArgumentError, "Can't parse power #{p}")
                end

          denom ? -res : res
        end
      end

      # @return [Array<Array(String, Integer)>] [name, power] pairs
      attr_reader :components

      # @param components [Array<Array(String, Integer)>] [name, power]
      #   pairs; powers of repeated names are summed and components whose
      #   resulting power is zero are dropped
      def initialize(*components)
        @components = components.
          group_by { |sig, _pow| sig }.
          map { |sig, cmps| [sig, cmps.map(&:last).inject(:+)] }.
          reject { |_sig, pow| pow.zero? }
      end

      # Units are equal when they consist of the same components,
      # regardless of the order those were given in ("m·s" equals "s·m").
      def ==(other)
        other.class == self.class && other.components.sort == components.sort
      end
      alias_method :eql?, :==

      # Consistent with #==, so that units can be used as Hash keys.
      def hash
        [self.class, components.sort].hash
      end

      # True when no components are left (everything cancelled out).
      def scalar?
        components.empty?
      end

      # Inverts the unit: every power changes its sign.
      def -@
        self.class.new(*components.map { |sig, pow| [sig, -pow] })
      end

      # @raise [TypeError] unless +other+ is a Unit
      def *(other)
        other.class == self.class or
          fail(TypeError, "Can't multiply #{self.class} by #{other.class}")

        self.class.new(*components, *other.components)
      end

      # @raise [TypeError] unless +other+ is a Unit
      def /(other)
        other.class == self.class or
          fail(TypeError, "Can't divide #{self.class} by #{other.class}")

        self * -other
      end

      # Renders "numerator/denominator"; a missing numerator is shown
      # as 1, and a scalar unit as an empty string (not "1/").
      def to_s
        return '' if scalar?

        num, denom = components.partition { |_sig, pow| pow > 0 }
        numerator = num.map { |sig, pow| "#{sig}#{power(pow)}" }.join(mul)
        denominator = denom.map { |sig, pow| "#{sig}#{power(pow)}" }.join(mul)
        case
        when numerator.empty?
          [1, denominator].join('/')
        when denominator.empty?
          numerator
        else
          [numerator, denominator].join('/')
        end
      end

      private

      UNICODE_SUPER = { 2 => '²', 3 => '³' }.freeze

      def mul
        self.class.unicode ? '·' : '*'
      end

      # Suffix for the absolute value of a power: nothing for 1,
      # a superscript sign or "^N" otherwise.
      def power(num)
        num = num.abs
        case num
        when 0 then fail(ArgumentError, "0-power unit!")
        when 1 then ''
        when 2..3
          self.class.unicode ? UNICODE_SUPER.fetch(num) : "^#{num}"
        else "^#{num}"
        end
      end
    end
  end
end