reality 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
#!/usr/bin/env ruby
# Example script: fetches every country known to Reality, converts each one to
# a plain hash and dumps the list (sorted by name) as YAML into
# examples/output/countries.yaml, then prints how long it took.
#
# Paths are relative to the current working directory ('lib' is pushed onto the
# load path and the output goes to 'examples/output').
require 'rubygems'
require 'bundler/setup'
$:.unshift 'lib'
require 'reality'
require 'fileutils'
require 'yaml'

FileUtils.mkdir_p 'examples/output'

# Monotonic clock: unlike Time.now it cannot jump backwards/forwards if the
# system clock is adjusted while the (network-bound) fetch is running.
started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

File.write 'examples/output/countries.yaml',
  Reality.countries.to_a.sort_by(&:name).map(&:to_h).to_yaml

elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
puts "Finished in %i seconds" % elapsed
@@ -0,0 +1,15 @@
require 'infoboxer'

# Entry point of the library: loads the Infoboxer template tweaks first, then
# the library's own files.
module Reality
  require_relative 'reality/infoboxer_templates'

  # Loaded in this order:
  #   measure - basic functionality
  #   country - entities
  %w[measure country].each { |mod| require_relative "reality/#{mod}" }
end
@@ -0,0 +1,283 @@
# NB: all of this is early drafts, so may look naive and sub-optimal.
# Just stay tuned!

# Country.organizations parses a YAML data file; require it here so the library
# does not depend on the caller having loaded YAML already.
require 'yaml'

module Reality
  # A country, backed by a page object obtained through Reality.wp.
  # Most readers pull their value out of the page's infobox.
  class Country
    # Lazy collection of countries identified by page names. Pages are only
    # fetched when elements are actually requested.
    class List
      include Enumerable

      # Filter keys understood by #where.
      SUPPORTED_FILTERS = %i[continent].freeze

      # @param names [Array<String>] page names of the countries in the list
      def initialize(*names)
        @names = names
      end

      # Number of names in the list; does not fetch anything.
      def count
        @names.count
      end

      # Without +n+ returns a single Country, with +n+ an Array of them.
      # Only the selected names are fetched.
      def first(n = nil)
        pick(:first, n)
      end

      # Same contract as #first, but from the end of the list.
      def last(n = nil)
        pick(:last, n)
      end

      # Same contract as #first, but for randomly chosen names.
      def sample(n = nil)
        pick(:sample, n)
      end

      # Fetches all countries of the list and iterates over them.
      def each(&block)
        get(*@names).each(&block)
      end

      # Fetches all countries of the list.
      def to_a
        get(*@names)
      end

      # Returns a new List narrowed by the given filters.
      #
      # Only +continent:+ is supported. Without filters an equivalent list is
      # returned.
      #
      # @raise [ArgumentError] on filter keys other than the supported ones
      def where(**filters)
        unsupported = filters.keys - SUPPORTED_FILTERS
        unsupported.empty? or
          fail(ArgumentError, "Unsupported filters: #{unsupported.join(', ')}")

        names = @names
        if filters.key?(:continent)
          names &= Reality::Country.
            by_continents.
            select{|_name, continent| continent == filters[:continent]}.
            map(&:first)
        end

        self.class.new(*names)
      end

      private

      # Shared implementation of #first / #last / #sample: +selector+ is the
      # Array method used to choose names.
      def pick(selector, n)
        countries = get(*@names.public_send(selector, n || 1))
        n ? countries : countries.first
      end

      def get(*names)
        Reality.wp.get(*names).map{|page| Country.new(page)}
      end
    end

    # @param page page object as returned by Reality.wp.get
    def initialize(page)
      @page = page
    end

    # Title of the underlying page.
    def name
      page.title
    end

    def long_name
      infobox.fetch('conventional_long_name').text.strip
    end

    # First wikilink found in the infobox's 'capital' variable.
    def capital
      infobox.fetch('capital').lookup(:Wikilink).first
    end

    # Hash of language-kind => links. 'Official' comes from
    # 'official_languages'; the second key is taken from 'languages_type'
    # (with a trailing " language(s)" removed). Entries with an empty key or
    # no links are dropped.
    def languages
      [
        ['Official', infobox_links('official_languages')],
        [infobox.fetch('languages_type').text.sub(/ languages?$/, ''), infobox_links('languages')]
      ].reject{|kind, links| kind.empty? || links.empty?}.to_h
    end

    def tld
      tlds.first
    end

    def tlds
      infobox_links('cctld').map(&:link)
    end

    def calling_code
      infobox.fetch('calling_code').text.strip
    end

    # Integer part of the 'utc_offset' text; the Unicode minus sign is replaced
    # with ASCII '-' so that String#to_i understands negative offsets.
    def utc_offset
      infobox.fetch('utc_offset').text.sub('−', '-').to_i
    end

    def currency
      currencies.first
    end

    # Links from the 'currency' variable, except the link to 'ISO 4217'.
    def currencies
      infobox_links('currency').reject{|l| l.link == 'ISO 4217'}
    end

    def area
      Reality::Measure(infobox.fetch('area_km2').text.gsub(',', '').to_i, 'km²')
    end

    # Measure in 'person', taken from the first non-empty of
    # 'population_estimate' / 'population_census'; nil when both are empty.
    def population
      val = %w[population_estimate population_census].
        map{|var| infobox.fetch(var).text.strip}.
        reject(&:empty?).first
      val && Reality::Measure(parse_maybe_scaled(val), 'person')
    end

    def gdp_ppp
      money_measure('GDP_PPP')
    end

    def gdp_nominal
      money_measure('GDP_nominal')
    end

    alias_method :gdp, :gdp_nominal

    # Hash of leader title text => first wikilink of the matching leader name.
    def leaders
      titles = infobox.fetch(/^leader_title\d/).map(&:text_)
      names = infobox.fetch(/^leader_name\d/).map{|v| v.lookup(:Wikilink).first}
      titles.zip(names).to_h
    end

    def continent
      self.class.by_continents[page.title]
    end

    def organizations
      organizations_list.map{|o| o[:name]}
    end

    # +org+ may be either the organization's name or its abbreviation.
    def member_of?(org)
      organizations_list.any?{|o| o[:name] == org || o[:abbr] == org}
    end

    def to_s
      name
    end

    def inspect
      "#<#{self.class}(#{name})>"
    end

    # Readers exported by #to_h.
    PROPERTIES = %i[
      continent name long_name
      tld tlds calling_code utc_offset
      capital languages currency
      leaders area population
      gdp_ppp gdp_nominal
    ]

    # Hash of PROPERTIES => values reduced to plain types (see
    # #to_simple_type).
    def to_h
      PROPERTIES.map{|prop| [prop, to_simple_type(send(prop))]}.to_h
    end

    class << self
      # Memoized hash of country page name => continent name, built from the
      # bold wikilinks of the first table in each subsection of the first
      # section of the 'List of countries by continent' page.
      def by_continents
        @by_continents ||= Reality.wp.
          get('List of countries by continent').
          sections.first.
          sections.map{|s|
            continent = s.heading.text_
            s.tables.first.
              lookup(:Wikilink, :bold?).map(&:link).
              map{|country| [country, continent]}
          }.flatten(1).
          to_h
      end

      # Memoized contents of data/country_orgs.yaml (a bundled data file).
      def organizations
        @organizations ||= YAML.load(File.read(File.expand_path('../../../data/country_orgs.yaml', __FILE__)))
      end
    end

    private

    attr_reader :page

    def infobox
      page.infobox
    end

    # Organizations whose :category is among this page's category names.
    def organizations_list
      catnames = page.categories.map(&:name)
      self.class.organizations.select{|o| catnames.include?(o[:category])}
    end

    # Unique wikilinks of an infobox variable.
    def infobox_links(varname)
      src = infobox.fetch(varname)
      if tmpl = src.lookup(:Template, name: /list$/).first
        # values could be both inside and outside list, see India's cctld value
        src = Infoboxer::Tree::Nodes[src, tmpl.variables]
      end
      src.lookup(:Wikilink).uniq
    end

    # Shared implementation of #gdp_ppp / #gdp_nominal: strips a leading
    # currency marker and parses the scaled amount; nil when the variable is
    # empty.
    def money_measure(varname)
      val = infobox.fetch(varname).text.strip.sub(/^((Int|US)?\$|USD)/, '')
      val.empty? ? nil : Reality::Measure(parse_scaled(val), '$')
    end

    # See "Short scale": https://en.wikipedia.org/wiki/Long_and_short_scales#Comparison
    SCALES = {
      'million'     => 1_000_000,
      'billion'     => 1_000_000_000,
      'trillion'    => 1_000_000_000_000,
      'quadrillion' => 1_000_000_000_000_000,
      'quintillion' => 1_000_000_000_000_000_000,
      'sextillion'  => 1_000_000_000_000_000_000_000,
      'septillion'  => 1_000_000_000_000_000_000_000_000,
    }.freeze
    SCALES_REGEXP = Regexp.union(*SCALES.keys)

    # "<digits>[ ]<scale>" with a mandatory / optional scale word. Compiled
    # once instead of on every call.
    SCALED_REGEXP = /^([0-9.,]+)[[:space:]]*(#{SCALES_REGEXP})/
    MAYBE_SCALED_REGEXP = /^([0-9.,]+)[[:space:]]*(#{SCALES_REGEXP})?/

    # Parses values like "1.2 trillion" into an Integer.
    #
    # @raise [ArgumentError] when +str+ has no amount followed by a scale word
    def parse_scaled(str)
      parse_amount(str, SCALED_REGEXP)
    end

    # Like #parse_scaled, but the scale word may be absent ("1,234,567").
    #
    # @raise [ArgumentError] when +str+ does not start a line with an amount
    def parse_maybe_scaled(str)
      parse_amount(str, MAYBE_SCALED_REGEXP)
    end

    # Common part of #parse_scaled / #parse_maybe_scaled.
    def parse_amount(str, regexp)
      match = regexp.match(str) or
        fail(ArgumentError, "Unparseable scaled value #{str} for #{self}")

      digits = match[1].gsub(/[,]/, '') # thousands separators
      scale = match[2]

      if scale
        (digits.to_f * fetch_scale(scale)).to_i
      else
        digits.to_i
      end
    end

    def fetch_scale(str)
      _, res = SCALES.detect{|key, _val| str.start_with?(key)}

      res or fail("Scale not found: #{str} for #{self}")
    end

    # Recursively converts a property value into plain Ruby data: wikilinks
    # become their link, other Infoboxer nodes their text, measures their
    # amount.
    #
    # @raise [ArgumentError] for values of any other class
    def to_simple_type(val)
      case val
      when nil, Numeric, String, Symbol
        val
      when Array
        val.map{|v| to_simple_type(v)}
      when Hash
        val.map{|k, v| [to_simple_type(k), to_simple_type(v)]}.to_h
      when Infoboxer::Tree::Wikilink
        val.link
      when Infoboxer::Tree::Node
        val.text_
      when Reality::Measure
        val.amount
      else
        fail ArgumentError, "Non-coercible value #{val.class}"
      end
    end
  end

  # Country by page name; nil when the page is not found or has no
  # 'Infobox country' template.
  def Reality.country(name)
    page = wp.get(name) or return nil
    # FIXME: not very reliable, as some fictional countries, aliances
    #   and country groups also have this infobox. Or maybe it is acceptable?..
    page.templates(name: 'Infobox country').empty? ? nil : Country.new(page)
  end

  # Country::List for the given names, or for every known country (sorted by
  # name) when called without arguments.
  def Reality.countries(*names)
    names = Country.by_continents.keys.sort if names.empty?
    Country::List.new(*names)
  end

  def Reality.wp
    @wp ||= Infoboxer.wp # while Infoboxer recreates wp for each request
  end
end
@@ -0,0 +1,11 @@
# Template tweaks registered with Infoboxer for en.wikipedia.org.
Infoboxer::MediaWiki::Traits.for('en.wikipedia.org') do
  templates do
    # 'lang' template: its children are whatever is stored in variable '2'.
    template('lang') do
      def children
        fetch('2')
      end
    end

    show('US$') # TODO: in fact, has second option (year)
  end
end
@@ -0,0 +1,92 @@
module Reality
  # An amount (stored as Rational) together with a Unit.
  #
  # Measures with equal units can be compared, added and subtracted; any
  # measures can be multiplied/divided by numbers or by other measures.
  class Measure
    %w[unit].each{|mod| require_relative "measure/#{mod}"}

    include Comparable

    attr_reader :amount, :unit

    # @param amount anything accepted by Kernel#Rational
    # @param unit [String, Unit] parsed with Unit.parse
    def initialize(amount, unit)
      @amount, @unit = Rational(amount), Unit.parse(unit)
    end

    # Compares amounts of measures with the same unit.
    #
    # Returns nil for anything that is not a Measure of the same unit, as the
    # Comparable contract expects: this makes `measure == other` false instead
    # of raising, while `<`, `>`, `sort` etc. still raise ArgumentError.
    def <=>(other)
      return nil unless compatible?(other)

      amount <=> other.amount
    end

    def -@
      self.class.new(-amount, unit)
    end

    # @raise [ArgumentError] unless +other+ is a Measure of the same unit
    def +(other)
      check_compatibility!(other)

      self.class.new(amount + other.amount, unit)
    end

    def -(other)
      self + (-other)
    end

    # Multiplies by a number (unit is kept) or by a Measure (units are
    # multiplied too).
    #
    # @raise [ArgumentError] for any other operand
    def *(other)
      case other
      when Numeric
        self.class.new(amount * other, unit)
      when self.class
        self.class.new(amount * other.amount, unit * other.unit)
      else
        fail ArgumentError, "Can't multiply by #{other.class}"
      end
    end

    # Divides by a number or by a Measure. When the units cancel out
    # completely, a plain number is returned instead of a Measure.
    #
    # @raise [ArgumentError] for any other operand
    def /(other)
      case other
      when Numeric
        self.class.new(amount / other, unit)
      when self.class
        un = unit / other.unit
        if un.scalar?
          amount / other.amount
        else
          self.class.new(amount / other.amount, un)
        end
      else
        fail ArgumentError, "Can't divide by #{other.class}"
      end
    end

    # Raises the measure to a positive integer power by repeated
    # multiplication.
    #
    # @raise [ArgumentError] for zero, negative or non-Integer exponents
    #   (which previously returned the measure unchanged or crashed)
    def **(num)
      num.is_a?(Integer) && num > 0 or
        fail(ArgumentError, "Exponent should be a positive integer, got #{num.inspect}")

      (num - 1).times.inject(self){|res| res * self}
    end

    def to_s
      '%s%s' % [formatted_amount, unit]
    end

    def inspect
      "#<%s(%s %s)>" % [self.class, formatted_amount, unit]
    end

    private

    # Amounts below 1 (by absolute value) are shown as floats, everything else
    # as an integer with thousands separators.
    def formatted_amount
      # FIXME: really naive
      if amount.abs < 1
        amount.to_f.to_s
      else
        # see http://stackoverflow.com/a/6460145/3683228
        amount.to_i.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
      end
    end

    def compatible?(other)
      other.kind_of?(self.class) && other.unit == unit
    end

    def check_compatibility!(other)
      compatible?(other) or
        fail(ArgumentError, "#{self} incompatible with #{other}")
    end
  end

  # Shortcut constructor: Reality::Measure(10, 'km') is Measure.new(10, 'km').
  def Reality.Measure(*arg)
    Measure.new(*arg)
  end
end
@@ -0,0 +1,120 @@
require 'strscan'

module Reality
  class Measure
    # Unit of measurement: a list of [signature, power] components, e.g.
    # "m/s²" is [['m', 1], ['s', -2]]. A unit without components is scalar.
    #
    # Unit.unicode (true by default) controls whether #to_s uses "·", "²" and
    # "³" or their ASCII counterparts "*", "^2" and "^3".
    class Unit
      @unicode = true

      class << self
        attr_accessor :unicode

        UNIT_REGEX = /[a-zA-Z\$]+/ # FIXME: there are many non-ASCII units, especially in money
        POWER_REGEX = /[²³]|\^(\d+)/
        OP_REGEX = /[\/*·]/

        # Parses strings like "km", "km²", "m/s^2" or "kg*m/s".
        #
        # Everything after the (single allowed) "/" goes to the denominator.
        # A Unit argument is returned as is.
        #
        # @raise [RuntimeError] on malformed input or a second "/"
        def parse(str)
          return str if str.kind_of?(Unit)

          scanner = StringScanner.new(str)
          denom = false
          units = []

          loop do
            # (variable [power] operator) ....
            unit = scanner.scan(UNIT_REGEX) or fail("Variable expected at #{scanner.rest}")
            pow = scanner.scan(POWER_REGEX)
            units << [unit, parse_pow(pow, denom)]
            break if scanner.eos?

            op = scanner.scan(OP_REGEX) or fail("Operator expected at #{scanner.rest}")
            if op == '/'
              denom and fail("Second division at #{scanner.rest}")
              denom = true
            end
          end
          new(*units)
        end

        # Converts a scanned power token (nil, "²", "³" or "^N") to an
        # Integer, negated for denominator units.
        #
        # @raise [ArgumentError] for any other token
        def parse_pow(p, denom)
          res = case p
            when nil then 1
            when '²' then 2
            when '³' then 3
            when /\A\^(\d+)\z/ then $1.to_i
            else fail(ArgumentError, "Can't parse power #{p}")
            end

          denom ? -res : res
        end
      end

      attr_reader :components

      # @param components [Array<Array(String, Integer)>] [signature, power]
      #   pairs; powers of repeated signatures are summed and components that
      #   end up with power 0 are dropped.
      def initialize(*components)
        @components = components.
          group_by{|sig, _pow| sig}.
          map{|sig, cmps| [sig, cmps.map(&:last).inject(:+)]}.
          reject{|_sig, pow| pow.zero?}
      end

      # Units are equal when they consist of the same components, regardless
      # of the order they were written in ("m·s" equals "s·m").
      def ==(other)
        other.class == self.class && other.components.to_h == components.to_h
      end

      alias_method :eql?, :==

      # Consistent with #==, so units can be used as Hash keys.
      def hash
        [self.class, components.to_h].hash
      end

      def scalar?
        components.empty?
      end

      # Inverse unit: every power is negated.
      def -@
        self.class.new(*components.map{|sig, pow| [sig, -pow]})
      end

      # @raise [TypeError] unless +other+ is a Unit
      def *(other)
        other.class == self.class or
          fail(TypeError, "Can't multiply #{self.class} by #{other.class}")

        self.class.new(*components, *other.components)
      end

      # @raise [TypeError] unless +other+ is a Unit
      def /(other)
        other.class == self.class or
          fail(TypeError, "Can't divide #{self.class} by #{other.class}")

        self * -other
      end

      # "numerator/denominator" representation; "1/..." when there is no
      # numerator and an empty string for a scalar unit.
      def to_s
        return '' if scalar?

        num, denom = components.partition{|_sig, pow| pow > 0}
        numerator = num.map{|sig, pow| "#{sig}#{power(pow)}"}.join(mul)
        denominator = denom.map{|sig, pow| "#{sig}#{power(pow)}"}.join(mul)
        case
        when numerator.empty?
          [1, denominator].join('/')
        when denominator.empty?
          numerator
        else
          [numerator, denominator].join('/')
        end
      end

      private

      UNICODE_SUPER = {2 => '²', 3 => '³'}

      def mul
        self.class.unicode ? '·' : '*'
      end

      # Suffix for the absolute value of a power: nothing for 1, a
      # superscript (in unicode mode) for 2 and 3, "^N" otherwise.
      def power(num)
        num = num.abs
        case num
        when 0 then fail(ArgumentError, "0-power unit!")
        when 1 then ''
        when 2..3
          self.class.unicode ? UNICODE_SUPER.fetch(num) : "^#{num}"
        else "^#{num}"
        end
      end
    end
  end
end