reality 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.dokaz +1 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +538 -66
  5. data/bin/reality +9 -0
  6. data/config/demo.yml +3 -0
  7. data/data/wikidata-predicates.json +1 -0
  8. data/data/wikidata-predicates.yaml +2089 -0
  9. data/lib/reality.rb +26 -7
  10. data/lib/reality/config.rb +46 -0
  11. data/lib/reality/definitions/dictionaries.rb +67 -0
  12. data/lib/reality/definitions/helpers.rb +34 -0
  13. data/lib/reality/definitions/wikidata.rb +105 -0
  14. data/lib/reality/definitions/wikipedia_character.rb +17 -0
  15. data/lib/reality/definitions/wikipedia_city.rb +19 -0
  16. data/lib/reality/definitions/wikipedia_continent.rb +21 -0
  17. data/lib/reality/definitions/wikipedia_country.rb +23 -0
  18. data/lib/reality/definitions/wikipedia_musical_artist.rb +15 -0
  19. data/lib/reality/definitions/wikipedia_person.rb +17 -0
  20. data/lib/reality/entity.rb +152 -0
  21. data/lib/reality/entity/coercion.rb +76 -0
  22. data/lib/reality/entity/wikidata_predicates.rb +31 -0
  23. data/lib/reality/entity/wikipedia_type.rb +73 -0
  24. data/lib/reality/extras/geonames.rb +29 -0
  25. data/lib/reality/extras/open_weather_map.rb +63 -0
  26. data/lib/reality/geo.rb +122 -0
  27. data/lib/reality/infoboxer_templates.rb +8 -0
  28. data/lib/reality/list.rb +95 -0
  29. data/lib/reality/measure.rb +18 -12
  30. data/lib/reality/measure/unit.rb +5 -1
  31. data/lib/reality/methods.rb +16 -0
  32. data/lib/reality/pretty_inspect.rb +11 -0
  33. data/lib/reality/refinements.rb +26 -0
  34. data/lib/reality/shortcuts.rb +11 -0
  35. data/lib/reality/tz_offset.rb +64 -0
  36. data/lib/reality/util/formatters.rb +35 -0
  37. data/lib/reality/util/parsers.rb +53 -0
  38. data/lib/reality/version.rb +6 -0
  39. data/lib/reality/wikidata.rb +310 -0
  40. data/reality.gemspec +12 -3
  41. data/script/extract_wikidata_properties.rb +23 -0
  42. data/script/lib/nokogiri_more.rb +175 -0
  43. metadata +137 -7
  44. data/examples/all_countries.rb +0 -16
  45. data/lib/reality/country.rb +0 -283
@@ -3,6 +3,10 @@ module Reality
3
3
  %w[unit].each{|mod| require_relative "measure/#{mod}"}
4
4
 
5
5
  attr_reader :amount, :unit
6
+
7
# Builds a measure only when both parts are present.
#
# @param amount value handed to +new+ as the amount; may be nil/false
# @param unit value handed to +new+ as the unit; may be nil/false
# @return the new instance, or the first falsy argument (nil/false) as is
def Measure.coerce(amount, unit)
  return amount unless amount
  return unit unless unit

  new(amount, unit)
end
6
10
 
7
11
  def initialize(amount, unit)
8
12
  @amount, @unit = Rational(amount), Unit.parse(unit)
@@ -14,6 +18,10 @@ module Reality
14
18
  amount <=> other.amount
15
19
  end
16
20
 
21
# Value equality: true when +other+ exposes the same amount and the same unit.
#
# Objects that do not respond to +amount+ and +unit+ (nil, numbers, strings...)
# are simply unequal; equality checks must never raise NoMethodError.
#
# @param other [Object] anything
# @return [Boolean]
def ==(other)
  other.respond_to?(:amount) && other.respond_to?(:unit) &&
    amount == other.amount && unit == other.unit
end
24
+
17
25
  def -@
18
26
  self.class.new(-amount, unit)
19
27
  end
@@ -60,25 +68,23 @@ module Reality
60
68
  include Comparable
61
69
 
62
70
  def to_s
63
- '%s%s' % [formatted_amount, unit]
71
+ '%s%s' % [Util::Format.number(amount), unit]
64
72
  end
65
73
 
66
- def inspect
67
- "#<%s(%s %s)>" % [self.class, formatted_amount, unit]
74
+ def to_f
75
+ amount.to_f
68
76
  end
69
77
 
70
- private
78
+ def to_i
79
+ amount.to_i
80
+ end
71
81
 
72
- def formatted_amount
73
- # FIXME: really naive
74
- if amount.abs < 1
75
- amount.to_f.to_s
76
- else
77
- # see http://stackoverflow.com/a/6460145/3683228
78
- amount.to_i.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
79
- end
82
+ def inspect
83
+ "#<%s(%s %s)>" % [self.class, Util::Format.number(amount), unit]
80
84
  end
81
85
 
86
+ private
87
+
82
88
  def check_compatibility!(other)
83
89
  unless other.kind_of?(self.class) && other.unit == unit
84
90
  fail ArgumentError, "#{self} incompatible with #{other}"
@@ -8,7 +8,7 @@ module Reality
8
8
  class << self
9
9
  attr_accessor :unicode
10
10
 
11
- UNIT_REGEX = /[a-zA-Z\$]+/ # FIXME: there are many non-ASCII units, especially in money
11
+ UNIT_REGEX = /[^\s\+\*\/\/\^²³·]+/
12
12
  POWER_REGEX = /[²³]|\^(\d+)/
13
13
  OP_REGEX = /[\/*·]/
14
14
 
@@ -97,6 +97,10 @@ module Reality
97
97
  end
98
98
  end
99
99
 
100
+ def inspect
101
+ "#<#{self.class}(#{to_s})>"
102
+ end
103
+
100
104
  private
101
105
 
102
106
  UNICODE_SUPER = {2 => '²', 3 => '³'}
@@ -0,0 +1,16 @@
1
+ require 'forwardable'
2
+
3
module Reality
  # Entry-point helpers meant to be mixed into a caller's namespace.
  module Methods
    extend Forwardable

    # +countries+ and +continents+ are forwarded to Dictionaries.
    def_delegators Dictionaries, :countries, :continents

    # Loads one entity through Entity.load.
    #
    # @param name passed to Entity.load as is
    # @param entity_class optional second argument for Entity.load
    def Entity(name, entity_class = nil)
      Entity.load(name, entity_class)
    end

    # Builds an Entity::List from the given names.
    #
    # @param names passed to Entity::List.new as is
    def List(*names)
      Entity::List.new(*names)
    end
  end
end
@@ -0,0 +1,11 @@
1
class Date
  # Compact console representation, e.g. #<Date: 2016-03-01>.
  #
  # @return [String]
  def inspect
    "#<Date: #{strftime('%Y-%m-%d')}>"
  end
end
6
+
7
class Rational
  # Console representation delegated to the library-wide number formatter.
  #
  # @return whatever Reality::Util::Format.number returns for this value
  def inspect
    ::Reality::Util::Format.number(self)
  end
end
@@ -0,0 +1,26 @@
1
module Reality
  # Small core-class helpers, active only where `using Refinements` is declared.
  module Refinements
    refine Object do
      # Yields the receiver to the block and returns the block's result.
      def derp
        yield(self)
      end
    end

    refine Hash do
      # Copy of the hash without the listed keys.
      def except(*keys)
        select { |key, _value| !keys.include?(key) }
      end
    end

    refine Array do
      # Counts elements per group.
      #
      # The group of an element is the block's result for it, or the element
      # itself when no block is given. The returned hash answers 0 for groups
      # that never occurred.
      def group_count(&block)
        key_of = block || ->(item) { item }
        each_with_object(Hash.new { 0 }) do |item, counts|
          counts[key_of.call(item)] += 1
        end
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module Reality
  # One-letter shortcuts for the Entity() and List() helpers.
  module Methods
    # Short form of Entity(); every argument is forwarded untouched.
    def E(*arg)
      Entity(*arg)
    end

    # Short form of List(); every argument is forwarded untouched.
    def L(*arg)
      List(*arg)
    end
  end
end
@@ -0,0 +1,64 @@
1
require 'time' # Time.zone_offset is provided by the stdlib "time" extension

module Reality
  # A fixed offset from UTC, stored as a whole number of minutes
  # (positive east of Greenwich, negative west of it).
  class TZOffset
    attr_reader :minutes

    # Unicode minus sign, em dash and en dash; all normalized to ASCII "-".
    MINUSES = /[−—–]/

    # Defaults used by #local for the parts the caller leaves out:
    # year (no default), month, day, hour, minute, second.
    LOCAL_DEFAULTS = [nil, 1, 1, 0, 0, 0].freeze

    # Parses a textual offset.
    #
    # Accepted shapes: a three-letter zone name known to Time.zone_offset
    # ("EST"), a signed hours-and-minutes value with optional "UTC"/"GMT"
    # prefix and optional colon ("+05:30", "UTC+5:30", "GMT-0330"), or a
    # signed hour count, again with optional prefix ("UTC+3").
    #
    # @param text [String]
    # @return [TZOffset, nil] nil when the text is not recognized
    def self.parse(text)
      text = text.gsub(MINUSES, '-')

      seconds =
        case text
        when /^[A-Z]{3}$/
          Time.zone_offset(text)
        when /^(?:UTC|GMT)?([+-])(\d{1,2}):?(\d{2})$/
          # Computed by hand: Time.zone_offset insists on two hour digits,
          # so "+5:30" would otherwise be rejected.
          sign = $1 == '-' ? -1 : 1
          sign * ($2.to_i * 3600 + $3.to_i * 60)
        when /^(?:UTC|GMT)?([+-]\d{1,2})/
          $1.to_i * 3600
        end

      seconds && new(seconds / 60)
    end

    # @param minutes [Integer] offset from UTC in minutes
    def initialize(minutes)
      @minutes = minutes
    end

    # @return [String] e.g. "#<Reality::TZOffset(UTC+05:30)>"
    def inspect
      '#<%s(UTC%s)>' % [self.class.name, to_s]
    end

    # Offset as "+HH:MM" / "-HH:MM".
    #
    # The sign is rendered separately from the magnitude: divmod floors, so
    # applying it to a negative total would turn -330 minutes into "-06:30".
    #
    # @return [String]
    def to_s
      sign = minutes < 0 ? '-' : '+'
      '%s%02i:%02i' % [sign, *minutes.abs.divmod(60)]
    end

    # Orders offsets by their minute count.
    #
    # @raise [ArgumentError] when +other+ is not a TZOffset
    def <=>(other)
      other.is_a?(TZOffset) or fail ArgumentError, "Can't compare TZOffset with #{other.class}"
      minutes <=> other.minutes
    end

    include Comparable

    # Equality never raises: anything that is not a TZOffset is just unequal.
    # (Comparable#== would otherwise propagate the ArgumentError from #<=>.)
    #
    # @return [Boolean]
    def ==(other)
      other.is_a?(TZOffset) && minutes == other.minutes
    end

    # Current time expressed in this offset.
    #
    # @return [Time]
    def now
      convert(Time.now)
    end

    # Builds a Time in this offset from calendar parts.
    #
    # @param values year, then optionally month, day, hour, minute, second;
    #   omitted parts default to January, day 1 and 00:00:00
    # @return [Time]
    # @raise [ArgumentError] when no value or more than six values are given
    def local(*values)
      (1..6).cover?(values.count) or
        fail ArgumentError, "wrong number of arguments (#{values.count} for 1..6)"

      Time.new(*values, *LOCAL_DEFAULTS.drop(values.count), to_s)
    end

    # Expresses the same instant in this offset.
    #
    # The argument is left untouched and sub-second precision is kept.
    #
    # @param tm [Time]
    # @return [Time] a new Time whose utc_offset matches this offset
    def convert(tm)
      tm.getlocal(minutes * 60)
    end
  end
end
@@ -0,0 +1,35 @@
1
module Reality
  module Util
    # Human-friendly text formatting helpers.
    module Format
      module_function

      # Formats a number for display, with precision depending on magnitude.
      #
      # * zero is "0";
      # * below 1 (by absolute value): decimal notation cut after the first
      #   non-zero fractional digit;
      # * from 1 to 4: up to two decimals, trailing zeros removed;
      # * 1000 and above: integer part with thousands separators;
      # * anything else: integer part only.
      #
      # @param n [Numeric]
      # @return [String]
      def number(n)
        # Integral values must not pick up a spurious ".0" from Float#to_s.
        return '0' if n.zero?

        case n.abs
        when 0...1 # exclusive: exactly 1 belongs to the next branch
          n.to_f.to_s.sub(/(\.0*[1-9]).*$/, '\1')
        when 1..4
          ('%.2f' % n).sub(/\.?0+$/, '')
        when 1_000..Float::INFINITY
          # see http://stackoverflow.com/a/6460145/3683228
          n.to_i.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
        else
          n.to_i.to_s
        end
      end

      # Renders a titled, key-sorted listing with right-aligned keys.
      #
      # @param title [String]
      # @param hash [Hash, Array] key/value pairs (an array of pairs works too)
      # @return [String] lines joined with "\n"
      def describe(title, hash)
        # Width is measured on the printed form, so keys need not be strings.
        key_width = hash.map { |key, _value| key.to_s.length }.max.to_i + 1
        rule = '-' * title.length

        [
          rule,
          title,
          rule,
          *hash.sort_by(&:first).map { |key, value|
            "#{key.to_s.rjust(key_width)}: #{value}"
          }
        ].join("\n")
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
module Reality
  module Util
    # Parsers for numbers as they appear in free text.
    module Parse
      # See "Short scale": https://en.wikipedia.org/wiki/Long_and_short_scales#Comparison
      SCALES = {
        'million' => 1_000_000,
        'billion' => 1_000_000_000,
        'trillion' => 1_000_000_000_000,
        'quadrillion' => 1_000_000_000_000_000,
        'quintillion' => 1_000_000_000_000_000_000,
        'sextillion' => 1_000_000_000_000_000_000_000,
        'septillion' => 1_000_000_000_000_000_000_000_000,
      }.freeze
      SCALES_REGEXP = Regexp.union(*SCALES.keys)

      # Leading digits/commas/dots, optional whitespace, optional scale word.
      # Built once instead of on every call.
      SCALED_NUMBER_REGEXP = /^([0-9.,]+)[[:space:]]*(#{SCALES_REGEXP})?/

      module_function

      # Parses a number optionally followed by a short-scale word,
      # e.g. "1.5 million" or "12,345".
      #
      # @param str [String]
      # @return [Integer, Float, nil] nil when the text does not start with a
      #   number or the numeric part is malformed (e.g. "1.2.3")
      def scaled_number(str)
        match, amount, scale = *SCALED_NUMBER_REGEXP.match(str)
        match or return nil

        # number() answers nil for malformed digits; do not multiply that.
        value = number(amount) or return nil

        scale ? value * fetch_scale(scale) : value
      end

      # Parses a plain integer or decimal; commas are treated as thousands
      # separators and the Unicode minus sign as "-".
      #
      # @param str [String]
      # @return [Integer, Float, nil] nil when the text is not a number
      def number(str)
        str = str.gsub(',', '').tr('−', '-')
        case str
        when /^-?\d+$/
          str.to_i
        when /^-?\d+\.\d+$/
          str.to_f
        else
          nil
        end
      end

      # Looks up the multiplier for a scale word (matched by prefix).
      #
      # @param str [String]
      # @return [Integer]
      # @raise [RuntimeError] when no known scale word starts the text
      def fetch_scale(str)
        _, res = SCALES.find { |key, _multiplier| str.start_with?(key) }

        res or fail("Scale not found: #{str} for #{self}")
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
module Reality
  MAJOR = 0
  MINOR = 0
  PATCH = 3

  # Dotted version string in MAJOR.MINOR.PATCH order.
  VERSION = [MAJOR, MINOR, PATCH].join('.')
end
@@ -0,0 +1,310 @@
1
+ module Reality
2
+ using Reality::Refinements
3
+
4
+ module Wikidata
5
# Reference to another Wikidata item: its id plus an optional label.
class Link
  attr_reader :id, :label

  # @param id item identifier
  # @param label optional human-readable label
  def initialize(id, label = nil)
    @id = id
    @label = label
  end

  # @return [String] class name with "id: label" (or just the id) inside
  def inspect
    summary = [id, label].compact.join(': ')
    "#<#{self.class}(#{summary})>"
  end

  # @return the label when present, the id otherwise
  def to_s
    label || id
  end
end
20
+
21
+ # FIXME: I should burn in hell for this mess. But it works. Somehow.
22
# One Wikidata item: its id plus a hash of predicate => list of parsed values.
#
# The class-level methods query the Wikidata SPARQL endpoint and turn the
# JSON answer into Entity objects.
class Entity
  PREFIX = %Q{
    PREFIX wikibase: <http://wikiba.se/ontology#>
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX p: <http://www.wikidata.org/prop/>
    PREFIX v: <http://www.wikidata.org/prop/statement/>
    PREFIX schema: <http://schema.org/>
  }

  # Everything known about the item described by one English Wikipedia page.
  SINGLE_QUERY = %Q{
    #{PREFIX}

    SELECT ?id ?p ?o ?oLabel WHERE {
      <https://en.wikipedia.org/wiki/%{title}> schema:about ?id .
      {
        ?id ?p ?o .
        FILTER(STRSTARTS(STR(?p), "http://www.wikidata.org/prop/direct/"))
      } union {
        ?id ?p ?o .
        filter(langMatches(lang(?o), "EN")).
        filter(?p = rdfs:label)
      }
      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "en" .
      }
    }
  }

  # Same data, addressed by Wikidata id; also returns the English Wikipedia URL.
  ID_QUERY = %Q{
    #{PREFIX}

    SELECT ?id ?p ?o ?oLabel WHERE {
      bind(wd:%{id} as ?id)
      {
        ?id ?p ?o .
        FILTER(
          STRSTARTS(STR(?p), "http://www.wikidata.org/prop/direct/") ||
          (?p = rdfs:label && langMatches(lang(?o), "EN"))
        )
      } union {
        bind(schema:about as ?p) .
        ?o schema:about ?id .
        filter(strstarts(str(?o), "https://en.wikipedia.org/wiki/"))
      }
      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "en" .
      }
    }
  }

  # Batch variant; %{selectors} is a UNION of SELECTOR fragments.
  MULTIPLE_QUERY = %Q{
    #{PREFIX}

    SELECT ?id ?p ?o ?oLabel WHERE {
      %{selectors} .
      {
        ?id ?p ?o .
        FILTER(
          STRSTARTS(STR(?p), "http://www.wikidata.org/prop/direct/") ||
          (?p = rdfs:label && langMatches(lang(?o), "EN"))
        )
      } union {
        bind(schema:about as ?p) .
        ?o schema:about ?id .
        filter(strstarts(str(?o), "https://en.wikipedia.org/wiki/"))
      }
      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "en" .
      }
    }
  }

  # Batch variant; %{selectors} is a UNION of IDSELECTOR fragments.
  MULTIPLE_IDS_QUERY = %Q{
    #{PREFIX}

    SELECT ?id ?p ?o ?oLabel WHERE {
      %{selectors} .
      {
        ?id ?p ?o .
        FILTER(
          STRSTARTS(STR(?p), "http://www.wikidata.org/prop/direct/") ||
          (?p = rdfs:label && langMatches(lang(?o), "EN"))
        )
      } union {
        bind(schema:about as ?p) .
        ?o schema:about ?id .
        filter(strstarts(str(?o), "https://en.wikipedia.org/wiki/"))
      }
      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "en" .
      }
    }
  }

  SELECTOR = %Q{
    {
      <https://en.wikipedia.org/wiki/%{title}> schema:about ?id
    }
  }

  IDSELECTOR = %Q{
    {
      BIND(wd:%{id} as ?id)
    }
  }

  # Characters percent-encoded in page titles before they enter a query.
  UNSAFE = Regexp.union(URI::UNSAFE, /[,()']/)

  class << self
    WIKIURL = 'https://en.wikipedia.org/wiki/%{title}'

    # Largest number of items requested in a single query.
    MAX_SLICE = 20

    # Names of the SPARQL result variables used by every query above.
    SPARQL_FIELDS = {subject: 'id', predicate: 'p', object: 'o', object_label: 'oLabel'}.freeze

    # Memoized HTTP connection to the Wikidata SPARQL endpoint.
    def faraday
      @faraday ||= Faraday.new(url: 'https://query.wikidata.org/sparql'){|f|
        f.adapter Faraday.default_adapter
      }
    end

    # Fetches by English Wikipedia page title.
    #
    # @param title [String]
    # @return [Array<Entity>] every entity found in the answer
    def fetch(title)
      run_query(SINGLE_QUERY % {title: escape_title(title)})
    end

    # Fetches by Wikidata id.
    #
    # @param id Wikidata item id, inserted into the query after "wd:"
    # @return [Entity, nil] first entity of the answer
    def fetch_by_id(id)
      run_query(ID_QUERY % {id: id}).first
    end

    # Fetches many pages, MAX_SLICE titles per request.
    #
    # @param titles [Array<String>]
    # @return [Hash] entities keyed by #en_wikipage; empty when no titles given
    def fetch_list(*titles)
      titles.each_slice(MAX_SLICE).
        map{|titles_chunk| fetch_small_list(*titles_chunk)}.
        inject({}, :merge) # explicit seed: no titles means {}, not nil
    end

    # Fetches many ids, MAX_SLICE ids per request.
    #
    # @param ids [Array]
    # @return [Hash] entities keyed by id; empty when no ids given
    def fetch_list_by_id(*ids)
      ids.each_slice(MAX_SLICE).
        map{|ids_chunk| fetch_small_idlist(*ids_chunk)}.
        inject({}, :merge) # explicit seed: no ids means {}, not nil
    end

    # Single-request batch by title; see #fetch_list.
    def fetch_small_list(*titles)
      selectors = titles.
        map{|t| SELECTOR % {title: escape_title(t)}}.
        join(' UNION ')

      run_query(MULTIPLE_QUERY % {selectors: selectors}).
        map{|e| [e.en_wikipage, e]}.to_h
    end

    # Single-request batch by id; see #fetch_list_by_id.
    def fetch_small_idlist(*ids)
      selectors = ids.
        map{|i| IDSELECTOR % {id: i}}.
        join(' UNION ')

      run_query(MULTIPLE_IDS_QUERY % {selectors: selectors}).
        map{|e| [e.id, e]}.to_h
    end

    # Builds entities from a SPARQL JSON answer.
    #
    # Rows are grouped by subject; each group becomes one Entity.
    #
    # @param sparql_json [String] JSON text with results/bindings
    # @param subject [String] name of the variable holding the item
    # @param predicate [String] name of the variable holding the predicate
    # @param object [String] name of the variable holding the value
    # @param object_label [String] name of the variable holding the value's label
    # @return [Array<Entity>]
    def from_sparql(sparql_json, subject: 'subject', predicate: 'predicate', object: 'object', object_label: 'object_label')
      JSON.parse(sparql_json)['results']['bindings'].map{|row|
        [
          row[subject]['value'].sub('http://www.wikidata.org/entity/', ''),
          row[predicate]['value'].sub('http://www.wikidata.org/prop/direct/', ''),
          # a row without a label binding yields a nil label instead of raising
          row[object].merge('label' => (row[object_label] || {})['value'])
        ]
      }.group_by(&:first).
        map{|id, rows|
          new(id, hash_from_predicates(rows))
        }
    end

    # Turns [subject, predicate, object] rows into predicate => parsed values;
    # values that parse to nil are dropped.
    def hash_from_predicates(rows)
      rows.map{|_s, p, o| [p, parse_value(o)]}.
        group_by(&:first).map{|p, gs| [p, gs.map(&:last).compact]}.
        to_h
    end

    # Parses one SPARQL binding by its "type".
    #
    # @raise [ArgumentError] for a type other than literal, uri or bnode
    def parse_value(hash)
      case hash['type']
      when 'literal'
        parse_literal(hash)
      when 'uri'
        parse_uri(hash)
      when 'bnode'
        nil
      else
        fail ArgumentError, "Unidentifieble datatype: #{hash['type']} in #{hash}"
      end
    end

    # Wikidata entity URIs become Link objects; other URIs stay strings.
    def parse_uri(hash)
      if hash['value'] =~ %r{https?://www\.wikidata\.org/entity/([^/]+)$}
        Link.new($1, hash['label'])
      else
        hash['value']
      end
    end

    # Parses a literal by its datatype.
    #
    # @return [Integer, Float, DateTime, Geo::Coord, String, nil] nil for
    #   language-tagged text in a language other than English
    # @raise [ArgumentError] for a WKT literal that is not a point
    def parse_literal(hash)
      case hash['datatype']
      when 'http://www.w3.org/2001/XMLSchema#decimal'
        parse_decimal(hash['value'])
      when 'http://www.w3.org/2001/XMLSchema#dateTime'
        DateTime.parse(hash['value'])
      when 'http://www.opengis.net/ont/geosparql#wktLiteral'
        if hash['value'] =~ /^\s*point\s*\(\s*([-\d.]+)\s+([-\d.]+)\s*\)\s*$/i
          # WKT points are written "Point(longitude latitude)".
          lng, lat = $1, $2
          Geo::Coord.new(lat.to_f, lng.to_f)
        else
          fail ArgumentError, "Unparseable WKT: #{hash['value']}"
        end
      else
        if hash['xml:lang'] && hash['xml:lang'] != 'en'
          nil
        else
          hash['value']
        end
      end
    end

    private

    # Sends one SPARQL query and parses the answer into entities.
    def run_query(sparql)
      response = faraday.get('', query: sparql, format: :json)
      from_sparql(response.body, **SPARQL_FIELDS)
    end

    # Percent-encodes a page title for embedding into a query.
    # URI.escape itself is gone since Ruby 3.0; the parser object keeps it.
    def escape_title(title)
      URI::DEFAULT_PARSER.escape(title, UNSAFE)
    end

    # Whole numbers stay Integer; anything with a fraction or exponent
    # becomes Float instead of being truncated.
    def parse_decimal(text)
      text =~ /\A\s*[+-]?\d+\s*\z/ ? text.to_i : text.to_f
    end
  end

  attr_reader :id, :predicates

  # @param id [String] Wikidata item id
  # @param predicates [Hash] predicate => list of values
  def initialize(id, predicates)
    @id, @predicates = id, predicates
  end

  # @return values stored for the predicate, nil when there are none
  def [](pred)
    @predicates[pred]
  end

  # @return first rdfs:label value, nil when the item has no label
  def label
    (self['http://www.w3.org/2000/01/rdf-schema#label'] || []).first
  end

  # @return values of schema:about (Wikipedia page URLs), nil when absent
  def about
    self['http://schema.org/about']
  end

  # Title of the English Wikipedia page, percent-decoded.
  #
  # @return [String, nil] nil when there is no page URL or it has another shape
  def en_wikipage
    page_url = about && about.first
    return nil unless page_url

    encoded = page_url.to_s[%r{https://en\.wikipedia\.org/wiki/(.+)$}, 1]
    encoded && URI::DEFAULT_PARSER.unescape(encoded)
  end

  # @return [String] class name with "id: label" (or just the id) inside
  def inspect
    "#<#{self.class}(#{[id, label].compact.join(': ')})>"
  end

  # @return the label when present, the id otherwise
  def to_s
    label || id
  end

  # @return [Hash] the predicates hash itself
  def to_h
    @predicates
  end
end
309
+ end
310
+ end