reality 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.dokaz +1 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +538 -66
  5. data/bin/reality +9 -0
  6. data/config/demo.yml +3 -0
  7. data/data/wikidata-predicates.json +1 -0
  8. data/data/wikidata-predicates.yaml +2089 -0
  9. data/lib/reality.rb +26 -7
  10. data/lib/reality/config.rb +46 -0
  11. data/lib/reality/definitions/dictionaries.rb +67 -0
  12. data/lib/reality/definitions/helpers.rb +34 -0
  13. data/lib/reality/definitions/wikidata.rb +105 -0
  14. data/lib/reality/definitions/wikipedia_character.rb +17 -0
  15. data/lib/reality/definitions/wikipedia_city.rb +19 -0
  16. data/lib/reality/definitions/wikipedia_continent.rb +21 -0
  17. data/lib/reality/definitions/wikipedia_country.rb +23 -0
  18. data/lib/reality/definitions/wikipedia_musical_artist.rb +15 -0
  19. data/lib/reality/definitions/wikipedia_person.rb +17 -0
  20. data/lib/reality/entity.rb +152 -0
  21. data/lib/reality/entity/coercion.rb +76 -0
  22. data/lib/reality/entity/wikidata_predicates.rb +31 -0
  23. data/lib/reality/entity/wikipedia_type.rb +73 -0
  24. data/lib/reality/extras/geonames.rb +29 -0
  25. data/lib/reality/extras/open_weather_map.rb +63 -0
  26. data/lib/reality/geo.rb +122 -0
  27. data/lib/reality/infoboxer_templates.rb +8 -0
  28. data/lib/reality/list.rb +95 -0
  29. data/lib/reality/measure.rb +18 -12
  30. data/lib/reality/measure/unit.rb +5 -1
  31. data/lib/reality/methods.rb +16 -0
  32. data/lib/reality/pretty_inspect.rb +11 -0
  33. data/lib/reality/refinements.rb +26 -0
  34. data/lib/reality/shortcuts.rb +11 -0
  35. data/lib/reality/tz_offset.rb +64 -0
  36. data/lib/reality/util/formatters.rb +35 -0
  37. data/lib/reality/util/parsers.rb +53 -0
  38. data/lib/reality/version.rb +6 -0
  39. data/lib/reality/wikidata.rb +310 -0
  40. data/reality.gemspec +12 -3
  41. data/script/extract_wikidata_properties.rb +23 -0
  42. data/script/lib/nokogiri_more.rb +175 -0
  43. metadata +137 -7
  44. data/examples/all_countries.rb +0 -16
  45. data/lib/reality/country.rb +0 -283
@@ -3,6 +3,10 @@ module Reality
3
3
  %w[unit].each{|mod| require_relative "measure/#{mod}"}
4
4
 
5
5
  attr_reader :amount, :unit
6
+
7
+ def Measure.coerce(amount, unit)
8
+ amount && unit && new(amount, unit)
9
+ end
6
10
 
7
11
  def initialize(amount, unit)
8
12
  @amount, @unit = Rational(amount), Unit.parse(unit)
@@ -14,6 +18,10 @@ module Reality
14
18
  amount <=> other.amount
15
19
  end
16
20
 
21
+ def ==(other)
22
+ amount == other.amount && unit == other.unit
23
+ end
24
+
17
25
  def -@
18
26
  self.class.new(-amount, unit)
19
27
  end
@@ -60,25 +68,23 @@ module Reality
60
68
  include Comparable
61
69
 
62
70
  def to_s
63
- '%s%s' % [formatted_amount, unit]
71
+ '%s%s' % [Util::Format.number(amount), unit]
64
72
  end
65
73
 
66
- def inspect
67
- "#<%s(%s %s)>" % [self.class, formatted_amount, unit]
74
+ def to_f
75
+ amount.to_f
68
76
  end
69
77
 
70
- private
78
+ def to_i
79
+ amount.to_i
80
+ end
71
81
 
72
- def formatted_amount
73
- # FIXME: really naive
74
- if amount.abs < 1
75
- amount.to_f.to_s
76
- else
77
- # see http://stackoverflow.com/a/6460145/3683228
78
- amount.to_i.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
79
- end
82
+ def inspect
83
+ "#<%s(%s %s)>" % [self.class, Util::Format.number(amount), unit]
80
84
  end
81
85
 
86
+ private
87
+
82
88
  def check_compatibility!(other)
83
89
  unless other.kind_of?(self.class) && other.unit == unit
84
90
  fail ArgumentError, "#{self} incompatible with #{other}"
@@ -8,7 +8,7 @@ module Reality
8
8
  class << self
9
9
  attr_accessor :unicode
10
10
 
11
- UNIT_REGEX = /[a-zA-Z\$]+/ # FIXME: there are many non-ASCII units, especially in money
11
+ UNIT_REGEX = /[^\s\+\*\/\/\^²³·]+/
12
12
  POWER_REGEX = /[²³]|\^(\d+)/
13
13
  OP_REGEX = /[\/*·]/
14
14
 
@@ -97,6 +97,10 @@ module Reality
97
97
  end
98
98
  end
99
99
 
100
+ def inspect
101
+ "#<#{self.class}(#{to_s})>"
102
+ end
103
+
100
104
  private
101
105
 
102
106
  UNICODE_SUPER = {2 => '²', 3 => '³'}
@@ -0,0 +1,16 @@
1
+ require 'forwardable'
2
+
module Reality
  # Entry-point helpers of the library: constructor-like methods for entities
  # and lists, plus shortcuts to the dictionaries.
  module Methods
    extend Forwardable

    # #countries and #continents are forwarded to Dictionaries.
    def_delegators Dictionaries, :countries, :continents

    # Loads a single entity.
    #
    # Both arguments are handed to Entity.load unchanged; entity_class is
    # optional and defaults to nil.
    def Entity(name, entity_class = nil)
      Entity.load(name, entity_class)
    end

    # Builds an Entity::List from all the given names.
    def List(*names)
      Entity::List.new(*names)
    end
  end
end
@@ -0,0 +1,11 @@
# Core extension: compact, human-readable Date#inspect
# (e.g. "#<Date: 2016-03-07>").
class Date
  # @return [String] the date as year-month-day wrapped in "#<Date: ...>"
  def inspect
    "#<Date: #{strftime('%Y-%m-%d')}>"
  end
end
6
+
# Core extension: Rational#inspect is rendered through the library's number
# formatter instead of the default "(a/b)" notation.
class Rational
  # @return whatever Reality::Util::Format.number returns for this value
  def inspect
    formatter = Reality::Util::Format
    formatter.number(self)
  end
end
@@ -0,0 +1,26 @@
module Reality
  # Small core-class helpers packaged as refinements: they are visible only
  # in files that activate them with `using Reality::Refinements`.
  module Refinements
    refine Object do
      # Yields the receiver to the block and returns the block's result.
      def derp
        yield(self)
      end
    end

    refine Hash do
      # Returns a copy of the hash without the listed keys.
      def except(*keys)
        reject { |key, _value| keys.include?(key) }
      end
    end

    refine Array do
      # Counts elements per group: the group is the block's result for the
      # element, or the element itself when no block is given.
      # Missing keys of the resulting hash read as 0.
      def group_count(&block)
        grouper = block || ->(item) { item }

        each_with_object(Hash.new { 0 }) do |item, counts|
          counts[grouper.call(item)] += 1
        end
      end
    end
  end
end
@@ -0,0 +1,11 @@
module Reality
  # One-letter shortcuts for the Entity() and List() helpers.
  module Methods
    # Shortcut: passes all arguments to Entity().
    def E(*args)
      Entity(*args)
    end

    # Shortcut: passes all arguments to List().
    def L(*args)
      List(*args)
    end
  end
end
@@ -0,0 +1,64 @@
module Reality
  # Fixed offset from UTC, stored as a whole number of minutes.
  #
  # Negative values are offsets behind UTC (rendered with a leading "-").
  class TZOffset
    include Comparable

    # Offset from UTC in minutes.
    attr_reader :minutes

    # Unicode minus, em dash and en dash; all are treated as ASCII "-".
    MINUSES = /[−—–]/

    # Parses a textual offset.
    #
    # Understood forms: three-letter zone names known to Time.zone_offset
    # ("EST"), "+HH:MM" / "+HHMM" / "+H:MM" and whole hours ("+3"), the
    # numeric forms optionally prefixed with "UTC" or "GMT".
    #
    # @param text [String]
    # @return [TZOffset, nil] nil when the text is not recognized
    def self.parse(text)
      text = text.gsub(MINUSES, '-')

      seconds =
        case text
        when /^[A-Z]{3}$/
          Time.zone_offset(text)
        when /^(?:UTC|GMT)?([+-])(\d{1,2}):?(\d{2})$/
          # Computed by hand: Time.zone_offset insists on two-digit hours,
          # so a one-digit form like "UTC+5:30" would come back as nil.
          sign = $1 == '-' ? -1 : 1
          sign * ($2.to_i * 3600 + $3.to_i * 60)
        when /^(?:UTC|GMT)?([+-]\d{1,2})/
          $1.to_i * 3600
        end

      seconds && new(seconds / 60)
    end

    # @param minutes [Integer] offset from UTC in minutes
    def initialize(minutes)
      @minutes = minutes
    end

    # @return [String] e.g. "#<Reality::TZOffset(UTC+03:00)>"
    def inspect
      '#<%s(UTC%s)>' % [self.class.name, to_s]
    end

    # Renders the offset as "+HH:MM" / "-HH:MM".
    #
    # Sign and magnitude are formatted separately: divmod on a negative
    # number floors, which would turn -210 minutes into "-04:30".
    #
    # @return [String]
    def to_s
      sign = minutes < 0 ? '-' : '+'
      '%s%02i:%02i' % [sign, *minutes.abs.divmod(60)]
    end

    # Orders offsets by their minutes.
    #
    # @return [Integer, nil] nil when other is not a TZOffset, so `==`
    #   answers false and `<`, `>` etc. raise ArgumentError via Comparable
    def <=>(other)
      return nil unless other.is_a?(TZOffset)

      minutes <=> other.minutes
    end

    # @return [Time] current time expressed in this offset
    def now
      convert(Time.now)
    end

    # Builds a Time in this offset from up to six components
    # (year, month, day, hour, minute, second).
    #
    # Omitted components take their smallest valid value (month and day 1,
    # hour, minute and second 0; year 0).
    #
    # @raise [ArgumentError] when more than six components are given
    # @return [Time]
    def local(*values)
      if values.count > 6
        fail ArgumentError, "Too many time components (#{values.count} for 0..6)"
      end

      defaults = [0, 1, 1, 0, 0, 0]
      Time.new(*values, *defaults.drop(values.count), to_s)
    end

    # Expresses the given moment in this offset.
    #
    # The argument is left untouched and sub-second precision is kept.
    #
    # @param tm [Time]
    # @return [Time] the same instant with utc_offset of this TZOffset
    def convert(tm)
      tm.getlocal(minutes * 60)
    end
  end
end
@@ -0,0 +1,35 @@
module Reality
  module Util
    # Human-oriented text formatting helpers.
    module Format
      module_function

      # Formats a number for display; the precision depends on the magnitude:
      #
      # * below 1: float notation cut after the first non-zero fractional digit;
      # * 1 to 4: up to two fractional digits, trailing zeros removed;
      # * 1000 and more: integer part with "," as thousands separator;
      # * anything else: integer part only.
      #
      # @param n [Numeric]
      # @return [String]
      def number(n)
        case n.abs
        when 0...1 # exclusive: exactly 1 belongs to the next branch ("1", not "1.0")
          n.to_f.to_s.sub(/(\.0*[1-9]).*$/, '\1')
        when 1..4
          ('%.2f' % n).sub(/\.?0+$/, '')
        when 1_000..Float::INFINITY
          # see http://stackoverflow.com/a/6460145/3683228
          n.to_i.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
        else
          n.to_i.to_s
        end
      end

      # Renders a titled, key-sorted "key: value" listing.
      #
      # @param title [String] printed between two dashed rulers of its length
      # @param hash [Hash, Array] pairs to print (an array of [key, value]
      #   pairs works as well); keys are right-aligned
      # @return [String] lines joined with "\n"
      def describe(title, hash)
        # Width is measured on key.to_s, the same text that gets printed,
        # so keys without #length (e.g. integers) do not break the listing.
        key_width = hash.map { |key, _value| key.to_s.length }.max.to_i + 1
        ruler = '-' * title.length

        [
          ruler,
          title,
          ruler,
          *hash.sort_by(&:first).map { |key, value|
            "#{key.to_s.rjust(key_width)}: #{value}"
          }
        ].join("\n")
      end
    end
  end
end
@@ -0,0 +1,53 @@
module Reality
  module Util
    # Parsers for numbers as they appear in human-written text.
    module Parse
      # See "Short scale": https://en.wikipedia.org/wiki/Long_and_short_scales#Comparison
      SCALES = {
        'million' => 1_000_000,
        'billion' => 1_000_000_000,
        'trillion' => 1_000_000_000_000,
        'quadrillion' => 1_000_000_000_000_000,
        'quintillion' => 1_000_000_000_000_000_000,
        'sextillion' => 1_000_000_000_000_000_000_000,
        'septillion' => 1_000_000_000_000_000_000_000_000,
      }.freeze
      SCALES_REGEXP = Regexp.union(*SCALES.keys)

      module_function

      # Parses a number at the start of the string, optionally followed by a
      # short-scale word ("12.5 million").
      #
      # @param str [String]
      # @return [Integer, Float, nil] nil when the string does not start with
      #   digits or the digits do not form a valid number ("1.2.3 million")
      def scaled_number(str)
        match, amount, scale = */^([0-9.,]+)[[:space:]]*(#{SCALES_REGEXP})?/.match(str)
        match or return nil

        value = number(amount)
        # Bail out before multiplying: digits like "1.2.3" parse to nil
        value or return nil

        scale ? value * fetch_scale(scale) : value
      end

      # Parses an integer or decimal number; "," is dropped as a thousands
      # separator and the Unicode minus is accepted.
      #
      # @param str [String]
      # @return [Integer, Float, nil] nil when str is not a plain number
      def number(str)
        str = str.delete(',').tr('−', '-')

        case str
        when /^-?\d+$/
          str.to_i
        when /^-?\d+\.\d+$/
          str.to_f
        end
      end

      # Looks up the multiplier for a scale word.
      #
      # @param str [String] text starting with one of the SCALES keys
      # @return [Integer]
      # @raise [RuntimeError] when no known scale word starts the string
      def fetch_scale(str)
        _, factor = SCALES.detect { |name, _| str.start_with?(name) }

        factor or fail("Scale not found: #{str} for #{self}")
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ module Reality
2
+ MAJOR = 0
3
+ MINOR = 0
4
+ PATCH = 3
5
+ VERSION = [MINOR, MAJOR, PATCH].join('.')
6
+ end
@@ -0,0 +1,310 @@
module Reality
  using Reality::Refinements

  # Minimal client for the Wikidata SPARQL endpoint.
  module Wikidata
    # Reference to another Wikidata item: its id and, when known, its label.
    class Link
      attr_reader :id, :label

      def initialize(id, label = nil)
        @id, @label = id, label
      end

      def inspect
        "#<#{self.class}(#{[id, label].compact.join(': ')})>"
      end

      # @return [String] the label, or the id when there is no label
      def to_s
        label || id
      end
    end

    # One Wikidata item with its predicates, as fetched via SPARQL.
    #
    # FIXME: this class is messy and needs a rewrite; it works, though.
    class Entity
      PREFIX = %Q{
        PREFIX wikibase: <http://wikiba.se/ontology#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX p: <http://www.wikidata.org/prop/>
        PREFIX v: <http://www.wikidata.org/prop/statement/>
        PREFIX schema: <http://schema.org/>
      }

      # Everything about the item behind one English Wikipedia title.
      SINGLE_QUERY = %Q{
        #{PREFIX}

        SELECT ?id ?p ?o ?oLabel WHERE {
          <https://en.wikipedia.org/wiki/%{title}> schema:about ?id .
          {
            ?id ?p ?o .
            FILTER(STRSTARTS(STR(?p), "http://www.wikidata.org/prop/direct/"))
          } union {
            ?id ?p ?o .
            filter(langMatches(lang(?o), "EN")).
            filter(?p = rdfs:label)
          }
          SERVICE wikibase:label {
            bd:serviceParam wikibase:language "en" .
          }
        }
      }

      # Everything about one item given its Wikidata id.
      ID_QUERY = %Q{
        #{PREFIX}

        SELECT ?id ?p ?o ?oLabel WHERE {
          bind(wd:%{id} as ?id)
          {
            ?id ?p ?o .
            FILTER(
              STRSTARTS(STR(?p), "http://www.wikidata.org/prop/direct/") ||
              (?p = rdfs:label && langMatches(lang(?o), "EN"))
            )
          } union {
            bind(schema:about as ?p) .
            ?o schema:about ?id .
            filter(strstarts(str(?o), "https://en.wikipedia.org/wiki/"))
          }
          SERVICE wikibase:label {
            bd:serviceParam wikibase:language "en" .
          }
        }
      }

      # Several items at once; %{selectors} is a UNION of SELECTOR or
      # IDSELECTOR fragments.
      MULTIPLE_QUERY = %Q{
        #{PREFIX}

        SELECT ?id ?p ?o ?oLabel WHERE {
          %{selectors} .
          {
            ?id ?p ?o .
            FILTER(
              STRSTARTS(STR(?p), "http://www.wikidata.org/prop/direct/") ||
              (?p = rdfs:label && langMatches(lang(?o), "EN"))
            )
          } union {
            bind(schema:about as ?p) .
            ?o schema:about ?id .
            filter(strstarts(str(?o), "https://en.wikipedia.org/wiki/"))
          }
          SERVICE wikibase:label {
            bd:serviceParam wikibase:language "en" .
          }
        }
      }

      # The by-id variant uses exactly the same query text; only the
      # selectors differ. Kept as a separate name for existing references.
      MULTIPLE_IDS_QUERY = MULTIPLE_QUERY

      SELECTOR = %Q{
        {
          <https://en.wikipedia.org/wiki/%{title}> schema:about ?id
        }
      }

      IDSELECTOR = %Q{
        {
          BIND(wd:%{id} as ?id)
        }
      }

      # Characters escaped in Wikipedia titles before they are put into a query.
      # NOTE(review): URI::UNSAFE, URI.escape and URI.unescape are legacy APIs
      # that are gone from recent Ruby versions — confirm the supported Ruby range.
      UNSAFE = Regexp.union(URI::UNSAFE, /[,()']/)

      class << self
        # Memoized HTTP connection to the SPARQL endpoint.
        def faraday
          @faraday ||= Faraday.new(url: 'https://query.wikidata.org/sparql'){|f|
            f.adapter Faraday.default_adapter
          }
        end

        # Fetches by English Wikipedia title.
        #
        # @return [Array<Entity>] all entities found in the response (unlike
        #   fetch_by_id, the array is not unwrapped)
        def fetch(title)
          title = URI.escape(title, UNSAFE)
          query_entities(SINGLE_QUERY % {title: title})
        end

        # Fetches by Wikidata id.
        #
        # @return [Entity, nil] first entity of the response
        def fetch_by_id(id)
          query_entities(ID_QUERY % {id: id}).first
        end

        WIKIURL = 'https://en.wikipedia.org/wiki/%{title}'

        # Number of items requested per query.
        MAX_SLICE = 20

        # Fetches many entities by Wikipedia title, MAX_SLICE per request.
        #
        # @return [Hash] entity per en_wikipage; empty hash for no titles
        def fetch_list(*titles)
          titles.each_slice(MAX_SLICE).map{|titles_chunk|
            fetch_small_list(*titles_chunk)
          }.inject({}, :merge)
        end

        # Fetches many entities by Wikidata id, MAX_SLICE per request.
        #
        # @return [Hash] entity per id; empty hash for no ids
        def fetch_list_by_id(*ids)
          ids.each_slice(MAX_SLICE).map{|ids_chunk|
            fetch_small_idlist(*ids_chunk)
          }.inject({}, :merge)
        end

        # One request for a chunk of titles.
        #
        # @return [Hash] entity per en_wikipage
        def fetch_small_list(*titles)
          titles.
            map{|t| SELECTOR % {title: URI.escape(t, UNSAFE)}}.
            join(' UNION ').
            derp{|selectors| query_entities(MULTIPLE_QUERY % {selectors: selectors})}.
            map{|e| [e.en_wikipage, e]}.
            to_h
        end

        # One request for a chunk of ids.
        #
        # @return [Hash] entity per id
        def fetch_small_idlist(*ids)
          ids.
            map{|i| IDSELECTOR % {id: i}}.
            join(' UNION ').
            derp{|selectors| query_entities(MULTIPLE_IDS_QUERY % {selectors: selectors})}.
            map{|e| [e.id, e]}.
            to_h
        end

        # Builds entities from a SPARQL JSON response: rows are grouped by
        # subject, each group becomes one Entity.
        #
        # The keyword arguments name the response variables holding the
        # subject, predicate, object and object label.
        #
        # @return [Array<Entity>]
        def from_sparql(sparql_json, subject: 'subject', predicate: 'predicate', object: 'object', object_label: 'object_label')
          JSON.parse(sparql_json)['results']['bindings'].map{|row|
            [
              row[subject]['value'].sub('http://www.wikidata.org/entity/', ''),
              row[predicate]['value'].sub('http://www.wikidata.org/prop/direct/', ''),
              row[object].merge('label' => row[object_label]['value'])
            ]
          }.group_by(&:first).
          map{|id, rows|
            new(id, hash_from_predicates(rows))
          }
        end

        # @param rows [Array] [subject, predicate, object] triples of one subject
        # @return [Hash] predicate => array of parsed, non-nil values
        def hash_from_predicates(rows)
          rows.map{|_subject, pred, obj| [pred, parse_value(obj)]}.
            group_by(&:first).map{|pred, pairs| [pred, pairs.map(&:last).compact]}.
            to_h
        end

        # Converts one SPARQL JSON binding to a Ruby value.
        #
        # @return nil for blank nodes
        # @raise [ArgumentError] on an unknown binding type
        def parse_value(hash)
          case hash['type']
          when 'literal'
            parse_literal(hash)
          when 'uri'
            parse_uri(hash)
          when 'bnode'
            nil
          else
            fail ArgumentError, "Unidentifiable datatype: #{hash['type']} in #{hash}"
          end
        end

        # @return [Link] for Wikidata entity URIs, otherwise the URI string
        def parse_uri(hash)
          if hash['value'] =~ %r{https?://www\.wikidata\.org/entity/([^/]+)$}
            Link.new($1, hash['label'])
          else
            hash['value']
          end
        end

        # Converts a literal according to its datatype; untyped literals in
        # a language other than English are dropped (nil).
        def parse_literal(hash)
          case hash['datatype']
          when 'http://www.w3.org/2001/XMLSchema#decimal'
            # NOTE(review): to_i drops any fractional part — confirm that
            # only whole numbers are expected here.
            hash['value'].to_i
          when 'http://www.w3.org/2001/XMLSchema#dateTime'
            DateTime.parse(hash['value'])
          when 'http://www.opengis.net/ont/geosparql#wktLiteral'
            # TODO: the first number is taken as latitude and the second as
            # longitude; NOTE(review): verify against the order the endpoint
            # actually emits.
            if hash['value'] =~ /^\s*point\s*\(\s*([-\d.]+)\s+([-\d.]+)\s*\)\s*$/i
              lat, lng = $1, $2
              Geo::Coord.new(lat.to_f, lng.to_f)
            else
              fail ArgumentError, "Unparseable WKT: #{hash['value']}"
            end
          else
            if hash['xml:lang'] && hash['xml:lang'] != 'en'
              nil
            else
              hash['value']
            end
          end
        end

        private

        # Runs one SPARQL query and parses the response into entities.
        def query_entities(sparql)
          faraday.get('', query: sparql, format: :json).
            derp{|res| from_sparql(res.body, subject: 'id', predicate: 'p', object: 'o', object_label: 'oLabel')}
        end
      end

      attr_reader :id, :predicates

      # @param id [String] Wikidata id
      # @param predicates [Hash] predicate => array of values
      def initialize(id, predicates)
        @id, @predicates = id, predicates
      end

      # @return [Array, nil] values of the predicate, nil when it is absent
      def [](pred)
        @predicates[pred]
      end

      # @return [String, nil] first label; nil when the entity has none
      def label
        (self['http://www.w3.org/2000/01/rdf-schema#label'] || []).first
      end

      def about
        self['http://schema.org/about']
      end

      # @return [String, nil] unescaped title of the English Wikipedia page
      #   taken from the first "about" URL; nil when there is no such URL
      def en_wikipage
        page_url = about && about.first
        return nil unless page_url

        page_url.
          scan(%r{https://en\.wikipedia\.org/wiki/(.+)$}).
          flatten.first.derp{|s| s && URI.unescape(s)}
      end

      def inspect
        "#<#{self.class}(#{[id, label].compact.join(': ')})>"
      end

      # @return [String] the label, or the id when there is no label
      def to_s
        label || id
      end

      def to_h
        @predicates
      end
    end
  end
end