gimme_wikidata 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,157 @@
1
module GimmeWikidata

  ##
  # Models an Entity on Wikidata.
  #
  # An Entity is subclassed into Item and Property.
  #
  # An Entity has to have a valid Wikidata id, but the other class data is optional
  #
  # See: https://www.wikidata.org/wiki/Wikidata:Glossary#Entities.2C_items.2C_properties_and_queries
  class Entity

    ##
    # The Wikidata id of the Entity in the form 'PN' or 'QN' where 'P' means Property and 'Q' means Item. N can be any positive integer.
    attr_reader :id
    ##
    # The Entity's label (in the language specified by WikidataAPI). Can be nil in the case of an unresolved Entity.
    attr_accessor :label
    ##
    # The Entity's description (in the language specified by WikidataAPI). Can be nil in the case of an unresolved Entity.
    attr_accessor :description
    ##
    # The Entity's aliases (in the language specified by WikidataAPI). Can be an empty array in the case of an unresolved Entity.
    attr_accessor :aliases
    ##
    # Each Entity has a number of claims, stored as an array of Claim objects.
    attr_accessor :claims

    ##
    # Creates an Entity.
    #
    # * *Parameters:*
    #   - +id+ -> The Wikidata id; checked with GimmeWikidata.valid_id?
    #   - +label+, +description+, +aliases+ -> optional, stored as given
    #   - +claims+ -> optional Array of Claims; +nil+ is stored as an empty Array
    # * *Raises:*
    #   - +ArgumentError+ -> if +id+ is not a valid Wikidata id
    def initialize(id, label = nil, description = nil, aliases = nil, claims = [])
      # +raise+, not +throw+: +throw+ is for catch/throw control flow and would surface as an UncaughtThrowError
      raise ArgumentError, "Invalid Wikidata Entity id: #{id}" unless GimmeWikidata.valid_id? id
      @id = id
      @label = label
      @description = description
      @aliases = aliases
      @claims = claims || []
    end

    ##
    # Does this Entity have a fingerprint (an +id+, +label+ and +description+)
    #
    # Returns:
    # - boolean
    def has_fingerprint?
      [@id, @label, @description].none?(&:nil?)
    end

    ##
    # Does this Entity have any claims?
    #
    # Returns:
    # - boolean
    def has_claims?
      !@claims.empty?
    end

    ##
    # Get all the Claims which have Properties with the passed id
    #
    # * *Parameters:*
    #   - +id+ -> The Wikidata id of the Property to filter by
    # * *Returns:*
    #   - An Array of Claims (empty if none match)
    # * *Raises:*
    #   - +ArgumentError+ -> if +id+ is not a valid Wikidata id
    def claims_with_property_id(id)
      raise ArgumentError, 'Invalid Wikidata Id' unless GimmeWikidata.valid_id? id
      @claims.select { |c| c.property.id == id }
    end

    ##
    # Get more data on an Entity's Properties
    #
    # Entities have Properties (through Claims), which are by default only returned as ids. This function gets more details for each of these.
    #
    # * *Parameters:*
    #   - +props+ -> The Props to fetch. Defaults to +Props::LABELS+.
    # * *Returns:*
    #   - +nil+
    # * *Raises:*
    #   - +StandardError+ -> if there was an error in fetching the data
    def resolve_properties(props = [Props::LABELS])
      return nil unless has_claims?
      unique_property_ids = claims.map { |c| c.property.id }.uniq
      # Honour the caller's +props+ (they used to be ignored in favour of labels only)
      response = GimmeWikidata.fetch(unique_property_ids, props: props)
      raise StandardError, "Could not resolve Entity (#{@id} - #{@label}) properties" unless response.was_successful?
      # The parser can leave nil entries for entities it could not build; skip them
      response.entities.compact.each do |property_details|
        claims_with_property_id(property_details.id).each { |c| c.property.resolve_with(property_details) }
      end
      nil
    end

    ##
    # Resolves the Claims that refer to Entities
    #
    # Claims often refer to another entity on Wikidata. By default, we will only know the ids of these with one fetch Entity call. This therefore gets more data regarding these.
    #
    # Every Claim whose value has a fetched id is resolved, including several Claims that point at the same Item.
    #
    # * *Parameters:*
    #   - +props+ -> The Props to fetch. Defaults to +Props::LABELS+
    def resolve_claims(props = [Props::LABELS])
      return unless has_claims?
      item_claims = get_claims_by_value_type(:item)
      # Nothing refers to an Item, so there is nothing to ask the API for
      return if item_claims.empty?
      claims_by_item_id = item_claims.group_by { |c| c.value.id }
      entity_result = GimmeWikidata.fetch(claims_by_item_id.keys, props: props)
      entity_result.entities.compact.each do |item_details|
        # Ids that were not asked for are ignored instead of raising
        claims_by_item_id.fetch(item_details.id, []).each { |c| c.value.resolve_with(item_details) }
      end
    end

    ##
    # Get all Claims that have a specific +value_type+ symbol
    #
    # * *Parameters:*
    #   - +type+ -> Symbol value of the Claim's +value_type+ attribute
    # * *Returns:*
    #   - An Array of Claims
    def get_claims_by_value_type(type)
      return [] unless has_claims?
      wanted_type = type.to_sym
      @claims.select { |c| c.value_type == wanted_type }
    end

    ##
    # Returns a simplified version of an Entity's Claims
    # * *Returns:*
    #   - Array of hashes representing simplified Claims. See Claim.simplify()
    def simple_claims
      claims.map(&:simplify)
    end

    ##
    # Resolves an incomplete Entity with additional details
    #
    # Copies the label, description, aliases and claims of +entity_details+ onto this Entity.
    #
    # * *Parameters:*
    #   - +entity_details+ -> Either an Item or an Property
    # * *Raises:*
    #   - +ArgumentError+ -> if attempting to resolve an Item with a Property, or a Property with an Item
    #   - +StandardError+ -> if attempting to resolve an Entity with details that have unequal ids
    def resolve_with(entity_details)
      # The first character of an id ('P' or 'Q') tells Properties and Items apart
      raise ArgumentError, "Attempting to resolve an Item with a Property or vice versa" if entity_details.id[0] != @id[0]
      raise StandardError, "Attempting to resolve Entity with id #{@id} with entity_details with id #{entity_details.id}" unless @id == entity_details.id
      #TODO: Wouldn't it be easier to simply overwrite the Entity? (self = entity_details?)
      @label = entity_details.label
      @description = entity_details.description
      @aliases = entity_details.aliases
      @claims = entity_details.claims || []
    end

  end

end
@@ -0,0 +1,37 @@
1
module GimmeWikidata

  ##
  # Models a parsed response from a get entities query to the Wikidata API
  class EntityResult

    attr_reader :success, :error, :entities

    ##
    # * *Parameters:*
    #   - +success+ -> the success flag of the response; only the Integer +1+ counts as success
    #   - +error+ -> the error of the response, if any. A non-nil error always makes the result unsuccessful
    #   - +entities+ -> the Array of entities fetched
    def initialize(success, error = nil, entities = [])
      @error = error
      @success = @error.nil? && success == 1
      @entities = entities
    end

    ##
    # Was the query successful (success flag of 1 and no error)?
    #
    # Returns:
    # - boolean
    def was_successful?
      @success
    end

    ##
    # Does this result hold no entities?
    #
    # Returns:
    # - boolean
    def empty?
      @entities.empty?
    end

    ##
    # Calls +resolve_properties+ (with its default arguments) on every entity of this result.
    #
    # +nil+ entries are skipped.
    #
    # * *Returns:*
    #   - the +entities+ Array
    def resolve_all_properties
      entities.each { |e| e.resolve_properties unless e.nil? }
    end

    ##
    # Calls +resolve_claims+ (with its default arguments) on every entity of this result.
    #
    # +nil+ entries are skipped.
    #
    # * *Returns:*
    #   - the +entities+ Array
    def resolve_all_claims
      entities.each { |e| e.resolve_claims unless e.nil? }
    end

  end

end
@@ -0,0 +1,61 @@
1
+ require 'ruby-enum'
2
+
3
module GimmeWikidata

  ##
  # The languages possible when communicating with the Wikidata API
  #
  # Currently available values:
  # - +Languages::ENGLISH+ = 'en'
  # - +Languages::GERMAN+ = 'de'
  #
  # See: https://www.wikidata.org/w/api.php?action=help&modules=wbgetentities for a list of the supported languages
  class Languages
    include Ruby::Enum

    define :ENGLISH, 'en'
    define :GERMAN, 'de'
    # etc...

  end

  ##
  # Models an enum of the 'action' parameters in Wikidata API calls
  #
  # Available values:
  # - +Actions::SEARCH+ = 'wbsearchentities'
  # - +Actions::GET_ENTITIES+ = 'wbgetentities'
  #
  # See https://www.wikidata.org/w/api.php?action=help&modules=main
  class Actions
    include Ruby::Enum

    define :SEARCH, 'wbsearchentities'
    define :GET_ENTITIES, 'wbgetentities'
  end

  ##
  # Models an enum of the 'props' arguments in wbgetentities calls.
  #
  # Available values:
  # - +Props::INFO+ = 'info'
  # - +Props::SITELINKS+ = 'sitelinks'
  # - +Props::ALIASES+ = 'aliases'
  # - +Props::LABELS+ = 'labels'
  # - +Props::DESCRIPTIONS+ = 'descriptions'
  # - +Props::CLAIMS+ = 'claims'
  # - +Props::DATATYPE+ = 'datatype'
  #
  # See https://www.wikidata.org/w/api.php?action=help&modules=wbgetentities
  class Props
    include Ruby::Enum

    define :INFO, 'info'
    define :SITELINKS, 'sitelinks'
    define :ALIASES, 'aliases'
    define :LABELS, 'labels'
    define :DESCRIPTIONS, 'descriptions'
    define :CLAIMS, 'claims'
    define :DATATYPE, 'datatype'
  end

end
@@ -0,0 +1,42 @@
1
##
# Module to deep symbolize keys in a Hash and Array of Hashes
#
# Usable directly (<tt>SymbolizeHelper.symbolize_recursive(hash)</tt>) or, after
# <tt>using SymbolizeHelper</tt>, through the refinement <tt>Hash#deep_symbolize_keys</tt>.
#
# Code written by EdvardM at: http://apidock.com/rails/Hash/deep_symbolize_keys
module SymbolizeHelper
  extend self

  ##
  # Returns a new Hash with every key converted to a Symbol, descending into
  # nested Hashes and into Arrays. The passed Hash is not modified.
  #
  # Keys that cannot be converted (they do not respond to +to_sym+, e.g. Integers) are kept as they are.
  def symbolize_recursive(hash)
    hash.each_with_object({}) do |(key, value), symbolized|
      symbolized[symbolize_key(key)] = transform(value)
    end
  end

  private

  # Converts a single key, leaving keys without +to_sym+ untouched instead of raising
  def symbolize_key(key)
    key.respond_to?(:to_sym) ? key.to_sym : key
  end

  # Symbolizes Hashes, maps over Arrays and passes any other value through unchanged
  def transform(thing)
    case thing
    when Hash then symbolize_recursive(thing)
    when Array then thing.map { |v| transform(v) }
    else thing
    end
  end

  refine Hash do
    def deep_symbolize_keys
      SymbolizeHelper.symbolize_recursive(self)
    end
  end
end
30
+
31
class Array
  ##
  # Merges an Array of Hashes into a single Hash, keeping duplicate values in an Array
  #
  # A key that occurs once keeps its value as it is. When a key occurs again, its values are
  # collected in an Array, in order of appearance. If the value already stored for a repeated
  # key is an Array, the new value is appended to a copy of it.
  #
  # Neither the receiver nor the Hashes inside it are modified.
  #
  # Raises StandardError if the Array is not an Array of Hashes, exclusively
  def merge_hashes
    raise StandardError, "Array is not an Array of Hashes" unless all? { |e| e.is_a?(Hash) }
    each_with_object({}) do |hash, merged|
      hash.each do |key, value|
        merged[key] =
          if !merged.key?(key)
            value
          elsif merged[key].is_a?(Array)
            merged[key] + [value]
          else
            # Wrap explicitly: splatting would turn a Hash into key/value pairs and drop nil;
            # key? (not truthiness) keeps an earlier nil/false value from being overwritten
            [merged[key], value]
          end
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ require 'gimme_wikidata/entity'
2
+
3
module GimmeWikidata

  ##
  # Models an Item on Wikidata, which is a "real-world object, concept, event"
  #
  # Please see: https://www.wikidata.org/wiki/Wikidata:Glossary#Entities.2C_items.2C_properties_and_queries for more details
  class Item < Entity

    ##
    # Creates an Item. Takes the same arguments as Entity, but the id must be valid as an Item id.
    #
    # * *Raises:*
    #   - +ArgumentError+ -> if +id+ is rejected by <tt>GimmeWikidata.valid_id?(id, [:item])</tt>
    def initialize(id, label = nil, description = nil, aliases = nil, claims = [])
      # +raise+, not +throw+: +throw+ is for catch/throw control flow and would surface as an UncaughtThrowError
      raise ArgumentError, "Invalid Wikidata Item id: #{id}" unless GimmeWikidata.valid_id?(id, [:item])
      super(id, label, description, aliases, claims)
    end

  end

end
@@ -0,0 +1,211 @@
1
+ require 'gimme_wikidata/wikidata_api'
2
+ require 'carbon_date'
3
+
4
module GimmeWikidata

  ##
  # Responsible for parsing the JSON data returned from the Wikidata API.
  #
  # Parses responses to the following Wikidata API calls:
  # - +wbsearchentities+
  # - +wbgetentities+
  #
  # All hashes are expected to have Symbol keys.
  class Parser

    include CarbonDate

    ##
    # Parses the results from a search query (wbsearchentities)
    # * *Args* :
    #   - +response+ -> hash form of the response from the Wikidata API
    # * *Returns* :
    #   - A +Search+ object representing and containing the +SearchResults+ found
    # * *Raises* :
    #   - +ArgumentError+ -> if a successful response has no +:searchinfo+
    def self.parse_search_response(response)
      search = Search.new(
        response.fetch(:success, false),
        response.fetch(:error, nil),
        response.fetch(:searchinfo, {}).fetch(:search, nil))
      return search unless search.was_successful?
      raise ArgumentError, 'response did not seem to be a response from a search query' if response[:searchinfo].nil?
      # A response without a :search list simply yields no results
      (response[:search] || []).each do |r|
        search.results << SearchResult.new(r[:id], r[:label], r[:description])
      end
      search
    end

    ##
    # Parses the results from a get entities query (wbgetentities)
    #
    # * *Args* :
    #   - +response+ -> hash form of the response from the Wikidata API
    # * *Returns* :
    #   - A +EntityResult+ object representing the +Entities+ fetched. An entity whose +:type+ is
    #     neither 'item' nor 'property' is represented by +nil+ (see +parse_entity+)
    def self.parse_entity_response(response)
      entity_result = EntityResult.new(response.fetch(:success, false), response.fetch(:error, nil))
      return entity_result unless entity_result.was_successful?
      # A response without :entities simply yields no entities
      (response[:entities] || {}).each_value do |raw_entity|
        entity_result.entities << parse_entity(raw_entity)
      end
      entity_result
    end

    ##
    # Parses a single entity as part of a get entities query (wbgetentities)
    #
    # Label, description and aliases are read for the language returned by +WikidataAPI.get_language+ only.
    #
    # * *Args* :
    #   - +e+ -> hash form of entity from the Wikidata API
    # * *Returns* :
    #   - Either an +Item+ object or a +Property+ object representing the passed +Entity+,
    #     or +nil+ if its +:type+ is neither 'item' nor 'property'
    def self.parse_entity(e)
      lang = WikidataAPI.get_language.to_sym
      # Parse the fingerprint (id, label and description)
      id = e.fetch(:id, nil)
      label = e.fetch(:labels, {}).fetch(lang, {}).fetch(:value, nil)
      description = e.fetch(:descriptions, {}).fetch(lang, {}).fetch(:value, nil)
      # Parse aliases, if any:
      raw_aliases = e.fetch(:aliases, {}).fetch(lang, nil)
      aliases = raw_aliases.nil? ? [] : raw_aliases.map { |a| a.fetch(:value, nil) }
      # Parse claims, if any
      raw_claims = e.fetch(:claims, nil)
      claims = raw_claims.nil? ? [] : parse_claims(raw_claims)
      # Create an Item or a Property
      case e.fetch(:type, nil)
      when 'item' then Item.new(id, label, description, aliases, claims)
      when 'property' then Property.new(id, label, description, aliases, claims)
      else nil
      end
    end

    ##
    # Parses the +:claims+ hash of an entity
    #
    # Only the +:mainsnak+ of each statement is parsed.
    #
    # * *Args* :
    #   - +c+ -> hash whose values are Arrays of statement hashes
    # * *Returns* :
    #   - An Array of +Claim+ objects, one per statement
    def self.parse_claims(c)
      c.values.flatten.map { |statement| parse_snak(statement[:mainsnak]) }
    end

    ##
    # Parses a single snak into a +Claim+
    #
    # The snak's +:datatype+ selects the +parse_snak_*+ method that interprets its raw value.
    #
    # A List of all Wikidata datatypes: https://www.wikidata.org/wiki/Special:ListDatatypes
    #
    # * *Args* :
    #   - +s+ -> hash form of the snak (+:property+, +:datatype+ and +:datavalue+ are read)
    # * *Returns* :
    #   - A +Claim+ built from the snak's Property, its parsed value and the value type symbol
    # * *Raises* :
    #   - +StandardError+ -> if the datatype is not supported
    def self.parse_snak(s)
      property = Property.new(s[:property])
      # NOTE(review): raw_value is nil when the snak has no :datavalue; most parse_snak_* helpers
      # call methods such as +fetch+ on it and will raise - confirm how such snaks should be handled
      raw_value = s.fetch(:datavalue, {}).fetch(:value, nil)

      #TODO: Figure out why raw_value has some strange keys. Example => ':"key"'
      #TODO: Correct for the very strange keys in raw_value

      ##
      # TODO: Use meta-programming and public_send() to DRY this code up:
      value, value_type =
        case s[:datatype]
        # parse_snak_wikibase_item handles both entity types, so Property references go there too
        when 'wikibase-item', 'wikibase-property' then parse_snak_wikibase_item(raw_value)
        when 'external-id' then parse_snak_external_id(raw_value)
        when 'time' then parse_snak_time(raw_value)
        when 'commonsMedia' then parse_snak_commons_media(raw_value)
        when 'monolingualtext' then parse_snak_monolingual_text(raw_value)
        when 'string' then parse_snak_string(raw_value)
        when 'url' then parse_snak_url(raw_value)
        when 'globe-coordinate' then parse_snak_gps_coordinate(raw_value)
        when 'quantity' then parse_snak_quantity(raw_value)
        when 'math' then parse_snak_math(raw_value)
        else
          raise StandardError, "Unsupported Wikidata snak datatype: #{s[:datatype]}"
        end

      Claim.new(property, value, value_type)
    end

    # Individual Snak Parsing

    ##
    # Parses a value that refers to another Entity
    #
    # A List of all Wikidata datatypes: https://www.wikidata.org/wiki/Special:ListDatatypes
    #
    # Params:
    # - +raw_value+: a hash with the keys <tt>:"numeric-id"</tt> and <tt>:"entity-type"</tt>
    #
    # Returns:
    # - [unresolved Item, :item] or [unresolved Property, :property]
    #
    # Raises:
    # - +StandardError+ if the entity type is neither 'item' nor 'property'
    def self.parse_snak_wikibase_item(raw_value)
      id = raw_value.fetch(:"numeric-id", nil)
      type = raw_value.fetch(:"entity-type", nil)
      case type
      when 'item'
        return Item.new("Q#{id}"), :item
      when 'property'
        return Property.new("P#{id}"), :property
      else
        # Report the type that was actually read (the key is :"entity-type", not :entity_type)
        raise StandardError, "Unknown Wikibase item type #{type}"
      end
    end

    ##
    # Returns:
    # - [+raw_value+ unchanged, :external_id]
    def self.parse_snak_external_id(raw_value)
      #TODO: Extract the authoritative source
      return raw_value, :external_id
    end

    ##
    # Parses a Wikidata Time value
    #
    # Times on Wikidata are stored as timestamp in the ISO8601 standard. Use the CarbonDate gem (https://github.com/bradleymarques/carbon_date) to interpret these
    #
    # Params:
    # - +raw_value+: a hash with the keys:
    #   - +:time+: The time in the ISO8601 standard
    #   - +:timezone+: currently unused
    #   - +:before+: currently unused
    #   - +:after+: currently unused
    #   - +:precision+: an integer value (0..14)
    #   - +:calendarmodel+: currently unused
    #
    # Example +raw_value+:
    #   {"time": "+1940-10-10T00:00:00Z", "timezone": 0, "before": 0, "after": 0, "precision": 11, "calendarmodel": "http://www.wikidata.org/entity/Q1985727"}
    #
    # Returns:
    # - [CarbonDate::Date object, :carbon_date]
    def self.parse_snak_time(raw_value)
      time = raw_value.fetch(:time, nil)
      precision = raw_value.fetch(:precision, nil)
      return CarbonDate::Date.iso8601(time, precision), :carbon_date
    end

    ##
    # Builds the Wikimedia Commons page URL of a media file from its file name (spaces become underscores)
    #
    # Returns:
    # - [URL String, :media]
    def self.parse_snak_commons_media(raw_value)
      file_name = raw_value.to_s.tr(' ', '_')
      return "https://commons.wikimedia.org/wiki/File:#{file_name}", :media
    end

    ##
    # Returns:
    # - [the +:text+ of +raw_value+ (nil if absent), :text]
    def self.parse_snak_monolingual_text(raw_value)
      return raw_value.fetch(:text, nil), :text
    end

    ##
    # Returns:
    # - [+raw_value+ unchanged, :text]
    def self.parse_snak_string(raw_value)
      return raw_value, :text
    end

    ##
    # Returns:
    # - [+raw_value+ unchanged, :url]
    def self.parse_snak_url(raw_value)
      return raw_value, :url
    end

    ##
    # Returns:
    # - [{latitude:, longitude:} taken from +raw_value+, :gps_coordinates]
    def self.parse_snak_gps_coordinate(raw_value)
      return {latitude: raw_value[:latitude], longitude: raw_value[:longitude]}, :gps_coordinates
    end

    ##
    # Parses a Wikidata quantity
    #
    # Params:
    # - +raw_value+: a hash with the keys +:amount+, +:upperBound+, +:lowerBound+ and +:unit+ (each defaults to 0 if absent)
    #
    # Returns:
    # - [{amount:, upper_bound:, lower_bound:, unit:}, :quantity]. The amount and bounds are Floats.
    #   The unit is a Float when it is numeric; a String unit that does not start with a number
    #   is returned unchanged.
    def self.parse_snak_quantity(raw_value)
      unit = raw_value.fetch(:unit, 0)
      # String#to_f yields 0.0 for text that does not start with a number, which would
      # discard a non-numeric unit; convert only what to_f can actually read
      numeric_unit = !unit.is_a?(String) || unit =~ /\A\s*[+-]?\.?\d/
      quantity = {
        amount: raw_value.fetch(:amount, 0).to_f,
        upper_bound: raw_value.fetch(:upperBound, 0).to_f,
        lower_bound: raw_value.fetch(:lowerBound, 0).to_f,
        unit: numeric_unit ? unit.to_f : unit
      }
      return quantity, :quantity
    end

    ##
    # Returns:
    # - [+raw_value+ unchanged, :math]
    def self.parse_snak_math(raw_value)
      return raw_value, :math
    end

  end

end