kasabi 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,80 @@
1
+ module Kasabi
2
+
3
+ module Search
4
+
5
+ # Client object for working with a Kasabi Search API
6
class Client < BaseClient

  #Initialize the client to work with a specific endpoint
  #
  # The _options_ hash can contain the following values:
  # * *:apikey*: required. apikey authorized to use the API
  # * *:client*: HTTPClient object instance
  #
  # Both arguments are handed to BaseClient unchanged. BaseClient is not
  # visible here; presumably it is what sets @endpoint, @apikey and @client,
  # which the methods below read -- confirm against BaseClient.
  def initialize(endpoint, options={})
    super(endpoint, options)
  end

  # Search the Metabox indexes.
  #
  # query:: the query to perform.
  # params:: additional query parameters (see below)
  #
  # The _params_ hash can contain the following values:
  # * *:max*: The maximum number of results to return (default is 10)
  # * *:offset*: Offset into the query results (for paging; default is 0)
  # * *:sort*: ordered list of fields to be used when applying sorting
  #
  # The request always asks for JSON output (any :output in _params_ is
  # overwritten) and the parsed JSON document is returned.
  def search(query, params=nil)
    search_params = build_search_params(query, params)
    search_params[:output] = "json"
    response = @client.get(search_url, search_params)

    validate_response(response)

    #TODO provide a better structure?
    JSON.parse(response.content)
  end

  # URL of the facet service, derived from the configured endpoint
  def facet_url
    "#{@endpoint}/facet"
  end

  # URL of the search service, derived from the configured endpoint
  def search_url
    "#{@endpoint}/search"
  end

  # Append the client's apikey to a URL as a query parameter.
  #
  # search_url:: the URL to authorize
  #
  # Uses "?" when the URL has no query string yet and "&" otherwise, so the
  # result is a well-formed URL in both cases. The key is not URL-escaped.
  def authorize_url(search_url)
    separator = search_url.to_s.include?("?") ? "&" : "?"
    "#{search_url}#{separator}apikey=#{@apikey}"
  end

  # Perform a facetted search.
  #
  # query:: the query to perform
  # facets:: array of field names to facet on; must contain at least one entry
  # params:: additional query parameters (see below)
  #
  # The _params_ hash can contain the following values:
  # * *:top*: the maximum number of results to return for each facet
  # * *:output*: the preferred response format, can be html or xml (the default)
  #
  # Raises ArgumentError when _facets_ is nil or empty. (This used to be a
  # +throw+ of a String, which surfaces as UncaughtThrowError -- itself an
  # ArgumentError subclass -- so existing rescuers keep working.)
  def facet(query, facets, params=Hash.new)
    if facets.nil? || facets.empty?
      raise ArgumentError, "Must supply at least one facet"
    end
    search_params = build_search_params(query, params)
    search_params[:fields] = facets.join(",")
    response = @client.get(facet_url, search_params)

    validate_response(response)

    Kasabi::Search::Facet::Results.parse(response.content)
  end

  # Build the request parameters shared by search and facet requests.
  #
  # query:: the query to perform
  # params:: caller supplied parameters, or nil
  #
  # Returns a new hash; the caller's hash is never modified. +dup+ is used
  # rather than +clone+ so that a frozen _params_ hash yields a writable copy.
  def build_search_params(query, params)
    search_params = params ? params.dup : {}
    search_params[:query] = query
    search_params[:apikey] = @apikey
    search_params
  end

end
78
+
79
+ end
80
+ end
@@ -0,0 +1,462 @@
1
+ module Kasabi
2
+
3
+ #Module providing a SPARQL client library, support for parsing SPARQL query responses into Ruby objects
4
+ #and other useful behaviour
5
+ module Sparql
6
+
7
+     SPARQL_RESULTS_XML = "application/sparql-results+xml" #media type string for SPARQL query results serialized as XML
8
+     SPARQL_RESULTS_JSON = "application/sparql-results+json" #media type string for SPARQL query results serialized as JSON; the default format of Client#ask and Client#select
9
+
10
+     #Symmetric Bounded Description: CONSTRUCT template returning every statement that has ?uri as its subject (out-bound arcs) or as its object (in-bound arcs). ?uri is a placeholder that Client#describe_uri replaces with the resource being described.
10
+     #
11
+     #See http://n2.talis.com/wiki/Bounded_Descriptions_in_RDF
12
+     SYMMETRIC_BOUNDED_DESCRIPTION = <<-EOL
14
+ CONSTRUCT {?uri ?p ?o . ?s ?p2 ?uri .} WHERE { {?uri ?p ?o .} UNION {?s ?p2 ?uri .} }
15
+ EOL
16
+
17
+     #Labelled Bounded Description: CONSTRUCT template returning the statements that have ?uri as subject plus, where present, the rdfs:label, rdfs:comment, skos:prefLabel and rdfs:seeAlso of each referenced object. ?uri is a placeholder that Client#describe_uri replaces with the resource being described.
18
+     #
19
+     #See http://n2.talis.com/wiki/Bounded_Descriptions_in_RDF
20
+     LABELLED_BOUNDED_DESCRIPTION = <<-EOL
21
+ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
22
+ CONSTRUCT {
23
+  ?uri ?p ?o .
24
+  ?o rdfs:label ?label .
25
+  ?o rdfs:comment ?comment .
26
+  ?o <http://www.w3.org/2004/02/skos/core#prefLabel> ?plabel .
27
+  ?o rdfs:seeAlso ?seealso.
28
+ } WHERE {
29
+  ?uri ?p ?o .
30
+  OPTIONAL {
31
+    ?o rdfs:label ?label .
32
+  }
33
+  OPTIONAL {
34
+    ?o <http://www.w3.org/2004/02/skos/core#prefLabel> ?plabel .
35
+  }
36
+  OPTIONAL {
37
+    ?o rdfs:comment ?comment .
38
+  }
39
+  OPTIONAL {
40
+    ?o rdfs:seeAlso ?seealso.
41
+  }
42
+ }
43
+ EOL
44
+
45
+     #Symmetric Labelled Bounded Description: CONSTRUCT template combining the symmetric and labelled forms. Returns the in-bound
46
+     #and out-bound statements of ?uri, with the rdfs:label, rdfs:comment and rdfs:seeAlso of the resources at the other end. Unlike LABELLED_BOUNDED_DESCRIPTION it does not request skos:prefLabel.
47
+     #
48
+     #See http://n2.talis.com/wiki/Bounded_Descriptions_in_RDF
49
+     SYMMETRIC_LABELLED_BOUNDED_DESCRIPTION = <<-EOL
50
+ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
51
+ CONSTRUCT {
52
+  ?uri ?p ?o .
53
+  ?o rdfs:label ?label .
54
+  ?o rdfs:comment ?comment .
55
+  ?o rdfs:seeAlso ?seealso.
56
+  ?s ?p2 ?uri .
57
+  ?s rdfs:label ?label .
58
+  ?s rdfs:comment ?comment .
59
+  ?s rdfs:seeAlso ?seealso.
60
+ } WHERE {
61
+  { ?uri ?p ?o .
62
+    OPTIONAL {
63
+      ?o rdfs:label ?label .
64
+    }
65
+    OPTIONAL {
66
+      ?o rdfs:comment ?comment .
67
+    }
68
+    OPTIONAL {
69
+      ?o rdfs:seeAlso ?seealso.
70
+    }
71
+  }
72
+  UNION {
73
+    ?s ?p2 ?uri .
74
+    OPTIONAL {
75
+      ?s rdfs:label ?label .
76
+    }
77
+    OPTIONAL {
78
+      ?s rdfs:comment ?comment .
79
+    }
80
+    OPTIONAL {
81
+      ?s rdfs:seeAlso ?seealso.
82
+    }
83
+  }
84
+ }
85
+ EOL
86
+
87
+     DESCRIPTIONS = { #maps the description type symbol accepted by Client#describe_uri to its query template; each template uses ?uri as the placeholder for the resource
88
+       :cbd => "DESCRIBE ?uri", #plain DESCRIBE; the default type of describe_uri
89
+       :scbd => SYMMETRIC_BOUNDED_DESCRIPTION, #in-bound and out-bound statements
90
+       :lcbd => LABELLED_BOUNDED_DESCRIPTION, #out-bound statements plus labels of referenced resources
91
+       :slcbd => SYMMETRIC_LABELLED_BOUNDED_DESCRIPTION #in-bound and out-bound statements plus labels
92
+     }
93
+
94
+ #A simple SPARQL client that handles the basic HTTP traffic
95
+ class Client < BaseClient
96
+
97
+ attr_reader :graphs
98
+ attr_reader :named_graphs
99
+
100
+ #Initialize a client for a specific endpoint
101
+ #
102
+ #endpoint:: uri of the SPARQL endpoint
103
+ #options:: hash containing additional configuration options, including +:apikey+ for specifying api key
104
+ def initialize(endpoint, options={} )
105
+ super(endpoint, options)
106
+ @graphs = options[:graphs] || nil
107
+ @named_graphs = options[:named_graphs] || nil
108
+ end
109
+
110
+ #Add a default graph. This will be added as a default graph in the request protocol
111
+ def add_default_graph(graph_uri)
112
+ if @graphs == nil
113
+ @graphs = []
114
+ end
115
+ @graphs << graph_uri
116
+ end
117
+
118
+ #Add a named graph. This will be added as a named graph in the request protocol
119
+ def add_named_graph(graph_uri)
120
+ if @named_graphs == nil
121
+ @named_graphs = []
122
+ end
123
+ @named_graphs << graph_uri
124
+ end
125
+
126
+ #Perform a sparql query
127
+ #
128
+ #sparql:: a valid SPARQL query
129
+ #format:: specific a request format. Usually a media-type, but may be a name for a type, if not using Conneg
130
+ #graphs:: an array of default graphs
131
+ #named_graphs:: an array of named graphs
132
+ def query(sparql, format=nil, graphs=nil, named_graphs=nil)
133
+
134
+ params = {}
135
+ params["query"] = sparql
136
+
137
+ if @apikey != nil
138
+ params["apikey"] = @apikey
139
+ end
140
+
141
+ if graphs != nil
142
+ params["default-graph-uri"] = graphs
143
+ elsif @graphs != nil
144
+ params["default-graph-uri"] = @graphs
145
+ end
146
+
147
+ if named_graphs != nil
148
+ params["named-graph-uri"] = named_graphs
149
+ elsif @named_graphs != nil
150
+ params["named-graph-uri"] = @named_graphs
151
+ end
152
+
153
+ headers = {}
154
+ if format != nil
155
+
156
+ if @output_parameter_name != nil
157
+ params[@output_parameter_name] = format
158
+ else
159
+ headers["Accept"] = format
160
+ end
161
+
162
+ end
163
+
164
+ return @client.get( @endpoint, params, headers )
165
+ end
166
+
167
+ #Describe a uri, optionally specifying a form of bounded description
168
+ #
169
+ #uri:: the uri to describe
170
+ #format:: mimetype for results
171
+ #type:: symbol indicating type of description, i.e. +:cbd+, +:scbd+, +:lcbd+, or +:slcbd+
172
+ def describe_uri(uri, format="application/rdf+xml", type=:cbd)
173
+ template = Sparql::DESCRIPTIONS[type]
174
+ if template == nil
175
+ raise "Unknown description type"
176
+ end
177
+ query = Sparql::SparqlHelper.apply_initial_bindings(template, {"uri" => "<#{uri}>"} )
178
+ return describe(query, format)
179
+ end
180
+
181
+ #Perform a SPARQL DESCRIBE query.
182
+ #
183
+ #query:: the SPARQL query
184
+ #format:: the preferred response format
185
+ def describe(query, format="application/rdf+xml")
186
+ return query(query, format)
187
+ end
188
+
189
+ #DESCRIBE multiple resources in a single query. The provided array should contain
190
+ #the uris that are to be described
191
+ #
192
+ #This will generate a query like:
193
+ # DESCRIBE <http://www.example.org> <http://www.example.com> ...
194
+ #
195
+ #uris:: list of the uris to be described
196
+ #format:: the preferred response format. Default is RDF/XML
197
+ def multi_describe(uris, format="application/rdf+xml")
198
+ query = "DESCRIBE " + uris.map {|u| "<#{u}>" }.join(" ")
199
+ return query(query, format)
200
+ end
201
+
202
+ #Perform a SPARQL CONSTRUCT query.
203
+ #
204
+ #query:: the SPARQL query
205
+ #format:: the preferred response format
206
+ def construct(query, format="application/rdf+xml")
207
+ return query(query, format)
208
+ end
209
+
210
+ #Perform a SPARQL ASK query.
211
+ #
212
+ #query:: the SPARQL query
213
+ #format:: the preferred response format
214
+ def ask(query, format=Sparql::SPARQL_RESULTS_JSON)
215
+ return query(query, format)
216
+ end
217
+
218
+ #Perform a SPARQL SELECT query.
219
+ #
220
+ #query:: the SPARQL query
221
+ #format:: the preferred response format
222
+ def select(query, format=Sparql::SPARQL_RESULTS_JSON)
223
+ return query(query, format)
224
+ end
225
+
226
+ end
227
+
228
+ #Simple helper class for manipulating and executing SPARQL queries and manipulating the results
229
+ class SparqlHelper
230
+ VARIABLE_MATCHER = /(\?|\$)([a-zA-Z]+)/
231
+
232
+ #Apply some initial bindings to parameters in a query
233
+ #
234
+ #The keys in the values hash are used to replace variables in a query
235
+ #The values are supplied as is, allowing them to be provided as URIs, or typed literals
236
+ #according to Turtle syntax.
237
+ #
238
+ #Any keys in the hash that are not in the query are ignored. Any variables not found
239
+ #in the hash remain unbound.
240
+ #
241
+ #query:: the query whose initial bindings are to be set
242
+ #values:: hash of query name to value
243
+ def SparqlHelper.apply_initial_bindings(query, bindings={})
244
+ copy = query.clone()
245
+ copy.gsub!(VARIABLE_MATCHER) do |pattern|
246
+ key = $2
247
+ if bindings.has_key?(key)
248
+ bindings[key].to_s
249
+ else
250
+ pattern
251
+ end
252
+ end
253
+ return copy
254
+ end
255
+
256
+ #Convert a SPARQL query result binding into a hash suitable for passing
257
+ #to the apply_initial_bindings method.
258
+ #
259
+ #The result param is assumed to be a Ruby hash that reflects the structure of
260
+ #a binding in a SELECT query result (i.e. the result of parsing the <tt>application/sparql-results+json</tt>
261
+ #format and extracting an specific result binding.
262
+ #
263
+ #The method is intended to be used to support cases where an initial select query is
264
+ #performed to extract some variables that can later be plugged into a subsequent
265
+ #query
266
+ #
267
+ #result:: hash conforming to structure of a <tt>binding</tt> in the SPARQL JSON format
268
+ def SparqlHelper.result_to_query_binding(result)
269
+ hash = {}
270
+ result.each_pair do |key, value|
271
+ if value["type"] == "uri"
272
+ hash[key] = "<#{value["value"]}>"
273
+ elsif (value["type"] == "literal" && !value.has_key?("datatype"))
274
+ hash[key] = "\"#{value["value"]}\""
275
+ elsif (value["type"] == "literal" && value.has_key?("datatype"))
276
+ hash[key] = "\"#{value["value"]}\"^^#{value["datatype"]}"
277
+ else
278
+ #do nothing for bnodes
279
+ end
280
+ end
281
+ return hash
282
+ end
283
+
284
+ #Convert Ruby hash structured according to SPARQL JSON format
285
+ #into an array of hashes by calling result_to_query_binding on each binding
286
+ #into the results.
287
+ #
288
+ #E.g:
289
+ #<tt>results = Sparql::SparqlHelper.select(query, sparql_client)</tt>
290
+ #<tt>bindings = Sparql::SparqlHelper.results_to_query_bindings(results)</tt>
291
+ #
292
+ #results:: hash conforming to SPARQL SELECT structure
293
+ def SparqlHelper.results_to_query_bindings(results)
294
+ bindings = []
295
+
296
+ results["results"]["bindings"].each do |result|
297
+ bindings << result_to_query_binding(result)
298
+ end
299
+ return bindings
300
+ end
301
+
302
+ #Perform a simple SELECT query on an endpoint.
303
+ #Will request the results using the SPARQL JSON results format, and parse the
304
+ #resulting JSON results. The result will therefore be a simple ruby hash of the results
305
+ #
306
+ #An error will be raised if the response is HTTP OK.
307
+ #
308
+ #query:: the SPARQL SELECT query
309
+ #sparql_client:: a configured Sparql Client object
310
+ def SparqlHelper.select(query, sparql_client)
311
+ resp = sparql_client.select(query, Sparql::SPARQL_RESULTS_JSON)
312
+ if resp.status != 200
313
+ raise "Error performing sparql query: #{resp.status} #{resp.reason}\n#{resp.content}"
314
+ end
315
+ return JSON.parse( resp.content )
316
+ end
317
+
318
+ #Performs an ASK query on an endpoint, returing a boolean true/false response
319
+ #
320
+ #Will request the results using the SPARQL JSON results format, parse the
321
+ #resulting JSON results, and extract the true/false response.
322
+ #
323
+ #query:: the SPARQL SELECT query
324
+ #sparql_client:: a configured Sparql Client object
325
+ def SparqlHelper.ask(query, sparql_client)
326
+ json = SparqlHelper.select(query, sparql_client)
327
+ return json["boolean"] == "true"
328
+ end
329
+
330
+ #Performs an ASK query on the SPARQL endpoint to test whether there are any statements
331
+ #in the triple store about the specified uri.
332
+ #
333
+ #uri:: the uri to test for
334
+ #sparql_client:: a configured Sparql Client object
335
+ def SparqlHelper.exists(uri, sparql_client)
336
+ return SparqlHelper.ask("ASK { <#{uri}> ?p ?o }", sparql_client)
337
+ end
338
+
339
+ #Perform a simple SELECT query on an endpoint and return a simple array of values
340
+ #
341
+ #Will request the results using the SPARQL JSON results format, and parse the
342
+ #resulting JSON results. The assumption is that the SELECT query contains a single "column"
343
+ #of values, which will be returned as an array
344
+ #
345
+ #Note this will lose any type information, only the value of the bindings are returned
346
+ #
347
+ #Also note that if row has an empty binding for the selected variable, then this row will
348
+ #be dropped from the resulting array
349
+ #
350
+ #query:: the SPARQL SELECT query
351
+ #sparql_client:: a configured Sparql Client object
352
+ def SparqlHelper.select_values(query, sparql_client)
353
+ results = SparqlHelper.select(query, sparql_client)
354
+ v = results["head"]["vars"][0];
355
+ values = [];
356
+ results["results"]["bindings"].each do |binding|
357
+ values << binding[v]["value"] if binding[v]
358
+ end
359
+ return values
360
+ end
361
+
362
+ #Perform a simple SELECT query and return the results as a simple array of hashes.
363
+ #Each entry in the array will be a row in the results, and each hash will have a key for
364
+ #each variable.
365
+ #
366
+ #Note that this will lose any type information, only the value of the bindings are returned
367
+ #
368
+ #Also note that if a row has an empty binding for a given variable, then this variable will
369
+ #not be presented in the hash for that row.
370
+ #
371
+ #query:: the SPARQL SELECT query
372
+ #sparql_client:: a configured Sparql Client object
373
+ def SparqlHelper.select_into_array(query, sparql_client)
374
+ results = SparqlHelper.select(query, sparql_client)
375
+ rows = []
376
+ results["results"]["bindings"].each do |binding|
377
+ row = {}
378
+ binding.each do |key, value|
379
+ row[key] = value["value"]
380
+ end
381
+ rows << row
382
+ end
383
+ return rows
384
+ end
385
+
386
+ #Perform a simple SELECT query on an endpoint and return a single result
387
+ #
388
+ #Will request the results using the SPARQL JSON results format, and parse the
389
+ #resulting JSON results. The assumption is that the SELECT query returns a single
390
+ #value (i.e single variable, with single binding)
391
+ #
392
+ #Note this will lose any type information, only the value of the binding is returned
393
+ #If additional results are returned, then these are ignored
394
+ #
395
+ #query:: the SPARQL SELECT query
396
+ #sparql_client:: a configured Sparql Client object
397
+ def SparqlHelper.select_single_value(query, sparql_client)
398
+ results = SparqlHelper.select(query, sparql_client)
399
+ v = results["head"]["vars"][0];
400
+ return results["results"]["bindings"][0][v]["value"]
401
+ end
402
+
403
+ #Perform a SPARQL CONSTRUCT query against an endpoint, requesting the results in JSON
404
+ #
405
+ #Will request the results as application/json (with the expectation that it returns RDF_JSON),
406
+ #and parses the resulting JSON document.
407
+ #
408
+ #query:: the SPARQL SELECT query
409
+ #sparql_client:: a configured Sparql Client object
410
+ def SparqlHelper.construct_to_resource_hash(query, sparql_client)
411
+ resp = sparql_client.construct(query, "application/json")
412
+ if resp.status != 200
413
+ raise "Error performing sparql query: #{resp.status} #{resp.reason}\n#{resp.content}"
414
+ end
415
+ return JSON.parse( resp.content )
416
+ end
417
+
418
+ #Perform a SPARQL DESCRIBE query against an endpoint, requesting the results in JSON
419
+ #
420
+ #Will request the results as application/json (with the expectation that it returns RDF_JSON),
421
+ #and parses the resulting JSON document.
422
+ #
423
+ #query:: the SPARQL SELECT query
424
+ #sparql_client:: a configured Sparql Client object
425
+ def SparqlHelper.describe_to_resource_hash(query, sparql_client)
426
+ resp = sparql_client.describe(query, "application/json")
427
+ if resp.status != 200
428
+ raise "Error performing sparql query: #{resp.status} #{resp.reason}\n#{resp.content}"
429
+ end
430
+ return JSON.parse( resp.content )
431
+ end
432
+
433
+ #DESCRIBE multiple resources in a single SPARQL request
434
+ #
435
+ #uris:: an array of URIs
436
+ #sparql_client:: a configured Sparql Client objec
437
+ def SparqlHelper.multi_describe(uris, sparql_client)
438
+ resp = sparql_client.multi_describe(uris, "application/json")
439
+ if resp.status != 200
440
+ raise "Error performing sparql query: #{resp.status} #{resp.reason}\n#{resp.content}"
441
+ end
442
+ return JSON.parse( resp.content )
443
+ end
444
+
445
+ #Describe a single URI using one of several forms of Bounded Description
446
+ #See Sparql Client.describe_uri
447
+ #
448
+ #uri:: resource to describe
449
+ #sparql_client:: configured SPARQL client
450
+ #type:: form of bounded description to generate
451
+ def SparqlHelper.describe_uri(uri, sparql_client, type=:cbd)
452
+ resp = sparql_client.describe_uri(uri, "application/json", type)
453
+ if resp.status != 200
454
+ raise "Error performing sparql query: #{resp.status} #{resp.reason}\n#{resp.content}"
455
+ end
456
+ return JSON.parse( resp.content )
457
+ end
458
+ end
459
+
460
+ end
461
+
462
+ end