kasabi 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,80 @@
1
+ module Kasabi
2
+
3
+ module Search
4
+
5
+ # Client object for working with a Kasabi Search API
6
+ class Client < BaseClient
7
+
8
# Create a search client bound to a single endpoint.
#
# endpoint:: the endpoint this client works with
# options:: configuration hash, handed to BaseClient unchanged
#
# The _options_ hash can contain the following values:
# * *:apikey*: required. apikey authorized to use the API
# * *:client*: HTTPClient object instance
def initialize(endpoint, options = {})
  # A bare +super+ forwards endpoint and options exactly as received.
  super
end
16
+
17
# Run a query against the search service and return the parsed JSON reply.
#
# query:: the query to perform.
# params:: optional hash of additional query parameters (see below)
#
# The _params_ hash can contain the following values:
# * *:max*: The maximum number of results to return (default is 10)
# * *:offset*: Offset into the query results (for paging; default is 0)
# * *:sort*: ordered list of fields to be used when applying sorting
#
# The +:output+ parameter is always set to "json", replacing any value
# supplied in _params_. The response is passed to +validate_response+
# before its content is parsed.
def search(query, params = nil)
  request = build_search_params(query, params)
  request[:output] = "json"

  reply = @client.get(search_url, request)
  validate_response(reply)

  #TODO provide a better structure?
  JSON.parse(reply.content)
end
37
+
38
# Address of the facet service: the configured endpoint followed by "/facet".
def facet_url
  "#{@endpoint}/facet"
end
41
+
42
# Address of the search service: the configured endpoint followed by "/search".
def search_url
  "#{@endpoint}/search"
end
45
+
46
# Append the client's api key to a URL as an +apikey+ query parameter.
#
# search_url:: the URL to authorize, with or without an existing query string
#
# The key is joined with "&" when the URL already has a query string and
# with "?" otherwise, so a bare URL still yields a well-formed result.
# The key is inserted as-is (no URL-encoding is applied).
def authorize_url(search_url)
  separator = search_url.to_s.include?("?") ? "&" : "?"
  "#{search_url}#{separator}apikey=#{@apikey}"
end
49
+
50
# Perform a facet query and return the parsed facet results.
#
# query:: the query to perform
# facets:: array of field names to facet on; must contain at least one entry
# params:: additional query parameters (see below)
#
# The _params_ hash can contain the following values:
# * *:top*: the maximum number of results to return for each facet
# * *:output*: the preferred response format, can be html or xml (the default)
#
# Raises ArgumentError if _facets_ is nil or empty.
def facet(query, facets, params = Hash.new)
  # +raise+, not +throw+: throw is a non-local jump for catch blocks and
  # would surface as an "uncaught throw" error instead of this message.
  if facets.nil? || facets.empty?
    raise ArgumentError, "Must supply at least one facet"
  end

  search_params = build_search_params(query, params)
  search_params[:fields] = facets.join(",")
  response = @client.get(facet_url, search_params)

  validate_response(response)

  Kasabi::Search::Facet::Results.parse(response.content)
end
65
+
66
# Build the request parameters for a search or facet call.
#
# query:: the query to perform
# params:: hash of extra parameters, or nil for none
#
# Returns a new hash holding the entries of _params_ plus +:query+ and
# +:apikey+ (which replace any entries of the same name). The caller's
# hash is never modified.
def build_search_params(query, params)
  # dup rather than clone: clone would carry over a frozen state and make
  # the assignments below fail for frozen parameter hashes.
  search_params = params.nil? ? {} : params.dup
  search_params[:query] = query
  search_params[:apikey] = @apikey
  search_params
end
76
+
77
+ end
78
+
79
+ end
80
+ end
@@ -0,0 +1,462 @@
1
+ module Kasabi
2
+
3
+ #Module providing a SPARQL client library, support for parsing SPARQL query responses into Ruby objects
4
+ #and other useful behaviour
5
+ module Sparql
6
+
7
#Media types of the SPARQL query results formats (XML and JSON)
SPARQL_RESULTS_XML = "application/sparql-results+xml"
SPARQL_RESULTS_JSON = "application/sparql-results+json"

#Query template: every statement in which ?uri is the subject or the object
#(both in-bound and out-bound arc paths)
#
#See http://n2.talis.com/wiki/Bounded_Descriptions_in_RDF
SYMMETRIC_BOUNDED_DESCRIPTION = <<-EOL
CONSTRUCT {?uri ?p ?o . ?s ?p2 ?uri .} WHERE { {?uri ?p ?o .} UNION {?s ?p2 ?uri .} }
EOL

#Query template: the out-bound statements of ?uri plus any rdfs:label,
#skos:prefLabel, rdfs:comment and rdfs:seeAlso of the resources it refers to
#
#See http://n2.talis.com/wiki/Bounded_Descriptions_in_RDF
LABELLED_BOUNDED_DESCRIPTION = <<-EOL
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?uri ?p ?o .
?o rdfs:label ?label .
?o rdfs:comment ?comment .
?o <http://www.w3.org/2004/02/skos/core#prefLabel> ?plabel .
?o rdfs:seeAlso ?seealso.
} WHERE {
?uri ?p ?o .
OPTIONAL {
?o rdfs:label ?label .
}
OPTIONAL {
?o <http://www.w3.org/2004/02/skos/core#prefLabel> ?plabel .
}
OPTIONAL {
?o rdfs:comment ?comment .
}
OPTIONAL {
?o rdfs:seeAlso ?seealso.
}
}
EOL

#Query template combining the symmetric and labelled forms: in-bound and
#out-bound statements of ?uri, with rdfs:label, rdfs:comment and rdfs:seeAlso
#of the resources at the other end of each statement
#
#See http://n2.talis.com/wiki/Bounded_Descriptions_in_RDF
SYMMETRIC_LABELLED_BOUNDED_DESCRIPTION = <<-EOL
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?uri ?p ?o .
?o rdfs:label ?label .
?o rdfs:comment ?comment .
?o rdfs:seeAlso ?seealso.
?s ?p2 ?uri .
?s rdfs:label ?label .
?s rdfs:comment ?comment .
?s rdfs:seeAlso ?seealso.
} WHERE {
{ ?uri ?p ?o .
OPTIONAL {
?o rdfs:label ?label .
}
OPTIONAL {
?o rdfs:comment ?comment .
}
OPTIONAL {
?o rdfs:seeAlso ?seealso.
}
}
UNION {
?s ?p2 ?uri .
OPTIONAL {
?s rdfs:label ?label .
}
OPTIONAL {
?s rdfs:comment ?comment .
}
OPTIONAL {
?s rdfs:seeAlso ?seealso.
}
}
}
EOL

#Lookup from description type symbol to its query template. In every
#template the variable ?uri stands for the resource being described.
DESCRIPTIONS = {
  :cbd   => "DESCRIBE ?uri",
  :scbd  => SYMMETRIC_BOUNDED_DESCRIPTION,
  :lcbd  => LABELLED_BOUNDED_DESCRIPTION,
  :slcbd => SYMMETRIC_LABELLED_BOUNDED_DESCRIPTION
}
93
+
94
+ #A simple SPARQL client that handles the basic HTTP traffic
95
+ class Client < BaseClient
96
+
97
# Default graphs and named graphs sent with each query (nil when none are set)
attr_reader :graphs, :named_graphs

#Initialize a client for a specific endpoint
#
#endpoint:: uri of the SPARQL endpoint
#options:: hash containing additional configuration options, including +:apikey+ for specifying api key.
#          +:graphs+ and +:named_graphs+ supply the initial default and named graphs.
def initialize(endpoint, options = {})
  # A bare +super+ forwards endpoint and options exactly as received.
  super
  @graphs = options[:graphs] || nil
  @named_graphs = options[:named_graphs] || nil
end
109
+
110
#Register a default graph; it is sent as a default graph in the request protocol.
#
#graph_uri:: uri of the graph to add
#
#Returns the updated list of default graphs.
def add_default_graph(graph_uri)
  # The list is created lazily on first use.
  @graphs = [] if @graphs.nil?
  @graphs << graph_uri
end
117
+
118
#Register a named graph; it is sent as a named graph in the request protocol.
#
#graph_uri:: uri of the graph to add
#
#Returns the updated list of named graphs.
def add_named_graph(graph_uri)
  # The list is created lazily on first use.
  @named_graphs = [] if @named_graphs.nil?
  @named_graphs << graph_uri
end
125
+
126
#Send a SPARQL query to the endpoint with an HTTP GET and return the raw response.
#
#sparql:: a valid SPARQL query
#format:: specific a request format. Usually a media-type, but may be a name for a type, if not using Conneg
#graphs:: an array of default graphs; when nil, the client's own default graphs are used
#named_graphs:: an array of named graphs; when nil, the client's own named graphs are used
#
#The format is sent as a request parameter when @output_parameter_name is set
#(it is not assigned in this class -- presumably by BaseClient), otherwise as
#the Accept header.
def query(sparql, format = nil, graphs = nil, named_graphs = nil)
  params = { "query" => sparql }
  params["apikey"] = @apikey unless @apikey.nil?

  # Per-call graphs take precedence over those configured on the client.
  default_graphs = graphs.nil? ? @graphs : graphs
  params["default-graph-uri"] = default_graphs unless default_graphs.nil?

  named = named_graphs.nil? ? @named_graphs : named_graphs
  params["named-graph-uri"] = named unless named.nil?

  headers = {}
  unless format.nil?
    if @output_parameter_name.nil?
      headers["Accept"] = format
    else
      params[@output_parameter_name] = format
    end
  end

  @client.get(@endpoint, params, headers)
end
166
+
167
#Describe a uri using one of the bounded description templates
#
#uri:: the uri to describe
#format:: mimetype for results
#type:: symbol indicating type of description, i.e. +:cbd+, +:scbd+, +:lcbd+, or +:slcbd+
#
#Raises an error if _type_ is not a key of Sparql::DESCRIPTIONS.
def describe_uri(uri, format = "application/rdf+xml", type = :cbd)
  template = Sparql::DESCRIPTIONS[type]
  raise "Unknown description type" if template.nil?

  # The template's ?uri variable is replaced by the uri in angle brackets.
  sparql = Sparql::SparqlHelper.apply_initial_bindings(template, { "uri" => "<#{uri}>" })
  describe(sparql, format)
end
180
+
181
#Run a SPARQL DESCRIBE query; a thin wrapper around #query.
#
#query:: the SPARQL query
#format:: the preferred response format. Default is RDF/XML
def describe(query, format = "application/rdf+xml")
  # With parentheses this calls the #query method, not the local parameter.
  query(query, format)
end
188
+
189
#Describe several resources with one query.
#
#The generated query has the form:
# DESCRIBE <http://www.example.org> <http://www.example.com> ...
#
#uris:: list of the uris to be described
#format:: the preferred response format. Default is RDF/XML
def multi_describe(uris, format = "application/rdf+xml")
  targets = uris.map { |u| "<#{u}>" }.join(" ")
  query("DESCRIBE #{targets}", format)
end
201
+
202
#Run a SPARQL CONSTRUCT query; a thin wrapper around #query.
#
#query:: the SPARQL query
#format:: the preferred response format. Default is RDF/XML
def construct(query, format = "application/rdf+xml")
  # With parentheses this calls the #query method, not the local parameter.
  query(query, format)
end
209
+
210
#Run a SPARQL ASK query; a thin wrapper around #query.
#
#query:: the SPARQL query
#format:: the preferred response format. Default is the SPARQL JSON results format
def ask(query, format = Sparql::SPARQL_RESULTS_JSON)
  # With parentheses this calls the #query method, not the local parameter.
  query(query, format)
end
217
+
218
#Run a SPARQL SELECT query; a thin wrapper around #query.
#
#query:: the SPARQL query
#format:: the preferred response format. Default is the SPARQL JSON results format
def select(query, format = Sparql::SPARQL_RESULTS_JSON)
  # With parentheses this calls the #query method, not the local parameter.
  query(query, format)
end
225
+
226
+ end
227
+
228
#Simple helper class for manipulating and executing SPARQL queries and manipulating the results
class SparqlHelper
  #Matches a SPARQL variable (<tt>?name</tt> or <tt>$name</tt>). Digits and
  #underscores are part of the name, so <tt>?p2</tt> is never mistaken for <tt>?p</tt>.
  VARIABLE_MATCHER = /(\?|\$)([a-zA-Z0-9_]+)/

  #Characters that must be escaped inside a quoted literal, with their escaped form
  LITERAL_ESCAPES = {
    "\\" => "\\\\",
    "\"" => "\\\"",
    "\n" => "\\n",
    "\r" => "\\r",
    "\t" => "\\t"
  }

  #Apply some initial bindings to parameters in a query
  #
  #The keys in the values hash are used to replace variables in a query
  #The values are supplied as is, allowing them to be provided as URIs, or typed literals
  #according to Turtle syntax.
  #
  #Any keys in the hash that are not in the query are ignored. Any variables not found
  #in the hash remain unbound.
  #
  #The query passed in is not modified (and may be frozen); a new string is returned.
  #
  #query:: the query whose initial bindings are to be set
  #bindings:: hash of variable name (without the leading ? or $) to value
  def SparqlHelper.apply_initial_bindings(query, bindings={})
    query.gsub(VARIABLE_MATCHER) do |variable|
      name = Regexp.last_match(2)
      bindings.has_key?(name) ? bindings[name].to_s : variable
    end
  end

  #Convert a SPARQL query result binding into a hash suitable for passing
  #to the apply_initial_bindings method.
  #
  #The result param is assumed to be a Ruby hash that reflects the structure of
  #a binding in a SELECT query result (i.e. the result of parsing the <tt>application/sparql-results+json</tt>
  #format and extracting a specific result binding).
  #
  #The method is intended to be used to support cases where an initial select query is
  #performed to extract some variables that can later be plugged into a subsequent
  #query
  #
  #URIs are wrapped in angle brackets. Literals are quoted and escaped, followed by
  #<tt>^^<datatype></tt> or <tt>@lang</tt> when the binding carries a datatype or
  #an <tt>xml:lang</tt>. Any other term type (e.g. bnodes) is left out of the returned hash.
  #
  #result:: hash conforming to structure of a <tt>binding</tt> in the SPARQL JSON format
  def SparqlHelper.result_to_query_binding(result)
    hash = {}
    result.each_pair do |key, value|
      case value["type"]
      when "uri"
        hash[key] = "<#{value["value"]}>"
      when "literal", "typed-literal"
        #"typed-literal" is accepted as well: some endpoints use it for literals with a datatype
        literal = "\"#{escape_literal(value["value"])}\""
        if value.has_key?("datatype")
          #the datatype is an IRI, so it needs angle brackets to be valid in a query
          literal = "#{literal}^^<#{value["datatype"]}>"
        elsif value.has_key?("xml:lang")
          literal = "#{literal}@#{value["xml:lang"]}"
        end
        hash[key] = literal
      end
    end
    return hash
  end

  #Convert Ruby hash structured according to SPARQL JSON format
  #into an array of hashes by calling result_to_query_binding on each binding
  #in the results.
  #
  #E.g:
  #<tt>results = Sparql::SparqlHelper.select(query, sparql_client)</tt>
  #<tt>bindings = Sparql::SparqlHelper.results_to_query_bindings(results)</tt>
  #
  #results:: hash conforming to SPARQL SELECT structure
  def SparqlHelper.results_to_query_bindings(results)
    results["results"]["bindings"].map do |result|
      result_to_query_binding(result)
    end
  end

  #Perform a simple SELECT query on an endpoint.
  #Will request the results using the SPARQL JSON results format, and parse the
  #resulting JSON results. The result will therefore be a simple ruby hash of the results
  #
  #An error will be raised if the response is not HTTP OK.
  #
  #query:: the SPARQL SELECT query
  #sparql_client:: a configured Sparql Client object
  def SparqlHelper.select(query, sparql_client)
    resp = sparql_client.select(query, Sparql::SPARQL_RESULTS_JSON)
    return parse_json_response(resp)
  end

  #Performs an ASK query on an endpoint, returning a boolean true/false response
  #
  #Will request the results using the SPARQL JSON results format, parse the
  #resulting JSON results, and extract the true/false response.
  #
  #query:: the SPARQL ASK query
  #sparql_client:: a configured Sparql Client object
  def SparqlHelper.ask(query, sparql_client)
    json = SparqlHelper.select(query, sparql_client)
    #The parsed value is a real boolean; to_s also tolerates endpoints that send the string "true"
    return json["boolean"].to_s == "true"
  end

  #Performs an ASK query on the SPARQL endpoint to test whether there are any statements
  #in the triple store about the specified uri.
  #
  #uri:: the uri to test for
  #sparql_client:: a configured Sparql Client object
  def SparqlHelper.exists(uri, sparql_client)
    return SparqlHelper.ask("ASK { <#{uri}> ?p ?o }", sparql_client)
  end

  #Perform a simple SELECT query on an endpoint and return a simple array of values
  #
  #Will request the results using the SPARQL JSON results format, and parse the
  #resulting JSON results. The assumption is that the SELECT query contains a single "column"
  #of values, which will be returned as an array
  #
  #Note this will lose any type information, only the value of the bindings are returned
  #
  #Also note that if row has an empty binding for the selected variable, then this row will
  #be dropped from the resulting array
  #
  #query:: the SPARQL SELECT query
  #sparql_client:: a configured Sparql Client object
  def SparqlHelper.select_values(query, sparql_client)
    results = SparqlHelper.select(query, sparql_client)
    v = results["head"]["vars"][0]
    values = []
    results["results"]["bindings"].each do |solution|
      values << solution[v]["value"] if solution[v]
    end
    return values
  end

  #Perform a simple SELECT query and return the results as a simple array of hashes.
  #Each entry in the array will be a row in the results, and each hash will have a key for
  #each variable.
  #
  #Note that this will lose any type information, only the value of the bindings are returned
  #
  #Also note that if a row has an empty binding for a given variable, then this variable will
  #not be presented in the hash for that row.
  #
  #query:: the SPARQL SELECT query
  #sparql_client:: a configured Sparql Client object
  def SparqlHelper.select_into_array(query, sparql_client)
    results = SparqlHelper.select(query, sparql_client)
    results["results"]["bindings"].map do |solution|
      row = {}
      solution.each { |key, value| row[key] = value["value"] }
      row
    end
  end

  #Perform a simple SELECT query on an endpoint and return a single result
  #
  #Will request the results using the SPARQL JSON results format, and parse the
  #resulting JSON results. The assumption is that the SELECT query returns a single
  #value (i.e single variable, with single binding)
  #
  #Note this will lose any type information, only the value of the binding is returned
  #If additional results are returned, then these are ignored. If there are no results,
  #or the first row has no binding for the variable, nil is returned.
  #
  #query:: the SPARQL SELECT query
  #sparql_client:: a configured Sparql Client object
  def SparqlHelper.select_single_value(query, sparql_client)
    results = SparqlHelper.select(query, sparql_client)
    v = results["head"]["vars"][0]
    first = results["results"]["bindings"][0]
    return nil if first.nil? || first[v].nil?
    return first[v]["value"]
  end

  #Perform a SPARQL CONSTRUCT query against an endpoint, requesting the results in JSON
  #
  #Will request the results as application/json (with the expectation that it returns RDF_JSON),
  #and parses the resulting JSON document.
  #
  #query:: the SPARQL CONSTRUCT query
  #sparql_client:: a configured Sparql Client object
  def SparqlHelper.construct_to_resource_hash(query, sparql_client)
    resp = sparql_client.construct(query, "application/json")
    return parse_json_response(resp)
  end

  #Perform a SPARQL DESCRIBE query against an endpoint, requesting the results in JSON
  #
  #Will request the results as application/json (with the expectation that it returns RDF_JSON),
  #and parses the resulting JSON document.
  #
  #query:: the SPARQL DESCRIBE query
  #sparql_client:: a configured Sparql Client object
  def SparqlHelper.describe_to_resource_hash(query, sparql_client)
    resp = sparql_client.describe(query, "application/json")
    return parse_json_response(resp)
  end

  #DESCRIBE multiple resources in a single SPARQL request
  #
  #uris:: an array of URIs
  #sparql_client:: a configured Sparql Client object
  def SparqlHelper.multi_describe(uris, sparql_client)
    resp = sparql_client.multi_describe(uris, "application/json")
    return parse_json_response(resp)
  end

  #Describe a single URI using one of several forms of Bounded Description
  #See Sparql Client.describe_uri
  #
  #uri:: resource to describe
  #sparql_client:: configured SPARQL client
  #type:: form of bounded description to generate
  def SparqlHelper.describe_uri(uri, sparql_client, type=:cbd)
    resp = sparql_client.describe_uri(uri, "application/json", type)
    return parse_json_response(resp)
  end

  #Check that a response has status 200 and parse its content as JSON.
  #Raises an error carrying the status, reason and content otherwise.
  def SparqlHelper.parse_json_response(resp)
    if resp.status != 200
      raise "Error performing sparql query: #{resp.status} #{resp.reason}\n#{resp.content}"
    end
    return JSON.parse( resp.content )
  end

  #Escape backslashes, double quotes and line breaks/tabs so that a value
  #can be placed between double quotes in a query.
  def SparqlHelper.escape_literal(value)
    value.to_s.gsub(/[\\"\n\r\t]/) { |char| LITERAL_ESCAPES[char] }
  end

  private_class_method :parse_json_response, :escape_literal
end
459
+
460
+ end
461
+
462
+ end