pho 0.4.1 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. data/CHANGES +18 -1
  2. data/README +10 -0
  3. data/Rakefile +2 -1
  4. data/doc/rdoc/classes/Pho.html +33 -22
  5. data/doc/rdoc/classes/Pho/DatatypeProperty.html +12 -12
  6. data/doc/rdoc/classes/Pho/{RDF_JSON.html → Enrichment.html} +8 -7
  7. data/doc/rdoc/classes/Pho/Enrichment/ResourceEnricher.html +310 -0
  8. data/doc/rdoc/classes/Pho/Etags.html +42 -42
  9. data/doc/rdoc/classes/Pho/Facet/Results.html +19 -19
  10. data/doc/rdoc/classes/Pho/Facet/Term.html +6 -6
  11. data/doc/rdoc/classes/Pho/FieldPredicateMap.html +110 -105
  12. data/doc/rdoc/classes/Pho/FieldWeighting.html +12 -12
  13. data/doc/rdoc/classes/Pho/FileManagement.html +121 -0
  14. data/doc/rdoc/classes/Pho/FileManagement/AbstractFileManager.html +443 -0
  15. data/doc/rdoc/classes/Pho/FileManagement/FileManager.html +258 -0
  16. data/doc/rdoc/classes/Pho/FileManagement/RDFManager.html +271 -0
  17. data/doc/rdoc/classes/Pho/Job.html +64 -64
  18. data/doc/rdoc/classes/Pho/Jobs.html +60 -60
  19. data/doc/rdoc/classes/Pho/QueryProfile.html +60 -60
  20. data/doc/rdoc/classes/Pho/RDFCollection.html +4 -378
  21. data/doc/rdoc/classes/Pho/ResourceHash.html +123 -0
  22. data/doc/rdoc/classes/Pho/ResourceHash/Converter.html +323 -0
  23. data/doc/rdoc/classes/Pho/{RDF_JSON → ResourceHash}/SetAlgebra.html +18 -18
  24. data/doc/rdoc/classes/Pho/Snapshot.html +35 -35
  25. data/doc/rdoc/classes/Pho/Sparql.html +137 -0
  26. data/doc/rdoc/classes/Pho/Sparql/SparqlClient.html +515 -0
  27. data/doc/rdoc/classes/Pho/Sparql/SparqlHelper.html +575 -0
  28. data/doc/rdoc/classes/Pho/Status.html +26 -26
  29. data/doc/rdoc/classes/Pho/Store.html +271 -241
  30. data/doc/rdoc/classes/Pho/Update/Changeset.html +73 -73
  31. data/doc/rdoc/classes/Pho/Update/ChangesetBuilder.html +34 -34
  32. data/doc/rdoc/classes/Pho/Update/Changesets.html +14 -14
  33. data/doc/rdoc/classes/Pho/Update/LiteralStatement.html +31 -23
  34. data/doc/rdoc/classes/Pho/Update/ResourceStatement.html +45 -21
  35. data/doc/rdoc/classes/Pho/Update/Statement.html +29 -29
  36. data/doc/rdoc/classes/String.html +1 -1
  37. data/doc/rdoc/created.rid +1 -1
  38. data/doc/rdoc/files/CHANGES.html +49 -3
  39. data/doc/rdoc/files/README.html +15 -1
  40. data/doc/rdoc/files/lib/pho/changeset_builder_rb.html +1 -1
  41. data/doc/rdoc/files/lib/pho/changeset_rb.html +1 -1
  42. data/doc/rdoc/files/lib/pho/converter_rb.html +108 -0
  43. data/doc/rdoc/files/lib/pho/enrichment_rb.html +101 -0
  44. data/doc/rdoc/files/lib/pho/etags_rb.html +1 -1
  45. data/doc/rdoc/files/lib/pho/field_predicate_map_rb.html +1 -1
  46. data/doc/rdoc/files/lib/pho/file_management_rb.html +101 -0
  47. data/doc/rdoc/files/lib/pho/file_manager_rb.html +108 -0
  48. data/doc/rdoc/files/lib/pho/rdf_collection_rb.html +1 -1
  49. data/doc/rdoc/files/lib/pho/resource_hash_rb.html +101 -0
  50. data/doc/rdoc/files/lib/pho/{rdf_json_rb.html → sparql_rb.html} +4 -4
  51. data/doc/rdoc/files/lib/pho/store_rb.html +1 -1
  52. data/doc/rdoc/files/lib/pho_rb.html +7 -2
  53. data/doc/rdoc/fr_class_index.html +12 -2
  54. data/doc/rdoc/fr_file_index.html +6 -1
  55. data/doc/rdoc/fr_method_index.html +176 -139
  56. data/examples/sparql_construct_hash.rb +26 -0
  57. data/examples/sparql_select.rb +18 -0
  58. data/lib/pho.rb +6 -1
  59. data/lib/pho/changeset.rb +24 -9
  60. data/lib/pho/changeset_builder.rb +10 -10
  61. data/lib/pho/converter.rb +74 -0
  62. data/lib/pho/enrichment.rb +81 -0
  63. data/lib/pho/etags.rb +1 -0
  64. data/lib/pho/field_predicate_map.rb +6 -1
  65. data/lib/pho/file_management.rb +102 -0
  66. data/lib/pho/file_manager.rb +61 -0
  67. data/lib/pho/rdf_collection.rb +54 -120
  68. data/lib/pho/{rdf_json.rb → resource_hash.rb} +3 -4
  69. data/lib/pho/sparql.rb +332 -0
  70. data/lib/pho/store.rb +20 -14
  71. data/tests/tc_changeset.rb +46 -0
  72. data/tests/tc_changeset_builder.rb +122 -1
  73. data/tests/tc_converter.rb +95 -0
  74. data/tests/tc_enrichment.rb +83 -0
  75. data/tests/tc_file_manager.rb +88 -0
  76. data/tests/tc_rdf_collection.rb +3 -0
  77. data/tests/{tc_rdf_json.rb → tc_resource_hash.rb} +23 -23
  78. data/tests/tc_search.rb +1 -1
  79. data/tests/tc_sparql.rb +131 -6
  80. data/tests/tc_sparql_helper.rb +214 -0
  81. data/tests/ts_pho.rb +6 -2
  82. metadata +47 -8
@@ -0,0 +1,26 @@
1
+ require 'rubygems'
2
+ require 'pho'
3
+
4
+ query = <<-EOL
5
+ PREFIX space: <http://purl.org/net/schemas/space/>
6
+ PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
7
+ PREFIX foaf: <http://xmlns.com/foaf/0.1/>
8
+ CONSTRUCT {
9
+ ?spacecraft foaf:name ?name;
10
+ space:agency ?agency;
11
+ space:mass ?mass.
12
+ }
13
+ WHERE {
14
+ ?launch space:launched "1969-07-16"^^xsd:date.
15
+ ?spacecraft space:launch ?launch;
16
+ foaf:name ?name;
17
+ space:agency ?agency;
18
+ space:mass ?mass.
19
+ }
20
+ EOL
21
+
22
+ sparql_client = Pho::Sparql::SparqlClient.new("http://api.talis.com/stores/space/services/sparql")
23
+
24
+ data = Pho::Sparql::SparqlHelper.constructToResourceHash(query, sparql_client)
25
+
26
+ puts data.inspect()
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'pho'
3
+
4
+ query = <<-EOL
5
+ PREFIX space: <http://purl.org/net/schemas/space/>
6
+ SELECT ?s WHERE {
7
+ ?s a space:Spacecraft.
8
+ }
9
+ LIMIT 10
10
+ EOL
11
+
12
+ sparql_client = Pho::Sparql::SparqlClient.new("http://api.talis.com/stores/space/services/sparql")
13
+
14
+ uris = Pho::Sparql::SparqlHelper.selectValues(query, sparql_client)
15
+
16
+ uris.each do |uri|
17
+ puts uri
18
+ end
data/lib/pho.rb CHANGED
@@ -12,10 +12,15 @@ require 'pho/query_profile'
12
12
  require 'pho/status'
13
13
  require 'pho/facet'
14
14
  require 'pho/job'
15
+ require 'pho/file_management'
15
16
  require 'pho/rdf_collection'
16
- require 'pho/rdf_json'
17
+ require 'pho/file_manager'
18
+ require 'pho/resource_hash'
19
+ require 'pho/converter'
17
20
  require 'pho/changeset_builder'
18
21
  require 'pho/changeset'
22
+ require 'pho/sparql'
23
+ require 'pho/enrichment'
19
24
 
20
25
  if RUBY_VERSION < "1.8.7"
21
26
  class String
@@ -1,9 +1,8 @@
1
1
  module Pho
2
2
 
3
- # Previous changeset
4
- # Identifier for changeset?
5
- # Blank nodes
6
- # Convenience functions for adding/removing lists?
3
+ # TODO: assigning identifiers via changesets (Blank nodes on subject of change and subject)
4
+ # TODO: assigning identifiers to a changeset docs
5
+ # TODO: linking together changesets using either nodeID or an identifier
7
6
 
8
7
  require 'uri'
9
8
 
@@ -43,15 +42,19 @@ module Pho
43
42
  #Create an RDF/XML fragment describing this Statement
44
43
  def to_rdf()
45
44
  rdf = "<rdf:Statement>"
46
- rdf << " <rdf:subject rdf:resource=\"#{@subject}\"/>"
45
+ rdf << write_subject()
47
46
  rdf << " <rdf:predicate rdf:resource=\"#{@predicate}\"/>"
48
47
  rdf << write_object()
49
48
  rdf << "</rdf:Statement>"
50
49
  return rdf
51
50
  end
52
-
51
+
53
52
  private
54
-
53
+
54
+ def write_subject()
55
+ return " <rdf:subject rdf:resource=\"#{@subject}\"/>"
56
+ end
57
+
55
58
  # subject:: URI of subject of triple
56
59
  # predicate:: URI of predicate of triple
57
60
  # object:: object value of triple (may be URI or literal)
@@ -102,8 +105,16 @@ module Pho
102
105
 
103
106
  protected
104
107
  def write_object()
105
- #FIXME language and datatype
106
- "<rdf:object>#{@object}</rdf:object>"
108
+ tag = ""
109
+ if @datatype != nil
110
+ tag = "<rdf:object rdf:datatype=\"#{@datatype}\">"
111
+ elsif @language != nil
112
+ tag = "<rdf:object xml:lang=\"#{@language}\">"
113
+ else
114
+ tag = "<rdf:object>"
115
+ end
116
+ tag << "#{@object}</rdf:object>"
117
+ return tag
107
118
  end
108
119
  end
109
120
 
@@ -127,6 +138,10 @@ module Pho
127
138
  end
128
139
 
129
140
  protected
141
+ def write_subject()
142
+ return " <rdf:subject rdf:resource=\"#{@subject}\"/>"
143
+ end
144
+
130
145
  def write_object()
131
146
  "<rdf:object rdf:resource=\"#{@object}\"/>"
132
147
  end
@@ -15,14 +15,14 @@ module Pho
15
15
  #
16
16
  #Returns an array of Changeset objects
17
17
  #
18
- # before:: triple hash describing current state of the resource
19
- # after:: triple hash describing updated state of the resource
18
+ # before:: resource hash describing current state of the resource
19
+ # after:: resource hash describing updated state of the resource
20
20
  # creator_name:: name of the creator of the changes
21
21
  # change_reason:: description of why the changes are being made
22
22
  def ChangesetBuilder.build_batch(before, after, creator_name=nil, change_reason=nil)
23
23
 
24
- removals = Pho::RDF_JSON::SetAlgebra.minus(before, after)
25
- additions = Pho::RDF_JSON::SetAlgebra.minus(after, before)
24
+ removals = Pho::ResourceHash::SetAlgebra.minus(before, after)
25
+ additions = Pho::ResourceHash::SetAlgebra.minus(after, before)
26
26
 
27
27
  batch = Array.new
28
28
 
@@ -58,13 +58,13 @@ module Pho
58
58
  #The method will return a single Changeset object.
59
59
  #
60
60
  # subject_of_change:: uri of the resource being updated
61
- # before:: triple hash describing current state of the resource
62
- # after:: triple hash describing updated state of the resource
61
+ # before:: resource hash describing current state of the resource
62
+ # after:: resource hash describing updated state of the resource
63
63
  # creator_name:: name of the creator of the changes
64
64
  # change_reason:: description of why the changes are being made
65
65
  def ChangesetBuilder.build(subject_of_change, before, after, creator_name=nil, change_reason=nil)
66
- removals = Pho::RDF_JSON::SetAlgebra.minus(before, after)
67
- additions = Pho::RDF_JSON::SetAlgebra.minus(after, before)
66
+ removals = Pho::ResourceHash::SetAlgebra.minus(before, after)
67
+ additions = Pho::ResourceHash::SetAlgebra.minus(after, before)
68
68
 
69
69
  cs = Pho::Update::Changeset.new(subject_of_change, creator_name, change_reason) do |cs|
70
70
  cs.add_removals( create_statements(removals) )
@@ -75,9 +75,9 @@ module Pho
75
75
 
76
76
  end
77
77
 
78
- #Takes an triple hash and serializes it as an array of Pho::Update::Statement objects
78
+ #Takes a resource hash and serializes it as an array of Pho::Update::Statement objects
79
79
  #
80
- # triples:: a hash of triples, conforming to RDF-in-JSON structure
80
+ # triples:: a resource hash, conforming to RDF-in-JSON structure
81
81
  def ChangesetBuilder.create_statements(triples)
82
82
  statements = Array.new
83
83
  triples.each do |uri, properties|
@@ -0,0 +1,74 @@
1
+ module Pho
2
+
3
+ module ResourceHash
4
+
5
+ #TODO reading from streams
6
+ #TODO convert into other formats
7
+ #TODO wrap Redland exceptions. Parser/Serializer construction as well as parsing errors
8
+
9
+ require 'rdf/redland'
10
+
11
+ #Class for converting to and from resource hashes
12
+ class Converter
13
+
14
+ #Parse JSON structured according to the RDF-in-JSON specification into
15
+ #a Ruby resource hash. Simply invokes the JSON parser.
16
+ #
17
+ # json:: valid RDF-in-JSON
18
+ def Converter.parse_json(json)
19
+ return JSON.parse(json)
20
+ end
21
+
22
+ #Parse a string containing RDF/XML into a resource hash
23
+ #
24
+ # rdfxml:: a String containing RDF/XML
25
+ def Converter.parse_rdfxml(rdfxml, base_uri)
26
+ return Converter.parse(rdfxml, base_uri, "rdfxml")
27
+ end
28
+
29
+ #Parse a string containing N-Triples into a resource hash
30
+ #
31
+ # ntriples:: a String containing N-Triples
32
+ def Converter.parse_ntriples(ntriples, base_uri)
33
+ return Converter.parse(ntriples, base_uri, "ntriples")
34
+ end
35
+
36
+ #Parse a string containing Turtle into a resource hash
37
+ #
38
+ # turtle:: a String containing Turtle
39
+ def Converter.parse_turtle(turtle, base_uri)
40
+ return Converter.parse(turtle, base_uri, "turtle")
41
+ end
42
+
43
+ #Convert specified format into a ResourceHash
44
+ #
45
+ # format:: one of rdfxml, ntriples, turtle
46
+ # data:: String containing the data to be parsed
47
+ # base_uri:: base uri of the data
48
+ def Converter.parse(data, base_uri, format="rdfxml")
49
+ model = Redland::Model.new()
50
+ case format
51
+ when "rdfxml" then mime="application/rdf+xml"
52
+ when "json" then mime="application/json"
53
+ else mime=""
54
+ end
55
+
56
+ parser = Redland::Parser.new(format, mime)
57
+ parser.parse_string_into_model(model, data, base_uri)
58
+ serializer = Redland::Serializer.new( "json", "application/json" )
59
+ json = serializer.model_to_string(Redland::Uri.new(base_uri), model)
60
+ return Converter.parse_json( json )
61
+ end
62
+
63
+ #Serialize a resource hash as RDF-in-JSON
64
+ #
65
+ # hash:: the resource hash to serialize
66
+ def Converter.serialize_json(hash)
67
+ return JSON.dump(hash)
68
+ end
69
+
70
+
71
+ end
72
+
73
+ end
74
+ end
@@ -0,0 +1,81 @@
1
+ module Pho
2
+
3
+ #Enrichment module. Provides classes and mixins for enriching data held in Platform stores using
4
+ #external SPARQL endpoints and services
5
+ module Enrichment
6
+
7
+ class ResourceEnricher
8
+
9
+ attr_reader :store
10
+ attr_reader :sparql_client
11
+
12
+ # Constructor
13
+ # store:: the store containing resource(s) to enrich
14
+ # sparql_client:: SparqlClient object to be used as client for retrieving enrichments
15
+ def initialize(store, sparql_client)
16
+ @store = store
17
+ @sparql_client = sparql_client
18
+ end
19
+
20
+ #Enrich a store against itself
21
+ #
22
+ #For the common case where a store needs to be enriched against itself by inferring new
23
+ #data from old using a CONSTRUCT query
24
+ #
25
+ # store:: the store to be updated
26
+ # query:: the SPARQL CONSTRUCT query to be used to infer the new data
27
+ def ResourceEnricher.infer(store, query)
28
+ enricher = ResourceEnricher.new( store, store.sparql_client() )
29
+ return enricher.merge( query )
30
+ end
31
+
32
+ #Execute the provided query against the configured SPARQL endpoint and store the results in
33
+ #the Platform Store
34
+ #
35
+ # query:: the SPARQL CONSTRUCT or DESCRIBE query to execute
36
+ def merge(query)
37
+ resp = @sparql_client.query( query, "application/rdf+xml" )
38
+ if resp.status != 200
39
+ raise "Unable to execute query. Response: #{resp.status} #{resp.reason} #{resp.content}"
40
+ end
41
+ resp = @store.store_data( resp.content )
42
+ return resp
43
+ end
44
+
45
+ #TODO: optimize POSTs back to the Platform, to deal with large number of resources, e.g. batching
46
+
47
+ # Enrich a store with data extracted from a SPARQL endpoint.
48
+ #
49
+ # The locator_query is used to find resources in the Platform Store that should be enriched. The query
50
+ # should be a SPARQL SELECT query that returns the data to be used to parameterize the enrichment_query.
51
+ #
52
+ # For each query result, the enrichment_query will be submitted to the configured SPARQL endpoint, after
53
+ # first interpolating the string, providing the query result bindings as parameters. (See SparqlHelper.apply_initial_bindings
54
+ # and SparqlHelper.results_to_query_bindings)
55
+ #
56
+ # If successful, the result of each enrichment query will then be pushed back into the Platform Store by
57
+ # posting the results of the query to the metabox. Enrichment queries should therefore be CONSTRUCT or
58
+ # DESCRIBE queries. The SPARQL protocol request will be sent with an Accept header of application/rdf+xml
59
+ #
60
+ # locator_query:: query to locate resources to be enriched
61
+ # enrichment_query:: query to be used to enrich the resource
62
+ def enrich(locator_query, enrichment_query)
63
+ results = Pho::Sparql::SparqlHelper.select(locator_query, @store.sparql_client() )
64
+ bindings = Pho::Sparql::SparqlHelper.results_to_query_bindings(results)
65
+ bindings.each do |binding|
66
+ bound_query = Pho::Sparql::SparqlHelper.apply_initial_bindings(enrichment_query, binding)
67
+
68
+ result = @sparql_client.query(bound_query, "application/rdf+xml")
69
+
70
+ #TODO error handling and callbacks
71
+ if result.status == 200
72
+ resp = @store.store_data( result.content )
73
+ end
74
+
75
+ end
76
+ end
77
+
78
+ end
79
+
80
+ end
81
+ end
@@ -1,5 +1,6 @@
1
1
  require 'yaml'
2
2
 
3
+ #TODO put together proper test suite for this
3
4
  module Pho
4
5
 
5
6
  #Simple mechanism for managing etags
@@ -116,7 +116,12 @@ module Pho
116
116
 
117
117
  json = JSON.parse( resp.content )
118
118
 
119
- label = json[fpmap_uri]["http:\/\/www.w3.org\/2000\/01\/rdf-schema#label"][0]["value"]
119
+ labels = json[fpmap_uri]["http:\/\/www.w3.org\/2000\/01\/rdf-schema#label"]
120
+ label = ""
121
+ if labels != nil
122
+ label = labels[0]["value"]
123
+ end
124
+
120
125
  fpmap = FieldPredicateMap.new(fpmap_uri, label)
121
126
 
122
127
  mapped_properties = json[fpmap_uri]["http:\/\/schemas.talis.com\/2006\/frame\/schema#mappedDatatypeProperty"]
@@ -0,0 +1,102 @@
1
+ module Pho
2
+
3
+ module FileManagement
4
+
5
+ #TODO: move files into hidden directory
6
+ #TODO: add directory traversal
7
+ class AbstractFileManager
8
+
9
+ attr_reader :dir
10
+ attr_reader :store
11
+
12
+ OK = "ok".freeze
13
+ FAIL = "fail".freeze
14
+
15
+ def initialize(store, dir, ok_suffix=OK, fail_suffix=FAIL, sleep=1)
16
+ @store = store
17
+ @dir = dir
18
+ @sleep = sleep
19
+ @ok_suffix = ok_suffix
20
+ @fail_suffix = fail_suffix
21
+ end
22
+
23
+ #Store all files that match the file name in directory
24
+ def store()
25
+ files_to_store = new_files()
26
+ files_to_store.each do |filename|
27
+ file = File.new(filename)
28
+ store_file(file, filename)
29
+ end
30
+ end
31
+
32
+ #Retry anything known to have failed
33
+ def retry_failures()
34
+ retries = failures()
35
+ retries.each do |filename|
36
+ File.delete( get_fail_file_for(filename) )
37
+ #store it
38
+ file = File.new(filename)
39
+ store_file(file, filename)
40
+ end
41
+ end
42
+
43
+ #Reset the directory to clear out any previous statuses
44
+ #Store can also be reset at the same time: use with care!
45
+ def reset(reset_store=false)
46
+ Dir.glob( File.join(@dir, "*.#{@fail_suffix}") ).each do |file|
47
+ File.delete(file)
48
+ end
49
+ Dir.glob( File.join(@dir, "*.#{@ok_suffix}") ).each do |file|
50
+ File.delete(file)
51
+ end
52
+ end
53
+
54
+ #List failures
55
+ def failures()
56
+ fails = Array.new
57
+ Dir.glob( File.join(@dir, "*") ).each do |file|
58
+ if File.extname(file) != ".#{@fail_suffix}" && File.extname(file) != ".#{@ok_suffix}"
59
+ fails << file if File.exists?( get_fail_file_for(file) )
60
+ end
61
+ end
62
+ return fails
63
+ end
64
+
65
+ #List successes
66
+ def successes()
67
+ successes = Array.new
68
+ Dir.glob( File.join(@dir, "*") ).each do |file|
69
+ if File.extname(file) != ".#{@fail_suffix}" && File.extname(file) != ".#{@ok_suffix}"
70
+ successes << file if File.exists?( get_ok_file_for(file) )
71
+ end
72
+ end
73
+ return successes
74
+ end
75
+
76
+ #Summarize the state of the collection to the provided IO object
77
+ #Creates a simple report
78
+ def summary()
79
+ failures = failures()
80
+ successes = successes()
81
+ newfiles = new_files()
82
+ total = failures.size + successes.size + newfiles.size
83
+ summary = "#{@dir} contains #{total} files: #{successes.size} stored, #{failures.size} failed, #{newfiles.size} new"
84
+ return summary
85
+ end
86
+
87
+ def get_fail_file_for(filename)
88
+ ext = File.extname(filename)
89
+ return filename.gsub(/#{ext}/, ".#{@fail_suffix}")
90
+ end
91
+
92
+ def get_ok_file_for(filename)
93
+ ext = File.extname(filename)
94
+ return filename.gsub(/#{ext}/, ".#{@ok_suffix}")
95
+ end
96
+
97
+ end
98
+
99
+ end
100
+ #end file Module
101
+
102
+ end