pho 0.4.1 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. data/CHANGES +18 -1
  2. data/README +10 -0
  3. data/Rakefile +2 -1
  4. data/doc/rdoc/classes/Pho.html +33 -22
  5. data/doc/rdoc/classes/Pho/DatatypeProperty.html +12 -12
  6. data/doc/rdoc/classes/Pho/{RDF_JSON.html → Enrichment.html} +8 -7
  7. data/doc/rdoc/classes/Pho/Enrichment/ResourceEnricher.html +310 -0
  8. data/doc/rdoc/classes/Pho/Etags.html +42 -42
  9. data/doc/rdoc/classes/Pho/Facet/Results.html +19 -19
  10. data/doc/rdoc/classes/Pho/Facet/Term.html +6 -6
  11. data/doc/rdoc/classes/Pho/FieldPredicateMap.html +110 -105
  12. data/doc/rdoc/classes/Pho/FieldWeighting.html +12 -12
  13. data/doc/rdoc/classes/Pho/FileManagement.html +121 -0
  14. data/doc/rdoc/classes/Pho/FileManagement/AbstractFileManager.html +443 -0
  15. data/doc/rdoc/classes/Pho/FileManagement/FileManager.html +258 -0
  16. data/doc/rdoc/classes/Pho/FileManagement/RDFManager.html +271 -0
  17. data/doc/rdoc/classes/Pho/Job.html +64 -64
  18. data/doc/rdoc/classes/Pho/Jobs.html +60 -60
  19. data/doc/rdoc/classes/Pho/QueryProfile.html +60 -60
  20. data/doc/rdoc/classes/Pho/RDFCollection.html +4 -378
  21. data/doc/rdoc/classes/Pho/ResourceHash.html +123 -0
  22. data/doc/rdoc/classes/Pho/ResourceHash/Converter.html +323 -0
  23. data/doc/rdoc/classes/Pho/{RDF_JSON → ResourceHash}/SetAlgebra.html +18 -18
  24. data/doc/rdoc/classes/Pho/Snapshot.html +35 -35
  25. data/doc/rdoc/classes/Pho/Sparql.html +137 -0
  26. data/doc/rdoc/classes/Pho/Sparql/SparqlClient.html +515 -0
  27. data/doc/rdoc/classes/Pho/Sparql/SparqlHelper.html +575 -0
  28. data/doc/rdoc/classes/Pho/Status.html +26 -26
  29. data/doc/rdoc/classes/Pho/Store.html +271 -241
  30. data/doc/rdoc/classes/Pho/Update/Changeset.html +73 -73
  31. data/doc/rdoc/classes/Pho/Update/ChangesetBuilder.html +34 -34
  32. data/doc/rdoc/classes/Pho/Update/Changesets.html +14 -14
  33. data/doc/rdoc/classes/Pho/Update/LiteralStatement.html +31 -23
  34. data/doc/rdoc/classes/Pho/Update/ResourceStatement.html +45 -21
  35. data/doc/rdoc/classes/Pho/Update/Statement.html +29 -29
  36. data/doc/rdoc/classes/String.html +1 -1
  37. data/doc/rdoc/created.rid +1 -1
  38. data/doc/rdoc/files/CHANGES.html +49 -3
  39. data/doc/rdoc/files/README.html +15 -1
  40. data/doc/rdoc/files/lib/pho/changeset_builder_rb.html +1 -1
  41. data/doc/rdoc/files/lib/pho/changeset_rb.html +1 -1
  42. data/doc/rdoc/files/lib/pho/converter_rb.html +108 -0
  43. data/doc/rdoc/files/lib/pho/enrichment_rb.html +101 -0
  44. data/doc/rdoc/files/lib/pho/etags_rb.html +1 -1
  45. data/doc/rdoc/files/lib/pho/field_predicate_map_rb.html +1 -1
  46. data/doc/rdoc/files/lib/pho/file_management_rb.html +101 -0
  47. data/doc/rdoc/files/lib/pho/file_manager_rb.html +108 -0
  48. data/doc/rdoc/files/lib/pho/rdf_collection_rb.html +1 -1
  49. data/doc/rdoc/files/lib/pho/resource_hash_rb.html +101 -0
  50. data/doc/rdoc/files/lib/pho/{rdf_json_rb.html → sparql_rb.html} +4 -4
  51. data/doc/rdoc/files/lib/pho/store_rb.html +1 -1
  52. data/doc/rdoc/files/lib/pho_rb.html +7 -2
  53. data/doc/rdoc/fr_class_index.html +12 -2
  54. data/doc/rdoc/fr_file_index.html +6 -1
  55. data/doc/rdoc/fr_method_index.html +176 -139
  56. data/examples/sparql_construct_hash.rb +26 -0
  57. data/examples/sparql_select.rb +18 -0
  58. data/lib/pho.rb +6 -1
  59. data/lib/pho/changeset.rb +24 -9
  60. data/lib/pho/changeset_builder.rb +10 -10
  61. data/lib/pho/converter.rb +74 -0
  62. data/lib/pho/enrichment.rb +81 -0
  63. data/lib/pho/etags.rb +1 -0
  64. data/lib/pho/field_predicate_map.rb +6 -1
  65. data/lib/pho/file_management.rb +102 -0
  66. data/lib/pho/file_manager.rb +61 -0
  67. data/lib/pho/rdf_collection.rb +54 -120
  68. data/lib/pho/{rdf_json.rb → resource_hash.rb} +3 -4
  69. data/lib/pho/sparql.rb +332 -0
  70. data/lib/pho/store.rb +20 -14
  71. data/tests/tc_changeset.rb +46 -0
  72. data/tests/tc_changeset_builder.rb +122 -1
  73. data/tests/tc_converter.rb +95 -0
  74. data/tests/tc_enrichment.rb +83 -0
  75. data/tests/tc_file_manager.rb +88 -0
  76. data/tests/tc_rdf_collection.rb +3 -0
  77. data/tests/{tc_rdf_json.rb → tc_resource_hash.rb} +23 -23
  78. data/tests/tc_search.rb +1 -1
  79. data/tests/tc_sparql.rb +131 -6
  80. data/tests/tc_sparql_helper.rb +214 -0
  81. data/tests/ts_pho.rb +6 -2
  82. metadata +47 -8
@@ -0,0 +1,26 @@
1
+ require 'rubygems'
2
+ require 'pho'
3
+
4
+ query = <<-EOL
5
+ PREFIX space: <http://purl.org/net/schemas/space/>
6
+ PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
7
+ PREFIX foaf: <http://xmlns.com/foaf/0.1/>
8
+ CONSTRUCT {
9
+ ?spacecraft foaf:name ?name;
10
+ space:agency ?agency;
11
+ space:mass ?mass.
12
+ }
13
+ WHERE {
14
+ ?launch space:launched "1969-07-16"^^xsd:date.
15
+ ?spacecraft space:launch ?launch;
16
+ foaf:name ?name;
17
+ space:agency ?agency;
18
+ space:mass ?mass.
19
+ }
20
+ EOL
21
+
22
+ sparql_client = Pho::Sparql::SparqlClient.new("http://api.talis.com/stores/space/services/sparql")
23
+
24
+ data = Pho::Sparql::SparqlHelper.constructToResourceHash(query, sparql_client)
25
+
26
+ puts data.inspect()
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'pho'
3
+
4
+ query = <<-EOL
5
+ PREFIX space: <http://purl.org/net/schemas/space/>
6
+ SELECT ?s WHERE {
7
+ ?s a space:Spacecraft.
8
+ }
9
+ LIMIT 10
10
+ EOL
11
+
12
+ sparql_client = Pho::Sparql::SparqlClient.new("http://api.talis.com/stores/space/services/sparql")
13
+
14
+ uris = Pho::Sparql::SparqlHelper.selectValues(query, sparql_client)
15
+
16
+ uris.each do |uri|
17
+ puts uri
18
+ end
data/lib/pho.rb CHANGED
@@ -12,10 +12,15 @@ require 'pho/query_profile'
12
12
  require 'pho/status'
13
13
  require 'pho/facet'
14
14
  require 'pho/job'
15
+ require 'pho/file_management'
15
16
  require 'pho/rdf_collection'
16
- require 'pho/rdf_json'
17
+ require 'pho/file_manager'
18
+ require 'pho/resource_hash'
19
+ require 'pho/converter'
17
20
  require 'pho/changeset_builder'
18
21
  require 'pho/changeset'
22
+ require 'pho/sparql'
23
+ require 'pho/enrichment'
19
24
 
20
25
  if RUBY_VERSION < "1.8.7"
21
26
  class String
@@ -1,9 +1,8 @@
1
1
  module Pho
2
2
 
3
- # Previous changeset
4
- # Identifier for changeset?
5
- # Blank nodes
6
- # Convenience functions for adding/removing lists?
3
+ # TODO: assigning identifiers via changesets (Blank nodes on subject of change and subject)
4
+ # TODO: assigning identifiers to a changeset docs
5
+ # TODO: linking together changesets using either nodeID or an identifier
7
6
 
8
7
  require 'uri'
9
8
 
@@ -43,15 +42,19 @@ module Pho
43
42
  #Create an RDF/XML fragment describing this Statement
44
43
  def to_rdf()
45
44
  rdf = "<rdf:Statement>"
46
- rdf << " <rdf:subject rdf:resource=\"#{@subject}\"/>"
45
+ rdf << write_subject()
47
46
  rdf << " <rdf:predicate rdf:resource=\"#{@predicate}\"/>"
48
47
  rdf << write_object()
49
48
  rdf << "</rdf:Statement>"
50
49
  return rdf
51
50
  end
52
-
51
+
53
52
  private
54
-
53
+
54
+ def write_subject()
55
+ return " <rdf:subject rdf:resource=\"#{@subject}\"/>"
56
+ end
57
+
55
58
  # subject:: URI of subject of triple
56
59
  # predicate:: URI of predicate of triple
57
60
  # object:: object value of triple (may be URI or literal)
@@ -102,8 +105,16 @@ module Pho
102
105
 
103
106
  protected
104
107
  def write_object()
105
- #FIXME language and datatype
106
- "<rdf:object>#{@object}</rdf:object>"
108
+ tag = ""
109
+ if @datatype != nil
110
+ tag = "<rdf:object rdf:datatype=\"#{@datatype}\">"
111
+ elsif @language != nil
112
+ tag = "<rdf:object xml:lang=\"#{@language}\">"
113
+ else
114
+ tag = "<rdf:object>"
115
+ end
116
+ tag << "#{@object}</rdf:object>"
117
+ return tag
107
118
  end
108
119
  end
109
120
 
@@ -127,6 +138,10 @@ module Pho
127
138
  end
128
139
 
129
140
  protected
141
+ def write_subject()
142
+ return " <rdf:subject rdf:resource=\"#{@subject}\"/>"
143
+ end
144
+
130
145
  def write_object()
131
146
  "<rdf:object rdf:resource=\"#{@object}\"/>"
132
147
  end
@@ -15,14 +15,14 @@ module Pho
15
15
  #
16
16
  #Returns an array of Changeset objects
17
17
  #
18
- # before:: triple hash describing current state of the resource
19
- # after:: triple hash describing updated state of the resource
18
+ # before:: resource hash describing current state of the resource
19
+ # after:: resource hash describing updated state of the resource
20
20
  # creator_name:: name of the creator of the changes
21
21
  # change_reason:: description of why the changes are being made
22
22
  def ChangesetBuilder.build_batch(before, after, creator_name=nil, change_reason=nil)
23
23
 
24
- removals = Pho::RDF_JSON::SetAlgebra.minus(before, after)
25
- additions = Pho::RDF_JSON::SetAlgebra.minus(after, before)
24
+ removals = Pho::ResourceHash::SetAlgebra.minus(before, after)
25
+ additions = Pho::ResourceHash::SetAlgebra.minus(after, before)
26
26
 
27
27
  batch = Array.new
28
28
 
@@ -58,13 +58,13 @@ module Pho
58
58
  #The method will return a single Changeset object.
59
59
  #
60
60
  # subject_of_change:: uri of the resource being updated
61
- # before:: triple hash describing current state of the resource
62
- # after:: triple hash describing updated state of the resource
61
+ # before:: resource hash describing current state of the resource
62
+ # after:: resource hash describing updated state of the resource
63
63
  # creator_name:: name of the creator of the changes
64
64
  # change_reason:: description of why the changes are being made
65
65
  def ChangesetBuilder.build(subject_of_change, before, after, creator_name=nil, change_reason=nil)
66
- removals = Pho::RDF_JSON::SetAlgebra.minus(before, after)
67
- additions = Pho::RDF_JSON::SetAlgebra.minus(after, before)
66
+ removals = Pho::ResourceHash::SetAlgebra.minus(before, after)
67
+ additions = Pho::ResourceHash::SetAlgebra.minus(after, before)
68
68
 
69
69
  cs = Pho::Update::Changeset.new(subject_of_change, creator_name, change_reason) do |cs|
70
70
  cs.add_removals( create_statements(removals) )
@@ -75,9 +75,9 @@ module Pho
75
75
 
76
76
  end
77
77
 
78
- #Takes an triple hash and serializes it as an array of Pho::Update::Statement objects
78
+ #Takes a resource hash and serializes it as an array of Pho::Update::Statement objects
79
79
  #
80
- # triples:: a hash of triples, conforming to RDF-in-JSON structure
80
+ # triples:: a resource hash, conforming to RDF-in-JSON structure
81
81
  def ChangesetBuilder.create_statements(triples)
82
82
  statements = Array.new
83
83
  triples.each do |uri, properties|
@@ -0,0 +1,74 @@
1
+ module Pho
2
+
3
+ module ResourceHash
4
+
5
+ #TODO reading from streams
6
+ #TODO convert into other formats
7
+ #TODO wrap Redland exceptions. Parser/Serializer construction as well as parsing errors
8
+
9
+ require 'rdf/redland'
10
+
11
+ #Class for converting to and from resource hashes
12
+ class Converter
13
+
14
+ #Parse JSON structured according to the RDF-in-JSON specification into
15
+ #a Ruby resource hash. Simply invokes the JSON parser.
16
+ #
17
+ # json:: valid RDF-in-JSON
18
+ def Converter.parse_json(json)
19
+ return JSON.parse(json)
20
+ end
21
+
22
+ #Parse a string containing RDF/XML into a resource hash
23
+ #
24
+ # rdfxml:: a String containing RDF/XML
25
+ def Converter.parse_rdfxml(rdfxml, base_uri)
26
+ return Converter.parse(rdfxml, base_uri, "rdfxml")
27
+ end
28
+
29
+ #Parse a string containing N-Triples into a resource hash
30
+ #
31
+ # ntriples:: a String containing N-Triples
32
+ def Converter.parse_ntriples(ntriples, base_uri)
33
+ return Converter.parse(ntriples, base_uri, "ntriples")
34
+ end
35
+
36
+ #Parse a string containing Turtle into a resource hash
37
+ #
38
+ # turtle:: a String containing Turtle
39
+ def Converter.parse_turtle(turtle, base_uri)
40
+ return Converter.parse(turtle, base_uri, "turtle")
41
+ end
42
+
43
+ #Convert specified format into a ResourceHash
44
+ #
45
+ # format:: one of rdfxml, ntriples, turtle
46
+ # data:: String containing the data to be parsed
47
+ # base_uri:: base uri of the data
48
+ def Converter.parse(data, base_uri, format="rdfxml")
49
+ model = Redland::Model.new()
50
+ case format
51
+ when "rdfxml" then mime="application/rdf+xml"
52
+ when "json" then mime="application/json"
53
+ else mime=""
54
+ end
55
+
56
+ parser = Redland::Parser.new(format, mime)
57
+ parser.parse_string_into_model(model, data, base_uri)
58
+ serializer = Redland::Serializer.new( "json", "application/json" )
59
+ json = serializer.model_to_string(Redland::Uri.new(base_uri), model)
60
+ return Converter.parse_json( json )
61
+ end
62
+
63
+ #Serialize a resource hash as RDF-in-JSON
64
+ #
65
+ # hash:: the resource hash to serialize
66
+ def Converter.serialize_json(hash)
67
+ return JSON.dump(hash)
68
+ end
69
+
70
+
71
+ end
72
+
73
+ end
74
+ end
@@ -0,0 +1,81 @@
1
+ module Pho
2
+
3
+ #Enrichment module. Provides classes and mixins for enriching data held in Platform stores using
4
+ #external SPARQL endpoints and services
5
+ module Enrichment
6
+
7
+ class ResourceEnricher
8
+
9
+ attr_reader :store
10
+ attr_reader :sparql_client
11
+
12
+ # Constructor
13
+ # store:: the store containing resource(s) to enrich
14
+ # sparql_client:: SparqlClient object to be used as client for retrieving enrichments
15
+ def initialize(store, sparql_client)
16
+ @store = store
17
+ @sparql_client = sparql_client
18
+ end
19
+
20
+ #Enrich a store against itself
21
+ #
22
+ #For the common case where a store needs to be enriched against itself by inferring new
23
+ #data from old using a CONSTRUCT query
24
+ #
25
+ # store:: the store to be updated
26
+ # query:: the SPARQL CONSTRUCT query to be used to infer the new data
27
+ def ResourceEnricher.infer(store, query)
28
+ enricher = ResourceEnricher.new( store, store.sparql_client() )
29
+ return enricher.merge( query )
30
+ end
31
+
32
+ #Execute the provided query against the configured SPARQL endpoint and store the results in
33
+ #the Platform Store
34
+ #
35
+ # query:: the SPARQL CONSTRUCT or DESCRIBE query to execute
36
+ def merge(query)
37
+ resp = @sparql_client.query( query, "application/rdf+xml" )
38
+ if resp.status != 200
39
+ raise "Unable to execute query. Response: #{resp.status} #{resp.reason} #{resp.content}"
40
+ end
41
+ resp = @store.store_data( resp.content )
42
+ return resp
43
+ end
44
+
45
+ #TODO: optimize POSTs back to the Platform, to deal with large number of resources, e.g. batching
46
+
47
+ # Enrich a store with data extracted from a SPARQL endpoint.
48
+ #
49
+ # The locator_query is used to find resources in the Platform Store that should be enriched. The query
50
+ # should be a SPARQL SELECT query that returns the data to be used to parameterize the enrichment_query.
51
+ #
52
+ # For each query result, the enrichment_query will be submitted to the configured SPARQL endpoint, after
53
+ # first interpolating the string, providing the query result bindings as parameters. (See SparqlHelper.apply_initial_bindings
54
+ # and SparqlHelper.results_to_query_bindings)
55
+ #
56
+ # If successful, the result of each enrichment query will then be pushed back into the Platform Store by
57
+ # posting the results of the query to the metabox. Enrichment queries should therefore be CONSTRUCT or
58
+ # DESCRIBE queries. The SPARQL protocol request will be sent with an Accept header of application/rdf+xml
59
+ #
60
+ # locator_query:: query to locate resources to be enriched
61
+ # enrichment_query:: query to be used to enrich the resource
62
+ def enrich(locator_query, enrichment_query)
63
+ results = Pho::Sparql::SparqlHelper.select(locator_query, @store.sparql_client() )
64
+ bindings = Pho::Sparql::SparqlHelper.results_to_query_bindings(results)
65
+ bindings.each do |binding|
66
+ bound_query = Pho::Sparql::SparqlHelper.apply_initial_bindings(enrichment_query, binding)
67
+
68
+ result = @sparql_client.query(bound_query, "application/rdf+xml")
69
+
70
+ #TODO error handling and callbacks
71
+ if result.status == 200
72
+ resp = @store.store_data( result.content )
73
+ end
74
+
75
+ end
76
+ end
77
+
78
+ end
79
+
80
+ end
81
+ end
@@ -1,5 +1,6 @@
1
1
  require 'yaml'
2
2
 
3
+ #TODO put together proper test suite for this
3
4
  module Pho
4
5
 
5
6
  #Simple mechanism for managing etags
@@ -116,7 +116,12 @@ module Pho
116
116
 
117
117
  json = JSON.parse( resp.content )
118
118
 
119
- label = json[fpmap_uri]["http:\/\/www.w3.org\/2000\/01\/rdf-schema#label"][0]["value"]
119
+ labels = json[fpmap_uri]["http:\/\/www.w3.org\/2000\/01\/rdf-schema#label"]
120
+ label = ""
121
+ if labels != nil
122
+ label = labels[0]["value"]
123
+ end
124
+
120
125
  fpmap = FieldPredicateMap.new(fpmap_uri, label)
121
126
 
122
127
  mapped_properties = json[fpmap_uri]["http:\/\/schemas.talis.com\/2006\/frame\/schema#mappedDatatypeProperty"]
@@ -0,0 +1,102 @@
1
+ module Pho
2
+
3
+ module FileManagement
4
+
5
+ #TODO: move files into hidden directory
6
+ #TODO: add directory traversal
7
+ class AbstractFileManager
8
+
9
+ attr_reader :dir
10
+ attr_reader :store
11
+
12
+ OK = "ok".freeze
13
+ FAIL = "fail".freeze
14
+
15
+ def initialize(store, dir, ok_suffix=OK, fail_suffix=FAIL, sleep=1)
16
+ @store = store
17
+ @dir = dir
18
+ @sleep = sleep
19
+ @ok_suffix = ok_suffix
20
+ @fail_suffix = fail_suffix
21
+ end
22
+
23
+ #Store all files that match the file name in directory
24
+ def store()
25
+ files_to_store = new_files()
26
+ files_to_store.each do |filename|
27
+ file = File.new(filename)
28
+ store_file(file, filename)
29
+ end
30
+ end
31
+
32
+ #Retry anything known to have failed
33
+ def retry_failures()
34
+ retries = failures()
35
+ retries.each do |filename|
36
+ File.delete( get_fail_file_for(filename) )
37
+ #store it
38
+ file = File.new(filename)
39
+ store_file(file, filename)
40
+ end
41
+ end
42
+
43
+ #Reset the directory to clear out any previous statuses
44
+ #Store can also be reset at the same time: use with care!
45
+ def reset(reset_store=false)
46
+ Dir.glob( File.join(@dir, "*.#{@fail_suffix}") ).each do |file|
47
+ File.delete(file)
48
+ end
49
+ Dir.glob( File.join(@dir, "*.#{@ok_suffix}") ).each do |file|
50
+ File.delete(file)
51
+ end
52
+ end
53
+
54
+ #List failures
55
+ def failures()
56
+ fails = Array.new
57
+ Dir.glob( File.join(@dir, "*") ).each do |file|
58
+ if File.extname(file) != ".#{@fail_suffix}" && File.extname(file) != ".#{@ok_suffix}"
59
+ fails << file if File.exists?( get_fail_file_for(file) )
60
+ end
61
+ end
62
+ return fails
63
+ end
64
+
65
+ #List successes
66
+ def successes()
67
+ successes = Array.new
68
+ Dir.glob( File.join(@dir, "*") ).each do |file|
69
+ if File.extname(file) != ".#{@fail_suffix}" && File.extname(file) != ".#{@ok_suffix}"
70
+ successes << file if File.exists?( get_ok_file_for(file) )
71
+ end
72
+ end
73
+ return successes
74
+ end
75
+
76
+ #Summarize the state of the collection, returning the summary as a String
77
+ #Creates a simple report
78
+ def summary()
79
+ failures = failures()
80
+ successes = successes()
81
+ newfiles = new_files()
82
+ total = failures.size + successes.size + newfiles.size
83
+ summary = "#{@dir} contains #{total} files: #{successes.size} stored, #{failures.size} failed, #{newfiles.size} new"
84
+ return summary
85
+ end
86
+
87
+ def get_fail_file_for(filename)
88
+ ext = File.extname(filename)
89
+ return filename.gsub(/#{ext}/, ".#{@fail_suffix}")
90
+ end
91
+
92
+ def get_ok_file_for(filename)
93
+ ext = File.extname(filename)
94
+ return filename.gsub(/#{ext}/, ".#{@ok_suffix}")
95
+ end
96
+
97
+ end
98
+
99
+ end
100
+ #end file Module
101
+
102
+ end