pho 0.5 → 0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +17 -0
- data/Rakefile +10 -2
- data/bin/talis_store +37 -92
- data/doc/rdoc/classes/Pho.html +11 -0
- data/doc/rdoc/classes/Pho/CommandLine.html +508 -0
- data/doc/rdoc/classes/Pho/DatatypeProperty.html +15 -15
- data/doc/rdoc/classes/Pho/Enrichment.html +1 -1
- data/doc/rdoc/classes/Pho/Enrichment/{ResourceEnricher.html → StoreEnricher.html} +158 -58
- data/doc/rdoc/classes/Pho/Etags.html +36 -36
- data/doc/rdoc/classes/Pho/Facet/Results.html +19 -19
- data/doc/rdoc/classes/Pho/Facet/Term.html +6 -6
- data/doc/rdoc/classes/Pho/FieldPredicateMap.html +94 -94
- data/doc/rdoc/classes/Pho/FieldWeighting.html +14 -14
- data/doc/rdoc/classes/Pho/FileManagement/AbstractFileManager.html +126 -91
- data/doc/rdoc/classes/Pho/FileManagement/FileManager.html +68 -62
- data/doc/rdoc/classes/Pho/FileManagement/RDFManager.html +22 -57
- data/doc/rdoc/classes/Pho/Job.html +68 -67
- data/doc/rdoc/classes/Pho/Jobs.html +62 -62
- data/doc/rdoc/classes/Pho/QueryProfile.html +64 -64
- data/doc/rdoc/classes/Pho/RDF.html +120 -0
- data/doc/rdoc/classes/Pho/RDF/Parser.html +277 -0
- data/doc/rdoc/classes/Pho/ResourceHash.html +1 -1
- data/doc/rdoc/classes/Pho/ResourceHash/Converter.html +48 -46
- data/doc/rdoc/classes/Pho/ResourceHash/SetAlgebra.html +15 -14
- data/doc/rdoc/classes/Pho/Snapshot.html +36 -36
- data/doc/rdoc/classes/Pho/Sparql.html +50 -0
- data/doc/rdoc/classes/Pho/Sparql/SparqlClient.html +147 -68
- data/doc/rdoc/classes/Pho/Sparql/SparqlHelper.html +195 -114
- data/doc/rdoc/classes/Pho/Status.html +26 -26
- data/doc/rdoc/classes/Pho/Store.html +271 -264
- data/doc/rdoc/classes/Pho/StoreSparqlClient.html +183 -0
- data/doc/rdoc/classes/Pho/Update.html +1 -0
- data/doc/rdoc/classes/Pho/Update/Changeset.html +69 -68
- data/doc/rdoc/classes/Pho/Update/ChangesetBuilder.html +24 -24
- data/doc/rdoc/classes/Pho/Update/Changesets.html +15 -14
- data/doc/rdoc/classes/Pho/Update/LiteralStatement.html +18 -18
- data/doc/rdoc/classes/Pho/Update/ResourceStatement.html +24 -24
- data/doc/rdoc/classes/Pho/Update/Statement.html +28 -27
- data/doc/rdoc/classes/String.html +1 -1
- data/doc/rdoc/created.rid +1 -1
- data/doc/rdoc/files/CHANGES.html +53 -1
- data/doc/rdoc/files/lib/pho/changeset_rb.html +1 -1
- data/doc/rdoc/files/lib/pho/command_line_rb.html +101 -0
- data/doc/rdoc/files/lib/pho/converter_rb.html +1 -1
- data/doc/rdoc/files/lib/pho/enrichment_rb.html +1 -1
- data/doc/rdoc/files/lib/pho/file_management_rb.html +1 -1
- data/doc/rdoc/files/lib/pho/file_manager_rb.html +1 -1
- data/doc/rdoc/files/lib/pho/rdf_collection_rb.html +1 -1
- data/doc/rdoc/files/lib/pho/rdf_rb.html +108 -0
- data/doc/rdoc/files/lib/pho/sparql_rb.html +1 -1
- data/doc/rdoc/files/lib/pho/store_rb.html +8 -1
- data/doc/rdoc/files/lib/pho_rb.html +4 -2
- data/doc/rdoc/fr_class_index.html +6 -1
- data/doc/rdoc/fr_file_index.html +2 -0
- data/doc/rdoc/fr_method_index.html +192 -172
- data/examples/calais/bio.txt +7 -0
- data/examples/calais/dump.rb +17 -0
- data/examples/calais/enlighten.rb +23 -0
- data/examples/calais/output.rdf +25 -0
- data/examples/sinatra/viewer.rb +20 -0
- data/lib/pho.rb +4 -1
- data/lib/pho/changeset.rb +17 -1
- data/lib/pho/command_line.rb +166 -0
- data/lib/pho/converter.rb +8 -3
- data/lib/pho/enrichment.rb +93 -28
- data/lib/pho/file_management.rb +32 -19
- data/lib/pho/file_manager.rb +22 -26
- data/lib/pho/rdf.rb +74 -0
- data/lib/pho/rdf_collection.rb +2 -17
- data/lib/pho/sparql.rb +150 -38
- data/lib/pho/store.rb +28 -2
- data/tests/tc_contentbox.rb +41 -0
- data/tests/tc_enrichment.rb +41 -4
- data/tests/tc_file_manager.rb +97 -7
- data/tests/tc_rdf_collection.rb +25 -13
- data/tests/tc_rdf_parser.rb +37 -0
- data/tests/tc_sparql.rb +72 -2
- data/tests/tc_sparql_helper.rb +16 -0
- data/tests/ts_pho.rb +2 -1
- metadata +20 -3
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
require 'rubygems'
|
|
2
|
+
require 'pho'
|
|
3
|
+
require 'sinatra'
|
|
4
|
+
|
|
5
|
+
#SETUP
|
|
6
|
+
store = Pho::Store.new("http://api.talis.com/stores/space")
|
|
7
|
+
mime :rdf, "application/rdf+xml"
|
|
8
|
+
|
|
9
|
+
#ROUTES
|
|
10
|
+
|
|
11
|
+
#E.g. http://127.0.0.1:4567/?url=http://nasa.dataincubator.org/spacecraft/1969-059A
|
|
12
|
+
get "/*" do
|
|
13
|
+
sparql_client = store.sparql_client()
|
|
14
|
+
resp = sparql_client.describe_uri(params[:url], "application/rdf+xml", :lcbd)
|
|
15
|
+
|
|
16
|
+
if resp.status != 200
|
|
17
|
+
halt resp.status, resp.message
|
|
18
|
+
end
|
|
19
|
+
content_type :rdf
|
|
20
|
+
resp.content
|
data/lib/pho.rb
CHANGED
|
@@ -16,11 +16,14 @@ require 'pho/file_management'
|
|
|
16
16
|
require 'pho/rdf_collection'
|
|
17
17
|
require 'pho/file_manager'
|
|
18
18
|
require 'pho/resource_hash'
|
|
19
|
-
require 'pho/converter'
|
|
20
19
|
require 'pho/changeset_builder'
|
|
21
20
|
require 'pho/changeset'
|
|
22
21
|
require 'pho/sparql'
|
|
23
22
|
require 'pho/enrichment'
|
|
23
|
+
require 'pho/command_line'
|
|
24
|
+
#Following depend on redland
|
|
25
|
+
require 'pho/converter'
|
|
26
|
+
require 'pho/rdf'
|
|
24
27
|
|
|
25
28
|
if RUBY_VERSION < "1.8.7"
|
|
26
29
|
class String
|
data/lib/pho/changeset.rb
CHANGED
|
@@ -320,7 +320,23 @@ module Pho
|
|
|
320
320
|
|
|
321
321
|
#end changeset
|
|
322
322
|
end
|
|
323
|
-
|
|
323
|
+
|
|
324
|
+
#Utility methods for making changes to graphs via Changesets
|
|
325
|
+
class ChangesetHelper
|
|
326
|
+
|
|
327
|
+
def ChangesetHelper.update_literal(store, subject, predicate, old_value, new_value, old_lang=nil, old_datatype=nil,
|
|
328
|
+
new_lang=nil, new_datatype=nil, creator_name=nil, change_reason=nil, versioned=false)
|
|
329
|
+
cs = Changeset.new(subject, creator_name, change_reason)
|
|
330
|
+
old = Statement.create_literal(subject, predicate, old_value, old_lang, old_datatype)
|
|
331
|
+
new = Statement.create_literal(subject, predicate, new_value, new_lang, new_datatype)
|
|
332
|
+
cs.add_removal(old)
|
|
333
|
+
cs.add_addition(new)
|
|
334
|
+
return cs.submit(store, versioned)
|
|
335
|
+
end
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
end
|
|
339
|
+
|
|
324
340
|
#end module
|
|
325
341
|
end
|
|
326
342
|
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
module Pho
|
|
2
|
+
|
|
3
|
+
#Class that implements the command-line behaviour
|
|
4
|
+
class CommandLine
|
|
5
|
+
|
|
6
|
+
def initialize(store, opts)
|
|
7
|
+
@store = store
|
|
8
|
+
@opts = opts
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def status()
|
|
12
|
+
status = Pho::Status.read_from_store(@store)
|
|
13
|
+
puts "Store Status:\nReadable: #{status.readable?}\nWritable: #{status.writeable?}"
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def backup()
|
|
17
|
+
puts "Submitting Snapshot Job"
|
|
18
|
+
resp = Pho::Jobs.submit_snapshot(@store, "Reindex", Time.now)
|
|
19
|
+
puts "Monitoring Job"
|
|
20
|
+
job = Pho::Jobs.wait_for_submitted(resp, @store) do |job, message, time|
|
|
21
|
+
puts "#{time} #{message}"
|
|
22
|
+
end
|
|
23
|
+
puts "Job Completed"
|
|
24
|
+
snapshot = Pho::Snapshot.read_from_store(@store)
|
|
25
|
+
puts "Retrieving #{snapshot.url}"
|
|
26
|
+
dir = @opts["dir"] || Dir.tmpdir
|
|
27
|
+
snapshot.backup(@store, dir)
|
|
28
|
+
puts "Download complete. MD5 OK."
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def snapshot()
|
|
32
|
+
puts "Submitting Snapshot Job"
|
|
33
|
+
resp = Pho::Jobs.submit_snapshot(@store, "Reindex", Time.now)
|
|
34
|
+
puts "Monitoring Job"
|
|
35
|
+
job = Pho::Jobs.wait_for_submitted(resp, @store) do |job, message, time|
|
|
36
|
+
puts "#{time} #{message}"
|
|
37
|
+
end
|
|
38
|
+
puts "Snapshot Completed"
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def reindex()
|
|
42
|
+
puts "Submitting Reindex Job"
|
|
43
|
+
resp = Pho::Jobs.submit_reindex(@store, "Reindex", Time.now)
|
|
44
|
+
if resp.status == 201
|
|
45
|
+
puts "Monitoring Reindex Job: #{resp.header["Location"].first}"
|
|
46
|
+
end
|
|
47
|
+
job = Pho::Jobs.wait_for_submitted(resp, @store) do |job, message, time|
|
|
48
|
+
puts "#{time} #{message}"
|
|
49
|
+
end
|
|
50
|
+
puts "Reindex Completed"
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def reset()
|
|
54
|
+
puts "Resetting Store"
|
|
55
|
+
resp = Pho::Jobs.submit_reset(@store, "Reset", Time.now)
|
|
56
|
+
puts "Monitoring Reset Job:"
|
|
57
|
+
job = Pho::Jobs.wait_for_submitted(resp, @store) do |job, message, time|
|
|
58
|
+
puts "#{time} #{message}"
|
|
59
|
+
end
|
|
60
|
+
puts "Reset Completed"
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def restore()
|
|
64
|
+
url = @opts["url"]
|
|
65
|
+
if url == nil
|
|
66
|
+
puts "Restore from latest snapshot"
|
|
67
|
+
snapshot = Pho::Snapshot.read_from_store(@store)
|
|
68
|
+
url = snapshot.url
|
|
69
|
+
end
|
|
70
|
+
puts "Restoring from #{url}"
|
|
71
|
+
resp = Pho::Jobs.submit_restore(@store, url, "Reset", Time.now)
|
|
72
|
+
job = Pho::Jobs.wait_for_submitted(resp, @store) do |job, message, time|
|
|
73
|
+
puts "#{time} #{message}"
|
|
74
|
+
end
|
|
75
|
+
puts "Restore Completed"
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def describe()
|
|
79
|
+
resp = @store.describe( @opts["url"] )
|
|
80
|
+
if resp.status == 200
|
|
81
|
+
puts resp.content
|
|
82
|
+
else
|
|
83
|
+
puts "Error: #{resp.status} #{resp.reason}"
|
|
84
|
+
puts resp.content
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def sparql()
|
|
89
|
+
query = File.new( @opts["file"] ).read()
|
|
90
|
+
resp = @store.sparql(query)
|
|
91
|
+
if resp.status == 200
|
|
92
|
+
puts resp.content
|
|
93
|
+
else
|
|
94
|
+
puts "Error: #{resp.status} #{resp.reason}"
|
|
95
|
+
puts resp.content
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
def store()
|
|
100
|
+
resp = nil
|
|
101
|
+
if @opts["url"]
|
|
102
|
+
puts "Storing remote data: #{@opts["url"]}"
|
|
103
|
+
resp = @store.store_url( @opts["url"] )
|
|
104
|
+
elsif @opts["file"]
|
|
105
|
+
puts "Storing local file: #{@opts["file"]}"
|
|
106
|
+
f = File.new( @opts["file"] )
|
|
107
|
+
if File.extname(f) == ".nt"
|
|
108
|
+
Pho::RDF::Parser.store_ntriples(f, @store)
|
|
109
|
+
else
|
|
110
|
+
resp = @store.store_file( f )
|
|
111
|
+
end
|
|
112
|
+
elsif @opts["dir"]
|
|
113
|
+
puts "Storing contents of directory: #{@opts["dir"]}"
|
|
114
|
+
collection = Pho::RDFCollection.new(@store, @opts["dir"])
|
|
115
|
+
collection.store()
|
|
116
|
+
puts collection.summary()
|
|
117
|
+
else
|
|
118
|
+
#noop
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
if resp != nil
|
|
122
|
+
if resp.status == 204
|
|
123
|
+
puts "Complete"
|
|
124
|
+
else
|
|
125
|
+
puts "Error: #{resp.status} #{resp.reason}"
|
|
126
|
+
puts resp.content
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
def upload()
|
|
132
|
+
resp = nil
|
|
133
|
+
if @opts["file"]
|
|
134
|
+
f = File.new( @opts["file"] )
|
|
135
|
+
uri = File.basename( @opts["file"] )
|
|
136
|
+
uri = "#{opts["base"]}/#{uri}" if @opts["base"]
|
|
137
|
+
mime = MIME::Types.type_for( @opts["file"] )[0].to_s
|
|
138
|
+
puts "Uploading file: #{ @opts["file"] } to /items/#{ uri } as #{mime}"
|
|
139
|
+
resp = @store.upload_item( f , mime , uri )
|
|
140
|
+
elsif @opts["dir"]
|
|
141
|
+
puts "Uploading contents of directory: #{@opts["dir"]}"
|
|
142
|
+
collection = Pho::FileManagement::FileManager.new(@store, @opts["dir"], @opts["base"])
|
|
143
|
+
if opts["traverse"]
|
|
144
|
+
collection.store(:traverse)
|
|
145
|
+
puts collection.summary(:traverse)
|
|
146
|
+
else
|
|
147
|
+
collection.store()
|
|
148
|
+
puts collection.summary()
|
|
149
|
+
end
|
|
150
|
+
else
|
|
151
|
+
#noop
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
if resp != nil
|
|
155
|
+
if resp.status == 204
|
|
156
|
+
puts "Complete"
|
|
157
|
+
else
|
|
158
|
+
puts "Error: #{resp.status} #{resp.reason}"
|
|
159
|
+
puts resp.content
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
end
|
data/lib/pho/converter.rb
CHANGED
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
module Pho
|
|
2
2
|
|
|
3
|
+
#Resource Hashes
|
|
4
|
+
#
|
|
5
|
+
#Dependent on the redland ruby bindings
|
|
3
6
|
module ResourceHash
|
|
4
7
|
|
|
5
|
-
#TODO reading from streams
|
|
6
|
-
#TODO convert into other formats
|
|
7
8
|
#TODO wrap Redland exceptions. Parser/Serializer construction as well as parsing errors
|
|
8
9
|
|
|
9
|
-
|
|
10
|
+
begin
|
|
11
|
+
require 'rdf/redland'
|
|
12
|
+
rescue LoadError
|
|
13
|
+
$stderr.puts "WARNING: Unable to load redland-ruby bindings. Changeset support unavailable"
|
|
14
|
+
end
|
|
10
15
|
|
|
11
16
|
#Class for converting to and from resource hashes
|
|
12
17
|
class Converter
|
data/lib/pho/enrichment.rb
CHANGED
|
@@ -4,14 +4,15 @@ module Pho
|
|
|
4
4
|
#external SPARQL endpoints and services
|
|
5
5
|
module Enrichment
|
|
6
6
|
|
|
7
|
-
class
|
|
7
|
+
class StoreEnricher
|
|
8
8
|
|
|
9
9
|
attr_reader :store
|
|
10
10
|
attr_reader :sparql_client
|
|
11
11
|
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
12
|
+
#Constructor
|
|
13
|
+
#
|
|
14
|
+
#store:: the store containing resource(s) to enrich
|
|
15
|
+
#sparql_client:: SparqlClient object to be used as client for retrieving enrichments
|
|
15
16
|
def initialize(store, sparql_client)
|
|
16
17
|
@store = store
|
|
17
18
|
@sparql_client = sparql_client
|
|
@@ -22,56 +23,120 @@ module Pho
|
|
|
22
23
|
#For the common case where a store needs to be enriched against itself by inferring new
|
|
23
24
|
#data from old using a CONSTRUCT query
|
|
24
25
|
#
|
|
25
|
-
#
|
|
26
|
-
#
|
|
27
|
-
def
|
|
28
|
-
enricher =
|
|
29
|
-
return enricher.merge( query )
|
|
26
|
+
#store:: the store to be updated
|
|
27
|
+
#query:: the SPARQL CONSTRUCT query to be used to infer the new data
|
|
28
|
+
def StoreEnricher.infer(store, query, &block)
|
|
29
|
+
enricher = StoreEnricher.new( store, store.sparql_client() )
|
|
30
|
+
return enricher.merge( query, &block )
|
|
30
31
|
end
|
|
31
32
|
|
|
32
33
|
#Execute the provided query against the configured SPARQL endpoint and store the results in
|
|
33
34
|
#the Platform Store
|
|
34
35
|
#
|
|
35
|
-
#
|
|
36
|
+
#query:: the SPARQL CONSTRUCT or DESCRIBE query to execute
|
|
36
37
|
def merge(query)
|
|
37
38
|
resp = @sparql_client.query( query, "application/rdf+xml" )
|
|
38
39
|
if resp.status != 200
|
|
39
40
|
raise "Unable to execute query. Response: #{resp.status} #{resp.reason} #{resp.content}"
|
|
40
41
|
end
|
|
41
|
-
|
|
42
|
+
data = resp.content
|
|
43
|
+
resp = @store.store_data( data )
|
|
44
|
+
if block_given?
|
|
45
|
+
yield resp, data
|
|
46
|
+
end
|
|
47
|
+
|
|
42
48
|
return resp
|
|
43
49
|
end
|
|
44
50
|
|
|
45
51
|
#TODO: optimize POSTs back to the Platform, to deal with large number of resources, e.g. batching
|
|
46
52
|
|
|
47
|
-
#
|
|
53
|
+
#Enrich a store with data extracted from a SPARQL endpoint.
|
|
54
|
+
#
|
|
55
|
+
#The locator_query is used to find resources in the Platform Store that should be enriched. The query
|
|
56
|
+
#should be a SPARQL SELECT query that returns the data to be used to parameterize the enrichment_query.
|
|
57
|
+
#
|
|
58
|
+
#For each query result, the enrichment_query will be submitted to the configured SPARQL endpoint, after
|
|
59
|
+
#first interpolating the string, providing the query result bindings as parameters. (See SparqlHelper.apply_initial_bindings
|
|
60
|
+
#and SparqlHelper.results_to_query_bindings.)
|
|
61
|
+
#
|
|
62
|
+
#If successful, the result of each enrichment query will then be pushed back into the Platform Store by
|
|
63
|
+
#posting the results of the query to the metabox. Enrichment queries should therefore be CONSTRUCT or
|
|
64
|
+
#DESCRIBE queries. The SPARQL protocol request will be sent with an Accept header of application/rdf+xml
|
|
65
|
+
#
|
|
66
|
+
#The method supports a callback object that can be provided as an optional parameter to the query. If provided then
|
|
67
|
+
#the object should respond to either or both of the following methods:
|
|
68
|
+
# pre_process(rdf_xml)
|
|
69
|
+
# post_process(resp, rdf_xml)
|
|
70
|
+
#The first of these is invoked after each enrichment query has been executed on the configured SPARQL endpoint. It is
|
|
71
|
+
#intended to support additional filtering or annotation. If the +pre_process+ method returns nil, then no data will be written
|
|
72
|
+
#to the store, otherwise the return value is substituted instead of the original value.
|
|
73
|
+
#
|
|
74
|
+
#The second callback method, +post_process+ is called after data has been written to the store and provides access to the
|
|
75
|
+
#response from the store, and to the RDF/XML data that was submitted for storage. As the request may have been unsuccessful,
|
|
76
|
+
#code should check the status on the HTTPMessage parameter.
|
|
48
77
|
#
|
|
49
|
-
#
|
|
50
|
-
#
|
|
78
|
+
# class MyCallback
|
|
79
|
+
# def pre_process(rdf)
|
|
80
|
+
# if !should_store?(rdf)
|
|
81
|
+
# return nil
|
|
82
|
+
# end
|
|
83
|
+
# return rdf
|
|
84
|
+
# end
|
|
85
|
+
# def post_process(resp, rdf)
|
|
86
|
+
# puts "Store returned #{resp.status} when storing: #{rdf}"
|
|
87
|
+
# end
|
|
88
|
+
# end
|
|
89
|
+
# callback = MyCallback.new()
|
|
90
|
+
# enricher.enrich("SELECT ?item WHERE { ?item a ex:Class } LIMIT 10", "DESCRIBE ?item", callback)
|
|
51
91
|
#
|
|
52
|
-
#
|
|
53
|
-
#
|
|
54
|
-
# and
|
|
92
|
+
#The callback support is primarily intended to support filtering and notification of activities. For simple logging purposes, the
|
|
93
|
+
#method also supports a block parameter. This is invoked after each enrichment query, and each store response. The block can
|
|
94
|
+
#receive two values: the first is a symbol (either +:query+ or +:store+) indicating the source of the response, and the second is the response
|
|
95
|
+
#object itself. E.g:
|
|
55
96
|
#
|
|
56
|
-
#
|
|
57
|
-
#
|
|
58
|
-
#
|
|
97
|
+
# enricher.enrich("SELECT ?item WHERE { ?item a ex:Class } LIMIT 10", "DESCRIBE ?item") do |source, resp|
|
|
98
|
+
# if source == :query
|
|
99
|
+
# puts "Enrichment query returned #{resp.status}"
|
|
100
|
+
# else
|
|
101
|
+
# puts "Store returned #{resp.status} when storing data"
|
|
102
|
+
# end
|
|
103
|
+
# end
|
|
59
104
|
#
|
|
60
|
-
#
|
|
61
|
-
#
|
|
62
|
-
|
|
105
|
+
#locator_query:: query to locate resources to be enriched
|
|
106
|
+
#enrichment_query:: query to be used to enrich the resource
|
|
107
|
+
#callback:: optional callback object
|
|
108
|
+
def enrich(locator_query, enrichment_query, callback=nil)
|
|
63
109
|
results = Pho::Sparql::SparqlHelper.select(locator_query, @store.sparql_client() )
|
|
64
110
|
bindings = Pho::Sparql::SparqlHelper.results_to_query_bindings(results)
|
|
65
111
|
bindings.each do |binding|
|
|
66
112
|
bound_query = Pho::Sparql::SparqlHelper.apply_initial_bindings(enrichment_query, binding)
|
|
67
113
|
|
|
68
|
-
|
|
114
|
+
#TODO counting numbers of requests and responses?
|
|
115
|
+
query_response = @sparql_client.query(bound_query, "application/rdf+xml")
|
|
69
116
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
end
|
|
117
|
+
if block_given?
|
|
118
|
+
yield :query, query_response
|
|
119
|
+
end
|
|
74
120
|
|
|
121
|
+
if query_response.status == 200
|
|
122
|
+
result = query_response.content
|
|
123
|
+
if callback != nil && callback.respond_to?(:pre_process)
|
|
124
|
+
result = callback.pre_process(result)
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
if result != nil
|
|
128
|
+
store_response = @store.store_data( result )
|
|
129
|
+
if callback != nil && callback.respond_to?(:post_process)
|
|
130
|
+
callback.post_process(store_response, result)
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
if block_given?
|
|
134
|
+
yield :store, store_response
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
end
|
|
139
|
+
#end each binding
|
|
75
140
|
end
|
|
76
141
|
end
|
|
77
142
|
|
data/lib/pho/file_management.rb
CHANGED
|
@@ -3,7 +3,6 @@ module Pho
|
|
|
3
3
|
module FileManagement
|
|
4
4
|
|
|
5
5
|
#TODO: move files into hidden directory
|
|
6
|
-
#TODO: add directory traversal
|
|
7
6
|
class AbstractFileManager
|
|
8
7
|
|
|
9
8
|
attr_reader :dir
|
|
@@ -21,12 +20,12 @@ module Pho
|
|
|
21
20
|
end
|
|
22
21
|
|
|
23
22
|
#Store all files that match the file name in directory
|
|
24
|
-
def store()
|
|
25
|
-
files_to_store = new_files()
|
|
23
|
+
def store(recursive=false)
|
|
24
|
+
files_to_store = new_files(recursive)
|
|
26
25
|
files_to_store.each do |filename|
|
|
27
26
|
file = File.new(filename)
|
|
28
27
|
store_file(file, filename)
|
|
29
|
-
end
|
|
28
|
+
end
|
|
30
29
|
end
|
|
31
30
|
|
|
32
31
|
#Retry anything known to have failed
|
|
@@ -41,8 +40,7 @@ module Pho
|
|
|
41
40
|
end
|
|
42
41
|
|
|
43
42
|
#Reset the directory to clear out any previous statuses
|
|
44
|
-
|
|
45
|
-
def reset(reset_store=false)
|
|
43
|
+
def reset()
|
|
46
44
|
Dir.glob( File.join(@dir, "*.#{@fail_suffix}") ).each do |file|
|
|
47
45
|
File.delete(file)
|
|
48
46
|
end
|
|
@@ -50,11 +48,26 @@ module Pho
|
|
|
50
48
|
File.delete(file)
|
|
51
49
|
end
|
|
52
50
|
end
|
|
53
|
-
|
|
51
|
+
|
|
52
|
+
#List any new files in the directory
|
|
53
|
+
def new_files(recursive=false)
|
|
54
|
+
newfiles = Array.new
|
|
55
|
+
list(recursive).each do |file|
|
|
56
|
+
|
|
57
|
+
ok_file = get_ok_file_for(file)
|
|
58
|
+
fail_file = get_fail_file_for(file)
|
|
59
|
+
if !( File.exists?(ok_file) or File.exists?(fail_file) )
|
|
60
|
+
newfiles << file
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
end
|
|
64
|
+
return newfiles
|
|
65
|
+
end
|
|
66
|
+
|
|
54
67
|
#List failures
|
|
55
|
-
def failures()
|
|
68
|
+
def failures(recursive=false)
|
|
56
69
|
fails = Array.new
|
|
57
|
-
|
|
70
|
+
list(recursive).each do |file|
|
|
58
71
|
if File.extname(file) != ".#{@fail_suffix}" && File.extname(file) != ".#{@ok_suffix}"
|
|
59
72
|
fails << file if File.exists?( get_fail_file_for(file) )
|
|
60
73
|
end
|
|
@@ -63,9 +76,9 @@ module Pho
|
|
|
63
76
|
end
|
|
64
77
|
|
|
65
78
|
#List successes
|
|
66
|
-
def successes()
|
|
79
|
+
def successes(recursive=false)
|
|
67
80
|
successes = Array.new
|
|
68
|
-
|
|
81
|
+
list(recursive).each do |file|
|
|
69
82
|
if File.extname(file) != ".#{@fail_suffix}" && File.extname(file) != ".#{@ok_suffix}"
|
|
70
83
|
successes << file if File.exists?( get_ok_file_for(file) )
|
|
71
84
|
end
|
|
@@ -73,25 +86,25 @@ module Pho
|
|
|
73
86
|
return successes
|
|
74
87
|
end
|
|
75
88
|
|
|
76
|
-
#Summarize the state of the collection to the
|
|
89
|
+
#Summarize the state of the collection, returning the summary as a string
|
|
77
90
|
#Creates a simple report
|
|
78
|
-
def summary()
|
|
79
|
-
failures = failures()
|
|
80
|
-
successes = successes()
|
|
81
|
-
newfiles = new_files()
|
|
91
|
+
def summary(recursive=false)
|
|
92
|
+
failures = failures(recursive)
|
|
93
|
+
successes = successes(recursive)
|
|
94
|
+
newfiles = new_files(recursive)
|
|
82
95
|
total = failures.size + successes.size + newfiles.size
|
|
83
|
-
summary = "#{@dir} contains #{total} files: #{successes.size} stored, #{failures.size} failed, #{newfiles.size} new"
|
|
96
|
+
summary = "#{@dir} #{recursive ? " and sub-directories" : ""} contains #{total} files: #{successes.size} stored, #{failures.size} failed, #{newfiles.size} new"
|
|
84
97
|
return summary
|
|
85
98
|
end
|
|
86
99
|
|
|
87
100
|
def get_fail_file_for(filename)
|
|
88
101
|
ext = File.extname(filename)
|
|
89
|
-
return filename.gsub(/#{ext}
|
|
102
|
+
return filename.gsub(/#{ext}$/, ".#{@fail_suffix}")
|
|
90
103
|
end
|
|
91
104
|
|
|
92
105
|
def get_ok_file_for(filename)
|
|
93
106
|
ext = File.extname(filename)
|
|
94
|
-
return filename.gsub(/#{ext}
|
|
107
|
+
return filename.gsub(/#{ext}$/, ".#{@ok_suffix}")
|
|
95
108
|
end
|
|
96
109
|
|
|
97
110
|
end
|