rdf-sak 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE +202 -0
- data/README.md +268 -0
- data/Rakefile +13 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/example/cleanup.xsl +14 -0
- data/example/matches.xhtml +11 -0
- data/example/transforms.ttl +58 -0
- data/lib/rdf-sak.rb +1 -0
- data/lib/rdf/sak.rb +2506 -0
- data/lib/rdf/sak/ci.rb +827 -0
- data/lib/rdf/sak/cli.rb +35 -0
- data/lib/rdf/sak/docstats.rb +188 -0
- data/lib/rdf/sak/document.rb +772 -0
- data/lib/rdf/sak/ibis.rb +248 -0
- data/lib/rdf/sak/mimemagic.rb +73 -0
- data/lib/rdf/sak/pav.rb +479 -0
- data/lib/rdf/sak/qb.rb +280 -0
- data/lib/rdf/sak/scovo.rb +51 -0
- data/lib/rdf/sak/tfo.rb +301 -0
- data/lib/rdf/sak/transform.rb +1172 -0
- data/lib/rdf/sak/urlrunner.rb +602 -0
- data/lib/rdf/sak/util.rb +2081 -0
- data/lib/rdf/sak/version.rb +5 -0
- data/rdf-sak.gemspec +60 -0
- metadata +366 -0
@@ -0,0 +1,1172 @@
|
|
1
|
+
require 'rdf'
|
2
|
+
require 'rdf/vocab'
|
3
|
+
require 'rdf/sak/tfo'
|
4
|
+
require 'rdf/sak/util'
|
5
|
+
require 'set'
|
6
|
+
require 'mimemagic'
|
7
|
+
require 'http/negotiate'
|
8
|
+
require 'time'
|
9
|
+
|
10
|
+
# This class encapsulates a specification for an individual
|
11
|
+
# transformation function, including its parameter spec, accepted and
|
12
|
+
# returned types, identity, and implementation.
|
13
|
+
#
|
14
|
+
class RDF::SAK::Transform
|
15
|
+
# mkay basically this transformation function stuff got too hairy to
|
16
|
+
# just do ad-hoc so i guess i'm doing this now
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
# Fetch the numeric literal values asserted for a subject/predicate
# pair, in ascending order.
#
# @param repo [RDF::Repository] the repository to search
# @param subject [RDF::Resource] the subject to inspect
# @param predicate [RDF::URI] the predicate whose objects are wanted
# @param entail [false, true] handed through to
#   `RDF::SAK::Util.objects_for`
# @return [Array<Numeric>] the sorted native numeric values
#
def self.numeric_objects repo, subject, predicate, entail: false
  literals = RDF::SAK::Util.objects_for(
    repo, subject, predicate, entail: entail, only: :literal)

  # unwrap each literal to its native value and keep only the numbers
  literals.map(&:object).grep(Numeric).sort
end
|
24
|
+
|
25
|
+
# Collect the parameter specifications attached to a transform.
#
# @param repo [RDF::Repository] the repository to search
# @param subject [RDF::Resource] the transform's subject
# @return [Hash] specs keyed by parameter resource; every spec carries
#   `:default`, `:minc` and `:maxc`, plus `:id` and `:range` when the
#   repository supplies them
#
def self.gather_params repo, subject
  util  = RDF::SAK::Util
  found = util.objects_for(repo, subject, RDF::SAK::TFO.parameter,
    entail: false, only: :resource)

  found.each_with_object({}) do |uri, table|
    spec = (table[uri] ||= {})

    # slug/identifier: lowest-sorting dct:identifier, as a symbol
    ident = util.objects_for(
      repo, uri, RDF::Vocab::DC.identifier, only: :literal).sort.first
    spec[:id] = ident.value.to_sym if ident

    # rdfs:range, only recorded when at least one is asserted
    ranges = util.objects_for(repo, uri, RDF::RDFS.range, only: :resource)
    spec[:range] = ranges.to_set unless ranges.empty?

    # default value(s), whatever kind of term they are
    spec[:default] = util.objects_for(repo, uri, RDF::SAK::TFO.default)

    # cardinalities: an exact owl:cardinality overrides min and max;
    # otherwise each bound falls back to 0 / infinity independently
    exact = numeric_objects(repo, uri, RDF::OWL.cardinality).first
    if exact
      spec[:minc] = exact
      spec[:maxc] = exact
    else
      lower = numeric_objects(repo, uri, RDF::OWL.minCardinality).first
      upper = numeric_objects(repo, uri, RDF::OWL.maxCardinality).first
      spec[:minc] = lower || 0
      spec[:maxc] = upper || Float::INFINITY
    end
  end
end
|
64
|
+
|
65
|
+
# Collect the content types a transform accepts (or returns).
#
# Objects of the predicate may be plain literals or RDF lists; list
# members come first (lists sorted, then flattened), followed by the
# sorted literals, with duplicates removed.
#
# @param repo [RDF::Repository] the repository to search
# @param subject [RDF::Resource] the transform's subject
# @param raw [false, true] return the terms themselves rather than
#   their `value`
# @param returns [false, true] read `tfo:returns` instead of
#   `tfo:accepts`
# @return [Array] the collected types
#
def self.gather_accepts_returns repo, subject, raw: false, returns: false
  predicate = RDF::SAK::TFO[returns ? 'returns' : 'accepts']
  objects   = repo.query([subject, predicate, nil]).objects

  # anything that is not a literal is treated as the head of a list
  literals, heads = objects.partition(&:literal?)
  lists = heads.map { |head| RDF::List.from(repo, head).to_a }

  # this is mainly to give us consistent results
  ordered = lists.sort.flatten.concat(literals.sort).uniq

  # raw as in raw literals
  return ordered if raw

  ordered.map(&:value)
end
|
81
|
+
|
82
|
+
protected
|
83
|
+
|
84
|
+
# Initialize the implementation. Does nothing in the base
|
85
|
+
# class. Return value is ignored.
|
86
|
+
#
|
87
|
+
# @param harness [RDF::SAK::Transform::Harness] the harness
|
88
|
+
#
|
89
|
+
def init_implementation harness
  # deliberately a no-op in the base class; the result is discarded
  nil
end
|
91
|
+
|
92
|
+
public
|
93
|
+
|
94
|
+
# Resolve a transform out of the repository. Optionally supply a
|
95
|
+
# block to resolve any implementation associated with the transform.
|
96
|
+
#
|
97
|
+
# @param harness [RDF::SAK::Transform::Harness] the harness
|
98
|
+
# @param subject [RDF::Resource]
|
99
|
+
def self.resolve harness, subject
  # noop: we were handed an already-resolved transform
  return subject if subject.is_a? self

  repo = harness.repo

  # bail out unless the subject is asserted to be a tfo:Transform or
  # one of the classes related to it
  asserted = RDF::SAK::Util.objects_for repo, subject,
    RDF.type, only: :resource
  related = RDF::SAK::Util.all_related RDF::SAK::TFO.Transform
  return if (asserted & related).empty?

  params = gather_params repo, subject

  # an explicit tfo:parameter-list wins; otherwise use the sorted keys
  plist_head = RDF::SAK::Util.objects_for(repo, subject,
    RDF::SAK::TFO['parameter-list'], only: :resource).sort.first
  plist = plist_head ? RDF::List.from(repo, plist_head).to_a : params.keys.sort

  accepts = gather_accepts_returns repo, subject
  returns = gather_accepts_returns repo, subject, returns: true

  tclass = self

  # XXX this is all dumb but it has to be this way for now

  impl = RDF::SAK::Util.objects_for(repo, subject,
    RDF::SAK::TFO.implementation, only: :uri).sort.first

  if impl
    case impl.to_s
    when /^file:/i
      # XXX redo this later
      if /xsl/i.match? MimeMagic.by_path(impl.path.to_s).to_s
        tclass = RDF::SAK::Transform::XSLT
      end
    when /^urn:x-ruby:(.*)$/i
      class_name = Regexp.last_match(1)
      begin
        tclass = Object.const_get class_name
      rescue NameError
        # only an unknown constant means "not implemented"; anything
        # else raised while loading the class propagates untouched
        raise NotImplementedError,
          "Could not locate implementation for #{impl}!"
      end
    end
  end

  tclass.new subject, params: params, param_list: plist, accepts: accepts,
    returns: returns, implementation: impl, harness: harness
end
|
150
|
+
|
151
|
+
# Normalize every value of a parameter hash into a Set.
#
# @param params [Hash] parameter names mapped to a scalar, a
#   collection, or nil
# @return [Hash] the same keys; nil stays nil, anything responding to
#   `to_a` becomes a Set of its members, and any other value becomes a
#   one-element Set
#
def self.coerce_params params
  # this idiom is everywhere
  params.transform_values do |value|
    if value.nil?
      nil
    else
      members = value.respond_to?(:to_a) ? value.to_a : [value]
      Set.new members
    end
  end
end
|
157
|
+
|
158
|
+
attr_reader :subject
|
159
|
+
|
160
|
+
# Initialize a transform from data.
|
161
|
+
# @param subject [RDF::Resource]
|
162
|
+
# @param harness [RDF::SAK::Transform::Harness]
|
163
|
+
# @param params [Hash]
|
164
|
+
# @param param_list [Array]
|
165
|
+
# @param accepts [Array]
|
166
|
+
# @param returns [Array]
|
167
|
+
# @param implementation [RDF::Resource]
|
168
|
+
#
|
169
|
+
def initialize subject, harness: nil, params: {}, param_list: [],
    accepts: %w[*/*], returns: %w[*/*], implementation: nil
  # wrap a bare value in an array; list-like things go through #to_a
  listify = ->(thing) { thing.respond_to?(:to_a) ? thing.to_a : [thing] }

  @subject = subject.dup.freeze
  @params  = params.freeze

  # an explicit parameter list wins; otherwise use the sorted spec keys
  ordering = param_list.empty? ? params.keys.sort : param_list.dup
  @plist   = ordering.freeze

  # reverse index from each spec's :id to the key it is filed under
  index = {}
  params.each { |key, spec| index[spec[:id]] = key }
  @pcache = index.freeze

  @accepts = listify.call(accepts).freeze
  @returns = listify.call(returns).freeze
  @impl    = implementation.freeze

  # initialize the implementation
  init_implementation harness
end
|
182
|
+
|
183
|
+
# Return the identifier of the implementation.
|
184
|
+
#
|
185
|
+
# @return [RDF::URI]
|
186
|
+
#
|
187
|
+
def implementation
  # plain reader for the identifier stored in @impl
  impl = @impl
  impl
end
|
190
|
+
|
191
|
+
# True if this transform is *actually* implemented.
|
192
|
+
#
|
193
|
+
# @return [false, true]
|
194
|
+
#
|
195
|
+
def implemented?
  # this version has no working implementation, so it always answers no
  implemented = false
  implemented
end
|
198
|
+
|
199
|
+
# True if the transform accepts the given Content-Type.
|
200
|
+
#
|
201
|
+
# @param type [String] the content type to test
|
202
|
+
# @return [false, true] whether the transform accepts the given type
|
203
|
+
#
|
204
|
+
def accepts? type
  # build one faux variant per entry in the type's lineage (the type
  # itself plus everything it inherits from), so that negotiation
  # succeeds when the transform accepts an ancestor of the given type
  # (e.g. it declares application/xml and is handed
  # application/xhtml+xml)
  lineage  = RDF::SAK::MimeMagic.new(type).lineage
  variants = lineage.each_with_object({}) do |ancestor, table|
    # the key only has to be unique, so the type name doubles as key
    name = ancestor.to_s
    table[name] = [1, name]
  end

  # construct the pseudo-header; refuse everything not listed unless
  # the transform itself declares the wildcard
  ranges  = @accepts.include?('*/*') ? @accepts : @accepts + ['*/*;q=0']
  headers = { Accept: ranges.join(', ') }

  # we only care *if* this returns something, not *what*
  HTTP::Negotiate.negotiate(headers, variants) ? true : false
end
|
225
|
+
|
226
|
+
# Return the parameter list, or a sorted list of parameter keys in lieu of one.
|
227
|
+
#
|
228
|
+
# @return [Array]
|
229
|
+
#
|
230
|
+
def keys
  # XXX the stored list ought to be unique already, but duplicated
  # output showed up in the tests, so duplicates are dropped here
  ordered = @plist
  ordered.uniq
end
|
235
|
+
|
236
|
+
# Retrieve a parameter spec, either by its fully-qualified URI or
|
237
|
+
# its `dct:identifier`.
|
238
|
+
#
|
239
|
+
# @param key [RDF::Resource,Symbol,String] the parameter URI or its identifier
|
240
|
+
# @return [Hash] the parameter spec
|
241
|
+
#
|
242
|
+
def [](key)
  spec =
    case key
    when RDF::Resource
      # already the key the specs are filed under
      @params[key]
    when Symbol
      # identifier: go through the id index
      @params[@pcache[key]]
    when String
      # try the string as an identifier first, then as a URI
      by_id = @params[@pcache[key.to_sym]]
      by_id || @params[RDF::URI(key)]
    end

  return unless spec

  # hand back a copy with the key, exactly as supplied, under :uri
  spec.merge(uri: key)
end
|
252
|
+
|
253
|
+
# XXX kill this
|
254
|
+
# Check that a hash supplies exactly the keys this transform declares.
#
# @param params [Hash] the candidate parameters
# @return [false, true] whether the sorted keys of `params` equal {#keys}
# @raise [ArgumentError] if `params` is not a Hash
#
def lint params
  unless params.is_a? Hash
    raise ArgumentError, "params must be a hash, not #{params.class}"
  end

  supplied = params.keys.sort
  supplied == keys
end
|
259
|
+
|
260
|
+
# Return the validated parameters or raise an exception.
|
261
|
+
#
|
262
|
+
# @param params [Hash] the hash of parameters
|
263
|
+
# @param symbols [false, true] whether the keys should be symbols or URIs
|
264
|
+
# @param defaults [true, false] whether to keep parameters whose value equals the spec's default (omitted from the result when false)
|
265
|
+
# @param silent [false, true] return nil rather than raise when unrecognized parameters remain (cardinality violations still raise)
|
266
|
+
# @return [Hash] the validated parameters
|
267
|
+
#
|
268
|
+
def validate params, symbols: false, defaults: true, silent: false
  # work on a copy, since recognized keys are deleted as they are
  # consumed; whatever is left over at the end is unrecognized
  remaining = params ? params.dup : {}
  out = {}

  # note the instance variable (the specs) vs the argument (the values)
  @params.each do |uri, spec|
    # a value may be filed under the parameter's URI or its identifier
    raw = remaining.delete(uri) || remaining.delete(spec[:id]) || []
    raw = raw.respond_to?(:to_a) ? raw.to_a : [raw]

    range = spec[:range] || []

    values = raw.map do |value|
      case value
      when RDF::Term then value
      when URI then RDF::URI(value.to_s)
      when nil then RDF::nil
      else
        # a native Ruby value: use the declared range to decide what
        # kind of term it becomes
        datatypes = range.select(&:datatype?).to_a.sort

        if !datatypes.empty?
          if datatypes.size > 1
            warn "Parameter #{uri}: multiple ranges; " \
              "arbitrarily picking #{datatypes.first}"
          end
          RDF::Literal(value, datatype: datatypes.first)
        elsif value.is_a?(String) && !range.reject(&:datatype?).empty?
          # the range names resource classes, so read the string as a
          # blank node label or a URI
          if (bnode = /^_:(.+)$/.match(value))
            RDF::Node(bnode[1])
          else
            RDF::URI(value)
          end
        else
          RDF::Literal(value)
        end
      end
    end

    # XXX one day we should check types but not today

    # give us the default(s) then
    values = spec[:default].dup if values.empty? && spec[:default]

    # but we *will* check the cardinality
    minc = spec.fetch :minc, 0
    maxc = spec.fetch :maxc, Float::INFINITY

    if values.size < minc
      raise ArgumentError, "Parameter #{uri} must have at least" \
        " #{minc} value#{minc == 1 ? '' : 's'}"
    end
    if values.size > maxc
      raise ArgumentError, "Parameter #{uri} must have at most" \
        " #{maxc} value#{maxc == 1 ? '' : 's'}"
    end
    # XXX if cardinality == 1 should we set values to values.first? dunno

    # output key: the identifier (or stringified URI) when symbols is
    # set, otherwise the parameter's own key
    key = symbols ? (spec[:id] || uri.to_s) : uri

    # with defaults off, values equal to the default are left out
    out[key] = values if defaults || values != spec[:default]
  end

  # if params are not empty then this is an error
  unless remaining.empty?
    return if silent
    raise ArgumentError,
      "Unrecognized parameters #{remaining.keys.join ', '}"
  end

  out
end
|
330
|
+
|
331
|
+
# Check the parameters and apply the function, then check the
|
332
|
+
# output. Parameters are checked with {#validate} for key
|
333
|
+
# resolution, cardinality, range, and type.
|
334
|
+
#
|
335
|
+
# @param input [String,IO,#to_s,#read] Something bytelike
|
336
|
+
# @param params [Hash,RDF::SAK::Transform::Partial] the instance parameters
|
337
|
+
# @param parsed [Object] the already-parsed object, if applicable
|
338
|
+
# @param type [String] the content-type of the input
|
339
|
+
# @param accept [String] a string in the form of an Accept header
|
340
|
+
# @yieldparam output [String,IO] the output
|
341
|
+
# @yieldparam parseout [Object] the parsed output, if applicable
|
342
|
+
# @return [#to_s, Object] the serialized output (and parsed if applicable)
|
343
|
+
#
|
344
|
+
def apply input, params = {}, parsed: nil,
    type: 'application/octet-stream', accept: '*/*', &block
  # name the transform by its subject so the error is actionable
  raise NotImplementedError,
    "Transform #{@subject} is not implemented!" unless implemented?

  # XXX validate accept or explode
  # give up quietly unless at least one declared return type satisfies
  # the caller's Accept header
  variants = @returns.map { |t| [t, [1, t]] }.to_h
  return unless
    HTTP::Negotiate.negotiate({ Accept: accept }, variants, all: true)

  # this will succeed or explode
  params = validate params, symbols: true

  # run the transform
  out, parseout = execute input, parsed, params

  # bail out if nothing was returned
  return unless out

  # now run the block if present
  block.call out, parseout if block

  # return it to the caller
  [out, parseout]
end
|
368
|
+
|
369
|
+
# This class implements a cache for partial transformation function
|
370
|
+
# applications, which bundle transforms with a set of instance
|
371
|
+
# parameters under a reusable identity.
|
372
|
+
class PartialCache
  private

  # Normalize parameter values into sets; delegates to
  # `RDF::SAK::Transform.coerce_params`.
  def coerce_params params
    RDF::SAK::Transform.coerce_params params
  end

  public

  # Initialize the cache with all partials pre-loaded.
  #
  # @param harness [RDF::SAK::Transform::Harness] the transform harness
  # @return [RDF::SAK::Transform::PartialCache] the instance
  #
  def self.load harness
    new(harness).load
  end

  attr_reader :harness

  # Initialize an empty cache.
  # @param harness [RDF::SAK::Transform::Harness] the parent harness.
  #
  def initialize harness
    @harness    = harness
    @cache      = {} # partial subject => Partial
    @mapping    = {} # transform subject => { params => Partial }
    @transforms = {} # never written to by the code in this class
  end

  # Load an initialized partial cache by resolving every subject typed
  # as a `tfo:Partial`.
  #
  # @return [self] daisy-chainable self-reference
  #
  def load
    RDF::SAK::Util.subjects_for(repo, RDF.type,
      RDF::SAK::TFO.Partial).each do |s|
      resolve subject: s
    end

    # return self to daisy-chain
    self
  end

  # @return [Array<RDF::Resource>] the subjects of all cached partials
  def partials
    @cache.keys.select { |x| x.is_a? RDF::Resource }
  end

  # @return [RDF::Repository] the harness's repository
  def repo
    @harness.repo
  end

  # @return [Hash] a copy of the internal transforms table
  def transforms
    @transforms.dup
  end

  # Retrieve a Partial from the cache based on its transform and
  # parameters. Nothing is looked up in the repository beyond resolving
  # a transform URI through the harness.
  #
  # @param transform [RDF::URI,RDF::SAK::Transform] the transform
  # @param params [Hash] an instance of parameters
  # @return [RDF::SAK::Transform::Partial, nil] the cached Partial, or
  #   nil if there is none (or the transform URI cannot be resolved)
  # @raise [ArgumentError] if `transform` is neither a URI nor a Transform
  #
  def get transform, params
    ts = case transform
         when RDF::SAK::Transform then transform.subject
         when RDF::URI
           # XXX transforms resolved here may not get implemented
           resolved = @harness.resolve(transform) or return
           resolved.subject
         else
           raise ArgumentError, "Don't know what to do with #{transform}"
         end

    # return direct cache entry if transform is really the subject
    return @cache[ts] if @cache.key? ts

    # otherwise return the mapping, which is keyed by transform subject
    # NOTE(review): #resolve files entries under validated parameter
    # hashes, while this looks up the set-coerced form -- confirm the
    # two representations are meant to coincide
    bucket = @mapping[ts] or return
    bucket[coerce_params params]
  end

  # Resolves a partial either by subject or by transform + parameter
  # set.
  #
  # @param subject [RDF::URI,RDF::SAK::Transform::Partial] The subject
  #   URI of the partial, or an already-instantiated Partial to register
  # @param transform [RDF::URI,RDF::SAK::Transform] the transform
  # @param params [Hash] an instance of parameters
  # @return [RDF::SAK::Transform::Partial, nil]
  #
  def resolve subject: nil, transform: nil, params: {}
    if subject
      if subject.is_a? RDF::SAK::Transform::Partial
        # snag the transform
        transform = @harness.resolve(subject.transform) or
          raise 'Could not resolve the transform associated with ' \
            "#{subject.subject}"

        # mkay now add this to the cache
        t = @mapping[transform.subject] ||= {} # lol got all that?
        @cache[subject.subject] ||= t[subject.params] ||= subject
      else
        # resolve the partial
        partial = @cache[subject] || RDF::SAK::Transform::Partial.resolve(
          @harness, subject: subject) or return

        # initialize the mapping if not present
        t = @mapping[partial.transform.subject] ||= {}

        # off we go
        @cache[subject] ||= t[partial.params] ||= partial
      end
    elsif transform
      unless transform.is_a? RDF::SAK::Transform
        # an unknown transform cannot have any partials
        transform = @harness.resolve(transform) or return
      end

      params = transform.validate params, defaults: false

      # note the *presence* of the key means the cache item has been
      # checked already; its *value* may be nil
      t = @mapping[transform.subject] ||= {}
      return t[params] if t.key? params

      # try to resolve the partial
      partial = RDF::SAK::Transform::Partial.resolve(@harness,
        transform: transform, params: params) or return

      # update the caches
      @cache[partial.subject] = t[params] = partial
    end
  end
end
|
497
|
+
|
498
|
+
# This class is the main harness for holding all the transforms and
|
499
|
+
# operating over them. This is the primary interface through which
|
500
|
+
# we manipulate transforms.
|
501
|
+
class Harness

  attr_reader :partials, :repo, :root

  # Create a new harness instance.
  #
  # @param repo [RDF::Repository] the repository to find RDF data
  # @param root [String,Pathname] the root directory for implementations
  # @raise [ArgumentError] if `repo` is not an RDF::Repository or
  #   `root` is not a readable directory
  #
  def initialize repo, root
    unless repo.is_a? RDF::Repository
      raise ArgumentError, "repo is #{repo.class}, not an RDF::Repository"
    end
    @repo = repo

    @root = Pathname(root).expand_path
    unless @root.directory? && @root.readable?
      raise ArgumentError, "Root #{@root} does not exist"
    end

    @cache    = {} # transform subject => Transform
    @partials = RDF::SAK::Transform::PartialCache.new self
  end

  # Bootstrap all the transforms.
  #
  # @param repo [RDF::Repository] the repository to find RDF data
  # @param root [String,Pathname] the root directory for implementations
  # @return [RDF::SAK::Transform::Harness] the harness instance
  def self.load repo, root
    new(repo, root).load
  end

  # Load transforms into an existing instance by resolving every
  # subject typed as a `tfo:Transform`.
  #
  # @return [self] the harness, so calls can be daisy-chained
  def load
    RDF::SAK::Util.subjects_for(@repo, RDF.type,
      RDF::SAK::TFO.Transform, only: :resource).each do |subject|
      resolve subject
    end

    # return self so we can daisy-chain
    self
  end

  # Return all cached Transform identities.
  #
  # @return [Array] the URIs of known Transforms
  #
  def transforms
    @cache.keys.sort
  end

  # Resolve a Transform based on its URI.
  #
  # @param subject [RDF::Resource,RDF::SAK::Transform] the identifier
  #   for the transform; an already-resolved Transform is handed back
  #   untouched
  # @return [RDF::SAK::Transform, nil] the Transform, if present.
  #
  def resolve subject
    # a Transform object is its own answer; do not file it in the cache
    # under itself, since the cache keys must stay sortable identifiers
    return subject if subject.is_a? RDF::SAK::Transform

    return @cache[subject] if @cache[subject]
    # XXX raise???
    transform = RDF::SAK::Transform.resolve(self, subject) or return
    @cache[subject] = transform
  end

  # Resolve a Partial based on either its subject URI or the
  # transform-params pair.
  #
  # @param subject [RDF::Resource] the Partial's subject
  # @param transform [RDF::Resource,RDF::SAK::Transform] the transform
  # @param params [Hash, nil] an instance of parameters; nil is
  #   treated as no parameters
  # @return [RDF::SAK::Transform::Partial] the Partial, if present
  #
  def resolve_partial subject: nil, transform: nil, params: nil
    partials.resolve subject: subject, transform: transform,
      params: params || {}
  end

  # Resolve a total function application record based on either its
  # subject URI, a transform-params pair, or a Partial.
  #
  # @param subject [RDF::Resource] the Application's subject
  # @param transform [RDF::Resource,RDF::SAK::Transform] the Transform
  # @param params [Hash] an instance of parameters
  # @param partial [RDF::Resource,RDF::SAK::Transform::Partial] a Partial
  # @param input [RDF::Resource] the Application's input
  # @param output [RDF::Resource] the Application's output
  # @return [RDF::SAK::Transform::Application] the Application, if present
  #
  def resolve_application subject: nil, transform: nil, params: {},
      partial: nil, input: nil, output: nil
    RDF::SAK::Transform::Application.resolve self, subject: subject,
      transform: transform, params: params, partial: partial,
      input: input, output: output
  end

  # Returns true if the Application with the given subject URI
  # matches either the transform-params pair, or a partial.
  #
  # @param subject [RDF::Resource,RDF::SAK::Transform::Application]
  #  the application
  # @param transform [RDF::Resource,RDF::SAK::Transform] the transform
  # @param params [Hash] an instance of parameters
  # @param partial [RDF::Resource,RDF::SAK::Transform::Partial] a partial
  # @return [true, false] whether or not the application matches;
  #   false when the partial or transform cannot be resolved
  #
  def application_matches? subject, transform: nil, params: {}, partial: nil

    # unbundle the params; partial overrides transform+params
    if partial
      unless partial.is_a? RDF::SAK::Transform::Partial
        partial = resolve_partial(subject: partial) or return false
      end
      transform = partial.transform
      params    = partial.params
    else
      unless transform.is_a? RDF::SAK::Transform
        transform = resolve(transform) or return false
      end
      params = transform.validate params
    end

    if subject.is_a? RDF::SAK::Transform::Application
      return true if partial && subject.completes?(partial)
      return true if
        subject.transform == transform && subject.matches?(params)
    else
      # this should say, try matching the partial if there is one
      # to match, otherwise attempt to directly match the transform
      return true if partial && repo.has_statement?(
        RDF::Statement(subject, RDF::SAK::TFO.completes, partial.subject))

      if repo.has_statement?(
        RDF::Statement(subject, RDF::SAK::TFO.transform, transform.subject))
        testp = transform.keys.map do |p|
          o = repo.query([subject, p, nil]).objects.uniq.sort
          o.empty? ? nil : [p, o]
        end.compact.to_h

        # this will clear any explicit declarations of defaults
        # NOTE(review): `params` above is validated with defaults
        # included, `testp` with them stripped -- confirm the two are
        # meant to be compared directly
        testp = transform.validate testp, defaults: false, silent: true
        # true means it matches
        return testp == params
      end
    end

    false
  end
end
|
643
|
+
|
644
|
+
class Partial
  # Resolve a partial function application with the given parameters.
  #
  # When a subject is given, its parameters are read from the
  # repository. Otherwise the repository is searched for a
  # `tfo:Partial` of the transform whose stored parameters equal the
  # validated `params`; of several matches the lowest-sorting subject
  # is used.
  #
  # @param harness [RDF::SAK::Transform::Harness] the harness
  # @param subject [RDF::Resource] the identity of the partial
  # @param transform [RDF::Resource,RDF::SAK::Transform] the identity
  #   of the transform
  # @param params [Hash] key-value pairs
  # @return [RDF::SAK::Transform::Partial, nil] the partial, or nil if
  #   the transform or a matching partial cannot be found
  # @raise [ArgumentError] if neither subject nor transform is given
  #
  def self.resolve harness, subject: nil, transform: nil, params: {}
    raise ArgumentError, 'Must supply either a subject or a transform' unless
      subject or transform

    repo = harness.repo

    # coerce the transform to a Transform object if it isn't already
    if transform
      unless transform.is_a? RDF::SAK::Transform
        transform = harness.resolve(transform) or return
      end
    elsif subject.is_a? RDF::URI
      # locate the transform if given the subject; sorted so the same
      # one is picked every time
      transform = RDF::SAK::Util.objects_for(repo, subject,
        RDF::SAK::TFO.transform, only: :resource).sort.first or return
      transform = harness.resolve(transform) or return
    end

    # no way to find a transform (e.g. a subject that is not a URI)
    return unless transform

    # obtain the subject for the given parameters
    if subject
      params = stored_params repo, subject, transform
    else
      params = transform.validate params, symbols: false, defaults: false

      # NB: a zero-arity block given to RDF::Query.new runs in the
      # query's own context, so only locals are usable inside it
      candidates = RDF::Query.new do
        # XXX we should sort parameters by longest value since
        # longer values will probably be less common; anyway this is
        # gonna all need to be rethought
        params.each { |p, objs| objs.each { |o| pattern [:s, p, o] } }
        pattern [:s, RDF.type, RDF::SAK::TFO.Partial]
        pattern [:s, RDF::SAK::TFO.transform, transform.subject]

        # add any remaining parameters
        # XXX this actually messes up; we don't want this
        # (transform.keys - params.keys.sort).each { |r| pattern [:s, r, nil] }
      end.execute(repo).map { |sol| sol[:s] }.uniq

      # keep only candidates whose complete stored parameter set equals
      # what was asked for (the query alone also matches supersets)
      matching = {}
      candidates.each do |s|
        ps = stored_params repo, s, transform
        matching[s] = ps if ps == params
      end

      return if matching.empty?

      # sort it so we always get the same thing
      subject = matching.keys.sort.first
      params  = matching[subject]
    end

    self.new subject, transform, params
  end

  # Read the values stored in the repository for every parameter the
  # transform declares.
  #
  # @param repo [RDF::Repository] the repository to query
  # @param subject [RDF::Resource] the partial (or application) subject
  # @param transform [RDF::SAK::Transform] supplies the parameter keys
  # @return [Hash] parameter key => sorted, de-duplicated objects;
  #   parameters with no values are left out
  #
  def self.stored_params repo, subject, transform
    transform.keys.each_with_object({}) do |p, found|
      o = repo.query([subject, p, nil]).objects.uniq.sort
      found[p] = o unless o.empty?
    end
  end
  private_class_method :stored_params

  attr_reader :subject, :transform

  # @param subject [RDF::Resource] the identity of the partial
  # @param transform [RDF::SAK::Transform] the transform being applied
  # @param params [Hash] the instance parameters; validated against
  #   the transform
  # @raise [ArgumentError] if `transform` is not a Transform
  #
  def initialize subject, transform, params = {}
    raise ArgumentError, 'transform must be a Transform' unless
      transform.is_a? RDF::SAK::Transform
    @subject   = subject
    @transform = transform
    # NOTE(review): when a Partial is passed as `params`, @params is
    # left unset here -- presumably a subclass fills it in; confirm
    @params = transform.validate params unless
      params.is_a? RDF::SAK::Transform::Partial
  end

  # @param key the parameter key
  # @return the value(s) stored for that parameter
  def [](key)
    @params[key]
  end

  # @return [Array] the parameter keys present in this partial
  def keys
    @params.keys
  end

  # @return [Hash] a shallow copy of the validated parameters
  def params
    @params.dup
  end

  # @param params [Hash] candidate parameters
  # @return [false, true] whether they validate to this partial's
  #   parameters
  def matches? params
    @params == @transform.validate(params)
  end

  # Loose equality: same transform and equivalent parameters,
  # regardless of subject.
  def ===(other)
    return false unless other.is_a? RDF::SAK::Transform::Partial
    transform == other.transform and matches? other.params
  end

  # Strict equality: loosely equal and the same subject.
  def ==(other)
    self === other and subject == other.subject
  end
end
|
749
|
+
|
750
|
+
# A record of a transformation function application.
# @note "Application" as in to "apply" a function, not an "app".
class Application < Partial
  # Resolve a particular function Application from the repository.
  # Either resolve by subject, or resolve by a transform + parameter
  # + input set. Applications that complete Partials will be
  # automatically resolved.
  #
  # @param harness [RDF::SAK::Transform::Harness] the harness
  # @param subject [RDF::Resource] the subject
  # @param transform [RDF::Resource,RDF::SAK::Transform] the transform
  # @param params [Hash] an instance of parameters
  # @param partial [RDF::SAK::Transform::Partial] a partial application
  #  to match in lieu of transform + params (ignored and looked up
  #  from the repository when a subject is given)
  # @param input [RDF::Resource] the Application's input
  # @param output [RDF::Resource] the Application's output
  # @raise [ArgumentError] if neither a subject nor an input plus
  #  (transform or partial) is supplied
  # @raise [RuntimeError] if a referenced partial or transform cannot
  #  be found, or the record for `subject` lacks an input or output
  # @return [RDF::SAK::Transform::Application, nil] the Application,
  #  if present
  #
  def self.resolve harness, subject: nil, transform: nil, params: {},
      partial: nil, input: nil, output: nil
    # either a subject or transform + input + output? + params?

    repo     = harness.repo
    partials = harness.partials

    if subject
      # noop
      return subject if subject.is_a? self

      # okay partial (objects_for takes the repository first)
      partial = RDF::SAK::Util.objects_for(
        repo, subject, RDF::SAK::TFO.completes, only: :resource).sort.first

      if partial
        tmp = partials.resolve(subject: partial) or
          raise "Could not find partial #{partial}"
        partial   = tmp
        transform = partial.transform
      else
        transform = RDF::SAK::Util.objects_for(
          repo, subject, RDF::SAK::TFO.transform,
          only: :resource).sort.first or
          raise "Could not find a transform for #{subject}"
        tmp = harness.resolve(transform) or
          raise "Could not find transform #{transform}"
        transform = tmp

        # get params; these are the raw objects from the repository
        # and get validated by the constructor
        params = {}
        transform.keys.each do |p|
          o = repo.query([subject, p, nil]).objects.uniq.sort
          params[p] = o unless o.empty?
        end
      end

      # get inputs and outputs
      input  = RDF::SAK::Util.objects_for(
        repo, subject, RDF::SAK::TFO.input,  only: :resource).sort.first
      output = RDF::SAK::Util.objects_for(
        repo, subject, RDF::SAK::TFO.output, only: :resource).sort.first

      raise 'Data must have both input and output' unless input and output
    elsif input and ((transform and params) or partial)

      # XXX dispatch on partial only? smart? dumb?
      if partial
        transform = partial.transform
        params    = partial.params
      else
        # do transform
        t = harness.resolve(transform) or
          raise "Could not resolve transform #{transform}"
        transform = t

        # coerce/validate params
        params = transform.validate params, defaults: false

        # do partial
        partial = partials.resolve transform: transform, params: params
      end

      # collect function application receipts
      candidates = RDF::Query.new do
        # note that there is no cost-based optimization so we write
        # these in the order of least to most cardinality
        pattern [:t, RDF::SAK::TFO.output, output] if output
        pattern [:t, RDF::SAK::TFO.input, input]
      end.execute(repo).map { |sol| sol[:t] }.compact.uniq.select do |s|
        # this should say, try matching the partial if there is one
        # to match, otherwise attempt to directly match the transform
        if partial and repo.has_statement?(
          RDF::Statement(s, RDF::SAK::TFO.completes, partial.subject))
          true
        elsif repo.has_statement?(
          RDF::Statement(s, RDF::SAK::TFO.transform, transform.subject))
          testp = transform.keys.map do |p|
            o = repo.query([s, p, nil]).objects.uniq.sort
            o.empty? ? nil : [p, o]
          end.compact.to_h

          testp = transform.validate testp, defaults: false, silent: true
          testp == params
        end
      end.compact.uniq.sort

      return if candidates.empty?

      if candidates.size == 1
        subject = candidates.first
      else
        # now we have the unlikely case that there are two identical
        # records so we just sort em first by end date, then by
        # start date, then lexically
        subject = candidates.map do |s|
          st, et = %i[startedAtTime endedAtTime].map do |p|
            repo.query([s, RDF::Vocab::PROV[p], nil]).map do |stmt|
              dt = stmt.object.object
              dt if dt.is_a? DateTime
            end.compact.sort.last
          end
          [s, st, et]
        end.sort do |a, b|
          # latest end time wins; a decisive comparison is final
          c = a[2] && b[2] ? (b[2] <=> a[2]).to_i : 0
          # tie (or missing end times): latest start time wins
          c = (b[1] <=> a[1]).to_i if c == 0 && a[1] && b[1]
          # still tied: sort lexically i guess
          c == 0 ? a[0] <=> b[0] : c
        end.first.first
      end
    else
      raise ArgumentError,
        'must have either a subject or transform + params + input'
    end

    # don't forget the output
    output ||= repo.query(
      [subject, RDF::SAK::TFO.output, nil]
    ).objects.select(&:uri?).sort.first

    new subject, transform, input, output, partial || params
  end

  attr_reader :input, :output, :completes

  # Create a new function application from whole cloth.
  #
  # @param subject [RDF::Resource]
  # @param transform [RDF::SAK::Transform] the transform (the
  #  superclass constructor rejects anything else)
  # @param input [RDF::Resource] the identifier for the input
  # @param output [RDF::Resource] the identifier for the output
  # @param params [Hash, RDF::SAK::Transform::Partial] the parameters
  #  or partial application that is completed
  # @param start [Object, nil] when the application started; emitted
  #  by #to_triples as prov:startedAtTime
  # @param stop [Object, nil] when the application ended; emitted
  #  by #to_triples as prov:endedAtTime
  def initialize subject, transform, input, output, params = {},
      start: nil, stop: nil
    # params may be a partial
    super subject, transform, params

    @input     = input
    @output    = output
    @completes = params if params.is_a? RDF::SAK::Transform::Partial
    @start     = start
    @stop      = stop
  end

  # Returns the function application as an array of triples.
  #
  # NOTE(review): no tfo:input / tfo:output statements are emitted
  # here even though .resolve looks records up by them -- presumably
  # the caller adds those; confirm.
  #
  # @return [Array<RDF::Statement>]
  def to_triples
    out = [] # .extend RDF::Enumerable
    s   = @subject
    out << [s, RDF.type, RDF::SAK::TFO.Application]

    if @start
      start = @start.is_a?(RDF::Literal) ? @start : RDF::Literal(@start)
      out << [s, RDF::Vocab::PROV.startedAtTime, start]
    end

    if @stop
      stop = @stop.is_a?(RDF::Literal) ? @stop : RDF::Literal(@stop)
      out << [s, RDF::Vocab::PROV.endedAtTime, stop]
    end

    if @completes
      # the partial carries the transform and parameters for us
      out << [s, RDF::SAK::TFO.completes, @completes.subject]
    else
      out << [s, RDF::SAK::TFO.transform, transform.subject]
      pdup = transform.validate params, defaults: false, silent: true
      pdup.each do |k, vals|
        vals.each { |v| out << [s, k, v] }
      end
    end

    out.map { |triples| RDF::Statement(*triples) }
  end

  # Look up a single parameter.
  #
  # @param key [Object] the parameter key
  # @return [Object, nil]
  def [](key)
    # a completed partial, when there is one, supplies the parameters
    (@completes || @params)[key]
  end

  # @return [Array] the parameter keys, taken from the completed
  #  partial when there is one
  def keys
    (@completes || @params).keys
  end

  # @return [Hash] the parameters, taken from the completed partial
  #  when there is one, otherwise a copy of our own
  def params
    @completes ? @completes.params : @params.dup
  end

  # @return [RDF::SAK::Transform] the transform, taken from the
  #  completed partial when there is one
  def transform
    @completes ? @completes.transform : @transform
  end

  # Test whether this application completes the given partial.
  #
  # @param partial [RDF::SAK::Transform::Partial, nil]
  # @return [true, false, nil] falsy when there is no completed
  #  partial or none is given
  def completes? partial
    @completes and partial and @completes == partial
  end

  # Test whether the given parameters match this application's,
  # deferring to the completed partial when there is one.
  #
  # @param params [Hash] candidate parameters
  # @return [true, false]
  def matches? params
    return @completes.matches? params if @completes
    super params
  end

  # Loose equivalence: same input, output, transform and parameters,
  # irrespective of subject.
  #
  # @param other [Object] the comparand
  # @return [true, false] false outright unless `other` is an
  #  Application
  def ===(other)
    return false unless other.is_a? Application
    return false unless @input == other.input and @output == other.output

    # now the comparand is either the partial or us
    cmp = @completes || self

    # and this should do it
    other.transform == cmp.transform and other.matches? cmp.params
  end
end
|
979
|
+
|
980
|
+
# XXX everything below this line is trash
|
981
|
+
|
982
|
+
# Gather the objects the repository holds for `candidate` under each
# parameter key.
#
# NOTE(review): the gathered values are never compared against the
# normalized parameters and are discarded; the method returns the
# list of parameter keys. Looks unfinished -- confirm before relying
# on it.
#
# @param repo [#query] the repository to consult
# @param candidate [RDF::Resource] the subject to inspect
# @param params [Hash] parameters keyed by predicate
# @return [Array] the keys of `params`
def match_params repo, candidate, params = {}
  # coerce every value to a Set, wrapping scalars
  normalized = params.transform_values do |value|
    members = value.respond_to?(:to_a) ? value.to_a : [value]
    Set.new members
  end

  found = {}
  normalized.keys.each do |predicate|
    repo.query([candidate, predicate, nil]) do |stmt|
      (found[stmt.predicate] ||= Set.new) << stmt.object
    end
  end
end
|
996
|
+
|
997
|
+
# Resolve a transformation application function in the repository
# with the given inputs and outputs.
#
# XXX note that this thing in its current state will not distinguish
# between two different function applications that happen to map the
# same input to the same output, but with different scalar
# parameters. For example, the `subtree` function could be given two
# different XPath queries but return the same subtree.
#
# NOTE(review): `Partial.resolve` is called below without a harness
# or repository; its signature is not visible from here -- confirm.
#
# @param repo [RDF::Queryable] the repository to search
# @param transform [RDF::Resource] the identifier of the transform
# @param input [RDF::Resource] the identifier of the input
# @param output [RDF::Resource, nil] the identifier of the output
# @param graph [Object, nil] unused
# @param params [Hash] the parameters; values are coerced to Sets
# @param partials [Hash] a cache of Partials keyed by subject; any
#  newly resolved Partial is added to it
# @return [RDF::Resource, nil] the subject of the matching record,
#  or nil if there is none
def resolve_transformation repo, transform, input, output = nil,
    graph: nil, params: {}, partials: {}

  # overwrite normalized params XXX replace this with something real
  params = params.transform_values do |v|
    Set.new(v.respond_to?(:to_a) ? v.to_a : [v])
  end

  # first we check the cache of partials to see if there is one that
  # matches our transform and parameters
  partial = partials.values.select do |p|
    p.transform == transform and p.matches? params
  end.sort.first

  # find the partial if there is one
  unless partial
    partial = Partial.resolve transform: transform, params: params
    # argh this isn't right; it should be partials[transform][params]
    partials[partial.subject] = partial if partial
  end

  # collect function application receipts
  candidates = RDF::Query.new do
    # note that there is no cost-based optimization so we write
    # these in the order of least to most cardinality
    pattern [:t, RDF::SAK::TFO.output, output]
    pattern [:t, RDF::SAK::TFO.input, input]
  end.execute(repo).map { |sol| sol[:t] }.compact.uniq.select do |s|
    # the statement's object has to be the partial's identifier, not
    # the Partial itself
    repo.has_statement?(
      RDF::Statement(s, RDF::SAK::TFO.transform, transform)) or
      partial && repo.has_statement?(
        RDF::Statement(s, RDF::SAK::TFO.completes, partial.subject))
  end.compact.uniq

  # first will be nil if this is empty so voila
  return candidates.first unless candidates.size > 1

  # now we have the unlikely case that there are two identical records
  candidates.map do |s|
    st, et = %i[startedAtTime endedAtTime].map do |p|
      # map (rather than the block form of query) so that we actually
      # get the timestamps back
      repo.query([s, RDF::Vocab::PROV[p], nil]).map do |stmt|
        dt = stmt.object.object
        dt if dt.is_a? DateTime
      end.compact.sort.last
    end
    [s, st, et]
  end.sort do |a, b|
    # latest end time wins; a decisive comparison is final
    c = a[2] && b[2] ? (b[2] <=> a[2]).to_i : 0
    # tie (or missing end times): latest start time wins
    c = (b[1] <=> a[1]).to_i if c == 0 && a[1] && b[1]
    # still tied: sort lexically i guess
    c == 0 ? a[0] <=> b[0] : c
  end.first.first
end
|
1059
|
+
|
1060
|
+
# Placeholder for recording a function application; the body is
# empty, so every argument is ignored.
#
# @return [nil]
def record_application repo, transform, input, output, start, finish,
    partial: false, graph: nil, subject: nil, params: {}
  # intentionally empty
end
|
1063
|
+
|
1064
|
+
# Find the subject of a tfo:Partial in the repository that applies
# the given function with exactly the given parameters.
#
# @param repo [RDF::Queryable] the repository to search
# @param function [RDF::Resource] the identifier of the transform
# @param params [Hash] parameters keyed by predicate; values are
#  compared as Sets
# @return [RDF::Resource, nil] the first matching subject in sorted
#  order, or nil if nothing matches
def get_partial_transform repo, function, params = {}
  query = RDF::Query.new do
    pattern [:s, RDF.type, RDF::SAK::TFO.Partial]
    pattern [:s, RDF::SAK::TFO.transform, function]
    params.keys.each { |k| pattern [:s, k, nil] }
  end

  # candidate subject => { predicate => Set of objects }
  found = {}
  query.execute(repo).each do |sol|
    slot = (found[sol[:s]] ||= {})
    params.keys.each do |k|
      # make these a set for now cause we don't care about the order
      slot[k] = Set.new(repo.query([sol[:s], k, nil]).objects)
    end
  end

  # now we imagine massaging the candidates' parameters so they
  # match the input (eg sets/arrays or whatever)

  # (in this case the input params are made to match the retrieved params)
  wanted = params.transform_values do |v|
    Set.new(v.respond_to?(:to_a) ? v.to_a : [v])
  end

  # sort this because we want it to return the same thing every time
  # if there are multiples for some reason
  found.keys.sort.find do |k|
    # do a cheaper comparison first
    found[k].keys.sort == params.keys.sort && found[k] == wanted
  end
end
|
1098
|
+
|
1099
|
+
# A transform that hands an XML document and an XPath expression to
# RDF::SAK::Util.subtree.
class XPath < RDF::SAK::Transform
  protected

  # Run the transform.
  #
  # @param input [Object] the raw input, parsed with Nokogiri.XML
  #  unless `parsed` is supplied
  # @param parsed [Object, nil] an already-parsed document
  # @param params [Hash] parameters keyed by symbol: `:xpath`
  #  (required), `:prefix` (literals of the form `prefix:namespace`)
  #  and `:reindent` (defaults to true)
  # @raise [RuntimeError] if no `:xpath` parameter is present
  # @return [Array, nil] the serialized result and the result itself,
  #  or nil if parsing raises or no subtree comes back
  def execute input, parsed = nil, params
    xpath = params.fetch(:xpath, []).first or
      raise 'An xpath parameter is required'
    prefix = params.fetch(:prefix, []).map do |x|
      x.value.split(/\s*:\s*/, 2)
    end.to_h.transform_keys(&:to_sym)
    # default the lookup so that an absent parameter falls back to
    # true rather than raising KeyError
    reindent = (
      params.fetch(:reindent, []).first || RDF::Literal(true)).object

    begin
      parsed ||= Nokogiri.XML input
    rescue Nokogiri::SyntaxError
      # XXX i dunno, raise?
      return
    end

    doc = RDF::SAK::Util.subtree parsed,
      xpath.value, prefixes: prefix, reindent: reindent

    return unless doc

    [doc.to_xml, doc]
  end

  public

  # @return [true] this transform has an implementation
  def implemented?
    true
  end
end
|
1130
|
+
|
1131
|
+
# A transform implemented by an XSLT stylesheet read from disk.
class XSLT < RDF::SAK::Transform
  protected

  # Load the stylesheet named by `implementation` from beneath the
  # harness root.
  #
  # @param harness [#root] supplies the root directory
  # @raise [ArgumentError] if there is no root, the root is not a
  #  readable directory, or the stylesheet is not a readable file
  def init_implementation harness
    base = harness.root
    unless base
      raise ArgumentError, "Need a root to initialize the implementation"
    end

    base = Pathname(base).expand_path unless base.is_a? Pathname
    unless base.directory? and base.readable?
      raise ArgumentError, "#{base} is not a readable directory"
    end

    # XXX this assumes this is a file URI but so far that is the
    # only way we get here
    stylesheet = base + implementation.path
    unless stylesheet.file? and stylesheet.readable?
      raise ArgumentError, "#{stylesheet} is not a readable file"
    end

    @sheet = Nokogiri::XSLT(stylesheet.read)
  end

  # Apply the stylesheet.
  #
  # @param input [Object] the raw input, parsed with Nokogiri.XML
  #  unless `parsed` is supplied
  # @param parsed [Object, nil] an already-parsed document
  # @param params [Hash] currently unused
  # @return [Array, nil] the serialized result and the result
  #  document, or nil if parsing raises
  def execute input, parsed = nil, params
    document = parsed
    unless document
      begin
        document = Nokogiri.XML input
      rescue Nokogiri::SyntaxError
        # XXX i dunno, raise?
        return
      end
    end

    # XXX do we wanna allow params?
    result = @sheet.transform document

    # now return string and still-parsed
    [@sheet.serialize(result), result]
  end

  public

  # @return [true] this transform has an implementation
  def implemented?
    true
  end

end
|
1172
|
+
end
|