rdf-sak 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE +202 -0
- data/README.md +268 -0
- data/Rakefile +13 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/example/cleanup.xsl +14 -0
- data/example/matches.xhtml +11 -0
- data/example/transforms.ttl +58 -0
- data/lib/rdf-sak.rb +1 -0
- data/lib/rdf/sak.rb +2506 -0
- data/lib/rdf/sak/ci.rb +827 -0
- data/lib/rdf/sak/cli.rb +35 -0
- data/lib/rdf/sak/docstats.rb +188 -0
- data/lib/rdf/sak/document.rb +772 -0
- data/lib/rdf/sak/ibis.rb +248 -0
- data/lib/rdf/sak/mimemagic.rb +73 -0
- data/lib/rdf/sak/pav.rb +479 -0
- data/lib/rdf/sak/qb.rb +280 -0
- data/lib/rdf/sak/scovo.rb +51 -0
- data/lib/rdf/sak/tfo.rb +301 -0
- data/lib/rdf/sak/transform.rb +1172 -0
- data/lib/rdf/sak/urlrunner.rb +602 -0
- data/lib/rdf/sak/util.rb +2081 -0
- data/lib/rdf/sak/version.rb +5 -0
- data/rdf-sak.gemspec +60 -0
- metadata +366 -0
@@ -0,0 +1,1172 @@
|
|
1
|
+
require 'rdf'
|
2
|
+
require 'rdf/vocab'
|
3
|
+
require 'rdf/sak/tfo'
|
4
|
+
require 'rdf/sak/util'
|
5
|
+
require 'set'
|
6
|
+
require 'mimemagic'
|
7
|
+
require 'http/negotiate'
|
8
|
+
require 'time'
|
9
|
+
|
10
|
+
# This class encapsulates a specification for an individual
|
11
|
+
# transformation function, including its parameter spec, accepted and
|
12
|
+
# returned types, identity, and implementation.
|
13
|
+
#
|
14
|
+
class RDF::SAK::Transform
|
15
|
+
# mkay basically this transformation function stuff got too hairy to
|
16
|
+
# just do ad-hoc so i guess i'm doing this now
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
# Collect the numeric literal objects found for a subject/predicate
# pair, in ascending order.
#
# @param repo [Object] the repository handed to `objects_for`
# @param subject [RDF::Resource] the subject to look under
# @param predicate [RDF::URI] the predicate to follow
# @param entail [false, true] passed straight through to `objects_for`
# @return [Array<Numeric>] the native numeric values, sorted
#
def self.numeric_objects repo, subject, predicate, entail: false
  literals = RDF::SAK::Util.objects_for(
    repo, subject, predicate, entail: entail, only: :literal)

  # unwrap each literal to its native value and keep only real numbers
  literals.map(&:object).grep(Numeric).sort
end
|
24
|
+
|
25
|
+
# Read the parameter specs attached to a transform via `tfo:parameter`.
#
# Each spec is a hash with (at most) these keys:
#
# * `:id` the `dct:identifier` of the parameter as a Symbol, if any
# * `:range` a Set of `rdfs:range` resources, if any are asserted
# * `:default` whatever `objects_for` returns for `tfo:default`
# * `:minc`/`:maxc` cardinality bounds (default `0` and infinity)
#
# @param repo [Object] the repository handed to `objects_for`
# @param subject [RDF::Resource] the transform
# @return [Hash{RDF::Resource=>Hash}] parameter resource => spec
#
def self.gather_params repo, subject
  util  = RDF::SAK::Util
  specs = util.objects_for(repo, subject, RDF::SAK::TFO.parameter,
    entail: false, only: :resource)

  specs.each_with_object({}) do |ps, params|
    param = params[ps] ||= {}

    # slug/identifier: take the lowest-sorting one so the result is stable
    id = util.objects_for(
      repo, ps, RDF::Vocab::DC.identifier, only: :literal).sort.first
    param[:id] = id.value.to_sym if id

    # rdfs:range
    range = util.objects_for(repo, ps, RDF::RDFS.range, only: :resource)
    param[:range] = range.to_set unless range.empty?

    # default value(s)
    param[:default] = util.objects_for(repo, ps, RDF::SAK::TFO.default)

    # cardinalities: start out unconstrained
    param[:minc] = 0
    param[:maxc] = Float::INFINITY

    # owl:cardinality pins both ends and wins over min/max
    exact = numeric_objects(repo, ps, RDF::OWL.cardinality).first
    if exact
      param[:minc] = exact
      param[:maxc] = exact
      next
    end

    lower = numeric_objects(repo, ps, RDF::OWL.minCardinality).first
    param[:minc] = lower if lower

    upper = numeric_objects(repo, ps, RDF::OWL.maxCardinality).first
    param[:maxc] = upper if upper
  end
end
|
64
|
+
|
65
|
+
# Collect the content types a transform accepts (or returns).
#
# The objects of `tfo:accepts`/`tfo:returns` are either bare literals
# or the head of an RDF list. List members come first (lists sorted
# against each other, members in list order), then the sorted bare
# literals, with duplicates dropped.
#
# @param repo [Object] the repository to query
# @param subject [RDF::Resource] the transform
# @param raw [false, true] return the terms themselves rather than
#  their `value`
# @param returns [false, true] read `tfo:returns` instead of
#  `tfo:accepts`
# @return [Array] the types
#
def self.gather_accepts_returns repo, subject, raw: false, returns: false
  predicate = RDF::SAK::TFO[returns ? 'returns' : 'accepts']
  objects   = repo.query([subject, predicate, nil]).objects

  # anything that is not a literal is treated as the head of a list
  literals, heads = objects.partition(&:literal?)
  lists = heads.map { |head| RDF::List.from(repo, head).to_a }

  # sorting is mainly so the result is the same from run to run
  merged = (lists.sort.flatten + literals.sort).uniq

  raw ? merged : merged.map(&:value)
end
|
81
|
+
|
82
|
+
protected
|
83
|
+
|
84
|
+
# Hook invoked at the end of {#initialize} to set up whatever backs
# the transform. The base class has nothing to set up, so this is
# deliberately empty; the return value is ignored.
#
# @param harness [RDF::SAK::Transform::Harness] the harness
# @return [nil]
#
def init_implementation harness
  # nothing to do in the base class
end
|
91
|
+
|
92
|
+
public
|
93
|
+
|
94
|
+
# Resolve a transform out of the repository.
#
# The subject must be typed as `tfo:Transform` (or something
# `all_related` associates with it), otherwise nothing is returned.
# The class that gets instantiated depends on `tfo:implementation`:
#
# * a `file:` URI whose path looks like XSLT yields
#   `RDF::SAK::Transform::XSLT`
# * a `urn:x-ruby:Some::Constant` URI yields that constant
# * anything else (or no implementation) yields the receiver
#
# @param harness [RDF::SAK::Transform::Harness] the harness
# @param subject [RDF::Resource, RDF::SAK::Transform] the transform's
#  identifier; an already-resolved transform is returned untouched
# @return [RDF::SAK::Transform, nil] the transform, if there is one
# @raise [NotImplementedError] if a `urn:x-ruby:` implementation names
#  a constant that cannot be found
#
def self.resolve harness, subject
  # noop
  return subject if subject.is_a? self

  repo = harness.repo

  asserted = RDF::SAK::Util.objects_for repo, subject,
    RDF.type, only: :resource

  return if
    (asserted & RDF::SAK::Util.all_related(RDF::SAK::TFO.Transform)).empty?

  params = gather_params repo, subject

  # an explicit tfo:parameter-list fixes the order; otherwise fall
  # back to the sorted parameter URIs
  pl = RDF::SAK::Util.objects_for(repo, subject,
    RDF::SAK::TFO['parameter-list'], only: :resource).sort.first
  plist = pl ? RDF::List.from(repo, pl).to_a : params.keys.sort

  accepts = gather_accepts_returns repo, subject
  returns = gather_accepts_returns repo, subject, returns: true

  tclass = self

  # XXX this is all dumb but it has to be this way for now

  impl = RDF::SAK::Util.objects_for(repo, subject,
    RDF::SAK::TFO.implementation, only: :uri).sort.first

  if impl
    case impl.to_s
    when /^file:/i
      # XXX redo this later
      if /xsl/i.match? MimeMagic.by_path(impl.path.to_s).to_s
        tclass = RDF::SAK::Transform::XSLT
      end
    when /^urn:x-ruby:(.*)$/i
      cn = Regexp.last_match(1)
      begin
        # NOTE(review): the constant name comes straight out of the
        # graph and the result is instantiated below without checking
        # that it is a Transform; only load graphs you trust.
        tclass = Object.const_get cn
      rescue NameError
        # (was `rescue NameError, e`, which names `e` as a second
        # exception class to match rather than binding the exception)
        raise NotImplementedError,
          "Could not locate implementation for #{impl}!"
      end
    end
  end

  tclass.new subject, params: params, param_list: plist, accepts: accepts,
    returns: returns, implementation: impl, harness: harness
end
|
150
|
+
|
151
|
+
# Normalize a hash of parameter values so that every value is a Set.
#
# Values that respond to `to_a` are converted through it; anything
# else becomes a one-member Set. `nil` values are left as `nil`.
#
# @param params [Hash] the parameters
# @return [Hash] the same keys, with Set (or nil) values
#
def self.coerce_params params
  params.transform_values do |value|
    # nil stays nil rather than turning into an empty set
    next if value.nil?

    members = value.respond_to?(:to_a) ? value.to_a : [value]
    Set.new members
  end
end
|
157
|
+
|
158
|
+
attr_reader :subject
|
159
|
+
|
160
|
+
# Initialize a transform from data.
#
# Everything handed in is frozen. Note that `params` is frozen in
# place (not copied), as are `accepts`/`returns` when they are
# already Arrays.
#
# @param subject [RDF::Resource] the transform's identifier
# @param harness [RDF::SAK::Transform::Harness] passed on to
#  {#init_implementation}
# @param params [Hash] parameter URI => spec (see `gather_params`)
# @param param_list [Array] parameter order; the sorted keys of
#  `params` are used if this is empty
# @param accepts [Array] content types taken as input
# @param returns [Array] content types produced
# @param implementation [RDF::Resource] the implementation's identifier
#
def initialize subject, harness: nil, params: {}, param_list: [],
    accepts: %w[*/*], returns: %w[*/*], implementation: nil
  as_list = ->(thing) { thing.respond_to?(:to_a) ? thing.to_a : [thing] }

  @subject = subject.dup.freeze
  @params  = params.freeze

  ordered = param_list.empty? ? params.keys.sort : param_list.dup
  @plist  = ordered.freeze

  # reverse index: identifier slug => parameter URI
  slugs = {}
  params.each { |uri, spec| slugs[spec[:id]] = uri }
  @pcache = slugs.freeze

  @accepts = as_list.call(accepts).freeze
  @returns = as_list.call(returns).freeze
  @impl    = implementation.freeze

  # initialize the implementation
  init_implementation harness
end
|
182
|
+
|
183
|
+
# The identifier of whatever implements this transform, exactly as
# it was handed to the constructor (frozen).
#
# @return [RDF::URI, nil]
#
def implementation
  @impl
end
|
190
|
+
|
191
|
+
# Whether this transform can *actually* be run. The base class only
# carries the specification, so the answer here is always no.
#
# @return [false, true]
#
def implemented?
  false
end
|
198
|
+
|
199
|
+
# True if the transform accepts the given Content-Type.
#
# The type's whole lineage is offered to the negotiator, so a
# transform that declares a parent type also matches its descendants
# (e.g. one that accepts application/xml matches
# application/xhtml+xml).
#
# @param type [String] the content type to test
# @return [false, true] whether the type is acceptable
#
def accepts? type
  # one faux variant per type in the lineage; the key only has to be
  # unique, so the type name itself will do
  variants = {}
  RDF::SAK::MimeMagic.new(type).lineage.each do |ancestor|
    name = ancestor.to_s
    variants[name] = [1, name]
  end

  # construct the pseudo-header, ruling out everything not listed
  # unless the transform explicitly takes anything
  ranges = @accepts.dup
  ranges << '*/*;q=0' unless ranges.include? '*/*'
  headers = { Accept: ranges.join(', ') }

  # we only care *if* this returns something, not *what*
  HTTP::Negotiate.negotiate(headers, variants) ? true : false
end
|
225
|
+
|
226
|
+
# The parameters in order: the explicit parameter list if one was
# supplied, otherwise the sorted parameter keys.
#
# @return [Array]
#
def keys
  # XXX the list ought to be unique already, but duplicates have been
  # seen in test output, hence the extra pass
  ordered = @plist
  ordered.uniq
end
|
235
|
+
|
236
|
+
# Retrieve a parameter spec, either by its fully-qualified URI or
# its `dct:identifier`.
#
# The spec comes back as a copy with a `:uri` member added. That
# member is always the parameter's URI, whichever way the parameter
# was looked up (it used to echo the lookup key, so a lookup by
# identifier produced `uri: :identifier`).
#
# @param key [RDF::Resource,Symbol,String] the parameter URI or its
#  identifier
# @return [Hash, nil] the parameter spec, if there is such a parameter
#
def [](key)
  uri = case key
        when RDF::Resource then key
        when Symbol then @pcache[key]
        when String
          # a string is tried as an identifier first, then as a URI
          by_slug = @pcache[key.to_sym]
          @params.key?(by_slug) ? by_slug : RDF::URI(key)
        end

  spec = @params[uri] or return

  # add the key to the group
  spec.merge(uri: uri)
end
|
252
|
+
|
253
|
+
# XXX kill this
#
# Crude check that a parameter hash names exactly the parameters of
# this transform: its sorted keys are compared with {#keys}.
#
# @param params [Hash] the parameters to check
# @return [false, true] whether the key sets line up
# @raise [ArgumentError] if `params` is not a Hash
#
def lint params
  unless params.is_a? Hash
    raise ArgumentError, "params must be a hash, not #{params.class}"
  end

  supplied = params.keys.sort
  supplied == keys
end
|
259
|
+
|
260
|
+
# Return the validated parameters or raise an exception.
#
# Values may be keyed by parameter URI or by identifier. Each value
# is coerced to an Array of RDF terms: terms pass through, `URI`
# objects become `RDF::URI`, `nil` becomes `rdf:nil`, and anything
# else is turned into a literal (typed by the parameter's datatype
# range if it has one) or, for strings whose parameter has a
# non-datatype range, into a URI or blank node. Missing values are
# filled from the spec's default, then cardinality is checked.
#
# @note `silent` only covers unrecognized parameters; cardinality
#  violations raise regardless.
#
# @param params [Hash] the hash of parameters
# @param symbols [false, true] whether the keys should be symbols or URIs
# @param defaults [true, false] whether to keep values equal to the
#  default in the result
# @param silent [false, true] return nil rather than raise if true
# @return [Hash, nil] the validated parameters
# @raise [ArgumentError] on a cardinality violation, or on
#  unrecognized parameters unless `silent`
#
def validate params, symbols: false, defaults: true, silent: false
  # duplicate so we can delete from it; whatever is left over at the
  # end was not recognized
  params = params.dup
  out = {}

  # note the instance variable vs the argument
  @params.each do |k, spec|
    range = spec[:range] || []
    given = params.delete(k) || params.delete(spec[:id]) || []
    given = given.respond_to?(:to_a) ? given.to_a : [given]

    v = given.map do |item|
      case item
      when RDF::Term then item
      when URI then RDF::URI(item.to_s)
      when nil then RDF::nil
      else
        datatypes = range.select(&:datatype?)
        if !datatypes.empty?
          datatypes = datatypes.to_a.sort
          # this message used to be a bare string that went nowhere
          warn "multiple ranges; arbitrarily picking #{datatypes.first}" if
            datatypes.size > 1
          RDF::Literal(item, datatype: datatypes.first)
        elsif item.is_a?(String) && !range.reject(&:datatype?).empty?
          # the range says resource: `_:foo` is a blank node,
          # anything else a URI
          if m = /^_:(.+)$/.match(item)
            RDF::Node(m[1])
          else
            RDF::URI(item)
          end
        else
          RDF::Literal(item)
        end
      end
    end

    # XXX one day we should check types but not today

    # give us the default(s) then
    v = spec[:default].dup if v.empty? && spec[:default]

    # but we *will* check the cardinality
    minc = spec.fetch :minc, 0
    maxc = spec.fetch :maxc, Float::INFINITY

    raise ArgumentError, "Parameter #{k} must have at least " \
      "#{minc} value#{'s' unless minc == 1}" if v.size < minc
    raise ArgumentError, "Parameter #{k} must have at most " \
      "#{maxc} value#{'s' unless maxc == 1}" if v.size > maxc
    # XXX if cardinality == 1 should we set v to v.first? dunno

    # when defaults are not wanted, a value equal to the default is
    # simply left out
    next if !defaults && v == spec[:default]

    key = symbols ? (spec[:id] || k.to_s) : k
    out[key] = v
  end

  # if params are not empty then this is an error
  unless params.empty?
    return if silent
    raise ArgumentError,
      "Unrecognized parameters #{params.keys.join ', '}"
  end

  out
end
|
330
|
+
|
331
|
+
# Check the parameters and apply the function. Parameters go through
# {#validate} (with symbol keys) before being handed to `execute`,
# which is not defined in the visible portion of this class and is
# presumably supplied by whatever implements the transform.
#
# @param input [String,IO,#to_s,#read] Something bytelike
# @param params [Hash] the instance parameters
# @param parsed [Object] the already-parsed object, if applicable
# @param type [String] the content-type of the input (currently unused)
# @param accept [String] a string in the form of an Accept header
# @yieldparam output [String,IO] the output
# @yieldparam parseout [Object] the parsed output, if applicable
# @return [Array, nil] the serialized output and the parsed output
#  (if applicable) as a pair, or nil if nothing this transform
#  returns satisfies `accept` or the transform produced nothing
# @raise [NotImplementedError] if the transform has no implementation
# @raise [ArgumentError] if the parameters do not validate
#
def apply input, params = {}, parsed: nil,
    type: 'application/octet-stream', accept: '*/*', &block
  # (the message used to interpolate @id, which is never assigned)
  raise NotImplementedError, "Transform #{@subject} is not implemented!" unless
    implemented?

  # XXX validate accept or explode
  # for now only bail out if none of our return types is acceptable
  HTTP::Negotiate.negotiate({ Accept: accept },
    @returns.map { |t| [t, [1, t]] }.to_h, all: true) or return

  # this will succeed or explode
  params = validate params, symbols: true

  # run the transform
  out, parseout = execute input, parsed, params

  # bail out if nothing was returned
  return unless out

  # now run the block if present
  block.call out, parseout if block

  # return it to the caller
  [out, parseout]
end
|
368
|
+
|
369
|
+
# This class implements a cache for partial transformation function
# applications, which bundle transforms with a set of instance
# parameters under a reusable identity.
#
# Two indexes are kept: partial subject => Partial, and transform
# subject => { parameters => Partial }.
class PartialCache
  # Initialize the cache with all partials pre-loaded.
  #
  # @param harness [RDF::SAK::Transform::Harness] the transform harness
  # @return [RDF::SAK::Transform::PartialCache] the instance
  #
  def self.load harness
    new(harness).load
  end

  attr_reader :harness

  # Initialize an empty cache.
  # @param harness [RDF::SAK::Transform::Harness] the parent harness.
  #
  def initialize harness
    @harness = harness
    @cache = {}
    @mapping = {}
    @transforms = {}
  end

  # Load an initialized partial cache with every `tfo:Partial` in
  # the harness's repository.
  #
  # @return [self] daisy-chainable self-reference
  #
  def load
    RDF::SAK::Util.subjects_for(repo, RDF.type,
      RDF::SAK::TFO.Partial).each do |s|
      resolve subject: s
    end

    # return self to daisy-chain
    self
  end

  # The subjects of all cached partials.
  #
  # @return [Array<RDF::Resource>]
  #
  def partials
    @cache.keys.select { |x| x.is_a? RDF::Resource }
  end

  # The repository of the parent harness.
  def repo
    @harness.repo
  end

  # A copy of the transform table. Nothing in this class adds to
  # that table, so as it stands this is always empty.
  #
  # @return [Hash]
  #
  def transforms
    @transforms.dup
  end

  # Retrieve a Partial from the cache, either directly by its own
  # subject or by its transform plus parameters. Unlike {#resolve}
  # this never adds partials to the cache.
  #
  # @param transform [RDF::SAK::Transform, RDF::URI] the transform,
  #  or the subject of a cached partial
  # @param params [Hash] the parameters to look up
  # @return [RDF::SAK::Transform::Partial, nil] the cached partial
  # @raise [ArgumentError] if `transform` is neither of those things
  #
  def get transform, params
    ts = case transform
         when RDF::SAK::Transform then transform.subject
         when RDF::URI
           # the URI may really be the subject of a cached partial
           return @cache[transform] if @cache.key? transform

           # XXX transforms resolved here may not get implemented
           resolved = @harness.resolve(transform) or return
           resolved.subject
         else
           raise ArgumentError, "Don't know what to do with #{transform}"
         end

    # return direct cache entry if transform is really the subject
    return @cache[ts] if @cache.key? ts

    # otherwise return the mapping, which is keyed by transform subject
    # NOTE(review): #resolve files entries under *validated* parameter
    # hashes (Array values) whereas this looks them up by the coerced
    # form (Set values), so this probably never hits -- confirm which
    # shape is intended.
    @mapping.fetch(ts, {})[coerce_params params]
  end

  # Resolves a partial either by subject or by transform + parameter
  # set.
  #
  # @param subject [RDF::URI, RDF::SAK::Transform::Partial] The
  #  subject URI of the partial, or a Partial to add to the cache
  # @param transform [RDF::URI,RDF::SAK::Transform] the transform
  # @param params [Hash] an instance of parameters
  # @return [RDF::SAK::Transform::Partial, nil] the partial, if found
  # @raise [RuntimeError] if handed a Partial whose transform the
  #  harness cannot resolve
  #
  def resolve subject: nil, transform: nil, params: {}
    if subject
      if subject.is_a? RDF::SAK::Transform::Partial
        # snag the transform
        transform = @harness.resolve(subject.transform) or
          raise "Could not resolve the transform associated with #{subject.subject}"

        # mkay now add this to the cache
        t = @mapping[transform.subject] ||= {}
        @cache[subject.subject] ||= t[subject.params] ||= subject
      else
        # resolve the partial
        partial = @cache[subject] || RDF::SAK::Transform::Partial.resolve(
          @harness, subject: subject) or return

        # initialize the mapping if not present
        t = @mapping[partial.transform.subject] ||= {}

        # off we go
        @cache[subject] ||= t[partial.params] ||= partial
      end
    elsif transform
      unless transform.is_a? RDF::SAK::Transform
        # an unknown transform cannot have partials
        transform = @harness.resolve(transform) or return
      end

      # tolerate an explicit nil (e.g. from Harness#resolve_partial)
      params = transform.validate(params || {}, defaults: false)

      # note the *presence* of the key means the cache item has been
      # checked already; its *value* may be nil
      t = @mapping[transform.subject] ||= {}
      return t[params] if t.key? params

      # try to resolve the partial
      partial = RDF::SAK::Transform::Partial.resolve(@harness,
        transform: transform, params: params) or return

      # update the caches
      @cache[partial.subject] = t[params] = partial
    end
  end

  private

  # Normalize parameter values to sets; see
  # `RDF::SAK::Transform.coerce_params`.
  def coerce_params params
    RDF::SAK::Transform.coerce_params params
  end
end
|
497
|
+
|
498
|
+
# This class is the main harness for holding all the transforms and
# operating over them. This is the primary interface through which
# we manipulate transforms.
class Harness
  attr_reader :partials, :repo, :root

  # Create a new harness instance.
  #
  # @param repo [RDF::Repository] the repository to find RDF data
  # @param root [String,Pathname] the root directory for implementations
  # @raise [ArgumentError] if `repo` is not a repository, or `root` is
  #  not a readable directory
  #
  def initialize repo, root
    raise ArgumentError,
      "repo is #{repo.class}, not an RDF::Repository" unless
      repo.is_a? RDF::Repository
    @repo = repo
    @root = Pathname(root).expand_path
    raise ArgumentError, "Root #{@root} does not exist" unless
      @root.directory? && @root.readable?
    @cache = {}
    @partials = RDF::SAK::Transform::PartialCache.new self
  end

  # Bootstrap all the transforms.
  #
  # @param repo [RDF::Repository] the repository to find RDF data
  # @param root [String,Pathname] the root directory for implementations
  # @return [RDF::SAK::Transform::Harness] the harness instance
  def self.load repo, root
    new(repo, root).load
  end

  # Load every `tfo:Transform` in the repository into an existing
  # instance.
  #
  # @return [self] the harness, so calls can be daisy-chained
  def load
    RDF::SAK::Util.subjects_for(@repo, RDF.type,
      RDF::SAK::TFO.Transform, only: :resource).each do |subject|
      resolve subject
    end

    # return self so we can daisy-chain
    self
  end

  # Return all cached Transform identities.
  #
  # @return [Array] the URIs of known Transforms
  #
  def transforms
    @cache.keys.sort
  end

  # Resolve a Transform based on its URI. An already-resolved
  # Transform is handed straight back.
  #
  # @param subject [RDF::Resource, RDF::SAK::Transform] the identifier
  #  for the transform.
  # @return [RDF::SAK::Transform, nil] the Transform, if present.
  #
  def resolve subject
    if subject.is_a? RDF::SAK::Transform
      # file it under its URI; filing it under itself would put an
      # unsortable key in the cache and break #transforms
      @cache[subject.subject] ||= subject
      return subject
    end

    return @cache[subject] if @cache[subject]

    # XXX raise???
    transform = RDF::SAK::Transform.resolve(self, subject) or return
    @cache[subject] = transform
  end

  # Resolve a Partial based on either its subject URI or the
  # transform-params pair.
  #
  # @param subject [RDF::Resource] the Partial's subject
  # @param transform [RDF::Resource,RDF::SAK::Transform] the transform
  # @param params [Hash, nil] an instance of parameters; nil means none
  # @return [RDF::SAK::Transform::Partial] the Partial, if present
  #
  def resolve_partial subject: nil, transform: nil, params: nil
    # the cache validates the parameters, which needs an actual hash
    partials.resolve subject: subject, transform: transform,
      params: params || {}
  end

  # Resolve a total function application record based on either its
  # subject URI, a transform-params pair, or a Partial.
  #
  # @param subject [RDF::Resource] the Application's subject
  # @param transform [RDF::Resource,RDF::SAK::Transform] the Transform
  # @param params [Hash] an instance of parameters
  # @param partial [RDF::Resource,RDF::SAK::Transform::Partial] a Partial
  # @param input [RDF::Resource] the Application's input
  # @param output [RDF::Resource] the Application's output
  # @return [RDF::SAK::Transform::Application] the Application, if present
  #
  def resolve_application subject: nil, transform: nil, params: {},
      partial: nil, input: nil, output: nil
    RDF::SAK::Transform::Application.resolve self, subject: subject,
      transform: transform, params: params, partial: partial,
      input: input, output: output
  end

  # Returns true if the Application with the given subject URI
  # matches either the transform-params pair, or a partial.
  #
  # @param subject [RDF::Resource,RDF::SAK::Transform::Application]
  #  the application
  # @param transform [RDF::Resource,RDF::SAK::Transform] the transform
  # @param params [Hash] an instance of parameters
  # @param partial [RDF::Resource,RDF::SAK::Transform::Partial] a partial
  # @return [true, false] whether or not the application matches;
  #  false if the partial or transform cannot be resolved
  # @raise [ArgumentError] if `params` do not validate
  #
  def application_matches? subject, transform: nil, params: {}, partial: nil
    # unbundle the params; partial overrides transform+params
    if partial
      unless partial.is_a? RDF::SAK::Transform::Partial
        # (resolve_partial takes keywords only)
        partial = resolve_partial(subject: partial) or return false
      end
      transform = partial.transform
      params    = partial.params
    else
      # nothing to match against
      return false if transform.nil?

      unless transform.is_a? RDF::SAK::Transform
        transform = resolve(transform) or return false
      end
      params = transform.validate params
    end

    if subject.is_a? RDF::SAK::Transform::Application
      return true if partial && subject.completes?(partial)
      return true if
        subject.transform == transform && subject.matches?(params)
    else
      # this should say, try matching the partial if there is one
      # to match, otherwise attempt to directly match the transform
      return true if partial && repo.has_statement?(
        RDF::Statement(subject, RDF::SAK::TFO.completes, partial.subject))

      if repo.has_statement?(
        RDF::Statement(subject, RDF::SAK::TFO.transform, transform.subject))
        testp = transform.keys.map do |p|
          o = repo.query([subject, p, nil]).objects.uniq.sort
          o.empty? ? nil : [p, o]
        end.compact.to_h

        # this will clear any explicit declarations of defaults
        testp = transform.validate testp, defaults: false, silent: true

        # put the wanted parameters in the same defaults-free form,
        # otherwise anything left at its default never compares equal
        wanted = transform.validate params, defaults: false, silent: true

        # true means it matches
        return testp == wanted
      end
    end

    false
  end
end
|
643
|
+
|
644
|
+
# A partial application of a transformation function: a transform
# bundled with a set of instance parameters under its own identity.
class Partial
  # Resolve a partial function application, either by its own
  # subject or by transform plus parameters. In the latter case the
  # repository is searched for a `tfo:Partial` of that transform
  # whose stated parameters are exactly the given ones; if several
  # qualify, the lowest-sorting subject wins.
  #
  # @param harness [RDF::SAK::Transform::Harness] the harness
  # @param subject [RDF::Resource] the identity of the partial
  # @param transform [RDF::Resource, RDF::SAK::Transform] the
  #  identity of the transform
  # @param params [Hash] key-value pairs
  # @return [RDF::SAK::Transform::Partial, nil] the partial, if found
  # @raise [ArgumentError] if neither subject nor transform is given
  def self.resolve harness, subject: nil, transform: nil, params: {}
    raise ArgumentError, 'Must supply either a subject or a transform' unless
      subject || transform

    repo = harness.repo

    # coerce the transform to a Transform object if it isn't already
    if transform
      unless transform.is_a? RDF::SAK::Transform
        transform = harness.resolve(transform) or return
      end
    elsif subject.is_a? RDF::URI
      # locate the transform if given the subject
      transform = RDF::SAK::Util.objects_for(repo, subject,
        RDF::SAK::TFO.transform, only: :resource).first or return
      transform = harness.resolve(transform) or return
    end

    # a subject that is not a URI, with no transform to go with it,
    # leaves nothing to resolve against
    return unless transform

    # obtain the subject for the given parameters
    if subject
      params = stated_params repo, subject, transform
    else
      params = transform.validate params, symbols: false, defaults: false

      found = RDF::Query.new do
        # XXX we should sort parameters by longest value since
        # longer values will probably be less common; anyway this is
        # gonna all need to be rethought
        params.each { |p, objs| objs.each { |o| pattern [:s, p, o] } }
        pattern [:s, RDF.type, RDF::SAK::TFO.Partial]
        pattern [:s, RDF::SAK::TFO.transform, transform.subject]

        # add any remaining parameters
        # XXX this actually messes up; we don't want this
        # (transform.keys - params.keys.sort).each { |r| pattern [:s, r, nil] }
      end.execute(repo).map { |sol| sol[:s] }.uniq

      # the query only proves each candidate has *at least* these
      # values; keep the ones that have exactly these
      candidates = found.map do |s|
        [s, stated_params(repo, s, transform)]
      end.to_h.select { |_, ps| ps == params }

      return if candidates.empty?

      # sort it so we always get the same thing
      subject = candidates.keys.sort.first
      params = candidates[subject]
    end

    new subject, transform, params
  end

  # Collect the parameter values stated for a subject, restricted to
  # the parameters of the given transform.
  def self.stated_params repo, subject, transform
    transform.keys.each_with_object({}) do |p, stated|
      o = repo.query([subject, p, nil]).objects.uniq.sort
      stated[p] = o unless o.empty?
    end
  end
  private_class_method :stated_params

  attr_reader :subject, :transform

  # @param subject [RDF::Resource] the identity of the partial
  # @param transform [RDF::SAK::Transform] the transform
  # @param params [Hash, RDF::SAK::Transform::Partial] the parameters,
  #  or another partial to take (already validated) parameters from
  # @raise [ArgumentError] if `transform` is not a Transform, or the
  #  parameters do not validate
  def initialize subject, transform, params = {}
    raise ArgumentError, 'transform must be a Transform' unless
      transform.is_a? RDF::SAK::Transform
    @subject = subject
    @transform = transform
    # this used to leave @params nil when handed a partial, which
    # made #keys and #[] blow up later
    @params = if params.is_a? RDF::SAK::Transform::Partial
                params.params
              else
                transform.validate params
              end
  end

  # The value(s) of a single parameter.
  def [](key)
    @params[key]
  end

  # The keys of the validated parameters.
  def keys
    @params.keys
  end

  # A copy of the validated parameters.
  def params
    @params.dup
  end

  # True if the given parameters validate to the same thing as ours.
  #
  # @param params [Hash] parameters to compare against
  # @return [false, true]
  def matches? params
    @params == @transform.validate(params)
  end

  # True if the other partial has the same transform and parameters,
  # whatever its identity.
  def ===(other)
    return false unless other.is_a? RDF::SAK::Transform::Partial
    transform == other.transform && matches?(other.params)
  end

  # True if the other partial is equivalent *and* has the same subject.
  def ==(other)
    self === other && subject == other.subject
  end
end
|
749
|
+
|
750
|
+
# A record of a transformation function application.
|
751
|
+
# @note "Application" as in to "apply" a function, not an "app".
|
752
|
+
class Application < Partial
|
753
|
+
# Resolve a particular function Application from the repository.
# Either resolve by subject, or resolve by a transform + parameter
# + input set. Applications that complete Partials will be
# automatically resolved.
#
# @param harness [RDF::SAK::Transform::Harness] the harness
# @param subject [RDF::Resource] the subject
# @param transform [RDF::Resource,RDF::SAK::Transform] the transform
# @param params [Hash] an instance of parameters
# @param partial [RDF::SAK::Transform::Partial] a partial application
#  to match in place of transform + params
# @param input [RDF::Resource] the Application's input
# @param output [RDF::Resource] the Application's output
# @return [RDF::SAK::Transform::Application, nil] the Application, if
#  present (a `subject` that already is an Application is returned as-is)
# @raise [ArgumentError] if given neither a subject nor an input plus
#  either a partial or a transform with params
# @raise [RuntimeError] if a referenced partial or transform cannot be
#  found, or a record resolved by subject lacks an input or an output
#
def self.resolve harness, subject: nil, transform: nil, params: {},
    partial: nil, input: nil, output: nil
  # either a subject or transform + input + output? + params?

  repo     = harness.repo
  partials = harness.partials

  if subject
    # noop
    return subject if subject.is_a? self

    # okay partial
    # NOTE(review): the objects_for calls in this branch do not pass
    # `repo`; confirm against the signature of RDF::SAK::Util.objects_for
    partial = RDF::SAK::Util.objects_for(
      subject, RDF::SAK::TFO.completes, only: :resource).sort.first

    if partial
      tmp = partials.resolve(subject: partial) or
        raise "Could not find partial #{partial}"
      partial   = tmp
      transform = partial.transform
    else
      transform = RDF::SAK::Util.objects_for(
        subject, RDF::SAK::TFO.transform, only: :resource).sort.first or
        raise "Could not find a transform for #{subject}"
      tmp = harness.resolve(transform) or
        raise "Could not find transform #{transform}"
      transform = tmp

      # collect the raw parameter values; they get validated by the
      # constructor at the bottom of this method
      params = {}
      transform.keys.each do |p|
        o = repo.query([subject, p, nil]).objects.uniq.sort
        params[p] = o unless o.empty?
      end
    end

    # get inputs and outputs
    input  = RDF::SAK::Util.objects_for(
      subject, RDF::SAK::TFO.input,  only: :resource).sort.first
    output = RDF::SAK::Util.objects_for(
      subject, RDF::SAK::TFO.output, only: :resource).sort.first

    raise 'Data must have both input and output' unless input and output
  elsif input and ((transform and params) or partial)

    # XXX dispatch on partial only? smart? dumb?
    if partial
      transform = partial.transform
      params    = partial.params
    else
      # do transform
      t = harness.resolve(transform) or
        raise "Could not resolve transform #{transform}"
      transform = t

      # coerce/validate params
      params = transform.validate params, defaults: false

      # do partial
      partial = partials.resolve transform: transform, params: params
    end

    # collect function application receipts
    candidates = RDF::Query.new do
      # note that there is no cost-based optimization so we write
      # these in the order of least to most cardinality
      pattern [:t, RDF::SAK::TFO.output, output] if output
      pattern [:t, RDF::SAK::TFO.input, input]
    end.execute(repo).map { |sol| sol[:t] }.compact.uniq.select do |s|
      # try matching the partial if there is one to match, otherwise
      # attempt to directly match the transform and its parameters
      if partial and repo.has_statement?(
        RDF::Statement(s, RDF::SAK::TFO.completes, partial.subject))
        true
      elsif repo.has_statement?(
        RDF::Statement(s, RDF::SAK::TFO.transform, transform.subject))
        testp = transform.keys.map do |p|
          o = repo.query([s, p, nil]).objects.uniq.sort
          o.empty? ? nil : [p, o]
        end.compact.to_h

        testp = transform.validate testp, defaults: false, silent: true
        testp == params
      end
    end.compact.uniq.sort

    return if candidates.empty?

    if candidates.size == 1
      subject = candidates.first
    else
      # now we have the unlikely case that there are two identical
      # records so we just sort em first by end date, then by
      # start date, then lexically
      subject = candidates.map do |s|
        st, et = %i[startedAtTime endedAtTime].map do |p|
          repo.query([s, RDF::Vocab::PROV[p], nil]).map do |stmt|
            # only literals wrap a native value we can inspect
            next unless stmt.object.literal?
            dt = stmt.object.object
            dt if dt.is_a? DateTime
          end.compact.sort.last
        end
        [s, st, et]
      end.sort do |a, b|
        # latest end time first
        c = a[2] && b[2] ? b[2] <=> a[2] : 0
        # on a tie (or a missing end time), latest start time first
        c = b[1] <=> a[1] if c == 0 && a[1] && b[1]
        # if those two yield nothing, then sort lexically i guess
        c == 0 ? a[0] <=> b[0] : c
      end.first.first
    end
  else
    raise ArgumentError,
      'must have either a subject or transform + params + input'
  end

  # don't forget the output
  output ||= repo.query(
    [subject, RDF::SAK::TFO.output, nil]
  ).objects.select(&:uri?).sort.first

  new subject, transform, input, output, partial || params
end
|
890
|
+
|
891
|
+
# Readers for the input and output identifiers given to the
# constructor, and for the Partial this application completes (only
# set when the constructor was handed a Partial in place of parameters).
attr_reader :input, :output, :completes
|
892
|
+
|
893
|
+
# Build a function application record directly from its parts.
#
# @param subject [RDF::Resource] the identifier of this application
# @param transform [RDF::SAK::Transform] the transform that was applied
#  (checked by the superclass constructor)
# @param input [RDF::Resource] the identifier for the input
# @param output [RDF::Resource] the identifier for the output
# @param params [Hash, RDF::SAK::Transform::Partial] the parameters,
#  or the partial application that this one completes
# @param start [Object, nil] start time; emitted by #to_triples as
#  prov:startedAtTime when present
# @param stop [Object, nil] end time; emitted by #to_triples as
#  prov:endedAtTime when present
def initialize subject, transform, input, output, params = {},
    start: nil, stop: nil
  # the superclass copes with params being a partial
  super(subject, transform, params)

  @input, @output = input, output

  # only remember a completed partial when we were actually given one
  if params.is_a? RDF::SAK::Transform::Partial
    @completes = params
  end

  @start, @stop = start, stop
end
|
912
|
+
|
913
|
+
# Serialize this function application as RDF statements.
#
# Emits the type assertion, the start/end times when they were
# supplied, and then either a pointer to the completed partial or the
# transform plus one statement per parameter value.
#
# @return [Array<RDF::Statement>] the statements describing this record
def to_triples
  s    = @subject
  rows = [[s, RDF.type, RDF::SAK::TFO.Application]]

  if @start
    started = @start
    started = RDF::Literal(started) unless started.is_a?(RDF::Literal)
    rows << [s, RDF::Vocab::PROV.startedAtTime, started]
  end

  if @stop
    ended = @stop
    ended = RDF::Literal(ended) unless ended.is_a?(RDF::Literal)
    rows << [s, RDF::Vocab::PROV.endedAtTime, ended]
  end

  if @completes
    # the partial already records the transform and its parameters
    rows << [s, RDF::SAK::TFO.completes, @completes.subject]
  else
    rows << [s, RDF::SAK::TFO.transform, transform.subject]
    validated = transform.validate params, defaults: false, silent: true
    validated.each do |predicate, values|
      values.each do |value|
        rows << [s, predicate, value]
      end
    end
  end

  rows.map { |row| RDF::Statement(*row) }
end
|
941
|
+
|
942
|
+
# Look up a single parameter, preferring the completed partial.
#
# @param key [Object] the parameter key
# @return [Object, nil] the value found under `key` in the completed
#  partial when there is one, otherwise in the stored parameters
def [](key)
  source = @completes || @params
  source[key]
end
|
946
|
+
|
947
|
+
# List the parameter keys, preferring the completed partial.
#
# @return [Array] the keys reported by the completed partial when
#  there is one, otherwise those of the stored parameters
def keys
  source = @completes || @params
  source.keys
end
|
950
|
+
|
951
|
+
# Return the effective parameters of this application.
#
# @return [Hash] the completed partial's `params` when there is one,
#  otherwise a shallow copy of the stored parameters
def params
  if @completes
    @completes.params
  else
    @params.dup
  end
end
|
954
|
+
|
955
|
+
# Return the effective transform of this application.
#
# @return [Object] the completed partial's `transform` when there is
#  one, otherwise the transform stored on this record
def transform
  if @completes
    @completes.transform
  else
    @transform
  end
end
|
958
|
+
|
959
|
+
# Test whether this application completes the given partial.
#
# @param partial [Object, nil] the partial to compare against
# @return [Boolean, nil] falsy when either side is missing, otherwise
#  whether the completed partial equals `partial`
def completes? partial
  @completes && partial && @completes == partial
end
|
962
|
+
|
963
|
+
# Test whether a set of parameters is equivalent to this application's.
#
# @param params [Hash] the parameters to compare
# @return [Boolean] the completed partial's verdict when there is one,
#  otherwise the superclass's
def matches? params
  if @completes
    @completes.matches? params
  else
    super params
  end
end
|
967
|
+
|
968
|
+
# Loose equivalence between two applications: same input, same output,
# same transform and equivalent parameters, regardless of subject.
#
# @param other [Object] the comparand
# @return [Boolean] false unless `other` is an Application with equal
#  input and output; otherwise whether its transform equals ours and
#  it matches our parameters
def ===(other)
  return false unless other.is_a? Application

  same_io = @input == other.input && @output == other.output
  return false unless same_io

  # compare against the partial we complete, if any, else ourselves
  reference = @completes || self

  other.transform == reference.transform &&
    other.matches?(reference.params)
end
|
978
|
+
end
|
979
|
+
|
980
|
+
# XXX everything below this line is trash
|
981
|
+
|
982
|
+
# Gather the candidate's stored values for each requested parameter.
#
# NOTE(review): the collected values are never compared with `params`
# nor returned; the method evaluates to the list of parameter keys.
# This looks unfinished.
#
# @param repo [#query] queried once per parameter key with a
#  `[candidate, key, nil]` pattern and a block
# @param candidate [Object] the subject whose statements are inspected
# @param params [Hash] parameter keys mapped to a value or a
#  collection of values
# @return [Array] the keys of `params`
def match_params repo, candidate, params = {}
  # normalize every value to a Set
  params = params.transform_values do |value|
    members = value.respond_to?(:to_a) ? value.to_a : [value]
    Set.new(members)
  end

  found  = {}
  wanted = params.keys
  wanted.each do |predicate|
    repo.query([candidate, predicate, nil]) do |stmt|
      (found[stmt.predicate] ||= Set.new) << stmt.object
    end
  end
end
|
996
|
+
|
997
|
+
# Resolve a transformation application function in the repository
# with the given inputs and outputs.
#
# XXX note that this thing in its current state will not distinguish
# between two different function applications that happen to map the
# same input to the same output, but with different scalar
# parameters. For example, the `subtree` function could be given two
# different XPath queries but return the same subtree.
#
# @param repo [RDF::Queryable] the repository to search
# @param transform [RDF::Resource] the transform the record must point to
# @param input [RDF::Resource] the input of the application
# @param output [RDF::Resource, nil] the output of the application
# @param graph [Object] not referenced by this method
# @param params [Hash] parameter keys mapped to a value or a collection
#  of values
# @param partials [Hash] a cache of partials keyed by subject; a newly
#  resolved partial is added to it
# @return [RDF::Resource, nil] the subject of the matching record; when
#  several match, the one with the latest end time, then the latest
#  start time, then the lexically first
#
def resolve_transformation repo, transform, input, output = nil,
    graph: nil, params: {}, partials: {}

  # overwrite normalized params XXX replace this with something real
  params = params.transform_values do |v|
    Set.new(v.respond_to?(:to_a) ? v.to_a : [v])
  end

  # first we check the cache of partials to see if there is one that
  # matches our transform and parameters
  partial = partials.values.select do |p|
    p.transform == transform and p.matches? params
  end.sort.first

  # find the partial if there is one
  unless partial
    partial = Partial.resolve transform: transform, params: params
    # argh this isn't right; it should be partials[transform][params]
    partials[partial.subject] = partial if partial
  end

  # collect function application receipts
  candidates = RDF::Query.new do
    # note that there is no cost-based optimization so we write
    # these in the order of least to most cardinality
    pattern [:t, RDF::SAK::TFO.output, output]
    pattern [:t, RDF::SAK::TFO.input, input]
  end.execute(repo).map { |sol| sol[:t] }.compact.uniq.select do |s|
    repo.has_statement?(
      RDF::Statement(s, RDF::SAK::TFO.transform, transform)) or
      partial && repo.has_statement?(
        RDF::Statement(s, RDF::SAK::TFO.completes, partial))
  end.compact.uniq

  # first will be nil if this is empty so voila
  return candidates.first unless candidates.size > 1

  # now we have the unlikely case that there are two identical records
  candidates.map do |s|
    st, et = %i[startedAtTime endedAtTime].map do |p|
      # map (not the block form of query) so the extracted times are
      # what gets compacted and sorted
      repo.query([s, RDF::Vocab::PROV[p], nil]).map do |stmt|
        # only literals wrap a native value we can inspect
        next unless stmt.object.literal?
        dt = stmt.object.object
        dt if dt.is_a? DateTime
      end.compact.sort.last
    end
    [s, st, et]
  end.sort do |a, b|
    # latest end time first
    c = a[2] && b[2] ? b[2] <=> a[2] : 0
    # on a tie (or a missing end time), latest start time first
    c = b[1] <=> a[1] if c == 0 && a[1] && b[1]
    # if those two yield nothing, then sort lexically i guess
    c == 0 ? a[0] <=> b[0] : c
  end.first.first
end
|
1059
|
+
|
1060
|
+
# Placeholder for recording a function application; it has no body and
# ignores every argument.
#
# @return [nil]
def record_application repo, transform, input, output, start, finish,
    partial: false, graph: nil, subject: nil, params: {}
  nil
end
|
1063
|
+
|
1064
|
+
# Find a stored partial application of `function` whose parameter
# values equal the given ones.
#
# @param repo [RDF::Queryable] the repository to search
# @param function [RDF::Resource] the transform the partial points to
# @param params [Hash] parameter predicates mapped to a value or a
#  collection of values
# @return [RDF::Resource, nil] the subject of the first matching
#  partial in sorted order, or nil when none matches
def get_partial_transform repo, function, params = {}
  solutions = RDF::Query.new do
    pattern [:s, RDF.type, RDF::SAK::TFO.Partial]
    pattern [:s, RDF::SAK::TFO.transform, function]
    params.keys.each { |k| pattern [:s, k, nil] }
  end.execute(repo)

  # subject => { predicate => Set of stored objects }
  by_subject = {}
  solutions.each do |sol|
    slot = by_subject[sol[:s]] ||= {}
    params.keys.each do |k|
      # sets, because the order of the values is irrelevant here
      slot[k] = Set.new(repo.query([sol[:s], k, nil]).objects)
    end
  end

  # massage the input params into the same shape as the stored ones
  wanted = params.transform_values do |v|
    Set.new(v.respond_to?(:to_a) ? v.to_a : [v])
  end

  # sorted so that the same subject comes back every time if there
  # happen to be several matches; the key comparison is the cheaper
  # test and therefore goes first
  by_subject.keys.sort.find do |subject|
    by_subject[subject].keys.sort == params.keys.sort &&
      by_subject[subject] == wanted
  end
end
|
1098
|
+
|
1099
|
+
# A transform that cuts a subtree out of an XML document using an
# XPath expression.
class XPath < RDF::SAK::Transform
  protected

  # Run the XPath against the input and return the resulting subtree.
  #
  # @param input [Object] raw XML handed to Nokogiri.XML; only parsed
  #  when `parsed` is not supplied
  # @param parsed [Object, nil] an already-parsed document to reuse
  # @param params [Hash] reads `:xpath` (required), `:prefix` (terms
  #  whose value looks like `prefix: namespace`) and `:reindent`
  #  (defaults to true when absent or empty)
  # @return [Array, nil] the serialized XML and the subtree document,
  #  or nil when parsing raised a syntax error or nothing was selected
  # @raise [RuntimeError] if no `:xpath` parameter was supplied
  def execute input, parsed = nil, params
    xpath = params.fetch(:xpath, []).first or
      raise 'An :xpath parameter is required'
    prefix = params.fetch(:prefix, []).map do |x|
      x.value.split(/\s*:\s*/, 2)
    end.to_h.transform_keys(&:to_sym)
    # a missing :reindent falls back to the default, like the others
    reindent =
      (params.fetch(:reindent, []).first || RDF::Literal(true)).object

    begin
      parsed ||= Nokogiri.XML input
    rescue Nokogiri::SyntaxError
      # XXX i dunno, raise?
      return
    end

    doc = RDF::SAK::Util.subtree parsed,
      xpath.value, prefixes: prefix, reindent: reindent

    return unless doc

    [doc.to_xml, doc]
  end

  public

  # @return [true] this transform has a working implementation
  def implemented?
    true
  end
end
|
1130
|
+
|
1131
|
+
# A transform implemented by an XSLT stylesheet read from disk.
class XSLT < RDF::SAK::Transform
  protected

  # Locate the stylesheet relative to the harness root and compile it.
  #
  # @param harness [#root] supplies the root directory
  # @return [Object] the compiled stylesheet, also kept in @sheet
  # @raise [ArgumentError] if there is no root, the root is not a
  #  readable directory, or the stylesheet is not a readable file
  def init_implementation harness
    root = harness.root
    unless root
      raise ArgumentError, "Need a root to initialize the implementation"
    end

    root = Pathname(root).expand_path unless root.is_a? Pathname
    unless root.directory? && root.readable?
      raise ArgumentError, "#{root} is not a readable directory"
    end

    # XXX this assumes this is a file URI but so far that is the
    # only way we get here
    filename = root + implementation.path
    unless filename.file? && filename.readable?
      raise ArgumentError, "#{filename} is not a readable file"
    end

    @sheet = Nokogiri::XSLT(filename.read)
  end

  # Apply the stylesheet to the input.
  #
  # @param input [Object] raw XML handed to Nokogiri.XML; only parsed
  #  when `parsed` is not supplied
  # @param parsed [Object, nil] an already-parsed document to reuse
  # @param params [Hash] currently ignored
  # @return [Array, nil] the serialized result and the result document,
  #  or nil when parsing raised a syntax error
  def execute input, parsed = nil, params
    unless parsed
      begin
        parsed = Nokogiri.XML input
      rescue Nokogiri::SyntaxError
        # XXX i dunno, raise?
        return
      end
    end

    # XXX do we wanna allow params?
    result = @sheet.transform parsed

    # hand back both the string and the still-parsed document
    [@sheet.serialize(result), result]
  end

  public

  # @return [true] this transform has a working implementation
  def implemented?
    true
  end

end
|
1172
|
+
end
|