rdf-sak 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1172 @@
1
+ require 'rdf'
2
+ require 'rdf/vocab'
3
+ require 'rdf/sak/tfo'
4
+ require 'rdf/sak/util'
5
+ require 'set'
6
+ require 'mimemagic'
7
+ require 'http/negotiate'
8
+ require 'time'
9
+
10
+ # This class encapsulates a specification for an individual
11
+ # transformation function, including its parameter spec, accepted and
12
+ # returned types, identity, and implementation.
13
+ #
14
+ class RDF::SAK::Transform
15
+ # mkay basically this transformation function stuff got too hairy to
16
+ # just do ad-hoc so i guess i'm doing this now
17
+
18
+ private
19
+
20
# Collect the numeric values found among the literal objects of
# `subject`/`predicate`, sorted ascending.
#
# @param repo the repository handed to RDF::SAK::Util.objects_for
# @param subject the subject to inspect
# @param predicate the predicate to inspect
# @param entail [false, true] passed straight through to objects_for
# @return [Array<Numeric>] the sorted numeric values
def self.numeric_objects repo, subject, predicate, entail: false
  literals = RDF::SAK::Util.objects_for(
    repo, subject, predicate, entail: entail, only: :literal)

  # unwrap each literal and keep only the values that are numbers
  natives = literals.map { |literal| literal.object }
  natives.select { |value| value.is_a? Numeric }.sort
end
24
+
25
# Build the parameter specs declared on a transform.
#
# Each spec is a hash with :id (when a dct:identifier is present),
# :range (when an rdfs:range is present), :default, :minc and :maxc.
#
# @param repo the repository to read from
# @param subject the transform whose tfo:parameter objects are gathered
# @return [Hash] parameter resource => spec hash
def self.gather_params repo, subject
  util = RDF::SAK::Util

  declared = util.objects_for(repo, subject, RDF::SAK::TFO.parameter,
    entail: false, only: :resource)

  declared.each_with_object({}) do |ps, params|
    param = params[ps] ||= {}

    # slug/identifier: first one in sort order wins
    id = util.objects_for(
      repo, ps, RDF::Vocab::DC.identifier, only: :literal).sort.first
    param[:id] = id.value.to_sym if id

    # rdfs:range, only recorded when there is one
    range = util.objects_for(repo, ps, RDF::RDFS.range, only: :resource)
    param[:range] = range.to_set unless range.empty?

    # default value(s), recorded even when empty
    param[:default] = util.objects_for(repo, ps, RDF::SAK::TFO.default)

    # cardinalities: an exact owl:cardinality overrides min/max, which
    # otherwise fall back to 0 and infinity respectively
    exact = numeric_objects(repo, ps, RDF::OWL.cardinality).first
    if exact
      param[:minc] = param[:maxc] = exact
    else
      param[:minc] =
        numeric_objects(repo, ps, RDF::OWL.minCardinality).first || 0
      param[:maxc] =
        numeric_objects(repo, ps, RDF::OWL.maxCardinality).first ||
        Float::INFINITY
    end
  end
end
64
+
65
# Gather the content types a transform accepts (or returns).
#
# Objects of tfo:accepts / tfo:returns are either literals or something
# else, which is read as the head of an RDF list. List members come
# first in the result, followed by the bare literals, de-duplicated.
#
# @param repo the repository to query
# @param subject the transform
# @param raw [false, true] return the terms themselves rather than
#  their values
# @param returns [false, true] read tfo:returns instead of tfo:accepts
# @return [Array] the types, in a stable order
def self.gather_accepts_returns repo, subject, raw: false, returns: false
  pred = RDF::SAK::TFO[returns ? 'returns' : 'accepts']

  literals, heads =
    repo.query([subject, pred, nil]).objects.partition(&:literal?)
  lists = heads.map { |head| RDF::List.from(repo, head).to_a }

  # sorting is mainly to give us consistent results
  out = (lists.sort.flatten + literals.sort).uniq

  # raw as in raw literals
  raw ? out : out.map(&:value)
end
81
+
82
+ protected
83
+
84
+ # Initialize the implementation. Does nothing in the base
85
+ # class. Return value is ignored.
86
+ #
87
+ # @param harness [RDF::SAK::Transform::Harness] the harness
88
+ #
89
def init_implementation(harness)
  # deliberately a no-op in the base class; #initialize calls this hook
  # last and ignores whatever it returns
  nil
end
91
+
92
+ public
93
+
94
+ # Resolve a transform out of the repository. Optionally supply a
95
+ # block to resolve any implementation associated with the transform.
96
+ #
97
+ # @param harness [RDF::SAK::Transform::Harness] the harness
98
+ # @param subject [RDF::Resource]
99
def self.resolve harness, subject
  # Returns an instance of this class (or of the class selected by the
  # transform's tfo:implementation), or nil when `subject` is not
  # asserted to be a kind of tfo:Transform in harness.repo.
  #
  # Raises NotImplementedError when a urn:x-ruby: implementation names a
  # constant that cannot be found.

  # noop: already resolved
  return subject if subject.is_a? self

  repo = harness.repo
  util = RDF::SAK::Util

  asserted = util.objects_for repo, subject, RDF.type, only: :resource
  return if (asserted & util.all_related(RDF::SAK::TFO.Transform)).empty?

  params = gather_params repo, subject

  # an explicit parameter list wins; otherwise use the sorted keys
  pl = util.objects_for(repo, subject,
    RDF::SAK::TFO['parameter-list'], only: :resource).sort.first
  plist = pl ? RDF::List.from(repo, pl).to_a : params.keys.sort

  accepts = gather_accepts_returns repo, subject
  returns = gather_accepts_returns repo, subject, returns: true

  # the class to instantiate defaults to the receiver
  tclass = self

  # XXX this is all dumb but it has to be this way for now
  impl = util.objects_for(repo, subject,
    RDF::SAK::TFO.implementation, only: :uri).sort.first

  if impl
    case impl.to_s
    when /^file:/i
      # XXX redo this later
      if /xsl/i.match? MimeMagic.by_path(impl.path.to_s).to_s
        tclass = RDF::SAK::Transform::XSLT
      end
    when /^urn:x-ruby:(.*)$/i
      begin
        tclass = Object.const_get Regexp.last_match(1)
      rescue NameError
        # the original `rescue NameError, e` treated `e` as a second
        # exception class, so this branch could never run as intended
        raise NotImplementedError,
          "Could not locate implementation for #{impl}!"
      end
    end
  end

  tclass.new subject, params: params, param_list: plist, accepts: accepts,
    returns: returns, implementation: impl, harness: harness
end
150
+
151
# Normalize every value of a parameter hash into a Set.
#
# Values that respond to #to_a are converted through it; anything else
# becomes a one-member Set. A nil value stays nil.
#
# @param params [Hash] the parameters
# @return [Hash] the same keys mapped to Sets (or nil)
def self.coerce_params params
  params.transform_values do |value|
    next if value.nil?

    members = value.respond_to?(:to_a) ? value.to_a : [value]
    Set.new members
  end
end
157
+
158
+ attr_reader :subject
159
+
160
+ # Initialize a transform from data.
161
+ # @param subject [RDF::Resource]
162
+ # @param harness [RDF::SAK::Transform::Harness]
163
+ # @param params [Hash]
164
+ # @param param_list [Array]
165
+ # @param accepts [Array]
166
+ # @param returns [Array]
167
+ # @param implementation [RDF::Resource]
168
+ #
169
def initialize subject, harness: nil, params: {}, param_list: [],
    accepts: %w[*/*], returns: %w[*/*], implementation: nil
  # wrap a scalar in an array, otherwise go through #to_a
  listify = ->(x) { x.respond_to?(:to_a) ? x.to_a : [x] }

  # index of dct:identifier => parameter URI, used for lookups in #[]
  by_id = params.each_with_object({}) do |(uri, spec), index|
    index[spec[:id]] = uri
  end

  # with no explicit ordering, the parameter list is the sorted keys
  ordering = param_list.empty? ? params.keys.sort : param_list.dup

  @subject = subject.dup.freeze
  @params  = params.freeze
  @plist   = ordering.freeze
  @pcache  = by_id.freeze
  @accepts = listify.call(accepts).freeze
  @returns = listify.call(returns).freeze
  @impl    = implementation.freeze

  # give subclasses a chance to set up their implementation
  init_implementation harness
end
182
+
183
+ # Return the identifier of the implementation.
184
+ #
185
+ # @return [RDF::URI]
186
+ #
187
def implementation
  # the identifier handed to #initialize via `implementation:`
  @impl
end
190
+
191
+ # True if this transform is *actually* implemented.
192
+ #
193
+ # @return [false, true]
194
+ #
195
def implemented?
  # always false here; #apply refuses to run unless this returns true
  false
end
198
+
199
+ # True if the transform accepts the given Content-Type.
200
+ #
201
+ # @param type [String] the content type to test
202
+ # @return [false, true] whether the transform accepts the type
203
+ #
204
def accepts? type
  # Build one faux variant per type in the lineage of `type`, so that
  # negotiation succeeds when the transform accepts an ancestor type
  # (e.g. it declares application/xml and is handed
  # application/xhtml+xml). Keys only need to be unique.
  variants = {}
  RDF::SAK::MimeMagic.new(type).lineage.each do |ancestor|
    name = ancestor.to_s
    variants[name] = [1, name]
  end

  # construct the pseudo-header, explicitly refusing everything else
  # unless the transform already declares the wildcard
  offered = @accepts.include?('*/*') ? @accepts : @accepts + ['*/*;q=0']
  headers = { Accept: offered.join(', ') }

  # we only care *if* this returns something, not *what*
  HTTP::Negotiate.negotiate(headers, variants) ? true : false
end
225
+
226
+ # Return the parameter list, or a sorted list of parameter keys in lieu
227
+ #
228
+ # @return [Array]
229
+ #
230
def keys
  # XXX the list should already be unique, but duplicated output was
  # seen in tests, so de-duplicate on the way out
  @plist.uniq
end
235
+
236
+ # Retrieve a parameter spec, either by its fully-qualified URI or
237
+ # its `dct:identifier`.
238
+ #
239
+ # @param key [RDF::Resource,Symbol,String] the parameter URI or its identifier
240
+ # @return [Hash] the parameter spec
241
+ #
242
def [](key)
  # Resolve the lookup key to the parameter's URI first, so that the
  # :uri entry added below is the URI itself and not an echo of a
  # Symbol or String identifier.
  uri = case key
        when RDF::Resource then key
        when Symbol then @pcache[key]
        when String
          # try the identifier first, then treat the string as a URI
          @pcache[key.to_sym] || RDF::URI(key)
        end

  # anything unresolvable or unknown yields nil
  spec = @params[uri] if uri

  # add the key to the group
  spec.merge(uri: uri) if spec
end
252
+
253
+ # XXX kill this
254
def lint params
  # only hashes can be compared against the parameter list
  unless params.is_a? Hash
    raise ArgumentError, "params must be a hash, not #{params.class}"
  end

  # true when the sorted keys of `params` equal #keys exactly
  supplied = params.keys.sort
  supplied == keys
end
259
+
260
+ # Return the validated parameters or raise an exception.
261
+ #
262
+ # @param params [Hash] the hash of parameters
263
+ # @param symbols [false, true] whether the keys should be symbols or URIs
264
+ # @param defaults [true, false] whether to supplant the defaults
265
+ # @param silent [false, true] return nil rather than raise if true
266
+ # @return [Hash] the validated parameters
267
+ #
268
def validate params, symbols: false, defaults: true, silent: false
  # Coerces every supplied value to an RDF term, substitutes defaults
  # for missing parameters, and enforces cardinality. Cardinality
  # violations always raise ArgumentError; `silent` only suppresses the
  # error for unrecognized parameters (nil is returned instead).

  # duplicate so we can delete from it
  params = params.dup
  out = {}

  # note the instance variable vs the argument
  @params.each do |k, spec|
    # a parameter may be addressed by its URI or by its identifier
    raw = params.delete(k) || params.delete(spec[:id]) || []

    v = (raw.respond_to?(:to_a) ? raw.to_a : [raw]).map do |value|
      case value
      when RDF::Term then value
      when URI then RDF::URI(value.to_s)
      when nil then RDF.nil
      else
        range = spec[:range] || []
        datatypes = range.select(&:datatype?)

        if !datatypes.empty?
          datatypes = datatypes.to_a.sort
          # this message used to be a bare string that went nowhere
          warn "multiple ranges; arbitrarily picking #{datatypes.first}" if
            datatypes.size > 1
          RDF::Literal(value, datatype: datatypes.first)
        elsif value.is_a?(String) && !range.reject(&:datatype?).empty?
          # strings in a resource-valued range are blank nodes or URIs
          if m = /^_:(.+)$/.match(value)
            RDF::Node(m[1])
          else
            RDF::URI(value)
          end
        else
          RDF::Literal(value)
        end
      end
    end

    # XXX one day we should check types but not today

    # give us the default(s) then
    v = spec[:default].dup if v.empty? and spec[:default]

    # but we *will* check the cardinality
    minc = spec.fetch :minc, 0
    maxc = spec.fetch :maxc, Float::INFINITY

    if v.size < minc
      raise ArgumentError, "Parameter #{k} must have at least" \
        " #{minc} value#{minc == 1 ? '' : 's'}"
    end
    if v.size > maxc
      raise ArgumentError, "Parameter #{k} must have at most" \
        " #{maxc} value#{maxc == 1 ? '' : 's'}"
    end
    # XXX if cardinality == 1 should we set v to v.first? dunno

    # key by identifier (falling back to the stringified URI) on request
    key = symbols ? (spec[:id] || k.to_s) : k

    # with defaults off, omit values that merely restate the default
    out[key] = v if defaults || v != spec[:default]
  end

  # if params are not empty then this is an error
  unless params.empty?
    return if silent
    raise ArgumentError,
      "Unrecognized parameters #{params.keys.join ', '}"
  end

  out
end
330
+
331
+ # Check the parameters and apply the function, then check the
332
+ # output. Parameters are checked with {#validate} for key
333
+ # resolution, cardinality, range, and type.
334
+ #
335
+ # @param input [String,IO,#to_s,#read] Something bytelike
336
+ # @param params [Hash,RDF::SAK::Transform::Partial] the instance parameters
337
+ # @param parsed [Object] the already-parsed object, if applicable
338
+ # @param type [String] the content-type of the input
339
+ # @param accept [String] a string in the form of an Accept header
340
+ # @yieldparam output [String,IO] the output
341
+ # @yieldparam parseout [Object] the parsed output, if applicable
342
+ # @return [#to_s, Object] the serialized output (and parsed if applicable)
343
+ #
344
def apply input, params = {}, parsed: nil,
    type: 'application/octet-stream', accept: '*/*', &block
  # Returns [output, parsed_output], or nil when none of the transform's
  # return types satisfies `accept` or when #execute yields nothing.
  # Raises NotImplementedError unless #implemented? is true; #validate
  # may raise ArgumentError. `type` is accepted but not consulted here.

  # the message used to interpolate @id, which is never assigned
  raise NotImplementedError,
    "Transform #{@subject} is not implemented!" unless implemented?

  # XXX validate accept or explode
  # give up unless at least one of our return types is acceptable
  HTTP::Negotiate.negotiate({ Accept: accept },
    @returns.map { |t| [t, [1, t]] }.to_h, all: true) or return

  # this will succeed or explode
  params = validate params, symbols: true

  # run the transform
  out, parseout = execute input, parsed, params

  # bail out if nothing was returned
  return unless out

  # now run the block if present
  block.call out, parseout if block

  # return it to the caller
  [out, parseout]
end
368
+
369
+ # This class implements a cache for partial transformation function
370
+ # applications, which bundle transforms with a set of instance
371
+ # parameters under a reusable identity.
372
class PartialCache
  private

  # Delegate to the shared normalizer on the transform class.
  def coerce_params params
    RDF::SAK::Transform.coerce_params params
  end

  public

  # Initialize the cache with all partials pre-loaded.
  #
  # @param harness [RDF::SAK::Transform::Harness] the transform harness
  # @return [RDF::SAK::Transform::PartialCache] the instance
  #
  def self.load harness
    new(harness).load
  end

  attr_reader :harness

  # Initialize an empty cache.
  #
  # @param harness [RDF::SAK::Transform::Harness] the parent harness.
  #
  def initialize harness
    @harness    = harness
    @cache      = {} # partial subject => partial
    @mapping    = {} # transform subject => { params => partial }
    @transforms = {}
  end

  # Load every tfo:Partial found in the repository.
  #
  # @return [self] daisy-chainable self-reference
  #
  def load
    RDF::SAK::Util.subjects_for(repo, RDF.type,
      RDF::SAK::TFO.Partial).each do |s|
      resolve subject: s
    end

    # return self to daisy-chain
    self
  end

  # @return [Array] the subjects of all cached partials
  def partials
    @cache.keys.select { |x| x.is_a? RDF::Resource }
  end

  # @return the harness's repository
  def repo
    @harness.repo
  end

  # @return [Hash] a copy of the internal transform table
  def transforms
    @transforms.dup
  end

  # Retrieve a Partial from the cache, based either on its own subject
  # or on a transform plus parameters. Nothing is added to the cache.
  #
  # @param transform [RDF::SAK::Transform, RDF::URI] the transform, or
  #  the subject URI of a cached partial
  # @param params [Hash] the parameters to look up
  # @return [RDF::SAK::Transform::Partial, nil] the cached partial
  # @raise [ArgumentError] if `transform` is neither of the above
  #
  def get transform, params
    ts = case transform
         when RDF::SAK::Transform then transform.subject
         when RDF::URI
           # return direct cache entry if transform is really the subject
           return @cache[transform] if @cache.key? transform

           # XXX transforms resolved here may not get implemented
           resolved = @harness.resolve(transform) or return
           resolved.subject
         else
           raise ArgumentError, "Don't know what to do with #{transform}"
         end

    return @cache[ts] if @cache.key? ts

    # otherwise consult the mapping, which is keyed by transform subject
    (@mapping[ts] || {})[coerce_params(params)]
  end

  # Resolves a partial either by subject or by transform + parameter
  # set, caching whatever is found.
  #
  # @param subject [RDF::URI, RDF::SAK::Transform::Partial] the subject
  #  URI of the partial, or a partial to be registered
  # @param transform [RDF::URI,RDF::SAK::Transform] the transform
  # @param params [Hash] an instance of parameters
  # @return [RDF::SAK::Transform::Partial, nil]
  #
  def resolve subject: nil, transform: nil, params: {}
    if subject
      if subject.is_a? RDF::SAK::Transform::Partial
        # snag the transform
        transform = @harness.resolve(subject.transform) or
          raise 'Could not resolve the transform associated with ' +
          subject.subject

        # mkay now add this to the cache
        t = @mapping[transform.subject] ||= {}
        @cache[subject.subject] ||= t[subject.params] ||= subject
      else
        # resolve the partial
        partial = @cache[subject] || RDF::SAK::Transform::Partial.resolve(
          @harness, subject: subject) or return

        # initialize the mapping if not present
        t = @mapping[partial.transform.subject] ||= {}

        # off we go
        @cache[subject] ||= t[partial.params] ||= partial
      end
    elsif transform
      transform = @harness.resolve transform unless
        transform.is_a? RDF::SAK::Transform

      params = transform.validate params, defaults: false

      # note the *presence* of the key means the cache item has been
      # checked already; its *value* may be nil
      t = @mapping[transform.subject] ||= {}
      return t[params] if t.key? params

      # try to resolve the partial
      partial = RDF::SAK::Transform::Partial.resolve(@harness,
        transform: transform, params: params) or return

      # update the caches
      @cache[partial.subject] = t[params] = partial
    end
  end
end
497
+
498
+ # This class is the main harness for holding all the transforms and
499
+ # operating over them. This is the primary interface through which
500
+ # we manipulate transforms.
501
class Harness

  attr_reader :partials, :repo, :root

  # Create a new harness instance.
  #
  # @param repo [RDF::Repository] the repository to find RDF data
  # @param root [String,Pathname] the root directory for implementations
  # @raise [ArgumentError] if `repo` is not an RDF::Repository or `root`
  #  is not a readable directory
  #
  def initialize repo, root
    raise ArgumentError,
      "repo is #{repo.class}, not an RDF::Repository" unless
      repo.is_a? RDF::Repository
    @repo = repo
    @root = Pathname(root).expand_path
    raise ArgumentError, "Root #{@root} does not exist" unless
      @root.directory? and @root.readable?
    @cache    = {} # transform subject => transform
    @partials = RDF::SAK::Transform::PartialCache.new self
  end

  # Bootstrap all the transforms.
  #
  # @param repo [RDF::Repository] the repository to find RDF data
  # @param root [String,Pathname] the root directory for implementations
  # @return [RDF::SAK::Transform::Harness] the harness instance
  def self.load repo, root
    self.new(repo, root).load
  end

  # Load transforms into an existing instance.
  #
  # @return [self] the harness, so calls can be daisy-chained
  def load
    RDF::SAK::Util.subjects_for(@repo, RDF.type,
      RDF::SAK::TFO.Transform, only: :resource).each do |subject|
      resolve subject
    end

    # return self so we can daisy-chain
    self
  end

  # Return all cached Transform identities.
  #
  # @return [Array] the URIs of known Transforms
  #
  def transforms
    @cache.keys.sort
  end

  # Resolve a Transform based on its URI, caching successful lookups.
  #
  # @param subject [RDF::Resource] the identifier for the transform.
  # @return [RDF::SAK::Transform, nil] the Transform, if present.
  #
  def resolve subject
    return @cache[subject] if @cache[subject]
    # XXX raise???
    transform =
      RDF::SAK::Transform.resolve(self, subject) or return
    @cache[subject] = transform
  end

  # Resolve a Partial based on either its subject URI or the
  # transform-params pair.
  #
  # @param subject [RDF::Resource] the Partial's subject
  # @param transform [RDF::Resource,RDF::SAK::Transform] the transform
  # @param params [Hash] an instance of parameters
  # @return [RDF::SAK::Transform::Partial] the Partial, if present
  #
  def resolve_partial subject: nil, transform: nil, params: nil
    # a nil params would blow up in Transform#validate; treat as empty
    partials.resolve subject: subject, transform: transform,
      params: params || {}
  end

  # Resolve a total function application record based on either its
  # subject URI, a transform-params pair, or a Partial.
  #
  # @param subject [RDF::Resource] the Application's subject
  # @param transform [RDF::Resource,RDF::SAK::Transform] the Transform
  # @param params [Hash] an instance of parameters
  # @param partial [RDF::Resource,RDF::SAK::Transform::Partial] a Partial
  # @param input [RDF::Resource] the Application's input
  # @param output [RDF::Resource] the Application's output
  # @return [RDF::SAK::Transform::Application] the Application, if present
  #
  def resolve_application subject: nil, transform: nil, params: {},
      partial: nil, input: nil, output: nil
    RDF::SAK::Transform::Application.resolve self, subject: subject,
      transform: transform, params: params, partial: partial,
      input: input, output: output
  end

  # Returns true if the Application with the given subject URI
  # matches either the transform-params pair, or a partial.
  #
  # @param subject [RDF::Resource,RDF::SAK::Transform::Application]
  #  the application
  # @param transform [RDF::Resource,RDF::SAK::Transform] the transform
  # @param params [Hash] an instance of parameters
  # @param partial [RDF::Resource,RDF::SAK::Transform::Partial] a partial
  # @return [true, false] whether or not the application matches
  #
  def application_matches? subject, transform: nil, params: {}, partial: nil

    # unbundle the params; partial overrides transform+params
    if partial
      unless partial.is_a? RDF::SAK::Transform::Partial
        # resolve_partial only takes keywords; an unknown partial
        # cannot match anything
        partial = resolve_partial(subject: partial) or return false
      end
      transform = partial.transform
      params    = partial.params
    else
      transform = resolve transform unless
        transform.is_a? RDF::SAK::Transform
      # an unresolvable transform cannot match anything either
      return false unless transform
      params = transform.validate params
    end

    if subject.is_a? RDF::SAK::Transform::Application
      return true if partial and subject.completes? partial
      return true if
        subject.transform == transform and subject.matches? params
    else
      # this should say, try matching the partial if there is one
      # to match, otherwise attempt to directly match the transform
      return true if partial and repo.has_statement?(
        RDF::Statement(subject, RDF::SAK::TFO.completes, partial.subject))

      if repo.has_statement?(
        RDF::Statement(subject, RDF::SAK::TFO.transform, transform.subject))
        testp = transform.keys.map do |p|
          o = repo.query([subject, p, nil]).objects.uniq.sort
          o.empty? ? nil : [p, o]
        end.compact.to_h

        # this will clear any explicit declarations of defaults
        # NOTE(review): `params` above was validated with defaults on,
        # while testp has them stripped -- confirm this comparison
        testp = transform.validate testp, defaults: false, silent: true
        # true means it matches
        return testp == params
      end
    end

    false
  end
end
643
+
644
class Partial
  # Resolve a partial function application with the given parameters.
  #
  # With a subject, the parameters are read off the subject in the
  # repository. Without one, the repository is searched for a
  # tfo:Partial of the transform whose parameters equal `params`.
  #
  # @param harness [RDF::SAK::Transform::Harness] the harness
  # @param subject [RDF::Resource] the identity of the partial
  # @param transform [RDF::Resource] the identity of the transform
  # @param params [Hash] key-value pairs
  # @return [RDF::SAK::Transform::Partial, nil] the partial, if found
  # @raise [ArgumentError] if neither subject nor transform is given
  def self.resolve harness, subject: nil, transform: nil, params: {}
    raise ArgumentError, 'Must supply either a subject or a transform' unless
      subject or transform

    repo = harness.repo

    # coerce the transform to a Transform object if it isn't already
    if transform
      unless transform.is_a? RDF::SAK::Transform
        transform = harness.resolve(transform) or return
      end
    elsif subject.is_a? RDF::Resource
      # locate the transform if given the subject
      transform = RDF::SAK::Util.objects_for(repo, subject,
        RDF::SAK::TFO.transform, only: :resource).first or return
      transform = harness.resolve(transform) or return
    else
      # no way to find a transform from this kind of subject
      return
    end

    # obtain the subject for the given parameters
    if subject
      params = {}
      transform.keys.each do |p|
        o = repo.query([subject, p, nil]).objects.uniq.sort
        params[p] = o unless o.empty?
      end
    else
      params = transform.validate params, symbols: false, defaults: false

      candidates = RDF::Query.new do
        # XXX we should sort parameters by longest value since
        # longer values will probably be less common; anyway this is
        # gonna all need to be rethought
        params.each { |p, objs| objs.each { |o| pattern [:s, p, o] } }
        pattern [:s, RDF.type, RDF::SAK::TFO.Partial]
        pattern [:s, RDF::SAK::TFO.transform, transform.subject]
      end.execute(repo).map { |sol| [sol[:s], {}] }.to_h

      # fill in each candidate's actual parameters and keep only the
      # ones that match exactly
      candidates.select! do |s, ps|
        transform.keys.each do |p|
          o = repo.query([s, p, nil]).objects.uniq.sort
          ps[p] = o unless o.empty?
        end
        ps == params
      end

      return if candidates.empty?

      # sort it so we always get the same thing
      subject = candidates.keys.sort.first
      params  = candidates[subject]
    end

    self.new subject, transform, params
  end

  attr_reader :subject, :transform

  # @param subject [RDF::Resource] the identity of the partial
  # @param transform [RDF::SAK::Transform] the transform
  # @param params [Hash, RDF::SAK::Transform::Partial] the parameters;
  #  when a Partial is passed no parameters are stored here
  # @raise [ArgumentError] if `transform` is not a Transform
  def initialize subject, transform, params = {}
    raise ArgumentError, 'transform must be a Transform' unless
      transform.is_a? RDF::SAK::Transform
    @subject   = subject
    @transform = transform
    @params    = transform.validate params unless
      params.is_a? RDF::SAK::Transform::Partial
  end

  # @return the value(s) stored for the parameter `key`
  def [](key)
    @params[key]
  end

  # @return [Array] the parameter keys
  def keys
    @params.keys
  end

  # @return [Hash] a copy of the validated parameters
  def params
    @params.dup
  end

  # @return [false, true] whether `params`, once validated by the
  #  transform, equal this partial's parameters
  def matches? params
    @params == @transform.validate(params)
  end

  # Same transform and same parameters, regardless of subject.
  def ===(other)
    return false unless other.is_a? RDF::SAK::Transform::Partial
    transform == other.transform and matches? other.params
  end

  # Same transform, parameters and subject.
  def ==(other)
    self === other and subject == other.subject
  end
end
749
+
750
+ # A record of a transformation function application.
751
+ # @note "Application" as in to "apply" a function, not an "app".
752
+ class Application < Partial
753
+ # Resolve a particular function Application from the repository.
754
+ # Either resolve by subject, or resolve by a transform + parameter
755
+ # + input set. Applications that complete Partials will be
756
+ # automatically resolved.
757
+ #
758
+ # @param harness [RDF::SAK::Transform::Harness] the harness
759
+ # @param subject [RDF::Resource] the subject
760
+ # @param transform [RDF::Resource,RDF::SAK::Transform] the transform
761
+ # @param params [Hash] an instance of parameters
762
+ # @param input [RDF::Resource] the Application's input
763
+ # @param output [RDF::Resource] the Application's output
764
+ # @return [RDF::SAK::Transform::Application] the Application, if present
765
+ #
766
+ def self.resolve harness, subject: nil, transform: nil, params: {},
767
+ partial: nil, input: nil, output: nil
768
+ # either a subject or transform + input + output? + params?
769
+
770
+ repo = harness.repo
771
+ partials = harness.partials
772
+
773
+ if subject
774
+ # noop
775
+ return subject if subject.is_a? self
776
+
777
+ # okay partial
778
+ partial = RDF::SAK::Util.objects_for(
779
+ subject, RDF::SAK::TFO.completes, only: :resource).sort.first
780
+
781
+ if partial
782
+ tmp = partials.resolve(subject: partial) or
783
+ raise "Could not find partial #{partial}"
784
+ partial = tmp
785
+ transform = partial.transform
786
+ else
787
+ transform = RDF::SAK::Util.objects_for(
788
+ subject, RDF::SAK::TFO.transform, only: :resource).sort.first or
789
+ raise "Could not find a transform for #{subject}"
790
+ tmp = harness.resolve(transform) or
791
+ raise "Could not find transform #{transform}"
792
+ transform = tmp
793
+
794
+ params = transform.validate
795
+
796
+ # get params
797
+ params = {}
798
+ transform.keys.each do |p|
799
+ o = repo.query([subject, p, nil]).objects.uniq.sort
800
+ params[p] = o unless o.empty?
801
+ end
802
+ end
803
+
804
+ # get inputs and outputs
805
+ input = RDF::SAK::Util.objects_for(
806
+ subject, RDF::SAK::TFO.input, only: :resource).sort.first
807
+ output = RDF::SAK::Util.objects_for(
808
+ subject, RDF::SAK::TFO.output, only: :resource).sort.first
809
+
810
+ raise 'Data must have both input and output' unless input and output
811
+ elsif input and ((transform and params) or partial)
812
+
813
+ # XXX dispatch on partial only? smart? dumb?
814
+ if partial
815
+ transform = partial.transform
816
+ params = partial.params
817
+ else
818
+ # do transform
819
+ t = harness.resolve(transform) or
820
+ raise "Could not resolve transform #{transform}"
821
+ transform = t
822
+
823
+ # coerce/validate params
824
+ params = transform.validate params, defaults: false
825
+
826
+ # do partial
827
+ partial = partials.resolve transform: transform, params: params
828
+ end
829
+
830
+ # collect function application receipts
831
+ candidates = RDF::Query.new do
832
+ # note that there is no cost-based optimization so we write
833
+ # these in the order of least to most cardinality
834
+ pattern [:t, RDF::SAK::TFO.output, output] if output
835
+ pattern [:t, RDF::SAK::TFO.input, input]
836
+ end.execute(repo).map { |sol| sol[:t] }.compact.uniq.select do |s|
837
+ # this should say, try matching the partial if there is one
838
+ # to match, otherwise attempt to directly match the transform
839
+ if partial and repo.has_statement?(
840
+ RDF::Statement(s, RDF::SAK::TFO.completes, partial.subject))
841
+ true
842
+ elsif repo.has_statement?(
843
+ RDF::Statement(s, RDF::SAK::TFO.transform, transform.subject))
844
+ testp = transform.keys.map do |p|
845
+ o = repo.query([s, p, nil]).objects.uniq.sort
846
+ o.empty? ? nil : [p, o]
847
+ end.compact.to_h
848
+
849
+ testp = transform.validate testp, defaults: false, silent: true
850
+ testp == params
851
+ end
852
+ end.compact.uniq.sort
853
+
854
+ return if candidates.empty?
855
+
856
+ if candidates.size == 1
857
+ subject = candidates.first
858
+ else
859
+ # now we have the unlikely case that there are two identical
860
+ # records so we just sort em first by end date, then by
861
+ # start date, then lexically
862
+ subject = candidates.map do |s|
863
+ st, et = %i[startedAtTime endedAtTime].map do |p|
864
+ repo.query([s, RDF::Vocab::PROV[p], nil]).map do |stmt|
865
+ dt = stmt.object.object
866
+ dt if dt.is_a? DateTime
867
+ end.compact.sort.last
868
+ end
869
+ [s, st, et]
870
+ end.sort do |a, b|
871
+ # first check latest end-time, then check latest start-time
872
+ c = a[2] && b[2] ? b[2] <=> a[2] : 0
873
+ # if those two yield nothing, then sort lexically i guess
874
+ (c == 0 && a[1] && b[1]) ? b[1] <=> a[1] : a[0] <=> b[0]
875
+ end.first.first
876
+ end
877
+ else
878
+ raise ArgumentError,
879
+ 'must have either a subject or transform + params + input'
880
+ end
881
+
882
+ # don't forget the output
883
+ output ||= repo.query(
884
+ [subject, RDF::SAK::TFO.output, nil]
885
+ ).objects.select(&:uri?).sort.first
886
+
887
+ new subject, transform, input, output, partial || params
888
+
889
+ end
890
+
891
+ attr_reader :input, :output, :completes
892
+
893
+ # Create a new function application from whole cloth.
894
+ #
895
+ # @param subject [RDF::Resource]
896
+ # @param transform [RDF::Resource] the identifier for the transform
897
+ # @param input [RDF::Resource] the identifier for the input
898
+ # @param output [RDF::Resource] the identifier for the output
899
+ # @param params [Hash, RDF::SAK::Transform::Partial] the parameters
900
+ # or partial application that is completed
901
+ def initialize subject, transform, input, output, params = {},
902
+ start: nil, stop: nil
903
+ # params may be a partial
904
+ super subject, transform, params
905
+
906
+ @input = input
907
+ @output = output
908
+ @completes = params if params.is_a? RDF::SAK::Transform::Partial
909
+ @start = start
910
+ @stop = stop
911
+ end
912
+
913
+ # Returns the function application as an array of triples.
914
+ def to_triples
915
+ out = [] # .extend RDF::Enumerable
916
+ s = @subject
917
+ out << [s, RDF.type, RDF::SAK::TFO.Application]
918
+
919
+ if @start
920
+ start = @start.is_a?(RDF::Literal) ? @start : RDF::Literal(@start)
921
+ out << [s, RDF::Vocab::PROV.startedAtTime, start]
922
+ end
923
+
924
+ if @stop
925
+ stop = @stop.is_a?(RDF::Literal) ? @stop : RDF::Literal(@stop)
926
+ out << [s, RDF::Vocab::PROV.endedAtTime, stop]
927
+ end
928
+
929
+ if @completes
930
+ out << [s, RDF::SAK::TFO.completes, @completes.subject]
931
+ else
932
+ out << [s, RDF::SAK::TFO.transform, transform.subject]
933
+ pdup = transform.validate params, defaults: false, silent: true
934
+ pdup.each do |k, vals|
935
+ vals.each { |v| out << [s, k, v] }
936
+ end
937
+ end
938
+
939
+ out.map { |triples| RDF::Statement(*triples) }
940
+ end
941
+
942
+ def [](key)
943
+ # note complete is
944
+ (@completes || @params)[key]
945
+ end
946
+
947
# The parameter keys, taken from the completed partial when there is
# one and from our own parameters otherwise.
def keys
  source = @completes || @params
  source.keys
end
950
+
951
# The effective parameters: whatever the completed partial reports, or
# else a shallow copy of our own.
def params
  return @completes.params if @completes

  @params.dup
end
954
+
955
# The effective transform: the completed partial's transform when
# there is a partial, otherwise our own.
def transform
  return @transform unless @completes

  @completes.transform
end
958
+
959
# Test whether this application completes the given partial.
#
# @param partial [Object, nil] the candidate partial
# @return [true, false, nil] falsy when we complete nothing or no
#  partial was given; otherwise the result of comparing with ==
def completes?(partial)
  return @completes unless @completes
  return partial unless partial

  @completes == partial
end
962
+
963
# Test the given parameters against this application: the completed
# partial answers when there is one, otherwise the superclass does.
def matches?(params)
  @completes ? @completes.matches?(params) : super(params)
end
967
+
968
# Case equality between applications: true when the other side is an
# Application with the same input and output, the same transform, and
# parameters that match ours (or those of the partial we complete).
def ===(other)
  if other.is_a?(Application) &&
      @input == other.input && @output == other.output
    # the comparand is either the partial we complete or ourselves
    reference = @completes || self

    other.transform == reference.transform &&
      other.matches?(reference.params)
  else
    false
  end
end
978
+ end
979
+
980
+ # XXX everything below this line is trash
981
+
982
# Gather the candidate's values for each of the given parameter
# predicates from the repository.
#
# NOTE(review): the gathered values are never compared or returned;
# as written the method just hands back the array of parameter keys.
#
# @param repo [#query] the repository
# @param candidate [Object] the subject whose statements are fetched
# @param params [Hash] parameters keyed by predicate
# @return [Array] the keys of params
def match_params(repo, candidate, params = {})
  # normalize every value to a set
  normalized = params.transform_values do |value|
    Set.new(value.respond_to?(:to_a) ? value.to_a : [value])
  end

  found = {}
  predicates = normalized.keys
  predicates.each do |pred|
    repo.query([candidate, pred, nil]) do |stmt|
      (found[stmt.predicate] ||= Set.new) << stmt.object
    end
  end
end
996
+
997
# Resolve a transformation application function in the repository
# with the given inputs and outputs.
#
# XXX note that this thing in its current state will not distinguish
# between two different function applications that happen to map the
# same input to the same output, but with different scalar
# parameters. For example, the `subtree` function could be given two
# different XPath queries but return the same subtree.
#
# @param repo [RDF::Queryable] the repository to search
# @param transform [Object] compared against each cached partial's
#  transform and used as the object of tfo:transform
# @param input [RDF::Resource] the object of tfo:input
# @param output [RDF::Resource, nil] the object of tfo:output
# @param graph [Object, nil] currently unused
# @param params [Hash] parameters; values are normalized to sets
# @param partials [Hash] cache of partials keyed by subject; a newly
#  resolved partial is added to it
# @return [RDF::Resource, nil] the subject of the matching application;
#  among several, the one with the latest end time, then the latest
#  start time, then the lowest-sorting subject
def resolve_transformation repo, transform, input, output = nil,
    graph: nil, params: {}, partials: {}

  # overwrite normalized params XXX replace this with something real
  params = params.transform_values do |v|
    Set.new(v.respond_to?(:to_a) ? v.to_a : [v])
  end

  # first we check the cache of partials to see if there is one that
  # matches our transform and parameters
  partial = partials.values.select do |p|
    p.transform == transform and p.matches? params
  end.sort.first

  # find the partial if there is one
  unless partial
    # NOTE(review): no repository is handed to Partial.resolve here;
    # confirm against its signature
    partial = Partial.resolve transform: transform, params: params
    # argh this isn't right; it should be partials[transform][params]
    partials[partial.subject] = partial if partial
  end

  # collect function application receipts
  candidates = RDF::Query.new do
    # note that there is no cost-based optimization so we write
    # these in the order of least to most cardinality
    pattern [:t, RDF::SAK::TFO.output, output]
    pattern [:t, RDF::SAK::TFO.input, input]
  end.execute(repo).map { |sol| sol[:t] }.compact.uniq.select do |s|
    # tfo:completes points at the partial's subject (that is what
    # Application#to_triples writes), not at the Partial object itself
    repo.has_statement?(
      RDF::Statement(s, RDF::SAK::TFO.transform, transform)) or
      partial && repo.has_statement?(
        RDF::Statement(s, RDF::SAK::TFO.completes, partial.subject))
  end.compact.uniq

  # first will be nil if this is empty so voila
  return candidates.first unless candidates.size > 1

  # now we have the unlikely case that there are two identical records
  candidates.map do |s|
    st, et = %i[startedAtTime endedAtTime].map do |p|
      # the block form of #query does not collect the block's results,
      # so map over the matched objects explicitly
      repo.query([s, RDF::Vocab::PROV[p], nil]).objects.map do |o|
        dt = o.object if o.literal?
        dt if dt.is_a? DateTime
      end.compact.sort.last
    end
    [s, st, et]
  end.sort do |a, b|
    # first check latest end-time
    c = a[2] && b[2] ? b[2] <=> a[2] : 0
    # then check latest start-time, but only to break a tie
    c = b[1] <=> a[1] if c == 0 && a[1] && b[1]
    # if those two yield nothing, then sort lexically i guess
    c == 0 ? a[0] <=> b[0] : c
  end.first.first
end
1059
+
1060
# Placeholder for recording a function application; the body is
# empty, so every argument is ignored and the result is always nil.
def record_application(repo, transform, input, output, start, finish,
                       partial: false, graph: nil, subject: nil,
                       params: {})
  nil
end
1063
+
1064
# Find the partial application of a function whose parameters equal
# the ones given.
#
# @param repo [RDF::Queryable] the repository to search
# @param function [RDF::Resource] the object of tfo:transform
# @param params [Hash] parameters keyed by predicate
# @return [RDF::Resource, nil] the subject of the first (in sort
#  order) matching partial, or nil if there is none
def get_partial_transform(repo, function, params = {})
  wanted = params.keys

  query = RDF::Query.new do
    pattern [:s, RDF.type, RDF::SAK::TFO.Partial]
    pattern [:s, RDF::SAK::TFO.transform, function]
    wanted.each { |k| pattern [:s, k, nil] }
  end

  found = {}
  query.execute(repo).each do |sol|
    entry = found[sol[:s]] ||= {}
    wanted.each do |k|
      # values are kept as sets for now, so order and duplicates are ignored
      entry[k] = Set.new(repo.query([sol[:s], k, nil]).objects)
    end
  end

  # now we imagine massaging the candidates' parameters so they
  # match the input (eg sets/arrays or whatever); in this case the
  # input params are made to match the retrieved params
  expected = params.transform_values do |v|
    Set.new(v.respond_to?(:to_a) ? v.to_a : [v])
  end

  # sorted so that the same subject comes back every time, should
  # there be multiples for some reason
  found.keys.sort.find do |subject|
    # the cheaper key comparison goes first
    found[subject].keys.sort == params.keys.sort &&
      found[subject] == expected
  end
end
1098
+
1099
# Transform that cuts a subtree out of an XML document, selected by
# an XPath expression.
class XPath < RDF::SAK::Transform
  protected

  # Run the subtree extraction.
  #
  # @param input [String] the serialized document; only parsed when
  #  no parsed document is supplied
  # @param parsed [Object, nil] an already-parsed document, if any
  # @param params [Hash] parameters keyed by symbol, each holding a
  #  list of values: :xpath (required), :prefix (optional
  #  "prefix:namespace" strings), :reindent (optional, defaults to
  #  true)
  # @return [Array, nil] the serialized subtree and its parsed form,
  #  or nil if parsing raised or no subtree came back
  # @raise [RuntimeError] if no :xpath parameter is present
  def execute input, parsed = nil, params
    xpath = params.fetch(:xpath, []).first
    raise 'No :xpath parameter was supplied' unless xpath

    # each prefix value is split on its first colon into [prefix, uri]
    prefix = params.fetch(:prefix, []).map do |x|
      x.value.split(/\s*:\s*/, 2)
    end.to_h.transform_keys(&:to_sym)

    # a missing :reindent now falls back to true like an empty one,
    # instead of raising KeyError
    reindent = (params.fetch(:reindent, []).first ||
      RDF::Literal(true)).object

    begin
      parsed ||= Nokogiri.XML input
    rescue Nokogiri::SyntaxError
      # XXX i dunno, raise?
      return
    end

    doc = RDF::SAK::Util.subtree parsed,
      xpath.value, prefixes: prefix, reindent: reindent

    return unless doc

    [doc.to_xml, doc]
  end

  public

  # @return [true] this transform has an implementation
  def implemented?
    true
  end
end
1130
+
1131
# Transform that applies an XSLT stylesheet to an XML document.
class XSLT < RDF::SAK::Transform
  # @return [true] this transform has an implementation
  def implemented?
    true
  end

  protected

  # Load the stylesheet from disk, resolved against the harness root.
  #
  # @param harness [#root] supplies the root directory
  # @raise [ArgumentError] if there is no root, the root is not a
  #  readable directory, or the stylesheet is not a readable file
  def init_implementation(harness)
    root = harness.root
    unless root
      raise ArgumentError, "Need a root to initialize the implementation"
    end

    root = Pathname(root).expand_path unless root.is_a? Pathname
    unless root.directory? && root.readable?
      raise ArgumentError, "#{root} is not a readable directory"
    end

    # XXX this assumes this is a file URI but so far that is the
    # only way we get here
    filename = root + implementation.path
    unless filename.file? && filename.readable?
      raise ArgumentError, "#{filename} is not a readable file"
    end

    @sheet = Nokogiri::XSLT(filename.read)
  end

  # Apply the stylesheet.
  #
  # @param input [String] the serialized document; only parsed when
  #  no parsed document is supplied
  # @param parsed [Object, nil] an already-parsed document, if any
  # @param params [Hash] currently ignored
  # @return [Array, nil] the serialized result and its parsed form,
  #  or nil if parsing raised
  def execute(input, parsed = nil, params)
    begin
      parsed = Nokogiri.XML(input) unless parsed
    rescue Nokogiri::SyntaxError
      # XXX i dunno, raise?
      return
    end

    # XXX do we wanna allow params?
    result = @sheet.transform(parsed)

    # hand back both the string and the still-parsed form
    [@sheet.serialize(result), result]
  end
end
1172
+ end