rdf-sak 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1172 @@
1
require 'rdf'
require 'rdf/vocab'
require 'rdf/sak/tfo'
require 'rdf/sak/util'
require 'set'
require 'mimemagic'
require 'http/negotiate'
require 'time'
require 'pathname'
9
+
10
+ # This class encapsulates a specification for an individual
11
+ # transformation function, including its parameter spec, accepted and
12
+ # returned types, identity, and implementation.
13
+ #
14
+ class RDF::SAK::Transform
15
+ # mkay basically this transformation function stuff got too hairy to
16
+ # just do ad-hoc so i guess i'm doing this now
17
+
18
+ private
19
+
20
# Collect the values of `predicate` on `subject` that unwrap (via
# `#object`) to a Numeric, sorted ascending.
def self.numeric_objects(repo, subject, predicate, entail: false)
  literals = RDF::SAK::Util.objects_for(
    repo, subject, predicate, entail: entail, only: :literal)

  # anything that does not unwrap to a Numeric is dropped
  literals.map(&:object).grep(Numeric).sort
end
24
+
25
# Build the parameter specs declared for `subject`: a hash keyed by
# parameter resource whose values carry :id, :range, :default, :minc
# and :maxc.
def self.gather_params(repo, subject)
  util = RDF::SAK::Util
  declared = util.objects_for(repo, subject, RDF::SAK::TFO.parameter,
    entail: false, only: :resource)

  declared.each_with_object({}) do |ps, params|
    param = params[ps] ||= {}

    # slug/identifier: the first dct:identifier literal in sort order
    ident = util.objects_for(
      repo, ps, RDF::Vocab::DC.identifier, only: :literal).sort.first
    param[:id] = ident.value.to_sym if ident

    # rdfs:range is only recorded when at least one is present
    ranges = util.objects_for(repo, ps, RDF::RDFS.range, only: :resource)
    param[:range] = ranges.to_set unless ranges.empty?

    # whatever objects_for returns for tfo:default is stored as-is
    param[:default] = util.objects_for(repo, ps, RDF::SAK::TFO.default)

    # cardinalities: an exact owl:cardinality wins over min/max, and
    # anything left unspecified falls back to 0..infinity
    exact = numeric_objects(repo, ps, RDF::OWL.cardinality).first
    if exact
      param[:minc] = param[:maxc] = exact
    else
      lower = numeric_objects(repo, ps, RDF::OWL.minCardinality).first
      upper = numeric_objects(repo, ps, RDF::OWL.maxCardinality).first
      param[:minc] = lower || 0
      param[:maxc] = upper || Float::INFINITY
    end
  end
end
64
+
65
# Gather the tfo:accepts (or, with `returns: true`, tfo:returns) values
# of `subject`. Literal objects are taken directly; any other object is
# read as the head of an RDF list. With `raw: true` the terms themselves
# are returned, otherwise their `#value`.
def self.gather_accepts_returns(repo, subject, raw: false, returns: false)
  predicate = RDF::SAK::TFO[returns ? 'returns' : 'accepts']
  objects   = repo.query([subject, predicate, nil]).objects

  literals, heads = objects.partition(&:literal?)
  lists = heads.map { |head| RDF::List.from(repo, head).to_a }

  # sorting is mainly there to give consistent results; list members
  # come first, then the bare literals
  ordered = (lists.sort.flatten + literals.sort).uniq

  return ordered if raw
  ordered.map(&:value)
end
81
+
82
+ protected
83
+
84
+ # Initialize the implementation. Does nothing in the base
85
+ # class. Return value is ignored.
86
+ #
87
+ # @param harness [RDF::SAK::Transform::Harness] the harness
88
+ #
89
def init_implementation(harness)
  # intentionally empty: the base class has nothing to initialize
end
91
+
92
+ public
93
+
94
+ # Resolve a transform out of the repository. Optionally supply a
95
+ # block to resolve any implementation associated with the transform.
96
+ #
97
+ # @param harness [RDF::SAK::Transform::Harness] the harness
98
+ # @param subject [RDF::Resource]
99
def self.resolve(harness, subject)
  # noop: already a transform
  return subject if subject.is_a? self

  repo = harness.repo

  # bail out unless one of the asserted types is related to tfo:Transform
  asserted = RDF::SAK::Util.objects_for repo, subject,
    RDF.type, only: :resource

  return if
    (asserted & RDF::SAK::Util.all_related(RDF::SAK::TFO.Transform)).empty?

  params = gather_params repo, subject

  # prefer an explicit tfo:parameter-list; otherwise use the sorted keys
  plist = if pl = RDF::SAK::Util.objects_for(repo, subject,
              RDF::SAK::TFO['parameter-list'], only: :resource).sort.first
            RDF::List.from(repo, pl).to_a
          else
            params.keys.sort
          end

  accepts = gather_accepts_returns repo, subject
  returns = gather_accepts_returns repo, subject, returns: true

  # the class to instantiate defaults to the receiver
  tclass = self

  # XXX this is all dumb but it has to be this way for now

  if impl = RDF::SAK::Util.objects_for(repo, subject,
      RDF::SAK::TFO.implementation, only: :uri).sort.first
    case impl.to_s
    when /^file:/i
      # XXX redo this later
      if /xsl/i.match? MimeMagic.by_path(impl.path.to_s).to_s
        tclass = RDF::SAK::Transform::XSLT
      end
    when /^urn:x-ruby:(.*)$/i
      # NOTE(review): the constant name comes straight from repository
      # data; const_get will resolve any constant that happens to be
      # loaded, so only trusted graphs should be fed through here.
      cn = Regexp.last_match(1)
      begin
        tclass = Object.const_get cn
      rescue NameError
        # only a failed constant lookup means "not implemented"; every
        # other error is left to propagate untouched
        raise NotImplementedError,
          "Could not locate implementation for #{impl}!"
      end
    end
  end

  tclass.new subject, params: params, param_list: plist, accepts: accepts,
    returns: returns, implementation: impl, harness: harness
end
150
+
151
# Normalize a parameter hash so every non-nil value becomes a Set;
# nil values are passed through as nil.
def self.coerce_params(params)
  params.transform_values do |value|
    # nil responds to #to_a, so it has to be screened out first
    next if value.nil?

    members = value.respond_to?(:to_a) ? value.to_a : [value]
    members.to_set
  end
end
157
+
158
+ attr_reader :subject
159
+
160
+ # Initialize a transform from data.
161
+ # @param subject [RDF::Resource]
162
+ # @param harness [RDF::SAK::Transform::Harness]
163
+ # @param params [Hash]
164
+ # @param param_list [Array]
165
+ # @param accepts [Array]
166
+ # @param returns [Array]
167
+ # @param implementation [RDF::Resource]
168
+ #
169
def initialize(subject, harness: nil, params: {}, param_list: [],
    accepts: %w[*/*], returns: %w[*/*], implementation: nil)
  # every argument is copied before freezing so the caller's own
  # objects are never frozen as a side effect of constructing a transform
  @subject = subject.dup.freeze
  @params  = params.dup.freeze
  @plist   = (param_list.empty? ? params.keys.sort : param_list.dup).freeze
  # reverse index: parameter identifier => parameter key
  @pcache  = params.map { |k, v| [v[:id], k] }.to_h.freeze
  @accepts = (accepts.respond_to?(:to_a) ? accepts.to_a.dup : [accepts]).freeze
  @returns = (returns.respond_to?(:to_a) ? returns.to_a.dup : [returns]).freeze
  @impl    = implementation.dup.freeze

  # initialize the implementation (a no-op hook in the base class)
  init_implementation harness
end
182
+
183
+ # Return the identifier of the implementation.
184
+ #
185
+ # @return [RDF::URI]
186
+ #
187
def implementation
  # plain reader over @impl
  @impl
end
190
+
191
+ # True if this transform is *actually* implemented.
192
+ #
193
+ # @return [false, true]
194
+ #
195
def implemented?
  # this class always answers false
  false
end
198
+
199
# True if the transform accepts the given Content-Type.
#
# @param type [String] the content type to test
# @return [false, true] whether the transform can handle the type
#
def accepts?(type)
  # Offer the type's whole lineage as faux variants, so negotiation
  # succeeds when the transform only names an ancestor type (e.g. it
  # accepts application/xml and is handed application/xhtml+xml).
  # The key only has to be unique, since it ends up as a hash key.
  variants = {}
  RDF::SAK::MimeMagic.new(type).lineage.each do |t|
    name = t.to_s
    variants[name] = [1, name]
  end

  # build the pseudo Accept header; unless the transform explicitly
  # accepts */*, the wildcard is given a quality of zero
  header = @accepts.dup
  header.push '*/*;q=0' unless header.include? '*/*'

  # we only care *whether* something was negotiated, not *what*
  chosen = HTTP::Negotiate.negotiate({ Accept: header.join(', ') }, variants)
  chosen ? true : false
end
225
+
226
+ # Return the parameter list, or a sorted list of parameter keys in lieu
227
+ #
228
+ # @return [Array]
229
+ #
230
def keys
  # XXX the list should already be unique; duplicates showed up in test
  # output, so they are squashed here as a stopgap
  ordered = @plist
  ordered.uniq
end
235
+
236
+ # Retrieve a parameter spec, either by its fully-qualified URI or
237
+ # its `dct:identifier`.
238
+ #
239
+ # @param key [RDF::Resource,Symbol,String] the parameter URI or its identifier
240
+ # @return [Hash] the parameter spec
241
+ #
242
def [](key)
  # work out which parameter URI the key refers to: resources are used
  # directly, symbols go through the identifier index, and strings are
  # tried as an identifier first and as a URI second
  uri = case key
        when RDF::Resource then key
        when Symbol then @pcache[key]
        when String then @pcache[key.to_sym] || RDF::URI(key)
        end

  spec = @params[uri]

  # add the parameter's URI (not the lookup key, which may have been a
  # bare identifier) to a copy of the spec
  spec.merge({ uri: uri }) if spec
end
252
+
253
+ # XXX kill this
254
# True when the supplied hash has exactly the declared parameter keys.
#
# @param params [Hash] the candidate parameters
# @return [false, true] whether the key sets agree
# @raise [ArgumentError] if params is not a Hash
def lint(params)
  raise ArgumentError, "params must be a hash, not #{params.class}" unless
    params.is_a? Hash

  # #keys keeps the declared parameter-list order, so both sides have to
  # be sorted for the comparison to be order-insensitive
  params.keys.sort == keys.sort
end
259
+
260
+ # Return the validated parameters or raise an exception.
261
+ #
262
+ # @param params [Hash] the hash of parameters
263
+ # @param symbols [false, true] whether the keys should be symbols or URIs
264
+ # @param defaults [true, false] whether to supplant the defaults
265
+ # @param silent [false, true] return nil rather than raise if true
266
+ # @return [Hash] the validated parameters
267
+ #
268
def validate(params = {}, symbols: false, defaults: true, silent: false)
  # duplicate so we can delete from it; nil or omitted params are
  # treated as "no parameters supplied"
  params = (params || {}).dup
  out = {}

  # note the instance variable (the specs) vs the argument (the values)
  @params.each do |k, spec|
    # values may be filed under the parameter key or under its identifier
    raw = params.delete(k) || params.delete(spec[:id]) || []

    values = (raw.respond_to?(:to_a) ? raw.to_a : [raw]).map do |item|
      case item
      when RDF::Term then item
      when URI then RDF::URI(item.to_s)
      when nil then RDF.nil
      else
        range     = spec[:range] || []
        datatypes = range.select(&:datatype?)
        resources = range.reject(&:datatype?)

        if !datatypes.empty?
          # a datatype range wins: build a typed literal
          datatypes = datatypes.to_a.sort
          warn "multiple ranges; arbitrarily picking #{datatypes.first}" if
            datatypes.size > 1
          RDF::Literal(item, datatype: datatypes.first)
        elsif item.is_a?(String) && !resources.empty?
          # a non-datatype range turns strings into nodes or URIs
          if m = /^_:(.+)$/.match(item)
            RDF::Node(m[1])
          else
            RDF::URI(item)
          end
        else
          RDF::Literal(item)
        end
      end
    end

    # XXX one day we should check types but not today

    # give us the default(s) then
    values = spec[:default].dup if values.empty? and spec[:default]

    # but we *will* check the cardinality; note this raises even when
    # `silent` is set
    minc = spec.fetch :minc, 0
    maxc = spec.fetch :maxc, Float::INFINITY

    raise ArgumentError, "Parameter #{k} must have at least" \
      " #{minc} value#{minc == 1 ? '' : 's'}" if values.size < minc
    raise ArgumentError, "Parameter #{k} must have at most" \
      " #{maxc} value#{maxc == 1 ? '' : 's'}" if values.size > maxc
    # XXX if cardinality == 1 should we set v to v.first? dunno

    # file the result under the identifier when symbols are requested
    k = spec[:id] || k.to_s if symbols

    # with defaults: false, values equal to the default are left out
    out[k] = values unless !defaults and values == spec[:default]
  end

  # anything still left in params matched no declared parameter
  unless params.empty?
    return if silent
    raise ArgumentError,
      "Unrecognized parameters #{params.keys.join ', '}"
  end

  out
end
330
+
331
+ # Check the parameters and apply the function, then check the
332
+ # output. Parameters are checked with {#validate} for key
333
+ # resolution, cardinality, range, and type.
334
+ #
335
+ # @param input [String,IO,#to_s,#read] Something bytelike
336
+ # @param params [Hash,RDF::SAK::Transform::Partial] the instance parameters
337
+ # @param parsed [Object] the already-parsed object, if applicable
338
+ # @param type [String] the content-type of the input
339
+ # @param accept [String] a string in the form of an Accept header
340
+ # @yieldparam output [String,IO] the output
341
+ # @yieldparam parseout [Object] the parsed output, if applicable
342
+ # @return [#to_s, Object] the serialized output (and parsed if applicable)
343
+ #
344
def apply(input, params = {}, parsed: nil,
    type: 'application/octet-stream', accept: '*/*', &block)
  # report the transform's subject; there is no @id on this class
  raise NotImplementedError,
    "Transform #{@subject} is not implemented!" unless implemented?

  # XXX validate accept or explode
  # give up quietly when none of the declared return types is acceptable
  HTTP::Negotiate.negotiate({ Accept: accept },
    @returns.map { |t| [t, [1, t]] }.to_h, all: true) or return

  # this will succeed or explode
  params = validate params, symbols: true

  # run the transform
  out, parseout = execute input, parsed, params

  # bail out if nothing was returned
  return unless out

  # now run the block if present
  block.call out, parseout if block

  # return it to the caller
  [out, parseout]
end
368
+
369
+ # This class implements a cache for partial transformation function
370
+ # applications, which bundle transforms with a set of instance
371
+ # parameters under a reusable identity.
372
+ class PartialCache
373
+ private
374
+
375
# Thin delegate to RDF::SAK::Transform.coerce_params.
def coerce_params(params)
  RDF::SAK::Transform.coerce_params(params)
end
378
+
379
+ public
380
+
381
+ # Initialize the cache with all partials pre-loaded.
382
+ #
383
+ # @param harness [RDF::SAK::Transform::Harness] the transform harness
384
+ # @return [RDF::SAK::Transform::PartialCache] the instance
385
+ #
386
def self.load(harness)
  # construct, then populate; #load hands back the instance
  cache = new(harness)
  cache.load
end
389
+
390
+ attr_reader :harness
391
+
392
+ # Initialize an empty cache.
393
+ # @param harness [RDF::SAK::Transform::Harness] the parent harness.
394
+ #
395
def initialize(harness)
  @harness = harness
  # three independent, initially empty hashes
  @cache, @mapping, @transforms = {}, {}, {}
end
401
+
402
+ # Load an initialized partial cache.
403
+ #
404
+ # @return [self] daisy-chainable self-reference
405
+ #
406
def load
  # resolve every subject typed tfo:Partial
  subjects = RDF::SAK::Util.subjects_for(
    repo, RDF.type, RDF::SAK::TFO.Partial)
  subjects.each { |s| resolve subject: s }

  # return self to daisy-chain
  self
end
415
+
416
# The cache keys that are RDF resources.
def partials
  @cache.keys.grep(RDF::Resource)
end
419
+
420
# The repository, as exposed by the harness.
def repo
  harness = @harness
  harness.repo
end
423
+
424
# A shallow copy of the internal transforms hash.
def transforms
  snapshot = @transforms.dup
  snapshot
end
427
+
428
# Retrieve a Partial from the cache, either directly by the partial's
# own subject or through a transform + parameter pair.
#
# @param transform [RDF::SAK::Transform, RDF::URI] the transform, or
#  the subject of an already-cached partial
# @param params [Hash] the instance parameters
# @return [Object, nil] the cached entry, if any
# @raise [ArgumentError] if transform is neither of the above
def get(transform, params)
  ts = case transform
       when RDF::SAK::Transform then transform.subject
       when RDF::URI
         # the URI may really be the subject of a cached partial
         return @cache[transform] if @cache.key? transform

         # XXX transforms resolved here may not get implemented
         resolved = @harness.resolve(transform) or return
         resolved.subject
       else
         raise ArgumentError, "Don't know what to do with #{transform}"
       end

  # return direct cache entry if transform is really the subject
  return @cache[ts] if @cache.key? ts

  # otherwise consult the mapping, which is keyed by transform subject
  # NOTE(review): #resolve files entries under validated parameter
  # hashes, not coerced ones -- confirm the two key shapes agree
  (@mapping[ts] || {})[coerce_params params]
end
446
+
447
+ # Resolves a partial either by subject or by transform + parameter
448
+ # set.
449
+ #
450
+ # @param subject [RDF::URI] The subject URI of the partial
451
+ # @param transform [RDF::URI,RDF::SAK::Transform] the transform
452
+ # @param params [Hash] an instance of parameters
453
+ # @return [RDF::SAK::Transform::Partial]
454
+ #
455
def resolve(subject: nil, transform: nil, params: {})
  if subject
    if subject.is_a? RDF::SAK::Transform::Partial
      # snag the transform
      transform = @harness.resolve(subject.transform) or
        raise "Could not resolve the transform associated with #{subject.subject}"

      # file the partial under both its subject and its
      # transform + params pair, keeping any entry already there
      t = @mapping[transform.subject] ||= {}
      @cache[subject.subject] ||= t[subject.params] ||= subject
    else
      # resolve the partial, preferring what is already cached
      partial = @cache[subject] || RDF::SAK::Transform::Partial.resolve(
        @harness, subject: subject) or return

      # initialize the mapping if not present
      t = @mapping[partial.transform.subject] ||= {}

      # off we go
      @cache[subject] ||= t[partial.params] ||= partial
    end
  elsif transform
    unless transform.is_a? RDF::SAK::Transform
      # an unknown transform cannot have partials
      transform = @harness.resolve(transform) or return
    end

    # callers may hand us nil params; treat that as none
    params = transform.validate(params || {}, defaults: false)

    # note the *presence* of the key means the cache item has been
    # checked already; its *value* may be nil
    t = @mapping[transform.subject] ||= {}
    return t[params] if t.key? params

    # try to resolve the partial
    partial = RDF::SAK::Transform::Partial.resolve(@harness,
      transform: transform, params: params) or return

    # update the caches
    @cache[partial.subject] = t[params] = partial
  end
end
496
+ end
497
+
498
+ # This class is the main harness for holding all the transforms and
499
+ # operating over them. This is the primary interface through which
500
+ # we manipulate transforms.
501
+ class Harness
502
+
503
+ attr_reader :partials, :repo, :root
504
+
505
+ # Create a new harness instance.
506
+ #
507
+ # @param repo [RDF::Repository] the repository to find RDF data
508
+ # @param root [String,Pathname] the root directory for implementations
509
+ #
510
def initialize(repo, root)
  unless repo.is_a? RDF::Repository
    raise ArgumentError, "repo is #{repo.class}, not an RDF::Repository"
  end
  @repo = repo

  # the root has to be a readable directory
  @root = Pathname(root).expand_path
  unless @root.directory? && @root.readable?
    raise ArgumentError, "Root #{@root} does not exist"
  end

  @cache    = {}
  @partials = RDF::SAK::Transform::PartialCache.new self
end
521
+
522
+ # Bootstrap all the transforms.
523
+ #
524
+ # @param repo [RDF::Repository] the repository to find RDF data
525
+ # @param root [String,Pathname] the root directory for implementations
526
+ # @return [RDF::SAK::Transform::Harness] the harness instance
527
def self.load(repo, root)
  # construct, then populate; #load hands back the instance
  harness = new(repo, root)
  harness.load
end
530
+
531
# Load transforms into an existing instance.
# @return [self] the harness, so calls can be daisy-chained
def load
  subjects = RDF::SAK::Util.subjects_for(@repo, RDF.type,
    RDF::SAK::TFO.Transform, only: :resource)

  # resolving has the side effect of caching each transform found
  subjects.each { |subject| resolve subject }

  # return self so we can daisy-chain
  self
end
542
+
543
+ # Return all cached Transform identities.
544
+ #
545
+ # @return [Array] the URIs of known Transforms
546
+ #
547
def transforms
  # cache keys are the transform subjects
  subjects = @cache.keys
  subjects.sort
end
550
+
551
+ # Resolve a Transform based on its URI.
552
+ #
553
+ # @param subject [RDF::Resource] the identifier for the transform.
554
+ # @return [RDF::SAK::Transform] the Transform, if present.
555
+ #
556
def resolve(subject)
  cached = @cache[subject]
  return cached if cached

  # XXX raise???
  found = RDF::SAK::Transform.resolve(self, subject)
  return unless found

  # only successful lookups are remembered
  @cache[subject] = found
end
563
+
564
+ # Resolve a Partial based on either its subject URI or the
565
+ # transform-params pair.
566
+ #
567
+ # @param subject [RDF::Resource] the Partial's subject
568
+ # @param transform [RDF::Resource,RDF::SAK::Transform] the transform
569
+ # @param params [Hash] an instance of parameters
570
+ # @return [RDF::SAK::Transform::Partial] the Partial, if present
571
+ #
572
def resolve_partial(subject: nil, transform: nil, params: nil)
  # the signature defaults params to nil, but the partial cache goes on
  # to validate them as a hash, so hand over an empty hash instead
  partials.resolve subject: subject, transform: transform,
    params: params || {}
end
575
+
576
+ # Resolve a total function application record based on either its
577
+ # subject URI, a transform-params pair, or a Partial.
578
+ #
579
+ # @param subject [RDF::Resource] the Application's subject
580
+ # @param transform [RDF::Resource,RDF::SAK::Transform] the Transform
581
+ # @param params [Hash] an instance of parameters
582
+ # @param partial [RDF::Resource,RDF::SAK::Transform::Partial] a Partial
583
+ # @return [RDF::SAK::Transform::Application] the Application, if present
584
+ #
585
def resolve_application(subject: nil, transform: nil, params: {},
    partial: nil, input: nil, output: nil)
  # straight pass-through, with this harness as the first argument
  RDF::SAK::Transform::Application.resolve(
    self,
    subject:   subject,
    transform: transform,
    params:    params,
    partial:   partial,
    input:     input,
    output:    output
  )
end
591
+
592
+ # Returns true if the Application with the given subject URI
593
+ # matches either the transform-params pair, or a partial.
594
+ #
595
+ # @param subject [RDF::Resource,RDF::SAK::Transform::Application]
596
+ # the application
597
+ # @param transform [RDF::Resource,RDF::SAK::Transform] the transform
598
+ # @param params [Hash] an instance of parameters
599
+ # @param partial [RDF::Resource,RDF::SAK::Transform::Partial] a partial
600
+ # @return [true, false] whether or not the application matches
601
+ #
602
def application_matches?(subject, transform: nil, params: {}, partial: nil)
  # unbundle the params; partial overrides transform+params
  if partial
    unless partial.is_a? RDF::SAK::Transform::Partial
      # resolve_partial only takes keywords; an unknown partial can
      # never match
      partial = resolve_partial(subject: partial) or return false
    end
    transform = partial.transform
    params    = partial.params
  else
    # nothing to compare against
    return false unless transform

    unless transform.is_a? RDF::SAK::Transform
      transform = resolve(transform) or return false
    end
  end

  # normalize with defaults stripped, so the comparison against the
  # repository's (also defaults-stripped) parameters is like for like
  params = transform.validate params, defaults: false

  if subject.is_a? RDF::SAK::Transform::Application
    return true if partial and subject.completes? partial
    return true if
      subject.transform == transform and subject.matches? params
  else
    # this should say, try matching the partial if there is one
    # to match, otherwise attempt to directly match the transform
    return true if partial and repo.has_statement?(
      RDF::Statement(subject, RDF::SAK::TFO.completes, partial.subject))

    if repo.has_statement?(
        RDF::Statement(subject, RDF::SAK::TFO.transform, transform.subject))
      # collect whatever parameter values the record carries
      testp = transform.keys.map do |p|
        o = repo.query([subject, p, nil]).objects.uniq.sort
        o.empty? ? nil : [p, o]
      end.compact.to_h

      # this will clear any explicit declarations of defaults
      testp = transform.validate testp, defaults: false, silent: true
      # true means it matches
      return testp == params
    end
  end

  false
end
642
+ end
643
+
644
+ class Partial
645
+ # Resolve a partial function application with the given parameters.
646
+ #
647
+ # @param harness [RDF::SAK::Transform::Harness] the harness
648
+ # @param subject [RDF::Resource] the identity of the partial
649
+ # @param transform [RDF::Resource] the identity of the transform
650
+ # @param params [Hash] key-value pairs
651
def self.resolve(harness, subject: nil, transform: nil, params: {})
  raise ArgumentError, 'Must supply either a subject or a transform' unless
    subject or transform

  repo = harness.repo

  # coerce the transform to a Transform object if it isn't already
  if transform
    unless transform.is_a?(RDF::SAK::Transform)
      transform = harness.resolve(transform) or return
    end
  elsif subject.is_a? RDF::URI
    # locate the transform if given the subject
    transform = RDF::SAK::Util.objects_for(repo, subject,
      RDF::SAK::TFO.transform, only: :resource).first or return
    transform = harness.resolve(transform) or return
  end

  # e.g. a non-URI subject with no transform: nothing to resolve against
  return unless transform

  if subject
    # read the parameter values straight off the subject
    params = {}
    transform.keys.each do |p|
      o = repo.query([subject, p, nil]).objects.uniq.sort
      params[p] = o unless o.empty?
    end
  else
    # obtain the subject for the given parameters
    params = transform.validate params, symbols: false, defaults: false

    candidates = RDF::Query.new do
      # XXX we should sort parameters by longest value since
      # longer values will probably be less common; anyway this is
      # gonna all need to be rethought
      params.each { |p, objs| objs.each { |o| pattern [:s, p, o] } }
      pattern [:s, RDF.type, RDF::SAK::TFO.Partial]
      pattern [:s, RDF::SAK::TFO.transform, transform.subject]

      # add any remaining parameters
      # XXX this actually messes up; we don't want this
      # (transform.keys - params.keys.sort).each { |r| pattern [:s, r, nil] }
    end.execute(repo).map { |sol| [sol[:s], {}] }.to_h

    # fill in each candidate's actual parameters as a side effect and
    # keep only the candidates whose parameters are exactly the same
    candidates.select! do |s, ps|
      transform.keys.each do |p|
        o = repo.query([s, p, nil]).objects.uniq.sort
        ps[p] = o unless o.empty?
      end
      ps == params
    end

    return if candidates.empty?

    # sort it so we always get the same thing
    subject = candidates.keys.sort.first
    params  = candidates[subject]
  end

  self.new subject, transform, params
end
712
+
713
+ attr_reader :subject, :transform
714
+
715
# Create a partial from a subject, a Transform, and its parameters.
#
# @param subject [RDF::Resource] the identity of the partial
# @param transform [RDF::SAK::Transform] the transform
# @param params [Hash, RDF::SAK::Transform::Partial] the parameters
# @raise [ArgumentError] if transform is not a Transform
def initialize(subject, transform, params = {})
  unless transform.is_a? RDF::SAK::Transform
    raise ArgumentError, 'transform must be a Transform'
  end

  @subject, @transform = subject, transform

  # when handed a Partial in place of a parameter hash, @params is
  # deliberately left unset
  return if params.is_a? RDF::SAK::Transform::Partial

  @params = transform.validate params
end
723
+
724
# Look a value up in the stored parameters.
def [](key)
  stored = @params
  stored[key]
end
727
+
728
# The keys of the stored parameters.
def keys
  stored = @params
  stored.keys
end
731
+
732
# A shallow copy of the stored parameters.
def params
  copy = @params.dup
  copy
end
735
+
736
# True if the given parameters, once validated by the transform, equal
# the stored ones.
def matches?(params)
  validated = @transform.validate(params)
  @params == validated
end
739
+
740
# Loose equivalence: another Partial with the same transform and
# matching parameters.
def ===(other)
  other.is_a?(RDF::SAK::Transform::Partial) &&
    transform == other.transform &&
    matches?(other.params)
end
744
+
745
# Strict equality: loosely equivalent (see #===) and the same subject.
def ==(other)
  (self === other) && (subject == other.subject)
end
748
+ end
749
+
750
+ # A record of a transformation function application.
751
+ # @note "Application" as in to "apply" a function, not an "app".
752
+ class Application < Partial
753
+ # Resolve a particular function Application from the repository.
754
+ # Either resolve by subject, or resolve by a transform + parameter
755
+ # + input set. Applications that complete Partials will be
756
+ # automatically resolved.
757
+ #
758
+ # @param harness [RDF::SAK::Transform::Harness] the harness
759
+ # @param subject [RDF::Resource] the subject
760
+ # @param transform [RDF::Resource,RDF::SAK::Transform] the transform
761
+ # @param params [Hash] an instance of parameters
762
+ # @param input [RDF::Resource] the Application's input
763
+ # @param output [RDF::Resource] the Application's output
764
+ # @return [RDF::SAK::Transform::Application] the Application, if present
765
+ #
766
def self.resolve(harness, subject: nil, transform: nil, params: {},
    partial: nil, input: nil, output: nil)
  # either a subject or transform + input + output? + params?

  repo     = harness.repo
  partials = harness.partials

  if subject
    # noop
    return subject if subject.is_a? self

    # see whether the record completes a partial
    partial = RDF::SAK::Util.objects_for(
      repo, subject, RDF::SAK::TFO.completes, only: :resource).sort.first

    if partial
      tmp = partials.resolve(subject: partial) or
        raise "Could not find partial #{partial}"
      partial   = tmp
      transform = partial.transform
    else
      transform = RDF::SAK::Util.objects_for(
        repo, subject, RDF::SAK::TFO.transform, only: :resource).sort.first or
        raise "Could not find a transform for #{subject}"
      tmp = harness.resolve(transform) or
        raise "Could not find transform #{transform}"
      transform = tmp

      # get params straight off the subject
      params = {}
      transform.keys.each do |p|
        o = repo.query([subject, p, nil]).objects.uniq.sort
        params[p] = o unless o.empty?
      end
    end

    # get inputs and outputs
    input = RDF::SAK::Util.objects_for(
      repo, subject, RDF::SAK::TFO.input, only: :resource).sort.first
    output = RDF::SAK::Util.objects_for(
      repo, subject, RDF::SAK::TFO.output, only: :resource).sort.first

    raise 'Data must have both input and output' unless input and output
  elsif input and ((transform and params) or partial)

    # XXX dispatch on partial only? smart? dumb?
    if partial
      # the partial may arrive as a bare identifier
      unless partial.is_a? RDF::SAK::Transform::Partial
        tmp = partials.resolve(subject: partial) or
          raise "Could not find partial #{partial}"
        partial = tmp
      end

      transform = partial.transform
      params    = partial.params

      # candidate records are compared with defaults stripped, so the
      # comparand has to be stripped the same way
      wanted = transform.validate params, defaults: false
    else
      # do transform
      t = harness.resolve(transform) or
        raise "Could not resolve transform #{transform}"
      transform = t

      # coerce/validate params
      params = transform.validate params, defaults: false
      wanted = params

      # do partial
      partial = partials.resolve transform: transform, params: params
    end

    # collect function application receipts
    candidates = RDF::Query.new do
      # note that there is no cost-based optimization so we write
      # these in the order of least to most cardinality
      pattern [:t, RDF::SAK::TFO.output, output] if output
      pattern [:t, RDF::SAK::TFO.input, input]
    end.execute(repo).map { |sol| sol[:t] }.compact.uniq.select do |s|
      # this should say, try matching the partial if there is one
      # to match, otherwise attempt to directly match the transform
      if partial and repo.has_statement?(
          RDF::Statement(s, RDF::SAK::TFO.completes, partial.subject))
        true
      elsif repo.has_statement?(
          RDF::Statement(s, RDF::SAK::TFO.transform, transform.subject))
        testp = transform.keys.map do |p|
          o = repo.query([s, p, nil]).objects.uniq.sort
          o.empty? ? nil : [p, o]
        end.compact.to_h

        testp = transform.validate testp, defaults: false, silent: true
        testp == wanted
      end
    end.compact.uniq.sort

    return if candidates.empty?

    if candidates.size == 1
      subject = candidates.first
    else
      # now we have the unlikely case that there are two identical
      # records so we just sort em first by end date, then by
      # start date, then lexically
      subject = candidates.map do |s|
        st, et = %i[startedAtTime endedAtTime].map do |p|
          repo.query([s, RDF::Vocab::PROV[p], nil]).map do |stmt|
            dt = stmt.object.object
            dt if dt.is_a? DateTime
          end.compact.sort.last
        end
        [s, st, et]
      end.sort do |a, b|
        # latest end-time first...
        c = a[2] && b[2] ? b[2] <=> a[2] : 0
        # ...then latest start-time...
        c = b[1] <=> a[1] if c == 0 && a[1] && b[1]
        # ...and only if those yield nothing, sort lexically
        c == 0 ? a[0] <=> b[0] : c
      end.first.first
    end
  else
    raise ArgumentError,
      'must have either a subject or transform + params + input'
  end

  # don't forget the output
  output ||= repo.query(
    [subject, RDF::SAK::TFO.output, nil]
  ).objects.select(&:uri?).sort.first

  new subject, transform, input, output, partial || params
end
890
+
891
+ attr_reader :input, :output, :completes
892
+
893
+ # Create a new function application from whole cloth.
894
+ #
895
+ # @param subject [RDF::Resource]
896
+ # @param transform [RDF::Resource] the identifier for the transform
897
+ # @param input [RDF::Resource] the identifier for the input
898
+ # @param output [RDF::Resource] the identifier for the output
899
+ # @param params [Hash, RDF::SAK::Transform::Partial] the parameters
900
+ # or partial application that is completed
901
def initialize(subject, transform, input, output, params = {},
    start: nil, stop: nil)
  # params may be a partial; the superclass copes with either
  super(subject, transform, params)

  @input, @output = input, output
  # only remembered when the application completes a partial
  @completes = params if params.is_a? RDF::SAK::Transform::Partial
  @start, @stop = start, stop
end
912
+
913
+ # Returns the function application as an array of triples.
914
def to_triples
  s = @subject
  triples = [[s, RDF.type, RDF::SAK::TFO.Application]]

  # timestamps are optional, and wrapped as literals when they are not
  # literals already
  if @start
    started = @start.is_a?(RDF::Literal) ? @start : RDF::Literal(@start)
    triples << [s, RDF::Vocab::PROV.startedAtTime, started]
  end

  if @stop
    ended = @stop.is_a?(RDF::Literal) ? @stop : RDF::Literal(@stop)
    triples << [s, RDF::Vocab::PROV.endedAtTime, ended]
  end

  if @completes
    # a completed partial is referenced instead of restating it
    triples << [s, RDF::SAK::TFO.completes, @completes.subject]
  else
    triples << [s, RDF::SAK::TFO.transform, transform.subject]
    # only the values that differ from the defaults are written out
    explicit = transform.validate params, defaults: false, silent: true
    explicit.each do |k, vals|
      vals.each { |v| triples << [s, k, v] }
    end
  end

  triples.map { |triple| RDF::Statement(*triple) }
end
941
+
942
# Look up a parameter by key, delegating to the completed partial
# when there is one and to our own parameters otherwise.
def [](key)
  source = @completes || @params
  source[key]
end
946
+
947
# List the parameter keys, taken from the completed partial when
# there is one and from our own parameters otherwise.
def keys
  source = @completes || @params
  source.keys
end
950
+
951
# The effective parameters: the completed partial's parameters when
# there is one, otherwise a shallow copy of our own.
def params
  return @completes.params if @completes

  @params.dup
end
954
+
955
# The effective transform: the completed partial's transform when
# there is one, otherwise our own.
def transform
  return @completes.transform if @completes

  @transform
end
958
+
959
# Test whether this application completes the given partial.
#
# Falsy when we complete nothing, when no partial is given, or when
# the two are not equal.
def completes? partial
  @completes && partial && @completes == partial
end
962
+
963
# Test whether the given parameters match. The question is handed to
# the completed partial when there is one; otherwise the superclass
# implementation answers it.
def matches? params
  if @completes
    @completes.matches? params
  else
    super params
  end
end
967
+
968
# Case equality between applications: the other object must be an
# Application with the same input and output, the same transform, and
# parameters that match ours (or those of the partial we complete).
def ===(other)
  comparable = other.is_a?(Application) &&
    @input == other.input && @output == other.output
  return false unless comparable

  # compare against the partial when we complete one, else ourselves
  reference = @completes || self

  other.transform == reference.transform &&
    other.matches?(reference.params)
end
978
+ end
979
+
980
+ # XXX everything below this line is trash
981
+
982
# Gather the candidate's values in the repository for each parameter
# predicate.
#
# NOTE(review): this looks unfinished. The gathered structure is
# never compared against the normalized parameters and is discarded;
# the method returns the list of parameter keys.
#
# @param repo the repository; must respond to #query with a
#  triple pattern and a block
# @param candidate the subject to inspect
# @param params [Hash] parameter values keyed by predicate
# @return [Array] the keys of params
def match_params repo, candidate, params = {}
  # normalize every value into a set
  wanted = params.transform_values do |value|
    Set.new(value.respond_to?(:to_a) ? value.to_a : [value])
  end

  found = {}
  wanted.keys.each do |predicate|
    repo.query([candidate, predicate, nil]) do |statement|
      (found[statement.predicate] ||= Set.new) << statement.object
    end
  end
end
996
+
997
# Resolve a transformation application function in the repository
# with the given inputs and outputs.
#
# XXX note that this thing in its current state will not distinguish
# between two different function applications that happen to map the
# same input to the same output, but with different scalar
# parameters. For example, the `subtree` function could be given two
# different XPath queries but return the same subtree.
#
# @param repo the repository to search; it is used through
#  RDF::Query#execute, #has_statement? and #query
# @param transform the identifier of the transform; compared against
#  each cached partial's #transform and used as the object of
#  tfo:transform
# @param input the identifier of the input
# @param output the identifier of the output, if known
# @param graph currently unused
# @param params [Hash] parameters; every value is normalized to a Set
# @param partials [Hash] cache of partial applications keyed by
#  subject; a freshly resolved partial is added to it
# @return the subject of the matching application, or nil if none
def resolve_transformation repo, transform, input, output = nil,
    graph: nil, params: {}, partials: {}

  # overwrite normalized params XXX replace this with something real
  params = params.transform_values do |v|
    Set.new(v.respond_to?(:to_a) ? v.to_a : [v])
  end

  # first we check the cache of partials to see if there is one that
  # matches our transform and parameters
  partial = partials.values.select do |p|
    p.transform == transform and p.matches? params
  end.min

  # find the partial if there is one
  unless partial
    partial = Partial.resolve transform: transform, params: params
    # argh this isn't right; it should be partials[transform][params]
    partials[partial.subject] = partial if partial
  end

  # collect function application receipts
  candidates = RDF::Query.new do
    # note that there is no cost-based optimization so we write
    # these in the order of least to most cardinality
    pattern [:t, RDF::SAK::TFO.output, output]
    pattern [:t, RDF::SAK::TFO.input, input]
  end.execute(repo).map { |sol| sol[:t] }.compact.uniq.select do |s|
    direct = RDF::Statement(s, RDF::SAK::TFO.transform, transform)
    next true if repo.has_statement? direct

    # the statement's object has to be the partial's identifier, not
    # the Partial object itself
    partial && repo.has_statement?(
      RDF::Statement(s, RDF::SAK::TFO.completes, partial.subject))
  end

  # first will be nil if this is empty so voila
  return candidates.first unless candidates.size > 1

  # latest DateTime value of the given PROV property on a subject;
  # this has to collect the values rather than pass a block to
  # #query, whose block results are discarded
  latest = lambda do |s, p|
    repo.query([s, RDF::Vocab::PROV[p], nil]).objects.select(
      &:literal?).map(&:object).select { |dt| dt.is_a? DateTime }.max
  end

  # descending comparison that treats a missing value as a tie
  newest_first = lambda do |x, y|
    x && y ? (y <=> x).to_i : 0
  end

  # now we have the unlikely case that there are two identical records
  candidates.map do |s|
    [s, latest.call(s, :startedAtTime), latest.call(s, :endedAtTime)]
  end.min do |a, b|
    # first check latest end-time, then check latest start-time
    c = newest_first.call a[2], b[2]
    c = newest_first.call a[1], b[1] if c == 0
    # if those two yield nothing, then sort lexically i guess
    c == 0 ? a[0] <=> b[0] : c
  end.first
end
1059
+
1060
# Placeholder for recording a function application in the repository.
# The body is empty: every argument is accepted and ignored, and the
# method returns nil.
def record_application repo, transform, input, output, start, finish,
    partial: false, graph: nil, subject: nil, params: {}
  nil
end
1063
+
1064
# Find a recorded partial application of the given function whose
# parameter values equal the ones supplied.
#
# @param repo the repository to search; used through
#  RDF::Query#execute and #query
# @param function the identifier of the transform function
# @param params [Hash] parameter values keyed by predicate; scalars
#  and collections are both accepted and compared as sets
# @return the first matching subject in sort order, or nil if none
def get_partial_transform repo, function, params = {}
  keys = params.keys

  # typed as a partial, pointing at the function, with every parameter
  query = RDF::Query.new do
    pattern [:s, RDF.type, RDF::SAK::TFO.Partial]
    pattern [:s, RDF::SAK::TFO.transform, function]
    keys.each { |k| pattern [:s, k, nil] }
  end

  # candidate subject => { predicate => set of values }
  found = {}
  query.execute(repo).each do |sol|
    s     = sol[:s]
    entry = found[s] ||= {}
    keys.each do |k|
      entry[k] = Set.new(repo.query([s, k, nil]).objects)
    end
  end

  # the input params are coerced to sets to match what was retrieved
  wanted = params.transform_values do |v|
    Set.new(v.respond_to?(:to_a) ? v.to_a : [v])
  end

  # sorted so the same subject comes back every time if there are
  # several matches; the key comparison is the cheap check
  found.keys.sort.find do |s|
    found[s].keys.sort == keys.sort && found[s] == wanted
  end
end
1098
+
1099
# Transform that extracts a subtree from an XML document by XPath.
class XPath < RDF::SAK::Transform
  protected

  # Run the XPath extraction.
  #
  # @param input the raw XML, parsed with Nokogiri when `parsed` is
  #  not supplied
  # @param parsed an already-parsed document, if available
  # @param params [Hash] reads :xpath (required), :prefix (values of
  #  the form "prefix:namespace") and :reindent (defaults to true)
  # @return [Array, nil] the serialized XML and the resulting
  #  document, or nil if parsing fails or nothing is selected
  # @raise [RuntimeError] if no :xpath parameter is present
  def execute input, parsed = nil, params
    xpath = params.fetch(:xpath, []).first or
      raise 'XPath transform requires an :xpath parameter'

    # each prefix value is split on the first colon into a pair
    prefix = params.fetch(:prefix, []).map do |x|
      x.value.split(/\s*:\s*/, 2)
    end.to_h.transform_keys(&:to_sym)

    # default to an empty list like the other parameters, so a missing
    # :reindent falls back to true instead of raising KeyError
    reindent = (
      params.fetch(:reindent, []).first || RDF::Literal(true)).object

    begin
      parsed ||= Nokogiri.XML input
    rescue Nokogiri::SyntaxError
      # XXX i dunno, raise?
      return
    end

    doc = RDF::SAK::Util.subtree parsed,
      xpath.value, prefixes: prefix, reindent: reindent

    return unless doc

    [doc.to_xml, doc]
  end

  public

  # This transform has a native implementation.
  def implemented?
    true
  end
end
1130
+
1131
# Transform that applies an XSLT stylesheet to an XML document.
class XSLT < RDF::SAK::Transform
  # This transform has a native implementation.
  def implemented?
    true
  end

  protected

  # Load the stylesheet, resolved relative to the harness root.
  #
  # @param harness an object whose #root names a readable directory
  # @raise [ArgumentError] if the root is missing or not a readable
  #  directory, or the stylesheet is not a readable file
  def init_implementation harness
    root = harness.root
    unless root
      raise ArgumentError, "Need a root to initialize the implementation"
    end

    root = Pathname(root).expand_path unless root.is_a? Pathname
    unless root.directory? and root.readable?
      raise ArgumentError, "#{root} is not a readable directory"
    end

    # XXX this assumes this is a file URI but so far that is the
    # only way we get here
    filename = root + implementation.path
    unless filename.file? and filename.readable?
      raise ArgumentError, "#{filename} is not a readable file"
    end

    @sheet = Nokogiri::XSLT(filename.read)
  end

  # Apply the stylesheet.
  #
  # @param input the raw XML, parsed with Nokogiri when `parsed` is
  #  not supplied
  # @param parsed an already-parsed document, if available
  # @param params currently unused
  # @return [Array, nil] the serialized result and the transformed
  #  document, or nil if parsing raises a syntax error
  def execute input, parsed = nil, params
    unless parsed
      begin
        parsed = Nokogiri.XML input
      rescue Nokogiri::SyntaxError
        # XXX i dunno, raise?
        return
      end
    end

    # XXX do we wanna allow params?
    result = @sheet.transform parsed

    # hand back both the string and the still-parsed document
    [@sheet.serialize(result), result]
  end
end
1172
+ end