shex 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/AUTHORS +1 -0
- data/CREDITS +0 -0
- data/LICENSE +24 -0
- data/README.md +124 -0
- data/VERSION +1 -0
- data/etc/doap.ttl +33 -0
- data/lib/shex.rb +133 -0
- data/lib/shex/algebra.rb +39 -0
- data/lib/shex/algebra/and.rb +32 -0
- data/lib/shex/algebra/annotation.rb +6 -0
- data/lib/shex/algebra/base.rb +6 -0
- data/lib/shex/algebra/each_of.rb +53 -0
- data/lib/shex/algebra/external.rb +24 -0
- data/lib/shex/algebra/inclusion.rb +59 -0
- data/lib/shex/algebra/node_constraint.rb +148 -0
- data/lib/shex/algebra/not.rb +19 -0
- data/lib/shex/algebra/one_of.rb +50 -0
- data/lib/shex/algebra/operator.rb +274 -0
- data/lib/shex/algebra/or.rb +37 -0
- data/lib/shex/algebra/prefix.rb +6 -0
- data/lib/shex/algebra/satisfiable.rb +44 -0
- data/lib/shex/algebra/schema.rb +125 -0
- data/lib/shex/algebra/semact.rb +38 -0
- data/lib/shex/algebra/shape.rb +93 -0
- data/lib/shex/algebra/shape_ref.rb +46 -0
- data/lib/shex/algebra/start.rb +20 -0
- data/lib/shex/algebra/stem.rb +20 -0
- data/lib/shex/algebra/stem_range.rb +42 -0
- data/lib/shex/algebra/triple_constraint.rb +72 -0
- data/lib/shex/algebra/triple_expression.rb +46 -0
- data/lib/shex/algebra/unary_shape.rb +6 -0
- data/lib/shex/algebra/value.rb +28 -0
- data/lib/shex/meta.rb +7914 -0
- data/lib/shex/parser.rb +801 -0
- data/lib/shex/terminals.rb +106 -0
- metadata +224 -0
@@ -0,0 +1,37 @@
|
|
1
|
+
module ShEx::Algebra
  ##
  # Disjunction of shape expressions (`ShapeOr`): a node satisfies this
  # operator when at least one of its satisfiable operands is satisfied.
  class Or < Operator
    include Satisfiable
    NAME = :or

    ##
    # Requires at least two operands; arguments and options are passed on to
    # the superclass initializer unchanged.
    #
    # @param [Array] args the alternative shape expressions
    # @param [Hash{Symbol => Object}] options
    # @raise [ArgumentError] if fewer than two operands are given
    def initialize(*args, **options)
      if args.length <= 1
        # The guard rejects zero or one operand, so the minimum is two.
        raise ArgumentError, "wrong number of arguments (given #{args.length}, expected 2..)"
      end
      super
    end

    #
    # S is a ShapeOr and there is some shape expression se2 in shapeExprs such that satisfies(n, se2, G, m).
    #
    # Operands are tried in order; the first one whose `satisfies?` does not
    # raise `ShEx::NotSatisfied` ends the search. Failures of individual
    # operands are logged through `log_recover` and otherwise ignored.
    #
    # @param [RDF::Resource] n
    # @return [Boolean] `true` if satisfied, `false` if it does not apply
    # @raise [ShEx::NotSatisfied] if not satisfied
    def satisfies?(n)
      operands.select {|o| o.is_a?(Satisfiable)}.each do |op|
        begin
          op.satisfies?(n)
          status "satisfied #{n}"
          return true
        rescue ShEx::NotSatisfied => e
          # This alternative failed; record it and move on to the next one.
          log_recover("or: ignore error: #{e.message}", depth: options.fetch(:depth, 0))
        end
      end

      not_satisfied "Expected some expression to be satisfied"
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'sparql/algebra'
|
2
|
+
require 'sparql/extensions'
|
3
|
+
|
4
|
+
module ShEx::Algebra
  # Mixin providing the `satisfies?` / `not_satisfies?` protocol for operators
  module Satisfiable
    ##
    # Placeholder implementation which always raises; including classes
    # supply their own version.
    #
    # @param [RDF::Resource] n
    # @return [Boolean] `true` if satisfied, `false` if it does not apply
    # @raise [ShEx::NotSatisfied] if not satisfied
    # @see [https://shexspec.github.io/spec/#shape-expression-semantics]
    def satisfies?(n)
      raise NotImplementedError, "#satisfies? Not implemented in #{self.class}"
    end

    ##
    # Negated form of `satisfies?`: succeeds when `satisfies?` raises
    # `ShEx::NotSatisfied`, and reports a failure when it does not.
    #
    # @param [RDF::Resource] n
    # @return [Boolean] `true` if not satisfied, `false` if it does not apply
    # @raise [ShEx::NotSatisfied] if satisfied
    # @see [https://shexspec.github.io/spec/#shape-expression-semantics]
    def not_satisfies?(n)
      begin
        satisfies?(n)
      rescue ShEx::NotSatisfied => failure
        # Expected it to not satisfy
        operator_name = self.class.const_get(:NAME)
        log_recover(operator_name, "ignore error: #{failure.message}", depth: options.fetch(:depth, 0))
        true
      else
        # Kept in the `else` clause so this call is not covered by the rescue above.
        not_satisfied "Expression should not have matched"
      end
    end
    alias_method :notSatisfies?, :not_satisfies?

    ##
    # Triple expressions collected from every satisfiable operand,
    # flattened and de-duplicated.
    #
    # @return [Array<TripleExpressions>]
    def triple_expressions
      satisfiable_operands = operands.select {|operand| operand.is_a?(Satisfiable)}
      collected = satisfiable_operands.map {|operand| operand.triple_expressions}
      collected.flatten.uniq
    end

    # Marks the including operator as Satisfiable
    def satisfiable?
      true
    end
  end
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
module ShEx::Algebra
  ##
  # Root operator of a parsed schema. Holds the graph being validated and the
  # node-to-shape map, and dispatches validation to the start expression and
  # to the shape selected for the focus node.
  class Schema < Operator
    include Satisfiable
    NAME = :schema

    # Graph to validate
    # @return [RDF::Queryable]
    attr_accessor :graph

    # Map of nodes to shapes; keys and values are stored as strings
    # @return [Hash{String => String}]
    attr_reader :map

    ##
    # Match on schema. Finds appropriate shape for node, and matches that shape.
    #
    # @param [RDF::Resource] n
    # @param [RDF::Queryable] g
    # @param [Hash{RDF::Resource => RDF::Resource}] m
    # @param [Array<Schema, String>] shapeExterns ([])
    #   One or more schemas, or paths to ShEx schema resources used for finding external shapes.
    # @return [Boolean] `true` if satisfied, `false` if it does not apply
    # @raise [ShEx::NotSatisfied] if not satisfied
    # FIXME: set of node/shape pairs
    def satisfies?(n, g, m, shapeExterns: [], **options)
      @graph = g
      @external_schemas = shapeExterns
      # Normalise nodes and shape labels to strings so lookups are by lexical form
      @map = m.each_with_object({}) {|(k, v), memo| memo[k.to_s] = v.to_s}

      # First, evaluate semantic acts (the combined result is not inspected here)
      semantic_actions.all? do |op|
        op.satisfies?([])
      end

      # Next run any start expression
      if start
        status("start") {"expression: #{start.to_sxp}"}
        start.satisfies?(n)
      end

      label = @map[n.to_s]
      if label && !label.empty?
        shape = shapes[label]
        structure_error("No shape found for #{label}") unless shape

        # If `n` is a Blank Node, we won't find it through normal matching, find an equivalent node in the graph having the same label.
        # Only blank nodes from the graph are compared, so `id` is never sent to other kinds of term.
        if n.is_a?(RDF::Node)
          nn = graph.enum_term.detect {|t| t.is_a?(RDF::Node) && t.id == n.id}
          n = nn if nn
        end

        shape.satisfies?(n)
      end
      status "schema satisfied"
      true
    end

    ##
    # Shapes as a hash, keyed by the string form of each shape label.
    # Built once from the `:shapes` operand and memoized.
    #
    # @return [Hash{String => Operator}]
    def shapes
      @shapes ||= begin
        shapes_operand = operands.detect {|op| op.is_a?(Array) && op.first == :shapes}
        labelled = shapes_operand ? shapes_operand.last : {}
        labelled.each_with_object({}) do |(label, operand), memo|
          memo[label.to_s] = operand
        end
      end
    end

    ##
    # Externally loaded schemas. Entries that are not already a `Schema` are
    # opened with `ShEx.open`; every entry is then given this schema's graph.
    #
    # @return [Array<Schema>]
    def external_schemas
      @external_schemas = Array(@external_schemas).map do |extern|
        schema = case extern
        when Schema then extern
        else
          status "Load extern #{extern}"
          ShEx.open(extern, logger: options[:logger])
        end
        schema.graph = graph
        schema
      end
    end

    ##
    # Enumerate via depth-first recursive descent over operands, yielding each operator.
    # Shapes held in the `shapes` hash are visited after the regular operands.
    #
    # @yield operator
    # @yieldparam [Object] operator
    # @return [Enumerator]
    def each_descendant(depth = 0, &block)
      if block_given?
        super(depth + 1, &block)
        shapes.values.each do |op|
          op.each_descendant(depth + 1, &block) if op.respond_to?(:each_descendant)

          # One-argument blocks receive just the operator; others also receive the depth
          case block.arity
          when 1 then block.call(op)
          else block.call(depth, op)
          end
        end
      end
      enum_for(:each_descendant)
    end

    ##
    # Start expression, if any (memoized)
    #
    # @return [Start, nil]
    def start
      @start ||= operands.detect {|op| op.is_a?(Start)}
    end

    ##
    # Validate shapes, in addition to other operands
    # @return [SPARQL::Algebra::Expression] `self`
    # @raise [ArgumentError] if the value is invalid
    def validate!
      shapes.values.each {|op| op.validate! if op.respond_to?(:validate!)}
      super
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module ShEx::Algebra
  ##
  # Semantic action attached to a schema, shape or triple expression.
  # Only the `http://shex.io/extensions/Test/` extension is recognised.
  class SemAct < Operator
    NAME = :semact

    #
    # The evaluation semActsSatisfied on a list of SemActs returns success or failure. The evaluation of an individual SemAct is implementation-dependent.
    #
    # For the Test extension the action code is expected to look like
    # `print(...)` or `fail(...)`, where the argument is either a quoted string
    # or one of `s`, `p`, `o` (taken from the first statement). The resulting
    # text is written to `$stdout` and to the status log; `fail` then reports
    # the action as not satisfied.
    #
    # @param [Array<RDF::Statement>] statements
    # @return [Boolean] `true` if satisfied, `false` if it does not apply
    # @raise [ShEx::NotSatisfied] if not satisfied
    def satisfies?(statements)
      # FIXME: should have a registry
      case operands.first.to_s
      when "http://shex.io/extensions/Test/"
        md = /^ *(fail|print) *\( *(?:(\"(?:[^\\"]|\\")*\")|([spo])) *\) *$/.match(operands[1].to_s)
        statement = statements.first
        str = if md && md[2]
          # Quoted string argument: use it as given (quotes included)
          md[2]
        elsif statement.nil?
          # Nothing to take a subject/predicate/object from
          'no statement'
        elsif md
          case md[3]
          when 's' then statement.subject
          when 'p' then statement.predicate
          when 'o' then statement.object
          else          statement.to_sxp
          end.to_s
        else
          statement.to_sxp
        end
        $stdout.puts str
        status str
        not_satisfied "fail" if md && md[1] == 'fail'
        true
      else
        status("unknown SemAct name #{operands.first}") {"expression: #{self.to_sxp}"}
        false
      end
    end

    # This operator is a SemAct
    def semact?; true; end
  end
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
module ShEx::Algebra
  ##
  # A shape: an optional triple expression plus `extra` predicates and a
  # `closed` flag, checked against the neighbourhood of a focus node.
  class Shape < Operator
    include Satisfiable
    NAME = :shape

    #
    # The `satisfies` semantics for a `Shape` depend on a matches function defined below. For a node `n`, shape `S`, graph `G`, and shapeMap `m`, `satisfies(n, S, G, m)`.
    # @param [RDF::Resource] n
    # @return [Boolean] `true` if satisfied
    # @raise [ShEx::NotSatisfied] if not satisfied
    def satisfies?(n)
      triple_expr = operands.detect {|operand| operand.is_a?(TripleExpression)}

      # neigh(G, n) = arcsOut(G, n) ∪ arcsIn(G, n), each side sorted by its S-expression form
      incoming = schema.graph.query(object: n).to_a.sort_by(&:to_sxp)
      outgoing = schema.graph.query(subject: n).to_a.sort_by(&:to_sxp)
      neighbourhood = (incoming + outgoing).uniq

      # `matched` is the subset of statements which match the triple expression
      status("arcsIn: #{incoming.count}, arcsOut: #{outgoing.count}")
      matched = triple_expr ? triple_expr.matches(neighbourhood) : []

      # `remainder` is whatever was not matched; `outs` are its outgoing arcs
      remainder = neighbourhood - matched
      outs = remainder.select {|st| st.subject == n}

      # `matchables` are the `outs` whose predicate is used by some TripleConstraint;
      # with no triple expression there are none
      predicates = if triple_expr
        triple_expr.triple_constraints.map(&:predicate).uniq
      else
        []
      end
      matchables = outs.select {|st| predicates.include?(st.predicate)}

      # No matchable can be matched by any TripleConstraint in the expression
      if triple_expr
        matchables.each do |st|
          triple_expr.triple_constraints.each do |constraint|
            begin
              status "check matchable #{st.to_sxp} against #{constraint.to_sxp}"
              if st.predicate == constraint.predicate && constraint.matches([st])
                not_satisfied "Unmatched statement: #{st.to_sxp} matched #{constraint.to_sxp}"
              end
            rescue NotMatched
              # Expected not to match
              logger.recovering = false
            end
          end
        end
      end

      # FIXME: Really run against every TripleConstraint?

      # `unmatchables` are the `outs` which are not `matchables`
      unmatchables = outs - matchables

      # Every matchable must have a predicate listed in `extra`
      matchables.each do |st|
        next if extra.include?(st.predicate)
        not_satisfied "Statement remains with predicate #{st.predicate} not in extra"
      end

      # A closed shape may not leave unmatchables behind
      if closed? && !unmatchables.empty?
        not_satisfied "Unmatchables remain on a closed shape"
      end

      # Semantic actions are only run when something was matched
      # FIXME: what triples to run against satisfies?
      unless matched.empty?
        semantic_actions.all? {|action| action.satisfies?(matched)}
      end

      true
    rescue NotMatched => e
      logger.recovering = false
      not_satisfied e.message
    end

    ##
    # Triple expressions found directly among this shape's operands
    # @return [Array<TripleExpressions>]
    def triple_expressions
      operands.select {|operand| operand.is_a?(TripleExpression)}
    end

    private
    # Predicates gathered from every `:extra` operand (there may be several), de-duplicated
    def extra
      extra_operands = operands.select {|operand| operand.is_a?(Array) && operand.first == :extra}
      extra_operands.flat_map {|ary| Array(ary[1..-1])}.uniq
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module ShEx::Algebra
  ##
  # Reference to another shape in the enclosing schema, identified by label.
  class ShapeRef < Operator::Unary
    include Satisfiable
    NAME = :shapeRef

    ##
    # @param [RDF::Resource] arg label of the referenced shape
    # @param [Hash{Symbol => Object}] options
    def initialize(arg, **options)
      unless arg.is_a?(RDF::Resource)
        structure_error("Shape reference must be an IRI or BNode: #{arg}", exception: ArgumentError)
      end
      super
    end

    ##
    # Satisfied when the referenced shape is satisfied by `n`.
    #
    # @param [RDF::Resource] n
    # @return [Boolean] `true` if satisfied
    # @raise [ShEx::NotSatisfied] if not satisfied
    # @see [https://shexspec.github.io/spec/#shape-expression-semantics]
    def satisfies?(n)
      begin
        status "ref #{operands.first.to_s}"
        referenced_shape.satisfies?(n)
        status "ref satisfied"
        true
      rescue ShEx::NotSatisfied => e
        # Report the failure against this reference as well
        not_satisfied e.message
        raise
      end
    end

    ##
    # Looks up the referenced shape in the schema's `shapes` hash, using the
    # string form of the reference as the key.
    #
    # @return [Shape]
    def referenced_shape
      known_shapes = schema.shapes
      known_shapes[operands.first.to_s]
    end

    ##
    # A ShapeRef is valid if its ancestor schema has a shape whose label is
    # the same as its reference.
    def validate!
      if referenced_shape.nil?
        structure_error("Missing referenced shape: #{operands.first}")
      end
      # FIXME
      #raise ShEx::ParseError, "Self referencing shape: #{operands.first}" if referenced_shape == first_ancestor(Shape)
      super
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module ShEx::Algebra
  ##
  # Start expression of a schema; wraps a single operand.
  class Start < Operator::Unary
    NAME = :start

    #
    # Delegates to the wrapped operand's `satisfies?`.
    #
    # @param [RDF::Resource] n
    # @return [Boolean] `true` if satisfied
    # @raise [ShEx::NotSatisfied] if not satisfied
    def satisfies?(n)
      begin
        status ""
        expression = operands.first
        expression.satisfies?(n)
        status("satisfied")
        true
      rescue ShEx::NotSatisfied => e
        # Report the failure against the start expression as well
        not_satisfied e.message
        raise
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module ShEx::Algebra
  ##
  # Value-set entry matching any value that starts with a given stem.
  class Stem < Operator::Unary
    NAME = :stem

    ##
    # For a node n and constraint value v, nodeSatisfies(n, v) if n matches some valueSetValue vsv in v. A term matches a valueSetValue if:
    #
    # * vsv is a Stem with stem st and nodeIn(n, st).
    #
    # @param [#start_with?] value
    # @return [Boolean] whether `value` starts with the stem operand
    def match?(value)
      unless value.start_with?(operands.first)
        status "not matched #{value}"
        return false
      end

      status "matched #{value}"
      true
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module ShEx::Algebra
  ##
  # Value-set entry made of a stem (or `:wildcard`) and a list of exclusions.
  class StemRange < Operator::Binary
    NAME = :stemRange

    ##
    # For a node n and constraint value v, nodeSatisfies(n, v) if n matches some valueSetValue vsv in v. A term matches a valueSetValue if:
    #
    # * vsv is a StemRange with stem st and exclusions excls and nodeIn(n, st) and there is no x in excls such that nodeIn(n, x).
    # * vsv is a Wildcard with exclusions excls and there is no x in excls such that nodeIn(n, x).
    #
    # @param [Object] value
    # @return [Boolean] `true` when the stem matches and no exclusion applies
    def match?(value)
      stem = operands.first
      stem_matches = case stem
      when :wildcard  then true
      when RDF::Value then value.start_with?(operands.first)
      else                 false
      end

      unless stem_matches
        status "#{value} does not match #{operands.first}"
        return false
      end

      # An exclusion applies when it equals the value, or is a Stem matching it
      excluded = exclusions.any? do |exclusion|
        case exclusion
        when RDF::Value then value == exclusion
        when Stem       then exclusion.match?(value)
        else                 false
        end
      end

      if excluded
        status "#{value} excluded"
        return false
      end

      status "matched #{value}"
      true
    end

    ##
    # Entries following the `:exclusions` tag in the last operand, or an
    # empty array when the last operand is not such a tagged array.
    #
    # @return [Array]
    def exclusions
      tail = operands.last
      if tail.is_a?(Array) && tail.first == :exclusions
        operands.last[1..-1]
      else
        []
      end
    end
  end
end
|