shex 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/AUTHORS +1 -0
- data/CREDITS +0 -0
- data/LICENSE +24 -0
- data/README.md +124 -0
- data/VERSION +1 -0
- data/etc/doap.ttl +33 -0
- data/lib/shex.rb +133 -0
- data/lib/shex/algebra.rb +39 -0
- data/lib/shex/algebra/and.rb +32 -0
- data/lib/shex/algebra/annotation.rb +6 -0
- data/lib/shex/algebra/base.rb +6 -0
- data/lib/shex/algebra/each_of.rb +53 -0
- data/lib/shex/algebra/external.rb +24 -0
- data/lib/shex/algebra/inclusion.rb +59 -0
- data/lib/shex/algebra/node_constraint.rb +148 -0
- data/lib/shex/algebra/not.rb +19 -0
- data/lib/shex/algebra/one_of.rb +50 -0
- data/lib/shex/algebra/operator.rb +274 -0
- data/lib/shex/algebra/or.rb +37 -0
- data/lib/shex/algebra/prefix.rb +6 -0
- data/lib/shex/algebra/satisfiable.rb +44 -0
- data/lib/shex/algebra/schema.rb +125 -0
- data/lib/shex/algebra/semact.rb +38 -0
- data/lib/shex/algebra/shape.rb +93 -0
- data/lib/shex/algebra/shape_ref.rb +46 -0
- data/lib/shex/algebra/start.rb +20 -0
- data/lib/shex/algebra/stem.rb +20 -0
- data/lib/shex/algebra/stem_range.rb +42 -0
- data/lib/shex/algebra/triple_constraint.rb +72 -0
- data/lib/shex/algebra/triple_expression.rb +46 -0
- data/lib/shex/algebra/unary_shape.rb +6 -0
- data/lib/shex/algebra/value.rb +28 -0
- data/lib/shex/meta.rb +7914 -0
- data/lib/shex/parser.rb +801 -0
- data/lib/shex/terminals.rb +106 -0
- metadata +224 -0
data/lib/shex/parser.rb
ADDED
@@ -0,0 +1,801 @@
|
|
1
|
+
require 'ebnf'
|
2
|
+
require 'ebnf/ll1/parser'
|
3
|
+
require 'shex/meta'
|
4
|
+
|
5
|
+
module ShEx
|
6
|
+
##
|
7
|
+
# A parser for the ShEx grammar.
|
8
|
+
#
|
9
|
+
# @see https://www.w3.org/2005/01/yacker/uploads/ShEx3?lang=perl&markup=html#productions
|
10
|
+
# @see http://en.wikipedia.org/wiki/LL_parser
|
11
|
+
class Parser
|
12
|
+
include ShEx::Meta
|
13
|
+
include ShEx::Terminals
|
14
|
+
include EBNF::LL1::Parser
|
15
|
+
|
16
|
+
##
|
17
|
+
# Any additional options for the parser.
|
18
|
+
#
|
19
|
+
# @return [Hash]
|
20
|
+
attr_reader :options
|
21
|
+
|
22
|
+
##
|
23
|
+
# The current input string being processed.
|
24
|
+
#
|
25
|
+
# @return [String]
|
26
|
+
attr_accessor :input
|
27
|
+
|
28
|
+
##
|
29
|
+
# The current input tokens being processed.
|
30
|
+
#
|
31
|
+
# @return [Array<Token>]
|
32
|
+
attr_reader :tokens
|
33
|
+
|
34
|
+
##
|
35
|
+
# The internal representation of the result using hierarchy of RDF objects and ShEx::Operator
|
36
|
+
# objects.
|
37
|
+
# @return [Array]
|
38
|
+
# @see http://sparql.rubyforge.org/algebra
|
39
|
+
attr_accessor :result
|
40
|
+
|
41
|
+
# Terminals passed to lexer. Order matters!
|
42
|
+
# CODE: body of a semantic action, written as `{ ... %}`.
terminal(:CODE, CODE, unescape: true) do |_prod, token, input|
  # Drop the first and last characters (the braces), then the closing `%`
  # together with any whitespace that follows it. Leading whitespace is kept.
  inner = token.value[1..-2]
  input[:code] = inner.sub(/%\s*$/, '')
end

# REPEAT_RANGE: explicit cardinality such as `{2}`, `{2,}`, `{2,5}` or `{2,*}`.
terminal(:REPEAT_RANGE, REPEAT_RANGE) do |_prod, token, input|
  # All-digit parts become Integers; anything else (e.g. "*") stays a String.
  bounds = token.value[1..-2].split(',').map { |part| part =~ /^\d+$/ ? part.to_i : part }
  if bounds.length == 1
    # A single bound followed by a comma is open-ended; without a comma the
    # minimum is also the maximum.
    bounds[1] = token.value.include?(',') ? '*' : bounds[0]
  end
  input[:cardinality] = {min: bounds[0], max: bounds[1]}
end
|
52
|
+
# BLANK_NODE_LABEL: everything after the first two characters is the node identifier.
terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |_prod, token, input|
  identifier = token.value[2..-1]
  input[:blankNode] = bnode(identifier)
end

# IRIREF: strip the enclosing delimiters and build an IRI from the remainder.
terminal(:IRIREF, IRIREF, unescape: true) do |_prod, token, input|
  reference = token.value[1..-2]
  begin
    input[:iri] = iri(reference)
  rescue ArgumentError => e
    # Re-raise as the parser's own Error, keeping the original message
    raise Error, e.message
  end
end
|
62
|
+
# DOUBLE: when a '.' is immediately followed by the exponent marker
# (e.g. `1.e5`), insert a zero between them before building the literal.
terminal(:DOUBLE, DOUBLE) do |_prod, token, input|
  normalized = token.value.sub(/\.([eE])/, '.0\1')
  input[:literal] = literal(normalized, datatype: RDF::XSD.double)
end

# DECIMAL: the lexical form is passed through unchanged. Prefixing a leading
# zero for values that start with '.' is currently disabled:
#   value = "0#{token.value}" if token.value[0,1] == "."
terminal(:DECIMAL, DECIMAL) do |_prod, token, input|
  input[:literal] = literal(token.value, datatype: RDF::XSD.decimal)
end

# INTEGER: typed as xsd:integer.
terminal(:INTEGER, INTEGER) do |_prod, token, input|
  input[:literal] = literal(token.value, datatype: RDF::XSD.integer)
end
|
78
|
+
# PNAME_LN: `prefix:local`, expanded through the declared prefixes.
terminal(:PNAME_LN, PNAME_LN, unescape: true) do |_prod, token, input|
  pfx, local = token.value.split(":", 2)
  input[:iri] = ns(pfx, local)
  unless input[:iri].absolute?
    error(nil, "Compact IRI missing prefix definition: #{token.value}", production: :PNAME_LN)
  end
end

# PNAME_NS: `prefix:`. Records both the expanded namespace IRI and the prefix itself.
terminal(:PNAME_NS, PNAME_NS) do |_prod, token, input|
  pfx = token.value[0..-2] # drop the final character

  input[:iri] = ns(pfx, nil)
  input[:prefix] = pfx && pfx.to_sym
end

# ATPNAME_LN: `@prefix:local`, a shape label given as a prefixed name.
terminal(:ATPNAME_LN, ATPNAME_LN, unescape: true) do |_prod, token, input|
  raw_prefix, local = token.value.split(":", 2)
  # Remove the leading '@' and any whitespace matched by WS
  pfx = raw_prefix.sub(/^@#{WS}*/, '')
  input[:shapeLabel] = ns(pfx, local)
  unless input[:shapeLabel].absolute?
    error(nil, "Compact IRI missing prefix definition: #{token.value}", production: :ATPNAME_LN)
  end
end

# ATPNAME_NS: `@prefix:`, a shape label that is just a namespace.
terminal(:ATPNAME_NS, ATPNAME_NS) do |_prod, token, input|
  # Drop the final character, then the leading '@' and following whitespace
  pfx = token.value[0..-2].sub(/^@\s*/, '')

  input[:shapeLabel] = ns(pfx, nil)
end
|
101
|
+
# LANGTAG: keep everything after the first character.
terminal(:LANGTAG, LANGTAG) do |_prod, token, input|
  input[:language] = token.value[1..-1]
end

# Long string forms: strip three characters from each end.
terminal(:STRING_LITERAL_LONG1, STRING_LITERAL_LONG1, unescape: true) do |_prod, token, input|
  input[:string] = token.value[3..-4]
end
terminal(:STRING_LITERAL_LONG2, STRING_LITERAL_LONG2, unescape: true) do |_prod, token, input|
  input[:string] = token.value[3..-4]
end

# Short string forms: strip one character from each end.
terminal(:STRING_LITERAL1, STRING_LITERAL1, unescape: true) do |_prod, token, input|
  input[:string] = token.value[1..-2]
end
terminal(:STRING_LITERAL2, STRING_LITERAL2, unescape: true) do |_prod, token, input|
  input[:string] = token.value[1..-2]
end

# RDF_TYPE: yields a copy of RDF.type whose lexical form is set to 'a'.
terminal(:RDF_TYPE, RDF_TYPE) do |_prod, token, input|
  rdf_type = RDF.type.dup
  rdf_type.lexical = 'a'
  input[:iri] = rdf_type
end
|
119
|
+
|
120
|
+
# String terminals
|
121
|
+
# Keyword and punctuation terminals. Each recognised token records a marker
# under a key that the productions look up later; unrecognised tokens are ignored.
terminal(nil, STR_EXPR, map: STR_MAP) do |_prod, token, input|
  text = token.value
  case text
  # Cardinality shorthands
  when '*'
    input[:cardinality] = {min: 0, max: "*"}
  when '+'
    input[:cardinality] = {min: 1, max: "*"}
  when '?'
    input[:cardinality] = {min: 0, max: 1}
  # Punctuation markers keep the token text itself
  when '!'
    input[:not] = text
  when '^'
    input[:inverse] = text
  when '.'
    input[:dot] = text
  when 'true', 'false'
    input[:literal] = RDF::Literal::Boolean.new(text)
  when '~'
    input[:pattern] = text
  # Keywords are stored as lower-cased symbols
  when 'BNODE', 'IRI', 'NONLITERAL'
    input[:nonLiteralKind] = text.downcase.to_sym
  when 'CLOSED'
    input[:closed] = text.downcase.to_sym
  when 'EXTERNAL'
    input[:external] = text.downcase.to_sym
  when 'FRACTIONDIGITS', 'TOTALDIGITS'
    input[:numericLength] = text.downcase.to_sym
  when 'LITERAL'
    input[:shapeAtomLiteral] = text.downcase.to_sym
  when 'LENGTH', 'MINLENGTH', 'MAXLENGTH'
    input[:stringLength] = text.downcase.to_sym
  when 'MININCLUSIVE', 'MINEXCLUSIVE', 'MAXINCLUSIVE', 'MAXEXCLUSIVE'
    input[:numericRange] = text.downcase.to_sym
  when 'NOT'
    input[:not] = text.downcase.to_sym
  when 'PATTERN'
    input[:pattern] = text.downcase.to_sym
  when 'START'
    input[:start] = text.downcase.to_sym
  else
    #raise "Unexpected MC terminal: #{token.inspect}"
  end
end
|
152
|
+
|
153
|
+
# Productions
|
154
|
+
# [1] shexDoc ::= directive* ((notStartAction | startActions) statement*)?
|
155
|
+
# Assembles the schema from everything collected while parsing the document.
# Operands are added in a fixed order: base, prefixes, start actions
# (codeDecl), start expression, shapes. Parts that were not seen are omitted.
production(:shexDoc) do |input, data, _callback|
  start = data[:start]

  operands = []
  operands << [:base, data[:baseDecl]] if data[:baseDecl]
  operands << [:prefix, data[:prefixDecl]] if data[:prefixDecl]
  operands.concat(Array(data[:codeDecl]))
  operands << Algebra::Start.new(start) if start
  operands << [:shapes, data[:shapes]] if data[:shapes]

  schema = Algebra::Schema.new(*operands, options)
  input[:schema] = schema

  # Give every descendant that accepts one a back-reference to the schema
  schema.each_descendant do |op|
    op.schema = schema if op.respond_to?(:schema=)
  end
  self
end
|
173
|
+
|
174
|
+
# [2] directive ::= baseDecl | prefixDecl
|
175
|
+
|
176
|
+
# [3] baseDecl ::= "BASE" IRIREF
|
177
|
+
# Records the declared base IRI and makes it the parser's current base.
production(:baseDecl) do |input, data, _callback|
  resolved = iri(data[:iri])
  self.base_uri = resolved
  input[:baseDecl] = resolved
end

# [4] prefixDecl ::= "PREFIX" PNAME_NS IRIREF
# Registers the prefix mapping and appends a [prefix, iri] pair to the
# declarations collected so far.
production(:prefixDecl) do |input, data, _callback|
  pfx = data[:prefix]
  self.prefix(pfx, data[:iri])
  input[:prefixDecl] ||= []
  input[:prefixDecl] << [pfx.to_s, data[:iri]]
end
|
187
|
+
|
188
|
+
# [5] notStartAction ::= start | shapeExprDecl
|
189
|
+
# [6] start ::= "start" '=' shapeExpression
|
190
|
+
# The start production simply forwards its shape expression.
production(:start) { |input, data, _callback| input[:start] = data[:shapeExpression] }
# [7] startActions ::= codeDecl+

# [8] statement ::= directive | notStartAction

# [9] shapeExprDecl ::= shapeLabel (shapeExpression|"EXTERNAL")
# Stores the declared expression under its label in the shapes map.
production(:shapeExprDecl) do |input, data, _callback|
  label = Array(data[:shapeLabel]).first
  declared = data[:shapeExpression]
  shape_expression =
    case declared
    when Algebra::NodeConstraint, Algebra::Or, Algebra::And, Algebra::Not, Algebra::ShapeRef, Algebra::Shape
      declared
    else
      # Anything else: an external shape if EXTERNAL was seen, otherwise an empty shape
      data[:external] ? Algebra::External.new : Algebra::Shape.new
    end

  input[:shapes] ||= {}
  input[:shapes][label] = shape_expression
end
|
209
|
+
|
210
|
+
# [10] shapeExpression ::= shapeOr
|
211
|
+
# [11] inlineShapeExpression ::= inlineShapeOr
|
212
|
+
|
213
|
+
# [12] shapeOr ::= shapeAnd ("OR" shapeAnd)*
|
214
|
+
# Both OR productions share the shape_or helper.
production(:shapeOr) { |input, data, _callback| shape_or(input, data) }
# [13] inlineShapeOr ::= inlineShapeAnd ("OR" inlineShapeAnd)*
production(:inlineShapeOr) { |input, data, _callback| shape_or(input, data) }
|
221
|
+
##
# Shared handler for the shapeOr / inlineShapeOr productions.
#
# Copies the :closed, :extraPropertySet and :codeDecl entries of `data` into
# `input`, then stores the shape expression: an Algebra::Or when more than one
# alternative was collected, otherwise the single alternative. Nothing is
# stored when there is no expression.
#
# @param [Hash] input data passed up to the enclosing production (modified)
# @param [Hash] data  data collected for this production
def shape_or(input, data)
  inherited = data.select { |key, _| [:closed, :extraPropertySet, :codeDecl].include?(key) }
  input.merge!(inherited)

  alternatives = Array(data[:shapeExpression])
  expression = alternatives.length > 1 ? Algebra::Or.new(*alternatives) : alternatives.first
  input[:shapeExpression] = expression if expression
rescue ArgumentError => e
  error(nil, "Argument Error on OR: #{e.message}")
end
private :shape_or
|
233
|
+
|
234
|
+
# [14] shapeAnd ::= shapeNot ("AND" shapeNot)*
|
235
|
+
# Both AND productions share the shape_and helper.
production(:shapeAnd) { |input, data, _callback| shape_and(input, data) }
# [15] inlineShapeAnd ::= inlineShapeNot ("AND" inlineShapeNot)*
production(:inlineShapeAnd) { |input, data, _callback| shape_and(input, data) }
|
242
|
+
##
# Shared handler for the shapeAnd / inlineShapeAnd productions.
#
# Copies the :closed, :extraPropertySet and :codeDecl entries of `data` into
# `input`. Nested Algebra::And operands are flattened into a single list; the
# result (an Algebra::And when there are several operands, otherwise the lone
# operand) is appended to `input[:shapeExpression]`.
#
# @param [Hash] input data passed up to the enclosing production (modified)
# @param [Hash] data  data collected for this production
def shape_and(input, data)
  inherited = data.select { |key, _| [:closed, :extraPropertySet, :codeDecl].include?(key) }
  input.merge!(inherited)

  # Splice in the operands of any nested And
  conjuncts = Array(data[:shapeExpression]).flat_map do |expr|
    expr.is_a?(Algebra::And) ? expr.operands : [expr]
  end
  conjunction = conjuncts.length > 1 ? Algebra::And.new(*conjuncts) : conjuncts.first
  return unless conjunction

  (input[:shapeExpression] ||= []) << conjunction
rescue ArgumentError => e
  error(nil, "Argument Error on AND: #{e.message}")
end
private :shape_and
|
257
|
+
|
258
|
+
# [16] shapeNot ::= "NOT"? shapeAtom
|
259
|
+
# Both NOT productions share the shape_not helper.
production(:shapeNot) { |input, data, _callback| shape_not(input, data) }
# [17] inlineShapeNot ::= "NOT"? inlineShapeAtom
production(:inlineShapeNot) { |input, data, _callback| shape_not(input, data) }
|
266
|
+
##
# Shared handler for the shapeNot / inlineShapeNot productions.
#
# Copies the :closed, :extraPropertySet and :codeDecl entries of `data` into
# `input`, wraps the atom in Algebra::Not when a negation marker was seen, and
# appends the result to `input[:shapeExpression]` if there is one.
#
# @param [Hash] input data passed up to the enclosing production (modified)
# @param [Hash] data  data collected for this production
def shape_not(input, data)
  inherited = data.select { |key, _| [:closed, :extraPropertySet, :codeDecl].include?(key) }
  input.merge!(inherited)

  atom = data[:shapeExpression]
  expression = data[:not] ? Algebra::Not.new(atom) : atom
  #error(nil, "Expected an atom for NOT") unless expression
  (input[:shapeExpression] ||= []) << expression if expression
end
private :shape_not
|
274
|
+
|
275
|
+
# [18] shapeAtom ::= nodeConstraint shapeOrRef?
|
276
|
+
# | shapeOrRef
|
277
|
+
# | "(" shapeExpression ")"
|
278
|
+
# | '.' # no constraint
|
279
|
+
# Both atom productions share the shape_atom helper.
production(:shapeAtom) { |input, data, _callback| shape_atom(input, data) }
# [19] inlineShapeAtom ::= nodeConstraint inlineShapeOrRef?
#                        | inlineShapeOrRef nodeConstraint?
#                        | "(" shapeExpression ")"
#                        | '.' # no constraint
production(:inlineShapeAtom) { |input, data, _callback| shape_atom(input, data) }
|
289
|
+
##
# Shared handler for the shapeAtom / inlineShapeAtom productions.
#
# Combines the node constraint with the shape (the shape-or-reference if
# present, else a nested shape expression): an Algebra::And when both exist,
# whichever one exists otherwise. Nothing is stored when neither is present.
# The :closed, :extraPropertySet and :codeDecl entries of `data` are copied
# into `input` as well.
#
# @param [Hash] input data passed up to the enclosing production (modified)
# @param [Hash] data  data collected for this production
def shape_atom(input, data)
  parts = [data[:nodeConstraint], data[:shapeOrRef] || data[:shapeExpression]].compact

  inherited = data.select { |key, _| [:closed, :extraPropertySet, :codeDecl].include?(key) }
  input.merge!(inherited)

  expression = parts.length > 1 ? Algebra::And.new(*parts) : parts.first

  input[:shapeExpression] = expression if expression
end
private :shape_atom
|
305
|
+
|
306
|
+
# [20] shapeOrRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel | shapeDefinition
|
307
|
+
# Both shape-or-reference productions share the shape_or_ref helper.
production(:shapeOrRef) { |input, data, _callback| shape_or_ref(input, data) }
# [21] inlineShapeOrRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel | inlineShapeDefinition
production(:inlineShapeOrRef) { |input, data, _callback| shape_or_ref(input, data) }
|
314
|
+
##
# Shared handler for the shapeOrRef / inlineShapeOrRef productions.
#
# Copies the :closed, :extraPropertySet and :codeDecl entries of `data` into
# `input`, then stores either the inline shape or, failing that, an
# Algebra::ShapeRef to the first collected shape label. Nothing is stored when
# neither a shape nor a label is available.
#
# @param [Hash] input data passed up to the enclosing production (modified)
# @param [Hash] data  data collected for this production
def shape_or_ref(input, data)
  inherited = data.select { |key, _| [:closed, :extraPropertySet, :codeDecl].include?(key) }
  input.merge!(inherited)

  shape = data[:shape]
  label = Array(data[:shapeLabel]).first
  return unless shape || label

  input[:shapeOrRef] = shape || Algebra::ShapeRef.new(label)
rescue ArgumentError => e
  error(nil, "Argument Error on ShapeOrRef: #{e.message}")
end
private :shape_or_ref
|
323
|
+
|
324
|
+
# [22] nodeConstraint ::= "LITERAL" xsFacet*
|
325
|
+
# | nonLiteralKind stringFacet*
|
326
|
+
# | datatype xsFacet*
|
327
|
+
# | valueSet xsFacet*
|
328
|
+
# | xsFacet+
|
329
|
+
# Builds an Algebra::NodeConstraint from the datatype, node kind, value set
# and facets gathered for this production.
production(:nodeConstraint) do |input, data, _callback|
  datatype = data[:datatype]

  # Semantic validation (reported as a syntax error): a numeric facet may only
  # accompany a numeric datatype. A probe literal with that datatype is built
  # and checked for being an RDF::Literal::Numeric.
  if datatype && data[:numericFacet]
    probe = RDF::Literal.new("1", datatype: datatype)
    unless probe.is_a?(RDF::Literal::Numeric)
      error(nil, "Numeric facet used on non-numeric datatype: #{datatype}", production: :nodeConstraint)
    end
  end

  operands = []
  operands.push(:datatype, datatype) if datatype
  operands.push(data[:shapeAtomLiteral], data[:nonLiteralKind])
  operands.concat(Array(data[:valueSetValue]))
  operands.concat(Array(data[:numericFacet]))
  operands.concat(Array(data[:stringFacet]))

  # Entries that were never set are nil and are dropped here
  input[:nodeConstraint] = Algebra::NodeConstraint.new(*operands.compact)
end
|
347
|
+
|
348
|
+
# [23] nonLiteralKind ::= "IRI" | "BNODE" | "NONLITERAL"
|
349
|
+
|
350
|
+
# [24] xsFacet ::= stringFacet | numericFacet
|
351
|
+
# [25] stringFacet ::= stringLength INTEGER
|
352
|
+
# | "PATTERN" string
|
353
|
+
# | '~' string # shortcut for "PATTERN"
|
354
|
+
# Appends one string facet, either [length-kind, literal] or [:pattern, string],
# to the facets collected so far.
production(:stringFacet) do |input, data, _callback|
  facets = (input[:stringFacet] ||= [])
  length_kind = data[:stringLength]

  facet =
    if length_kind
      # Each length facet may appear only once per node constraint
      if facets.flatten.include?(length_kind)
        error(nil, "#{length_kind} constraint may only be used once in a Node Constraint", production: :stringFacet)
      end
      [length_kind, data[:literal]]
    elsif data[:pattern]
      [:pattern, data[:string]]
    end

  facets << facet
end
|
365
|
+
|
366
|
+
# [26] stringLength ::= "LENGTH" | "MINLENGTH" | "MAXLENGTH"
|
367
|
+
|
368
|
+
# [27] numericFacet ::= numericRange (numericLiteral | string '^^' datatype )
|
369
|
+
# | numericLength INTEGER
|
370
|
+
# Appends one numeric facet, either [range-kind, literal] or
# [length-kind, literal], to the facets collected so far.
production(:numericFacet) do |input, data, _callback|
  facets = (input[:numericFacet] ||= [])

  facet =
    if data[:numericRange]
      # Use the numeric literal if one was parsed, otherwise build one from
      # the string and its datatype
      bound = data[:literal] || literal(data[:string], datatype: data[:datatype])
      unless bound.is_a?(RDF::Literal::Numeric)
        error(nil, "numericRange must use a numeric datatype: #{data[:datatype]}", production: :numericFacet)
      end
      [data[:numericRange], bound]
    elsif data[:numericLength]
      [data[:numericLength], data[:literal]]
    end

  facets << facet
end
|
380
|
+
|
381
|
+
# [28] numericRange ::= "MININCLUSIVE" | "MINEXCLUSIVE" | "MAXINCLUSIVE" | "MAXEXCLUSIVE"
|
382
|
+
# [29] numericLength ::= "TOTALDIGITS" | "FRACTIONDIGITS"
|
383
|
+
|
384
|
+
# [30] shapeDefinition ::= (extraPropertySet | "CLOSED")* '{' tripleExpression? '}' annotation* semanticActions
|
385
|
+
# Both shape definition productions share the shape_definition helper.
production(:shapeDefinition) { |input, data, _callback| shape_definition(input, data) }
# [31] inlineShapeDefinition ::= (extraPropertySet | "CLOSED")* '{' tripleExpression? '}'
production(:inlineShapeDefinition) { |input, data, _callback| shape_definition(input, data) }
|
392
|
+
##
# Shared handler for the shapeDefinition / inlineShapeDefinition productions.
#
# Builds an Algebra::Shape from the triple expression followed by the extra
# property sets, the :closed marker, annotations and semantic actions, in that
# order. The attribute list is a fresh array, so the arrays held in `data` are
# left untouched.
#
# NOTE(review): when there is no triple expression nothing is stored, so any
# CLOSED/EXTRA/annotation information on an empty shape is not passed on from
# here - confirm that this is intended.
#
# @param [Hash] input data passed up to the enclosing production (modified)
# @param [Hash] data  data collected for this production (not modified)
def shape_definition(input, data)
  expression = data[:tripleExpression]

  attrs = []
  attrs.concat(Array(data[:extraPropertySet]))
  attrs << :closed if data[:closed]
  attrs.concat(Array(data[:annotation]))
  attrs.concat(Array(data[:codeDecl]))

  input[:shape] = Algebra::Shape.new(expression, *attrs) if expression
end
private :shape_definition
|
402
|
+
|
403
|
+
# [32] extraPropertySet ::= "EXTRA" predicate+
|
404
|
+
# Tags the collected predicates with a leading :extra marker (the predicate
# array is modified in place) and appends it to the extra property sets.
production(:extraPropertySet) do |input, data, _callback|
  tagged = data[:predicate].unshift(:extra)
  input[:extraPropertySet] ||= []
  input[:extraPropertySet] << tagged
end
|
407
|
+
|
408
|
+
# [33] tripleExpression ::= oneOfTripleExpr
|
409
|
+
# [34] oneOfTripleExpr ::= groupTripleExpr ('|' groupTripleExpr)*
|
410
|
+
# Several alternatives become an Algebra::OneOf; a single one is passed through.
production(:oneOfTripleExpr) do |input, data, _callback|
  alternatives = Array(data[:tripleExpression])
  expression = alternatives.length > 1 ? Algebra::OneOf.new(*data[:tripleExpression]) : alternatives.first
  input[:tripleExpression] = expression if expression
end

# [37] groupTripleExpr ::= unaryTripleExpr (';' unaryTripleExpr?)*
# Several members become an Algebra::EachOf; a single one is passed through.
# The result is appended, so the enclosing oneOfTripleExpr sees a list.
production(:groupTripleExpr) do |input, data, _callback|
  members = Array(data[:tripleExpression])
  expression = members.length > 1 ? Algebra::EachOf.new(*data[:tripleExpression]) : members.first
  (input[:tripleExpression] ||= []) << expression if expression
end
|
428
|
+
|
429
|
+
# [40] unaryTripleExpr ::= productionLabel? (tripleConstraint | bracketedTripleExpr) | include
|
430
|
+
# Attaches the optional production label to the expression's operands and
# appends the expression to the list seen by the enclosing group.
production(:unaryTripleExpr) do |input, data, _callback|
  expression = data[:tripleExpression]
  if expression
    label = data[:productionLabel]
    expression.operands << label if label

    (input[:tripleExpression] ||= []) << expression
  end
end
|
436
|
+
|
437
|
+
# [41] bracketedTripleExpr ::= '(' oneOfTripleExpr ')' cardinality? annotation* semanticActions
|
438
|
+
# Adds cardinality, semantic actions and annotations to the operands of the
# bracketed OneOf/EachOf expression.
production(:bracketedTripleExpr) do |input, data, _callback|
  # XXX cardinality? annotation* semanticActions
  expression = data[:tripleExpression]
  unless expression.is_a?(Algebra::OneOf) || expression.is_a?(Algebra::EachOf)
    error(nil, "Bracketed Expression requires multiple contained expressions", production: :bracketedTripleExpr)
  end

  cardinality = data.fetch(:cardinality, {})
  extras = []
  extras << [:min, cardinality[:min]] if cardinality[:min]
  extras << [:max, cardinality[:max]] if cardinality[:max]
  extras.concat(Array(data[:codeDecl]))
  extras.concat(Array(data[:annotation]))

  expression.operands.concat(extras)
  input[:tripleExpression] = expression
end
|
456
|
+
|
457
|
+
# [42] productionLabel ::= '$' (iri | blankNode)
|
458
|
+
# The label is whichever of the IRI or blank node was parsed.
production(:productionLabel) { |input, data, _callback| input[:productionLabel] = data[:iri] || data[:blankNode] }

# [43] tripleConstraint ::= senseFlags? predicate shapeExpression cardinality? annotation* semanticActions
# Operand order: :inverse flag, predicate, shape expression, [:min, n],
# [:max, n], then semantic actions and annotations.
production(:tripleConstraint) do |input, data, _callback|
  cardinality = data.fetch(:cardinality, {})

  operands = []
  operands << :inverse if data[:inverse] || data[:not]
  operands << Array(data[:predicate]).first # predicate
  operands << data[:shapeExpression]
  operands << [:min, cardinality[:min]] if cardinality[:min]
  operands << [:max, cardinality[:max]] if cardinality[:max]
  operands.compact! # predicate / shape expression may be absent
  operands.concat(Array(data[:codeDecl]))
  operands.concat(Array(data[:annotation]))

  input[:tripleExpression] = Algebra::TripleConstraint.new(*operands) unless operands.empty?
end
|
477
|
+
|
478
|
+
# [44] cardinality ::= '*' | '+' | '?' | REPEAT_RANGE
|
479
|
+
# [45] senseFlags ::= '^'
|
480
|
+
# [46] valueSet ::= '[' valueSetValue* ']'
|
481
|
+
|
482
|
+
# [47] valueSetValue ::= iriRange | literal
|
483
|
+
# Wraps the IRI range (or, failing that, the literal) in an Algebra::Value.
production(:valueSetValue) do |input, data, _callback|
  member = Algebra::Value.new(data[:iriRange] || data[:literal])
  input[:valueSetValue] ||= []
  input[:valueSetValue] << member
end

# [48] iriRange ::= iri ('~' exclusion*)? | '.' exclusion+
production(:iriRange) do |input, data, _callback|
  # Exclusions, when present, are tagged with a leading :exclusions marker
  # (the array held in data is modified in place)
  exclusions = data[:exclusion] ? data[:exclusion].unshift(:exclusions) : nil

  input[:iriRange] =
    if data[:pattern]
      # iri '~' with or without exclusions
      exclusions ? Algebra::StemRange.new(data[:iri], exclusions) : Algebra::Stem.new(data[:iri])
    elsif data[:dot]
      Algebra::StemRange.new(:wildcard, exclusions)
    else
      data[:iri]
    end
end

# [49] exclusion ::= '-' iri '~'?
# An exclusion is a stem when followed by '~', otherwise the plain IRI.
production(:exclusion) do |input, data, _callback|
  excluded = data[:pattern] ? Algebra::Stem.new(data[:iri]) : data[:iri]
  input[:exclusion] ||= []
  input[:exclusion] << excluded
end
|
505
|
+
|
506
|
+
# [50] include ::= '&' shapeLabel
|
507
|
+
# An inclusion of the labelled shape's triple expression.
production(:include) { |input, data, _callback| input[:tripleExpression] = Algebra::Inclusion.new(*data[:shapeLabel]) }

# [51] annotation ::= '//' predicate (iri | literal)
production(:annotation) do |input, data, _callback|
  object = data[:iri] || data[:literal]
  input[:annotation] ||= []
  input[:annotation] << Algebra::Annotation.new(data[:predicate].first, object)
end

# [52] semanticActions ::= codeDecl*

# [53] codeDecl ::= '%' iri (CODE | "%")
# The code part is optional, so absent values are dropped before building the action.
production(:codeDecl) do |input, data, _callback|
  parts = [data[:iri], data[:code]].compact
  input[:codeDecl] ||= []
  input[:codeDecl] << Algebra::SemAct.new(*parts)
end
|
523
|
+
|
524
|
+
# [13t] literal ::= rdfLiteral | numericLiteral | booleanLiteral
|
525
|
+
|
526
|
+
# [54] predicate ::= iri | RDF_TYPE
|
527
|
+
# Predicates accumulate in a list.
production(:predicate) do |input, data, _callback|
  input[:predicate] ||= []
  input[:predicate] << data[:iri]
end

# [55] datatype ::= iri
production(:datatype) { |input, data, _callback| input[:datatype] = data[:iri] }

# [56] shapeLabel ::= iri | blankNode
# Shape labels accumulate in a list.
production(:shapeLabel) do |input, data, _callback|
  input[:shapeLabel] ||= []
  input[:shapeLabel] << (data[:iri] || data[:blankNode])
end

# [16t] numericLiteral ::= INTEGER | DECIMAL | DOUBLE
# [129s] rdfLiteral ::= string (LANGTAG | '^^' datatype)?
# The whole data hash is handed over as the literal's options.
production(:rdfLiteral) { |input, data, _callback| input[:literal] = literal(data[:string], data) }
|
546
|
+
|
547
|
+
# [134s] booleanLiteral ::= 'true' | 'false'
|
548
|
+
# [135s] string ::= STRING_LITERAL1 | STRING_LITERAL_LONG1
|
549
|
+
# | STRING_LITERAL2 | STRING_LITERAL_LONG2
|
550
|
+
# [136s] iri ::= IRIREF | prefixedName
|
551
|
+
# [137s] prefixedName ::= PNAME_LN | PNAME_NS
|
552
|
+
# [138s] blankNode ::= BLANK_NODE_LABEL
|
553
|
+
|
554
|
+
##
|
555
|
+
# Initializes a new parser instance.
|
556
|
+
#
|
557
|
+
# @example parsing a ShExC schema
|
558
|
+
# schema = ShEx::Parser.new(%(
|
559
|
+
# PREFIX ex: <http://schema.example/> ex:IssueShape {ex:state IRI}
|
560
|
+
# ).parse
|
561
|
+
#
|
562
|
+
# @param [String, IO, StringIO, #to_s] input
|
563
|
+
# @param [Hash{Symbol => Object}] options
|
564
|
+
# @option options [Hash] :prefixes (Hash.new)
|
565
|
+
# the prefix mappings to use (for accessing intermediate parser productions)
|
566
|
+
# @option options [#to_s] :base_uri (nil)
|
567
|
+
# the base URI to use when resolving relative URIs (for accessing intermediate parser productions)
|
568
|
+
# @option options [#to_s] :anon_base ("b0")
|
569
|
+
# Basis for generating anonymous Nodes
|
570
|
+
# @option options [Boolean] :resolve_iris (false)
|
571
|
+
# Resolve prefix and relative IRIs, otherwise, when serializing the parsed SSE
|
572
|
+
# as S-Expressions, use the original prefixed and relative URIs along with `base` and `prefix`
|
573
|
+
# definitions.
|
574
|
+
# @option options [Boolean] :validate (false)
|
575
|
+
# whether to validate the parsed statements and values
|
576
|
+
# @option options [Boolean] :progress
|
577
|
+
# Show progress of parser productions
|
578
|
+
# @option options [Boolean] :debug
|
579
|
+
# Detailed debug output
|
580
|
+
# @yield [parser] `self`
|
581
|
+
# @yieldparam [ShEx::Parser] parser
|
582
|
+
# @yieldreturn [void] ignored
|
583
|
+
# @return [ShEx::Parser]
|
584
|
+
# @raise [ShEx::NotSatisfied] if not satisfied
|
585
|
+
# @raise [ShEx::ParseError] when a syntax error is detected
|
586
|
+
# @raise [ShEx::StructureError, ArgumentError] on structural problems with schema
|
587
|
+
def initialize(input = nil, options = {}, &block)
  # Read streams fully; anything else is converted to a private String copy
  @input =
    if input.is_a?(IO) || input.is_a?(StringIO)
      input.read
    else
      input.to_s.dup
    end
  @input.encode!(Encoding::UTF_8) if @input.respond_to?(:encode!)

  @options = {anon_base: "b0", validate: false}.merge(options)
  # Unless a debug setting was supplied, derive one from :progress / :validate
  @options[:debug] ||=
    if options[:progress]
      2
    elsif options[:validate]
      1
    end

  debug("base IRI") {base_uri.inspect}
  debug("validate") {validate?.inspect}

  return unless block_given?

  # A block without parameters is evaluated against the parser; otherwise the
  # parser is passed in as the argument
  block.arity.zero? ? instance_eval(&block) : block.call(self)
end
|
609
|
+
|
610
|
+
# The stored parse result in the form handed to the S-Expression serializer.
#
# @return [Object] the value of `@result`, unchanged
def to_sxp_bin
  @result
end
|
614
|
+
|
615
|
+
# Serializes the stored parse result by calling `to_sxp` on it.
#
# @return [Object] whatever `@result.to_sxp` returns
def to_s
  @result.to_sxp
end
|
618
|
+
|
619
|
+
alias_method :ll1_parse, :parse
|
620
|
+
|
621
|
+
# Parse the input
#
# Runs the underlying LL(1) parse (aliased as `ll1_parse`) over the stored
# input, starting at production `prod`, then picks the result from the
# production data:
#
# * production data that is not a Hash is returned as is,
# * an empty Hash yields `nil`,
# * a Hash with a `:schema` entry yields that entry,
# * otherwise the first key followed by its value(s) is returned as an Array.
#
# Trace callbacks from the parser are formatted and routed according to the
# `:debug` option: appended to it when it is an Array (levels up to 2),
# written to `$stderr` when it is `true`, or written to `$stderr` when it is
# an Integer and the trace level does not exceed it.
#
# When validation is enabled, `validate!` is called on a non-nil result.
#
# @param [Symbol, #to_s] prod The starting production for the parser.
# @return [Object] the result, which is also stored in `@result`
# @raise [ShEx::ParseError] when the underlying parser or lexer reports an error
def parse(prod = START)
  parser_options = @options.merge(branch: BRANCH,
                                  first: FIRST,
                                  follow: FOLLOW,
                                  whitespace: WS)

  ll1_parse(@input, prod.to_sym, parser_options) do |context, *data|
    next unless context == :trace

    level, lineno, depth, *args = data
    message = args.to_sse
    # Indent by depth, capped at 100 columns (marked with a trailing '+')
    indent = depth > 100 ? ' ' * 100 + '+' : ' ' * depth
    line = "[#{lineno}](#{level})#{indent}#{message}".chop

    sink = @options[:debug]
    case sink
    when Array
      sink << line unless level > 2
    when TrueClass
      $stderr.puts line
    when Integer
      $stderr.puts(line) if level <= sink
    end
  end

  # The last thing on the @prod_data stack is the result
  top = prod_data
  @result =
    if !top.is_a?(Hash)
      top
    elsif top.empty?
      nil
    elsif top[:schema]
      top[:schema]
    else
      key = top.keys.first
      [key] + Array(top[key]) # Creates [:key, [:triple], ...]
    end

  # Validate resulting expression
  @result.validate! if @result && validate?
  @result
rescue EBNF::LL1::Parser::Error, EBNF::LL1::Lexer::Error => e
  raise ShEx::ParseError.new(e.message, lineno: e.lineno, token: e.token)
end
|
678
|
+
|
679
|
+
private
|
680
|
+
##
|
681
|
+
# Returns the URI prefixes currently defined for this parser.
|
682
|
+
#
|
683
|
+
# @example
|
684
|
+
# prefixes[:dc] #=> RDF::URI('http://purl.org/dc/terms/')
|
685
|
+
#
|
686
|
+
# @return [Hash{Symbol => RDF::URI}]
|
687
|
+
# @since 0.3.0
|
688
|
+
def prefixes
  # Create the mapping on first use and keep it in the options
  @options[:prefixes] = {} unless @options[:prefixes]
  @options[:prefixes]
end
|
691
|
+
|
692
|
+
##
|
693
|
+
# Defines the given URI prefixes for this parser.
|
694
|
+
#
|
695
|
+
# @example
|
696
|
+
# prefixes = {
|
697
|
+
# dc: RDF::URI('http://purl.org/dc/terms/'),
|
698
|
+
# }
|
699
|
+
#
|
700
|
+
# @param [Hash{Symbol => RDF::URI}] prefixes
|
701
|
+
# @return [Hash{Symbol => RDF::URI}]
|
702
|
+
# @since 0.3.0
|
703
|
+
def prefixes=(prefixes)
  # Replaces the whole mapping held in the options
  @options.store(:prefixes, prefixes)
end
|
706
|
+
|
707
|
+
##
|
708
|
+
# Defines the given named URI prefix for this parser.
|
709
|
+
#
|
710
|
+
# @example Defining a URI prefix
|
711
|
+
# prefix :dc, RDF::URI('http://purl.org/dc/terms/')
|
712
|
+
#
|
713
|
+
# @example Returning a URI prefix
|
714
|
+
# prefix(:dc) #=> RDF::URI('http://purl.org/dc/terms/')
|
715
|
+
#
|
716
|
+
# @overload prefix(name, uri)
|
717
|
+
# @param [Symbol, #to_s] name
|
718
|
+
# @param [RDF::URI, #to_s] uri
|
719
|
+
#
|
720
|
+
# @overload prefix(name)
|
721
|
+
# @param [Symbol, #to_s] name
|
722
|
+
#
|
723
|
+
# @return [RDF::URI]
|
724
|
+
def prefix(name, iri = nil)
  # Normalise the name to a Symbol; an empty name maps to the nil key
  key =
    if name.to_s.empty?
      nil
    elsif name.respond_to?(:to_sym)
      name.to_sym
    else
      name.to_s.to_sym
    end

  # Without an IRI this is a lookup, with one it defines the mapping
  return prefixes[key] if iri.nil?

  prefixes[key] = iri
end
|
728
|
+
|
729
|
+
##
|
730
|
+
# Returns the Base URI defined for the parser,
|
731
|
+
# as specified or when parsing a BASE prologue element.
|
732
|
+
#
|
733
|
+
# @example
|
734
|
+
# base #=> RDF::URI('http://example.com/')
|
735
|
+
#
|
736
|
+
# @return [RDF::URI]
|
737
|
+
def base_uri
  # Always wrapped, whatever form the option was supplied in
  configured = @options[:base_uri]
  RDF::URI(configured)
end
|
740
|
+
|
741
|
+
##
|
742
|
+
# Set the Base URI to use for this parser.
|
743
|
+
#
|
744
|
+
# @param [RDF::URI, #to_s] iri
|
745
|
+
#
|
746
|
+
# @example
|
747
|
+
# base_uri = RDF::URI('http://purl.org/dc/terms/')
|
748
|
+
#
|
749
|
+
# @return [RDF::URI]
|
750
|
+
def base_uri=(iri)
  # Stored as an RDF::URI in the options
  @options.store(:base_uri, RDF::URI(iri))
end
|
753
|
+
|
754
|
+
##
|
755
|
+
# Returns the `:validate` option: truthy when the parsed statements and values should be validated.
|
756
|
+
#
|
757
|
+
# @return [Boolean] `true` or `false`
|
758
|
+
# @since 1.0.3
|
759
|
+
def validate?
  # Straight read of the :validate option
  @options[:validate]
end
|
762
|
+
|
763
|
+
# Generate a BNode identifier
|
764
|
+
def bnode(id)
  cache = (@bnode_cache ||= {})

  # A cached node that has been frozen must not be handed out again
  known = cache[id]
  raise Error, "Illegal attempt to reuse a BNode" if known && known.frozen?

  # Same identifier -> same node object
  cache[id] ||= RDF::Node.new(id)
end
|
769
|
+
|
770
|
+
# Create URIs
|
771
|
+
def iri(value)
  uri = RDF::URI(value)
  base = base_uri

  # Only relative IRIs are resolved against the base
  return uri unless base && uri.relative?

  base.join(uri)
end
|
780
|
+
|
781
|
+
# Expand a prefix and local part into an IRI
def ns(prefix, suffix)
  namespace = prefix(prefix).to_s
  # When the namespace contains a '#', a leading '#' on the local part is dropped
  local = namespace.index("#") ? suffix.to_s.sub(/^\#/, "") : suffix
  debug {"ns(#{prefix.inspect}): base: '#{namespace}', suffix: '#{local}'"}
  iri(namespace + local.to_s)
end
|
787
|
+
|
788
|
+
# Create a literal
|
789
|
+
def literal(value, options = {})
  # Internal representation is to not use xsd:string, although it could arguably go the other way.
  # Works on a copy, so the caller's hash is left alone.
  opts = options.reject { |key, val| key == :datatype && val == RDF::XSD.string }
  debug("literal") do
    "value: #{value.inspect}, options: #{opts.inspect}, validate: #{validate?.inspect}, "
  end
  RDF::Literal.new(value, opts.merge(validate: validate?))
end
|
800
|
+
end # class Parser
|
801
|
+
end # module ShEx
|