shex 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,801 @@
1
+ require 'ebnf'
2
+ require 'ebnf/ll1/parser'
3
+ require 'shex/meta'
4
+
5
+ module ShEx
6
+ ##
7
+ # A parser for the ShEx grammar.
8
+ #
9
+ # @see https://www.w3.org/2005/01/yacker/uploads/ShEx3?lang=perl&markup=html#productions
10
+ # @see https://en.wikipedia.org/wiki/LL_parser
11
+ class Parser
12
+ include ShEx::Meta
13
+ include ShEx::Terminals
14
+ include EBNF::LL1::Parser
15
+
16
+ ##
17
+ # Any additional options for the parser.
18
+ #
19
+ # @return [Hash]
20
+ attr_reader :options
21
+
22
+ ##
23
+ # The current input string being processed.
24
+ #
25
+ # @return [String]
26
+ attr_accessor :input
27
+
28
+ ##
29
+ # The current input tokens being processed.
30
+ #
31
+ # @return [Array<Token>]
32
+ attr_reader :tokens
33
+
34
+ ##
35
+ # The internal representation of the result using hierarchy of RDF objects and ShEx::Operator
36
+ # objects.
37
+ # @return [Array]
38
+ # @see http://sparql.rubyforge.org/algebra
39
+ attr_accessor :result
40
+
41
# Terminals passed to lexer. Order matters!

# CODE looks like `{ foo %}`: drop the surrounding braces, then the closing
# `%` and anything blank after it. Whitespace ahead of the `%` is kept.
terminal(:CODE, CODE, unescape: true) do |_prod, token, input|
  inner = token.value[1..-2]
  input[:code] = inner.sub(/%\s*$/, '')
end

# REPEAT_RANGE: strip the enclosing delimiters and split on ','. All-digit
# bounds become Integers, anything else is kept as text. A single bound with
# no comma is used for both min and max; with a comma the max becomes '*'.
terminal(:REPEAT_RANGE, REPEAT_RANGE) do |_prod, token, input|
  text = token.value
  bounds = text[1..-2].split(',').map do |part|
    part =~ /^\d+$/ ? part.to_i : part
  end
  if bounds.length == 1
    bounds[1] = text.include?(',') ? '*' : bounds[0]
  end
  input[:cardinality] = {min: bounds[0], max: bounds[1]}
end

# The first two characters (presumably the `_:` marker) are not part of the
# node identifier.
terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |_prod, token, input|
  label = token.value[2..-1]
  input[:blankNode] = bnode(label)
end

# Strip the enclosing delimiters; IRI construction problems are re-raised as
# parser errors.
terminal(:IRIREF, IRIREF, unescape: true) do |_prod, token, input|
  reference = token.value[1..-2]
  begin
    input[:iri] = iri(reference)
  rescue ArgumentError => e
    raise Error, e.message
  end
end

# A double may have an exponent directly after the '.', so insert a zero
# between them when that happens.
terminal(:DOUBLE, DOUBLE) do |_prod, token, input|
  normalized = token.value.sub(/\.([eE])/, '.0\1')
  input[:literal] = literal(normalized, datatype: RDF::XSD.double)
end

# The lexical form is passed through untouched (no leading zero is added).
terminal(:DECIMAL, DECIMAL) do |_prod, token, input|
  input[:literal] = literal(token.value, datatype: RDF::XSD.decimal)
end

terminal(:INTEGER, INTEGER) do |_prod, token, input|
  input[:literal] = literal(token.value, datatype: RDF::XSD.integer)
end

# prefix:local -- expand through the prefix table and insist on an absolute IRI.
terminal(:PNAME_LN, PNAME_LN, unescape: true) do |_prod, token, input|
  pieces = token.value.split(":", 2)
  input[:iri] = ns(pieces[0], pieces[1])
  unless input[:iri].absolute?
    error(nil, "Compact IRI missing prefix definition: #{token.value}", production: :PNAME_LN)
  end
end

# prefix: -- the final character (the colon) is not part of the prefix name.
terminal(:PNAME_NS, PNAME_NS) do |_prod, token, input|
  name = token.value[0...-1]

  input[:iri] = ns(name, nil)
  input[:prefix] = name ? name.to_sym : name
end

# @prefix:local -- a shape reference; the '@' (and whitespace after it) is
# removed from the prefix part before expansion.
terminal(:ATPNAME_LN, ATPNAME_LN, unescape: true) do |_prod, token, input|
  pieces = token.value.split(":", 2)
  name = pieces[0].sub(/^@#{WS}*/, '')
  input[:shapeLabel] = ns(name, pieces[1])
  unless input[:shapeLabel].absolute?
    error(nil, "Compact IRI missing prefix definition: #{token.value}", production: :ATPNAME_LN)
  end
end

# @prefix: -- a shape reference naming only the namespace.
terminal(:ATPNAME_NS, ATPNAME_NS) do |_prod, token, input|
  name = token.value[0...-1].sub(/^@\s*/, '')

  input[:shapeLabel] = ns(name, nil)
end

# Everything after the first character is the language tag.
terminal(:LANGTAG, LANGTAG) do |_prod, token, input|
  input[:language] = token.value[1..-1]
end

# The four string forms differ only in how many quote characters wrap the
# content: three for the long forms, one for the short forms.
[
  [:STRING_LITERAL_LONG1, STRING_LITERAL_LONG1, 3],
  [:STRING_LITERAL_LONG2, STRING_LITERAL_LONG2, 3],
  [:STRING_LITERAL1,      STRING_LITERAL1,      1],
  [:STRING_LITERAL2,      STRING_LITERAL2,      1]
].each do |name, pattern, quotes|
  terminal(name, pattern, unescape: true) do |_prod, token, input|
    input[:string] = token.value[quotes..-(quotes + 1)]
  end
end

# A copy of rdf:type whose lexical form is recorded as 'a'.
terminal(:RDF_TYPE, RDF_TYPE) do |_prod, token, input|
  input[:iri] = RDF.type.dup.tap {|type_iri| type_iri.lexical = 'a'}
end
119
+
120
# String terminals
#
# Punctuation tokens are stored verbatim (or as a cardinality Hash); keyword
# tokens are stored as a lower-cased Symbol under the slot listed below.
# Tokens that appear in neither table are ignored.
terminal(nil, STR_EXPR, map: STR_MAP) do |_prod, token, input|
  text = token.value
  case text
  when '*', '+', '?'
    input[:cardinality] = {
      '*' => {min: 0, max: "*"},
      '+' => {min: 1, max: "*"},
      '?' => {min: 0, max: 1}
    }[text]
  when '!' then input[:not] = text
  when '^' then input[:inverse] = text
  when '.' then input[:dot] = text
  when '~' then input[:pattern] = text
  when 'true', 'false'
    input[:literal] = RDF::Literal::Boolean.new(text)
  else
    slot = {
      'BNODE'          => :nonLiteralKind,
      'IRI'            => :nonLiteralKind,
      'NONLITERAL'     => :nonLiteralKind,
      'CLOSED'         => :closed,
      'EXTERNAL'       => :external,
      'FRACTIONDIGITS' => :numericLength,
      'TOTALDIGITS'    => :numericLength,
      'LITERAL'        => :shapeAtomLiteral,
      'LENGTH'         => :stringLength,
      'MINLENGTH'      => :stringLength,
      'MAXLENGTH'      => :stringLength,
      'MININCLUSIVE'   => :numericRange,
      'MINEXCLUSIVE'   => :numericRange,
      'MAXINCLUSIVE'   => :numericRange,
      'MAXEXCLUSIVE'   => :numericRange,
      'NOT'            => :not,
      'PATTERN'        => :pattern,
      'START'          => :start
    }[text]
    input[slot] = text.downcase.to_sym if slot
  end
end
152
+
153
+ # Productions
154
# [1] shexDoc ::= directive* ((notStartAction | startActions) statement*)?
#
# Builds the Algebra::Schema for the whole document and stores it under
# `input[:schema]`. Operands are added in this order, each only when present:
# the base declaration, the prefix declarations, any semantic actions, the
# start expression and the table of shapes. The parser options are passed as
# the final argument to the schema.
production(:shexDoc) do |input, data, callback|
  expressions = []
  expressions << [:base, data[:baseDecl]] if data[:baseDecl]
  expressions << [:prefix, data[:prefixDecl]] if data[:prefixDecl]
  expressions += Array(data[:codeDecl])
  expressions << Algebra::Start.new(data[:start]) if data[:start]
  expressions << [:shapes, data[:shapes]] if data[:shapes]

  input[:schema] = Algebra::Schema.new(*expressions, options)

  # Set schema accessor for all included expressions
  input[:schema].each_descendant do |op|
    op.schema = input[:schema] if op.respond_to?(:schema=)
  end
  self
end
173
+
174
+ # [2] directive ::= baseDecl | prefixDecl
175
+
176
# [3] baseDecl ::= "BASE" IRIREF
# The resolved IRI becomes the parser's base and is also recorded for the schema.
production(:baseDecl) do |input, data, callback|
  base = iri(data[:iri])
  self.base_uri = base
  input[:baseDecl] = base
end

# [4] prefixDecl ::= "PREFIX" PNAME_NS IRIREF
# Registers the mapping with the parser and appends a [name, iri] pair to the
# list of declarations seen so far.
production(:prefixDecl) do |input, data, callback|
  name, target = data[:prefix], data[:iri]
  self.prefix(name, target)
  input[:prefixDecl] ||= []
  input[:prefixDecl] << [name.to_s, target]
end

# [5] notStartAction ::= start | shapeExprDecl
# [6] start ::= "start" '=' shapeExpression
production(:start) do |input, data, callback|
  input[:start] = data[:shapeExpression]
end
193
+ # [7] startActions ::= codeDecl+
194
+
195
+ # [8] statement ::= directive | notStartAction
196
+
197
# [9] shapeExprDecl ::= shapeLabel (shapeExpression|"EXTERNAL")
#
# Files the declared expression under its label in `input[:shapes]`. When the
# parsed expression is not one of the recognised algebra classes, an External
# is stored if "EXTERNAL" was seen, otherwise an empty Shape.
production(:shapeExprDecl) do |input, data, callback|
  recognised = [
    Algebra::NodeConstraint, Algebra::Or, Algebra::And,
    Algebra::Not, Algebra::ShapeRef, Algebra::Shape
  ]
  label = Array(data[:shapeLabel]).first
  parsed = data[:shapeExpression]

  expression = if recognised.any? {|klass| klass === parsed}
    parsed
  elsif data[:external]
    Algebra::External.new()
  else
    Algebra::Shape.new()
  end

  input[:shapes] ||= {}
  input[:shapes][label] = expression
end
209
+
210
+ # [10] shapeExpression ::= shapeOr
211
+ # [11] inlineShapeExpression ::= inlineShapeOr
212
+
213
# [12] shapeOr ::= shapeAnd ("OR" shapeAnd)*
production(:shapeOr) do |input, data, callback|
  shape_or(input, data)
end
# [13] inlineShapeOr ::= inlineShapeAnd ("OR" inlineShapeAnd)*
production(:inlineShapeOr) do |input, data, callback|
  shape_or(input, data)
end

# Shared handler for both OR productions.
#
# Copies the :closed, :extraPropertySet and :codeDecl entries up to the
# parent, then stores either the single collected expression or, when there
# are several, an Algebra::Or over all of them. Nothing is stored when no
# expression was collected. ArgumentError is reported through `error`.
def shape_or(input, data)
  inherited = [:closed, :extraPropertySet, :codeDecl]
  input.merge!(data.select {|key, _| inherited.include?(key)})

  alternatives = Array(data[:shapeExpression])
  expression = alternatives.length > 1 ? Algebra::Or.new(*data[:shapeExpression]) : alternatives.first
  input[:shapeExpression] = expression if expression
rescue ArgumentError => e
  error(nil, "Argument Error on OR: #{e.message}")
end
private :shape_or
233
+
234
# [14] shapeAnd ::= shapeNot ("AND" shapeNot)*
production(:shapeAnd) do |input, data, callback|
  shape_and(input, data)
end
# [15] inlineShapeAnd ::= inlineShapeNot ("AND" inlineShapeNot)*
production(:inlineShapeAnd) do |input, data, callback|
  shape_and(input, data)
end

# Shared handler for both AND productions.
#
# Copies the :closed, :extraPropertySet and :codeDecl entries up to the
# parent. Collected expressions that are themselves an Algebra::And are
# replaced by their operands, so nested conjunctions are flattened one level.
# The result (a single expression, or an Algebra::And over several) is
# appended to the parent's :shapeExpression list. ArgumentError is reported
# through `error`.
def shape_and(input, data)
  inherited = [:closed, :extraPropertySet, :codeDecl]
  input.merge!(data.select {|key, _| inherited.include?(key)})

  conjuncts = []
  Array(data[:shapeExpression]).each do |expr|
    conjuncts.concat(expr.is_a?(Algebra::And) ? expr.operands : [expr])
  end

  expression = conjuncts.length > 1 ? Algebra::And.new(*conjuncts) : conjuncts.first
  (input[:shapeExpression] ||= []) << expression if expression
rescue ArgumentError => e
  error(nil, "Argument Error on AND: #{e.message}")
end
private :shape_and
257
+
258
# [16] shapeNot ::= "NOT"? shapeAtom
production(:shapeNot) do |input, data, callback|
  shape_not(input, data)
end
# [17] inlineShapeNot ::= "NOT"? inlineShapeAtom
production(:inlineShapeNot) do |input, data, callback|
  shape_not(input, data)
end

# Shared handler for both NOT productions.
#
# Copies the :closed, :extraPropertySet and :codeDecl entries up to the
# parent, wraps the atom in an Algebra::Not when the :not flag was set, and
# appends the outcome to the parent's :shapeExpression list. A missing atom
# without NOT is silently skipped.
def shape_not(input, data)
  inherited = [:closed, :extraPropertySet, :codeDecl]
  input.merge!(data.select {|key, _| inherited.include?(key)})

  atom = data[:shapeExpression]
  expression = data[:not] ? Algebra::Not.new(atom) : atom
  (input[:shapeExpression] ||= []) << expression if expression
end
private :shape_not
274
+
275
# [18] shapeAtom ::= nodeConstraint shapeOrRef?
#                  | shapeOrRef
#                  | "(" shapeExpression ")"
#                  | '.' # no constraint
production(:shapeAtom) do |input, data, callback|
  shape_atom(input, data)
end
# [19] inlineShapeAtom ::= nodeConstraint inlineShapeOrRef?
#                        | inlineShapeOrRef nodeConstraint?
#                        | "(" shapeExpression ")"
#                        | '.' # no constraint
production(:inlineShapeAtom) do |input, data, callback|
  shape_atom(input, data)
end

# Shared handler for both atom productions.
#
# Combines the node constraint (if any) with the shape, shape reference or
# parenthesised expression (if any). One part is stored as-is, two parts are
# joined with an Algebra::And, and nothing is stored when neither is present.
# The :closed, :extraPropertySet and :codeDecl entries are copied up to the
# parent.
def shape_atom(input, data)
  constraint = data[:nodeConstraint]
  shape = data[:shapeOrRef] || data[:shapeExpression]

  inherited = [:closed, :extraPropertySet, :codeDecl]
  input.merge!(data.select {|key, _| inherited.include?(key)})

  parts = [constraint, shape].compact
  expression = parts.length > 1 ? Algebra::And.new(*parts) : parts.first

  input[:shapeExpression] = expression if expression
end
private :shape_atom
305
+
306
# [20] shapeOrRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel | shapeDefinition
production(:shapeOrRef) do |input, data, callback|
  shape_or_ref(input, data)
end
# [21] inlineShapeOrRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel | inlineShapeDefinition
production(:inlineShapeOrRef) do |input, data, callback|
  shape_or_ref(input, data)
end

# Shared handler for both shape-or-reference productions.
#
# An inline shape definition takes precedence; otherwise the first shape
# label is wrapped in an Algebra::ShapeRef. Nothing is stored when neither is
# available. The :closed, :extraPropertySet and :codeDecl entries are copied
# up to the parent. ArgumentError is reported through `error`.
def shape_or_ref(input, data)
  inherited = [:closed, :extraPropertySet, :codeDecl]
  input.merge!(data.select {|key, _| inherited.include?(key)})

  shape = data[:shape]
  label = Array(data[:shapeLabel]).first
  if shape
    input[:shapeOrRef] = shape
  elsif label
    input[:shapeOrRef] = Algebra::ShapeRef.new(label)
  end
rescue ArgumentError => e
  error(nil, "Argument Error on ShapeOrRef: #{e.message}")
end
private :shape_or_ref
323
+
324
# [22] nodeConstraint ::= "LITERAL" xsFacet*
#                       | nonLiteralKind stringFacet*
#                       | datatype xsFacet*
#                       | valueSet xsFacet*
#                       | xsFacet+
#
# Builds an Algebra::NodeConstraint from whichever parts were parsed. The
# operands are, in order: `:datatype` followed by the datatype IRI, the
# literal / non-literal kind, value set values, numeric facets and string
# facets; absent parts are dropped.
production(:nodeConstraint) do |input, data, callback|
  datatype = data[:datatype]

  # Semantic validation (reported as a syntax error): a numeric facet is
  # only meaningful when a literal of the datatype is numeric.
  if datatype && data[:numericFacet]
    probe = RDF::Literal.new("1", datatype: datatype)
    unless probe.is_a?(RDF::Literal::Numeric)
      error(nil, "Numeric facet used on non-numeric datatype: #{data[:datatype]}", production: :nodeConstraint)
    end
  end

  attrs = datatype ? [:datatype, datatype] : []
  attrs.push(data[:shapeAtomLiteral], data[:nonLiteralKind])
  attrs.concat(Array(data[:valueSetValue]))
  attrs.concat(Array(data[:numericFacet]))
  attrs.concat(Array(data[:stringFacet]))

  input[:nodeConstraint] = Algebra::NodeConstraint.new(*attrs.compact)
end
347
+
348
+ # [23] nonLiteralKind ::= "IRI" | "BNODE" | "NONLITERAL"
349
+
350
+ # [24] xsFacet ::= stringFacet | numericFacet
351
# [25] stringFacet ::= stringLength INTEGER
#                    | "PATTERN" string
#                    | '~' string # shortcut for "PATTERN"
#
# Appends one entry to the parent's :stringFacet list: a [kind, literal] pair
# for a length facet, [:pattern, string] for a pattern, or nil when neither
# was parsed. Repeating the same length facet is reported as an error.
production(:stringFacet) do |input, data, callback|
  facets = (input[:stringFacet] ||= [])
  length_kind = data[:stringLength]

  facet = if length_kind
    if facets.flatten.include?(length_kind)
      error(nil, "#{data[:stringLength]} constraint may only be used once in a Node Constraint", production: :stringFacet)
    end
    [length_kind, data[:literal]]
  elsif data[:pattern]
    [:pattern, data[:string]]
  end

  facets << facet
end
365
+
366
+ # [26] stringLength ::= "LENGTH" | "MINLENGTH" | "MAXLENGTH"
367
+
368
# [27] numericFacet ::= numericRange (numericLiteral | string '^^' datatype )
#                     | numericLength INTEGER
#
# Appends one entry to the parent's :numericFacet list: [range, literal] for
# a range facet, [length, literal] for a digits facet, or nil when neither
# was parsed. A range bound given as string^^datatype is converted with the
# `literal` helper and must turn out to be numeric.
production(:numericFacet) do |input, data, callback|
  facets = (input[:numericFacet] ||= [])

  facet = if data[:numericRange]
    bound = data[:literal] || literal(data[:string], datatype: data[:datatype])
    unless bound.is_a?(RDF::Literal::Numeric)
      error(nil, "numericRange must use a numeric datatype: #{data[:datatype]}", production: :numericFacet)
    end
    [data[:numericRange], bound]
  elsif data[:numericLength]
    [data[:numericLength], data[:literal]]
  end

  facets << facet
end
380
+
381
+ # [28] numericRange ::= "MININCLUSIVE" | "MINEXCLUSIVE" | "MAXINCLUSIVE" | "MAXEXCLUSIVE"
382
+ # [29] numericLength ::= "TOTALDIGITS" | "FRACTIONDIGITS"
383
+
384
# [30] shapeDefinition ::= (extraPropertySet | "CLOSED")* '{' tripleExpression? '}' annotation* semanticActions
production(:shapeDefinition) do |input, data, callback|
  shape_definition(input, data)
end
# [31] inlineShapeDefinition ::= (extraPropertySet | "CLOSED")* '{' tripleExpression? '}'
production(:inlineShapeDefinition) do |input, data, callback|
  shape_definition(input, data)
end

# Shared handler for both shape definition productions.
#
# Stores an Algebra::Shape under `input[:shape]`, built from the triple
# expression followed by the extra property sets, the :closed marker,
# annotations and semantic actions. Nothing is stored when the braces were
# empty (no triple expression).
def shape_definition(input, data)
  triples = data[:tripleExpression]

  attrs = Array(data[:extraPropertySet])
  # NOTE: when :extraPropertySet is already an Array this appends to that
  # same Array object.
  attrs << :closed if data[:closed]
  attrs = attrs + Array(data[:annotation]) + Array(data[:codeDecl])

  input[:shape] = Algebra::Shape.new(triples, *attrs) if triples
end
private :shape_definition
402
+
403
# [32] extraPropertySet ::= "EXTRA" predicate+
# The predicate list is tagged in place with a leading :extra marker.
production(:extraPropertySet) do |input, data, callback|
  input[:extraPropertySet] ||= []
  input[:extraPropertySet] << data[:predicate].unshift(:extra)
end

# [33] tripleExpression ::= oneOfTripleExpr
# [34] oneOfTripleExpr ::= groupTripleExpr ('|' groupTripleExpr)*
# Several alternatives become an Algebra::OneOf; a lone one passes through.
production(:oneOfTripleExpr) do |input, data, callback|
  choices = Array(data[:tripleExpression])
  expression = choices.length > 1 ? Algebra::OneOf.new(*data[:tripleExpression]) : choices.first
  input[:tripleExpression] = expression if expression
end

# [37] groupTripleExpr ::= unaryTripleExpr (';' unaryTripleExpr?)*
# Several members become an Algebra::EachOf; a lone one passes through. The
# outcome is appended to the parent's list.
production(:groupTripleExpr) do |input, data, callback|
  members = Array(data[:tripleExpression])
  expression = members.length > 1 ? Algebra::EachOf.new(*data[:tripleExpression]) : members.first
  (input[:tripleExpression] ||= []) << expression if expression
end

# [40] unaryTripleExpr ::= productionLabel? (tripleConstraint | bracketedTripleExpr) | include
# A production label, when given, is added to the expression's operands.
production(:unaryTripleExpr) do |input, data, callback|
  expression = data[:tripleExpression]
  if expression
    label = data[:productionLabel]
    expression.operands << label if label
    (input[:tripleExpression] ||= []) << expression
  end
end
436
+
437
# [41] bracketedTripleExpr ::= '(' oneOfTripleExpr ')' cardinality? annotation* semanticActions
#
# The bracketed content must already be a OneOf or EachOf; cardinality
# bounds, semantic actions and annotations are then appended to its operands.
production(:bracketedTripleExpr) do |input, data, callback|
  # XXX cardinality? annotation* semanticActions
  expression = data[:tripleExpression]
  unless [Algebra::OneOf, Algebra::EachOf].any? {|klass| klass === expression}
    error(nil, "Bracketed Expression requires multiple contained expressions", production: :bracketedTripleExpr)
  end

  cardinality = data.fetch(:cardinality, {})
  attrs = [:min, :max].map do |bound|
    [bound, cardinality[bound]] if cardinality[bound]
  end.compact
  attrs += Array(data[:codeDecl])
  attrs += Array(data[:annotation])

  expression.operands.concat(attrs)
  input[:tripleExpression] = expression
end
456
+
457
# [42] productionLabel ::= '$' (iri | blankNode)
production(:productionLabel) do |input, data, callback|
  input[:productionLabel] = data[:iri] || data[:blankNode]
end

# [43] tripleConstraint ::= senseFlags? predicate shapeExpression cardinality? annotation* semanticActions
#
# Operands of the resulting Algebra::TripleConstraint, in order: the :inverse
# marker (set for either the :inverse or the :not flag), the first predicate,
# the value expression, the cardinality bounds, semantic actions and
# annotations. Absent parts are dropped; nothing is stored if all are absent.
production(:tripleConstraint) do |input, data, callback|
  cardinality = data.fetch(:cardinality, {})
  inverted = data[:inverse] || data[:not]

  attrs = []
  attrs << :inverse if inverted
  attrs << Array(data[:predicate]).first # predicate
  attrs << data[:shapeExpression]
  attrs << [:min, cardinality[:min]] if cardinality[:min]
  attrs << [:max, cardinality[:max]] if cardinality[:max]
  attrs = attrs.compact
  attrs += Array(data[:codeDecl])
  attrs += Array(data[:annotation])

  input[:tripleExpression] = Algebra::TripleConstraint.new(*attrs) unless attrs.empty?
end
477
+
478
+ # [44] cardinality ::= '*' | '+' | '?' | REPEAT_RANGE
479
+ # [45] senseFlags ::= '^'
480
+ # [46] valueSet ::= '[' valueSetValue* ']'
481
+
482
# [47] valueSetValue ::= iriRange | literal
production(:valueSetValue) do |input, data, callback|
  member = data[:iriRange] || data[:literal]
  (input[:valueSetValue] ||= []) << Algebra::Value.new(member)
end

# [48] iriRange ::= iri ('~' exclusion*)? | '.' exclusion+
#
# * iri '~' with exclusions -> StemRange over the IRI
# * iri '~' alone           -> Stem
# * '.' with exclusions     -> StemRange over :wildcard
# * plain iri               -> the IRI itself
# The exclusion list is tagged in place with a leading :exclusions marker.
production(:iriRange) do |input, data, callback|
  exclusions = data[:exclusion] ? data[:exclusion].unshift(:exclusions) : nil

  input[:iriRange] = if data[:pattern]
    exclusions ? Algebra::StemRange.new(data[:iri], exclusions) : Algebra::Stem.new(data[:iri])
  elsif data[:dot]
    Algebra::StemRange.new(:wildcard, exclusions)
  else
    data[:iri]
  end
end

# [49] exclusion ::= '-' iri '~'?
# A trailing '~' turns the excluded IRI into a Stem.
production(:exclusion) do |input, data, callback|
  excluded = data[:pattern] ? Algebra::Stem.new(data[:iri]) : data[:iri]
  (input[:exclusion] ||= []) << excluded
end
505
+
506
# [50] include ::= '&' shapeLabel
production(:include) do |input, data, callback|
  labels = data[:shapeLabel]
  input[:tripleExpression] = Algebra::Inclusion.new(*labels)
end

# [51] annotation ::= '//' predicate (iri | literal)
production(:annotation) do |input, data, callback|
  object = data[:iri] || data[:literal]
  annotation = Algebra::Annotation.new(data[:predicate].first, object)
  input[:annotation] ||= []
  input[:annotation] << annotation
end

# [52] semanticActions ::= codeDecl*

# [53] codeDecl ::= '%' iri (CODE | "%")
# The code operand is omitted when the action has no code.
production(:codeDecl) do |input, data, callback|
  operands = [data[:iri], data[:code]].compact
  (input[:codeDecl] ||= []) << Algebra::SemAct.new(*operands)
end

# [13t] literal ::= rdfLiteral | numericLiteral | booleanLiteral

# [54] predicate ::= iri | RDF_TYPE
production(:predicate) do |input, data, callback|
  input[:predicate] ||= []
  input[:predicate] << data[:iri]
end

# [55] datatype ::= iri
production(:datatype) do |input, data, callback|
  input[:datatype] = data[:iri]
end

# [56] shapeLabel ::= iri | blankNode
production(:shapeLabel) do |input, data, callback|
  label = data[:iri] || data[:blankNode]
  (input[:shapeLabel] ||= []) << label
end

# [16t] numericLiteral ::= INTEGER | DECIMAL | DOUBLE
# [129s] rdfLiteral ::= string (LANGTAG | '^^' datatype)?
# The whole production data Hash is handed to `literal` as its options.
production(:rdfLiteral) do |input, data, callback|
  input[:literal] = literal(data[:string], data)
end
546
+
547
+ # [134s] booleanLiteral ::= 'true' | 'false'
548
+ # [135s] string ::= STRING_LITERAL1 | STRING_LITERAL_LONG1
549
+ # | STRING_LITERAL2 | STRING_LITERAL_LONG2
550
+ # [136s] iri ::= IRIREF | prefixedName
551
+ # [137s] prefixedName ::= PNAME_LN | PNAME_NS
552
+ # [138s] blankNode ::= BLANK_NODE_LABEL
553
+
554
##
# Initializes a new parser instance.
#
# The input is read eagerly (for IO and StringIO) or converted with `to_s`
# and copied, then re-encoded to UTF-8. Parsing itself happens in {#parse}.
#
# @example parsing a ShExC schema
#   schema = ShEx::Parser.new(%(
#     PREFIX ex: <http://schema.example/> ex:IssueShape {ex:state IRI}
#   ).parse
#
# @param  [String, IO, StringIO, #to_s]          input
# @param  [Hash{Symbol => Object}] options
# @option options [Hash]     :prefixes     (Hash.new)
#   the prefix mappings to use (for accessing intermediate parser productions)
# @option options [#to_s]    :base_uri     (nil)
#   the base URI to use when resolving relative URIs (for accessing intermediate parser productions)
# @option options [#to_s]    :anon_base     ("b0")
#   Basis for generating anonymous Nodes
# @option options [Boolean] :resolve_iris (false)
#   Resolve prefix and relative IRIs, otherwise, when serializing the parsed SSE
#   as S-Expressions, use the original prefixed and relative URIs along with `base` and `prefix`
#   definitions.
# @option options [Boolean]  :validate     (false)
#   whether to validate the parsed statements and values
# @option options [Boolean] :progress
#   Show progress of parser productions
# @option options [Boolean] :debug
#   Detailed debug output. When not given, defaults to 2 if `:progress` is
#   set, else 1 if `:validate` is set.
# @yield  [parser] `self`; a block taking no arguments is instead evaluated
#   in the context of the parser
# @yieldparam  [ShEx::Parser] parser
# @yieldreturn [void] ignored
# @return [ShEx::Parser]
# @raise [ShEx::NotSatisfied] if not satisfied
# @raise [ShEx::ParseError] when a syntax error is detected
# @raise [ShEx::StructureError, ArgumentError] on structural problems with schema
def initialize(input = nil, options = {}, &block)
  @input = if input.is_a?(IO) || input.is_a?(StringIO)
    input.read
  else
    input.to_s.dup
  end
  @input.encode!(Encoding::UTF_8) if @input.respond_to?(:encode!)

  @options = {anon_base: "b0", validate: false}.merge(options)
  implied_debug = if options[:progress]
    2
  elsif options[:validate]
    1
  end
  @options[:debug] ||= implied_debug

  debug("base IRI") {base_uri.inspect}
  debug("validate") {validate?.inspect}

  return unless block_given?
  if block.arity == 0
    instance_eval(&block)
  else
    block.call(self)
  end
end
609
+
610
+ # @return [String]
611
+ def to_sxp_bin
612
+ @result
613
+ end
614
+
615
+ def to_s
616
+ @result.to_sxp
617
+ end
618
+
619
+ alias_method :ll1_parse, :parse
620
+
621
# Parses the input held by this parser, starting at the given production.
#
# Trace events reported by the underlying LL(1) parser are routed according
# to `options[:debug]`:
# * an Array collects the messages (events above level 2 are dropped),
# * `true` writes every message to `$stderr`,
# * an Integer writes messages whose level does not exceed it to `$stderr`.
#
# The value returned (and remembered for {#to_s} / {#to_sxp_bin}) is derived
# from the final production data:
# * the production data itself when it is not a Hash,
# * `nil` when it is an empty Hash,
# * the `:schema` entry when there is one (set by the `shexDoc` production),
# * otherwise `[key, *values]` built from the first entry, which is what
#   parsing an intermediate production yields.
#
# With the `:validate` option the result is validated via `validate!`.
# Results that do not offer `validate!` (such as the Array produced for an
# intermediate production) are returned unvalidated instead of raising
# NoMethodError.
#
# @param [Symbol, #to_sym] prod The starting production for the parser.
# @return [Object, nil]
# @raise [ShEx::ParseError] when the underlying parser or lexer reports an error
def parse(prod = START)
  ll1_parse(@input, prod.to_sym, @options.merge(branch: BRANCH,
                                                first: FIRST,
                                                follow: FOLLOW,
                                                whitespace: WS)
  ) do |context, *data|
    case context
    when :trace
      level, lineno, depth, *args = data
      message = args.to_sse
      # Cap the indentation so very deep recursion stays readable.
      d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth
      str = "[#{lineno}](#{level})#{d_str}#{message}".chop
      case @options[:debug]
      when Array
        @options[:debug] << str unless level > 2
      when TrueClass
        $stderr.puts str
      when Integer
        $stderr.puts(str) if level <= @options[:debug]
      end
    end
  end

  # The last thing on the @prod_data stack is the result
  @result = case
  when !prod_data.is_a?(Hash)
    prod_data
  when prod_data.empty?
    nil
  when prod_data[:schema]
    prod_data[:schema]
  else
    key = prod_data.keys.first
    [key] + Array(prod_data[key])  # Creates [:key, [:triple], ...]
  end

  # Validate resulting expression, when it knows how to validate itself
  @result.validate! if validate? && @result.respond_to?(:validate!)
  @result
rescue EBNF::LL1::Parser::Error, EBNF::LL1::Lexer::Error => e
  raise ShEx::ParseError.new(e.message, lineno: e.lineno, token: e.token)
end
678
+
679
+ private
680
##
# Returns the URI prefixes currently defined for this parser, creating an
# empty table in the options on first use.
#
# @example
#   prefixes[:dc]  #=> RDF::URI('http://purl.org/dc/terms/')
#
# @return [Hash{Symbol => RDF::URI}]
# @since  0.3.0
def prefixes
  @options[:prefixes] = {} unless @options[:prefixes]
  @options[:prefixes]
end

##
# Replaces the whole prefix table for this parser.
#
# @example
#   prefixes = {
#     dc: RDF::URI('http://purl.org/dc/terms/'),
#   }
#
# @param  [Hash{Symbol => RDF::URI}] prefixes
# @return [Hash{Symbol => RDF::URI}]
# @since  0.3.0
def prefixes=(prefixes)
  @options.store(:prefixes, prefixes)
end

##
# Looks up, or defines, a single named URI prefix.
#
# The name is normalised to a Symbol; a name whose string form is empty is
# stored under the `nil` key. When `iri` is given the mapping is written,
# otherwise the current mapping (or `nil`) is returned.
#
# @example Defining a URI prefix
#   prefix :dc, RDF::URI('http://purl.org/dc/terms/')
#
# @example Returning a URI prefix
#   prefix(:dc)    #=> RDF::URI('http://purl.org/dc/terms/')
#
# @overload prefix(name, uri)
#   @param  [Symbol, #to_s]   name
#   @param  [RDF::URI, #to_s] uri
#
# @overload prefix(name)
#   @param  [Symbol, #to_s]   name
#
# @return [RDF::URI]
def prefix(name, iri = nil)
  key = if name.to_s.empty?
    nil
  elsif name.respond_to?(:to_sym)
    name.to_sym
  else
    name.to_s.to_sym
  end

  return prefixes[key] if iri.nil?
  prefixes[key] = iri
end
728
+
729
##
# Returns the Base URI defined for the parser,
# as specified or when parsing a BASE prologue element.
# The stored option is passed through `RDF::URI` on every call.
#
# @example
#   base  #=> RDF::URI('http://example.com/')
#
# @return [RDF::URI]
def base_uri
  stored = @options[:base_uri]
  RDF::URI(stored)
end

##
# Set the Base URI to use for this parser.
#
# @param  [RDF::URI, #to_s] iri
#
# @example
#   base_uri = RDF::URI('http://purl.org/dc/terms/')
#
# @return [RDF::URI]
def base_uri=(iri)
  @options.store(:base_uri, RDF::URI(iri))
end
753
+
754
##
# Reports the parser's `:validate` option, i.e. whether parsed literals and
# the resulting expression should be validated.
#
# @return [Boolean] the value of the `:validate` option
# @since  1.0.3
def validate?
  @options[:validate]
end
762
+
763
# Returns the blank node for the given identifier, creating and caching it on
# first use so that repeated labels map to the same node object.
#
# @param [String] id
# @return [RDF::Node]
# @raise [Error] if the node cached for this identifier has been frozen
def bnode(id)
  cache = (@bnode_cache ||= {})
  cached = cache[id]
  raise Error, "Illegal attempt to reuse a BNode" if cached && cached.frozen?
  cache[id] ||= RDF::Node.new(id)
end
769
+
770
# Create URIs
#
# Converts the value to an RDF::URI; a relative result is joined onto the
# parser's base URI, anything else is returned unchanged.
#
# @param [RDF::URI, #to_s] value
# @return [RDF::URI]
def iri(value)
  uri = RDF::URI(value)
  return uri unless base_uri && uri.relative?
  base_uri.join(uri)
end
780
+
781
# Expands a prefixed name into an IRI by appending the suffix to the
# namespace registered for the prefix.
#
# When the namespace already contains a '#', a leading '#' on the suffix is
# dropped so the two are not doubled up.
#
# @param [String, nil] prefix
# @param [String, nil] suffix
# @return [RDF::URI]
def ns(prefix, suffix)
  namespace = prefix(prefix).to_s
  suffix = suffix.to_s.sub(/^\#/, "") if namespace.include?("#")
  debug {"ns(#{prefix.inspect}): base: '#{namespace}', suffix: '#{suffix}'"}
  iri(namespace + suffix.to_s)
end
787
+
788
# Create a literal
#
# Works on a copy of the options, so the caller's Hash is left untouched. An
# explicit xsd:string datatype is dropped (the internal representation is to
# not use xsd:string), and the parser's validate setting is passed along.
#
# @param [Object] value
# @param [Hash{Symbol => Object}] options passed on to RDF::Literal.new
# @return [RDF::Literal]
def literal(value, options = {})
  settings = options.dup
  settings.delete(:datatype) if settings[:datatype] == RDF::XSD.string
  debug("literal") do
    "value: #{value.inspect}, options: #{settings.inspect}, validate: #{validate?.inspect}, "
  end
  RDF::Literal.new(value, settings.merge(validate: validate?))
end
800
+ end # class Parser
801
+ end # module ShEx