shex 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,801 @@
1
+ require 'ebnf'
2
+ require 'ebnf/ll1/parser'
3
+ require 'shex/meta'
4
+
5
+ module ShEx
6
+ ##
7
+ # A parser for the ShEx grammar.
8
+ #
9
+ # @see https://www.w3.org/2005/01/yacker/uploads/ShEx3?lang=perl&markup=html#productions
10
+ # @see http://en.wikipedia.org/wiki/LL_parser
11
+ class Parser
12
+ include ShEx::Meta
13
+ include ShEx::Terminals
14
+ include EBNF::LL1::Parser
15
+
16
+ ##
17
+ # Any additional options for the parser.
18
+ #
19
+ # @return [Hash]
20
+ attr_reader :options
21
+
22
+ ##
23
+ # The current input string being processed.
24
+ #
25
+ # @return [String]
26
+ attr_accessor :input
27
+
28
+ ##
29
+ # The current input tokens being processed.
30
+ #
31
+ # @return [Array<Token>]
32
+ attr_reader :tokens
33
+
34
+ ##
35
+ # The internal representation of the result using hierarchy of RDF objects and ShEx::Operator
36
+ # objects.
37
+ # @return [Array]
38
+ # @see http://sparql.rubyforge.org/algebra
39
+ attr_accessor :result
40
+
41
# Terminal handlers passed to the lexer. Registration order is significant
# ("Order matters!"), so the sequence of `terminal` calls must be preserved.

# CODE: a token such as `{ foo %}`.
terminal(:CODE, CODE, unescape: true) do |_prod, tok, out|
  inner = tok.value[1..-2] # drop the first and last character (the braces)
  # Remove the final '%' plus any whitespace after it; other whitespace is kept for now.
  out[:code] = inner.sub(/%\s*$/, '')
end

# REPEAT_RANGE: the text between the first and last character is split on ','
# into {min:, max:}. All-digit parts become Integers; anything else stays a String.
terminal(:REPEAT_RANGE, REPEAT_RANGE) do |_prod, tok, out|
  text = tok.value
  bounds = text[1..-2].split(',').map do |part|
    part =~ /^\d+$/ ? part.to_i : part
  end
  if bounds.length == 1
    # A single bound followed by ',' is open-ended ('*'); without ',' it is exact.
    bounds[1] = text.include?(',') ? '*' : bounds[0]
  end
  out[:cardinality] = {min: bounds[0], max: bounds[1]}
end

# BLANK_NODE_LABEL: the first two characters are skipped before calling #bnode.
terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |_prod, tok, out|
  label = tok.value[2..-1]
  out[:blankNode] = bnode(label)
end

# IRIREF: the first and last character are stripped and the rest goes through #iri.
# An ArgumentError raised while building the IRI is re-raised as Error.
terminal(:IRIREF, IRIREF, unescape: true) do |_prod, tok, out|
  begin
    reference = tok.value[1..-2]
    out[:iri] = iri(reference)
  rescue ArgumentError => failure
    raise Error, failure.message
  end
end
62
# DOUBLE: when a '.' is immediately followed by the exponent marker, a '0' is
# inserted between them (e.g. "1.e5" becomes "1.0e5") before the literal is built.
terminal(:DOUBLE, DOUBLE) do |_prod, tok, out|
  lexical = tok.value.sub(/\.([eE])/, '.0\1')
  out[:literal] = literal(lexical, datatype: RDF::XSD.double)
end

# DECIMAL: the token text is used unchanged; no leading zero is added to
# values that begin with '.'.
terminal(:DECIMAL, DECIMAL) do |_prod, tok, out|
  lexical = tok.value
  out[:literal] = literal(lexical, datatype: RDF::XSD.decimal)
end

# INTEGER: the token text becomes an xsd:integer literal.
terminal(:INTEGER, INTEGER) do |_prod, tok, out|
  lexical = tok.value
  out[:literal] = literal(lexical, datatype: RDF::XSD.integer)
end
78
# PNAME_LN: split "prefix:local" at the first ':' and expand it with #ns.
# An expansion that is not absolute is reported through #error.
terminal(:PNAME_LN, PNAME_LN, unescape: true) do |_prod, tok, out|
  pfx, local = tok.value.split(":", 2)
  expanded = ns(pfx, local)
  out[:iri] = expanded
  unless expanded.absolute?
    error(nil, "Compact IRI missing prefix definition: #{tok.value}", production: :PNAME_LN)
  end
end

# PNAME_NS: everything but the last character is the prefix name. Both the
# expanded namespace IRI and the prefix (as a Symbol) are recorded.
terminal(:PNAME_NS, PNAME_NS) do |_prod, tok, out|
  pfx = tok.value[0..-2]

  out[:iri] = ns(pfx, nil)
  out[:prefix] = pfx && pfx.to_sym
end

# ATPNAME_LN: like PNAME_LN, but a leading '@' (and any WS after it) is removed
# from the prefix part and the result is stored as a shape label.
terminal(:ATPNAME_LN, ATPNAME_LN, unescape: true) do |_prod, tok, out|
  pfx, local = tok.value.split(":", 2)
  pfx = pfx.sub(/^@#{WS}*/, '')
  label = ns(pfx, local)
  out[:shapeLabel] = label
  unless label.absolute?
    error(nil, "Compact IRI missing prefix definition: #{tok.value}", production: :ATPNAME_LN)
  end
end

# ATPNAME_NS: the last character is dropped, then a leading '@' plus whitespace.
terminal(:ATPNAME_NS, ATPNAME_NS) do |_prod, tok, out|
  pfx = tok.value[0..-2].sub(/^@\s*/, '')

  out[:shapeLabel] = ns(pfx, nil)
end
101
# LANGTAG: the first character is dropped and the rest stored as the language.
terminal(:LANGTAG, LANGTAG) do |_prod, tok, out|
  out[:language] = tok.value[1..-1]
end

# String literal terminals, registered in their original order. Each entry gives
# the terminal name, its pattern and the number of delimiter characters removed
# from each end of the token (3 for the long forms, 1 for the short forms).
[
  [:STRING_LITERAL_LONG1, STRING_LITERAL_LONG1, 3],
  [:STRING_LITERAL_LONG2, STRING_LITERAL_LONG2, 3],
  [:STRING_LITERAL1, STRING_LITERAL1, 1],
  [:STRING_LITERAL2, STRING_LITERAL2, 1]
].each do |name, pattern, width|
  terminal(name, pattern, unescape: true) do |_prod, tok, out|
    out[:string] = tok.value[width..-(width + 1)]
  end
end

# RDF_TYPE: a copy of RDF.type whose lexical form is set to 'a'.
terminal(:RDF_TYPE, RDF_TYPE) do |_prod, tok, out|
  type_iri = RDF.type.dup
  type_iri.lexical = 'a'
  out[:iri] = type_iri
end
119
+
120
# String terminals
#
# Punctuation tokens and the boolean keywords are handled one by one. Every other
# recognised keyword is stored as its lower-cased Symbol under a key chosen by
# the keyword family. Unrecognised tokens are ignored.
terminal(nil, STR_EXPR, map: STR_MAP) do |_prod, tok, out|
  text = tok.value
  case text
  when '*' then out[:cardinality] = {min: 0, max: "*"}
  when '+' then out[:cardinality] = {min: 1, max: "*"}
  when '?' then out[:cardinality] = {min: 0, max: 1}
  when '!' then out[:not] = text
  when '^' then out[:inverse] = text
  when '.' then out[:dot] = text
  when '~' then out[:pattern] = text
  when 'true', 'false' then out[:literal] = RDF::Literal::Boolean.new(text)
  else
    slot = case text
           when 'BNODE', 'IRI', 'NONLITERAL'       then :nonLiteralKind
           when 'CLOSED'                           then :closed
           when 'EXTERNAL'                         then :external
           when 'FRACTIONDIGITS', 'TOTALDIGITS'    then :numericLength
           when 'LITERAL'                          then :shapeAtomLiteral
           when 'LENGTH', 'MINLENGTH', 'MAXLENGTH' then :stringLength
           when 'MININCLUSIVE', 'MINEXCLUSIVE',
                'MAXINCLUSIVE', 'MAXEXCLUSIVE'     then :numericRange
           when 'NOT'                              then :not
           when 'PATTERN'                          then :pattern
           when 'START'                            then :start
           end
    out[slot] = text.downcase.to_sym if slot
  end
end
152
+
153
+ # Productions
154
# [1] shexDoc ::= directive* ((notStartAction | startActions) statement*)?
#
# Builds the Algebra::Schema from the entries present in `data` (base, prefixes,
# semantic actions, start expression, shapes) and stores it in input[:schema].
production(:shexDoc) do |input, data, callback|
  # Operands are appended in a fixed order; absent parts are skipped.
  expressions = []
  expressions << [:base, data[:baseDecl]] if data[:baseDecl]
  expressions << [:prefix, data[:prefixDecl]] if data[:prefixDecl]
  expressions += Array(data[:codeDecl])
  expressions << Algebra::Start.new(data[:start]) if data[:start]
  expressions << [:shapes, data[:shapes]] if data[:shapes]

  schema = Algebra::Schema.new(*expressions, options)
  input[:schema] = schema

  # Set schema accessor for all included expressions
  schema.each_descendant do |op|
    op.schema = schema if op.respond_to?(:schema=)
  end
  self
end
173
+
174
+ # [2] directive ::= baseDecl | prefixDecl
175
+
176
# [3] baseDecl ::= "BASE" IRIREF
#
# Resolves the IRI, installs it as the parser's base URI and stores the same
# value in input[:baseDecl].
production(:baseDecl) do |input, data, callback|
  resolved = iri(data[:iri])
  self.base_uri = resolved
  input[:baseDecl] = resolved
end

# [4] prefixDecl ::= "PREFIX" PNAME_NS IRIREF
#
# Registers the prefix mapping and appends a [name, iri] pair (name as a String)
# to input[:prefixDecl].
production(:prefixDecl) do |input, data, callback|
  name, target = data[:prefix], data[:iri]
  self.prefix(name, target)
  declarations = (input[:prefixDecl] ||= [])
  declarations << [name.to_s, target]
end

# [5] notStartAction ::= start | shapeExprDecl
# [6] start ::= "start" '=' shapeExpression
#
# Passes the parsed shape expression on as input[:start].
production(:start) do |input, data, callback|
  start_expression = data[:shapeExpression]
  input[:start] = start_expression
end
193
+ # [7] startActions ::= codeDecl+
194
+
195
+ # [8] statement ::= directive | notStartAction
196
+
197
# [9] shapeExprDecl ::= shapeLabel (shapeExpression|"EXTERNAL")
#
# Stores the declared expression in input[:shapes] under the first shape label.
# When data[:shapeExpression] is not one of the recognised algebra classes, the
# entry is an Algebra::External if data[:external] is set, else an empty
# Algebra::Shape.
production(:shapeExprDecl) do |input, data, callback|
  recognised = [
    Algebra::NodeConstraint, Algebra::Or, Algebra::And,
    Algebra::Not, Algebra::ShapeRef, Algebra::Shape
  ]
  label = Array(data[:shapeLabel]).first
  parsed = data[:shapeExpression]

  declared = if recognised.any? {|klass| klass === parsed}
    parsed
  elsif data[:external]
    Algebra::External.new
  else
    Algebra::Shape.new
  end

  shapes = (input[:shapes] ||= {})
  shapes[label] = declared
end
209
+
210
+ # [10] shapeExpression ::= shapeOr
211
+ # [11] inlineShapeExpression ::= inlineShapeOr
212
+
213
# [12] shapeOr ::= shapeAnd ("OR" shapeAnd)*
# [13] inlineShapeOr ::= inlineShapeAnd ("OR" inlineShapeAnd)*
#
# Both productions share the same handler.
[:shapeOr, :inlineShapeOr].each do |name|
  production(name) do |input, data, callback|
    shape_or(input, data)
  end
end

# Copies the :closed, :extraPropertySet and :codeDecl entries of `data` into
# `input`, then sets input[:shapeExpression]: an Algebra::Or when more than one
# expression was collected, otherwise the single expression (if any).
# An ArgumentError raised along the way is reported through #error.
def shape_or(input, data)
  carried = data.select {|key, _| [:closed, :extraPropertySet, :codeDecl].include?(key)}
  input.merge!(carried)

  operands = Array(data[:shapeExpression])
  combined = operands.length > 1 ? Algebra::Or.new(*operands) : operands.first
  input[:shapeExpression] = combined if combined
rescue ArgumentError => e
  error(nil, "Argument Error on OR: #{e.message}")
end
private :shape_or
233
+
234
# [14] shapeAnd ::= shapeNot ("AND" shapeNot)*
# [15] inlineShapeAnd ::= inlineShapeNot ("AND" inlineShapeNot)*
#
# Both productions share the same handler.
[:shapeAnd, :inlineShapeAnd].each do |name|
  production(name) do |input, data, callback|
    shape_and(input, data)
  end
end

# Copies the :closed, :extraPropertySet and :codeDecl entries of `data` into
# `input`, then appends to input[:shapeExpression] either an Algebra::And over
# the collected expressions or the single expression (if any). Operands of a
# nested Algebra::And are spliced in rather than nested.
# An ArgumentError raised along the way is reported through #error.
def shape_and(input, data)
  carried = data.select {|key, _| [:closed, :extraPropertySet, :codeDecl].include?(key)}
  input.merge!(carried)

  flattened = Array(data[:shapeExpression]).each_with_object([]) do |expr, acc|
    acc.concat(expr.is_a?(Algebra::And) ? expr.operands : [expr])
  end
  combined = flattened.length > 1 ? Algebra::And.new(*flattened) : flattened.first
  (input[:shapeExpression] ||= []) << combined if combined
rescue ArgumentError => e
  error(nil, "Argument Error on AND: #{e.message}")
end
private :shape_and
257
+
258
# [16] shapeNot ::= "NOT"? shapeAtom
# [17] inlineShapeNot ::= "NOT"? inlineShapeAtom
#
# Both productions share the same handler.
[:shapeNot, :inlineShapeNot].each do |name|
  production(name) do |input, data, callback|
    shape_not(input, data)
  end
end

# Copies the :closed, :extraPropertySet and :codeDecl entries of `data` into
# `input`, wraps the expression in Algebra::Not when data[:not] is set, and
# appends the result (if any) to input[:shapeExpression].
def shape_not(input, data)
  carried = data.select {|key, _| [:closed, :extraPropertySet, :codeDecl].include?(key)}
  input.merge!(carried)

  atom = data[:shapeExpression]
  negated = data[:not] ? Algebra::Not.new(atom) : atom
  return nil unless negated

  (input[:shapeExpression] ||= []) << negated
end
private :shape_not
274
+
275
# [18] shapeAtom ::= nodeConstraint shapeOrRef?
#                  | shapeOrRef
#                  | "(" shapeExpression ")"
#                  | '.' # no constraint
# [19] inlineShapeAtom ::= nodeConstraint inlineShapeOrRef?
#                        | inlineShapeOrRef nodeConstraint?
#                        | "(" shapeExpression ")"
#                        | '.' # no constraint
#
# Both productions share the same handler.
[:shapeAtom, :inlineShapeAtom].each do |name|
  production(name) do |input, data, callback|
    shape_atom(input, data)
  end
end

# Combines the node constraint with the shape (data[:shapeOrRef], falling back
# to data[:shapeExpression]). Two parts become an Algebra::And, one part is used
# directly, and nothing is stored when both are absent. The :closed,
# :extraPropertySet and :codeDecl entries of `data` are copied into `input`.
def shape_atom(input, data)
  parts = [data[:nodeConstraint], data[:shapeOrRef] || data[:shapeExpression]].compact
  carried = data.select {|key, _| [:closed, :extraPropertySet, :codeDecl].include?(key)}
  input.merge!(carried)

  atom = parts.length > 1 ? Algebra::And.new(*parts) : parts.first
  input[:shapeExpression] = atom if atom
end
private :shape_atom
305
+
306
# [20] shapeOrRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel | shapeDefinition
# [21] inlineShapeOrRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel | inlineShapeDefinition
#
# Both productions share the same handler.
[:shapeOrRef, :inlineShapeOrRef].each do |name|
  production(name) do |input, data, callback|
    shape_or_ref(input, data)
  end
end

# Copies the :closed, :extraPropertySet and :codeDecl entries of `data` into
# `input`, then sets input[:shapeOrRef] to data[:shape] when present, otherwise
# to an Algebra::ShapeRef for the first shape label. Nothing is stored when
# neither is available. An ArgumentError is reported through #error.
def shape_or_ref(input, data)
  carried = data.select {|key, _| [:closed, :extraPropertySet, :codeDecl].include?(key)}
  input.merge!(carried)

  label = Array(data[:shapeLabel]).first
  target = data[:shape] || (label && Algebra::ShapeRef.new(label))
  input[:shapeOrRef] = target if target
rescue ArgumentError => e
  error(nil, "Argument Error on ShapeOrRef: #{e.message}")
end
private :shape_or_ref
323
+
324
# [22] nodeConstraint ::= "LITERAL" xsFacet*
#                       | nonLiteralKind stringFacet*
#                       | datatype xsFacet*
#                       | valueSet xsFacet*
#                       | xsFacet+
#
# Builds an Algebra::NodeConstraint from the datatype, node kind, value set and
# facets found in `data`, and stores it in input[:nodeConstraint].
production(:nodeConstraint) do |input, data, callback|
  datatype = data[:datatype]

  # Semantic validation (reported as a syntax error):
  # a numeric facet can only be used with a numeric datatype. A probe literal
  # with the declared datatype shows whether RDF treats it as numeric.
  if datatype && data[:numericFacet]
    probe = RDF::Literal.new("1", datatype: datatype)
    unless probe.is_a?(RDF::Literal::Numeric)
      error(nil, "Numeric facet used on non-numeric datatype: #{datatype}", production: :nodeConstraint)
    end
  end

  operands = []
  operands.push(:datatype, datatype) if datatype
  operands.push(data[:shapeAtomLiteral], data[:nonLiteralKind])
  operands.concat(Array(data[:valueSetValue]))
  operands.concat(Array(data[:numericFacet]))
  operands.concat(Array(data[:stringFacet]))

  # nil entries (absent kinds, empty facet slots) are dropped before construction.
  input[:nodeConstraint] = Algebra::NodeConstraint.new(*operands.compact)
end
347
+
348
+ # [23] nonLiteralKind ::= "IRI" | "BNODE" | "NONLITERAL"
349
+
350
+ # [24] xsFacet ::= stringFacet | numericFacet
351
# [25] stringFacet ::= stringLength INTEGER
#                    | "PATTERN" string
#                    | '~' string # shortcut for "PATTERN"
#
# Appends one entry to input[:stringFacet]: [length-kind, literal] for a length
# facet, [:pattern, string] for a pattern facet, or nil when neither is present.
# Repeating a length kind already collected is reported through #error.
production(:stringFacet) do |input, data, callback|
  facets = (input[:stringFacet] ||= [])
  length_kind = data[:stringLength]

  entry = if length_kind
    if facets.flatten.include?(length_kind)
      error(nil, "#{length_kind} constraint may only be used once in a Node Constraint", production: :stringFacet)
    end
    [length_kind, data[:literal]]
  elsif data[:pattern]
    [:pattern, data[:string]]
  end

  facets << entry
end
365
+
366
+ # [26] stringLength ::= "LENGTH" | "MINLENGTH" | "MAXLENGTH"
367
+
368
# [27] numericFacet ::= numericRange (numericLiteral | string '^^' datatype )
#                     | numericLength INTEGER
#
# Appends one entry to input[:numericFacet]: [range-kind, bound] for a range
# facet, [length-kind, literal] for a digit-count facet, or nil when neither is
# present. A range bound that is not an RDF::Literal::Numeric is reported
# through #error.
production(:numericFacet) do |input, data, callback|
  facets = (input[:numericFacet] ||= [])

  entry = if data[:numericRange]
    # Use the parsed numeric literal, or build one from string + datatype.
    bound = data[:literal] || literal(data[:string], datatype: data[:datatype])
    unless bound.is_a?(RDF::Literal::Numeric)
      error(nil, "numericRange must use a numeric datatype: #{data[:datatype]}", production: :numericFacet)
    end
    [data[:numericRange], bound]
  elsif data[:numericLength]
    [data[:numericLength], data[:literal]]
  end

  facets << entry
end
380
+
381
+ # [28] numericRange ::= "MININCLUSIVE" | "MINEXCLUSIVE" | "MAXINCLUSIVE" | "MAXEXCLUSIVE"
382
+ # [29] numericLength ::= "TOTALDIGITS" | "FRACTIONDIGITS"
383
+
384
# [30] shapeDefinition ::= (extraPropertySet | "CLOSED")* '{' tripleExpression? '}' annotation* semanticActions
# [31] inlineShapeDefinition ::= (extraPropertySet | "CLOSED")* '{' tripleExpression? '}'
#
# Both productions share the same handler.
[:shapeDefinition, :inlineShapeDefinition].each do |name|
  production(name) do |input, data, callback|
    shape_definition(input, data)
  end
end

# Stores an Algebra::Shape in input[:shape] when a triple expression is present.
# Its operands are the triple expression followed by the extra property sets,
# :closed (if set), the annotations and the semantic actions, in that order.
def shape_definition(input, data)
  operands = Array(data[:extraPropertySet])
  operands << :closed if data[:closed]
  operands = operands + Array(data[:annotation]) + Array(data[:codeDecl])

  body = data[:tripleExpression]
  input[:shape] = Algebra::Shape.new(body, *operands) if body
end
private :shape_definition
402
+
403
# [32] extraPropertySet ::= "EXTRA" predicate+
#
# Prefixes the collected predicates with :extra (in place) and appends that
# list to input[:extraPropertySet].
production(:extraPropertySet) do |input, data, callback|
  property_sets = (input[:extraPropertySet] ||= [])
  tagged = data[:predicate].unshift(:extra)
  property_sets << tagged
end
407
+
408
+ # [33] tripleExpression ::= oneOfTripleExpr
409
# [34] oneOfTripleExpr ::= groupTripleExpr ('|' groupTripleExpr)*
#
# Several alternatives become an Algebra::OneOf; a single one is passed through.
# The result (if any) replaces input[:tripleExpression].
production(:oneOfTripleExpr) do |input, data, callback|
  alternatives = Array(data[:tripleExpression])
  choice = alternatives.length > 1 ? Algebra::OneOf.new(*alternatives) : alternatives.first
  input[:tripleExpression] = choice if choice
end

# [37] groupTripleExpr ::= unaryTripleExpr (';' unaryTripleExpr?)*
#
# Several members become an Algebra::EachOf; a single one is passed through.
# The result (if any) is appended to input[:tripleExpression].
production(:groupTripleExpr) do |input, data, callback|
  members = Array(data[:tripleExpression])
  group = members.length > 1 ? Algebra::EachOf.new(*members) : members.first
  (input[:tripleExpression] ||= []) << group if group
end

# [40] unaryTripleExpr ::= productionLabel? (tripleConstraint | bracketedTripleExpr) | include
#
# Adds the production label (if any) to the expression's operands and appends
# the expression to input[:tripleExpression]. Does nothing without an expression.
production(:unaryTripleExpr) do |input, data, callback|
  expr = data[:tripleExpression]
  if expr
    label = data[:productionLabel]
    expr.operands << label if label

    (input[:tripleExpression] ||= []) << expr
  end
end
436
+
437
# [41] bracketedTripleExpr ::= '(' oneOfTripleExpr ')' cardinality? annotation* semanticActions
#
# Appends cardinality bounds, semantic actions and annotations to the operands
# of the bracketed OneOf/EachOf and stores it in input[:tripleExpression].
# Any other kind of contained expression is reported through #error.
production(:bracketedTripleExpr) do |input, data, callback|
  # XXX cardinality? annotation* semanticActions
  group = data[:tripleExpression]
  unless group.is_a?(Algebra::OneOf) || group.is_a?(Algebra::EachOf)
    error(nil, "Bracketed Expression requires multiple contained expressions", production: :bracketedTripleExpr)
  end

  bounds = data.fetch(:cardinality, {})
  # Only bounds that are actually present produce a [:min, n] / [:max, n] pair.
  extras = [:min, :max].map {|side| [side, bounds[side]] if bounds[side]}.compact
  extras += Array(data[:codeDecl])
  extras += Array(data[:annotation])

  group.operands.concat(extras)
  input[:tripleExpression] = group
end
456
+
457
# [42] productionLabel ::= '$' (iri | blankNode)
#
# The label is the IRI when present, otherwise the blank node.
production(:productionLabel) do |input, data, callback|
  label = data[:iri] || data[:blankNode]
  input[:productionLabel] = label
end

# [43] tripleConstraint ::= senseFlags? predicate shapeExpression cardinality? annotation* semanticActions
#
# Builds an Algebra::TripleConstraint whose operands are, in order: :inverse
# (when data[:inverse] or data[:not] is set), the first predicate, the shape
# expression, [:min, n] and [:max, n] pairs for the bounds present, then the
# semantic actions and annotations. Nothing is stored if no operand results.
production(:tripleConstraint) do |input, data, callback|
  bounds = data.fetch(:cardinality, {})

  operands = []
  operands << :inverse if data[:inverse] || data[:not]
  operands << Array(data[:predicate]).first # predicate
  operands << data[:shapeExpression]
  operands << [:min, bounds[:min]] if bounds[:min]
  operands << [:max, bounds[:max]] if bounds[:max]
  operands.compact! # a missing predicate or shape expression leaves a nil behind

  operands.concat(Array(data[:codeDecl]))
  operands.concat(Array(data[:annotation]))

  input[:tripleExpression] = Algebra::TripleConstraint.new(*operands) unless operands.empty?
end
477
+
478
+ # [44] cardinality ::= '*' | '+' | '?' | REPEAT_RANGE
479
+ # [45] senseFlags ::= '^'
480
+ # [46] valueSet ::= '[' valueSetValue* ']'
481
+
482
# [47] valueSetValue ::= iriRange | literal
#
# Wraps the IRI range (or, failing that, the literal) in an Algebra::Value and
# appends it to input[:valueSetValue].
production(:valueSetValue) do |input, data, callback|
  member = data[:iriRange] || data[:literal]
  (input[:valueSetValue] ||= []) << Algebra::Value.new(member)
end

# [48] iriRange ::= iri ('~' exclusion*)? | '.' exclusion+
#
# Result stored in input[:iriRange]:
# * '~' with exclusions    -> Algebra::StemRange over the IRI
# * '~' without exclusions -> Algebra::Stem over the IRI
# * '.'                    -> Algebra::StemRange over :wildcard
# * otherwise              -> the IRI itself
production(:iriRange) do |input, data, callback|
  # The exclusion list is tagged in place with a leading :exclusions marker.
  excluded = data[:exclusion] ? data[:exclusion].unshift(:exclusions) : nil

  input[:iriRange] =
    if data[:pattern]
      excluded ? Algebra::StemRange.new(data[:iri], excluded) : Algebra::Stem.new(data[:iri])
    elsif data[:dot]
      Algebra::StemRange.new(:wildcard, excluded)
    else
      data[:iri]
    end
end

# [49] exclusion ::= '-' iri '~'?
#
# Appends the IRI, wrapped in an Algebra::Stem when '~' was given, to
# input[:exclusion].
production(:exclusion) do |input, data, callback|
  excluded = data[:pattern] ? Algebra::Stem.new(data[:iri]) : data[:iri]
  (input[:exclusion] ||= []) << excluded
end
505
+
506
# [50] include ::= '&' shapeLabel
#
# Stores an Algebra::Inclusion built from the collected shape label(s).
production(:include) do |input, data, callback|
  inclusion = Algebra::Inclusion.new(*data[:shapeLabel])
  input[:tripleExpression] = inclusion
end

# [51] annotation ::= '//' predicate (iri | literal)
#
# Appends an Algebra::Annotation (first predicate, then the IRI or else the
# literal) to input[:annotation].
production(:annotation) do |input, data, callback|
  annotated_with = data[:predicate].first
  annotation_value = data[:iri] || data[:literal]
  (input[:annotation] ||= []) << Algebra::Annotation.new(annotated_with, annotation_value)
end

# [52] semanticActions ::= codeDecl*

# [53] codeDecl ::= '%' iri (CODE | "%")
#
# Appends an Algebra::SemAct to input[:codeDecl]; the code operand is omitted
# when no CODE token was seen.
production(:codeDecl) do |input, data, callback|
  operands = [data[:iri], data[:code]].compact
  (input[:codeDecl] ||= []) << Algebra::SemAct.new(*operands)
end
523
+
524
+ # [13t] literal ::= rdfLiteral | numericLiteral | booleanLiteral
525
+
526
# [54] predicate ::= iri | RDF_TYPE
#
# Appends the IRI to input[:predicate].
production(:predicate) do |input, data, callback|
  predicates = (input[:predicate] ||= [])
  predicates << data[:iri]
end

# [55] datatype ::= iri
#
# Stores the IRI as input[:datatype].
production(:datatype) do |input, data, callback|
  datatype_iri = data[:iri]
  input[:datatype] = datatype_iri
end

# [56] shapeLabel ::= iri | blankNode
#
# Appends the IRI, or else the blank node, to input[:shapeLabel].
production(:shapeLabel) do |input, data, callback|
  labels = (input[:shapeLabel] ||= [])
  labels << (data[:iri] || data[:blankNode])
end

# [16t] numericLiteral ::= INTEGER | DECIMAL | DOUBLE
# [129s] rdfLiteral ::= string (LANGTAG | '^^' datatype)?
#
# Builds the literal from the string; the whole `data` hash is handed to
# #literal as its options.
production(:rdfLiteral) do |input, data, callback|
  built = literal(data[:string], data)
  input[:literal] = built
end
546
+
547
+ # [134s] booleanLiteral ::= 'true' | 'false'
548
+ # [135s] string ::= STRING_LITERAL1 | STRING_LITERAL_LONG1
549
+ # | STRING_LITERAL2 | STRING_LITERAL_LONG2
550
+ # [136s] iri ::= IRIREF | prefixedName
551
+ # [137s] prefixedName ::= PNAME_LN | PNAME_NS
552
+ # [138s] blankNode ::= BLANK_NODE_LABEL
553
+
554
+ ##
555
+ # Initializes a new parser instance.
556
+ #
557
+ # @example parsing a ShExC schema
558
+ # schema = ShEx::Parser.new(%(
559
+ # PREFIX ex: <http://schema.example/> ex:IssueShape {ex:state IRI}
560
+ # )).parse
561
+ #
562
+ # @param [String, IO, StringIO, #to_s] input
563
+ # @param [Hash{Symbol => Object}] options
564
+ # @option options [Hash] :prefixes (Hash.new)
565
+ # the prefix mappings to use (for accessing intermediate parser productions)
566
+ # @option options [#to_s] :base_uri (nil)
567
+ # the base URI to use when resolving relative URIs (for accessing intermediate parser productions)
568
+ # @option options [#to_s] :anon_base ("b0")
569
+ # Basis for generating anonymous Nodes
570
+ # @option options [Boolean] :resolve_iris (false)
571
+ # Resolve prefix and relative IRIs, otherwise, when serializing the parsed SSE
572
+ # as S-Expressions, use the original prefixed and relative URIs along with `base` and `prefix`
573
+ # definitions.
574
+ # @option options [Boolean] :validate (false)
575
+ # whether to validate the parsed statements and values
576
+ # @option options [Boolean] :progress
577
+ # Show progress of parser productions
578
+ # @option options [Boolean] :debug
579
+ # Detailed debug output
580
+ # @yield [parser] `self`
581
+ # @yieldparam [ShEx::Parser] parser
582
+ # @yieldreturn [void] ignored
583
+ # @return [ShEx::Parser]
584
+ # @raise [ShEx::NotSatisfied] if not satisfied
585
+ # @raise [ShEx::ParseError] when a syntax error is detected
586
+ # @raise [ShEx::StructureError, ArgumentError] on structural problems with schema
587
def initialize(input = nil, options = {}, &block)
  # Streams are read in full; anything else is converted to a private String copy.
  source = (IO === input || StringIO === input) ? input.read : input.to_s.dup
  source.encode!(Encoding::UTF_8) if source.respond_to?(:encode!)
  @input = source

  @options = {anon_base: "b0", validate: false}.merge(options)
  # Unless a debug setting was supplied, derive a level from the other flags:
  # 2 for :progress, 1 for :validate, otherwise nil.
  @options[:debug] ||= if options[:progress]
    2
  elsif options[:validate]
    1
  end

  debug("base IRI") {base_uri.inspect}
  debug("validate") {validate?.inspect}

  # A zero-arity block runs in the context of the parser; any other block
  # receives the parser as its argument.
  if block_given?
    if block.arity == 0
      instance_eval(&block)
    else
      block.call(self)
    end
  end
end
609
+
610
# Returns the stored parse result unchanged.
#
# @return [Object, nil] whatever {#parse} last stored as the result; `nil` before parsing
def to_sxp_bin
  result
end

# Renders the stored parse result by calling `to_sxp` on it.
#
# @return [String] presumably the S-expression text; depends on the result's `to_sxp`
def to_s
  result.to_sxp
end
618
+
619
# Keep the previously defined #parse (presumably the one mixed in by
# EBNF::LL1::Parser) reachable, since #parse is redefined below.
alias_method :ll1_parse, :parse

##
# Parses the input, starting from production `prod`.
#
# Delegates to `ll1_parse` with the BRANCH, FIRST and FOLLOW tables and the WS
# pattern merged into the parser options. Trace callbacks are formatted and
# routed according to `@options[:debug]`:
#
# * an Array collects messages of level 2 or lower,
# * `true` prints every message to `$stderr`,
# * an Integer prints messages whose level does not exceed it.
#
# Afterwards the result is taken from `prod_data`: a non-Hash value is used
# as-is, an empty Hash gives `nil`, a Hash with a `:schema` entry gives that
# schema, and any other Hash gives `[first_key, *values_of_first_key]`.
# When validation is enabled, `validate!` is called on a non-nil result.
#
# @param [Symbol, #to_sym] prod The starting production for the parser.
# @return [Object, nil] the result, also stored in `@result`
# @raise [ShEx::ParseError] when the LL(1) parser or lexer reports an error
def parse(prod = START)
  tables = {branch: BRANCH, first: FIRST, follow: FOLLOW, whitespace: WS}
  ll1_parse(@input, prod.to_sym, @options.merge(tables)) do |context, *data|
    if context == :trace
      level, lineno, depth, *args = data
      # NOTE(review): #to_s renders with `to_sxp` while this uses `to_sse` --
      # confirm `to_sse` is defined for Array in this gem's load path.
      rendered = args.to_sse
      # Indentation mirrors the production depth, capped at 100 columns.
      indent = depth > 100 ? ' ' * 100 + '+' : ' ' * depth
      line = "[#{lineno}](#{level})#{indent}#{rendered}".chop

      sink = @options[:debug]
      case sink
      when Array
        sink << line unless level > 2
      when TrueClass
        $stderr.puts line
      when Integer
        $stderr.puts(line) if level <= sink
      end
    end
  end

  # The last thing on the @prod_data stack is the result
  @result =
    if !prod_data.is_a?(Hash)
      prod_data
    elsif prod_data.empty?
      nil
    elsif prod_data[:schema]
      prod_data[:schema]
    else
      key = prod_data.keys.first
      [key] + Array(prod_data[key]) # Creates [:key, [:triple], ...]
    end

  # Validate resulting expression
  @result.validate! if @result && validate?
  @result
rescue EBNF::LL1::Parser::Error, EBNF::LL1::Lexer::Error => e
  raise ShEx::ParseError.new(e.message, lineno: e.lineno, token: e.token)
end
678
+
679
private

##
# Returns the URI prefixes currently defined for this parser.
# The mapping lives in `@options[:prefixes]` and is created empty on first use.
#
# @example
#   prefixes[:dc]  #=> RDF::URI('http://purl.org/dc/terms/')
#
# @return [Hash{Symbol => RDF::URI}]
def prefixes
  @options[:prefixes] ||= {}
end

##
# Replaces the URI prefixes defined for this parser.
#
# @example
#   self.prefixes = {
#     dc: RDF::URI('http://purl.org/dc/terms/'),
#   }
#
# @param  [Hash{Symbol => RDF::URI}] prefixes
# @return [Hash{Symbol => RDF::URI}]
def prefixes=(prefixes)
  @options[:prefixes] = prefixes
end

##
# Defines or looks up a named URI prefix for this parser.
#
# The name is normalised to a Symbol; an empty (or nil) name is stored under
# the `nil` key.
#
# @example Defining a URI prefix
#   prefix :dc, RDF::URI('http://purl.org/dc/terms/')
#
# @example Returning a URI prefix
#   prefix(:dc)    #=> RDF::URI('http://purl.org/dc/terms/')
#
# @overload prefix(name, uri)
#   @param  [Symbol, #to_s]   name
#   @param  [RDF::URI, #to_s] uri
#
# @overload prefix(name)
#   @param  [Symbol, #to_s]   name
#
# @return [RDF::URI] the IRI just stored, or the one registered under `name`
#   (`nil` when looking up an unknown prefix)
def prefix(name, iri = nil)
  label = name.to_s
  key = label.empty? ? nil : label.to_sym
  return prefixes[key] if iri.nil?

  prefixes[key] = iri
end
728
+
729
##
# Returns the Base URI defined for the parser,
# as specified or when parsing a BASE prologue element.
# The stored `:base_uri` option is passed through `RDF::URI()` on every call.
#
# @example
#   base_uri  #=> RDF::URI('http://example.com/')
#
# @return [RDF::URI]
def base_uri
  RDF::URI(@options[:base_uri])
end

##
# Set the Base URI to use for this parser.
# The value is converted with `RDF::URI()` before being stored in the options.
#
# @param  [RDF::URI, #to_s] iri
#
# @example
#   self.base_uri = RDF::URI('http://purl.org/dc/terms/')
#
# @return [RDF::URI]
def base_uri=(iri)
  @options[:base_uri] = RDF::URI(iri)
end

##
# Returns the value of the `:validate` option, i.e. whether validation of the
# parsed statements and values was requested.
#
# @return [Boolean] `true` or `false`
def validate?
  @options[:validate]
end
762
+
763
# Generate a BNode identifier
#
# Nodes are cached per id, so the same id yields the same RDF::Node.
# Asking again for an id whose cached node has been frozen raises Error.
def bnode(id)
  cache = (@bnode_cache ||= {})
  known = cache[id]
  raise Error, "Illegal attempt to reuse a BNode" if known && known.frozen?

  known || (cache[id] = RDF::Node.new(id))
end
769
+
770
# Create URIs
#
# Converts `value` with RDF::URI(); a relative result is joined onto the base
# URI, anything else is returned as converted.
def iri(value)
  uri = RDF::URI(value)
  base = base_uri
  # If we have a base URI, use that when constructing a new URI
  (base && uri.relative?) ? base.join(uri) : uri
end

# Expands a prefixed name: looks up the namespace registered for `prefix`,
# appends `suffix` and passes the result through #iri. When the namespace
# contains a '#', one leading '#' is removed from the suffix first.
def ns(prefix, suffix)
  namespace = prefix(prefix).to_s
  suffix = suffix.to_s.sub(/^\#/, "") if namespace.include?("#")
  debug {"ns(#{prefix.inspect}): base: '#{namespace}', suffix: '#{suffix}'"}
  iri("#{namespace}#{suffix}")
end
787
+
788
# Create a literal
#
# Builds an RDF::Literal from `value`. The caller's options hash is not
# modified; a `:datatype` equal to xsd:string is dropped, and the parser's
# validate setting is added.
def literal(value, options = {})
  # Internal representation is to not use xsd:string, although it could arguably go the other way.
  opts = options.reject {|key, val| key == :datatype && val == RDF::XSD.string}
  debug("literal") do
    "value: #{value.inspect}, options: #{opts.inspect}, validate: #{validate?.inspect}, "
  end
  RDF::Literal.new(value, opts.merge(validate: validate?))
end
800
+ end # class Parser
801
+ end # module ShEx