shex 0.4.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,4 @@
1
+ # -*- encoding: utf-8 -*-
1
2
  require 'ebnf'
2
3
  require 'ebnf/ll1/parser'
3
4
  require 'shex/meta'
@@ -7,7 +8,7 @@ module ShEx
7
8
  # A parser for the ShEx grammar.
8
9
  #
9
10
  # @see https://www.w3.org/2005/01/yacker/uploads/ShEx3?lang=perl&markup=html#productions
10
- # @see http://en.wikipedia.org/wiki/LR_parser
11
+ # @see https://en.wikipedia.org/wiki/LL_parser
11
12
  class Parser
12
13
  include ShEx::Meta
13
14
  include ShEx::Terminals
@@ -36,7 +37,7 @@ module ShEx
36
37
  # The internal representation of the result using a hierarchy of RDF objects and ShEx::Operator
37
38
  # objects.
38
39
  # @return [Array]
39
- # @see http://sparql.rubyforge.org/algebra
40
+ # @see https://www.rubydoc.info/github/ruby-rdf/sparql/SPARQL/Algebra
40
41
  attr_accessor :result
41
42
 
42
43
  # Terminals passed to lexer. Order matters!
@@ -102,6 +103,18 @@ module ShEx
102
103
  terminal(:LANGTAG, LANGTAG) do |prod, token, input|
103
104
  input[:language] = token.value[1..-1]
104
105
  end
106
+ terminal(:LANG_STRING_LITERAL_LONG1, LANG_STRING_LITERAL_LONG1, unescape: true) do |prod, token, input|
107
+ input[:string], _, input[:language] = token.value[3..-1].rpartition("'''@")
108
+ end
109
+ terminal(:LANG_STRING_LITERAL_LONG2, LANG_STRING_LITERAL_LONG2, unescape: true) do |prod, token, input|
110
+ input[:string], _, input[:language] = token.value[3..-1].rpartition('"""@')
111
+ end
112
+ terminal(:LANG_STRING_LITERAL1, LANG_STRING_LITERAL1, unescape: true) do |prod, token, input|
113
+ input[:string], _, input[:language] = token.value[1..-1].rpartition("'@")
114
+ end
115
+ terminal(:LANG_STRING_LITERAL2, LANG_STRING_LITERAL2, unescape: true) do |prod, token, input|
116
+ input[:string], _, input[:language] = token.value[1..-1].rpartition('"@')
117
+ end
105
118
  terminal(:STRING_LITERAL_LONG1, STRING_LITERAL_LONG1, unescape: true) do |prod, token, input|
106
119
  input[:string] = token.value[3..-4]
107
120
  end
@@ -114,6 +127,9 @@ module ShEx
114
127
  terminal(:STRING_LITERAL2, STRING_LITERAL2, unescape: true) do |prod, token, input|
115
128
  input[:string] = token.value[1..-2]
116
129
  end
130
+ terminal(:REGEXP, REGEXP) do |prod, token, input|
131
+ input[:regexp] = token.value
132
+ end
117
133
  terminal(:RDF_TYPE, RDF_TYPE) do |prod, token, input|
118
134
  input[:iri] = (a = RDF.type.dup; a.lexical = 'a'; a)
119
135
  end
@@ -143,8 +159,7 @@ module ShEx
143
159
  'MINEXCLUSIVE',
144
160
  'MAXINCLUSIVE',
145
161
  'MAXEXCLUSIVE' then input[:numericRange] = token.value.downcase.to_sym
146
- when 'NOT' then input[:not] = token.value.downcase.to_sym
147
- when 'PATTERN' then input[:pattern] = token.value.downcase.to_sym
162
+ when 'NOT' then input[:not] = token.value.downcase.to_sym
148
163
  when 'START' then input[:start] = token.value.downcase.to_sym
149
164
  else
150
165
  #raise "Unexpected MC terminal: #{token.inspect}"
@@ -163,7 +178,7 @@ module ShEx
163
178
  expressions << Algebra::Start.new(data[:start]) if data[:start]
164
179
  expressions << data[:shapes].unshift(:shapes) if data[:shapes]
165
180
 
166
- input[:schema] = Algebra::Schema.new(*expressions, options)
181
+ input[:schema] = Algebra::Schema.new(*expressions, **options)
167
182
  self
168
183
  end
169
184
 
@@ -184,7 +199,7 @@ module ShEx
184
199
  # [5] notStartAction ::= start | shapeExprDecl
185
200
  # [6] start ::= "start" '=' shapeExpression
186
201
  production(:start) do |input, data, callback|
187
- input[:start] = data[:shapeExpression]
202
+ input[:start] = Array(data[:shapeExpression]).first || data[:shape]
188
203
  end
189
204
  # [7] startActions ::= codeDecl+
190
205
 
@@ -193,9 +208,9 @@ module ShEx
193
208
  # [9] shapeExprDecl ::= shapeLabel (shapeExpression|"EXTERNAL")
194
209
  production(:shapeExprDecl) do |input, data, callback|
195
210
  id = Array(data[:shapeLabel]).first
196
- expression = case data[:shapeExpression]
211
+ expression = case Array(data[:shapeExpression]).first
197
212
  when Algebra::NodeConstraint, Algebra::Or, Algebra::And, Algebra::Not, Algebra::Shape, RDF::Resource
198
- data[:shapeExpression]
213
+ Array(data[:shapeExpression]).first
199
214
  else
200
215
  data[:external] ? Algebra::External.new() : Algebra::Shape.new()
201
216
  end
@@ -204,13 +219,36 @@ module ShEx
204
219
  (input[:shapes] ||= []) << expression
205
220
  end
206
221
 
207
- # [10] shapeExpression ::= shapeOr
208
- # [11] inlineShapeExpression ::= inlineShapeOr
222
+ # [10] shapeExpression ::= shapeAtomNoRef shapeOr?
223
+ # | "NOT" (shapeAtomNoRef | shapeRef) shapeOr?
224
+ # | shapeRef shapeOr
225
+ production(:shapeExpression) do |input, data, callback|
226
+ expression = Array(data[:shapeExpression]).first || data[:shape]
227
+ expression = Algebra::Not.new(expression) if data[:not]
228
+ (input[:shapeExpression] ||= []) << expression
229
+ end
209
230
 
210
- # [12] shapeOr ::= shapeAnd ("OR" shapeAnd)*
211
- production(:shapeOr) do |input, data, callback|
231
+ # [11] inlineShapeExpression ::= inlineShapeOr
232
+ # [12] shapeOr ::= shapeOrA | shapeOrB shapeOrA?
233
+ # [12a] shapeOrA ::= ("OR" shapeAnd)+
234
+ start_production(:shapeOrA) do |input, data, callback|
235
+ data[:shapeExpression] = input.delete(:shapeExpression)
236
+ data[:shapeExpression] ||= Array(input.delete(:shape)) if input[:shape]
237
+ data[:shapeExpression] = [Algebra::Not.new(*data[:shapeExpression])] if input.delete(:not)
238
+ end
239
+ production(:shapeOrA) do |input, data, callback|
212
240
  shape_or(input, data)
213
241
  end
242
+ # [12b] shapeOrB ::= ("AND" shapeNot)+
243
+ start_production(:shapeOrB) do |input, data, callback|
244
+ data[:shapeExpression] = input.delete(:shapeExpression)
245
+ data[:shapeExpression] ||= Array(input.delete(:shape)) if input[:shape]
246
+ data[:shapeExpression] = [Algebra::Not.new(*data[:shapeExpression])] if input.delete(:not)
247
+ end
248
+ production(:shapeOrB) do |input, data, callback|
249
+ shape_and(input, data)
250
+ end
251
+
214
252
  # [13] inlineShapeOr ::= inlineShapeAnd ("OR" inlineShapeAnd)*
215
253
  production(:inlineShapeOr) do |input, data, callback|
216
254
  shape_or(input, data)
@@ -222,7 +260,7 @@ module ShEx
222
260
  else
223
261
  Array(data[:shapeExpression]).first
224
262
  end
225
- input[:shapeExpression] = expression if expression
263
+ (input[:shapeExpression] ||= []) << expression if expression
226
264
  rescue ArgumentError => e
227
265
  error(nil, "Argument Error on OR: #{e.message}")
228
266
  end
@@ -239,8 +277,10 @@ module ShEx
239
277
  def shape_and(input, data)
240
278
  input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
241
279
  expressions = Array(data[:shapeExpression]).inject([]) do |memo, expr|
242
- memo.concat(expr.is_a?(Algebra::And) ? expr.operands : [expr])
280
+ #memo.concat(expr.is_a?(Algebra::And) ? expr.operands : [expr])
281
+ memo.concat([expr])
243
282
  end
283
+
244
284
  expression = if expressions.length > 1
245
285
  Algebra::And.new(*expressions, {})
246
286
  else
@@ -262,7 +302,7 @@ module ShEx
262
302
  end
263
303
  def shape_not(input, data)
264
304
  input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
265
- expression = data[:shapeExpression]
305
+ expression = Array(data[:shapeExpression]).first
266
306
  expression = Algebra::Not.new(expression) if data[:not]
267
307
  #error(nil, "Expected an atom for NOT") unless expression
268
308
  (input[:shapeExpression] ||= []) << expression if expression
@@ -276,7 +316,16 @@ module ShEx
276
316
  production(:shapeAtom) do |input, data, callback|
277
317
  shape_atom(input, data)
278
318
  end
279
- # [19] inlineShapeAtom ::= nodeConstraint inlineShapeOrRef?
319
+
320
+ # [19] shapeAtomNoRef ::= nodeConstraint shapeOrRef?
321
+ # | shapeDefinition
322
+ # | "(" shapeExpression ")"
323
+ # | '.' # no constraint
324
+ production(:shapeAtomNoRef) do |input, data, callback|
325
+ shape_atom(input, data)
326
+ end
327
+
328
+ # [20] inlineShapeAtom ::= nodeConstraint inlineShapeOrRef?
280
329
  # | inlineShapeOrRef nodeConstraint?
281
330
  # | "(" shapeExpression ")"
282
331
  # | '.' # no constraint
@@ -285,7 +334,7 @@ module ShEx
285
334
  end
286
335
  def shape_atom(input, data)
287
336
  constraint = data[:nodeConstraint]
288
- shape = data[:shapeOrRef] || data[:shapeExpression]
337
+ shape = data[:shapeOrRef] || Array(data[:shapeExpression]).first || data[:shape]
289
338
  input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
290
339
 
291
340
  expression = [constraint, shape].compact
@@ -295,34 +344,36 @@ module ShEx
295
344
  else Algebra::And.new(*expression, {})
296
345
  end
297
346
 
298
- input[:shapeExpression] = expression if expression
347
+ (input[:shapeExpression] ||= []) << expression if expression
299
348
  end
300
349
  private :shape_atom
301
350
 
302
- # [20] shapeOrRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel | shapeDefinition
351
+ # [21] shapeOrRef ::= shapeDefinition | shapeRef
303
352
  production(:shapeOrRef) do |input, data, callback|
304
353
  shape_or_ref(input, data)
305
354
  end
306
- # [21] inlineShapeOrRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel | inlineShapeDefinition
355
+ # [22] inlineShapeOrRef ::= inlineShapeDefinition | shapeRef
307
356
  production(:inlineShapeOrRef) do |input, data, callback|
308
357
  shape_or_ref(input, data)
309
358
  end
310
359
  def shape_or_ref(input, data)
311
360
  input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
312
- if data[:shape] || Array(data[:shapeLabel]).first
313
- input[:shapeOrRef] = data[:shape] || Array(data[:shapeLabel]).first
314
- end
361
+ input[:shapeOrRef] = data[:shape] if data[:shape]
315
362
  rescue ArgumentError => e
316
363
  error(nil, "Argument Error on ShapeOrRef: #{e.message}")
317
364
  end
318
365
  private :shape_or_ref
319
366
 
320
- # [22] nodeConstraint ::= "LITERAL" xsFacet*
321
- # | nonLiteralKind stringFacet*
367
+ # [23] shapeRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel
368
+ production(:shapeRef) do |input, data, callback|
369
+ input[:shape] = Array(data[:shapeLabel]).first
370
+ end
371
+
372
+ # [24] litNodeConstraint ::= "LITERAL" xsFacet*
322
373
  # | datatype xsFacet*
323
374
  # | valueSet xsFacet*
324
- # | xsFacet+
325
- production(:nodeConstraint) do |input, data, callback|
375
+ # | numericFacet+
376
+ production(:litNodeConstraint) do |input, data, callback|
326
377
  # Semantic validate (A Syntax error)
327
378
  case
328
379
  when data[:datatype] && data[:numericFacet]
@@ -333,7 +384,7 @@ module ShEx
333
384
 
334
385
  attrs = []
335
386
  attrs << [:datatype, data[:datatype]] if data [:datatype]
336
- attrs += [data[:shapeAtomLiteral], data[:nonLiteralKind]]
387
+ attrs += Array(data[:shapeAtomLiteral])
337
388
  attrs += Array(data[:valueSetValue])
338
389
  attrs += Array(data[:numericFacet])
339
390
  attrs += Array(data[:stringFacet])
@@ -341,12 +392,23 @@ module ShEx
341
392
  input[:nodeConstraint] = Algebra::NodeConstraint.new(*attrs.compact, {})
342
393
  end
343
394
 
344
- # [23] nonLiteralKind ::= "IRI" | "BNODE" | "NONLITERAL"
395
+ # [25] nonLitNodeConstraint ::= nonLiteralKind stringFacet*
396
+ # | stringFacet+
397
+ production(:nonLitNodeConstraint) do |input, data, callback|
398
+ # Semantic validate (A Syntax error)
345
399
 
346
- # [24] xsFacet ::= stringFacet | numericFacet
347
- # [25] stringFacet ::= stringLength INTEGER
348
- # | "PATTERN" string
349
- # | '~' string # shortcut for "PATTERN"
400
+ attrs = []
401
+ attrs += Array(data[:nonLiteralKind])
402
+ attrs += Array(data[:stringFacet])
403
+
404
+ input[:nodeConstraint] = Algebra::NodeConstraint.new(*attrs.compact, {})
405
+ end
406
+
407
+ # [26] nonLiteralKind ::= "IRI" | "BNODE" | "NONLITERAL"
408
+
409
+ # [27] xsFacet ::= stringFacet | numericFacet
410
+ # [28] stringFacet ::= stringLength INTEGER
411
+ # | REGEXP
350
412
  production(:stringFacet) do |input, data, callback|
351
413
  input[:stringFacet] ||= []
352
414
  input[:stringFacet] << if data[:stringLength]
@@ -354,14 +416,27 @@ module ShEx
354
416
  error(nil, "#{data[:stringLength]} constraint may only be used once in a Node Constraint", production: :stringFacet)
355
417
  end
356
418
  [data[:stringLength], data[:literal]]
357
- elsif data[:pattern]
358
- [:pattern, data[:string]]
419
+ elsif re = data[:regexp]
420
+ unless re =~ %r(^/(.*)/([smix]*)$)
421
+ error(nil, "#{re.inspect} regular expression must be in the form /pattern/flags?", production: :stringFacet)
422
+ end
423
+ flags = $2 unless $2.to_s.empty?
424
+ pattern = $1.gsub('\\/', '/').gsub(UCHAR) do
425
+ [($1 || $2).hex].pack('U*')
426
+ end.force_encoding(Encoding::UTF_8)
427
+
428
+ # Any other escaped character is a syntax error
429
+ if pattern.match(%r([^\\]\\[^nrt/\\|\.?*+\[\]\(\){}$#x2D#x5B#x5D#x5E-]))
430
+ error(nil, "Regexp contains illegal escape: #{pattern.inspect}", production: :stringFacet)
431
+ end
432
+
433
+ [:pattern, pattern, flags].compact
359
434
  end
360
435
  end
361
436
 
362
- # [26] stringLength ::= "LENGTH" | "MINLENGTH" | "MAXLENGTH"
437
+ # [29] stringLength ::= "LENGTH" | "MINLENGTH" | "MAXLENGTH"
363
438
 
364
- # [27] numericFacet ::= numericRange (numericLiteral | string '^^' datatype )
439
+ # [30] numericFacet ::= numericRange (numericLiteral | string '^^' datatype )
365
440
  # | numericLength INTEGER
366
441
  production(:numericFacet) do |input, data, callback|
367
442
  input[:numericFacet] ||= []
@@ -374,36 +449,37 @@ module ShEx
374
449
  end
375
450
  end
376
451
 
377
- # [28] numericRange ::= "MININCLUSIVE" | "MINEXCLUSIVE" | "MAXINCLUSIVE" | "MAXEXCLUSIVE"
378
- # [29] numericLength ::= "TOTALDIGITS" | "FRACTIONDIGITS"
452
+ # [31] numericRange ::= "MININCLUSIVE" | "MINEXCLUSIVE" | "MAXINCLUSIVE" | "MAXEXCLUSIVE"
453
+ # [32] numericLength ::= "TOTALDIGITS" | "FRACTIONDIGITS"
379
454
 
380
- # [30] shapeDefinition ::= (extraPropertySet | "CLOSED")* '{' tripleExpression? '}' annotation* semanticActions
455
+ # [33] shapeDefinition ::= (includeSet | extraPropertySet | "CLOSED")* '{' tripleExpression? '}' annotation* semanticActions
381
456
  production(:shapeDefinition) do |input, data, callback|
382
457
  shape_definition(input, data)
383
458
  end
384
- # [31] inlineShapeDefinition ::= (extraPropertySet | "CLOSED")* '{' tripleExpression? '}'
459
+ # [34] inlineShapeDefinition ::= (includeSet | extraPropertySet | "CLOSED")* '{' tripleExpression? '}'
385
460
  production(:inlineShapeDefinition) do |input, data, callback|
386
461
  shape_definition(input, data)
387
462
  end
388
463
  def shape_definition(input, data)
464
+ # FIXME: includeSet
389
465
  expression = data[:tripleExpression]
390
466
  attrs = Array(data[:extraPropertySet])
391
467
  attrs << :closed if data[:closed]
392
- attrs << expression
468
+ attrs << expression if expression
393
469
  attrs += Array(data[:annotation])
394
470
  attrs += Array(data[:codeDecl])
395
471
 
396
- input[:shape] = Algebra::Shape.new(*attrs, {}) if expression
472
+ input[:shape] = Algebra::Shape.new(*attrs, {})
397
473
  end
398
474
  private :shape_definition
399
475
 
400
- # [32] extraPropertySet ::= "EXTRA" predicate+
476
+ # [35] extraPropertySet ::= "EXTRA" predicate+
401
477
  production(:extraPropertySet) do |input, data, callback|
402
478
  (input[:extraPropertySet] ||= []) << data[:predicate].unshift(:extra)
403
479
  end
404
480
 
405
- # [33] tripleExpression ::= oneOfTripleExpr
406
- # [34] oneOfTripleExpr ::= groupTripleExpr ('|' groupTripleExpr)*
481
+ # [36] tripleExpression ::= oneOfTripleExpr
482
+ # [37] oneOfTripleExpr ::= groupTripleExpr ('|' groupTripleExpr)*
407
483
  production(:oneOfTripleExpr) do |input, data, callback|
408
484
  expression = if Array(data[:tripleExpression]).length > 1
409
485
  Algebra::OneOf.new(*data[:tripleExpression], {})
@@ -413,7 +489,7 @@ module ShEx
413
489
  input[:tripleExpression] = expression if expression
414
490
  end
415
491
 
416
- # [37] groupTripleExpr ::= unaryTripleExpr (';' unaryTripleExpr?)*
492
+ # [40] groupTripleExpr ::= unaryTripleExpr (';' unaryTripleExpr?)*
417
493
  production(:groupTripleExpr) do |input, data, callback|
418
494
  expression = if Array(data[:tripleExpression]).length > 1
419
495
  Algebra::EachOf.new(*data[:tripleExpression], {})
@@ -423,7 +499,7 @@ module ShEx
423
499
  (input[:tripleExpression] ||= []) << expression if expression
424
500
  end
425
501
 
426
- # [40] unaryTripleExpr ::= productionLabel? (tripleConstraint | bracketedTripleExpr) | include
502
+ # [43] unaryTripleExpr ::= productionLabel? (tripleConstraint | bracketedTripleExpr) | include
427
503
  production(:unaryTripleExpr) do |input, data, callback|
428
504
  expression = data[:tripleExpression]
429
505
  expression.id = data[:productionLabel] if expression && data[:productionLabel]
@@ -431,7 +507,12 @@ module ShEx
431
507
  (input[:tripleExpression] ||= []) << expression if expression
432
508
  end
433
509
 
434
- # [41] bracketedTripleExpr ::= '(' oneOfTripleExpr ')' cardinality? annotation* semanticActions
510
+ # [43a] productionLabel ::= '$' (iri | blankNode)
511
+ production(:productionLabel) do |input, data, callback|
512
+ input[:productionLabel] = data[:iri] || data[:blankNode]
513
+ end
514
+
515
+ # [44] bracketedTripleExpr ::= '(' oneOfTripleExpr ')' cardinality? annotation* semanticActions
435
516
  production(:bracketedTripleExpr) do |input, data, callback|
436
517
  # XXX cardinality? annotation* semanticActions
437
518
  case expression = data[:tripleExpression]
@@ -451,18 +532,13 @@ module ShEx
451
532
  input[:tripleExpression] = expression
452
533
  end
453
534
 
454
- # [42] productionLabel ::= '$' (iri | blankNode)
455
- production(:productionLabel) do |input, data, callback|
456
- input[:productionLabel] = data[:iri] || data[:blankNode]
457
- end
458
-
459
- # [43] tripleConstraint ::= senseFlags? predicate shapeExpression cardinality? annotation* semanticActions
535
+ # [45] tripleConstraint ::= senseFlags? predicate shapeExpression cardinality? annotation* semanticActions
460
536
  production(:tripleConstraint) do |input, data, callback|
461
537
  cardinality = data.fetch(:cardinality, {})
462
538
  attrs = [
463
539
  (:inverse if data[:inverse] || data[:not]),
464
540
  [:predicate, Array(data[:predicate]).first],
465
- data[:shapeExpression],
541
+ Array(data[:shapeExpression]).first,
466
542
  ([:min, cardinality[:min]] if cardinality[:min]),
467
543
  ([:max, cardinality[:max]] if cardinality[:max])
468
544
  ].compact
@@ -472,71 +548,146 @@ module ShEx
472
548
  input[:tripleExpression] = Algebra::TripleConstraint.new(*attrs, {}) unless attrs.empty?
473
549
  end
474
550
 
475
- # [44] cardinality ::= '*' | '+' | '?' | REPEAT_RANGE
476
- # [45] senseFlags ::= '^'
477
- # [46] valueSet ::= '[' valueSetValue* ']'
551
+ # [46] cardinality ::= '*' | '+' | '?' | REPEAT_RANGE
552
+ # [47] senseFlags ::= '^'
553
+ # [48] valueSet ::= '[' valueSetValue* ']'
478
554
 
479
- # [47] valueSetValue ::= iriRange | literal
555
+ # [49] valueSetValue ::= iriRange | literalRange | languageRange | '.' exclusion+
480
556
  production(:valueSetValue) do |input, data, callback|
481
- (input[:valueSetValue] ||= []) << Algebra::Value.new(data[:iriRange] || data[:literal])
557
+ range = data[:iriRange] || data[:literalRange] || data[:languageRange]
558
+ if !range
559
+ # All exclusions must be consistent IRI/Literal/Language
560
+ case data[:exclusion].first
561
+ when Algebra::IriStem, RDF::URI
562
+ unless data[:exclusion].all? {|e| e.is_a?(Algebra::IriStem) || e.is_a?(RDF::URI)}
563
+ error(nil, "Exclusions must all be IRI type")
564
+ end
565
+ range = Algebra::IriStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
566
+ when Algebra::LiteralStem, RDF::Literal
567
+ unless data[:exclusion].all? {|e| e.is_a?(Algebra::LiteralStem) || e.is_a?(RDF::Literal)}
568
+ error(nil, "Exclusions must all be Literal type")
569
+ end
570
+ range = Algebra::LiteralStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
571
+ else
572
+ unless data[:exclusion].all? {|e| e.is_a?(Algebra::LanguageStem) || e.is_a?(String)}
573
+ error(nil, "Exclusions must all be Language type")
574
+ end
575
+ range = Algebra::LanguageStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
576
+ end
577
+ end
578
+ (input[:valueSetValue] ||= []) << Algebra::Value.new(range)
579
+ end
580
+
581
+ # [50] exclusion ::= '-' (iri | literal | LANGTAG) '~'?
582
+ production(:exclusion) do |input, data, callback|
583
+ (input[:exclusion] ||= []) << if data[:pattern]
584
+ case
585
+ when data[:iri] then Algebra::IriStem.new(data[:iri])
586
+ when data[:literal] then Algebra::LiteralStem.new(data[:literal])
587
+ when data[:language] then Algebra::LanguageStem.new(data[:language])
588
+ end
589
+ else
590
+ data[:iri] || data[:literal] || data[:language]
591
+ end
482
592
  end
483
593
 
484
- # [48] iriRange ::= iri ('~' exclusion*)? | '.' exclusion+
594
+ # [51] iriRange ::= iri ('~' iriExclusion*)?
485
595
  production(:iriRange) do |input, data, callback|
486
596
  exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
487
597
  input[:iriRange] = if data[:pattern] && exclusions
488
- Algebra::StemRange.new(data[:iri], exclusions)
598
+ Algebra::IriStemRange.new(data[:iri], exclusions)
489
599
  elsif data[:pattern]
490
- Algebra::Stem.new(data[:iri])
600
+ Algebra::IriStem.new(data[:iri])
491
601
  elsif data[:dot]
492
- Algebra::StemRange.new(:wildcard, exclusions)
602
+ Algebra::IriStemRange.new(:wildcard, exclusions)
493
603
  else
494
604
  data[:iri]
495
605
  end
496
606
  end
497
607
 
498
- # [49] exclusion ::= '-' iri '~'?
499
- production(:exclusion) do |input, data, callback|
500
- (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::Stem.new(data[:iri]) : data[:iri])
608
+ # [52] iriExclusion ::= '-' iri '~'?
609
+ production(:iriExclusion) do |input, data, callback|
610
+ val = data[:iri]
611
+ (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::IriStem.new(val) : val)
612
+ end
613
+
614
+ # [53] literalRange ::= literal ('~' literalExclusion*)?
615
+ production(:literalRange) do |input, data, callback|
616
+ exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
617
+ input[:literalRange] = if data[:pattern] && exclusions
618
+ Algebra::LiteralStemRange.new(data[:literal], exclusions)
619
+ elsif data[:pattern]
620
+ Algebra::LiteralStem.new(data[:literal])
621
+ elsif data[:dot]
622
+ Algebra::LiteralStemRange.new(:wildcard, exclusions)
623
+ else
624
+ data[:literal]
625
+ end
626
+ end
627
+
628
+ # [54] literalExclusion ::= '-' literal '~'?
629
+ production(:literalExclusion) do |input, data, callback|
630
+ val = data[:literal]
631
+ (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::LiteralStem.new(val) : val)
632
+ end
633
+
634
+ # [55] languageRange ::= LANGTAG ('~' languageExclusion*)?
635
+ production(:languageRange) do |input, data, callback|
636
+ exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
637
+ input[:languageRange] = if data[:pattern] && exclusions
638
+ Algebra::LanguageStemRange.new(data[:language], exclusions)
639
+ elsif data[:pattern]
640
+ Algebra::LanguageStem.new(data[:language])
641
+ elsif data[:dot]
642
+ Algebra::LanguageStemRange.new(:wildcard, exclusions)
643
+ else
644
+ Algebra::Language.new(data[:language])
645
+ end
646
+ end
647
+
648
+ # [56] languageExclusion ::= '-' literal '~'?
649
+ production(:languageExclusion) do |input, data, callback|
650
+ val = data[:language]
651
+ (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::LanguageStem.new(val) : val)
501
652
  end
502
653
 
503
- # [50] include ::= '&' shapeLabel
654
+ # [57] include ::= '&' shapeLabel
504
655
  production(:include) do |input, data, callback|
505
656
  input[:tripleExpression] = data[:shapeLabel].first
506
657
  end
507
658
 
508
- # [51] annotation ::= '//' predicate (iri | literal)
659
+ # [58] annotation ::= '//' predicate (iri | literal)
509
660
  production(:annotation) do |input, data, callback|
510
661
  annotation = Algebra::Annotation.new([:predicate, data[:predicate].first], (data[:iri] || data[:literal]))
511
662
  (input[:annotation] ||= []) << annotation
512
663
  end
513
664
 
514
- # [52] semanticActions ::= codeDecl*
665
+ # [59] semanticActions ::= codeDecl*
515
666
 
516
- # [53] codeDecl ::= '%' iri (CODE | "%")
667
+ # [60] codeDecl ::= '%' iri (CODE | "%")
517
668
  production(:codeDecl) do |input, data, callback|
518
669
  (input[:codeDecl] ||= []) << Algebra::SemAct.new(*[data[:iri], data[:code]].compact, {})
519
670
  end
520
671
 
521
672
  # [13t] literal ::= rdfLiteral | numericLiteral | booleanLiteral
522
673
 
523
- # [54] predicate ::= iri | RDF_TYPE
674
+ # [61] predicate ::= iri | RDF_TYPE
524
675
  production(:predicate) do |input, data, callback|
525
676
  (input[:predicate] ||= []) << data[:iri]
526
677
  end
527
678
 
528
- # [55] datatype ::= iri
679
+ # [62] datatype ::= iri
529
680
  production(:datatype) do |input, data, callback|
530
681
  input[:datatype] = data[:iri]
531
682
  end
532
683
 
533
- # [56] shapeLabel ::= iri | blankNode
684
+ # [63] shapeLabel ::= iri | blankNode
534
685
  production(:shapeLabel) do |input, data, callback|
535
686
  (input[:shapeLabel] ||= []) << (data[:iri] || data[:blankNode])
536
687
  end
537
688
 
538
689
  # [16t] numericLiteral ::= INTEGER | DECIMAL | DOUBLE
539
- # [129s] rdfLiteral ::= string (LANGTAG | '^^' datatype)?
690
+ # [129s] rdfLiteral ::= langString | string ('^^' datatype)?
540
691
  production(:rdfLiteral) do |input, data, callback|
541
692
  input[:literal] = literal(data[:string], data)
542
693
  end
@@ -544,8 +695,10 @@ module ShEx
544
695
  # [134s] booleanLiteral ::= 'true' | 'false'
545
696
  # [135s] string ::= STRING_LITERAL1 | STRING_LITERAL_LONG1
546
697
  # | STRING_LITERAL2 | STRING_LITERAL_LONG2
698
+ # [66] langString ::= LANG_STRING_LITERAL1 | LANG_STRING_LITERAL_LONG1
699
+ # | LANG_STRING_LITERAL2 | LANG_STRING_LITERAL_LONG2
547
700
  # [136s] iri ::= IRIREF | prefixedName
548
- # [137s] prefixedName ::= PNAME_LN | PNAME_NS
701
+ # [1372] prefixedName ::= PNAME_LN | PNAME_NS
549
702
  # [138s] blankNode ::= BLANK_NODE_LABEL
550
703
 
551
704
  ##
@@ -577,7 +730,7 @@ module ShEx
577
730
  # @raise [ShEx::NotSatisfied] if not satisfied
578
731
  # @raise [ShEx::ParseError] when a syntax error is detected
579
732
  # @raise [ShEx::StructureError, ArgumentError] on structural problems with schema
580
- def initialize(input = nil, options = {}, &block)
733
+ def initialize(input = nil, **options, &block)
581
734
  @input = case input
582
735
  when IO, StringIO then input.read
583
736
  else input.to_s.dup
@@ -622,13 +775,16 @@ module ShEx
622
775
  # @return [ShEx::Algebra::Schema] The executable parsed expression.
623
776
  # @raise [ShEx::ParseError] when a syntax error is detected
624
777
  # @raise [ShEx::StructureError, ArgumentError] on structural problems with schema
625
- # @see http://www.w3.org/TR/sparql11-query/#sparqlAlgebra
626
- # @see http://axel.deri.ie/sparqltutorial/ESWC2007_SPARQL_Tutorial_unit2b.pdf
778
+ # @see https://www.w3.org/TR/sparql11-query/#sparqlAlgebra
779
+ # @see https://axel.deri.ie/sparqltutorial/ESWC2007_SPARQL_Tutorial_unit2b.pdf
627
780
  def parse(prod = START)
628
- ll1_parse(@input, prod.to_sym, @options.merge(branch: BRANCH,
629
- first: FIRST,
630
- follow: FOLLOW,
631
- whitespace: WS)
781
+ ll1_parse(@input,
782
+ prod.to_sym,
783
+ branch: BRANCH,
784
+ first: FIRST,
785
+ follow: FOLLOW,
786
+ whitespace: WS,
787
+ **@options
632
788
  ) do |context, *data|
633
789
  case context
634
790
  when :trace
@@ -638,7 +794,7 @@ module ShEx
638
794
  when 0
639
795
  log_error(*args, depth: depth, lineno: lineno)
640
796
  when 1
641
- log_warning(*args, depth: depth, lineno: lineno)
797
+ log_warn(*args, depth: depth, lineno: lineno)
642
798
  when 2
643
799
  log_info(*args, depth: depth, lineno: lineno)
644
800
  else
@@ -776,7 +932,7 @@ module ShEx
776
932
  end
777
933
 
778
934
  # Create a literal
779
- def literal(value, options = {})
935
+ def literal(value, **options)
780
936
  options = options.dup
781
937
  # Internal representation is to not use xsd:string, although it could arguably go the other way.
782
938
  options.delete(:datatype) if options[:datatype] == RDF::XSD.string