shex 0.4.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ # -*- encoding: utf-8 -*-
1
2
  require 'ebnf'
2
3
  require 'ebnf/ll1/parser'
3
4
  require 'shex/meta'
@@ -7,7 +8,7 @@ module ShEx
7
8
  # A parser for the ShEx grammar.
8
9
  #
9
10
  # @see https://www.w3.org/2005/01/yacker/uploads/ShEx3?lang=perl&markup=html#productions
10
- # @see http://en.wikipedia.org/wiki/LR_parser
11
+ # @see https://en.wikipedia.org/wiki/LR_parser
11
12
  class Parser
12
13
  include ShEx::Meta
13
14
  include ShEx::Terminals
@@ -36,7 +37,7 @@ module ShEx
36
37
  # The internal representation of the result using hierarchy of RDF objects and ShEx::Operator
37
38
  # objects.
38
39
  # @return [Array]
39
- # @see http://sparql.rubyforge.org/algebra
40
+ # @see https://www.rubydoc.info/github/ruby-rdf/sparql/SPARQL/Algebra
40
41
  attr_accessor :result
41
42
 
42
43
  # Terminals passed to lexer. Order matters!
@@ -102,6 +103,18 @@ module ShEx
102
103
  terminal(:LANGTAG, LANGTAG) do |prod, token, input|
103
104
  input[:language] = token.value[1..-1]
104
105
  end
106
+ terminal(:LANG_STRING_LITERAL_LONG1, LANG_STRING_LITERAL_LONG1, unescape: true) do |prod, token, input|
107
+ input[:string], _, input[:language] = token.value[3..-1].rpartition("'''@")
108
+ end
109
+ terminal(:LANG_STRING_LITERAL_LONG2, LANG_STRING_LITERAL_LONG2, unescape: true) do |prod, token, input|
110
+ input[:string], _, input[:language] = token.value[3..-1].rpartition('"""@')
111
+ end
112
+ terminal(:LANG_STRING_LITERAL1, LANG_STRING_LITERAL1, unescape: true) do |prod, token, input|
113
+ input[:string], _, input[:language] = token.value[1..-1].rpartition("'@")
114
+ end
115
+ terminal(:LANG_STRING_LITERAL2, LANG_STRING_LITERAL2, unescape: true) do |prod, token, input|
116
+ input[:string], _, input[:language] = token.value[1..-1].rpartition('"@')
117
+ end
105
118
  terminal(:STRING_LITERAL_LONG1, STRING_LITERAL_LONG1, unescape: true) do |prod, token, input|
106
119
  input[:string] = token.value[3..-4]
107
120
  end
@@ -114,6 +127,9 @@ module ShEx
114
127
  terminal(:STRING_LITERAL2, STRING_LITERAL2, unescape: true) do |prod, token, input|
115
128
  input[:string] = token.value[1..-2]
116
129
  end
130
+ terminal(:REGEXP, REGEXP) do |prod, token, input|
131
+ input[:regexp] = token.value
132
+ end
117
133
  terminal(:RDF_TYPE, RDF_TYPE) do |prod, token, input|
118
134
  input[:iri] = (a = RDF.type.dup; a.lexical = 'a'; a)
119
135
  end
@@ -143,8 +159,7 @@ module ShEx
143
159
  'MINEXCLUSIVE',
144
160
  'MAXINCLUSIVE',
145
161
  'MAXEXCLUSIVE' then input[:numericRange] = token.value.downcase.to_sym
146
- when 'NOT' then input[:not] = token.value.downcase.to_sym
147
- when 'PATTERN' then input[:pattern] = token.value.downcase.to_sym
162
+ when 'NOT' then input[:not] = token.value.downcase.to_sym
148
163
  when 'START' then input[:start] = token.value.downcase.to_sym
149
164
  else
150
165
  #raise "Unexpected MC terminal: #{token.inspect}"
@@ -163,7 +178,7 @@ module ShEx
163
178
  expressions << Algebra::Start.new(data[:start]) if data[:start]
164
179
  expressions << data[:shapes].unshift(:shapes) if data[:shapes]
165
180
 
166
- input[:schema] = Algebra::Schema.new(*expressions, options)
181
+ input[:schema] = Algebra::Schema.new(*expressions, **options)
167
182
  self
168
183
  end
169
184
 
@@ -184,7 +199,7 @@ module ShEx
184
199
  # [5] notStartAction ::= start | shapeExprDecl
185
200
  # [6] start ::= "start" '=' shapeExpression
186
201
  production(:start) do |input, data, callback|
187
- input[:start] = data[:shapeExpression]
202
+ input[:start] = Array(data[:shapeExpression]).first || data[:shape]
188
203
  end
189
204
  # [7] startActions ::= codeDecl+
190
205
 
@@ -193,9 +208,9 @@ module ShEx
193
208
  # [9] shapeExprDecl ::= shapeLabel (shapeExpression|"EXTERNAL")
194
209
  production(:shapeExprDecl) do |input, data, callback|
195
210
  id = Array(data[:shapeLabel]).first
196
- expression = case data[:shapeExpression]
211
+ expression = case Array(data[:shapeExpression]).first
197
212
  when Algebra::NodeConstraint, Algebra::Or, Algebra::And, Algebra::Not, Algebra::Shape, RDF::Resource
198
- data[:shapeExpression]
213
+ Array(data[:shapeExpression]).first
199
214
  else
200
215
  data[:external] ? Algebra::External.new() : Algebra::Shape.new()
201
216
  end
@@ -204,13 +219,36 @@ module ShEx
204
219
  (input[:shapes] ||= []) << expression
205
220
  end
206
221
 
207
- # [10] shapeExpression ::= shapeOr
208
- # [11] inlineShapeExpression ::= inlineShapeOr
222
+ # [10] shapeExpression ::= shapeAtomNoRef shapeOr?
223
+ # | "NOT" (shapeAtomNoRef | shapeRef) shapeOr?
224
+ # | shapeRef shapeOr
225
+ production(:shapeExpression) do |input, data, callback|
226
+ expression = Array(data[:shapeExpression]).first || data[:shape]
227
+ expression = Algebra::Not.new(expression) if data[:not]
228
+ (input[:shapeExpression] ||= []) << expression
229
+ end
209
230
 
210
- # [12] shapeOr ::= shapeAnd ("OR" shapeAnd)*
211
- production(:shapeOr) do |input, data, callback|
231
+ # [11] inlineShapeExpression ::= inlineShapeOr
232
+ # [12] shapeOr ::= shapeOrA | shapeOrB shapeOrA?
233
+ # [12a] shapeOrA ::= ("OR" shapeAnd)+
234
+ start_production(:shapeOrA) do |input, data, callback|
235
+ data[:shapeExpression] = input.delete(:shapeExpression)
236
+ data[:shapeExpression] ||= Array(input.delete(:shape)) if input[:shape]
237
+ data[:shapeExpression] = [Algebra::Not.new(*data[:shapeExpression])] if input.delete(:not)
238
+ end
239
+ production(:shapeOrA) do |input, data, callback|
212
240
  shape_or(input, data)
213
241
  end
242
+ # [12b] shapeOrB ::= ("AND" shapeNot)+
243
+ start_production(:shapeOrB) do |input, data, callback|
244
+ data[:shapeExpression] = input.delete(:shapeExpression)
245
+ data[:shapeExpression] ||= Array(input.delete(:shape)) if input[:shape]
246
+ data[:shapeExpression] = [Algebra::Not.new(*data[:shapeExpression])] if input.delete(:not)
247
+ end
248
+ production(:shapeOrB) do |input, data, callback|
249
+ shape_and(input, data)
250
+ end
251
+
214
252
  # [13] inlineShapeOr ::= inlineShapeAnd ("OR" inlineShapeAnd)*
215
253
  production(:inlineShapeOr) do |input, data, callback|
216
254
  shape_or(input, data)
@@ -222,7 +260,7 @@ module ShEx
222
260
  else
223
261
  Array(data[:shapeExpression]).first
224
262
  end
225
- input[:shapeExpression] = expression if expression
263
+ (input[:shapeExpression] ||= []) << expression if expression
226
264
  rescue ArgumentError => e
227
265
  error(nil, "Argument Error on OR: #{e.message}")
228
266
  end
@@ -239,8 +277,10 @@ module ShEx
239
277
  def shape_and(input, data)
240
278
  input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
241
279
  expressions = Array(data[:shapeExpression]).inject([]) do |memo, expr|
242
- memo.concat(expr.is_a?(Algebra::And) ? expr.operands : [expr])
280
+ #memo.concat(expr.is_a?(Algebra::And) ? expr.operands : [expr])
281
+ memo.concat([expr])
243
282
  end
283
+
244
284
  expression = if expressions.length > 1
245
285
  Algebra::And.new(*expressions, {})
246
286
  else
@@ -262,7 +302,7 @@ module ShEx
262
302
  end
263
303
  def shape_not(input, data)
264
304
  input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
265
- expression = data[:shapeExpression]
305
+ expression = Array(data[:shapeExpression]).first
266
306
  expression = Algebra::Not.new(expression) if data[:not]
267
307
  #error(nil, "Expected an atom for NOT") unless expression
268
308
  (input[:shapeExpression] ||= []) << expression if expression
@@ -276,7 +316,16 @@ module ShEx
276
316
  production(:shapeAtom) do |input, data, callback|
277
317
  shape_atom(input, data)
278
318
  end
279
- # [19] inlineShapeAtom ::= nodeConstraint inlineShapeOrRef?
319
+
320
+ # [19] shapeAtomNoRef ::= nodeConstraint shapeOrRef?
321
+ # | shapeDefinition
322
+ # | "(" shapeExpression ")"
323
+ # | '.' # no constraint
324
+ production(:shapeAtomNoRef) do |input, data, callback|
325
+ shape_atom(input, data)
326
+ end
327
+
328
+ # [20] inlineShapeAtom ::= nodeConstraint inlineShapeOrRef?
280
329
  # | inlineShapeOrRef nodeConstraint?
281
330
  # | "(" shapeExpression ")"
282
331
  # | '.' # no constraint
@@ -285,7 +334,7 @@ module ShEx
285
334
  end
286
335
  def shape_atom(input, data)
287
336
  constraint = data[:nodeConstraint]
288
- shape = data[:shapeOrRef] || data[:shapeExpression]
337
+ shape = data[:shapeOrRef] || Array(data[:shapeExpression]).first || data[:shape]
289
338
  input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
290
339
 
291
340
  expression = [constraint, shape].compact
@@ -295,34 +344,36 @@ module ShEx
295
344
  else Algebra::And.new(*expression, {})
296
345
  end
297
346
 
298
- input[:shapeExpression] = expression if expression
347
+ (input[:shapeExpression] ||= []) << expression if expression
299
348
  end
300
349
  private :shape_atom
301
350
 
302
- # [20] shapeOrRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel | shapeDefinition
351
+ # [21] shapeOrRef ::= shapeDefinition | shapeRef
303
352
  production(:shapeOrRef) do |input, data, callback|
304
353
  shape_or_ref(input, data)
305
354
  end
306
- # [21] inlineShapeOrRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel | inlineShapeDefinition
355
+ # [22] inlineShapeOrRef ::= inlineShapeDefinition | shapeRef
307
356
  production(:inlineShapeOrRef) do |input, data, callback|
308
357
  shape_or_ref(input, data)
309
358
  end
310
359
  def shape_or_ref(input, data)
311
360
  input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
312
- if data[:shape] || Array(data[:shapeLabel]).first
313
- input[:shapeOrRef] = data[:shape] || Array(data[:shapeLabel]).first
314
- end
361
+ input[:shapeOrRef] = data[:shape] if data[:shape]
315
362
  rescue ArgumentError => e
316
363
  error(nil, "Argument Error on ShapeOrRef: #{e.message}")
317
364
  end
318
365
  private :shape_or_ref
319
366
 
320
- # [22] nodeConstraint ::= "LITERAL" xsFacet*
321
- # | nonLiteralKind stringFacet*
367
+ # [23] shapeRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel
368
+ production(:shapeRef) do |input, data, callback|
369
+ input[:shape] = Array(data[:shapeLabel]).first
370
+ end
371
+
372
+ # [24] litNodeConstraint ::= "LITERAL" xsFacet*
322
373
  # | datatype xsFacet*
323
374
  # | valueSet xsFacet*
324
- # | xsFacet+
325
- production(:nodeConstraint) do |input, data, callback|
375
+ # | numericFacet+
376
+ production(:litNodeConstraint) do |input, data, callback|
326
377
  # Semantic validate (A Syntax error)
327
378
  case
328
379
  when data[:datatype] && data[:numericFacet]
@@ -333,7 +384,7 @@ module ShEx
333
384
 
334
385
  attrs = []
335
386
  attrs << [:datatype, data[:datatype]] if data [:datatype]
336
- attrs += [data[:shapeAtomLiteral], data[:nonLiteralKind]]
387
+ attrs += Array(data[:shapeAtomLiteral])
337
388
  attrs += Array(data[:valueSetValue])
338
389
  attrs += Array(data[:numericFacet])
339
390
  attrs += Array(data[:stringFacet])
@@ -341,12 +392,23 @@ module ShEx
341
392
  input[:nodeConstraint] = Algebra::NodeConstraint.new(*attrs.compact, {})
342
393
  end
343
394
 
344
- # [23] nonLiteralKind ::= "IRI" | "BNODE" | "NONLITERAL"
395
+ # [25] nonLitNodeConstraint ::= nonLiteralKind stringFacet*
396
+ # | stringFacet+
397
+ production(:nonLitNodeConstraint) do |input, data, callback|
398
+ # Semantic validate (A Syntax error)
345
399
 
346
- # [24] xsFacet ::= stringFacet | numericFacet
347
- # [25] stringFacet ::= stringLength INTEGER
348
- # | "PATTERN" string
349
- # | '~' string # shortcut for "PATTERN"
400
+ attrs = []
401
+ attrs += Array(data[:nonLiteralKind])
402
+ attrs += Array(data[:stringFacet])
403
+
404
+ input[:nodeConstraint] = Algebra::NodeConstraint.new(*attrs.compact, {})
405
+ end
406
+
407
+ # [26] nonLiteralKind ::= "IRI" | "BNODE" | "NONLITERAL"
408
+
409
+ # [27] xsFacet ::= stringFacet | numericFacet
410
+ # [28] stringFacet ::= stringLength INTEGER
411
+ # | REGEXP
350
412
  production(:stringFacet) do |input, data, callback|
351
413
  input[:stringFacet] ||= []
352
414
  input[:stringFacet] << if data[:stringLength]
@@ -354,14 +416,27 @@ module ShEx
354
416
  error(nil, "#{data[:stringLength]} constraint may only be used once in a Node Constraint", production: :stringFacet)
355
417
  end
356
418
  [data[:stringLength], data[:literal]]
357
- elsif data[:pattern]
358
- [:pattern, data[:string]]
419
+ elsif re = data[:regexp]
420
+ unless re =~ %r(^/(.*)/([smix]*)$)
421
+ error(nil, "#{re.inspect} regular expression must be in the form /pattern/flags?", production: :stringFacet)
422
+ end
423
+ flags = $2 unless $2.to_s.empty?
424
+ pattern = $1.gsub('\\/', '/').gsub(UCHAR) do
425
+ [($1 || $2).hex].pack('U*')
426
+ end.force_encoding(Encoding::UTF_8)
427
+
428
+ # Any other escaped character is a syntax error
429
+ if pattern.match(%r([^\\]\\[^nrt/\\|\.?*+\[\]\(\){}$#x2D#x5B#x5D#x5E-]))
430
+ error(nil, "Regexp contains illegal escape: #{pattern.inspect}", production: :stringFacet)
431
+ end
432
+
433
+ [:pattern, pattern, flags].compact
359
434
  end
360
435
  end
361
436
 
362
- # [26] stringLength ::= "LENGTH" | "MINLENGTH" | "MAXLENGTH"
437
+ # [29] stringLength ::= "LENGTH" | "MINLENGTH" | "MAXLENGTH"
363
438
 
364
- # [27] numericFacet ::= numericRange (numericLiteral | string '^^' datatype )
439
+ # [30] numericFacet ::= numericRange (numericLiteral | string '^^' datatype )
365
440
  # | numericLength INTEGER
366
441
  production(:numericFacet) do |input, data, callback|
367
442
  input[:numericFacet] ||= []
@@ -374,36 +449,37 @@ module ShEx
374
449
  end
375
450
  end
376
451
 
377
- # [28] numericRange ::= "MININCLUSIVE" | "MINEXCLUSIVE" | "MAXINCLUSIVE" | "MAXEXCLUSIVE"
378
- # [29] numericLength ::= "TOTALDIGITS" | "FRACTIONDIGITS"
452
+ # [31] numericRange ::= "MININCLUSIVE" | "MINEXCLUSIVE" | "MAXINCLUSIVE" | "MAXEXCLUSIVE"
453
+ # [32] numericLength ::= "TOTALDIGITS" | "FRACTIONDIGITS"
379
454
 
380
- # [30] shapeDefinition ::= (extraPropertySet | "CLOSED")* '{' tripleExpression? '}' annotation* semanticActions
455
+ # [33] shapeDefinition ::= (includeSet | extraPropertySet | "CLOSED")* '{' tripleExpression? '}' annotation* semanticActions
381
456
  production(:shapeDefinition) do |input, data, callback|
382
457
  shape_definition(input, data)
383
458
  end
384
- # [31] inlineShapeDefinition ::= (extraPropertySet | "CLOSED")* '{' tripleExpression? '}'
459
+ # [34] inlineShapeDefinition ::= (includeSet | extraPropertySet | "CLOSED")* '{' tripleExpression? '}'
385
460
  production(:inlineShapeDefinition) do |input, data, callback|
386
461
  shape_definition(input, data)
387
462
  end
388
463
  def shape_definition(input, data)
464
+ # FIXME: includeSet
389
465
  expression = data[:tripleExpression]
390
466
  attrs = Array(data[:extraPropertySet])
391
467
  attrs << :closed if data[:closed]
392
- attrs << expression
468
+ attrs << expression if expression
393
469
  attrs += Array(data[:annotation])
394
470
  attrs += Array(data[:codeDecl])
395
471
 
396
- input[:shape] = Algebra::Shape.new(*attrs, {}) if expression
472
+ input[:shape] = Algebra::Shape.new(*attrs, {})
397
473
  end
398
474
  private :shape_definition
399
475
 
400
- # [32] extraPropertySet ::= "EXTRA" predicate+
476
+ # [35] extraPropertySet ::= "EXTRA" predicate+
401
477
  production(:extraPropertySet) do |input, data, callback|
402
478
  (input[:extraPropertySet] ||= []) << data[:predicate].unshift(:extra)
403
479
  end
404
480
 
405
- # [33] tripleExpression ::= oneOfTripleExpr
406
- # [34] oneOfTripleExpr ::= groupTripleExpr ('|' groupTripleExpr)*
481
+ # [36] tripleExpression ::= oneOfTripleExpr
482
+ # [37] oneOfTripleExpr ::= groupTripleExpr ('|' groupTripleExpr)*
407
483
  production(:oneOfTripleExpr) do |input, data, callback|
408
484
  expression = if Array(data[:tripleExpression]).length > 1
409
485
  Algebra::OneOf.new(*data[:tripleExpression], {})
@@ -413,7 +489,7 @@ module ShEx
413
489
  input[:tripleExpression] = expression if expression
414
490
  end
415
491
 
416
- # [37] groupTripleExpr ::= unaryTripleExpr (';' unaryTripleExpr?)*
492
+ # [40] groupTripleExpr ::= unaryTripleExpr (';' unaryTripleExpr?)*
417
493
  production(:groupTripleExpr) do |input, data, callback|
418
494
  expression = if Array(data[:tripleExpression]).length > 1
419
495
  Algebra::EachOf.new(*data[:tripleExpression], {})
@@ -423,7 +499,7 @@ module ShEx
423
499
  (input[:tripleExpression] ||= []) << expression if expression
424
500
  end
425
501
 
426
- # [40] unaryTripleExpr ::= productionLabel? (tripleConstraint | bracketedTripleExpr) | include
502
+ # [43] unaryTripleExpr ::= productionLabel? (tripleConstraint | bracketedTripleExpr) | include
427
503
  production(:unaryTripleExpr) do |input, data, callback|
428
504
  expression = data[:tripleExpression]
429
505
  expression.id = data[:productionLabel] if expression && data[:productionLabel]
@@ -431,7 +507,12 @@ module ShEx
431
507
  (input[:tripleExpression] ||= []) << expression if expression
432
508
  end
433
509
 
434
- # [41] bracketedTripleExpr ::= '(' oneOfTripleExpr ')' cardinality? annotation* semanticActions
510
+ # [43a] productionLabel ::= '$' (iri | blankNode)
511
+ production(:productionLabel) do |input, data, callback|
512
+ input[:productionLabel] = data[:iri] || data[:blankNode]
513
+ end
514
+
515
+ # [44] bracketedTripleExpr ::= '(' oneOfTripleExpr ')' cardinality? annotation* semanticActions
435
516
  production(:bracketedTripleExpr) do |input, data, callback|
436
517
  # XXX cardinality? annotation* semanticActions
437
518
  case expression = data[:tripleExpression]
@@ -451,18 +532,13 @@ module ShEx
451
532
  input[:tripleExpression] = expression
452
533
  end
453
534
 
454
- # [42] productionLabel ::= '$' (iri | blankNode)
455
- production(:productionLabel) do |input, data, callback|
456
- input[:productionLabel] = data[:iri] || data[:blankNode]
457
- end
458
-
459
- # [43] tripleConstraint ::= senseFlags? predicate shapeExpression cardinality? annotation* semanticActions
535
+ # [45] tripleConstraint ::= senseFlags? predicate shapeExpression cardinality? annotation* semanticActions
460
536
  production(:tripleConstraint) do |input, data, callback|
461
537
  cardinality = data.fetch(:cardinality, {})
462
538
  attrs = [
463
539
  (:inverse if data[:inverse] || data[:not]),
464
540
  [:predicate, Array(data[:predicate]).first],
465
- data[:shapeExpression],
541
+ Array(data[:shapeExpression]).first,
466
542
  ([:min, cardinality[:min]] if cardinality[:min]),
467
543
  ([:max, cardinality[:max]] if cardinality[:max])
468
544
  ].compact
@@ -472,71 +548,146 @@ module ShEx
472
548
  input[:tripleExpression] = Algebra::TripleConstraint.new(*attrs, {}) unless attrs.empty?
473
549
  end
474
550
 
475
- # [44] cardinality ::= '*' | '+' | '?' | REPEAT_RANGE
476
- # [45] senseFlags ::= '^'
477
- # [46] valueSet ::= '[' valueSetValue* ']'
551
+ # [46] cardinality ::= '*' | '+' | '?' | REPEAT_RANGE
552
+ # [47] senseFlags ::= '^'
553
+ # [48] valueSet ::= '[' valueSetValue* ']'
478
554
 
479
- # [47] valueSetValue ::= iriRange | literal
555
+ # [49] valueSetValue ::= iriRange | literalRange | languageRange | '.' exclusion+
480
556
  production(:valueSetValue) do |input, data, callback|
481
- (input[:valueSetValue] ||= []) << Algebra::Value.new(data[:iriRange] || data[:literal])
557
+ range = data[:iriRange] || data[:literalRange] || data[:languageRange]
558
+ if !range
559
+ # All exclusions must be of the same kind: all IRI, all Literal, or all Language
560
+ case data[:exclusion].first
561
+ when Algebra::IriStem, RDF::URI
562
+ unless data[:exclusion].all? {|e| e.is_a?(Algebra::IriStem) || e.is_a?(RDF::URI)}
563
+ error(nil, "Exclusions must all be IRI type")
564
+ end
565
+ range = Algebra::IriStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
566
+ when Algebra::LiteralStem, RDF::Literal
567
+ unless data[:exclusion].all? {|e| e.is_a?(Algebra::LiteralStem) || e.is_a?(RDF::Literal)}
568
+ error(nil, "Exclusions must all be Literal type")
569
+ end
570
+ range = Algebra::LiteralStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
571
+ else
572
+ unless data[:exclusion].all? {|e| e.is_a?(Algebra::LanguageStem) || e.is_a?(String)}
573
+ error(nil, "Exclusions must all be Language type")
574
+ end
575
+ range = Algebra::LanguageStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
576
+ end
577
+ end
578
+ (input[:valueSetValue] ||= []) << Algebra::Value.new(range)
579
+ end
580
+
581
+ # [50] exclusion ::= '-' (iri | literal | LANGTAG) '~'?
582
+ production(:exclusion) do |input, data, callback|
583
+ (input[:exclusion] ||= []) << if data[:pattern]
584
+ case
585
+ when data[:iri] then Algebra::IriStem.new(data[:iri])
586
+ when data[:literal] then Algebra::LiteralStem.new(data[:literal])
587
+ when data[:language] then Algebra::LanguageStem.new(data[:language])
588
+ end
589
+ else
590
+ data[:iri] || data[:literal] || data[:language]
591
+ end
482
592
  end
483
593
 
484
- # [48] iriRange ::= iri ('~' exclusion*)? | '.' exclusion+
594
+ # [51] iriRange ::= iri ('~' iriExclusion*)?
485
595
  production(:iriRange) do |input, data, callback|
486
596
  exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
487
597
  input[:iriRange] = if data[:pattern] && exclusions
488
- Algebra::StemRange.new(data[:iri], exclusions)
598
+ Algebra::IriStemRange.new(data[:iri], exclusions)
489
599
  elsif data[:pattern]
490
- Algebra::Stem.new(data[:iri])
600
+ Algebra::IriStem.new(data[:iri])
491
601
  elsif data[:dot]
492
- Algebra::StemRange.new(:wildcard, exclusions)
602
+ Algebra::IriStemRange.new(:wildcard, exclusions)
493
603
  else
494
604
  data[:iri]
495
605
  end
496
606
  end
497
607
 
498
- # [49] exclusion ::= '-' iri '~'?
499
- production(:exclusion) do |input, data, callback|
500
- (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::Stem.new(data[:iri]) : data[:iri])
608
+ # [52] iriExclusion ::= '-' iri '~'?
609
+ production(:iriExclusion) do |input, data, callback|
610
+ val = data[:iri]
611
+ (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::IriStem.new(val) : val)
612
+ end
613
+
614
+ # [53] literalRange ::= literal ('~' literalExclusion*)?
615
+ production(:literalRange) do |input, data, callback|
616
+ exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
617
+ input[:literalRange] = if data[:pattern] && exclusions
618
+ Algebra::LiteralStemRange.new(data[:literal], exclusions)
619
+ elsif data[:pattern]
620
+ Algebra::LiteralStem.new(data[:literal])
621
+ elsif data[:dot]
622
+ Algebra::LiteralStemRange.new(:wildcard, exclusions)
623
+ else
624
+ data[:literal]
625
+ end
626
+ end
627
+
628
+ # [54] literalExclusion ::= '-' literal '~'?
629
+ production(:literalExclusion) do |input, data, callback|
630
+ val = data[:literal]
631
+ (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::LiteralStem.new(val) : val)
632
+ end
633
+
634
+ # [55] languageRange ::= LANGTAG ('~' languageExclusion*)?
635
+ production(:languageRange) do |input, data, callback|
636
+ exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
637
+ input[:languageRange] = if data[:pattern] && exclusions
638
+ Algebra::LanguageStemRange.new(data[:language], exclusions)
639
+ elsif data[:pattern]
640
+ Algebra::LanguageStem.new(data[:language])
641
+ elsif data[:dot]
642
+ Algebra::LanguageStemRange.new(:wildcard, exclusions)
643
+ else
644
+ Algebra::Language.new(data[:language])
645
+ end
646
+ end
647
+
648
+ # [56] languageExclusion ::= '-' LANGTAG '~'?
649
+ production(:languageExclusion) do |input, data, callback|
650
+ val = data[:language]
651
+ (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::LanguageStem.new(val) : val)
501
652
  end
502
653
 
503
- # [50] include ::= '&' shapeLabel
654
+ # [57] include ::= '&' shapeLabel
504
655
  production(:include) do |input, data, callback|
505
656
  input[:tripleExpression] = data[:shapeLabel].first
506
657
  end
507
658
 
508
- # [51] annotation ::= '//' predicate (iri | literal)
659
+ # [58] annotation ::= '//' predicate (iri | literal)
509
660
  production(:annotation) do |input, data, callback|
510
661
  annotation = Algebra::Annotation.new([:predicate, data[:predicate].first], (data[:iri] || data[:literal]))
511
662
  (input[:annotation] ||= []) << annotation
512
663
  end
513
664
 
514
- # [52] semanticActions ::= codeDecl*
665
+ # [59] semanticActions ::= codeDecl*
515
666
 
516
- # [53] codeDecl ::= '%' iri (CODE | "%")
667
+ # [60] codeDecl ::= '%' iri (CODE | "%")
517
668
  production(:codeDecl) do |input, data, callback|
518
669
  (input[:codeDecl] ||= []) << Algebra::SemAct.new(*[data[:iri], data[:code]].compact, {})
519
670
  end
520
671
 
521
672
  # [13t] literal ::= rdfLiteral | numericLiteral | booleanLiteral
522
673
 
523
- # [54] predicate ::= iri | RDF_TYPE
674
+ # [61] predicate ::= iri | RDF_TYPE
524
675
  production(:predicate) do |input, data, callback|
525
676
  (input[:predicate] ||= []) << data[:iri]
526
677
  end
527
678
 
528
- # [55] datatype ::= iri
679
+ # [62] datatype ::= iri
529
680
  production(:datatype) do |input, data, callback|
530
681
  input[:datatype] = data[:iri]
531
682
  end
532
683
 
533
- # [56] shapeLabel ::= iri | blankNode
684
+ # [63] shapeLabel ::= iri | blankNode
534
685
  production(:shapeLabel) do |input, data, callback|
535
686
  (input[:shapeLabel] ||= []) << (data[:iri] || data[:blankNode])
536
687
  end
537
688
 
538
689
  # [16t] numericLiteral ::= INTEGER | DECIMAL | DOUBLE
539
- # [129s] rdfLiteral ::= string (LANGTAG | '^^' datatype)?
690
+ # [129s] rdfLiteral ::= langString | string ('^^' datatype)?
540
691
  production(:rdfLiteral) do |input, data, callback|
541
692
  input[:literal] = literal(data[:string], data)
542
693
  end
@@ -544,8 +695,10 @@ module ShEx
544
695
  # [134s] booleanLiteral ::= 'true' | 'false'
545
696
  # [135s] string ::= STRING_LITERAL1 | STRING_LITERAL_LONG1
546
697
  # | STRING_LITERAL2 | STRING_LITERAL_LONG2
698
+ # [66] langString ::= LANG_STRING_LITERAL1 | LANG_STRING_LITERAL_LONG1
699
+ # | LANG_STRING_LITERAL2 | LANG_STRING_LITERAL_LONG2
547
700
  # [136s] iri ::= IRIREF | prefixedName
548
- # [137s] prefixedName ::= PNAME_LN | PNAME_NS
701
+ # [1372] prefixedName ::= PNAME_LN | PNAME_NS
549
702
  # [138s] blankNode ::= BLANK_NODE_LABEL
550
703
 
551
704
  ##
@@ -577,7 +730,7 @@ module ShEx
577
730
  # @raise [ShEx::NotSatisfied] if not satisfied
578
731
  # @raise [ShEx::ParseError] when a syntax error is detected
579
732
  # @raise [ShEx::StructureError, ArgumentError] on structural problems with schema
580
- def initialize(input = nil, options = {}, &block)
733
+ def initialize(input = nil, **options, &block)
581
734
  @input = case input
582
735
  when IO, StringIO then input.read
583
736
  else input.to_s.dup
@@ -622,13 +775,16 @@ module ShEx
622
775
  # @return [ShEx::Algebra::Schema] The executable parsed expression.
623
776
  # @raise [ShEx::ParseError] when a syntax error is detected
624
777
  # @raise [ShEx::StructureError, ArgumentError] on structural problems with schema
625
- # @see http://www.w3.org/TR/sparql11-query/#sparqlAlgebra
626
- # @see http://axel.deri.ie/sparqltutorial/ESWC2007_SPARQL_Tutorial_unit2b.pdf
778
+ # @see https://www.w3.org/TR/sparql11-query/#sparqlAlgebra
779
+ # @see https://axel.deri.ie/sparqltutorial/ESWC2007_SPARQL_Tutorial_unit2b.pdf
627
780
  def parse(prod = START)
628
- ll1_parse(@input, prod.to_sym, @options.merge(branch: BRANCH,
629
- first: FIRST,
630
- follow: FOLLOW,
631
- whitespace: WS)
781
+ ll1_parse(@input,
782
+ prod.to_sym,
783
+ branch: BRANCH,
784
+ first: FIRST,
785
+ follow: FOLLOW,
786
+ whitespace: WS,
787
+ **@options
632
788
  ) do |context, *data|
633
789
  case context
634
790
  when :trace
@@ -638,7 +794,7 @@ module ShEx
638
794
  when 0
639
795
  log_error(*args, depth: depth, lineno: lineno)
640
796
  when 1
641
- log_warning(*args, depth: depth, lineno: lineno)
797
+ log_warn(*args, depth: depth, lineno: lineno)
642
798
  when 2
643
799
  log_info(*args, depth: depth, lineno: lineno)
644
800
  else
@@ -776,7 +932,7 @@ module ShEx
776
932
  end
777
933
 
778
934
  # Create a literal
779
- def literal(value, options = {})
935
+ def literal(value, **options)
780
936
  options = options.dup
781
937
  # Internal representation is to not use xsd:string, although it could arguably go the other way.
782
938
  options.delete(:datatype) if options[:datatype] == RDF::XSD.string