shex 0.6.2 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/shex/parser.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'ebnf'
3
- require 'ebnf/ll1/parser'
3
+ require 'ebnf/peg/parser'
4
4
  require 'shex/meta'
5
5
 
6
6
  module ShEx
@@ -10,9 +10,8 @@ module ShEx
10
10
  # @see https://www.w3.org/2005/01/yacker/uploads/ShEx3?lang=perl&markup=html#productions
11
11
  # @see https://en.wikipedia.org/wiki/LR_parser
12
12
  class Parser
13
- include ShEx::Meta
14
13
  include ShEx::Terminals
15
- include EBNF::LL1::Parser
14
+ include EBNF::PEG::Parser
16
15
  include RDF::Util::Logger
17
16
 
18
17
  ##
@@ -27,679 +26,798 @@ module ShEx
27
26
  # @return [String]
28
27
  attr_accessor :input
29
28
 
30
- ##
31
- # The current input tokens being processed.
32
- #
33
- # @return [Array<Token>]
34
- attr_reader :tokens
35
-
36
29
  ##
37
30
  # The internal representation of the result using a hierarchy of RDF objects and ShEx::Operator
38
31
  # objects.
39
32
  # @return [Array]
40
- # @see https://www.rubydoc.info/github/ruby-rdf/sparql/SPARQL/Algebra
33
+ # @see https://ruby-rdf.github.io/sparql/SPARQL/Algebra
41
34
  attr_accessor :result
42
35
 
43
36
  # Terminals passed to lexer. Order matters!
44
- terminal(:CODE, CODE, unescape: true) do |prod, token, input|
37
+ terminal(:CODE, CODE, unescape: true) do |value|
45
38
  # { foo %}
46
39
  # Keep surrounding whitespace for now
47
- input[:code] = token.value[1..-2].sub(/%\s*$/, '') # Drop {} and %
40
+ value[1..-2].sub(/%\s*$/, '') # Drop {} and %
48
41
  end
49
- terminal(:REPEAT_RANGE, REPEAT_RANGE) do |prod, token, input|
50
- card = token.value[1..-2].split(',').map {|v| v =~ /^\d+$/ ? v.to_i : v}
51
- card[1] = token.value.include?(',') ? '*' : card[0] if card.length == 1
52
- input[:cardinality] = {min: card[0], max: card[1]}
42
+ terminal(:REPEAT_RANGE, REPEAT_RANGE) do |value|
43
+ card = value[1..-2].split(',').map {|v| v =~ /^\d+$/ ? v.to_i : v}
44
+ card[1] = value.include?(',') ? '*' : card[0] if card.length == 1
45
+ {min: card[0], max: card[1]}
53
46
  end
54
- terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |prod, token, input|
55
- input[:blankNode] = bnode(token.value[2..-1])
47
+ terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |value|
48
+ bnode(value[2..-1])
56
49
  end
57
- terminal(:IRIREF, IRIREF, unescape: true) do |prod, token, input|
50
+ terminal(:IRIREF, IRIREF, unescape: true) do |value|
58
51
  begin
59
- input[:iri] = iri(token.value[1..-2])
52
+ iri(value[1..-2])
60
53
  rescue ArgumentError => e
61
54
  raise Error, e.message
62
55
  end
63
56
  end
64
- terminal(:DOUBLE, DOUBLE) do |prod, token, input|
57
+ terminal(:DOUBLE, DOUBLE) do |value|
65
58
  # Note that a Turtle Double may contain '.[eE]' (no digits after the '.'), so insert
66
59
  # a zero after the decimal point if necessary
67
- value = token.value.sub(/\.([eE])/, '.0\1')
68
- input[:literal] = literal(value, datatype: RDF::XSD.double)
60
+ value = value.sub(/\.([eE])/, '.0\1')
61
+ literal(value, datatype: RDF::XSD.double)
69
62
  end
70
- terminal(:DECIMAL, DECIMAL) do |prod, token, input|
63
+ terminal(:DECIMAL, DECIMAL) do |value|
71
64
  # Note that a Turtle Decimal may begin with a '.'; the lexical value is passed
72
65
  # to the literal unchanged (no leading zero is added)
73
- value = token.value
74
- #value = "0#{token.value}" if token.value[0,1] == "."
75
- input[:literal] = literal(value, datatype: RDF::XSD.decimal)
66
+ literal(value, datatype: RDF::XSD.decimal)
76
67
  end
77
- terminal(:INTEGER, INTEGER) do |prod, token, input|
78
- input[:literal] = literal(token.value, datatype: RDF::XSD.integer)
68
+ terminal(:INTEGER, INTEGER) do |value|
69
+ literal(value, datatype: RDF::XSD.integer)
79
70
  end
80
- terminal(:PNAME_LN, PNAME_LN, unescape: true) do |prod, token, input|
81
- prefix, suffix = token.value.split(":", 2)
82
- input[:iri] = ns(prefix, suffix)
83
- error(nil, "Compact IRI missing prefix definition: #{token.value}", production: :PNAME_LN) unless prefix(prefix)
71
+ terminal(:PNAME_LN, PNAME_LN, unescape: true) do |value|
72
+ prefix, suffix = value.split(":", 2)
73
+ error(nil, "Compact IRI missing prefix definition: #{prefix}", production: :PNAME_LN) unless prefix(prefix)
74
+ ns(prefix, suffix)
84
75
  end
85
- terminal(:PNAME_NS, PNAME_NS) do |prod, token, input|
86
- prefix = token.value[0..-2]
87
-
88
- input[:iri] = ns(prefix, nil)
89
- input[:prefix] = prefix && prefix.to_sym
76
+ terminal(:PNAME_NS, PNAME_NS) do |value|
77
+ value[0..-2]
90
78
  end
91
- terminal(:ATPNAME_LN, ATPNAME_LN, unescape: true) do |prod, token, input|
92
- prefix, suffix = token.value.split(":", 2)
79
+ terminal(:ATPNAME_LN, ATPNAME_LN, unescape: true) do |value, parent_prod|
80
+ prefix, suffix = value.split(":", 2)
93
81
  prefix.sub!(/^@#{WS}*/, '')
94
- input[:shapeLabel] = ns(prefix, suffix)
95
- error(nil, "Compact IRI missing prefix definition: #{token.value}", production: :ATPNAME_LN) unless input[:shapeLabel].absolute?
82
+ ns(prefix, suffix)
96
83
  end
97
- terminal(:ATPNAME_NS, ATPNAME_NS) do |prod, token, input|
98
- prefix = token.value[0..-2]
84
+ terminal(:ATPNAME_NS, ATPNAME_NS) do |value|
85
+ prefix = value[0..-2]
99
86
  prefix.sub!(/^@\s*/, '')
100
87
 
101
- input[:shapeLabel] = ns(prefix, nil)
102
- end
103
- terminal(:LANGTAG, LANGTAG) do |prod, token, input|
104
- input[:language] = token.value[1..-1]
105
- end
106
- terminal(:LANG_STRING_LITERAL_LONG1, LANG_STRING_LITERAL_LONG1, unescape: true) do |prod, token, input|
107
- input[:string], _, input[:language] = token.value[3..-1].rpartition("'''@")
108
- end
109
- terminal(:LANG_STRING_LITERAL_LONG2, LANG_STRING_LITERAL_LONG2, unescape: true) do |prod, token, input|
110
- input[:string], _, input[:language] = token.value[3..-1].rpartition('"""@')
111
- end
112
- terminal(:LANG_STRING_LITERAL1, LANG_STRING_LITERAL1, unescape: true) do |prod, token, input|
113
- input[:string], _, input[:language] = token.value[1..-1].rpartition("'@")
114
- end
115
- terminal(:LANG_STRING_LITERAL2, LANG_STRING_LITERAL2, unescape: true) do |prod, token, input|
116
- input[:string], _, input[:language] = token.value[1..-1].rpartition('"@')
117
- end
118
- terminal(:STRING_LITERAL_LONG1, STRING_LITERAL_LONG1, unescape: true) do |prod, token, input|
119
- input[:string] = token.value[3..-4]
120
- end
121
- terminal(:STRING_LITERAL_LONG2, STRING_LITERAL_LONG2, unescape: true) do |prod, token, input|
122
- input[:string] = token.value[3..-4]
123
- end
124
- terminal(:STRING_LITERAL1, STRING_LITERAL1, unescape: true) do |prod, token, input|
125
- input[:string] = token.value[1..-2]
126
- end
127
- terminal(:STRING_LITERAL2, STRING_LITERAL2, unescape: true) do |prod, token, input|
128
- input[:string] = token.value[1..-2]
129
- end
130
- terminal(:REGEXP, REGEXP) do |prod, token, input|
131
- input[:regexp] = token.value
132
- end
133
- terminal(:RDF_TYPE, RDF_TYPE) do |prod, token, input|
134
- input[:iri] = (a = RDF.type.dup; a.lexical = 'a'; a)
135
- end
136
-
137
- # String terminals
138
- terminal(nil, STR_EXPR, map: STR_MAP) do |prod, token, input|
139
- case token.value
140
- when '*' then input[:cardinality] = {min: 0, max: "*"}
141
- when '+' then input[:cardinality] = {min: 1, max: "*"}
142
- when '?' then input[:cardinality] = {min: 0, max: 1}
143
- when '!' then input[:not] = token.value
144
- when '^' then input[:inverse] = token.value
145
- when '.' then input[:dot] = token.value
146
- when 'true', 'false' then input[:literal] = RDF::Literal::Boolean.new(token.value)
147
- when '~' then input[:pattern] = token.value
148
- when 'BNODE', 'IRI',
149
- 'NONLITERAL' then input[:nonLiteralKind] = token.value.downcase.to_sym
150
- when 'CLOSED' then input[:closed] = token.value.downcase.to_sym
151
- when 'EXTERNAL' then input[:external] = token.value.downcase.to_sym
152
- when 'FRACTIONDIGITS',
153
- 'TOTALDIGITS' then input[:numericLength] = token.value.downcase.to_sym
154
- when 'LITERAL' then input[:shapeAtomLiteral] = token.value.downcase.to_sym
155
- when 'LENGTH',
156
- 'MINLENGTH',
157
- 'MAXLENGTH' then input[:stringLength] = token.value.downcase.to_sym
158
- when 'MININCLUSIVE',
159
- 'MINEXCLUSIVE',
160
- 'MAXINCLUSIVE',
161
- 'MAXEXCLUSIVE' then input[:numericRange] = token.value.downcase.to_sym
162
- when 'NOT' then input[:not] = token.value.downcase.to_sym
163
- when 'START' then input[:start] = token.value.downcase.to_sym
164
- else
165
- #raise "Unexpected MC terminal: #{token.inspect}"
166
- end
88
+ ns(prefix, nil)
89
+ end
90
+ terminal(:LANGTAG, LANGTAG) do |value|
91
+ value[1..-1]
92
+ end
93
+ terminal(:LANG_STRING_LITERAL_LONG1, LANG_STRING_LITERAL_LONG1, unescape: true) do |value|
94
+ s, _, l = value[3..-1].rpartition("'''@")
95
+ [s, language: l]
96
+ end
97
+ terminal(:LANG_STRING_LITERAL_LONG2, LANG_STRING_LITERAL_LONG2, unescape: true) do |value|
98
+ s, _, l = value[3..-1].rpartition('"""@')
99
+ [s, language: l]
100
+ end
101
+ terminal(:LANG_STRING_LITERAL1, LANG_STRING_LITERAL1, unescape: true) do |value|
102
+ s, _, l = value[1..-1].rpartition("'@")
103
+ [s, language: l]
104
+ end
105
+ terminal(:LANG_STRING_LITERAL2, LANG_STRING_LITERAL2, unescape: true) do |value|
106
+ s, _, l = value[1..-1].rpartition('"@')
107
+ [s, language: l]
108
+ end
109
+ terminal(:STRING_LITERAL_LONG1, STRING_LITERAL_LONG1, unescape: true) do |value|
110
+ value[3..-4]
111
+ end
112
+ terminal(:STRING_LITERAL_LONG2, STRING_LITERAL_LONG2, unescape: true) do |value|
113
+ value[3..-4]
114
+ end
115
+ terminal(:STRING_LITERAL1, STRING_LITERAL1, unescape: true) do |value|
116
+ value[1..-2]
117
+ end
118
+ terminal(:STRING_LITERAL2, STRING_LITERAL2, unescape: true) do |value|
119
+ value[1..-2]
120
+ end
121
+ terminal(:REGEXP, REGEXP)
122
+ terminal(:RDF_TYPE, RDF_TYPE) do |value|
123
+ (a = RDF.type.dup; a.lexical = 'a'; a)
167
124
  end
168
125
 
169
126
  # Productions
170
127
  # [1] shexDoc ::= directive* ((notStartAction | startActions) statement*)?
171
- production(:shexDoc) do |input, data, callback|
172
- data[:start] = data[:start] if data[:start]
173
-
128
+ start_production(:shexDoc, as_hash: true, clear_packrat: true)
129
+ production(:shexDoc) do |value|
174
130
  expressions = []
175
- expressions << [:base, data[:baseDecl]] if data[:baseDecl]
176
- expressions << [:prefix, data[:prefixDecl]] if data[:prefixDecl]
177
- expressions += Array(data[:codeDecl])
178
- expressions << Algebra::Start.new(data[:start]) if data[:start]
179
- expressions << data[:shapes].unshift(:shapes) if data[:shapes]
131
+ prefixes = []
132
+
133
+ # directive *
134
+ expressions += value[:_shexDoc_1]
135
+
136
+ # ((notStartAction | startActions) statement*)?
137
+ if value = value[:_shexDoc_2]
138
+ # These may start with codeDecl or start. Otherwise, they are all shapes.
139
+ expressions += Array(value[:_shexDoc_4])
140
+ expressions += Array(value[:_shexDoc_5])
141
+ end
142
+
143
+ # Extract declarations, startacts and start from expressions
144
+ declarations, expressions = expressions.partition {|op| op.is_a?(Array)}
145
+ prefixes, bases = declarations.partition {|op| op.first == :prefix}
146
+ semacts, expressions = expressions.partition {|op| op.is_a?(Algebra::SemAct)}
147
+ starts, expressions = expressions.partition {|op| op.is_a?(Algebra::Start)}
180
148
 
181
- input[:schema] = Algebra::Schema.new(*expressions, **options)
182
- self
149
+ operands = []
150
+ operands += bases unless bases.empty?
151
+ unless prefixes.empty?
152
+ operands << [:prefix, prefixes.map {|p| p[1,2]}]
153
+ end
154
+ operands += semacts
155
+ operands += starts
156
+ operands << expressions.unshift(:shapes) unless expressions.empty?
157
+ Algebra::Schema.new(*operands, **self.options)
183
158
  end
159
+ start_production(:_shexDoc_2, as_hash: true)
160
+ start_production(:_shexDoc_3, as_hash: true)
184
161
 
185
- # [2] directive ::= baseDecl | prefixDecl
162
+ # [2] directive ::= baseDecl | prefixDecl | importDecl
186
163
 
187
164
  # [3] baseDecl ::= "BASE" IRIREF
188
- production(:baseDecl) do |input, data, callback|
189
- input[:baseDecl] = self.base_uri = iri(data[:iri])
165
+ start_production(:baseDecl, as_hash: true, insensitive_strings: :lower)
166
+ production(:baseDecl) do |value|
167
+ self.base_uri = iri(value[:IRIREF])
168
+ [:base, self.base_uri]
190
169
  end
191
170
 
192
171
  # [4] prefixDecl ::= "PREFIX" PNAME_NS IRIREF
193
- production(:prefixDecl) do |input, data, callback|
194
- pfx = data[:prefix]
195
- self.prefix(pfx, data[:iri])
196
- (input[:prefixDecl] ||= []) << [pfx.to_s, data[:iri]]
172
+ start_production(:prefixDecl, as_hash: true, insensitive_strings: :lower)
173
+ production(:prefixDecl) do |value|
174
+ pfx = value[:PNAME_NS]
175
+ prefix(pfx, value[:IRIREF])
176
+ [:prefix, pfx.to_s, value[:IRIREF]]
177
+ end
178
+
179
+ # [4½] importDecl ::= "IMPORT" IRIREF
180
+ start_production(:importDecl, as_hash: true, insensitive_strings: :lower)
181
+ production(:importDecl) do |value|
182
+ Algebra::Import.new(value[:IRIREF], **self.options)
197
183
  end
198
184
 
199
185
  # [5] notStartAction ::= start | shapeExprDecl
200
- # [6] start ::= "start" '=' shapeExpression
201
- production(:start) do |input, data, callback|
202
- input[:start] = Array(data[:shapeExpression]).first || data[:shape]
186
+ # [6] start ::= "START" '=' inlineShapeExpression
187
+ start_production(:start, as_hash: true, insensitive_strings: :lower)
188
+ production(:start) do |value|
189
+ Algebra::Start.new(value[:inlineShapeExpression], **self.options)
203
190
  end
191
+
204
192
  # [7] startActions ::= codeDecl+
205
193
 
206
194
  # [8] statement ::= directive | notStartAction
207
195
 
208
- # [9] shapeExprDecl ::= shapeLabel (shapeExpression|"EXTERNAL")
209
- production(:shapeExprDecl) do |input, data, callback|
210
- id = Array(data[:shapeLabel]).first
211
- expression = case Array(data[:shapeExpression]).first
196
+ # [9] shapeExprDecl ::= shapeExprLabel (shapeExpression | "EXTERNAL")
197
+ start_production(:shapeExprDecl, as_hash: true)
198
+ production(:shapeExprDecl) do |value|
199
+ id = value[:shapeExprLabel]
200
+ expression = case value[:_shapeExprDecl_1]
212
201
  when Algebra::NodeConstraint, Algebra::Or, Algebra::And, Algebra::Not, Algebra::Shape, RDF::Resource
213
- Array(data[:shapeExpression]).first
202
+ value[:_shapeExprDecl_1]
203
+ when /external/i
204
+ Algebra::External.new(**options)
214
205
  else
215
- data[:external] ? Algebra::External.new() : Algebra::Shape.new()
206
+ Algebra::Shape.new(**options)
216
207
  end
217
208
  expression.id = id if id && !expression.is_a?(RDF::Resource)
218
209
 
219
- (input[:shapes] ||= []) << expression
210
+ expression
220
211
  end
221
212
 
222
- # [10] shapeExpression ::= shapeAtomNoRef shapeOr?
223
- # | "NOT" (shapeAtomNoRef | shapeRef) shapeOr?
224
- # | shapeRef shapeOr
225
- production(:shapeExpression) do |input, data, callback|
226
- expression = Array(data[:shapeExpression]).first || data[:shape]
227
- expression = Algebra::Not.new(expression) if data[:not]
228
- (input[:shapeExpression] ||= []) << expression
213
+ # [10] shapeExpression ::= shapeOr
214
+ production(:shapeExpression) do |value|
215
+ value.first[:shapeOr]
229
216
  end
230
217
 
231
218
  # [11] inlineShapeExpression ::= inlineShapeOr
232
- # [12] shapeOr ::= shapeOrA | shapeOrB shapeOrA?
233
- # [12a] shapeOrA ::= ("OR" shapeAnd)+
234
- start_production(:shapeOrA) do |input, data, callback|
235
- data[:shapeExpression] = input.delete(:shapeExpression)
236
- data[:shapeExpression] ||= Array(input.delete(:shape)) if input[:shape]
237
- data[:shapeExpression] = [Algebra::Not.new(*data[:shapeExpression])] if input.delete(:not)
219
+ production(:inlineShapeExpression) do |value|
220
+ value.first[:inlineShapeOr]
238
221
  end
239
- production(:shapeOrA) do |input, data, callback|
240
- shape_or(input, data)
241
- end
242
- # [12b] shapeOrB ::= ("AND" shapeNot)+
243
- start_production(:shapeOrB) do |input, data, callback|
244
- data[:shapeExpression] = input.delete(:shapeExpression)
245
- data[:shapeExpression] ||= Array(input.delete(:shape)) if input[:shape]
246
- data[:shapeExpression] = [Algebra::Not.new(*data[:shapeExpression])] if input.delete(:not)
247
- end
248
- production(:shapeOrB) do |input, data, callback|
249
- shape_and(input, data)
222
+
223
+ # [12] shapeOr ::= shapeAnd ("OR" shapeAnd)*
224
+ start_production(:shapeOr, as_hash: true)
225
+ production(:shapeOr) do |value|
226
+ if value[:_shapeOr_1].empty?
227
+ value[:shapeAnd]
228
+ else
229
+ lhs = value[:_shapeOr_1].map {|v| v.last[:shapeAnd]}
230
+ Algebra::Or.new(value[:shapeAnd], *lhs, **self.options)
231
+ end
250
232
  end
233
+ start_production(:_shapeOr_2, insensitive_strings: :lower)
251
234
 
252
235
  # [13] inlineShapeOr ::= inlineShapeAnd ("OR" inlineShapeAnd)*
253
- production(:inlineShapeOr) do |input, data, callback|
254
- shape_or(input, data)
255
- end
256
- def shape_or(input, data)
257
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
258
- expression = if Array(data[:shapeExpression]).length > 1
259
- Algebra::Or.new(*data[:shapeExpression])
236
+ start_production(:inlineShapeOr, as_hash: true)
237
+ production(:inlineShapeOr) do |value|
238
+ if value[:_inlineShapeOr_1].empty?
239
+ value[:inlineShapeAnd]
260
240
  else
261
- Array(data[:shapeExpression]).first
241
+ lhs = value[:_inlineShapeOr_1].map {|v| v.last[:inlineShapeAnd]}
242
+ Algebra::Or.new(value[:inlineShapeAnd], *lhs, **self.options)
262
243
  end
263
- (input[:shapeExpression] ||= []) << expression if expression
264
- rescue ArgumentError => e
265
- error(nil, "Argument Error on OR: #{e.message}")
266
244
  end
267
- private :shape_or
245
+ start_production(:_inlineShapeOr_2, insensitive_strings: :lower)
268
246
 
269
247
  # [14] shapeAnd ::= shapeNot ("AND" shapeNot)*
270
- production(:shapeAnd) do |input, data, callback|
271
- shape_and(input, data)
272
- end
273
- # [15] inlineShapeAnd ::= inlineShapeNot ("AND" inlineShapeNot)*
274
- production(:inlineShapeAnd) do |input, data, callback|
275
- shape_and(input, data)
276
- end
277
- def shape_and(input, data)
278
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
279
- expressions = Array(data[:shapeExpression]).inject([]) do |memo, expr|
280
- #memo.concat(expr.is_a?(Algebra::And) ? expr.operands : [expr])
281
- memo.concat([expr])
248
+ start_production(:shapeAnd, as_hash: true)
249
+ production(:shapeAnd) do |value|
250
+ if value[:_shapeAnd_1].empty?
251
+ value[:shapeNot]
252
+ else
253
+ lhs = value[:_shapeAnd_1].map {|v| v.last[:shapeNot]}
254
+ Algebra::And.new(value[:shapeNot], *lhs, **self.options)
282
255
  end
256
+ end
257
+ start_production(:_shapeAnd_2, insensitive_strings: :lower)
283
258
 
284
- expression = if expressions.length > 1
285
- Algebra::And.new(*expressions)
259
+ # [15] inlineShapeAnd ::= inlineShapeNot ("AND" inlineShapeNot)*
260
+ start_production(:inlineShapeAnd, as_hash: true)
261
+ production(:inlineShapeAnd) do |value|
262
+ if value[:_inlineShapeAnd_1].empty?
263
+ value[:inlineShapeNot]
286
264
  else
287
- expressions.first
265
+ lhs = value[:_inlineShapeAnd_1].map {|v| v.last[:inlineShapeNot]}
266
+ Algebra::And.new(value[:inlineShapeNot], *lhs, **self.options)
288
267
  end
289
- (input[:shapeExpression] ||= []) << expression if expression
290
- rescue ArgumentError => e
291
- error(nil, "Argument Error on AND: #{e.message}")
292
268
  end
293
- private :shape_and
269
+ start_production(:_inlineShapeAnd_2, insensitive_strings: :lower)
294
270
 
295
271
  # [16] shapeNot ::= "NOT"? shapeAtom
296
- production(:shapeNot) do |input, data, callback|
297
- shape_not(input, data)
272
+ start_production(:shapeNot, as_hash: true)
273
+ production(:shapeNot) do |value|
274
+ atom = value[:shapeAtom]
275
+ value[:_shapeNot_1] ? Algebra::Not.new(atom || Algebra::Shape.new(**options), **self.options) : atom
298
276
  end
277
+ start_production(:_shapeNot_1, insensitive_strings: :lower)
278
+
299
279
  # [17] inlineShapeNot ::= "NOT"? inlineShapeAtom
300
- production(:inlineShapeNot) do |input, data, callback|
301
- shape_not(input, data)
280
+ start_production(:inlineShapeNot, as_hash: true)
281
+ production(:inlineShapeNot) do |value|
282
+ atom = value[:inlineShapeAtom]
283
+ value[:_inlineShapeNot_1] ? Algebra::Not.new(atom || Algebra::Shape.new(**options), **self.options) : atom
302
284
  end
303
- def shape_not(input, data)
304
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
305
- expression = Array(data[:shapeExpression]).first
306
- expression = Algebra::Not.new(expression) if data[:not]
307
- #error(nil, "Expected an atom for NOT") unless expression
308
- (input[:shapeExpression] ||= []) << expression if expression
309
- end
310
- private :shape_not
285
+ start_production(:_inlineShapeNot_1, insensitive_strings: :lower)
311
286
 
312
- # [18] shapeAtom ::= nodeConstraint shapeOrRef?
313
- # | shapeOrRef
287
+ # [18] shapeAtom ::= nonLitNodeConstraint shapeOrRef?
288
+ # | litNodeConstraint
289
+ # | shapeOrRef nonLitNodeConstraint?
314
290
  # | "(" shapeExpression ")"
315
291
  # | '.' # no constraint
316
- production(:shapeAtom) do |input, data, callback|
317
- shape_atom(input, data)
292
+ production(:shapeAtom) do |value|
293
+ expressions = case
294
+ when value.is_a?(Algebra::Operator)
295
+ [value]
296
+ when value == '.' then []
297
+ when value[:nonLitNodeConstraint]
298
+ [value[:nonLitNodeConstraint], value[:_shapeAtom_4]].compact
299
+ when value[:shapeOrRef]
300
+ [value[:shapeOrRef], value[:_shapeAtom_5]].compact
301
+ when value[:_shapeAtom_3]
302
+ value[:_shapeAtom_3]
303
+ else []
304
+ end
305
+
306
+ case expressions.length
307
+ when 0 then nil
308
+ when 1 then expressions.first
309
+ else Algebra::And.new(*expressions, **self.options)
310
+ end
318
311
  end
312
+ start_production(:_shapeAtom_1, as_hash: true)
313
+ start_production(:_shapeAtom_2, as_hash: true)
314
+ production(:_shapeAtom_3) do |value|
315
+ value[1][:shapeExpression]
316
+ end
317
+
318
+ # [19] shapeAtomNoRef ::= nonLitNodeConstraint shapeOrRef?
319
+ # | litNodeConstraint
320
+ # | shapeDefinition nonLitNodeConstraint?
321
+ # | "(" shapeExpression ")"
322
+ # | '.' # no constraint
323
+ production(:shapeAtomNoRef) do |value|
324
+ expressions = case
325
+ when value.is_a?(Algebra::Operator)
326
+ [value]
327
+ when value == '.' then []
328
+ when value[:nonLitNodeConstraint]
329
+ [value[:nonLitNodeConstraint], value[:_shapeAtomNoRef_4]].compact
330
+ when value[:shapeDefinition]
331
+ [value[:shapeDefinition], value[:_shapeAtomNoRef_5]].compact
332
+ when value[:_shapeAtomNoRef_3]
333
+ value[:_shapeAtomNoRef_3]
334
+ else []
335
+ end
319
336
 
320
- # [19] shapeAtomNoRef ::= nodeConstraint shapeOrRef?
321
- # | shapeDefinition
322
- # | "(" shapeExpression ")"
323
- # | '.' # no constraint
324
- production(:shapeAtomNoRef) do |input, data, callback|
325
- shape_atom(input, data)
337
+ case expressions.length
338
+ when 0 then nil
339
+ when 1 then expressions.first
340
+ else Algebra::And.new(*expressions, **self.options)
341
+ end
342
+ end
343
+ start_production(:_shapeAtomNoRef_1, as_hash: true)
344
+ start_production(:_shapeAtomNoRef_2, as_hash: true)
345
+ production(:_shapeAtomNoRef_3) do |value|
346
+ value[1][:shapeExpression]
326
347
  end
327
348
 
328
- # [20] inlineShapeAtom ::= nodeConstraint inlineShapeOrRef?
329
- # | inlineShapeOrRef nodeConstraint?
349
+ # [20] inlineShapeAtom ::= nonLitNodeConstraint inlineShapeOrRef?
350
+ # | litNodeConstraint
351
+ # | inlineShapeOrRef nonLitNodeConstraint?
330
352
  # | "(" shapeExpression ")"
331
353
  # | '.' # no constraint
332
- production(:inlineShapeAtom) do |input, data, callback|
333
- shape_atom(input, data)
334
- end
335
- def shape_atom(input, data)
336
- constraint = data[:nodeConstraint]
337
- shape = data[:shapeOrRef] || Array(data[:shapeExpression]).first || data[:shape]
338
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
354
+ production(:inlineShapeAtom) do |value|
355
+ expressions = case
356
+ when value == '.' then []
357
+ when value.is_a?(Algebra::Operator)
358
+ [value]
359
+ when value[:nonLitNodeConstraint]
360
+ [value[:nonLitNodeConstraint], value[:_inlineShapeAtom_4]].compact
361
+ when value[:inlineShapeOrRef]
362
+ [value[:inlineShapeOrRef], value[:__inlineShapeAtom_5]].compact
363
+ when value[:_inlineShapeAtom_3]
364
+ value[:_inlineShapeAtom_3]
365
+ else []
366
+ end
339
367
 
340
- expression = [constraint, shape].compact
341
- expression = case expression.length
368
+ case expressions.length
342
369
  when 0 then nil
343
- when 1 then expression.first
344
- else Algebra::And.new(*expression)
370
+ when 1 then expressions.first
371
+ else Algebra::And.new(*expressions, **self.options)
345
372
  end
346
-
347
- (input[:shapeExpression] ||= []) << expression if expression
348
373
  end
349
- private :shape_atom
374
+ start_production(:_inlineShapeAtom_1, as_hash: true)
375
+ start_production(:_inlineShapeAtom_2, as_hash: true)
376
+ production(:_inlineShapeAtom_3) do |value|
377
+ value[1][:shapeExpression]
378
+ end
350
379
 
351
380
  # [21] shapeOrRef ::= shapeDefinition | shapeRef
352
- production(:shapeOrRef) do |input, data, callback|
353
- shape_or_ref(input, data)
354
- end
355
381
  # [22] inlineShapeOrRef ::= inlineShapeDefinition | shapeRef
356
- production(:inlineShapeOrRef) do |input, data, callback|
357
- shape_or_ref(input, data)
358
- end
359
- def shape_or_ref(input, data)
360
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
361
- input[:shapeOrRef] = data[:shape] if data[:shape]
362
- rescue ArgumentError => e
363
- error(nil, "Argument Error on ShapeOrRef: #{e.message}")
364
- end
365
- private :shape_or_ref
366
382
 
367
- # [23] shapeRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel
368
- production(:shapeRef) do |input, data, callback|
369
- input[:shape] = Array(data[:shapeLabel]).first
383
+ # [23] shapeRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeExprLabel
384
+ production(:shapeRef) do |value|
385
+ value.is_a?(Array) ? value.last[:shapeExprLabel] : value
370
386
  end
371
387
 
372
388
  # [24] litNodeConstraint ::= "LITERAL" xsFacet*
373
389
  # | datatype xsFacet*
374
390
  # | valueSet xsFacet*
375
391
  # | numericFacet+
376
- production(:litNodeConstraint) do |input, data, callback|
377
- # Semantic validate (A Syntax error)
378
- case
379
- when data[:datatype] && data[:numericFacet]
380
- # Can only use a numeric facet on a numeric datatype
381
- l = RDF::Literal.new("1", datatype: data[:datatype])
382
- error(nil, "Numeric facet used on non-numeric datatype: #{data[:datatype]}", production: :nodeConstraint) unless l.is_a?(RDF::Literal::Numeric)
392
+ start_production(:_litNodeConstraint_1, as_hash: true, insensitive_strings: :lower)
393
+ production(:_litNodeConstraint_1) do |value|
394
+ # "LITERAL" xsFacet*
395
+ facets = value[:_litNodeConstraint_5]
396
+ validate_facets(facets, :litNodeConstraint)
397
+ Algebra::NodeConstraint.new(:literal, *facets, **self.options)
398
+ end
399
+ start_production(:_litNodeConstraint_2, as_hash: true)
400
+ production(:_litNodeConstraint_2) do |value|
401
+ # datatype xsFacet*
402
+ facets = value[:_litNodeConstraint_6]
403
+ validate_facets(facets, :litNodeConstraint)
404
+
405
+ # Numeric Facet Constraints can only be used when datatype is derived from the set of SPARQL 1.1 Operand Data Types
406
+ l = RDF::Literal("0", datatype: value[:datatype])
407
+ facets.each do |f|
408
+ error(nil, "#{f.first} constraint may only be used once on a numeric datatype (#{value[:datatype]})", production: :litNodeConstraint) if
409
+ f.to_s.match(/digits|inclusive|exclusive/) &&
410
+ !l.is_a?(RDF::Literal::Numeric)
383
411
  end
384
412
 
385
- attrs = []
386
- attrs << [:datatype, data[:datatype]] if data [:datatype]
387
- attrs += Array(data[:shapeAtomLiteral])
388
- attrs += Array(data[:valueSetValue])
389
- attrs += Array(data[:numericFacet])
390
- attrs += Array(data[:stringFacet])
391
-
392
- input[:nodeConstraint] = Algebra::NodeConstraint.new(*attrs.compact)
413
+ attrs = [[:datatype, value[:datatype]]] + facets
414
+ Algebra::NodeConstraint.new(*attrs.compact, **self.options)
415
+ end
416
+ start_production(:_litNodeConstraint_3, as_hash: true)
417
+ production(:_litNodeConstraint_3) do |value|
418
+ # valueSet xsFacet*
419
+ facets = value[:_litNodeConstraint_7]
420
+ validate_facets(facets, :litNodeConstraint)
421
+ attrs = value[:valueSet]+ facets
422
+ Algebra::NodeConstraint.new(*attrs.compact, **self.options)
423
+ end
424
+ production(:_litNodeConstraint_4) do |value|
425
+ # numericFacet+
426
+ validate_facets(value, :litNodeConstraint)
427
+ Algebra::NodeConstraint.new(*value, **self.options)
393
428
  end
394
429
 
395
430
  # [25] nonLitNodeConstraint ::= nonLiteralKind stringFacet*
396
431
  # | stringFacet+
397
- production(:nonLitNodeConstraint) do |input, data, callback|
398
- # Semantic validate (A Syntax error)
399
-
400
- attrs = []
401
- attrs += Array(data[:nonLiteralKind])
402
- attrs += Array(data[:stringFacet])
403
-
404
- input[:nodeConstraint] = Algebra::NodeConstraint.new(*attrs.compact)
432
+ start_production(:_nonLitNodeConstraint_1, as_hash: true)
433
+ production(:_nonLitNodeConstraint_1) do |value|
434
+ # nonLiteralKind stringFacet*
435
+ facets = Array(value[:_nonLitNodeConstraint_3])
436
+ validate_facets(facets, :nonLitNodeConstraint)
437
+ attrs = Array(value[:nonLiteralKind]) + facets
438
+ Algebra::NodeConstraint.new(*attrs.compact, **self.options)
439
+ end
440
+ production(:_nonLitNodeConstraint_2) do |value|
441
+ # stringFacet+
442
+ validate_facets(value, :nonLitNodeConstraint)
443
+ Algebra::NodeConstraint.new(*value, **self.options)
444
+ end
445
+
446
+ def validate_facets(facets, prod)
447
+ facets.each do |facet|
448
+ if facets.count {|f| f.first == facet.first} > 1
449
+ error(nil, "#{facet.first} constraint may only be used once in a Node Constraint", production: prod)
450
+ end
451
+ end
405
452
  end
453
+ private :validate_facets
406
454
 
407
455
  # [26] nonLiteralKind ::= "IRI" | "BNODE" | "NONLITERAL"
456
+ start_production(:nonLiteralKind, insensitive_strings: :lower)
457
+ production(:nonLiteralKind) do |value|
458
+ value.to_sym
459
+ end
408
460
 
409
461
  # [27] xsFacet ::= stringFacet | numericFacet
410
462
  # [28] stringFacet ::= stringLength INTEGER
411
463
  # | REGEXP
412
- production(:stringFacet) do |input, data, callback|
413
- input[:stringFacet] ||= []
414
- input[:stringFacet] << if data[:stringLength]
415
- if input[:stringFacet].flatten.include?(data[:stringLength])
416
- error(nil, "#{data[:stringLength]} constraint may only be used once in a Node Constraint", production: :stringFacet)
417
- end
418
- [data[:stringLength], data[:literal]]
419
- elsif re = data[:regexp]
420
- unless re =~ %r(^/(.*)/([smix]*)$)
421
- error(nil, "#{re.inspect} regular expression must be in the form /pattern/flags?", production: :stringFacet)
464
+ production(:stringFacet) do |value|
465
+ if value.is_a?(Array) # stringLength
466
+ value
467
+ else
468
+ unless value =~ %r(^/(.*)/([smix]*)$)
469
+ error(nil, "#{value.inspect} regular expression must be in the form /pattern/flags?", production: :stringFacet)
422
470
  end
471
+
423
472
  flags = $2 unless $2.to_s.empty?
424
473
  pattern = $1.gsub('\\/', '/').gsub(UCHAR) do
425
474
  [($1 || $2).hex].pack('U*')
426
475
  end.force_encoding(Encoding::UTF_8)
427
476
 
428
477
  # Any other escaped character is a syntax error
429
- if pattern.match(%r([^\\]\\[^nrt/\\|\.?*+\[\]\(\){}$#x2D#x5B#x5D#x5E-]))
478
+ if pattern.match?(%r([^\\]\\[^nrt/\\|\.?*+\[\]\(\){}$#x2D#x5B#x5D#x5E-]))
430
479
  error(nil, "Regexp contains illegal escape: #{pattern.inspect}", production: :stringFacet)
431
480
  end
432
481
 
433
482
  [:pattern, pattern, flags].compact
434
483
  end
435
484
  end
485
+ start_production(:_stringFacet_1, as_hash: true)
486
+ production(:_stringFacet_1) do |value|
487
+ [value[:stringLength].to_sym, value[:INTEGER]]
488
+ end
436
489
 
437
490
  # [29] stringLength ::= "LENGTH" | "MINLENGTH" | "MAXLENGTH"
491
+ start_production(:stringLength, insensitive_strings: :lower)
438
492
 
439
- # [30] numericFacet ::= numericRange (numericLiteral | string '^^' datatype )
493
+ # [30] numericFacet ::= numericRange numericLiteral
440
494
  # | numericLength INTEGER
441
- production(:numericFacet) do |input, data, callback|
442
- input[:numericFacet] ||= []
443
- input[:numericFacet] << if data[:numericRange]
444
- literal = data[:literal] || literal(data[:string], datatype: data[:datatype])
445
- error(nil, "numericRange must use a numeric datatype: #{data[:datatype]}", production: :numericFacet) unless literal.is_a?(RDF::Literal::Numeric)
446
- [data[:numericRange], literal]
447
- elsif data[:numericLength]
448
- [data[:numericLength], data[:literal]]
449
- end
495
+ start_production(:_numericFacet_1, as_hash: true)
496
+ production(:_numericFacet_1) do |value|
497
+ [value[:numericRange].to_sym, value[:numericLiteral]]
498
+ end
499
+ start_production(:_numericFacet_2, as_hash: true)
500
+ production(:_numericFacet_2) do |value|
501
+ [value[:numericLength].to_sym, value[:INTEGER]]
450
502
  end
451
503
 
452
504
  # [31] numericRange ::= "MININCLUSIVE" | "MINEXCLUSIVE" | "MAXINCLUSIVE" | "MAXEXCLUSIVE"
505
+ start_production(:numericRange, insensitive_strings: :lower)
506
+
453
507
  # [32] numericLength ::= "TOTALDIGITS" | "FRACTIONDIGITS"
508
+ start_production(:numericLength, insensitive_strings: :lower)
454
509
 
455
510
  # [33] shapeDefinition ::= (includeSet | extraPropertySet | "CLOSED")* '{' tripleExpression? '}' annotation* semanticActions
456
- production(:shapeDefinition) do |input, data, callback|
457
- shape_definition(input, data)
511
+ start_production(:shapeDefinition, as_hash: true)
512
+ production(:shapeDefinition) do |value|
513
+ shape_definition(
514
+ value[:_shapeDefinition_1],
515
+ value[:_shapeDefinition_2],
516
+ value[:_shapeDefinition_3],
517
+ value[:semanticActions])
458
518
  end
519
+ start_production(:_shapeDefinition_4, insensitive_strings: :lower)
520
+
459
521
  # [34] inlineShapeDefinition ::= (includeSet | extraPropertySet | "CLOSED")* '{' tripleExpression? '}'
460
- production(:inlineShapeDefinition) do |input, data, callback|
461
- shape_definition(input, data)
462
- end
463
- def shape_definition(input, data)
464
- # FIXME: includeSet
465
- expression = data[:tripleExpression]
466
- attrs = Array(data[:extraPropertySet])
467
- attrs << :closed if data[:closed]
522
+ start_production(:inlineShapeDefinition, as_hash: true)
523
+ production(:inlineShapeDefinition) do |value|
524
+ shape_definition(
525
+ value[:_inlineShapeDefinition_1],
526
+ value[:_inlineShapeDefinition_2])
527
+ end
528
+ def shape_definition(extra_closed, expression, annotations = [], semact = [])
529
+ closed = extra_closed.any? {|v| v.to_s == 'closed'}
530
+ extra = extra_closed.reject {|v| v.to_s == 'closed'}
531
+ attrs = extra
532
+ attrs << :closed if closed
468
533
  attrs << expression if expression
469
- attrs += Array(data[:annotation])
470
- attrs += Array(data[:codeDecl])
534
+ attrs += annotations
535
+ attrs += semact
471
536
 
472
- input[:shape] = Algebra::Shape.new(*attrs)
537
+ Algebra::Shape.new(*attrs, **self.options)
473
538
  end
474
539
  private :shape_definition
475
540
 
476
541
  # [35] extraPropertySet ::= "EXTRA" predicate+
477
- production(:extraPropertySet) do |input, data, callback|
478
- (input[:extraPropertySet] ||= []) << data[:predicate].unshift(:extra)
542
+ start_production(:extraPropertySet, insensitive_strings: :lower)
543
+ production(:extraPropertySet) do |value|
544
+ value.last[:_extraPropertySet_1].unshift(:extra)
479
545
  end
480
546
 
481
547
  # [36] tripleExpression ::= oneOfTripleExpr
482
- # [37] oneOfTripleExpr ::= groupTripleExpr ('|' groupTripleExpr)*
483
- production(:oneOfTripleExpr) do |input, data, callback|
484
- expression = if Array(data[:tripleExpression]).length > 1
485
- Algebra::OneOf.new(*data[:tripleExpression])
486
- else
487
- Array(data[:tripleExpression]).first
488
- end
489
- input[:tripleExpression] = expression if expression
548
+ production(:tripleExpression) do |value|
549
+ value.first[:oneOfTripleExpr]
490
550
  end
491
551
 
492
- # [40] groupTripleExpr ::= unaryTripleExpr (';' unaryTripleExpr?)*
493
- production(:groupTripleExpr) do |input, data, callback|
494
- expression = if Array(data[:tripleExpression]).length > 1
495
- Algebra::EachOf.new(*data[:tripleExpression])
496
- else
497
- Array(data[:tripleExpression]).first
498
- end
499
- (input[:tripleExpression] ||= []) << expression if expression
552
+ # [37] oneOfTripleExpr ::= groupTripleExpr ('|' groupTripleExpr)*
553
+ start_production(:oneOfTripleExpr, as_hash: true)
554
+ production(:oneOfTripleExpr) do |value|
555
+ expressions = [value[:groupTripleExpr]] + value[:_oneOfTripleExpr_1]
556
+ expressions.length == 1 ? expressions.first : Algebra::OneOf.new(*expressions, **self.options)
557
+ end
558
+ production(:_oneOfTripleExpr_2) do |value|
559
+ value.last[:groupTripleExpr]
500
560
  end
501
561
 
502
- # [43] unaryTripleExpr ::= productionLabel? (tripleConstraint | bracketedTripleExpr) | include
503
- production(:unaryTripleExpr) do |input, data, callback|
504
- expression = data[:tripleExpression]
505
- expression.id = data[:productionLabel] if expression && data[:productionLabel]
506
-
507
- (input[:tripleExpression] ||= []) << expression if expression
562
+ # [40] groupTripleExpr ::= unaryTripleExpr (';' unaryTripleExpr?)*
563
+ start_production(:groupTripleExpr, as_hash: true)
564
+ production(:groupTripleExpr) do |value|
565
+ expressions = [value[:unaryTripleExpr]] + value[:_groupTripleExpr_1]
566
+ expressions.length == 1 ? expressions.first : Algebra::EachOf.new(*expressions, **self.options)
567
+ end
568
+ production(:_groupTripleExpr_2) do |value|
569
+ value.last[:_groupTripleExpr_3]
508
570
  end
509
571
 
510
- # [43a] productionLabel ::= '$' (iri | blankNode)
511
- production(:productionLabel) do |input, data, callback|
512
- input[:productionLabel] = data[:iri] || data[:blankNode]
572
+ # [43] unaryTripleExpr ::= ('$' tripleExprLabel)? (tripleConstraint | bracketedTripleExpr) | include
573
+ start_production(:_unaryTripleExpr_1, as_hash: true)
574
+ production(:_unaryTripleExpr_1) do |value|
575
+ expression = value[:_unaryTripleExpr_3]
576
+ expression.id = value[:_unaryTripleExpr_2] if expression && value[:_unaryTripleExpr_2]
577
+
578
+ expression
579
+ end
580
+ production(:_unaryTripleExpr_4) do |value|
581
+ # '$' tripleExprLabel
582
+ value.last[:tripleExprLabel]
513
583
  end
514
584
 
515
- # [44] bracketedTripleExpr ::= '(' oneOfTripleExpr ')' cardinality? annotation* semanticActions
516
- production(:bracketedTripleExpr) do |input, data, callback|
517
- # XXX cardinality? annotation* semanticActions
518
- case expression = data[:tripleExpression]
519
- when Algebra::OneOf, Algebra::EachOf
585
+ # [44] bracketedTripleExpr ::= '(' tripleExpression ')' cardinality? annotation* semanticActions
586
+ start_production(:bracketedTripleExpr, as_hash: true)
587
+ production(:bracketedTripleExpr) do |value|
588
+ case expression = value[:tripleExpression]
589
+ when Algebra::TripleExpression
520
590
  else
521
- error(nil, "Bracketed Expression requires multiple contained expressions", production: :bracketedTripleExpr)
591
+ error(nil, "Bracketed Expression requires contained triple expression", production: :bracketedTripleExpr)
522
592
  end
523
- cardinality = data.fetch(:cardinality, {})
593
+ cardinality = value[:_bracketedTripleExpr_1] || {}
524
594
  attrs = [
525
595
  ([:min, cardinality[:min]] if cardinality[:min]),
526
596
  ([:max, cardinality[:max]] if cardinality[:max])
527
597
  ].compact
528
- attrs += Array(data[:codeDecl])
529
- attrs += Array(data[:annotation])
598
+ attrs += value[:semanticActions]
599
+ attrs += Array(value[:_bracketedTripleExpr_2])
530
600
 
531
601
  expression.operands.concat(attrs)
532
- input[:tripleExpression] = expression
602
+ expression
533
603
  end
534
604
 
535
- # [45] tripleConstraint ::= senseFlags? predicate shapeExpression cardinality? annotation* semanticActions
536
- production(:tripleConstraint) do |input, data, callback|
537
- cardinality = data.fetch(:cardinality, {})
605
+ # [45] tripleConstraint ::= senseFlags? predicate inlineShapeExpression cardinality? annotation* semanticActions
606
+ start_production(:tripleConstraint, as_hash: true)
607
+ production(:tripleConstraint) do |value|
608
+ cardinality = value[:_tripleConstraint_2] || {}
538
609
  attrs = [
539
- (:inverse if data[:inverse] || data[:not]),
540
- [:predicate, Array(data[:predicate]).first],
541
- Array(data[:shapeExpression]).first,
610
+ (:inverse if value[:_tripleConstraint_1]),
611
+ [:predicate, value[:predicate]],
612
+ value[:inlineShapeExpression],
542
613
  ([:min, cardinality[:min]] if cardinality[:min]),
543
614
  ([:max, cardinality[:max]] if cardinality[:max])
544
615
  ].compact
545
- attrs += Array(data[:codeDecl])
546
- attrs += Array(data[:annotation])
616
+ attrs += value[:_tripleConstraint_3]
617
+ attrs += value[:semanticActions]
547
618
 
548
- input[:tripleExpression] = Algebra::TripleConstraint.new(*attrs) unless attrs.empty?
619
+ Algebra::TripleConstraint.new(*attrs, **self.options) # unless attrs.empty?
549
620
  end
550
621
 
551
622
  # [46] cardinality ::= '*' | '+' | '?' | REPEAT_RANGE
623
+ production(:cardinality) do |value|
624
+ case value
625
+ when '*' then {min: 0, max: "*"}
626
+ when '+' then {min: 1, max: "*"}
627
+ when '?' then {min: 0, max: 1}
628
+ else value
629
+ end
630
+ end
631
+
552
632
  # [47] senseFlags ::= '^'
553
633
  # [48] valueSet ::= '[' valueSetValue* ']'
634
+ production(:valueSet) do |value|
635
+ value[1][:_valueSet_1]
636
+ end
554
637
 
555
638
  # [49] valueSetValue ::= iriRange | literalRange | languageRange | '.' exclusion+
556
- production(:valueSetValue) do |input, data, callback|
557
- range = data[:iriRange] || data[:literalRange] || data[:languageRange]
558
- if !range
559
- # All exclusions must be consistent IRI/Literal/Language
560
- case data[:exclusion].first
561
- when Algebra::IriStem, RDF::URI
562
- unless data[:exclusion].all? {|e| e.is_a?(Algebra::IriStem) || e.is_a?(RDF::URI)}
563
- error(nil, "Exclusions must all be IRI type")
564
- end
565
- range = Algebra::IriStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
566
- when Algebra::LiteralStem, RDF::Literal
567
- unless data[:exclusion].all? {|e| e.is_a?(Algebra::LiteralStem) || e.is_a?(RDF::Literal)}
568
- error(nil, "Exclusions must all be Literal type")
569
- end
570
- range = Algebra::LiteralStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
571
- else
572
- unless data[:exclusion].all? {|e| e.is_a?(Algebra::LanguageStem) || e.is_a?(String)}
573
- error(nil, "Exclusions must all be Language type")
574
- end
575
- range = Algebra::LanguageStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
639
+ production(:valueSetValue) do |value|
640
+ Algebra::Value.new(value, **self.options)
641
+ end
642
+ production(:_valueSetValue_1) do |value|
643
+ # All exclusions must be consistent IRI/Literal/Language
644
+ value = value.last[:_valueSetValue_2]
645
+ case value.first
646
+ when Algebra::IriStem, RDF::URI
647
+ unless value.all? {|e| e.is_a?(Algebra::IriStem) || e.is_a?(RDF::URI)}
648
+ error(nil, "Exclusions must all be IRI type")
649
+ end
650
+ Algebra::IriStemRange.new(:wildcard, value.unshift(:exclusions), **self.options)
651
+ when Algebra::LiteralStem, RDF::Literal
652
+ unless value.all? {|e| e.is_a?(Algebra::LiteralStem) || e.is_a?(RDF::Literal)}
653
+ error(nil, "Exclusions must all be Literal type")
654
+ end
655
+ Algebra::LiteralStemRange.new(:wildcard, value.unshift(:exclusions), **self.options)
656
+ else
657
+ unless value.all? {|e| e.is_a?(Algebra::LanguageStem) || e.is_a?(String)}
658
+ error(nil, "Exclusions must all be Language type")
576
659
  end
660
+ Algebra::LanguageStemRange.new(:wildcard, value.unshift(:exclusions), **self.options)
577
661
  end
578
- (input[:valueSetValue] ||= []) << Algebra::Value.new(range)
579
662
  end
580
663
 
581
- # [50] exclusion ::= '-' (iri | literal | LANGTAG) '~'?
582
- production(:exclusion) do |input, data, callback|
583
- (input[:exclusion] ||= []) << if data[:pattern]
584
- case
585
- when data[:iri] then Algebra::IriStem.new(data[:iri])
586
- when data[:literal] then Algebra::LiteralStem.new(data[:literal])
587
- when data[:language] then Algebra::LanguageStem.new(data[:language])
664
+ # [50] exclusion ::= '.' '-' (iri | literal | LANGTAG) '~'?
665
+ start_production(:exclusion, as_hash: true)
666
+ production(:exclusion) do |value|
667
+ if value[:_exclusion_2]
668
+ case value[:_exclusion_1]
669
+ when RDF::URI then Algebra::IriStem.new(value[:_exclusion_1], **self.options)
670
+ when RDF::Literal then Algebra::LiteralStem.new(value[:_exclusion_1], **self.options)
671
+ else Algebra::LanguageStem.new(value[:_exclusion_1], **self.options)
588
672
  end
589
673
  else
590
- data[:iri] || data[:literal] || data[:language]
674
+ value[:_exclusion_1]
591
675
  end
592
676
  end
593
677
 
594
678
  # [51] iriRange ::= iri ('~' iriExclusion*)?
595
- production(:iriRange) do |input, data, callback|
596
- exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
597
- input[:iriRange] = if data[:pattern] && exclusions
598
- Algebra::IriStemRange.new(data[:iri], exclusions)
599
- elsif data[:pattern]
600
- Algebra::IriStem.new(data[:iri])
601
- elsif data[:dot]
602
- Algebra::IriStemRange.new(:wildcard, exclusions)
679
+ production(:iriRange) do |value|
680
+ iri = value.first[:iri]
681
+ if value.last[:_iriRange_1]
682
+ exclusions = value.last[:_iriRange_1].last[:_iriRange_3]
683
+ if exclusions.empty?
684
+ Algebra::IriStem.new(iri, **self.options)
685
+ else
686
+ Algebra::IriStemRange.new(iri, exclusions.unshift(:exclusions), **self.options)
687
+ end
603
688
  else
604
- data[:iri]
689
+ iri
605
690
  end
606
691
  end
607
692
 
608
693
  # [52] iriExclusion ::= '-' iri '~'?
609
- production(:iriExclusion) do |input, data, callback|
610
- val = data[:iri]
611
- (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::IriStem.new(val) : val)
694
+ start_production(:iriExclusion, as_hash: true)
695
+ production(:iriExclusion) do |value|
696
+ value[:_iriExclusion_1] ? Algebra::IriStem.new(value[:iri], **self.options) : value[:iri]
612
697
  end
613
698
 
614
699
  # [53] literalRange ::= literal ('~' literalExclusion*)?
615
- production(:literalRange) do |input, data, callback|
616
- exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
617
- input[:literalRange] = if data[:pattern] && exclusions
618
- Algebra::LiteralStemRange.new(data[:literal], exclusions)
619
- elsif data[:pattern]
620
- Algebra::LiteralStem.new(data[:literal])
621
- elsif data[:dot]
622
- Algebra::LiteralStemRange.new(:wildcard, exclusions)
700
+ production(:literalRange) do |value|
701
+ lit = value.first[:literal]
702
+ if value.last[:_literalRange_1]
703
+ exclusions = value.last[:_literalRange_1].last[:_literalRange_3]
704
+ if exclusions.empty?
705
+ Algebra::LiteralStem.new(lit, **self.options)
706
+ else
707
+ Algebra::LiteralStemRange.new(lit, exclusions.unshift(:exclusions), **self.options)
708
+ end
623
709
  else
624
- data[:literal]
710
+ lit
625
711
  end
626
712
  end
627
713
 
628
714
  # [54] literalExclusion ::= '-' literal '~'?
629
- production(:literalExclusion) do |input, data, callback|
630
- val = data[:literal]
631
- (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::LiteralStem.new(val) : val)
715
+ start_production(:literalExclusion, as_hash: true)
716
+ production(:literalExclusion) do |value|
717
+ val = value[:literal]
718
+ value[:_literalExclusion_1] ? Algebra::LiteralStem.new(val, **self.options) : val
632
719
  end
633
720
 
634
721
  # [55] languageRange ::= LANGTAG ('~' languageExclusion*)?
635
- production(:languageRange) do |input, data, callback|
636
- exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
637
- input[:languageRange] = if data[:pattern] && exclusions
638
- Algebra::LanguageStemRange.new(data[:language], exclusions)
639
- elsif data[:pattern]
640
- Algebra::LanguageStem.new(data[:language])
641
- elsif data[:dot]
642
- Algebra::LanguageStemRange.new(:wildcard, exclusions)
722
+ # | '@' '~' languageExclusion*
723
+ start_production(:_languageRange_1, as_hash: true)
724
+ production(:_languageRange_1) do |value|
725
+ exclusions = value[:_languageRange_3] if value[:_languageRange_3]
726
+ pattern = !!value[:_languageRange_3]
727
+ if pattern && exclusions.empty?
728
+ Algebra::LanguageStem.new(value[:LANGTAG], **self.options)
729
+ elsif pattern
730
+ Algebra::LanguageStemRange.new(value[:LANGTAG], exclusions.unshift(:exclusions), **self.options)
731
+ else
732
+ Algebra::Language.new(value[:LANGTAG], **self.options)
733
+ end
734
+ end
735
+ start_production(:_languageRange_2, as_hash: true)
736
+ production(:_languageRange_2) do |value|
737
+ exclusions = value[:_languageRange_6]
738
+ if exclusions.empty?
739
+ Algebra::LanguageStem.new('', **self.options)
643
740
  else
644
- Algebra::Language.new(data[:language])
741
+ Algebra::LanguageStemRange.new('', exclusions.unshift(:exclusions), **self.options)
645
742
  end
646
743
  end
744
+ production(:_languageRange_4) do |value|
745
+ value.last[:_languageRange_5]
746
+ end
647
747
 
648
- # [56] languageExclusion ::= '-' literal '~'?
649
- production(:languageExclusion) do |input, data, callback|
650
- val = data[:language]
651
- (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::LanguageStem.new(val) : val)
748
+ # [56] languageExclusion ::= '-' LANGTAG '~'?
749
+ start_production(:languageExclusion, as_hash: true)
750
+ production(:languageExclusion) do |value|
751
+ val = value[:LANGTAG]
752
+ value[:_languageExclusion_1] ? Algebra::LanguageStem.new(val, **self.options) : val
652
753
  end
653
754
 
654
- # [57] include ::= '&' shapeLabel
655
- production(:include) do |input, data, callback|
656
- input[:tripleExpression] = data[:shapeLabel].first
755
+ # [57] include ::= '&' tripleExprLabel
756
+ production(:include) do |value|
757
+ value.last[:tripleExprLabel]
657
758
  end
658
759
 
659
760
  # [58] annotation ::= '//' predicate (iri | literal)
660
- production(:annotation) do |input, data, callback|
661
- annotation = Algebra::Annotation.new([:predicate, data[:predicate].first], (data[:iri] || data[:literal]))
662
- (input[:annotation] ||= []) << annotation
761
+ start_production(:annotation, as_hash: true)
762
+ production(:annotation) do |value|
763
+ Algebra::Annotation.new([:predicate, value[:predicate]], value[:_annotation_1], **self.options)
663
764
  end
664
765
 
665
766
  # [59] semanticActions ::= codeDecl*
666
767
 
667
768
  # [60] codeDecl ::= '%' iri (CODE | "%")
668
- production(:codeDecl) do |input, data, callback|
669
- (input[:codeDecl] ||= []) << Algebra::SemAct.new(*[data[:iri], data[:code]].compact)
769
+ start_production(:codeDecl, as_hash: true)
770
+ production(:codeDecl) do |value|
771
+ code = value[:_codeDecl_1] unless value[:_codeDecl_1] == '%'
772
+ Algebra::SemAct.new(*[value[:iri], code].compact, **self.options)
670
773
  end
671
774
 
672
775
  # [13t] literal ::= rdfLiteral | numericLiteral | booleanLiteral
673
776
 
674
777
  # [61] predicate ::= iri | RDF_TYPE
675
- production(:predicate) do |input, data, callback|
676
- (input[:predicate] ||= []) << data[:iri]
778
+ production(:predicate) do |value|
779
+ value
677
780
  end
678
781
 
679
782
  # [62] datatype ::= iri
680
- production(:datatype) do |input, data, callback|
681
- input[:datatype] = data[:iri]
783
+ production(:datatype) do |value|
784
+ value.first[:iri]
682
785
  end
683
786
 
684
- # [63] shapeLabel ::= iri | blankNode
685
- production(:shapeLabel) do |input, data, callback|
686
- (input[:shapeLabel] ||= []) << (data[:iri] || data[:blankNode])
787
+ # [63] shapeExprLabel ::= iri | blankNode
788
+ # [16t] numericLiteral ::= INTEGER | DECIMAL | DOUBLE
789
+ # [65] rdfLiteral ::= langString | string ('^^' datatype)?
790
+ production(:rdfLiteral) do |value|
791
+ literal(*value)
792
+ end
793
+ start_production(:_rdfLiteral_1, as_hash: true)
794
+ production(:_rdfLiteral_1) do |value|
795
+ [value[:string], {datatype: value[:_rdfLiteral_2]}]
796
+ end
797
+ production(:_rdfLiteral_3) do |value|
798
+ value.last[:datatype]
687
799
  end
688
800
 
689
- # [16t] numericLiteral ::= INTEGER | DECIMAL | DOUBLE
690
- # [129s] rdfLiteral ::= langString | string ('^^' datatype)?
691
- production(:rdfLiteral) do |input, data, callback|
692
- input[:literal] = literal(data[:string], data)
801
+ # [134s] booleanLiteral ::= "true" | "false"
802
+ start_production(:booleanLiteral, insensitive_strings: :lower)
803
+ production(:booleanLiteral) do |value|
804
+ literal(value == 'true')
693
805
  end
694
806
 
695
- # [134s] booleanLiteral ::= 'true' | 'false'
696
807
  # [135s] string ::= STRING_LITERAL1 | STRING_LITERAL_LONG1
697
808
  # | STRING_LITERAL2 | STRING_LITERAL_LONG2
698
809
  # [66] langString ::= LANG_STRING_LITERAL1 | LANG_STRING_LITERAL_LONG1
699
810
  # | LANG_STRING_LITERAL2 | LANG_STRING_LITERAL_LONG2
700
811
  # [136s] iri ::= IRIREF | prefixedName
701
812
  # [1372] prefixedName ::= PNAME_LN | PNAME_NS
813
+ production(:prefixedName) do |value|
814
+ value.is_a?(RDF::URI) ? value : ns(value, '')
815
+ end
816
+
702
817
  # [138s] blankNode ::= BLANK_NODE_LABEL
818
+ production(:blankNode) do |value|
819
+ value.first[:BLANK_NODE_LABEL]
820
+ end
703
821
 
704
822
  ##
705
823
  # Initializes a new parser instance.
@@ -758,7 +876,7 @@ module ShEx
758
876
  @result.to_sxp
759
877
  end
760
878
 
761
- alias_method :ll1_parse, :parse
879
+ alias_method :peg_parse, :parse
762
880
 
763
881
  # Parse query
764
882
  #
@@ -777,50 +895,17 @@ module ShEx
777
895
  # @raise [ShEx::StructureError, ArgumentError] on structural problems with schema
778
896
  # @see https://www.w3.org/TR/sparql11-query/#sparqlAlgebra
779
897
  # @see https://axel.deri.ie/sparqltutorial/ESWC2007_SPARQL_Tutorial_unit2b.pdf
780
- def parse(prod = START)
781
- ll1_parse(@input,
898
+ def parse(prod = :shexDoc)
899
+ @result = peg_parse(@input,
782
900
  prod.to_sym,
783
- branch: BRANCH,
784
- first: FIRST,
785
- follow: FOLLOW,
901
+ ShEx::Meta::RULES,
786
902
  whitespace: WS,
787
- **@options
788
- ) do |context, *data|
789
- case context
790
- when :trace
791
- if options[:logger]
792
- level, lineno, depth, *args = data
793
- case level
794
- when 0
795
- log_error(*args, depth: depth, lineno: lineno)
796
- when 1
797
- log_warn(*args, depth: depth, lineno: lineno)
798
- when 2
799
- log_info(*args, depth: depth, lineno: lineno)
800
- else
801
- log_debug(*args, depth: depth, lineno: lineno)
802
- end
803
- end
804
- end
805
- end
806
-
807
- # The last thing on the @prod_data stack is the result
808
- @result = case
809
- when !prod_data.is_a?(Hash)
810
- prod_data
811
- when prod_data.empty?
812
- nil
813
- when prod_data[:schema]
814
- prod_data[:schema]
815
- else
816
- key = prod_data.keys.first
817
- [key] + Array(prod_data[key]) # Creates [:key, [:triple], ...]
818
- end
903
+ **@options)
819
904
 
820
905
  # Validate resulting expression
821
906
  @result.validate! if @result && validate?
822
907
  @result
823
- rescue EBNF::LL1::Parser::Error, EBNF::LL1::Lexer::Error => e
908
+ rescue EBNF::PEG::Parser::Error, EBNF::LL1::Lexer::Error => e
824
909
  raise ShEx::ParseError, e.message, e.backtrace
825
910
  end
826
911