shex 0.5.2 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/shex/parser.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'ebnf'
3
- require 'ebnf/ll1/parser'
3
+ require 'ebnf/peg/parser'
4
4
  require 'shex/meta'
5
5
 
6
6
  module ShEx
@@ -8,11 +8,10 @@ module ShEx
8
8
  # A parser for the ShEx grammar.
9
9
  #
10
10
  # @see https://www.w3.org/2005/01/yacker/uploads/ShEx3?lang=perl&markup=html#productions
11
- # @see http://en.wikipedia.org/wiki/LR_parser
11
+ # @see https://en.wikipedia.org/wiki/LR_parser
12
12
  class Parser
13
- include ShEx::Meta
14
13
  include ShEx::Terminals
15
- include EBNF::LL1::Parser
14
+ include EBNF::PEG::Parser
16
15
  include RDF::Util::Logger
17
16
 
18
17
  ##
@@ -22,13 +21,13 @@ module ShEx
22
21
  attr_reader :options
23
22
 
24
23
  ##
25
- # The current input string being processed.
24
+ # The current input string being processed. XXX
26
25
  #
27
26
  # @return [String]
28
27
  attr_accessor :input
29
28
 
30
29
  ##
31
- # The current input tokens being processed.
30
+ # The current input tokens being processed. XXX
32
31
  #
33
32
  # @return [Array<Token>]
34
33
  attr_reader :tokens
@@ -37,669 +36,804 @@ module ShEx
37
36
  # The internal representation of the result using hierarchy of RDF objects and ShEx::Operator
38
37
  # objects.
39
38
  # @return [Array]
40
- # @see http://sparql.rubyforge.org/algebra
39
+ # @see https://www.rubydoc.info/github/ruby-rdf/sparql/SPARQL/Algebra
41
40
  attr_accessor :result
42
41
 
43
42
  # Terminals passed to lexer. Order matters!
44
- terminal(:CODE, CODE, unescape: true) do |prod, token, input|
43
+ terminal(:CODE, CODE, unescape: true) do |value|
45
44
  # { foo %}
46
45
  # Keep surrounding whitespace for now
47
- input[:code] = token.value[1..-2].sub(/%\s*$/, '') # Drop {} and %
46
+ value[1..-2].sub(/%\s*$/, '') # Drop {} and %
48
47
  end
49
- terminal(:REPEAT_RANGE, REPEAT_RANGE) do |prod, token, input|
50
- card = token.value[1..-2].split(',').map {|v| v =~ /^\d+$/ ? v.to_i : v}
51
- card[1] = token.value.include?(',') ? '*' : card[0] if card.length == 1
52
- input[:cardinality] = {min: card[0], max: card[1]}
48
+ terminal(:REPEAT_RANGE, REPEAT_RANGE) do |value|
49
+ card = value[1..-2].split(',').map {|v| v =~ /^\d+$/ ? v.to_i : v}
50
+ card[1] = value.include?(',') ? '*' : card[0] if card.length == 1
51
+ {min: card[0], max: card[1]}
53
52
  end
54
- terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |prod, token, input|
55
- input[:blankNode] = bnode(token.value[2..-1])
53
+ terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |value|
54
+ bnode(value[2..-1])
56
55
  end
57
- terminal(:IRIREF, IRIREF, unescape: true) do |prod, token, input|
56
+ terminal(:IRIREF, IRIREF, unescape: true) do |value|
58
57
  begin
59
- input[:iri] = iri(token.value[1..-2])
58
+ iri(value[1..-2])
60
59
  rescue ArgumentError => e
61
60
  raise Error, e.message
62
61
  end
63
62
  end
64
- terminal(:DOUBLE, DOUBLE) do |prod, token, input|
63
+ terminal(:DOUBLE, DOUBLE) do |value|
65
64
  # Note that a Turtle Double may begin with a '.[eE]', so tack on a leading
66
65
  # zero if necessary
67
- value = token.value.sub(/\.([eE])/, '.0\1')
68
- input[:literal] = literal(value, datatype: RDF::XSD.double)
66
+ value = value.sub(/\.([eE])/, '.0\1')
67
+ literal(value, datatype: RDF::XSD.double)
69
68
  end
70
- terminal(:DECIMAL, DECIMAL) do |prod, token, input|
69
+ terminal(:DECIMAL, DECIMAL) do |value|
71
70
  # Note that a Turtle Decimal may begin with a '.', so tack on a leading
72
71
  # zero if necessary
73
- value = token.value
74
- #value = "0#{token.value}" if token.value[0,1] == "."
75
- input[:literal] = literal(value, datatype: RDF::XSD.decimal)
72
+ literal(value, datatype: RDF::XSD.decimal)
76
73
  end
77
- terminal(:INTEGER, INTEGER) do |prod, token, input|
78
- input[:literal] = literal(token.value, datatype: RDF::XSD.integer)
74
+ terminal(:INTEGER, INTEGER) do |value|
75
+ literal(value, datatype: RDF::XSD.integer)
79
76
  end
80
- terminal(:PNAME_LN, PNAME_LN, unescape: true) do |prod, token, input|
81
- prefix, suffix = token.value.split(":", 2)
82
- input[:iri] = ns(prefix, suffix)
83
- error(nil, "Compact IRI missing prefix definition: #{token.value}", production: :PNAME_LN) unless prefix(prefix)
77
+ terminal(:PNAME_LN, PNAME_LN, unescape: true) do |value|
78
+ prefix, suffix = value.split(":", 2)
79
+ error(nil, "Compact IRI missing prefix definition: #{prefix}", production: :PNAME_LN) unless prefix(prefix)
80
+ ns(prefix, suffix)
84
81
  end
85
- terminal(:PNAME_NS, PNAME_NS) do |prod, token, input|
86
- prefix = token.value[0..-2]
87
-
88
- input[:iri] = ns(prefix, nil)
89
- input[:prefix] = prefix && prefix.to_sym
82
+ terminal(:PNAME_NS, PNAME_NS) do |value|
83
+ value[0..-2]
90
84
  end
91
- terminal(:ATPNAME_LN, ATPNAME_LN, unescape: true) do |prod, token, input|
92
- prefix, suffix = token.value.split(":", 2)
85
+ terminal(:ATPNAME_LN, ATPNAME_LN, unescape: true) do |value, parent_prod|
86
+ prefix, suffix = value.split(":", 2)
93
87
  prefix.sub!(/^@#{WS}*/, '')
94
- input[:shapeLabel] = ns(prefix, suffix)
95
- error(nil, "Compact IRI missing prefix definition: #{token.value}", production: :ATPNAME_LN) unless input[:shapeLabel].absolute?
88
+ ns(prefix, suffix)
96
89
  end
97
- terminal(:ATPNAME_NS, ATPNAME_NS) do |prod, token, input|
98
- prefix = token.value[0..-2]
90
+ terminal(:ATPNAME_NS, ATPNAME_NS) do |value|
91
+ prefix = value[0..-2]
99
92
  prefix.sub!(/^@\s*/, '')
100
93
 
101
- input[:shapeLabel] = ns(prefix, nil)
102
- end
103
- terminal(:LANGTAG, LANGTAG) do |prod, token, input|
104
- input[:language] = token.value[1..-1]
105
- end
106
- terminal(:LANG_STRING_LITERAL_LONG1, LANG_STRING_LITERAL_LONG1, unescape: true) do |prod, token, input|
107
- input[:string], _, input[:language] = token.value[3..-1].rpartition("'''@")
108
- end
109
- terminal(:LANG_STRING_LITERAL_LONG2, LANG_STRING_LITERAL_LONG2, unescape: true) do |prod, token, input|
110
- input[:string], _, input[:language] = token.value[3..-1].rpartition('"""@')
111
- end
112
- terminal(:LANG_STRING_LITERAL1, LANG_STRING_LITERAL1, unescape: true) do |prod, token, input|
113
- input[:string], _, input[:language] = token.value[1..-1].rpartition("'@")
114
- end
115
- terminal(:LANG_STRING_LITERAL2, LANG_STRING_LITERAL2, unescape: true) do |prod, token, input|
116
- input[:string], _, input[:language] = token.value[1..-1].rpartition('"@')
117
- end
118
- terminal(:STRING_LITERAL_LONG1, STRING_LITERAL_LONG1, unescape: true) do |prod, token, input|
119
- input[:string] = token.value[3..-4]
120
- end
121
- terminal(:STRING_LITERAL_LONG2, STRING_LITERAL_LONG2, unescape: true) do |prod, token, input|
122
- input[:string] = token.value[3..-4]
123
- end
124
- terminal(:STRING_LITERAL1, STRING_LITERAL1, unescape: true) do |prod, token, input|
125
- input[:string] = token.value[1..-2]
126
- end
127
- terminal(:STRING_LITERAL2, STRING_LITERAL2, unescape: true) do |prod, token, input|
128
- input[:string] = token.value[1..-2]
129
- end
130
- terminal(:REGEXP, REGEXP) do |prod, token, input|
131
- input[:regexp] = token.value
132
- end
133
- terminal(:RDF_TYPE, RDF_TYPE) do |prod, token, input|
134
- input[:iri] = (a = RDF.type.dup; a.lexical = 'a'; a)
135
- end
136
-
137
- # String terminals
138
- terminal(nil, STR_EXPR, map: STR_MAP) do |prod, token, input|
139
- case token.value
140
- when '*' then input[:cardinality] = {min: 0, max: "*"}
141
- when '+' then input[:cardinality] = {min: 1, max: "*"}
142
- when '?' then input[:cardinality] = {min: 0, max: 1}
143
- when '!' then input[:not] = token.value
144
- when '^' then input[:inverse] = token.value
145
- when '.' then input[:dot] = token.value
146
- when 'true', 'false' then input[:literal] = RDF::Literal::Boolean.new(token.value)
147
- when '~' then input[:pattern] = token.value
148
- when 'BNODE', 'IRI',
149
- 'NONLITERAL' then input[:nonLiteralKind] = token.value.downcase.to_sym
150
- when 'CLOSED' then input[:closed] = token.value.downcase.to_sym
151
- when 'EXTERNAL' then input[:external] = token.value.downcase.to_sym
152
- when 'FRACTIONDIGITS',
153
- 'TOTALDIGITS' then input[:numericLength] = token.value.downcase.to_sym
154
- when 'LITERAL' then input[:shapeAtomLiteral] = token.value.downcase.to_sym
155
- when 'LENGTH',
156
- 'MINLENGTH',
157
- 'MAXLENGTH' then input[:stringLength] = token.value.downcase.to_sym
158
- when 'MININCLUSIVE',
159
- 'MINEXCLUSIVE',
160
- 'MAXINCLUSIVE',
161
- 'MAXEXCLUSIVE' then input[:numericRange] = token.value.downcase.to_sym
162
- when 'NOT' then input[:not] = token.value.downcase.to_sym
163
- when 'START' then input[:start] = token.value.downcase.to_sym
164
- else
165
- #raise "Unexpected MC terminal: #{token.inspect}"
166
- end
94
+ ns(prefix, nil)
95
+ end
96
+ terminal(:LANGTAG, LANGTAG) do |value|
97
+ value[1..-1]
98
+ end
99
+ terminal(:LANG_STRING_LITERAL_LONG1, LANG_STRING_LITERAL_LONG1, unescape: true) do |value|
100
+ s, _, l = value[3..-1].rpartition("'''@")
101
+ [s, language: l]
102
+ end
103
+ terminal(:LANG_STRING_LITERAL_LONG2, LANG_STRING_LITERAL_LONG2, unescape: true) do |value|
104
+ s, _, l = value[3..-1].rpartition('"""@')
105
+ [s, language: l]
106
+ end
107
+ terminal(:LANG_STRING_LITERAL1, LANG_STRING_LITERAL1, unescape: true) do |value|
108
+ s, _, l = value[1..-1].rpartition("'@")
109
+ [s, language: l]
110
+ end
111
+ terminal(:LANG_STRING_LITERAL2, LANG_STRING_LITERAL2, unescape: true) do |value|
112
+ s, _, l = value[1..-1].rpartition('"@')
113
+ [s, language: l]
114
+ end
115
+ terminal(:STRING_LITERAL_LONG1, STRING_LITERAL_LONG1, unescape: true) do |value|
116
+ value[3..-4]
117
+ end
118
+ terminal(:STRING_LITERAL_LONG2, STRING_LITERAL_LONG2, unescape: true) do |value|
119
+ value[3..-4]
120
+ end
121
+ terminal(:STRING_LITERAL1, STRING_LITERAL1, unescape: true) do |value|
122
+ value[1..-2]
123
+ end
124
+ terminal(:STRING_LITERAL2, STRING_LITERAL2, unescape: true) do |value|
125
+ value[1..-2]
126
+ end
127
+ terminal(:REGEXP, REGEXP)
128
+ terminal(:RDF_TYPE, RDF_TYPE) do |value|
129
+ (a = RDF.type.dup; a.lexical = 'a'; a)
167
130
  end
168
131
 
169
132
  # Productions
170
133
  # [1] shexDoc ::= directive* ((notStartAction | startActions) statement*)?
171
- production(:shexDoc) do |input, data, callback|
172
- data[:start] = data[:start] if data[:start]
173
-
134
+ start_production(:shexDoc, as_hash: true, clear_packrat: true)
135
+ production(:shexDoc) do |value|
174
136
  expressions = []
175
- expressions << [:base, data[:baseDecl]] if data[:baseDecl]
176
- expressions << [:prefix, data[:prefixDecl]] if data[:prefixDecl]
177
- expressions += Array(data[:codeDecl])
178
- expressions << Algebra::Start.new(data[:start]) if data[:start]
179
- expressions << data[:shapes].unshift(:shapes) if data[:shapes]
137
+ prefixes = []
138
+
139
+ # directive *
140
+ expressions += value[:_shexDoc_1]
141
+
142
+ # ((notStartAction | startActions) statement*)?
143
+ if value = value[:_shexDoc_2]
144
+ # These may start with codeDecl or start. otherwise, they are all shapes
145
+ expressions += Array(value[:_shexDoc_4])
146
+ expressions += Array(value[:_shexDoc_5])
147
+ end
180
148
 
181
- input[:schema] = Algebra::Schema.new(*expressions, options)
182
- self
149
+ # Extract declarations, startacts and start from expressions
150
+ declarations, expressions = expressions.partition {|op| op.is_a?(Array)}
151
+ prefixes, bases = declarations.partition {|op| op.first == :prefix}
152
+ semacts, expressions = expressions.partition {|op| op.is_a?(Algebra::SemAct)}
153
+ starts, expressions = expressions.partition {|op| op.is_a?(Algebra::Start)}
154
+
155
+ operands = []
156
+ operands += bases unless bases.empty?
157
+ unless prefixes.empty?
158
+ operands << [:prefix, prefixes.map {|p| p[1,2]}]
159
+ end
160
+ operands += semacts
161
+ operands += starts
162
+ operands << expressions.unshift(:shapes) unless expressions.empty?
163
+ Algebra::Schema.new(*operands, **options)
183
164
  end
165
+ start_production(:_shexDoc_2, as_hash: true)
166
+ start_production(:_shexDoc_3, as_hash: true)
184
167
 
185
168
  # [2] directive ::= baseDecl | prefixDecl
186
169
 
187
170
  # [3] baseDecl ::= "BASE" IRIREF
188
- production(:baseDecl) do |input, data, callback|
189
- input[:baseDecl] = self.base_uri = iri(data[:iri])
171
+ start_production(:baseDecl, as_hash: true, insensitive_strings: :lower)
172
+ production(:baseDecl) do |value|
173
+ self.base_uri = iri(value[:IRIREF])
174
+ [:base, self.base_uri]
190
175
  end
191
176
 
192
177
  # [4] prefixDecl ::= "PREFIX" PNAME_NS IRIREF
193
- production(:prefixDecl) do |input, data, callback|
194
- pfx = data[:prefix]
195
- self.prefix(pfx, data[:iri])
196
- (input[:prefixDecl] ||= []) << [pfx.to_s, data[:iri]]
178
+ start_production(:prefixDecl, as_hash: true, insensitive_strings: :lower)
179
+ production(:prefixDecl) do |value|
180
+ pfx = value[:PNAME_NS]
181
+ prefix(pfx, value[:IRIREF])
182
+ [:prefix, pfx.to_s, value[:IRIREF]]
197
183
  end
198
184
 
199
185
  # [5] notStartAction ::= start | shapeExprDecl
200
- # [6] start ::= "start" '=' shapeExpression
201
- production(:start) do |input, data, callback|
202
- input[:start] = Array(data[:shapeExpression]).first || data[:shape]
186
+ # [6] start ::= "START" '=' "NOT"? (shapeAtomNoRef | shapeRef) shapeOr?
187
+ start_production(:start, as_hash: true, insensitive_strings: :lower)
188
+ production(:start) do |value|
189
+ expr = value[:_start_2]
190
+ expr = value[:_start_3].call(expr) if value[:_start_3]
191
+ expr = Algebra::Not.new(expr) if value[:__start_1]
192
+ Algebra::Start.new(expr)
203
193
  end
204
194
  # [7] startActions ::= codeDecl+
205
195
 
206
196
  # [8] statement ::= directive | notStartAction
207
197
 
208
- # [9] shapeExprDecl ::= shapeLabel (shapeExpression|"EXTERNAL")
209
- production(:shapeExprDecl) do |input, data, callback|
210
- id = Array(data[:shapeLabel]).first
211
- expression = case Array(data[:shapeExpression]).first
198
+ # [9] shapeExprDecl ::= shapeExprLabel (shapeExpression | "EXTERNAL")
199
+ start_production(:shapeExprDecl, as_hash: true)
200
+ production(:shapeExprDecl) do |value|
201
+ id = value[:shapeExprLabel]
202
+ expression = case value[:_shapeExprDecl_1]
212
203
  when Algebra::NodeConstraint, Algebra::Or, Algebra::And, Algebra::Not, Algebra::Shape, RDF::Resource
213
- Array(data[:shapeExpression]).first
204
+ value[:_shapeExprDecl_1]
205
+ when /external/i
206
+ Algebra::External.new()
214
207
  else
215
- data[:external] ? Algebra::External.new() : Algebra::Shape.new()
208
+ Algebra::Shape.new()
216
209
  end
217
210
  expression.id = id if id && !expression.is_a?(RDF::Resource)
218
211
 
219
- (input[:shapes] ||= []) << expression
212
+ expression
220
213
  end
221
214
 
222
- # [10] shapeExpression ::= shapeAtomNoRef shapeOr?
223
- # | "NOT" (shapeAtomNoRef | shapeRef) shapeOr?
215
+ # [10] shapeExpression ::= "NOT"? shapeAtomNoRef shapeOr?
216
+ # | "NOT" shapeRef shapeOr?
224
217
  # | shapeRef shapeOr
225
- production(:shapeExpression) do |input, data, callback|
226
- expression = Array(data[:shapeExpression]).first || data[:shape]
227
- expression = Algebra::Not.new(expression) if data[:not]
228
- (input[:shapeExpression] ||= []) << expression
218
+ start_production(:_shapeExpression_1, as_hash: true, insensitive_strings: :lower)
219
+ production(:_shapeExpression_1) do |value|
220
+ # "NOT"? shapeAtomNoRef shapeOr?
221
+ expr = value[:shapeAtomNoRef]
222
+ expr = Algebra::Not.new(expr) if value[:_shapeExpression_4]
223
+ expr = value[:_shapeExpression_5].call(expr) if value[:_shapeExpression_5]
224
+ expr
225
+ end
226
+ start_production(:_shapeExpression_2, as_hash: true, insensitive_strings: :lower)
227
+ production(:_shapeExpression_2) do |value|
228
+ # "NOT" shapeRef shapeOr?
229
+ expr = Algebra::Not.new(value[:shapeRef])
230
+ expr = value[:_shapeExpression_6].call(expr) if value[:_shapeExpression_6]
231
+ expr
232
+ end
233
+ start_production(:_shapeExpression_3, as_hash: true)
234
+ production(:_shapeExpression_3) do |value|
235
+ # shapeRef shapeOr
236
+ value[:shapeOr].call(value[:shapeRef])
229
237
  end
230
238
 
231
239
  # [11] inlineShapeExpression ::= inlineShapeOr
232
- # [12] shapeOr ::= shapeOrA | shapeOrB shapeOrA?
233
- # [12a] shapeOrA ::= ("OR" shapeAnd)+
234
- start_production(:shapeOrA) do |input, data, callback|
235
- data[:shapeExpression] = input.delete(:shapeExpression)
236
- data[:shapeExpression] ||= Array(input.delete(:shape)) if input[:shape]
237
- data[:shapeExpression] = [Algebra::Not.new(*data[:shapeExpression])] if input.delete(:not)
238
- end
239
- production(:shapeOrA) do |input, data, callback|
240
- shape_or(input, data)
241
- end
242
- # [12b] shapeOrB ::= ("AND" shapeNot)+
243
- start_production(:shapeOrB) do |input, data, callback|
244
- data[:shapeExpression] = input.delete(:shapeExpression)
245
- data[:shapeExpression] ||= Array(input.delete(:shape)) if input[:shape]
246
- data[:shapeExpression] = [Algebra::Not.new(*data[:shapeExpression])] if input.delete(:not)
247
- end
248
- production(:shapeOrB) do |input, data, callback|
249
- shape_and(input, data)
240
+ production(:inlineShapeExpression) do |value|
241
+ value.first[:inlineShapeOr]
242
+ end
243
+ # [12] shapeOr ::= ("OR" shapeAnd)+
244
+ # | ("AND" shapeNot)+ ("OR" shapeAnd)*
245
+ # As shapeOr has an implicit first parameter from the invoking production's first element, the result is a block which will accept the value of that production and apply it to any RHS expression found here.
246
+ start_production(:_shapeOr_1, insensitive_strings: :lower)
247
+ production(:_shapeOr_1) do |value|
248
+ # ("OR" shapeAnd)+
249
+ -> (lhs) {Algebra::Or.new(lhs, *value.map {|v| v.last[:shapeAnd]})}
250
+ end
251
+ start_production(:_shapeOr_2, as_hash: true, insensitive_strings: :lower)
252
+ production(:_shapeOr_2) do |value|
253
+ # ("AND" shapeNot)+ ("OR" shapeAnd)*
254
+ ands = value[:_shapeOr_4].map {|v| v.last[:shapeNot]}
255
+ ors = value[:_shapeOr_5].map {|v| v.last[:shapeAnd]}
256
+ if ors.empty?
257
+ -> (lhs) {Algebra::And.new(lhs, *ands)}
258
+ else
259
+ -> (lhs) {ShapeOr(Algebra::And.new(lhs, ands), *ors)}
260
+ end
250
261
  end
251
262
 
252
263
  # [13] inlineShapeOr ::= inlineShapeAnd ("OR" inlineShapeAnd)*
253
- production(:inlineShapeOr) do |input, data, callback|
254
- shape_or(input, data)
255
- end
256
- def shape_or(input, data)
257
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
258
- expression = if Array(data[:shapeExpression]).length > 1
259
- Algebra::Or.new(*data[:shapeExpression], {})
264
+ start_production(:inlineShapeOr, as_hash: true)
265
+ production(:inlineShapeOr) do |value|
266
+ if value[:_inlineShapeOr_1].empty?
267
+ value[:inlineShapeAnd]
260
268
  else
261
- Array(data[:shapeExpression]).first
269
+ lhs = value[:_inlineShapeOr_1].map {|v| v.last[:inlineShapeAnd]}
270
+ Algebra::Or.new(value[:inlineShapeAnd], *lhs)
262
271
  end
263
- (input[:shapeExpression] ||= []) << expression if expression
264
- rescue ArgumentError => e
265
- error(nil, "Argument Error on OR: #{e.message}")
266
272
  end
267
- private :shape_or
273
+ start_production(:_inlineShapeOr_2, insensitive_strings: :lower)
268
274
 
269
275
  # [14] shapeAnd ::= shapeNot ("AND" shapeNot)*
270
- production(:shapeAnd) do |input, data, callback|
271
- shape_and(input, data)
272
- end
273
- # [15] inlineShapeAnd ::= inlineShapeNot ("AND" inlineShapeNot)*
274
- production(:inlineShapeAnd) do |input, data, callback|
275
- shape_and(input, data)
276
- end
277
- def shape_and(input, data)
278
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
279
- expressions = Array(data[:shapeExpression]).inject([]) do |memo, expr|
280
- #memo.concat(expr.is_a?(Algebra::And) ? expr.operands : [expr])
281
- memo.concat([expr])
276
+ start_production(:shapeAnd, as_hash: true)
277
+ production(:shapeAnd) do |value|
278
+ if value[:_shapeAnd_1].empty?
279
+ value[:shapeNot]
280
+ else
281
+ lhs = value[:_shapeAnd_1].map {|v| v.last[:shapeNot]}
282
+ Algebra::And.new(value[:shapeNot], *lhs)
282
283
  end
284
+ end
285
+ start_production(:_shapeAnd_2, insensitive_strings: :lower)
283
286
 
284
- expression = if expressions.length > 1
285
- Algebra::And.new(*expressions, {})
287
+ # [15] inlineShapeAnd ::= inlineShapeNot ("AND" inlineShapeNot)*
288
+ start_production(:inlineShapeAnd, as_hash: true)
289
+ production(:inlineShapeAnd) do |value|
290
+ if value[:_inlineShapeAnd_1].empty?
291
+ value[:inlineShapeNot]
286
292
  else
287
- expressions.first
293
+ lhs = value[:_inlineShapeAnd_1].map {|v| v.last[:inlineShapeNot]}
294
+ Algebra::And.new(value[:inlineShapeNot], *lhs)
288
295
  end
289
- (input[:shapeExpression] ||= []) << expression if expression
290
- rescue ArgumentError => e
291
- error(nil, "Argument Error on AND: #{e.message}")
292
296
  end
293
- private :shape_and
297
+ start_production(:_inlineShapeAnd_2, insensitive_strings: :lower)
294
298
 
295
299
  # [16] shapeNot ::= "NOT"? shapeAtom
296
- production(:shapeNot) do |input, data, callback|
297
- shape_not(input, data)
300
+ start_production(:shapeNot, as_hash: true)
301
+ production(:shapeNot) do |value|
302
+ atom = value[:shapeAtom]
303
+ value[:_shapeNot_1] ? Algebra::Not.new(atom) : atom
298
304
  end
305
+ start_production(:_shapeNot_1, insensitive_strings: :lower)
306
+
299
307
  # [17] inlineShapeNot ::= "NOT"? inlineShapeAtom
300
- production(:inlineShapeNot) do |input, data, callback|
301
- shape_not(input, data)
302
- end
303
- def shape_not(input, data)
304
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
305
- expression = Array(data[:shapeExpression]).first
306
- expression = Algebra::Not.new(expression) if data[:not]
307
- #error(nil, "Expected an atom for NOT") unless expression
308
- (input[:shapeExpression] ||= []) << expression if expression
308
+ start_production(:inlineShapeNot, as_hash: true)
309
+ production(:inlineShapeNot) do |value|
310
+ atom = value[:inlineShapeAtom]
311
+ value[:_inlineShapeNot] ? Algebra::Not.new(atom) : atom
309
312
  end
310
- private :shape_not
313
+ start_production(:_inlineShapeNot_1, insensitive_strings: :lower)
311
314
 
312
- # [18] shapeAtom ::= nodeConstraint shapeOrRef?
313
- # | shapeOrRef
315
+ # [18] shapeAtom ::= nonLitNodeConstraint shapeOrRef?
316
+ # | litNodeConstraint
317
+ # | shapeOrRef nonLitNodeConstraint?
314
318
  # | "(" shapeExpression ")"
315
319
  # | '.' # no constraint
316
- production(:shapeAtom) do |input, data, callback|
317
- shape_atom(input, data)
320
+ production(:shapeAtom) do |value|
321
+ expressions = case
322
+ when value.is_a?(Algebra::Operator)
323
+ [value]
324
+ when value == '.' then []
325
+ when value[:nonLitNodeConstraint]
326
+ [value[:nonLitNodeConstraint], value[:_shapeAtom_4]].compact
327
+ when value[:shapeOrRef]
328
+ [value[:shapeOrRef], value[:_shapeAtom_5]].compact
329
+ when value[:_shapeAtom_3]
330
+ value[:_shapeAtom_3]
331
+ else []
332
+ end
333
+
334
+ case expressions.length
335
+ when 0 then nil
336
+ when 1 then expressions.first
337
+ else Algebra::And.new(*expressions)
338
+ end
318
339
  end
340
+ start_production(:_shapeAtom_1, as_hash: true)
341
+ start_production(:_shapeAtom_2, as_hash: true)
342
+ production(:_shapeAtom_3) do |value|
343
+ value[1][:shapeExpression]
344
+ end
345
+
346
+ # [19] shapeAtomNoRef ::= nonLitNodeConstraint shapeOrRef?
347
+ # | litNodeConstraint
348
+ # | shapeDefinition nonLitNodeConstraint?
349
+ # | "(" shapeExpression ")"
350
+ # | '.' # no constraint
351
+ production(:shapeAtomNoRef) do |value|
352
+ expressions = case
353
+ when value.is_a?(Algebra::Operator)
354
+ [value]
355
+ when value == '.' then []
356
+ when value[:nonLitNodeConstraint]
357
+ [value[:nonLitNodeConstraint], value[:_shapeAtomNoRef_4]].compact
358
+ when value[:shapeDefinition]
359
+ [value[:shapeDefinition], value[:_shapeAtomNoRef_5]].compact
360
+ when value[:_shapeAtomNoRef_3]
361
+ value[:_shapeAtomNoRef_3]
362
+ else []
363
+ end
319
364
 
320
- # [19] shapeAtomNoRef ::= nodeConstraint shapeOrRef?
321
- # | shapeDefinition
322
- # | "(" shapeExpression ")"
323
- # | '.' # no constraint
324
- production(:shapeAtomNoRef) do |input, data, callback|
325
- shape_atom(input, data)
365
+ case expressions.length
366
+ when 0 then nil
367
+ when 1 then expressions.first
368
+ else Algebra::And.new(*expressions)
369
+ end
370
+ end
371
+ start_production(:_shapeAtomNoRef_1, as_hash: true)
372
+ start_production(:_shapeAtomNoRef_2, as_hash: true)
373
+ production(:_shapeAtomNoRef_3) do |value|
374
+ value[1][:shapeExpression]
326
375
  end
327
376
 
328
- # [20] inlineShapeAtom ::= nodeConstraint inlineShapeOrRef?
329
- # | inlineShapeOrRef nodeConstraint?
377
+ # [20] inlineShapeAtom ::= nonLitNodeConstraint inlineShapeOrRef?
378
+ # | litNodeConstraint
379
+ # | inlineShapeOrRef nonLitNodeConstraint?
330
380
  # | "(" shapeExpression ")"
331
381
  # | '.' # no constraint
332
- production(:inlineShapeAtom) do |input, data, callback|
333
- shape_atom(input, data)
334
- end
335
- def shape_atom(input, data)
336
- constraint = data[:nodeConstraint]
337
- shape = data[:shapeOrRef] || Array(data[:shapeExpression]).first || data[:shape]
338
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
382
+ production(:inlineShapeAtom) do |value|
383
+ expressions = case
384
+ when value == '.' then []
385
+ when value.is_a?(Algebra::Operator)
386
+ [value]
387
+ when value[:nonLitNodeConstraint]
388
+ [value[:nonLitNodeConstraint], value[:_inlineShapeAtom_4]].compact
389
+ when value[:inlineShapeOrRef]
390
+ [value[:inlineShapeOrRef], value[:__inlineShapeAtom_5]].compact
391
+ when value[:_inlineShapeAtom_3]
392
+ value[:_inlineShapeAtom_3]
393
+ else []
394
+ end
339
395
 
340
- expression = [constraint, shape].compact
341
- expression = case expression.length
396
+ case expressions.length
342
397
  when 0 then nil
343
- when 1 then expression.first
344
- else Algebra::And.new(*expression, {})
398
+ when 1 then expressions.first
399
+ else Algebra::And.new(*expressions)
345
400
  end
346
-
347
- (input[:shapeExpression] ||= []) << expression if expression
348
401
  end
349
- private :shape_atom
402
+ start_production(:_inlineShapeAtom_1, as_hash: true)
403
+ start_production(:_inlineShapeAtom_2, as_hash: true)
404
+ production(:_inlineShapeAtom_3) do |value|
405
+ value[1][:shapeExpression]
406
+ end
350
407
 
351
408
  # [21] shapeOrRef ::= shapeDefinition | shapeRef
352
- production(:shapeOrRef) do |input, data, callback|
353
- shape_or_ref(input, data)
354
- end
355
409
  # [22] inlineShapeOrRef ::= inlineShapeDefinition | shapeRef
356
- production(:inlineShapeOrRef) do |input, data, callback|
357
- shape_or_ref(input, data)
358
- end
359
- def shape_or_ref(input, data)
360
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
361
- input[:shapeOrRef] = data[:shape] if data[:shape]
362
- rescue ArgumentError => e
363
- error(nil, "Argument Error on ShapeOrRef: #{e.message}")
364
- end
365
- private :shape_or_ref
366
410
 
367
- # [23] shapeRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel
368
- production(:shapeRef) do |input, data, callback|
369
- input[:shape] = Array(data[:shapeLabel]).first
411
+ # [23] shapeRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeExprLabel
412
+ production(:shapeRef) do |value|
413
+ value.is_a?(Array) ? value.last[:shapeExprLabel] : value
370
414
  end
371
415
 
372
416
  # [24] litNodeConstraint ::= "LITERAL" xsFacet*
417
+ # | nonLiteralKind stringFacet*
373
418
  # | datatype xsFacet*
374
419
  # | valueSet xsFacet*
375
420
  # | numericFacet+
376
- production(:litNodeConstraint) do |input, data, callback|
377
- # Semantic validate (A Syntax error)
378
- case
379
- when data[:datatype] && data[:numericFacet]
380
- # Can only use a numeric facet on a numeric datatype
381
- l = RDF::Literal.new("1", datatype: data[:datatype])
382
- error(nil, "Numeric facet used on non-numeric datatype: #{data[:datatype]}", production: :nodeConstraint) unless l.is_a?(RDF::Literal::Numeric)
383
- end
384
-
385
- attrs = []
386
- attrs << [:datatype, data[:datatype]] if data [:datatype]
387
- attrs += Array(data[:shapeAtomLiteral])
388
- attrs += Array(data[:valueSetValue])
389
- attrs += Array(data[:numericFacet])
390
- attrs += Array(data[:stringFacet])
391
-
392
- input[:nodeConstraint] = Algebra::NodeConstraint.new(*attrs.compact, {})
421
+ start_production(:_litNodeConstraint_1, as_hash: true, insensitive_strings: :lower)
422
+ production(:_litNodeConstraint_1) do |value|
423
+ facets = value[:_litNodeConstraint_6]
424
+ validate_facets(facets, :litNodeConstraint)
425
+ Algebra::NodeConstraint.new(:literal, *facets)
426
+ end
427
+ start_production(:_litNodeConstraint_2, as_hash: true)
428
+ production(:_litNodeConstraint_2) do |value|
429
+ facets = Array(value[:_litNodeConstraint_7])
430
+ validate_facets(facets, :litNodeConstraint)
431
+ attrs = Array(value[:nonLiteralKind]) + facets
432
+ Algebra::NodeConstraint.new(*attrs.compact)
433
+ end
434
+ start_production(:_litNodeConstraint_3, as_hash: true)
435
+ production(:_litNodeConstraint_3) do |value|
436
+ facets = value[:_litNodeConstraint_8]
437
+ validate_facets(facets, :litNodeConstraint)
438
+ attrs = [[:datatype, value[:datatype]]] + facets
439
+ Algebra::NodeConstraint.new(*attrs.compact)
440
+ end
441
+ start_production(:_litNodeConstraint_4, as_hash: true)
442
+ production(:_litNodeConstraint_4) do |value|
443
+ facets = value[:_litNodeConstraint_9]
444
+ validate_facets(facets, :litNodeConstraint)
445
+ attrs = value[:valueSet]+ facets
446
+ Algebra::NodeConstraint.new(*attrs.compact)
447
+ end
448
+ production(:_litNodeConstraint_5) do |value|
449
+ validate_facets(value, :litNodeConstraint)
450
+ Algebra::NodeConstraint.new(*value)
393
451
  end
394
452
 
395
453
  # [25] nonLitNodeConstraint ::= nonLiteralKind stringFacet*
396
454
  # | stringFacet+
397
- production(:nonLitNodeConstraint) do |input, data, callback|
398
- # Semantic validate (A Syntax error)
399
-
400
- attrs = []
401
- attrs += Array(data[:nonLiteralKind])
402
- attrs += Array(data[:stringFacet])
403
-
404
- input[:nodeConstraint] = Algebra::NodeConstraint.new(*attrs.compact, {})
455
+ start_production(:_nonLitNodeConstraint_1, as_hash: true)
456
+ production(:_nonLitNodeConstraint_1) do |value|
457
+ # nonLiteralKind stringFacet*
458
+ facets = Array(value[:_nonLitNodeConstraint_3])
459
+ validate_facets(facets, :nonLitNodeConstraint)
460
+ attrs = Array(value[:nonLiteralKind]) + facets
461
+ Algebra::NodeConstraint.new(*attrs.compact)
462
+ end
463
+ production(:_nonLitNodeConstraint_2) do |value|
464
+ # stringFacet+
465
+ validate_facets(value, :nonLitNodeConstraint)
466
+ Algebra::NodeConstraint.new(*value)
467
+ end
468
+
469
+ def validate_facets(facets, prod)
470
+ facets.each do |facet|
471
+ if facets.count {|f| f.first == facet.first} > 1
472
+ error(nil, "#{facet.first} constraint may only be used once in a Node Constraint", production: prod)
473
+ end
474
+ end
405
475
  end
476
+ private :validate_facets
406
477
 
407
478
  # [26] nonLiteralKind ::= "IRI" | "BNODE" | "NONLITERAL"
479
+ start_production(:nonLiteralKind, insensitive_strings: :lower)
480
+ production(:nonLiteralKind) do |value|
481
+ value.downcase.to_sym
482
+ end
408
483
 
409
484
  # [27] xsFacet ::= stringFacet | numericFacet
410
485
  # [28] stringFacet ::= stringLength INTEGER
411
486
  # | REGEXP
412
- production(:stringFacet) do |input, data, callback|
413
- input[:stringFacet] ||= []
414
- input[:stringFacet] << if data[:stringLength]
415
- if input[:stringFacet].flatten.include?(data[:stringLength])
416
- error(nil, "#{data[:stringLength]} constraint may only be used once in a Node Constraint", production: :stringFacet)
417
- end
418
- [data[:stringLength], data[:literal]]
419
- elsif re = data[:regexp]
420
- unless re =~ %r(^/(.*)/([smix]*)$)
421
- error(nil, "#{re.inspect} regular expression must be in the form /pattern/flags?", production: :stringFacet)
487
+ production(:stringFacet) do |value|
488
+ if value.is_a?(Array) # stringLength
489
+ value
490
+ else
491
+ unless value =~ %r(^/(.*)/([smix]*)$)
492
+ error(nil, "#{value.inspect} regular expression must be in the form /pattern/flags?", production: :stringFacet)
422
493
  end
494
+
423
495
  flags = $2 unless $2.to_s.empty?
424
496
  pattern = $1.gsub('\\/', '/').gsub(UCHAR) do
425
497
  [($1 || $2).hex].pack('U*')
426
498
  end.force_encoding(Encoding::UTF_8)
427
499
 
428
500
  # Any other escaped character is a syntax error
429
- if pattern.match(%r([^\\]\\[^nrt/\\|\.?*+\[\]\(\){}$#x2D#x5B#x5D#x5E-]))
501
+ if pattern.match?(%r([^\\]\\[^nrt/\\|\.?*+\[\]\(\){}$#x2D#x5B#x5D#x5E-]))
430
502
  error(nil, "Regexp contains illegal escape: #{pattern.inspect}", production: :stringFacet)
431
503
  end
432
504
 
433
505
  [:pattern, pattern, flags].compact
434
506
  end
435
507
  end
508
+ start_production(:_stringFacet_1, as_hash: true)
509
+ production(:_stringFacet_1) do |value|
510
+ [value[:stringLength].downcase.to_sym, value[:INTEGER]]
511
+ end
436
512
 
437
513
  # [29] stringLength ::= "LENGTH" | "MINLENGTH" | "MAXLENGTH"
514
+ start_production(:stringLength, insensitive_strings: :lower)
438
515
 
439
- # [30] numericFacet ::= numericRange (numericLiteral | string '^^' datatype )
516
+ # [30] numericFacet ::= numericRange numericLiteral
440
517
  # | numericLength INTEGER
441
- production(:numericFacet) do |input, data, callback|
442
- input[:numericFacet] ||= []
443
- input[:numericFacet] << if data[:numericRange]
444
- literal = data[:literal] || literal(data[:string], datatype: data[:datatype])
445
- error(nil, "numericRange must use a numeric datatype: #{data[:datatype]}", production: :numericFacet) unless literal.is_a?(RDF::Literal::Numeric)
446
- [data[:numericRange], literal]
447
- elsif data[:numericLength]
448
- [data[:numericLength], data[:literal]]
449
- end
518
+ start_production(:_numericFacet_1, as_hash: true)
519
+ production(:_numericFacet_1) do |value|
520
+ [value[:numericRange].downcase.to_sym, value[:numericLiteral]]
521
+ end
522
+ start_production(:_numericFacet_2, as_hash: true)
523
+ production(:_numericFacet_2) do |value|
524
+ [value[:numericLength].downcase.to_sym, value[:INTEGER]]
450
525
  end
451
526
 
452
527
  # [31] numericRange ::= "MININCLUSIVE" | "MINEXCLUSIVE" | "MAXINCLUSIVE" | "MAXEXCLUSIVE"
528
+ start_production(:numericRange, insensitive_strings: :lower)
529
+
453
530
  # [32] numericLength ::= "TOTALDIGITS" | "FRACTIONDIGITS"
531
+ start_production(:numericLength, insensitive_strings: :lower)
454
532
 
455
533
  # [33] shapeDefinition ::= (includeSet | extraPropertySet | "CLOSED")* '{' tripleExpression? '}' annotation* semanticActions
456
- production(:shapeDefinition) do |input, data, callback|
457
- shape_definition(input, data)
534
+ start_production(:shapeDefinition, as_hash: true)
535
+ production(:shapeDefinition) do |value|
536
+ shape_definition(
537
+ value[:_shapeDefinition_1],
538
+ value[:_shapeDefinition_2],
539
+ value[:_shapeDefinition_3],
540
+ value[:semanticActions])
458
541
  end
542
+ start_production(:_shapeDefinition_4, insensitive_strings: :lower)
543
+
459
544
  # [34] inlineShapeDefinition ::= (includeSet | extraPropertySet | "CLOSED")* '{' tripleExpression? '}'
460
- production(:inlineShapeDefinition) do |input, data, callback|
461
- shape_definition(input, data)
462
- end
463
- def shape_definition(input, data)
464
- # FIXME: includeSet
465
- expression = data[:tripleExpression]
466
- attrs = Array(data[:extraPropertySet])
467
- attrs << :closed if data[:closed]
545
+ start_production(:inlineShapeDefinition, as_hash: true)
546
+ production(:inlineShapeDefinition) do |value|
547
+ shape_definition(
548
+ value[:_inlineShapeDefinition_1],
549
+ value[:_inlineShapeDefinition_2])
550
+ end
551
+ def shape_definition(extra_closed, expression, annotations = [], semact = [])
552
+ closed = extra_closed.any? {|v| v.to_s.downcase == 'closed'}
553
+ extra = extra_closed.reject {|v| v.to_s.downcase == 'closed'}
554
+ attrs = extra
555
+ attrs << :closed if closed
468
556
  attrs << expression if expression
469
- attrs += Array(data[:annotation])
470
- attrs += Array(data[:codeDecl])
557
+ attrs += annotations
558
+ attrs += semact
471
559
 
472
- input[:shape] = Algebra::Shape.new(*attrs, {})
560
+ Algebra::Shape.new(*attrs)
473
561
  end
474
562
  private :shape_definition
475
563
 
476
564
  # [35] extraPropertySet ::= "EXTRA" predicate+
477
- production(:extraPropertySet) do |input, data, callback|
478
- (input[:extraPropertySet] ||= []) << data[:predicate].unshift(:extra)
565
+ start_production(:extraPropertySet, insensitive_strings: :lower)
566
+ production(:extraPropertySet) do |value|
567
+ value.last[:_extraPropertySet_1].unshift(:extra)
479
568
  end
480
569
 
481
570
  # [36] tripleExpression ::= oneOfTripleExpr
482
- # [37] oneOfTripleExpr ::= groupTripleExpr ('|' groupTripleExpr)*
483
- production(:oneOfTripleExpr) do |input, data, callback|
484
- expression = if Array(data[:tripleExpression]).length > 1
485
- Algebra::OneOf.new(*data[:tripleExpression], {})
486
- else
487
- Array(data[:tripleExpression]).first
488
- end
489
- input[:tripleExpression] = expression if expression
571
+ production(:tripleExpression) do |value|
572
+ value.first[:oneOfTripleExpr]
490
573
  end
491
574
 
492
- # [40] groupTripleExpr ::= unaryTripleExpr (';' unaryTripleExpr?)*
493
- production(:groupTripleExpr) do |input, data, callback|
494
- expression = if Array(data[:tripleExpression]).length > 1
495
- Algebra::EachOf.new(*data[:tripleExpression], {})
496
- else
497
- Array(data[:tripleExpression]).first
498
- end
499
- (input[:tripleExpression] ||= []) << expression if expression
575
+ # [37] oneOfTripleExpr ::= groupTripleExpr ('|' groupTripleExpr)*
576
+ start_production(:oneOfTripleExpr, as_hash: true)
577
+ production(:oneOfTripleExpr) do |value|
578
+ expressions = [value[:groupTripleExpr]] + value[:_oneOfTripleExpr_1]
579
+ expressions.length == 1 ? expressions.first : Algebra::OneOf.new(*expressions)
580
+ end
581
+ production(:_oneOfTripleExpr_2) do |value|
582
+ value.last[:groupTripleExpr]
583
+ end
584
+
585
+ # [40] groupTripleExpr ::= unaryTripleExpr (';' unaryTripleExpr?)*
586
+ start_production(:groupTripleExpr, as_hash: true)
587
+ production(:groupTripleExpr) do |value|
588
+ expressions = [value[:unaryTripleExpr]] + value[:_groupTripleExpr_1]
589
+ expressions.length == 1 ? expressions.first : Algebra::EachOf.new(*expressions)
590
+ end
591
+ production(:_groupTripleExpr_2) do |value|
592
+ value.last[:_groupTripleExpr_3]
500
593
  end
501
594
 
502
595
  # [43] unaryTripleExpr ::= productionLabel? (tripleConstraint | bracketedTripleExpr) | include
503
- production(:unaryTripleExpr) do |input, data, callback|
504
- expression = data[:tripleExpression]
505
- expression.id = data[:productionLabel] if expression && data[:productionLabel]
596
+ start_production(:_unaryTripleExpr_1, as_hash: true)
597
+ production(:_unaryTripleExpr_1) do |value|
598
+ expression = value[:_unaryTripleExpr_3]
599
+ expression.id = value[:_unaryTripleExpr_2] if expression && value[:_unaryTripleExpr_2]
506
600
 
507
- (input[:tripleExpression] ||= []) << expression if expression
601
+ expression
508
602
  end
509
603
 
510
604
  # [43a] productionLabel ::= '$' (iri | blankNode)
511
- production(:productionLabel) do |input, data, callback|
512
- input[:productionLabel] = data[:iri] || data[:blankNode]
605
+ production(:productionLabel) do |value|
606
+ value.last[:_productionLabel_1]
513
607
  end
514
608
 
515
609
  # [44] bracketedTripleExpr ::= '(' oneOfTripleExpr ')' cardinality? annotation* semanticActions
516
- production(:bracketedTripleExpr) do |input, data, callback|
610
+ start_production(:bracketedTripleExpr, as_hash: true)
611
+ production(:bracketedTripleExpr) do |value|
517
612
  # XXX: cardinality?, annotation* and semanticActions are appended to the operands of the bracketed expression below
518
- case expression = data[:tripleExpression]
613
+ case expression = value[:oneOfTripleExpr]
519
614
  when Algebra::OneOf, Algebra::EachOf
520
615
  else
521
616
  error(nil, "Bracketed Expression requires multiple contained expressions", production: :bracketedTripleExpr)
522
617
  end
523
- cardinality = data.fetch(:cardinality, {})
618
+ cardinality = value[:_bracketedTripleExpr_1] || {}
524
619
  attrs = [
525
620
  ([:min, cardinality[:min]] if cardinality[:min]),
526
621
  ([:max, cardinality[:max]] if cardinality[:max])
527
622
  ].compact
528
- attrs += Array(data[:codeDecl])
529
- attrs += Array(data[:annotation])
623
+ attrs += value[:semanticActions]
624
+ attrs += Array(value[:_bracketedTripleExpr_2])
530
625
 
531
626
  expression.operands.concat(attrs)
532
- input[:tripleExpression] = expression
627
+ expression
533
628
  end
534
629
 
535
- # [45] tripleConstraint ::= senseFlags? predicate shapeExpression cardinality? annotation* semanticActions
536
- production(:tripleConstraint) do |input, data, callback|
537
- cardinality = data.fetch(:cardinality, {})
630
+ # [45] tripleConstraint ::= senseFlags? predicate inlineShapeExpression cardinality? annotation* semanticActions
631
+ start_production(:tripleConstraint, as_hash: true)
632
+ production(:tripleConstraint) do |value|
633
+ cardinality = value[:_tripleConstraint_2] || {}
538
634
  attrs = [
539
- (:inverse if data[:inverse] || data[:not]),
540
- [:predicate, Array(data[:predicate]).first],
541
- Array(data[:shapeExpression]).first,
635
+ (:inverse if value[:_tripleConstraint_1]),
636
+ [:predicate, value[:predicate]],
637
+ value[:inlineShapeExpression],
542
638
  ([:min, cardinality[:min]] if cardinality[:min]),
543
639
  ([:max, cardinality[:max]] if cardinality[:max])
544
640
  ].compact
545
- attrs += Array(data[:codeDecl])
546
- attrs += Array(data[:annotation])
641
+ attrs += value[:_tripleConstraint_3]
642
+ attrs += value[:semanticActions]
547
643
 
548
- input[:tripleExpression] = Algebra::TripleConstraint.new(*attrs, {}) unless attrs.empty?
644
+ Algebra::TripleConstraint.new(*attrs) # unless attrs.empty?
549
645
  end
550
646
 
551
647
  # [46] cardinality ::= '*' | '+' | '?' | REPEAT_RANGE
648
+ production(:cardinality) do |value|
649
+ case value
650
+ when '*' then {min: 0, max: "*"}
651
+ when '+' then {min: 1, max: "*"}
652
+ when '?' then {min: 0, max: 1}
653
+ else value
654
+ end
655
+ end
656
+
552
657
  # [47] senseFlags ::= '^'
553
658
  # [48] valueSet ::= '[' valueSetValue* ']'
659
+ production(:valueSet) do |value|
660
+ value[1][:_valueSet_1]
661
+ end
554
662
 
555
663
  # [49] valueSetValue ::= iriRange | literalRange | languageRange | '.' exclusion+
556
- production(:valueSetValue) do |input, data, callback|
557
- range = data[:iriRange] || data[:literalRange] || data[:languageRange]
558
- if !range
559
- # All exclusions must be consistent IRI/Literal/Language
560
- case data[:exclusion].first
561
- when Algebra::IriStem, RDF::URI
562
- unless data[:exclusion].all? {|e| e.is_a?(Algebra::IriStem) || e.is_a?(RDF::URI)}
563
- error(nil, "Exclusions must all be IRI type")
564
- end
565
- range = Algebra::IriStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
566
- when Algebra::LiteralStem, RDF::Literal
567
- unless data[:exclusion].all? {|e| e.is_a?(Algebra::LiteralStem) || e.is_a?(RDF::Literal)}
568
- error(nil, "Exclusions must all be Literal type")
569
- end
570
- range = Algebra::LiteralStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
571
- else
572
- unless data[:exclusion].all? {|e| e.is_a?(Algebra::LanguageStem) || e.is_a?(String)}
573
- error(nil, "Exclusions must all be Language type")
574
- end
575
- range = Algebra::LanguageStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
664
+ production(:valueSetValue) do |value|
665
+ Algebra::Value.new(value)
666
+ end
667
+ production(:_valueSetValue_1) do |value|
668
+ # All exclusions must be consistent IRI/Literal/Language
669
+ value = value.last[:_valueSetValue_2]
670
+ case value.first
671
+ when Algebra::IriStem, RDF::URI
672
+ unless value.all? {|e| e.is_a?(Algebra::IriStem) || e.is_a?(RDF::URI)}
673
+ error(nil, "Exclusions must all be IRI type")
674
+ end
675
+ Algebra::IriStemRange.new(:wildcard, value.unshift(:exclusions))
676
+ when Algebra::LiteralStem, RDF::Literal
677
+ unless value.all? {|e| e.is_a?(Algebra::LiteralStem) || e.is_a?(RDF::Literal)}
678
+ error(nil, "Exclusions must all be Literal type")
576
679
  end
680
+ Algebra::LiteralStemRange.new(:wildcard, value.unshift(:exclusions))
681
+ else
682
+ unless value.all? {|e| e.is_a?(Algebra::LanguageStem) || e.is_a?(String)}
683
+ error(nil, "Exclusions must all be Language type")
684
+ end
685
+ Algebra::LanguageStemRange.new(:wildcard, value.unshift(:exclusions))
577
686
  end
578
- (input[:valueSetValue] ||= []) << Algebra::Value.new(range)
579
687
  end
580
688
 
581
689
  # [50] exclusion ::= '-' (iri | literal | LANGTAG) '~'?
582
- production(:exclusion) do |input, data, callback|
583
- (input[:exclusion] ||= []) << if data[:pattern]
584
- case
585
- when data[:iri] then Algebra::IriStem.new(data[:iri])
586
- when data[:literal] then Algebra::LiteralStem.new(data[:literal])
587
- when data[:language] then Algebra::LanguageStem.new(data[:language])
690
+ start_production(:exclusion, as_hash: true)
691
+ production(:exclusion) do |value|
692
+ if value[:_exclusion_2]
693
+ case value[:_exclusion_1]
694
+ when RDF::URI then Algebra::IriStem.new(value[:_exclusion_1])
695
+ when RDF::Literal then Algebra::LiteralStem.new(value[:_exclusion_1])
696
+ else Algebra::LanguageStem.new(value[:_exclusion_1])
588
697
  end
589
698
  else
590
- data[:iri] || data[:literal] || data[:language]
699
+ value[:_exclusion_1]
591
700
  end
592
701
  end
593
702
 
594
703
  # [51] iriRange ::= iri ('~' iriExclusion*)?
595
- production(:iriRange) do |input, data, callback|
596
- exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
597
- input[:iriRange] = if data[:pattern] && exclusions
598
- Algebra::IriStemRange.new(data[:iri], exclusions)
599
- elsif data[:pattern]
600
- Algebra::IriStem.new(data[:iri])
601
- elsif data[:dot]
602
- Algebra::IriStemRange.new(:wildcard, exclusions)
704
+ production(:iriRange) do |value|
705
+ iri = value.first[:iri]
706
+ if value.last[:_iriRange_1]
707
+ exclusions = value.last[:_iriRange_1].last[:_iriRange_3]
708
+ if exclusions.empty?
709
+ Algebra::IriStem.new(iri)
710
+ else
711
+ Algebra::IriStemRange.new(iri, exclusions.unshift(:exclusions))
712
+ end
603
713
  else
604
- data[:iri]
714
+ iri
605
715
  end
606
716
  end
607
717
 
608
718
  # [52] iriExclusion ::= '-' iri '~'?
609
- production(:iriExclusion) do |input, data, callback|
610
- val = data[:iri]
611
- (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::IriStem.new(val) : val)
719
+ start_production(:iriExclusion, as_hash: true)
720
+ production(:iriExclusion) do |value|
721
+ value[:_iriExclusion_1] ? Algebra::IriStem.new(value[:iri]) : value[:iri]
612
722
  end
613
723
 
614
724
  # [53] literalRange ::= literal ('~' literalExclusion*)?
615
- production(:literalRange) do |input, data, callback|
616
- exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
617
- input[:literalRange] = if data[:pattern] && exclusions
618
- Algebra::LiteralStemRange.new(data[:literal], exclusions)
619
- elsif data[:pattern]
620
- Algebra::LiteralStem.new(data[:literal])
621
- elsif data[:dot]
622
- Algebra::LiteralStemRange.new(:wildcard, exclusions)
725
+ production(:literalRange) do |value|
726
+ lit = value.first[:literal]
727
+ if value.last[:_literalRange_1]
728
+ exclusions = value.last[:_literalRange_1].last[:_literalRange_3]
729
+ # FIXME: the former wildcard case, Algebra::LiteralStemRange.new(:wildcard, exclusions), is no longer produced here -- confirm it is covered elsewhere
730
+ if exclusions.empty?
731
+ Algebra::LiteralStem.new(lit)
732
+ else
733
+ Algebra::LiteralStemRange.new(lit, exclusions.unshift(:exclusions))
734
+ end
623
735
  else
624
- data[:literal]
736
+ lit
625
737
  end
626
738
  end
627
739
 
628
740
  # [54] literalExclusion ::= '-' literal '~'?
629
- production(:literalExclusion) do |input, data, callback|
630
- val = data[:literal]
631
- (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::LiteralStem.new(val) : val)
741
+ start_production(:literalExclusion, as_hash: true)
742
+ production(:literalExclusion) do |value|
743
+ val = value[:literal]
744
+ value[:_literalExclusion_1] ? Algebra::LiteralStem.new(val) : val
632
745
  end
633
746
 
634
747
  # [55] languageRange ::= LANGTAG ('~' languageExclusion*)?
635
- production(:languageRange) do |input, data, callback|
636
- exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
637
- input[:languageRange] = if data[:pattern] && exclusions
638
- Algebra::LanguageStemRange.new(data[:language], exclusions)
639
- elsif data[:pattern]
640
- Algebra::LanguageStem.new(data[:language])
641
- elsif data[:dot]
642
- Algebra::LanguageStemRange.new(:wildcard, exclusions)
748
+ start_production(:languageRange, as_hash: true)
749
+ production(:languageRange) do |value|
750
+ exclusions = value[:_languageRange_1] if value[:_languageRange_1]
751
+ pattern = !!value[:_languageRange_1]
752
+ if pattern && exclusions.empty?
753
+ Algebra::LanguageStem.new(value[:LANGTAG])
754
+ elsif pattern
755
+ Algebra::LanguageStemRange.new(value[:LANGTAG], exclusions.unshift(:exclusions))
643
756
  else
644
- Algebra::Language.new(data[:language])
757
+ Algebra::Language.new(value[:LANGTAG])
645
758
  end
646
759
  end
760
+ start_production(:_languageRange_2, as_hash: true)
761
+ production(:_languageRange_2) do |value|
762
+ value[:_languageRange_3]
763
+ end
647
764
 
648
- # [56] languageExclusion ::= '-' literal '~'?
649
- production(:languageExclusion) do |input, data, callback|
650
- val = data[:language]
651
- (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::LanguageStem.new(val) : val)
765
+ # [56] languageExclusion ::= '-' LANGTAG '~'?
766
+ start_production(:languageExclusion, as_hash: true)
767
+ production(:languageExclusion) do |value|
768
+ val = value[:LANGTAG]
769
+ value[:_languageExclusion_1] ? Algebra::LanguageStem.new(val) : val
652
770
  end
653
771
 
654
- # [57] include ::= '&' shapeLabel
655
- production(:include) do |input, data, callback|
656
- input[:tripleExpression] = data[:shapeLabel].first
772
+ # [57] include ::= '&' tripleExprLabel
773
+ production(:include) do |value|
774
+ value.last[:tripleExprLabel]
657
775
  end
658
776
 
659
777
  # [58] annotation ::= '//' predicate (iri | literal)
660
- production(:annotation) do |input, data, callback|
661
- annotation = Algebra::Annotation.new([:predicate, data[:predicate].first], (data[:iri] || data[:literal]))
662
- (input[:annotation] ||= []) << annotation
778
+ start_production(:annotation, as_hash: true)
779
+ production(:annotation) do |value|
780
+ Algebra::Annotation.new([:predicate, value[:predicate]], value[:_annotation_1])
663
781
  end
664
782
 
665
783
  # [59] semanticActions ::= codeDecl*
666
784
 
667
785
  # [60] codeDecl ::= '%' iri (CODE | "%")
668
- production(:codeDecl) do |input, data, callback|
669
- (input[:codeDecl] ||= []) << Algebra::SemAct.new(*[data[:iri], data[:code]].compact, {})
786
+ start_production(:codeDecl, as_hash: true)
787
+ production(:codeDecl) do |value|
788
+ code = value[:_codeDecl_1] unless value[:_codeDecl_1] == '%'
789
+ Algebra::SemAct.new(*[value[:iri], code].compact)
670
790
  end
671
791
 
672
792
  # [13t] literal ::= rdfLiteral | numericLiteral | booleanLiteral
673
793
 
674
794
  # [61] predicate ::= iri | RDF_TYPE
675
- production(:predicate) do |input, data, callback|
676
- (input[:predicate] ||= []) << data[:iri]
795
+ production(:predicate) do |value|
796
+ value
677
797
  end
678
798
 
679
799
  # [62] datatype ::= iri
680
- production(:datatype) do |input, data, callback|
681
- input[:datatype] = data[:iri]
682
- end
683
-
684
- # [63] shapeLabel ::= iri | blankNode
685
- production(:shapeLabel) do |input, data, callback|
686
- (input[:shapeLabel] ||= []) << (data[:iri] || data[:blankNode])
800
+ production(:datatype) do |value|
801
+ value.first[:iri]
687
802
  end
688
803
 
804
+ # [63] shapeExprLabel ::= iri | blankNode
689
805
  # [16t] numericLiteral ::= INTEGER | DECIMAL | DOUBLE
690
- # [129s] rdfLiteral ::= langString | string ('^^' datatype)?
691
- production(:rdfLiteral) do |input, data, callback|
692
- input[:literal] = literal(data[:string], data)
806
+ # [65] rdfLiteral ::= langString | string ('^^' datatype)?
807
+ production(:rdfLiteral) do |value|
808
+ literal(*value)
809
+ end
810
+ start_production(:_rdfLiteral_1, as_hash: true)
811
+ production(:_rdfLiteral_1) do |value|
812
+ [value[:string], {datatype: value[:_rdfLiteral_2]}]
813
+ end
814
+ production(:_rdfLiteral_3) do |value|
815
+ value.last[:datatype]
693
816
  end
694
817
 
695
818
  # [134s] booleanLiteral ::= 'true' | 'false'
819
+ production(:booleanLiteral) do |value|
820
+ literal(value == 'true')
821
+ end
822
+
696
823
  # [135s] string ::= STRING_LITERAL1 | STRING_LITERAL_LONG1
697
824
  # | STRING_LITERAL2 | STRING_LITERAL_LONG2
698
825
  # [66] langString ::= LANG_STRING_LITERAL1 | LANG_STRING_LITERAL_LONG1
699
826
  # | LANG_STRING_LITERAL2 | LANG_STRING_LITERAL_LONG2
700
827
  # [136s] iri ::= IRIREF | prefixedName
701
828
  # [137s] prefixedName ::= PNAME_LN | PNAME_NS
829
+ production(:prefixedName) do |value|
830
+ value.is_a?(RDF::URI) ? value : ns(value, '')
831
+ end
832
+
702
833
  # [138s] blankNode ::= BLANK_NODE_LABEL
834
+ production(:blankNode) do |value|
835
+ value.first[:BLANK_NODE_LABEL]
836
+ end
703
837
 
704
838
  ##
705
839
  # Initializes a new parser instance.
@@ -730,7 +864,7 @@ module ShEx
730
864
  # @raise [ShEx::NotSatisfied] if not satisfied
731
865
  # @raise [ShEx::ParseError] when a syntax error is detected
732
866
  # @raise [ShEx::StructureError, ArgumentError] on structural problems with schema
733
- def initialize(input = nil, options = {}, &block)
867
+ def initialize(input = nil, **options, &block)
734
868
  @input = case input
735
869
  when IO, StringIO then input.read
736
870
  else input.to_s.dup
@@ -758,7 +892,7 @@ module ShEx
758
892
  @result.to_sxp
759
893
  end
760
894
 
761
- alias_method :ll1_parse, :parse
895
+ alias_method :peg_parse, :parse
762
896
 
763
897
  # Parse the ShEx schema
764
898
  #
@@ -775,49 +909,19 @@ module ShEx
775
909
  # @return [ShEx::Algebra::Schema] The executable parsed expression.
776
910
  # @raise [ShEx::ParseError] when a syntax error is detected
777
911
  # @raise [ShEx::StructureError, ArgumentError] on structural problems with schema
778
- # @see http://www.w3.org/TR/sparql11-query/#sparqlAlgebra
779
- # @see http://axel.deri.ie/sparqltutorial/ESWC2007_SPARQL_Tutorial_unit2b.pdf
780
- def parse(prod = START)
781
- ll1_parse(@input, prod.to_sym, @options.merge(branch: BRANCH,
782
- first: FIRST,
783
- follow: FOLLOW,
784
- whitespace: WS)
785
- ) do |context, *data|
786
- case context
787
- when :trace
788
- if options[:logger]
789
- level, lineno, depth, *args = data
790
- case level
791
- when 0
792
- log_error(*args, depth: depth, lineno: lineno)
793
- when 1
794
- log_warn(*args, depth: depth, lineno: lineno)
795
- when 2
796
- log_info(*args, depth: depth, lineno: lineno)
797
- else
798
- log_debug(*args, depth: depth, lineno: lineno)
799
- end
800
- end
801
- end
802
- end
803
-
804
- # The last thing on the @prod_data stack is the result
805
- @result = case
806
- when !prod_data.is_a?(Hash)
807
- prod_data
808
- when prod_data.empty?
809
- nil
810
- when prod_data[:schema]
811
- prod_data[:schema]
812
- else
813
- key = prod_data.keys.first
814
- [key] + Array(prod_data[key]) # Creates [:key, [:triple], ...]
815
- end
912
+ # @see https://www.w3.org/TR/sparql11-query/#sparqlAlgebra
913
+ # @see https://axel.deri.ie/sparqltutorial/ESWC2007_SPARQL_Tutorial_unit2b.pdf
914
+ def parse(prod = :shexDoc)
915
+ @result = peg_parse(@input,
916
+ prod.to_sym,
917
+ ShEx::Meta::RULES,
918
+ whitespace: WS,
919
+ **@options)
816
920
 
817
921
  # Validate resulting expression
818
922
  @result.validate! if @result && validate?
819
923
  @result
820
- rescue EBNF::LL1::Parser::Error, EBNF::LL1::Lexer::Error => e
924
+ rescue EBNF::PEG::Parser::Error, EBNF::LL1::Lexer::Error => e
821
925
  raise ShEx::ParseError, e.message, e.backtrace
822
926
  end
823
927
 
@@ -929,7 +1033,7 @@ module ShEx
929
1033
  end
930
1034
 
931
1035
  # Create a literal
932
- def literal(value, options = {})
1036
+ def literal(value, **options)
933
1037
  options = options.dup
934
1038
  # Internal representation is to not use xsd:string, although it could arguably go the other way.
935
1039
  options.delete(:datatype) if options[:datatype] == RDF::XSD.string
@@ -938,7 +1042,7 @@ module ShEx
938
1042
  "options: #{options.inspect}, " +
939
1043
  "validate: #{validate?.inspect}, "
940
1044
  end
941
- RDF::Literal.new(value, options.merge(validate: validate?))
1045
+ RDF::Literal.new(value, **options.merge(validate: validate?))
942
1046
  end
943
1047
  end # class Parser
944
1048
  end # module ShEx