shex 0.5.2 → 0.6.3

Sign up to get free protection for your applications and to get access to all the features.
data/lib/shex/parser.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'ebnf'
3
- require 'ebnf/ll1/parser'
3
+ require 'ebnf/peg/parser'
4
4
  require 'shex/meta'
5
5
 
6
6
  module ShEx
@@ -8,11 +8,10 @@ module ShEx
8
8
  # A parser for the ShEx grammar.
9
9
  #
10
10
  # @see https://www.w3.org/2005/01/yacker/uploads/ShEx3?lang=perl&markup=html#productions
11
- # @see http://en.wikipedia.org/wiki/LR_parser
11
+ # @see https://en.wikipedia.org/wiki/LR_parser
12
12
  class Parser
13
- include ShEx::Meta
14
13
  include ShEx::Terminals
15
- include EBNF::LL1::Parser
14
+ include EBNF::PEG::Parser
16
15
  include RDF::Util::Logger
17
16
 
18
17
  ##
@@ -22,13 +21,13 @@ module ShEx
22
21
  attr_reader :options
23
22
 
24
23
  ##
25
- # The current input string being processed.
24
+ # The current input string being processed. XXX
26
25
  #
27
26
  # @return [String]
28
27
  attr_accessor :input
29
28
 
30
29
  ##
31
- # The current input tokens being processed.
30
+ # The current input tokens being processed. XXX
32
31
  #
33
32
  # @return [Array<Token>]
34
33
  attr_reader :tokens
@@ -37,669 +36,804 @@ module ShEx
37
36
  # The internal representation of the result using hierarchy of RDF objects and ShEx::Operator
38
37
  # objects.
39
38
  # @return [Array]
40
- # @see http://sparql.rubyforge.org/algebra
39
+ # @see https://www.rubydoc.info/github/ruby-rdf/sparql/SPARQL/Algebra
41
40
  attr_accessor :result
42
41
 
43
42
  # Terminals passed to lexer. Order matters!
44
- terminal(:CODE, CODE, unescape: true) do |prod, token, input|
43
+ terminal(:CODE, CODE, unescape: true) do |value|
45
44
  # { foo %}
46
45
  # Keep surrounding whitespace for now
47
- input[:code] = token.value[1..-2].sub(/%\s*$/, '') # Drop {} and %
46
+ value[1..-2].sub(/%\s*$/, '') # Drop {} and %
48
47
  end
49
- terminal(:REPEAT_RANGE, REPEAT_RANGE) do |prod, token, input|
50
- card = token.value[1..-2].split(',').map {|v| v =~ /^\d+$/ ? v.to_i : v}
51
- card[1] = token.value.include?(',') ? '*' : card[0] if card.length == 1
52
- input[:cardinality] = {min: card[0], max: card[1]}
48
+ terminal(:REPEAT_RANGE, REPEAT_RANGE) do |value|
49
+ card = value[1..-2].split(',').map {|v| v =~ /^\d+$/ ? v.to_i : v}
50
+ card[1] = value.include?(',') ? '*' : card[0] if card.length == 1
51
+ {min: card[0], max: card[1]}
53
52
  end
54
- terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |prod, token, input|
55
- input[:blankNode] = bnode(token.value[2..-1])
53
+ terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |value|
54
+ bnode(value[2..-1])
56
55
  end
57
- terminal(:IRIREF, IRIREF, unescape: true) do |prod, token, input|
56
+ terminal(:IRIREF, IRIREF, unescape: true) do |value|
58
57
  begin
59
- input[:iri] = iri(token.value[1..-2])
58
+ iri(value[1..-2])
60
59
  rescue ArgumentError => e
61
60
  raise Error, e.message
62
61
  end
63
62
  end
64
- terminal(:DOUBLE, DOUBLE) do |prod, token, input|
63
+ terminal(:DOUBLE, DOUBLE) do |value|
65
64
  # Note that a Turtle Double may begin with a '.[eE]', so tack on a leading
66
65
  # zero if necessary
67
- value = token.value.sub(/\.([eE])/, '.0\1')
68
- input[:literal] = literal(value, datatype: RDF::XSD.double)
66
+ value = value.sub(/\.([eE])/, '.0\1')
67
+ literal(value, datatype: RDF::XSD.double)
69
68
  end
70
- terminal(:DECIMAL, DECIMAL) do |prod, token, input|
69
+ terminal(:DECIMAL, DECIMAL) do |value|
71
70
  # Note that a Turtle Decimal may begin with a '.', so tack on a leading
72
71
  # zero if necessary
73
- value = token.value
74
- #value = "0#{token.value}" if token.value[0,1] == "."
75
- input[:literal] = literal(value, datatype: RDF::XSD.decimal)
72
+ literal(value, datatype: RDF::XSD.decimal)
76
73
  end
77
- terminal(:INTEGER, INTEGER) do |prod, token, input|
78
- input[:literal] = literal(token.value, datatype: RDF::XSD.integer)
74
+ terminal(:INTEGER, INTEGER) do |value|
75
+ literal(value, datatype: RDF::XSD.integer)
79
76
  end
80
- terminal(:PNAME_LN, PNAME_LN, unescape: true) do |prod, token, input|
81
- prefix, suffix = token.value.split(":", 2)
82
- input[:iri] = ns(prefix, suffix)
83
- error(nil, "Compact IRI missing prefix definition: #{token.value}", production: :PNAME_LN) unless prefix(prefix)
77
+ terminal(:PNAME_LN, PNAME_LN, unescape: true) do |value|
78
+ prefix, suffix = value.split(":", 2)
79
+ error(nil, "Compact IRI missing prefix definition: #{prefix}", production: :PNAME_LN) unless prefix(prefix)
80
+ ns(prefix, suffix)
84
81
  end
85
- terminal(:PNAME_NS, PNAME_NS) do |prod, token, input|
86
- prefix = token.value[0..-2]
87
-
88
- input[:iri] = ns(prefix, nil)
89
- input[:prefix] = prefix && prefix.to_sym
82
+ terminal(:PNAME_NS, PNAME_NS) do |value|
83
+ value[0..-2]
90
84
  end
91
- terminal(:ATPNAME_LN, ATPNAME_LN, unescape: true) do |prod, token, input|
92
- prefix, suffix = token.value.split(":", 2)
85
+ terminal(:ATPNAME_LN, ATPNAME_LN, unescape: true) do |value, parent_prod|
86
+ prefix, suffix = value.split(":", 2)
93
87
  prefix.sub!(/^@#{WS}*/, '')
94
- input[:shapeLabel] = ns(prefix, suffix)
95
- error(nil, "Compact IRI missing prefix definition: #{token.value}", production: :ATPNAME_LN) unless input[:shapeLabel].absolute?
88
+ ns(prefix, suffix)
96
89
  end
97
- terminal(:ATPNAME_NS, ATPNAME_NS) do |prod, token, input|
98
- prefix = token.value[0..-2]
90
+ terminal(:ATPNAME_NS, ATPNAME_NS) do |value|
91
+ prefix = value[0..-2]
99
92
  prefix.sub!(/^@\s*/, '')
100
93
 
101
- input[:shapeLabel] = ns(prefix, nil)
102
- end
103
- terminal(:LANGTAG, LANGTAG) do |prod, token, input|
104
- input[:language] = token.value[1..-1]
105
- end
106
- terminal(:LANG_STRING_LITERAL_LONG1, LANG_STRING_LITERAL_LONG1, unescape: true) do |prod, token, input|
107
- input[:string], _, input[:language] = token.value[3..-1].rpartition("'''@")
108
- end
109
- terminal(:LANG_STRING_LITERAL_LONG2, LANG_STRING_LITERAL_LONG2, unescape: true) do |prod, token, input|
110
- input[:string], _, input[:language] = token.value[3..-1].rpartition('"""@')
111
- end
112
- terminal(:LANG_STRING_LITERAL1, LANG_STRING_LITERAL1, unescape: true) do |prod, token, input|
113
- input[:string], _, input[:language] = token.value[1..-1].rpartition("'@")
114
- end
115
- terminal(:LANG_STRING_LITERAL2, LANG_STRING_LITERAL2, unescape: true) do |prod, token, input|
116
- input[:string], _, input[:language] = token.value[1..-1].rpartition('"@')
117
- end
118
- terminal(:STRING_LITERAL_LONG1, STRING_LITERAL_LONG1, unescape: true) do |prod, token, input|
119
- input[:string] = token.value[3..-4]
120
- end
121
- terminal(:STRING_LITERAL_LONG2, STRING_LITERAL_LONG2, unescape: true) do |prod, token, input|
122
- input[:string] = token.value[3..-4]
123
- end
124
- terminal(:STRING_LITERAL1, STRING_LITERAL1, unescape: true) do |prod, token, input|
125
- input[:string] = token.value[1..-2]
126
- end
127
- terminal(:STRING_LITERAL2, STRING_LITERAL2, unescape: true) do |prod, token, input|
128
- input[:string] = token.value[1..-2]
129
- end
130
- terminal(:REGEXP, REGEXP) do |prod, token, input|
131
- input[:regexp] = token.value
132
- end
133
- terminal(:RDF_TYPE, RDF_TYPE) do |prod, token, input|
134
- input[:iri] = (a = RDF.type.dup; a.lexical = 'a'; a)
135
- end
136
-
137
- # String terminals
138
- terminal(nil, STR_EXPR, map: STR_MAP) do |prod, token, input|
139
- case token.value
140
- when '*' then input[:cardinality] = {min: 0, max: "*"}
141
- when '+' then input[:cardinality] = {min: 1, max: "*"}
142
- when '?' then input[:cardinality] = {min: 0, max: 1}
143
- when '!' then input[:not] = token.value
144
- when '^' then input[:inverse] = token.value
145
- when '.' then input[:dot] = token.value
146
- when 'true', 'false' then input[:literal] = RDF::Literal::Boolean.new(token.value)
147
- when '~' then input[:pattern] = token.value
148
- when 'BNODE', 'IRI',
149
- 'NONLITERAL' then input[:nonLiteralKind] = token.value.downcase.to_sym
150
- when 'CLOSED' then input[:closed] = token.value.downcase.to_sym
151
- when 'EXTERNAL' then input[:external] = token.value.downcase.to_sym
152
- when 'FRACTIONDIGITS',
153
- 'TOTALDIGITS' then input[:numericLength] = token.value.downcase.to_sym
154
- when 'LITERAL' then input[:shapeAtomLiteral] = token.value.downcase.to_sym
155
- when 'LENGTH',
156
- 'MINLENGTH',
157
- 'MAXLENGTH' then input[:stringLength] = token.value.downcase.to_sym
158
- when 'MININCLUSIVE',
159
- 'MINEXCLUSIVE',
160
- 'MAXINCLUSIVE',
161
- 'MAXEXCLUSIVE' then input[:numericRange] = token.value.downcase.to_sym
162
- when 'NOT' then input[:not] = token.value.downcase.to_sym
163
- when 'START' then input[:start] = token.value.downcase.to_sym
164
- else
165
- #raise "Unexpected MC terminal: #{token.inspect}"
166
- end
94
+ ns(prefix, nil)
95
+ end
96
+ terminal(:LANGTAG, LANGTAG) do |value|
97
+ value[1..-1]
98
+ end
99
+ terminal(:LANG_STRING_LITERAL_LONG1, LANG_STRING_LITERAL_LONG1, unescape: true) do |value|
100
+ s, _, l = value[3..-1].rpartition("'''@")
101
+ [s, language: l]
102
+ end
103
+ terminal(:LANG_STRING_LITERAL_LONG2, LANG_STRING_LITERAL_LONG2, unescape: true) do |value|
104
+ s, _, l = value[3..-1].rpartition('"""@')
105
+ [s, language: l]
106
+ end
107
+ terminal(:LANG_STRING_LITERAL1, LANG_STRING_LITERAL1, unescape: true) do |value|
108
+ s, _, l = value[1..-1].rpartition("'@")
109
+ [s, language: l]
110
+ end
111
+ terminal(:LANG_STRING_LITERAL2, LANG_STRING_LITERAL2, unescape: true) do |value|
112
+ s, _, l = value[1..-1].rpartition('"@')
113
+ [s, language: l]
114
+ end
115
+ terminal(:STRING_LITERAL_LONG1, STRING_LITERAL_LONG1, unescape: true) do |value|
116
+ value[3..-4]
117
+ end
118
+ terminal(:STRING_LITERAL_LONG2, STRING_LITERAL_LONG2, unescape: true) do |value|
119
+ value[3..-4]
120
+ end
121
+ terminal(:STRING_LITERAL1, STRING_LITERAL1, unescape: true) do |value|
122
+ value[1..-2]
123
+ end
124
+ terminal(:STRING_LITERAL2, STRING_LITERAL2, unescape: true) do |value|
125
+ value[1..-2]
126
+ end
127
+ terminal(:REGEXP, REGEXP)
128
+ terminal(:RDF_TYPE, RDF_TYPE) do |value|
129
+ (a = RDF.type.dup; a.lexical = 'a'; a)
167
130
  end
168
131
 
169
132
  # Productions
170
133
  # [1] shexDoc ::= directive* ((notStartAction | startActions) statement*)?
171
- production(:shexDoc) do |input, data, callback|
172
- data[:start] = data[:start] if data[:start]
173
-
134
+ start_production(:shexDoc, as_hash: true, clear_packrat: true)
135
+ production(:shexDoc) do |value|
174
136
  expressions = []
175
- expressions << [:base, data[:baseDecl]] if data[:baseDecl]
176
- expressions << [:prefix, data[:prefixDecl]] if data[:prefixDecl]
177
- expressions += Array(data[:codeDecl])
178
- expressions << Algebra::Start.new(data[:start]) if data[:start]
179
- expressions << data[:shapes].unshift(:shapes) if data[:shapes]
137
+ prefixes = []
138
+
139
+ # directive *
140
+ expressions += value[:_shexDoc_1]
141
+
142
+ # ((notStartAction | startActions) statement*)?
143
+ if value = value[:_shexDoc_2]
144
+ # These may start with codeDecl or start. Otherwise, they are all shapes.
145
+ expressions += Array(value[:_shexDoc_4])
146
+ expressions += Array(value[:_shexDoc_5])
147
+ end
180
148
 
181
- input[:schema] = Algebra::Schema.new(*expressions, options)
182
- self
149
+ # Extract declarations (base/prefix), semantic actions (SemAct) and start from expressions; whatever remains is treated as shapes
150
+ declarations, expressions = expressions.partition {|op| op.is_a?(Array)}
151
+ prefixes, bases = declarations.partition {|op| op.first == :prefix}
152
+ semacts, expressions = expressions.partition {|op| op.is_a?(Algebra::SemAct)}
153
+ starts, expressions = expressions.partition {|op| op.is_a?(Algebra::Start)}
154
+
155
+ operands = []
156
+ operands += bases unless bases.empty?
157
+ unless prefixes.empty?
158
+ operands << [:prefix, prefixes.map {|p| p[1,2]}]
159
+ end
160
+ operands += semacts
161
+ operands += starts
162
+ operands << expressions.unshift(:shapes) unless expressions.empty?
163
+ Algebra::Schema.new(*operands, **options)
183
164
  end
165
+ start_production(:_shexDoc_2, as_hash: true)
166
+ start_production(:_shexDoc_3, as_hash: true)
184
167
 
185
168
  # [2] directive ::= baseDecl | prefixDecl
186
169
 
187
170
  # [3] baseDecl ::= "BASE" IRIREF
188
- production(:baseDecl) do |input, data, callback|
189
- input[:baseDecl] = self.base_uri = iri(data[:iri])
171
+ start_production(:baseDecl, as_hash: true, insensitive_strings: :lower)
172
+ production(:baseDecl) do |value|
173
+ self.base_uri = iri(value[:IRIREF])
174
+ [:base, self.base_uri]
190
175
  end
191
176
 
192
177
  # [4] prefixDecl ::= "PREFIX" PNAME_NS IRIREF
193
- production(:prefixDecl) do |input, data, callback|
194
- pfx = data[:prefix]
195
- self.prefix(pfx, data[:iri])
196
- (input[:prefixDecl] ||= []) << [pfx.to_s, data[:iri]]
178
+ start_production(:prefixDecl, as_hash: true, insensitive_strings: :lower)
179
+ production(:prefixDecl) do |value|
180
+ pfx = value[:PNAME_NS]
181
+ prefix(pfx, value[:IRIREF])
182
+ [:prefix, pfx.to_s, value[:IRIREF]]
197
183
  end
198
184
 
199
185
  # [5] notStartAction ::= start | shapeExprDecl
200
- # [6] start ::= "start" '=' shapeExpression
201
- production(:start) do |input, data, callback|
202
- input[:start] = Array(data[:shapeExpression]).first || data[:shape]
186
+ # [6] start ::= "START" '=' "NOT"? (shapeAtomNoRef | shapeRef) shapeOr?
187
+ start_production(:start, as_hash: true, insensitive_strings: :lower)
188
+ production(:start) do |value|
189
+ expr = value[:_start_2]
190
+ expr = value[:_start_3].call(expr) if value[:_start_3]
191
+ expr = Algebra::Not.new(expr) if value[:__start_1]
192
+ Algebra::Start.new(expr)
203
193
  end
204
194
  # [7] startActions ::= codeDecl+
205
195
 
206
196
  # [8] statement ::= directive | notStartAction
207
197
 
208
- # [9] shapeExprDecl ::= shapeLabel (shapeExpression|"EXTERNAL")
209
- production(:shapeExprDecl) do |input, data, callback|
210
- id = Array(data[:shapeLabel]).first
211
- expression = case Array(data[:shapeExpression]).first
198
+ # [9] shapeExprDecl ::= shapeExprLabel (shapeExpression | "EXTERNAL")
199
+ start_production(:shapeExprDecl, as_hash: true)
200
+ production(:shapeExprDecl) do |value|
201
+ id = value[:shapeExprLabel]
202
+ expression = case value[:_shapeExprDecl_1]
212
203
  when Algebra::NodeConstraint, Algebra::Or, Algebra::And, Algebra::Not, Algebra::Shape, RDF::Resource
213
- Array(data[:shapeExpression]).first
204
+ value[:_shapeExprDecl_1]
205
+ when /external/i
206
+ Algebra::External.new()
214
207
  else
215
- data[:external] ? Algebra::External.new() : Algebra::Shape.new()
208
+ Algebra::Shape.new()
216
209
  end
217
210
  expression.id = id if id && !expression.is_a?(RDF::Resource)
218
211
 
219
- (input[:shapes] ||= []) << expression
212
+ expression
220
213
  end
221
214
 
222
- # [10] shapeExpression ::= shapeAtomNoRef shapeOr?
223
- # | "NOT" (shapeAtomNoRef | shapeRef) shapeOr?
215
+ # [10] shapeExpression ::= "NOT"? shapeAtomNoRef shapeOr?
216
+ # | "NOT" shapeRef shapeOr?
224
217
  # | shapeRef shapeOr
225
- production(:shapeExpression) do |input, data, callback|
226
- expression = Array(data[:shapeExpression]).first || data[:shape]
227
- expression = Algebra::Not.new(expression) if data[:not]
228
- (input[:shapeExpression] ||= []) << expression
218
+ start_production(:_shapeExpression_1, as_hash: true, insensitive_strings: :lower)
219
+ production(:_shapeExpression_1) do |value|
220
+ # "NOT"? shapeAtomNoRef shapeOr?
221
+ expr = value[:shapeAtomNoRef]
222
+ expr = Algebra::Not.new(expr) if value[:_shapeExpression_4]
223
+ expr = value[:_shapeExpression_5].call(expr) if value[:_shapeExpression_5]
224
+ expr
225
+ end
226
+ start_production(:_shapeExpression_2, as_hash: true, insensitive_strings: :lower)
227
+ production(:_shapeExpression_2) do |value|
228
+ # "NOT" shapeRef shapeOr?
229
+ expr = Algebra::Not.new(value[:shapeRef])
230
+ expr = value[:_shapeExpression_6].call(expr) if value[:_shapeExpression_6]
231
+ expr
232
+ end
233
+ start_production(:_shapeExpression_3, as_hash: true)
234
+ production(:_shapeExpression_3) do |value|
235
+ # shapeRef shapeOr
236
+ value[:shapeOr].call(value[:shapeRef])
229
237
  end
230
238
 
231
239
  # [11] inlineShapeExpression ::= inlineShapeOr
232
- # [12] shapeOr ::= shapeOrA | shapeOrB shapeOrA?
233
- # [12a] shapeOrA ::= ("OR" shapeAnd)+
234
- start_production(:shapeOrA) do |input, data, callback|
235
- data[:shapeExpression] = input.delete(:shapeExpression)
236
- data[:shapeExpression] ||= Array(input.delete(:shape)) if input[:shape]
237
- data[:shapeExpression] = [Algebra::Not.new(*data[:shapeExpression])] if input.delete(:not)
238
- end
239
- production(:shapeOrA) do |input, data, callback|
240
- shape_or(input, data)
241
- end
242
- # [12b] shapeOrB ::= ("AND" shapeNot)+
243
- start_production(:shapeOrB) do |input, data, callback|
244
- data[:shapeExpression] = input.delete(:shapeExpression)
245
- data[:shapeExpression] ||= Array(input.delete(:shape)) if input[:shape]
246
- data[:shapeExpression] = [Algebra::Not.new(*data[:shapeExpression])] if input.delete(:not)
247
- end
248
- production(:shapeOrB) do |input, data, callback|
249
- shape_and(input, data)
240
+ production(:inlineShapeExpression) do |value|
241
+ value.first[:inlineShapeOr]
242
+ end
243
+ # [12] shapeOr ::= ("OR" shapeAnd)+
244
+ # | ("AND" shapeNot)+ ("OR" shapeAnd)*
245
+ # As shapeOr has an implicit first operand (the value of the invoking production's first element), the result is a lambda which accepts that value as its left-hand side (lhs) and combines it with the right-hand-side expressions found here.
246
+ start_production(:_shapeOr_1, insensitive_strings: :lower)
247
+ production(:_shapeOr_1) do |value|
248
+ # ("OR" shapeAnd)+
249
+ -> (lhs) {Algebra::Or.new(lhs, *value.map {|v| v.last[:shapeAnd]})}
250
+ end
251
+ start_production(:_shapeOr_2, as_hash: true, insensitive_strings: :lower)
252
+ production(:_shapeOr_2) do |value|
253
+ # ("AND" shapeNot)+ ("OR" shapeAnd)*
254
+ ands = value[:_shapeOr_4].map {|v| v.last[:shapeNot]}
255
+ ors = value[:_shapeOr_5].map {|v| v.last[:shapeAnd]}
256
+ if ors.empty?
257
+ -> (lhs) {Algebra::And.new(lhs, *ands)}
258
+ else
259
+ -> (lhs) {ShapeOr(Algebra::And.new(lhs, ands), *ors)}
260
+ end
250
261
  end
251
262
 
252
263
  # [13] inlineShapeOr ::= inlineShapeAnd ("OR" inlineShapeAnd)*
253
- production(:inlineShapeOr) do |input, data, callback|
254
- shape_or(input, data)
255
- end
256
- def shape_or(input, data)
257
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
258
- expression = if Array(data[:shapeExpression]).length > 1
259
- Algebra::Or.new(*data[:shapeExpression], {})
264
+ start_production(:inlineShapeOr, as_hash: true)
265
+ production(:inlineShapeOr) do |value|
266
+ if value[:_inlineShapeOr_1].empty?
267
+ value[:inlineShapeAnd]
260
268
  else
261
- Array(data[:shapeExpression]).first
269
+ lhs = value[:_inlineShapeOr_1].map {|v| v.last[:inlineShapeAnd]}
270
+ Algebra::Or.new(value[:inlineShapeAnd], *lhs)
262
271
  end
263
- (input[:shapeExpression] ||= []) << expression if expression
264
- rescue ArgumentError => e
265
- error(nil, "Argument Error on OR: #{e.message}")
266
272
  end
267
- private :shape_or
273
+ start_production(:_inlineShapeOr_2, insensitive_strings: :lower)
268
274
 
269
275
  # [14] shapeAnd ::= shapeNot ("AND" shapeNot)*
270
- production(:shapeAnd) do |input, data, callback|
271
- shape_and(input, data)
272
- end
273
- # [15] inlineShapeAnd ::= inlineShapeNot ("AND" inlineShapeNot)*
274
- production(:inlineShapeAnd) do |input, data, callback|
275
- shape_and(input, data)
276
- end
277
- def shape_and(input, data)
278
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
279
- expressions = Array(data[:shapeExpression]).inject([]) do |memo, expr|
280
- #memo.concat(expr.is_a?(Algebra::And) ? expr.operands : [expr])
281
- memo.concat([expr])
276
+ start_production(:shapeAnd, as_hash: true)
277
+ production(:shapeAnd) do |value|
278
+ if value[:_shapeAnd_1].empty?
279
+ value[:shapeNot]
280
+ else
281
+ lhs = value[:_shapeAnd_1].map {|v| v.last[:shapeNot]}
282
+ Algebra::And.new(value[:shapeNot], *lhs)
282
283
  end
284
+ end
285
+ start_production(:_shapeAnd_2, insensitive_strings: :lower)
283
286
 
284
- expression = if expressions.length > 1
285
- Algebra::And.new(*expressions, {})
287
+ # [15] inlineShapeAnd ::= inlineShapeNot ("AND" inlineShapeNot)*
288
+ start_production(:inlineShapeAnd, as_hash: true)
289
+ production(:inlineShapeAnd) do |value|
290
+ if value[:_inlineShapeAnd_1].empty?
291
+ value[:inlineShapeNot]
286
292
  else
287
- expressions.first
293
+ lhs = value[:_inlineShapeAnd_1].map {|v| v.last[:inlineShapeNot]}
294
+ Algebra::And.new(value[:inlineShapeNot], *lhs)
288
295
  end
289
- (input[:shapeExpression] ||= []) << expression if expression
290
- rescue ArgumentError => e
291
- error(nil, "Argument Error on AND: #{e.message}")
292
296
  end
293
- private :shape_and
297
+ start_production(:_inlineShapeAnd_2, insensitive_strings: :lower)
294
298
 
295
299
  # [16] shapeNot ::= "NOT"? shapeAtom
296
- production(:shapeNot) do |input, data, callback|
297
- shape_not(input, data)
300
+ start_production(:shapeNot, as_hash: true)
301
+ production(:shapeNot) do |value|
302
+ atom = value[:shapeAtom]
303
+ value[:_shapeNot_1] ? Algebra::Not.new(atom) : atom
298
304
  end
305
+ start_production(:_shapeNot_1, insensitive_strings: :lower)
306
+
299
307
  # [17] inlineShapeNot ::= "NOT"? inlineShapeAtom
300
- production(:inlineShapeNot) do |input, data, callback|
301
- shape_not(input, data)
302
- end
303
- def shape_not(input, data)
304
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
305
- expression = Array(data[:shapeExpression]).first
306
- expression = Algebra::Not.new(expression) if data[:not]
307
- #error(nil, "Expected an atom for NOT") unless expression
308
- (input[:shapeExpression] ||= []) << expression if expression
308
+ start_production(:inlineShapeNot, as_hash: true)
309
+ production(:inlineShapeNot) do |value|
310
+ atom = value[:inlineShapeAtom]
311
+ value[:_inlineShapeNot] ? Algebra::Not.new(atom) : atom
309
312
  end
310
- private :shape_not
313
+ start_production(:_inlineShapeNot_1, insensitive_strings: :lower)
311
314
 
312
- # [18] shapeAtom ::= nodeConstraint shapeOrRef?
313
- # | shapeOrRef
315
+ # [18] shapeAtom ::= nonLitNodeConstraint shapeOrRef?
316
+ # | litNodeConstraint
317
+ # | shapeOrRef nonLitNodeConstraint?
314
318
  # | "(" shapeExpression ")"
315
319
  # | '.' # no constraint
316
- production(:shapeAtom) do |input, data, callback|
317
- shape_atom(input, data)
320
+ production(:shapeAtom) do |value|
321
+ expressions = case
322
+ when value.is_a?(Algebra::Operator)
323
+ [value]
324
+ when value == '.' then []
325
+ when value[:nonLitNodeConstraint]
326
+ [value[:nonLitNodeConstraint], value[:_shapeAtom_4]].compact
327
+ when value[:shapeOrRef]
328
+ [value[:shapeOrRef], value[:_shapeAtom_5]].compact
329
+ when value[:_shapeAtom_3]
330
+ value[:_shapeAtom_3]
331
+ else []
332
+ end
333
+
334
+ case expressions.length
335
+ when 0 then nil
336
+ when 1 then expressions.first
337
+ else Algebra::And.new(*expressions)
338
+ end
318
339
  end
340
+ start_production(:_shapeAtom_1, as_hash: true)
341
+ start_production(:_shapeAtom_2, as_hash: true)
342
+ production(:_shapeAtom_3) do |value|
343
+ value[1][:shapeExpression]
344
+ end
345
+
346
+ # [19] shapeAtomNoRef ::= nonLitNodeConstraint shapeOrRef?
347
+ # | litNodeConstraint
348
+ # | shapeDefinition nonLitNodeConstraint?
349
+ # | "(" shapeExpression ")"
350
+ # | '.' # no constraint
351
+ production(:shapeAtomNoRef) do |value|
352
+ expressions = case
353
+ when value.is_a?(Algebra::Operator)
354
+ [value]
355
+ when value == '.' then []
356
+ when value[:nonLitNodeConstraint]
357
+ [value[:nonLitNodeConstraint], value[:_shapeAtomNoRef_4]].compact
358
+ when value[:shapeDefinition]
359
+ [value[:shapeDefinition], value[:_shapeAtomNoRef_5]].compact
360
+ when value[:_shapeAtomNoRef_3]
361
+ value[:_shapeAtomNoRef_3]
362
+ else []
363
+ end
319
364
 
320
- # [19] shapeAtomNoRef ::= nodeConstraint shapeOrRef?
321
- # | shapeDefinition
322
- # | "(" shapeExpression ")"
323
- # | '.' # no constraint
324
- production(:shapeAtomNoRef) do |input, data, callback|
325
- shape_atom(input, data)
365
+ case expressions.length
366
+ when 0 then nil
367
+ when 1 then expressions.first
368
+ else Algebra::And.new(*expressions)
369
+ end
370
+ end
371
+ start_production(:_shapeAtomNoRef_1, as_hash: true)
372
+ start_production(:_shapeAtomNoRef_2, as_hash: true)
373
+ production(:_shapeAtomNoRef_3) do |value|
374
+ value[1][:shapeExpression]
326
375
  end
327
376
 
328
- # [20] inlineShapeAtom ::= nodeConstraint inlineShapeOrRef?
329
- # | inlineShapeOrRef nodeConstraint?
377
+ # [20] inlineShapeAtom ::= nonLitNodeConstraint inlineShapeOrRef?
378
+ # | litNodeConstraint
379
+ # | inlineShapeOrRef nonLitNodeConstraint?
330
380
  # | "(" shapeExpression ")"
331
381
  # | '.' # no constraint
332
- production(:inlineShapeAtom) do |input, data, callback|
333
- shape_atom(input, data)
334
- end
335
- def shape_atom(input, data)
336
- constraint = data[:nodeConstraint]
337
- shape = data[:shapeOrRef] || Array(data[:shapeExpression]).first || data[:shape]
338
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
382
+ production(:inlineShapeAtom) do |value|
383
+ expressions = case
384
+ when value == '.' then []
385
+ when value.is_a?(Algebra::Operator)
386
+ [value]
387
+ when value[:nonLitNodeConstraint]
388
+ [value[:nonLitNodeConstraint], value[:_inlineShapeAtom_4]].compact
389
+ when value[:inlineShapeOrRef]
390
+ [value[:inlineShapeOrRef], value[:__inlineShapeAtom_5]].compact
391
+ when value[:_inlineShapeAtom_3]
392
+ value[:_inlineShapeAtom_3]
393
+ else []
394
+ end
339
395
 
340
- expression = [constraint, shape].compact
341
- expression = case expression.length
396
+ case expressions.length
342
397
  when 0 then nil
343
- when 1 then expression.first
344
- else Algebra::And.new(*expression, {})
398
+ when 1 then expressions.first
399
+ else Algebra::And.new(*expressions)
345
400
  end
346
-
347
- (input[:shapeExpression] ||= []) << expression if expression
348
401
  end
349
- private :shape_atom
402
+ start_production(:_inlineShapeAtom_1, as_hash: true)
403
+ start_production(:_inlineShapeAtom_2, as_hash: true)
404
+ production(:_inlineShapeAtom_3) do |value|
405
+ value[1][:shapeExpression]
406
+ end
350
407
 
351
408
  # [21] shapeOrRef ::= shapeDefinition | shapeRef
352
- production(:shapeOrRef) do |input, data, callback|
353
- shape_or_ref(input, data)
354
- end
355
409
  # [22] inlineShapeOrRef ::= inlineShapeDefinition | shapeRef
356
- production(:inlineShapeOrRef) do |input, data, callback|
357
- shape_or_ref(input, data)
358
- end
359
- def shape_or_ref(input, data)
360
- input.merge!(data.dup.keep_if {|k, v| [:closed, :extraPropertySet, :codeDecl].include?(k)})
361
- input[:shapeOrRef] = data[:shape] if data[:shape]
362
- rescue ArgumentError => e
363
- error(nil, "Argument Error on ShapeOrRef: #{e.message}")
364
- end
365
- private :shape_or_ref
366
410
 
367
- # [23] shapeRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel
368
- production(:shapeRef) do |input, data, callback|
369
- input[:shape] = Array(data[:shapeLabel]).first
411
+ # [23] shapeRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeExprLabel
412
+ production(:shapeRef) do |value|
413
+ value.is_a?(Array) ? value.last[:shapeExprLabel] : value
370
414
  end
371
415
 
372
416
  # [24] litNodeConstraint ::= "LITERAL" xsFacet*
417
+ # | nonLiteralKind stringFacet*
373
418
  # | datatype xsFacet*
374
419
  # | valueSet xsFacet*
375
420
  # | numericFacet+
376
- production(:litNodeConstraint) do |input, data, callback|
377
- # Semantic validate (A Syntax error)
378
- case
379
- when data[:datatype] && data[:numericFacet]
380
- # Can only use a numeric facet on a numeric datatype
381
- l = RDF::Literal.new("1", datatype: data[:datatype])
382
- error(nil, "Numeric facet used on non-numeric datatype: #{data[:datatype]}", production: :nodeConstraint) unless l.is_a?(RDF::Literal::Numeric)
383
- end
384
-
385
- attrs = []
386
- attrs << [:datatype, data[:datatype]] if data [:datatype]
387
- attrs += Array(data[:shapeAtomLiteral])
388
- attrs += Array(data[:valueSetValue])
389
- attrs += Array(data[:numericFacet])
390
- attrs += Array(data[:stringFacet])
391
-
392
- input[:nodeConstraint] = Algebra::NodeConstraint.new(*attrs.compact, {})
421
+ start_production(:_litNodeConstraint_1, as_hash: true, insensitive_strings: :lower)
422
+ production(:_litNodeConstraint_1) do |value|
423
+ facets = value[:_litNodeConstraint_6]
424
+ validate_facets(facets, :litNodeConstraint)
425
+ Algebra::NodeConstraint.new(:literal, *facets)
426
+ end
427
+ start_production(:_litNodeConstraint_2, as_hash: true)
428
+ production(:_litNodeConstraint_2) do |value|
429
+ facets = Array(value[:_litNodeConstraint_7])
430
+ validate_facets(facets, :litNodeConstraint)
431
+ attrs = Array(value[:nonLiteralKind]) + facets
432
+ Algebra::NodeConstraint.new(*attrs.compact)
433
+ end
434
+ start_production(:_litNodeConstraint_3, as_hash: true)
435
+ production(:_litNodeConstraint_3) do |value|
436
+ facets = value[:_litNodeConstraint_8]
437
+ validate_facets(facets, :litNodeConstraint)
438
+ attrs = [[:datatype, value[:datatype]]] + facets
439
+ Algebra::NodeConstraint.new(*attrs.compact)
440
+ end
441
+ start_production(:_litNodeConstraint_4, as_hash: true)
442
+ production(:_litNodeConstraint_4) do |value|
443
+ facets = value[:_litNodeConstraint_9]
444
+ validate_facets(facets, :litNodeConstraint)
445
+ attrs = value[:valueSet]+ facets
446
+ Algebra::NodeConstraint.new(*attrs.compact)
447
+ end
448
+ production(:_litNodeConstraint_5) do |value|
449
+ validate_facets(value, :litNodeConstraint)
450
+ Algebra::NodeConstraint.new(*value)
393
451
  end
394
452
 
395
453
  # [25] nonLitNodeConstraint ::= nonLiteralKind stringFacet*
396
454
  # | stringFacet+
397
- production(:nonLitNodeConstraint) do |input, data, callback|
398
- # Semantic validate (A Syntax error)
399
-
400
- attrs = []
401
- attrs += Array(data[:nonLiteralKind])
402
- attrs += Array(data[:stringFacet])
403
-
404
- input[:nodeConstraint] = Algebra::NodeConstraint.new(*attrs.compact, {})
455
+ start_production(:_nonLitNodeConstraint_1, as_hash: true)
456
+ production(:_nonLitNodeConstraint_1) do |value|
457
+ # nonLiteralKind stringFacet*
458
+ facets = Array(value[:_nonLitNodeConstraint_3])
459
+ validate_facets(facets, :nonLitNodeConstraint)
460
+ attrs = Array(value[:nonLiteralKind]) + facets
461
+ Algebra::NodeConstraint.new(*attrs.compact)
462
+ end
463
+ production(:_nonLitNodeConstraint_2) do |value|
464
+ # stringFacet+
465
+ validate_facets(value, :nonLitNodeConstraint)
466
+ Algebra::NodeConstraint.new(*value)
467
+ end
468
+
469
+ def validate_facets(facets, prod)
470
+ facets.each do |facet|
471
+ if facets.count {|f| f.first == facet.first} > 1
472
+ error(nil, "#{facet.first} constraint may only be used once in a Node Constraint", production: prod)
473
+ end
474
+ end
405
475
  end
476
+ private :validate_facets
406
477
 
407
478
  # [26] nonLiteralKind ::= "IRI" | "BNODE" | "NONLITERAL"
479
+ start_production(:nonLiteralKind, insensitive_strings: :lower)
480
+ production(:nonLiteralKind) do |value|
481
+ value.downcase.to_sym
482
+ end
408
483
 
409
484
  # [27] xsFacet ::= stringFacet | numericFacet
410
485
  # [28] stringFacet ::= stringLength INTEGER
411
486
  # | REGEXP
412
- production(:stringFacet) do |input, data, callback|
413
- input[:stringFacet] ||= []
414
- input[:stringFacet] << if data[:stringLength]
415
- if input[:stringFacet].flatten.include?(data[:stringLength])
416
- error(nil, "#{data[:stringLength]} constraint may only be used once in a Node Constraint", production: :stringFacet)
417
- end
418
- [data[:stringLength], data[:literal]]
419
- elsif re = data[:regexp]
420
- unless re =~ %r(^/(.*)/([smix]*)$)
421
- error(nil, "#{re.inspect} regular expression must be in the form /pattern/flags?", production: :stringFacet)
487
+ production(:stringFacet) do |value|
488
+ if value.is_a?(Array) # stringLength
489
+ value
490
+ else
491
+ unless value =~ %r(^/(.*)/([smix]*)$)
492
+ error(nil, "#{value.inspect} regular expression must be in the form /pattern/flags?", production: :stringFacet)
422
493
  end
494
+
423
495
  flags = $2 unless $2.to_s.empty?
424
496
  pattern = $1.gsub('\\/', '/').gsub(UCHAR) do
425
497
  [($1 || $2).hex].pack('U*')
426
498
  end.force_encoding(Encoding::UTF_8)
427
499
 
428
500
  # Any other escaped character is a syntax error
429
- if pattern.match(%r([^\\]\\[^nrt/\\|\.?*+\[\]\(\){}$#x2D#x5B#x5D#x5E-]))
501
+ if pattern.match?(%r([^\\]\\[^nrt/\\|\.?*+\[\]\(\){}$#x2D#x5B#x5D#x5E-]))
430
502
  error(nil, "Regexp contains illegal escape: #{pattern.inspect}", production: :stringFacet)
431
503
  end
432
504
 
433
505
  [:pattern, pattern, flags].compact
434
506
  end
435
507
  end
508
+ start_production(:_stringFacet_1, as_hash: true)
509
+ production(:_stringFacet_1) do |value|
510
+ [value[:stringLength].downcase.to_sym, value[:INTEGER]]
511
+ end
436
512
 
437
513
  # [29] stringLength ::= "LENGTH" | "MINLENGTH" | "MAXLENGTH"
514
+ start_production(:stringLength, insensitive_strings: :lower)
438
515
 
439
- # [30] numericFacet ::= numericRange (numericLiteral | string '^^' datatype )
516
+ # [30] numericFacet ::= numericRange numericLiteral
440
517
  # | numericLength INTEGER
441
- production(:numericFacet) do |input, data, callback|
442
- input[:numericFacet] ||= []
443
- input[:numericFacet] << if data[:numericRange]
444
- literal = data[:literal] || literal(data[:string], datatype: data[:datatype])
445
- error(nil, "numericRange must use a numeric datatype: #{data[:datatype]}", production: :numericFacet) unless literal.is_a?(RDF::Literal::Numeric)
446
- [data[:numericRange], literal]
447
- elsif data[:numericLength]
448
- [data[:numericLength], data[:literal]]
449
- end
518
+ start_production(:_numericFacet_1, as_hash: true)
519
+ production(:_numericFacet_1) do |value|
520
+ [value[:numericRange].downcase.to_sym, value[:numericLiteral]]
521
+ end
522
+ start_production(:_numericFacet_2, as_hash: true)
523
+ production(:_numericFacet_2) do |value|
524
+ [value[:numericLength].downcase.to_sym, value[:INTEGER]]
450
525
  end
451
526
 
452
527
  # [31] numericRange ::= "MININCLUSIVE" | "MINEXCLUSIVE" | "MAXINCLUSIVE" | "MAXEXCLUSIVE"
528
+ start_production(:numericRange, insensitive_strings: :lower)
529
+
453
530
  # [32] numericLength ::= "TOTALDIGITS" | "FRACTIONDIGITS"
531
+ start_production(:numericLength, insensitive_strings: :lower)
454
532
 
455
533
  # [33] shapeDefinition ::= (includeSet | extraPropertySet | "CLOSED")* '{' tripleExpression? '}' annotation* semanticActions
456
- production(:shapeDefinition) do |input, data, callback|
457
- shape_definition(input, data)
534
+ start_production(:shapeDefinition, as_hash: true)
535
+ production(:shapeDefinition) do |value|
536
+ shape_definition(
537
+ value[:_shapeDefinition_1],
538
+ value[:_shapeDefinition_2],
539
+ value[:_shapeDefinition_3],
540
+ value[:semanticActions])
458
541
  end
542
+ start_production(:_shapeDefinition_4, insensitive_strings: :lower)
543
+
459
544
  # [34] inlineShapeDefinition ::= (includeSet | extraPropertySet | "CLOSED")* '{' tripleExpression? '}'
460
- production(:inlineShapeDefinition) do |input, data, callback|
461
- shape_definition(input, data)
462
- end
463
- def shape_definition(input, data)
464
- # FIXME: includeSet
465
- expression = data[:tripleExpression]
466
- attrs = Array(data[:extraPropertySet])
467
- attrs << :closed if data[:closed]
545
+ start_production(:inlineShapeDefinition, as_hash: true)
546
+ production(:inlineShapeDefinition) do |value|
547
+ shape_definition(
548
+ value[:_inlineShapeDefinition_1],
549
+ value[:_inlineShapeDefinition_2])
550
+ end
551
+ def shape_definition(extra_closed, expression, annotations = [], semact = [])
552
+ closed = extra_closed.any? {|v| v.to_s.downcase == 'closed'}
553
+ extra = extra_closed.reject {|v| v.to_s.downcase == 'closed'}
554
+ attrs = extra
555
+ attrs << :closed if closed
468
556
  attrs << expression if expression
469
- attrs += Array(data[:annotation])
470
- attrs += Array(data[:codeDecl])
557
+ attrs += annotations
558
+ attrs += semact
471
559
 
472
- input[:shape] = Algebra::Shape.new(*attrs, {})
560
+ Algebra::Shape.new(*attrs)
473
561
  end
474
562
  private :shape_definition
475
563
 
476
564
  # [35] extraPropertySet ::= "EXTRA" predicate+
477
- production(:extraPropertySet) do |input, data, callback|
478
- (input[:extraPropertySet] ||= []) << data[:predicate].unshift(:extra)
565
+ start_production(:extraPropertySet, insensitive_strings: :lower)
566
+ production(:extraPropertySet) do |value|
567
+ value.last[:_extraPropertySet_1].unshift(:extra)
479
568
  end
480
569
 
481
570
  # [36] tripleExpression ::= oneOfTripleExpr
482
- # [37] oneOfTripleExpr ::= groupTripleExpr ('|' groupTripleExpr)*
483
- production(:oneOfTripleExpr) do |input, data, callback|
484
- expression = if Array(data[:tripleExpression]).length > 1
485
- Algebra::OneOf.new(*data[:tripleExpression], {})
486
- else
487
- Array(data[:tripleExpression]).first
488
- end
489
- input[:tripleExpression] = expression if expression
571
+ production(:tripleExpression) do |value|
572
+ value.first[:oneOfTripleExpr]
490
573
  end
491
574
 
492
- # [40] groupTripleExpr ::= unaryTripleExpr (';' unaryTripleExpr?)*
493
- production(:groupTripleExpr) do |input, data, callback|
494
- expression = if Array(data[:tripleExpression]).length > 1
495
- Algebra::EachOf.new(*data[:tripleExpression], {})
496
- else
497
- Array(data[:tripleExpression]).first
498
- end
499
- (input[:tripleExpression] ||= []) << expression if expression
575
+ # [37] oneOfTripleExpr ::= groupTripleExpr ('|' groupTripleExpr)*
576
+ start_production(:oneOfTripleExpr, as_hash: true)
577
+ production(:oneOfTripleExpr) do |value|
578
+ expressions = [value[:groupTripleExpr]] + value[:_oneOfTripleExpr_1]
579
+ expressions.length == 1 ? expressions.first : Algebra::OneOf.new(*expressions)
580
+ end
581
+ production(:_oneOfTripleExpr_2) do |value|
582
+ value.last[:groupTripleExpr]
583
+ end
584
+
585
+ # [40] groupTripleExpr ::= unaryTripleExpr (';' unaryTripleExpr?)*
586
+ start_production(:groupTripleExpr, as_hash: true)
587
+ production(:groupTripleExpr) do |value|
588
+ expressions = [value[:unaryTripleExpr]] + value[:_groupTripleExpr_1]
589
+ expressions.length == 1 ? expressions.first : Algebra::EachOf.new(*expressions)
590
+ end
591
+ production(:_groupTripleExpr_2) do |value|
592
+ value.last[:_groupTripleExpr_3]
500
593
  end
501
594
 
502
595
  # [43] unaryTripleExpr ::= productionLabel? (tripleConstraint | bracketedTripleExpr) | include
503
- production(:unaryTripleExpr) do |input, data, callback|
504
- expression = data[:tripleExpression]
505
- expression.id = data[:productionLabel] if expression && data[:productionLabel]
596
+ start_production(:_unaryTripleExpr_1, as_hash: true)
597
+ production(:_unaryTripleExpr_1) do |value|
598
+ expression = value[:_unaryTripleExpr_3]
599
+ expression.id = value[:_unaryTripleExpr_2] if expression && value[:_unaryTripleExpr_2]
506
600
 
507
- (input[:tripleExpression] ||= []) << expression if expression
601
+ expression
508
602
  end
509
603
 
510
604
  # [43a] productionLabel ::= '$' (iri | blankNode)
511
- production(:productionLabel) do |input, data, callback|
512
- input[:productionLabel] = data[:iri] || data[:blankNode]
605
+ production(:productionLabel) do |value|
606
+ value.last[:_productionLabel_1]
513
607
  end
514
608
 
515
609
  # [44] bracketedTripleExpr ::= '(' oneOfTripleExpr ')' cardinality? annotation* semanticActions
516
- production(:bracketedTripleExpr) do |input, data, callback|
610
+ start_production(:bracketedTripleExpr, as_hash: true)
611
+ production(:bracketedTripleExpr) do |value|
517
612
  # XXX cardinality? annotation* semanticActions
518
- case expression = data[:tripleExpression]
613
+ case expression = value[:oneOfTripleExpr]
519
614
  when Algebra::OneOf, Algebra::EachOf
520
615
  else
521
616
  error(nil, "Bracketed Expression requires multiple contained expressions", production: :bracketedTripleExpr)
522
617
  end
523
- cardinality = data.fetch(:cardinality, {})
618
+ cardinality = value[:_bracketedTripleExpr_1] || {}
524
619
  attrs = [
525
620
  ([:min, cardinality[:min]] if cardinality[:min]),
526
621
  ([:max, cardinality[:max]] if cardinality[:max])
527
622
  ].compact
528
- attrs += Array(data[:codeDecl])
529
- attrs += Array(data[:annotation])
623
+ attrs += value[:semanticActions]
624
+ attrs += Array(value[:_bracketedTripleExpr_2])
530
625
 
531
626
  expression.operands.concat(attrs)
532
- input[:tripleExpression] = expression
627
+ expression
533
628
  end
534
629
 
535
- # [45] tripleConstraint ::= senseFlags? predicate shapeExpression cardinality? annotation* semanticActions
536
- production(:tripleConstraint) do |input, data, callback|
537
- cardinality = data.fetch(:cardinality, {})
630
+ # [45] tripleConstraint ::= senseFlags? predicate inlineShapeExpression cardinality? annotation* semanticActions
631
+ start_production(:tripleConstraint, as_hash: true)
632
+ production(:tripleConstraint) do |value|
633
+ cardinality = value[:_tripleConstraint_2] || {}
538
634
  attrs = [
539
- (:inverse if data[:inverse] || data[:not]),
540
- [:predicate, Array(data[:predicate]).first],
541
- Array(data[:shapeExpression]).first,
635
+ (:inverse if value[:_tripleConstraint_1]),
636
+ [:predicate, value[:predicate]],
637
+ value[:inlineShapeExpression],
542
638
  ([:min, cardinality[:min]] if cardinality[:min]),
543
639
  ([:max, cardinality[:max]] if cardinality[:max])
544
640
  ].compact
545
- attrs += Array(data[:codeDecl])
546
- attrs += Array(data[:annotation])
641
+ attrs += value[:_tripleConstraint_3]
642
+ attrs += value[:semanticActions]
547
643
 
548
- input[:tripleExpression] = Algebra::TripleConstraint.new(*attrs, {}) unless attrs.empty?
644
+ Algebra::TripleConstraint.new(*attrs) # unless attrs.empty?
549
645
  end
550
646
 
551
647
  # [46] cardinality ::= '*' | '+' | '?' | REPEAT_RANGE
648
+ production(:cardinality) do |value|
649
+ case value
650
+ when '*' then {min: 0, max: "*"}
651
+ when '+' then {min: 1, max: "*"}
652
+ when '?' then {min: 0, max: 1}
653
+ else value
654
+ end
655
+ end
656
+
552
657
  # [47] senseFlags ::= '^'
553
658
  # [48] valueSet ::= '[' valueSetValue* ']'
659
+ production(:valueSet) do |value|
660
+ value[1][:_valueSet_1]
661
+ end
554
662
 
555
663
  # [49] valueSetValue ::= iriRange | literalRange | languageRange | '.' exclusion+
556
- production(:valueSetValue) do |input, data, callback|
557
- range = data[:iriRange] || data[:literalRange] || data[:languageRange]
558
- if !range
559
- # All exclusions must be consistent IRI/Literal/Language
560
- case data[:exclusion].first
561
- when Algebra::IriStem, RDF::URI
562
- unless data[:exclusion].all? {|e| e.is_a?(Algebra::IriStem) || e.is_a?(RDF::URI)}
563
- error(nil, "Exclusions must all be IRI type")
564
- end
565
- range = Algebra::IriStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
566
- when Algebra::LiteralStem, RDF::Literal
567
- unless data[:exclusion].all? {|e| e.is_a?(Algebra::LiteralStem) || e.is_a?(RDF::Literal)}
568
- error(nil, "Exclusions must all be Literal type")
569
- end
570
- range = Algebra::LiteralStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
571
- else
572
- unless data[:exclusion].all? {|e| e.is_a?(Algebra::LanguageStem) || e.is_a?(String)}
573
- error(nil, "Exclusions must all be Language type")
574
- end
575
- range = Algebra::LanguageStemRange.new(:wildcard, data[:exclusion].unshift(:exclusions))
664
+ production(:valueSetValue) do |value|
665
+ Algebra::Value.new(value)
666
+ end
667
+ production(:_valueSetValue_1) do |value|
668
+ # All exclusions must be consistent IRI/Literal/Language
669
+ value = value.last[:_valueSetValue_2]
670
+ case value.first
671
+ when Algebra::IriStem, RDF::URI
672
+ unless value.all? {|e| e.is_a?(Algebra::IriStem) || e.is_a?(RDF::URI)}
673
+ error(nil, "Exclusions must all be IRI type")
674
+ end
675
+ Algebra::IriStemRange.new(:wildcard, value.unshift(:exclusions))
676
+ when Algebra::LiteralStem, RDF::Literal
677
+ unless value.all? {|e| e.is_a?(Algebra::LiteralStem) || e.is_a?(RDF::Literal)}
678
+ error(nil, "Exclusions must all be Literal type")
576
679
  end
680
+ Algebra::LiteralStemRange.new(:wildcard, value.unshift(:exclusions))
681
+ else
682
+ unless value.all? {|e| e.is_a?(Algebra::LanguageStem) || e.is_a?(String)}
683
+ error(nil, "Exclusions must all be Language type")
684
+ end
685
+ Algebra::LanguageStemRange.new(:wildcard, value.unshift(:exclusions))
577
686
  end
578
- (input[:valueSetValue] ||= []) << Algebra::Value.new(range)
579
687
  end
580
688
 
581
689
  # [50] exclusion ::= '-' (iri | literal | LANGTAG) '~'?
582
- production(:exclusion) do |input, data, callback|
583
- (input[:exclusion] ||= []) << if data[:pattern]
584
- case
585
- when data[:iri] then Algebra::IriStem.new(data[:iri])
586
- when data[:literal] then Algebra::LiteralStem.new(data[:literal])
587
- when data[:language] then Algebra::LanguageStem.new(data[:language])
690
+ start_production(:exclusion, as_hash: true)
691
+ production(:exclusion) do |value|
692
+ if value[:_exclusion_2]
693
+ case value[:_exclusion_1]
694
+ when RDF::URI then Algebra::IriStem.new(value[:_exclusion_1])
695
+ when RDF::Literal then Algebra::LiteralStem.new(value[:_exclusion_1])
696
+ else Algebra::LanguageStem.new(value[:_exclusion_1])
588
697
  end
589
698
  else
590
- data[:iri] || data[:literal] || data[:language]
699
+ value[:_exclusion_1]
591
700
  end
592
701
  end
593
702
 
594
703
  # [51] iriRange ::= iri ('~' iriExclusion*)?
595
- production(:iriRange) do |input, data, callback|
596
- exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
597
- input[:iriRange] = if data[:pattern] && exclusions
598
- Algebra::IriStemRange.new(data[:iri], exclusions)
599
- elsif data[:pattern]
600
- Algebra::IriStem.new(data[:iri])
601
- elsif data[:dot]
602
- Algebra::IriStemRange.new(:wildcard, exclusions)
704
+ production(:iriRange) do |value|
705
+ iri = value.first[:iri]
706
+ if value.last[:_iriRange_1]
707
+ exclusions = value.last[:_iriRange_1].last[:_iriRange_3]
708
+ if exclusions.empty?
709
+ Algebra::IriStem.new(iri)
710
+ else
711
+ Algebra::IriStemRange.new(iri, exclusions.unshift(:exclusions))
712
+ end
603
713
  else
604
- data[:iri]
714
+ iri
605
715
  end
606
716
  end
607
717
 
608
718
  # [52] iriExclusion ::= '-' iri '~'?
609
- production(:iriExclusion) do |input, data, callback|
610
- val = data[:iri]
611
- (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::IriStem.new(val) : val)
719
+ start_production(:iriExclusion, as_hash: true)
720
+ production(:iriExclusion) do |value|
721
+ value[:_iriExclusion_1] ? Algebra::IriStem.new(value[:iri]) : value[:iri]
612
722
  end
613
723
 
614
724
  # [53] literalRange ::= literal ('~' literalExclusion*)?
615
- production(:literalRange) do |input, data, callback|
616
- exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
617
- input[:literalRange] = if data[:pattern] && exclusions
618
- Algebra::LiteralStemRange.new(data[:literal], exclusions)
619
- elsif data[:pattern]
620
- Algebra::LiteralStem.new(data[:literal])
621
- elsif data[:dot]
622
- Algebra::LiteralStemRange.new(:wildcard, exclusions)
725
+ production(:literalRange) do |value|
726
+ lit = value.first[:literal]
727
+ if value.last[:_literalRange_1]
728
+ exclusions = value.last[:_literalRange_1].last[:_literalRange_3]
729
+ # FIXME Algebra::LiteralStemRange.new(:wildcard, exclusions)
730
+ if exclusions.empty?
731
+ Algebra::LiteralStem.new(lit)
732
+ else
733
+ Algebra::LiteralStemRange.new(lit, exclusions.unshift(:exclusions))
734
+ end
623
735
  else
624
- data[:literal]
736
+ lit
625
737
  end
626
738
  end
627
739
 
628
740
  # [54] literalExclusion ::= '-' literal '~'?
629
- production(:literalExclusion) do |input, data, callback|
630
- val = data[:literal]
631
- (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::LiteralStem.new(val) : val)
741
+ start_production(:literalExclusion, as_hash: true)
742
+ production(:literalExclusion) do |value|
743
+ val = value[:literal]
744
+ value[:_literalExclusion_1] ? Algebra::LiteralStem.new(val) : val
632
745
  end
633
746
 
634
747
  # [55] languageRange ::= LANGTAG ('~' languageExclusion*)?
635
- production(:languageRange) do |input, data, callback|
636
- exclusions = data[:exclusion].unshift(:exclusions) if data[:exclusion]
637
- input[:languageRange] = if data[:pattern] && exclusions
638
- Algebra::LanguageStemRange.new(data[:language], exclusions)
639
- elsif data[:pattern]
640
- Algebra::LanguageStem.new(data[:language])
641
- elsif data[:dot]
642
- Algebra::LanguageStemRange.new(:wildcard, exclusions)
748
+ start_production(:languageRange, as_hash: true)
749
+ production(:languageRange) do |value|
750
+ exclusions = value[:_languageRange_1] if value[:_languageRange_1]
751
+ pattern = !!value[:_languageRange_1]
752
+ if pattern && exclusions.empty?
753
+ Algebra::LanguageStem.new(value[:LANGTAG])
754
+ elsif pattern
755
+ Algebra::LanguageStemRange.new(value[:LANGTAG], exclusions.unshift(:exclusions))
643
756
  else
644
- Algebra::Language.new(data[:language])
757
+ Algebra::Language.new(value[:LANGTAG])
645
758
  end
646
759
  end
760
+ start_production(:_languageRange_2, as_hash: true)
761
+ production(:_languageRange_2) do |value|
762
+ value[:_languageRange_3]
763
+ end
647
764
 
648
- # [56] languageExclusion ::= '-' literal '~'?
649
- production(:languageExclusion) do |input, data, callback|
650
- val = data[:language]
651
- (input[:exclusion] ||= []) << (data[:pattern] ? Algebra::LanguageStem.new(val) : val)
765
+ # [56] languageExclusion ::= '-' LANGTAG '~'?
766
+ start_production(:languageExclusion, as_hash: true)
767
+ production(:languageExclusion) do |value|
768
+ val = value[:LANGTAG]
769
+ value[:_languageExclusion_1] ? Algebra::LanguageStem.new(val) : val
652
770
  end
653
771
 
654
- # [57] include ::= '&' shapeLabel
655
- production(:include) do |input, data, callback|
656
- input[:tripleExpression] = data[:shapeLabel].first
772
+ # [57] include ::= '&' tripleExprLabel
773
+ production(:include) do |value|
774
+ value.last[:tripleExprLabel]
657
775
  end
658
776
 
659
777
  # [58] annotation ::= '//' predicate (iri | literal)
660
- production(:annotation) do |input, data, callback|
661
- annotation = Algebra::Annotation.new([:predicate, data[:predicate].first], (data[:iri] || data[:literal]))
662
- (input[:annotation] ||= []) << annotation
778
+ start_production(:annotation, as_hash: true)
779
+ production(:annotation) do |value|
780
+ Algebra::Annotation.new([:predicate, value[:predicate]], value[:_annotation_1])
663
781
  end
664
782
 
665
783
  # [59] semanticActions ::= codeDecl*
666
784
 
667
785
  # [60] codeDecl ::= '%' iri (CODE | "%")
668
- production(:codeDecl) do |input, data, callback|
669
- (input[:codeDecl] ||= []) << Algebra::SemAct.new(*[data[:iri], data[:code]].compact, {})
786
+ start_production(:codeDecl, as_hash: true)
787
+ production(:codeDecl) do |value|
788
+ code = value[:_codeDecl_1] unless value[:_codeDecl_1] == '%'
789
+ Algebra::SemAct.new(*[value[:iri], code].compact)
670
790
  end
671
791
 
672
792
  # [13t] literal ::= rdfLiteral | numericLiteral | booleanLiteral
673
793
 
674
794
  # [61] predicate ::= iri | RDF_TYPE
675
- production(:predicate) do |input, data, callback|
676
- (input[:predicate] ||= []) << data[:iri]
795
+ production(:predicate) do |value|
796
+ value
677
797
  end
678
798
 
679
799
  # [62] datatype ::= iri
680
- production(:datatype) do |input, data, callback|
681
- input[:datatype] = data[:iri]
682
- end
683
-
684
- # [63] shapeLabel ::= iri | blankNode
685
- production(:shapeLabel) do |input, data, callback|
686
- (input[:shapeLabel] ||= []) << (data[:iri] || data[:blankNode])
800
+ production(:datatype) do |value|
801
+ value.first[:iri]
687
802
  end
688
803
 
804
+ # [63] shapeExprLabel ::= iri | blankNode
689
805
  # [16t] numericLiteral ::= INTEGER | DECIMAL | DOUBLE
690
- # [129s] rdfLiteral ::= langString | string ('^^' datatype)?
691
- production(:rdfLiteral) do |input, data, callback|
692
- input[:literal] = literal(data[:string], data)
806
+ # [65] rdfLiteral ::= langString | string ('^^' datatype)?
807
+ production(:rdfLiteral) do |value|
808
+ literal(*value)
809
+ end
810
+ start_production(:_rdfLiteral_1, as_hash: true)
811
+ production(:_rdfLiteral_1) do |value|
812
+ [value[:string], {datatype: value[:_rdfLiteral_2]}]
813
+ end
814
+ production(:_rdfLiteral_3) do |value|
815
+ value.last[:datatype]
693
816
  end
694
817
 
695
818
  # [134s] booleanLiteral ::= 'true' | 'false'
819
+ production(:booleanLiteral) do |value|
820
+ literal(value == 'true')
821
+ end
822
+
696
823
  # [135s] string ::= STRING_LITERAL1 | STRING_LITERAL_LONG1
697
824
  # | STRING_LITERAL2 | STRING_LITERAL_LONG2
698
825
  # [66] langString ::= LANG_STRING_LITERAL1 | LANG_STRING_LITERAL_LONG1
699
826
  # | LANG_STRING_LITERAL2 | LANG_STRING_LITERAL_LONG2
700
827
  # [136s] iri ::= IRIREF | prefixedName
701
828
  # [137s] prefixedName ::= PNAME_LN | PNAME_NS
829
+ production(:prefixedName) do |value|
830
+ value.is_a?(RDF::URI) ? value : ns(value, '')
831
+ end
832
+
702
833
  # [138s] blankNode ::= BLANK_NODE_LABEL
834
+ production(:blankNode) do |value|
835
+ value.first[:BLANK_NODE_LABEL]
836
+ end
703
837
 
704
838
  ##
705
839
  # Initializes a new parser instance.
@@ -730,7 +864,7 @@ module ShEx
730
864
  # @raise [ShEx::NotSatisfied] if not satisfied
731
865
  # @raise [ShEx::ParseError] when a syntax error is detected
732
866
  # @raise [ShEx::StructureError, ArgumentError] on structural problems with schema
733
- def initialize(input = nil, options = {}, &block)
867
+ def initialize(input = nil, **options, &block)
734
868
  @input = case input
735
869
  when IO, StringIO then input.read
736
870
  else input.to_s.dup
@@ -758,7 +892,7 @@ module ShEx
758
892
  @result.to_sxp
759
893
  end
760
894
 
761
- alias_method :ll1_parse, :parse
895
+ alias_method :peg_parse, :parse
762
896
 
763
897
  # Parse the ShEx schema
764
898
  #
@@ -775,49 +909,19 @@ module ShEx
775
909
  # @return [ShEx::Algebra::Schema] The executable parsed expression.
776
910
  # @raise [ShEx::ParseError] when a syntax error is detected
777
911
  # @raise [ShEx::StructureError, ArgumentError] on structural problems with schema
778
- # @see http://www.w3.org/TR/sparql11-query/#sparqlAlgebra
779
- # @see http://axel.deri.ie/sparqltutorial/ESWC2007_SPARQL_Tutorial_unit2b.pdf
780
- def parse(prod = START)
781
- ll1_parse(@input, prod.to_sym, @options.merge(branch: BRANCH,
782
- first: FIRST,
783
- follow: FOLLOW,
784
- whitespace: WS)
785
- ) do |context, *data|
786
- case context
787
- when :trace
788
- if options[:logger]
789
- level, lineno, depth, *args = data
790
- case level
791
- when 0
792
- log_error(*args, depth: depth, lineno: lineno)
793
- when 1
794
- log_warn(*args, depth: depth, lineno: lineno)
795
- when 2
796
- log_info(*args, depth: depth, lineno: lineno)
797
- else
798
- log_debug(*args, depth: depth, lineno: lineno)
799
- end
800
- end
801
- end
802
- end
803
-
804
- # The last thing on the @prod_data stack is the result
805
- @result = case
806
- when !prod_data.is_a?(Hash)
807
- prod_data
808
- when prod_data.empty?
809
- nil
810
- when prod_data[:schema]
811
- prod_data[:schema]
812
- else
813
- key = prod_data.keys.first
814
- [key] + Array(prod_data[key]) # Creates [:key, [:triple], ...]
815
- end
912
+ # @see https://www.w3.org/TR/sparql11-query/#sparqlAlgebra
913
+ # @see https://axel.deri.ie/sparqltutorial/ESWC2007_SPARQL_Tutorial_unit2b.pdf
914
+ def parse(prod = :shexDoc)
915
+ @result = peg_parse(@input,
916
+ prod.to_sym,
917
+ ShEx::Meta::RULES,
918
+ whitespace: WS,
919
+ **@options)
816
920
 
817
921
  # Validate resulting expression
818
922
  @result.validate! if @result && validate?
819
923
  @result
820
- rescue EBNF::LL1::Parser::Error, EBNF::LL1::Lexer::Error => e
924
+ rescue EBNF::PEG::Parser::Error, EBNF::LL1::Lexer::Error => e
821
925
  raise ShEx::ParseError, e.message, e.backtrace
822
926
  end
823
927
 
@@ -929,7 +1033,7 @@ module ShEx
929
1033
  end
930
1034
 
931
1035
  # Create a literal
932
- def literal(value, options = {})
1036
+ def literal(value, **options)
933
1037
  options = options.dup
934
1038
  # Internal representation is to not use xsd:string, although it could arguably go the other way.
935
1039
  options.delete(:datatype) if options[:datatype] == RDF::XSD.string
@@ -938,7 +1042,7 @@ module ShEx
938
1042
  "options: #{options.inspect}, " +
939
1043
  "validate: #{validate?.inspect}, "
940
1044
  end
941
- RDF::Literal.new(value, options.merge(validate: validate?))
1045
+ RDF::Literal.new(value, **options.merge(validate: validate?))
942
1046
  end
943
1047
  end # class Parser
944
1048
  end # module ShEx