rdf-n3 2.2.0 → 3.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +192 -69
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/lib/rdf/n3.rb +11 -8
  6. data/lib/rdf/n3/algebra.rb +204 -0
  7. data/lib/rdf/n3/algebra/builtin.rb +79 -0
  8. data/lib/rdf/n3/algebra/formula.rb +446 -0
  9. data/lib/rdf/n3/algebra/list/append.rb +42 -0
  10. data/lib/rdf/n3/algebra/list/first.rb +24 -0
  11. data/lib/rdf/n3/algebra/list/in.rb +48 -0
  12. data/lib/rdf/n3/algebra/list/last.rb +24 -0
  13. data/lib/rdf/n3/algebra/list/length.rb +24 -0
  14. data/lib/rdf/n3/algebra/list/member.rb +44 -0
  15. data/lib/rdf/n3/algebra/list_operator.rb +83 -0
  16. data/lib/rdf/n3/algebra/log/conclusion.rb +65 -0
  17. data/lib/rdf/n3/algebra/log/conjunction.rb +36 -0
  18. data/lib/rdf/n3/algebra/log/content.rb +34 -0
  19. data/lib/rdf/n3/algebra/log/equal_to.rb +34 -0
  20. data/lib/rdf/n3/algebra/log/implies.rb +102 -0
  21. data/lib/rdf/n3/algebra/log/includes.rb +70 -0
  22. data/lib/rdf/n3/algebra/log/n3_string.rb +34 -0
  23. data/lib/rdf/n3/algebra/log/not_equal_to.rb +23 -0
  24. data/lib/rdf/n3/algebra/log/not_includes.rb +27 -0
  25. data/lib/rdf/n3/algebra/log/output_string.rb +40 -0
  26. data/lib/rdf/n3/algebra/log/parsed_as_n3.rb +36 -0
  27. data/lib/rdf/n3/algebra/log/semantics.rb +40 -0
  28. data/lib/rdf/n3/algebra/math/absolute_value.rb +36 -0
  29. data/lib/rdf/n3/algebra/math/acos.rb +26 -0
  30. data/lib/rdf/n3/algebra/math/acosh.rb +26 -0
  31. data/lib/rdf/n3/algebra/math/asin.rb +26 -0
  32. data/lib/rdf/n3/algebra/math/asinh.rb +26 -0
  33. data/lib/rdf/n3/algebra/math/atan.rb +26 -0
  34. data/lib/rdf/n3/algebra/math/atanh.rb +26 -0
  35. data/lib/rdf/n3/algebra/math/ceiling.rb +28 -0
  36. data/lib/rdf/n3/algebra/math/cos.rb +40 -0
  37. data/lib/rdf/n3/algebra/math/cosh.rb +38 -0
  38. data/lib/rdf/n3/algebra/math/difference.rb +40 -0
  39. data/lib/rdf/n3/algebra/math/equal_to.rb +54 -0
  40. data/lib/rdf/n3/algebra/math/exponentiation.rb +35 -0
  41. data/lib/rdf/n3/algebra/math/floor.rb +28 -0
  42. data/lib/rdf/n3/algebra/math/greater_than.rb +41 -0
  43. data/lib/rdf/n3/algebra/math/less_than.rb +41 -0
  44. data/lib/rdf/n3/algebra/math/negation.rb +38 -0
  45. data/lib/rdf/n3/algebra/math/not_equal_to.rb +25 -0
  46. data/lib/rdf/n3/algebra/math/not_greater_than.rb +25 -0
  47. data/lib/rdf/n3/algebra/math/not_less_than.rb +25 -0
  48. data/lib/rdf/n3/algebra/math/product.rb +20 -0
  49. data/lib/rdf/n3/algebra/math/quotient.rb +36 -0
  50. data/lib/rdf/n3/algebra/math/remainder.rb +35 -0
  51. data/lib/rdf/n3/algebra/math/rounded.rb +26 -0
  52. data/lib/rdf/n3/algebra/math/sin.rb +40 -0
  53. data/lib/rdf/n3/algebra/math/sinh.rb +38 -0
  54. data/lib/rdf/n3/algebra/math/sum.rb +40 -0
  55. data/lib/rdf/n3/algebra/math/tan.rb +40 -0
  56. data/lib/rdf/n3/algebra/math/tanh.rb +38 -0
  57. data/lib/rdf/n3/algebra/not_implemented.rb +13 -0
  58. data/lib/rdf/n3/algebra/resource_operator.rb +123 -0
  59. data/lib/rdf/n3/algebra/str/concatenation.rb +27 -0
  60. data/lib/rdf/n3/algebra/str/contains.rb +33 -0
  61. data/lib/rdf/n3/algebra/str/contains_ignoring_case.rb +33 -0
  62. data/lib/rdf/n3/algebra/str/ends_with.rb +33 -0
  63. data/lib/rdf/n3/algebra/str/equal_ignoring_case.rb +34 -0
  64. data/lib/rdf/n3/algebra/str/format.rb +17 -0
  65. data/lib/rdf/n3/algebra/str/greater_than.rb +38 -0
  66. data/lib/rdf/n3/algebra/str/less_than.rb +33 -0
  67. data/lib/rdf/n3/algebra/str/matches.rb +37 -0
  68. data/lib/rdf/n3/algebra/str/not_equal_ignoring_case.rb +17 -0
  69. data/lib/rdf/n3/algebra/str/not_greater_than.rb +17 -0
  70. data/lib/rdf/n3/algebra/str/not_less_than.rb +17 -0
  71. data/lib/rdf/n3/algebra/str/not_matches.rb +18 -0
  72. data/lib/rdf/n3/algebra/str/replace.rb +35 -0
  73. data/lib/rdf/n3/algebra/str/scrape.rb +35 -0
  74. data/lib/rdf/n3/algebra/str/starts_with.rb +33 -0
  75. data/lib/rdf/n3/algebra/time/day.rb +35 -0
  76. data/lib/rdf/n3/algebra/time/day_of_week.rb +27 -0
  77. data/lib/rdf/n3/algebra/time/gm_time.rb +29 -0
  78. data/lib/rdf/n3/algebra/time/hour.rb +35 -0
  79. data/lib/rdf/n3/algebra/time/in_seconds.rb +59 -0
  80. data/lib/rdf/n3/algebra/time/local_time.rb +29 -0
  81. data/lib/rdf/n3/algebra/time/minute.rb +35 -0
  82. data/lib/rdf/n3/algebra/time/month.rb +35 -0
  83. data/lib/rdf/n3/algebra/time/second.rb +35 -0
  84. data/lib/rdf/n3/algebra/time/timezone.rb +36 -0
  85. data/lib/rdf/n3/algebra/time/year.rb +29 -0
  86. data/lib/rdf/n3/extensions.rb +221 -0
  87. data/lib/rdf/n3/format.rb +66 -1
  88. data/lib/rdf/n3/list.rb +630 -0
  89. data/lib/rdf/n3/reader.rb +834 -492
  90. data/lib/rdf/n3/reasoner.rb +282 -0
  91. data/lib/rdf/n3/refinements.rb +178 -0
  92. data/lib/rdf/n3/repository.rb +332 -0
  93. data/lib/rdf/n3/terminals.rb +80 -0
  94. data/lib/rdf/n3/vocab.rb +36 -3
  95. data/lib/rdf/n3/writer.rb +476 -239
  96. metadata +187 -68
  97. data/AUTHORS +0 -1
  98. data/History.markdown +0 -99
  99. data/lib/rdf/n3/patches/array_hacks.rb +0 -53
  100. data/lib/rdf/n3/reader/meta.rb +0 -641
  101. data/lib/rdf/n3/reader/parser.rb +0 -237
@@ -1,4 +1,7 @@
1
1
  # coding: utf-8
2
+ require 'rdf/reader'
3
+ require 'ebnf'
4
+
2
5
  module RDF::N3
3
6
  ##
4
7
  # A Notation-3/Turtle parser in Ruby
@@ -9,6 +12,10 @@ module RDF::N3
9
12
  #
10
13
  # Separate pass to create branch_table from n3-selectors.n3
11
14
  #
15
+ # This implementation uses distinguished variables for both universal and explicit existential variables (defined with `@forSome`). Variables created from blank nodes are non-distinguished. Distinguished existential variables are named using an `_ext` suffix, internally, as the RDF `query_pattern` logic looses details of the variable definition in solutions, where the variable is represented using a symbol.
16
+ #
17
+ # Non-distinguished blank node variables are created as part of reasoning.
18
+ #
12
19
  # @todo
13
20
  # * Formulae as RDF::Query representations
14
21
  # * Formula expansion similar to SPARQL Construct
@@ -16,12 +23,41 @@ module RDF::N3
16
23
  # @author [Gregg Kellogg](http://greggkellogg.net/)
17
24
  class Reader < RDF::Reader
18
25
  format Format
26
+ using Refinements
19
27
 
20
28
  include RDF::Util::Logger
21
- include Meta
22
- include Parser
23
-
24
- N3_KEYWORDS = %w(a is of has keywords prefix base true false forSome forAny)
29
+ include EBNF::LL1::Parser
30
+ include Terminals
31
+
32
+ # Nodes used as Formulae graph names
33
+ #
34
+ # @return [Array<RDF::Node>]
35
+ attr_reader :formulae
36
+
37
+ # All nodes allocated to formulae
38
+ #
39
+ # @return [Hash{RDF::Node => RDF::Graph}]
40
+ attr_reader :formula_nodes
41
+
42
+ # Allocated variables by formula
43
+ #
44
+ # @return [Hash{Symbol => RDF::Node}]
45
+ attr_reader :variables
46
+
47
+ ##
48
+ # N3 Reader options
49
+ # @see http://www.rubydoc.info/github/ruby-rdf/rdf/RDF/Reader#options-class_method
50
+ def self.options
51
+ super + [
52
+ RDF::CLI::Option.new(
53
+ symbol: :list_terms,
54
+ datatype: TrueClass,
55
+ default: true,
56
+ control: :checkbox,
57
+ on: ["--list-terms CONTEXT"],
58
+ description: "Use native collections (lists), not first/rest ladder.")
59
+ ]
60
+ end
25
61
 
26
62
  ##
27
63
  # Initializes the N3 reader instance.
@@ -34,43 +70,55 @@ module RDF::N3
34
70
  # @option options [Boolean] :validate (false)
35
71
  # whether to validate the parsed statements and values
36
72
  # @option options [Boolean] :canonicalize (false)
37
- # whether to canonicalize parsed literals
38
- # @option options [Boolean] :intern (true)
39
- # whether to intern all parsed URIs
73
+ # whether to canonicalize parsed literals and URIs.
40
74
  # @option options [Hash] :prefixes (Hash.new)
41
75
  # the prefix mappings to use (not supported by all readers)
76
+ # @option options [Hash] :list_terms (false)
77
+ # represent collections as an `RDF::Term`, rather than an rdf:first/rest ladder.
42
78
  # @return [reader]
43
79
  # @yield [reader] `self`
44
80
  # @yieldparam [RDF::Reader] reader
45
81
  # @yieldreturn [void] ignored
46
82
  # @raise [Error]:: Raises RDF::ReaderError if validating and an error is found
47
- def initialize(input = $stdin, options = {}, &block)
83
+ def initialize(input = $stdin, **options, &block)
48
84
  super do
49
- input.rewind if input.respond_to?(:rewind)
50
- @input = input.respond_to?(:read) ? input : StringIO.new(input.to_s)
51
- @lineno = 0
52
- readline # Prime the pump
53
-
54
- @memo = {}
55
- @keyword_mode = false
56
- @keywords = %w(a is of this has)
57
- @productions = []
58
- @prod_data = []
59
-
60
- @branches = BRANCHES # Get from meta class
61
- @regexps = REGEXPS # Get from meta class
62
-
63
- @formulae = [] # Nodes used as Formluae graph names
64
- @formulae_nodes = {}
65
- @variables = {} # variable definitions along with defining formula
85
+ @options = {
86
+ anon_base: "b0",
87
+ whitespace: WS,
88
+ depth: 0,
89
+ }.merge(@options)
90
+ @prod_stack = []
91
+
92
+ @formulae = []
93
+ @formula_nodes = {}
94
+ @label_uniquifier = "0"
95
+ @bnodes = {}
96
+ @bn_labler = @options[:anon_base].dup
97
+ @bn_mapper = {}
98
+ @variables = {}
66
99
 
67
100
  if options[:base_uri]
68
- log_debug("@uri") { base_uri.inspect}
69
- namespace(nil, uri("#{base_uri}#"))
101
+ progress("base_uri") { base_uri.inspect}
102
+ namespace(nil, iri(base_uri.to_s.match?(%r{[#/]$}) ? base_uri : "#{base_uri}#"))
103
+ end
104
+
105
+ # Prepopulate operator namespaces unless validating
106
+ unless validate?
107
+ namespace(:rdf, RDF.to_uri)
108
+ namespace(:rdfs, RDF::RDFS.to_uri)
109
+ namespace(:xsd, RDF::XSD.to_uri)
110
+ namespace(:crypto, RDF::N3::Crypto.to_uri)
111
+ namespace(:list, RDF::N3::List.to_uri)
112
+ namespace(:log, RDF::N3::Log.to_uri)
113
+ namespace(:math, RDF::N3::Math.to_uri)
114
+ namespace(:rei, RDF::N3::Rei.to_uri)
115
+ #namespace(:string, RDF::N3::String.to_uri)
116
+ namespace(:time, RDF::N3::Time.to_uri)
70
117
  end
71
- log_debug("validate") {validate?.inspect}
72
- log_debug("canonicalize") {canonicalize?.inspect}
73
- log_debug("intern") {intern?.inspect}
118
+ progress("validate") {validate?.inspect}
119
+ progress("canonicalize") {canonicalize?.inspect}
120
+
121
+ @lexer = EBNF::LL1::Lexer.new(input, self.class.patterns, **@options)
74
122
 
75
123
  if block_given?
76
124
  case block.arity
@@ -87,23 +135,29 @@ module RDF::N3
87
135
 
88
136
  ##
89
137
  # Iterates the given block for each RDF statement in the input.
90
- #
91
138
  # @yield [statement]
92
139
  # @yieldparam [RDF::Statement] statement
93
140
  # @return [void]
94
141
  def each_statement(&block)
95
142
  if block_given?
143
+ log_recover
96
144
  @callback = block
97
145
 
98
- parse(START.to_sym)
146
+ begin
147
+ while (@lexer.first rescue true)
148
+ read_n3Doc
149
+ end
150
+ rescue EBNF::LL1::Lexer::Error, SyntaxError, EOFError, Recovery
151
+ # Terminate loop if EOF found while recovering
152
+ end
99
153
 
100
154
  if validate? && log_statistics[:error]
101
155
  raise RDF::ReaderError, "Errors found during processing"
102
156
  end
103
157
  end
104
- enum_for(:each_triple)
158
+ enum_for(:each_statement)
105
159
  end
106
-
160
+
107
161
  ##
108
162
  # Iterates the given block for each RDF triple in the input.
109
163
  #
@@ -112,570 +166,858 @@ module RDF::N3
112
166
  # @yieldparam [RDF::URI] predicate
113
167
  # @yieldparam [RDF::Value] object
114
168
  # @return [void]
115
- def each_triple(&block)
169
+ def each_triple
116
170
  if block_given?
117
171
  each_statement do |statement|
118
- block.call(*statement.to_triple)
172
+ yield(*statement.to_triple)
119
173
  end
120
174
  end
121
175
  enum_for(:each_triple)
122
176
  end
123
-
177
+
124
178
  protected
125
- # Start of production
126
- def onStart(prod)
127
- handler = "#{prod}Start".to_sym
128
- log_debug("#{handler}(#{respond_to?(handler, true)})", prod)
129
- @productions << prod
130
- send(handler, prod) if respond_to?(handler, true)
131
- end
132
-
133
- # End of production
134
- def onFinish
135
- prod = @productions.pop()
136
- handler = "#{prod}Finish".to_sym
137
- log_debug("#{handler}(#{respond_to?(handler, true)})") {"#{prod}: #{@prod_data.last.inspect}"}
138
- send(handler) if respond_to?(handler, true)
139
- end
140
-
141
- # Process of a token
142
- def onToken(prod, tok)
143
- unless @productions.empty?
144
- parentProd = @productions.last
145
- handler = "#{parentProd}Token".to_sym
146
- log_debug("#{handler}(#{respond_to?(handler, true)})") {"#{prod}, #{tok}: #{@prod_data.last.inspect}"}
147
- send(handler, prod, tok) if respond_to?(handler, true)
148
- else
149
- error("Token has no parent production")
179
+
180
+ # Terminals passed to lexer. Order matters!
181
+
182
+ # @!parse none
183
+ terminal(:ANON, ANON)
184
+ terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL)
185
+ terminal(:IRIREF, IRIREF, unescape: true)
186
+ terminal(:DOUBLE, DOUBLE)
187
+ terminal(:DECIMAL, DECIMAL)
188
+ terminal(:INTEGER, INTEGER)
189
+ terminal(:PNAME_LN, PNAME_LN, unescape: true)
190
+ terminal(:PNAME_NS, PNAME_NS)
191
+ terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, unescape: true, partial_regexp: /^'''/)
192
+ terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, unescape: true, partial_regexp: /^"""/)
193
+ terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, unescape: true)
194
+ terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, unescape: true)
195
+
196
+ # String terminals
197
+ terminal(nil, %r(
198
+ [\(\){},.;\[\]a!]
199
+ | \^\^|\^
200
+ |<-|<=|=>|=
201
+ | true|false
202
+ | has|is|of
203
+ |@forAll|@forSome
204
+ )x)
205
+
206
+ terminal(:PREFIX, PREFIX)
207
+ terminal(:BASE, BASE)
208
+ terminal(:LANGTAG, LANGTAG)
209
+ terminal(:QUICK_VAR_NAME, QUICK_VAR_NAME, unescape: true)
210
+
211
+ private
212
+ ##
213
+ # Read statements and directives
214
+ #
215
+ # [1] n3Doc ::= (n3Statement '.' | sparqlDirective)*
216
+ #
217
+ # @return [void]
218
+ def read_n3Doc
219
+ prod(:n3Doc, %w{.}) do
220
+ error("read_n3Doc", "Unexpected end of file") unless token = @lexer.first
221
+ case token.type
222
+ when :BASE, :PREFIX
223
+ read_directive || error("Failed to parse directive", production: :directive, token: token)
224
+ else
225
+ read_n3Statement
226
+ if !log_recovering? || @lexer.first === '.'
227
+ # If recovering, we will have eaten the closing '.'
228
+ token = @lexer.shift
229
+ unless token && token.value == '.'
230
+ error("Expected '.' following n3Statement", production: :n3Statement, token: token)
231
+ end
232
+ end
233
+ end
150
234
  end
151
235
  end
152
-
153
- def booleanToken(prod, tok)
154
- lit = RDF::Literal.new(tok.delete("@"), datatype: RDF::XSD.boolean, validate: validate?, canonicalize: canonicalize?)
155
- add_prod_data(:literal, lit)
156
- end
157
-
158
- def declarationStart(prod)
159
- @prod_data << {}
160
- end
161
-
162
- def declarationToken(prod, tok)
163
- case prod
164
- when "@prefix", "@base", "@keywords"
165
- add_prod_data(:prod, prod)
166
- when "prefix"
167
- add_prod_data(:prefix, tok[0..-2])
168
- when "explicituri"
169
- add_prod_data(:explicituri, tok[1..-2])
170
- else
171
- add_prod_data(prod.to_sym, tok)
236
+
237
+
238
+ ##
239
+ # Read statements and directives
240
+ #
241
+ # [2] n3Statement ::= n3Directive | triples | existential | universal
242
+ #
243
+ # @return [void]
244
+ def read_n3Statement
245
+ prod(:n3Statement, %w{.}) do
246
+ error("read_n3Doc", "Unexpected end of file") unless token = @lexer.first
247
+ read_uniext ||
248
+ read_triples ||
249
+ error("Expected token", production: :statement, token: token)
172
250
  end
173
251
  end
174
252
 
175
- def declarationFinish
176
- decl = @prod_data.pop
177
- case decl[:prod]
178
- when "@prefix"
179
- uri = process_uri(decl[:explicituri])
180
- namespace(decl[:prefix], uri)
181
- when "@base"
182
- # Base, set or update document URI
183
- uri = decl[:explicituri]
184
- options[:base_uri] = process_uri(uri)
185
-
186
- # The empty prefix "" is by default , bound to "#" -- the local namespace of the file.
187
- # The parser behaves as though there were a
188
- # @prefix : <#>.
189
- # just before the file.
190
- # This means that <#foo> can be written :foo and using @keywords one can reduce that to foo.
191
-
192
- namespace(nil, uri.match(/[\/\#]$/) ? base_uri : process_uri("#{uri}#"))
193
- log_debug("declarationFinish[@base]") {"@base=#{base_uri}"}
194
- when "@keywords"
195
- log_debug("declarationFinish[@keywords]") {@keywords.inspect}
196
- # Keywords are handled in tokenizer and maintained in @keywords array
197
- if (@keywords & N3_KEYWORDS) != @keywords
198
- error("Undefined keywords used: #{(@keywords - N3_KEYWORDS).to_sentence}") if validate?
253
+ ##
254
+ # Read base and prefix directives
255
+ #
256
+ # [3] n3Directive ::= prefixID | base
257
+ #
258
+ # @return [void]
259
+ def read_directive
260
+ prod(:directive, %w{.}) do
261
+ token = @lexer.first
262
+ case token.type
263
+ when :BASE
264
+ prod(:base) do
265
+ @lexer.shift
266
+ terminated = token.value == '@base'
267
+ iri = @lexer.shift
268
+ error("Expected IRIREF", production: :base, token: iri) unless iri === :IRIREF
269
+ @options[:base_uri] = process_iri(iri.value[1..-2].gsub(/\s/, ''))
270
+ namespace(nil, base_uri.to_s.end_with?('#') ? base_uri : iri("#{base_uri}#"))
271
+ error("base", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@base'
272
+
273
+ if terminated
274
+ error("base", "Expected #{token} to be terminated") unless @lexer.first === '.'
275
+ @lexer.shift
276
+ elsif @lexer.first === '.'
277
+ error("base", "Expected #{token} not to be terminated")
278
+ else
279
+ true
280
+ end
281
+ end
282
+ when :PREFIX
283
+ prod(:prefixID, %w{.}) do
284
+ @lexer.shift
285
+ pfx, iri = @lexer.shift, @lexer.shift
286
+ terminated = token.value == '@prefix'
287
+ error("Expected PNAME_NS", production: :prefix, token: pfx) unless pfx === :PNAME_NS
288
+ error("Expected IRIREF", production: :prefix, token: iri) unless iri === :IRIREF
289
+ debug("prefixID", depth: options[:depth]) {"Defined prefix #{pfx.inspect} mapping to #{iri.inspect}"}
290
+ namespace(pfx.value[0..-2], process_iri(iri.value[1..-2].gsub(/\s/, '')))
291
+ error("prefixId", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@prefix'
292
+
293
+ if terminated
294
+ error("prefixID", "Expected #{token} to be terminated") unless @lexer.first === '.'
295
+ @lexer.shift
296
+ elsif @lexer.first === '.'
297
+ error("prefixID", "Expected #{token} not to be terminated")
298
+ else
299
+ true
300
+ end
301
+ end
199
302
  end
200
- @userkeys = true
201
- else
202
- error("declarationFinish: FIXME #{decl.inspect}")
203
303
  end
204
304
  end
205
-
206
- # Document start, instantiate
207
- def documentStart(prod)
208
- @formulae.push(nil)
209
- @prod_data << {}
210
- end
211
-
212
- def dtlangToken(prod, tok)
213
- add_prod_data(:langcode, tok) if prod == "langcode"
214
- end
215
-
216
- def existentialStart(prod)
217
- @prod_data << {}
218
- end
219
305
 
220
- # Apart from the set of statements, a formula also has a set of URIs of symbols which are universally quantified,
221
- # and a set of URIs of symbols which are existentially quantified.
222
- # Variables are then in general symbols which have been quantified.
306
+ ##
307
+ # Read triples
223
308
  #
224
- # Here we allocate a variable (making up a name) and record with the defining formula. Quantification is done
225
- # when the formula is completed against all in-scope variables
226
- def existentialFinish
227
- pd = @prod_data.pop
228
- forSome = Array(pd[:symbol])
229
- forSome.each do |term|
230
- @variables[term.to_s] = {formula: @formulae.last, var: RDF::Node.new(term.to_s.split(/[\/#]/).last)}
309
+ # [9] triples ::= subject predicateObjectList?
310
+ #
311
+ # @return [Object] returns the last IRI matched, or subject BNode on predicateObjectList?
312
+ def read_triples
313
+ prod(:triples, %w{.}) do
314
+ error("read_triples", "Unexpected end of file") unless token = @lexer.first
315
+ subject = case token.type || token.value
316
+ when '['
317
+ # blankNodePropertyList predicateObjectList?
318
+ read_blankNodePropertyList || error("Failed to parse blankNodePropertyList", production: :triples, token: @lexer.first)
319
+ else
320
+ # subject predicateObjectList
321
+ read_path || error("Failed to parse subject", production: :triples, token: @lexer.first)
322
+ end
323
+ read_predicateObjectList(subject) || subject
231
324
  end
232
325
  end
233
-
234
- def expressionStart(prod)
235
- @prod_data << {}
236
- end
237
-
238
- # Process path items, and push on the last object for parent processing
239
- def expressionFinish
240
- expression = @prod_data.pop
241
-
242
- # If we're in teh middle of a pathtail, append
243
- if @prod_data.last[:pathtail] && expression[:pathitem] && expression[:pathtail]
244
- path_list = [expression[:pathitem]] + expression[:pathtail]
245
- log_debug("expressionFinish(pathtail)") {"set pathtail to #{path_list.inspect}"}
246
- @prod_data.last[:pathtail] = path_list
247
-
248
- dir_list = [expression[:direction]] if expression[:direction]
249
- dir_list += expression[:directiontail] if expression[:directiontail]
250
- @prod_data.last[:directiontail] = dir_list if dir_list
251
- elsif expression[:pathitem] && expression[:pathtail]
252
- add_prod_data(:expression, process_path(expression))
253
- elsif expression[:pathitem]
254
- add_prod_data(:expression, expression[:pathitem])
255
- else
256
- error("expressionFinish: FIXME #{expression.inspect}")
326
+
327
+ ##
328
+ # Read predicateObjectList
329
+ #
330
+ # [10] predicateObjectList ::= verb objectList (';' (verb objectList)?)*
331
+ #
332
+ # @param [RDF::Resource] subject
333
+ # @return [RDF::URI] the last matched verb
334
+ def read_predicateObjectList(subject)
335
+ return if @lexer.first.nil? || %w(. }).include?(@lexer.first.value)
336
+ prod(:predicateObjectList, %{;}) do
337
+ last_verb = nil
338
+ loop do
339
+ verb, invert = read_verb
340
+ break unless verb
341
+ last_verb = verb
342
+ prod(:_predicateObjectList_2) do
343
+ read_objectList(subject, verb, invert) || error("Expected objectList", production: :predicateObjectList, token: @lexer.first)
344
+ end
345
+ break unless @lexer.first === ';'
346
+ @lexer.shift while @lexer.first === ';'
347
+ end
348
+ last_verb
257
349
  end
258
350
  end
259
-
260
- def literalStart(prod)
261
- @prod_data << {}
262
- end
263
-
264
- def literalToken(prod, tok)
265
- tok = tok[0, 3] == '"""' ? tok[3..-4] : tok[1..-2]
266
- add_prod_data(:string, tok)
267
- end
268
-
269
- def literalFinish
270
- lit = @prod_data.pop
271
- content = RDF::NTriples.unescape(lit[:string])
272
- language = lit[:langcode] if lit[:langcode]
273
- language = language.downcase if language && canonicalize?
274
- datatype = lit[:symbol]
275
-
276
- lit = RDF::Literal.new(content, language: language, datatype: datatype, validate: validate?, canonicalize: canonicalize?)
277
- add_prod_data(:literal, lit)
278
- end
279
-
280
- def objectStart(prod)
281
- @prod_data << {}
282
- end
283
-
284
- def objectFinish
285
- object = @prod_data.pop
286
- if object[:expression]
287
- add_prod_data(:object, object[:expression])
288
- else
289
- error("objectFinish: FIXME #{object.inspect}")
351
+
352
+ ##
353
+ # Read objectList
354
+ #
355
+ # [11] objectList ::= object (',' object)*
356
+ #
357
+ # @return [RDF::Term] the last matched subject
358
+ def read_objectList(subject, predicate, invert)
359
+ prod(:objectList, %{,}) do
360
+ last_object = nil
361
+ while object = prod(:_objectList_2) {read_path}
362
+ last_object = object
363
+
364
+ if invert
365
+ add_statement(:objectList, object, predicate, subject)
366
+ else
367
+ add_statement(:objectList, subject, predicate, object)
368
+ end
369
+
370
+ break unless @lexer.first === ','
371
+ @lexer.shift while @lexer.first === ','
372
+ end
373
+ last_object
290
374
  end
291
375
  end
292
-
293
- def pathitemStart(prod)
294
- @prod_data << {}
376
+
377
+ ##
378
+ # Read a verb
379
+ #
380
+ # [12] verb = predicate
381
+ # | 'a'
382
+ # | 'has' expression
383
+ # | 'is' expression 'of'
384
+ # | '<-' expression
385
+ # | '<='
386
+ # | '=>'
387
+ # | '='
388
+ #
389
+ # @return [RDF::Resource, Boolean] verb and invert?
390
+ def read_verb
391
+ invert = false
392
+ error("read_verb", "Unexpected end of file") unless token = @lexer.first
393
+ verb = case token.type || token.value
394
+ when 'a' then prod(:verb) {@lexer.shift && RDF.type}
395
+ when 'has' then prod(:verb) {@lexer.shift && read_path}
396
+ when 'is' then prod(:verb) {
397
+ @lexer.shift
398
+ invert, v = true, read_path
399
+ error( "Expected 'of'", production: :verb, token: @lexer.first) unless @lexer.first.value == 'of'
400
+ @lexer.shift
401
+ v
402
+ }
403
+ when '<-' then prod(:verb) {
404
+ @lexer.shift
405
+ invert = true
406
+ read_path
407
+ }
408
+ when '<=' then prod(:verb) {
409
+ @lexer.shift
410
+ invert = true
411
+ RDF::N3::Log.implies
412
+ }
413
+ when '=>' then prod(:verb) {@lexer.shift && RDF::N3::Log.implies}
414
+ when '=' then prod(:verb) {@lexer.shift && RDF::OWL.sameAs}
415
+ else read_path
416
+ end
417
+ [verb, invert]
295
418
  end
296
-
297
- def pathitemToken(prod, tok)
298
- case prod
299
- when "numericliteral"
300
- nl = RDF::NTriples.unescape(tok)
301
- datatype = case nl
302
- when /e/i then RDF::XSD.double
303
- when /\./ then RDF::XSD.decimal
304
- else RDF::XSD.integer
419
+
420
+ ##
421
+ # subjects, predicates and objects are all expressions, which are all paths
422
+ #
423
+ # [13] subject ::= expression
424
+ # [14] predicate ::= expression
425
+ # [16] expression ::= path
426
+ # [17] path ::= pathItem ('!' path | '^' path)?
427
+ #
428
+ # @return [RDF::Resource]
429
+ def read_path
430
+ return if @lexer.first.nil? || %w/. } ) ]/.include?(@lexer.first.value)
431
+ prod(:path) do
432
+ pathtail = path = {}
433
+ loop do
434
+ pathtail[:pathitem] = prod(:pathItem) do
435
+ read_iri ||
436
+ read_blankNode ||
437
+ read_quickVar ||
438
+ read_collection ||
439
+ read_blankNodePropertyList ||
440
+ read_literal ||
441
+ read_formula
442
+ end
443
+
444
+ break if @lexer.first.nil? || !%w(! ^).include?(@lexer.first.value)
445
+ prod(:_path_2) do
446
+ pathtail[:direction] = @lexer.shift.value == '!' ? :forward : :reverse
447
+ pathtail = pathtail[:pathtail] = {}
448
+ end
305
449
  end
306
-
307
- lit = RDF::Literal.new(nl, datatype: datatype, validate: validate?, canonicalize: canonicalize?)
308
- add_prod_data(:literal, lit)
309
- when "quickvariable"
310
- # There is a also a shorthand syntax ?x which is the same as :x except that it implies that x is
311
- # universally quantified not in the formula but in its parent formula
312
- uri = process_qname(tok.sub('?', ':'))
313
- @variables[uri.to_s] = { formula: @formulae[-2], var: univar(uri) }
314
- add_prod_data(:symbol, uri)
315
- when "boolean"
316
- lit = RDF::Literal.new(tok.delete("@"), datatype: RDF::XSD.boolean, validate: validate?, canonicalize: canonicalize?)
317
- add_prod_data(:literal, lit)
318
- when "[", "("
319
- # Push on state for content of blank node
320
- @prod_data << {}
321
- when "]", ")"
322
- # Construct
323
- symbol = process_anonnode(@prod_data.pop)
324
- add_prod_data(:symbol, symbol)
325
- when "{"
326
- # A new formula, push on a node as a named graph
327
- node = RDF::Node.new
328
- @formulae << node
329
- @formulae_nodes[node] = true
330
- when "}"
331
- # Pop off the formula, and remove any variables defined in this graph
332
- formula = @formulae.pop
333
- @variables.delete_if {|k, v| v[:formula] == formula}
334
- add_prod_data(:symbol, formula)
335
- else
336
- error("pathitemToken(#{prod}, #{tok}): FIXME")
450
+
451
+ # Returns the first object in the path
452
+ # FIXME: what if it's a verb?
453
+ process_path(path)
337
454
  end
338
455
  end
339
456
 
340
- def pathitemFinish
341
- pathitem = @prod_data.pop
342
- if pathitem[:pathlist]
343
- error("pathitemFinish(pathlist): FIXME #{pathitem.inspect}")
344
- elsif pathitem[:propertylist]
345
- error("pathitemFinish(propertylist): FIXME #{pathitem.inspect}")
346
- elsif pathitem[:symbol] || pathitem[:literal]
347
- add_prod_data(:pathitem, pathitem[:symbol] || pathitem[:literal])
348
- else
349
- error("pathitemFinish: FIXME #{pathitem.inspect}")
457
+ ##
458
+ # Read a literal
459
+ #
460
+ # [19] literal ::= rdfLiteral | numericLiteral | BOOLEAN_LITERAL
461
+ #
462
+ # @return [RDF::Literal]
463
+ def read_literal
464
+ error("Unexpected end of file", production: :literal) unless token = @lexer.first
465
+ case token.type || token.value
466
+ when :INTEGER then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.integer, canonicalize: canonicalize?)}
467
+ when :DECIMAL
468
+ prod(:literal) do
469
+ value = @lexer.shift.value
470
+ value = "0#{value}" if value.start_with?(".")
471
+ literal(value, datatype: RDF::XSD.decimal, canonicalize: canonicalize?)
472
+ end
473
+ when :DOUBLE then prod(:literal) {literal(@lexer.shift.value.sub(/\.([eE])/, '.0\1'), datatype: RDF::XSD.double, canonicalize: canonicalize?)}
474
+ when "true", "false" then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.boolean, canonicalize: canonicalize?)}
475
+ when :STRING_LITERAL_QUOTE, :STRING_LITERAL_SINGLE_QUOTE
476
+ prod(:literal) do
477
+ value = @lexer.shift.value[1..-2]
478
+ error("read_literal", "Unexpected end of file") unless token = @lexer.first
479
+ case token.type || token.value
480
+ when :LANGTAG
481
+ literal(value, language: @lexer.shift.value[1..-1].to_sym)
482
+ when '^^'
483
+ @lexer.shift
484
+ literal(value, datatype: read_iri)
485
+ else
486
+ literal(value)
487
+ end
488
+ end
489
+ when :STRING_LITERAL_LONG_QUOTE, :STRING_LITERAL_LONG_SINGLE_QUOTE
490
+ prod(:literal) do
491
+ value = @lexer.shift.value[3..-4]
492
+ error("read_literal", "Unexpected end of file") unless token = @lexer.first
493
+ case token.type || token.value
494
+ when :LANGTAG
495
+ literal(value, language: @lexer.shift.value[1..-1].to_sym)
496
+ when '^^'
497
+ @lexer.shift
498
+ literal(value, datatype: read_iri)
499
+ else
500
+ literal(value)
501
+ end
502
+ end
350
503
  end
351
504
  end
352
-
353
- def pathlistStart(prod)
354
- @prod_data << {pathlist: []}
355
- end
356
-
357
- def pathlistFinish
358
- pathlist = @prod_data.pop
359
- # Flatten propertylist into an array
360
- expr = @prod_data.last.delete(:expression)
361
- add_prod_data(:pathlist, expr) if expr
362
- add_prod_data(:pathlist, pathlist[:pathlist]) if pathlist[:pathlist]
363
- end
364
-
365
- def pathtailStart(prod)
366
- @prod_data << {pathtail: []}
367
- end
368
-
369
- def pathtailToken(prod, tok)
370
- case tok
371
- when "!", "."
372
- add_prod_data(:direction, :forward)
373
- when "^"
374
- add_prod_data(:direction, :reverse)
505
+
506
+ ##
507
+ # Read a blankNodePropertyList
508
+ #
509
+ # [20] blankNodePropertyList ::= '[' predicateObjectList ']'
510
+ #
511
+ # @return [RDF::Node]
512
+ def read_blankNodePropertyList
513
+ token = @lexer.first
514
+ if token === '['
515
+ prod(:blankNodePropertyList, %{]}) do
516
+ @lexer.shift
517
+ progress("blankNodePropertyList", depth: options[:depth], token: token)
518
+ node = bnode
519
+ debug("blankNodePropertyList: subject", depth: options[:depth]) {node.to_sxp}
520
+ read_predicateObjectList(node)
521
+ error("blankNodePropertyList", "Expected closing ']'") unless @lexer.first === ']'
522
+ @lexer.shift
523
+ node
524
+ end
375
525
  end
376
526
  end
377
-
378
- def pathtailFinish
379
- pathtail = @prod_data.pop
380
- add_prod_data(:pathtail, pathtail[:pathtail])
381
- add_prod_data(:direction, pathtail[:direction]) if pathtail[:direction]
382
- add_prod_data(:directiontail, pathtail[:directiontail]) if pathtail[:directiontail]
383
- end
384
-
385
- def propertylistStart(prod)
386
- @prod_data << {}
387
- end
388
-
389
- def propertylistFinish
390
- propertylist = @prod_data.pop
391
- # Flatten propertylist into an array
392
- ary = [propertylist, propertylist.delete(:propertylist)].flatten.compact
393
- @prod_data.last[:propertylist] = ary
527
+
528
+ ##
529
+ # Read a collection (`RDF::List`)
530
+ #
531
+ # [21] collection ::= '(' object* ')'
532
+ #
533
+ # If the `list_terms` option is given, the resulting resource is a list, otherwise, it is the list subject, and the first/rest entries are also emitted.
534
+ # @return [RDF::Node]
535
+ def read_collection
536
+ if @lexer.first === '('
537
+ prod(:collection, %{)}) do
538
+ @lexer.shift
539
+ token = @lexer.first
540
+ progress("collection", depth: options[:depth]) {"token: #{token.inspect}"}
541
+ objects = []
542
+ while @lexer.first.value != ')' && (object = read_path)
543
+ objects << object
544
+ end
545
+ error("collection", "Expected closing ')'") unless @lexer.first === ')'
546
+ @lexer.shift
547
+ list = RDF::N3::List.new(values: objects)
548
+ if options[:list_terms]
549
+ list
550
+ else
551
+ list.each_statement do |statement|
552
+ add_statement("collection", *statement.to_a)
553
+ end
554
+ list.subject
555
+ end
556
+ end
557
+ end
394
558
  end
395
-
396
- def simpleStatementStart(prod)
397
- @prod_data << {}
559
+
560
+ ##
561
+ # Read a formula
562
+ #
563
+ # [22] formula ::= '{' formulaContent? '}'
564
+ # [23] formulaContent ::= n3Statement ('.' formulaContent?)?
565
+ #
566
+ # @return [RDF::Node]
567
+ def read_formula
568
+ if @lexer.first === '{'
569
+ prod(:formula, %(})) do
570
+ @lexer.shift
571
+ node = RDF::Node.intern("_form_#{unique_label}")
572
+ formulae.push(node)
573
+ formula_nodes[node] = true
574
+ debug(:formula, depth: @options[:depth]) {"id: #{node}, depth: #{formulae.length}"}
575
+
576
+ # Promote variables defined on the earlier formula to this formula
577
+ variables[node] = {}
578
+ variables.fetch(formulae[-2], {}).each do |name, var|
579
+ variables[node][name] = var
580
+ end
581
+
582
+ read_formulaContent
583
+
584
+ # Pop off the formula
585
+ # Result is the BNode associated with the formula
586
+ debug(:formula, depth: @options[:depth]) {"pop: #{formulae.last}, depth: #{formulae.length}"}
587
+ error("collection", "Expected closing '}'") unless @lexer.shift === '}'
588
+
589
+ formulae.pop
590
+ end
591
+ end
398
592
  end
399
-
400
- # Completion of Simple Statement, all productions include :subject, and :propertyList
401
- def simpleStatementFinish
402
- statement = @prod_data.pop
403
-
404
- subject = statement[:subject]
405
- properties = Array(statement[:propertylist])
406
- properties.each do |p|
407
- predicate = p[:verb]
408
- next unless predicate
409
- log_debug("simpleStatementFinish(pred)") {predicate.to_s}
410
- error(%(Illegal statment: "#{predicate}" missing object)) unless p.has_key?(:object)
411
- objects = Array(p[:object])
412
- objects.each do |object|
413
- if p[:invert]
414
- add_triple("simpleStatementFinish", object, predicate, subject)
593
+
594
+ ##
595
+ # Read formula content, similaer to n3Statement
596
+ #
597
+ # [23] formulaContent ::= n3Statement ('.' formulaContent?)?
598
+ #
599
+ # @return [void]
600
+ def read_formulaContent
601
+ return if @lexer.first === '}' # Allow empty formula
602
+ prod(:formulaContent, %w(. })) do
603
+ loop do
604
+ token = @lexer.first
605
+ error("read_formulaContent", "Unexpected end of file") unless token
606
+ case token.type
607
+ when :BASE, :PREFIX
608
+ read_directive || error("Failed to parse directive", production: :directive, token: token)
609
+ break if @lexer.first === '}'
415
610
  else
416
- add_triple("simpleStatementFinish", subject, predicate, object)
611
+ read_n3Statement
612
+ token = @lexer.first
613
+ case token.value
614
+ when '.'
615
+ @lexer.shift
616
+ # '.' optional at end of formulaContent
617
+ break if @lexer.first === '}'
618
+ when '}'
619
+ break
620
+ else
621
+ error("Expected '.' or '}' following n3Statement", production: :formulaContent, token: token)
622
+ end
417
623
  end
418
624
  end
419
625
  end
420
626
  end
421
627
 
422
- def subjectStart(prod)
423
- @prod_data << {}
628
+ ##
629
+ # Read an IRI
630
+ #
631
+ # (rule iri "26" (alt IRIREF prefixedName))
632
+ #
633
+ # @return [RDF::URI]
634
+ def read_iri
635
+ token = @lexer.first
636
+ case token && token.type
637
+ when :IRIREF then prod(:iri) {process_iri(@lexer.shift.value[1..-2].gsub(/\s+/m, ''))}
638
+ when :PNAME_LN, :PNAME_NS then prod(:prefixedName) {process_pname(*@lexer.shift.value)}
639
+ end
424
640
  end
425
-
426
- def subjectFinish
427
- subject = @prod_data.pop
428
-
429
- if subject[:expression]
430
- add_prod_data(:subject, subject[:expression])
431
- else
432
- error("unknown expression type")
641
+
642
+ ##
643
+ # Read a blank node
644
+ #
645
+ # [29] blankNode ::= BLANK_NODE_LABEL | ANON
646
+ #
647
+ # @return [RDF::Node]
648
+ def read_blankNode
649
+ token = @lexer.first
650
+ case token && token.type
651
+ when :BLANK_NODE_LABEL then prod(:blankNode) {bnode(@lexer.shift.value[2..-1])}
652
+ when :ANON then @lexer.shift && prod(:blankNode) {bnode}
433
653
  end
434
654
  end
435
-
436
- def symbolToken(prod, tok)
437
- term = case prod
438
- when 'explicituri'
439
- process_uri(tok[1..-2])
440
- when 'qname'
441
- process_qname(tok)
442
- else
443
- error("symbolToken(#{prod}, #{tok}): FIXME #{term.inspect}")
655
+
656
+ ##
657
+ # Read a quickVar, having global scope.
658
+ #
659
+ # [30] quickVar ::= QUICK_VAR_NAME
660
+ #
661
+ # @return [RDF::Query::Variable]
662
+ def read_quickVar
663
+ if @lexer.first.type == :QUICK_VAR_NAME
664
+ prod(:quickVar) do
665
+ token = @lexer.shift
666
+ value = token.value.sub('?', '')
667
+ iri = ns(nil, "#{value}_quick")
668
+ variables[nil][iri] ||= univar(iri, scope: nil)
669
+ end
444
670
  end
445
-
446
- add_prod_data(:symbol, term)
447
671
  end
448
672
 
449
- def universalStart(prod)
450
- @prod_data << {}
673
+ ##
674
+ # Read a list of IRIs
675
+ #
676
+ # [27] iriList ::= iri ( ',' iri )*
677
+ #
678
+ # @return [Array<RDF::URI>] the list of IRIs
679
+ def read_irilist
680
+ iris = []
681
+ prod(:iriList, %{,}) do
682
+ while iri = read_iri
683
+ iris << iri
684
+ break unless @lexer.first === ','
685
+ @lexer.shift while @lexer.first === ','
686
+ end
687
+ end
688
+ iris
451
689
  end
452
690
 
691
+ ##
692
+ # Read a univeral or existential
693
+ #
453
694
  # Apart from the set of statements, a formula also has a set of URIs of symbols which are universally quantified,
454
695
  # and a set of URIs of symbols which are existentially quantified.
455
696
  # Variables are then in general symbols which have been quantified.
456
697
  #
457
698
  # Here we allocate a variable (making up a name) and record with the defining formula. Quantification is done
458
699
  # when the formula is completed against all in-scope variables
459
- def universalFinish
460
- pd = @prod_data.pop
461
- forAll = Array(pd[:symbol])
462
- forAll.each do |term|
463
- @variables[term.to_s] = { formula: @formulae.last, var: univar(term) }
464
- end
465
- end
466
-
467
- def verbStart(prod)
468
- @prod_data << {}
469
- end
470
-
471
- def verbToken(prod, tok)
472
- term = case prod
473
- when '<='
474
- add_prod_data(:expression, RDF::LOG.implies)
475
- add_prod_data(:invert, true)
476
- when '=>'
477
- add_prod_data(:expression, RDF::LOG.implies)
478
- when '='
479
- add_prod_data(:expression, RDF::OWL.sameAs)
480
- when '@a'
481
- add_prod_data(:expression, RDF.type)
482
- when '@has', "@of"
483
- # Syntactic sugar
484
- when '@is'
485
- add_prod_data(:invert, true)
486
- else
487
- error("verbToken(#{prod}, #{tok}): FIXME #{term.inspect}")
700
+ #
701
+ # [31] existential ::= '@forSome' iriList
702
+ # [32] universal ::= '@forAll' iriList
703
+ #
704
+ # @return [void]
705
+ def read_uniext
706
+ if %w(@forSome @forAll).include?(@lexer.first.value)
707
+ token = @lexer.shift
708
+ prod(token === '@forAll' ? :universal : :existential) do
709
+ iri_list = read_irilist
710
+ iri_list.each do |iri|
711
+ # Note, this might re-create an equivalent variable already defined in this formula, and replaces an equivalent variable that may have been defined in the parent formula.
712
+ var = univar(iri, scope: formulae.last, existential: token === '@forSome')
713
+ add_var_to_formula(formulae.last, iri, var)
714
+ end
715
+ end
488
716
  end
489
-
490
- add_prod_data(:symbol, term)
491
717
  end
492
718
 
493
- def verbFinish
494
- verb = @prod_data.pop
495
- if verb[:expression]
496
- error("Literal may not be used as a predicate") if verb[:expression].is_a?(RDF::Literal)
497
- error("Formula may not be used as a peredicate") if @formulae_nodes.has_key?(verb[:expression])
498
- add_prod_data(:verb, verb[:expression])
499
- add_prod_data(:invert, true) if verb[:invert]
500
- else
501
- error("verbFinish: FIXME #{verb.inspect}")
502
- end
503
- end
504
-
505
- private
506
-
507
719
  ###################
508
720
  # Utility Functions
509
721
  ###################
510
722
 
511
- def process_anonnode(anonnode)
512
- log_debug("process_anonnode") {anonnode.inspect}
513
-
514
- if anonnode[:propertylist]
515
- properties = anonnode[:propertylist]
516
- bnode = RDF::Node.new
517
- properties.each do |p|
518
- predicate = p[:verb]
519
- log_debug("process_anonnode(verb)") {predicate.inspect}
520
- objects = Array(p[:object])
521
- objects.each { |object| add_triple("anonnode", bnode, predicate, object) }
522
- end
523
- bnode
524
- elsif anonnode[:pathlist]
525
- objects = Array(anonnode[:pathlist])
526
- list = RDF::List[*objects]
527
- list.each_statement do |statement|
528
- next if statement.predicate == RDF.type && statement.object == RDF.List
529
- add_triple("anonnode(list)", statement.subject, statement.predicate, statement.object)
530
- end
531
- list.subject
532
- end
533
- end
534
-
535
723
  # Process a path, such as:
536
- # :a.:b means [is :b of :a] Deprecated
537
724
  # :a!:b means [is :b of :a] => :a :b []
538
725
  # :a^:b means [:b :a] => [] :b :a
539
726
  #
540
727
  # Create triple and return property used for next iteration
541
- def process_path(expression)
542
- log_debug("process_path") {expression.inspect}
543
-
544
- pathitem = expression[:pathitem]
545
- pathtail = expression[:pathtail]
546
-
547
- direction_list = [expression[:direction], expression[:directiontail]].flatten.compact
548
-
549
- pathtail.each do |pred|
550
- direction = direction_list.shift
551
- bnode = RDF::Node.new
728
+ #
729
+ # Result is last created bnode
730
+ def process_path(path)
731
+ pathitem, direction, pathtail = path[:pathitem], path[:direction], path[:pathtail]
732
+ debug("process_path", depth: @options[:depth]) {path.inspect}
733
+
734
+ while pathtail
735
+ bnode = bnode()
736
+ pred = pathtail.is_a?(RDF::Term) ? pathtail : pathtail[:pathitem]
552
737
  if direction == :reverse
553
- add_triple("process_path(reverse)", bnode, pred, pathitem)
738
+ add_statement("process_path(reverse)", bnode, pred, pathitem)
554
739
  else
555
- add_triple("process_path(forward)", pathitem, pred, bnode)
740
+ add_statement("process_path(forward)", pathitem, pred, bnode)
556
741
  end
557
742
  pathitem = bnode
743
+ direction = pathtail[:direction] if pathtail.is_a?(Hash)
744
+ pathtail = pathtail.is_a?(Hash) && pathtail[:pathtail]
558
745
  end
559
746
  pathitem
560
747
  end
561
748
 
562
- def process_uri(uri)
563
- uri(base_uri, RDF::NTriples.unescape(uri))
749
+ def process_iri(iri)
750
+ iri(base_uri, iri.to_s)
564
751
  end
565
-
566
- def process_qname(tok)
567
- if tok.include?(":")
568
- prefix, name = tok.split(":")
569
- elsif @userkeys
570
- # If the @keywords directive is given, the keywords given will thereafter be recognized
571
- # without a "@" prefix, and anything else is a local name in the default namespace.
572
- prefix, name = "", tok
573
- elsif %w(true false).include?(tok)
574
- # The words true and false are boolean literals.
575
- #
576
- # They were added to Notation3 in 2006-02 in discussion with the SPARQL language developers, the Data
577
- # Access Working Group. Note that no existing documents will have used a naked true or false word, without a
578
- # @keyword statement which would make it clear that they were not to be treated as keywords. Furthermore any
579
- # old parser encountering true or false naked or in a @keywords
580
- return RDF::Literal.new(tok, datatype: RDF::XSD.boolean)
581
- else
582
- error("Set user @keywords to use barenames.")
583
- end
584
752
 
585
- uri = if prefix(prefix)
586
- log_debug('process_qname(ns)') {"#{prefix(prefix)}, #{name}"}
753
+ def process_pname(value)
754
+ prefix, name = value.split(":", 2)
755
+
756
+ iri = if prefix(prefix)
757
+ #debug('process_pname(ns)', depth: @options[:depth]) {"#{prefix(prefix)}, #{name}"}
587
758
  ns(prefix, name)
588
- elsif prefix == '_'
589
- log_debug('process_qname(bnode)', name)
590
- bnode(name)
759
+ elsif prefix && !prefix.empty?
760
+ error("process_pname", "Use of undefined prefix #{prefix.inspect}")
761
+ ns(nil, name)
591
762
  else
592
- log_debug('process_qname(default_ns)', name)
593
- namespace(nil, uri("#{base_uri}#")) unless prefix(nil)
594
763
  ns(nil, name)
595
764
  end
596
- log_debug('process_qname') {uri.inspect}
597
- uri
598
- end
599
-
600
- # Add values to production data, values aranged as an array
601
- def add_prod_data(sym, value)
602
- case @prod_data.last[sym]
603
- when nil
604
- @prod_data.last[sym] = value
605
- when Array
606
- @prod_data.last[sym] += Array(value)
607
- else
608
- @prod_data.last[sym] = Array(@prod_data.last[sym]) + Array(value)
765
+ debug('process_pname', depth: @options[:depth]) {iri.inspect}
766
+ iri
767
+ end
768
+
769
+ # Keep track of allocated BNodes. Blank nodes are allocated to the formula.
770
+ # Unnnamed bnodes are created using an incrementing labeler for repeatability.
771
+ def bnode(label = nil)
772
+ form_id = formulae.last ? formulae.last.id : '_bn_ground'
773
+ if label
774
+ # Return previously allocated blank node for.
775
+ @bn_mapper[form_id] ||= {}
776
+ return @bn_mapper[form_id][label] if @bn_mapper[form_id][label]
609
777
  end
610
- end
611
778
 
612
- # Keep track of allocated BNodes
613
- def bnode(value = nil)
614
- @bnode_cache ||= {}
615
- @bnode_cache[value.to_s] ||= RDF::Node.new(value)
779
+ # Get a fresh label
780
+ @bn_labler.succ! while @bnodes[@bn_labler]
781
+
782
+ bn = RDF::Node.intern(@bn_labler.to_sym)
783
+ @bnodes[@bn_labler] = bn
784
+ @bn_mapper[form_id][label] = bn if label
785
+ bn
616
786
  end
617
787
 
618
- def univar(label)
619
- unless label
620
- @unnamed_label ||= "var0"
621
- label = @unnamed_label = @unnamed_label.succ
622
- end
623
- RDF::Query::Variable.new(label.to_s)
788
+ # If not in ground formula, note scope, and if existential
789
+ def univar(label, scope:, existential: false)
790
+ value = existential ? "#{label}_ext" : label
791
+ value = "#{value}#{scope.id}" if scope
792
+ RDF::Query::Variable.new(value, existential: existential)
624
793
  end
625
794
 
626
- # add a statement, object can be literal or URI or bnode
795
+ # add a pattern or statement
627
796
  #
628
797
  # @param [any] node string for showing graph_name
629
- # @param [URI, Node] subject the subject of the statement
630
- # @param [URI] predicate the predicate of the statement
631
- # @param [URI, Node, Literal] object the object of the statement
798
+ # @param [RDF::Term] subject the subject of the statement
799
+ # @param [RDF::URI] predicate the predicate of the statement
800
+ # @param [RDF::Term] object the object of the statement
632
801
  # @return [Statement] Added statement
633
802
  # @raise [RDF::ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_.
634
- def add_triple(node, subject, predicate, object)
635
- graph_name_opts = @formulae.last ? {graph_name: @formulae.last} : {}
636
-
637
- statement = RDF::Statement(subject, predicate, object, graph_name_opts)
638
- log_debug(node) {statement.to_s}
803
+ def add_statement(node, subject, predicate, object)
804
+ statement = if @formulae.last
805
+ # It's a pattern in a formula
806
+ RDF::Query::Pattern.new(subject, predicate, object, graph_name: @formulae.last)
807
+ else
808
+ RDF::Statement(subject, predicate, object)
809
+ end
810
+ debug("statement(#{node})", depth: @options[:depth]) {statement.to_s}
811
+ error("statement(#{node})", "Statement is invalid: #{statement.inspect}") if validate? && statement.invalid?
639
812
  @callback.call(statement)
640
813
  end
641
814
 
642
- def namespace(prefix, uri)
643
- uri = uri.to_s
644
- if uri == '#'
645
- uri = prefix(nil).to_s + '#'
815
+ def namespace(prefix, iri)
816
+ iri = iri.to_s
817
+ if iri == '#'
818
+ iri = prefix(nil).to_s + '#'
646
819
  end
647
- log_debug("namespace") {"'#{prefix}' <#{uri}>"}
648
- prefix(prefix, uri(uri))
820
+ debug("namespace", depth: @options[:depth]) {"'#{prefix}' <#{iri}>"}
821
+ prefix(prefix, iri(iri))
649
822
  end
650
823
 
651
- # Is this an allowable keyword?
652
- def keyword_check(kw)
653
- unless (@keywords || %w(a is of has)).include?(kw)
654
- raise RDF::ReaderError, "unqualified keyword '#{kw}' used without @keyword directive" if validate?
655
- end
656
- end
657
-
658
- # Create URIs
659
- def uri(value, append = nil)
824
+ # Create IRIs
825
+ def iri(value, append = nil)
660
826
  value = RDF::URI(value)
661
827
  value = value.join(append) if append
662
828
  value.validate! if validate? && value.respond_to?(:validate)
663
829
  value.canonicalize! if canonicalize?
664
- value = RDF::URI.intern(value, {}) if intern?
665
-
666
- # Variable substitution for in-scope variables. Variables are in scope if they are defined in anthing other than
667
- # the current formula
668
- var = @variables[value.to_s]
669
- value = var[:var] if var
830
+
831
+ # Variable substitution for in-scope variables. Variables are in scope if they are defined in anthing other than the current formula
832
+ var = find_var(value)
833
+ value = var if var
670
834
 
671
835
  value
836
+ rescue ArgumentError => e
837
+ error("iri", e.message)
672
838
  end
673
839
 
674
- def ns(prefix, suffix)
840
+ # Create a literal
841
+ def literal(value, **options)
842
+ debug("literal", depth: @options[:depth]) do
843
+ "value: #{value.inspect}, " +
844
+ "options: #{options.inspect}, " +
845
+ "validate: #{validate?.inspect}, " +
846
+ "c14n?: #{canonicalize?.inspect}"
847
+ end
848
+ RDF::Literal.new(value, validate: validate?, canonicalize: canonicalize?, **options)
849
+ rescue ArgumentError => e
850
+ error("Argument Error #{e.message}", production: :literal, token: @lexer.first)
851
+ end
852
+
853
+ # Decode a PName
854
+ def ns(prefix = nil, suffix = nil)
855
+ namespace(nil, iri("#{base_uri}#")) if prefix.nil? && !prefix(nil)
856
+
675
857
  base = prefix(prefix).to_s
676
858
  suffix = suffix.to_s.sub(/^\#/, "") if base.index("#")
677
- log_debug("ns") {"base: '#{base}', suffix: '#{suffix}'"}
678
- uri(base + suffix.to_s)
859
+ iri(base + suffix.to_s)
860
+ end
861
+
862
+ # Returns a unique label
863
+ def unique_label
864
+ label, @label_uniquifier = @label_uniquifier, @label_uniquifier.succ
865
+ label
866
+ end
867
+
868
+ # Find any variable that may be defined in the formula identified by `bn`
869
+ # @param [RDF::Node] name of formula
870
+ # @return [RDF::Query::Variable]
871
+ def find_var(name)
872
+ (variables[@formulae.last] ||= {})[name.to_s]
873
+ end
874
+
875
+ # Add a variable to the formula identified by `bn`, returning the variable. Useful as an LRU for variable name lookups
876
+ # @param [RDF::Node] bn name of formula
877
+ # @param [#to_s] name of variable for lookup
878
+ # @param [RDF::Query::Variable] var
879
+ # @return [RDF::Query::Variable]
880
+ def add_var_to_formula(bn, name, var)
881
+ (variables[bn] ||= {})[name.to_s] = var
882
+ end
883
+
    # Run `production` by yielding to the block, tracking it on the production
    # stack and indenting debug output. On a lexer error, syntax error, or an
    # inner Recovery, performs panic-mode error recovery: tokens are skipped
    # until one appears in the follow sets (`recover_to`) of the stacked
    # productions, then either this production resumes or Recovery is
    # re-raised to unwind to a production that can handle the token.
    #
    # @param [Symbol] production name of the production, for logging
    # @param [Array<String>, String] recover_to terminals that may follow this production
    # @return [Object] the value of the block (or of recovery)
    def prod(production, recover_to = [])
      @prod_stack << {prod: production, recover_to: recover_to}
      @options[:depth] += 1
      recover("#{production}(start)", depth: options[:depth], token: @lexer.first)
      yield
    rescue EBNF::LL1::Lexer::Error, SyntaxError, Recovery => e
      # Lexer encountered an illegal token or the parser encountered
      # a terminal which is inappropriate for the current production.
      # Perform error recovery to find a reasonable terminal based
      # on the follow sets of the relevant productions. This includes
      # remaining terms from the current production and the stacked
      # productions
      case e
      when EBNF::LL1::Lexer::Error
        @lexer.recover
        begin
          # Report the lexer failure, swallowing the SyntaxError that
          # error() raises so recovery can continue below
          error("Lexer error", "With input '#{e.input}': #{e.message}",
            production: production,
            token: e.token)
        rescue SyntaxError
        end
      end
      raise EOFError, "End of input found when recovering" if @lexer.first.nil?
      debug("recovery", "current token: #{@lexer.first.inspect}", depth: @options[:depth])

      unless e.is_a?(Recovery)
        # Get the list of follows for this sequence, this production and the stacked productions.
        debug("recovery", "stack follows:", depth: @options[:depth])
        @prod_stack.reverse.each do |prod|
          debug("recovery", level: 1, depth: @options[:depth]) {"  #{prod[:prod]}: #{prod[:recover_to].inspect}"}
        end
      end

      # Find all follows to the top of the stack
      follows = @prod_stack.map {|prod| Array(prod[:recover_to])}.flatten.compact.uniq

      # Skip tokens until one is found in follows
      while (token = (@lexer.first rescue @lexer.recover)) && follows.none? {|t| token === t}
        skipped = @lexer.shift
        debug("recovery", depth: @options[:depth]) {"skip #{skipped.inspect}"}
      end
      debug("recovery", depth: @options[:depth]) {"found #{token.inspect} in follows"}

      # Re-raise the error unless token is a follows of this production
      raise Recovery unless Array(recover_to).any? {|t| token === t}

      # Skip that token to get something reasonable to start the next production with
      @lexer.shift
    ensure
      progress("#{production}(finish)", depth: options[:depth])
      @options[:depth] -= 1
      @prod_stack.pop
    end
+
938
+ def progress(*args, &block)
939
+ lineno = (options[:token].lineno if options[:token].respond_to?(:lineno)) || (@lexer && @lexer.lineno)
940
+ opts = args.last.is_a?(Hash) ? args.pop : {}
941
+ opts[:level] ||= 1
942
+ opts[:lineno] ||= lineno
943
+ log_info(*args, **opts, &block)
944
+ end
945
+
946
+ def recover(*args, &block)
947
+ lineno = (options[:token].lineno if options[:token].respond_to?(:lineno)) || (@lexer && @lexer.lineno)
948
+ opts = args.last.is_a?(Hash) ? args.pop : {}
949
+ opts[:level] ||= 1
950
+ opts[:lineno] ||= lineno
951
+ log_recover(*args, **opts, &block)
952
+ end
953
+
954
+ def debug(*args, &block)
955
+ lineno = (options[:token].lineno if options[:token].respond_to?(:lineno)) || (@lexer && @lexer.lineno)
956
+ opts = args.last.is_a?(Hash) ? args.pop : {}
957
+ opts[:level] ||= 0
958
+ opts[:lineno] ||= lineno
959
+ log_debug(*args, **opts, &block)
960
+ end
961
+
962
+ ##
963
+ # Error information, used as level `0` debug messages.
964
+ #
965
+ # @overload error(node, message, options)
966
+ # @param [String] node Relevant location associated with message
967
+ # @param [String] message Error string
968
+ # @param [Hash] options
969
+ # @option options [URI, #to_s] :production
970
+ # @option options [Token] :token
971
+ # @see {#debug}
972
+ def error(*args)
973
+ ctx = ""
974
+ ctx += "(found #{options[:token].inspect})" if options[:token]
975
+ ctx += ", production = #{options[:production].inspect}" if options[:production]
976
+ lineno = (options[:token].lineno if options[:token].respond_to?(:lineno)) || (@lexer && @lexer.lineno)
977
+ log_error(*args, ctx,
978
+ lineno: lineno,
979
+ token: options[:token],
980
+ production: options[:production],
981
+ depth: options[:depth],
982
+ exception: SyntaxError,)
983
+ end
984
+
985
+ # Used for internal error recovery
986
+ class Recovery < StandardError; end
987
+
988
+ class SyntaxError < RDF::ReaderError
989
+ ##
990
+ # The current production.
991
+ #
992
+ # @return [Symbol]
993
+ attr_reader :production
994
+
995
+ ##
996
+ # The invalid token which triggered the error.
997
+ #
998
+ # @return [String]
999
+ attr_reader :token
1000
+
1001
+ ##
1002
+ # The line number where the error occurred.
1003
+ #
1004
+ # @return [Integer]
1005
+ attr_reader :lineno
1006
+
1007
+ ##
1008
+ # Initializes a new syntax error instance.
1009
+ #
1010
+ # @param [String, #to_s] message
1011
+ # @param [Hash{Symbol => Object}] options
1012
+ # @option options [Symbol] :production (nil)
1013
+ # @option options [String] :token (nil)
1014
+ # @option options [Integer] :lineno (nil)
1015
+ def initialize(message, **options)
1016
+ @production = options[:production]
1017
+ @token = options[:token]
1018
+ @lineno = options[:lineno] || (@token.lineno if @token.respond_to?(:lineno))
1019
+ super(message.to_s)
1020
+ end
679
1021
  end
680
1022
  end
681
1023
  end