rdf-n3 3.0.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. checksums.yaml +4 -4
  2. data/README.md +198 -76
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/lib/rdf/n3/algebra/builtin.rb +79 -0
  6. data/lib/rdf/n3/algebra/formula.rb +446 -0
  7. data/lib/rdf/n3/algebra/list/append.rb +42 -0
  8. data/lib/rdf/n3/algebra/list/first.rb +24 -0
  9. data/lib/rdf/n3/algebra/list/in.rb +48 -0
  10. data/lib/rdf/n3/algebra/list/iterate.rb +96 -0
  11. data/lib/rdf/n3/algebra/list/last.rb +24 -0
  12. data/lib/rdf/n3/algebra/list/length.rb +24 -0
  13. data/lib/rdf/n3/algebra/list/member.rb +44 -0
  14. data/lib/rdf/n3/algebra/list_operator.rb +96 -0
  15. data/lib/rdf/n3/algebra/log/conclusion.rb +65 -0
  16. data/lib/rdf/n3/algebra/log/conjunction.rb +36 -0
  17. data/lib/rdf/n3/algebra/log/content.rb +34 -0
  18. data/lib/rdf/n3/algebra/log/dtlit.rb +41 -0
  19. data/lib/rdf/n3/algebra/log/equal_to.rb +34 -0
  20. data/lib/rdf/n3/algebra/log/implies.rb +102 -0
  21. data/lib/rdf/n3/algebra/log/includes.rb +70 -0
  22. data/lib/rdf/n3/algebra/log/langlit.rb +41 -0
  23. data/lib/rdf/n3/algebra/log/n3_string.rb +34 -0
  24. data/lib/rdf/n3/algebra/log/not_equal_to.rb +23 -0
  25. data/lib/rdf/n3/algebra/log/not_includes.rb +27 -0
  26. data/lib/rdf/n3/algebra/log/output_string.rb +40 -0
  27. data/lib/rdf/n3/algebra/log/parsed_as_n3.rb +36 -0
  28. data/lib/rdf/n3/algebra/log/semantics.rb +40 -0
  29. data/lib/rdf/n3/algebra/math/absolute_value.rb +36 -0
  30. data/lib/rdf/n3/algebra/math/acos.rb +26 -0
  31. data/lib/rdf/n3/algebra/math/acosh.rb +26 -0
  32. data/lib/rdf/n3/algebra/math/asin.rb +26 -0
  33. data/lib/rdf/n3/algebra/math/asinh.rb +26 -0
  34. data/lib/rdf/n3/algebra/math/atan.rb +26 -0
  35. data/lib/rdf/n3/algebra/math/atanh.rb +26 -0
  36. data/lib/rdf/n3/algebra/math/ceiling.rb +28 -0
  37. data/lib/rdf/n3/algebra/math/cos.rb +40 -0
  38. data/lib/rdf/n3/algebra/math/cosh.rb +38 -0
  39. data/lib/rdf/n3/algebra/math/difference.rb +40 -0
  40. data/lib/rdf/n3/algebra/math/equal_to.rb +54 -0
  41. data/lib/rdf/n3/algebra/math/exponentiation.rb +35 -0
  42. data/lib/rdf/n3/algebra/math/floor.rb +28 -0
  43. data/lib/rdf/n3/algebra/math/greater_than.rb +41 -0
  44. data/lib/rdf/n3/algebra/math/less_than.rb +41 -0
  45. data/lib/rdf/n3/algebra/math/negation.rb +38 -0
  46. data/lib/rdf/n3/algebra/math/not_equal_to.rb +25 -0
  47. data/lib/rdf/n3/algebra/math/not_greater_than.rb +25 -0
  48. data/lib/rdf/n3/algebra/math/not_less_than.rb +25 -0
  49. data/lib/rdf/n3/algebra/math/product.rb +20 -0
  50. data/lib/rdf/n3/algebra/math/quotient.rb +36 -0
  51. data/lib/rdf/n3/algebra/math/remainder.rb +35 -0
  52. data/lib/rdf/n3/algebra/math/rounded.rb +26 -0
  53. data/lib/rdf/n3/algebra/math/sin.rb +40 -0
  54. data/lib/rdf/n3/algebra/math/sinh.rb +38 -0
  55. data/lib/rdf/n3/algebra/math/sum.rb +40 -0
  56. data/lib/rdf/n3/algebra/math/tan.rb +40 -0
  57. data/lib/rdf/n3/algebra/math/tanh.rb +38 -0
  58. data/lib/rdf/n3/algebra/not_implemented.rb +13 -0
  59. data/lib/rdf/n3/algebra/resource_operator.rb +122 -0
  60. data/lib/rdf/n3/algebra/str/concatenation.rb +27 -0
  61. data/lib/rdf/n3/algebra/str/contains.rb +33 -0
  62. data/lib/rdf/n3/algebra/str/contains_ignoring_case.rb +33 -0
  63. data/lib/rdf/n3/algebra/str/ends_with.rb +33 -0
  64. data/lib/rdf/n3/algebra/str/equal_ignoring_case.rb +34 -0
  65. data/lib/rdf/n3/algebra/str/format.rb +17 -0
  66. data/lib/rdf/n3/algebra/str/greater_than.rb +38 -0
  67. data/lib/rdf/n3/algebra/str/less_than.rb +33 -0
  68. data/lib/rdf/n3/algebra/str/matches.rb +37 -0
  69. data/lib/rdf/n3/algebra/str/not_equal_ignoring_case.rb +17 -0
  70. data/lib/rdf/n3/algebra/str/not_greater_than.rb +17 -0
  71. data/lib/rdf/n3/algebra/str/not_less_than.rb +17 -0
  72. data/lib/rdf/n3/algebra/str/not_matches.rb +18 -0
  73. data/lib/rdf/n3/algebra/str/replace.rb +35 -0
  74. data/lib/rdf/n3/algebra/str/scrape.rb +35 -0
  75. data/lib/rdf/n3/algebra/str/starts_with.rb +33 -0
  76. data/lib/rdf/n3/algebra/time/day.rb +35 -0
  77. data/lib/rdf/n3/algebra/time/day_of_week.rb +27 -0
  78. data/lib/rdf/n3/algebra/time/gm_time.rb +29 -0
  79. data/lib/rdf/n3/algebra/time/hour.rb +35 -0
  80. data/lib/rdf/n3/algebra/time/in_seconds.rb +59 -0
  81. data/lib/rdf/n3/algebra/time/local_time.rb +29 -0
  82. data/lib/rdf/n3/algebra/time/minute.rb +35 -0
  83. data/lib/rdf/n3/algebra/time/month.rb +35 -0
  84. data/lib/rdf/n3/algebra/time/second.rb +35 -0
  85. data/lib/rdf/n3/algebra/time/timezone.rb +36 -0
  86. data/lib/rdf/n3/algebra/time/year.rb +29 -0
  87. data/lib/rdf/n3/algebra.rb +210 -0
  88. data/lib/rdf/n3/extensions.rb +221 -0
  89. data/lib/rdf/n3/format.rb +66 -1
  90. data/lib/rdf/n3/list.rb +630 -0
  91. data/lib/rdf/n3/reader.rb +774 -497
  92. data/lib/rdf/n3/reasoner.rb +282 -0
  93. data/lib/rdf/n3/refinements.rb +178 -0
  94. data/lib/rdf/n3/repository.rb +332 -0
  95. data/lib/rdf/n3/terminals.rb +78 -0
  96. data/lib/rdf/n3/vocab.rb +36 -3
  97. data/lib/rdf/n3/writer.rb +461 -250
  98. data/lib/rdf/n3.rb +11 -8
  99. metadata +177 -49
  100. data/AUTHORS +0 -1
  101. data/History.markdown +0 -99
  102. data/lib/rdf/n3/patches/array_hacks.rb +0 -53
  103. data/lib/rdf/n3/reader/meta.rb +0 -641
  104. data/lib/rdf/n3/reader/parser.rb +0 -237
data/lib/rdf/n3/reader.rb CHANGED
@@ -1,4 +1,7 @@
1
1
  # coding: utf-8
2
+ require 'rdf/reader'
3
+ require 'ebnf'
4
+
2
5
  module RDF::N3
3
6
  ##
4
7
  # A Notation-3/Turtle parser in Ruby
@@ -9,6 +12,10 @@ module RDF::N3
9
12
  #
10
13
  # Separate pass to create branch_table from n3-selectors.n3
11
14
  #
15
+ # This implementation only supports quickVars at the document scope.
16
+ #
17
+ # Non-distinguished blank node variables are created as part of reasoning.
18
+ #
12
19
  # @todo
13
20
  # * Formulae as RDF::Query representations
14
21
  # * Formula expansion similar to SPARQL Construct
@@ -16,12 +23,41 @@ module RDF::N3
16
23
  # @author [Gregg Kellogg](http://greggkellogg.net/)
17
24
  class Reader < RDF::Reader
18
25
  format Format
26
+ using Refinements
19
27
 
20
28
  include RDF::Util::Logger
21
- include Meta
22
- include Parser
23
-
24
- N3_KEYWORDS = %w(a is of has keywords prefix base true false forSome forAny)
29
+ include EBNF::LL1::Parser
30
+ include Terminals
31
+
32
+ # Nodes used as Formulae graph names
33
+ #
34
+ # @return [Array<RDF::Node>]
35
+ attr_reader :formulae
36
+
37
+ # All nodes allocated to formulae
38
+ #
39
+ # @return [Hash{RDF::Node => RDF::Graph}]
40
+ attr_reader :formula_nodes
41
+
42
+ # Allocated variables by formula
43
+ #
44
+ # @return [Hash{Symbol => RDF::Node}]
45
+ attr_reader :variables
46
+
47
+ ##
48
+ # N3 Reader options
49
+ # @see http://www.rubydoc.info/github/ruby-rdf/rdf/RDF/Reader#options-class_method
50
+ def self.options
51
+ super + [
52
+ RDF::CLI::Option.new(
53
+ symbol: :list_terms,
54
+ datatype: TrueClass,
55
+ default: true,
56
+ control: :checkbox,
57
+ on: ["--list-terms CONTEXT"],
58
+ description: "Use native collections (lists), not first/rest ladder.")
59
+ ]
60
+ end
25
61
 
26
62
  ##
27
63
  # Initializes the N3 reader instance.
@@ -34,43 +70,55 @@ module RDF::N3
34
70
  # @option options [Boolean] :validate (false)
35
71
  # whether to validate the parsed statements and values
36
72
  # @option options [Boolean] :canonicalize (false)
37
- # whether to canonicalize parsed literals
38
- # @option options [Boolean] :intern (true)
39
- # whether to intern all parsed URIs
73
+ # whether to canonicalize parsed literals and URIs.
40
74
  # @option options [Hash] :prefixes (Hash.new)
41
75
  # the prefix mappings to use (not supported by all readers)
76
+ # @option options [Hash] :list_terms (false)
77
+ # represent collections as an `RDF::Term`, rather than an rdf:first/rest ladder.
42
78
  # @return [reader]
43
79
  # @yield [reader] `self`
44
80
  # @yieldparam [RDF::Reader] reader
45
81
  # @yieldreturn [void] ignored
46
82
  # @raise [Error]:: Raises RDF::ReaderError if validating and an error is found
47
- def initialize(input = $stdin, options = {}, &block)
83
+ def initialize(input = $stdin, **options, &block)
48
84
  super do
49
- input.rewind if input.respond_to?(:rewind)
50
- @input = input.respond_to?(:read) ? input : StringIO.new(input.to_s)
51
- @lineno = 0
52
- readline # Prime the pump
53
-
54
- @memo = {}
55
- @keyword_mode = false
56
- @keywords = %w(a is of this has)
57
- @productions = []
58
- @prod_data = []
59
-
60
- @branches = BRANCHES # Get from meta class
61
- @regexps = REGEXPS # Get from meta class
62
-
63
- @formulae = [] # Nodes used as Formluae graph names
64
- @formulae_nodes = {}
65
- @variables = {} # variable definitions along with defining formula
85
+ @options = {
86
+ anon_base: "b0",
87
+ whitespace: WS,
88
+ depth: 0,
89
+ }.merge(@options)
90
+ @prod_stack = []
91
+
92
+ @formulae = []
93
+ @formula_nodes = {}
94
+ @label_uniquifier = "0"
95
+ @bnodes = {}
96
+ @bn_labler = @options[:anon_base].dup
97
+ @bn_mapper = {}
98
+ @variables = {}
66
99
 
67
100
  if options[:base_uri]
68
- log_debug("@uri") { base_uri.inspect}
69
- namespace(nil, uri("#{base_uri}#"))
101
+ progress("base_uri") { base_uri.inspect}
102
+ namespace(nil, iri(base_uri.to_s.match?(%r{[#/]$}) ? base_uri : "#{base_uri}#"))
103
+ end
104
+
105
+ # Prepopulate operator namespaces unless validating
106
+ unless validate?
107
+ namespace(:rdf, RDF.to_uri)
108
+ namespace(:rdfs, RDF::RDFS.to_uri)
109
+ namespace(:xsd, RDF::XSD.to_uri)
110
+ namespace(:crypto, RDF::N3::Crypto.to_uri)
111
+ namespace(:list, RDF::N3::List.to_uri)
112
+ namespace(:log, RDF::N3::Log.to_uri)
113
+ namespace(:math, RDF::N3::Math.to_uri)
114
+ namespace(:rei, RDF::N3::Rei.to_uri)
115
+ #namespace(:string, RDF::N3::String.to_uri)
116
+ namespace(:time, RDF::N3::Time.to_uri)
70
117
  end
71
- log_debug("validate") {validate?.inspect}
72
- log_debug("canonicalize") {canonicalize?.inspect}
73
- log_debug("intern") {intern?.inspect}
118
+ progress("validate") {validate?.inspect}
119
+ progress("canonicalize") {canonicalize?.inspect}
120
+
121
+ @lexer = EBNF::LL1::Lexer.new(input, self.class.patterns, **@options)
74
122
 
75
123
  if block_given?
76
124
  case block.arity
@@ -87,23 +135,29 @@ module RDF::N3
87
135
 
88
136
  ##
89
137
  # Iterates the given block for each RDF statement in the input.
90
- #
91
138
  # @yield [statement]
92
139
  # @yieldparam [RDF::Statement] statement
93
140
  # @return [void]
94
141
  def each_statement(&block)
95
142
  if block_given?
143
+ log_recover
96
144
  @callback = block
97
145
 
98
- parse(START.to_sym)
146
+ begin
147
+ while (@lexer.first rescue true)
148
+ read_n3Doc
149
+ end
150
+ rescue EBNF::LL1::Lexer::Error, SyntaxError, EOFError, Recovery
151
+ # Terminate loop if EOF found while recovering
152
+ end
99
153
 
100
154
  if validate? && log_statistics[:error]
101
155
  raise RDF::ReaderError, "Errors found during processing"
102
156
  end
103
157
  end
104
- enum_for(:each_triple)
158
+ enum_for(:each_statement)
105
159
  end
106
-
160
+
107
161
  ##
108
162
  # Iterates the given block for each RDF triple in the input.
109
163
  #
@@ -112,570 +166,793 @@ module RDF::N3
112
166
  # @yieldparam [RDF::URI] predicate
113
167
  # @yieldparam [RDF::Value] object
114
168
  # @return [void]
115
- def each_triple(&block)
169
+ def each_triple
116
170
  if block_given?
117
171
  each_statement do |statement|
118
- block.call(*statement.to_triple)
172
+ yield(*statement.to_triple)
119
173
  end
120
174
  end
121
175
  enum_for(:each_triple)
122
176
  end
123
-
177
+
124
178
  protected
125
- # Start of production
126
- def onStart(prod)
127
- handler = "#{prod}Start".to_sym
128
- log_debug("#{handler}(#{respond_to?(handler, true)})", prod)
129
- @productions << prod
130
- send(handler, prod) if respond_to?(handler, true)
131
- end
132
-
133
- # End of production
134
- def onFinish
135
- prod = @productions.pop()
136
- handler = "#{prod}Finish".to_sym
137
- log_debug("#{handler}(#{respond_to?(handler, true)})") {"#{prod}: #{@prod_data.last.inspect}"}
138
- send(handler) if respond_to?(handler, true)
139
- end
140
-
141
- # Process of a token
142
- def onToken(prod, tok)
143
- unless @productions.empty?
144
- parentProd = @productions.last
145
- handler = "#{parentProd}Token".to_sym
146
- log_debug("#{handler}(#{respond_to?(handler, true)})") {"#{prod}, #{tok}: #{@prod_data.last.inspect}"}
147
- send(handler, prod, tok) if respond_to?(handler, true)
148
- else
149
- error("Token has no parent production")
179
+
180
+ # Terminals passed to lexer. Order matters!
181
+
182
+ # @!parse none
183
+ terminal(:ANON, ANON)
184
+ terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL)
185
+ terminal(:IRIREF, IRIREF, unescape: true)
186
+ terminal(:DOUBLE, DOUBLE)
187
+ terminal(:DECIMAL, DECIMAL)
188
+ terminal(:INTEGER, INTEGER)
189
+ terminal(:PNAME_LN, PNAME_LN, unescape: true)
190
+ terminal(:PNAME_NS, PNAME_NS)
191
+ terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, unescape: true, partial_regexp: /^'''/)
192
+ terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, unescape: true, partial_regexp: /^"""/)
193
+ terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, unescape: true)
194
+ terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, unescape: true)
195
+
196
+ # String terminals
197
+ terminal(nil, %r(
198
+ [\(\){},.;\[\]a!]
199
+ | \^\^|\^
200
+ |<-|<=|=>|=
201
+ | true|false
202
+ | has|is|of
203
+ )x)
204
+
205
+ terminal(:PREFIX, PREFIX)
206
+ terminal(:BASE, BASE)
207
+ terminal(:LANGTAG, LANGTAG)
208
+ terminal(:QUICK_VAR_NAME, QUICK_VAR_NAME, unescape: true)
209
+
210
+ private
211
+ ##
212
+ # Read statements and directives
213
+ #
214
+ # [1] n3Doc ::= (n3Statement '.' | sparqlDirective)*
215
+ #
216
+ # @return [void]
217
+ def read_n3Doc
218
+ prod(:n3Doc, %w{.}) do
219
+ error("read_n3Doc", "Unexpected end of file") unless token = @lexer.first
220
+ case token.type
221
+ when :BASE, :PREFIX
222
+ read_directive || error("Failed to parse directive", production: :directive, token: token)
223
+ else
224
+ read_n3Statement
225
+ if !log_recovering? || @lexer.first === '.'
226
+ # If recovering, we will have eaten the closing '.'
227
+ token = @lexer.shift
228
+ unless token && token.value == '.'
229
+ error("Expected '.' following n3Statement", production: :n3Statement, token: token)
230
+ end
231
+ end
232
+ end
150
233
  end
151
234
  end
152
-
153
- def booleanToken(prod, tok)
154
- lit = RDF::Literal.new(tok.delete("@"), datatype: RDF::XSD.boolean, validate: validate?, canonicalize: canonicalize?)
155
- add_prod_data(:literal, lit)
156
- end
157
-
158
- def declarationStart(prod)
159
- @prod_data << {}
160
- end
161
-
162
- def declarationToken(prod, tok)
163
- case prod
164
- when "@prefix", "@base", "@keywords"
165
- add_prod_data(:prod, prod)
166
- when "prefix"
167
- add_prod_data(:prefix, tok[0..-2])
168
- when "explicituri"
169
- add_prod_data(:explicituri, tok[1..-2])
170
- else
171
- add_prod_data(prod.to_sym, tok)
235
+
236
+
237
+ ##
238
+ # Read statements and directives
239
+ #
240
+ # [2] n3Statement ::= n3Directive | triples | existential | universal
241
+ #
242
+ # @return [void]
243
+ def read_n3Statement
244
+ prod(:n3Statement, %w{.}) do
245
+ error("read_n3Doc", "Unexpected end of file") unless token = @lexer.first
246
+ read_triples ||
247
+ error("Expected token", production: :statement, token: token)
172
248
  end
173
249
  end
174
250
 
175
- def declarationFinish
176
- decl = @prod_data.pop
177
- case decl[:prod]
178
- when "@prefix"
179
- uri = process_uri(decl[:explicituri])
180
- namespace(decl[:prefix], uri)
181
- when "@base"
182
- # Base, set or update document URI
183
- uri = decl[:explicituri]
184
- options[:base_uri] = process_uri(uri)
185
-
186
- # The empty prefix "" is by default , bound to "#" -- the local namespace of the file.
187
- # The parser behaves as though there were a
188
- # @prefix : <#>.
189
- # just before the file.
190
- # This means that <#foo> can be written :foo and using @keywords one can reduce that to foo.
191
-
192
- namespace(nil, uri.match(/[\/\#]$/) ? base_uri : process_uri("#{uri}#"))
193
- log_debug("declarationFinish[@base]") {"@base=#{base_uri}"}
194
- when "@keywords"
195
- log_debug("declarationFinish[@keywords]") {@keywords.inspect}
196
- # Keywords are handled in tokenizer and maintained in @keywords array
197
- if (@keywords & N3_KEYWORDS) != @keywords
198
- error("Undefined keywords used: #{(@keywords - N3_KEYWORDS).to_sentence}") if validate?
251
+ ##
252
+ # Read base and prefix directives
253
+ #
254
+ # [3] n3Directive ::= prefixID | base
255
+ #
256
+ # @return [void]
257
+ def read_directive
258
+ prod(:directive, %w{.}) do
259
+ token = @lexer.first
260
+ case token.type
261
+ when :BASE
262
+ prod(:base) do
263
+ @lexer.shift
264
+ terminated = token.value == '@base'
265
+ iri = @lexer.shift
266
+ error("Expected IRIREF", production: :base, token: iri) unless iri === :IRIREF
267
+ @options[:base_uri] = process_iri(iri.value[1..-2].gsub(/\s/, ''))
268
+ namespace(nil, base_uri.to_s.end_with?('#') ? base_uri : iri("#{base_uri}#"))
269
+ error("base", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@base'
270
+
271
+ if terminated
272
+ error("base", "Expected #{token} to be terminated") unless @lexer.first === '.'
273
+ @lexer.shift
274
+ elsif @lexer.first === '.'
275
+ error("base", "Expected #{token} not to be terminated")
276
+ else
277
+ true
278
+ end
279
+ end
280
+ when :PREFIX
281
+ prod(:prefixID, %w{.}) do
282
+ @lexer.shift
283
+ pfx, iri = @lexer.shift, @lexer.shift
284
+ terminated = token.value == '@prefix'
285
+ error("Expected PNAME_NS", production: :prefix, token: pfx) unless pfx === :PNAME_NS
286
+ error("Expected IRIREF", production: :prefix, token: iri) unless iri === :IRIREF
287
+ debug("prefixID", depth: options[:depth]) {"Defined prefix #{pfx.inspect} mapping to #{iri.inspect}"}
288
+ namespace(pfx.value[0..-2], process_iri(iri.value[1..-2].gsub(/\s/, '')))
289
+ error("prefixId", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@prefix'
290
+
291
+ if terminated
292
+ error("prefixID", "Expected #{token} to be terminated") unless @lexer.first === '.'
293
+ @lexer.shift
294
+ elsif @lexer.first === '.'
295
+ error("prefixID", "Expected #{token} not to be terminated")
296
+ else
297
+ true
298
+ end
299
+ end
199
300
  end
200
- @userkeys = true
201
- else
202
- error("declarationFinish: FIXME #{decl.inspect}")
203
301
  end
204
302
  end
205
-
206
- # Document start, instantiate
207
- def documentStart(prod)
208
- @formulae.push(nil)
209
- @prod_data << {}
210
- end
211
-
212
- def dtlangToken(prod, tok)
213
- add_prod_data(:langcode, tok) if prod == "langcode"
214
- end
215
-
216
- def existentialStart(prod)
217
- @prod_data << {}
218
- end
219
303
 
220
- # Apart from the set of statements, a formula also has a set of URIs of symbols which are universally quantified,
221
- # and a set of URIs of symbols which are existentially quantified.
222
- # Variables are then in general symbols which have been quantified.
304
+ ##
305
+ # Read triples
223
306
  #
224
- # Here we allocate a variable (making up a name) and record with the defining formula. Quantification is done
225
- # when the formula is completed against all in-scope variables
226
- def existentialFinish
227
- pd = @prod_data.pop
228
- forSome = Array(pd[:symbol])
229
- forSome.each do |term|
230
- @variables[term.to_s] = {formula: @formulae.last, var: RDF::Node.new(term.to_s.split(/[\/#]/).last)}
231
- end
232
- end
233
-
234
- def expressionStart(prod)
235
- @prod_data << {}
236
- end
237
-
238
- # Process path items, and push on the last object for parent processing
239
- def expressionFinish
240
- expression = @prod_data.pop
241
-
242
- # If we're in teh middle of a pathtail, append
243
- if @prod_data.last[:pathtail] && expression[:pathitem] && expression[:pathtail]
244
- path_list = [expression[:pathitem]] + expression[:pathtail]
245
- log_debug("expressionFinish(pathtail)") {"set pathtail to #{path_list.inspect}"}
246
- @prod_data.last[:pathtail] = path_list
247
-
248
- dir_list = [expression[:direction]] if expression[:direction]
249
- dir_list += expression[:directiontail] if expression[:directiontail]
250
- @prod_data.last[:directiontail] = dir_list if dir_list
251
- elsif expression[:pathitem] && expression[:pathtail]
252
- add_prod_data(:expression, process_path(expression))
253
- elsif expression[:pathitem]
254
- add_prod_data(:expression, expression[:pathitem])
255
- else
256
- error("expressionFinish: FIXME #{expression.inspect}")
307
+ # [9] triples ::= subject predicateObjectList?
308
+ #
309
+ # @return [Object] returns the last IRI matched, or subject BNode on predicateObjectList?
310
+ def read_triples
311
+ prod(:triples, %w{.}) do
312
+ error("read_triples", "Unexpected end of file") unless token = @lexer.first
313
+ subject = case token.type || token.value
314
+ when '['
315
+ # blankNodePropertyList predicateObjectList?
316
+ read_blankNodePropertyList || error("Failed to parse blankNodePropertyList", production: :triples, token: @lexer.first)
317
+ else
318
+ # subject predicateObjectList
319
+ read_path || error("Failed to parse subject", production: :triples, token: @lexer.first)
320
+ end
321
+ read_predicateObjectList(subject) || subject
257
322
  end
258
323
  end
259
-
260
- def literalStart(prod)
261
- @prod_data << {}
262
- end
263
-
264
- def literalToken(prod, tok)
265
- tok = tok[0, 3] == '"""' ? tok[3..-4] : tok[1..-2]
266
- add_prod_data(:string, tok)
267
- end
268
-
269
- def literalFinish
270
- lit = @prod_data.pop
271
- content = RDF::NTriples.unescape(lit[:string])
272
- language = lit[:langcode] if lit[:langcode]
273
- language = language.downcase if language && canonicalize?
274
- datatype = lit[:symbol]
275
-
276
- lit = RDF::Literal.new(content, language: language, datatype: datatype, validate: validate?, canonicalize: canonicalize?)
277
- add_prod_data(:literal, lit)
278
- end
279
-
280
- def objectStart(prod)
281
- @prod_data << {}
282
- end
283
-
284
- def objectFinish
285
- object = @prod_data.pop
286
- if object[:expression]
287
- add_prod_data(:object, object[:expression])
288
- else
289
- error("objectFinish: FIXME #{object.inspect}")
324
+
325
+ ##
326
+ # Read predicateObjectList
327
+ #
328
+ # [10] predicateObjectList ::= verb objectList (';' (verb objectList)?)*
329
+ #
330
+ # @param [RDF::Resource] subject
331
+ # @return [RDF::URI] the last matched verb
332
+ def read_predicateObjectList(subject)
333
+ return if @lexer.first.nil? || %w(. }).include?(@lexer.first.value)
334
+ prod(:predicateObjectList, %{;}) do
335
+ last_verb = nil
336
+ loop do
337
+ verb, invert = read_verb
338
+ break unless verb
339
+ last_verb = verb
340
+ prod(:_predicateObjectList_2) do
341
+ read_objectList(subject, verb, invert) || error("Expected objectList", production: :predicateObjectList, token: @lexer.first)
342
+ end
343
+ break unless @lexer.first === ';'
344
+ @lexer.shift while @lexer.first === ';'
345
+ end
346
+ last_verb
290
347
  end
291
348
  end
292
-
293
- def pathitemStart(prod)
294
- @prod_data << {}
295
- end
296
-
297
- def pathitemToken(prod, tok)
298
- case prod
299
- when "numericliteral"
300
- nl = RDF::NTriples.unescape(tok)
301
- datatype = case nl
302
- when /e/i then RDF::XSD.double
303
- when /\./ then RDF::XSD.decimal
304
- else RDF::XSD.integer
349
+
350
+ ##
351
+ # Read objectList
352
+ #
353
+ # [11] objectList ::= object (',' object)*
354
+ #
355
+ # @return [RDF::Term] the last matched subject
356
+ def read_objectList(subject, predicate, invert)
357
+ prod(:objectList, %{,}) do
358
+ last_object = nil
359
+ while object = prod(:_objectList_2) {read_path}
360
+ last_object = object
361
+
362
+ if invert
363
+ add_statement(:objectList, object, predicate, subject)
364
+ else
365
+ add_statement(:objectList, subject, predicate, object)
366
+ end
367
+
368
+ break unless @lexer.first === ','
369
+ @lexer.shift while @lexer.first === ','
305
370
  end
306
-
307
- lit = RDF::Literal.new(nl, datatype: datatype, validate: validate?, canonicalize: canonicalize?)
308
- add_prod_data(:literal, lit)
309
- when "quickvariable"
310
- # There is a also a shorthand syntax ?x which is the same as :x except that it implies that x is
311
- # universally quantified not in the formula but in its parent formula
312
- uri = process_qname(tok.sub('?', ':'))
313
- @variables[uri.to_s] = { formula: @formulae[-2], var: univar(uri) }
314
- add_prod_data(:symbol, uri)
315
- when "boolean"
316
- lit = RDF::Literal.new(tok.delete("@"), datatype: RDF::XSD.boolean, validate: validate?, canonicalize: canonicalize?)
317
- add_prod_data(:literal, lit)
318
- when "[", "("
319
- # Push on state for content of blank node
320
- @prod_data << {}
321
- when "]", ")"
322
- # Construct
323
- symbol = process_anonnode(@prod_data.pop)
324
- add_prod_data(:symbol, symbol)
325
- when "{"
326
- # A new formula, push on a node as a named graph
327
- node = RDF::Node.new
328
- @formulae << node
329
- @formulae_nodes[node] = true
330
- when "}"
331
- # Pop off the formula, and remove any variables defined in this graph
332
- formula = @formulae.pop
333
- @variables.delete_if {|k, v| v[:formula] == formula}
334
- add_prod_data(:symbol, formula)
335
- else
336
- error("pathitemToken(#{prod}, #{tok}): FIXME")
371
+ last_object
337
372
  end
338
373
  end
339
374
 
340
- def pathitemFinish
341
- pathitem = @prod_data.pop
342
- if pathitem[:pathlist]
343
- error("pathitemFinish(pathlist): FIXME #{pathitem.inspect}")
344
- elsif pathitem[:propertylist]
345
- error("pathitemFinish(propertylist): FIXME #{pathitem.inspect}")
346
- elsif pathitem[:symbol] || pathitem[:literal]
347
- add_prod_data(:pathitem, pathitem[:symbol] || pathitem[:literal])
348
- else
349
- error("pathitemFinish: FIXME #{pathitem.inspect}")
375
+ ##
376
+ # Read a verb
377
+ #
378
+ # [12] verb = predicate
379
+ # | 'a'
380
+ # | 'has' expression
381
+ # | 'is' expression 'of'
382
+ # | '<-' expression
383
+ # | '<='
384
+ # | '=>'
385
+ # | '='
386
+ #
387
+ # @return [RDF::Resource, Boolean] verb and invert?
388
+ def read_verb
389
+ invert = false
390
+ error("read_verb", "Unexpected end of file") unless token = @lexer.first
391
+ verb = case token.type || token.value
392
+ when 'a' then prod(:verb) {@lexer.shift && RDF.type}
393
+ when 'has' then prod(:verb) {@lexer.shift && read_path}
394
+ when 'is' then prod(:verb) {
395
+ @lexer.shift
396
+ invert, v = true, read_path
397
+ error( "Expected 'of'", production: :verb, token: @lexer.first) unless @lexer.first.value == 'of'
398
+ @lexer.shift
399
+ v
400
+ }
401
+ when '<-' then prod(:verb) {
402
+ @lexer.shift
403
+ invert = true
404
+ read_path
405
+ }
406
+ when '<=' then prod(:verb) {
407
+ @lexer.shift
408
+ invert = true
409
+ RDF::N3::Log.implies
410
+ }
411
+ when '=>' then prod(:verb) {@lexer.shift && RDF::N3::Log.implies}
412
+ when '=' then prod(:verb) {@lexer.shift && RDF::OWL.sameAs}
413
+ else read_path
350
414
  end
415
+ [verb, invert]
351
416
  end
352
-
353
- def pathlistStart(prod)
354
- @prod_data << {pathlist: []}
355
- end
356
-
357
- def pathlistFinish
358
- pathlist = @prod_data.pop
359
- # Flatten propertylist into an array
360
- expr = @prod_data.last.delete(:expression)
361
- add_prod_data(:pathlist, expr) if expr
362
- add_prod_data(:pathlist, pathlist[:pathlist]) if pathlist[:pathlist]
363
- end
364
-
365
- def pathtailStart(prod)
366
- @prod_data << {pathtail: []}
367
- end
368
-
369
- def pathtailToken(prod, tok)
370
- case tok
371
- when "!", "."
372
- add_prod_data(:direction, :forward)
373
- when "^"
374
- add_prod_data(:direction, :reverse)
417
+
418
+ ##
419
+ # subjects, predicates and objects are all expressions, which are all paths
420
+ #
421
+ # [13] subject ::= expression
422
+ # [14] predicate ::= expression
423
+ # [16] expression ::= path
424
+ # [17] path ::= pathItem ('!' path | '^' path)?
425
+ #
426
+ # @return [RDF::Resource]
427
+ def read_path
428
+ return if @lexer.first.nil? || %w/. } ) ]/.include?(@lexer.first.value)
429
+ prod(:path) do
430
+ pathtail = path = {}
431
+ loop do
432
+ pathtail[:pathitem] = prod(:pathItem) do
433
+ read_iri ||
434
+ read_blankNode ||
435
+ read_quickVar ||
436
+ read_collection ||
437
+ read_blankNodePropertyList ||
438
+ read_literal ||
439
+ read_formula
440
+ end
441
+
442
+ break if @lexer.first.nil? || !%w(! ^).include?(@lexer.first.value)
443
+ prod(:_path_2) do
444
+ pathtail[:direction] = @lexer.shift.value == '!' ? :forward : :reverse
445
+ pathtail = pathtail[:pathtail] = {}
446
+ end
447
+ end
448
+
449
+ # Returns the first object in the path
450
+ # FIXME: what if it's a verb?
451
+ process_path(path)
375
452
  end
376
453
  end
377
-
378
- def pathtailFinish
379
- pathtail = @prod_data.pop
380
- add_prod_data(:pathtail, pathtail[:pathtail])
381
- add_prod_data(:direction, pathtail[:direction]) if pathtail[:direction]
382
- add_prod_data(:directiontail, pathtail[:directiontail]) if pathtail[:directiontail]
383
- end
384
-
385
- def propertylistStart(prod)
386
- @prod_data << {}
387
- end
388
-
389
- def propertylistFinish
390
- propertylist = @prod_data.pop
391
- # Flatten propertylist into an array
392
- ary = [propertylist, propertylist.delete(:propertylist)].flatten.compact
393
- @prod_data.last[:propertylist] = ary
394
- end
395
-
396
- def simpleStatementStart(prod)
397
- @prod_data << {}
398
- end
399
-
400
- # Completion of Simple Statement, all productions include :subject, and :propertyList
401
- def simpleStatementFinish
402
- statement = @prod_data.pop
403
-
404
- subject = statement[:subject]
405
- properties = Array(statement[:propertylist])
406
- properties.each do |p|
407
- predicate = p[:verb]
408
- next unless predicate
409
- log_debug("simpleStatementFinish(pred)") {predicate.to_s}
410
- error(%(Illegal statment: "#{predicate}" missing object)) unless p.has_key?(:object)
411
- objects = Array(p[:object])
412
- objects.each do |object|
413
- if p[:invert]
414
- add_triple("simpleStatementFinish", object, predicate, subject)
454
+
455
+ ##
456
+ # Read a literal
457
+ #
458
+ # [19] literal ::= rdfLiteral | numericLiteral | BOOLEAN_LITERAL
459
+ #
460
+ # @return [RDF::Literal]
461
+ def read_literal
462
+ error("Unexpected end of file", production: :literal) unless token = @lexer.first
463
+ case token.type || token.value
464
+ when :INTEGER then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.integer, canonicalize: canonicalize?)}
465
+ when :DECIMAL
466
+ prod(:literal) do
467
+ value = @lexer.shift.value
468
+ value = "0#{value}" if value.start_with?(".")
469
+ literal(value, datatype: RDF::XSD.decimal, canonicalize: canonicalize?)
470
+ end
471
+ when :DOUBLE then prod(:literal) {literal(@lexer.shift.value.sub(/\.([eE])/, '.0\1'), datatype: RDF::XSD.double, canonicalize: canonicalize?)}
472
+ when "true", "false" then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.boolean, canonicalize: canonicalize?)}
473
+ when :STRING_LITERAL_QUOTE, :STRING_LITERAL_SINGLE_QUOTE
474
+ prod(:literal) do
475
+ value = @lexer.shift.value[1..-2]
476
+ error("read_literal", "Unexpected end of file") unless token = @lexer.first
477
+ case token.type || token.value
478
+ when :LANGTAG
479
+ literal(value, language: @lexer.shift.value[1..-1].to_sym)
480
+ when '^^'
481
+ @lexer.shift
482
+ literal(value, datatype: read_iri)
415
483
  else
416
- add_triple("simpleStatementFinish", subject, predicate, object)
484
+ literal(value)
485
+ end
486
+ end
487
+ when :STRING_LITERAL_LONG_QUOTE, :STRING_LITERAL_LONG_SINGLE_QUOTE
488
+ prod(:literal) do
489
+ value = @lexer.shift.value[3..-4]
490
+ error("read_literal", "Unexpected end of file") unless token = @lexer.first
491
+ case token.type || token.value
492
+ when :LANGTAG
493
+ literal(value, language: @lexer.shift.value[1..-1].to_sym)
494
+ when '^^'
495
+ @lexer.shift
496
+ literal(value, datatype: read_iri)
497
+ else
498
+ literal(value)
417
499
  end
418
500
  end
419
501
  end
420
502
  end
421
503
 
422
- def subjectStart(prod)
423
- @prod_data << {}
424
- end
425
-
426
- def subjectFinish
427
- subject = @prod_data.pop
428
-
429
- if subject[:expression]
430
- add_prod_data(:subject, subject[:expression])
431
- else
432
- error("unknown expression type")
504
+ ##
505
+ # Read a blankNodePropertyList
506
+ #
507
+ # [20] blankNodePropertyList ::= '[' predicateObjectList ']'
508
+ #
509
+ # @return [RDF::Node]
510
+ def read_blankNodePropertyList
511
+ token = @lexer.first
512
+ if token === '['
513
+ prod(:blankNodePropertyList, %{]}) do
514
+ @lexer.shift
515
+ progress("blankNodePropertyList", depth: options[:depth], token: token)
516
+ node = bnode
517
+ debug("blankNodePropertyList: subject", depth: options[:depth]) {node.to_sxp}
518
+ read_predicateObjectList(node)
519
+ error("blankNodePropertyList", "Expected closing ']'") unless @lexer.first === ']'
520
+ @lexer.shift
521
+ node
522
+ end
433
523
  end
434
524
  end
435
-
436
- def symbolToken(prod, tok)
437
- term = case prod
438
- when 'explicituri'
439
- process_uri(tok[1..-2])
440
- when 'qname'
441
- process_qname(tok)
442
- else
443
- error("symbolToken(#{prod}, #{tok}): FIXME #{term.inspect}")
525
+
526
+ ##
527
+ # Read a collection (`RDF::List`)
528
+ #
529
+ # [21] collection ::= '(' object* ')'
530
+ #
531
+ # If the `list_terms` option is given, the resulting resource is a list, otherwise, it is the list subject, and the first/rest entries are also emitted.
532
+ # @return [RDF::Node]
533
+ def read_collection
534
+ if @lexer.first === '('
535
+ prod(:collection, %{)}) do
536
+ @lexer.shift
537
+ token = @lexer.first
538
+ progress("collection", depth: options[:depth]) {"token: #{token.inspect}"}
539
+ objects = []
540
+ while @lexer.first.value != ')' && (object = read_path)
541
+ objects << object
542
+ end
543
+ error("collection", "Expected closing ')'") unless @lexer.first === ')'
544
+ @lexer.shift
545
+ list = RDF::N3::List.new(values: objects)
546
+ if options[:list_terms]
547
+ list
548
+ else
549
+ list.each_statement do |statement|
550
+ add_statement("collection", *statement.to_a)
551
+ end
552
+ list.subject
553
+ end
554
+ end
444
555
  end
445
-
446
- add_prod_data(:symbol, term)
447
556
  end
448
557
 
449
- def universalStart(prod)
450
- @prod_data << {}
558
+ ##
559
+ # Read a formula
560
+ #
561
+ # [22] formula ::= '{' formulaContent? '}'
562
+ # [23] formulaContent ::= n3Statement ('.' formulaContent?)?
563
+ #
564
+ # @return [RDF::Node]
565
+ def read_formula
566
+ if @lexer.first === '{'
567
+ prod(:formula, %(})) do
568
+ @lexer.shift
569
+ node = RDF::Node.intern("_form_#{unique_label}")
570
+ formulae.push(node)
571
+ formula_nodes[node] = true
572
+ debug(:formula, depth: @options[:depth]) {"id: #{node}, depth: #{formulae.length}"}
573
+
574
+ read_formulaContent
575
+
576
+ # Pop off the formula
577
+ # Result is the BNode associated with the formula
578
+ debug(:formula, depth: @options[:depth]) {"pop: #{formulae.last}, depth: #{formulae.length}"}
579
+ error("collection", "Expected closing '}'") unless @lexer.shift === '}'
580
+
581
+ formulae.pop
582
+ end
583
+ end
451
584
  end
452
585
 
453
- # Apart from the set of statements, a formula also has a set of URIs of symbols which are universally quantified,
454
- # and a set of URIs of symbols which are existentially quantified.
455
- # Variables are then in general symbols which have been quantified.
586
+ ##
587
+ # Read formula content, similaer to n3Statement
588
+ #
589
+ # [23] formulaContent ::= n3Statement ('.' formulaContent?)?
456
590
  #
457
- # Here we allocate a variable (making up a name) and record with the defining formula. Quantification is done
458
- # when the formula is completed against all in-scope variables
459
- def universalFinish
460
- pd = @prod_data.pop
461
- forAll = Array(pd[:symbol])
462
- forAll.each do |term|
463
- @variables[term.to_s] = { formula: @formulae.last, var: univar(term) }
591
+ # @return [void]
592
+ def read_formulaContent
593
+ return if @lexer.first === '}' # Allow empty formula
594
+ prod(:formulaContent, %w(. })) do
595
+ loop do
596
+ token = @lexer.first
597
+ error("read_formulaContent", "Unexpected end of file") unless token
598
+ case token.type
599
+ when :BASE, :PREFIX
600
+ read_directive || error("Failed to parse directive", production: :directive, token: token)
601
+ break if @lexer.first === '}'
602
+ else
603
+ read_n3Statement
604
+ token = @lexer.first
605
+ case token.value
606
+ when '.'
607
+ @lexer.shift
608
+ # '.' optional at end of formulaContent
609
+ break if @lexer.first === '}'
610
+ when '}'
611
+ break
612
+ else
613
+ error("Expected '.' or '}' following n3Statement", production: :formulaContent, token: token)
614
+ end
615
+ end
616
+ end
464
617
  end
465
618
  end
466
619
 
467
- def verbStart(prod)
468
- @prod_data << {}
469
- end
470
-
471
- def verbToken(prod, tok)
472
- term = case prod
473
- when '<='
474
- add_prod_data(:expression, RDF::LOG.implies)
475
- add_prod_data(:invert, true)
476
- when '=>'
477
- add_prod_data(:expression, RDF::LOG.implies)
478
- when '='
479
- add_prod_data(:expression, RDF::OWL.sameAs)
480
- when '@a'
481
- add_prod_data(:expression, RDF.type)
482
- when '@has', "@of"
483
- # Syntactic sugar
484
- when '@is'
485
- add_prod_data(:invert, true)
486
- else
487
- error("verbToken(#{prod}, #{tok}): FIXME #{term.inspect}")
620
+ ##
621
+ # Read an IRI
622
+ #
623
+ # (rule iri "26" (alt IRIREF prefixedName))
624
+ #
625
+ # @return [RDF::URI]
626
+ def read_iri
627
+ token = @lexer.first
628
+ case token && token.type
629
+ when :IRIREF then prod(:iri) {process_iri(@lexer.shift.value[1..-2].gsub(/\s+/m, ''))}
630
+ when :PNAME_LN, :PNAME_NS then prod(:prefixedName) {process_pname(*@lexer.shift.value)}
488
631
  end
489
-
490
- add_prod_data(:symbol, term)
491
632
  end
492
633
 
493
- def verbFinish
494
- verb = @prod_data.pop
495
- if verb[:expression]
496
- error("Literal may not be used as a predicate") if verb[:expression].is_a?(RDF::Literal)
497
- error("Formula may not be used as a peredicate") if @formulae_nodes.has_key?(verb[:expression])
498
- add_prod_data(:verb, verb[:expression])
499
- add_prod_data(:invert, true) if verb[:invert]
500
- else
501
- error("verbFinish: FIXME #{verb.inspect}")
634
+ ##
635
+ # Read a blank node
636
+ #
637
+ # [29] blankNode ::= BLANK_NODE_LABEL | ANON
638
+ #
639
+ # @return [RDF::Node]
640
+ def read_blankNode
641
+ token = @lexer.first
642
+ case token && token.type
643
+ when :BLANK_NODE_LABEL then prod(:blankNode) {bnode(@lexer.shift.value[2..-1])}
644
+ when :ANON then @lexer.shift && prod(:blankNode) {bnode}
502
645
  end
503
646
  end
504
-
505
- private
506
-
507
- ###################
508
- # Utility Functions
509
- ###################
510
647
 
511
- def process_anonnode(anonnode)
512
- log_debug("process_anonnode") {anonnode.inspect}
513
-
514
- if anonnode[:propertylist]
515
- properties = anonnode[:propertylist]
516
- bnode = RDF::Node.new
517
- properties.each do |p|
518
- predicate = p[:verb]
519
- log_debug("process_anonnode(verb)") {predicate.inspect}
520
- objects = Array(p[:object])
521
- objects.each { |object| add_triple("anonnode", bnode, predicate, object) }
522
- end
523
- bnode
524
- elsif anonnode[:pathlist]
525
- objects = Array(anonnode[:pathlist])
526
- list = RDF::List[*objects]
527
- list.each_statement do |statement|
528
- next if statement.predicate == RDF.type && statement.object == RDF.List
529
- add_triple("anonnode(list)", statement.subject, statement.predicate, statement.object)
648
+ ##
649
+ # Read a quickVar, having global scope.
650
+ #
651
+ # [30] quickVar ::= QUICK_VAR_NAME
652
+ #
653
+ # @return [RDF::Query::Variable]
654
+ def read_quickVar
655
+ if @lexer.first.type == :QUICK_VAR_NAME
656
+ prod(:quickVar) do
657
+ token = @lexer.shift
658
+ value = token.value.sub('?', '')
659
+ variables[value] ||= RDF::Query::Variable.new(value)
530
660
  end
531
- list.subject
532
661
  end
533
662
  end
534
663
 
664
+ ###################
665
+ # Utility Functions
666
+ ###################
667
+
535
668
  # Process a path, such as:
536
- # :a.:b means [is :b of :a] Deprecated
537
669
  # :a!:b means [is :b of :a] => :a :b []
538
670
  # :a^:b means [:b :a] => [] :b :a
539
671
  #
540
672
  # Create triple and return property used for next iteration
541
- def process_path(expression)
542
- log_debug("process_path") {expression.inspect}
543
-
544
- pathitem = expression[:pathitem]
545
- pathtail = expression[:pathtail]
546
-
547
- direction_list = [expression[:direction], expression[:directiontail]].flatten.compact
548
-
549
- pathtail.each do |pred|
550
- direction = direction_list.shift
551
- bnode = RDF::Node.new
673
+ #
674
+ # Result is last created bnode
675
+ def process_path(path)
676
+ pathitem, direction, pathtail = path[:pathitem], path[:direction], path[:pathtail]
677
+ debug("process_path", depth: @options[:depth]) {path.inspect}
678
+
679
+ while pathtail
680
+ bnode = bnode()
681
+ pred = pathtail.is_a?(RDF::Term) ? pathtail : pathtail[:pathitem]
552
682
  if direction == :reverse
553
- add_triple("process_path(reverse)", bnode, pred, pathitem)
683
+ add_statement("process_path(reverse)", bnode, pred, pathitem)
554
684
  else
555
- add_triple("process_path(forward)", pathitem, pred, bnode)
685
+ add_statement("process_path(forward)", pathitem, pred, bnode)
556
686
  end
557
687
  pathitem = bnode
688
+ direction = pathtail[:direction] if pathtail.is_a?(Hash)
689
+ pathtail = pathtail.is_a?(Hash) && pathtail[:pathtail]
558
690
  end
559
691
  pathitem
560
692
  end
561
693
 
562
- def process_uri(uri)
563
- uri(base_uri, RDF::NTriples.unescape(uri))
694
+ def process_iri(iri)
695
+ iri(base_uri, iri.to_s)
564
696
  end
565
-
566
- def process_qname(tok)
567
- if tok.include?(":")
568
- prefix, name = tok.split(":")
569
- elsif @userkeys
570
- # If the @keywords directive is given, the keywords given will thereafter be recognized
571
- # without a "@" prefix, and anything else is a local name in the default namespace.
572
- prefix, name = "", tok
573
- elsif %w(true false).include?(tok)
574
- # The words true and false are boolean literals.
575
- #
576
- # They were added to Notation3 in 2006-02 in discussion with the SPARQL language developers, the Data
577
- # Access Working Group. Note that no existing documents will have used a naked true or false word, without a
578
- # @keyword statement which would make it clear that they were not to be treated as keywords. Furthermore any
579
- # old parser encountering true or false naked or in a @keywords
580
- return RDF::Literal.new(tok, datatype: RDF::XSD.boolean)
581
- else
582
- error("Set user @keywords to use barenames.")
583
- end
584
697
 
585
- uri = if prefix(prefix)
586
- log_debug('process_qname(ns)') {"#{prefix(prefix)}, #{name}"}
698
+ def process_pname(value)
699
+ prefix, name = value.split(":", 2)
700
+
701
+ iri = if prefix(prefix)
702
+ #debug('process_pname(ns)', depth: @options[:depth]) {"#{prefix(prefix)}, #{name}"}
587
703
  ns(prefix, name)
588
- elsif prefix == '_'
589
- log_debug('process_qname(bnode)', name)
590
- bnode(name)
704
+ elsif prefix && !prefix.empty?
705
+ error("process_pname", "Use of undefined prefix #{prefix.inspect}")
706
+ ns(nil, name)
591
707
  else
592
- log_debug('process_qname(default_ns)', name)
593
- namespace(nil, uri("#{base_uri}#")) unless prefix(nil)
594
708
  ns(nil, name)
595
709
  end
596
- log_debug('process_qname') {uri.inspect}
597
- uri
598
- end
599
-
600
- # Add values to production data, values aranged as an array
601
- def add_prod_data(sym, value)
602
- case @prod_data.last[sym]
603
- when nil
604
- @prod_data.last[sym] = value
605
- when Array
606
- @prod_data.last[sym] += Array(value)
607
- else
608
- @prod_data.last[sym] = Array(@prod_data.last[sym]) + Array(value)
710
+ debug('process_pname', depth: @options[:depth]) {iri.inspect}
711
+ iri
712
+ end
713
+
714
+ # Keep track of allocated BNodes. Blank nodes are allocated to the formula.
715
+ # Unnnamed bnodes are created using an incrementing labeler for repeatability.
716
+ def bnode(label = nil)
717
+ form_id = formulae.last ? formulae.last.id : '_bn_ground'
718
+ if label
719
+ # Return previously allocated blank node for.
720
+ @bn_mapper[form_id] ||= {}
721
+ return @bn_mapper[form_id][label] if @bn_mapper[form_id][label]
609
722
  end
610
- end
611
723
 
612
- # Keep track of allocated BNodes
613
- def bnode(value = nil)
614
- @bnode_cache ||= {}
615
- @bnode_cache[value.to_s] ||= RDF::Node.new(value)
724
+ # Get a fresh label
725
+ @bn_labler.succ! while @bnodes[@bn_labler]
726
+
727
+ bn = RDF::Node.intern(@bn_labler.to_sym)
728
+ @bnodes[@bn_labler] = bn
729
+ @bn_mapper[form_id][label] = bn if label
730
+ bn
616
731
  end
617
732
 
618
- def univar(label)
619
- unless label
620
- @unnamed_label ||= "var0"
621
- label = @unnamed_label = @unnamed_label.succ
622
- end
623
- RDF::Query::Variable.new(label.to_s)
733
+ # If not in ground formula, note scope, and if existential
734
+ def univar(label, scope:)
735
+ value = label
736
+ RDF::Query::Variable.new(value)
624
737
  end
625
738
 
626
- # add a statement, object can be literal or URI or bnode
739
+ # add a pattern or statement
627
740
  #
628
741
  # @param [any] node string for showing graph_name
629
- # @param [URI, Node] subject the subject of the statement
630
- # @param [URI] predicate the predicate of the statement
631
- # @param [URI, Node, Literal] object the object of the statement
742
+ # @param [RDF::Term] subject the subject of the statement
743
+ # @param [RDF::URI] predicate the predicate of the statement
744
+ # @param [RDF::Term] object the object of the statement
632
745
  # @return [Statement] Added statement
633
746
  # @raise [RDF::ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_.
634
- def add_triple(node, subject, predicate, object)
635
- graph_name_opts = @formulae.last ? {graph_name: @formulae.last} : {}
636
-
637
- statement = RDF::Statement(subject, predicate, object, graph_name_opts)
638
- log_debug(node) {statement.to_s}
747
+ def add_statement(node, subject, predicate, object)
748
+ statement = if @formulae.last
749
+ # It's a pattern in a formula
750
+ RDF::Query::Pattern.new(subject, predicate, object, graph_name: @formulae.last)
751
+ else
752
+ RDF::Statement(subject, predicate, object)
753
+ end
754
+ debug("statement(#{node})", depth: @options[:depth]) {statement.to_s}
755
+ error("statement(#{node})", "Statement is invalid: #{statement.inspect}") if validate? && statement.invalid?
639
756
  @callback.call(statement)
640
757
  end
641
758
 
642
- def namespace(prefix, uri)
643
- uri = uri.to_s
644
- if uri == '#'
645
- uri = prefix(nil).to_s + '#'
759
+ def namespace(prefix, iri)
760
+ iri = iri.to_s
761
+ if iri == '#'
762
+ iri = prefix(nil).to_s + '#'
646
763
  end
647
- log_debug("namespace") {"'#{prefix}' <#{uri}>"}
648
- prefix(prefix, uri(uri))
764
+ debug("namespace", depth: @options[:depth]) {"'#{prefix}' <#{iri}>"}
765
+ prefix(prefix, iri(iri))
649
766
  end
650
767
 
651
- # Is this an allowable keyword?
652
- def keyword_check(kw)
653
- unless (@keywords || %w(a is of has)).include?(kw)
654
- raise RDF::ReaderError, "unqualified keyword '#{kw}' used without @keyword directive" if validate?
655
- end
656
- end
657
-
658
- # Create URIs
659
- def uri(value, append = nil)
768
+ # Create IRIs
769
+ def iri(value, append = nil)
660
770
  value = RDF::URI(value)
661
771
  value = value.join(append) if append
662
772
  value.validate! if validate? && value.respond_to?(:validate)
663
773
  value.canonicalize! if canonicalize?
664
- value = RDF::URI.intern(value, {}) if intern?
665
-
666
- # Variable substitution for in-scope variables. Variables are in scope if they are defined in anthing other than
667
- # the current formula
668
- var = @variables[value.to_s]
669
- value = var[:var] if var
774
+
775
+ # Variable substitution for in-scope variables. Variables are in scope if they are defined in anthing other than the current formula
776
+ var = find_var(value)
777
+ value = var if var
670
778
 
671
779
  value
780
+ rescue ArgumentError => e
781
+ error("iri", e.message)
672
782
  end
673
783
 
674
- def ns(prefix, suffix)
784
+ # Create a literal
785
+ def literal(value, **options)
786
+ debug("literal", depth: @options[:depth]) do
787
+ "value: #{value.inspect}, " +
788
+ "options: #{options.inspect}, " +
789
+ "validate: #{validate?.inspect}, " +
790
+ "c14n?: #{canonicalize?.inspect}"
791
+ end
792
+ RDF::Literal.new(value, validate: validate?, canonicalize: canonicalize?, **options)
793
+ rescue ArgumentError => e
794
+ error("Argument Error #{e.message}", production: :literal, token: @lexer.first)
795
+ end
796
+
797
+ # Decode a PName
798
+ def ns(prefix = nil, suffix = nil)
799
+ namespace(nil, iri("#{base_uri}#")) if prefix.nil? && !prefix(nil)
800
+
675
801
  base = prefix(prefix).to_s
676
802
  suffix = suffix.to_s.sub(/^\#/, "") if base.index("#")
677
- log_debug("ns") {"base: '#{base}', suffix: '#{suffix}'"}
678
- uri(base + suffix.to_s)
803
+ iri(base + suffix.to_s)
804
+ end
805
+
806
+ # Returns a unique label
807
+ def unique_label
808
+ label, @label_uniquifier = @label_uniquifier, @label_uniquifier.succ
809
+ label
810
+ end
811
+
812
+ # Find any variable that may be defined identified by `name`
813
+ # @param [RDF::Node] name of formula
814
+ # @return [RDF::Query::Variable]
815
+ def find_var(name)
816
+ variables[name.to_s]
817
+ end
818
+
819
+ def prod(production, recover_to = [])
820
+ @prod_stack << {prod: production, recover_to: recover_to}
821
+ @options[:depth] += 1
822
+ recover("#{production}(start)", depth: options[:depth], token: @lexer.first)
823
+ yield
824
+ rescue EBNF::LL1::Lexer::Error, SyntaxError, Recovery => e
825
+ # Lexer encountered an illegal token or the parser encountered
826
+ # a terminal which is inappropriate for the current production.
827
+ # Perform error recovery to find a reasonable terminal based
828
+ # on the follow sets of the relevant productions. This includes
829
+ # remaining terms from the current production and the stacked
830
+ # productions
831
+ case e
832
+ when EBNF::LL1::Lexer::Error
833
+ @lexer.recover
834
+ begin
835
+ error("Lexer error", "With input '#{e.input}': #{e.message}",
836
+ production: production,
837
+ token: e.token)
838
+ rescue SyntaxError
839
+ end
840
+ end
841
+ raise EOFError, "End of input found when recovering" if @lexer.first.nil?
842
+ debug("recovery", "current token: #{@lexer.first.inspect}", depth: @options[:depth])
843
+
844
+ unless e.is_a?(Recovery)
845
+ # Get the list of follows for this sequence, this production and the stacked productions.
846
+ debug("recovery", "stack follows:", depth: @options[:depth])
847
+ @prod_stack.reverse.each do |prod|
848
+ debug("recovery", level: 1, depth: @options[:depth]) {" #{prod[:prod]}: #{prod[:recover_to].inspect}"}
849
+ end
850
+ end
851
+
852
+ # Find all follows to the top of the stack
853
+ follows = @prod_stack.map {|prod| Array(prod[:recover_to])}.flatten.compact.uniq
854
+
855
+ # Skip tokens until one is found in follows
856
+ while (token = (@lexer.first rescue @lexer.recover)) && follows.none? {|t| token === t}
857
+ skipped = @lexer.shift
858
+ debug("recovery", depth: @options[:depth]) {"skip #{skipped.inspect}"}
859
+ end
860
+ debug("recovery", depth: @options[:depth]) {"found #{token.inspect} in follows"}
861
+
862
+ # Re-raise the error unless token is a follows of this production
863
+ raise Recovery unless Array(recover_to).any? {|t| token === t}
864
+
865
+ # Skip that token to get something reasonable to start the next production with
866
+ @lexer.shift
867
+ ensure
868
+ progress("#{production}(finish)", depth: options[:depth])
869
+ @options[:depth] -= 1
870
+ @prod_stack.pop
871
+ end
872
+
873
+ def progress(*args, &block)
874
+ lineno = (options[:token].lineno if options[:token].respond_to?(:lineno)) || (@lexer && @lexer.lineno)
875
+ opts = args.last.is_a?(Hash) ? args.pop : {}
876
+ opts[:level] ||= 1
877
+ opts[:lineno] ||= lineno
878
+ log_info(*args, **opts, &block)
879
+ end
880
+
881
+ def recover(*args, &block)
882
+ lineno = (options[:token].lineno if options[:token].respond_to?(:lineno)) || (@lexer && @lexer.lineno)
883
+ opts = args.last.is_a?(Hash) ? args.pop : {}
884
+ opts[:level] ||= 1
885
+ opts[:lineno] ||= lineno
886
+ log_recover(*args, **opts, &block)
887
+ end
888
+
889
+ def debug(*args, &block)
890
+ lineno = (options[:token].lineno if options[:token].respond_to?(:lineno)) || (@lexer && @lexer.lineno)
891
+ opts = args.last.is_a?(Hash) ? args.pop : {}
892
+ opts[:level] ||= 0
893
+ opts[:lineno] ||= lineno
894
+ log_debug(*args, **opts, &block)
895
+ end
896
+
897
+ ##
898
+ # Error information, used as level `0` debug messages.
899
+ #
900
+ # @overload error(node, message, options)
901
+ # @param [String] node Relevant location associated with message
902
+ # @param [String] message Error string
903
+ # @param [Hash] options
904
+ # @option options [URI, #to_s] :production
905
+ # @option options [Token] :token
906
+ # @see {#debug}
907
+ def error(*args)
908
+ ctx = ""
909
+ ctx += "(found #{options[:token].inspect})" if options[:token]
910
+ ctx += ", production = #{options[:production].inspect}" if options[:production]
911
+ lineno = (options[:token].lineno if options[:token].respond_to?(:lineno)) || (@lexer && @lexer.lineno)
912
+ log_error(*args, ctx,
913
+ lineno: lineno,
914
+ token: options[:token],
915
+ production: options[:production],
916
+ depth: options[:depth],
917
+ exception: SyntaxError,)
918
+ end
919
+
920
+ # Used for internal error recovery
921
+ class Recovery < StandardError; end
922
+
923
+ class SyntaxError < RDF::ReaderError
924
+ ##
925
+ # The current production.
926
+ #
927
+ # @return [Symbol]
928
+ attr_reader :production
929
+
930
+ ##
931
+ # The invalid token which triggered the error.
932
+ #
933
+ # @return [String]
934
+ attr_reader :token
935
+
936
+ ##
937
+ # The line number where the error occurred.
938
+ #
939
+ # @return [Integer]
940
+ attr_reader :lineno
941
+
942
+ ##
943
+ # Initializes a new syntax error instance.
944
+ #
945
+ # @param [String, #to_s] message
946
+ # @param [Hash{Symbol => Object}] options
947
+ # @option options [Symbol] :production (nil)
948
+ # @option options [String] :token (nil)
949
+ # @option options [Integer] :lineno (nil)
950
+ def initialize(message, **options)
951
+ @production = options[:production]
952
+ @token = options[:token]
953
+ @lineno = options[:lineno] || (@token.lineno if @token.respond_to?(:lineno))
954
+ super(message.to_s)
955
+ end
679
956
  end
680
957
  end
681
958
  end