rdf-n3 3.1.1 → 3.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (124) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +148 -69
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/lib/rdf/n3.rb +8 -8
  6. data/lib/rdf/n3/algebra.rb +147 -68
  7. data/lib/rdf/n3/algebra/builtin.rb +79 -0
  8. data/lib/rdf/n3/algebra/formula.rb +355 -94
  9. data/lib/rdf/n3/algebra/list/append.rb +33 -4
  10. data/lib/rdf/n3/algebra/list/first.rb +24 -0
  11. data/lib/rdf/n3/algebra/list/in.rb +42 -3
  12. data/lib/rdf/n3/algebra/list/last.rb +17 -4
  13. data/lib/rdf/n3/algebra/list/length.rb +24 -0
  14. data/lib/rdf/n3/algebra/list/member.rb +39 -2
  15. data/lib/rdf/n3/algebra/list_operator.rb +83 -0
  16. data/lib/rdf/n3/algebra/log/conclusion.rb +57 -1
  17. data/lib/rdf/n3/algebra/log/conjunction.rb +28 -1
  18. data/lib/rdf/n3/algebra/log/content.rb +34 -0
  19. data/lib/rdf/n3/algebra/log/equal_to.rb +34 -0
  20. data/lib/rdf/n3/algebra/log/implies.rb +55 -30
  21. data/lib/rdf/n3/algebra/log/includes.rb +58 -1
  22. data/lib/rdf/n3/algebra/log/n3_string.rb +34 -0
  23. data/lib/rdf/n3/algebra/log/not_equal_to.rb +23 -0
  24. data/lib/rdf/n3/algebra/log/not_includes.rb +27 -0
  25. data/lib/rdf/n3/algebra/log/output_string.rb +40 -0
  26. data/lib/rdf/n3/algebra/log/parsed_as_n3.rb +36 -0
  27. data/lib/rdf/n3/algebra/log/semantics.rb +40 -0
  28. data/lib/rdf/n3/algebra/math/absolute_value.rb +36 -0
  29. data/lib/rdf/n3/algebra/math/acos.rb +26 -0
  30. data/lib/rdf/n3/algebra/math/acosh.rb +26 -0
  31. data/lib/rdf/n3/algebra/math/asin.rb +26 -0
  32. data/lib/rdf/n3/algebra/math/asinh.rb +26 -0
  33. data/lib/rdf/n3/algebra/math/atan.rb +26 -0
  34. data/lib/rdf/n3/algebra/math/atanh.rb +26 -0
  35. data/lib/rdf/n3/algebra/math/ceiling.rb +28 -0
  36. data/lib/rdf/n3/algebra/math/cos.rb +40 -0
  37. data/lib/rdf/n3/algebra/math/cosh.rb +38 -0
  38. data/lib/rdf/n3/algebra/math/difference.rb +34 -3
  39. data/lib/rdf/n3/algebra/math/equal_to.rb +54 -0
  40. data/lib/rdf/n3/algebra/math/exponentiation.rb +29 -3
  41. data/lib/rdf/n3/algebra/math/floor.rb +28 -0
  42. data/lib/rdf/n3/algebra/math/greater_than.rb +41 -0
  43. data/lib/rdf/n3/algebra/math/less_than.rb +41 -0
  44. data/lib/rdf/n3/algebra/math/negation.rb +31 -2
  45. data/lib/rdf/n3/algebra/math/not_equal_to.rb +25 -0
  46. data/lib/rdf/n3/algebra/math/not_greater_than.rb +25 -0
  47. data/lib/rdf/n3/algebra/math/not_less_than.rb +25 -0
  48. data/lib/rdf/n3/algebra/math/product.rb +14 -3
  49. data/lib/rdf/n3/algebra/math/quotient.rb +30 -3
  50. data/lib/rdf/n3/algebra/math/remainder.rb +29 -3
  51. data/lib/rdf/n3/algebra/math/rounded.rb +20 -3
  52. data/lib/rdf/n3/algebra/math/sin.rb +40 -0
  53. data/lib/rdf/n3/algebra/math/sinh.rb +38 -0
  54. data/lib/rdf/n3/algebra/math/sum.rb +35 -4
  55. data/lib/rdf/n3/algebra/math/tan.rb +40 -0
  56. data/lib/rdf/n3/algebra/math/tanh.rb +38 -0
  57. data/lib/rdf/n3/algebra/not_implemented.rb +13 -0
  58. data/lib/rdf/n3/algebra/resource_operator.rb +123 -0
  59. data/lib/rdf/n3/algebra/str/concatenation.rb +21 -3
  60. data/lib/rdf/n3/algebra/str/contains.rb +28 -4
  61. data/lib/rdf/n3/algebra/str/contains_ignoring_case.rb +33 -0
  62. data/lib/rdf/n3/algebra/str/ends_with.rb +33 -0
  63. data/lib/rdf/n3/algebra/str/equal_ignoring_case.rb +34 -0
  64. data/lib/rdf/n3/algebra/str/format.rb +12 -4
  65. data/lib/rdf/n3/algebra/str/greater_than.rb +38 -0
  66. data/lib/rdf/n3/algebra/str/less_than.rb +33 -0
  67. data/lib/rdf/n3/algebra/str/matches.rb +33 -5
  68. data/lib/rdf/n3/algebra/str/not_equal_ignoring_case.rb +17 -0
  69. data/lib/rdf/n3/algebra/str/not_greater_than.rb +17 -0
  70. data/lib/rdf/n3/algebra/str/not_less_than.rb +17 -0
  71. data/lib/rdf/n3/algebra/str/not_matches.rb +18 -0
  72. data/lib/rdf/n3/algebra/str/replace.rb +28 -5
  73. data/lib/rdf/n3/algebra/str/scrape.rb +31 -5
  74. data/lib/rdf/n3/algebra/str/starts_with.rb +33 -0
  75. data/lib/rdf/n3/algebra/time/day.rb +35 -0
  76. data/lib/rdf/n3/algebra/time/day_of_week.rb +27 -0
  77. data/lib/rdf/n3/algebra/time/gm_time.rb +29 -0
  78. data/lib/rdf/n3/algebra/time/hour.rb +35 -0
  79. data/lib/rdf/n3/algebra/time/in_seconds.rb +59 -0
  80. data/lib/rdf/n3/algebra/time/local_time.rb +29 -0
  81. data/lib/rdf/n3/algebra/time/minute.rb +35 -0
  82. data/lib/rdf/n3/algebra/time/month.rb +35 -0
  83. data/lib/rdf/n3/algebra/time/second.rb +35 -0
  84. data/lib/rdf/n3/algebra/time/timezone.rb +36 -0
  85. data/lib/rdf/n3/algebra/time/year.rb +29 -0
  86. data/lib/rdf/n3/extensions.rb +180 -21
  87. data/lib/rdf/n3/format.rb +65 -0
  88. data/lib/rdf/n3/list.rb +630 -0
  89. data/lib/rdf/n3/reader.rb +762 -485
  90. data/lib/rdf/n3/reasoner.rb +57 -68
  91. data/lib/rdf/n3/refinements.rb +178 -0
  92. data/lib/rdf/n3/repository.rb +332 -0
  93. data/lib/rdf/n3/terminals.rb +80 -0
  94. data/lib/rdf/n3/vocab.rb +35 -7
  95. data/lib/rdf/n3/writer.rb +208 -148
  96. metadata +110 -52
  97. data/AUTHORS +0 -1
  98. data/History.markdown +0 -99
  99. data/lib/rdf/n3/algebra/log/equalTo.rb +0 -7
  100. data/lib/rdf/n3/algebra/log/notEqualTo.rb +0 -7
  101. data/lib/rdf/n3/algebra/log/notIncludes.rb +0 -12
  102. data/lib/rdf/n3/algebra/log/outputString.rb +0 -7
  103. data/lib/rdf/n3/algebra/math/absoluteValue.rb +0 -9
  104. data/lib/rdf/n3/algebra/math/equalTo.rb +0 -9
  105. data/lib/rdf/n3/algebra/math/greaterThan.rb +0 -9
  106. data/lib/rdf/n3/algebra/math/integerQuotient.rb +0 -9
  107. data/lib/rdf/n3/algebra/math/lessThan.rb +0 -9
  108. data/lib/rdf/n3/algebra/math/memberCount.rb +0 -9
  109. data/lib/rdf/n3/algebra/math/notEqualTo.rb +0 -9
  110. data/lib/rdf/n3/algebra/math/notGreaterThan.rb +0 -9
  111. data/lib/rdf/n3/algebra/math/notLessThan.rb +0 -9
  112. data/lib/rdf/n3/algebra/str/containsIgnoringCase.rb +0 -9
  113. data/lib/rdf/n3/algebra/str/endsWith.rb +0 -9
  114. data/lib/rdf/n3/algebra/str/equalIgnoringCase.rb +0 -9
  115. data/lib/rdf/n3/algebra/str/greaterThan.rb +0 -9
  116. data/lib/rdf/n3/algebra/str/lessThan.rb +0 -9
  117. data/lib/rdf/n3/algebra/str/notEqualIgnoringCase.rb +0 -9
  118. data/lib/rdf/n3/algebra/str/notGreaterThan.rb +0 -9
  119. data/lib/rdf/n3/algebra/str/notLessThan.rb +0 -9
  120. data/lib/rdf/n3/algebra/str/notMatches.rb +0 -9
  121. data/lib/rdf/n3/algebra/str/startsWith.rb +0 -56
  122. data/lib/rdf/n3/patches/array_hacks.rb +0 -53
  123. data/lib/rdf/n3/reader/meta.rb +0 -641
  124. data/lib/rdf/n3/reader/parser.rb +0 -239
@@ -1,4 +1,7 @@
1
1
  # coding: utf-8
2
+ require 'rdf/reader'
3
+ require 'ebnf'
4
+
2
5
  module RDF::N3
3
6
  ##
4
7
  # A Notation-3/Turtle parser in Ruby
@@ -9,7 +12,9 @@ module RDF::N3
9
12
  #
10
13
  # Separate pass to create branch_table from n3-selectors.n3
11
14
  #
12
- # This implementation uses distinguished variables for both universal and explicit existential variables (defined with `@forSome`). Variables created from blank nodes are non-distinguished. Distinguished existential variables are tracked using `$`, internally, as the RDF `query_pattern` logic looses details of the variable definition in solutions, where the variable is represented using a symbol.
15
+ # This implementation uses distinguished variables for both universal and explicit existential variables (defined with `@forSome`). Variables created from blank nodes are non-distinguished. Distinguished existential variables are named using an `_ext` suffix, internally, as the RDF `query_pattern` logic loses details of the variable definition in solutions, where the variable is represented using a symbol.
16
+ #
17
+ # Non-distinguished blank node variables are created as part of reasoning.
13
18
  #
14
19
  # @todo
15
20
  # * Formulae as RDF::Query representations
@@ -18,17 +23,42 @@ module RDF::N3
18
23
  # @author [Gregg Kellogg](http://greggkellogg.net/)
19
24
  class Reader < RDF::Reader
20
25
  format Format
26
+ using Refinements
21
27
 
22
28
  include RDF::Util::Logger
23
- include Meta
24
- include Parser
29
+ include EBNF::LL1::Parser
30
+ include Terminals
25
31
 
26
- N3_KEYWORDS = %w(a is of has keywords prefix base true false forSome forAny)
27
-
28
- # The Blank nodes allocated for formula
32
+ # Nodes used as Formulae graph names
33
+ #
29
34
  # @return [Array<RDF::Node>]
30
35
  attr_reader :formulae
31
36
 
37
+ # All nodes allocated to formulae
38
+ #
39
+ # @return [Hash{RDF::Node => RDF::Graph}]
40
+ attr_reader :formula_nodes
41
+
42
+ # Allocated variables by formula
43
+ #
44
+ # @return [Hash{Symbol => RDF::Node}]
45
+ attr_reader :variables
46
+
47
+ ##
48
+ # N3 Reader options
49
+ # @see http://www.rubydoc.info/github/ruby-rdf/rdf/RDF/Reader#options-class_method
50
+ def self.options
51
+ super + [
52
+ RDF::CLI::Option.new(
53
+ symbol: :list_terms,
54
+ datatype: TrueClass,
55
+ default: true,
56
+ control: :checkbox,
57
+ on: ["--list-terms CONTEXT"],
58
+ description: "Use native collections (lists), not first/rest ladder.")
59
+ ]
60
+ end
61
+
32
62
  ##
33
63
  # Initializes the N3 reader instance.
34
64
  #
@@ -40,11 +70,11 @@ module RDF::N3
40
70
  # @option options [Boolean] :validate (false)
41
71
  # whether to validate the parsed statements and values
42
72
  # @option options [Boolean] :canonicalize (false)
43
- # whether to canonicalize parsed literals
44
- # @option options [Boolean] :intern (true)
45
- # whether to intern all parsed URIs
73
+ # whether to canonicalize parsed literals and URIs.
46
74
  # @option options [Hash] :prefixes (Hash.new)
47
75
  # the prefix mappings to use (not supported by all readers)
76
+ # @option options [Boolean] :list_terms (false)
77
+ # represent collections as an `RDF::Term`, rather than an rdf:first/rest ladder.
48
78
  # @return [reader]
49
79
  # @yield [reader] `self`
50
80
  # @yieldparam [RDF::Reader] reader
@@ -52,44 +82,43 @@ module RDF::N3
52
82
  # @raise [Error]:: Raises RDF::ReaderError if validating and an error is found
53
83
  def initialize(input = $stdin, **options, &block)
54
84
  super do
55
- input.rewind if input.respond_to?(:rewind)
56
- @input = input.respond_to?(:read) ? input : StringIO.new(input.to_s)
57
- @lineno = 0
58
- readline # Prime the pump
59
-
60
- @memo = {}
61
- @keyword_mode = false
62
- @keywords = %w(a is of this has).map(&:freeze).freeze
63
- @productions = []
64
- @prod_data = []
65
-
66
- @branches = BRANCHES # Get from meta class
67
- @regexps = REGEXPS # Get from meta class
68
-
69
- @formulae = [] # Nodes used as Formulae graph names
70
- @formulae_nodes = {}
71
- @label_uniquifier ||= "#{Random.new_seed}_000000"
72
- @bnodes = {} # allocated bnodes by formula
73
- @variables = {} # allocated variables by formula
85
+ @options = {
86
+ anon_base: "b0",
87
+ whitespace: WS,
88
+ depth: 0,
89
+ }.merge(@options)
90
+ @prod_stack = []
91
+
92
+ @formulae = []
93
+ @formula_nodes = {}
94
+ @label_uniquifier = "0"
95
+ @bnodes = {}
96
+ @bn_labler = @options[:anon_base].dup
97
+ @bn_mapper = {}
98
+ @variables = {}
74
99
 
75
100
  if options[:base_uri]
76
- log_info("@uri") { base_uri.inspect}
77
- namespace(nil, uri("#{base_uri}#"))
101
+ progress("base_uri") { base_uri.inspect}
102
+ namespace(nil, iri(base_uri.to_s.match?(%r{[#/]$}) ? base_uri : "#{base_uri}#"))
78
103
  end
79
104
 
80
105
  # Prepopulate operator namespaces unless validating
81
106
  unless validate?
82
- namespace(:crypto, RDF::N3::Crypto)
83
- namespace(:list, RDF::N3::List)
84
- namespace(:log, RDF::N3::Log)
85
- namespace(:math, RDF::N3::Math)
86
- namespace(:rei, RDF::N3::Rei)
87
- #namespace(:string, RDF::N3::String)
88
- namespace(:time, RDF::N3::Time)
107
+ namespace(:rdf, RDF.to_uri)
108
+ namespace(:rdfs, RDF::RDFS.to_uri)
109
+ namespace(:xsd, RDF::XSD.to_uri)
110
+ namespace(:crypto, RDF::N3::Crypto.to_uri)
111
+ namespace(:list, RDF::N3::List.to_uri)
112
+ namespace(:log, RDF::N3::Log.to_uri)
113
+ namespace(:math, RDF::N3::Math.to_uri)
114
+ namespace(:rei, RDF::N3::Rei.to_uri)
115
+ #namespace(:string, RDF::N3::String.to_uri)
116
+ namespace(:time, RDF::N3::Time.to_uri)
89
117
  end
90
- log_info("validate") {validate?.inspect}
91
- log_info("canonicalize") {canonicalize?.inspect}
92
- log_info("intern") {intern?.inspect}
118
+ progress("validate") {validate?.inspect}
119
+ progress("canonicalize") {canonicalize?.inspect}
120
+
121
+ @lexer = EBNF::LL1::Lexer.new(input, self.class.patterns, **@options)
93
122
 
94
123
  if block_given?
95
124
  case block.arity
@@ -111,9 +140,16 @@ module RDF::N3
111
140
  # @return [void]
112
141
  def each_statement(&block)
113
142
  if block_given?
143
+ log_recover
114
144
  @callback = block
115
145
 
116
- parse(START.to_sym)
146
+ begin
147
+ while (@lexer.first rescue true)
148
+ read_n3Doc
149
+ end
150
+ rescue EBNF::LL1::Lexer::Error, SyntaxError, EOFError, Recovery
151
+ # Terminate loop if EOF found while recovering
152
+ end
117
153
 
118
154
  if validate? && log_statistics[:error]
119
155
  raise RDF::ReaderError, "Errors found during processing"
@@ -140,526 +176,619 @@ module RDF::N3
140
176
  end
141
177
 
142
178
  protected
143
- # Start of production
144
- def onStart(prod)
145
- handler = "#{prod}Start".to_sym
146
- log_info("#{handler}(#{respond_to?(handler, true)})", prod, depth: depth)
147
- @productions << prod
148
- send(handler, prod) if respond_to?(handler, true)
149
-
150
- end
151
-
152
- # End of production
153
- def onFinish
154
- prod = @productions.pop()
155
- handler = "#{prod}Finish".to_sym
156
- log_info("#{handler}(#{respond_to?(handler, true)})", depth: depth) {"#{prod}: #{@prod_data.last.inspect}"}
157
- send(handler) if respond_to?(handler, true)
158
- end
159
-
160
- # Process of a token
161
- def onToken(prod, tok)
162
- unless @productions.empty?
163
- parentProd = @productions.last
164
- handler = "#{parentProd}Token".to_sym
165
- log_info("#{handler}(#{respond_to?(handler, true)})", depth: depth) {"#{prod}, #{tok}: #{@prod_data.last.inspect}"}
166
- send(handler, prod, tok) if respond_to?(handler, true)
167
- else
168
- error("Token has no parent production")
169
- end
170
- end
171
179
 
172
- def booleanToken(prod, tok)
173
- lit = RDF::Literal.new(tok.delete("@"), datatype: RDF::XSD.boolean, validate: validate?, canonicalize: canonicalize?)
174
- add_prod_data(:literal, lit)
180
+ # Terminals passed to lexer. Order matters!
181
+
182
+ # @!parse none
183
+ terminal(:ANON, ANON)
184
+ terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL)
185
+ terminal(:IRIREF, IRIREF, unescape: true)
186
+ terminal(:DOUBLE, DOUBLE)
187
+ terminal(:DECIMAL, DECIMAL)
188
+ terminal(:INTEGER, INTEGER)
189
+ terminal(:PNAME_LN, PNAME_LN, unescape: true)
190
+ terminal(:PNAME_NS, PNAME_NS)
191
+ terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, unescape: true, partial_regexp: /^'''/)
192
+ terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, unescape: true, partial_regexp: /^"""/)
193
+ terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, unescape: true)
194
+ terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, unescape: true)
195
+
196
+ # String terminals
197
+ terminal(nil, %r(
198
+ [\(\){},.;\[\]a!]
199
+ | \^\^|\^
200
+ |<-|<=|=>|=
201
+ | true|false
202
+ | has|is|of
203
+ |@forAll|@forSome
204
+ )x)
205
+
206
+ terminal(:PREFIX, PREFIX)
207
+ terminal(:BASE, BASE)
208
+ terminal(:LANGTAG, LANGTAG)
209
+ terminal(:QUICK_VAR_NAME, QUICK_VAR_NAME, unescape: true)
210
+
211
+ private
212
+ ##
213
+ # Read statements and directives
214
+ #
215
+ # [1] n3Doc ::= (n3Statement '.' | sparqlDirective)*
216
+ #
217
+ # @return [void]
218
+ def read_n3Doc
219
+ prod(:n3Doc, %w{.}) do
220
+ error("read_n3Doc", "Unexpected end of file") unless token = @lexer.first
221
+ case token.type
222
+ when :BASE, :PREFIX
223
+ read_directive || error("Failed to parse directive", production: :directive, token: token)
224
+ else
225
+ read_n3Statement
226
+ if !log_recovering? || @lexer.first === '.'
227
+ # If recovering, we will have eaten the closing '.'
228
+ token = @lexer.shift
229
+ unless token && token.value == '.'
230
+ error("Expected '.' following n3Statement", production: :n3Statement, token: token)
231
+ end
232
+ end
233
+ end
234
+ end
175
235
  end
176
236
 
177
- def declarationStart(prod)
178
- @prod_data << {}
179
- end
180
237
 
181
- def declarationToken(prod, tok)
182
- case prod
183
- when "@prefix", "@base", "@keywords"
184
- add_prod_data(:prod, prod)
185
- when "prefix"
186
- add_prod_data(:prefix, tok[0..-2])
187
- when "explicituri"
188
- add_prod_data(:explicituri, tok[1..-2])
189
- else
190
- add_prod_data(prod.to_sym, tok)
238
+ ##
239
+ # Read a single statement: a directive, triples, or an existential/universal declaration
240
+ #
241
+ # [2] n3Statement ::= n3Directive | triples | existential | universal
242
+ #
243
+ # @return [void]
244
+ def read_n3Statement
245
+ prod(:n3Statement, %w{.}) do
246
+ error("read_n3Doc", "Unexpected end of file") unless token = @lexer.first
247
+ read_uniext ||
248
+ read_triples ||
249
+ error("Expected token", production: :statement, token: token)
191
250
  end
192
251
  end
193
252
 
194
- def declarationFinish
195
- decl = @prod_data.pop
196
- case decl[:prod]
197
- when "@prefix"
198
- uri = process_uri(decl[:explicituri])
199
- namespace(decl[:prefix], uri)
200
- when "@base"
201
- # Base, set or update document URI
202
- uri = decl[:explicituri]
203
- options[:base_uri] = process_uri(uri)
204
-
205
- # The empty prefix "" is by default , bound to "#" -- the local namespace of the file.
206
- # The parser behaves as though there were a
207
- # @prefix : <#>.
208
- # just before the file.
209
- # This means that <#foo> can be written :foo and using @keywords one can reduce that to foo.
210
-
211
- namespace(nil, uri.match(/[\/\#]$/) ? base_uri : process_uri("#{uri}#"))
212
- log_debug("declarationFinish[@base]", depth: depth) {"@base=#{base_uri}"}
213
- when "@keywords"
214
- log_debug("declarationFinish[@keywords]", depth: depth) {@keywords.inspect}
215
- # Keywords are handled in tokenizer and maintained in @keywords array
216
- if (@keywords & N3_KEYWORDS) != @keywords
217
- error("Undefined keywords used: #{(@keywords - N3_KEYWORDS).to_sentence}") if validate?
253
+ ##
254
+ # Read base and prefix directives
255
+ #
256
+ # [3] n3Directive ::= prefixID | base
257
+ #
258
+ # @return [void]
259
+ def read_directive
260
+ prod(:directive, %w{.}) do
261
+ token = @lexer.first
262
+ case token.type
263
+ when :BASE
264
+ prod(:base) do
265
+ @lexer.shift
266
+ terminated = token.value == '@base'
267
+ iri = @lexer.shift
268
+ error("Expected IRIREF", production: :base, token: iri) unless iri === :IRIREF
269
+ @options[:base_uri] = process_iri(iri.value[1..-2].gsub(/\s/, ''))
270
+ namespace(nil, base_uri.to_s.end_with?('#') ? base_uri : iri("#{base_uri}#"))
271
+ error("base", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@base'
272
+
273
+ if terminated
274
+ error("base", "Expected #{token} to be terminated") unless @lexer.first === '.'
275
+ @lexer.shift
276
+ elsif @lexer.first === '.'
277
+ error("base", "Expected #{token} not to be terminated")
278
+ else
279
+ true
280
+ end
281
+ end
282
+ when :PREFIX
283
+ prod(:prefixID, %w{.}) do
284
+ @lexer.shift
285
+ pfx, iri = @lexer.shift, @lexer.shift
286
+ terminated = token.value == '@prefix'
287
+ error("Expected PNAME_NS", production: :prefix, token: pfx) unless pfx === :PNAME_NS
288
+ error("Expected IRIREF", production: :prefix, token: iri) unless iri === :IRIREF
289
+ debug("prefixID", depth: options[:depth]) {"Defined prefix #{pfx.inspect} mapping to #{iri.inspect}"}
290
+ namespace(pfx.value[0..-2], process_iri(iri.value[1..-2].gsub(/\s/, '')))
291
+ error("prefixId", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@prefix'
292
+
293
+ if terminated
294
+ error("prefixID", "Expected #{token} to be terminated") unless @lexer.first === '.'
295
+ @lexer.shift
296
+ elsif @lexer.first === '.'
297
+ error("prefixID", "Expected #{token} not to be terminated")
298
+ else
299
+ true
300
+ end
301
+ end
218
302
  end
219
- @userkeys = true
220
- else
221
- error("declarationFinish: FIXME #{decl.inspect}")
222
303
  end
223
304
  end
224
305
 
225
- # Document start, instantiate
226
- def documentStart(prod)
227
- @formulae.push(nil)
228
- @prod_data << {}
229
- end
230
-
231
- def dtlangToken(prod, tok)
232
- add_prod_data(:langcode, tok) if prod == "langcode"
233
- end
234
-
235
- def existentialStart(prod)
236
- @prod_data << {}
237
- end
238
-
239
- # Apart from the set of statements, a formula also has a set of URIs of symbols which are universally quantified,
240
- # and a set of URIs of symbols which are existentially quantified.
241
- # Variables are then in general symbols which have been quantified.
306
+ ##
307
+ # Read triples
242
308
  #
243
- # Here we allocate a variable (making up a name) and record with the defining formula. Quantification is done
244
- # when the formula is completed against all in-scope variables
245
- def existentialFinish
246
- pd = @prod_data.pop
247
- forSome = Array(pd[:symbol])
248
- forSome.each do |term|
249
- var = univar(term, existential: true)
250
- add_var_to_formula(@formulae.last, term, var)
309
+ # [9] triples ::= subject predicateObjectList?
310
+ #
311
+ # @return [Object] returns the last IRI matched, or subject BNode on predicateObjectList?
312
+ def read_triples
313
+ prod(:triples, %w{.}) do
314
+ error("read_triples", "Unexpected end of file") unless token = @lexer.first
315
+ subject = case token.type || token.value
316
+ when '['
317
+ # blankNodePropertyList predicateObjectList?
318
+ read_blankNodePropertyList || error("Failed to parse blankNodePropertyList", production: :triples, token: @lexer.first)
319
+ else
320
+ # subject predicateObjectList
321
+ read_path || error("Failed to parse subject", production: :triples, token: @lexer.first)
322
+ end
323
+ read_predicateObjectList(subject) || subject
251
324
  end
252
325
  end
253
326
 
254
- def expressionStart(prod)
255
- @prod_data << {}
327
+ ##
328
+ # Read predicateObjectList
329
+ #
330
+ # [10] predicateObjectList ::= verb objectList (';' (verb objectList)?)*
331
+ #
332
+ # @param [RDF::Resource] subject
333
+ # @return [RDF::URI] the last matched verb
334
+ def read_predicateObjectList(subject)
335
+ return if @lexer.first.nil? || %w(. }).include?(@lexer.first.value)
336
+ prod(:predicateObjectList, %{;}) do
337
+ last_verb = nil
338
+ loop do
339
+ verb, invert = read_verb
340
+ break unless verb
341
+ last_verb = verb
342
+ prod(:_predicateObjectList_2) do
343
+ read_objectList(subject, verb, invert) || error("Expected objectList", production: :predicateObjectList, token: @lexer.first)
344
+ end
345
+ break unless @lexer.first === ';'
346
+ @lexer.shift while @lexer.first === ';'
347
+ end
348
+ last_verb
349
+ end
256
350
  end
257
351
 
258
- # Process path items, and push on the last object for parent processing
259
- def expressionFinish
260
- expression = @prod_data.pop
261
-
262
- # If we're in teh middle of a pathtail, append
263
- if @prod_data.last[:pathtail] && expression[:pathitem] && expression[:pathtail]
264
- path_list = [expression[:pathitem]] + expression[:pathtail]
265
- log_debug("expressionFinish(pathtail)", depth: depth) {"set pathtail to #{path_list.inspect}"}
266
- @prod_data.last[:pathtail] = path_list
352
+ ##
353
+ # Read objectList
354
+ #
355
+ # [11] objectList ::= object (',' object)*
356
+ #
357
+ # @return [RDF::Term] the last matched object
358
+ def read_objectList(subject, predicate, invert)
359
+ prod(:objectList, %{,}) do
360
+ last_object = nil
361
+ while object = prod(:_objectList_2) {read_path}
362
+ last_object = object
363
+
364
+ if invert
365
+ add_statement(:objectList, object, predicate, subject)
366
+ else
367
+ add_statement(:objectList, subject, predicate, object)
368
+ end
267
369
 
268
- dir_list = [expression[:direction]] if expression[:direction]
269
- dir_list += expression[:directiontail] if expression[:directiontail]
270
- @prod_data.last[:directiontail] = dir_list if dir_list
271
- elsif expression[:pathitem] && expression[:pathtail]
272
- add_prod_data(:expression, process_path(expression))
273
- elsif expression[:pathitem]
274
- add_prod_data(:expression, expression[:pathitem])
275
- else
276
- error("expressionFinish: FIXME #{expression.inspect}")
370
+ break unless @lexer.first === ','
371
+ @lexer.shift while @lexer.first === ','
372
+ end
373
+ last_object
277
374
  end
278
375
  end
279
376
 
280
- def literalStart(prod)
281
- @prod_data << {}
282
- end
283
-
284
- def literalToken(prod, tok)
285
- tok = tok[0, 3] == '"""' ? tok[3..-4] : tok[1..-2]
286
- add_prod_data(:string, tok)
377
+ ##
378
+ # Read a verb
379
+ #
380
+ # [12] verb ::= predicate
381
+ # | 'a'
382
+ # | 'has' expression
383
+ # | 'is' expression 'of'
384
+ # | '<-' expression
385
+ # | '<='
386
+ # | '=>'
387
+ # | '='
388
+ #
389
+ # @return [RDF::Resource, Boolean] verb and invert?
390
+ def read_verb
391
+ invert = false
392
+ error("read_verb", "Unexpected end of file") unless token = @lexer.first
393
+ verb = case token.type || token.value
394
+ when 'a' then prod(:verb) {@lexer.shift && RDF.type}
395
+ when 'has' then prod(:verb) {@lexer.shift && read_path}
396
+ when 'is' then prod(:verb) {
397
+ @lexer.shift
398
+ invert, v = true, read_path
399
+ error( "Expected 'of'", production: :verb, token: @lexer.first) unless @lexer.first.value == 'of'
400
+ @lexer.shift
401
+ v
402
+ }
403
+ when '<-' then prod(:verb) {
404
+ @lexer.shift
405
+ invert = true
406
+ read_path
407
+ }
408
+ when '<=' then prod(:verb) {
409
+ @lexer.shift
410
+ invert = true
411
+ RDF::N3::Log.implies
412
+ }
413
+ when '=>' then prod(:verb) {@lexer.shift && RDF::N3::Log.implies}
414
+ when '=' then prod(:verb) {@lexer.shift && RDF::OWL.sameAs}
415
+ else read_path
416
+ end
417
+ [verb, invert]
287
418
  end
288
419
 
289
- def literalFinish
290
- lit = @prod_data.pop
291
- content = RDF::NTriples.unescape(lit[:string])
292
- language = lit[:langcode] if lit[:langcode]
293
- language = language.downcase if language && canonicalize?
294
- datatype = lit[:symbol]
295
-
296
- lit = RDF::Literal.new(content, language: language, datatype: datatype, validate: validate?, canonicalize: canonicalize?)
297
- add_prod_data(:literal, lit)
298
- end
420
+ ##
421
+ # subjects, predicates and objects are all expressions, which are all paths
422
+ #
423
+ # [13] subject ::= expression
424
+ # [14] predicate ::= expression
425
+ # [16] expression ::= path
426
+ # [17] path ::= pathItem ('!' path | '^' path)?
427
+ #
428
+ # @return [RDF::Resource]
429
+ def read_path
430
+ return if @lexer.first.nil? || %w/. } ) ]/.include?(@lexer.first.value)
431
+ prod(:path) do
432
+ pathtail = path = {}
433
+ loop do
434
+ pathtail[:pathitem] = prod(:pathItem) do
435
+ read_iri ||
436
+ read_blankNode ||
437
+ read_quickVar ||
438
+ read_collection ||
439
+ read_blankNodePropertyList ||
440
+ read_literal ||
441
+ read_formula
442
+ end
299
443
 
300
- def objectStart(prod)
301
- @prod_data << {}
302
- end
444
+ break if @lexer.first.nil? || !%w(! ^).include?(@lexer.first.value)
445
+ prod(:_path_2) do
446
+ pathtail[:direction] = @lexer.shift.value == '!' ? :forward : :reverse
447
+ pathtail = pathtail[:pathtail] = {}
448
+ end
449
+ end
303
450
 
304
- def objectFinish
305
- object = @prod_data.pop
306
- if object[:expression]
307
- add_prod_data(:object, object[:expression])
308
- else
309
- error("objectFinish: FIXME #{object.inspect}")
451
+ # Returns the first object in the path
452
+ # FIXME: what if it's a verb?
453
+ process_path(path)
310
454
  end
311
455
  end
312
456
 
313
- def pathitemStart(prod)
314
- @prod_data << {}
315
- end
316
-
317
- def pathitemToken(prod, tok)
318
- case prod
319
- when "numericliteral"
320
- nl = RDF::NTriples.unescape(tok)
321
- datatype = case nl
322
- when /e/i then RDF::XSD.double
323
- when /\./ then RDF::XSD.decimal
324
- else RDF::XSD.integer
457
+ ##
458
+ # Read a literal
459
+ #
460
+ # [19] literal ::= rdfLiteral | numericLiteral | BOOLEAN_LITERAL
461
+ #
462
+ # @return [RDF::Literal]
463
+ def read_literal
464
+ error("Unexpected end of file", production: :literal) unless token = @lexer.first
465
+ case token.type || token.value
466
+ when :INTEGER then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.integer, canonicalize: canonicalize?)}
467
+ when :DECIMAL
468
+ prod(:literal) do
469
+ value = @lexer.shift.value
470
+ value = "0#{value}" if value.start_with?(".")
471
+ literal(value, datatype: RDF::XSD.decimal, canonicalize: canonicalize?)
325
472
  end
326
-
327
- lit = RDF::Literal.new(nl, datatype: datatype, validate: validate?, canonicalize: canonicalize?)
328
- add_prod_data(:literal, lit)
329
- when "quickvariable"
330
- # There is a also a shorthand syntax ?x which is the same as :x except that it implies that x is
331
- # universally quantified not in the formula but in its parent formula
332
- uri = process_qname(tok.sub('?', ':'))
333
- var = uri.variable? ? uri : univar(uri)
334
- add_var_to_formula(@formulae[-2], uri, var)
335
- # Also add var to this formula
336
- add_var_to_formula(@formulae.last, uri, var)
337
-
338
- add_prod_data(:symbol, var)
339
- when "boolean"
340
- lit = RDF::Literal.new(tok.delete("@"), datatype: RDF::XSD.boolean, validate: validate?, canonicalize: canonicalize?)
341
- add_prod_data(:literal, lit)
342
- when "[", "("
343
- # Push on state for content of blank node
344
- @prod_data << {}
345
- when "]", ")"
346
- # Construct
347
- symbol = process_anonnode(@prod_data.pop)
348
- add_prod_data(:symbol, symbol)
349
- when "{"
350
- # A new formula, push on a node as a named graph
351
- node = RDF::Node.new(".form_#{unique_label}")
352
- @formulae << node
353
- @formulae_nodes[node] = true
354
-
355
- # Promote variables defined on the earlier formula to this formula
356
- @variables[node] = {}
357
- @variables[@formulae[-2]].each do |name, var|
358
- @variables[node][name] = var
473
+ when :DOUBLE then prod(:literal) {literal(@lexer.shift.value.sub(/\.([eE])/, '.0\1'), datatype: RDF::XSD.double, canonicalize: canonicalize?)}
474
+ when "true", "false" then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.boolean, canonicalize: canonicalize?)}
475
+ when :STRING_LITERAL_QUOTE, :STRING_LITERAL_SINGLE_QUOTE
476
+ prod(:literal) do
477
+ value = @lexer.shift.value[1..-2]
478
+ error("read_literal", "Unexpected end of file") unless token = @lexer.first
479
+ case token.type || token.value
480
+ when :LANGTAG
481
+ literal(value, language: @lexer.shift.value[1..-1].to_sym)
482
+ when '^^'
483
+ @lexer.shift
484
+ literal(value, datatype: read_iri)
485
+ else
486
+ literal(value)
487
+ end
488
+ end
489
+ when :STRING_LITERAL_LONG_QUOTE, :STRING_LITERAL_LONG_SINGLE_QUOTE
490
+ prod(:literal) do
491
+ value = @lexer.shift.value[3..-4]
492
+ error("read_literal", "Unexpected end of file") unless token = @lexer.first
493
+ case token.type || token.value
494
+ when :LANGTAG
495
+ literal(value, language: @lexer.shift.value[1..-1].to_sym)
496
+ when '^^'
497
+ @lexer.shift
498
+ literal(value, datatype: read_iri)
499
+ else
500
+ literal(value)
501
+ end
359
502
  end
360
- when "}"
361
- # Pop off the formula
362
- formula = @formulae.pop
363
- add_prod_data(:symbol, formula)
364
- else
365
- error("pathitemToken(#{prod}, #{tok}): FIXME")
366
503
  end
367
504
  end
368
505
 
369
- def pathitemFinish
370
- pathitem = @prod_data.pop
371
- if pathitem[:pathlist]
372
- error("pathitemFinish(pathlist): FIXME #{pathitem.inspect}")
373
- elsif pathitem[:propertylist]
374
- error("pathitemFinish(propertylist): FIXME #{pathitem.inspect}")
375
- elsif pathitem[:symbol] || pathitem[:literal]
376
- add_prod_data(:pathitem, pathitem[:symbol] || pathitem[:literal])
377
- else
378
- error("pathitemFinish: FIXME #{pathitem.inspect}")
506
+ ##
507
+ # Read a blankNodePropertyList
508
+ #
509
+ # [20] blankNodePropertyList ::= '[' predicateObjectList ']'
510
+ #
511
+ # @return [RDF::Node]
512
+ def read_blankNodePropertyList
513
+ token = @lexer.first
514
+ if token === '['
515
+ prod(:blankNodePropertyList, %{]}) do
516
+ @lexer.shift
517
+ progress("blankNodePropertyList", depth: options[:depth], token: token)
518
+ node = bnode
519
+ debug("blankNodePropertyList: subject", depth: options[:depth]) {node.to_sxp}
520
+ read_predicateObjectList(node)
521
+ error("blankNodePropertyList", "Expected closing ']'") unless @lexer.first === ']'
522
+ @lexer.shift
523
+ node
524
+ end
379
525
  end
380
526
  end
381
527
 
382
- def pathlistStart(prod)
383
- @prod_data << {pathlist: []}
384
- end
385
-
386
- def pathlistFinish
387
- pathlist = @prod_data.pop
388
- # Flatten propertylist into an array
389
- expr = @prod_data.last.delete(:expression)
390
- add_prod_data(:pathlist, expr) if expr
391
- add_prod_data(:pathlist, pathlist[:pathlist]) if pathlist[:pathlist]
392
- end
393
-
394
- def pathtailStart(prod)
395
- @prod_data << {pathtail: []}
396
- end
397
-
398
- def pathtailToken(prod, tok)
399
- case tok
400
- when "!", "."
401
- add_prod_data(:direction, :forward)
402
- when "^"
403
- add_prod_data(:direction, :reverse)
528
+ ##
529
+ # Read a collection (`RDF::List`)
530
+ #
531
+ # [21] collection ::= '(' object* ')'
532
+ #
533
+ # If the `list_terms` option is given, the resulting resource is a list, otherwise, it is the list subject, and the first/rest entries are also emitted.
534
+ # @return [RDF::Node]
535
+ def read_collection
536
+ if @lexer.first === '('
537
+ prod(:collection, %{)}) do
538
+ @lexer.shift
539
+ token = @lexer.first
540
+ progress("collection", depth: options[:depth]) {"token: #{token.inspect}"}
541
+ objects = []
542
+ while @lexer.first.value != ')' && (object = read_path)
543
+ objects << object
544
+ end
545
+ error("collection", "Expected closing ')'") unless @lexer.first === ')'
546
+ @lexer.shift
547
+ list = RDF::N3::List.new(values: objects)
548
+ if options[:list_terms]
549
+ list
550
+ else
551
+ list.each_statement do |statement|
552
+ add_statement("collection", *statement.to_a)
553
+ end
554
+ list.subject
555
+ end
556
+ end
404
557
  end
405
558
  end
406
559
 
407
- def pathtailFinish
408
- pathtail = @prod_data.pop
409
- add_prod_data(:pathtail, pathtail[:pathtail])
410
- add_prod_data(:direction, pathtail[:direction]) if pathtail[:direction]
411
- add_prod_data(:directiontail, pathtail[:directiontail]) if pathtail[:directiontail]
412
- end
560
+ ##
561
+ # Read a formula
562
+ #
563
+ # [22] formula ::= '{' formulaContent? '}'
564
+ # [23] formulaContent ::= n3Statement ('.' formulaContent?)?
565
+ #
566
+ # @return [RDF::Node]
567
+ def read_formula
568
+ if @lexer.first === '{'
569
+ prod(:formula, %(})) do
570
+ @lexer.shift
571
+ node = RDF::Node.intern("_form_#{unique_label}")
572
+ formulae.push(node)
573
+ formula_nodes[node] = true
574
+ debug(:formula, depth: @options[:depth]) {"id: #{node}, depth: #{formulae.length}"}
575
+
576
+ # Promote variables defined on the earlier formula to this formula
577
+ variables[node] = {}
578
+ variables.fetch(formulae[-2], {}).each do |name, var|
579
+ variables[node][name] = var
580
+ end
413
581
 
414
- def propertylistStart(prod)
415
- @prod_data << {}
416
- end
582
+ read_formulaContent
417
583
 
418
- def propertylistFinish
419
- propertylist = @prod_data.pop
420
- # Flatten propertylist into an array
421
- ary = [propertylist, propertylist.delete(:propertylist)].flatten.compact
422
- @prod_data.last[:propertylist] = ary
423
- end
584
+ # Pop off the formula
585
+ # Result is the BNode associated with the formula
586
+ debug(:formula, depth: @options[:depth]) {"pop: #{formulae.last}, depth: #{formulae.length}"}
587
+ error("collection", "Expected closing '}'") unless @lexer.shift === '}'
424
588
 
425
- def simpleStatementStart(prod)
426
- @prod_data << {}
589
+ formulae.pop
590
+ end
591
+ end
427
592
  end
428
593
 
429
- # Completion of Simple Statement, all productions include :subject, and :propertyList
430
- def simpleStatementFinish
431
- statement = @prod_data.pop
432
-
433
- subject = statement[:subject]
434
- properties = Array(statement[:propertylist])
435
- properties.each do |p|
436
- predicate = p[:verb]
437
- next unless predicate
438
- log_debug("simpleStatementFinish(pred)", depth: depth) {predicate.to_s}
439
- error(%(Illegal statment: "#{predicate}" missing object)) unless p.has_key?(:object)
440
- objects = Array(p[:object])
441
- objects.each do |object|
442
- if p[:invert]
443
- add_statement("simpleStatementFinish", object, predicate, subject)
594
+ ##
595
+ # Read formula content, similar to n3Statement
596
+ #
597
+ # [23] formulaContent ::= n3Statement ('.' formulaContent?)?
598
+ #
599
+ # @return [void]
600
+ def read_formulaContent
601
+ return if @lexer.first === '}' # Allow empty formula
602
+ prod(:formulaContent, %w(. })) do
603
+ loop do
604
+ token = @lexer.first
605
+ error("read_formulaContent", "Unexpected end of file") unless token
606
+ case token.type
607
+ when :BASE, :PREFIX
608
+ read_directive || error("Failed to parse directive", production: :directive, token: token)
609
+ break if @lexer.first === '}'
444
610
  else
445
- add_statement("simpleStatementFinish", subject, predicate, object)
611
+ read_n3Statement
612
+ token = @lexer.first
613
+ case token.value
614
+ when '.'
615
+ @lexer.shift
616
+ # '.' optional at end of formulaContent
617
+ break if @lexer.first === '}'
618
+ when '}'
619
+ break
620
+ else
621
+ error("Expected '.' or '}' following n3Statement", production: :formulaContent, token: token)
622
+ end
446
623
  end
447
624
  end
448
625
  end
449
626
  end
450
627
 
451
- def subjectStart(prod)
452
- @prod_data << {}
628
+ ##
629
+ # Read an IRI
630
+ #
631
+ # (rule iri "26" (alt IRIREF prefixedName))
632
+ #
633
+ # @return [RDF::URI]
634
+ def read_iri
635
+ token = @lexer.first
636
+ case token && token.type
637
+ when :IRIREF then prod(:iri) {process_iri(@lexer.shift.value[1..-2].gsub(/\s+/m, ''))}
638
+ when :PNAME_LN, :PNAME_NS then prod(:prefixedName) {process_pname(*@lexer.shift.value)}
639
+ end
453
640
  end
454
641
 
455
- def subjectFinish
456
- subject = @prod_data.pop
457
-
458
- if subject[:expression]
459
- add_prod_data(:subject, subject[:expression])
460
- else
461
- error("unknown expression type")
642
+ ##
643
+ # Read a blank node
644
+ #
645
+ # [29] blankNode ::= BLANK_NODE_LABEL | ANON
646
+ #
647
+ # @return [RDF::Node]
648
+ def read_blankNode
649
+ token = @lexer.first
650
+ case token && token.type
651
+ when :BLANK_NODE_LABEL then prod(:blankNode) {bnode(@lexer.shift.value[2..-1])}
652
+ when :ANON then @lexer.shift && prod(:blankNode) {bnode}
462
653
  end
463
654
  end
464
655
 
465
- def symbolToken(prod, tok)
466
- term = case prod
467
- when 'explicituri'
468
- process_uri(tok[1..-2])
469
- when 'qname'
470
- process_qname(tok)
471
- else
472
- error("symbolToken(#{prod}, #{tok}): FIXME #{term.inspect}")
656
+ ##
657
+ # Read a quickVar, having global scope.
658
+ #
659
+ # [30] quickVar ::= QUICK_VAR_NAME
660
+ #
661
+ # @return [RDF::Query::Variable]
662
+ def read_quickVar
663
+ if @lexer.first.type == :QUICK_VAR_NAME
664
+ prod(:quickVar) do
665
+ token = @lexer.shift
666
+ value = token.value.sub('?', '')
667
+ iri = ns(nil, "#{value}_quick")
668
+ variables[nil][iri] ||= univar(iri, scope: nil)
669
+ end
473
670
  end
474
-
475
- add_prod_data(:symbol, term)
476
671
  end
477
672
 
478
- def universalStart(prod)
479
- @prod_data << {}
673
+ ##
674
+ # Read a list of IRIs
675
+ #
676
+ # [27] iriList ::= iri ( ',' iri )*
677
+ #
678
+ # @return [Array<RDF::URI>] the list of IRIs
679
+ def read_irilist
680
+ iris = []
681
+ prod(:iriList, %{,}) do
682
+ while iri = read_iri
683
+ iris << iri
684
+ break unless @lexer.first === ','
685
+ @lexer.shift while @lexer.first === ','
686
+ end
687
+ end
688
+ iris
480
689
  end
481
690
 
691
+ ##
692
+ # Read a universal or existential
693
+ #
482
694
  # Apart from the set of statements, a formula also has a set of URIs of symbols which are universally quantified,
483
695
  # and a set of URIs of symbols which are existentially quantified.
484
696
  # Variables are then in general symbols which have been quantified.
485
697
  #
486
698
  # Here we allocate a variable (making up a name) and record with the defining formula. Quantification is done
487
699
  # when the formula is completed against all in-scope variables
488
- def universalFinish
489
- pd = @prod_data.pop
490
- forAll = Array(pd[:symbol])
491
- forAll.each do |term|
492
- add_var_to_formula(@formulae.last, term, univar(term))
493
- end
494
- end
495
-
496
- def verbStart(prod)
497
- @prod_data << {}
498
- end
499
-
500
- def verbToken(prod, tok)
501
- term = case prod
502
- when '<='
503
- add_prod_data(:expression, RDF::N3::Log.implies)
504
- add_prod_data(:invert, true)
505
- when '=>'
506
- add_prod_data(:expression, RDF::N3::Log.implies)
507
- when '='
508
- add_prod_data(:expression, RDF::OWL.sameAs)
509
- when '@a'
510
- add_prod_data(:expression, RDF.type)
511
- when '@has', "@of"
512
- # Syntactic sugar
513
- when '@is'
514
- add_prod_data(:invert, true)
515
- else
516
- error("verbToken(#{prod}, #{tok}): FIXME #{term.inspect}")
517
- end
518
-
519
- add_prod_data(:symbol, term)
520
- end
521
-
522
- def verbFinish
523
- verb = @prod_data.pop
524
- if verb[:expression]
525
- error("Literal may not be used as a predicate") if verb[:expression].is_a?(RDF::Literal)
526
- error("Formula may not be used as a peredicate") if @formulae_nodes.has_key?(verb[:expression])
527
- add_prod_data(:verb, verb[:expression])
528
- add_prod_data(:invert, true) if verb[:invert]
529
- else
530
- error("verbFinish: FIXME #{verb.inspect}")
700
+ #
701
+ # [31] existential ::= '@forSome' iriList
702
+ # [32] universal ::= '@forAll' iriList
703
+ #
704
+ # @return [void]
705
+ def read_uniext
706
+ if %w(@forSome @forAll).include?(@lexer.first.value)
707
+ token = @lexer.shift
708
+ prod(token === '@forAll' ? :universal : :existential) do
709
+ iri_list = read_irilist
710
+ iri_list.each do |iri|
711
+ # Note, this might re-create an equivalent variable already defined in this formula, and replaces an equivalent variable that may have been defined in the parent formula.
712
+ var = univar(iri, scope: formulae.last, existential: token === '@forSome')
713
+ add_var_to_formula(formulae.last, iri, var)
714
+ end
715
+ end
531
716
  end
532
717
  end
533
718
 
534
- private
535
-
536
719
  ###################
537
720
  # Utility Functions
538
721
  ###################
539
722
 
540
- def process_anonnode(anonnode)
541
- log_debug("process_anonnode", depth: depth) {anonnode.inspect}
542
-
543
- if anonnode[:propertylist]
544
- properties = anonnode[:propertylist]
545
- bnode = bnode()
546
- properties.each do |p|
547
- predicate = p[:verb]
548
- log_debug("process_anonnode(verb)", depth: depth) {predicate.inspect}
549
- objects = Array(p[:object])
550
- objects.each do |object|
551
- if p[:invert]
552
- add_statement("anonnode", object, predicate, bnode)
553
- else
554
- add_statement("anonnode", bnode, predicate, object)
555
- end
556
- end
557
- end
558
- bnode
559
- elsif anonnode[:pathlist]
560
- objects = Array(anonnode[:pathlist])
561
- list = RDF::List[*objects]
562
- list_subjects = {}
563
- list.each_statement do |statement|
564
- next if statement.predicate == RDF.type && statement.object == RDF.List
565
- add_statement("anonnode(list)", statement.subject, statement.predicate, statement.object)
566
- end
567
- list.subject
568
- end
569
- end
570
-
571
723
  # Process a path, such as:
572
- # :a.:b means [is :b of :a] Deprecated
573
724
  # :a!:b means [is :b of :a] => :a :b []
574
725
  # :a^:b means [:b :a] => [] :b :a
575
726
  #
576
727
  # Create triple and return property used for next iteration
577
- def process_path(expression)
578
- log_debug("process_path", depth: depth) {expression.inspect}
579
-
580
- pathitem = expression[:pathitem]
581
- pathtail = expression[:pathtail]
582
-
583
- direction_list = [expression[:direction], expression[:directiontail]].flatten.compact
728
+ #
729
+ # Result is last created bnode
730
+ def process_path(path)
731
+ pathitem, direction, pathtail = path[:pathitem], path[:direction], path[:pathtail]
732
+ debug("process_path", depth: @options[:depth]) {path.inspect}
584
733
 
585
- pathtail.each do |pred|
586
- direction = direction_list.shift
587
- bnode = RDF::Node.new
734
+ while pathtail
735
+ bnode = bnode()
736
+ pred = pathtail.is_a?(RDF::Term) ? pathtail : pathtail[:pathitem]
588
737
  if direction == :reverse
589
738
  add_statement("process_path(reverse)", bnode, pred, pathitem)
590
739
  else
591
740
  add_statement("process_path(forward)", pathitem, pred, bnode)
592
741
  end
593
742
  pathitem = bnode
743
+ direction = pathtail[:direction] if pathtail.is_a?(Hash)
744
+ pathtail = pathtail.is_a?(Hash) && pathtail[:pathtail]
594
745
  end
595
746
  pathitem
596
747
  end
597
748
 
598
- def process_uri(uri)
599
- uri(base_uri, RDF::NTriples.unescape(uri))
600
- end
601
-
602
- def process_qname(tok)
603
- if tok.include?(":")
604
- prefix, name = tok.split(":")
605
- elsif @userkeys
606
- # If the @keywords directive is given, the keywords given will thereafter be recognized
607
- # without a "@" prefix, and anything else is a local name in the default namespace.
608
- prefix, name = "", tok
609
- elsif %w(true false).include?(tok)
610
- # The words true and false are boolean literals.
611
- #
612
- # They were added to Notation3 in 2006-02 in discussion with the SPARQL language developers, the Data
613
- # Access Working Group. Note that no existing documents will have used a naked true or false word, without a
614
- # @keyword statement which would make it clear that they were not to be treated as keywords. Furthermore any
615
- # old parser encountering true or false naked or in a @keywords
616
- return RDF::Literal.new(tok, datatype: RDF::XSD.boolean)
617
- else
618
- error("Set user @keywords to use barenames (#{tok}).")
619
- end
749
+ def process_iri(iri)
750
+ iri(base_uri, iri.to_s)
751
+ end
620
752
 
621
- uri = if prefix(prefix)
622
- log_debug('process_qname(ns)', depth: depth) {"#{prefix(prefix)}, #{name}"}
753
+ def process_pname(value)
754
+ prefix, name = value.split(":", 2)
755
+
756
+ iri = if prefix(prefix)
757
+ #debug('process_pname(ns)', depth: @options[:depth]) {"#{prefix(prefix)}, #{name}"}
623
758
  ns(prefix, name)
624
- elsif prefix == '_'
625
- log_debug('process_qname(bnode)', name, depth: depth)
626
- # If we're in a formula, create a non-distigushed variable instead
627
- # Note from https://www.w3.org/TeamSubmission/n3/#bnodes, it seems the blank nodes are scoped to the formula, not the file.
628
- bnode(name)
629
- else
630
- log_debug('process_qname(default_ns)', name, depth: depth)
631
- namespace(nil, uri("#{base_uri}#")) unless prefix(nil)
759
+ elsif prefix && !prefix.empty?
760
+ error("process_pname", "Use of undefined prefix #{prefix.inspect}")
632
761
  ns(nil, name)
633
- end
634
- log_debug('process_qname', depth: depth) {uri.inspect}
635
- uri
636
- end
637
-
638
- # Add values to production data, values aranged as an array
639
- def add_prod_data(sym, value)
640
- case @prod_data.last[sym]
641
- when nil
642
- @prod_data.last[sym] = value
643
- when Array
644
- @prod_data.last[sym] += Array(value)
645
762
  else
646
- @prod_data.last[sym] = Array(@prod_data.last[sym]) + Array(value)
763
+ ns(nil, name)
647
764
  end
765
+ debug('process_pname', depth: @options[:depth]) {iri.inspect}
766
+ iri
648
767
  end
649
768
 
650
769
  # Keep track of allocated BNodes. Blank nodes are allocated to the formula.
770
+ # Unnamed bnodes are created using an incrementing labeler for repeatability.
651
771
  def bnode(label = nil)
772
+ form_id = formulae.last ? formulae.last.id : '_bn_ground'
652
773
  if label
653
- value = "#{label}_#{unique_label}"
654
- (@bnodes[@formulae.last] ||= {})[label.to_s] ||= RDF::Node.new(value)
655
- else
656
- RDF::Node.new
774
+ # Return the blank node previously allocated for this label in the current formula.
775
+ @bn_mapper[form_id] ||= {}
776
+ return @bn_mapper[form_id][label] if @bn_mapper[form_id][label]
657
777
  end
778
+
779
+ # Get a fresh label
780
+ @bn_labler.succ! while @bnodes[@bn_labler]
781
+
782
+ bn = RDF::Node.intern(@bn_labler.to_sym)
783
+ @bnodes[@bn_labler] = bn
784
+ @bn_mapper[form_id][label] = bn if label
785
+ bn
658
786
  end
659
787
 
660
- def univar(label, existential: false)
661
- # Label using any provided label, followed by seed, followed by incrementing index
662
- value = "#{label}_#{unique_label}"
788
+ # If not in ground formula, note scope, and if existential
789
+ def univar(label, scope:, existential: false)
790
+ value = existential ? "#{label}_ext" : label
791
+ value = "#{value}#{scope.id}" if scope
663
792
  RDF::Query::Variable.new(value, existential: existential)
664
793
  end
665
794
 
@@ -678,46 +807,56 @@ module RDF::N3
678
807
  else
679
808
  RDF::Statement(subject, predicate, object)
680
809
  end
681
- log_debug("statement(#{node})", depth: depth) {statement.to_s}
810
+ debug("statement(#{node})", depth: @options[:depth]) {statement.to_s}
811
+ error("statement(#{node})", "Statement is invalid: #{statement.inspect}") if validate? && statement.invalid?
682
812
  @callback.call(statement)
683
813
  end
684
814
 
685
- def namespace(prefix, uri)
686
- uri = uri.to_s
687
- if uri == '#'
688
- uri = prefix(nil).to_s + '#'
815
+ def namespace(prefix, iri)
816
+ iri = iri.to_s
817
+ if iri == '#'
818
+ iri = prefix(nil).to_s + '#'
689
819
  end
690
- log_debug("namespace", depth: depth) {"'#{prefix}' <#{uri}>"}
691
- prefix(prefix, uri(uri))
820
+ debug("namespace", depth: @options[:depth]) {"'#{prefix}' <#{iri}>"}
821
+ prefix(prefix, iri(iri))
692
822
  end
693
823
 
694
- # Is this an allowable keyword?
695
- def keyword_check(kw)
696
- unless (@keywords || %w(a is of has)).include?(kw)
697
- raise RDF::ReaderError, "unqualified keyword '#{kw}' used without @keyword directive" if validate?
698
- end
699
- end
700
-
701
- # Create URIs
702
- def uri(value, append = nil)
824
+ # Create IRIs
825
+ def iri(value, append = nil)
703
826
  value = RDF::URI(value)
704
827
  value = value.join(append) if append
705
828
  value.validate! if validate? && value.respond_to?(:validate)
706
829
  value.canonicalize! if canonicalize?
707
- value = RDF::URI.intern(value) if intern?
708
830
 
709
831
  # Variable substitution for in-scope variables. Variables are in scope if they are defined in anything other than the current formula
710
- var = find_var(@formulae.last, value)
832
+ var = find_var(value)
711
833
  value = var if var
712
834
 
713
835
  value
836
+ rescue ArgumentError => e
837
+ error("iri", e.message)
838
+ end
839
+
840
+ # Create a literal
841
+ def literal(value, **options)
842
+ debug("literal", depth: @options[:depth]) do
843
+ "value: #{value.inspect}, " +
844
+ "options: #{options.inspect}, " +
845
+ "validate: #{validate?.inspect}, " +
846
+ "c14n?: #{canonicalize?.inspect}"
847
+ end
848
+ RDF::Literal.new(value, validate: validate?, canonicalize: canonicalize?, **options)
849
+ rescue ArgumentError => e
850
+ error("Argument Error #{e.message}", production: :literal, token: @lexer.first)
714
851
  end
715
852
 
716
- def ns(prefix, suffix)
853
+ # Decode a PName
854
+ def ns(prefix = nil, suffix = nil)
855
+ namespace(nil, iri("#{base_uri}#")) if prefix.nil? && !prefix(nil)
856
+
717
857
  base = prefix(prefix).to_s
718
858
  suffix = suffix.to_s.sub(/^\#/, "") if base.index("#")
719
- log_debug("ns", depth: depth) {"base: '#{base}', suffix: '#{suffix}'"}
720
- uri(base + suffix.to_s)
859
+ iri(base + suffix.to_s)
721
860
  end
722
861
 
723
862
  # Returns a unique label
@@ -727,11 +866,10 @@ module RDF::N3
727
866
  end
728
867
 
729
868
  # Find any variable that may be defined in the current formula (the last entry of `@formulae`)
730
- # @param [RDF::Node] bn name of formula
731
- # @param [#to_s] name
869
+ # @param [#to_s] name name of the variable to look up
732
870
  # @return [RDF::Query::Variable]
733
- def find_var(sym, name)
734
- (@variables[sym] ||= {})[name.to_s]
871
+ def find_var(name)
872
+ (variables[@formulae.last] ||= {})[name.to_s]
735
873
  end
736
874
 
737
875
  # Add a variable to the formula identified by `bn`, returning the variable. Useful as an LRU for variable name lookups
@@ -740,7 +878,146 @@ module RDF::N3
740
878
  # @param [RDF::Query::Variable] var
741
879
  # @return [RDF::Query::Variable]
742
880
  def add_var_to_formula(bn, name, var)
743
- (@variables[bn] ||= {})[name.to_s] = var
881
+ (variables[bn] ||= {})[name.to_s] = var
882
+ end
883
+
884
+ def prod(production, recover_to = [])
885
+ @prod_stack << {prod: production, recover_to: recover_to}
886
+ @options[:depth] += 1
887
+ recover("#{production}(start)", depth: options[:depth], token: @lexer.first)
888
+ yield
889
+ rescue EBNF::LL1::Lexer::Error, SyntaxError, Recovery => e
890
+ # Lexer encountered an illegal token or the parser encountered
891
+ # a terminal which is inappropriate for the current production.
892
+ # Perform error recovery to find a reasonable terminal based
893
+ # on the follow sets of the relevant productions. This includes
894
+ # remaining terms from the current production and the stacked
895
+ # productions
896
+ case e
897
+ when EBNF::LL1::Lexer::Error
898
+ @lexer.recover
899
+ begin
900
+ error("Lexer error", "With input '#{e.input}': #{e.message}",
901
+ production: production,
902
+ token: e.token)
903
+ rescue SyntaxError
904
+ end
905
+ end
906
+ raise EOFError, "End of input found when recovering" if @lexer.first.nil?
907
+ debug("recovery", "current token: #{@lexer.first.inspect}", depth: @options[:depth])
908
+
909
+ unless e.is_a?(Recovery)
910
+ # Get the list of follows for this sequence, this production and the stacked productions.
911
+ debug("recovery", "stack follows:", depth: @options[:depth])
912
+ @prod_stack.reverse.each do |prod|
913
+ debug("recovery", level: 1, depth: @options[:depth]) {" #{prod[:prod]}: #{prod[:recover_to].inspect}"}
914
+ end
915
+ end
916
+
917
+ # Find all follows to the top of the stack
918
+ follows = @prod_stack.map {|prod| Array(prod[:recover_to])}.flatten.compact.uniq
919
+
920
+ # Skip tokens until one is found in follows
921
+ while (token = (@lexer.first rescue @lexer.recover)) && follows.none? {|t| token === t}
922
+ skipped = @lexer.shift
923
+ debug("recovery", depth: @options[:depth]) {"skip #{skipped.inspect}"}
924
+ end
925
+ debug("recovery", depth: @options[:depth]) {"found #{token.inspect} in follows"}
926
+
927
+ # Re-raise the error unless token is a follows of this production
928
+ raise Recovery unless Array(recover_to).any? {|t| token === t}
929
+
930
+ # Skip that token to get something reasonable to start the next production with
931
+ @lexer.shift
932
+ ensure
933
+ progress("#{production}(finish)", depth: options[:depth])
934
+ @options[:depth] -= 1
935
+ @prod_stack.pop
936
+ end
937
+
938
+ def progress(*args, &block)
939
+ lineno = (options[:token].lineno if options[:token].respond_to?(:lineno)) || (@lexer && @lexer.lineno)
940
+ opts = args.last.is_a?(Hash) ? args.pop : {}
941
+ opts[:level] ||= 1
942
+ opts[:lineno] ||= lineno
943
+ log_info(*args, **opts, &block)
944
+ end
945
+
946
+ def recover(*args, &block)
947
+ lineno = (options[:token].lineno if options[:token].respond_to?(:lineno)) || (@lexer && @lexer.lineno)
948
+ opts = args.last.is_a?(Hash) ? args.pop : {}
949
+ opts[:level] ||= 1
950
+ opts[:lineno] ||= lineno
951
+ log_recover(*args, **opts, &block)
952
+ end
953
+
954
+ def debug(*args, &block)
955
+ lineno = (options[:token].lineno if options[:token].respond_to?(:lineno)) || (@lexer && @lexer.lineno)
956
+ opts = args.last.is_a?(Hash) ? args.pop : {}
957
+ opts[:level] ||= 0
958
+ opts[:lineno] ||= lineno
959
+ log_debug(*args, **opts, &block)
960
+ end
961
+
962
+ ##
963
+ # Error information, used as level `0` debug messages.
964
+ #
965
+ # @overload error(node, message, options)
966
+ # @param [String] node Relevant location associated with message
967
+ # @param [String] message Error string
968
+ # @param [Hash] options
969
+ # @option options [URI, #to_s] :production
970
+ # @option options [Token] :token
971
+ # @see {#debug}
972
+ def error(*args)
973
+ ctx = ""
974
+ ctx += "(found #{options[:token].inspect})" if options[:token]
975
+ ctx += ", production = #{options[:production].inspect}" if options[:production]
976
+ lineno = (options[:token].lineno if options[:token].respond_to?(:lineno)) || (@lexer && @lexer.lineno)
977
+ log_error(*args, ctx,
978
+ lineno: lineno,
979
+ token: options[:token],
980
+ production: options[:production],
981
+ depth: options[:depth],
982
+ exception: SyntaxError,)
983
+ end
984
+
985
+ # Used for internal error recovery
986
+ class Recovery < StandardError; end
987
+
988
+ class SyntaxError < RDF::ReaderError
989
+ ##
990
+ # The current production.
991
+ #
992
+ # @return [Symbol]
993
+ attr_reader :production
994
+
995
+ ##
996
+ # The invalid token which triggered the error.
997
+ #
998
+ # @return [String]
999
+ attr_reader :token
1000
+
1001
+ ##
1002
+ # The line number where the error occurred.
1003
+ #
1004
+ # @return [Integer]
1005
+ attr_reader :lineno
1006
+
1007
+ ##
1008
+ # Initializes a new syntax error instance.
1009
+ #
1010
+ # @param [String, #to_s] message
1011
+ # @param [Hash{Symbol => Object}] options
1012
+ # @option options [Symbol] :production (nil)
1013
+ # @option options [String] :token (nil)
1014
+ # @option options [Integer] :lineno (nil)
1015
+ def initialize(message, **options)
1016
+ @production = options[:production]
1017
+ @token = options[:token]
1018
+ @lineno = options[:lineno] || (@token.lineno if @token.respond_to?(:lineno))
1019
+ super(message.to_s)
1020
+ end
744
1021
  end
745
1022
  end
746
1023
  end