rdf-turtle 1.1.7 → 1.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +8 -15
- data/VERSION +1 -1
- data/lib/rdf/turtle/reader.rb +502 -224
- data/lib/rdf/turtle/streaming_writer.rb +4 -4
- data/lib/rdf/turtle/writer.rb +4 -1
- metadata +4 -25
- data/lib/rdf/turtle/meta.rb +0 -1461
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3eb247803e3c16fd5a6338c2175664deaeeea53c
|
4
|
+
data.tar.gz: cf0ea0c919bfc0421e91eb0dbd7337fd61dfe881
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a461d97b2f6c0fb5a8e63cf0f07f427cbea96dbf38952b0da5dac2064464ff999237067a53d7ac18ddeaaff809a74db273f93095c6007b7d2a13f0e826a15e4
|
7
|
+
data.tar.gz: d145feb24bbda56cbdb7992becc7fe68c501efebac7e020bdcb6e87015e09189951f77b0516e2ad8db84f7197e4a055b4fb7564b603b5da1809eb7cffb1b0f7d
|
data/README.md
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
|
5
5
|
[](http://badge.fury.io/rb/rdf-turtle)
|
6
6
|
[](http://travis-ci.org/ruby-rdf/rdf-turtle)
|
7
|
+
[](https://coveralls.io/r/ruby-rdf/rdf-turtle)
|
8
|
+
[](https://gemnasium.com/ruby-rdf/rdf-turtle)
|
7
9
|
|
8
10
|
## Description
|
9
11
|
This is a [Ruby][] implementation of a [Turtle][] parser for [RDF.rb][].
|
@@ -46,10 +48,7 @@ Full documentation available on [Rubydoc.info][Turtle doc]
|
|
46
48
|
### Variations from the spec
|
47
49
|
In some cases, the specification is unclear on certain issues:
|
48
50
|
|
49
|
-
* The LC version of the [Turtle][] specification separates rules for `@base` and `@prefix` with
|
50
|
-
closing '.' from the
|
51
|
-
SPARQL-like `BASE` and `PREFIX` without closing '.'. This version implements a more flexible
|
52
|
-
syntax where the `@` and closing `.` are optional and `base/prefix` are matched case independently.
|
51
|
+
* The LC version of the [Turtle][] specification separates rules for `@base` and `@prefix` with closing '.' from the SPARQL-like `BASE` and `PREFIX` without closing '.'. This version implements a more flexible syntax where the `@` and closing `.` are optional and `base/prefix` are matched case independently.
|
53
52
|
* Additionally, both `a` and `A` match `rdf:type`.
|
54
53
|
|
55
54
|
### Freebase-specific Reader
|
@@ -83,19 +82,13 @@ An example of reading Freebase dumps:
|
|
83
82
|
r.each_statement {|stmt| puts stmt.to_ntriples}
|
84
83
|
end
|
85
84
|
## Implementation Notes
|
86
|
-
|
87
|
-
the `Parser` and `Lexer` modules to implement the Turtle parser.
|
88
|
-
|
89
|
-
The parser takes branch and follow tables generated from the original [Turtle
|
90
|
-
EBNF Grammar][Turtle EBNF] described in the [specification][Turtle]. Branch and
|
91
|
-
Follow tables are specified in {RDF::Turtle::Meta}, which is in turn generated
|
92
|
-
using the [EBNF][] gem.
|
85
|
+
This version uses a hand-written parser using the Lexer from the [EBNF][] gem instead of a general [EBNF][] LL(1) parser for faster performance.
|
93
86
|
|
94
87
|
## Dependencies
|
95
88
|
|
96
|
-
* [Ruby](http://ruby-lang.org/) (>= 1.9.
|
97
|
-
* [RDF.rb](http://rubygems.org/gems/rdf) (
|
98
|
-
* [EBNF][] (
|
89
|
+
* [Ruby](http://ruby-lang.org/) (>= 1.9.3)
|
90
|
+
* [RDF.rb](http://rubygems.org/gems/rdf) (~> 1.1)
|
91
|
+
* [EBNF][] (~> 0.3)
|
99
92
|
|
100
93
|
## Installation
|
101
94
|
|
@@ -136,7 +129,7 @@ A copy of the [Turtle EBNF][] and derived parser files are included in the repos
|
|
136
129
|
[YARD]: http://yardoc.org/
|
137
130
|
[YARD-GS]: http://rubydoc.info/docs/yard/file/docs/GettingStarted.md
|
138
131
|
[PDD]: http://lists.w3.org/Archives/Public/public-rdf-ruby/2010May/0013.html
|
139
|
-
[RDF.rb]: http://rubydoc.info/github/ruby-rdf/rdf
|
132
|
+
[RDF.rb]: http://rubydoc.info/github/ruby-rdf/rdf
|
140
133
|
[EBNF]: http://rubygems.org/gems/ebnf
|
141
134
|
[Backports]: http://rubygems.org/gems/backports
|
142
135
|
[N-Triples]: http://www.w3.org/TR/rdf-testcases/#ntriples
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.1.
|
1
|
+
1.1.8
|
data/lib/rdf/turtle/reader.rb
CHANGED
@@ -1,203 +1,44 @@
|
|
1
|
-
|
2
|
-
require 'ebnf/ll1/
|
1
|
+
# coding: utf-8
|
2
|
+
require 'ebnf/ll1/lexer'
|
3
3
|
|
4
4
|
module RDF::Turtle
|
5
5
|
##
|
6
6
|
# A parser for the Turtle 2
|
7
7
|
class Reader < RDF::Reader
|
8
8
|
format Format
|
9
|
-
include RDF::Turtle::Meta
|
10
9
|
include EBNF::LL1::Parser
|
11
10
|
include RDF::Turtle::Terminals
|
12
11
|
|
13
12
|
# Terminals passed to lexer. Order matters!
|
14
|
-
terminal(:ANON,
|
15
|
-
|
16
|
-
|
17
|
-
terminal(:
|
18
|
-
|
19
|
-
|
20
|
-
terminal(:
|
21
|
-
|
22
|
-
|
23
|
-
terminal(:
|
24
|
-
|
25
|
-
|
26
|
-
value = token.value.sub(/\.([eE])/, '.0\1')
|
27
|
-
input[:resource] = literal(value, datatype: RDF::XSD.double)
|
28
|
-
end
|
29
|
-
terminal(:DECIMAL, DECIMAL) do |prod, token, input|
|
30
|
-
# Note that a Turtle Decimal may begin with a '.', so tack on a leading
|
31
|
-
# zero if necessary
|
32
|
-
value = token.value
|
33
|
-
value = "0#{token.value}" if token.value[0,1] == "."
|
34
|
-
input[:resource] = literal(value, datatype: RDF::XSD.decimal)
|
35
|
-
end
|
36
|
-
terminal(:INTEGER, INTEGER) do |prod, token, input|
|
37
|
-
input[:resource] = literal(token.value, datatype: RDF::XSD.integer)
|
38
|
-
end
|
39
|
-
# Spec confusion: spec says : "Literals , prefixed names and IRIs may also contain escape sequences"
|
40
|
-
terminal(:PNAME_LN, PNAME_LN, unescape: true) do |prod, token, input|
|
41
|
-
prefix, suffix = token.value.split(":", 2)
|
42
|
-
input[:resource] = pname(prefix, suffix)
|
43
|
-
end
|
44
|
-
# Spec confusion: spec says : "Literals , prefixed names and IRIs may also contain escape sequences"
|
45
|
-
terminal(:PNAME_NS, PNAME_NS) do |prod, token, input|
|
46
|
-
prefix = token.value[0..-2]
|
47
|
-
|
48
|
-
# Two contexts, one when prefix is being defined, the other when being used
|
49
|
-
case prod
|
50
|
-
when :prefixID, :sparqlPrefix
|
51
|
-
input[:prefix] = prefix
|
52
|
-
else
|
53
|
-
input[:resource] = pname(prefix, '')
|
54
|
-
end
|
55
|
-
end
|
56
|
-
terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, unescape: true) do |prod, token, input|
|
57
|
-
input[:string_value] = token.value[3..-4]
|
58
|
-
end
|
59
|
-
terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, unescape: true) do |prod, token, input|
|
60
|
-
input[:string_value] = token.value[3..-4]
|
61
|
-
end
|
62
|
-
terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, unescape: true) do |prod, token, input|
|
63
|
-
input[:string_value] = token.value[1..-2]
|
64
|
-
end
|
65
|
-
terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, unescape: true) do |prod, token, input|
|
66
|
-
input[:string_value] = token.value[1..-2]
|
67
|
-
end
|
13
|
+
terminal(:ANON, ANON)
|
14
|
+
terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL)
|
15
|
+
terminal(:IRIREF, IRIREF, unescape: true)
|
16
|
+
terminal(:DOUBLE, DOUBLE)
|
17
|
+
terminal(:DECIMAL, DECIMAL)
|
18
|
+
terminal(:INTEGER, INTEGER)
|
19
|
+
terminal(:PNAME_LN, PNAME_LN, unescape: true)
|
20
|
+
terminal(:PNAME_NS, PNAME_NS)
|
21
|
+
terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, unescape: true)
|
22
|
+
terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, unescape: true)
|
23
|
+
terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, unescape: true)
|
24
|
+
terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, unescape: true)
|
68
25
|
|
69
26
|
# String terminals
|
70
|
-
terminal(nil,
|
71
|
-
case token.value
|
72
|
-
when 'A', 'a' then input[:resource] = RDF.type
|
73
|
-
when 'true', 'false' then input[:resource] = RDF::Literal::Boolean.new(token.value)
|
74
|
-
when '@base', '@prefix' then input[:lang] = token.value[1..-1]
|
75
|
-
when '.' then input[:terminated] = true
|
76
|
-
else input[:string] = token.value
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
terminal(:PREFIX, PREFIX) do |prod, token, input|
|
81
|
-
input[:string_value] = token.value
|
82
|
-
end
|
83
|
-
terminal(:BASE, BASE) do |prod, token, input|
|
84
|
-
input[:string_value] = token.value
|
85
|
-
end
|
86
|
-
|
87
|
-
terminal(:LANGTAG, LANGTAG) do |prod, token, input|
|
88
|
-
input[:lang] = token.value[1..-1]
|
89
|
-
end
|
27
|
+
terminal(nil, %r([\(\),.;\[\]Aa]|\^\^|true|false))
|
90
28
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
prefix = current[:prefix]
|
95
|
-
iri = current[:resource]
|
96
|
-
lexical = current[:string_value]
|
97
|
-
terminated = current[:terminated]
|
98
|
-
debug("prefixID") {"Defined prefix #{prefix.inspect} mapping to #{iri.inspect}"}
|
99
|
-
if lexical.start_with?('@') && lexical != '@prefix'
|
100
|
-
error(:prefixID, "should be downcased")
|
101
|
-
elsif lexical == '@prefix'
|
102
|
-
error(:prefixID, "directive not terminated") unless terminated
|
103
|
-
else
|
104
|
-
error(:prefixID, "directive should not be terminated") if terminated
|
105
|
-
end
|
106
|
-
prefix(prefix, iri)
|
107
|
-
end
|
108
|
-
|
109
|
-
# [5] base set base_uri
|
110
|
-
production(:base) do |input, current, callback|
|
111
|
-
iri = current[:resource]
|
112
|
-
lexical = current[:string_value]
|
113
|
-
terminated = current[:terminated]
|
114
|
-
debug("base") {"Defined base as #{iri}"}
|
115
|
-
if lexical.start_with?('@') && lexical != '@base'
|
116
|
-
error(:base, "should be downcased")
|
117
|
-
elsif lexical == '@base'
|
118
|
-
error(:base, "directive not terminated") unless terminated
|
119
|
-
else
|
120
|
-
error(:base, "directive should not be terminated") if terminated
|
121
|
-
end
|
122
|
-
options[:base_uri] = iri
|
123
|
-
end
|
124
|
-
|
125
|
-
# [6] triples
|
126
|
-
start_production(:triples) do |input, current, callback|
|
127
|
-
# Note production as triples for blankNodePropertyList
|
128
|
-
# to set :subject instead of :resource
|
129
|
-
current[:triples] = true
|
130
|
-
end
|
131
|
-
production(:triples) do |input, current, callback|
|
132
|
-
# Note production as triples for blankNodePropertyList
|
133
|
-
# to set :subject instead of :resource
|
134
|
-
current[:triples] = true
|
135
|
-
end
|
29
|
+
terminal(:PREFIX, PREFIX)
|
30
|
+
terminal(:BASE, BASE)
|
31
|
+
terminal(:LANGTAG, LANGTAG)
|
136
32
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
# [10] subject ::= IRIref | BlankNode | collection
|
143
|
-
start_production(:subject) do |input, current, callback|
|
144
|
-
current[:triples] = nil
|
145
|
-
end
|
146
|
-
|
147
|
-
production(:subject) do |input, current, callback|
|
148
|
-
input[:subject] = current[:resource]
|
149
|
-
end
|
150
|
-
|
151
|
-
# [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal
|
152
|
-
production(:object) do |input, current, callback|
|
153
|
-
if input[:object_list]
|
154
|
-
# Part of an rdf:List collection
|
155
|
-
input[:object_list] << current[:resource]
|
156
|
-
else
|
157
|
-
debug("object") {"current: #{current.inspect}"}
|
158
|
-
callback.call(:statement, "object", input[:subject], input[:predicate], current[:resource])
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
162
|
-
# [14] blankNodePropertyList ::= "[" predicateObjectList "]"
|
163
|
-
start_production(:blankNodePropertyList) do |input, current, callback|
|
164
|
-
current[:subject] = self.bnode
|
165
|
-
end
|
166
|
-
|
167
|
-
production(:blankNodePropertyList) do |input, current, callback|
|
168
|
-
if input[:triples]
|
169
|
-
input[:subject] = current[:subject]
|
170
|
-
else
|
171
|
-
input[:resource] = current[:subject]
|
172
|
-
end
|
173
|
-
end
|
174
|
-
|
175
|
-
# [15] collection ::= "(" object* ")"
|
176
|
-
start_production(:collection) do |input, current, callback|
|
177
|
-
# Tells the object production to collect and not generate statements
|
178
|
-
current[:object_list] = []
|
179
|
-
end
|
180
|
-
|
181
|
-
production(:collection) do |input, current, callback|
|
182
|
-
# Create an RDF list
|
183
|
-
objects = current[:object_list]
|
184
|
-
list = RDF::List[*objects]
|
185
|
-
list.each_statement do |statement|
|
186
|
-
next if statement.predicate == RDF.type && statement.object == RDF.List
|
187
|
-
callback.call(:statement, "collection", statement.subject, statement.predicate, statement.object)
|
188
|
-
end
|
33
|
+
##
|
34
|
+
# Accumulated errors found during processing
|
35
|
+
# @return [Array<String>]
|
36
|
+
attr_reader :errors
|
189
37
|
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
# [16] RDFLiteral ::= String ( LanguageTag | ( "^^" IRIref ) )?
|
195
|
-
production(:RDFLiteral) do |input, current, callback|
|
196
|
-
opts = {}
|
197
|
-
opts[:datatype] = current[:resource] if current[:resource]
|
198
|
-
opts[:language] = current[:lang] if current[:lang]
|
199
|
-
input[:resource] = literal(current[:string_value], opts)
|
200
|
-
end
|
38
|
+
##
|
39
|
+
# Accumulated warnings found during processing
|
40
|
+
# @return [Array<String>]
|
41
|
+
attr_reader :warnings
|
201
42
|
|
202
43
|
##
|
203
44
|
# Redirect for Freebase Reader
|
@@ -229,13 +70,13 @@ module RDF::Turtle
|
|
229
70
|
# the base URI to use when resolving relative URIs (for accessing intermediate parser productions)
|
230
71
|
# @option options [#to_s] :anon_base ("b0")
|
231
72
|
# Basis for generating anonymous Nodes
|
232
|
-
# @option options [Boolean] :resolve_uris (false)
|
233
|
-
# Resolve prefix and relative IRIs, otherwise, when serializing the parsed SSE
|
234
|
-
# as S-Expressions, use the original prefixed and relative URIs along with `base` and `prefix`
|
235
|
-
# definitions.
|
236
73
|
# @option options [Boolean] :validate (false)
|
237
74
|
# whether to validate the parsed statements and values. If not validating,
|
238
75
|
# the parser will attempt to recover from errors.
|
76
|
+
# @option options [Array] :errors
|
77
|
+
# array for placing errors found when parsing
|
78
|
+
# @option options [Array] :warnings
|
79
|
+
# array for placing warnings found when parsing
|
239
80
|
# @option options [Boolean] :progress
|
240
81
|
# Show progress of parser productions
|
241
82
|
# @option options [Boolean, Integer, Array] :debug
|
@@ -255,6 +96,11 @@ module RDF::Turtle
|
|
255
96
|
whitespace: WS,
|
256
97
|
}.merge(options)
|
257
98
|
@options = {prefixes: {nil => ""}}.merge(@options) unless @options[:validate]
|
99
|
+
@errors = @options[:errors] || []
|
100
|
+
@warnings = @options[:warnings] || []
|
101
|
+
@depth = 0
|
102
|
+
@prod_stack = []
|
103
|
+
|
258
104
|
@options[:debug] ||= case
|
259
105
|
when RDF::Turtle.debug? then true
|
260
106
|
when @options[:progress] then 2
|
@@ -268,6 +114,8 @@ module RDF::Turtle
|
|
268
114
|
debug("canonicalize") {canonicalize?.inspect}
|
269
115
|
debug("intern") {intern?.inspect}
|
270
116
|
|
117
|
+
@lexer = EBNF::LL1::Lexer.new(input, self.class.patterns, @options)
|
118
|
+
|
271
119
|
if block_given?
|
272
120
|
case block.arity
|
273
121
|
when 0 then instance_eval(&block)
|
@@ -289,41 +137,28 @@ module RDF::Turtle
|
|
289
137
|
# @return [void]
|
290
138
|
def each_statement(&block)
|
291
139
|
if block_given?
|
140
|
+
@recovering = false
|
292
141
|
@callback = block
|
293
142
|
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
case @options[:debug]
|
310
|
-
when Array
|
311
|
-
@options[:debug] << str
|
312
|
-
when TrueClass
|
313
|
-
$stderr.puts str
|
314
|
-
when Integer
|
315
|
-
$stderr.puts(str) if level <= @options[:debug]
|
316
|
-
end
|
143
|
+
begin
|
144
|
+
while (@lexer.first rescue true)
|
145
|
+
read_statement
|
146
|
+
end
|
147
|
+
rescue EBNF::LL1::Lexer::Error, SyntaxError, EOFError, Recovery
|
148
|
+
# Terminate loop if EOF found while recovering
|
149
|
+
end
|
150
|
+
|
151
|
+
if validate?
|
152
|
+
if !warnings.empty? && !@options[:warnings]
|
153
|
+
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
154
|
+
end
|
155
|
+
if !errors.empty?
|
156
|
+
$stderr.puts "Errors: #{errors.join("\n")}" unless @options[:errors]
|
157
|
+
raise RDF::ReaderError, "Errors found during processing"
|
317
158
|
end
|
318
159
|
end
|
319
160
|
end
|
320
161
|
enum_for(:each_statement)
|
321
|
-
rescue EBNF::LL1::Parser::Error, EBNF::LL1::Lexer::Error => e
|
322
|
-
if validate?
|
323
|
-
raise RDF::ReaderError.new(e.message, lineno: e.lineno, token: e.token)
|
324
|
-
else
|
325
|
-
$stderr.puts e.message
|
326
|
-
end
|
327
162
|
end
|
328
163
|
|
329
164
|
##
|
@@ -345,13 +180,12 @@ module RDF::Turtle
|
|
345
180
|
|
346
181
|
# add a statement, object can be literal or URI or bnode
|
347
182
|
#
|
348
|
-
# @param [
|
183
|
+
# @param [Symbol] production
|
349
184
|
# @param [RDF::Statement] statement the subject of the statement
|
350
185
|
# @return [RDF::Statement] Added statement
|
351
186
|
# @raise [RDF::ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_.
|
352
|
-
def add_statement(
|
353
|
-
error(
|
354
|
-
progress(node) {"generate statement: #{statement.to_ntriples}"}
|
187
|
+
def add_statement(production, statement)
|
188
|
+
error("Statement is invalid: #{statement.inspect.inspect}", production: produciton) if validate? && statement.invalid?
|
355
189
|
@callback.call(statement) if statement.subject &&
|
356
190
|
statement.predicate &&
|
357
191
|
statement.object &&
|
@@ -360,11 +194,15 @@ module RDF::Turtle
|
|
360
194
|
|
361
195
|
# Process a URI against base
|
362
196
|
def process_iri(iri)
|
363
|
-
|
197
|
+
iri = iri.value[1..-2] if iri === :IRIREF
|
198
|
+
value = RDF::URI(iri)
|
199
|
+
value = base_uri.join(value) if value.relative?
|
364
200
|
value.validate! if validate?
|
365
201
|
value.canonicalize! if canonicalize?
|
366
202
|
value = RDF::URI.intern(value) if intern?
|
367
203
|
value
|
204
|
+
rescue ArgumentError => e
|
205
|
+
error("process_iri", e)
|
368
206
|
end
|
369
207
|
|
370
208
|
# Create a literal
|
@@ -376,6 +214,8 @@ module RDF::Turtle
|
|
376
214
|
"c14n?: #{canonicalize?.inspect}"
|
377
215
|
end
|
378
216
|
RDF::Literal.new(value, options.merge(validate: validate?, canonicalize: canonicalize?))
|
217
|
+
rescue ArgumentError => e
|
218
|
+
error("Argument Error #{e.message}", production: :literal, token: @lexer.first)
|
379
219
|
end
|
380
220
|
|
381
221
|
##
|
@@ -397,7 +237,7 @@ module RDF::Turtle
|
|
397
237
|
if prefix(prefix)
|
398
238
|
base = prefix(prefix).to_s
|
399
239
|
elsif !prefix(prefix)
|
400
|
-
error("
|
240
|
+
error("undefined prefix", production: :pname, token: prefix)
|
401
241
|
base = ''
|
402
242
|
end
|
403
243
|
suffix = suffix.to_s.sub(/^\#/, "") if base.index("#")
|
@@ -411,5 +251,443 @@ module RDF::Turtle
|
|
411
251
|
@bnode_cache ||= {}
|
412
252
|
@bnode_cache[value.to_s] ||= RDF::Node.new(value)
|
413
253
|
end
|
254
|
+
|
255
|
+
protected
|
256
|
+
# @return [void]
|
257
|
+
def read_statement
|
258
|
+
prod(:statement, %w{.}) do
|
259
|
+
error("read_statement", "Unexpected end of file") unless token = @lexer.first
|
260
|
+
case token.type
|
261
|
+
when :BASE, :PREFIX
|
262
|
+
read_directive || error("Failed to parse directive", production: :directive, token: token)
|
263
|
+
else
|
264
|
+
read_triples || error("Expected token", production: :statement, token: token)
|
265
|
+
if !@recovering || @lexer.first === '.'
|
266
|
+
# If recovering, we will have eaten the closing '.'
|
267
|
+
token = @lexer.shift
|
268
|
+
unless token && token.value == '.'
|
269
|
+
error("Expected '.' following triple", production: :statement, token: token)
|
270
|
+
end
|
271
|
+
end
|
272
|
+
end
|
273
|
+
end
|
274
|
+
end
|
275
|
+
|
276
|
+
# @return [void]
|
277
|
+
def read_directive
|
278
|
+
prod(:directive, %w{.}) do
|
279
|
+
token = @lexer.first
|
280
|
+
case token.type
|
281
|
+
when :BASE
|
282
|
+
prod(:base) do
|
283
|
+
@lexer.shift
|
284
|
+
terminated = token.value == '@base'
|
285
|
+
iri = @lexer.shift
|
286
|
+
error("Expected IRIREF", :production => :base, token: iri) unless iri === :IRIREF
|
287
|
+
@options[:base_uri] = process_iri(iri)
|
288
|
+
error("base", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@base'
|
289
|
+
|
290
|
+
if terminated
|
291
|
+
error("base", "Expected #{token} to be terminated") unless @lexer.first === '.'
|
292
|
+
@lexer.shift
|
293
|
+
elsif @lexer.first === '.'
|
294
|
+
error("base", "Expected #{token} not to be terminated")
|
295
|
+
else
|
296
|
+
true
|
297
|
+
end
|
298
|
+
end
|
299
|
+
when :PREFIX
|
300
|
+
prod(:prefixID, %w{.}) do
|
301
|
+
@lexer.shift
|
302
|
+
pfx, iri = @lexer.shift, @lexer.shift
|
303
|
+
terminated = token.value == '@prefix'
|
304
|
+
error("Expected PNAME_NS", :production => :prefix, token: pfx) unless pfx === :PNAME_NS
|
305
|
+
error("Expected IRIREF", :production => :prefix, token: iri) unless iri === :IRIREF
|
306
|
+
debug("prefixID") {"Defined prefix #{pfx.inspect} mapping to #{iri.inspect}"}
|
307
|
+
prefix(pfx.value[0..-2], process_iri(iri))
|
308
|
+
error("prefixId", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@prefix'
|
309
|
+
|
310
|
+
if terminated
|
311
|
+
error("prefixID", "Expected #{token} to be terminated") unless @lexer.first === '.'
|
312
|
+
@lexer.shift
|
313
|
+
elsif @lexer.first === '.'
|
314
|
+
error("prefixID", "Expected #{token} not to be terminated")
|
315
|
+
else
|
316
|
+
true
|
317
|
+
end
|
318
|
+
end
|
319
|
+
end
|
320
|
+
end
|
321
|
+
end
|
322
|
+
|
323
|
+
# @return [Object] returns the last verb matched, or subject BNode on predicateObjectList?
|
324
|
+
def read_triples
|
325
|
+
prod(:triples, %w{.}) do
|
326
|
+
error("read_triples", "Unexpected end of file") unless token = @lexer.first
|
327
|
+
case token.type || token.value
|
328
|
+
when '['
|
329
|
+
# blankNodePropertyList predicateObjectList?
|
330
|
+
subject = read_blankNodePropertyList || error("Failed to parse blankNodePropertyList", production: :triples, token: @lexer.first)
|
331
|
+
read_predicateObjectList(subject) || subject
|
332
|
+
else
|
333
|
+
# subject predicateObjectList
|
334
|
+
subject = read_subject || error("Failed to parse subject", production: :triples, token: @lexer.first)
|
335
|
+
read_predicateObjectList(subject) || error("Expected predicateObjectList", production: :triples, token: @lexer.first)
|
336
|
+
end
|
337
|
+
end
|
338
|
+
end
|
339
|
+
|
340
|
+
# @param [RDF::Resource] subject
|
341
|
+
# @return [RDF::URI] the last matched verb
|
342
|
+
def read_predicateObjectList(subject)
|
343
|
+
prod(:predicateObjectList, %{;}) do
|
344
|
+
last_verb = nil
|
345
|
+
while verb = read_verb
|
346
|
+
last_verb = verb
|
347
|
+
prod(:_predicateObjectList_5) do
|
348
|
+
read_objectList(subject, verb) || error("Expected objectList", production: :predicateObjectList, token: @lexer.first)
|
349
|
+
end
|
350
|
+
break unless @lexer.first === ';'
|
351
|
+
@lexer.shift while @lexer.first === ';'
|
352
|
+
end
|
353
|
+
last_verb
|
354
|
+
end
|
355
|
+
end
|
356
|
+
|
357
|
+
# @return [RDF::Term] the last matched object
|
358
|
+
def read_objectList(subject, predicate)
|
359
|
+
prod(:objectList, %{,}) do
|
360
|
+
last_object = nil
|
361
|
+
while object = prod(:_objectList_2) {read_object(subject, predicate)}
|
362
|
+
last_object = object
|
363
|
+
break unless @lexer.first === ','
|
364
|
+
@lexer.shift while @lexer.first === ','
|
365
|
+
end
|
366
|
+
last_object
|
367
|
+
end
|
368
|
+
end
|
369
|
+
|
370
|
+
# @return [RDF::URI]
|
371
|
+
def read_verb
|
372
|
+
error("read_verb", "Unexpected end of file") unless token = @lexer.first
|
373
|
+
case token.type || token.value
|
374
|
+
when 'a' then prod(:verb) {@lexer.shift && RDF.type}
|
375
|
+
else prod(:verb) {read_iri}
|
376
|
+
end
|
377
|
+
end
|
378
|
+
|
379
|
+
# @return [RDF::Resource]
|
380
|
+
def read_subject
|
381
|
+
prod(:subject) do
|
382
|
+
read_iri ||
|
383
|
+
read_BlankNode ||
|
384
|
+
read_collection ||
|
385
|
+
error( "Expected subject", production: :subject, token: @lexer.first)
|
386
|
+
end
|
387
|
+
end
|
388
|
+
|
389
|
+
# @return [RDF::Term, nil] the object that was read, or nil if no object matched
|
390
|
+
def read_object(subject = nil, predicate = nil)
|
391
|
+
prod(:object) do
|
392
|
+
if object = read_iri ||
|
393
|
+
read_BlankNode ||
|
394
|
+
read_collection ||
|
395
|
+
read_blankNodePropertyList ||
|
396
|
+
read_literal
|
397
|
+
|
398
|
+
add_statement(:object, RDF::Statement(subject, predicate, object)) if subject && predicate
|
399
|
+
object
|
400
|
+
end
|
401
|
+
end
|
402
|
+
end
|
403
|
+
|
404
|
+
# @return [RDF::Literal]
|
405
|
+
def read_literal
|
406
|
+
error("Unexpected end of file", production: :literal) unless token = @lexer.first
|
407
|
+
case token.type || token.value
|
408
|
+
when :INTEGER then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.integer)}
|
409
|
+
when :DECIMAL
|
410
|
+
prod(:litearl) do
|
411
|
+
value = @lexer.shift.value
|
412
|
+
value = "0#{value}" if value.start_with?(".")
|
413
|
+
literal(value, datatype: RDF::XSD.decimal)
|
414
|
+
end
|
415
|
+
when :DOUBLE then prod(:literal) {literal(@lexer.shift.value.sub(/\.([eE])/, '.0\1'), datatype: RDF::XSD.double)}
|
416
|
+
when "true", "false" then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.boolean)}
|
417
|
+
when :STRING_LITERAL_QUOTE, :STRING_LITERAL_SINGLE_QUOTE
|
418
|
+
prod(:literal) do
|
419
|
+
value = @lexer.shift.value[1..-2]
|
420
|
+
error("read_literal", "Unexpected end of file") unless token = @lexer.first
|
421
|
+
case token.type || token.value
|
422
|
+
when :LANGTAG
|
423
|
+
literal(value, language: @lexer.shift.value[1..-1].to_sym)
|
424
|
+
when '^^'
|
425
|
+
@lexer.shift
|
426
|
+
literal(value, datatype: read_iri)
|
427
|
+
else
|
428
|
+
literal(value)
|
429
|
+
end
|
430
|
+
end
|
431
|
+
when :STRING_LITERAL_LONG_QUOTE, :STRING_LITERAL_LONG_SINGLE_QUOTE
|
432
|
+
prod(:literal) do
|
433
|
+
value = @lexer.shift.value[3..-4]
|
434
|
+
error("read_literal", "Unexpected end of file") unless token = @lexer.first
|
435
|
+
case token.type || token.value
|
436
|
+
when :LANGTAG
|
437
|
+
literal(value, language: @lexer.shift.value[1..-1].to_sym)
|
438
|
+
when '^^'
|
439
|
+
@lexer.shift
|
440
|
+
literal(value, datatype: read_iri)
|
441
|
+
else
|
442
|
+
literal(value)
|
443
|
+
end
|
444
|
+
end
|
445
|
+
end
|
446
|
+
end
|
447
|
+
|
448
|
+
# @return [RDF::Node]
|
449
|
+
def read_blankNodePropertyList
|
450
|
+
token = @lexer.first
|
451
|
+
if token === '['
|
452
|
+
prod(:blankNodePropertyList, %{]}) do
|
453
|
+
@lexer.shift
|
454
|
+
progress("blankNodePropertyList") {"token: #{token.inspect}"}
|
455
|
+
node = bnode
|
456
|
+
read_predicateObjectList(node)
|
457
|
+
error("blankNodePropertyList", "Expected closing ']'") unless @lexer.first === ']'
|
458
|
+
@lexer.shift
|
459
|
+
node
|
460
|
+
end
|
461
|
+
end
|
462
|
+
end
|
463
|
+
|
464
|
+
# @return [RDF::Node]
|
465
|
+
def read_collection
|
466
|
+
if @lexer.first === '('
|
467
|
+
prod(:collection, %{)}) do
|
468
|
+
@lexer.shift
|
469
|
+
token = @lexer.first
|
470
|
+
progress("collection") {"token: #{token.inspect}"}
|
471
|
+
objects = []
|
472
|
+
while object = read_object
|
473
|
+
objects << object
|
474
|
+
end
|
475
|
+
list = RDF::List.new(nil, nil, objects)
|
476
|
+
list.each_statement do |statement|
|
477
|
+
add_statement("collection", statement)
|
478
|
+
end
|
479
|
+
error("collection", "Expected closing ')'") unless @lexer.first === ')'
|
480
|
+
@lexer.shift
|
481
|
+
list.subject
|
482
|
+
end
|
483
|
+
end
|
484
|
+
end
|
485
|
+
|
486
|
+
# @return [RDF::URI]
|
487
|
+
def read_iri
|
488
|
+
token = @lexer.first
|
489
|
+
case token && token.type
|
490
|
+
when :IRIREF then prod(:iri) {process_iri(@lexer.shift)}
|
491
|
+
when :PNAME_LN, :PNAME_NS then prod(:iri) {pname(*@lexer.shift.value.split(':', 2))}
|
492
|
+
end
|
493
|
+
end
|
494
|
+
|
495
|
+
# @return [RDF::Node]
|
496
|
+
def read_BlankNode
|
497
|
+
token = @lexer.first
|
498
|
+
case token && token.type
|
499
|
+
when :BLANK_NODE_LABEL then prod(:BlankNode) {bnode(@lexer.shift.value[2..-1])}
|
500
|
+
when :ANON then @lexer.shift && prod(:BlankNode) {bnode}
|
501
|
+
end
|
502
|
+
end
|
503
|
+
|
504
|
+
def prod(production, recover_to = [])
|
505
|
+
@prod_stack << {prod: production, recover_to: recover_to}
|
506
|
+
@depth += 1
|
507
|
+
@recovering = false
|
508
|
+
progress("#{production}(start)") {"token: #{@lexer.first.inspect}"}
|
509
|
+
yield
|
510
|
+
rescue EBNF::LL1::Lexer::Error, SyntaxError, Recovery => e
|
511
|
+
# Lexer encountered an illegal token or the parser encountered
|
512
|
+
# a terminal which is inappropriate for the current production.
|
513
|
+
# Perform error recovery to find a reasonable terminal based
|
514
|
+
# on the follow sets of the relevant productions. This includes
|
515
|
+
# remaining terms from the current production and the stacked
|
516
|
+
# productions
|
517
|
+
case e
|
518
|
+
when EBNF::LL1::Lexer::Error
|
519
|
+
@lexer.recover
|
520
|
+
begin
|
521
|
+
error("Lexer error", "With input '#{e.input}': #{e.message}",
|
522
|
+
production: production,
|
523
|
+
token: e.token)
|
524
|
+
rescue SyntaxError
|
525
|
+
end
|
526
|
+
end
|
527
|
+
raise EOFError, "End of input found when recovering" if @lexer.first.nil?
|
528
|
+
debug("recovery", "current token: #{@lexer.first.inspect}", :level => 4)
|
529
|
+
|
530
|
+
unless e.is_a?(Recovery)
|
531
|
+
# Get the list of follows for this sequence, this production and the stacked productions.
|
532
|
+
debug("recovery", "stack follows:", :level => 4)
|
533
|
+
@prod_stack.reverse.each do |prod|
|
534
|
+
debug("recovery", :level => 4) {" #{prod[:prod]}: #{prod[:recover_to].inspect}"}
|
535
|
+
end
|
536
|
+
end
|
537
|
+
|
538
|
+
# Find all follows to the top of the stack
|
539
|
+
follows = @prod_stack.map {|prod| Array(prod[:recover_to])}.flatten.compact.uniq
|
540
|
+
|
541
|
+
# Skip tokens until one is found in follows
|
542
|
+
while (token = (@lexer.first rescue @lexer.recover)) && follows.none? {|t| token === t}
|
543
|
+
skipped = @lexer.shift
|
544
|
+
progress("recovery") {"skip #{skipped.inspect}"}
|
545
|
+
end
|
546
|
+
debug("recovery") {"found #{token.inspect} in follows"}
|
547
|
+
|
548
|
+
# Re-raise the error unless token is a follows of this production
|
549
|
+
raise Recovery unless Array(recover_to).any? {|t| token === t}
|
550
|
+
|
551
|
+
# Skip that token to get something reasonable to start the next production with
|
552
|
+
@lexer.shift
|
553
|
+
ensure
|
554
|
+
progress("#{production}(finish)")
|
555
|
+
@depth -= 1
|
556
|
+
@prod_stack.pop
|
557
|
+
end
|
558
|
+
|
559
|
+
##
|
560
|
+
# Warning information, used as level `1` debug messages.
|
561
|
+
#
|
562
|
+
# @param [String] node Relevant location associated with message
|
563
|
+
# @param [String] message Error string
|
564
|
+
# @param [Hash] options
|
565
|
+
# @option options [URI, #to_s] :production
|
566
|
+
# @option options [Token] :token
|
567
|
+
# @see {#debug}
|
568
|
+
def warn(node, message, options = {})
|
569
|
+
m = "WARNING "
|
570
|
+
m += "[line: #{@lineno}] " if @lineno
|
571
|
+
m += message
|
572
|
+
m += " (found #{options[:token].inspect})" if options[:token]
|
573
|
+
m += ", production = #{options[:production].inspect}" if options[:production]
|
574
|
+
@warnings << m unless @recovering
|
575
|
+
debug(node, m, options.merge(:level => 1))
|
576
|
+
end
|
577
|
+
|
578
|
+
##
|
579
|
+
# Error information, used as level `0` debug messages.
|
580
|
+
#
|
581
|
+
# @overload error(node, message, options)
|
582
|
+
# @param [String] node Relevant location associated with message
|
583
|
+
# @param [String] message Error string
|
584
|
+
# @param [Hash] options
|
585
|
+
# @option options [URI, #to_s] :production
|
586
|
+
# @option options [Token] :token
|
587
|
+
# @see {#debug}
|
588
|
+
def error(*args)
  # Nothing is reported while a previous error is still being recovered from.
  return if @recovering

  # A trailing Hash carries the options; the remaining args form the message.
  options = args.last.is_a?(Hash) ? args.pop : {}
  token = options[:token]
  production = options[:production]

  # Use the reader's line when known, otherwise the token's line if it has one.
  lineno = @lineno
  lineno ||= token.lineno if token.respond_to?(:lineno)

  line_part = lineno ? "[line: #{lineno}] " : ""
  found_part = token ? " (found #{token.inspect})" : ""
  prod_part = production ? ", production = #{production.inspect}" : ""
  m = "ERROR #{line_part}#{args.join(': ')}#{found_part}#{prod_part}"

  # Enter recovery mode before recording, tracing and raising the error.
  @recovering = true
  @errors << m
  debug(m, options.merge(level: 0))
  raise SyntaxError.new(m, lineno: lineno, token: token, production: production)
end
|
603
|
+
|
604
|
+
##
|
605
|
+
# Progress output when debugging.
|
606
|
+
#
|
607
|
+
# The call is ignored, unless `@options[:debug]` is set, in which
|
608
|
+
# case it records tracing information as indicated. Additionally,
|
609
|
+
# if `@options[:debug]` is an Integer, the call is aborted if the
|
610
|
+
# `:level` option is greater than that Integer.
|
611
|
+
#
|
612
|
+
# @overload debug(node, message, options)
|
613
|
+
# @param [Array<String>] args Relevant location associated with message
|
614
|
+
# @param [Hash] options
|
615
|
+
# @option options [Integer] :depth
|
616
|
+
# Recursion depth for indenting output
|
617
|
+
# @option options [Integer] :level
|
618
|
+
# Level assigned to message, by convention, level `0` is for
|
619
|
+
# errors, level `1` is for warnings, level `2` is for parser
|
620
|
+
# progress information, and anything higher is for various levels
|
621
|
+
# of debug information.
|
622
|
+
#
|
623
|
+
# @yieldparam [:trace] trace
|
624
|
+
# @yieldparam [Integer] level
|
625
|
+
# @yieldparam [Integer] lineno
|
626
|
+
# @yieldparam [Integer] depth Recursive depth of productions
|
627
|
+
# @yieldparam [Array<String>] args
|
628
|
+
# @yieldreturn [String] added to message
|
629
|
+
def debug(*args)
  # Formats a trace line and sends it to the sink selected by @options[:debug]
  # (an Array collects lines; `true` or an Integer writes to $stderr).
  return unless @options[:debug]

  # A trailing Hash carries the options; the remaining args form the message.
  options = args.last.is_a?(Hash) ? args.pop : {}
  debug_level = options.fetch(:level, 3)
  # With an Integer threshold, drop messages more verbose than requested.
  return if @options[:debug].is_a?(Integer) && debug_level > @options[:debug]

  # Fall back to zero so a call made while no depth is set cannot raise on
  # the comparison below.
  depth = options[:depth] || @depth || 0
  args << yield if block_given?

  # Indentation is capped at 100 columns; deeper nesting is marked with '+'.
  d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth
  str = "[#{lineno}](#{debug_level})#{d_str}#{args.join(': ')}"

  case @options[:debug]
  when Array
    @options[:debug] << str
  when TrueClass
    $stderr.puts str
  when Integer
    # Level 0 / 1 messages are not echoed when @options[:errors] /
    # @options[:warnings] is set (presumably they are reported through those
    # options instead -- confirm against the caller).
    case debug_level
    when 0 then return if @options[:errors]
    when 1 then return if @options[:warnings]
    end
    # The threshold was already checked by the early return above.
    $stderr.puts(str)
  end
end
|
654
|
+
|
655
|
+
# Used for internal error recovery
|
656
|
+
class Recovery < StandardError
  # Carries no state of its own; raised while recovering from a parse error.
end
|
657
|
+
|
658
|
+
class SyntaxError < RDF::ReaderError
  ##
  # Production passed in via the `:production` option.
  #
  # @return [Symbol]
  attr_reader :production

  ##
  # Token passed in via the `:token` option.
  #
  # @return [String]
  attr_reader :token

  ##
  # Line number of the error: the `:lineno` option, or else the
  # token's own line number when the token provides one.
  #
  # @return [Integer]
  attr_reader :lineno

  ##
  # Builds a syntax error from a message and optional context.
  #
  # @param  [String, #to_s]          message
  # @param  [Hash{Symbol => Object}] options
  # @option options [Symbol]         :production (nil)
  # @option options [String]         :token (nil)
  # @option options [Integer]        :lineno (nil)
  def initialize(message, options = {})
    @production = options[:production]
    @token = options[:token]

    explicit_line = options[:lineno]
    @lineno =
      if explicit_line
        explicit_line
      elsif @token.respond_to?(:lineno)
        # No explicit line was given; use the one the token carries.
        @token.lineno
      end

    super(message.to_s)
  end
end
|
414
692
|
end # class Reader
|
415
693
|
end # module RDF::Turtle
|