rdf-turtle 1.1.7 → 1.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +8 -15
- data/VERSION +1 -1
- data/lib/rdf/turtle/reader.rb +502 -224
- data/lib/rdf/turtle/streaming_writer.rb +4 -4
- data/lib/rdf/turtle/writer.rb +4 -1
- metadata +4 -25
- data/lib/rdf/turtle/meta.rb +0 -1461
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3eb247803e3c16fd5a6338c2175664deaeeea53c
|
4
|
+
data.tar.gz: cf0ea0c919bfc0421e91eb0dbd7337fd61dfe881
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a461d97b2f6c0fb5a8e63cf0f07f427cbea96dbf38952b0da5dac2064464ff999237067a53d7ac18ddeaaff809a74db273f93095c6007b7d2a13f0e826a15e4
|
7
|
+
data.tar.gz: d145feb24bbda56cbdb7992becc7fe68c501efebac7e020bdcb6e87015e09189951f77b0516e2ad8db84f7197e4a055b4fb7564b603b5da1809eb7cffb1b0f7d
|
data/README.md
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
|
5
5
|
[![Gem Version](https://badge.fury.io/rb/rdf-turtle.png)](http://badge.fury.io/rb/rdf-turtle)
|
6
6
|
[![Build Status](https://travis-ci.org/ruby-rdf/rdf-turtle.png?branch=master)](http://travis-ci.org/ruby-rdf/rdf-turtle)
|
7
|
+
[![Coverage Status](https://coveralls.io/repos/ruby-rdf/rdf-turtle/badge.svg)](https://coveralls.io/r/ruby-rdf/rdf-turtle)
|
8
|
+
[![Dependency Status](https://gemnasium.com/ruby-rdf/rdf-turtle.png)](https://gemnasium.com/ruby-rdf/rdf-turtle)
|
7
9
|
|
8
10
|
## Description
|
9
11
|
This is a [Ruby][] implementation of a [Turtle][] parser for [RDF.rb][].
|
@@ -46,10 +48,7 @@ Full documentation available on [Rubydoc.info][Turtle doc]
|
|
46
48
|
### Variations from the spec
|
47
49
|
In some cases, the specification is unclear on certain issues:
|
48
50
|
|
49
|
-
* The LC version of the [Turtle][] specification separates rules for `@base` and `@prefix` with
|
50
|
-
closing '.' from the
|
51
|
-
SPARQL-like `BASE` and `PREFIX` without closing '.'. This version implements a more flexible
|
52
|
-
syntax where the `@` and closing `.` are optional and `base/prefix` are matched case independently.
|
51
|
+
* The LC version of the [Turtle][] specification separates rules for `@base` and `@prefix` with closing '.' from the SPARQL-like `BASE` and `PREFIX` without closing '.'. This version implements a more flexible syntax where the `@` and closing `.` are optional and `base/prefix` are matched case-insensitively.
|
53
52
|
* Additionally, both `a` and `A` match `rdf:type`.
|
54
53
|
|
55
54
|
### Freebase-specific Reader
|
@@ -83,19 +82,13 @@ An example of reading Freebase dumps:
|
|
83
82
|
r.each_statement {|stmt| puts stmt.to_ntriples}
|
84
83
|
end
|
85
84
|
## Implementation Notes
|
86
|
-
|
87
|
-
the `Parser` and `Lexer` modules to implement the Turtle parser.
|
88
|
-
|
89
|
-
The parser takes branch and follow tables generated from the original [Turtle
|
90
|
-
EBNF Grammar][Turtle EBNF] described in the [specification][Turtle]. Branch and
|
91
|
-
Follow tables are specified in {RDF::Turtle::Meta}, which is in turn generated
|
92
|
-
using the [EBNF][] gem.
|
85
|
+
This version uses a hand-written parser built on the Lexer from the [EBNF][] gem, rather than a general [EBNF][] LL(1) parser, for faster performance.
|
93
86
|
|
94
87
|
## Dependencies
|
95
88
|
|
96
|
-
* [Ruby](http://ruby-lang.org/) (>= 1.9.
|
97
|
-
* [RDF.rb](http://rubygems.org/gems/rdf) (
|
98
|
-
* [EBNF][] (
|
89
|
+
* [Ruby](http://ruby-lang.org/) (>= 1.9.3)
|
90
|
+
* [RDF.rb](http://rubygems.org/gems/rdf) (~> 1.1)
|
91
|
+
* [EBNF][] (~> 0.3)
|
99
92
|
|
100
93
|
## Installation
|
101
94
|
|
@@ -136,7 +129,7 @@ A copy of the [Turtle EBNF][] and derived parser files are included in the repos
|
|
136
129
|
[YARD]: http://yardoc.org/
|
137
130
|
[YARD-GS]: http://rubydoc.info/docs/yard/file/docs/GettingStarted.md
|
138
131
|
[PDD]: http://lists.w3.org/Archives/Public/public-rdf-ruby/2010May/0013.html
|
139
|
-
[RDF.rb]: http://rubydoc.info/github/ruby-rdf/rdf
|
132
|
+
[RDF.rb]: http://rubydoc.info/github/ruby-rdf/rdf
|
140
133
|
[EBNF]: http://rubygems.org/gems/ebnf
|
141
134
|
[Backports]: http://rubygems.org/gems/backports
|
142
135
|
[N-Triples]: http://www.w3.org/TR/rdf-testcases/#ntriples
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.1.
|
1
|
+
1.1.8
|
data/lib/rdf/turtle/reader.rb
CHANGED
@@ -1,203 +1,44 @@
|
|
1
|
-
|
2
|
-
require 'ebnf/ll1/
|
1
|
+
# coding: utf-8
|
2
|
+
require 'ebnf/ll1/lexer'
|
3
3
|
|
4
4
|
module RDF::Turtle
|
5
5
|
##
|
6
6
|
# A parser for the Turtle 2
|
7
7
|
class Reader < RDF::Reader
|
8
8
|
format Format
|
9
|
-
include RDF::Turtle::Meta
|
10
9
|
include EBNF::LL1::Parser
|
11
10
|
include RDF::Turtle::Terminals
|
12
11
|
|
13
12
|
# Terminals passed to lexer. Order matters!
|
14
|
-
terminal(:ANON,
|
15
|
-
|
16
|
-
|
17
|
-
terminal(:
|
18
|
-
|
19
|
-
|
20
|
-
terminal(:
|
21
|
-
|
22
|
-
|
23
|
-
terminal(:
|
24
|
-
|
25
|
-
|
26
|
-
value = token.value.sub(/\.([eE])/, '.0\1')
|
27
|
-
input[:resource] = literal(value, datatype: RDF::XSD.double)
|
28
|
-
end
|
29
|
-
terminal(:DECIMAL, DECIMAL) do |prod, token, input|
|
30
|
-
# Note that a Turtle Decimal may begin with a '.', so tack on a leading
|
31
|
-
# zero if necessary
|
32
|
-
value = token.value
|
33
|
-
value = "0#{token.value}" if token.value[0,1] == "."
|
34
|
-
input[:resource] = literal(value, datatype: RDF::XSD.decimal)
|
35
|
-
end
|
36
|
-
terminal(:INTEGER, INTEGER) do |prod, token, input|
|
37
|
-
input[:resource] = literal(token.value, datatype: RDF::XSD.integer)
|
38
|
-
end
|
39
|
-
# Spec confusion: spec says : "Literals , prefixed names and IRIs may also contain escape sequences"
|
40
|
-
terminal(:PNAME_LN, PNAME_LN, unescape: true) do |prod, token, input|
|
41
|
-
prefix, suffix = token.value.split(":", 2)
|
42
|
-
input[:resource] = pname(prefix, suffix)
|
43
|
-
end
|
44
|
-
# Spec confusion: spec says : "Literals , prefixed names and IRIs may also contain escape sequences"
|
45
|
-
terminal(:PNAME_NS, PNAME_NS) do |prod, token, input|
|
46
|
-
prefix = token.value[0..-2]
|
47
|
-
|
48
|
-
# Two contexts, one when prefix is being defined, the other when being used
|
49
|
-
case prod
|
50
|
-
when :prefixID, :sparqlPrefix
|
51
|
-
input[:prefix] = prefix
|
52
|
-
else
|
53
|
-
input[:resource] = pname(prefix, '')
|
54
|
-
end
|
55
|
-
end
|
56
|
-
terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, unescape: true) do |prod, token, input|
|
57
|
-
input[:string_value] = token.value[3..-4]
|
58
|
-
end
|
59
|
-
terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, unescape: true) do |prod, token, input|
|
60
|
-
input[:string_value] = token.value[3..-4]
|
61
|
-
end
|
62
|
-
terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, unescape: true) do |prod, token, input|
|
63
|
-
input[:string_value] = token.value[1..-2]
|
64
|
-
end
|
65
|
-
terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, unescape: true) do |prod, token, input|
|
66
|
-
input[:string_value] = token.value[1..-2]
|
67
|
-
end
|
13
|
+
terminal(:ANON, ANON)
|
14
|
+
terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL)
|
15
|
+
terminal(:IRIREF, IRIREF, unescape: true)
|
16
|
+
terminal(:DOUBLE, DOUBLE)
|
17
|
+
terminal(:DECIMAL, DECIMAL)
|
18
|
+
terminal(:INTEGER, INTEGER)
|
19
|
+
terminal(:PNAME_LN, PNAME_LN, unescape: true)
|
20
|
+
terminal(:PNAME_NS, PNAME_NS)
|
21
|
+
terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, unescape: true)
|
22
|
+
terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, unescape: true)
|
23
|
+
terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, unescape: true)
|
24
|
+
terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, unescape: true)
|
68
25
|
|
69
26
|
# String terminals
|
70
|
-
terminal(nil,
|
71
|
-
case token.value
|
72
|
-
when 'A', 'a' then input[:resource] = RDF.type
|
73
|
-
when 'true', 'false' then input[:resource] = RDF::Literal::Boolean.new(token.value)
|
74
|
-
when '@base', '@prefix' then input[:lang] = token.value[1..-1]
|
75
|
-
when '.' then input[:terminated] = true
|
76
|
-
else input[:string] = token.value
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
terminal(:PREFIX, PREFIX) do |prod, token, input|
|
81
|
-
input[:string_value] = token.value
|
82
|
-
end
|
83
|
-
terminal(:BASE, BASE) do |prod, token, input|
|
84
|
-
input[:string_value] = token.value
|
85
|
-
end
|
86
|
-
|
87
|
-
terminal(:LANGTAG, LANGTAG) do |prod, token, input|
|
88
|
-
input[:lang] = token.value[1..-1]
|
89
|
-
end
|
27
|
+
terminal(nil, %r([\(\),.;\[\]Aa]|\^\^|true|false))
|
90
28
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
prefix = current[:prefix]
|
95
|
-
iri = current[:resource]
|
96
|
-
lexical = current[:string_value]
|
97
|
-
terminated = current[:terminated]
|
98
|
-
debug("prefixID") {"Defined prefix #{prefix.inspect} mapping to #{iri.inspect}"}
|
99
|
-
if lexical.start_with?('@') && lexical != '@prefix'
|
100
|
-
error(:prefixID, "should be downcased")
|
101
|
-
elsif lexical == '@prefix'
|
102
|
-
error(:prefixID, "directive not terminated") unless terminated
|
103
|
-
else
|
104
|
-
error(:prefixID, "directive should not be terminated") if terminated
|
105
|
-
end
|
106
|
-
prefix(prefix, iri)
|
107
|
-
end
|
108
|
-
|
109
|
-
# [5] base set base_uri
|
110
|
-
production(:base) do |input, current, callback|
|
111
|
-
iri = current[:resource]
|
112
|
-
lexical = current[:string_value]
|
113
|
-
terminated = current[:terminated]
|
114
|
-
debug("base") {"Defined base as #{iri}"}
|
115
|
-
if lexical.start_with?('@') && lexical != '@base'
|
116
|
-
error(:base, "should be downcased")
|
117
|
-
elsif lexical == '@base'
|
118
|
-
error(:base, "directive not terminated") unless terminated
|
119
|
-
else
|
120
|
-
error(:base, "directive should not be terminated") if terminated
|
121
|
-
end
|
122
|
-
options[:base_uri] = iri
|
123
|
-
end
|
124
|
-
|
125
|
-
# [6] triples
|
126
|
-
start_production(:triples) do |input, current, callback|
|
127
|
-
# Note production as triples for blankNodePropertyList
|
128
|
-
# to set :subject instead of :resource
|
129
|
-
current[:triples] = true
|
130
|
-
end
|
131
|
-
production(:triples) do |input, current, callback|
|
132
|
-
# Note production as triples for blankNodePropertyList
|
133
|
-
# to set :subject instead of :resource
|
134
|
-
current[:triples] = true
|
135
|
-
end
|
29
|
+
terminal(:PREFIX, PREFIX)
|
30
|
+
terminal(:BASE, BASE)
|
31
|
+
terminal(:LANGTAG, LANGTAG)
|
136
32
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
# [10] subject ::= IRIref | BlankNode | collection
|
143
|
-
start_production(:subject) do |input, current, callback|
|
144
|
-
current[:triples] = nil
|
145
|
-
end
|
146
|
-
|
147
|
-
production(:subject) do |input, current, callback|
|
148
|
-
input[:subject] = current[:resource]
|
149
|
-
end
|
150
|
-
|
151
|
-
# [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal
|
152
|
-
production(:object) do |input, current, callback|
|
153
|
-
if input[:object_list]
|
154
|
-
# Part of an rdf:List collection
|
155
|
-
input[:object_list] << current[:resource]
|
156
|
-
else
|
157
|
-
debug("object") {"current: #{current.inspect}"}
|
158
|
-
callback.call(:statement, "object", input[:subject], input[:predicate], current[:resource])
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
162
|
-
# [14] blankNodePropertyList ::= "[" predicateObjectList "]"
|
163
|
-
start_production(:blankNodePropertyList) do |input, current, callback|
|
164
|
-
current[:subject] = self.bnode
|
165
|
-
end
|
166
|
-
|
167
|
-
production(:blankNodePropertyList) do |input, current, callback|
|
168
|
-
if input[:triples]
|
169
|
-
input[:subject] = current[:subject]
|
170
|
-
else
|
171
|
-
input[:resource] = current[:subject]
|
172
|
-
end
|
173
|
-
end
|
174
|
-
|
175
|
-
# [15] collection ::= "(" object* ")"
|
176
|
-
start_production(:collection) do |input, current, callback|
|
177
|
-
# Tells the object production to collect and not generate statements
|
178
|
-
current[:object_list] = []
|
179
|
-
end
|
180
|
-
|
181
|
-
production(:collection) do |input, current, callback|
|
182
|
-
# Create an RDF list
|
183
|
-
objects = current[:object_list]
|
184
|
-
list = RDF::List[*objects]
|
185
|
-
list.each_statement do |statement|
|
186
|
-
next if statement.predicate == RDF.type && statement.object == RDF.List
|
187
|
-
callback.call(:statement, "collection", statement.subject, statement.predicate, statement.object)
|
188
|
-
end
|
33
|
+
##
|
34
|
+
# Accumulated errors found during processing
|
35
|
+
# @return [Array<String>]
|
36
|
+
attr_reader :errors
|
189
37
|
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
# [16] RDFLiteral ::= String ( LanguageTag | ( "^^" IRIref ) )?
|
195
|
-
production(:RDFLiteral) do |input, current, callback|
|
196
|
-
opts = {}
|
197
|
-
opts[:datatype] = current[:resource] if current[:resource]
|
198
|
-
opts[:language] = current[:lang] if current[:lang]
|
199
|
-
input[:resource] = literal(current[:string_value], opts)
|
200
|
-
end
|
38
|
+
##
|
39
|
+
# Accumulated warnings found during processing
|
40
|
+
# @return [Array<String>]
|
41
|
+
attr_reader :warnings
|
201
42
|
|
202
43
|
##
|
203
44
|
# Redirect for Freebase Reader
|
@@ -229,13 +70,13 @@ module RDF::Turtle
|
|
229
70
|
# the base URI to use when resolving relative URIs (for accessing intermediate parser productions)
|
230
71
|
# @option options [#to_s] :anon_base ("b0")
|
231
72
|
# Basis for generating anonymous Nodes
|
232
|
-
# @option options [Boolean] :resolve_uris (false)
|
233
|
-
# Resolve prefix and relative IRIs, otherwise, when serializing the parsed SSE
|
234
|
-
# as S-Expressions, use the original prefixed and relative URIs along with `base` and `prefix`
|
235
|
-
# definitions.
|
236
73
|
# @option options [Boolean] :validate (false)
|
237
74
|
# whether to validate the parsed statements and values. If not validating,
|
238
75
|
# the parser will attempt to recover from errors.
|
76
|
+
# @option options [Array] :errors
|
77
|
+
# array for placing errors found when parsing
|
78
|
+
# @option options [Array] :warnings
|
79
|
+
# array for placing warnings found when parsing
|
239
80
|
# @option options [Boolean] :progress
|
240
81
|
# Show progress of parser productions
|
241
82
|
# @option options [Boolean, Integer, Array] :debug
|
@@ -255,6 +96,11 @@ module RDF::Turtle
|
|
255
96
|
whitespace: WS,
|
256
97
|
}.merge(options)
|
257
98
|
@options = {prefixes: {nil => ""}}.merge(@options) unless @options[:validate]
|
99
|
+
@errors = @options[:errors] || []
|
100
|
+
@warnings = @options[:warnings] || []
|
101
|
+
@depth = 0
|
102
|
+
@prod_stack = []
|
103
|
+
|
258
104
|
@options[:debug] ||= case
|
259
105
|
when RDF::Turtle.debug? then true
|
260
106
|
when @options[:progress] then 2
|
@@ -268,6 +114,8 @@ module RDF::Turtle
|
|
268
114
|
debug("canonicalize") {canonicalize?.inspect}
|
269
115
|
debug("intern") {intern?.inspect}
|
270
116
|
|
117
|
+
@lexer = EBNF::LL1::Lexer.new(input, self.class.patterns, @options)
|
118
|
+
|
271
119
|
if block_given?
|
272
120
|
case block.arity
|
273
121
|
when 0 then instance_eval(&block)
|
@@ -289,41 +137,28 @@ module RDF::Turtle
|
|
289
137
|
# @return [void]
|
290
138
|
def each_statement(&block)
|
291
139
|
if block_given?
|
140
|
+
@recovering = false
|
292
141
|
@callback = block
|
293
142
|
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
case @options[:debug]
|
310
|
-
when Array
|
311
|
-
@options[:debug] << str
|
312
|
-
when TrueClass
|
313
|
-
$stderr.puts str
|
314
|
-
when Integer
|
315
|
-
$stderr.puts(str) if level <= @options[:debug]
|
316
|
-
end
|
143
|
+
begin
|
144
|
+
while (@lexer.first rescue true)
|
145
|
+
read_statement
|
146
|
+
end
|
147
|
+
rescue EBNF::LL1::Lexer::Error, SyntaxError, EOFError, Recovery
|
148
|
+
# Terminate loop if EOF found while recovering
|
149
|
+
end
|
150
|
+
|
151
|
+
if validate?
|
152
|
+
if !warnings.empty? && !@options[:warnings]
|
153
|
+
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
154
|
+
end
|
155
|
+
if !errors.empty?
|
156
|
+
$stderr.puts "Errors: #{errors.join("\n")}" unless @options[:errors]
|
157
|
+
raise RDF::ReaderError, "Errors found during processing"
|
317
158
|
end
|
318
159
|
end
|
319
160
|
end
|
320
161
|
enum_for(:each_statement)
|
321
|
-
rescue EBNF::LL1::Parser::Error, EBNF::LL1::Lexer::Error => e
|
322
|
-
if validate?
|
323
|
-
raise RDF::ReaderError.new(e.message, lineno: e.lineno, token: e.token)
|
324
|
-
else
|
325
|
-
$stderr.puts e.message
|
326
|
-
end
|
327
162
|
end
|
328
163
|
|
329
164
|
##
|
@@ -345,13 +180,12 @@ module RDF::Turtle
|
|
345
180
|
|
346
181
|
# add a statement, object can be literal or URI or bnode
|
347
182
|
#
|
348
|
-
# @param [
|
183
|
+
# @param [Symbol] production
|
349
184
|
# @param [RDF::Statement] statement the statement to add
|
350
185
|
# @return [RDF::Statement] Added statement
|
351
186
|
# @raise [RDF::ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_.
|
352
|
-
def add_statement(
|
353
|
-
error(
|
354
|
-
progress(node) {"generate statement: #{statement.to_ntriples}"}
|
187
|
+
def add_statement(production, statement)
|
188
|
+
error("Statement is invalid: #{statement.inspect.inspect}", production: produciton) if validate? && statement.invalid?
|
355
189
|
@callback.call(statement) if statement.subject &&
|
356
190
|
statement.predicate &&
|
357
191
|
statement.object &&
|
@@ -360,11 +194,15 @@ module RDF::Turtle
|
|
360
194
|
|
361
195
|
# Process a URI against base
|
362
196
|
def process_iri(iri)
|
363
|
-
|
197
|
+
iri = iri.value[1..-2] if iri === :IRIREF
|
198
|
+
value = RDF::URI(iri)
|
199
|
+
value = base_uri.join(value) if value.relative?
|
364
200
|
value.validate! if validate?
|
365
201
|
value.canonicalize! if canonicalize?
|
366
202
|
value = RDF::URI.intern(value) if intern?
|
367
203
|
value
|
204
|
+
rescue ArgumentError => e
|
205
|
+
error("process_iri", e)
|
368
206
|
end
|
369
207
|
|
370
208
|
# Create a literal
|
@@ -376,6 +214,8 @@ module RDF::Turtle
|
|
376
214
|
"c14n?: #{canonicalize?.inspect}"
|
377
215
|
end
|
378
216
|
RDF::Literal.new(value, options.merge(validate: validate?, canonicalize: canonicalize?))
|
217
|
+
rescue ArgumentError => e
|
218
|
+
error("Argument Error #{e.message}", production: :literal, token: @lexer.first)
|
379
219
|
end
|
380
220
|
|
381
221
|
##
|
@@ -397,7 +237,7 @@ module RDF::Turtle
|
|
397
237
|
if prefix(prefix)
|
398
238
|
base = prefix(prefix).to_s
|
399
239
|
elsif !prefix(prefix)
|
400
|
-
error("
|
240
|
+
error("undefined prefix", production: :pname, token: prefix)
|
401
241
|
base = ''
|
402
242
|
end
|
403
243
|
suffix = suffix.to_s.sub(/^\#/, "") if base.index("#")
|
@@ -411,5 +251,443 @@ module RDF::Turtle
|
|
411
251
|
@bnode_cache ||= {}
|
412
252
|
@bnode_cache[value.to_s] ||= RDF::Node.new(value)
|
413
253
|
end
|
254
|
+
|
255
|
+
protected
|
256
|
+
# @return [void]
|
257
|
+
def read_statement
|
258
|
+
prod(:statement, %w{.}) do
|
259
|
+
error("read_statement", "Unexpected end of file") unless token = @lexer.first
|
260
|
+
case token.type
|
261
|
+
when :BASE, :PREFIX
|
262
|
+
read_directive || error("Failed to parse directive", production: :directive, token: token)
|
263
|
+
else
|
264
|
+
read_triples || error("Expected token", production: :statement, token: token)
|
265
|
+
if !@recovering || @lexer.first === '.'
|
266
|
+
# If recovering, we will have eaten the closing '.'
|
267
|
+
token = @lexer.shift
|
268
|
+
unless token && token.value == '.'
|
269
|
+
error("Expected '.' following triple", production: :statement, token: token)
|
270
|
+
end
|
271
|
+
end
|
272
|
+
end
|
273
|
+
end
|
274
|
+
end
|
275
|
+
|
276
|
+
# @return [void]
|
277
|
+
def read_directive
|
278
|
+
prod(:directive, %w{.}) do
|
279
|
+
token = @lexer.first
|
280
|
+
case token.type
|
281
|
+
when :BASE
|
282
|
+
prod(:base) do
|
283
|
+
@lexer.shift
|
284
|
+
terminated = token.value == '@base'
|
285
|
+
iri = @lexer.shift
|
286
|
+
error("Expected IRIREF", :production => :base, token: iri) unless iri === :IRIREF
|
287
|
+
@options[:base_uri] = process_iri(iri)
|
288
|
+
error("base", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@base'
|
289
|
+
|
290
|
+
if terminated
|
291
|
+
error("base", "Expected #{token} to be terminated") unless @lexer.first === '.'
|
292
|
+
@lexer.shift
|
293
|
+
elsif @lexer.first === '.'
|
294
|
+
error("base", "Expected #{token} not to be terminated")
|
295
|
+
else
|
296
|
+
true
|
297
|
+
end
|
298
|
+
end
|
299
|
+
when :PREFIX
|
300
|
+
prod(:prefixID, %w{.}) do
|
301
|
+
@lexer.shift
|
302
|
+
pfx, iri = @lexer.shift, @lexer.shift
|
303
|
+
terminated = token.value == '@prefix'
|
304
|
+
error("Expected PNAME_NS", :production => :prefix, token: pfx) unless pfx === :PNAME_NS
|
305
|
+
error("Expected IRIREF", :production => :prefix, token: iri) unless iri === :IRIREF
|
306
|
+
debug("prefixID") {"Defined prefix #{pfx.inspect} mapping to #{iri.inspect}"}
|
307
|
+
prefix(pfx.value[0..-2], process_iri(iri))
|
308
|
+
error("prefixId", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@prefix'
|
309
|
+
|
310
|
+
if terminated
|
311
|
+
error("prefixID", "Expected #{token} to be terminated") unless @lexer.first === '.'
|
312
|
+
@lexer.shift
|
313
|
+
elsif @lexer.first === '.'
|
314
|
+
error("prefixID", "Expected #{token} not to be terminated")
|
315
|
+
else
|
316
|
+
true
|
317
|
+
end
|
318
|
+
end
|
319
|
+
end
|
320
|
+
end
|
321
|
+
end
|
322
|
+
|
323
|
+
# @return [Object] returns the last verb matched, or subject BNode on predicateObjectList?
|
324
|
+
def read_triples
|
325
|
+
prod(:triples, %w{.}) do
|
326
|
+
error("read_triples", "Unexpected end of file") unless token = @lexer.first
|
327
|
+
case token.type || token.value
|
328
|
+
when '['
|
329
|
+
# blankNodePropertyList predicateObjectList?
|
330
|
+
subject = read_blankNodePropertyList || error("Failed to parse blankNodePropertyList", production: :triples, token: @lexer.first)
|
331
|
+
read_predicateObjectList(subject) || subject
|
332
|
+
else
|
333
|
+
# subject predicateObjectList
|
334
|
+
subject = read_subject || error("Failed to parse subject", production: :triples, token: @lexer.first)
|
335
|
+
read_predicateObjectList(subject) || error("Expected predicateObjectList", production: :triples, token: @lexer.first)
|
336
|
+
end
|
337
|
+
end
|
338
|
+
end
|
339
|
+
|
340
|
+
# @param [RDF::Resource] subject
|
341
|
+
# @return [RDF::URI] the last matched verb
|
342
|
+
def read_predicateObjectList(subject)
|
343
|
+
prod(:predicateObjectList, %{;}) do
|
344
|
+
last_verb = nil
|
345
|
+
while verb = read_verb
|
346
|
+
last_verb = verb
|
347
|
+
prod(:_predicateObjectList_5) do
|
348
|
+
read_objectList(subject, verb) || error("Expected objectList", production: :predicateObjectList, token: @lexer.first)
|
349
|
+
end
|
350
|
+
break unless @lexer.first === ';'
|
351
|
+
@lexer.shift while @lexer.first === ';'
|
352
|
+
end
|
353
|
+
last_verb
|
354
|
+
end
|
355
|
+
end
|
356
|
+
|
357
|
+
# @return [RDF::Term] the last matched object
|
358
|
+
def read_objectList(subject, predicate)
|
359
|
+
prod(:objectList, %{,}) do
|
360
|
+
last_object = nil
|
361
|
+
while object = prod(:_objectList_2) {read_object(subject, predicate)}
|
362
|
+
last_object = object
|
363
|
+
break unless @lexer.first === ','
|
364
|
+
@lexer.shift while @lexer.first === ','
|
365
|
+
end
|
366
|
+
last_object
|
367
|
+
end
|
368
|
+
end
|
369
|
+
|
370
|
+
# @return [RDF::URI]
|
371
|
+
def read_verb
|
372
|
+
error("read_verb", "Unexpected end of file") unless token = @lexer.first
|
373
|
+
case token.type || token.value
|
374
|
+
when 'a' then prod(:verb) {@lexer.shift && RDF.type}
|
375
|
+
else prod(:verb) {read_iri}
|
376
|
+
end
|
377
|
+
end
|
378
|
+
|
379
|
+
# @return [RDF::Resource]
|
380
|
+
def read_subject
|
381
|
+
prod(:subject) do
|
382
|
+
read_iri ||
|
383
|
+
read_BlankNode ||
|
384
|
+
read_collection ||
|
385
|
+
error( "Expected subject", production: :subject, token: @lexer.first)
|
386
|
+
end
|
387
|
+
end
|
388
|
+
|
389
|
+
# @return [RDF::Term, nil] the object that was read, or nil if no object matched
|
390
|
+
def read_object(subject = nil, predicate = nil)
|
391
|
+
prod(:object) do
|
392
|
+
if object = read_iri ||
|
393
|
+
read_BlankNode ||
|
394
|
+
read_collection ||
|
395
|
+
read_blankNodePropertyList ||
|
396
|
+
read_literal
|
397
|
+
|
398
|
+
add_statement(:object, RDF::Statement(subject, predicate, object)) if subject && predicate
|
399
|
+
object
|
400
|
+
end
|
401
|
+
end
|
402
|
+
end
|
403
|
+
|
404
|
+
# @return [RDF::Literal]
|
405
|
+
def read_literal
|
406
|
+
error("Unexpected end of file", production: :literal) unless token = @lexer.first
|
407
|
+
case token.type || token.value
|
408
|
+
when :INTEGER then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.integer)}
|
409
|
+
when :DECIMAL
|
410
|
+
prod(:litearl) do
|
411
|
+
value = @lexer.shift.value
|
412
|
+
value = "0#{value}" if value.start_with?(".")
|
413
|
+
literal(value, datatype: RDF::XSD.decimal)
|
414
|
+
end
|
415
|
+
when :DOUBLE then prod(:literal) {literal(@lexer.shift.value.sub(/\.([eE])/, '.0\1'), datatype: RDF::XSD.double)}
|
416
|
+
when "true", "false" then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.boolean)}
|
417
|
+
when :STRING_LITERAL_QUOTE, :STRING_LITERAL_SINGLE_QUOTE
|
418
|
+
prod(:literal) do
|
419
|
+
value = @lexer.shift.value[1..-2]
|
420
|
+
error("read_literal", "Unexpected end of file") unless token = @lexer.first
|
421
|
+
case token.type || token.value
|
422
|
+
when :LANGTAG
|
423
|
+
literal(value, language: @lexer.shift.value[1..-1].to_sym)
|
424
|
+
when '^^'
|
425
|
+
@lexer.shift
|
426
|
+
literal(value, datatype: read_iri)
|
427
|
+
else
|
428
|
+
literal(value)
|
429
|
+
end
|
430
|
+
end
|
431
|
+
when :STRING_LITERAL_LONG_QUOTE, :STRING_LITERAL_LONG_SINGLE_QUOTE
|
432
|
+
prod(:literal) do
|
433
|
+
value = @lexer.shift.value[3..-4]
|
434
|
+
error("read_literal", "Unexpected end of file") unless token = @lexer.first
|
435
|
+
case token.type || token.value
|
436
|
+
when :LANGTAG
|
437
|
+
literal(value, language: @lexer.shift.value[1..-1].to_sym)
|
438
|
+
when '^^'
|
439
|
+
@lexer.shift
|
440
|
+
literal(value, datatype: read_iri)
|
441
|
+
else
|
442
|
+
literal(value)
|
443
|
+
end
|
444
|
+
end
|
445
|
+
end
|
446
|
+
end
|
447
|
+
|
448
|
+
# @return [RDF::Node]
|
449
|
+
def read_blankNodePropertyList
|
450
|
+
token = @lexer.first
|
451
|
+
if token === '['
|
452
|
+
prod(:blankNodePropertyList, %{]}) do
|
453
|
+
@lexer.shift
|
454
|
+
progress("blankNodePropertyList") {"token: #{token.inspect}"}
|
455
|
+
node = bnode
|
456
|
+
read_predicateObjectList(node)
|
457
|
+
error("blankNodePropertyList", "Expected closing ']'") unless @lexer.first === ']'
|
458
|
+
@lexer.shift
|
459
|
+
node
|
460
|
+
end
|
461
|
+
end
|
462
|
+
end
|
463
|
+
|
464
|
+
# @return [RDF::Node]
|
465
|
+
def read_collection
|
466
|
+
if @lexer.first === '('
|
467
|
+
prod(:collection, %{)}) do
|
468
|
+
@lexer.shift
|
469
|
+
token = @lexer.first
|
470
|
+
progress("collection") {"token: #{token.inspect}"}
|
471
|
+
objects = []
|
472
|
+
while object = read_object
|
473
|
+
objects << object
|
474
|
+
end
|
475
|
+
list = RDF::List.new(nil, nil, objects)
|
476
|
+
list.each_statement do |statement|
|
477
|
+
add_statement("collection", statement)
|
478
|
+
end
|
479
|
+
error("collection", "Expected closing ')'") unless @lexer.first === ')'
|
480
|
+
@lexer.shift
|
481
|
+
list.subject
|
482
|
+
end
|
483
|
+
end
|
484
|
+
end
|
485
|
+
|
486
|
+
# @return [RDF::URI]
|
487
|
+
def read_iri
|
488
|
+
token = @lexer.first
|
489
|
+
case token && token.type
|
490
|
+
when :IRIREF then prod(:iri) {process_iri(@lexer.shift)}
|
491
|
+
when :PNAME_LN, :PNAME_NS then prod(:iri) {pname(*@lexer.shift.value.split(':', 2))}
|
492
|
+
end
|
493
|
+
end
|
494
|
+
|
495
|
+
# @return [RDF::Node]
|
496
|
+
def read_BlankNode
|
497
|
+
token = @lexer.first
|
498
|
+
case token && token.type
|
499
|
+
when :BLANK_NODE_LABEL then prod(:BlankNode) {bnode(@lexer.shift.value[2..-1])}
|
500
|
+
when :ANON then @lexer.shift && prod(:BlankNode) {bnode}
|
501
|
+
end
|
502
|
+
end
|
503
|
+
|
504
|
+
def prod(production, recover_to = [])
|
505
|
+
@prod_stack << {prod: production, recover_to: recover_to}
|
506
|
+
@depth += 1
|
507
|
+
@recovering = false
|
508
|
+
progress("#{production}(start)") {"token: #{@lexer.first.inspect}"}
|
509
|
+
yield
|
510
|
+
rescue EBNF::LL1::Lexer::Error, SyntaxError, Recovery => e
|
511
|
+
# Lexer encountered an illegal token or the parser encountered
|
512
|
+
# a terminal which is inappropriate for the current production.
|
513
|
+
# Perform error recovery to find a reasonable terminal based
|
514
|
+
# on the follow sets of the relevant productions. This includes
|
515
|
+
# remaining terms from the current production and the stacked
|
516
|
+
# productions
|
517
|
+
case e
|
518
|
+
when EBNF::LL1::Lexer::Error
|
519
|
+
@lexer.recover
|
520
|
+
begin
|
521
|
+
error("Lexer error", "With input '#{e.input}': #{e.message}",
|
522
|
+
production: production,
|
523
|
+
token: e.token)
|
524
|
+
rescue SyntaxError
|
525
|
+
end
|
526
|
+
end
|
527
|
+
raise EOFError, "End of input found when recovering" if @lexer.first.nil?
|
528
|
+
debug("recovery", "current token: #{@lexer.first.inspect}", :level => 4)
|
529
|
+
|
530
|
+
unless e.is_a?(Recovery)
|
531
|
+
# Get the list of follows for this sequence, this production and the stacked productions.
|
532
|
+
debug("recovery", "stack follows:", :level => 4)
|
533
|
+
@prod_stack.reverse.each do |prod|
|
534
|
+
debug("recovery", :level => 4) {" #{prod[:prod]}: #{prod[:recover_to].inspect}"}
|
535
|
+
end
|
536
|
+
end
|
537
|
+
|
538
|
+
# Find all follows to the top of the stack
|
539
|
+
follows = @prod_stack.map {|prod| Array(prod[:recover_to])}.flatten.compact.uniq
|
540
|
+
|
541
|
+
# Skip tokens until one is found in follows
|
542
|
+
while (token = (@lexer.first rescue @lexer.recover)) && follows.none? {|t| token === t}
|
543
|
+
skipped = @lexer.shift
|
544
|
+
progress("recovery") {"skip #{skipped.inspect}"}
|
545
|
+
end
|
546
|
+
debug("recovery") {"found #{token.inspect} in follows"}
|
547
|
+
|
548
|
+
# Re-raise the error unless token is a follows of this production
|
549
|
+
raise Recovery unless Array(recover_to).any? {|t| token === t}
|
550
|
+
|
551
|
+
# Skip that token to get something reasonable to start the next production with
|
552
|
+
@lexer.shift
|
553
|
+
ensure
|
554
|
+
progress("#{production}(finish)")
|
555
|
+
@depth -= 1
|
556
|
+
@prod_stack.pop
|
557
|
+
end
|
558
|
+
|
559
|
+
##
|
560
|
+
# Warning information, used as level `1` debug messages.
|
561
|
+
#
|
562
|
+
# @param [String] node Relevant location associated with message
|
563
|
+
# @param [String] message Warning string
|
564
|
+
# @param [Hash] options
|
565
|
+
# @option options [URI, #to_s] :production
|
566
|
+
# @option options [Token] :token
|
567
|
+
# @see {#debug}
|
568
|
+
def warn(node, message, options = {})
|
569
|
+
m = "WARNING "
|
570
|
+
m += "[line: #{@lineno}] " if @lineno
|
571
|
+
m += message
|
572
|
+
m += " (found #{options[:token].inspect})" if options[:token]
|
573
|
+
m += ", production = #{options[:production].inspect}" if options[:production]
|
574
|
+
@warnings << m unless @recovering
|
575
|
+
debug(node, m, options.merge(:level => 1))
|
576
|
+
end
|
577
|
+
|
578
|
+
##
|
579
|
+
# Error information, used as level `0` debug messages.
|
580
|
+
#
|
581
|
+
# @overload error(node, message, options)
|
582
|
+
# @param [String] node Relevant location associated with message
|
583
|
+
# @param [String] message Error string
|
584
|
+
# @param [Hash] options
|
585
|
+
# @option options [URI, #to_s] :production
|
586
|
+
# @option options [Token] :token
|
587
|
+
# @see {#debug}
|
588
|
+
def error(*args)
|
589
|
+
return if @recovering
|
590
|
+
options = args.last.is_a?(Hash) ? args.pop : {}
|
591
|
+
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
|
592
|
+
message = "#{args.join(': ')}"
|
593
|
+
m = "ERROR "
|
594
|
+
m += "[line: #{lineno}] " if lineno
|
595
|
+
m += message
|
596
|
+
m += " (found #{options[:token].inspect})" if options[:token]
|
597
|
+
m += ", production = #{options[:production].inspect}" if options[:production]
|
598
|
+
@recovering = true
|
599
|
+
@errors << m
|
600
|
+
debug(m, options.merge(level: 0))
|
601
|
+
raise SyntaxError.new(m, lineno: lineno, token: options[:token], production: options[:production])
|
602
|
+
end
|
603
|
+
|
604
|
+
##
# Progress output when debugging.
#
# The call is ignored unless `@options[:debug]` is set, in which
# case it records tracing information as indicated. Additionally,
# if `@options[:debug]` is an Integer, the call is ignored when the
# message's `:level` option is greater than `@options[:debug]`.
#
# Output goes to the `@options[:debug]` Array when it is one, otherwise
# to `$stderr`.
#
# @overload debug(node, message, options)
#   @param [Array<String>] args Relevant location associated with message
#   @param [Hash] options
#   @option options [Integer] :depth
#     Recursion depth for indenting output (defaults to `@depth`)
#   @option options [Integer] :level (3)
#     Level assigned to message, by convention, level `0` is for
#     errors, level `1` is for warnings, level `2` is for parser
#     progress information, and anything higher is for various levels
#     of debug information.
#
# @yield optional block, called without arguments
# @yieldreturn [String] added to message
def debug(*args)
  return unless @options[:debug]

  options = args.last.is_a?(Hash) ? args.pop : {}
  debug_level = options.fetch(:level, 3)
  return if @options[:debug].is_a?(Integer) && debug_level > @options[:debug]

  # Treat an unset depth as top level rather than failing on nil
  depth = options[:depth] || @depth || 0
  args << yield if block_given?

  # Indentation is capped at 100 columns; deeper nesting is marked with '+'
  indent = depth > 100 ? ' ' * 100 + '+' : ' ' * depth
  str = "[#{lineno}](#{debug_level})#{indent}#{args.join(': ')}"

  case @options[:debug]
  when Array
    @options[:debug] << str
  when TrueClass
    $stderr.puts str
  when Integer
    # Level 0/1 messages are not echoed when @options[:errors] /
    # @options[:warnings] is set (presumably because the caller collects
    # them there -- confirm). The level threshold was already checked above.
    case debug_level
    when 0 then return if @options[:errors]
    when 1 then return if @options[:warnings]
    end
    $stderr.puts str
  end
end
|
654
|
+
|
655
|
+
# Used for internal error recovery
|
656
|
+
class Recovery < StandardError
  # Carries no state of its own; raised internally by the reader while
  # it is recovering from an error.
end
|
657
|
+
|
658
|
+
class SyntaxError < RDF::ReaderError
  # Production being processed when the error was raised.
  # @return [Symbol]
  attr_reader :production

  # Invalid token which triggered the error. When it responds to
  # `lineno`, it is also used as a fallback source for {#lineno}.
  # @return [String]
  attr_reader :token

  # Line number where the error occurred.
  # @return [Integer]
  attr_reader :lineno

  ##
  # Builds a syntax error carrying parser context.
  #
  # @param [String, #to_s] message
  # @param [Hash{Symbol => Object}] options
  # @option options [Symbol]  :production (nil)
  # @option options [String]  :token (nil)
  # @option options [Integer] :lineno (nil)
  #   taken from the token when omitted and the token responds to `lineno`
  def initialize(message, options = {})
    @production = options[:production]
    @token      = options[:token]

    explicit_line = options[:lineno]
    @lineno =
      if explicit_line
        explicit_line
      elsif @token.respond_to?(:lineno)
        @token.lineno
      end

    super(message.to_s)
  end
end
|
414
692
|
end # class Reader
|
415
693
|
end # module RDF::Turtle
|