rdf-turtle 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS +1 -0
- data/History +9 -0
- data/README.markdown +142 -0
- data/UNLICENSE +24 -0
- data/VERSION +1 -0
- data/lib/rdf/ll1/lexer.rb +458 -0
- data/lib/rdf/ll1/parser.rb +462 -0
- data/lib/rdf/ll1/scanner.rb +100 -0
- data/lib/rdf/turtle.rb +35 -0
- data/lib/rdf/turtle/format.rb +41 -0
- data/lib/rdf/turtle/meta.rb +1748 -0
- data/lib/rdf/turtle/patches.rb +38 -0
- data/lib/rdf/turtle/reader.rb +362 -0
- data/lib/rdf/turtle/terminals.rb +88 -0
- data/lib/rdf/turtle/writer.rb +562 -0
- metadata +115 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'rdf'
|
2
|
+
|
3
|
+
module RDF
  class List
    ##
    # Validate the list, ensuring that:
    #
    # * every rdf:rest value is either a blank node or rdf:nil
    # * every node has exactly one rdf:first and exactly one rdf:rest
    # * no node other than the first one carries any further properties
    #   (rdf:type statements are tolerated on every node and are not checked)
    # * the rdf:rest chain terminates, i.e. no node is visited twice
    #
    # @return [Boolean] `true` if the list is well-formed, `false` otherwise
    def valid?
      li = subject
      visited = []
      while li != RDF.nil do
        # A node seen before means the rdf:rest chain loops back on itself;
        # without this check a cyclic list would keep the loop running forever.
        return false if visited.include?(li)
        visited << li

        rest = nil
        firsts = rests = 0
        @graph.query(:subject => li) do |st|
          case st.predicate
          when RDF.type
            # Be tolerant about rdf:type entries, as some OWL vocabularies use it excessively
          when RDF.first
            firsts += 1
          when RDF.rest
            rest = st.object
            return false unless rest.node? || rest == RDF.nil
            rests += 1
          else
            # First node may have other properties
            return false unless li == subject
          end
        end
        return false unless firsts == 1 && rests == 1
        li = rest
      end
      true
    end
  end
end
|
@@ -0,0 +1,362 @@
|
|
1
|
+
require 'rdf/turtle/meta'
|
2
|
+
require 'rdf/ll1/parser'
|
3
|
+
|
4
|
+
module RDF::Turtle
|
5
|
+
##
|
6
|
+
# A parser for the Turtle 2
|
7
|
+
class Reader < RDF::Reader
|
8
|
+
format Format
|
9
|
+
include RDF::Turtle::Meta
|
10
|
+
include RDF::LL1::Parser
|
11
|
+
include RDF::Turtle::Terminals
|
12
|
+
|
13
|
+
# Terminals passed to lexer. Order matters!
#
# Every handler receives the reader, the current production, the matched
# token and the `input` hash, and stores what it decoded from the token
# into `input` (`:resource`, `:prefix`, `:string_value`, `:string` or `:lang`).

# Blank nodes
terminal(:ANON, ANON) do |reader, prod, token, input|
  input[:resource] = reader.bnode
end
terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |reader, prod, token, input|
  # Drop the leading "_:" so only the label is used as the node identifier
  input[:resource] = reader.bnode(token.value[2..-1])
end

# IRI references; the surrounding "<" and ">" are stripped
terminal(:IRI_REF, IRI_REF, :unescape => true) do |reader, prod, token, input|
  input[:resource] = reader.process_iri(token.value[1..-2])
end

# Numeric literals: xsd:double, xsd:decimal and xsd:integer, each in
# unsigned, negative and positive form
terminal(:DOUBLE, DOUBLE) do |reader, prod, token, input|
  input[:resource] = reader.literal(token.value, :datatype => RDF::XSD.double)
end
terminal(:DOUBLE_NEGATIVE, DOUBLE_NEGATIVE) do |reader, prod, token, input|
  input[:resource] = reader.literal(token.value, :datatype => RDF::XSD.double)
end
terminal(:DOUBLE_POSITIVE, DOUBLE_POSITIVE) do |reader, prod, token, input|
  input[:resource] = reader.literal(token.value, :datatype => RDF::XSD.double)
end
terminal(:DECIMAL, DECIMAL) do |reader, prod, token, input|
  input[:resource] = reader.literal(token.value, :datatype => RDF::XSD.decimal)
end
terminal(:DECIMAL_NEGATIVE, DECIMAL_NEGATIVE) do |reader, prod, token, input|
  input[:resource] = reader.literal(token.value, :datatype => RDF::XSD.decimal)
end
terminal(:DECIMAL_POSITIVE, DECIMAL_POSITIVE) do |reader, prod, token, input|
  input[:resource] = reader.literal(token.value, :datatype => RDF::XSD.decimal)
end
terminal(:INTEGER, INTEGER) do |reader, prod, token, input|
  input[:resource] = reader.literal(token.value, :datatype => RDF::XSD.integer)
end
terminal(:INTEGER_NEGATIVE, INTEGER_NEGATIVE) do |reader, prod, token, input|
  input[:resource] = reader.literal(token.value, :datatype => RDF::XSD.integer)
end
terminal(:INTEGER_POSITIVE, INTEGER_POSITIVE) do |reader, prod, token, input|
  input[:resource] = reader.literal(token.value, :datatype => RDF::XSD.integer)
end

# Spec confusion: spec says : "Literals , prefixed names and IRIs may also contain escape sequences"
terminal(:PNAME_LN, PNAME_LN) do |reader, prod, token, input|
  # Split on the first ":" only; the local part may itself contain colons
  prefix, suffix = token.value.split(":", 2)
  input[:resource] = reader.pname(prefix, suffix)
end
# Spec confusion: spec says : "Literals , prefixed names and IRIs may also contain escape sequences"
terminal(:PNAME_NS, PNAME_NS) do |reader, prod, token, input|
  # Drop the trailing ":"
  prefix = token.value[0..-2]

  # Two contexts, one when prefix is being defined, the other when being used
  case prod
  when :prefixID
    input[:prefix] = prefix
  else
    input[:resource] = reader.pname(prefix, '')
  end
end

# String literals; the slices remove the three-character (long form) or
# one-character (short form) delimiters
terminal(:STRING_LITERAL_LONG1, STRING_LITERAL_LONG1, :unescape => true) do |reader, prod, token, input|
  input[:string_value] = token.value[3..-4]
end
terminal(:STRING_LITERAL_LONG2, STRING_LITERAL_LONG2, :unescape => true) do |reader, prod, token, input|
  input[:string_value] = token.value[3..-4]
end
terminal(:STRING_LITERAL1, STRING_LITERAL1, :unescape => true) do |reader, prod, token, input|
  input[:string_value] = token.value[1..-2]
end
terminal(:STRING_LITERAL2, STRING_LITERAL2, :unescape => true) do |reader, prod, token, input|
  input[:string_value] = token.value[1..-2]
end

# String terminals
terminal(nil, %r([\(\),.;\[\]a]|\^\^|@base|@prefix|true|false)) do |reader, prod, token, input|
  case token.value
  when 'a' then input[:resource] = RDF.type
  when 'true', 'false' then input[:resource] = RDF::Literal::Boolean.new(token.value)
  else input[:string] = token.value
  end
end
terminal(:LANGTAG, LANGTAG) do |reader, prod, token, input|
  # Drop the leading "@"
  input[:lang] = token.value[1..-1]
end
|
91
|
+
|
92
|
+
# Productions
#
# Every handler receives the reader, the phase (`:start` and `:finish` are
# the values tested below), the `input` hash, the `current` hash and a
# `callback` that is called with `:trace` or `:statement` as first argument.
# Handlers read what was collected in `current` and publish their result
# into `input`.

# [4] prefixID defines a prefix mapping
production(:prefixID) do |reader, phase, input, current, callback|
  next unless phase == :finish
  prefix = current[:prefix]
  iri = current[:resource]
  callback.call(:trace, "prefixID", "Defined prefix #{prefix.inspect} mapping to #{iri.inspect}")
  reader.prefix(prefix, iri)
end

# [5] base set base_uri
production(:base) do |reader, phase, input, current, callback|
  next unless phase == :finish
  iri = current[:resource]
  callback.call(:trace, "base", "Defined base as #{iri}")
  reader.options[:base_uri] = iri
end

# [9] verb ::= predicate | "a"
production(:verb) do |reader, phase, input, current, callback|
  next unless phase == :finish
  input[:predicate] = current[:resource]
end

# [10] subject ::= IRIref | blank
production(:subject) do |reader, phase, input, current, callback|
  next unless phase == :finish
  input[:subject] = current[:resource]
end

# [12] object ::= IRIref | blank | literal
production(:object) do |reader, phase, input, current, callback|
  next unless phase == :finish
  if input[:object_list]
    # Part of an rdf:List collection
    input[:object_list] << current[:resource]
  else
    callback.call(:trace, "object", "current: #{current.inspect}")
    callback.call(:statement, "object", input[:subject], input[:predicate], current[:resource])
  end
end

# [15] blankNodePropertyList ::= "[" predicateObjectList "]"
production(:blankNodePropertyList) do |reader, phase, input, current, callback|
  if phase == :start
    # Fresh blank node that acts as subject of the enclosed predicateObjectList
    current[:subject] = reader.bnode
  else
    # Hand that blank node back as the resource of this production
    input[:resource] = current[:subject]
  end
end

# [16] collection ::= "(" object* ")"
production(:collection) do |reader, phase, input, current, callback|
  if phase == :start
    current[:object_list] = [] # Tells the object production to collect and not generate statements
  else
    # Create an RDF list
    bnode = reader.bnode
    objects = current[:object_list]
    list = RDF::List.new(bnode, nil, objects)
    list.each_statement do |statement|
      # Spec Confusion, referenced section "Collection" is missing from the spec.
      # Anecdotal evidence indicates that some expect each node to be of type rdf:List,
      # but existing Notation3 and Turtle tests (http://www.w3.org/2001/sw/DataAccess/df1/tests/manifest.ttl) do not.
      next if statement.predicate == RDF.type && statement.object == RDF.List
      callback.call(:statement, "collection", statement.subject, statement.predicate, statement.object)
    end
    # An empty collection is represented by rdf:nil instead of a blank node
    bnode = RDF.nil if list.empty?

    # Return bnode as resource
    input[:resource] = bnode
  end
end

# [60s] RDFLiteral ::= String ( LANGTAG | ( "^^" IRIref ) )?
production(:RDFLiteral) do |reader, phase, input, current, callback|
  next unless phase == :finish
  opts = {}
  opts[:datatype] = current[:resource] if current[:resource]
  opts[:language] = current[:lang] if current[:lang]
  input[:resource] = reader.literal(current[:string_value], opts)
end
|
175
|
+
|
176
|
+
##
# The base URI currently in effect, as stored in the reader options.
#
# Missing in 0.3.2
#
# @return [Object, nil] the value of the `:base_uri` option, or `nil` if unset
def base_uri
  @options[:base_uri]
end
|
181
|
+
|
182
|
+
##
# Initializes a new parser instance.
#
# The given options are merged over the defaults `:anon_base => "b0"` and
# `:validate => false`. If a block is given it is evaluated in the context of
# the reader when it takes no arguments, otherwise it is called with the reader.
#
# @param [String, #to_s] input
# @param [Hash{Symbol => Object}] options
# @option options [Hash] :prefixes (Hash.new)
#   the prefix mappings to use (for accessing intermediate parser productions)
# @option options [#to_s] :base_uri (nil)
#   the base URI to use when resolving relative URIs (for accessing intermediate parser productions)
# @option options [#to_s] :anon_base ("b0")
#   Basis for generating anonymous Nodes
#   (NOTE(review): not referenced by the reader code visible here -- confirm)
# @option options [Boolean] :resolve_uris (false)
#   Resolve prefix and relative IRIs
#   (NOTE(review): not referenced by the reader code visible here -- confirm)
# @option options [Boolean] :validate (false)
#   whether to validate the parsed statements and values. If not validating,
#   the parser will attempt to recover from errors.
# @option options [Boolean] :progress
#   Show progress of parser productions
# @option options [Boolean, Array] :debug
#   Detailed debug output; when an Array is given, debug messages are appended to it
# @return [RDF::Turtle::Reader]
def initialize(input = nil, options = {}, &block)
  # Bare `super` forwards input and options; the block below replaces the
  # caller's block, which is invoked explicitly at the end instead.
  super do
    @options = {:anon_base => "b0", :validate => false}.merge(options)

    debug("def prefix", base_uri.inspect)

    debug("validate", validate?.inspect)
    debug("canonicalize", canonicalize?.inspect)
    debug("intern", intern?.inspect)

    if block_given?
      case block.arity
      when 0 then instance_eval(&block)
      else block.call(self)
      end
    end
  end
end
|
223
|
+
|
224
|
+
def inspect
|
225
|
+
sprintf("#<%s:%#0x(%s)>", self.class.name, __id__, base_uri.to_s)
|
226
|
+
end
|
227
|
+
|
228
|
+
##
# Iterates the given block for each RDF statement in the input.
#
# Parser errors are passed to {#error}, which raises `RDF::ReaderError` when
# validating and only records a debug message otherwise.
#
# @yield [statement]
# @yieldparam [RDF::Statement] statement
# @return [Enumerator] when called without a block
# @return [void] otherwise
def each_statement(&block)
  # Without a block there is nothing to call back into; hand out an
  # enumerator rather than failing on the first generated statement.
  return enum_for(:each_statement) unless block_given?

  @callback = block

  parse(@input, START.to_sym, @options.merge(:branch => BRANCH, :follow => FOLLOW)) do |context, *data|
    case context
    when :statement
      add_triple(*data)
    when :trace
      debug(*data)
    end
  end
rescue RDF::LL1::Parser::Error => e
  error("each_statement", e.message, :backtrace => e.backtrace)
end
|
248
|
+
|
249
|
+
##
# Iterates the given block for each RDF triple in the input.
#
# Built on {#each_statement}; every statement is split into its three terms.
#
# @yield [subject, predicate, object]
# @yieldparam [RDF::Resource] subject
# @yieldparam [RDF::URI] predicate
# @yieldparam [RDF::Value] object
# @return [Enumerator] when called without a block
# @return [void] otherwise
def each_triple(&block)
  # Without a block `block` is nil and `block.call` below would fail
  return enum_for(:each_triple) unless block_given?

  each_statement do |statement|
    block.call(*statement.to_triple)
  end
end
|
262
|
+
|
263
|
+
##
# Builds a statement from the given terms and, if it is valid, passes it to
# the block given to {#each_statement}. An invalid statement is reported
# through {#error} instead.
#
# @param [#to_s] node
#   label used as context in debug and error messages (the productions pass
#   the production name, e.g. "object" or "collection")
# @param [URI, Node] subject the subject of the statement
# @param [URI] predicate the predicate of the statement
# @param [URI, Node, Literal] object the object of the statement
# @return [void]
# @raise [RDF::ReaderError] if the statement is invalid and the reader is validating
def add_triple(node, subject, predicate, object)
  statement = RDF::Statement.new(subject, predicate, object)
  if statement.valid?
    debug(node, "generate statement: #{statement}")
    @callback.call(statement)
  else
    error(node, "Statement is invalid: #{statement.inspect}")
  end
end
|
280
|
+
|
281
|
+
##
# Resolves an IRI reference against the current base URI.
#
# @param [#to_s] iri the (possibly relative) IRI reference
# @return [Object] whatever {#iri} returns for the base URI joined with `iri`
def process_iri(iri)
  # The parentheses make this a call to the #iri method, not a read of the
  # local variable of the same name.
  iri(base_uri, iri)
end
|
284
|
+
|
285
|
+
##
# Create IRIs
#
# Builds an `RDF::URI` from `value`, optionally joins `append` onto it, and
# applies validation, canonicalization and interning according to the
# reader's `validate?`, `canonicalize?` and `intern?` settings.
#
# @param [#to_s] value the IRI, or the base when `append` is given
# @param [#to_s] append (nil) reference to join onto `value`
# @return [RDF::URI]
def iri(value, append = nil)
  value = RDF::URI.new(value)
  value = value.join(append) if append
  # Guard on the method that is actually called (`validate!`)
  value.validate! if validate? && value.respond_to?(:validate!)
  value.canonicalize! if canonicalize?
  value = RDF::URI.intern(value) if intern?
  value
end
|
294
|
+
|
295
|
+
##
# Create a literal
#
# @param [Object] value the lexical value of the literal
# @param [Hash{Symbol => Object}] options
#   options passed on to `RDF::Literal.new` (the productions pass `:datatype`
#   and `:language`); the caller's hash is not modified
# @return [RDF::Literal]
def literal(value, options = {})
  # Work on a copy, since an xsd:string datatype is removed below
  options = options.dup
  # Internal representation is to not use xsd:string, although it could arguably go the other way.
  options.delete(:datatype) if options[:datatype] == RDF::XSD.string
  debug("literal", "value: #{value.inspect}, options: #{options.inspect}, validate: #{validate?.inspect}, c14n?: #{canonicalize?.inspect}")
  RDF::Literal.new(value, options.merge(:validate => validate?, :canonicalize => canonicalize?))
end
|
303
|
+
|
304
|
+
##
# Override #prefix to take a relative IRI
#
# prefix directives map a local name to an IRI, also resolved against the current In-Scope Base URI.
# Spec confusion, presume that an undefined empty prefix has an empty relative IRI, which uses
# string concatenation rules against the in-scope IRI at the time of use
#
# @param [#to_s] prefix the prefix name
# @param [#to_s] iri (nil)
#   the IRI to map the prefix to; when omitted, `nil` is passed on to the
#   inherited implementation ({#pname} uses that form to look a prefix up)
# @return [Object] whatever the inherited #prefix returns
def prefix(prefix, iri = nil)
  debug("prefix", "'#{prefix}' <#{iri}>")
  # Relative IRIs are resolved against @base
  iri = process_iri(iri) if iri
  super(prefix, iri)
end
|
316
|
+
|
317
|
+
##
# Expand a PNAME using string concatenation
#
# Prefixes must be defined, except for the special case of the empty prefix,
# which falls back to the current base URI. An undefined prefix is reported
# through {#error}; when the reader is not validating, parsing continues
# with an empty namespace.
#
# @param [#to_s] prefix the prefix part (before the colon)
# @param [#to_s] suffix the local part (after the colon)
# @return [Object] the IRI built by {#iri} from namespace + local part
# @raise [RDF::ReaderError] if the prefix is undefined and the reader is validating
def pname(prefix, suffix)
  # The parentheses make this a call to the #prefix method, not a read of
  # the parameter of the same name.
  mapping = prefix(prefix)
  base =
    if mapping
      mapping.to_s
    elsif prefix.to_s.empty?
      base_uri.to_s
    else
      error("pname", "undefined prefix #{prefix.inspect}")
      # Only reached when #error did not raise: recover with an empty
      # namespace instead of failing on a nil base below.
      ''
    end
  # Avoid a doubled "#" when the namespace already contains one
  suffix = suffix.to_s.sub(/^\#/, "") if base.index("#")
  debug("pname", "base: '#{base}', suffix: '#{suffix}'")
  iri(base + suffix.to_s)
end
|
332
|
+
|
333
|
+
##
# Keep track of allocated BNodes
#
# Without a label a new node is created on every call. With a label the
# node is created once and the same node is returned for later calls with
# that label (labels are compared by their string form).
#
# @param [#to_s] value (nil) the blank node label
# @return [RDF::Node]
def bnode(value = nil)
  return RDF::Node.new unless value
  @bnode_cache ||= {}
  @bnode_cache[value.to_s] ||= RDF::Node.new(value)
end
|
339
|
+
|
340
|
+
##
# Report an error. Raises when the reader is validating or the error is
# flagged as fatal; otherwise the message is only recorded through {#debug}.
#
# @param [#to_s] node label giving the context of the error
# @param [String] message the error message
# @param [Hash] options
# @option options [Boolean] :fatal raise even when not validating
# @option options [Array<String>] :backtrace backtrace to attach to the raised error
# @raise [RDF::ReaderError] when validating or when `:fatal` is set
def error(node, message, options = {})
  if @options[:validate] || options[:fatal]
    raise RDF::ReaderError, message, options[:backtrace]
  else
    debug(node, message, options)
  end
end
|
351
|
+
|
352
|
+
##
# Progress output when debugging
#
# The line is formatted as `[lineno]<indent>node: message`. It is appended to
# `@options[:debug]` when that option is an Array, and written to `$stderr`
# when `RDF::Turtle.debug?` is true.
#
# @param [#to_s] node label giving the context of the message
# @param [String] message
# @param [Hash] options
# @option options [Integer] :depth indentation width, defaults to the reader's `depth`
def debug(node, message, options = {})
  depth = options[:depth] || self.depth
  str = "[#{@lineno}]#{' ' * depth}#{node}: #{message}"
  @options[:debug] << str if @options[:debug].is_a?(Array)
  $stderr.puts(str) if RDF::Turtle.debug?
end
|
361
|
+
end # class Reader
|
362
|
+
end # module RDF::Turtle
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'rdf/ll1/lexer'
|
2
|
+
|
3
|
+
module RDF::Turtle
  ##
  # Definitions of token regular expressions used for lexical analysis.
  #
  # The bracketed numbers in the trailing comments (e.g. [93s]) are the
  # grammar production numbers as given in the original source.
  module Terminals
    if RUBY_VERSION >= '1.9'
      ##
      # Unicode regular expressions for Ruby 1.9+ with the Oniguruma engine.
      # Whitespace inside the heredoc is removed before compiling.
      U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
                   [\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]|
                   [\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]|
                   [\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]|
                   [\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\u{10000}-\\u{EFFFF}]
                 EOS
      U_CHARS2 = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]")
    else
      ##
      # UTF-8 regular expressions for Ruby 1.8.x.
      # The same ranges spelled out as byte sequences; the (?# ...) groups are
      # regexp comments naming the code point range being encoded.
      U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
                   \\xC3[\\x80-\\x96]|                                (?# [\\u00C0-\\u00D6]|)
                   \\xC3[\\x98-\\xB6]|                                (?# [\\u00D8-\\u00F6]|)
                   \\xC3[\\xB8-\\xBF]|[\\xC4-\\xCB][\\x80-\\xBF]|     (?# [\\u00F8-\\u02FF]|)
                   \\xCD[\\xB0-\\xBD]|                                (?# [\\u0370-\\u037D]|)
                   \\xCD\\xBF|[\\xCE-\\xDF][\\x80-\\xBF]|             (?# [\\u037F-\\u1FFF]|)
                   \\xE0[\\xA0-\\xBF][\\x80-\\xBF]|                   (?# ...)
                   \\xE1[\\x80-\\xBF][\\x80-\\xBF]|                   (?# ...)
                   \\xE2\\x80[\\x8C-\\x8D]|                           (?# [\\u200C-\\u200D]|)
                   \\xE2\\x81[\\xB0-\\xBF]|                           (?# [\\u2070-\\u218F]|)
                   \\xE2[\\x82-\\x85][\\x80-\\xBF]|                   (?# ...)
                   \\xE2\\x86[\\x80-\\x8F]|                           (?# ...)
                   \\xE2[\\xB0-\\xBE][\\x80-\\xBF]|                   (?# [\\u2C00-\\u2FEF]|)
                   \\xE2\\xBF[\\x80-\\xAF]|                           (?# ...)
                   \\xE3\\x80[\\x81-\\xBF]|                           (?# [\\u3001-\\uD7FF]|)
                   \\xE3[\\x81-\\xBF][\\x80-\\xBF]|                   (?# ...)
                   [\\xE4-\\xEC][\\x80-\\xBF][\\x80-\\xBF]|           (?# ...)
                   \\xED[\\x80-\\x9F][\\x80-\\xBF]|                   (?# ...)
                   \\xEF[\\xA4-\\xB6][\\x80-\\xBF]|                   (?# [\\uF900-\\uFDCF]|)
                   \\xEF\\xB7[\\x80-\\x8F]|                           (?# ...)
                   \\xEF\\xB7[\\xB0-\\xBF]|                           (?# [\\uFDF0-\\uFFFD]|)
                   \\xEF[\\xB8-\\xBE][\\x80-\\xBF]|                   (?# ...)
                   \\xEF\\xBF[\\x80-\\xBD]|                           (?# ...)
                   \\xF0[\\x90-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|      (?# [\\u{10000}-\\u{EFFFF}])
                   [\\xF1-\\xF2][\\x80-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|
                   \\xF3[\\x80-\\xAF][\\x80-\\xBF][\\x80-\\xBF]       (?# ...)
                 EOS
      U_CHARS2 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
                   \\xC2\\xB7|                                        (?# \\u00B7|)
                   \\xCC[\\x80-\\xBF]|\\xCD[\\x80-\\xAF]|             (?# [\\u0300-\\u036F]|)
                   \\xE2\\x80\\xBF|\\xE2\\x81\\x80                    (?# [\\u203F-\\u2040])
                 EOS
    end
    UCHAR                = RDF::LL1::Lexer::UCHAR

    # Building blocks shared by the terminals below.
    # WS is a character class: in a regexp literal without the /x flag every
    # space is significant, so the alternatives must not be padded with blanks.
    WS                   = /[ \t\r\n]/                                           # [93s]
    PN_CHARS_BASE        = /[A-Z]|[a-z]|#{U_CHARS1}|#{UCHAR}/                    # [95s]
    PN_CHARS_U           = /_|#{PN_CHARS_BASE}/                                  # [96s]
    PN_CHARS             = /-|[0-9]|#{PN_CHARS_U}|#{U_CHARS2}/                   # [98s]
    # Optional tail of a name: dots are allowed inside but not as last character
    PN_CHARS_BODY        = /(?:(?:\.|#{PN_CHARS})*#{PN_CHARS})?/
    PN_LOCAL             = /(?:[0-9]|#{PN_CHARS_U})#{PN_CHARS_BODY}/             # [100s]

    EXPONENT             = /[eE][+-]?[0-9]+/                                     # [86s]

    # Terminals referenced by the reader
    ANON                 = /\[#{WS}*\]/                                          # [94s]
    BLANK_NODE_LABEL     = /_:#{PN_LOCAL}/                                       # [73s]
    DECIMAL              = /(?:[0-9]+\.[0-9]*|\.[0-9]+)/                         # [78s]
    DECIMAL_NEGATIVE     = /\-(?:[0-9]+\.[0-9]*|\.[0-9]+)/                       # [83s]
    DECIMAL_POSITIVE     = /\+(?:[0-9]+\.[0-9]*|\.[0-9]+)/                       # [81s]
    DOUBLE               = /(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)#{EXPONENT}/       # [79s]
    DOUBLE_NEGATIVE      = /\-(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)#{EXPONENT}/     # [79s]
    DOUBLE_POSITIVE      = /\+(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)#{EXPONENT}/     # [79s]
    ECHAR                = /\\[tbnrf\\"']/                                       # [91s]
    INTEGER              = /[0-9]+/                                              # [77s]
    INTEGER_NEGATIVE     = /\-[0-9]+/                                            # [83s]
    INTEGER_POSITIVE     = /\+[0-9]+/                                            # [80s]
    # Spec confusion: the EBNF definition of IRI_REF seems malformed, and has no
    # provision for \^, as discussed elsewhere in the spec.
    IRI_REF              = /<(?:[^<>"{}|^`\\\x00-\x20]|#{U_CHARS1})*>/           # [70s]
    LANGTAG              = /@[a-zA-Z]+(?:-[a-zA-Z0-9]+)*/                        # [76s]
    PN_PREFIX            = /#{PN_CHARS_BASE}#{PN_CHARS_BODY}/                    # [99s]
    PNAME_NS             = /#{PN_PREFIX}?:/                                      # [71s]
    PNAME_LN             = /#{PNAME_NS}#{PN_LOCAL}/                              # [72s]
    STRING_LITERAL1      = /'(?:[^\'\\\n\r]|#{ECHAR}|#{UCHAR})*'/                # [87s]
    STRING_LITERAL2      = /"(?:[^\"\\\n\r]|#{ECHAR}|#{UCHAR})*"/                # [88s]
    STRING_LITERAL_LONG1 = /'''(?:(?:'|'')?(?:[^'\\]|#{ECHAR}|#{UCHAR}))*'''/m   # [89s]
    STRING_LITERAL_LONG2 = /"""(?:(?:"|"")?(?:[^"\\]|#{ECHAR}|#{UCHAR}))*"""/m   # [90s]
  end
end
|