rdf-trig 1.1.4 → 1.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e14c37b1f090059ce8dd6b279a15e968c2c6f99f
4
- data.tar.gz: cd3c7fe9e89ae3a13ce38a83c31c7553b59bcc78
3
+ metadata.gz: d00d578b68a875bc5125f9aa774126ccc243033b
4
+ data.tar.gz: 1009f36d61ec685e87c38bfb5bef15a0af419bd3
5
5
  SHA512:
6
- metadata.gz: 4777a92570a042a8ad45f3f01938744b0a76531fa5355ec7ebb62506a27c804050c2c00d18740e55c7cd50b4a6fdf086b5e8b45b506909319efd61f8c71e5072
7
- data.tar.gz: 6ab8ff3b7a65feda9a6636978e4dcf0b2214e0f2bb94757182196f53d577652fe6331c82a0f59a64979c18e9568f398fb21c0a7f09e3b8bd08197b08e4372763
6
+ metadata.gz: 35af56aa261c892dd3b46a6662f9f5bb21f7e58e4dba381a60b1059d81ad46c8d176fba227438b74fa8a93d18438dc67651e839e68e0f5eddd59186957da14df
7
+ data.tar.gz: ac9cd12228fbb1051186d82255c18b9c53052accc5029c2078c3611bf54879e4179964956a6414239eb84724eb523376f927427746951a008cbf2a8a88286341
data/README.md CHANGED
@@ -4,6 +4,8 @@
4
4
 
5
5
  [![Gem Version](https://badge.fury.io/rb/rdf-trig.png)](http://badge.fury.io/rb/rdf-trig)
6
6
  [![Build Status](https://travis-ci.org/ruby-rdf/rdf-trig.png?branch=master)](http://travis-ci.org/ruby-rdf/rdf-trig)
7
+ [![Coverage Status](https://coveralls.io/repos/ruby-rdf/rdf-trig/badge.svg)](https://coveralls.io/r/ruby-rdf/rdf-trig)
8
+ [![Dependency Status](https://gemnasium.com/ruby-rdf/rdf-trig.png)](https://gemnasium.com/ruby-rdf/rdf-trig)
7
9
 
8
10
  ## Description
9
11
  This is a [Ruby][] implementation of a [TriG][] reader and writer for [RDF.rb][].
@@ -46,8 +48,9 @@ Full documentation available on [Rubydoc.info][TriG doc].
46
48
 
47
49
 
48
50
  ## Implementation Notes
49
- The reader uses the [Turtle][Turtle doc] parser, which is based on the LL1::Parser with minor updates for the TriG grammar.
50
- The writer also is based on the Turtle writer.
51
+ This version uses a hand-written parser built on the Lexer from the [EBNF][] gem, rather than a general [EBNF][] LL(1) parser, for faster performance.
52
+
53
+ The reader uses the [Turtle][Turtle doc] parser. The writer is also based on the Turtle writer.
51
54
 
52
55
  The syntax is compatible with placing default triples within `{}`, but the writer does not use this for writing triples in the default graph.
53
56
 
@@ -55,9 +58,9 @@ There is a new `:stream` option to {RDF::TriG::Writer} which is more efficient f
55
58
 
56
59
  ## Dependencies
57
60
 
58
- * [Ruby](http://ruby-lang.org/) (>= 1.8.7) or (>= 1.8.1 with [Backports][])
59
- * [RDF.rb](http://rubygems.org/gems/rdf) (>= 1.0)
60
- * [rdf-turtle](http://rubygems.org/gems/rdf-turtle) (>= 1.0)
61
+ * [Ruby](http://ruby-lang.org/) (>= 1.9.3)
62
+ * [RDF.rb](http://rubygems.org/gems/rdf) (~> 1.1)
63
+ * [rdf-turtle](http://rubygems.org/gems/rdf-turtle) (~> 1.1, >= 1.1.8)
61
64
 
62
65
  ## Installation
63
66
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.1.4
1
+ 1.1.5
@@ -1,5 +1,5 @@
1
1
  require 'rdf'
2
- require 'ebnf'
2
+ require 'rdf/turtle'
3
3
 
4
4
  module RDF
5
5
  ##
@@ -1,5 +1,4 @@
1
1
  require 'rdf/turtle'
2
- require 'rdf/trig/meta'
3
2
 
4
3
  module RDF::TriG
5
4
  ##
@@ -8,224 +7,28 @@ module RDF::TriG
8
7
  # Leverages the Turtle reader
9
8
  class Reader < RDF::Turtle::Reader
10
9
  format Format
11
- include RDF::TriG::Meta
12
10
 
13
11
  # Terminals passed to lexer. Order matters!
14
- terminal(:ANON, ANON) do |prod, token, input|
15
- input[:resource] = self.bnode
16
- end
17
- terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |prod, token, input|
18
- input[:resource] = self.bnode(token.value[2..-1])
19
- end
20
- terminal(:IRIREF, IRIREF, :unescape => true) do |prod, token, input|
21
- input[:resource] = process_iri(token.value[1..-2])
22
- end
23
- terminal(:DOUBLE, DOUBLE) do |prod, token, input|
24
- # Note that a Turtle Double may begin with a '.[eE]', so tack on a leading
25
- # zero if necessary
26
- value = token.value.sub(/\.([eE])/, '.0\1')
27
- input[:resource] = literal(value, :datatype => RDF::XSD.double)
28
- end
29
- terminal(:DECIMAL, DECIMAL) do |prod, token, input|
30
- # Note that a Turtle Decimal may begin with a '.', so tack on a leading
31
- # zero if necessary
32
- value = token.value
33
- value = "0#{token.value}" if token.value[0,1] == "."
34
- input[:resource] = literal(value, :datatype => RDF::XSD.decimal)
35
- end
36
- terminal(:INTEGER, INTEGER) do |prod, token, input|
37
- input[:resource] = literal(token.value, :datatype => RDF::XSD.integer)
38
- end
39
- # Spec confusion: spec says : "Literals , prefixed names and IRIs may also contain escape sequences"
40
- terminal(:PNAME_LN, PNAME_LN, :unescape => true) do |prod, token, input|
41
- prefix, suffix = token.value.split(":", 2)
42
- input[:resource] = pname(prefix, suffix)
43
- end
44
- # Spec confusion: spec says : "Literals , prefixed names and IRIs may also contain escape sequences"
45
- terminal(:PNAME_NS, PNAME_NS) do |prod, token, input|
46
- prefix = token.value[0..-2]
47
-
48
- # Two contexts, one when prefix is being defined, the other when being used
49
- case prod
50
- when :prefixID, :sparqlPrefix
51
- input[:prefix] = prefix
52
- else
53
- input[:resource] = pname(prefix, '')
54
- end
55
- end
56
- terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, :unescape => true) do |prod, token, input|
57
- input[:string_value] = token.value[3..-4]
58
- end
59
- terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, :unescape => true) do |prod, token, input|
60
- input[:string_value] = token.value[3..-4]
61
- end
62
- terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, :unescape => true) do |prod, token, input|
63
- input[:string_value] = token.value[1..-2]
64
- end
65
- terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, :unescape => true) do |prod, token, input|
66
- input[:string_value] = token.value[1..-2]
67
- end
68
-
12
+ terminal(:ANON, ANON)
13
+ terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL)
14
+ terminal(:IRIREF, IRIREF, unescape: true)
15
+ terminal(:DOUBLE, DOUBLE)
16
+ terminal(:DECIMAL, DECIMAL)
17
+ terminal(:INTEGER, INTEGER)
18
+ terminal(:PNAME_LN, PNAME_LN, unescape: true)
19
+ terminal(:PNAME_NS, PNAME_NS)
20
+ terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, unescape: true)
21
+ terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, unescape: true)
22
+ terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, unescape: true)
23
+ terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, unescape: true)
24
+
69
25
  # String terminals
70
- terminal(nil, %r([\{\}\(\),.;\[\]a]|\^\^|true|false)) do |prod, token, input|
71
- case token.value
72
- when 'A', 'a' then input[:resource] = RDF.type
73
- when 'true', 'false' then input[:resource] = RDF::Literal::Boolean.new(token.value)
74
- when '.' then input[:terminated] = true
75
- else input[:string] = token.value
76
- end
77
- end
78
-
79
- terminal(:GRAPH, /graph/i) do |prod, token, input|
80
- input[:string_value] = token.value
81
- end
82
- terminal(:PREFIX, PREFIX) do |prod, token, input|
83
- input[:string_value] = token.value
84
- end
85
- terminal(:BASE, BASE) do |prod, token, input|
86
- input[:string_value] = token.value
87
- end
88
-
89
- terminal(:LANGTAG, LANGTAG) do |prod, token, input|
90
- input[:lang] = token.value[1..-1]
91
- end
92
-
93
- # Productions
94
- # [2g] block defines the basic creation of context
95
- start_production(:block) do |input, current, callback|
96
- callback.call(:context, "graph", nil)
97
- end
98
- production(:block) do |input, current, callback|
99
- callback.call(:context, "graph", nil)
100
- end
101
-
102
- # [7g] labelOrSubject
103
- # Sets the context for triples defined within that graph
104
- production(:labelOrSubject) do |input, current, callback|
105
- # If input contains set_graph_iri, use the returned value to set @context
106
- debug(":labelOrSubject") {"Set graph context to #{current[:resource]}"}
107
- callback.call(:context, "labelOrSubject", current[:resource])
108
- input[:resource] = current[:resource]
109
- end
110
-
111
- # _triplesOrGraph_2 ::= predicateObjectList '.'
112
- start_production(:_triplesOrGraph_2) do |input, current, callback|
113
- # Default graph after all
114
- callback.call(:context, "graph", nil)
115
- debug("_triplesOrGraph_2") {"subject: #{current[:resource]}"}
116
- current[:subject] = input[:resource]
117
- end
118
-
119
- # Productions
120
- # [4] prefixID defines a prefix mapping
121
- production(:prefixID) do |input, current, callback|
122
- prefix = current[:prefix]
123
- iri = current[:resource]
124
- lexical = current[:string_value]
125
- terminated = current[:terminated]
126
- debug("prefixID") {"Defined prefix #{prefix.inspect} mapping to #{iri.inspect}"}
127
- if lexical.start_with?('@') && lexical != '@prefix'
128
- error(:prefixID, "should be downcased")
129
- elsif lexical == '@prefix'
130
- error(:prefixID, "directive not terminated") unless terminated
131
- else
132
- error(:prefixID, "directive should not be terminated") if terminated
133
- end
134
- prefix(prefix, iri)
135
- end
26
+ terminal(nil, %r([\{\}\(\),.;\[\]a]|\^\^|true|false))
136
27
 
137
- # [5] base set base_uri
138
- production(:base) do |input, current, callback|
139
- iri = current[:resource]
140
- lexical = current[:string_value]
141
- terminated = current[:terminated]
142
- debug("base") {"Defined base as #{iri}"}
143
- if lexical.start_with?('@') && lexical != '@base'
144
- error(:base, "should be downcased")
145
- elsif lexical == '@base'
146
- error(:base, "directive not terminated") unless terminated
147
- else
148
- error(:base, "directive should not be terminated") if terminated
149
- end
150
- options[:base_uri] = iri
151
- end
152
-
153
- # [52s] triplesBlock
154
- start_production(:triplesBlock) do |input, current, callback|
155
- # Note production as triples for blankNodePropertyList
156
- # to set :subject instead of :resource
157
- current[:triples] = true
158
- end
159
- production(:triplesBlock) do |input, current, callback|
160
- # Note production as triples for blankNodePropertyList
161
- # to set :subject instead of :resource
162
- current[:triples] = true
163
- end
164
-
165
- # [9] verb ::= predicate | "a"
166
- production(:verb) do |input, current, callback|
167
- input[:predicate] = current[:resource]
168
- end
169
-
170
- # [10] subject ::= iri | blank
171
- start_production(:subject) do |input, current, callback|
172
- current[:triples] = nil
173
- end
174
-
175
- production(:subject) do |input, current, callback|
176
- input[:subject] = current[:resource]
177
- end
178
-
179
- # [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal
180
- production(:object) do |input, current, callback|
181
- if input[:object_list]
182
- # Part of an rdf:List collection
183
- input[:object_list] << current[:resource]
184
- else
185
- debug("object") {"current: #{current.inspect}"}
186
- callback.call(:statement, "object", input[:subject], input[:predicate], current[:resource])
187
- end
188
- end
189
-
190
- # [14] blankNodePropertyList ::= "[" predicateObjectList "]"
191
- start_production(:blankNodePropertyList) do |input, current, callback|
192
- current[:subject] = self.bnode
193
- end
194
-
195
- production(:blankNodePropertyList) do |input, current, callback|
196
- if input[:triples]
197
- input[:subject] = current[:subject]
198
- else
199
- input[:resource] = current[:subject]
200
- end
201
- end
202
-
203
- # [15] collection ::= "(" object* ")"
204
- start_production(:collection) do |input, current, callback|
205
- # Tells the object production to collect and not generate statements
206
- current[:object_list] = []
207
- end
208
-
209
- production(:collection) do |input, current, callback|
210
- # Create an RDF list
211
- objects = current[:object_list]
212
- list = RDF::List[*objects]
213
- list.each_statement do |statement|
214
- next if statement.predicate == RDF.type && statement.object == RDF.List
215
- callback.call(:statement, "collection", statement.subject, statement.predicate, statement.object)
216
- end
217
-
218
- # Return bnode as resource
219
- input[:resource] = list.subject
220
- end
221
-
222
- # [16] RDFLiteral ::= String ( LanguageTag | ( "^^" IRIref ) )?
223
- production(:RDFLiteral) do |input, current, callback|
224
- opts = {}
225
- opts[:datatype] = current[:resource] if current[:resource]
226
- opts[:language] = current[:lang] if current[:lang]
227
- input[:resource] = literal(current[:string_value], opts)
228
- end
28
+ terminal(:GRAPH, /graph/i)
29
+ terminal(:PREFIX, PREFIX)
30
+ terminal(:BASE, BASE)
31
+ terminal(:LANGTAG, LANGTAG)
229
32
 
230
33
  ##
231
34
  # Iterates the given block for each RDF statement in the input.
@@ -235,63 +38,28 @@ module RDF::TriG
235
38
  # @return [void]
236
39
  def each_statement(&block)
237
40
  if block_given?
41
+ @recovering = false
238
42
  @callback = block
239
43
 
240
- parse(@input, START.to_sym, @options.merge(:branch => BRANCH,
241
- :first => FIRST,
242
- :follow => FOLLOW,
243
- :reset_on_start => true)
244
- ) do |context, *data|
245
- case context
246
- when :context
247
- @context = data[1]
248
- when :statement
249
- data << @context if @context
250
- debug("each_statement") {"data: #{data.inspect}, context: #{@context.inspect}"}
251
- loc = data.shift
252
- s = RDF::Statement.from(data, :lineno => lineno)
253
- add_statement(loc, s) unless !s.valid? && validate?
254
- when :trace
255
- level, lineno, depth, *args = data
256
- message = "#{args.join(': ')}"
257
- d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth
258
- str = "[#{lineno}](#{level})#{d_str}#{message}"
259
- case @options[:debug]
260
- when Array
261
- @options[:debug] << str
262
- when TrueClass
263
- $stderr.puts str
264
- when Integer
265
- $stderr.puts(str) if level <= @options[:debug]
266
- end
44
+ begin
45
+ while (@lexer.first rescue true)
46
+ read_trigDoc
267
47
  end
48
+ rescue EBNF::LL1::Lexer::Error, SyntaxError, EOFError, Recovery
49
+ # Terminate loop if EOF found while recovering
268
50
  end
269
- end
270
- enum_for(:each_statement)
271
- rescue EBNF::LL1::Parser::Error, EBNF::LL1::Lexer::Error => e
272
- if validate?
273
- raise RDF::ReaderError, e.message
274
- else
275
- $stderr.puts e.message
276
- end
277
- end
278
51
 
279
- ##
280
- # Iterates the given block for each RDF quad in the input.
281
- # This results in flattening each quad into a triple.
282
- #
283
- # @yield [subject, predicate, object]
284
- # @yieldparam [RDF::Resource] subject
285
- # @yieldparam [RDF::URI] predicate
286
- # @yieldparam [RDF::Value] object
287
- # @return [void]
288
- def each_triple(&block)
289
- if block_given?
290
- each_statement do |statement|
291
- block.call(*statement.to_triple)
52
+ if validate?
53
+ if !warnings.empty? && !@options[:warnings]
54
+ $stderr.puts "Warnings: #{warnings.join("\n")}"
55
+ end
56
+ if !errors.empty?
57
+ $stderr.puts "Errors: #{errors.join("\n")}" unless @options[:errors]
58
+ raise RDF::ReaderError, "Errors found during processing"
59
+ end
292
60
  end
293
61
  end
294
- enum_for(:each_quad)
62
+ enum_for(:each_statement)
295
63
  end
296
64
 
297
65
  ##
@@ -311,5 +79,156 @@ module RDF::TriG
311
79
  end
312
80
  enum_for(:each_quad)
313
81
  end
82
+
83
+ # add a statement, object can be literal or URI or bnode
84
+ #
85
+ # @param [Symbol] production
86
+ # @param [RDF::Statement] statement the statement to add
87
+ # @return [RDF::Statement] Added statement
88
+ # @raise [RDF::ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_.
89
+ def add_statement(production, statement)
90
+ error("Statement is invalid: #{statement.inspect.inspect}", production: produciton) if validate? && statement.invalid?
91
+ statement.context = @graph_name if @graph_name
92
+ @callback.call(statement) if statement.subject &&
93
+ statement.predicate &&
94
+ statement.object &&
95
+ (validate? ? statement.valid? : true)
96
+ end
97
+
98
+ protected
99
+ # @return [Object]
100
+ def read_trigDoc
101
+ prod(:trigDoc, %(} .)) do
102
+ read_directive || read_block
103
+ end
104
+ end
105
+
106
+ # @return [Object]
107
+ def read_block
108
+ prod(:block, %(})) do
109
+ @graph_name = nil
110
+ token = @lexer.first
111
+ case token && (token.type || token.value)
112
+ when :GRAPH
113
+ @lexer.shift
114
+ @graph_name = read_labelOrSubject || error("Expected label or subject", production: :block, token: @lexer.first)
115
+ read_wrappedGraph || error("Expected wrappedGraph", production: :block, token: @lexer.first)
116
+ @graph_name = nil
117
+ when :IRIREF, :BLANK_NODE_LABEL, :ANON, :PNAME_LN, :PNAME_NS
118
+ read_triplesOrGraph || error("Expected triplesOrGraph", production: :block, token: @lexer.first)
119
+ when '{'
120
+ read_wrappedGraph || error("Expected wrappedGraph", production: :block, token: @lexer.first)
121
+ when '(', '['
122
+ read_triples2 || error("Expected collection or blankNodePropertyList", production: :block, token: @lexer.first)
123
+ when nil
124
+ # End of input
125
+ else
126
+ error("Unexpected token", production: :block, token: @lexer.first)
127
+ end
128
+ end
129
+ end
130
+
131
+ # @return [Object]
132
+ def read_triplesOrGraph
133
+ while name = read_labelOrSubject
134
+ prod(:triplesOrGraph, %(} .)) do
135
+ token = @lexer.first
136
+ case token && token.value
137
+ when '{'
138
+ @graph_name = name
139
+ read_wrappedGraph || error("Expected wrappedGraph", production: :triplesOrGraph, token: @lexer.first)
140
+ @graph_name = nil
141
+ true
142
+ else
143
+ read_predicateObjectList(name) || error("Expected predicateObjectList", production: :triplesOrGraph, token: @lexer.first)
144
+ unless @recovering
145
+ # If recovering, we will have eaten the closing '.'
146
+ token = @lexer.shift
147
+ unless token && token.value == '.'
148
+ error("Expected '.' following triple", production: :triplesOrGraph, token: token)
149
+ end
150
+ end
151
+ end
152
+ end
153
+ end
154
+ true
155
+ end
156
+
157
+ # @return [Object]
158
+ def read_triples2
159
+ token = @lexer.first
160
+ case token && token.value
161
+ when '['
162
+ prod(:triples2) do
163
+ # blankNodePropertyList predicateObjectList?
164
+ subject = read_blankNodePropertyList || error("Failed to parse blankNodePropertyList", production: :triples2, token: @lexer.first)
165
+ read_predicateObjectList(subject)
166
+ if !@recovering || @lexer.first === '.'
167
+ # If recovering, we will have eaten the closing '.'
168
+ token = @lexer.shift
169
+ unless token && token.value == '.'
170
+ error("Expected '.' following triple", production: :triples2, token: token)
171
+ end
172
+ end
173
+ true
174
+ end
175
+ when '('
176
+ prod(:triples2) do
177
+ subject = read_collection || error("Failed to parse read_collection", production: :triples2, token: @lexer.first)
178
+ token = @lexer.first
179
+ case token && (token.type || token.value)
180
+ when 'a', :IRIREF, :PNAME_LN, :PNAME_NS then read_predicateObjectList(subject)
181
+ else error("Expected predicateObjectList after collection subject", production: :triples2, token: token)
182
+ end
183
+ if !@recovering || @lexer.first === '.'
184
+ # If recovering, we will have eaten the closing '.'
185
+ token = @lexer.shift
186
+ unless token && token.value == '.'
187
+ error("Expected '.' following triple", production: :triples2, token: token)
188
+ end
189
+ end
190
+ true
191
+ end
192
+ end
193
+ end
194
+
195
+ # @return [Object]
196
+ def read_wrappedGraph
197
+ token = @lexer.first
198
+ if token && token.value == '{'
199
+ prod(:wrappedGraph, %w(})) do
200
+ @lexer.shift
201
+ while read_triplesBlock
202
+ # Read until nothing found
203
+ end
204
+ if !@recovering || @lexer.first === '}'
205
+ # If recovering, we will have eaten the closing '}'
206
+ token = @lexer.shift
207
+ unless token && token.value == '}'
208
+ error("Expected '}' following triple", production: :wrappedGraph, token: token)
209
+ end
210
+ end
211
+ true
212
+ end
213
+ end
214
+ end
215
+
216
+ # @return [Object]
217
+ def read_triplesBlock
218
+ prod(:triplesBlock, %w(.)) do
219
+ while (token = @lexer.first) && token.value != '}' && read_triples
220
+ break unless @lexer.first === '.'
221
+ @lexer.shift
222
+ end
223
+ end
224
+ end
225
+
226
+ # @return [RDF::Resource]
227
+ def read_labelOrSubject
228
+ prod(:labelOrSubject) do
229
+ read_iri || read_BlankNode
230
+ end
231
+ end
232
+
314
233
  end # class Reader
315
234
  end # module RDF::TriG