rdf-turtle 1.0.3 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -45,6 +45,36 @@ In some cases, the specification is unclear on certain issues:
45
45
  datatype are saved as non-datatyped triples in the graph. This will be updated in the future when the rest of the
46
46
  library suite is brought up to date with RDF 1.1.
47
47
 
48
+ ### Freebase-specific Reader
49
+ There is a special reader useful for processing [Freebase Dumps][]. To invoke
50
+ this, add the `:freebase => true` option to {RDF::Turtle::Reader.new}, or
51
+ use {RDF::Turtle::FreebaseReader} directly. As with {RDF::Turtle::Reader},
52
+ prefix definitions may be passed in using the `:prefixes` option to
53
+ {RDF::Turtle::FreebaseReader#initialize} using the standard mechanism defined
54
+ for `RDF::Reader`.
55
+
56
+ The [Freebase Dumps][] have a very normalized form, similar to N-Triples but
57
+ with prefixes. They also have a large amount of garbage. This Reader is
58
+ optimized for this format and will perform faster error recovery.
59
+
60
+ An example of reading Freebase dumps:
61
+
62
+ require "rdf/turtle"
63
+ fb = "../freebase/freebase-rdf-2013-03-03-00-00.ttl"
64
+ fb_prefixes = {
65
+ :ns => "http://rdf.freebase.com/ns/",
66
+ :key => "http://rdf.freebase.com/key/",
67
+ :owl => "http://www.w3.org/2002/07/owl#",
68
+ :rdfs => "http://www.w3.org/2000/01/rdf-schema#",
69
+ :rdf => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
70
+ :xsd => "http://www.w3.org/2001/XMLSchema#"
71
+ }
72
+ RDF::Turtle::Reader.open(fb,
73
+ :freebase => true,
74
+ :prefixes => fb_prefixes) do |r|
75
+
76
+ r.each_statement {|stmt| puts stmt.to_ntriples}
77
+ end
48
78
  ## Implementation Notes
49
79
  The reader uses the [EBNF][] gem to generate first, follow and branch tables, and uses
50
80
  the `Parser` and `Lexer` modules to implement the Turtle parser.
@@ -102,4 +132,4 @@ see <http://unlicense.org/> or the accompanying {file:UNLICENSE} file.
102
132
  [Turtle]: http://www.w3.org/TR/2012/WD-turtle-20120710/
103
133
  [Turtle doc]: http://rubydoc.info/github/ruby-rdf/rdf-turtle/master/file/README.md
104
134
  [Turtle EBNF]: http://dvcs.w3.org/hg/rdf/file/default/rdf-turtle/turtle.bnf
105
- [Swap]: http://www.w3.org/2000/10/swap/
135
+ [Freebase Dumps]: https://developers.google.com/freebase/data
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.3
1
+ 1.0.4
@@ -21,10 +21,11 @@ module RDF
21
21
  # @author [Gregg Kellogg](http://greggkellogg.net/)
22
22
  module Turtle
23
23
  require 'rdf/turtle/format'
24
- autoload :Reader, 'rdf/turtle/reader'
25
- autoload :Terminals, 'rdf/turtle/terminals'
26
- autoload :VERSION, 'rdf/turtle/version'
27
- autoload :Writer, 'rdf/turtle/writer'
24
+ autoload :Reader, 'rdf/turtle/reader'
25
+ autoload :FreebaseReader, 'rdf/turtle/freebase_reader'
26
+ autoload :Terminals, 'rdf/turtle/terminals'
27
+ autoload :VERSION, 'rdf/turtle/version'
28
+ autoload :Writer, 'rdf/turtle/writer'
28
29
 
29
30
  def self.debug?; @debug; end
30
31
  def self.debug=(value); @debug = value; end
@@ -0,0 +1,96 @@
1
+ require 'rdf'
2
+ require 'rdf/ntriples'
3
+
4
+ module RDF::Turtle
5
+ ##
6
+ # Parser specifically for Freebase, which has a very regular form.
7
+ #
8
+ # @see https://developers.google.com/freebase/data
9
+ class FreebaseReader < RDF::NTriples::Reader
10
+ include RDF::Turtle::Terminals
11
+
12
+ def self.format; RDF::Turtle::Format; end # this reader reports RDF::Turtle::Format as its format class
13
+
14
+ ##
15
+ # Read lines until one yields a triple; lines for which `blank?` is true or that `read_prefix` consumes are skipped.
16
+ # @return [Array] `[subject, predicate, object]`. An RDF::ReaderError is re-raised when validating; otherwise its message is written to `$stderr` and the loop continues.
17
+ def read_triple
18
+ loop do
19
+ begin
20
+ readline.strip!
21
+ line = @line # NOTE(review): `line` is not referenced again in this method
22
+ unless blank? || read_prefix
23
+ subject = read_pname(:intern => true) || fail_subject # subject is only read as a PName
24
+ predicate = read_pname(:intern => true) || fail_predicate # predicate is only read as a PName
25
+ object = read_pname || read_uriref || read_boolean || read_numeric || read_literal || fail_object # readers are tried in this order; first truthy result wins
26
+ if validate? && !read_eos # end-of-statement is only checked when validating
27
+ raise RDF::ReaderError, "expected end of statement in line #{lineno}: #{current_line.inspect}"
28
+ end
29
+ return [subject, predicate, object]
30
+ end
31
+ rescue RDF::ReaderError => e
32
+ raise e if validate?
33
+ $stderr.puts e.message # non-validating mode: report and fall through to the next loop iteration
34
+ end
35
+ end
36
+ end
37
+
38
+ ##
39
+ # Read a prefix of the form `@prefix pfx: <uri> .
40
+ #
41
+ # Add prefix definition to `prefixes`
42
+ # @return [RDF::URI]
43
+ def read_prefix
44
+ if prefix_str = match(/^@prefix\s+(\w+:\s+#{IRIREF})\s*.$/)
45
+ prefix, iri = prefix_str.split(/:\s+/)
46
+ return nil unless iri
47
+ prefix(prefix, iri[1..-2])
48
+ end
49
+ end
50
+
51
+ ##
52
+ # Read a PNAME of the form `prefix:suffix` and expand it by appending the suffix to the value returned by `prefix(ns)`.
53
+ # @return [RDF::URI, nil] nil when `match` returns a falsy value. Raises RDF::ReaderError for an undefined prefix or when an ArgumentError occurs. `options` is accepted but not read here.
54
+ def read_pname(options = {})
55
+ if pname_str = match(/^(\w+:\S+)/)
56
+ ns, suffix = pname_str.split(':', 2)
57
+ if suffix[-1,1] == "."
58
+ suffix.chop! # Remove end of statement
59
+ @line.insert(0, ".") # put the "." back at the front of @line
60
+ end
61
+ pfx_iri = prefix(ns)
62
+ raise RDF::ReaderError, "prefix #{ns.inspect} is not defined" unless pfx_iri
63
+ uri = RDF::URI(pfx_iri + suffix)
64
+ uri.validate! if validate?
65
+ uri
66
+ end
67
+ rescue ArgumentError => e
68
+ raise RDF::ReaderError, "invalid PName" # NOTE(review): the original ArgumentError message in `e` is discarded
69
+ end
70
+
71
+ ##
72
+ # Read a numeric value, trying the DOUBLE, DECIMAL and INTEGER patterns in that order.
73
+ # @return [RDF::Literal::Double, RDF::Literal::Decimal, RDF::Literal::Integer, nil] nil when none of the patterns match
74
+ def read_numeric
75
+ case
76
+ when double_str = match(/^(#{DOUBLE})/)
77
+ double_str = double_str.sub(/\.([eE])/, '.0\1') # e.g. "1.e5" becomes "1.0e5"
78
+ RDF::Literal::Double.new(double_str, :canonicalize => canonicalize?)
79
+ when decimal_str = match(/^(#{DECIMAL})/)
80
+ decimal_str = "0#{decimal_str}" if decimal_str[0,1] == "." # prepend "0" when the text starts with "."
81
+ RDF::Literal::Decimal.new(decimal_str, :canonicalize => canonicalize?)
82
+ when integer_str = match(/^(#{INTEGER})/)
83
+ RDF::Literal::Integer.new(integer_str, :canonicalize => canonicalize?)
84
+ end
85
+ end
86
+
87
+ ##
88
+ # Read a boolean value: the text `true` or `false` matched with a `^`-anchored pattern.
89
+ # @return [RDF::Literal::Boolean, nil] nil when `match` returns a falsy value
90
+ def read_boolean
91
+ if bool_str = match(/^(true|false)/) # NOTE(review): no trailing boundary, so a longer token starting with true/false also matches
92
+ RDF::Literal::Boolean.new(bool_str, :canonicalize => canonicalize?)
93
+ end
94
+ end
95
+ end # class FreebaseReader
96
+ end # module RDF::Turtle
@@ -18,11 +18,7 @@ module RDF::Turtle
18
18
  input[:resource] = self.bnode(token.value[2..-1])
19
19
  end
20
20
  terminal(:IRIREF, IRIREF, :unescape => true) do |prod, token, input|
21
- begin
22
- input[:resource] = process_iri(token.value[1..-2])
23
- rescue ArgumentError => e
24
- raise RDF::ReaderError, e.message
25
- end
21
+ input[:resource] = process_iri(token.value[1..-2])
26
22
  end
27
23
  terminal(:DOUBLE, DOUBLE) do |prod, token, input|
28
24
  # Note that a Turtle Double may begin with a '.[eE]', so tack on a leading
@@ -204,6 +200,21 @@ module RDF::Turtle
204
200
  input[:resource] = literal(current[:string_value], opts)
205
201
  end
206
202
 
203
+ ##
204
+ # Redirect for Freebase Reader: builds a FreebaseReader when `options[:freebase]` is truthy, otherwise an instance of `self`.
205
+ # The instance is created with `allocate` and initialized through `send(:initialize, ...)` instead of calling `klass.new`.
206
+ # @private
207
+ def self.new(input = nil, options = {}, &block)
208
+ klass = if options[:freebase]
209
+ FreebaseReader
210
+ else
211
+ self
212
+ end
213
+ reader = klass.allocate
214
+ reader.send(:initialize, input, options, &block) # `initialize` is private, hence `send`
215
+ reader
216
+ end
217
+
207
218
  ##
208
219
  # Initializes a new reader instance.
209
220
  #
@@ -228,18 +239,29 @@ module RDF::Turtle
228
239
  # the parser will attempt to recover from errors.
229
240
  # @option options [Boolean] :progress
230
241
  # Show progress of parser productions
231
- # @option options [Boolean] :debug
232
- # Detailed debug output
242
+ # @option options [Boolean, Integer, Array] :debug
243
+ # Detailed debug output. If set to an Integer, output is restricted
244
+ # to messages of that priority: `0` for errors, `1` for warnings,
245
+ # `2` for processor tracing, and anything else for various levels
246
+ # of debug. If set to an Array, information is collected in the array
247
+ # instead of being output to `$stderr`.
248
+ # @option options [Boolean] :freebase (false)
249
+ # Use optimized Freebase reader
233
250
  # @return [RDF::Turtle::Reader]
234
251
  def initialize(input = nil, options = {}, &block)
235
252
  super do
236
253
  @options = {
237
254
  :anon_base => "b0",
238
255
  :validate => false,
239
- :debug => RDF::Turtle.debug?,
240
256
  }.merge(options)
241
257
  @options = {:prefixes => {nil => ""}}.merge(@options) unless @options[:validate]
258
+ @options[:debug] ||= case
259
+ when RDF::Turtle.debug? then true
260
+ when @options[:progress] then 2
261
+ when @options[:validate] then 1
262
+ end
242
263
 
264
+ @options[:base_uri] = RDF::URI(base_uri || "")
243
265
  debug("base IRI") {base_uri.inspect}
244
266
 
245
267
  debug("validate") {validate?.inspect}
@@ -273,13 +295,27 @@ module RDF::Turtle
273
295
  :follow => FOLLOW,
274
296
  :reset_on_start => true)
275
297
  ) do |context, *data|
276
- loc = data.shift
277
298
  case context
278
299
  when :statement
279
- add_statement(loc, RDF::Statement.from(data))
300
+ loc = data.shift
301
+ s = RDF::Statement.from(data, :lineno => lineno)
302
+ add_statement(loc, s) unless !s.valid? && validate?
303
+ when :trace
304
+ level, lineno, depth, *args = data
305
+ message = "#{args.join(': ')}"
306
+ d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth
307
+ str = "[#{lineno}](#{level})#{d_str}#{message}"
308
+ case @options[:debug]
309
+ when Array
310
+ @options[:debug] << str
311
+ when TrueClass
312
+ $stderr.puts str
313
+ when Integer
314
+ $stderr.puts(str) if level <= @options[:debug]
315
+ end
280
316
  end
281
317
  end
282
- rescue ArgumentError, EBNF::LL1::Parser::Error => e
318
+ rescue EBNF::LL1::Parser::Error => e
283
319
  progress("Parsing completed with errors:\n\t#{e.message}")
284
320
  raise RDF::ReaderError, e.message if validate?
285
321
  end
@@ -305,25 +341,23 @@ module RDF::Turtle
305
341
  # @return [RDF::Statement] Added statement
306
342
  # @raise [RDF::ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_.
307
343
  def add_statement(node, statement)
308
- error(node, "Statement is invalid: #{statement.inspect.inspect}") unless statement.valid?
344
+ error(node, "Statement is invalid: #{statement.inspect.inspect}") if validate? && statement.invalid?
309
345
  progress(node) {"generate statement: #{statement.to_ntriples}"}
310
- @callback.call(statement)
346
+ @callback.call(statement) if statement.subject &&
347
+ statement.predicate &&
348
+ statement.object &&
349
+ (validate? ? statement.valid? : true)
311
350
  end
312
351
 
352
+ # Process a URI against base
313
353
  def process_iri(iri)
314
- iri(base_uri, iri)
315
- end
316
-
317
- # Create IRIs
318
- def iri(value, append = nil)
319
- value = RDF::URI.new(value)
320
- value = value.join(append) if append
321
- value.validate! if validate? && value.respond_to?(:validate)
354
+ value = base_uri.join(iri)
355
+ value.validate! if validate?
322
356
  value.canonicalize! if canonicalize?
323
357
  value = RDF::URI.intern(value) if intern?
324
358
  value
325
359
  end
326
-
360
+
327
361
  # Create a literal
328
362
  def literal(value, options = {})
329
363
  options = options.dup
metadata CHANGED
@@ -1,141 +1,166 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rdf-turtle
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.0.4
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - Gregg Kellogg
8
- autorequire:
9
+ autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2013-03-28 00:00:00.000000000 Z
12
+ date: 2013-04-01 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: rdf
15
- requirement: !ruby/object:Gem::Requirement
16
+ version_requirements: !ruby/object:Gem::Requirement
16
17
  requirements:
17
- - - ! '>='
18
+ - - ">="
18
19
  - !ruby/object:Gem::Version
19
20
  version: '1.0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
21
+ none: false
22
+ requirement: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ! '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.0'
27
+ none: false
28
+ prerelease: false
29
+ type: :runtime
27
30
  - !ruby/object:Gem::Dependency
28
31
  name: ebnf
32
+ version_requirements: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: 0.3.0
37
+ none: false
29
38
  requirement: !ruby/object:Gem::Requirement
30
39
  requirements:
31
- - - ! '>='
40
+ - - ">="
32
41
  - !ruby/object:Gem::Version
33
- version: 0.2.1
34
- type: :runtime
42
+ version: 0.3.0
43
+ none: false
35
44
  prerelease: false
45
+ type: :runtime
46
+ - !ruby/object:Gem::Dependency
47
+ name: open-uri-cached
36
48
  version_requirements: !ruby/object:Gem::Requirement
37
49
  requirements:
38
- - - ! '>='
50
+ - - ">="
39
51
  - !ruby/object:Gem::Version
40
- version: 0.2.1
41
- - !ruby/object:Gem::Dependency
42
- name: open-uri-cached
52
+ version: 0.0.5
53
+ none: false
43
54
  requirement: !ruby/object:Gem::Requirement
44
55
  requirements:
45
- - - ! '>='
56
+ - - ">="
46
57
  - !ruby/object:Gem::Version
47
58
  version: 0.0.5
48
- type: :development
59
+ none: false
49
60
  prerelease: false
61
+ type: :development
62
+ - !ruby/object:Gem::Dependency
63
+ name: rspec
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
- - - ! '>='
66
+ - - ">="
53
67
  - !ruby/object:Gem::Version
54
- version: 0.0.5
55
- - !ruby/object:Gem::Dependency
56
- name: rspec
68
+ version: 2.12.0
69
+ none: false
57
70
  requirement: !ruby/object:Gem::Requirement
58
71
  requirements:
59
- - - ! '>='
72
+ - - ">="
60
73
  - !ruby/object:Gem::Version
61
74
  version: 2.12.0
62
- type: :development
75
+ none: false
63
76
  prerelease: false
77
+ type: :development
78
+ - !ruby/object:Gem::Dependency
79
+ name: rdf-isomorphic
64
80
  version_requirements: !ruby/object:Gem::Requirement
65
81
  requirements:
66
- - - ! '>='
82
+ - - ">="
67
83
  - !ruby/object:Gem::Version
68
- version: 2.12.0
69
- - !ruby/object:Gem::Dependency
70
- name: rdf-isomorphic
84
+ version: !binary |-
85
+ MA==
86
+ none: false
71
87
  requirement: !ruby/object:Gem::Requirement
72
88
  requirements:
73
- - - ! '>='
89
+ - - ">="
74
90
  - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :development
91
+ version: !binary |-
92
+ MA==
93
+ none: false
77
94
  prerelease: false
95
+ type: :development
96
+ - !ruby/object:Gem::Dependency
97
+ name: json-ld
78
98
  version_requirements: !ruby/object:Gem::Requirement
79
99
  requirements:
80
- - - ! '>='
100
+ - - ">="
81
101
  - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: json-ld
102
+ version: !binary |-
103
+ MA==
104
+ none: false
85
105
  requirement: !ruby/object:Gem::Requirement
86
106
  requirements:
87
- - - ! '>='
107
+ - - ">="
88
108
  - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
109
+ version: !binary |-
110
+ MA==
111
+ none: false
91
112
  prerelease: false
113
+ type: :development
114
+ - !ruby/object:Gem::Dependency
115
+ name: yard
92
116
  version_requirements: !ruby/object:Gem::Requirement
93
117
  requirements:
94
- - - ! '>='
118
+ - - ">="
95
119
  - !ruby/object:Gem::Version
96
- version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: yard
120
+ version: 0.8.3
121
+ none: false
99
122
  requirement: !ruby/object:Gem::Requirement
100
123
  requirements:
101
- - - ! '>='
124
+ - - ">="
102
125
  - !ruby/object:Gem::Version
103
126
  version: 0.8.3
104
- type: :development
127
+ none: false
105
128
  prerelease: false
129
+ type: :development
130
+ - !ruby/object:Gem::Dependency
131
+ name: rdf-spec
106
132
  version_requirements: !ruby/object:Gem::Requirement
107
133
  requirements:
108
- - - ! '>='
134
+ - - ">="
109
135
  - !ruby/object:Gem::Version
110
- version: 0.8.3
111
- - !ruby/object:Gem::Dependency
112
- name: rdf-spec
136
+ version: '1.0'
137
+ none: false
113
138
  requirement: !ruby/object:Gem::Requirement
114
139
  requirements:
115
- - - ! '>='
140
+ - - ">="
116
141
  - !ruby/object:Gem::Version
117
142
  version: '1.0'
118
- type: :development
143
+ none: false
119
144
  prerelease: false
145
+ type: :development
146
+ - !ruby/object:Gem::Dependency
147
+ name: rake
120
148
  version_requirements: !ruby/object:Gem::Requirement
121
149
  requirements:
122
- - - ! '>='
150
+ - - ">="
123
151
  - !ruby/object:Gem::Version
124
- version: '1.0'
125
- - !ruby/object:Gem::Dependency
126
- name: rake
152
+ version: !binary |-
153
+ MA==
154
+ none: false
127
155
  requirement: !ruby/object:Gem::Requirement
128
156
  requirements:
129
- - - ! '>='
157
+ - - ">="
130
158
  - !ruby/object:Gem::Version
131
- version: '0'
132
- type: :development
159
+ version: !binary |-
160
+ MA==
161
+ none: false
133
162
  prerelease: false
134
- version_requirements: !ruby/object:Gem::Requirement
135
- requirements:
136
- - - ! '>='
137
- - !ruby/object:Gem::Version
138
- version: '0'
163
+ type: :development
139
164
  description: RDF::Turtle is an Turtle reader/writer for the RDF.rb library suite.
140
165
  email: public-rdf-ruby@w3.org
141
166
  executables: []
@@ -147,36 +172,42 @@ files:
147
172
  - History
148
173
  - UNLICENSE
149
174
  - VERSION
175
+ - lib/rdf/turtle.rb
150
176
  - lib/rdf/turtle/format.rb
177
+ - lib/rdf/turtle/freebase_reader.rb
151
178
  - lib/rdf/turtle/meta.rb
152
179
  - lib/rdf/turtle/reader.rb
153
180
  - lib/rdf/turtle/terminals.rb
154
181
  - lib/rdf/turtle/version.rb
155
182
  - lib/rdf/turtle/writer.rb
156
- - lib/rdf/turtle.rb
157
183
  homepage: http://github.com/ruby-rdf/rdf-turtle
158
184
  licenses:
159
185
  - Public Domain
160
- metadata: {}
161
- post_install_message:
186
+ post_install_message:
162
187
  rdoc_options: []
163
188
  require_paths:
164
189
  - lib
165
190
  required_ruby_version: !ruby/object:Gem::Requirement
166
191
  requirements:
167
- - - ! '>='
192
+ - - ">="
168
193
  - !ruby/object:Gem::Version
169
194
  version: 1.8.1
195
+ none: false
170
196
  required_rubygems_version: !ruby/object:Gem::Requirement
171
197
  requirements:
172
- - - ! '>='
198
+ - - ">="
173
199
  - !ruby/object:Gem::Version
174
- version: '0'
200
+ segments:
201
+ - 0
202
+ version: !binary |-
203
+ MA==
204
+ hash: 2
205
+ none: false
175
206
  requirements: []
176
207
  rubyforge_project: rdf-turtle
177
- rubygems_version: 2.0.3
178
- signing_key:
179
- specification_version: 4
208
+ rubygems_version: 1.8.24
209
+ signing_key:
210
+ specification_version: 3
180
211
  summary: Turtle reader/writer for Ruby.
181
212
  test_files: []
182
213
  has_rdoc: false
checksums.yaml DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- NmFjNDZhYjI5YjkzMzUwMzNjMGVkZGQ2NDg1ZjIzZDRmOTZhZTFkOA==
5
- data.tar.gz: !binary |-
6
- ZjRmYTU5N2I2MzU5OGQzMmNjMzFkNDhkYzE5YWI5NTYxODZmYzg4Yw==
7
- !binary "U0hBNTEy":
8
- metadata.gz: !binary |-
9
- ODBiMWZhMDUzMDNjODhmOTc5MDZmODQ2YTYwMzYyN2I1MWYzMDRmZDZjMmU3
10
- MDFkODc0M2RlNDU4MjdhMGFlMzgxMWJmMWMzOTQxNDkwNjVkOGM5YWM4NTg3
11
- ZGJhNzJkMWVjM2E1Y2UzOWI2MDI0ZmY1YmM2NTFkZWY2ZWEwYTY=
12
- data.tar.gz: !binary |-
13
- NmFlMGUzMjZhMjQwNGVjMDViOGU4ODUwNTE1MmYyYmE1ZWU0YjA2YmEzYmI3
14
- YTY0ZDhiNzc0ZTUzYWFmNzY5NDRlNTJhNjkyMDdmMDc3ZWNjZmFlYzI5ZWEy
15
- NTIzMWEyYTE1MzlkYzQ0YWY1NjQzNjg2NjJkNjRmZDg1MDNjOTk=