rdf-csv 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 99b5481fe4c21f0fedceb1fbd8660cb520415d6a
4
+ data.tar.gz: 606cbbeed5e25354ef019484e558667e9064235c
5
+ SHA512:
6
+ metadata.gz: 694ce180441e8c15dc36c5ed59833ee0dc704d2e129a243858826f4e881b740b145ed9762cbd8565fa3f9d8beb8681146b663656f9a139a93daffe7ed2979623
7
+ data.tar.gz: 62556f047564cf65a11f58d2a394c8b24a589058258e9ac46447cea0e2d312422592e62d0e46fd830b97fe3db2b6aefe2b7ca577c6cd14335b75027f4610aa8b
data/AUTHORS ADDED
@@ -0,0 +1 @@
1
+ * Gregg Kellogg <gregg@greggkellogg.net>
@@ -0,0 +1,77 @@
1
+ # Tabular Data RDF Reader and JSON serializer
2
+
3
+ [RDF-CSV][] reader for [RDF.rb][] and JSON serializer.
4
+
5
+ [![Gem Version](https://badge.fury.io/rb/rdf-csv.png)](http://badge.fury.io/rb/rdf-csv)
6
+ [![Build Status](https://secure.travis-ci.org/ruby-rdf/rdf-csv.png?branch=master)](http://travis-ci.org/ruby-rdf/rdf-csv)
7
+
8
+ ## Features
9
+
10
+ RDF::CSV parses and serializes CSV or other Tabular Data into [RDF][] and JSON.
11
+
12
+ Install with `gem install rdf-csv`
13
+
14
+ ## Examples
15
+
16
+ require 'rubygems'
17
+ require 'rdf/csv'
18
+
19
+ ## RDF Reader
20
+ {RDF::CSV} also acts as a normal RDF reader, using the standard RDF.rb Reader interface:
21
+
22
+ graph = RDF::Graph.load("etc/doap.csv")
23
+
24
+
25
+ ## Documentation
26
+ Full documentation available on [RubyDoc](http://rubydoc.info/gems/rdf-csv/file/README.md)
27
+
28
+
29
+ ### Principal Classes
30
+ * {RDF::CSV}
31
+ * {RDF::CSV::JSON}
32
+ * {RDF::CSV::Format}
33
+ * {RDF::CSV::Metadata}
34
+ * {RDF::CSV::Reader}
35
+
36
+ ## Dependencies
37
+ * [Ruby](http://ruby-lang.org/) (>= 1.9.2)
38
+ * [RDF.rb](http://rubygems.org/gems/rdf) (>= 1.0)
39
+ * [JSON](https://rubygems.org/gems/json) (>= 1.5)
40
+
41
+ ## Installation
42
+ The recommended installation method is via [RubyGems](http://rubygems.org/).
43
+ To install the latest official release of the `RDF::CSV` gem, do:
44
+
45
+ % [sudo] gem install rdf-csv
46
+
47
+ ## Mailing List
48
+ * <http://lists.w3.org/Archives/Public/public-rdf-ruby/>
49
+
50
+ ## Author
51
+ * [Gregg Kellogg](http://github.com/gkellogg) - <http://greggkellogg.net/>
52
+
53
+ ## Contributing
54
+ * Do your best to adhere to the existing coding conventions and idioms.
55
+ * Don't use hard tabs, and don't leave trailing whitespace on any line.
56
+ * Do document every method you add using [YARD][] annotations. Read the
57
+ [tutorial][YARD-GS] or just look at the existing code for examples.
58
+ * Don't touch the `rdf-csv.gemspec`, `VERSION` or `AUTHORS` files. If you need to
59
+ change them, do so on your private branch only.
60
+ * Do feel free to add yourself to the `CREDITS` file and the corresponding
61
+ list in the `README`. Alphabetical order applies.
62
+ * Do note that in order for us to merge any non-trivial changes (as a rule
63
+ of thumb, additions larger than about 15 lines of code), we need an
64
+ explicit [public domain dedication][PDD] on record from you.
65
+
66
+ License
67
+ -------
68
+
69
+ This is free and unencumbered public domain software. For more information,
70
+ see <http://unlicense.org/> or the accompanying {file:UNLICENSE} file.
71
+
72
+ [Ruby]: http://ruby-lang.org/
73
+ [RDF]: http://www.w3.org/RDF/
74
+ [YARD]: http://yardoc.org/
75
+ [YARD-GS]: http://rubydoc.info/docs/yard/file/docs/GettingStarted.md
76
+ [PDD]: http://lists.w3.org/Archives/Public/public-rdf-ruby/2010May/0013.html
77
+ [RDF.rb]: http://rubygems.org/gems/rdf
@@ -0,0 +1,24 @@
1
+ This is free and unencumbered software released into the public domain.
2
+
3
+ Anyone is free to copy, modify, publish, use, compile, sell, or
4
+ distribute this software, either in source code form or as a compiled
5
+ binary, for any purpose, commercial or non-commercial, and by any
6
+ means.
7
+
8
+ In jurisdictions that recognize copyright laws, the author or authors
9
+ of this software dedicate any and all copyright interest in the
10
+ software to the public domain. We make this dedication for the benefit
11
+ of the public at large and to the detriment of our heirs and
12
+ successors. We intend this dedication to be an overt act of
13
+ relinquishment in perpetuity of all present and future rights to this
14
+ software under copyright law.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ For more information, please refer to <http://unlicense.org/>
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
@@ -0,0 +1,17 @@
1
+ $:.unshift(File.expand_path("..", __FILE__))
2
+ require 'rdf' # @see http://rubygems.org/gems/rdf
3
+
4
module RDF
  ##
  # **`RDF::CSV`** is a CSV extension for RDF.rb.
  #
  # The namespace must be `CSV` (not `LD`): every file loaded below opens
  # `module RDF::CSV`, which raises a NameError unless the constant already
  # exists when `rdf/csv/format` is required.
  #
  # @see http://w3c.github.io/csvw/
  #
  # @author [Gregg Kellogg](http://greggkellogg.net/)
  module CSV
    # Format is loaded eagerly so that it registers itself with RDF::Format.
    require 'rdf/csv/format'

    # Everything else is loaded on first reference.
    autoload :Metadata, 'rdf/csv/metadata'
    autoload :Reader,   'rdf/csv/reader'
    # NOTE(review): `rdf/csv/version` and `rdf/csv/writer` are not in this
    # release's packaged file list; referencing VERSION or Writer will raise
    # a LoadError until those files are shipped.
    autoload :VERSION,  'rdf/csv/version'
    autoload :Writer,   'rdf/csv/writer'
  end
end
@@ -0,0 +1,45 @@
1
module RDF::CSV
  ##
  # Format descriptor for Tabular Data (CSV/TSV), registered with RDF.rb.
  #
  # @example Obtaining a CSV format class
  #     RDF::Format.for(:csv)         #=> RDF::CSV::Format
  #     RDF::Format.for(:tsv)         #=> RDF::CSV::Format
  #     RDF::Format.for("etc/foaf.csv")
  #     RDF::Format.for("etc/foaf.tsv")
  #     RDF::Format.for(:file_name      => "etc/foaf.csv")
  #     RDF::Format.for(:file_name      => "etc/foaf.tsv")
  #     RDF::Format.for(:file_extension => "csv")
  #     RDF::Format.for(:file_extension => "tsv")
  #     RDF::Format.for(:content_type   => "text/csv")
  #     RDF::Format.for(:content_type   => "text/tab-separated-values")
  #
  # @example Obtaining serialization format MIME types
  #     RDF::Format.content_types      #=> {"text/csv" => [RDF::CSV::Format]}
  #
  # @example Obtaining serialization format file extension mappings
  #     RDF::Format.file_extensions    #=> {:csv => "text/csv"}
  #
  # @see http://w3c.github.io/csvw/
  class Format < RDF::Format
    content_type 'text/csv', :extensions => [:csv, :tsv], :alias => 'text/tab-separated-values'
    content_encoding 'utf-8'

    reader { RDF::CSV::Reader }

    class << self
      ##
      # Reports whether a text sample looks like tabular data.
      #
      # The sample is tested against a line-anchored pattern; the result is
      # always a strict boolean.
      #
      # NOTE(review): the pattern only accepts a line made of single word
      # characters each followed by a space, then a comma, then one more
      # word character and a space (e.g. "a ,b "). It does not match a plain
      # "a,b,c" header row -- confirm whether `\w+` was intended.
      #
      # @param [String] sample Beginning several bytes (~ 1K) of input.
      # @return [Boolean]
      def detect(sample)
        found = sample.match(/^(?:(?:\w )+,(?:\w ))$/)
        !found.nil?
      end
    end
  end
end
File without changes
@@ -0,0 +1,359 @@
1
+ require 'json'
2
+ require 'json/ld'
3
+ require 'bcp47'
4
+
5
# Nested module form so that this file also loads when it is required before
# `RDF::CSV` has been defined elsewhere.
module RDF
  module CSV
    ##
    # CSVM Metadata processor
    #
    # * Extracts Metadata from file or Hash definition
    # * Merges multiple Metadata definitions
    # * Extract Metadata from a CSV file
    # * Return table-level annotations
    # * Return Column-level annotations
    #
    # A Metadata object is a Hash with symbolic keys. Nested descriptions
    # (columns, dialect, resources, schema, templates) are themselves wrapped
    # in Metadata objects whose `parent` is the enclosing description.
    #
    # NOTE: inside this namespace the bare constants `CSV` and `JSON` resolve
    # to `RDF::CSV` (and potentially `RDF::CSV::JSON`); the standard library
    # is always referenced as `::CSV` / `::JSON`.
    #
    # @author [Gregg Kellogg](http://greggkellogg.net/)
    class Metadata < Hash
      # Every type a Metadata object may have.
      METADATA_TYPES = [:TableGroup, :Table, :Dialect, :Template, :Schema, :Column].freeze

      TABLE_GROUP_PROPERTIES = %w(
        resources schema table-direction dialect templates @type
      ).map(&:to_sym).freeze
      TABLE_GROUP_REQUIRED = [].freeze

      TABLE_PROPERTIES = %w(
        @id schema notes table-direction templates dialect @type
      ).map(&:to_sym).freeze
      TABLE_REQUIRED = [:@id].freeze

      # Dialect properties together with the value used when one is not set.
      # All keys are symbols because incoming keys are symbolized on load.
      DIALECT_DEFAULTS = {
        commentPrefix:     nil,
        delimiter:         ",".freeze,
        doubleQuote:       true,
        encoding:          "utf-8".freeze,
        header:            true,
        headerColumnCount: 0,
        headerRowCount:    1,
        lineTerminator:    %r(\r?\n), # SPEC says "\r\n"
        quoteChar:         '"',
        skipBlankRows:     false,
        skipColumns:       0,
        skipInitialSpace:  false,
        skipRows:          0,
        trim:              false,
        :"@type" =>        nil
      }.freeze

      TEMPLATE_PROPERTIES = %w(
        targetFormat templateFormat title source @type
      ).map(&:to_sym).freeze
      TEMPLATE_REQUIRED = %w(targetFormat templateFormat).map(&:to_sym).freeze

      SCHEMA_PROPERTIES = %w(
        columns primaryKey foreignKeys uriTemplate @type
      ).map(&:to_sym).freeze
      SCHEMA_REQUIRED = [].freeze

      COLUMN_PROPERTIES = %w(
        name title required @type
      ).map(&:to_sym).freeze
      COLUMN_REQUIRED = [:name].freeze

      # Properties which, when not set locally, are looked up on the parent.
      INHERITED_PROPERTIES = %w(
        null language text-direction separator format datatype
        length minLength maxLength minimum maximum
        minInclusive maxInclusive minExclusive maxExclusive
      ).map(&:to_sym).freeze

      # Type of this Metadata
      # @return [:TableGroup, :Table, :Dialect, :Template, :Schema, :Column]
      attr_reader :type

      # Parent of this Metadata (TableGroup for Table, ...)
      # @return [Metadata]
      attr_reader :parent

      # Location derived from `@id`, resolved against `options[:base]` when
      # a base was given.
      # @return [RDF::URI, nil]
      attr_reader :location

      # Open the resource at `path` and build a Metadata object from its
      # content.
      #
      # NOTE(review): nothing is rescued here, so whatever
      # `RDF::Util::File.open_file` or the JSON parser raises propagates to
      # the caller -- confirm whether a missing file should yield an empty
      # Metadata instead.
      #
      # @param [String] path
      # @param [Hash{Symbol => Object}] options
      #   see `RDF::Util::File.open_file` in RDF.rb
      # @return [Metadata]
      def self.open(path, options = {})
        RDF::Util::File.open_file(path, options) { |file| Metadata.new(file, options) }
      end

      # Create Metadata from IO, Hash or String
      #
      # @param [Metadata, Hash, #read, #to_s, nil] input
      #   a Metadata or Hash is copied key by key, an IO or String is parsed
      #   as JSON, `nil` produces empty Metadata
      # @param [Hash{Symbol => Object}] options
      # @option options [:TableGroup, :Table, :Dialect, :Template, :Schema, :Column] :type
      #   Type of schema, if not set, intuited from properties
      # @option options [Metadata] :parent enclosing description, if any
      # @option options [String, #to_s] :base base against which `@id` is resolved
      # @raise [RuntimeError] if `:type` is given but unknown
      def initialize(input, options = {})
        super()
        @options = options.dup

        object = if input.nil?
          {}
        elsif input.respond_to?(:read)
          ::JSON.parse(input.read)
        elsif input.is_a?(Hash)
          # Also covers Metadata, which is a Hash; nested values are re-wrapped below
          input
        else
          ::JSON.parse(input.to_s)
        end

        if @options[:type]
          @type = @options[:type]
          raise "If provided, type must be one of :TableGroup, :Table, :Dialect, :Template, :Schema, :Column" unless
            METADATA_TYPES.include?(@type)
        end

        # Parent of this Metadata, if any
        @parent = @options[:parent]

        # Metadata is object with symbolic keys
        object.each do |key, value|
          key = key.to_sym
          case key
          when :columns
            # An array of column descriptions; only a schema has columns
            @type ||= :Schema
            self[key] = nested_list(value, :Column)
          when :dialect
            # If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
            self[key] = value.is_a?(Hash) ? nested(value, :Dialect) : value
          when :resources
            # An array of table descriptions for the tables in the group.
            @type ||= :TableGroup
            self[key] = nested_list(value, :Table)
          when :schema
            # A schema description, either embedded or given as the URL of a separate JSON schema document
            self[key] = case value
            when String then Metadata.open(value, @options.merge(type: :Schema, parent: self))
            when Hash   then nested(value, :Schema)
            else value # Invalid, but preserve value
            end
          when :templates
            # An array of template specifications that provide mechanisms to transform the tabular data into other formats
            self[key] = nested_list(value, :Template)
          when :targetFormat, :templateFormat, :source
            @type ||= :Template
            self[key] = value
          when :primaryKey, :foreignKeys, :uriTemplate
            @type ||= :Schema
            self[key] = value
          when :name, :required
            @type ||= :Column
            self[key] = value
          when :@id
            # URL of CSV relative to metadata
            # XXX: base from @context, or location of last loaded metadata, or CSV itself. Need to keep track of file base when loading and merging
            base = @options[:base]
            @location = base ? RDF::URI(base).join(value) : RDF::URI(value)
            # Keep the raw value too, so required-key validation can see it
            self[key] = value
          else
            self[key] = value
          end
        end

        # Fall back to the type of the Metadata being copied, then to @type
        @type ||= input.type if input.is_a?(Metadata)
        @type ||= self[:@type].to_sym if self[:@type].respond_to?(:to_sym)
      end

      # Do we have valid metadata?
      #
      # @return [Boolean] false whenever {#validate!} raises
      def valid?
        validate!
        true
      rescue
        false
      end

      # Raise error if metadata has any unexpected properties, lacks a
      # required property, or has a property with an invalid value.
      #
      # Keys containing a colon (prefixed names) are always accepted.
      #
      # @raise [RuntimeError] describing the first problem found
      def validate!
        expected_props, required_props = case type
        when :TableGroup then [TABLE_GROUP_PROPERTIES, TABLE_GROUP_REQUIRED]
        when :Table      then [TABLE_PROPERTIES, TABLE_REQUIRED]
        when :Dialect    then [DIALECT_DEFAULTS.keys, []]
        when :Template   then [TEMPLATE_PROPERTIES, TEMPLATE_REQUIRED]
        when :Schema     then [SCHEMA_PROPERTIES, SCHEMA_REQUIRED]
        when :Column     then [COLUMN_PROPERTIES, COLUMN_REQUIRED]
        else
          raise "Unknown metadata type: #{type}"
        end
        expected_props = expected_props + INHERITED_PROPERTIES

        # It has only expected properties
        unexpected = keys.reject { |k| expected_props.include?(k) || k.to_s.include?(':') }
        raise "#{type} has unexpected keys: #{unexpected}" unless unexpected.empty?

        # It has required properties
        missing = required_props - keys
        raise "#{type} missing required keys: #{missing}" unless missing.empty?

        # Every property is valid
        each do |key, value|
          raise "#{type} has invalid #{key}: #{value.inspect}" unless valid_property?(key, value)
        end
      end

      # Using Metadata, extract a new Metadata document from the file or data provided
      #
      # Skips `skipRows` rows, then reads `headerRowCount` header rows using
      # this dialect's `delimiter` and `quoteChar`. Each column's header
      # cells are joined with a space and used as both `name` and `title`.
      #
      # NOTE(review): joining with a space and using the result as the column
      # name is an assumption; confirm against the specification.
      #
      # @param [#read, #to_s] table_data IO, or CSV text
      # @param [Hash{Symbol => Object}] options
      #   any additional options, passed on to the created Metadata
      # @return [Metadata] of type `:Table`, holding a schema with one column per header cell
      def file_metadata(table_data, options = {})
        require 'csv' # standard library parser, loaded only when needed

        source = table_data.respond_to?(:read) ? table_data : table_data.to_s
        csv = ::CSV.new(source, col_sep: delimiter, quote_char: quoteChar)

        skipRows.to_i.times { csv.shift } # Skip initial lines

        header_rows = []
        (headerRowCount || 1).to_i.times do
          row = csv.shift
          break if row.nil? # ran out of input
          row.each_with_index { |value, index| (header_rows[index] ||= []) << value }
        end

        # Join each header row value
        columns = header_rows.map do |values|
          label = Array(values).compact.join(" ")
          {"name" => label, "title" => label}
        end

        Metadata.new({"schema" => {"columns" => columns}}, options.merge(type: :Table))
      end

      # Merge metadata into a copy of this metadata
      #
      # @param [Metadata, Hash, #read, #to_s] metadata
      # @return [Metadata] the merged copy; the receiver is left untouched
      def merge(metadata)
        dup.merge!(metadata)
      end

      # Merge metadata into self
      #
      # Only properties missing from the receiver are taken from `metadata`;
      # properties already set keep their value.
      #
      # XXX: this is a shallow merge. Nested descriptions (schema, columns,
      # ...) are not merged recursively, and the precedence rule is an
      # assumption pending the specification's merge algorithm.
      #
      # @param [Metadata, Hash, #read, #to_s] metadata
      # @return [Metadata] self
      def merge!(metadata)
        other = Metadata.new(metadata)
        other.each { |key, value| self[key] = value unless key?(key) }
        self
      end

      # Return Table-level metadata with inherited properties merged. If IO is
      # provided, read CSV-level metadata from that file and merge
      #
      # @param [String, #to_s] id of Table if metadata is a TableGroup;
      #   compared with each table's `@id` and resolved location
      # @param [#read, #to_s] file IO or CSV text (see {#file_metadata})
      # @return [Metadata] a copy; neither the receiver nor its tables are modified
      # @raise [RuntimeError] if this is a TableGroup without a matching table
      def table_data(id, file = nil)
        table = if type == :TableGroup
          wanted = id.to_s
          found = Array(self[:resources]).find do |candidate|
            candidate.is_a?(Metadata) &&
              [candidate[:@id], candidate.location].compact.map(&:to_s).include?(wanted)
          end
          raise "No table with id #{id}" unless found

          found = found.dup
          # Properties set on the group apply to the table unless it overrides them
          INHERITED_PROPERTIES.each do |prop|
            found[prop] = self[prop] if key?(prop) && !found.key?(prop)
          end
          found
        else
          dup
        end

        file ? table.merge!(file_metadata(file)) : table
      end

      # Return annotation properties, i.e. those whose key is a prefixed
      # name (contains a colon), keyed by String.
      #
      # FIXME: prefixes are not yet expanded to full IRIs.
      #
      # @return [Hash{String => Object}]
      def expanded_annotation_properties
        each_with_object({}) do |(key, value), result|
          result[key.to_s] = value if key.to_s.include?(':')
        end
      end

      # Logic for accessing elements as accessors
      #
      # * dialect properties return the set value or the dialect default
      # * inherited properties return the set value or the nearest
      #   ancestor's value
      # * `name` falls back to `title`
      #
      # Anything else is a genuine NoMethodError. Note that `length` is an
      # inherited property but `Hash#length` takes precedence, so it must be
      # read with `self[:length]`.
      def method_missing(method, *args)
        name = method.to_sym
        if DIALECT_DEFAULTS.key?(name)
          # As set, or with default
          fetch(name, DIALECT_DEFAULTS[name])
        elsif INHERITED_PROPERTIES.include?(name)
          inherited_value(name)
        elsif name == :name
          # If not set, name comes from title
          fetch(:name, self[:title])
        else
          super
        end
      end

      # Keeps `respond_to?` in step with {#method_missing}.
      def respond_to_missing?(method, include_private = false)
        name = method.to_sym
        DIALECT_DEFAULTS.key?(name) || INHERITED_PROPERTIES.include?(name) || name == :name || super
      end

      protected

      # Value of an inherited property: as set here, otherwise from the
      # closest ancestor defining it. Walks parents directly rather than via
      # `send`, which would reach private Kernel methods such as `format`.
      def inherited_value(name)
        fetch(name) { parent.is_a?(Metadata) ? parent.inherited_value(name) : nil }
      end

      private

      # Wrap a Hash as child Metadata of the given type.
      def nested(value, type)
        Metadata.new(value, @options.merge(type: type, parent: self))
      end

      # Wrap every element of an Array of Hashes as child Metadata; any other
      # value is invalid, but is preserved so validation can report it.
      def nested_list(value, type)
        if value.is_a?(Array) && value.all? { |v| v.is_a?(Hash) }
          value.map { |v| nested(v, type) }
        else
          value
        end
      end

      # Is value exactly true or false?
      def boolean?(value)
        value == true || value == false
      end

      # Is value an Integer >= 0?
      def non_negative_integer?(value)
        value.is_a?(Integer) && value >= 0
      end

      # Is value a list of child Metadata of the given type, all valid?
      def valid_children?(value, type)
        value.is_a?(Array) && value.all? { |v| v.is_a?(Metadata) && v.type == type && v.valid? }
      end

      # Names of the columns described directly on this Metadata.
      def column_names
        Array(self[:columns]).map { |c| c.is_a?(Metadata) ? c.name : nil }.compact
      end

      # A natural language property is a String, an Array of Strings, or a
      # Hash whose values are Strings or Arrays of Strings.
      # NOTE(review): language-tag keys are not checked -- confirm against spec.
      def valid_natural_language_property?(value)
        plain = lambda { |v| v.is_a?(String) || (v.is_a?(Array) && v.all? { |s| s.is_a?(String) }) }
        case value
        when Hash then value.values.all?(&plain)
        else plain.call(value)
        end
      end

      # Validate one foreign key definition.
      #
      # @return [true]
      # @raise [RuntimeError] naming the problem
      def valid_foreign_key?(fk)
        raise "Foreign key must be an object" unless fk.is_a?(Hash)
        columns, reference = fk['columns'], fk['reference']
        raise "Foreign key missing columns and reference" unless columns && reference
        raise "Foreign key has extra keys" unless fk.keys.length == 2
        raise "Foreign key must reference columns" unless Array(columns).all? { |k| column_names.include?(k) }
        raise "Foreign key resference must be an Object" unless reference.is_a?(Hash)

        if reference.has_key?('resource')
          raise "Foreign key having a resource reference, must not have a schema" if reference.has_key?('schema')
          # FIXME resource is a URL of a specific resource (table) which must exist
        elsif reference.has_key?('schema')
          # FIXME schema is a URL of a specific schema which must exist
        end
        # FIXME: columns
        true
      end

      # Is `value` acceptable for property `key`?
      #
      # @return [Boolean]
      # @raise [RuntimeError] for a malformed foreign key, or an unknown key
      def valid_property?(key, value)
        # Prefixed names are annotations and are not checked
        return true if key.to_s.include?(':')

        case key
        when :columns   then valid_children?(value, :Column)
        when :resources then valid_children?(value, :Table)
        when :templates then valid_children?(value, :Template)
        when :dialect   then value.is_a?(Metadata) && value.type == :Dialect && value.valid?
        when :schema    then value.is_a?(Metadata) && value.type == :Schema && value.valid?
        when :commentPrefix, :delimiter, :quoteChar, :separator
          value.is_a?(String) && value.length == 1
        when :datatype # FIXME validate against defined datatypes?
          value.is_a?(String)
        when :format, :lineTerminator, :name, :null, :uriTemplate
          value.is_a?(String)
        when :doubleQuote, :header, :required, :skipInitialSpace, :skipBlankRows
          boolean?(value)
        when :trim
          boolean?(value) || %w(true false start end).include?(value)
        when :encoding
          begin
            !!Encoding.find(value)
          rescue ArgumentError, TypeError
            false # unknown encoding name, or not a name at all
          end
        when :headerColumnCount, :headerRowCount, :minLength, :maxLength, :skipColumns, :skipRows
          non_negative_integer?(value)
        when :length
          # NOTE(review): requires minLength/maxLength, when present, to
          # equal length; confirm whether a range check was intended.
          non_negative_integer?(value) &&
            fetch(:minLength, value) == value &&
            fetch(:maxLength, value) == value
        when :language then !!BCP47::Language.identify(value)
        when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
          value.is_a?(Numeric) ||
            RDF::Literal::Date.new(value).valid? ||
            RDF::Literal::Time.new(value).valid? ||
            RDF::Literal::DateTime.new(value).valid?
        when :notes             then value.is_a?(Array) && value.all? { |v| v.is_a?(Hash) }
        when :source            then %w(json rdf).include?(value)
        when :"table-direction" then %w(rtl ltr default).include?(value)
        when :"text-direction"  then %w(rtl ltr).include?(value)
        when :targetFormat, :templateFormat then RDF::URI(value).valid?
        when :title             then valid_natural_language_property?(value)
        when :@id               then !@location.nil? && @location.valid?
        when :@type             then value.respond_to?(:to_sym) && value.to_sym == type
        when :primaryKey
          # A column reference property that holds either a single reference to a column description object or an array of references.
          Array(value).all? { |k| column_names.include?(k) }
        when :foreignKeys
          # An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables.
          value.is_a?(Array) && value.all? { |fk| valid_foreign_key?(fk) }
        else
          raise "?!?! shouldn't get here"
        end
      end
    end
  end
end
@@ -0,0 +1,148 @@
1
+ require 'rdf'
2
+
3
module RDF::CSV
  ##
  # A Tabular Data to RDF parser in Ruby.
  #
  # NOTE(review): `add_triple`, the `CSVW` vocabulary and several Metadata
  # accessors used by {#each_statement} (`id`, `uri`, `type?`, `columns`,
  # `rows`) are not defined anywhere in this release; statement generation
  # cannot run until they are provided.
  #
  # @author [Gregg Kellogg](http://greggkellogg.net/)
  class Reader < RDF::Reader
    format Format

    # Metadata associated with the CSV
    #
    # @return [Metadata]
    attr_reader :metadata

    ##
    # Open a CSV file or URI. Also attempts to load relevant metadata, trying
    # in order: a `describedby` link on the opened document,
    # `<filename>-metadata.json`, then `metadata.json` alongside the file.
    #
    # NOTE(review): the fallbacks rely on `Metadata.open` returning nil when
    # nothing is found; confirm it does not raise or return empty Metadata.
    #
    # @param  [String, #to_s] filename
    # @param  [Hash{Symbol => Object}] options
    #   see `RDF::Util::File.open_file` in RDF.rb
    # @option options [Metadata, Hash] :metadata merged into whatever is found
    # @yield  [reader]
    # @yieldparam  [RDF::CSV::Reader] reader
    # @yieldreturn [void] ignored
    def self.open(filename, options = {}, &block)
      # Fully qualified: `Util` alone is not resolvable from `module RDF::CSV`
      RDF::Util::File.open_file(filename, options) do |file|
        # load link metadata, if available
        metadata = nil
        if file.respond_to?(:links)
          links = file.links
          link  = links && links.find_link(%w(rel describedby))
          if link
            # A link object carries its target in #href
            target = link.respond_to?(:href) ? link.href : link
            metadata = Metadata.open(target, options)
          end
        end

        # Otherwise, look for metadata based on filename
        metadata ||= Metadata.open("#{filename}-metadata.json", options)

        # Otherwise, look for metadata in directory
        metadata ||= Metadata.open(RDF::URI(filename).join("metadata.json"), options)

        if metadata
          # Merge options
          metadata.merge!(options[:metadata]) if options[:metadata]
        else
          # Just use options
          metadata = options[:metadata]
        end

        # Return an open CSV with possible block
        RDF::CSV::Reader.new(file, options.merge(metadata: metadata), &block)
      end
    end

    ##
    # Initializes the RDF::CSV Reader instance.
    #
    # @param  [IO, File, String]       input
    # @param  [Hash{Symbol => Object}] options
    #   any additional options (see `RDF::Reader#initialize`)
    # @option options [Metadata, Hash] :metadata extracted when file opened
    # @option options [Metadata, Hash] :user_metadata user supplied metadata, merged with extracted metadata
    # @yield  [reader] `self`
    # @yieldparam  [RDF::Reader] reader
    # @yieldreturn [void] ignored
    # @raise [RDF::ReaderError] if the CSV document cannot be loaded
    def initialize(input = $stdin, options = {}, &block)
      # Work on a copy so the caller's hash is not modified; the bare `super`
      # below forwards this reassigned value.
      options = options.merge(base_uri: options[:base_uri] || options[:base])
      super do
        @options[:base] ||= base_uri.to_s if base_uri
        # Construct metadata from that passed from file open, along with information from the file.
        # NOTE(review): passing `input` lets Metadata read from the same IO
        # that becomes @doc below -- confirm the stream is rewound.
        @metadata = Metadata.new(options[:metadata] || {}).table_data(base_uri, input)

        # Merge any user-supplied metadata
        # SPEC CONFUSION: Note issue described in https://github.com/w3c/csvw/issues/76#issuecomment-65914880
        @metadata = @metadata.merge(options[:user_metadata]) if options[:user_metadata]
        @doc = input.respond_to?(:read) ? input : StringIO.new(input.to_s)

        if block_given?
          case block.arity
            when 0 then instance_eval(&block)
            else block.call(self)
          end
        end
      end
    end

    ##
    # @private
    # @see   RDF::Reader#each_statement
    def each_statement(&block)
      if block_given?
        @callback = block

        # Output Table-Level RDF triples
        # SPEC FIXME: csvw:Table, not csv:Table
        add_triple(0, RDF::URI(metadata.id), RDF.type, CSVW.Table) if metadata.type?

        # Output other table-level metadata
        # SPEC AMBIGUITY(2RDF):
        #   output all optional properties in DC space? (they're typically defined in CSVM space)
        #   output all namespaced properties?
        #   output all non-namespaced properties which aren't specifically defined in CSVM in DC space?
        # We assume to only output namespaced properties
        (metadata.expanded_annotation_properties || {}).each do |prop, values|
          Array(values).each do |v|
            # Assume prop and value(s) are in RDF form? or expand here?
            add_triple(0, metadata.uri, RDF::URI(prop), v)
          end
        end

        # SPEC CONFUSION(2RDF):
        #   Where to output column-level, vs. cell-level metadata?
        metadata.columns.each do |column|
          # SPEC FIXME: Output csvw:Column, if set
          add_triple(0, RDF::URI(column.uri), RDF.type, CSVW.Column) if column.type?
          (column.expanded_annotation_properties || {}).each do |prop, values|
            Array(values).each do |v|
              # Assume prop and value(s) are in RDF form? or expand here?
              add_triple(0, RDF::URI(column.uri), RDF::URI(prop), v)
            end
          end
        end

        # Output Cell-Level RDF triples
        metadata.rows.each do |row|
          # Output row-level metadata
          add_triple(row.rownum, RDF::URI(row.uri), CSVW.row, RDF::Literal::Integer(row.rownum))
          add_triple(row.rownum, RDF::URI(row.uri), RDF.type, CSVW.Row) if row.type?
          row.columns.each do |column|
            add_triple("#{row.rownum}", RDF::URI(row.uri), RDF::URI(column.uri), column.rdf_value)
          end
        end
      end
      enum_for(:each_statement)
    end

    ##
    # @private
    # @see   RDF::Reader#each_triple
    def each_triple(&block)
      if block_given?
        each_statement do |statement|
          block.call(*statement.to_triple)
        end
      end
      enum_for(:each_triple)
    end
  end
end
+
metadata ADDED
@@ -0,0 +1,171 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rdf-csv
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Gregg Kellogg
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-12-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rdf
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.1'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.1.7
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.1'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.1.7
33
+ - !ruby/object:Gem::Dependency
34
+ name: yard
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '0.8'
40
+ type: :development
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '0.8'
47
+ - !ruby/object:Gem::Dependency
48
+ name: rspec
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '3.0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '3.0'
61
+ - !ruby/object:Gem::Dependency
62
+ name: rspec-its
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '1.0'
68
+ type: :development
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.0'
75
+ - !ruby/object:Gem::Dependency
76
+ name: rdf-spec
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '1.1'
82
+ type: :development
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '1.1'
89
+ - !ruby/object:Gem::Dependency
90
+ name: rdf-turtle
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '1.1'
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: '1.1'
103
+ - !ruby/object:Gem::Dependency
104
+ name: rdf-isomorphic
105
+ requirement: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '1.1'
110
+ type: :development
111
+ prerelease: false
112
+ version_requirements: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - "~>"
115
+ - !ruby/object:Gem::Version
116
+ version: '1.1'
117
+ - !ruby/object:Gem::Dependency
118
+ name: rdf-xsd
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '1.1'
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '1.1'
131
+ description: RDF::CSV processes tabular data with metadata creating RDF or JSON output.
132
+ email: public-rdf-ruby@w3.org
133
+ executables: []
134
+ extensions: []
135
+ extra_rdoc_files: []
136
+ files:
137
+ - AUTHORS
138
+ - README.md
139
+ - UNLICENSE
140
+ - VERSION
141
+ - lib/rdf/csv.rb
142
+ - lib/rdf/csv/format.rb
143
+ - lib/rdf/csv/json.rb
144
+ - lib/rdf/csv/metadata.rb
145
+ - lib/rdf/csv/reader.rb
146
+ homepage: http://github.com/ruby-rdf/rdf-csv
147
+ licenses:
148
+ - Public Domain
149
+ metadata: {}
150
+ post_install_message:
151
+ rdoc_options: []
152
+ require_paths:
153
+ - lib
154
+ required_ruby_version: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - ">="
157
+ - !ruby/object:Gem::Version
158
+ version: 1.9.2
159
+ required_rubygems_version: !ruby/object:Gem::Requirement
160
+ requirements:
161
+ - - ">="
162
+ - !ruby/object:Gem::Version
163
+ version: '0'
164
+ requirements: []
165
+ rubyforge_project:
166
+ rubygems_version: 2.4.3
167
+ signing_key:
168
+ specification_version: 4
169
+ summary: Tabular Data RDF Reader and JSON serializer.
170
+ test_files: []
171
+ has_rdoc: false