rdf-csv 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/AUTHORS +1 -0
- data/README.md +77 -0
- data/UNLICENSE +24 -0
- data/VERSION +1 -0
- data/lib/rdf/csv.rb +17 -0
- data/lib/rdf/csv/format.rb +45 -0
- data/lib/rdf/csv/json.rb +0 -0
- data/lib/rdf/csv/metadata.rb +359 -0
- data/lib/rdf/csv/reader.rb +148 -0
- metadata +171 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 99b5481fe4c21f0fedceb1fbd8660cb520415d6a
|
4
|
+
data.tar.gz: 606cbbeed5e25354ef019484e558667e9064235c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 694ce180441e8c15dc36c5ed59833ee0dc704d2e129a243858826f4e881b740b145ed9762cbd8565fa3f9d8beb8681146b663656f9a139a93daffe7ed2979623
|
7
|
+
data.tar.gz: 62556f047564cf65a11f58d2a394c8b24a589058258e9ac46447cea0e2d312422592e62d0e46fd830b97fe3db2b6aefe2b7ca577c6cd14335b75027f4610aa8b
|
data/AUTHORS
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
* Gregg Kellogg <gregg@greggkellogg.net>
|
data/README.md
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
# Tabular Data RDF Reader and JSON serializer
|
2
|
+
|
3
|
+
[RDF-CSV][] reader for [RDF.rb][] and JSON serializer.
|
4
|
+
|
5
|
+
[Gem Version](http://badge.fury.io/rb/rdf-csv)
|
6
|
+
[Build Status](http://travis-ci.org/ruby-rdf/rdf-csv)
|
7
|
+
|
8
|
+
## Features
|
9
|
+
|
10
|
+
RDF::CSV parses and serializes CSV or other Tabular Data into [RDF][] and JSON.
|
11
|
+
|
12
|
+
Install with `gem install rdf-csv`
|
13
|
+
|
14
|
+
## Examples
|
15
|
+
|
16
|
+
require 'rubygems'
|
17
|
+
require 'rdf/csv'
|
18
|
+
|
19
|
+
## RDF Reader
|
20
|
+
{RDF::CSV} also acts as a normal RDF reader, using the standard RDF.rb Reader interface:
|
21
|
+
|
22
|
+
graph = RDF::Graph.load("etc/doap.csv")
|
23
|
+
|
24
|
+
|
25
|
+
## Documentation
|
26
|
+
Full documentation available on [RubyDoc](http://rubydoc.info/gems/rdf-csv/file/README.md)
|
27
|
+
|
28
|
+
|
29
|
+
### Principal Classes
|
30
|
+
* {RDF::CSV}
|
31
|
+
* {RDF::CSV::JSON}
|
32
|
+
* {RDF::CSV::Format}
|
33
|
+
* {RDF::CSV::Metadata}
|
34
|
+
* {RDF::CSV::Reader}
|
35
|
+
|
36
|
+
## Dependencies
|
37
|
+
* [Ruby](http://ruby-lang.org/) (>= 1.9.2)
|
38
|
+
* [RDF.rb](http://rubygems.org/gems/rdf) (>= 1.0)
|
39
|
+
* [JSON](https://rubygems.org/gems/json) (>= 1.5)
|
40
|
+
|
41
|
+
## Installation
|
42
|
+
The recommended installation method is via [RubyGems](http://rubygems.org/).
|
43
|
+
To install the latest official release of the `RDF::CSV` gem, do:
|
44
|
+
|
45
|
+
% [sudo] gem install rdf-csv
|
46
|
+
|
47
|
+
## Mailing List
|
48
|
+
* <http://lists.w3.org/Archives/Public/public-rdf-ruby/>
|
49
|
+
|
50
|
+
## Author
|
51
|
+
* [Gregg Kellogg](http://github.com/gkellogg) - <http://greggkellogg.net/>
|
52
|
+
|
53
|
+
## Contributing
|
54
|
+
* Do your best to adhere to the existing coding conventions and idioms.
|
55
|
+
* Don't use hard tabs, and don't leave trailing whitespace on any line.
|
56
|
+
* Do document every method you add using [YARD][] annotations. Read the
|
57
|
+
[tutorial][YARD-GS] or just look at the existing code for examples.
|
58
|
+
* Don't touch the `rdf-csv.gemspec`, `VERSION` or `AUTHORS` files. If you need to
|
59
|
+
change them, do so on your private branch only.
|
60
|
+
* Do feel free to add yourself to the `CREDITS` file and the corresponding
|
61
|
+
list in the `README`. Alphabetical order applies.
|
62
|
+
* Do note that in order for us to merge any non-trivial changes (as a rule
|
63
|
+
of thumb, additions larger than about 15 lines of code), we need an
|
64
|
+
explicit [public domain dedication][PDD] on record from you.
|
65
|
+
|
66
|
+
License
|
67
|
+
-------
|
68
|
+
|
69
|
+
This is free and unencumbered public domain software. For more information,
|
70
|
+
see <http://unlicense.org/> or the accompanying {file:UNLICENSE} file.
|
71
|
+
|
72
|
+
[Ruby]: http://ruby-lang.org/
|
73
|
+
[RDF]: http://www.w3.org/RDF/
|
74
|
+
[YARD]: http://yardoc.org/
|
75
|
+
[YARD-GS]: http://rubydoc.info/docs/yard/file/docs/GettingStarted.md
|
76
|
+
[PDD]: http://lists.w3.org/Archives/Public/public-rdf-ruby/2010May/0013.html
|
77
|
+
[RDF.rb]: http://rubygems.org/gems/rdf
|
data/UNLICENSE
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
This is free and unencumbered software released into the public domain.
|
2
|
+
|
3
|
+
Anyone is free to copy, modify, publish, use, compile, sell, or
|
4
|
+
distribute this software, either in source code form or as a compiled
|
5
|
+
binary, for any purpose, commercial or non-commercial, and by any
|
6
|
+
means.
|
7
|
+
|
8
|
+
In jurisdictions that recognize copyright laws, the author or authors
|
9
|
+
of this software dedicate any and all copyright interest in the
|
10
|
+
software to the public domain. We make this dedication for the benefit
|
11
|
+
of the public at large and to the detriment of our heirs and
|
12
|
+
successors. We intend this dedication to be an overt act of
|
13
|
+
relinquishment in perpetuity of all present and future rights to this
|
14
|
+
software under copyright law.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
20
|
+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
21
|
+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
|
24
|
+
For more information, please refer to <http://unlicense.org/>
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.0
|
data/lib/rdf/csv.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
$:.unshift(File.expand_path("..", __FILE__))
|
2
|
+
require 'rdf' # @see http://rubygems.org/gems/rdf
|
3
|
+
|
4
|
+
module RDF
  ##
  # **`RDF::CSV`** is a CSV extension for RDF.rb.
  #
  # The namespace is `CSV`: the format, metadata and reader files all reopen
  # `RDF::CSV`, so the autoloads registered here are only reachable when they
  # are attached to that module.
  #
  # @see http://w3c.github.io/csvw/
  #
  # @author [Gregg Kellogg](http://greggkellogg.net/)
  module CSV
    # Loaded eagerly so the format registers itself with RDF::Format.
    require 'rdf/csv/format'
    autoload :Metadata, 'rdf/csv/metadata'
    autoload :Reader,   'rdf/csv/reader'
    # NOTE(review): the gem's file list does not ship rdf/csv/version.rb or
    # rdf/csv/writer.rb; these two autoloads only fail once the constant is
    # first referenced.
    autoload :VERSION,  'rdf/csv/version'
    autoload :Writer,   'rdf/csv/writer'
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module RDF::CSV
  ##
  # Tabular Data/CSV format specification.
  #
  # Registers the `text/csv` content type (aliased to
  # `text/tab-separated-values`) and the `.csv`/`.tsv` file extensions with
  # RDF.rb, and names {RDF::CSV::Reader} as the reader for them.
  #
  # @example Obtaining a CSV format class
  #     RDF::Format.for(:csv)         #=> RDF::CSV::Format
  #     RDF::Format.for(:tsv)         #=> RDF::CSV::Format
  #     RDF::Format.for("etc/foaf.csv")
  #     RDF::Format.for("etc/foaf.tsv")
  #     RDF::Format.for(:file_name         => "etc/foaf.csv")
  #     RDF::Format.for(:file_name         => "etc/foaf.tsv")
  #     RDF::Format.for(:file_extension    => "csv")
  #     RDF::Format.for(:file_extension    => "tsv")
  #     RDF::Format.for(:content_type      => "text/csv")
  #     RDF::Format.for(:content_type      => "text/tab-separated-values")
  #
  # @example Obtaining serialization format MIME types
  #     RDF::Format.content_types      #=> {"text/csv" => [RDF::CSV::Format]}
  #
  # @example Obtaining serialization format file extension mappings
  #     RDF::Format.file_extensions    #=> {:csv => "text/csv"}
  #
  # @see http://w3c.github.io/csvw/
  class Format < RDF::Format
    content_type     'text/csv',
                     extensions: [:csv, :tsv],
                     alias: 'text/tab-separated-values'
    content_encoding 'utf-8'

    reader { RDF::CSV::Reader }

    ##
    # Sample detection to see if it matches CSV.
    #
    # Use a text sample to detect the format of an input file. Sub-classes implement
    # a matcher sufficient to detect probable format matches, including disambiguating
    # between other similar formats.
    #
    # @param [String] sample Beginning several bytes (~ 1K) of input.
    # @return [Boolean]
    def self.detect(sample)
      # NOTE(review): as written, the pattern only matches a line made of
      # "<word char><space>" pairs around a single comma (e.g. "a b ,c "), so
      # ordinary CSV headers such as "name,age" are NOT detected. The pattern
      # is left untouched because loosening it changes which inputs RDF.rb
      # routes to this format; confirm the intended expression before editing.
      !!sample.match(/^(?:(?:\w )+,(?:\w ))$/)
    end
  end
end
|
data/lib/rdf/csv/json.rb
ADDED
File without changes
|
@@ -0,0 +1,359 @@
|
|
1
|
+
require 'csv'
require 'json'
require 'json/ld'
require 'bcp47'
|
4
|
+
|
5
|
+
##
|
6
|
+
# CSVM Metadata processor
|
7
|
+
#
|
8
|
+
# * Extracts Metadata from file or Hash definition
|
9
|
+
# * Merges multiple Metadata definitions
|
10
|
+
# * Extract Metadata from a CSV file
|
11
|
+
# * Return table-level annotations
|
12
|
+
# * Return Column-level annotations
|
13
|
+
# * Return row iterator with column information
|
14
|
+
#
|
15
|
+
# @author [Gregg Kellogg](http://greggkellogg.net/)
|
16
|
+
module RDF::CSV
  ##
  # Hash-backed representation of a tabular-data metadata description.
  #
  # Keys are stored as symbols. Nested descriptions (`columns`, `dialect`,
  # `resources`, `schema`, `templates`) are wrapped in their own Metadata
  # instances whose {#parent} points back at the enclosing description.
  class Metadata < Hash
    # Metadata types accepted for the `:type` option
    TYPES = [:TableGroup, :Table, :Dialect, :Template, :Schema, :Column].freeze

    TABLE_GROUP_PROPERTIES = %w(
      resources schema table-direction dialect templates @type
    ).map(&:to_sym).freeze
    TABLE_GROUP_REQUIRED = [].freeze

    TABLE_PROPERTIES = %w(
      @id schema notes table-direction templates dialect @type
    ).map(&:to_sym).freeze
    TABLE_REQUIRED = [:@id].freeze

    # Dialect properties together with the value used when a description does
    # not set them. The keys double as the list of properties allowed on a
    # Dialect, so they are all symbols.
    DIALECT_DEFAULTS = {
      commentPrefix:     nil,
      delimiter:         ",".freeze,
      doubleQuote:       true,
      encoding:          "utf-8".freeze,
      header:            true,
      headerColumnCount: 0,
      headerRowCount:    1,
      lineTerminator:    %r(\r?\n), # SPEC says "\r\n"
      quoteChar:         '"',
      skipBlankRows:     false,
      skipColumns:       0,
      skipInitialSpace:  false,
      skipRows:          0,
      trim:              false,
      :@type =>          nil
    }.freeze

    TEMPLATE_PROPERTIES = %w(
      targetFormat templateFormat title source @type
    ).map(&:to_sym).freeze
    TEMPLATE_REQUIRED = %w(targetFormat templateFormat).map(&:to_sym).freeze

    SCHEMA_PROPERTIES = %w(
      columns primaryKey foreignKeys uriTemplate @type
    ).map(&:to_sym).freeze
    SCHEMA_REQUIRED = [].freeze

    COLUMN_PROPERTIES = %w(
      name title required @type
    ).map(&:to_sym).freeze
    COLUMN_REQUIRED = [:name].freeze

    # Properties which may appear on any description and are looked up through
    # the parent chain when not set locally (see #method_missing).
    INHERITED_PROPERTIES = %w(
      null language text-direction separator format datatype
      length minLength maxLength minimum maximum
      minInclusive maxInclusive minExclusive maxExclusive
    ).map(&:to_sym).freeze

    # Type of this Metadata
    # @return [:TableGroup, :Table, :Dialect, :Template, :Schema, :Column]
    attr_reader :type

    # Parent of this Metadata (TableGroup for Table, ...)
    # @return [Metadata]
    attr_reader :parent

    # Location derived from `@id`, resolved against the `:base` option when
    # one was given
    # @return [RDF::URI, nil]
    attr_reader :location

    # Retrieve the file at the specified path and create a new Metadata object
    # from its content. Errors raised while opening or parsing the file are
    # not handled here and propagate to the caller.
    #
    # @param [String] path
    # @param [Hash{Symbol => Object}] options
    #   see `RDF::Util::File.open_file` in RDF.rb
    # @return [Metadata]
    def self.open(path, options = {})
      RDF::Util::File.open_file(path, options) {|file| Metadata.new(file, options)}
    end

    # Create Metadata from IO, Hash or String
    #
    # A Metadata (or any Hash) is copied key by key; anything responding to
    # `#read` is read and parsed as JSON; `nil` yields an empty description;
    # everything else is parsed as JSON from its `#to_s`.
    #
    # @param [Metadata, Hash, #read, #to_s, nil] input
    # @param [Hash{Symbol => Object}] options
    # @option options [:TableGroup, :Table, :Dialect, :Template, :Schema, :Column] :type
    #   Type of schema, if not set, intuited from properties
    # @option options [Metadata] :parent enclosing description, if any
    # @option options [String, RDF::URI] :base
    #   base against which `@id` is resolved; without it `@id` is used as is
    # @raise [RuntimeError] if `:type` is not a known type or the parsed input
    #   is not a JSON object
    # @return [Metadata]
    def initialize(input, options = {})
      @options = options.dup

      object = case input
      when nil  then {}
      when Hash then input
      else ::JSON.parse(input.respond_to?(:read) ? input.read : input.to_s)
      end
      raise "Metadata must be a JSON object, got #{object.class}" unless object.is_a?(Hash)

      if options[:type]
        @type = options[:type]
        raise "If provided, type must be one of #{TYPES.inspect}" unless TYPES.include?(@type)
      end
      # Copying another Metadata keeps its type unless one was requested
      @type ||= input.type if input.is_a?(Metadata)

      # Parent of this Metadata, if any
      @parent = options[:parent]

      # XXX: base from @context, or location of last loaded metadata, or CSV itself. Need to keep track of file base when loading and merging
      @base = options[:base] && RDF::URI(options[:base])

      # Metadata is object with symbolic keys
      object.each do |key, value|
        key = key.to_sym
        case key
        when :columns
          # An array of column descriptions
          self[key] = child_metadata_list(value, :Column)
        when :dialect
          # If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
          # A non-Hash value is invalid, but preserved
          self[key] = value.is_a?(Hash) ? child_metadata(value, :Dialect) : value
        when :resources
          # An array of table descriptions for the tables in the group.
          @type ||= :TableGroup
          self[key] = child_metadata_list(value, :Table)
        when :schema
          # A schema description, either embedded or as a URL reference to a separate JSON schema document
          self[key] = case value
          when String then Metadata.open(value, child_options(:Schema))
          when Hash   then child_metadata(value, :Schema)
          else value # Invalid, but preserve value
          end
        when :templates
          # An array of template specifications that provide mechanisms to transform the tabular data into other formats
          self[key] = child_metadata_list(value, :Template)
        when :targetFormat, :templateFormat, :source
          @type ||= :Template
          self[key] = value
        when :primaryKey, :foreignKeys, :uriTemplate
          @type ||= :Schema
          self[key] = value
        when :name, :required
          @type ||= :Column
          self[key] = value
        when :@id
          # URL of CSV relative to metadata. Stored as a property as well,
          # since it is required for a Table and checked by #validate!
          self[key] = value
          @location = @base ? @base.join(value) : RDF::URI(value)
        else
          self[key] = value
        end
      end

      # Set type from @type, if present and not otherwise defined
      @type ||= self[:@type].to_sym if self[:@type].respond_to?(:to_sym)
    end

    # Do we have valid metadata?
    #
    # @return [Boolean] false if #validate! raises any StandardError
    def valid?
      validate!
      true
    rescue StandardError
      false
    end

    # Raise error if metadata has any unexpected properties, lacks a required
    # property, or holds an invalid value. Properties whose name contains a
    # `:` (namespaced annotations) are always accepted.
    #
    # @raise [RuntimeError] describing the first problem found
    # @return [Metadata] self
    def validate!
      expected_props, required_props = case type
      when :TableGroup then [TABLE_GROUP_PROPERTIES, TABLE_GROUP_REQUIRED]
      when :Table      then [TABLE_PROPERTIES, TABLE_REQUIRED]
      when :Dialect    then [DIALECT_DEFAULTS.keys, []]
      when :Template   then [TEMPLATE_PROPERTIES, TEMPLATE_REQUIRED]
      when :Schema     then [SCHEMA_PROPERTIES, SCHEMA_REQUIRED]
      when :Column     then [COLUMN_PROPERTIES, COLUMN_REQUIRED]
      else
        raise "Unknown metadata type: #{type}"
      end
      expected_props = expected_props + INHERITED_PROPERTIES

      # It has only expected properties
      unexpected = keys.reject {|k| expected_props.include?(k) || k.to_s.include?(':')}
      raise "#{type} has unexpected keys: #{unexpected}" unless unexpected.empty?

      # It has required properties
      missing = required_props - keys
      raise "#{type} missing required keys: #{missing}" unless missing.empty?

      # Every property is valid
      each do |key, value|
        next if key.to_s.include?(':')
        raise "#{type} has invalid #{key}: #{value.inspect}" unless valid_property?(key, value)
      end
    end

    # Using Metadata, extract a new Metadata document from the file or data provided
    #
    # Skips `skipRows` rows, then reads `headerRowCount` header rows and
    # produces one column description per header cell position.
    #
    # NOTE(review): the original implementation stopped after collecting the
    # header cells. Joining multi-row headers with a single space and
    # returning them as column titles of a Table-level schema is an
    # assumption; confirm the intended shape.
    #
    # @param [#read, #to_s] table_data IO, or CSV content as a String
    # @param [Hash{Symbol => Object}] options
    #   passed on to the Metadata constructor
    # @return [Metadata]
    def file_metadata(table_data, options = {})
      # `::CSV` is the standard library parser; a bare `CSV` would resolve to
      # the enclosing RDF::CSV module.
      csv = ::CSV.new(table_data.respond_to?(:read) ? table_data : table_data.to_s)

      # Skip initial lines
      skipRows.to_i.times { csv.shift }

      header_rows = []
      (headerRowCount || 1).to_i.times do
        row = csv.shift
        break if row.nil? # fewer rows than announced
        row.each_with_index {|value, index| (header_rows[index] ||= []) << value}
      end

      # Join each header row value
      columns = header_rows.map {|cells| {"title" => Array(cells).compact.join(" ")}}
      Metadata.new({"schema" => {"columns" => columns}}, options.merge(type: :Table))
    end

    # Merge metadata into a copy of this metadata
    #
    # @param [Metadata, Hash, #read, #to_s, nil] metadata
    # @return [Metadata] the merged copy; self is not modified
    def merge(metadata)
      self.dup.merge!(metadata)
    end

    # Merge metadata into self
    #
    # Values from `metadata` replace values already present, except that two
    # nested Metadata values are merged recursively (into a fresh object, so
    # descriptions shared with a `dup` are not modified).
    #
    # XXX: array-valued properties such as `columns` are replaced wholesale
    # rather than merged element by element.
    #
    # @param [Metadata, Hash, #read, #to_s, nil] metadata
    # @return [Metadata] self
    def merge!(metadata)
      other = Metadata.new(metadata)
      other.each do |key, value|
        current = self[key]
        self[key] = (current.is_a?(Metadata) && value.is_a?(Metadata)) ? current.merge(value) : value
      end
      @type ||= other.type
      self
    end

    # Return Table-level metadata with inherited properties merged. If IO is
    # provided, read CSV-level metadata from that file and merge
    #
    # NOTE(review): for a TableGroup the table is looked up by comparing `id`
    # with each resource's `@id` and resolved location; the original code
    # relied on helpers that were never defined, so confirm this matching
    # rule. Note also that merging the file metadata last lets header-derived
    # values override explicitly supplied ones.
    #
    # @param [String, #to_s] id of Table if metadata is a TableGroup
    # @param [#read, #to_s] file IO or CSV content handed to #file_metadata
    # @raise [RuntimeError] if this is a TableGroup without a matching table
    # @return [Metadata]
    def table_data(id, file = nil)
      table = if type == :TableGroup
        data = Array(self[:resources]).find do |t|
          t.is_a?(Metadata) && [t[:@id].to_s, t.location.to_s].include?(id.to_s)
        end
        raise "No table with id #{id}" unless data
        data = data.dup
        # Group-level inherited properties apply unless the table sets its own
        INHERITED_PROPERTIES.each do |prop|
          data[prop] = self[prop] if has_key?(prop) && !data.has_key?(prop)
        end
        data
      else
        self.dup
      end

      file ? table.merge!(file_metadata(file)) : table
    end

    # Return expanded annotation properties
    #
    # FIXME: expansion is not implemented; an empty Hash is returned so that
    # callers iterating over the result do not fail on `nil`.
    #
    # @return [Hash{String => Object}]
    def expanded_annotation_properties
      {}
    end

    # Logic for accessing elements as accessors
    #
    # * Dialect properties return the value set here, or their default.
    # * Inherited properties return the value set here, or the parent's.
    # * `name` falls back to `title`.
    #
    # Any other method is passed to `super`, which raises NoMethodError.
    def method_missing(method, *args)
      if DIALECT_DEFAULTS.has_key?(method)
        # As set, or with default
        fetch(method, DIALECT_DEFAULTS[method])
      elsif INHERITED_PROPERTIES.include?(method)
        # Inherited properties; public_send so that private Kernel methods
        # sharing a property name (e.g. `format`) are not invoked on the parent
        fetch(method, parent ? parent.public_send(method) : nil)
      elsif method == :name
        # If not set, name comes from title
        fetch(:name, self[:title])
      else
        super
      end
    end

    # Keeps `respond_to?` in step with #method_missing
    def respond_to_missing?(method, include_private = false)
      DIALECT_DEFAULTS.has_key?(method) ||
        INHERITED_PROPERTIES.include?(method) ||
        method == :name ||
        super
    end

    private

    # Options handed to a nested description of the given type
    def child_options(type)
      @options.merge(type: type, parent: self)
    end

    # Wrap a single Hash as a nested description of the given type
    def child_metadata(value, type)
      Metadata.new(value, child_options(type))
    end

    # Wrap an Array of Hashes as nested descriptions; any other value is
    # invalid, but preserved unchanged
    def child_metadata_list(value, type)
      if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
        value.map {|v| child_metadata(v, type)}
      else
        value
      end
    end

    # Is value `true` or `false`?
    def boolean?(value)
      value == true || value == false
    end

    # Is value an Integer that is zero or greater?
    def non_negative_integer?(value)
      value.is_a?(Integer) && value >= 0
    end

    # Is value a String of exactly one character?
    def single_character?(value)
      value.is_a?(String) && value.length == 1
    end

    # Is value the name of an encoding known to Ruby?
    def valid_encoding?(value)
      value.is_a?(String) && !Encoding.find(value).nil?
    rescue ArgumentError
      false
    end

    # Is value an Array of valid Metadata of the given type?
    def metadata_list?(value, type)
      value.is_a?(Array) && value.all? {|v| v.is_a?(Metadata) && v.type == type && v.valid?}
    end

    # Does this description have a column with the given name?
    def column_named?(name)
      Array(self[:columns]).any? {|c| c.is_a?(Metadata) && c.name == name}
    end

    # A natural language property is a String, an Array of Strings, or an
    # object mapping language codes to either of those.
    # NOTE(review): language codes used as keys are not validated here.
    def valid_natural_language_property?(value)
      strings = lambda do |v|
        v.is_a?(String) || (v.is_a?(Array) && v.all? {|s| s.is_a?(String)})
      end
      value.is_a?(Hash) ? value.values.all? {|v| strings.call(v)} : strings.call(value)
    end

    # An array of foreign key definitions that define how the values from
    # specified columns within this table link to rows within this table or
    # other tables. Each definition is an object with exactly the keys
    # `columns` and `reference`.
    def valid_foreign_keys?(value)
      return false unless value.is_a?(Array)
      value.each do |fk|
        raise "Foreign key must be an object" unless fk.is_a?(Hash)
        columns, reference = fk['columns'], fk['reference']
        raise "Foreign key missing columns and reference" unless columns && reference
        raise "Foreign key has extra keys" unless fk.keys.length == 2
        raise "Foreign key must reference columns" unless Array(columns).all? {|k| column_named?(k)}
        raise "Foreign key reference must be an Object" unless reference.is_a?(Hash)

        if reference.has_key?('resource')
          raise "Foreign key having a resource reference, must not have a schema" if reference.has_key?('schema')
          # FIXME resource is a URL of a specific resource (table) which must exist
        elsif reference.has_key?('schema')
          # FIXME schema is a URL of a specific schema which must exist
        end
        # FIXME: columns
      end
      true
    end

    # Is value acceptable for the (non-namespaced) property key?
    def valid_property?(key, value)
      case key
      when :columns   then metadata_list?(value, :Column)
      when :resources then metadata_list?(value, :Table)
      when :templates then metadata_list?(value, :Template)
      when :dialect   then value.is_a?(Metadata) && value.type == :Dialect && value.valid?
      when :schema    then value.is_a?(Metadata) && value.type == :Schema && value.valid?
      when :commentPrefix, :delimiter, :quoteChar, :separator
        single_character?(value)
      when :datatype, :format, :lineTerminator, :name, :null, :uriTemplate, :urlTemplate
        # FIXME validate datatype against defined datatypes?
        value.is_a?(String)
      when :doubleQuote, :header, :required, :skipInitialSpace, :skipBlankRows
        boolean?(value)
      when :headerColumnCount, :headerRowCount, :minLength, :maxLength, :skipColumns, :skipRows
        non_negative_integer?(value)
      when :length
        non_negative_integer?(value) &&
          fetch(:minLength, value) == value &&
          fetch(:maxLength, value) == value
      when :encoding then valid_encoding?(value)
      when :language then BCP47::Language.identify(value)
      when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
        value.is_a?(Numeric) ||
          RDF::Literal::Date.new(value).valid? ||
          RDF::Literal::Time.new(value).valid? ||
          RDF::Literal::DateTime.new(value).valid?
      when :notes             then value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
      when :source            then %w(json rdf).include?(value)
      when :"table-direction" then %w(rtl ltr default).include?(value)
      when :"text-direction"  then %w(rtl ltr).include?(value)
      when :targetFormat, :templateFormat then RDF::URI(value).valid?
      when :title then valid_natural_language_property?(value)
      when :trim  then boolean?(value) || %w(true false start end).include?(value)
      when :@id   then !@location.nil? && @location.valid?
      when :@type then value.respond_to?(:to_sym) && value.to_sym == type
      when :primaryKey
        # A column reference property that holds either a single reference to a column description object or an array of references.
        Array(value).all? {|k| column_named?(k)}
      when :foreignKeys then valid_foreign_keys?(value)
      else
        raise "?!?! shouldn't get here"
      end
    end
  end
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
require 'rdf'
|
2
|
+
|
3
|
+
module RDF::CSV
  ##
  # A Tabular Data to RDF parser in Ruby.
  #
  # @author [Gregg Kellogg](http://greggkellogg.net/)
  class Reader < RDF::Reader
    format Format

    # Vocabulary used for the table, column and row terms emitted below
    CSVW = RDF::Vocabulary.new("http://www.w3.org/ns/csvw#") unless const_defined?(:CSVW)

    # Metadata associated with the CSV
    #
    # @return [Metadata]
    attr_reader :metadata

    ##
    # Open a CSV file or URI. Also attempts to load relevant metadata
    #
    # Metadata is looked for, in order: at the `describedby` link of the
    # opened document, at `<filename>-metadata.json`, and at `metadata.json`
    # next to the file. Metadata passed as `options[:metadata]` is merged into
    # whatever was found, or used on its own when nothing was found.
    #
    # @param  [String, #to_s] filename
    # @param  [Hash{Symbol => Object}] options
    #   see `RDF::Util::File.open_file` in RDF.rb
    # @yield  [reader]
    # @yieldparam  [RDF::CSV::Reader] reader
    # @yieldreturn [void] ignored
    def self.open(filename, options = {}, &block)
      # Fully qualified: inside `module RDF::CSV` a bare `Util` does not resolve
      RDF::Util::File.open_file(filename, options) do |file|
        # load link metadata, if available
        metadata = nil
        if file.respond_to?(:links) && file.links
          link = file.links.find_link(%w(rel describedby))
          metadata = find_metadata(RDF::URI(filename).join(link.href), options) if link
        end

        # Otherwise, look for metadata based on filename
        metadata ||= find_metadata("#{filename}-metadata.json", options)

        # Otherwise, look for metadata in directory
        metadata ||= find_metadata(RDF::URI(filename).join("metadata.json"), options)

        if metadata
          # Merge options
          metadata.merge!(options[:metadata]) if options[:metadata]
        else
          # Just use options
          metadata = options[:metadata]
        end

        # Return an open CSV with possible block
        RDF::CSV::Reader.new(file, options.merge(metadata: metadata), &block)
      end
    end

    # Load metadata from a candidate location, or return nil when it cannot
    # be read, so that the next candidate is tried. Only I/O failures are
    # treated as "not there"; other errors (e.g. malformed JSON) propagate.
    #
    # @param [#to_s] location
    # @param [Hash{Symbol => Object}] options
    # @return [Metadata, nil]
    def self.find_metadata(location, options)
      Metadata.open(location.to_s, options)
    rescue IOError, SystemCallError
      nil
    end
    private_class_method :find_metadata

    ##
    # Initializes the RDF::CSV Reader instance.
    #
    # @param  [IO, File, String]       input
    # @param  [Hash{Symbol => Object}] options
    #   any additional options (see `RDF::Reader#initialize`)
    # @option options [Metadata, Hash] :metadata extracted when file opened
    # @option options [Metadata, Hash] :user_metadata user supplied metadata, merged on top of extracted metadata
    # @yield  [reader] `self`
    # @yieldparam  [RDF::Reader] reader
    # @yieldreturn [void] ignored
    # @raise [RDF::ReaderError] if the CSV document cannot be loaded
    def initialize(input = $stdin, options = {}, &block)
      # Work on a copy so the caller's hash is left untouched; the implicit
      # `super` below forwards this copy.
      options = options.merge(base_uri: options[:base_uri] || options[:base])
      super do
        @options[:base] ||= base_uri.to_s if base_uri
        # Construct metadata from that passed from file open, along with information from the file.
        @metadata = Metadata.new(options[:metadata] || {}).table_data(base_uri, input)

        # Merge any user-supplied metadata
        # SPEC CONFUSION: Note issue described in https://github.com/w3c/csvw/issues/76#issuecomment-65914880
        @metadata = @metadata.merge(Metadata.new(options[:user_metadata])) if options[:user_metadata]
        @doc = input.respond_to?(:read) ? input : StringIO.new(input.to_s)

        if block_given?
          case block.arity
          when 0 then instance_eval(&block)
          else block.call(self)
          end
        end
      end
    end

    ##
    # @private
    # @see   RDF::Reader#each_statement
    #
    # NOTE(review): this relies on `id`, `uri`, `type?`, `columns` and `rows`
    # on the metadata, and on `uri`, `type?`, `rownum`, `columns` and
    # `rdf_value` on columns and rows, none of which Metadata defines in this
    # release.
    def each_statement(&block)
      if block_given?
        @callback = block

        # Output Table-Level RDF triples
        # SPEC FIXME: csvw:Table, not csv:Table
        add_triple(0, RDF::URI(metadata.id), RDF.type, CSVW.Table) if metadata.type?

        # Output other table-level metadata
        # SPEC AMBIGUITY(2RDF):
        #   output all optional properties in DC space? (they're typically defined in CSVM space)
        #   output all namespaced properties?
        #   output all non-namespaced properties which aren't specifically defined in CSVM in DC space?
        # We assume to only output namespaced properties
        metadata.expanded_annotation_properties.each do |prop, values|
          Array(values).each do |v|
            # Assume prop and value(s) are in RDF form? or expand here?
            add_triple(0, metadata.uri, RDF::URI(prop), v)
          end
        end

        # SPEC CONFUSION(2RDF):
        #   Where to output column-level, vs. cell-level metadata?
        metadata.columns.each do |column|
          # SPEC FIXME: Output csvw:Column, if set
          add_triple(0, RDF::URI(column.uri), RDF.type, CSVW.Column) if column.type?
          column.expanded_annotation_properties.each do |prop, values|
            Array(values).each do |v|
              # Assume prop and value(s) are in RDF form? or expand here?
              add_triple(0, RDF::URI(column.uri), RDF::URI(prop), v)
            end
          end
        end

        # Output Cell-Level RDF triples
        metadata.rows.each do |row|
          # Output row-level metadata
          add_triple(row.rownum, RDF::URI(row.uri), CSVW.row, RDF::Literal::Integer.new(row.rownum))
          add_triple(row.rownum, RDF::URI(row.uri), RDF.type, CSVW.Row) if row.type?
          row.columns.each do |column|
            add_triple(row.rownum, RDF::URI(row.uri), RDF::URI(column.uri), column.rdf_value)
          end
        end
      end
      enum_for(:each_statement)
    end

    ##
    # @private
    # @see   RDF::Reader#each_triple
    def each_triple(&block)
      if block_given?
        each_statement do |statement|
          block.call(*statement.to_triple)
        end
      end
      enum_for(:each_triple)
    end

    private

    # Build a statement and hand it to the block given to #each_statement.
    #
    # @param [Object] node where the triple came from (0 for table level,
    #   otherwise the row number); not used when building the statement
    # @param [RDF::Resource] subject
    # @param [RDF::URI] predicate
    # @param [RDF::Term] object
    def add_triple(node, subject, predicate, object)
      @callback.call(RDF::Statement.new(subject, predicate, object))
    end
  end
end
|
148
|
+
|
metadata
ADDED
@@ -0,0 +1,171 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rdf-csv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Gregg Kellogg
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-12-01 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rdf
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.1'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.1.7
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.1'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.1.7
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: yard
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0.8'
|
40
|
+
type: :development
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0.8'
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: rspec
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - "~>"
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '3.0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - "~>"
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '3.0'
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: rspec-its
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - "~>"
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '1.0'
|
68
|
+
type: :development
|
69
|
+
prerelease: false
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - "~>"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '1.0'
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: rdf-spec
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - "~>"
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '1.1'
|
82
|
+
type: :development
|
83
|
+
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - "~>"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '1.1'
|
89
|
+
- !ruby/object:Gem::Dependency
|
90
|
+
name: rdf-turtle
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - "~>"
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '1.1'
|
96
|
+
type: :development
|
97
|
+
prerelease: false
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - "~>"
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '1.1'
|
103
|
+
- !ruby/object:Gem::Dependency
|
104
|
+
name: rdf-isomorphic
|
105
|
+
requirement: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '1.1'
|
110
|
+
type: :development
|
111
|
+
prerelease: false
|
112
|
+
version_requirements: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - "~>"
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '1.1'
|
117
|
+
- !ruby/object:Gem::Dependency
|
118
|
+
name: rdf-xsd
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - "~>"
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '1.1'
|
124
|
+
type: :development
|
125
|
+
prerelease: false
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - "~>"
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '1.1'
|
131
|
+
description: RDF::CSV processes tabular data with metadata creating RDF or JSON output.
|
132
|
+
email: public-rdf-ruby@w3.org
|
133
|
+
executables: []
|
134
|
+
extensions: []
|
135
|
+
extra_rdoc_files: []
|
136
|
+
files:
|
137
|
+
- AUTHORS
|
138
|
+
- README.md
|
139
|
+
- UNLICENSE
|
140
|
+
- VERSION
|
141
|
+
- lib/rdf/csv.rb
|
142
|
+
- lib/rdf/csv/format.rb
|
143
|
+
- lib/rdf/csv/json.rb
|
144
|
+
- lib/rdf/csv/metadata.rb
|
145
|
+
- lib/rdf/csv/reader.rb
|
146
|
+
homepage: http://github.com/ruby-rdf/rdf-csv
|
147
|
+
licenses:
|
148
|
+
- Public Domain
|
149
|
+
metadata: {}
|
150
|
+
post_install_message:
|
151
|
+
rdoc_options: []
|
152
|
+
require_paths:
|
153
|
+
- lib
|
154
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
156
|
+
- - ">="
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: 1.9.2
|
159
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
160
|
+
requirements:
|
161
|
+
- - ">="
|
162
|
+
- !ruby/object:Gem::Version
|
163
|
+
version: '0'
|
164
|
+
requirements: []
|
165
|
+
rubyforge_project:
|
166
|
+
rubygems_version: 2.4.3
|
167
|
+
signing_key:
|
168
|
+
specification_version: 4
|
169
|
+
summary: Tabular Data RDF Reader and JSON serializer.
|
170
|
+
test_files: []
|
171
|
+
has_rdoc: false
|