datapackage 0.0.4 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.md +4 -2
- data/README.md +82 -143
- data/lib/datapackage/exceptions.rb +12 -0
- data/lib/datapackage/package.rb +177 -156
- data/lib/datapackage/registry.rb +81 -0
- data/lib/datapackage/resource.rb +79 -0
- data/lib/datapackage/schema.rb +111 -0
- data/lib/datapackage/version.rb +1 -1
- data/lib/datapackage.rb +7 -2
- metadata +131 -31
- data/etc/README.md +0 -18
- data/etc/csvddf-dialect-schema.json +0 -24
- data/etc/datapackage-schema.json +0 -208
- data/etc/jsontable-schema.json +0 -34
- data/lib/datapackage/validator.rb +0 -229
data/etc/datapackage-schema.json
DELETED
@@ -1,208 +0,0 @@
|
|
1
|
-
{
|
2
|
-
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
-
"title": "DataPackage",
|
4
|
-
"description": "JSON Schema for validating datapackage.json files",
|
5
|
-
"type": "object",
|
6
|
-
"properties": {
|
7
|
-
"name": {
|
8
|
-
"type": "string",
|
9
|
-
"pattern": "^([a-z\\.\\_\\-])+$"
|
10
|
-
},
|
11
|
-
"licences": {
|
12
|
-
"type": "array",
|
13
|
-
"items": {
|
14
|
-
"type": "object",
|
15
|
-
"properties": {
|
16
|
-
"id": { "type": "string" },
|
17
|
-
"url": { "type": "string" }
|
18
|
-
},
|
19
|
-
"anyOf": [
|
20
|
-
{ "title": "id required", "required": ["id"] },
|
21
|
-
{ "title": "url required", "required": ["url"] }
|
22
|
-
]
|
23
|
-
}
|
24
|
-
},
|
25
|
-
"datapackage_version": {
|
26
|
-
"type": "string"
|
27
|
-
},
|
28
|
-
"title": {
|
29
|
-
"type": "string"
|
30
|
-
},
|
31
|
-
"description": {
|
32
|
-
"type": "string"
|
33
|
-
},
|
34
|
-
"homepage": {
|
35
|
-
"type": "string"
|
36
|
-
},
|
37
|
-
"version": {
|
38
|
-
"type": "string"
|
39
|
-
},
|
40
|
-
"sources": {
|
41
|
-
"type": "array",
|
42
|
-
"items": {
|
43
|
-
"type": "object",
|
44
|
-
"properties": {
|
45
|
-
"name": { "type": "string" },
|
46
|
-
"web": { "type": "string" },
|
47
|
-
"email": { "type": "string" }
|
48
|
-
},
|
49
|
-
"anyOf": [
|
50
|
-
{ "title": "name required", "required": ["name"] },
|
51
|
-
{ "title": "web required", "required": ["web"] },
|
52
|
-
{ "title": "email required", "required": ["email"] }
|
53
|
-
]
|
54
|
-
}
|
55
|
-
},
|
56
|
-
"keywords": {
|
57
|
-
"type": "array",
|
58
|
-
"items": {
|
59
|
-
"type": "string"
|
60
|
-
}
|
61
|
-
},
|
62
|
-
"last_modified": {
|
63
|
-
"type": "string"
|
64
|
-
},
|
65
|
-
"image": {
|
66
|
-
"type": "string"
|
67
|
-
},
|
68
|
-
"bugs": {
|
69
|
-
"type": "string"
|
70
|
-
},
|
71
|
-
"maintainers": {
|
72
|
-
"type": "array",
|
73
|
-
"items": {
|
74
|
-
"type": "object",
|
75
|
-
"properties": {
|
76
|
-
"name": {
|
77
|
-
"type": "string"
|
78
|
-
},
|
79
|
-
"email": {
|
80
|
-
"type": "string"
|
81
|
-
},
|
82
|
-
"web": {
|
83
|
-
"type": "string"
|
84
|
-
}
|
85
|
-
},
|
86
|
-
"required": ["name"]
|
87
|
-
}
|
88
|
-
},
|
89
|
-
"contributors": {
|
90
|
-
"type": "array",
|
91
|
-
"items": {
|
92
|
-
"type": "object",
|
93
|
-
"properties": {
|
94
|
-
"name": {
|
95
|
-
"type": "string"
|
96
|
-
},
|
97
|
-
"email": {
|
98
|
-
"type": "string"
|
99
|
-
},
|
100
|
-
"web": {
|
101
|
-
"type": "string"
|
102
|
-
}
|
103
|
-
},
|
104
|
-
"required": ["name"]
|
105
|
-
}
|
106
|
-
},
|
107
|
-
"publisher": {
|
108
|
-
"type": "array",
|
109
|
-
"items": {
|
110
|
-
"type": "object",
|
111
|
-
"properties": {
|
112
|
-
"name": {
|
113
|
-
"type": "string"
|
114
|
-
},
|
115
|
-
"email": {
|
116
|
-
"type": "string"
|
117
|
-
},
|
118
|
-
"web": {
|
119
|
-
"type": "string"
|
120
|
-
}
|
121
|
-
},
|
122
|
-
"required": ["name"]
|
123
|
-
}
|
124
|
-
},
|
125
|
-
"dependencies": {
|
126
|
-
"type": "object"
|
127
|
-
},
|
128
|
-
"resources": {
|
129
|
-
"type": "array",
|
130
|
-
"minItems": 1,
|
131
|
-
"items": {
|
132
|
-
"type": "object",
|
133
|
-
"properties": {
|
134
|
-
"url": {
|
135
|
-
"type": "string"
|
136
|
-
},
|
137
|
-
"path": {
|
138
|
-
"type": "string"
|
139
|
-
},
|
140
|
-
"name": {
|
141
|
-
"type": "string"
|
142
|
-
},
|
143
|
-
"format": {
|
144
|
-
"type": "string"
|
145
|
-
},
|
146
|
-
"mediatype": {
|
147
|
-
"type": "string",
|
148
|
-
"pattern": "^(.+)/(.+)$"
|
149
|
-
},
|
150
|
-
"encoding": {
|
151
|
-
"type": "string"
|
152
|
-
},
|
153
|
-
"bytes": {
|
154
|
-
"type": "integer"
|
155
|
-
},
|
156
|
-
"hash": {
|
157
|
-
"type": "string",
|
158
|
-
"pattern": "^([a-fA-F0-9]{32})$"
|
159
|
-
},
|
160
|
-
"modified": {
|
161
|
-
"type": "string"
|
162
|
-
},
|
163
|
-
"schema": {
|
164
|
-
"type": "object"
|
165
|
-
},
|
166
|
-
"dialect": {
|
167
|
-
"type": "object"
|
168
|
-
},
|
169
|
-
"sources": {
|
170
|
-
"type": "array",
|
171
|
-
"items": {
|
172
|
-
"type": "object",
|
173
|
-
"properties": {
|
174
|
-
"name": { "type": "string" },
|
175
|
-
"web": { "type": "string" },
|
176
|
-
"email": { "type": "string" }
|
177
|
-
},
|
178
|
-
"anyOf": [
|
179
|
-
{ "title": "name required", "required": ["name"] },
|
180
|
-
{ "title": "web required", "required": ["web"] },
|
181
|
-
{ "title": "email required", "required": ["email"] }
|
182
|
-
]
|
183
|
-
}
|
184
|
-
},
|
185
|
-
"licences": {
|
186
|
-
"type": "array",
|
187
|
-
"items": {
|
188
|
-
"type": "object",
|
189
|
-
"properties": {
|
190
|
-
"id": { "type": "string" },
|
191
|
-
"url": { "type": "string" }
|
192
|
-
},
|
193
|
-
"anyOf": [
|
194
|
-
{ "title": "id required", "required": ["id"] },
|
195
|
-
{ "title": "url required", "required": ["url"] }
|
196
|
-
]
|
197
|
-
}
|
198
|
-
}
|
199
|
-
},
|
200
|
-
"anyOf": [
|
201
|
-
{ "title": "url required", "required": ["url"] },
|
202
|
-
{ "title": "path required", "required": ["path"] }
|
203
|
-
]
|
204
|
-
}
|
205
|
-
}
|
206
|
-
},
|
207
|
-
"required": ["name", "resources"]
|
208
|
-
}
|
data/etc/jsontable-schema.json
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
{
|
2
|
-
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
-
"title": "JSON Table Schema",
|
4
|
-
"description": "JSON Schema for validating JSON Table structures",
|
5
|
-
"type": "object",
|
6
|
-
"properties": {
|
7
|
-
"fields": {
|
8
|
-
"type": "array",
|
9
|
-
"minItems": 1,
|
10
|
-
"items": {
|
11
|
-
"type": "object",
|
12
|
-
"properties": {
|
13
|
-
"name": {
|
14
|
-
"type": "string"
|
15
|
-
},
|
16
|
-
"title": {
|
17
|
-
"type": "string"
|
18
|
-
},
|
19
|
-
"description": {
|
20
|
-
"type": "string"
|
21
|
-
},
|
22
|
-
"type": {
|
23
|
-
"enum": [ "string", "number", "integer", "date", "time", "datetime", "boolean", "binary", "object", "geopoint", "geojson", "array", "any" ]
|
24
|
-
},
|
25
|
-
"format": {
|
26
|
-
"type": "string"
|
27
|
-
}
|
28
|
-
},
|
29
|
-
"required": ["name"]
|
30
|
-
}
|
31
|
-
}
|
32
|
-
},
|
33
|
-
"required": ["fields"]
|
34
|
-
}
|
data/lib/datapackage/validator.rb
DELETED
@@ -1,229 +0,0 @@
|
|
1
|
-
module DataPackage
|
2
|
-
|
3
|
-
#Base class for validators
|
4
|
-
class Validator
|
5
|
-
|
6
|
-
attr_reader :messages
|
7
|
-
|
8
|
-
def Validator.create(profile, opts={})
|
9
|
-
if profile == :simpledataformat
|
10
|
-
return SimpleDataFormatValidator.new(profile, opts)
|
11
|
-
end
|
12
|
-
if profile == :datapackage
|
13
|
-
return DataPackageValidator.new(profile, opts)
|
14
|
-
end
|
15
|
-
return Validator.new(profile, opts)
|
16
|
-
end
|
17
|
-
|
18
|
-
def initialize(schema_name, opts={})
|
19
|
-
@schema_name = schema_name
|
20
|
-
@opts = opts
|
21
|
-
end
|
22
|
-
|
23
|
-
def valid?(package, strict=false)
|
24
|
-
validate( package )
|
25
|
-
return @messages[:errors].empty? if !strict
|
26
|
-
return @messages[:errors].empty? && @messages[:warnings].empty?
|
27
|
-
end
|
28
|
-
|
29
|
-
def validate( package )
|
30
|
-
@messages = {:errors=>[], :warnings=>[]}
|
31
|
-
validate_with_schema( package )
|
32
|
-
validate_integrity( package )
|
33
|
-
return @messages
|
34
|
-
end
|
35
|
-
|
36
|
-
protected
|
37
|
-
|
38
|
-
def validate_with_schema(package)
|
39
|
-
schema = load_schema(@schema_name)
|
40
|
-
messages = JSON::Validator.fully_validate(schema, package.metadata, :errors_as_objects => true)
|
41
|
-
@messages[:errors] += messages.each {|msg| msg[:type] = :metadata }
|
42
|
-
validate_metadata(package)
|
43
|
-
end
|
44
|
-
|
45
|
-
def validate_integrity(package )
|
46
|
-
package.resources.each_with_index do |resource, idx|
|
47
|
-
validate_resource( package, resource, "#/resources/#{idx}" )
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
#implement to perform additional validation on metadata
|
52
|
-
def validate_metadata( package )
|
53
|
-
end
|
54
|
-
|
55
|
-
#implement for per-resource validation
|
56
|
-
def validate_resource( package, resource, path )
|
57
|
-
end
|
58
|
-
|
59
|
-
def load_schema(profile)
|
60
|
-
if @opts[:schema] && @opts[:schema][profile]
|
61
|
-
if !File.exists?( @opts[:schema][profile] )
|
62
|
-
raise "User supplied schema file does not exist: #{@opts[:schema][profile]}"
|
63
|
-
end
|
64
|
-
return JSON.parse( File.read( @opts[:schema][profile] ) )
|
65
|
-
end
|
66
|
-
schema_file = file_in_etc_directory( "#{profile}-schema.json" )
|
67
|
-
if !File.exists?( schema_file )
|
68
|
-
raise "Unable to read schema file #{schema_file} for validation profile #{profile}"
|
69
|
-
end
|
70
|
-
return JSON.parse( File.read( schema_file ) )
|
71
|
-
end
|
72
|
-
|
73
|
-
def add_error(type, message, fragment=nil)
|
74
|
-
@messages[:errors] << create_message(type, message, fragment)
|
75
|
-
end
|
76
|
-
|
77
|
-
def add_warning(type, message, fragment=nil)
|
78
|
-
@messages[:warnings] << create_message(type, message, fragment)
|
79
|
-
end
|
80
|
-
|
81
|
-
def create_message(type, message, fragment=nil)
|
82
|
-
{ :message => message, :type => type, :fragment => fragment }
|
83
|
-
end
|
84
|
-
|
85
|
-
def rebase(base, fragment)
|
86
|
-
return fragment.gsub("#/", base)
|
87
|
-
end
|
88
|
-
|
89
|
-
private
|
90
|
-
|
91
|
-
def file_in_etc_directory(filename)
|
92
|
-
File.join( File.dirname(__FILE__), "..", "..", "etc", filename )
|
93
|
-
end
|
94
|
-
|
95
|
-
end
|
96
|
-
|
97
|
-
#Extends base class with some additional checks for DataPackage conformance.
|
98
|
-
#
|
99
|
-
#These include some warnings about missing metadata elements and an existence
|
100
|
-
#check for all resources
|
101
|
-
class DataPackageValidator < Validator
|
102
|
-
def initialize(schema_name=:datapackage, opts={})
|
103
|
-
super(:datapackage, opts)
|
104
|
-
end
|
105
|
-
|
106
|
-
def validate_metadata(package)
|
107
|
-
#not required, but recommended
|
108
|
-
prefix = "The package does not include a"
|
109
|
-
add_warning( :metadata, "#{prefix} 'licenses' property", "#/") if package.licenses.empty?
|
110
|
-
add_warning( :metadata, "#{prefix} 'datapackage_version' property", "#/") unless package.datapackage_version
|
111
|
-
add_warning( :integrity, "#{prefix} README.md file" ) unless package.resource_exists?( package.resolve("README.md") )
|
112
|
-
end
|
113
|
-
|
114
|
-
def validate_resource( package, resource, path )
|
115
|
-
if !package.resource_exists?( package.resolve_resource( resource ) )
|
116
|
-
add_error( :integrity, "Missing resource #{resource["url"] || resource["path"]}", path)
|
117
|
-
end
|
118
|
-
end
|
119
|
-
|
120
|
-
end
|
121
|
-
|
122
|
-
#Validator that checks whether a package conforms to the Simple Data Format profile
|
123
|
-
class SimpleDataFormatValidator < DataPackageValidator
|
124
|
-
|
125
|
-
def initialize(schema_name=:datapackage, opts={})
|
126
|
-
super(:datapackage, opts)
|
127
|
-
@jsontable_schema = load_schema(:jsontable)
|
128
|
-
@csvddf_schema = load_schema("csvddf-dialect")
|
129
|
-
end
|
130
|
-
|
131
|
-
def validate_resource(package, resource, path)
|
132
|
-
super(package, resource, path)
|
133
|
-
|
134
|
-
if !resource["mediatype"] && !resource["format"]
|
135
|
-
add_warning(:metadata, "#{resource["name"]} does not specify either a mediatype or format", path )
|
136
|
-
end
|
137
|
-
|
138
|
-
if !csv?(resource, package)
|
139
|
-
add_error(:integrity, "#{resource["name"]} is not a CSV file", path )
|
140
|
-
else
|
141
|
-
schema = resource["schema"]
|
142
|
-
if !schema
|
143
|
-
add_error(:metadata, "#{resource["name"]} does not have a schema", path )
|
144
|
-
else
|
145
|
-
messages = JSON::Validator.fully_validate(@jsontable_schema, schema, :errors_as_objects => true)
|
146
|
-
@messages[:errors] += adjust_messages(messages, :metadata, path + "/schema/")
|
147
|
-
end
|
148
|
-
|
149
|
-
if resource["dialect"]
|
150
|
-
messages = JSON::Validator.fully_validate(@csvddf_schema, resource["dialect"], :errors_as_objects => true)
|
151
|
-
@messages[:errors] += adjust_messages(messages, :metadata, path + "/dialect")
|
152
|
-
end
|
153
|
-
|
154
|
-
if package.resource_exists?( package.resolve_resource( resource ) )
|
155
|
-
if resource["schema"] && resource["schema"]["fields"]
|
156
|
-
fields = resource["schema"]["fields"]
|
157
|
-
declared_fields = fields.map{ |f| f["name"] }.compact
|
158
|
-
if declared_fields == []
|
159
|
-
add_error(:metadata, "Schema does not declare any named fields", path + "/schema/fields")
|
160
|
-
end
|
161
|
-
headers = headers(package, resource, path)
|
162
|
-
|
163
|
-
#set algebra to finding fields missing from schema and/or CSV file
|
164
|
-
missing_fields = declared_fields - headers
|
165
|
-
if missing_fields != []
|
166
|
-
add_error( :integrity,
|
167
|
-
"Declared schema has fields not present in CSV file (#{missing_fields.join(",")})",
|
168
|
-
path+"/schema/fields")
|
169
|
-
end
|
170
|
-
undeclared_fields = headers - declared_fields
|
171
|
-
if undeclared_fields != []
|
172
|
-
add_error( :integrity,
|
173
|
-
"CSV file has fields missing from schema (#{undeclared_fields.join(",")})",
|
174
|
-
path+"/schema/fields")
|
175
|
-
end
|
176
|
-
end
|
177
|
-
end
|
178
|
-
end
|
179
|
-
|
180
|
-
end
|
181
|
-
|
182
|
-
def csv?(resource, package)
|
183
|
-
resource["mediatype"] == "text/csv" ||
|
184
|
-
resource["format"] == "csv" ||
|
185
|
-
package.resolve_resource(resource).split(".").last == "csv"
|
186
|
-
end
|
187
|
-
|
188
|
-
def headers(package, resource, path)
|
189
|
-
headers = []
|
190
|
-
#Using built-in CSV parser here as its more permissive than fastercsv
|
191
|
-
#Lets us provide options to tweak the parsing
|
192
|
-
opts = dialect_to_csv_options(resource["dialect"])
|
193
|
-
begin
|
194
|
-
CSV.open( package.resolve_resource(resource), "r", opts) do |csv|
|
195
|
-
headers = csv.shift
|
196
|
-
end
|
197
|
-
rescue => e
|
198
|
-
add_error( :integrity, "Parse error for #{package.resolve_resource(resource)}: #{e}", path)
|
199
|
-
end
|
200
|
-
return headers
|
201
|
-
end
|
202
|
-
|
203
|
-
def dialect_to_csv_options(dialect)
|
204
|
-
return {} unless dialect
|
205
|
-
#supplying defaults here just in case the dialect is invalid
|
206
|
-
delimiter = dialect["delimiter"] || ","
|
207
|
-
delimiter = delimiter + " " if !dialect["skipinitialspace"]
|
208
|
-
return {
|
209
|
-
:col_sep => delimiter,
|
210
|
-
:row_sep => ( dialect["lineterminator"] || :auto ),
|
211
|
-
:quote_char => ( dialect["quotechar"] || '"')
|
212
|
-
}
|
213
|
-
end
|
214
|
-
|
215
|
-
private
|
216
|
-
|
217
|
-
#adjust message structure returned by JSON::Validator to add out type and
|
218
|
-
#adjust fragment references when we're using sub-schemas
|
219
|
-
def adjust_messages(messages, type, path)
|
220
|
-
messages.each do |msg|
|
221
|
-
msg[:type]= type
|
222
|
-
msg[:fragment] = rebase( path , msg[:fragment] )
|
223
|
-
end
|
224
|
-
messages
|
225
|
-
end
|
226
|
-
|
227
|
-
end
|
228
|
-
|
229
|
-
end
|