datapackage 0.0.4 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,208 +0,0 @@
1
- {
2
- "$schema": "http://json-schema.org/draft-04/schema#",
3
- "title": "DataPackage",
4
- "description": "JSON Schema for validating datapackage.json files",
5
- "type": "object",
6
- "properties": {
7
- "name": {
8
- "type": "string",
9
- "pattern": "^([a-z\\.\\_\\-])+$"
10
- },
11
- "licenses": {
12
- "type": "array",
13
- "items": {
14
- "type": "object",
15
- "properties": {
16
- "id": { "type": "string" },
17
- "url": { "type": "string" }
18
- },
19
- "anyOf": [
20
- { "title": "id required", "required": ["id"] },
21
- { "title": "url required", "required": ["url"] }
22
- ]
23
- }
24
- },
25
- "datapackage_version": {
26
- "type": "string"
27
- },
28
- "title": {
29
- "type": "string"
30
- },
31
- "description": {
32
- "type": "string"
33
- },
34
- "homepage": {
35
- "type": "string"
36
- },
37
- "version": {
38
- "type": "string"
39
- },
40
- "sources": {
41
- "type": "array",
42
- "items": {
43
- "type": "object",
44
- "properties": {
45
- "name": { "type": "string" },
46
- "web": { "type": "string" },
47
- "email": { "type": "string" }
48
- },
49
- "anyOf": [
50
- { "title": "name required", "required": ["name"] },
51
- { "title": "web required", "required": ["web"] },
52
- { "title": "email required", "required": ["email"] }
53
- ]
54
- }
55
- },
56
- "keywords": {
57
- "type": "array",
58
- "items": {
59
- "type": "string"
60
- }
61
- },
62
- "last_modified": {
63
- "type": "string"
64
- },
65
- "image": {
66
- "type": "string"
67
- },
68
- "bugs": {
69
- "type": "string"
70
- },
71
- "maintainers": {
72
- "type": "array",
73
- "items": {
74
- "type": "object",
75
- "properties": {
76
- "name": {
77
- "type": "string"
78
- },
79
- "email": {
80
- "type": "string"
81
- },
82
- "web": {
83
- "type": "string"
84
- }
85
- },
86
- "required": ["name"]
87
- }
88
- },
89
- "contributors": {
90
- "type": "array",
91
- "items": {
92
- "type": "object",
93
- "properties": {
94
- "name": {
95
- "type": "string"
96
- },
97
- "email": {
98
- "type": "string"
99
- },
100
- "web": {
101
- "type": "string"
102
- }
103
- },
104
- "required": ["name"]
105
- }
106
- },
107
- "publisher": {
108
- "type": "array",
109
- "items": {
110
- "type": "object",
111
- "properties": {
112
- "name": {
113
- "type": "string"
114
- },
115
- "email": {
116
- "type": "string"
117
- },
118
- "web": {
119
- "type": "string"
120
- }
121
- },
122
- "required": ["name"]
123
- }
124
- },
125
- "dependencies": {
126
- "type": "object"
127
- },
128
- "resources": {
129
- "type": "array",
130
- "minItems": 1,
131
- "items": {
132
- "type": "object",
133
- "properties": {
134
- "url": {
135
- "type": "string"
136
- },
137
- "path": {
138
- "type": "string"
139
- },
140
- "name": {
141
- "type": "string"
142
- },
143
- "format": {
144
- "type": "string"
145
- },
146
- "mediatype": {
147
- "type": "string",
148
- "pattern": "^(.+)/(.+)$"
149
- },
150
- "encoding": {
151
- "type": "string"
152
- },
153
- "bytes": {
154
- "type": "integer"
155
- },
156
- "hash": {
157
- "type": "string",
158
- "pattern": "^([a-fA-F0-9]{32})$"
159
- },
160
- "modified": {
161
- "type": "string"
162
- },
163
- "schema": {
164
- "type": "object"
165
- },
166
- "dialect": {
167
- "type": "object"
168
- },
169
- "sources": {
170
- "type": "array",
171
- "items": {
172
- "type": "object",
173
- "properties": {
174
- "name": { "type": "string" },
175
- "web": { "type": "string" },
176
- "email": { "type": "string" }
177
- },
178
- "anyOf": [
179
- { "title": "name required", "required": ["name"] },
180
- { "title": "web required", "required": ["web"] },
181
- { "title": "email required", "required": ["email"] }
182
- ]
183
- }
184
- },
185
- "licenses": {
186
- "type": "array",
187
- "items": {
188
- "type": "object",
189
- "properties": {
190
- "id": { "type": "string" },
191
- "url": { "type": "string" }
192
- },
193
- "anyOf": [
194
- { "title": "id required", "required": ["id"] },
195
- { "title": "url required", "required": ["url"] }
196
- ]
197
- }
198
- }
199
- },
200
- "anyOf": [
201
- { "title": "url required", "required": ["url"] },
202
- { "title": "path required", "required": ["path"] }
203
- ]
204
- }
205
- }
206
- },
207
- "required": ["name", "resources"]
208
- }
@@ -1,34 +0,0 @@
1
- {
2
- "$schema": "http://json-schema.org/draft-04/schema#",
3
- "title": "JSON Table Schema",
4
- "description": "JSON Schema for validating JSON Table structures",
5
- "type": "object",
6
- "properties": {
7
- "fields": {
8
- "type": "array",
9
- "minItems": 1,
10
- "items": {
11
- "type": "object",
12
- "properties": {
13
- "name": {
14
- "type": "string"
15
- },
16
- "title": {
17
- "type": "string"
18
- },
19
- "description": {
20
- "type": "string"
21
- },
22
- "type": {
23
- "enum": [ "string", "number", "integer", "date", "time", "datetime", "boolean", "binary", "object", "geopoint", "geojson", "array", "any" ]
24
- },
25
- "format": {
26
- "type": "string"
27
- }
28
- },
29
- "required": ["name"]
30
- }
31
- }
32
- },
33
- "required": ["fields"]
34
- }
@@ -1,229 +0,0 @@
1
module DataPackage

  # Base class for validators.
  #
  # A validator checks a package against a JSON schema (selected by profile
  # name) and then runs per-resource integrity checks. Results are collected
  # in +messages+, a hash of the form
  # <tt>{ :errors => [...], :warnings => [...] }</tt>.
  class Validator

    # Messages produced by the most recent call to #validate (nil before then).
    attr_reader :messages

    # Builds the validator matching +profile+.
    #
    # profile:: :simpledataformat, :datapackage, or any other schema name
    #           (which yields a plain Validator).
    # opts::    options hash; <tt>opts[:schema][profile]</tt> may name a
    #           user-supplied schema file.
    def self.create(profile, opts = {})
      case profile
      when :simpledataformat then SimpleDataFormatValidator.new(profile, opts)
      when :datapackage      then DataPackageValidator.new(profile, opts)
      else Validator.new(profile, opts)
      end
    end

    def initialize(schema_name, opts = {})
      @schema_name = schema_name
      @opts = opts
    end

    # Validates +package+ and reports whether it passed.
    #
    # With +strict+ set, warnings count as failures as well as errors.
    def valid?(package, strict = false)
      validate(package)
      return false unless @messages[:errors].empty?
      !strict || @messages[:warnings].empty?
    end

    # Runs schema validation followed by integrity checks and returns the
    # collected messages. Any messages from a previous run are discarded.
    def validate(package)
      @messages = { :errors => [], :warnings => [] }
      validate_with_schema(package)
      validate_integrity(package)
      @messages
    end

    protected

    # Validates the package metadata against the profile's JSON schema, tags
    # every resulting message as :metadata, then runs the #validate_metadata
    # hook.
    def validate_with_schema(package)
      schema = load_schema(@schema_name)
      messages = JSON::Validator.fully_validate(schema, package.metadata, :errors_as_objects => true)
      @messages[:errors] += messages.each { |msg| msg[:type] = :metadata }
      validate_metadata(package)
    end

    # Calls #validate_resource for each resource, passing its JSON pointer
    # style path (e.g. "#/resources/0").
    def validate_integrity(package)
      package.resources.each_with_index do |resource, idx|
        validate_resource(package, resource, "#/resources/#{idx}")
      end
    end

    # Hook: implement to perform additional validation on metadata.
    def validate_metadata(package)
    end

    # Hook: implement for per-resource validation.
    def validate_resource(package, resource, path)
    end

    # Loads and parses the JSON schema for +profile+.
    #
    # A schema file supplied via <tt>@opts[:schema][profile]</tt> takes
    # precedence; otherwise "<profile>-schema.json" is read from the etc
    # directory. Raises RuntimeError if the chosen file does not exist.
    def load_schema(profile)
      custom = @opts[:schema] && @opts[:schema][profile]
      if custom
        # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
        raise "User supplied schema file does not exist: #{custom}" unless File.exist?(custom)
        return JSON.parse(File.read(custom))
      end

      schema_file = file_in_etc_directory("#{profile}-schema.json")
      unless File.exist?(schema_file)
        raise "Unable to read schema file #{schema_file} for validation profile #{profile}"
      end
      JSON.parse(File.read(schema_file))
    end

    def add_error(type, message, fragment = nil)
      @messages[:errors] << create_message(type, message, fragment)
    end

    def add_warning(type, message, fragment = nil)
      @messages[:warnings] << create_message(type, message, fragment)
    end

    # Builds a message hash in the same shape used for schema errors.
    def create_message(type, message, fragment = nil)
      { :message => message, :type => type, :fragment => fragment }
    end

    # Re-roots +fragment+ by replacing its "#/" prefix with +base+.
    # +base+ is expected to end with "/" so the segments stay separated.
    def rebase(base, fragment)
      fragment.gsub("#/", base)
    end

    private

    def file_in_etc_directory(filename)
      File.join(File.dirname(__FILE__), "..", "..", "etc", filename)
    end

  end

  # Extends base class with some additional checks for DataPackage conformance.
  #
  # These include some warnings about missing metadata elements and an existence
  # check for all resources.
  class DataPackageValidator < Validator

    # +schema_name+ is accepted for signature compatibility with
    # Validator.create but ignored: this class always uses :datapackage.
    def initialize(schema_name = :datapackage, opts = {})
      super(:datapackage, opts)
    end

    # Warns about recommended (not required) items that are absent.
    def validate_metadata(package)
      prefix = "The package does not include a"
      add_warning(:metadata, "#{prefix} 'licenses' property", "#/") if package.licenses.empty?
      add_warning(:metadata, "#{prefix} 'datapackage_version' property", "#/") unless package.datapackage_version
      add_warning(:integrity, "#{prefix} README.md file") unless package.resource_exists?(package.resolve("README.md"))
    end

    # Records an integrity error when the resource cannot be found.
    def validate_resource(package, resource, path)
      return if package.resource_exists?(package.resolve_resource(resource))
      add_error(:integrity, "Missing resource #{resource["url"] || resource["path"]}", path)
    end

  end

  # Validator that checks whether a package conforms to the Simple Data Format profile.
  class SimpleDataFormatValidator < DataPackageValidator

    # +schema_name+ is ignored (see DataPackageValidator#initialize). The
    # JSON Table and CSVDDF dialect schemas are loaded up front.
    def initialize(schema_name = :datapackage, opts = {})
      super(:datapackage, opts)
      @jsontable_schema = load_schema(:jsontable)
      @csvddf_schema = load_schema("csvddf-dialect")
    end

    # Checks that a resource is a CSV file with a valid schema (and dialect,
    # if given), and that the schema's fields match the CSV header row.
    def validate_resource(package, resource, path)
      super(package, resource, path)

      if !resource["mediatype"] && !resource["format"]
        add_warning(:metadata, "#{resource["name"]} does not specify either a mediatype or format", path)
      end

      unless csv?(resource, package)
        add_error(:integrity, "#{resource["name"]} is not a CSV file", path)
        return
      end

      schema = resource["schema"]
      if schema
        messages = JSON::Validator.fully_validate(@jsontable_schema, schema, :errors_as_objects => true)
        @messages[:errors] += adjust_messages(messages, :metadata, path + "/schema/")
      else
        add_error(:metadata, "#{resource["name"]} does not have a schema", path)
      end

      if resource["dialect"]
        messages = JSON::Validator.fully_validate(@csvddf_schema, resource["dialect"], :errors_as_objects => true)
        # Trailing slash matters: rebase substitutes the "#/" prefix, so
        # without it "#/delimiter" would become ".../dialectdelimiter".
        @messages[:errors] += adjust_messages(messages, :metadata, path + "/dialect/")
      end

      if package.resource_exists?(package.resolve_resource(resource))
        validate_fields_against_csv(package, resource, path)
      end
    end

    # True when the mediatype, format or file extension marks the resource as CSV.
    def csv?(resource, package)
      resource["mediatype"] == "text/csv" ||
        resource["format"] == "csv" ||
        package.resolve_resource(resource).split(".").last == "csv"
    end

    # Returns the first row of the resource's CSV file as an array.
    #
    # Returns an empty array when the file has no rows or cannot be parsed;
    # in the latter case an :integrity error is recorded against +path+.
    def headers(package, resource, path)
      # Using built-in CSV parser here as it's more permissive than fastercsv.
      # Lets us provide options to tweak the parsing.
      opts = dialect_to_csv_options(resource["dialect"])
      file = package.resolve_resource(resource)
      first_row = nil
      begin
        # Options must be splatted: Ruby 3 no longer treats a trailing
        # positional hash as keyword arguments.
        CSV.open(file, "r", **opts) { |csv| first_row = csv.shift }
      rescue => e
        add_error(:integrity, "Parse error for #{file}: #{e}", path)
        first_row = nil
      end
      # csv.shift yields nil for an empty file; callers do set algebra on the result.
      first_row || []
    end

    # Translates a CSVDDF dialect hash into options for the CSV parser.
    # Returns an empty hash when no usable dialect is supplied.
    def dialect_to_csv_options(dialect)
      return {} unless dialect.is_a?(Hash)
      # supplying defaults here just in case the dialect is invalid
      delimiter = dialect["delimiter"] || ","
      # skipinitialspace means whitespace directly after a delimiter is not
      # part of the field; approximate that by folding one space into the
      # column separator. Only do so when the flag is actually set.
      delimiter = delimiter + " " if dialect["skipinitialspace"]
      {
        :col_sep => delimiter,
        :row_sep => (dialect["lineterminator"] || :auto),
        :quote_char => (dialect["quotechar"] || '"')
      }
    end

    private

    # Compares the field names declared in the resource's schema with the
    # header row of the CSV file and records any mismatch in either direction.
    def validate_fields_against_csv(package, resource, path)
      schema = resource["schema"]
      # Wrongly-typed schemas have already been reported by schema
      # validation; skip them here rather than crash.
      return unless schema.is_a?(Hash) && schema["fields"].is_a?(Array)

      declared_fields = schema["fields"].map { |f| f["name"] if f.is_a?(Hash) }.compact
      if declared_fields.empty?
        add_error(:metadata, "Schema does not declare any named fields", path + "/schema/fields")
      end
      csv_headers = headers(package, resource, path)

      # set algebra to find fields missing from schema and/or CSV file
      missing_fields = declared_fields - csv_headers
      unless missing_fields.empty?
        add_error(:integrity,
                  "Declared schema has fields not present in CSV file (#{missing_fields.join(",")})",
                  path + "/schema/fields")
      end
      undeclared_fields = csv_headers - declared_fields
      unless undeclared_fields.empty?
        add_error(:integrity,
                  "CSV file has fields missing from schema (#{undeclared_fields.join(",")})",
                  path + "/schema/fields")
      end
    end

    # Adjust message structure returned by JSON::Validator to add our type and
    # adjust fragment references when we're using sub-schemas.
    def adjust_messages(messages, type, path)
      messages.each do |msg|
        msg[:type] = type
        msg[:fragment] = rebase(path, msg[:fragment])
      end
      messages
    end

  end

end