datapackage 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,208 +0,0 @@
1
- {
2
- "$schema": "http://json-schema.org/draft-04/schema#",
3
- "title": "DataPackage",
4
- "description": "JSON Schema for validating datapackage.json files",
5
- "type": "object",
6
- "properties": {
7
- "name": {
8
- "type": "string",
9
- "pattern": "^([a-z\\.\\_\\-])+$"
10
- },
11
- "licences": {
12
- "type": "array",
13
- "items": {
14
- "type": "object",
15
- "properties": {
16
- "id": { "type": "string" },
17
- "url": { "type": "string" }
18
- },
19
- "anyOf": [
20
- { "title": "id required", "required": ["id"] },
21
- { "title": "url required", "required": ["url"] }
22
- ]
23
- }
24
- },
25
- "datapackage_version": {
26
- "type": "string"
27
- },
28
- "title": {
29
- "type": "string"
30
- },
31
- "description": {
32
- "type": "string"
33
- },
34
- "homepage": {
35
- "type": "string"
36
- },
37
- "version": {
38
- "type": "string"
39
- },
40
- "sources": {
41
- "type": "array",
42
- "items": {
43
- "type": "object",
44
- "properties": {
45
- "name": { "type": "string" },
46
- "web": { "type": "string" },
47
- "email": { "type": "string" }
48
- },
49
- "anyOf": [
50
- { "title": "name required", "required": ["name"] },
51
- { "title": "web required", "required": ["web"] },
52
- { "title": "email required", "required": ["email"] }
53
- ]
54
- }
55
- },
56
- "keywords": {
57
- "type": "array",
58
- "items": {
59
- "type": "string"
60
- }
61
- },
62
- "last_modified": {
63
- "type": "string"
64
- },
65
- "image": {
66
- "type": "string"
67
- },
68
- "bugs": {
69
- "type": "string"
70
- },
71
- "maintainers": {
72
- "type": "array",
73
- "items": {
74
- "type": "object",
75
- "properties": {
76
- "name": {
77
- "type": "string"
78
- },
79
- "email": {
80
- "type": "string"
81
- },
82
- "web": {
83
- "type": "string"
84
- }
85
- },
86
- "required": ["name"]
87
- }
88
- },
89
- "contributors": {
90
- "type": "array",
91
- "items": {
92
- "type": "object",
93
- "properties": {
94
- "name": {
95
- "type": "string"
96
- },
97
- "email": {
98
- "type": "string"
99
- },
100
- "web": {
101
- "type": "string"
102
- }
103
- },
104
- "required": ["name"]
105
- }
106
- },
107
- "publisher": {
108
- "type": "array",
109
- "items": {
110
- "type": "object",
111
- "properties": {
112
- "name": {
113
- "type": "string"
114
- },
115
- "email": {
116
- "type": "string"
117
- },
118
- "web": {
119
- "type": "string"
120
- }
121
- },
122
- "required": ["name"]
123
- }
124
- },
125
- "dependencies": {
126
- "type": "object"
127
- },
128
- "resources": {
129
- "type": "array",
130
- "minItems": 1,
131
- "items": {
132
- "type": "object",
133
- "properties": {
134
- "url": {
135
- "type": "string"
136
- },
137
- "path": {
138
- "type": "string"
139
- },
140
- "name": {
141
- "type": "string"
142
- },
143
- "format": {
144
- "type": "string"
145
- },
146
- "mediatype": {
147
- "type": "string",
148
- "pattern": "^(.+)/(.+)$"
149
- },
150
- "encoding": {
151
- "type": "string"
152
- },
153
- "bytes": {
154
- "type": "integer"
155
- },
156
- "hash": {
157
- "type": "string",
158
- "pattern": "^([a-fA-F0-9]{32})$"
159
- },
160
- "modified": {
161
- "type": "string"
162
- },
163
- "schema": {
164
- "type": "object"
165
- },
166
- "dialect": {
167
- "type": "object"
168
- },
169
- "sources": {
170
- "type": "array",
171
- "items": {
172
- "type": "object",
173
- "properties": {
174
- "name": { "type": "string" },
175
- "web": { "type": "string" },
176
- "email": { "type": "string" }
177
- },
178
- "anyOf": [
179
- { "title": "name required", "required": ["name"] },
180
- { "title": "web required", "required": ["web"] },
181
- { "title": "email required", "required": ["email"] }
182
- ]
183
- }
184
- },
185
- "licences": {
186
- "type": "array",
187
- "items": {
188
- "type": "object",
189
- "properties": {
190
- "id": { "type": "string" },
191
- "url": { "type": "string" }
192
- },
193
- "anyOf": [
194
- { "title": "id required", "required": ["id"] },
195
- { "title": "url required", "required": ["url"] }
196
- ]
197
- }
198
- }
199
- },
200
- "anyOf": [
201
- { "title": "url required", "required": ["url"] },
202
- { "title": "path required", "required": ["path"] }
203
- ]
204
- }
205
- }
206
- },
207
- "required": ["name", "resources"]
208
- }
@@ -1,34 +0,0 @@
1
- {
2
- "$schema": "http://json-schema.org/draft-04/schema#",
3
- "title": "JSON Table Schema",
4
- "description": "JSON Schema for validating JSON Table structures",
5
- "type": "object",
6
- "properties": {
7
- "fields": {
8
- "type": "array",
9
- "minItems": 1,
10
- "items": {
11
- "type": "object",
12
- "properties": {
13
- "name": {
14
- "type": "string"
15
- },
16
- "title": {
17
- "type": "string"
18
- },
19
- "description": {
20
- "type": "string"
21
- },
22
- "type": {
23
- "enum": [ "string", "number", "integer", "date", "time", "datetime", "boolean", "binary", "object", "geopoint", "geojson", "array", "any" ]
24
- },
25
- "format": {
26
- "type": "string"
27
- }
28
- },
29
- "required": ["name"]
30
- }
31
- }
32
- },
33
- "required": ["fields"]
34
- }
@@ -1,229 +0,0 @@
1
- module DataPackage
2
-
3
- #Base class for validators
4
- class Validator
5
-
6
- attr_reader :messages
7
-
8
- def Validator.create(profile, opts={})
9
- if profile == :simpledataformat
10
- return SimpleDataFormatValidator.new(profile, opts)
11
- end
12
- if profile == :datapackage
13
- return DataPackageValidator.new(profile, opts)
14
- end
15
- return Validator.new(profile, opts)
16
- end
17
-
18
- def initialize(schema_name, opts={})
19
- @schema_name = schema_name
20
- @opts = opts
21
- end
22
-
23
- def valid?(package, strict=false)
24
- validate( package )
25
- return @messages[:errors].empty? if !strict
26
- return @messages[:errors].empty? && @messages[:warnings].empty?
27
- end
28
-
29
- def validate( package )
30
- @messages = {:errors=>[], :warnings=>[]}
31
- validate_with_schema( package )
32
- validate_integrity( package )
33
- return @messages
34
- end
35
-
36
- protected
37
-
38
- def validate_with_schema(package)
39
- schema = load_schema(@schema_name)
40
- messages = JSON::Validator.fully_validate(schema, package.metadata, :errors_as_objects => true)
41
- @messages[:errors] += messages.each {|msg| msg[:type] = :metadata }
42
- validate_metadata(package)
43
- end
44
-
45
- def validate_integrity(package )
46
- package.resources.each_with_index do |resource, idx|
47
- validate_resource( package, resource, "#/resources/#{idx}" )
48
- end
49
- end
50
-
51
- #implement to perform additional validation on metadata
52
- def validate_metadata( package )
53
- end
54
-
55
- #implement for per-resource validation
56
- def validate_resource( package, resource, path )
57
- end
58
-
59
- def load_schema(profile)
60
- if @opts[:schema] && @opts[:schema][profile]
61
- if !File.exists?( @opts[:schema][profile] )
62
- raise "User supplied schema file does not exist: #{@opts[:schema][profile]}"
63
- end
64
- return JSON.parse( File.read( @opts[:schema][profile] ) )
65
- end
66
- schema_file = file_in_etc_directory( "#{profile}-schema.json" )
67
- if !File.exists?( schema_file )
68
- raise "Unable to read schema file #{schema_file} for validation profile #{profile}"
69
- end
70
- return JSON.parse( File.read( schema_file ) )
71
- end
72
-
73
- def add_error(type, message, fragment=nil)
74
- @messages[:errors] << create_message(type, message, fragment)
75
- end
76
-
77
- def add_warning(type, message, fragment=nil)
78
- @messages[:warnings] << create_message(type, message, fragment)
79
- end
80
-
81
- def create_message(type, message, fragment=nil)
82
- { :message => message, :type => type, :fragment => fragment }
83
- end
84
-
85
- def rebase(base, fragment)
86
- return fragment.gsub("#/", base)
87
- end
88
-
89
- private
90
-
91
- def file_in_etc_directory(filename)
92
- File.join( File.dirname(__FILE__), "..", "..", "etc", filename )
93
- end
94
-
95
- end
96
-
97
- #Extends base class with some additional checks for DataPackage conformance.
98
- #
99
- #These include some warnings about missing metadata elements and an existence
100
- #check for all resources
101
- class DataPackageValidator < Validator
102
- def initialize(schema_name=:datapackage, opts={})
103
- super(:datapackage, opts)
104
- end
105
-
106
- def validate_metadata(package)
107
- #not required, but recommended
108
- prefix = "The package does not include a"
109
- add_warning( :metadata, "#{prefix} 'licenses' property", "#/") if package.licenses.empty?
110
- add_warning( :metadata, "#{prefix} 'datapackage_version' property", "#/") unless package.datapackage_version
111
- add_warning( :integrity, "#{prefix} README.md file" ) unless package.resource_exists?( package.resolve("README.md") )
112
- end
113
-
114
- def validate_resource( package, resource, path )
115
- if !package.resource_exists?( package.resolve_resource( resource ) )
116
- add_error( :integrity, "Missing resource #{resource["url"] || resource["path"]}", path)
117
- end
118
- end
119
-
120
- end
121
-
122
- #Validator that checks whether a package conforms to the Simple Data Format profile
123
- class SimpleDataFormatValidator < DataPackageValidator
124
-
125
- def initialize(schema_name=:datapackage, opts={})
126
- super(:datapackage, opts)
127
- @jsontable_schema = load_schema(:jsontable)
128
- @csvddf_schema = load_schema("csvddf-dialect")
129
- end
130
-
131
- def validate_resource(package, resource, path)
132
- super(package, resource, path)
133
-
134
- if !resource["mediatype"] && !resource["format"]
135
- add_warning(:metadata, "#{resource["name"]} does not specify either a mediatype or format", path )
136
- end
137
-
138
- if !csv?(resource, package)
139
- add_error(:integrity, "#{resource["name"]} is not a CSV file", path )
140
- else
141
- schema = resource["schema"]
142
- if !schema
143
- add_error(:metadata, "#{resource["name"]} does not have a schema", path )
144
- else
145
- messages = JSON::Validator.fully_validate(@jsontable_schema, schema, :errors_as_objects => true)
146
- @messages[:errors] += adjust_messages(messages, :metadata, path + "/schema/")
147
- end
148
-
149
- if resource["dialect"]
150
- messages = JSON::Validator.fully_validate(@csvddf_schema, resource["dialect"], :errors_as_objects => true)
151
- @messages[:errors] += adjust_messages(messages, :metadata, path + "/dialect")
152
- end
153
-
154
- if package.resource_exists?( package.resolve_resource( resource ) )
155
- if resource["schema"] && resource["schema"]["fields"]
156
- fields = resource["schema"]["fields"]
157
- declared_fields = fields.map{ |f| f["name"] }.compact
158
- if declared_fields == []
159
- add_error(:metadata, "Schema does not declare any named fields", path + "/schema/fields")
160
- end
161
- headers = headers(package, resource, path)
162
-
163
- #set algebra to finding fields missing from schema and/or CSV file
164
- missing_fields = declared_fields - headers
165
- if missing_fields != []
166
- add_error( :integrity,
167
- "Declared schema has fields not present in CSV file (#{missing_fields.join(",")})",
168
- path+"/schema/fields")
169
- end
170
- undeclared_fields = headers - declared_fields
171
- if undeclared_fields != []
172
- add_error( :integrity,
173
- "CSV file has fields missing from schema (#{undeclared_fields.join(",")})",
174
- path+"/schema/fields")
175
- end
176
- end
177
- end
178
- end
179
-
180
- end
181
-
182
- def csv?(resource, package)
183
- resource["mediatype"] == "text/csv" ||
184
- resource["format"] == "csv" ||
185
- package.resolve_resource(resource).split(".").last == "csv"
186
- end
187
-
188
- def headers(package, resource, path)
189
- headers = []
190
- #Using built-in CSV parser here as its more permissive than fastercsv
191
- #Lets us provide options to tweak the parsing
192
- opts = dialect_to_csv_options(resource["dialect"])
193
- begin
194
- CSV.open( package.resolve_resource(resource), "r", opts) do |csv|
195
- headers = csv.shift
196
- end
197
- rescue => e
198
- add_error( :integrity, "Parse error for #{package.resolve_resource(resource)}: #{e}", path)
199
- end
200
- return headers
201
- end
202
-
203
- def dialect_to_csv_options(dialect)
204
- return {} unless dialect
205
- #supplying defaults here just in case the dialect is invalid
206
- delimiter = dialect["delimiter"] || ","
207
- delimiter = delimiter + " " if !dialect["skipinitialspace"]
208
- return {
209
- :col_sep => delimiter,
210
- :row_sep => ( dialect["lineterminator"] || :auto ),
211
- :quote_char => ( dialect["quotechar"] || '"')
212
- }
213
- end
214
-
215
- private
216
-
217
- #adjust message structure returned by JSON::Validator to add out type and
218
- #adjust fragment references when we're using sub-schemas
219
- def adjust_messages(messages, type, path)
220
- messages.each do |msg|
221
- msg[:type]= type
222
- msg[:fragment] = rebase( path , msg[:fragment] )
223
- end
224
- messages
225
- end
226
-
227
- end
228
-
229
- end