datapackage 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +11 -4
- data/bin/datapackage +13 -11
- data/lib/datapackage/validator.rb +108 -63
- data/lib/datapackage/version.rb +1 -1
- metadata +1 -1
data/README.md
CHANGED
@@ -15,7 +15,7 @@ The library is intending to support:
|
|
15
15
|
|
16
16
|
Add the gem into your Gemfile:
|
17
17
|
|
18
|
-
|
18
|
+
gem 'datapackage.rb', :git => "git://github.com/theodi/datapackage.rb.git"
|
19
19
|
|
20
20
|
Or:
|
21
21
|
|
@@ -65,15 +65,22 @@ To expose more detail on errors and warnings:
|
|
65
65
|
|
66
66
|
messages = package.validate() # or package.validate(:datapackage)
|
67
67
|
|
68
|
-
This returns an object with two keys: `:errors` and `:warnings`.
|
68
|
+
This returns an object with two keys: `:errors` and `:warnings`. The values of these keys are arrays of message objects.
|
69
|
+
Message objects are formatted as follows:
|
69
70
|
|
70
|
-
|
71
|
-
|
71
|
+
{
|
72
|
+
:type => :metadata|:integrity,
|
73
|
+
:message => "message for user",
|
74
|
+
:fragment => "/path/to/responsible/element"
|
75
|
+
}
|
72
76
|
|
73
77
|
It is possible to treat all warnings as errors by performing strict validation:
|
74
78
|
|
75
79
|
package.valid?(true)
|
76
80
|
|
81
|
+
Examples of warnings might include notes on missing metadata elements (e.g. package `licenses`) which are not required by the
|
82
|
+
DataPackage specification but which SHOULD be included.
|
83
|
+
|
77
84
|
Warnings are currently generated for:
|
78
85
|
|
79
86
|
* Missing `README.md` files from packages
|
data/bin/datapackage
CHANGED
@@ -12,23 +12,25 @@ end
|
|
12
12
|
|
13
13
|
package = DataPackage::Package.new(ARGV[0])
|
14
14
|
|
15
|
-
messages = package.validate
|
15
|
+
messages = package.validate(:simpledataformat)
|
16
16
|
|
17
17
|
if messages[:errors].length == 0
|
18
|
-
puts "Package #{package.name} is
|
18
|
+
puts "Package #{package.name} is VALID"
|
19
19
|
else
|
20
|
-
puts "Package #{package.name} is
|
21
|
-
puts "
|
22
|
-
messages[:errors].each_with_index do |
|
23
|
-
|
24
|
-
msg
|
25
|
-
|
26
|
-
end
|
20
|
+
puts "Package #{package.name} is INVALID"
|
21
|
+
puts "Errors: "
|
22
|
+
messages[:errors].each_with_index do |msg, idx|
|
23
|
+
puts "#{idx+1}. #{msg[:type]} error. #{msg[:fragment]}"
|
24
|
+
puts " #{msg[:message]}"
|
25
|
+
end
|
27
26
|
end
|
28
27
|
|
29
28
|
if !messages[:warnings].empty?
|
30
29
|
puts "Warnings: "
|
31
|
-
messages[:warnings].each_with_index do |
|
32
|
-
puts "#{idx+1}. #{warning}"
|
30
|
+
messages[:warnings].each_with_index do |msg, idx|
|
31
|
+
puts "#{idx+1}. #{msg[:type]} warning. #{msg[:fragment]}"
|
32
|
+
puts " #{msg[:message]}"
|
33
33
|
end
|
34
34
|
end
|
35
|
+
|
36
|
+
exit( messages[:errors].length )
|
@@ -3,6 +3,8 @@ module DataPackage
|
|
3
3
|
#Base class for validators
|
4
4
|
class Validator
|
5
5
|
|
6
|
+
attr_reader :messages
|
7
|
+
|
6
8
|
def Validator.create(profile, opts={})
|
7
9
|
if profile == :simpledataformat
|
8
10
|
return SimpleDataFormatValidator.new(profile, opts)
|
@@ -19,45 +21,41 @@ module DataPackage
|
|
19
21
|
end
|
20
22
|
|
21
23
|
def valid?(package, strict=false)
|
22
|
-
|
23
|
-
return messages[:errors].empty? if !strict
|
24
|
-
return messages[:errors].empty? && messages[:warnings].empty?
|
24
|
+
validate( package )
|
25
|
+
return @messages[:errors].empty? if !strict
|
26
|
+
return @messages[:errors].empty? && @messages[:warnings].empty?
|
25
27
|
end
|
26
28
|
|
27
29
|
def validate( package )
|
28
|
-
|
30
|
+
@messages = {:errors=>[], :warnings=>[]}
|
31
|
+
validate_with_schema( package )
|
32
|
+
validate_integrity( package )
|
33
|
+
return @messages
|
29
34
|
end
|
30
35
|
|
36
|
+
protected
|
37
|
+
|
31
38
|
def validate_with_schema(package)
|
32
39
|
schema = load_schema(@schema_name)
|
33
|
-
messages =
|
34
|
-
|
35
|
-
|
36
|
-
}
|
37
|
-
validate_metadata(package, messages)
|
38
|
-
return messages
|
40
|
+
messages = JSON::Validator.fully_validate(schema, package.metadata, :errors_as_objects => true)
|
41
|
+
@messages[:errors] += messages.each {|msg| msg[:type] = :metadata }
|
42
|
+
validate_metadata(package)
|
39
43
|
end
|
40
44
|
|
41
|
-
def validate_integrity(package
|
42
|
-
package.resources.
|
43
|
-
validate_resource(package, resource,
|
44
|
-
end
|
45
|
-
|
46
|
-
messages
|
45
|
+
def validate_integrity(package )
|
46
|
+
package.resources.each_with_index do |resource, idx|
|
47
|
+
validate_resource( package, resource, "#/resources/#{idx}" )
|
48
|
+
end
|
47
49
|
end
|
48
|
-
|
49
|
-
protected
|
50
|
-
|
50
|
+
|
51
51
|
#implement to perform additional validation on metadata
|
52
|
-
def validate_metadata(package
|
52
|
+
def validate_metadata( package )
|
53
53
|
end
|
54
54
|
|
55
55
|
#implement for per-resource validation
|
56
|
-
def validate_resource(package, resource,
|
56
|
+
def validate_resource( package, resource, path )
|
57
57
|
end
|
58
|
-
|
59
|
-
protected
|
60
|
-
|
58
|
+
|
61
59
|
def load_schema(profile)
|
62
60
|
if @opts[:schema] && @opts[:schema][profile]
|
63
61
|
if !File.exists?( @opts[:schema][profile] )
|
@@ -71,6 +69,22 @@ module DataPackage
|
|
71
69
|
end
|
72
70
|
return JSON.parse( File.read( schema_file ) )
|
73
71
|
end
|
72
|
+
|
73
|
+
def add_error(type, message, fragment=nil)
|
74
|
+
@messages[:errors] << create_message(type, message, fragment)
|
75
|
+
end
|
76
|
+
|
77
|
+
def add_warning(type, message, fragment=nil)
|
78
|
+
@messages[:warnings] << create_message(type, message, fragment)
|
79
|
+
end
|
80
|
+
|
81
|
+
def create_message(type, message, fragment=nil)
|
82
|
+
{ :message => message, :type => type, :fragment => fragment }
|
83
|
+
end
|
84
|
+
|
85
|
+
def rebase(base, fragment)
|
86
|
+
return fragment.gsub("#/", base)
|
87
|
+
end
|
74
88
|
|
75
89
|
private
|
76
90
|
|
@@ -89,17 +103,17 @@ module DataPackage
|
|
89
103
|
super(:datapackage, opts)
|
90
104
|
end
|
91
105
|
|
92
|
-
def validate_metadata(package
|
106
|
+
def validate_metadata(package)
|
93
107
|
#not required, but recommended
|
94
108
|
prefix = "The package does not include a"
|
95
|
-
|
96
|
-
|
97
|
-
|
109
|
+
add_warning( :metadata, "#{prefix} 'licenses' property", "#/") if package.licenses.empty?
|
110
|
+
add_warning( :metadata, "#{prefix} 'datapackage_version' property", "#/") unless package.datapackage_version
|
111
|
+
add_warning( :integrity, "#{prefix} README.md file" ) unless package.resource_exists?( package.resolve("README.md") )
|
98
112
|
end
|
99
113
|
|
100
|
-
def validate_resource(package, resource,
|
114
|
+
def validate_resource( package, resource, path )
|
101
115
|
if !package.resource_exists?( package.resolve_resource( resource ) )
|
102
|
-
|
116
|
+
add_error( :integrity, "Missing resource #{resource["url"] || resource["path"]}", path)
|
103
117
|
end
|
104
118
|
end
|
105
119
|
|
@@ -114,63 +128,94 @@ module DataPackage
|
|
114
128
|
@csvddf_schema = load_schema("csvddf-dialect")
|
115
129
|
end
|
116
130
|
|
117
|
-
def validate_resource(package, resource,
|
118
|
-
super(package, resource,
|
131
|
+
def validate_resource(package, resource, path)
|
132
|
+
super(package, resource, path)
|
119
133
|
|
120
134
|
if !csv?(resource)
|
121
|
-
|
122
|
-
else
|
123
|
-
|
124
|
-
|
135
|
+
add_error(:integrity, "#{resource["name"]} is not a CSV file", path )
|
136
|
+
else
|
137
|
+
schema = resource["schema"]
|
138
|
+
if !schema
|
139
|
+
add_error(:metadata, "#{resource["name"]} does not have a schema", path )
|
125
140
|
else
|
126
|
-
messages
|
127
|
-
|
128
|
-
|
129
|
-
|
141
|
+
messages = JSON::Validator.fully_validate(@jsontable_schema, schema, :errors_as_objects => true)
|
142
|
+
@messages[:errors] += adjust_messages(messages, :metadata, path + "/schema/")
|
143
|
+
end
|
144
|
+
|
130
145
|
if resource["dialect"]
|
131
|
-
messages[
|
132
|
-
|
133
|
-
resource["dialect"], :errors_as_objects => true)
|
146
|
+
messages = JSON::Validator.fully_validate(@csvddf_schema, resource["dialect"], :errors_as_objects => true)
|
147
|
+
@messages[:errors] += adjust_messages(messages, :metadata, path + "/dialect")
|
134
148
|
end
|
135
149
|
|
136
|
-
if resource
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
150
|
+
if package.resource_exists?( package.resolve_resource( resource ) )
|
151
|
+
if resource["schema"] && resource["schema"]["fields"]
|
152
|
+
fields = resource["schema"]["fields"]
|
153
|
+
declared_fields = fields.map{ |f| f["name"] }
|
154
|
+
headers = headers(package, resource, path)
|
155
|
+
|
156
|
+
#use set algebra to find fields missing from the schema and/or the CSV file
|
157
|
+
missing_fields = declared_fields - headers
|
158
|
+
if missing_fields != []
|
159
|
+
add_error( :integrity,
|
160
|
+
"Declared schema has fields not present in CSV file (#{missing_fields.join(",")})",
|
161
|
+
path+"/schema/fields")
|
162
|
+
end
|
163
|
+
undeclared_fields = headers - declared_fields
|
164
|
+
if undeclared_fields != []
|
165
|
+
add_error( :integrity,
|
166
|
+
"CSV file has fields missing from schema (#{undeclared_fields.join(",")})",
|
167
|
+
path+"/schema/fields")
|
168
|
+
end
|
146
169
|
end
|
147
|
-
undeclared_fields = headers - declared_fields
|
148
|
-
if undeclared_fields != []
|
149
|
-
messages[:errors] << "CSV file has fields missing from schema (#{undeclared_fields.join(",")})"
|
150
|
-
end
|
151
170
|
end
|
152
|
-
|
153
171
|
end
|
154
172
|
|
155
173
|
end
|
156
|
-
|
174
|
+
|
157
175
|
def csv?(resource)
|
158
176
|
resource["mediatype"] == "text/csv" ||
|
159
177
|
resource["format"] == "csv"
|
160
178
|
end
|
161
179
|
|
162
|
-
def headers(package, resource)
|
180
|
+
def headers(package, resource, path)
|
163
181
|
headers = []
|
182
|
+
#Using built-in CSV parser here as it's more permissive than fastercsv
|
183
|
+
#Lets us provide options to tweak the parsing
|
164
184
|
opts = dialect_to_csv_options(resource["dialect"])
|
165
|
-
|
166
|
-
|
185
|
+
begin
|
186
|
+
CSV.open( package.resolve_resource(resource), "r", opts) do |csv|
|
187
|
+
headers = csv.shift
|
188
|
+
end
|
189
|
+
rescue => e
|
190
|
+
add_error( :integrity, "Parse error for #{package.resolve_resource(resource)}: #{e}", path)
|
167
191
|
end
|
168
192
|
return headers
|
169
193
|
end
|
170
194
|
|
171
195
|
def dialect_to_csv_options(dialect)
|
172
|
-
return {}
|
196
|
+
return {} unless dialect
|
197
|
+
#supplying defaults here just in case the dialect is invalid
|
198
|
+
delimiter = dialect["delimiter"] || ","
|
199
|
+
delimiter = delimiter + " " if !dialect["skipinitialspace"]
|
200
|
+
return {
|
201
|
+
:col_sep => delimiter,
|
202
|
+
:row_sep => ( dialect["lineterminator"] || :auto ),
|
203
|
+
:quote_char => ( dialect["quotechar"] || '"')
|
204
|
+
}
|
205
|
+
end
|
206
|
+
|
207
|
+
private
|
208
|
+
|
209
|
+
#adjust message structure returned by JSON::Validator to add our type and
|
210
|
+
#adjust fragment references when we're using sub-schemas
|
211
|
+
def adjust_messages(messages, type, path)
|
212
|
+
messages.each do |msg|
|
213
|
+
msg[:type]= type
|
214
|
+
msg[:fragment] = rebase( path , msg[:fragment] )
|
215
|
+
end
|
216
|
+
messages
|
173
217
|
end
|
218
|
+
|
174
219
|
end
|
175
220
|
|
176
221
|
end
|
data/lib/datapackage/version.rb
CHANGED