datapackage 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +11 -4
- data/bin/datapackage +13 -11
- data/lib/datapackage/validator.rb +108 -63
- data/lib/datapackage/version.rb +1 -1
- metadata +1 -1
data/README.md
CHANGED
@@ -15,7 +15,7 @@ The library is intending to support:
|
|
15
15
|
|
16
16
|
Add the gem into your Gemfile:
|
17
17
|
|
18
|
-
|
18
|
+
gem 'datapackage.rb', :git => "git://github.com/theodi/datapackage.rb.git"
|
19
19
|
|
20
20
|
Or:
|
21
21
|
|
@@ -65,15 +65,22 @@ To expose more detail on errors and warnings:
|
|
65
65
|
|
66
66
|
messages = package.validate() # or package.validate(:datapackage)
|
67
67
|
|
68
|
-
This returns an object with two keys: `:errors` and `:warnings`.
|
68
|
+
This returns an object with two keys: `:errors` and `:warnings`. The values of these keys are arrays of message objects.
|
69
|
+
Message objects are formatted as follows:
|
69
70
|
|
70
|
-
|
71
|
-
|
71
|
+
{
|
72
|
+
:type => :metadata|:integrity,
|
73
|
+
:message => "message for user",
|
74
|
+
:fragment => "/path/to/responsible/element"
|
75
|
+
}
|
72
76
|
|
73
77
|
It is possible to treat all warnings as errors by performing strict validation:
|
74
78
|
|
75
79
|
package.valid?(true)
|
76
80
|
|
81
|
+
Examples of warnings might include notes on missing metadata elements (e.g. package `licenses`) which are not required by the
|
82
|
+
DataPackage specification but which SHOULD be included.
|
83
|
+
|
77
84
|
Warnings are currently generated for:
|
78
85
|
|
79
86
|
* Missing `README.md` files from packages
|
data/bin/datapackage
CHANGED
@@ -12,23 +12,25 @@ end
|
|
12
12
|
|
13
13
|
package = DataPackage::Package.new(ARGV[0])
|
14
14
|
|
15
|
-
messages = package.validate
|
15
|
+
messages = package.validate(:simpledataformat)
|
16
16
|
|
17
17
|
if messages[:errors].length == 0
|
18
|
-
puts "Package #{package.name} is
|
18
|
+
puts "Package #{package.name} is VALID"
|
19
19
|
else
|
20
|
-
puts "Package #{package.name} is
|
21
|
-
puts "
|
22
|
-
messages[:errors].each_with_index do |
|
23
|
-
|
24
|
-
msg
|
25
|
-
|
26
|
-
end
|
20
|
+
puts "Package #{package.name} is INVALID"
|
21
|
+
puts "Errors: "
|
22
|
+
messages[:errors].each_with_index do |msg, idx|
|
23
|
+
puts "#{idx+1}. #{msg[:type]} error. #{msg[:fragment]}"
|
24
|
+
puts " #{msg[:message]}"
|
25
|
+
end
|
27
26
|
end
|
28
27
|
|
29
28
|
if !messages[:warnings].empty?
|
30
29
|
puts "Warnings: "
|
31
|
-
messages[:warnings].each_with_index do |
|
32
|
-
puts "#{idx+1}. #{warning}"
|
30
|
+
messages[:warnings].each_with_index do |msg, idx|
|
31
|
+
puts "#{idx+1}. #{msg[:type]} warning. #{msg[:fragment]}"
|
32
|
+
puts " #{msg[:message]}"
|
33
33
|
end
|
34
34
|
end
|
35
|
+
|
36
|
+
exit( messages[:errors].length )
|
@@ -3,6 +3,8 @@ module DataPackage
|
|
3
3
|
#Base class for validators
|
4
4
|
class Validator
|
5
5
|
|
6
|
+
attr_reader :messages
|
7
|
+
|
6
8
|
def Validator.create(profile, opts={})
|
7
9
|
if profile == :simpledataformat
|
8
10
|
return SimpleDataFormatValidator.new(profile, opts)
|
@@ -19,45 +21,41 @@ module DataPackage
|
|
19
21
|
end
|
20
22
|
|
21
23
|
def valid?(package, strict=false)
|
22
|
-
|
23
|
-
return messages[:errors].empty? if !strict
|
24
|
-
return messages[:errors].empty? && messages[:warnings].empty?
|
24
|
+
validate( package )
|
25
|
+
return @messages[:errors].empty? if !strict
|
26
|
+
return @messages[:errors].empty? && @messages[:warnings].empty?
|
25
27
|
end
|
26
28
|
|
27
29
|
def validate( package )
|
28
|
-
|
30
|
+
@messages = {:errors=>[], :warnings=>[]}
|
31
|
+
validate_with_schema( package )
|
32
|
+
validate_integrity( package )
|
33
|
+
return @messages
|
29
34
|
end
|
30
35
|
|
36
|
+
protected
|
37
|
+
|
31
38
|
def validate_with_schema(package)
|
32
39
|
schema = load_schema(@schema_name)
|
33
|
-
messages =
|
34
|
-
|
35
|
-
|
36
|
-
}
|
37
|
-
validate_metadata(package, messages)
|
38
|
-
return messages
|
40
|
+
messages = JSON::Validator.fully_validate(schema, package.metadata, :errors_as_objects => true)
|
41
|
+
@messages[:errors] += messages.each {|msg| msg[:type] = :metadata }
|
42
|
+
validate_metadata(package)
|
39
43
|
end
|
40
44
|
|
41
|
-
def validate_integrity(package
|
42
|
-
package.resources.
|
43
|
-
validate_resource(package, resource,
|
44
|
-
end
|
45
|
-
|
46
|
-
messages
|
45
|
+
def validate_integrity(package )
|
46
|
+
package.resources.each_with_index do |resource, idx|
|
47
|
+
validate_resource( package, resource, "#/resources/#{idx}" )
|
48
|
+
end
|
47
49
|
end
|
48
|
-
|
49
|
-
protected
|
50
|
-
|
50
|
+
|
51
51
|
#implement to perform additional validation on metadata
|
52
|
-
def validate_metadata(package
|
52
|
+
def validate_metadata( package )
|
53
53
|
end
|
54
54
|
|
55
55
|
#implement for per-resource validation
|
56
|
-
def validate_resource(package, resource,
|
56
|
+
def validate_resource( package, resource, path )
|
57
57
|
end
|
58
|
-
|
59
|
-
protected
|
60
|
-
|
58
|
+
|
61
59
|
def load_schema(profile)
|
62
60
|
if @opts[:schema] && @opts[:schema][profile]
|
63
61
|
if !File.exists?( @opts[:schema][profile] )
|
@@ -71,6 +69,22 @@ module DataPackage
|
|
71
69
|
end
|
72
70
|
return JSON.parse( File.read( schema_file ) )
|
73
71
|
end
|
72
|
+
|
73
|
+
def add_error(type, message, fragment=nil)
|
74
|
+
@messages[:errors] << create_message(type, message, fragment)
|
75
|
+
end
|
76
|
+
|
77
|
+
def add_warning(type, message, fragment=nil)
|
78
|
+
@messages[:warnings] << create_message(type, message, fragment)
|
79
|
+
end
|
80
|
+
|
81
|
+
def create_message(type, message, fragment=nil)
|
82
|
+
{ :message => message, :type => type, :fragment => fragment }
|
83
|
+
end
|
84
|
+
|
85
|
+
def rebase(base, fragment)
|
86
|
+
return fragment.gsub("#/", base)
|
87
|
+
end
|
74
88
|
|
75
89
|
private
|
76
90
|
|
@@ -89,17 +103,17 @@ module DataPackage
|
|
89
103
|
super(:datapackage, opts)
|
90
104
|
end
|
91
105
|
|
92
|
-
def validate_metadata(package
|
106
|
+
def validate_metadata(package)
|
93
107
|
#not required, but recommended
|
94
108
|
prefix = "The package does not include a"
|
95
|
-
|
96
|
-
|
97
|
-
|
109
|
+
add_warning( :metadata, "#{prefix} 'licenses' property", "#/") if package.licenses.empty?
|
110
|
+
add_warning( :metadata, "#{prefix} 'datapackage_version' property", "#/") unless package.datapackage_version
|
111
|
+
add_warning( :integrity, "#{prefix} README.md file" ) unless package.resource_exists?( package.resolve("README.md") )
|
98
112
|
end
|
99
113
|
|
100
|
-
def validate_resource(package, resource,
|
114
|
+
def validate_resource( package, resource, path )
|
101
115
|
if !package.resource_exists?( package.resolve_resource( resource ) )
|
102
|
-
|
116
|
+
add_error( :integrity, "Missing resource #{resource["url"] || resource["path"]}", path)
|
103
117
|
end
|
104
118
|
end
|
105
119
|
|
@@ -114,63 +128,94 @@ module DataPackage
|
|
114
128
|
@csvddf_schema = load_schema("csvddf-dialect")
|
115
129
|
end
|
116
130
|
|
117
|
-
def validate_resource(package, resource,
|
118
|
-
super(package, resource,
|
131
|
+
def validate_resource(package, resource, path)
|
132
|
+
super(package, resource, path)
|
119
133
|
|
120
134
|
if !csv?(resource)
|
121
|
-
|
122
|
-
else
|
123
|
-
|
124
|
-
|
135
|
+
add_error(:integrity, "#{resource["name"]} is not a CSV file", path )
|
136
|
+
else
|
137
|
+
schema = resource["schema"]
|
138
|
+
if !schema
|
139
|
+
add_error(:metadata, "#{resource["name"]} does not have a schema", path )
|
125
140
|
else
|
126
|
-
messages
|
127
|
-
|
128
|
-
|
129
|
-
|
141
|
+
messages = JSON::Validator.fully_validate(@jsontable_schema, schema, :errors_as_objects => true)
|
142
|
+
@messages[:errors] += adjust_messages(messages, :metadata, path + "/schema/")
|
143
|
+
end
|
144
|
+
|
130
145
|
if resource["dialect"]
|
131
|
-
messages[
|
132
|
-
|
133
|
-
resource["dialect"], :errors_as_objects => true)
|
146
|
+
messages = JSON::Validator.fully_validate(@csvddf_schema, resource["dialect"], :errors_as_objects => true)
|
147
|
+
@messages[:errors] += adjust_messages(messages, :metadata, path + "/dialect")
|
134
148
|
end
|
135
149
|
|
136
|
-
if resource
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
150
|
+
if package.resource_exists?( package.resolve_resource( resource ) )
|
151
|
+
if resource["schema"] && resource["schema"]["fields"]
|
152
|
+
fields = resource["schema"]["fields"]
|
153
|
+
declared_fields = fields.map{ |f| f["name"] }
|
154
|
+
headers = headers(package, resource, path)
|
155
|
+
|
156
|
+
#use set algebra to find fields missing from schema and/or CSV file
|
157
|
+
missing_fields = declared_fields - headers
|
158
|
+
if missing_fields != []
|
159
|
+
add_error( :integrity,
|
160
|
+
"Declared schema has fields not present in CSV file (#{missing_fields.join(",")})",
|
161
|
+
path+"/schema/fields")
|
162
|
+
end
|
163
|
+
undeclared_fields = headers - declared_fields
|
164
|
+
if undeclared_fields != []
|
165
|
+
add_error( :integrity,
|
166
|
+
"CSV file has fields missing from schema (#{undeclared_fields.join(",")})",
|
167
|
+
path+"/schema/fields")
|
168
|
+
end
|
146
169
|
end
|
147
|
-
undeclared_fields = headers - declared_fields
|
148
|
-
if undeclared_fields != []
|
149
|
-
messages[:errors] << "CSV file has fields missing from schema (#{undeclared_fields.join(",")})"
|
150
|
-
end
|
151
170
|
end
|
152
|
-
|
153
171
|
end
|
154
172
|
|
155
173
|
end
|
156
|
-
|
174
|
+
|
157
175
|
def csv?(resource)
|
158
176
|
resource["mediatype"] == "text/csv" ||
|
159
177
|
resource["format"] == "csv"
|
160
178
|
end
|
161
179
|
|
162
|
-
def headers(package, resource)
|
180
|
+
def headers(package, resource, path)
|
163
181
|
headers = []
|
182
|
+
#Using built-in CSV parser here as it's more permissive than fastercsv
|
183
|
+
#Lets us provide options to tweak the parsing
|
164
184
|
opts = dialect_to_csv_options(resource["dialect"])
|
165
|
-
|
166
|
-
|
185
|
+
begin
|
186
|
+
CSV.open( package.resolve_resource(resource), "r", opts) do |csv|
|
187
|
+
headers = csv.shift
|
188
|
+
end
|
189
|
+
rescue => e
|
190
|
+
add_error( :integrity, "Parse error for #{package.resolve_resource(resource)}: #{e}", path)
|
167
191
|
end
|
168
192
|
return headers
|
169
193
|
end
|
170
194
|
|
171
195
|
def dialect_to_csv_options(dialect)
|
172
|
-
return {}
|
196
|
+
return {} unless dialect
|
197
|
+
#supplying defaults here just in case the dialect is invalid
|
198
|
+
delimiter = dialect["delimiter"] || ","
|
199
|
+
delimiter = delimiter + " " if !dialect["skipinitialspace"]
|
200
|
+
return {
|
201
|
+
:col_sep => delimiter,
|
202
|
+
:row_sep => ( dialect["lineterminator"] || :auto ),
|
203
|
+
:quote_char => ( dialect["quotechar"] || '"')
|
204
|
+
}
|
205
|
+
end
|
206
|
+
|
207
|
+
private
|
208
|
+
|
209
|
+
#adjust message structure returned by JSON::Validator to add our type and
|
210
|
+
#adjust fragment references when we're using sub-schemas
|
211
|
+
def adjust_messages(messages, type, path)
|
212
|
+
messages.each do |msg|
|
213
|
+
msg[:type]= type
|
214
|
+
msg[:fragment] = rebase( path , msg[:fragment] )
|
215
|
+
end
|
216
|
+
messages
|
173
217
|
end
|
218
|
+
|
174
219
|
end
|
175
220
|
|
176
221
|
end
|
data/lib/datapackage/version.rb
CHANGED