datapackage 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -15,7 +15,7 @@ The library is intending to support:
15
15
 
16
16
  Add the gem into your Gemfile:
17
17
 
18
- gem 'datapackage.rb', :git => "git://github.com/theodi/datapackage.rb.git"
18
+ gem 'datapackage.rb', :git => "git://github.com/theodi/datapackage.rb.git"
19
19
 
20
20
  Or:
21
21
 
@@ -65,15 +65,22 @@ To expose more detail on errors and warnings:
65
65
 
66
66
  messages = package.validate() # or package.validate(:datapackage)
67
67
 
68
- This returns an object with two keys: `:errors` and `:warnings`. These are arrays of messages.
68
+ This returns an object with two keys: `:errors` and `:warnings`. The values of these keys are arrays of message objects.
69
+ Message objects are formatted as follows:
69
70
 
70
- Warnings might include notes on missing metadata elements (e.g. package `licenses`) which are not required by the DataPackage specification
71
- but which SHOULD be included.
71
+ {
72
+ :type => :metadata|:integrity,
73
+ :message => "message for user",
74
+ :fragment => "/path/to/responsible/element"
75
+ }
72
76
 
73
77
  It is possible to treat all warnings as errors by performing strict validation:
74
78
 
75
79
  package.valid?(true)
76
80
 
81
+ Examples of warnings might include notes on missing metadata elements (e.g. package `licenses`) which are not required by the
82
+ DataPackage specification but which SHOULD be included.
83
+
77
84
  Warnings are currently generated for:
78
85
 
79
86
  * Missing `README.md` files from packages
data/bin/datapackage CHANGED
@@ -12,23 +12,25 @@ end
12
12
 
13
13
  package = DataPackage::Package.new(ARGV[0])
14
14
 
15
- messages = package.validate
15
+ messages = package.validate(:simpledataformat)
16
16
 
17
17
  if messages[:errors].length == 0
18
- puts "Package #{package.name} is valid"
18
+ puts "Package #{package.name} is VALID"
19
19
  else
20
- puts "Package #{package.name} is invalid"
21
- puts "Fix the following errors: "
22
- messages[:errors].each_with_index do |error, idx|
23
- msg = error[:message] if error.class == Hash
24
- msg = error if error.class != Hash
25
- puts "#{idx+1}. #{msg}"
26
- end
20
+ puts "Package #{package.name} is INVALID"
21
+ puts "Errors: "
22
+ messages[:errors].each_with_index do |msg, idx|
23
+ puts "#{idx+1}. #{msg[:type]} error. #{msg[:fragment]}"
24
+ puts " #{msg[:message]}"
25
+ end
27
26
  end
28
27
 
29
28
  if !messages[:warnings].empty?
30
29
  puts "Warnings: "
31
- messages[:warnings].each_with_index do |warning, idx|
32
- puts "#{idx+1}. #{warning}"
30
+ messages[:warnings].each_with_index do |msg, idx|
31
+ puts "#{idx+1}. #{msg[:type]} warning. #{msg[:fragment]}"
32
+ puts " #{msg[:message]}"
33
33
  end
34
34
  end
35
+
36
+ exit( messages[:errors].length )
@@ -3,6 +3,8 @@ module DataPackage
3
3
  #Base class for validators
4
4
  class Validator
5
5
 
6
+ attr_reader :messages
7
+
6
8
  def Validator.create(profile, opts={})
7
9
  if profile == :simpledataformat
8
10
  return SimpleDataFormatValidator.new(profile, opts)
@@ -19,45 +21,41 @@ module DataPackage
19
21
  end
20
22
 
21
23
  def valid?(package, strict=false)
22
- messages = validate( package )
23
- return messages[:errors].empty? if !strict
24
- return messages[:errors].empty? && messages[:warnings].empty?
24
+ validate( package )
25
+ return @messages[:errors].empty? if !strict
26
+ return @messages[:errors].empty? && @messages[:warnings].empty?
25
27
  end
26
28
 
27
29
  def validate( package )
28
- return validate_integrity( package, validate_with_schema(package) )
30
+ @messages = {:errors=>[], :warnings=>[]}
31
+ validate_with_schema( package )
32
+ validate_integrity( package )
33
+ return @messages
29
34
  end
30
35
 
36
+ protected
37
+
31
38
  def validate_with_schema(package)
32
39
  schema = load_schema(@schema_name)
33
- messages = {
34
- :errors => JSON::Validator.fully_validate(schema, package.metadata, :errors_as_objects => true),
35
- :warnings => []
36
- }
37
- validate_metadata(package, messages)
38
- return messages
40
+ messages = JSON::Validator.fully_validate(schema, package.metadata, :errors_as_objects => true)
41
+ @messages[:errors] += messages.each {|msg| msg[:type] = :metadata }
42
+ validate_metadata(package)
39
43
  end
40
44
 
41
- def validate_integrity(package, messages={ :errors=>[], :warnings=>[] } )
42
- package.resources.each do |resource|
43
- validate_resource(package, resource, messages)
44
- end
45
-
46
- messages
45
+ def validate_integrity(package )
46
+ package.resources.each_with_index do |resource, idx|
47
+ validate_resource( package, resource, "#/resources/#{idx}" )
48
+ end
47
49
  end
48
-
49
- protected
50
-
50
+
51
51
  #implement to perform additional validation on metadata
52
- def validate_metadata(package, messages)
52
+ def validate_metadata( package )
53
53
  end
54
54
 
55
55
  #implement for per-resource validation
56
- def validate_resource(package, resource, messages)
56
+ def validate_resource( package, resource, path )
57
57
  end
58
-
59
- protected
60
-
58
+
61
59
  def load_schema(profile)
62
60
  if @opts[:schema] && @opts[:schema][profile]
63
61
  if !File.exists?( @opts[:schema][profile] )
@@ -71,6 +69,22 @@ module DataPackage
71
69
  end
72
70
  return JSON.parse( File.read( schema_file ) )
73
71
  end
72
+
73
+ def add_error(type, message, fragment=nil)
74
+ @messages[:errors] << create_message(type, message, fragment)
75
+ end
76
+
77
+ def add_warning(type, message, fragment=nil)
78
+ @messages[:warnings] << create_message(type, message, fragment)
79
+ end
80
+
81
+ def create_message(type, message, fragment=nil)
82
+ { :message => message, :type => type, :fragment => fragment }
83
+ end
84
+
85
+ def rebase(base, fragment)
86
+ return fragment.gsub("#/", base)
87
+ end
74
88
 
75
89
  private
76
90
 
@@ -89,17 +103,17 @@ module DataPackage
89
103
  super(:datapackage, opts)
90
104
  end
91
105
 
92
- def validate_metadata(package, messages)
106
+ def validate_metadata(package)
93
107
  #not required, but recommended
94
108
  prefix = "The package does not include a"
95
- messages[:warnings] << "#{prefix} 'licenses' property" if package.licenses.empty?
96
- messages[:warnings] << "#{prefix} 'datapackage_version' property" unless package.datapackage_version
97
- messages[:warnings] << "#{prefix} README.md file" unless package.resource_exists?( package.resolve("README.md") )
109
+ add_warning( :metadata, "#{prefix} 'licenses' property", "#/") if package.licenses.empty?
110
+ add_warning( :metadata, "#{prefix} 'datapackage_version' property", "#/") unless package.datapackage_version
111
+ add_warning( :integrity, "#{prefix} README.md file" ) unless package.resource_exists?( package.resolve("README.md") )
98
112
  end
99
113
 
100
- def validate_resource(package, resource, messages)
114
+ def validate_resource( package, resource, path )
101
115
  if !package.resource_exists?( package.resolve_resource( resource ) )
102
- messages[:errors] << "Resource #{resource["url"] || resource["path"]} does not exist"
116
+ add_error( :integrity, "Missing resource #{resource["url"] || resource["path"]}", path)
103
117
  end
104
118
  end
105
119
 
@@ -114,63 +128,94 @@ module DataPackage
114
128
  @csvddf_schema = load_schema("csvddf-dialect")
115
129
  end
116
130
 
117
- def validate_resource(package, resource, messages)
118
- super(package, resource, messages)
131
+ def validate_resource(package, resource, path)
132
+ super(package, resource, path)
119
133
 
120
134
  if !csv?(resource)
121
- messages[:errors] << "#{resource["name"]} is not a CSV file"
122
- else
123
- if !resource["schema"]
124
- messages[:errors] << "#{resource["name"]} does not have a schema"
135
+ add_error(:integrity, "#{resource["name"]} is not a CSV file", path )
136
+ else
137
+ schema = resource["schema"]
138
+ if !schema
139
+ add_error(:metadata, "#{resource["name"]} does not have a schema", path )
125
140
  else
126
- messages[:errors] +=
127
- JSON::Validator.fully_validate(@jsontable_schema,
128
- resource["schema"], :errors_as_objects => true)
129
- end
141
+ messages = JSON::Validator.fully_validate(@jsontable_schema, schema, :errors_as_objects => true)
142
+ @messages[:errors] += adjust_messages(messages, :metadata, path + "/schema/")
143
+ end
144
+
130
145
  if resource["dialect"]
131
- messages[:errors] +=
132
- JSON::Validator.fully_validate(@csvddf_schema,
133
- resource["dialect"], :errors_as_objects => true)
146
+ messages = JSON::Validator.fully_validate(@csvddf_schema, resource["dialect"], :errors_as_objects => true)
147
+ @messages[:errors] += adjust_messages(messages, :metadata, path + "/dialect")
134
148
  end
135
149
 
136
- if resource["schema"] && resource["schema"]["fields"]
137
- fields = resource["schema"]["fields"]
138
- declared_fields = fields.map{ |f| f["name"] }
139
- headers = headers(package, resource)
140
-
141
- #set algebra to finding fields missing from schema and/or CSV file
142
- missing_fields = declared_fields - headers
143
- if missing_fields != []
144
- messages[:errors] <<
145
- "Declared schema has fields not present in CSV file (#{missing_fields.join(",")})"
150
+ if package.resource_exists?( package.resolve_resource( resource ) )
151
+ if resource["schema"] && resource["schema"]["fields"]
152
+ fields = resource["schema"]["fields"]
153
+ declared_fields = fields.map{ |f| f["name"] }
154
+ headers = headers(package, resource, path)
155
+
156
+ #set algebra to finding fields missing from schema and/or CSV file
157
+ missing_fields = declared_fields - headers
158
+ if missing_fields != []
159
+ add_error( :integrity,
160
+ "Declared schema has fields not present in CSV file (#{missing_fields.join(",")})",
161
+ path+"/schema/fields")
162
+ end
163
+ undeclared_fields = headers - declared_fields
164
+ if undeclared_fields != []
165
+ add_error( :integrity,
166
+ "CSV file has fields missing from schema (#{undeclared_fields.join(",")})",
167
+ path+"/schema/fields")
168
+ end
146
169
  end
147
- undeclared_fields = headers - declared_fields
148
- if undeclared_fields != []
149
- messages[:errors] << "CSV file has fields missing from schema (#{undeclared_fields.join(",")})"
150
- end
151
170
  end
152
-
153
171
  end
154
172
 
155
173
  end
156
-
174
+
157
175
  def csv?(resource)
158
176
  resource["mediatype"] == "text/csv" ||
159
177
  resource["format"] == "csv"
160
178
  end
161
179
 
162
- def headers(package, resource)
180
+ def headers(package, resource, path)
163
181
  headers = []
182
+ #Using built-in CSV parser here as its more permissive than fastercsv
183
+ #Lets us provide options to tweak the parsing
164
184
  opts = dialect_to_csv_options(resource["dialect"])
165
- CSV.open( package.resolve_resource(resource), "r", opts) do |csv|
166
- headers = csv.shift
185
+ begin
186
+ CSV.open( package.resolve_resource(resource), "r", opts) do |csv|
187
+ headers = csv.shift
188
+ end
189
+ rescue => e
190
+ add_error( :integrity, "Parse error for #{package.resolve_resource(resource)}: #{e}", path)
167
191
  end
168
192
  return headers
169
193
  end
170
194
 
171
195
  def dialect_to_csv_options(dialect)
172
- return {}
196
+ return {} unless dialect
197
+ #supplying defaults here just in case the dialect is invalid
198
+ delimiter = dialect["delimiter"] || ","
199
+ delimiter = delimiter + " " if !dialect["skipinitialspace"]
200
+ return {
201
+ :col_sep => delimiter,
202
+ :row_sep => ( dialect["lineterminator"] || :auto ),
203
+ :quote_char => ( dialect["quotechar"] || '"')
204
+ }
205
+ end
206
+
207
+ private
208
+
209
+ #adjust message structure returned by JSON::Validator to add out type and
210
+ #adjust fragment references when we're using sub-schemas
211
+ def adjust_messages(messages, type, path)
212
+ messages.each do |msg|
213
+ msg[:type]= type
214
+ msg[:fragment] = rebase( path , msg[:fragment] )
215
+ end
216
+ messages
173
217
  end
218
+
174
219
  end
175
220
 
176
221
  end
@@ -1,3 +1,3 @@
1
1
  module DataPackage
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datapackage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: