datapackage 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -15,7 +15,7 @@ The library is intending to support:
15
15
 
16
16
  Add the gem into your Gemfile:
17
17
 
18
- gem 'datapackage.rb', :git => "git://github.com/theodi/datapackage.rb.git"
18
+ gem 'datapackage.rb', :git => "git://github.com/theodi/datapackage.rb.git"
19
19
 
20
20
  Or:
21
21
 
@@ -65,15 +65,22 @@ To expose more detail on errors and warnings:
65
65
 
66
66
  messages = package.validate() # or package.validate(:datapackage)
67
67
 
68
- This returns an object with two keys: `:errors` and `:warnings`. These are arrays of messages.
68
+ This returns an object with two keys: `:errors` and `:warnings`. The values of these keys are arrays of message objects.
69
+ Message objects are formatted as follows:
69
70
 
70
- Warnings might include notes on missing metadata elements (e.g. package `licenses`) which are not required by the DataPackage specification
71
- but which SHOULD be included.
71
+ {
72
+ :type => :metadata|:integrity,
73
+ :message => "message for user",
74
+ :fragment => "/path/to/responsible/element"
75
+ }
72
76
 
73
77
  It is possible to treat all warnings as errors by performing strict validation:
74
78
 
75
79
  package.valid?(true)
76
80
 
81
+ Examples of warnings might include notes on missing metadata elements (e.g. package `licenses`) which are not required by the
82
+ DataPackage specification but which SHOULD be included.
83
+
77
84
  Warnings are currently generated for:
78
85
 
79
86
  * Missing `README.md` files from packages
data/bin/datapackage CHANGED
@@ -12,23 +12,25 @@ end
12
12
 
13
13
  package = DataPackage::Package.new(ARGV[0])
14
14
 
15
- messages = package.validate
15
+ messages = package.validate(:simpledataformat)
16
16
 
17
17
  if messages[:errors].length == 0
18
- puts "Package #{package.name} is valid"
18
+ puts "Package #{package.name} is VALID"
19
19
  else
20
- puts "Package #{package.name} is invalid"
21
- puts "Fix the following errors: "
22
- messages[:errors].each_with_index do |error, idx|
23
- msg = error[:message] if error.class == Hash
24
- msg = error if error.class != Hash
25
- puts "#{idx+1}. #{msg}"
26
- end
20
+ puts "Package #{package.name} is INVALID"
21
+ puts "Errors: "
22
+ messages[:errors].each_with_index do |msg, idx|
23
+ puts "#{idx+1}. #{msg[:type]} error. #{msg[:fragment]}"
24
+ puts " #{msg[:message]}"
25
+ end
27
26
  end
28
27
 
29
28
  if !messages[:warnings].empty?
30
29
  puts "Warnings: "
31
- messages[:warnings].each_with_index do |warning, idx|
32
- puts "#{idx+1}. #{warning}"
30
+ messages[:warnings].each_with_index do |msg, idx|
31
+ puts "#{idx+1}. #{msg[:type]} warning. #{msg[:fragment]}"
32
+ puts " #{msg[:message]}"
33
33
  end
34
34
  end
35
+
36
+ exit( messages[:errors].length )
@@ -3,6 +3,8 @@ module DataPackage
3
3
  #Base class for validators
4
4
  class Validator
5
5
 
6
+ attr_reader :messages
7
+
6
8
  def Validator.create(profile, opts={})
7
9
  if profile == :simpledataformat
8
10
  return SimpleDataFormatValidator.new(profile, opts)
@@ -19,45 +21,41 @@ module DataPackage
19
21
  end
20
22
 
21
23
  def valid?(package, strict=false)
22
- messages = validate( package )
23
- return messages[:errors].empty? if !strict
24
- return messages[:errors].empty? && messages[:warnings].empty?
24
+ validate( package )
25
+ return @messages[:errors].empty? if !strict
26
+ return @messages[:errors].empty? && @messages[:warnings].empty?
25
27
  end
26
28
 
27
29
  def validate( package )
28
- return validate_integrity( package, validate_with_schema(package) )
30
+ @messages = {:errors=>[], :warnings=>[]}
31
+ validate_with_schema( package )
32
+ validate_integrity( package )
33
+ return @messages
29
34
  end
30
35
 
36
+ protected
37
+
31
38
  def validate_with_schema(package)
32
39
  schema = load_schema(@schema_name)
33
- messages = {
34
- :errors => JSON::Validator.fully_validate(schema, package.metadata, :errors_as_objects => true),
35
- :warnings => []
36
- }
37
- validate_metadata(package, messages)
38
- return messages
40
+ messages = JSON::Validator.fully_validate(schema, package.metadata, :errors_as_objects => true)
41
+ @messages[:errors] += messages.each {|msg| msg[:type] = :metadata }
42
+ validate_metadata(package)
39
43
  end
40
44
 
41
- def validate_integrity(package, messages={ :errors=>[], :warnings=>[] } )
42
- package.resources.each do |resource|
43
- validate_resource(package, resource, messages)
44
- end
45
-
46
- messages
45
+ def validate_integrity(package )
46
+ package.resources.each_with_index do |resource, idx|
47
+ validate_resource( package, resource, "#/resources/#{idx}" )
48
+ end
47
49
  end
48
-
49
- protected
50
-
50
+
51
51
  #implement to perform additional validation on metadata
52
- def validate_metadata(package, messages)
52
+ def validate_metadata( package )
53
53
  end
54
54
 
55
55
  #implement for per-resource validation
56
- def validate_resource(package, resource, messages)
56
+ def validate_resource( package, resource, path )
57
57
  end
58
-
59
- protected
60
-
58
+
61
59
  def load_schema(profile)
62
60
  if @opts[:schema] && @opts[:schema][profile]
63
61
  if !File.exists?( @opts[:schema][profile] )
@@ -71,6 +69,22 @@ module DataPackage
71
69
  end
72
70
  return JSON.parse( File.read( schema_file ) )
73
71
  end
72
+
73
+ def add_error(type, message, fragment=nil)
74
+ @messages[:errors] << create_message(type, message, fragment)
75
+ end
76
+
77
+ def add_warning(type, message, fragment=nil)
78
+ @messages[:warnings] << create_message(type, message, fragment)
79
+ end
80
+
81
+ def create_message(type, message, fragment=nil)
82
+ { :message => message, :type => type, :fragment => fragment }
83
+ end
84
+
85
+ def rebase(base, fragment)
86
+ return fragment.gsub("#/", base)
87
+ end
74
88
 
75
89
  private
76
90
 
@@ -89,17 +103,17 @@ module DataPackage
89
103
  super(:datapackage, opts)
90
104
  end
91
105
 
92
- def validate_metadata(package, messages)
106
+ def validate_metadata(package)
93
107
  #not required, but recommended
94
108
  prefix = "The package does not include a"
95
- messages[:warnings] << "#{prefix} 'licenses' property" if package.licenses.empty?
96
- messages[:warnings] << "#{prefix} 'datapackage_version' property" unless package.datapackage_version
97
- messages[:warnings] << "#{prefix} README.md file" unless package.resource_exists?( package.resolve("README.md") )
109
+ add_warning( :metadata, "#{prefix} 'licenses' property", "#/") if package.licenses.empty?
110
+ add_warning( :metadata, "#{prefix} 'datapackage_version' property", "#/") unless package.datapackage_version
111
+ add_warning( :integrity, "#{prefix} README.md file" ) unless package.resource_exists?( package.resolve("README.md") )
98
112
  end
99
113
 
100
- def validate_resource(package, resource, messages)
114
+ def validate_resource( package, resource, path )
101
115
  if !package.resource_exists?( package.resolve_resource( resource ) )
102
- messages[:errors] << "Resource #{resource["url"] || resource["path"]} does not exist"
116
+ add_error( :integrity, "Missing resource #{resource["url"] || resource["path"]}", path)
103
117
  end
104
118
  end
105
119
 
@@ -114,63 +128,94 @@ module DataPackage
114
128
  @csvddf_schema = load_schema("csvddf-dialect")
115
129
  end
116
130
 
117
- def validate_resource(package, resource, messages)
118
- super(package, resource, messages)
131
+ def validate_resource(package, resource, path)
132
+ super(package, resource, path)
119
133
 
120
134
  if !csv?(resource)
121
- messages[:errors] << "#{resource["name"]} is not a CSV file"
122
- else
123
- if !resource["schema"]
124
- messages[:errors] << "#{resource["name"]} does not have a schema"
135
+ add_error(:integrity, "#{resource["name"]} is not a CSV file", path )
136
+ else
137
+ schema = resource["schema"]
138
+ if !schema
139
+ add_error(:metadata, "#{resource["name"]} does not have a schema", path )
125
140
  else
126
- messages[:errors] +=
127
- JSON::Validator.fully_validate(@jsontable_schema,
128
- resource["schema"], :errors_as_objects => true)
129
- end
141
+ messages = JSON::Validator.fully_validate(@jsontable_schema, schema, :errors_as_objects => true)
142
+ @messages[:errors] += adjust_messages(messages, :metadata, path + "/schema/")
143
+ end
144
+
130
145
  if resource["dialect"]
131
- messages[:errors] +=
132
- JSON::Validator.fully_validate(@csvddf_schema,
133
- resource["dialect"], :errors_as_objects => true)
146
+ messages = JSON::Validator.fully_validate(@csvddf_schema, resource["dialect"], :errors_as_objects => true)
147
+ @messages[:errors] += adjust_messages(messages, :metadata, path + "/dialect")
134
148
  end
135
149
 
136
- if resource["schema"] && resource["schema"]["fields"]
137
- fields = resource["schema"]["fields"]
138
- declared_fields = fields.map{ |f| f["name"] }
139
- headers = headers(package, resource)
140
-
141
- #set algebra to finding fields missing from schema and/or CSV file
142
- missing_fields = declared_fields - headers
143
- if missing_fields != []
144
- messages[:errors] <<
145
- "Declared schema has fields not present in CSV file (#{missing_fields.join(",")})"
150
+ if package.resource_exists?( package.resolve_resource( resource ) )
151
+ if resource["schema"] && resource["schema"]["fields"]
152
+ fields = resource["schema"]["fields"]
153
+ declared_fields = fields.map{ |f| f["name"] }
154
+ headers = headers(package, resource, path)
155
+
156
+ #set algebra to finding fields missing from schema and/or CSV file
157
+ missing_fields = declared_fields - headers
158
+ if missing_fields != []
159
+ add_error( :integrity,
160
+ "Declared schema has fields not present in CSV file (#{missing_fields.join(",")})",
161
+ path+"/schema/fields")
162
+ end
163
+ undeclared_fields = headers - declared_fields
164
+ if undeclared_fields != []
165
+ add_error( :integrity,
166
+ "CSV file has fields missing from schema (#{undeclared_fields.join(",")})",
167
+ path+"/schema/fields")
168
+ end
146
169
  end
147
- undeclared_fields = headers - declared_fields
148
- if undeclared_fields != []
149
- messages[:errors] << "CSV file has fields missing from schema (#{undeclared_fields.join(",")})"
150
- end
151
170
  end
152
-
153
171
  end
154
172
 
155
173
  end
156
-
174
+
157
175
  def csv?(resource)
158
176
  resource["mediatype"] == "text/csv" ||
159
177
  resource["format"] == "csv"
160
178
  end
161
179
 
162
- def headers(package, resource)
180
+ def headers(package, resource, path)
163
181
  headers = []
182
+ #Using built-in CSV parser here as it's more permissive than fastercsv
183
+ #Lets us provide options to tweak the parsing
164
184
  opts = dialect_to_csv_options(resource["dialect"])
165
- CSV.open( package.resolve_resource(resource), "r", opts) do |csv|
166
- headers = csv.shift
185
+ begin
186
+ CSV.open( package.resolve_resource(resource), "r", opts) do |csv|
187
+ headers = csv.shift
188
+ end
189
+ rescue => e
190
+ add_error( :integrity, "Parse error for #{package.resolve_resource(resource)}: #{e}", path)
167
191
  end
168
192
  return headers
169
193
  end
170
194
 
171
195
  def dialect_to_csv_options(dialect)
172
- return {}
196
+ return {} unless dialect
197
+ #supplying defaults here just in case the dialect is invalid
198
+ delimiter = dialect["delimiter"] || ","
199
+ delimiter = delimiter + " " if !dialect["skipinitialspace"]
200
+ return {
201
+ :col_sep => delimiter,
202
+ :row_sep => ( dialect["lineterminator"] || :auto ),
203
+ :quote_char => ( dialect["quotechar"] || '"')
204
+ }
205
+ end
206
+
207
+ private
208
+
209
+ #adjust message structure returned by JSON::Validator to add our type and
210
+ #adjust fragment references when we're using sub-schemas
211
+ def adjust_messages(messages, type, path)
212
+ messages.each do |msg|
213
+ msg[:type]= type
214
+ msg[:fragment] = rebase( path , msg[:fragment] )
215
+ end
216
+ messages
173
217
  end
218
+
174
219
  end
175
220
 
176
221
  end
@@ -1,3 +1,3 @@
1
1
  module DataPackage
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datapackage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: