wjordan213.csvlint 0.2.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitattributes +2 -0
  4. data/.gitignore +28 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +32 -0
  7. data/CHANGELOG.md +361 -0
  8. data/Gemfile +7 -0
  9. data/LICENSE.md +22 -0
  10. data/README.md +328 -0
  11. data/Rakefile +17 -0
  12. data/bin/create_schema +32 -0
  13. data/bin/csvlint +10 -0
  14. data/features/check_format.feature +46 -0
  15. data/features/cli.feature +210 -0
  16. data/features/csv_options.feature +35 -0
  17. data/features/csvupload.feature +145 -0
  18. data/features/csvw_schema_validation.feature +127 -0
  19. data/features/fixtures/cr-line-endings.csv +0 -0
  20. data/features/fixtures/crlf-line-endings.csv +0 -0
  21. data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
  22. data/features/fixtures/inconsistent-line-endings.csv +0 -0
  23. data/features/fixtures/invalid-byte-sequence.csv +0 -0
  24. data/features/fixtures/invalid_many_rows.csv +0 -0
  25. data/features/fixtures/lf-line-endings.csv +0 -0
  26. data/features/fixtures/spreadsheet.xls +0 -0
  27. data/features/fixtures/spreadsheet.xlsx +0 -0
  28. data/features/fixtures/title-row.csv +0 -0
  29. data/features/fixtures/valid.csv +0 -0
  30. data/features/fixtures/valid_many_rows.csv +0 -0
  31. data/features/fixtures/windows-line-endings.csv +0 -0
  32. data/features/information.feature +22 -0
  33. data/features/parse_csv.feature +90 -0
  34. data/features/schema_validation.feature +105 -0
  35. data/features/sources.feature +17 -0
  36. data/features/step_definitions/cli_steps.rb +11 -0
  37. data/features/step_definitions/csv_options_steps.rb +24 -0
  38. data/features/step_definitions/information_steps.rb +13 -0
  39. data/features/step_definitions/parse_csv_steps.rb +42 -0
  40. data/features/step_definitions/schema_validation_steps.rb +33 -0
  41. data/features/step_definitions/sources_steps.rb +7 -0
  42. data/features/step_definitions/validation_errors_steps.rb +90 -0
  43. data/features/step_definitions/validation_info_steps.rb +22 -0
  44. data/features/step_definitions/validation_warnings_steps.rb +60 -0
  45. data/features/support/aruba.rb +56 -0
  46. data/features/support/env.rb +26 -0
  47. data/features/support/load_tests.rb +114 -0
  48. data/features/support/webmock.rb +1 -0
  49. data/features/validation_errors.feature +147 -0
  50. data/features/validation_info.feature +16 -0
  51. data/features/validation_warnings.feature +86 -0
  52. data/lib/csvlint.rb +27 -0
  53. data/lib/csvlint/cli.rb +165 -0
  54. data/lib/csvlint/csvw/column.rb +359 -0
  55. data/lib/csvlint/csvw/date_format.rb +182 -0
  56. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  57. data/lib/csvlint/csvw/number_format.rb +211 -0
  58. data/lib/csvlint/csvw/property_checker.rb +761 -0
  59. data/lib/csvlint/csvw/table.rb +204 -0
  60. data/lib/csvlint/csvw/table_group.rb +165 -0
  61. data/lib/csvlint/error_collector.rb +27 -0
  62. data/lib/csvlint/error_message.rb +15 -0
  63. data/lib/csvlint/field.rb +196 -0
  64. data/lib/csvlint/schema.rb +92 -0
  65. data/lib/csvlint/validate.rb +599 -0
  66. data/lib/csvlint/version.rb +3 -0
  67. data/spec/csvw/column_spec.rb +112 -0
  68. data/spec/csvw/date_format_spec.rb +49 -0
  69. data/spec/csvw/number_format_spec.rb +417 -0
  70. data/spec/csvw/table_group_spec.rb +143 -0
  71. data/spec/csvw/table_spec.rb +90 -0
  72. data/spec/field_spec.rb +252 -0
  73. data/spec/schema_spec.rb +211 -0
  74. data/spec/spec_helper.rb +17 -0
  75. data/spec/validator_spec.rb +619 -0
  76. data/wjordan213_csvlint.gemspec +46 -0
  77. metadata +490 -0
@@ -0,0 +1,204 @@
module Csvlint
  module Csvw
    # One table description from a CSVW metadata document, together with the
    # state needed to validate the header, rows, primary key and foreign keys
    # of the CSV data it describes.
    #
    # Errors and warnings are collected through Csvlint::ErrorCollector. Every
    # validate_* method calls +reset+ first, so +errors+ / +warnings+ only
    # reflect the most recent call.
    class Table

      include Csvlint::ErrorCollector

      attr_reader :columns, :dialect, :table_direction, :foreign_keys, :foreign_key_references, :id, :notes, :primary_key, :schema, :suppress_output, :transformations, :url, :annotations

      # Stores the table properties and seeds the collector with the supplied
      # +warnings+ plus the errors and warnings already held by each column.
      def initialize(url, columns: [], dialect: {}, table_direction: :auto, foreign_keys: [], id: nil, notes: [], primary_key: nil, schema: nil, suppress_output: false, transformations: [], annotations: [], warnings: [])
        @url = url
        @columns = columns
        @dialect = dialect
        @table_direction = table_direction
        @foreign_keys = foreign_keys
        @foreign_key_values = {}            # foreign key => keys this table's rows point at
        @foreign_key_references = []        # foreign keys (of any table) that point at this table
        @foreign_key_reference_values = {}  # foreign key => { key => rows of this table holding it }
        @id = id
        @notes = notes
        @primary_key = primary_key
        @primary_key_values = {}            # key => row where it was last seen
        @schema = schema
        @suppress_output = suppress_output
        @transformations = transformations
        @annotations = annotations
        reset
        @warnings += warnings
        @errors += columns.flat_map(&:errors)
        @warnings += columns.flat_map(&:warnings)
      end

      # Validates each header cell against the column at the same position.
      # A header cell with no matching column is a :malformed_header error.
      # Nothing is checked when the table declares no columns.
      #
      # @return [Boolean] true when no errors were recorded
      def validate_header(headers)
        reset
        unless columns.empty?
          headers.each_with_index do |header, i|
            column = columns[i]
            if column
              column.validate_header(header)
              @errors += column.errors
              @warnings += column.warnings
            else
              build_errors(:malformed_header, :schema, 1, nil, header, nil)
            end
          end
        end
        valid?
      end

      # Validates one row: per-column checks, primary key uniqueness, and
      # bookkeeping of the key values needed later by validate_foreign_keys.
      #
      # @param values [Array] the cell values of the row
      # @param row [Integer, nil] row number used in error messages
      # @return [Boolean] true when no errors were recorded
      def validate_row(values, row=nil)
        reset
        unless columns.empty?
          values.each_with_index do |value, i|
            column = columns[i]
            if column
              column.validate(value, row)
              @errors += column.errors
              @warnings += column.warnings
            else
              build_errors(:too_many_values, :schema, row, nil, value, nil)
            end
          end
        end
        unless @primary_key.nil?
          key = @primary_key.map { |column| column.parse(values[column.number - 1], row) }
          build_errors(:duplicate_key, :schema, row, nil, key.join(","), @primary_key_values[key]) if @primary_key_values.include?(key)
          @primary_key_values[key] = row
        end
        # build a record of the unique values that are referenced by foreign keys from other tables
        # so that later we can check whether those foreign keys reference these values
        @foreign_key_references.each do |foreign_key|
          key = foreign_key["referenced_columns"].map { |column| column.parse(values[column.number - 1], row) }
          known_values = (@foreign_key_reference_values[foreign_key] ||= {})
          (known_values[key] ||= []) << row
        end
        # build a record of the references from this row to other tables
        # we can't check yet whether these exist in the other tables because
        # we might not have parsed those other tables
        @foreign_keys.each do |foreign_key|
          key = foreign_key["referencing_columns"].map { |column| column.parse(values[column.number - 1], row) }
          known_values = (@foreign_key_values[foreign_key] ||= [])
          known_values << key unless known_values.include?(key)
        end
        valid?
      end

      # Checks every foreign key declared on this table against the values
      # recorded by the table it references, and gathers the resulting
      # errors and warnings on this table.
      #
      # @return [Boolean] true when no errors were recorded
      def validate_foreign_keys
        reset
        @foreign_keys.each do |foreign_key|
          local = @foreign_key_values[foreign_key]
          remote_table = foreign_key["referenced_table"]
          if remote_table == self
            # The check below resets this table's own collector; hold on to
            # what earlier foreign keys reported so it is not discarded.
            earlier_errors = @errors
            earlier_warnings = @warnings
            validate_foreign_key_references(foreign_key, @url, local)
            @errors = earlier_errors + @errors
            @warnings = earlier_warnings + @warnings
          else
            remote_table.validate_foreign_key_references(foreign_key, @url, local)
            @errors += remote_table.errors
            @warnings += remote_table.warnings
          end
        end
        valid?
      end

      # Checks that each key in +remote+ (values used by the referencing
      # table at +remote_url+) matches exactly one row of this table.
      #
      # @param remote [Array, nil] keys collected by the referencing table;
      #   nil (no rows seen) is treated as empty
      # @return [Boolean] true when no errors were recorded
      def validate_foreign_key_references(foreign_key, remote_url, remote)
        reset
        # No entry exists when this table never saw a row for this foreign key.
        local = @foreign_key_reference_values[foreign_key] || {}
        context = { "from" => { "url" => remote_url.to_s.split("/")[-1], "columns" => foreign_key["columnReference"] }, "to" => { "url" => @url.to_s.split("/")[-1], "columns" => foreign_key["reference"]["columnReference"] }}
        (remote || []).each do |r|
          matching_rows = local[r]
          if matching_rows
            build_errors(:multiple_matched_rows, :schema, nil, nil, r, context) if matching_rows.length > 1
          else
            build_errors(:unmatched_foreign_key_reference, :schema, nil, nil, r, context)
          end
        end
        valid?
      end

      # Builds a Table from its JSON description.
      #
      # Properties are classified by Csvw::PropertyChecker; problems that do
      # not invalidate the metadata become warnings on the returned table.
      # Adds a "referencing_columns" entry to each foreign key hash.
      #
      # @raise [Csvlint::Csvw::MetadataError] for a wrong @type, a non-array
      #   columns property, a non-virtual column after a virtual one,
      #   duplicate column names, or foreignKeys / rowTitles that name an
      #   unknown column
      def self.from_json(table_desc, base_url=nil, lang="und", inherited_properties={})
        annotations = {}
        warnings = []
        table_properties = {}
        columns = []
        notes = []
        inherited_properties = inherited_properties.clone
        table_path = "$.tables[?(@.url = '#{table_desc["url"]}')]"

        table_desc.each do |property,value|
          if property == "@type"
            raise Csvlint::Csvw::MetadataError.new("#{table_path}.@type"), "@type of table is not 'Table'" unless value == 'Table'
          elsif property == "notes"
            notes = value
          else
            v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
            warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
            if type == :annotation
              annotations[property] = v
            elsif type == :table || type == :common
              table_properties[property] = v
            elsif type == :column
              warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "#{property}", nil)
            else
              inherited_properties[property] = v
            end
          end
        end

        table_schema = table_properties["tableSchema"] || inherited_properties["tableSchema"]
        # Named columns only. Looking columns up by name (rather than by
        # position in a list of names) stays correct when some columns are unnamed.
        columns_by_name = {}
        foreign_keys = []
        primary_key_columns = []
        primary_key_valid = true
        if table_schema
          raise Csvlint::Csvw::MetadataError.new("#{table_path}.tableSchema.columns"), "schema columns is not an array" unless table_schema["columns"].instance_of? Array
          virtual_columns = false
          table_schema["columns"].each_with_index do |column_desc,i|
            if column_desc.instance_of? Hash
              column = Csvlint::Csvw::Column.from_json(i+1, column_desc, base_url, lang, inherited_properties)
              raise Csvlint::Csvw::MetadataError.new("#{table_path}.tableSchema.columns[#{i}].virtual"), "virtual columns before non-virtual column #{column.name || i}" if virtual_columns && !column.virtual
              virtual_columns ||= column.virtual
              raise Csvlint::Csvw::MetadataError.new("#{table_path}.tableSchema.columns"), "multiple columns named #{column.name}" if columns_by_name.key?(column.name)
              columns_by_name[column.name] = column unless column.name.nil?
              columns << column
            else
              warnings << Csvlint::ErrorMessage.new(:invalid_column_description, :metadata, nil, nil, "#{column_desc}", nil)
            end
          end

          primary_key = table_schema["primaryKey"]
          primary_key.each do |reference|
            column = columns_by_name[reference]
            if column
              primary_key_columns << column
            else
              warnings << Csvlint::ErrorMessage.new(:invalid_column_reference, :metadata, nil, nil, "primaryKey: #{reference}", nil)
              primary_key_valid = false
            end
          end if primary_key

          foreign_keys = table_schema["foreignKeys"]
          foreign_keys.each_with_index do |foreign_key, i|
            # The foreign key index is kept separate from the column lookup
            # so the error path names the offending foreign key.
            foreign_key["referencing_columns"] = foreign_key["columnReference"].map do |reference|
              column = columns_by_name[reference]
              raise Csvlint::Csvw::MetadataError.new("#{table_path}.tableSchema.foreignKeys[#{i}].columnReference"), "foreignKey references non-existant column" unless column
              column
            end
          end if foreign_keys

          row_titles = table_schema["rowTitles"]
          row_titles.each_with_index do |row_title,i|
            raise Csvlint::Csvw::MetadataError.new("#{table_path}.tableSchema.rowTitles[#{i}]"), "rowTitles references non-existant column" unless columns_by_name.key?(row_title)
          end if row_titles

        end

        self.new(table_properties["url"],
          id: table_properties["@id"],
          columns: columns,
          dialect: table_properties["dialect"],
          foreign_keys: foreign_keys || [],
          notes: notes,
          # A primary key is only enforced when every reference resolved.
          primary_key: primary_key_valid && !primary_key_columns.empty? ? primary_key_columns : nil,
          schema: table_schema ? table_schema["@id"] : nil,
          annotations: annotations,
          warnings: warnings
        )
      end

    end
  end
end
@@ -0,0 +1,165 @@
module Csvlint
  module Csvw
    # A group of CSVW tables built from one metadata document. Dispatches
    # header / row validation to the table registered under a URL and runs
    # the cross-table foreign key checks.
    #
    # Errors and warnings are collected through Csvlint::ErrorCollector;
    # every validate_* method calls +reset+ first.
    class TableGroup

      include Csvlint::ErrorCollector

      # Top-level properties handled directly by from_json rather than
      # being passed through the property checker.
      VALID_PROPERTIES = ['tables', 'notes', '@type'].freeze

      attr_reader :url, :id, :tables, :notes, :annotations

      # @param tables [Hash] table URL => Csvlint::Csvw::Table
      def initialize(url, id: nil, tables: {}, notes: [], annotations: {}, warnings: [])
        @url = url
        @id = id
        @tables = tables
        @notes = notes
        @annotations = annotations
        # Tracks which tables have had at least one row validated.
        @validated_tables = {}
        @tables.each_key { |table_url| @validated_tables[table_url] = false }
        reset
        @warnings += warnings
        @errors += @tables.values.flat_map(&:errors)
        @warnings += @tables.values.flat_map(&:warnings)
      end

      # Validates +header+ against the table registered under +table_url+.
      #
      # @param table_url [String, File] table key, or the File being read
      # @return [Boolean] true when no errors were recorded
      def validate_header(header, table_url)
        reset
        table = tables[table_key(table_url)]
        table.validate_header(header)
        @errors += table.errors
        @warnings += table.warnings
        valid?
      end

      # Validates one row against the table registered under +table_url+
      # and marks that table as validated. +all_errors+ is accepted but not
      # used by this method.
      #
      # @return [Boolean] true when no errors were recorded
      def validate_row(values, row=nil, all_errors=[], table_url)
        reset
        table_url = table_key(table_url)
        @validated_tables[table_url] = true
        table = tables[table_url]
        table.validate_row(values, row)
        @errors += table.errors
        @warnings += table.warnings
        valid?
      end

      # Runs the foreign key checks of every table, but only once all
      # tables in the group have been marked as validated.
      #
      # @return [Boolean] true when no errors were recorded
      def validate_foreign_keys
        reset
        unless @validated_tables.has_value?(false)
          @tables.each_value do |table|
            table.validate_foreign_keys
            @errors += table.errors
            @warnings += table.warnings
          end
        end
        valid?
      end

      # Builds a TableGroup from a parsed metadata document.
      #
      # Removes "@context" from +json+, rewrites each table description's
      # "url" to an absolute URL, and adds "referenced_table" /
      # "referenced_columns" entries to every foreign key hash.
      #
      # @param url [#to_s] location of the metadata document, used as base URL
      # @param json [Hash] the parsed metadata (a table group or a single table)
      # @raise [Csvlint::Csvw::MetadataError] when the document is structurally invalid
      def self.from_json(url, json)
        warnings = []
        tables = {}
        annotations = {}
        inherited_properties = {}
        common_properties = {}
        base_url = URI(url.to_s.strip)
        lang = "und"

        context = json["@context"]
        if context.instance_of?(Array) && context[1]
          context[1].each do |property,value|
            v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
            if warning.nil? || warning.empty?
              if type == :context
                base_url = v if property == "@base"
                lang = v if property == "@language"
              else
                raise Csvlint::Csvw::MetadataError.new("$.@context"), "@context contains properties other than @base or @language (#{property})"
              end
            else
              raise Csvlint::Csvw::MetadataError.new("$.@context"), "@context contains properties other than @base or @language (#{property})" unless ["@base", "@language"].include?(property)
              warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "@context: #{property}: #{value}", nil) }
            end
          end
        end
        json.delete("@context")

        # A document that is a bare table description is wrapped into a
        # one-table group.
        json = { "tables" => [ json ] } if json["url"] && !json["tables"]

        json.each do |property,value|
          next if VALID_PROPERTIES.include? property
          v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
          warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
          if type == :annotation
            annotations[property] = v
          elsif type == :common
            common_properties[property] = v
          elsif type == :column
            warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "#{property}", nil)
          else
            inherited_properties[property] = v
          end
        end

        id = common_properties["@id"]

        raise Csvlint::Csvw::MetadataError.new("$.@type"), "@type of table group is not 'TableGroup'" if json["@type"] && json["@type"] != 'TableGroup'

        table_descs = json["tables"]
        raise Csvlint::Csvw::MetadataError.new("$"), "no tables property" unless table_descs
        # Guarded with respond_to? so that a value such as a number reaches
        # the "not an array" error below instead of raising NoMethodError.
        raise Csvlint::Csvw::MetadataError.new("$.tables"), "empty tables property" if table_descs.respond_to?(:empty?) && table_descs.empty?
        raise Csvlint::Csvw::MetadataError.new("$.tables"), "tables property is not an array" unless table_descs.instance_of? Array

        table_descs.each do |table_desc|
          if table_desc.instance_of? Hash
            table_url = table_desc["url"]
            unless table_url.instance_of? String
              warnings << Csvlint::ErrorMessage.new(:invalid_url, :metadata, nil, nil, "url: #{table_url}", nil)
              table_url = ""
            end
            table_url = URI.join(base_url, table_url).to_s
            table_desc["url"] = table_url
            tables[table_url] = Csvlint::Csvw::Table.from_json(table_desc, base_url, lang, inherited_properties)
          else
            warnings << Csvlint::ErrorMessage.new(:invalid_table_description, :metadata, nil, nil, "#{table_desc}", nil)
          end
        end

        # Resolve every foreign key to the table and columns it points at.
        tables.each do |table_url, table|
          table.foreign_keys.each_with_index do |foreign_key,i|
            reference = foreign_key["reference"]
            if reference["resource"]
              resource = URI.join(base_url, reference["resource"]).to_s
              referenced_table = tables[resource]
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_url}')].tableSchema.foreign_keys[#{i}].reference.resource"), "foreign key references table that does not exist (#{resource})" if referenced_table.nil?
            else
              schema_url = URI.join(base_url, reference["schemaReference"]).to_s
              # The first table using the referenced schema is the target.
              referenced_table = tables.values.find { |candidate| candidate.schema == schema_url }
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_url}')].tableSchema.foreign_keys[#{i}].reference.schemaReference"), "foreign key references schema that is not used (#{schema_url})" if referenced_table.nil?
            end
            foreign_key["referenced_table"] = referenced_table
            table_columns = {}
            referenced_table.columns.each do |column|
              table_columns[column.name] = column if column.name
            end
            foreign_key["referenced_columns"] = Array(reference["columnReference"]).map do |column_reference|
              column = table_columns[column_reference]
              # resource is nil on the schemaReference branch; fall back to
              # the resolved table's url so the message names the table.
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_url}')].tableSchema.foreign_keys[#{i}].reference.columnReference"), "column named #{column_reference} does not exist in #{resource || referenced_table.url}" if column.nil?
              column
            end
            referenced_table.foreign_key_references << foreign_key
          end
        end

        self.new(base_url, id: id, tables: tables, notes: json["notes"] || [], annotations: annotations, warnings: warnings)
      end

      private

      # Maps a File to the "file:<absolute path>" string used to look up
      # tables; any other value is returned unchanged.
      def table_key(table_url)
        table_url.instance_of?(File) ? "file:#{File.absolute_path(table_url)}" : table_url
      end

    end
  end
end
@@ -0,0 +1,27 @@
module Csvlint
  # Mixin that accumulates validation errors, warnings and information
  # messages as Csvlint::ErrorMessage objects.
  #
  # The three collections are created on first use, so the readers,
  # +valid?+ and the build_* methods also work on an object that has not
  # called +reset+ yet.
  module ErrorCollector
    # @return [Array] the errors collected since the last reset
    def errors
      @errors ||= []
    end

    # @return [Array] the warnings collected since the last reset
    def warnings
      @warnings ||= []
    end

    # @return [Array] the information messages collected since the last reset
    def info_messages
      @info_messages ||= []
    end

    # Creates a validation error
    def build_errors(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
      (@errors ||= []) << Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
    end

    # Creates a validation warning
    def build_warnings(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
      (@warnings ||= []) << Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
    end

    # Creates a validation information message
    def build_info_messages(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
      (@info_messages ||= []) << Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
    end

    # @return [Boolean] true when no errors have been collected
    #   (warnings and information messages do not count)
    def valid?
      errors.empty?
    end

    # Discards everything collected so far.
    def reset
      @errors = []
      @warnings = []
      @info_messages = []
    end
  end
end
@@ -0,0 +1,15 @@
module Csvlint
  # One validation finding (error, warning or information message).
  # All six attributes are set at construction and exposed read-only.
  class ErrorMessage
    attr_reader :type, :category, :row, :column, :content, :constraints

    # Stores each argument in the attribute of the same name.
    def initialize(type, category, row, column, content, constraints)
      @type, @category = type, category
      @row, @column = row, column
      @content, @constraints = content, constraints
    end
  end
end
@@ -0,0 +1,196 @@
module Csvlint

  # One field of a schema: a name, optional title / description, and a hash
  # of constraints that cell values are validated against.
  #
  # Errors are collected through Csvlint::ErrorCollector; validate_column
  # resets the collector on every call.
  class Field
    include Csvlint::ErrorCollector

    attr_reader :name, :constraints, :title, :description

    # @param constraints [Hash, nil] constraint name => value; nil is treated as {}
    def initialize(name, constraints={}, title=nil, description=nil)
      @name = name
      @constraints = constraints || {}
      @uniques = Set.new   # values seen so far, for the "unique" constraint
      @title = title
      @description = description
      reset
    end

    # Validates one cell value against every constraint of this field.
    #
    # @param value [String, nil] the cell value
    # @param row [Integer, nil] row number used in error messages
    # @param column [Integer, nil] column number used in error messages
    # @param all_errors [Array] errors already collected by the caller; the
    #   pattern check is skipped once it holds an :invalid_regex error for
    #   this column
    # @return [Boolean] true when no errors were recorded for this value
    def validate_column(value, row=nil, column=nil, all_errors=[])
      reset
      regex_already_reported = all_errors.any? { |error| error.type == :invalid_regex && error.column == column }
      validate_regex(value, row, column, all_errors) unless regex_already_reported
      validate_length(value, row, column)
      validate_values(value, row, column)
      parsed = validate_type(value, row, column)
      validate_range(parsed, row, column) unless parsed.nil?
      valid?
    end

    private

    # Checks the "required", "minLength" and "maxLength" constraints.
    # A nil value counts as missing and as shorter than any minLength.
    def validate_length(value, row, column)
      if constraints["required"] == true && (value.nil? || value.length == 0)
        build_errors(:missing_value, :schema, row, column, value, { "required" => true })
      end
      min_length = constraints["minLength"]
      if min_length && (value.nil? || value.length < min_length)
        build_errors(:min_length, :schema, row, column, value, { "minLength" => min_length })
      end
      max_length = constraints["maxLength"]
      if max_length && !value.nil? && value.length > max_length
        build_errors(:max_length, :schema, row, column, value, { "maxLength" => max_length })
      end
    end

    # If a pattern exists, raise an invalid regex error if it is not in
    # valid regex form, else, if the value of the relevant field in the csv
    # does not match the given regex pattern in the schema, raise a
    # pattern error.
    def validate_regex(value, row, column, all_errors)
      pattern = constraints["pattern"]
      return unless pattern
      begin
        regex = Regexp.new(pattern)
      rescue RegexpError
        build_regex_error(value, row, column, pattern, all_errors)
        return
      end
      # Match with the regex compiled above rather than compiling it again.
      return if value.nil? || regex.match(value)
      build_errors(:pattern, :schema, row, column, value, { "pattern" => pattern })
    end

    # Records an :invalid_regex error at most once per field.
    def build_regex_error(value, row, column, pattern, all_errors)
      return if @regex_error_exists
      build_errors(:invalid_regex, :schema, nil, column, ("#{name}: Constraints: Pattern: #{pattern}"),
        { "pattern" => constraints["pattern"] })
      @regex_error_exists = true
    end

    # Checks the "unique" constraint against the values seen so far.
    def validate_values(value, row, column)
      return unless constraints["unique"] == true
      if @uniques.include? value
        build_errors(:unique, :schema, row, column, value, { "unique" => true })
      else
        @uniques << value
      end
    end

    # Converts +value+ to the type named by the "type" constraint.
    # Missing values (nil or "") are not type-checked, in line with
    # validate_length treating nil like an empty value.
    #
    # @return the converted value, or nil when there is no type constraint,
    #   the value is missing, or conversion failed (an :invalid_type error
    #   is recorded in the last case)
    def validate_type(value, row, column)
      return nil unless constraints["type"]
      return nil if value.nil? || value == ""
      parsed = convert_to_type(value)
      if parsed.nil?
        failed = { "type" => constraints["type"] }
        failed["datePattern"] = constraints["datePattern"] if constraints["datePattern"]
        build_errors(:invalid_type, :schema, row, column, value, failed)
      end
      parsed
    end

    # Checks an already-converted value against "minimum" / "maximum".
    def validate_range(value, row, column)
      #TODO: we're ignoring issues with converting ranges to actual types, maybe we
      #should generate a warning? The schema is invalid
      if constraints["minimum"]
        minimum_value = convert_to_type( constraints["minimum"] )
        if minimum_value
          build_errors(:below_minimum, :schema, row, column, value,
            { "minimum" => constraints["minimum"] }) unless value >= minimum_value
        end
      end
      if constraints["maximum"]
        maximum_value = convert_to_type( constraints["maximum"] )
        if maximum_value
          build_errors(:above_maximum, :schema, row, column, value,
            { "maximum" => constraints["maximum"] }) unless value <= maximum_value
        end
      end
    end

    # Runs the converter registered for this field's type.
    #
    # @return the converted value, or nil when the type is unknown or the
    #   converter rejected the value
    def convert_to_type(value)
      converter = TYPE_VALIDATIONS[constraints["type"]]
      return nil unless converter
      begin
        converter.call value, constraints
      rescue ArgumentError, TypeError
        # TypeError covers non-string input (e.g. a numeric "minimum" on a
        # date field), which the converters cannot parse.
        nil
      end
    end

    # Type URI => converter. A converter returns the parsed value or raises
    # ArgumentError when the value is not valid for the type.
    #
    # NOTE(review): Kernel#Integer also accepts radix prefixes ("0x1A",
    # "0b101", "010") and underscores; confirm whether the integer types
    # should be restricted to plain decimal notation.
    TYPE_VALIDATIONS = {
        'http://www.w3.org/2001/XMLSchema#string'  => lambda { |value, constraints| value },
        'http://www.w3.org/2001/XMLSchema#int'     => lambda { |value, constraints| Integer value },
        'http://www.w3.org/2001/XMLSchema#integer' => lambda { |value, constraints| Integer value },
        'http://www.w3.org/2001/XMLSchema#float'   => lambda { |value, constraints| Float value },
        'http://www.w3.org/2001/XMLSchema#double'   => lambda { |value, constraints| Float value },
        # Only http / https URIs are accepted.
        'http://www.w3.org/2001/XMLSchema#anyURI'  => lambda do |value, constraints|
          begin
            u = URI.parse value
            raise ArgumentError unless u.kind_of?(URI::HTTP) || u.kind_of?(URI::HTTPS)
          rescue URI::InvalidURIError
            raise ArgumentError
          end
          u
        end,
        'http://www.w3.org/2001/XMLSchema#boolean' => lambda do |value, constraints|
          return true if ['true', '1'].include? value
          return false if ['false', '0'].include? value
          raise ArgumentError
        end,
        'http://www.w3.org/2001/XMLSchema#nonPositiveInteger' => lambda do |value, constraints|
          i = Integer value
          raise ArgumentError unless i <= 0
          i
        end,
        'http://www.w3.org/2001/XMLSchema#negativeInteger' => lambda do |value, constraints|
          i = Integer value
          raise ArgumentError unless i < 0
          i
        end,
        'http://www.w3.org/2001/XMLSchema#nonNegativeInteger' => lambda do |value, constraints|
          i = Integer value
          raise ArgumentError unless i >= 0
          i
        end,
        'http://www.w3.org/2001/XMLSchema#positiveInteger' => lambda do |value, constraints|
          i = Integer value
          raise ArgumentError unless i > 0
          i
        end,
        # The date/time converters parse with the "datePattern" constraint
        # (or the default shown) and then require that formatting the result
        # reproduces the input exactly.
        'http://www.w3.org/2001/XMLSchema#dateTime' => lambda do |value, constraints|
          date_pattern = constraints["datePattern"] || "%Y-%m-%dT%H:%M:%SZ"
          d = DateTime.strptime(value, date_pattern)
          raise ArgumentError unless d.strftime(date_pattern) == value
          d
        end,
        'http://www.w3.org/2001/XMLSchema#date' => lambda do |value, constraints|
          date_pattern = constraints["datePattern"] || "%Y-%m-%d"
          d = Date.strptime(value, date_pattern)
          raise ArgumentError unless d.strftime(date_pattern) == value
          d
        end,
        'http://www.w3.org/2001/XMLSchema#time' => lambda do |value, constraints|
          date_pattern = constraints["datePattern"] || "%H:%M:%S"
          d = DateTime.strptime(value, date_pattern)
          raise ArgumentError unless d.strftime(date_pattern) == value
          d
        end,
        'http://www.w3.org/2001/XMLSchema#gYear' => lambda do |value, constraints|
          date_pattern = constraints["datePattern"] || "%Y"
          d = Date.strptime(value, date_pattern)
          raise ArgumentError unless d.strftime(date_pattern) == value
          d
        end,
        'http://www.w3.org/2001/XMLSchema#gYearMonth' => lambda do |value, constraints|
          date_pattern = constraints["datePattern"] || "%Y-%m"
          d = Date.strptime(value, date_pattern)
          raise ArgumentError unless d.strftime(date_pattern) == value
          d
        end,
    }
  end
end