wjordan213-csvlint 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitattributes +2 -0
  4. data/.gitignore +28 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +32 -0
  7. data/CHANGELOG.md +361 -0
  8. data/Gemfile +7 -0
  9. data/LICENSE.md +22 -0
  10. data/README.md +328 -0
  11. data/Rakefile +17 -0
  12. data/bin/create_schema +32 -0
  13. data/bin/csvlint +10 -0
  14. data/features/check_format.feature +46 -0
  15. data/features/cli.feature +210 -0
  16. data/features/csv_options.feature +35 -0
  17. data/features/csvupload.feature +145 -0
  18. data/features/csvw_schema_validation.feature +127 -0
  19. data/features/fixtures/cr-line-endings.csv +0 -0
  20. data/features/fixtures/crlf-line-endings.csv +0 -0
  21. data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
  22. data/features/fixtures/inconsistent-line-endings.csv +0 -0
  23. data/features/fixtures/invalid-byte-sequence.csv +0 -0
  24. data/features/fixtures/invalid_many_rows.csv +0 -0
  25. data/features/fixtures/lf-line-endings.csv +0 -0
  26. data/features/fixtures/spreadsheet.xls +0 -0
  27. data/features/fixtures/spreadsheet.xlsx +0 -0
  28. data/features/fixtures/title-row.csv +0 -0
  29. data/features/fixtures/valid.csv +0 -0
  30. data/features/fixtures/valid_many_rows.csv +0 -0
  31. data/features/fixtures/windows-line-endings.csv +0 -0
  32. data/features/information.feature +22 -0
  33. data/features/parse_csv.feature +90 -0
  34. data/features/schema_validation.feature +105 -0
  35. data/features/sources.feature +17 -0
  36. data/features/step_definitions/cli_steps.rb +11 -0
  37. data/features/step_definitions/csv_options_steps.rb +24 -0
  38. data/features/step_definitions/information_steps.rb +13 -0
  39. data/features/step_definitions/parse_csv_steps.rb +42 -0
  40. data/features/step_definitions/schema_validation_steps.rb +33 -0
  41. data/features/step_definitions/sources_steps.rb +7 -0
  42. data/features/step_definitions/validation_errors_steps.rb +90 -0
  43. data/features/step_definitions/validation_info_steps.rb +22 -0
  44. data/features/step_definitions/validation_warnings_steps.rb +60 -0
  45. data/features/support/aruba.rb +56 -0
  46. data/features/support/env.rb +26 -0
  47. data/features/support/load_tests.rb +114 -0
  48. data/features/support/webmock.rb +1 -0
  49. data/features/validation_errors.feature +147 -0
  50. data/features/validation_info.feature +16 -0
  51. data/features/validation_warnings.feature +86 -0
  52. data/lib/csvlint.rb +27 -0
  53. data/lib/csvlint/cli.rb +165 -0
  54. data/lib/csvlint/csvw/column.rb +359 -0
  55. data/lib/csvlint/csvw/date_format.rb +182 -0
  56. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  57. data/lib/csvlint/csvw/number_format.rb +211 -0
  58. data/lib/csvlint/csvw/property_checker.rb +761 -0
  59. data/lib/csvlint/csvw/table.rb +204 -0
  60. data/lib/csvlint/csvw/table_group.rb +165 -0
  61. data/lib/csvlint/error_collector.rb +27 -0
  62. data/lib/csvlint/error_message.rb +15 -0
  63. data/lib/csvlint/field.rb +196 -0
  64. data/lib/csvlint/schema.rb +92 -0
  65. data/lib/csvlint/validate.rb +599 -0
  66. data/lib/csvlint/version.rb +3 -0
  67. data/spec/csvw/column_spec.rb +112 -0
  68. data/spec/csvw/date_format_spec.rb +49 -0
  69. data/spec/csvw/number_format_spec.rb +417 -0
  70. data/spec/csvw/table_group_spec.rb +143 -0
  71. data/spec/csvw/table_spec.rb +90 -0
  72. data/spec/field_spec.rb +252 -0
  73. data/spec/schema_spec.rb +211 -0
  74. data/spec/spec_helper.rb +17 -0
  75. data/spec/validator_spec.rb +619 -0
  76. data/wjordan213_csvlint.gemspec +46 -0
  77. metadata +490 -0
@@ -0,0 +1,204 @@
1
module Csvlint
  module Csvw
    # A single table description from CSVW metadata, together with the state
    # needed to validate that table's header and rows: the values seen so far
    # for the primary key, for this table's own foreign keys, and for the
    # columns that foreign keys in other tables point at.
    #
    # Errors and warnings are exposed through Csvlint::ErrorCollector; every
    # validate_* method starts by resetting them, so they always describe the
    # most recent call only.
    class Table

      include Csvlint::ErrorCollector

      attr_reader :columns, :dialect, :table_direction, :foreign_keys, :foreign_key_references, :id, :notes, :primary_key, :schema, :suppress_output, :transformations, :url, :annotations

      # Stores the description and seeds the error/warning lists with the
      # supplied warnings plus whatever the columns already carry.
      def initialize(url, columns: [], dialect: {}, table_direction: :auto, foreign_keys: [], id: nil, notes: [], primary_key: nil, schema: nil, suppress_output: false, transformations: [], annotations: [], warnings: [])
        @url = url
        @columns = columns
        @dialect = dialect
        @table_direction = table_direction
        @foreign_keys = foreign_keys
        @foreign_key_values = {}
        @foreign_key_references = []
        @foreign_key_reference_values = {}
        @id = id
        @notes = notes
        @primary_key = primary_key
        @primary_key_values = {}
        @schema = schema
        @suppress_output = suppress_output
        @transformations = transformations
        @annotations = annotations
        reset
        @warnings += warnings
        @errors += columns.flat_map { |c| c.errors }
        @warnings += columns.flat_map { |c| c.warnings }
      end

      # Validates each header cell against the column description at the same
      # position. A header with no matching column is a :malformed_header
      # error. Nothing is checked when the table declares no columns.
      #
      # Returns true when no errors were collected.
      def validate_header(headers)
        reset
        unless columns.empty?
          headers.each_with_index do |header, i|
            column = columns[i]
            if column
              column.validate_header(header)
              @errors += column.errors
              @warnings += column.warnings
            else
              build_errors(:malformed_header, :schema, 1, nil, header, nil)
            end
          end
        end
        valid?
      end

      # Validates one row of values and records the key values needed later by
      # validate_foreign_keys / validate_foreign_key_references.
      #
      # Returns true when no errors were collected for this row.
      def validate_row(values, row=nil)
        reset
        unless columns.empty?
          values.each_with_index do |value, i|
            column = columns[i]
            if column
              column.validate(value, row)
              @errors += column.errors
              @warnings += column.warnings
            else
              build_errors(:too_many_values, :schema, row, nil, value, nil)
            end
          end
        end

        unless @primary_key.nil?
          key = @primary_key.map { |column| column.parse(values[column.number - 1], row) }
          # the constraint of the error is the row that first used this key
          build_errors(:duplicate_key, :schema, row, nil, key.join(","), @primary_key_values[key]) if @primary_key_values.include?(key)
          @primary_key_values[key] = row
        end

        # build a record of the unique values that are referenced by foreign keys from other tables
        # so that later we can check whether those foreign keys reference these values
        @foreign_key_references.each do |foreign_key|
          key = foreign_key["referenced_columns"].map { |column| column.parse(values[column.number - 1], row) }
          rows_by_key = (@foreign_key_reference_values[foreign_key] ||= {})
          (rows_by_key[key] ||= []) << row
        end

        # build a record of the references from this row to other tables
        # we can't check yet whether these exist in the other tables because
        # we might not have parsed those other tables
        @foreign_keys.each do |foreign_key|
          key = foreign_key["referencing_columns"].map { |column| column.parse(values[column.number - 1], row) }
          seen_keys = (@foreign_key_values[foreign_key] ||= [])
          seen_keys << key unless seen_keys.include?(key)
        end

        valid?
      end

      # Checks every foreign key of this table against the values recorded by
      # the table it references.
      #
      # Results are gathered in local lists and assigned at the end: a
      # self-referencing foreign key makes the referenced table *this* table,
      # whose validate_foreign_key_references call resets @errors and would
      # otherwise discard what earlier foreign keys reported.
      #
      # Returns true when no errors were collected.
      def validate_foreign_keys
        reset
        collected_errors = []
        collected_warnings = []
        @foreign_keys.each do |foreign_key|
          # no entry means no row was validated for this key yet
          local = @foreign_key_values[foreign_key] || []
          remote_table = foreign_key["referenced_table"]
          remote_table.validate_foreign_key_references(foreign_key, @url, local)
          collected_errors += remote_table.errors
          collected_warnings += remote_table.warnings
        end
        @errors = collected_errors
        @warnings = collected_warnings
        valid?
      end

      # Checks the key values +remote+ (collected by the table at +remote_url+)
      # against the values this table recorded for +foreign_key+. A value
      # matching no row is :unmatched_foreign_key_reference; a value matching
      # more than one row is :multiple_matched_rows.
      #
      # Returns true when no errors were collected.
      def validate_foreign_key_references(foreign_key, remote_url, remote)
        reset
        # no entry means this table recorded no rows for the key: nothing can match
        local = @foreign_key_reference_values[foreign_key] || {}
        context = {
          "from" => { "url" => remote_url.to_s.split("/")[-1], "columns" => foreign_key["columnReference"] },
          "to" => { "url" => @url.to_s.split("/")[-1], "columns" => foreign_key["reference"]["columnReference"] }
        }
        (remote || []).each do |r|
          matched_rows = local[r]
          if matched_rows
            build_errors(:multiple_matched_rows, :schema, nil, nil, r, context) if matched_rows.length > 1
          else
            build_errors(:unmatched_foreign_key_reference, :schema, nil, nil, r, context)
          end
        end
        valid?
      end

      # Builds a Table from a parsed table description.
      #
      # Each property is passed through Csvw::PropertyChecker and sorted by the
      # type it reports (annotation, table/common, column, or anything else,
      # which is treated as inherited). Columns, the primary key, foreign keys
      # and row titles are then resolved from the table schema.
      #
      # Raises Csvlint::Csvw::MetadataError for a wrong @type, a non-array
      # columns property, a non-virtual column after a virtual one, duplicate
      # column names, or foreign keys / row titles naming unknown columns.
      # An unknown column in primaryKey is only a warning and drops the key.
      def self.from_json(table_desc, base_url=nil, lang="und", inherited_properties={})
        annotations = {}
        warnings = []
        table_properties = {}
        columns = []
        notes = []
        inherited_properties = inherited_properties.clone
        table_path = "$.tables[?(@.url = '#{table_desc["url"]}')]"

        table_desc.each do |property,value|
          if property == "@type"
            raise Csvlint::Csvw::MetadataError.new("#{table_path}.@type"), "@type of table is not 'Table'" unless value == 'Table'
          elsif property == "notes"
            notes = value
          else
            v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
            warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
            case type
            when :annotation
              annotations[property] = v
            when :table, :common
              table_properties[property] = v
            when :column
              warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "#{property}", nil)
            else
              inherited_properties[property] = v
            end
          end
        end

        table_schema = table_properties["tableSchema"] || inherited_properties["tableSchema"]
        # Named columns are looked up through this map rather than by position:
        # unnamed columns are in +columns+ but have no name entry, so a
        # position in a list of names does not identify a column.
        columns_by_name = {}
        foreign_keys = []
        primary_key_columns = []
        primary_key_valid = true
        if table_schema
          raise Csvlint::Csvw::MetadataError.new("#{table_path}.tableSchema.columns"), "schema columns is not an array" unless table_schema["columns"].instance_of? Array
          virtual_columns = false
          table_schema["columns"].each_with_index do |column_desc,i|
            if column_desc.instance_of? Hash
              column = Csvlint::Csvw::Column.from_json(i+1, column_desc, base_url, lang, inherited_properties)
              raise Csvlint::Csvw::MetadataError.new("#{table_path}.tableSchema.columns[#{i}].virtual"), "virtual columns before non-virtual column #{column.name || i}" if virtual_columns && !column.virtual
              virtual_columns = virtual_columns || column.virtual
              raise Csvlint::Csvw::MetadataError.new("#{table_path}.tableSchema.columns"), "multiple columns named #{column.name}" if columns_by_name.key?(column.name)
              columns_by_name[column.name] = column unless column.name.nil?
              columns << column
            else
              warnings << Csvlint::ErrorMessage.new(:invalid_column_description, :metadata, nil, nil, "#{column_desc}", nil)
            end
          end

          Array(table_schema["primaryKey"]).each do |reference|
            column = columns_by_name[reference]
            if column
              primary_key_columns << column
            else
              warnings << Csvlint::ErrorMessage.new(:invalid_column_reference, :metadata, nil, nil, "primaryKey: #{reference}", nil)
              primary_key_valid = false
            end
          end

          foreign_keys = table_schema["foreignKeys"]
          (foreign_keys || []).each_with_index do |foreign_key, fk_index|
            foreign_key["referencing_columns"] = Array(foreign_key["columnReference"]).map do |reference|
              column = columns_by_name[reference]
              raise Csvlint::Csvw::MetadataError.new("#{table_path}.tableSchema.foreignKeys[#{fk_index}].columnReference"), "foreignKey references non-existant column" unless column
              column
            end
          end

          row_titles = table_schema["rowTitles"]
          (row_titles || []).each_with_index do |row_title,i|
            raise Csvlint::Csvw::MetadataError.new("#{table_path}.tableSchema.rowTitles[#{i}]"), "rowTitles references non-existant column" unless columns_by_name.key?(row_title)
          end
        end

        self.new(table_properties["url"],
          id: table_properties["@id"],
          columns: columns,
          dialect: table_properties["dialect"],
          foreign_keys: foreign_keys || [],
          notes: notes,
          primary_key: primary_key_valid && !primary_key_columns.empty? ? primary_key_columns : nil,
          schema: table_schema ? table_schema["@id"] : nil,
          annotations: annotations,
          warnings: warnings
        )
      end

    end
  end
end
@@ -0,0 +1,165 @@
1
module Csvlint
  module Csvw
    # A group of CSVW table descriptions keyed by table URL. Header and row
    # validation is delegated to the Table registered for the given URL, and
    # the group tracks which tables have had at least one row validated so
    # that foreign keys are only checked once every table has been seen.
    #
    # Errors and warnings are exposed through Csvlint::ErrorCollector; every
    # validate_* method starts by resetting them.
    class TableGroup

      include Csvlint::ErrorCollector

      # Top-level properties handled directly by from_json instead of being
      # passed to the property checker.
      VALID_PROPERTIES = ['tables', 'notes', '@type'].freeze

      attr_reader :url, :id, :tables, :notes, :annotations

      # +tables+ maps table URL => Table. The error/warning lists are seeded
      # with the supplied warnings plus whatever the tables already carry.
      def initialize(url, id: nil, tables: {}, notes: [], annotations: {}, warnings: [])
        @url = url
        @id = id
        @tables = tables
        @notes = notes
        @annotations = annotations
        @validated_tables = {}
        @tables.each_key { |table_url| @validated_tables[table_url] = false }
        reset
        @warnings += warnings
        @errors += @tables.flat_map { |_url, table| table.errors }
        @warnings += @tables.flat_map { |_url, table| table.warnings }
      end

      # Validates a header row against the table registered under +table_url+.
      # A File is turned into the "file:<absolute path>" URL form first.
      #
      # NOTE(review): a URL with no registered table raises NoMethodError on
      # nil here - confirm callers only pass URLs present in +tables+.
      def validate_header(header, table_url)
        reset
        table_url = "file:#{File.absolute_path(table_url)}" if table_url.instance_of? File
        table = tables[table_url]
        table.validate_header(header)
        @errors += table.errors
        @warnings += table.warnings
        valid?
      end

      # Validates one row against the table registered under +table_url+ and
      # marks that table as seen. +all_errors+ is accepted but not used here.
      def validate_row(values, row=nil, all_errors=[], table_url)
        reset
        table_url = "file:#{File.absolute_path(table_url)}" if table_url.instance_of? File
        @validated_tables[table_url] = true
        table = tables[table_url]
        table.validate_row(values, row)
        @errors += table.errors
        @warnings += table.warnings
        valid?
      end

      # Checks foreign keys across all tables, but only once every table in
      # the group has had a row validated; until then this reports no errors.
      def validate_foreign_keys
        reset
        unless @validated_tables.has_value?(false)
          @tables.each_value do |table|
            table.validate_foreign_keys
            @errors += table.errors
            @warnings += table.warnings
          end
        end
        valid?
      end

      # Builds a TableGroup from parsed metadata JSON located at +url+.
      #
      # Steps: read @base/@language from the second @context entry, wrap a
      # bare table description (one with "url" but no "tables") into a group,
      # classify the remaining properties via Csvw::PropertyChecker, build a
      # Table per description, then link each foreign key to the table and
      # columns it references.
      #
      # Note that +json+ is modified: "@context" is removed and each table
      # description's "url" is replaced by its resolved form.
      #
      # Raises Csvlint::Csvw::MetadataError for invalid @context contents, a
      # wrong @type, a missing/empty/non-array tables property, and foreign
      # keys that reference an unknown table, schema or column.
      def self.from_json(url, json)
        warnings = []
        tables = {}
        annotations = {}
        inherited_properties = {}
        common_properties = {}
        base_url = URI(url.to_s.strip)
        lang = "und"

        context = json["@context"]
        if context.instance_of?(Array) && context[1]
          context[1].each do |property,value|
            v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
            if warning.nil? || warning.empty?
              if type == :context
                base_url = v if property == "@base"
                lang = v if property == "@language"
              else
                raise Csvlint::Csvw::MetadataError.new("$.@context"), "@context contains properties other than @base or @language (#{property})"
              end
            else
              raise Csvlint::Csvw::MetadataError.new("$.@context"), "@context contains properties other than @base or @language (#{property})" unless ["@base", "@language"].include?(property)
              warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "@context: #{property}: #{value}", nil) }
            end
          end
        end
        json.delete("@context")

        # a description with a url but no tables is a single table: wrap it
        json = { "tables" => [ json ] } if !json["tables"] && json["url"]

        json.each do |property,value|
          next if VALID_PROPERTIES.include? property
          v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
          warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
          case type
          when :annotation
            annotations[property] = v
          when :common
            common_properties[property] = v
          when :column
            warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "#{property}", nil)
          else
            inherited_properties[property] = v
          end
        end

        id = common_properties["@id"]

        raise Csvlint::Csvw::MetadataError.new("$.@type"), "@type of table group is not 'TableGroup'" if json["@type"] && json["@type"] != 'TableGroup'

        table_descs = json["tables"]
        raise Csvlint::Csvw::MetadataError.new("$"), "no tables property" unless table_descs
        # respond_to? guard: a value without #empty? (e.g. a number) must fall
        # through to the "not an array" error instead of raising NoMethodError
        raise Csvlint::Csvw::MetadataError.new("$.tables"), "empty tables property" if table_descs.respond_to?(:empty?) && table_descs.empty?
        raise Csvlint::Csvw::MetadataError.new("$.tables"), "tables property is not an array" unless table_descs.instance_of? Array

        table_descs.each do |table_desc|
          if table_desc.instance_of? Hash
            table_url = table_desc["url"]
            unless table_url.instance_of? String
              warnings << Csvlint::ErrorMessage.new(:invalid_url, :metadata, nil, nil, "url: #{table_url}", nil)
              table_url = ""
            end
            table_url = URI.join(base_url, table_url).to_s
            table_desc["url"] = table_url
            table = Csvlint::Csvw::Table.from_json(table_desc, base_url, lang, inherited_properties)
            tables[table_url] = table
          else
            warnings << Csvlint::ErrorMessage.new(:invalid_table_description, :metadata, nil, nil, "#{table_desc}", nil)
          end
        end

        tables.each do |table_url, table|
          table.foreign_keys.each_with_index do |foreign_key,i|
            reference = foreign_key["reference"]
            resource = nil
            if reference["resource"]
              resource = URI.join(base_url, reference["resource"]).to_s
              referenced_table = tables[resource]
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_url}')].tableSchema.foreign_keys[#{i}].reference.resource"), "foreign key references table that does not exist (#{resource})" if referenced_table.nil?
            else
              schema_url = URI.join(base_url, reference["schemaReference"]).to_s
              # the first table using the referenced schema is the target
              referenced_table = tables.values.find { |candidate| candidate.schema == schema_url }
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_url}')].tableSchema.foreign_keys[#{i}].reference.schemaReference"), "foreign key references schema that is not used (#{schema_url})" if referenced_table.nil?
            end
            foreign_key["referenced_table"] = referenced_table

            table_columns = {}
            referenced_table.columns.each do |column|
              table_columns[column.name] = column if column.name
            end

            referenced_columns = Array(reference["columnReference"]).map do |column_reference|
              column = table_columns[column_reference]
              # on the schemaReference branch there is no resource URL, so
              # name the table that was actually searched
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_url}')].tableSchema.foreign_keys[#{i}].reference.columnReference"), "column named #{column_reference} does not exist in #{resource || referenced_table.url}" if column.nil?
              column
            end
            foreign_key["referenced_columns"] = referenced_columns
            referenced_table.foreign_key_references << foreign_key
          end
        end

        self.new(base_url, id: id, tables: tables, notes: json["notes"] || [], annotations: annotations, warnings: warnings)
      end

    end
  end
end
@@ -0,0 +1,27 @@
1
module Csvlint
  # Mixin that gives a class three message lists - errors, warnings and
  # informational messages - plus helpers to append to them.
  #
  # The lists are created on first use, so valid? and the build_* helpers
  # work even when the including class has not called reset yet.
  module ErrorCollector
    # Collected validation errors (an empty list until something is added).
    def errors
      @errors ||= []
    end

    # Collected validation warnings (an empty list until something is added).
    def warnings
      @warnings ||= []
    end

    # Collected informational messages (an empty list until something is added).
    def info_messages
      @info_messages ||= []
    end

    # Creates a validation error
    def build_errors(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
      errors << Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
    end

    # Creates a validation warning
    def build_warnings(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
      warnings << Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
    end

    # Creates a validation information message
    def build_info_messages(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
      info_messages << Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
    end

    # True when no errors have been collected; warnings and info messages do
    # not affect validity.
    def valid?
      errors.empty?
    end

    # Replaces all three lists with fresh empty ones.
    def reset
      @errors = []
      @warnings = []
      @info_messages = []
    end
  end
end
@@ -0,0 +1,15 @@
1
module Csvlint
  # Read-only record for one validation message. The same class is used for
  # errors, warnings and informational messages; which of these it is depends
  # on the list it is stored in.
  class ErrorMessage
    attr_reader :type, :category, :row, :column, :content, :constraints

    # All six values are stored as given and exposed through the readers.
    def initialize(type, category, row, column, content, constraints)
      @type, @category = type, category
      @row, @column = row, column
      @content, @constraints = content, constraints
    end
  end
end
@@ -0,0 +1,196 @@
1
module Csvlint

  # One field of a schema: a name, optional title/description and a hash of
  # constraints ("required", "minLength", "maxLength", "pattern", "unique",
  # "type", "datePattern", "minimum", "maximum") that cell values are checked
  # against. Results are exposed through Csvlint::ErrorCollector.
  class Field
    include Csvlint::ErrorCollector

    attr_reader :name, :constraints, :title, :description

    def initialize(name, constraints={}, title=nil, description=nil)
      @name = name
      @constraints = constraints || {}
      @uniques = Set.new   # values seen so far, for the "unique" constraint
      @title = title
      @description = description
      reset
    end

    # Validates one cell value against every constraint of this field.
    #
    # +all_errors+ is the list of errors collected so far by the caller: when
    # it already holds an :invalid_regex error for this column the pattern
    # check is skipped, so a broken pattern is not examined again.
    #
    # Returns true when this value produced no errors.
    def validate_column(value, row=nil, column=nil, all_errors=[])
      reset
      regex_already_invalid = all_errors.any? { |error| error.type == :invalid_regex && error.column == column }
      validate_regex(value, row, column, all_errors) unless regex_already_invalid
      validate_length(value, row, column)
      validate_values(value, row, column)
      parsed = validate_type(value, row, column)
      validate_range(parsed, row, column) unless parsed.nil?
      valid?
    end

    private

    # Checks the "required", "minLength" and "maxLength" constraints. A nil
    # value counts as missing and as too short, but never as too long.
    def validate_length(value, row, column)
      if constraints["required"] == true
        build_errors(:missing_value, :schema, row, column, value,
          { "required" => true }) if value.nil? || value.length == 0
      end
      if constraints["minLength"]
        build_errors(:min_length, :schema, row, column, value,
          { "minLength" => constraints["minLength"] }) if value.nil? || value.length < constraints["minLength"]
      end
      if constraints["maxLength"]
        build_errors(:max_length, :schema, row, column, value,
          { "maxLength" => constraints["maxLength"] } ) if !value.nil? && value.length > constraints["maxLength"]
      end
    end

    # If a pattern exists, raise an invalid regex error if it is not in
    # valid regex form, else, if the value of the relevant field in the csv
    # does not match the given regex pattern in the schema, raise a
    # pattern error.
    def validate_regex(value, row, column, all_errors)
      pattern = constraints["pattern"]
      return unless pattern
      begin
        regex = Regexp.new(pattern)
      rescue RegexpError
        build_regex_error(value, row, column, pattern, all_errors)
        return
      end
      # reuse the compiled regex instead of compiling the pattern a second time
      build_errors(:pattern, :schema, row, column, value,
        { "pattern" => pattern } ) if !value.nil? && !regex.match(value)
    end

    # Reports an unusable pattern once per Field instance; later calls are
    # no-ops.
    def build_regex_error(value, row, column, pattern, all_errors)
      return if @regex_error_exists
      build_errors(:invalid_regex, :schema, nil, column, ("#{name}: Constraints: Pattern: #{pattern}"),
        { "pattern" => constraints["pattern"] })
      @regex_error_exists = true
    end

    # Checks the "unique" constraint: a value already seen by this field is an
    # error, any other value is remembered.
    def validate_values(value, row, column)
      return unless constraints["unique"] == true
      if @uniques.include? value
        build_errors(:unique, :schema, row, column, value, { "unique" => true })
      else
        @uniques << value
      end
    end

    # Converts the value to the constrained type. Missing values (nil or "")
    # are not type-checked; presence is the job of the "required" constraint.
    #
    # Returns the converted value, or nil when there is no type constraint,
    # the value is missing, or conversion failed (an :invalid_type error is
    # recorded in that last case).
    def validate_type(value, row, column)
      return nil unless constraints["type"]
      return nil if value.nil? || value == ""
      parsed = convert_to_type(value)
      if parsed.nil?
        failed = { "type" => constraints["type"] }
        failed["datePattern"] = constraints["datePattern"] if constraints["datePattern"]
        build_errors(:invalid_type, :schema, row, column, value, failed)
      end
      parsed
    end

    # Checks the already-converted value against "minimum" and "maximum".
    def validate_range(value, row, column)
      #TODO: we're ignoring issues with converting ranges to actual types, maybe we
      #should generate a warning? The schema is invalid
      if constraints["minimum"]
        minimum_value = convert_to_type( constraints["minimum"] )
        if minimum_value
          build_errors(:below_minimum, :schema, row, column, value,
            { "minimum" => constraints["minimum"] }) unless value >= minimum_value
        end
      end
      if constraints["maximum"]
        maximum_value = convert_to_type( constraints["maximum"] )
        if maximum_value
          build_errors(:above_maximum, :schema, row, column, value,
            { "maximum" => constraints["maximum"] }) unless value <= maximum_value
        end
      end
    end

    # Runs the converter registered for the constrained type. Returns nil when
    # the type is unknown or the value cannot be converted; TypeError is
    # rescued as well because the converters raise it for non-string input.
    def convert_to_type(value)
      converter = TYPE_VALIDATIONS[constraints["type"]]
      return nil unless converter
      begin
        converter.call(value, constraints)
      rescue ArgumentError, TypeError
        nil
      end
    end

    # Parses an integer. Strings are read strictly as base 10 so that a
    # leading zero or a "0x"/"0b" prefix is not treated as a radix marker
    # ("010" is ten, not eight). Non-string input (for example a numeric
    # minimum taken from JSON) is converted without a base, which Integer()
    # requires.
    def self.parse_integer(value)
      value.is_a?(String) ? Integer(value, 10) : Integer(value)
    end
    private_class_method :parse_integer

    # Parses +value+ with +parser+.strptime and accepts it only if formatting
    # the result with the same pattern reproduces the input exactly.
    def self.parse_exact_date(parser, value, pattern)
      parsed = parser.strptime(value, pattern)
      raise ArgumentError unless parsed.strftime(pattern) == value
      parsed
    end
    private_class_method :parse_exact_date

    # Converter per supported type URI. Each takes (value, constraints) and
    # returns the converted value or raises ArgumentError.
    TYPE_VALIDATIONS = {
        'http://www.w3.org/2001/XMLSchema#string'  => lambda { |value, constraints| value },
        'http://www.w3.org/2001/XMLSchema#int'     => lambda { |value, constraints| parse_integer(value) },
        'http://www.w3.org/2001/XMLSchema#integer' => lambda { |value, constraints| parse_integer(value) },
        'http://www.w3.org/2001/XMLSchema#float'   => lambda { |value, constraints| Float value },
        'http://www.w3.org/2001/XMLSchema#double'   => lambda { |value, constraints| Float value },
        'http://www.w3.org/2001/XMLSchema#anyURI'  => lambda do |value, constraints|
          begin
            u = URI.parse value
            # only http(s) URIs are accepted
            raise ArgumentError unless u.kind_of?(URI::HTTP) || u.kind_of?(URI::HTTPS)
          rescue URI::InvalidURIError
            raise ArgumentError
          end
          u
        end,
        'http://www.w3.org/2001/XMLSchema#boolean' => lambda do |value, constraints|
          return true if ['true', '1'].include? value
          return false if ['false', '0'].include? value
          raise ArgumentError
        end,
        'http://www.w3.org/2001/XMLSchema#nonPositiveInteger' => lambda do |value, constraints|
          i = parse_integer(value)
          raise ArgumentError unless i <= 0
          i
        end,
        'http://www.w3.org/2001/XMLSchema#negativeInteger' => lambda do |value, constraints|
          i = parse_integer(value)
          raise ArgumentError unless i < 0
          i
        end,
        'http://www.w3.org/2001/XMLSchema#nonNegativeInteger' => lambda do |value, constraints|
          i = parse_integer(value)
          raise ArgumentError unless i >= 0
          i
        end,
        'http://www.w3.org/2001/XMLSchema#positiveInteger' => lambda do |value, constraints|
          i = parse_integer(value)
          raise ArgumentError unless i > 0
          i
        end,
        'http://www.w3.org/2001/XMLSchema#dateTime' => lambda do |value, constraints|
          parse_exact_date(DateTime, value, constraints["datePattern"] || "%Y-%m-%dT%H:%M:%SZ")
        end,
        'http://www.w3.org/2001/XMLSchema#date' => lambda do |value, constraints|
          parse_exact_date(Date, value, constraints["datePattern"] || "%Y-%m-%d")
        end,
        'http://www.w3.org/2001/XMLSchema#time' => lambda do |value, constraints|
          parse_exact_date(DateTime, value, constraints["datePattern"] || "%H:%M:%S")
        end,
        'http://www.w3.org/2001/XMLSchema#gYear' => lambda do |value, constraints|
          parse_exact_date(Date, value, constraints["datePattern"] || "%Y")
        end,
        'http://www.w3.org/2001/XMLSchema#gYearMonth' => lambda do |value, constraints|
          parse_exact_date(Date, value, constraints["datePattern"] || "%Y-%m")
        end,
    }.freeze
  end
end