wjordan213.csvlint 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitattributes +2 -0
- data/.gitignore +28 -0
- data/.ruby-version +1 -0
- data/.travis.yml +32 -0
- data/CHANGELOG.md +361 -0
- data/Gemfile +7 -0
- data/LICENSE.md +22 -0
- data/README.md +328 -0
- data/Rakefile +17 -0
- data/bin/create_schema +32 -0
- data/bin/csvlint +10 -0
- data/features/check_format.feature +46 -0
- data/features/cli.feature +210 -0
- data/features/csv_options.feature +35 -0
- data/features/csvupload.feature +145 -0
- data/features/csvw_schema_validation.feature +127 -0
- data/features/fixtures/cr-line-endings.csv +0 -0
- data/features/fixtures/crlf-line-endings.csv +0 -0
- data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
- data/features/fixtures/inconsistent-line-endings.csv +0 -0
- data/features/fixtures/invalid-byte-sequence.csv +0 -0
- data/features/fixtures/invalid_many_rows.csv +0 -0
- data/features/fixtures/lf-line-endings.csv +0 -0
- data/features/fixtures/spreadsheet.xls +0 -0
- data/features/fixtures/spreadsheet.xlsx +0 -0
- data/features/fixtures/title-row.csv +0 -0
- data/features/fixtures/valid.csv +0 -0
- data/features/fixtures/valid_many_rows.csv +0 -0
- data/features/fixtures/windows-line-endings.csv +0 -0
- data/features/information.feature +22 -0
- data/features/parse_csv.feature +90 -0
- data/features/schema_validation.feature +105 -0
- data/features/sources.feature +17 -0
- data/features/step_definitions/cli_steps.rb +11 -0
- data/features/step_definitions/csv_options_steps.rb +24 -0
- data/features/step_definitions/information_steps.rb +13 -0
- data/features/step_definitions/parse_csv_steps.rb +42 -0
- data/features/step_definitions/schema_validation_steps.rb +33 -0
- data/features/step_definitions/sources_steps.rb +7 -0
- data/features/step_definitions/validation_errors_steps.rb +90 -0
- data/features/step_definitions/validation_info_steps.rb +22 -0
- data/features/step_definitions/validation_warnings_steps.rb +60 -0
- data/features/support/aruba.rb +56 -0
- data/features/support/env.rb +26 -0
- data/features/support/load_tests.rb +114 -0
- data/features/support/webmock.rb +1 -0
- data/features/validation_errors.feature +147 -0
- data/features/validation_info.feature +16 -0
- data/features/validation_warnings.feature +86 -0
- data/lib/csvlint.rb +27 -0
- data/lib/csvlint/cli.rb +165 -0
- data/lib/csvlint/csvw/column.rb +359 -0
- data/lib/csvlint/csvw/date_format.rb +182 -0
- data/lib/csvlint/csvw/metadata_error.rb +13 -0
- data/lib/csvlint/csvw/number_format.rb +211 -0
- data/lib/csvlint/csvw/property_checker.rb +761 -0
- data/lib/csvlint/csvw/table.rb +204 -0
- data/lib/csvlint/csvw/table_group.rb +165 -0
- data/lib/csvlint/error_collector.rb +27 -0
- data/lib/csvlint/error_message.rb +15 -0
- data/lib/csvlint/field.rb +196 -0
- data/lib/csvlint/schema.rb +92 -0
- data/lib/csvlint/validate.rb +599 -0
- data/lib/csvlint/version.rb +3 -0
- data/spec/csvw/column_spec.rb +112 -0
- data/spec/csvw/date_format_spec.rb +49 -0
- data/spec/csvw/number_format_spec.rb +417 -0
- data/spec/csvw/table_group_spec.rb +143 -0
- data/spec/csvw/table_spec.rb +90 -0
- data/spec/field_spec.rb +252 -0
- data/spec/schema_spec.rb +211 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/validator_spec.rb +619 -0
- data/wjordan213_csvlint.gemspec +46 -0
- metadata +490 -0
@@ -0,0 +1,204 @@
|
|
1
|
+
module Csvlint
  module Csvw
    # One table of a CSVW metadata document.
    #
    # Holds the column descriptions plus primary/foreign key definitions and
    # validates header rows and data rows against them. Errors and warnings are
    # gathered through Csvlint::ErrorCollector; every validate_* call resets the
    # collector first, so #errors / #warnings only describe the latest call.
    class Table

      include Csvlint::ErrorCollector

      attr_reader :columns, :dialect, :table_direction, :foreign_keys, :foreign_key_references, :id, :notes, :primary_key, :schema, :suppress_output, :transformations, :url, :annotations

      # Builds a table description.
      #
      # url          - identifier of the table (stored as given).
      # columns      - column objects; their current errors/warnings are copied
      #                into this table's collector.
      # primary_key  - array of column objects forming the primary key, or nil.
      # foreign_keys - array of foreign key hashes (see .from_json).
      # warnings     - warnings already produced while parsing the metadata.
      #
      # NOTE(review): annotations defaults to [] here while .from_json passes a
      # Hash -- confirm which shape callers expect before changing the default.
      def initialize(url, columns: [], dialect: {}, table_direction: :auto, foreign_keys: [], id: nil, notes: [], primary_key: nil, schema: nil, suppress_output: false, transformations: [], annotations: [], warnings: [])
        @url = url
        @columns = columns
        @dialect = dialect
        @table_direction = table_direction
        @foreign_keys = foreign_keys
        @foreign_key_values = {}
        @foreign_key_references = []
        @foreign_key_reference_values = {}
        @id = id
        @notes = notes
        @primary_key = primary_key
        @primary_key_values = {}
        @schema = schema
        @suppress_output = suppress_output
        @transformations = transformations
        @annotations = annotations
        reset
        @warnings += warnings
        @errors += columns.map{|c| c.errors}.flatten
        @warnings += columns.map{|c| c.warnings}.flatten
      end

      # Validates each header cell against the column at the same position.
      # A header cell without a matching column is reported as
      # :malformed_header. Nothing is checked when the table has no columns.
      #
      # Returns true when no errors were collected.
      def validate_header(headers)
        reset
        headers.each_with_index do |header,i|
          if columns[i]
            columns[i].validate_header(header)
            @errors += columns[i].errors
            @warnings += columns[i].warnings
          else
            build_errors(:malformed_header, :schema, 1, nil, header, nil)
          end
        end unless columns.empty?
        return valid?
      end

      # Validates one data row and records the key values needed later for
      # primary key and foreign key checks.
      #
      # values - the cell values of the row, in column order.
      # row    - the row number used in error messages.
      #
      # Returns true when no errors were collected for this row.
      def validate_row(values, row=nil)
        reset
        values.each_with_index do |value,i|
          column = columns[i]
          if column
            column.validate(value, row)
            @errors += column.errors
            @warnings += column.warnings
          else
            build_errors(:too_many_values, :schema, row, nil, value, nil)
          end
        end unless columns.empty?
        unless @primary_key.nil?
          # column.number is used as a 1-based position into values
          key = @primary_key.map { |column| column.parse(values[column.number - 1], row) }
          build_errors(:duplicate_key, :schema, row, nil, key.join(","), @primary_key_values[key]) if @primary_key_values.include?(key)
          @primary_key_values[key] = row
        end
        # build a record of the unique values that are referenced by foreign keys from other tables
        # so that later we can check whether those foreign keys reference these values
        @foreign_key_references.each do |foreign_key|
          referenced_columns = foreign_key["referenced_columns"]
          key = referenced_columns.map{ |column| column.parse(values[column.number - 1], row) }
          known_values = (@foreign_key_reference_values[foreign_key] ||= {})
          (known_values[key] ||= []) << row
        end
        # build a record of the references from this row to other tables
        # we can't check yet whether these exist in the other tables because
        # we might not have parsed those other tables
        @foreign_keys.each do |foreign_key|
          referencing_columns = foreign_key["referencing_columns"]
          key = referencing_columns.map{ |column| column.parse(values[column.number - 1], row) }
          known_values = (@foreign_key_values[foreign_key] ||= [])
          known_values << key unless known_values.include?(key)
        end
        return valid?
      end

      # Checks every foreign key of this table against the values recorded by
      # the referenced table. Call this after the rows of all involved tables
      # have been passed to #validate_row.
      #
      # Returns true when no errors were collected.
      def validate_foreign_keys
        reset
        # Results are gathered locally: when a foreign key references this very
        # table, validate_foreign_key_references resets our own collector, which
        # would otherwise discard what earlier foreign keys reported.
        collected_errors = []
        collected_warnings = []
        @foreign_keys.each do |foreign_key|
          # no rows seen for this key means there is nothing to look up
          local = @foreign_key_values[foreign_key] || []
          remote_table = foreign_key["referenced_table"]
          remote_table.validate_foreign_key_references(foreign_key, @url, local)
          collected_errors.concat(remote_table.errors)
          collected_warnings.concat(remote_table.warnings)
        end
        @errors = collected_errors
        @warnings = collected_warnings
        return valid?
      end

      # Checks that each referencing key value matches exactly one row of this
      # table.
      #
      # foreign_key - the foreign key hash shared with the referencing table.
      # remote_url  - url of the referencing table (only its last path segment
      #               is put into the error context).
      # remote      - the distinct key values used by the referencing table.
      #
      # Reports :unmatched_foreign_key_reference when no row matches and
      # :multiple_matched_rows when more than one row matches.
      # Returns true when no errors were collected.
      def validate_foreign_key_references(foreign_key, remote_url, remote)
        reset
        # an empty referenced table has recorded nothing; treat it as "no matches"
        local = @foreign_key_reference_values[foreign_key] || {}
        context = { "from" => { "url" => remote_url.to_s.split("/")[-1], "columns" => foreign_key["columnReference"] }, "to" => { "url" => @url.to_s.split("/")[-1], "columns" => foreign_key["reference"]["columnReference"] }}
        remote.each do |r|
          if local[r]
            build_errors(:multiple_matched_rows, :schema, nil, nil, r, context) if local[r].length > 1
          else
            build_errors(:unmatched_foreign_key_reference, :schema, nil, nil, r, context)
          end
        end
        return valid?
      end

      # Builds a Table from the parsed JSON description of a table.
      #
      # table_desc           - Hash of the table description.
      # base_url, lang       - passed through to the property checker and to
      #                        the column parser.
      # inherited_properties - properties inherited from the enclosing table
      #                        group; the argument itself is not modified.
      #
      # Raises Csvlint::Csvw::MetadataError when @type is not 'Table', when
      # tableSchema.columns is not an array, when a virtual column precedes a
      # non-virtual one, when two columns share a name, or when a foreign key
      # or rowTitles entry names a column that does not exist.
      def self.from_json(table_desc, base_url=nil, lang="und", inherited_properties={})
        annotations = {}
        warnings = []
        table_properties = {}
        columns = []
        notes = []
        inherited_properties = inherited_properties.clone

        table_desc.each do |property,value|
          if property =="@type"
            raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].@type"), "@type of table is not 'Table'" unless value == 'Table'
          elsif property == "notes"
            notes = value
          else
            v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
            warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
            if type == :annotation
              annotations[property] = v
            elsif type == :table || type == :common
              table_properties[property] = v
            elsif type == :column
              warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "#{property}", nil)
            else
              inherited_properties[property] = v
            end
          end
        end

        table_schema = table_properties["tableSchema"] || inherited_properties["tableSchema"]
        # named columns only; looking columns up by name keeps key resolution
        # correct even when some columns have no name
        columns_by_name = {}
        foreign_keys = []
        primary_key_columns = []
        primary_key_valid = true
        if table_schema
          raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].tableSchema.columns"), "schema columns is not an array" unless table_schema["columns"].instance_of? Array
          virtual_columns = false
          table_schema["columns"].each_with_index do |column_desc,i|
            if column_desc.instance_of? Hash
              column = Csvlint::Csvw::Column.from_json(i+1, column_desc, base_url, lang, inherited_properties)
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].tableSchema.columns[#{i}].virtual"), "virtual columns before non-virtual column #{column.name || i}" if virtual_columns && !column.virtual
              virtual_columns = virtual_columns || column.virtual
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].tableSchema.columns"), "multiple columns named #{column.name}" if columns_by_name.key?(column.name)
              columns_by_name[column.name] = column unless column.name.nil?
              columns << column
            else
              warnings << Csvlint::ErrorMessage.new(:invalid_column_description, :metadata, nil, nil, "#{column_desc}", nil)
            end
          end

          # an unknown primary key column only produces a warning and disables
          # the primary key check altogether
          primary_key = table_schema["primaryKey"]
          Array(primary_key).each do |reference|
            column = columns_by_name[reference]
            if column
              primary_key_columns << column
            else
              warnings << Csvlint::ErrorMessage.new(:invalid_column_reference, :metadata, nil, nil, "primaryKey: #{reference}", nil)
              primary_key_valid = false
            end
          end if primary_key

          foreign_keys = table_schema["foreignKeys"]
          foreign_keys.each_with_index do |foreign_key, fk_index|
            # fk_index stays the position of the foreign key so the error path
            # points at the right foreignKeys entry
            foreign_key_columns = Array(foreign_key["columnReference"]).map do |reference|
              column = columns_by_name[reference]
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].tableSchema.foreignKeys[#{fk_index}].columnReference"), "foreignKey references non-existant column" unless column
              column
            end
            foreign_key["referencing_columns"] = foreign_key_columns
          end if foreign_keys

          row_titles = table_schema["rowTitles"]
          row_titles.each_with_index do |row_title,i|
            raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].tableSchema.rowTitles[#{i}]"), "rowTitles references non-existant column" unless columns_by_name.key?(row_title)
          end if row_titles

        end

        return self.new(table_properties["url"],
          id: table_properties["@id"],
          columns: columns,
          dialect: table_properties["dialect"],
          foreign_keys: foreign_keys || [],
          notes: notes,
          primary_key: primary_key_valid && !primary_key_columns.empty? ? primary_key_columns : nil,
          schema: table_schema ? table_schema["@id"] : nil,
          annotations: annotations,
          warnings: warnings
        )
      end

    end
  end
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
module Csvlint
  module Csvw
    # A group of CSVW tables read from one metadata document.
    #
    # Dispatches header/row validation to the table registered under a given
    # URL and, once every table has had at least one row validated, checks the
    # foreign keys between the tables. Errors and warnings are gathered through
    # Csvlint::ErrorCollector; every validate_* call resets the collector first.
    class TableGroup

      include Csvlint::ErrorCollector

      attr_reader :url, :id, :tables, :notes, :annotations

      # url      - base URL of the group (stored as given).
      # tables   - Hash mapping table URL to table object; the tables' current
      #            errors/warnings are copied into this group's collector.
      # warnings - warnings already produced while parsing the metadata.
      def initialize(url, id: nil, tables: {}, notes: [], annotations: {}, warnings: [])
        @url = url
        @id = id
        @tables = tables
        @notes = notes
        @annotations = annotations
        # tracks which tables have had rows validated; foreign keys are only
        # checked once none of them is still false
        @validated_tables = {}
        @tables.each { |t,v| @validated_tables[t] = false }
        reset
        @warnings += warnings
        @errors += @tables.map{|url,table| table.errors}.flatten
        @warnings += @tables.map{|url,table| table.warnings}.flatten
      end

      # Validates a header row against the table registered under table_url.
      # A File is translated into a "file:<absolute path>" key.
      #
      # Returns true when no errors were collected.
      def validate_header(header, table_url)
        reset
        table_url = "file:#{File.absolute_path(table_url)}" if table_url.instance_of? File
        table = tables[table_url]
        table.validate_header(header)
        @errors += table.errors
        @warnings += table.warnings
        return valid?
      end

      # Validates a data row against the table registered under table_url and
      # marks that table as validated. all_errors is accepted but not used
      # here.
      #
      # Returns true when no errors were collected.
      def validate_row(values, row=nil, all_errors=[], table_url)
        reset
        table_url = "file:#{File.absolute_path(table_url)}" if table_url.instance_of? File
        @validated_tables[table_url] = true
        table = tables[table_url]
        table.validate_row(values, row)
        @errors += table.errors
        @warnings += table.warnings
        return valid?
      end

      # Runs the foreign key checks of every table, but only when all tables
      # of the group have been validated; otherwise nothing is checked.
      #
      # Returns true when no errors were collected.
      def validate_foreign_keys
        reset
        unless @validated_tables.has_value?(false)
          @tables.each do |table_url,table|
            table.validate_foreign_keys
            @errors += table.errors
            @warnings += table.warnings
          end
        end
        return valid?
      end

      # Builds a TableGroup from parsed JSON metadata.
      #
      # url  - location of the metadata document; used as the initial base URL.
      # json - Hash of the metadata. It is modified: "@context" is removed and
      #        each table description's "url" is replaced by its resolved form.
      #        A document describing a single table (it has "url" but no
      #        "tables") is wrapped into a one-table group.
      #
      # Raises Csvlint::Csvw::MetadataError for an invalid @context, a wrong
      # @type, a missing/empty/non-array tables property, or a foreign key that
      # references an unknown table, schema or column.
      def self.from_json(url, json)
        warnings = []
        tables = {}
        annotations = {}
        inherited_properties = {}
        common_properties = {}
        base_url = URI(url.to_s.strip)
        lang = "und"

        context = json["@context"]
        if context.instance_of?(Array) && context[1]
          context[1].each do |property,value|
            v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
            if warning.nil? || warning.empty?
              if type == :context
                base_url = v if property == "@base"
                lang = v if property == "@language"
              else
                raise Csvlint::Csvw::MetadataError.new("$.@context"), "@context contains properties other than @base or @language (#{property})"
              end
            else
              raise Csvlint::Csvw::MetadataError.new("$.@context"), "@context contains properties other than @base or @language (#{property})" unless ["@base", "@language"].include?(property)
              warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "@context: #{property}: #{value}", nil) }
            end
          end
        end
        json.delete("@context")

        # a bare table description is treated as a group with that one table
        json = { "tables" => [ json ] } if json["url"] && !json["tables"]

        json.each do |property,value|
          unless VALID_PROPERTIES.include? property
            v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
            warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
            if type == :annotation
              annotations[property] = v
            elsif type == :common
              common_properties[property] = v
            elsif type == :column
              warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "#{property}", nil)
            else
              inherited_properties[property] = v
            end
          end
        end

        id = common_properties["@id"]

        raise Csvlint::Csvw::MetadataError.new("$.@type"), "@type of table group is not 'TableGroup'" if json["@type"] && json["@type"] != 'TableGroup'

        raise Csvlint::Csvw::MetadataError.new("$"), "no tables property" unless json["tables"]
        # guard with respond_to? so a scalar value (number, true) falls through
        # to the "not an array" error instead of raising NoMethodError
        raise Csvlint::Csvw::MetadataError.new("$.tables"), "empty tables property" if json["tables"].respond_to?(:empty?) && json["tables"].empty?
        raise Csvlint::Csvw::MetadataError.new("$.tables"), "tables property is not an array" unless json["tables"].instance_of? Array

        json["tables"].each do |table_desc|
          if table_desc.instance_of? Hash
            table_url = table_desc["url"]
            unless table_url.instance_of? String
              warnings << Csvlint::ErrorMessage.new(:invalid_url, :metadata, nil, nil, "url: #{table_url}", nil)
              table_url = ""
            end
            table_url = URI.join(base_url, table_url).to_s
            table_desc["url"] = table_url
            table = Csvlint::Csvw::Table.from_json(table_desc, base_url, lang, inherited_properties)
            tables[table_url] = table
          else
            warnings << Csvlint::ErrorMessage.new(:invalid_table_description, :metadata, nil, nil, "#{table_desc}", nil)
          end
        end

        # resolve every foreign key to the table and columns it references and
        # register it with the referenced table
        tables.each do |table_url, table|
          table.foreign_keys.each_with_index do |foreign_key,i|
            reference = foreign_key["reference"]
            if reference["resource"]
              resource = URI.join(base_url, reference["resource"]).to_s
              referenced_table = tables[resource]
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_url}')].tableSchema.foreignKeys[#{i}].reference.resource"), "foreign key references table that does not exist (#{resource})" if referenced_table.nil?
            else
              schema_url = URI.join(base_url, reference["schemaReference"]).to_s
              # the first table using that schema is taken as the target
              referenced_table = tables.values.find { |candidate| candidate.schema == schema_url }
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_url}')].tableSchema.foreignKeys[#{i}].reference.schemaReference"), "foreign key references schema that is not used (#{schema_url})" if referenced_table.nil?
            end
            foreign_key["referenced_table"] = referenced_table
            table_columns = {}
            referenced_table.columns.each do |column|
              table_columns[column.name] = column if column.name
            end
            # whichever of resource / schema_url was used to find the table
            target = resource || schema_url
            referenced_columns = []
            Array(reference["columnReference"]).each do |column_reference|
              column = table_columns[column_reference]
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_url}')].tableSchema.foreignKeys[#{i}].reference.columnReference"), "column named #{column_reference} does not exist in #{target}" if column.nil?
              referenced_columns << column
            end
            foreign_key["referenced_columns"] = referenced_columns
            referenced_table.foreign_key_references << foreign_key
          end
        end

        return self.new(base_url, id: id, tables: tables, notes: json["notes"] || [], annotations: annotations, warnings: warnings)
      end

      private
        # top-level properties handled explicitly by from_json rather than by
        # the property checker
        VALID_PROPERTIES = ['tables', 'notes', '@type'].freeze

    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Csvlint
  # Mixin that gives a validator three message lists -- errors, warnings and
  # info messages -- plus helpers to append to them. The including object must
  # call #reset before the lists are used, since they start out unset.
  module ErrorCollector
    attr_reader :errors, :warnings, :info_messages

    # Appends a validation error built from the given details.
    # Returns the errors list.
    def build_errors(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
      message = Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
      @errors.push(message)
    end

    # Appends a validation warning built from the given details.
    # Returns the warnings list.
    def build_warnings(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
      message = Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
      @warnings.push(message)
    end

    # Appends an informational message built from the given details.
    # Returns the info message list.
    def build_info_messages(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
      message = Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
      @info_messages.push(message)
    end

    # True while no error has been recorded; warnings and info messages do not
    # affect validity.
    def valid?
      errors.length.zero?
    end

    # Replaces all three lists with fresh empty arrays.
    def reset
      @errors = Array.new
      @warnings = Array.new
      @info_messages = Array.new
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Csvlint
  # Plain record describing one validation message. All six fields are stored
  # exactly as passed in and exposed through read-only accessors.
  class ErrorMessage
    attr_reader :type, :category
    attr_reader :row, :column
    attr_reader :content, :constraints

    # type        - identifier of the problem (e.g. a symbol).
    # category    - grouping of the problem (e.g. :schema, :metadata).
    # row, column - position the message refers to, or nil.
    # content     - the offending content, or nil.
    # constraints - the constraint(s) that were violated, or nil.
    def initialize(type, category, row, column, content, constraints)
      @type, @category = type, category
      @row, @column = row, column
      @content, @constraints = content, constraints
    end
  end
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
module Csvlint

  # One field of a (non-CSVW) schema: a name plus a hash of constraints that
  # cell values of the corresponding column are validated against. Errors are
  # gathered through Csvlint::ErrorCollector and describe the most recent
  # #validate_column call only.
  class Field
    include Csvlint::ErrorCollector

    attr_reader :name, :constraints, :title, :description

    # name        - the field name.
    # constraints - Hash with string keys such as "required", "minLength",
    #               "maxLength", "pattern", "unique", "type", "datePattern",
    #               "minimum", "maximum"; nil is treated as no constraints.
    def initialize(name, constraints={}, title=nil, description=nil)
      @name = name
      @constraints = constraints || {}
      # values already seen, for the "unique" constraint
      @uniques = Set.new
      @title = title
      @description = description
      # set once an invalid "pattern" has been reported, so it is reported once
      @regex_error_exists = false
      reset
    end

    # Validates a single cell value against every constraint of the field.
    #
    # value      - the cell content (String; nil is tolerated).
    # row/column - position used in the error messages.
    # all_errors - errors collected so far by the caller; when they already
    #              contain an :invalid_regex error for this column the pattern
    #              check is skipped.
    #
    # Returns true when the value produced no errors.
    def validate_column(value, row=nil, column=nil, all_errors=[])
      reset
      unless all_errors.any?{|error| ((error.type == :invalid_regex) && (error.column == column))}
        validate_regex(value, row, column, all_errors)
      end
      validate_length(value, row, column)
      validate_values(value, row, column)
      parsed = validate_type(value, row, column)
      validate_range(parsed, row, column) if parsed != nil
      return valid?
    end

    private
      # Checks the "required", "minLength" and "maxLength" constraints.
      # A nil value counts as missing and as shorter than any minLength.
      def validate_length(value, row, column)
        if constraints["required"] == true
          build_errors(:missing_value, :schema, row, column, value,
            { "required" => true }) if value.nil? || value.length == 0
        end
        if constraints["minLength"]
          build_errors(:min_length, :schema, row, column, value,
            { "minLength" => constraints["minLength"] }) if value.nil? || value.length < constraints["minLength"]
        end
        if constraints["maxLength"]
          build_errors(:max_length, :schema, row, column, value,
            { "maxLength" => constraints["maxLength"] } ) if !value.nil? && value.length > constraints["maxLength"]
        end
      end

      # Checks the "pattern" constraint. If the pattern is not a valid regular
      # expression an :invalid_regex error is reported (once per field);
      # otherwise a non-nil value that does not match yields a :pattern error.
      def validate_regex(value, row, column, all_errors)
        pattern = constraints["pattern"]
        return unless pattern

        begin
          # compile once and reuse for the match below
          regex = Regexp.new(pattern)
        rescue RegexpError
          build_regex_error(value, row, column, pattern, all_errors)
          return
        end

        build_errors(:pattern, :schema, row, column, value,
          { "pattern" => constraints["pattern"] } ) if !value.nil? && !value.match(regex)
      end

      # Reports an invalid "pattern" constraint, at most once for this field.
      def build_regex_error(value, row, column, pattern, all_errors)
        return if @regex_error_exists
        build_errors(:invalid_regex, :schema, nil, column, ("#{name}: Constraints: Pattern: #{pattern}"),
          { "pattern" => constraints["pattern"] })
        @regex_error_exists = true
      end

      # Checks the "unique" constraint: a value that has already been seen by
      # this field yields a :unique error, any other value is remembered.
      def validate_values(value, row, column)
        if constraints["unique"] == true
          if @uniques.include? value
            build_errors(:unique, :schema, row, column, value, { "unique" => true })
          else
            @uniques << value
          end
        end
      end

      # Checks the "type" constraint. Empty strings are not type-checked.
      # Returns the converted value, or nil when there is no type constraint,
      # the value is empty, or the conversion failed (which is reported as
      # :invalid_type).
      def validate_type(value, row, column)
        if constraints["type"] && value != ""
          parsed = convert_to_type(value)
          if parsed == nil
            failed = { "type" => constraints["type"] }
            failed["datePattern"] = constraints["datePattern"] if constraints["datePattern"]
            build_errors(:invalid_type, :schema, row, column, value, failed)
            return nil
          end
          return parsed
        end
        return nil
      end

      # Checks the "minimum" and "maximum" constraints against an already
      # converted value. A bound that cannot be converted to the field type is
      # silently ignored.
      def validate_range(value, row, column)
        #TODO: we're ignoring issues with converting ranges to actual types, maybe we
        #should generate a warning? The schema is invalid
        if constraints["minimum"]
          min_value = convert_to_type( constraints["minimum"] )
          if min_value
            build_errors(:below_minimum, :schema, row, column, value,
              { "minimum" => constraints["minimum"] }) unless value >= min_value
          end
        end
        if constraints["maximum"]
          max_value = convert_to_type( constraints["maximum"] )
          if max_value
            build_errors(:above_maximum, :schema, row, column, value,
              { "maximum" => constraints["maximum"] }) unless value <= max_value
          end
        end
      end

      # Converts value using the converter registered for the field's "type".
      # Returns nil when the type is unknown or the value cannot be converted.
      def convert_to_type(value)
        parsed = nil
        tv = TYPE_VALIDATIONS[constraints["type"]]
        if tv
          begin
            parsed = tv.call value, constraints
          rescue ArgumentError, TypeError
            # TypeError covers nil / non-string input, for which Integer(),
            # Float() and strptime raise TypeError rather than ArgumentError
          end
        end
        return parsed
      end

      # Converters keyed by XML Schema datatype URI. Each takes the raw value
      # and the field constraints, returns the converted value and raises
      # ArgumentError when the value is not valid for the type. The date/time
      # converters honour an optional "datePattern" constraint and require the
      # value to round-trip through that pattern unchanged.
      TYPE_VALIDATIONS = {
          'http://www.w3.org/2001/XMLSchema#string'  => lambda { |value, constraints| value },
          'http://www.w3.org/2001/XMLSchema#int'     => lambda { |value, constraints| Integer value },
          'http://www.w3.org/2001/XMLSchema#integer' => lambda { |value, constraints| Integer value },
          'http://www.w3.org/2001/XMLSchema#float'   => lambda { |value, constraints| Float value },
          'http://www.w3.org/2001/XMLSchema#double'   => lambda { |value, constraints| Float value },
          'http://www.w3.org/2001/XMLSchema#anyURI'  => lambda do |value, constraints|
            begin
              u = URI.parse value
              # only http(s) URIs are accepted
              raise ArgumentError unless u.kind_of?(URI::HTTP) || u.kind_of?(URI::HTTPS)
            rescue URI::InvalidURIError
              raise ArgumentError
            end
            u
          end,
          'http://www.w3.org/2001/XMLSchema#boolean' => lambda do |value, constraints|
            return true if ['true', '1'].include? value
            return false if ['false', '0'].include? value
            raise ArgumentError
          end,
          'http://www.w3.org/2001/XMLSchema#nonPositiveInteger' => lambda do |value, constraints|
            i = Integer value
            raise ArgumentError unless i <= 0
            i
          end,
          'http://www.w3.org/2001/XMLSchema#negativeInteger' => lambda do |value, constraints|
            i = Integer value
            raise ArgumentError unless i < 0
            i
          end,
          'http://www.w3.org/2001/XMLSchema#nonNegativeInteger' => lambda do |value, constraints|
            i = Integer value
            raise ArgumentError unless i >= 0
            i
          end,
          'http://www.w3.org/2001/XMLSchema#positiveInteger' => lambda do |value, constraints|
            i = Integer value
            raise ArgumentError unless i > 0
            i
          end,
          'http://www.w3.org/2001/XMLSchema#dateTime' => lambda do |value, constraints|
            date_pattern = constraints["datePattern"] || "%Y-%m-%dT%H:%M:%SZ"
            d = DateTime.strptime(value, date_pattern)
            raise ArgumentError unless d.strftime(date_pattern) == value
            d
          end,
          'http://www.w3.org/2001/XMLSchema#date' => lambda do |value, constraints|
            date_pattern = constraints["datePattern"] || "%Y-%m-%d"
            d = Date.strptime(value, date_pattern)
            raise ArgumentError unless d.strftime(date_pattern) == value
            d
          end,
          'http://www.w3.org/2001/XMLSchema#time' => lambda do |value, constraints|
            date_pattern = constraints["datePattern"] || "%H:%M:%S"
            d = DateTime.strptime(value, date_pattern)
            raise ArgumentError unless d.strftime(date_pattern) == value
            d
          end,
          'http://www.w3.org/2001/XMLSchema#gYear' => lambda do |value, constraints|
            date_pattern = constraints["datePattern"] || "%Y"
            d = Date.strptime(value, date_pattern)
            raise ArgumentError unless d.strftime(date_pattern) == value
            d
          end,
          'http://www.w3.org/2001/XMLSchema#gYearMonth' => lambda do |value, constraints|
            date_pattern = constraints["datePattern"] || "%Y-%m"
            d = Date.strptime(value, date_pattern)
            raise ArgumentError unless d.strftime(date_pattern) == value
            d
          end,
      }.freeze
  end
end
|