wjordan213-csvlint 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitattributes +2 -0
- data/.gitignore +28 -0
- data/.ruby-version +1 -0
- data/.travis.yml +32 -0
- data/CHANGELOG.md +361 -0
- data/Gemfile +7 -0
- data/LICENSE.md +22 -0
- data/README.md +328 -0
- data/Rakefile +17 -0
- data/bin/create_schema +32 -0
- data/bin/csvlint +10 -0
- data/features/check_format.feature +46 -0
- data/features/cli.feature +210 -0
- data/features/csv_options.feature +35 -0
- data/features/csvupload.feature +145 -0
- data/features/csvw_schema_validation.feature +127 -0
- data/features/fixtures/cr-line-endings.csv +0 -0
- data/features/fixtures/crlf-line-endings.csv +0 -0
- data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
- data/features/fixtures/inconsistent-line-endings.csv +0 -0
- data/features/fixtures/invalid-byte-sequence.csv +0 -0
- data/features/fixtures/invalid_many_rows.csv +0 -0
- data/features/fixtures/lf-line-endings.csv +0 -0
- data/features/fixtures/spreadsheet.xls +0 -0
- data/features/fixtures/spreadsheet.xlsx +0 -0
- data/features/fixtures/title-row.csv +0 -0
- data/features/fixtures/valid.csv +0 -0
- data/features/fixtures/valid_many_rows.csv +0 -0
- data/features/fixtures/windows-line-endings.csv +0 -0
- data/features/information.feature +22 -0
- data/features/parse_csv.feature +90 -0
- data/features/schema_validation.feature +105 -0
- data/features/sources.feature +17 -0
- data/features/step_definitions/cli_steps.rb +11 -0
- data/features/step_definitions/csv_options_steps.rb +24 -0
- data/features/step_definitions/information_steps.rb +13 -0
- data/features/step_definitions/parse_csv_steps.rb +42 -0
- data/features/step_definitions/schema_validation_steps.rb +33 -0
- data/features/step_definitions/sources_steps.rb +7 -0
- data/features/step_definitions/validation_errors_steps.rb +90 -0
- data/features/step_definitions/validation_info_steps.rb +22 -0
- data/features/step_definitions/validation_warnings_steps.rb +60 -0
- data/features/support/aruba.rb +56 -0
- data/features/support/env.rb +26 -0
- data/features/support/load_tests.rb +114 -0
- data/features/support/webmock.rb +1 -0
- data/features/validation_errors.feature +147 -0
- data/features/validation_info.feature +16 -0
- data/features/validation_warnings.feature +86 -0
- data/lib/csvlint.rb +27 -0
- data/lib/csvlint/cli.rb +165 -0
- data/lib/csvlint/csvw/column.rb +359 -0
- data/lib/csvlint/csvw/date_format.rb +182 -0
- data/lib/csvlint/csvw/metadata_error.rb +13 -0
- data/lib/csvlint/csvw/number_format.rb +211 -0
- data/lib/csvlint/csvw/property_checker.rb +761 -0
- data/lib/csvlint/csvw/table.rb +204 -0
- data/lib/csvlint/csvw/table_group.rb +165 -0
- data/lib/csvlint/error_collector.rb +27 -0
- data/lib/csvlint/error_message.rb +15 -0
- data/lib/csvlint/field.rb +196 -0
- data/lib/csvlint/schema.rb +92 -0
- data/lib/csvlint/validate.rb +599 -0
- data/lib/csvlint/version.rb +3 -0
- data/spec/csvw/column_spec.rb +112 -0
- data/spec/csvw/date_format_spec.rb +49 -0
- data/spec/csvw/number_format_spec.rb +417 -0
- data/spec/csvw/table_group_spec.rb +143 -0
- data/spec/csvw/table_spec.rb +90 -0
- data/spec/field_spec.rb +252 -0
- data/spec/schema_spec.rb +211 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/validator_spec.rb +619 -0
- data/wjordan213_csvlint.gemspec +46 -0
- metadata +490 -0
@@ -0,0 +1,204 @@
|
|
1
|
+
module Csvlint
  module Csvw
    # One table described by a CSVW metadata document: its column descriptions
    # plus the primary-key and foreign-key bookkeeping that is accumulated
    # while rows are validated.
    #
    # Every validate_* method starts by calling +reset+, so +errors+ and
    # +warnings+ only ever describe the most recent call.
    class Table

      include Csvlint::ErrorCollector

      attr_reader :columns, :dialect, :table_direction, :foreign_keys, :foreign_key_references, :id, :notes, :primary_key, :schema, :suppress_output, :transformations, :url, :annotations

      # Stores the supplied properties and seeds the error/warning lists with
      # the supplied +warnings+ and whatever each column already carries.
      def initialize(url, columns: [], dialect: {}, table_direction: :auto, foreign_keys: [], id: nil, notes: [], primary_key: nil, schema: nil, suppress_output: false, transformations: [], annotations: [], warnings: [])
        @url = url
        @columns = columns
        @dialect = dialect
        @table_direction = table_direction
        @foreign_keys = foreign_keys
        # foreign key description => list of key tuples seen in this table
        @foreign_key_values = {}
        # foreign keys declared elsewhere that point at this table
        @foreign_key_references = []
        # foreign key description => { key tuple => [rows holding that tuple] }
        @foreign_key_reference_values = {}
        @id = id
        @notes = notes
        @primary_key = primary_key
        # key tuple => row in which it was last seen
        @primary_key_values = {}
        @schema = schema
        @suppress_output = suppress_output
        @transformations = transformations
        @annotations = annotations
        reset
        @warnings += warnings
        @errors += columns.map{|c| c.errors}.flatten
        @warnings += columns.map{|c| c.warnings}.flatten
      end

      # Validates each header cell against the column at the same position.
      # A header with no corresponding column is reported as
      # :malformed_header. Nothing is checked when no columns are defined.
      #
      # Returns true when no errors were recorded.
      def validate_header(headers)
        reset
        headers.each_with_index do |header,i|
          if columns[i]
            columns[i].validate_header(header)
            @errors += columns[i].errors
            @warnings += columns[i].warnings
          else
            build_errors(:malformed_header, :schema, 1, nil, header, nil)
          end
        end unless columns.empty?
        return valid?
      end

      # Validates one row of values, checks primary-key uniqueness, and
      # records the key tuples needed later by validate_foreign_keys.
      #
      # Returns true when no errors were recorded for this row.
      def validate_row(values, row=nil)
        reset
        values.each_with_index do |value,i|
          column = columns[i]
          if column
            column.validate(value, row)
            @errors += column.errors
            @warnings += column.warnings
          else
            build_errors(:too_many_values, :schema, row, nil, value, nil)
          end
        end unless columns.empty?
        unless @primary_key.nil?
          key = @primary_key.map { |column| column.parse(values[column.number - 1], row) }
          build_errors(:duplicate_key, :schema, row, nil, key.join(","), @primary_key_values[key]) if @primary_key_values.include?(key)
          @primary_key_values[key] = row
        end
        # build a record of the unique values that are referenced by foreign keys from other tables
        # so that later we can check whether those foreign keys reference these values
        @foreign_key_references.each do |foreign_key|
          referenced_columns = foreign_key["referenced_columns"]
          key = referenced_columns.map{ |column| column.parse(values[column.number - 1], row) }
          known_values = @foreign_key_reference_values[foreign_key] = @foreign_key_reference_values[foreign_key] || {}
          known_values[key] = known_values[key] || []
          known_values[key] << row
        end
        # build a record of the references from this row to other tables
        # we can't check yet whether these exist in the other tables because
        # we might not have parsed those other tables
        @foreign_keys.each do |foreign_key|
          referencing_columns = foreign_key["referencing_columns"]
          key = referencing_columns.map{ |column| column.parse(values[column.number - 1], row) }
          known_values = @foreign_key_values[foreign_key] = @foreign_key_values[foreign_key] || []
          known_values << key unless known_values.include?(key)
        end
        return valid?
      end

      # Asks every referenced table to check the key tuples this table
      # collected for each of its foreign keys, and gathers the resulting
      # errors and warnings.
      #
      # Returns true when no errors were recorded.
      def validate_foreign_keys
        reset
        @foreign_keys.each do |foreign_key|
          local = @foreign_key_values[foreign_key]
          remote_table = foreign_key["referenced_table"]
          remote_table.validate_foreign_key_references(foreign_key, @url, local)
          # when the key points back at this table the call above already
          # recorded its findings in our own lists
          @errors += remote_table.errors unless remote_table == self
          @warnings += remote_table.warnings unless remote_table == self
        end
        return valid?
      end

      # Checks the key tuples collected by a referencing table (+remote+)
      # against the tuples recorded for this table. A tuple with no matching
      # row yields :unmatched_foreign_key_reference; one matching more than
      # one row yields :multiple_matched_rows.
      #
      # Either side may be nil when no rows were recorded for the foreign
      # key; that is treated as "no values" rather than raising.
      #
      # Returns true when no errors were recorded.
      def validate_foreign_key_references(foreign_key, remote_url, remote)
        reset
        local = @foreign_key_reference_values[foreign_key] || {}
        context = { "from" => { "url" => remote_url.to_s.split("/")[-1], "columns" => foreign_key["columnReference"] }, "to" => { "url" => @url.to_s.split("/")[-1], "columns" => foreign_key["reference"]["columnReference"] }}
        (remote || []).each do |r|
          if local[r]
            build_errors(:multiple_matched_rows, :schema, nil, nil, r, context) if local[r].length > 1
          else
            build_errors(:unmatched_foreign_key_reference, :schema, nil, nil, r, context)
          end
        end
        return valid?
      end

      # Builds a Table from a parsed JSON table description.
      #
      # table_desc           - Hash of table properties.
      # base_url, lang       - passed through to PropertyChecker and Column.
      # inherited_properties - Hash of properties inherited from the enclosing
      #                        description; cloned, never modified in place.
      #
      # Raises Csvlint::Csvw::MetadataError when @type is not 'Table', when
      # the schema's columns are not an Array, when a virtual column precedes
      # a non-virtual one, when two columns share a name, or when a foreign
      # key or rowTitles entry names a column that does not exist.
      def self.from_json(table_desc, base_url=nil, lang="und", inherited_properties={})
        annotations = {}
        warnings = []
        table_properties = {}
        columns = []
        notes = []
        inherited_properties = inherited_properties.clone

        table_desc.each do |property,value|
          if property =="@type"
            raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].@type"), "@type of table is not 'Table'" unless value == 'Table'
          elsif property == "notes"
            notes = value
          else
            v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
            warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
            if type == :annotation
              annotations[property] = v
            elsif type == :table || type == :common
              table_properties[property] = v
            elsif type == :column
              warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "#{property}", nil)
            else
              inherited_properties[property] = v
            end
          end
        end

        table_schema = table_properties["tableSchema"] || inherited_properties["tableSchema"]
        # Named columns keyed by name. Looking columns up here, rather than
        # by position in a separate list of names, stays correct even when
        # some columns have no name.
        columns_by_name = {}
        foreign_keys = []
        primary_key = nil
        if table_schema
          raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].tableSchema.columns"), "schema columns is not an array" unless table_schema["columns"].instance_of? Array
          virtual_columns = false
          table_schema["columns"].each_with_index do |column_desc,i|
            if column_desc.instance_of? Hash
              column = Csvlint::Csvw::Column.from_json(i+1, column_desc, base_url, lang, inherited_properties)
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].tableSchema.columns[#{i}].virtual"), "virtual columns before non-virtual column #{column.name || i}" if virtual_columns && !column.virtual
              virtual_columns = virtual_columns || column.virtual
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].tableSchema.columns"), "multiple columns named #{column.name}" if columns_by_name.key?(column.name)
              columns_by_name[column.name] = column unless column.name.nil?
              columns << column
            else
              warnings << Csvlint::ErrorMessage.new(:invalid_column_description, :metadata, nil, nil, "#{column_desc}", nil)
            end
          end

          primary_key = table_schema["primaryKey"]
          primary_key_columns = []
          primary_key_valid = true
          primary_key.each do |reference|
            key_column = columns_by_name[reference]
            if key_column
              primary_key_columns << key_column
            else
              warnings << Csvlint::ErrorMessage.new(:invalid_column_reference, :metadata, nil, nil, "primaryKey: #{reference}", nil)
              primary_key_valid = false
            end
          end if primary_key

          foreign_keys = table_schema["foreignKeys"]
          foreign_keys.each_with_index do |foreign_key, i|
            foreign_key_columns = []
            foreign_key["columnReference"].each do |reference|
              # a separate local keeps the foreign key index +i+ intact for
              # the error path below
              referencing_column = columns_by_name[reference]
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].tableSchema.foreignKeys[#{i}].columnReference"), "foreignKey references non-existant column" unless referencing_column
              foreign_key_columns << referencing_column
            end
            foreign_key["referencing_columns"] = foreign_key_columns
          end if foreign_keys

          row_titles = table_schema["rowTitles"]
          row_titles.each_with_index do |row_title,i|
            raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_desc["url"]}')].tableSchema.rowTitles[#{i}]"), "rowTitles references non-existant column" unless columns_by_name.key?(row_title)
          end if row_titles

        end

        # primary_key_valid and primary_key_columns are nil when there is no
        # table schema; the && short-circuits before .empty? is reached
        return self.new(table_properties["url"],
          id: table_properties["@id"],
          columns: columns,
          dialect: table_properties["dialect"],
          foreign_keys: foreign_keys || [],
          notes: notes,
          primary_key: (primary_key_valid && !primary_key_columns.empty?) ? primary_key_columns : nil,
          schema: table_schema ? table_schema["@id"] : nil,
          annotations: annotations,
          warnings: warnings
        )
      end

    end
  end
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
module Csvlint
  module Csvw
    # A group of CSVW tables keyed by their resolved URL. Validation calls
    # are routed to the table registered under the given URL, and foreign
    # keys are checked across the group once every table has had at least
    # one row validated.
    #
    # Every validate_* method starts by calling +reset+, so +errors+ and
    # +warnings+ only ever describe the most recent call.
    class TableGroup

      include Csvlint::ErrorCollector

      # Top-level properties handled directly by from_json instead of being
      # passed to the property checker.
      VALID_PROPERTIES = ['tables', 'notes', '@type']

      attr_reader :url, :id, :tables, :notes, :annotations

      # tables - Hash of table URL => Table.
      #
      # Seeds the error/warning lists with the supplied +warnings+ and
      # whatever each table already carries.
      def initialize(url, id: nil, tables: {}, notes: [], annotations: {}, warnings: [])
        @url = url
        @id = id
        @tables = tables
        @notes = notes
        @annotations = annotations
        # table URL => whether validate_row has been called for that table
        @validated_tables = {}
        @tables.each { |t,v| @validated_tables[t] = false }
        reset
        @warnings += warnings
        @errors += @tables.map{|url,table| table.errors}.flatten
        @warnings += @tables.map{|url,table| table.warnings}.flatten
      end

      # Validates +header+ against the table registered under +table_url+.
      # A File is turned into a "file:<absolute path>" URL before the lookup.
      #
      # Returns true when no errors were recorded.
      def validate_header(header, table_url)
        reset
        table_url = "file:#{File.absolute_path(table_url)}" if table_url.instance_of? File
        table = tables[table_url]
        table.validate_header(header)
        @errors += table.errors
        @warnings += table.warnings
        return valid?
      end

      # Validates one row against the table registered under +table_url+ and
      # marks that table as validated. +all_errors+ is accepted but not used
      # here.
      #
      # Returns true when no errors were recorded.
      def validate_row(values, row=nil, all_errors=[], table_url)
        reset
        table_url = "file:#{File.absolute_path(table_url)}" if table_url.instance_of? File
        @validated_tables[table_url] = true
        table = tables[table_url]
        table.validate_row(values, row)
        @errors += table.errors
        @warnings += table.warnings
        return valid?
      end

      # Runs foreign key validation on every table, but only once every
      # table in the group has been marked as validated; until then this is
      # a no-op that returns true.
      def validate_foreign_keys
        reset
        unless @validated_tables.has_value?(false)
          @tables.each do |table_url,table|
            table.validate_foreign_keys
            @errors += table.errors
            @warnings += table.warnings
          end
        end
        return valid?
      end

      # Builds a TableGroup from parsed JSON metadata located at +url+.
      #
      # A document with a "url" and no "tables" is treated as a single table
      # description and wrapped into a one-table group.
      #
      # Note: +json+ is modified ("@context" is removed and each table
      # description's "url" is replaced by its resolved form).
      #
      # Raises Csvlint::Csvw::MetadataError for an invalid @context or @type,
      # a missing, empty or non-Array "tables" property, or a foreign key
      # that names a table, schema or column that cannot be found.
      def self.from_json(url, json)
        warnings = []
        tables = {}
        annotations = {}
        inherited_properties = {}
        common_properties = {}
        base_url = URI(url.to_s.strip)
        lang = "und"

        context = json["@context"]
        if context.instance_of?(Array) && context[1]
          context[1].each do |property,value|
            v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
            if warning.nil? || warning.empty?
              if type == :context
                base_url = v if property == "@base"
                lang = v if property == "@language"
              else
                raise Csvlint::Csvw::MetadataError.new("$.@context"), "@context contains properties other than @base or @language (#{property})"
              end
            else
              raise Csvlint::Csvw::MetadataError.new("$.@context"), "@context contains properties other than @base or @language (#{property})" unless ["@base", "@language"].include?(property)
              warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "@context: #{property}: #{value}", nil) }
            end
          end
        end
        json.delete("@context")

        # a bare table description becomes a group containing just that table
        json = { "tables" => [ json ] } if json["url"] && !json["tables"]

        json.each do |property,value|
          unless VALID_PROPERTIES.include? property
            v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
            warnings += Array(warning).map{ |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty?
            if type == :annotation
              annotations[property] = v
            elsif type == :common
              common_properties[property] = v
            elsif type == :column
              warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "#{property}", nil)
            else
              inherited_properties[property] = v
            end
          end
        end

        id = common_properties["@id"]

        raise Csvlint::Csvw::MetadataError.new("$.@type"), "@type of table group is not 'TableGroup'" if json["@type"] && json["@type"] != 'TableGroup'

        table_descriptions = json["tables"]
        raise Csvlint::Csvw::MetadataError.new("$"), "no tables property" unless table_descriptions
        # only ask for emptiness when the value supports it, so that a scalar
        # falls through to the "not an array" error instead of NoMethodError
        raise Csvlint::Csvw::MetadataError.new("$.tables"), "empty tables property" if table_descriptions.respond_to?(:empty?) && table_descriptions.empty?
        raise Csvlint::Csvw::MetadataError.new("$.tables"), "tables property is not an array" unless table_descriptions.instance_of? Array

        table_descriptions.each do |table_desc|
          if table_desc.instance_of? Hash
            table_url = table_desc["url"]
            unless table_url.instance_of? String
              warnings << Csvlint::ErrorMessage.new(:invalid_url, :metadata, nil, nil, "url: #{table_url}", nil)
              table_url = ""
            end
            table_url = URI.join(base_url, table_url).to_s
            table_desc["url"] = table_url
            table = Csvlint::Csvw::Table.from_json(table_desc, base_url, lang, inherited_properties)
            tables[table_url] = table
          else
            warnings << Csvlint::ErrorMessage.new(:invalid_table_description, :metadata, nil, nil, "#{table_desc}", nil)
          end
        end

        # resolve every foreign key to the table and columns it points at
        tables.each do |table_url, table|
          table.foreign_keys.each_with_index do |foreign_key,i|
            reference = foreign_key["reference"]
            if reference["resource"]
              resource = URI.join(base_url, reference["resource"]).to_s
              referenced_table = tables[resource]
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_url}')].tableSchema.foreign_keys[#{i}].reference.resource"), "foreign key references table that does not exist (#{resource})" if referenced_table.nil?
            else
              schema_url = URI.join(base_url, reference["schemaReference"]).to_s
              referenced_tables = tables.values.select{ |candidate| candidate.schema == schema_url }
              referenced_table = referenced_tables[0]
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_url}')].tableSchema.foreign_keys[#{i}].reference.schemaReference"), "foreign key references schema that is not used (#{schema_url})" if referenced_table.nil?
            end
            foreign_key["referenced_table"] = referenced_table
            table_columns = {}
            referenced_table.columns.each do |column|
              table_columns[column.name] = column if column.name
            end
            referenced_columns = []
            Array(reference["columnReference"]).each do |column_reference|
              column = table_columns[column_reference]
              # +resource+ is only set for "resource" references; fall back to
              # the schema URL so the message always names the target
              raise Csvlint::Csvw::MetadataError.new("$.tables[?(@.url = '#{table_url}')].tableSchema.foreign_keys[#{i}].reference.columnReference"), "column named #{column_reference} does not exist in #{resource || schema_url}" if column.nil?
              referenced_columns << column
            end
            foreign_key["referenced_columns"] = referenced_columns
            referenced_table.foreign_key_references << foreign_key
          end
        end

        return self.new(base_url, id: id, tables: tables, notes: json["notes"] || [], annotations: annotations, warnings: warnings)
      end

    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Csvlint
  # Mixin that gives a validator three message lists (errors, warnings and
  # information messages) plus helpers to append to them.
  #
  # The lists are created on first use, so the helpers and +valid?+ also
  # work on an object that has not called +reset+ yet.
  module ErrorCollector
    # Returns the Array of recorded validation errors.
    def errors
      @errors ||= []
    end

    # Returns the Array of recorded validation warnings.
    def warnings
      @warnings ||= []
    end

    # Returns the Array of recorded information messages.
    def info_messages
      @info_messages ||= []
    end

    # Creates a validation error
    def build_errors(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
      errors << Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
    end

    # Creates a validation warning
    def build_warnings(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
      warnings << Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
    end

    # Creates a validation information message
    def build_info_messages(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
      info_messages << Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
    end

    # Returns true when no errors are recorded; warnings and information
    # messages do not affect the result.
    def valid?
      errors.empty?
    end

    # Replaces all three lists with fresh empty arrays.
    def reset
      @errors = []
      @warnings = []
      @info_messages = []
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Csvlint
  # Read-only record describing a single validation finding: what kind of
  # problem it is, where it occurred, the offending content and the
  # constraints involved.
  class ErrorMessage
    attr_reader :type, :category, :row, :column, :content, :constraints

    # All six values are stored as given; nothing is validated or copied.
    def initialize(type, category, row, column, content, constraints)
      @type, @category = type, category
      @row, @column = row, column
      @content, @constraints = content, constraints
    end
  end
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
module Csvlint

  # A schema field: a named column together with the constraints
  # ("required", "minLength", "maxLength", "pattern", "unique", "type",
  # "minimum", "maximum", "datePattern") that its values are checked against.
  class Field
    include Csvlint::ErrorCollector

    attr_reader :name, :constraints, :title, :description

    # constraints - Hash of constraint name => value; nil is treated as {}.
    def initialize(name, constraints={}, title=nil, description=nil)
      @name = name
      @constraints = constraints || {}
      # values seen so far, used by the "unique" constraint
      @uniques = Set.new
      @title = title
      @description = description
      reset
    end

    # Validates one cell value against every constraint of this field.
    #
    # value      - the cell content (String or nil).
    # row/column - position used in the recorded error messages.
    # all_errors - errors recorded so far by the caller; the pattern check is
    #              skipped when it already holds an :invalid_regex error for
    #              this column.
    #
    # Returns true when no errors were recorded for this value.
    def validate_column(value, row=nil, column=nil, all_errors=[])
      reset
      unless all_errors.any?{|error| ((error.type == :invalid_regex) && (error.column == column))}
        validate_regex(value, row, column, all_errors)
      end
      validate_length(value, row, column)
      validate_values(value, row, column)
      parsed = validate_type(value, row, column)
      # compare against nil explicitly: false is a legitimate parsed boolean
      validate_range(parsed, row, column) if parsed != nil
      return valid?
    end

    private
      # Checks the "required", "minLength" and "maxLength" constraints.
      # A nil value counts as missing and as shorter than any minLength.
      def validate_length(value, row, column)
        if constraints["required"] == true
          build_errors(:missing_value, :schema, row, column, value,
            { "required" => true }) if value.nil? || value.length == 0
        end
        if constraints["minLength"]
          build_errors(:min_length, :schema, row, column, value,
            { "minLength" => constraints["minLength"] }) if value.nil? || value.length < constraints["minLength"]
        end
        if constraints["maxLength"]
          build_errors(:max_length, :schema, row, column, value,
              { "maxLength" => constraints["maxLength"] } ) if !value.nil? && value.length > constraints["maxLength"]
        end
      end

      # Checks the "pattern" constraint. A pattern that is not a valid regular
      # expression is reported through build_regex_error; otherwise a non-nil
      # value that does not match yields a :pattern error.
      def validate_regex(value, row, column, all_errors)
        pattern = constraints["pattern"]
        if pattern
          begin
            # compile once and reuse the compiled expression for the match
            regex = Regexp.new(pattern)
            build_errors(:pattern, :schema, row, column, value,
            { "pattern" => constraints["pattern"] } ) if !value.nil? && !value.match(regex)
          rescue RegexpError
            build_regex_error(value, row, column, pattern, all_errors)
          end
        end
      end

      # Records an :invalid_regex error for this field, at most once per
      # Field instance.
      def build_regex_error(value, row, column, pattern, all_errors)
        return if @regex_error_exists
        build_errors(:invalid_regex, :schema, nil, column, ("#{name}: Constraints: Pattern: #{pattern}"),
          { "pattern" => constraints["pattern"] })
        @regex_error_exists = true
      end

      # Checks the "unique" constraint: a value already seen by this field
      # yields a :unique error, otherwise the value is remembered.
      def validate_values(value, row, column)
        if constraints["unique"] == true
          if @uniques.include? value
            build_errors(:unique, :schema, row, column, value, { "unique" => true })
          else
            @uniques << value
          end
        end
      end

      # Converts the value according to the "type" constraint.
      #
      # Returns the parsed value, or nil when there is no type constraint,
      # the value is the empty string, or the conversion failed (in which
      # case an :invalid_type error is recorded).
      def validate_type(value, row, column)
        if constraints["type"] && value != ""
          parsed = convert_to_type(value)
          if parsed == nil
            failed = { "type" => constraints["type"] }
            failed["datePattern"] = constraints["datePattern"] if constraints["datePattern"]
            build_errors(:invalid_type, :schema, row, column, value, failed)
            return nil
          end
          return parsed
        end
        return nil
      end

      # Checks an already-parsed value against the "minimum" and "maximum"
      # constraints. A bound that cannot be converted to the field's type is
      # silently ignored.
      def validate_range(value, row, column)
        #TODO: we're ignoring issues with converting ranges to actual types, maybe we
        #should generate a warning? The schema is invalid
        if constraints["minimum"]
          minimumValue = convert_to_type( constraints["minimum"] )
          if minimumValue
            build_errors(:below_minimum, :schema, row, column, value,
              { "minimum" => constraints["minimum"] }) unless value >= minimumValue
          end
        end
        if constraints["maximum"]
          maximumValue = convert_to_type( constraints["maximum"] )
          if maximumValue
            build_errors(:above_maximum, :schema, row, column, value,
            { "maximum" => constraints["maximum"] }) unless value <= maximumValue
          end
        end
      end

      # Runs the converter registered in TYPE_VALIDATIONS for this field's
      # type. Returns the converted value, or nil when the type is unknown or
      # the value cannot be converted.
      def convert_to_type(value)
        parsed = nil
        tv = TYPE_VALIDATIONS[constraints["type"]]
        if tv
          begin
            parsed = tv.call value, constraints
          rescue ArgumentError, TypeError
            # Integer(nil), Float(nil) and strptime(nil, ...) raise TypeError
            # rather than ArgumentError; both mean "not convertible" here
          end
        end
        return parsed
      end

      # Type URI => lambda(value, constraints) returning the converted value
      # or raising ArgumentError when the value is not of that type. The date
      # and time converters honour constraints["datePattern"] and require the
      # value to round-trip through the pattern unchanged.
      TYPE_VALIDATIONS = {
          'http://www.w3.org/2001/XMLSchema#string'  => lambda { |value, constraints| value },
          'http://www.w3.org/2001/XMLSchema#int'     => lambda { |value, constraints| Integer value },
          'http://www.w3.org/2001/XMLSchema#integer' => lambda { |value, constraints| Integer value },
          'http://www.w3.org/2001/XMLSchema#float'   => lambda { |value, constraints| Float value },
          'http://www.w3.org/2001/XMLSchema#double'   => lambda { |value, constraints| Float value },
          'http://www.w3.org/2001/XMLSchema#anyURI'  => lambda do |value, constraints|
            begin
              u = URI.parse value
              raise ArgumentError unless u.kind_of?(URI::HTTP) || u.kind_of?(URI::HTTPS)
            rescue URI::InvalidURIError
              raise ArgumentError
            end
            u
          end,
          'http://www.w3.org/2001/XMLSchema#boolean' => lambda do |value, constraints|
            return true if ['true', '1'].include? value
            return false if ['false', '0'].include? value
            raise ArgumentError
          end,
          'http://www.w3.org/2001/XMLSchema#nonPositiveInteger' => lambda do |value, constraints|
            i = Integer value
            raise ArgumentError unless i <= 0
            i
          end,
          'http://www.w3.org/2001/XMLSchema#negativeInteger' => lambda do |value, constraints|
            i = Integer value
            raise ArgumentError unless i < 0
            i
          end,
          'http://www.w3.org/2001/XMLSchema#nonNegativeInteger' => lambda do |value, constraints|
            i = Integer value
            raise ArgumentError unless i >= 0
            i
          end,
          'http://www.w3.org/2001/XMLSchema#positiveInteger' => lambda do |value, constraints|
            i = Integer value
            raise ArgumentError unless i > 0
            i
          end,
          'http://www.w3.org/2001/XMLSchema#dateTime' => lambda do |value, constraints|
            date_pattern = constraints["datePattern"] || "%Y-%m-%dT%H:%M:%SZ"
            d = DateTime.strptime(value, date_pattern)
            raise ArgumentError unless d.strftime(date_pattern) == value
            d
          end,
          'http://www.w3.org/2001/XMLSchema#date' => lambda do |value, constraints|
            date_pattern = constraints["datePattern"] || "%Y-%m-%d"
            d = Date.strptime(value, date_pattern)
            raise ArgumentError unless d.strftime(date_pattern) == value
            d
          end,
          'http://www.w3.org/2001/XMLSchema#time' => lambda do |value, constraints|
            date_pattern = constraints["datePattern"] || "%H:%M:%S"
            d = DateTime.strptime(value, date_pattern)
            raise ArgumentError unless d.strftime(date_pattern) == value
            d
          end,
          'http://www.w3.org/2001/XMLSchema#gYear' => lambda do |value, constraints|
            date_pattern = constraints["datePattern"] || "%Y"
            d = Date.strptime(value, date_pattern)
            raise ArgumentError unless d.strftime(date_pattern) == value
            d
          end,
          'http://www.w3.org/2001/XMLSchema#gYearMonth' => lambda do |value, constraints|
            date_pattern = constraints["datePattern"] || "%Y-%m"
            d = Date.strptime(value, date_pattern)
            raise ArgumentError unless d.strftime(date_pattern) == value
            d
          end,
      }
  end
end
|