csvlint 0.4.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/dependabot.yml +11 -0
- data/.github/workflows/push.yml +35 -0
- data/.gitignore +1 -0
- data/.ruby-version +1 -1
- data/.standard_todo.yml +43 -0
- data/CHANGELOG.md +38 -0
- data/Dockerfile +16 -0
- data/Gemfile +2 -2
- data/README.md +13 -10
- data/Rakefile +7 -7
- data/bin/create_schema +2 -2
- data/csvlint.gemspec +19 -22
- data/docker_notes_for_windows.txt +20 -0
- data/features/step_definitions/cli_steps.rb +11 -11
- data/features/step_definitions/information_steps.rb +4 -4
- data/features/step_definitions/parse_csv_steps.rb +11 -11
- data/features/step_definitions/schema_validation_steps.rb +10 -10
- data/features/step_definitions/sources_steps.rb +1 -1
- data/features/step_definitions/validation_errors_steps.rb +19 -19
- data/features/step_definitions/validation_info_steps.rb +9 -9
- data/features/step_definitions/validation_warnings_steps.rb +11 -11
- data/features/support/aruba.rb +10 -9
- data/features/support/earl_formatter.rb +39 -39
- data/features/support/env.rb +10 -11
- data/features/support/load_tests.rb +109 -105
- data/features/support/webmock.rb +3 -1
- data/lib/csvlint/cli.rb +136 -142
- data/lib/csvlint/csvw/column.rb +279 -280
- data/lib/csvlint/csvw/date_format.rb +90 -92
- data/lib/csvlint/csvw/metadata_error.rb +1 -3
- data/lib/csvlint/csvw/number_format.rb +40 -32
- data/lib/csvlint/csvw/property_checker.rb +714 -717
- data/lib/csvlint/csvw/table.rb +49 -52
- data/lib/csvlint/csvw/table_group.rb +24 -23
- data/lib/csvlint/error_collector.rb +2 -0
- data/lib/csvlint/error_message.rb +0 -1
- data/lib/csvlint/field.rb +153 -141
- data/lib/csvlint/schema.rb +35 -43
- data/lib/csvlint/validate.rb +173 -151
- data/lib/csvlint/version.rb +1 -1
- data/lib/csvlint.rb +22 -23
- data/spec/csvw/column_spec.rb +15 -16
- data/spec/csvw/date_format_spec.rb +5 -7
- data/spec/csvw/number_format_spec.rb +2 -4
- data/spec/csvw/table_group_spec.rb +103 -105
- data/spec/csvw/table_spec.rb +71 -73
- data/spec/field_spec.rb +116 -121
- data/spec/schema_spec.rb +131 -141
- data/spec/spec_helper.rb +6 -6
- data/spec/validator_spec.rb +167 -203
- metadata +41 -85
- data/.travis.yml +0 -37
data/lib/csvlint/cli.rb
CHANGED
@@ -1,14 +1,13 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
1
|
+
require "csvlint"
|
2
|
+
require "rainbow"
|
3
|
+
require "active_support/json"
|
4
|
+
require "json"
|
5
|
+
require "thor"
|
6
6
|
|
7
|
-
require
|
7
|
+
require "active_support/inflector"
|
8
8
|
|
9
9
|
module Csvlint
|
10
10
|
class Cli < Thor
|
11
|
-
|
12
11
|
desc "myfile.csv OR csvlint http://example.com/myfile.csv", "Supports validating CSV files to check their syntax and contents"
|
13
12
|
|
14
13
|
option :dump_errors, desc: "Pretty print error and warning objects.", type: :boolean, aliases: :d
|
@@ -21,6 +20,8 @@ module Csvlint
|
|
21
20
|
@schema = get_schema(options[:schema]) if options[:schema]
|
22
21
|
fetch_schema_tables(@schema, options) if source.nil?
|
23
22
|
|
23
|
+
Rainbow.enabled = $stdout.tty?
|
24
|
+
|
24
25
|
valid = validate_csv(source, @schema, options[:dump_errors], options[:json], options[:werror])
|
25
26
|
exit 1 unless valid
|
26
27
|
end
|
@@ -33,174 +34,167 @@ module Csvlint
|
|
33
34
|
|
34
35
|
private
|
35
36
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
37
|
+
def read_source(source)
|
38
|
+
if source.nil?
|
39
|
+
# If no source is present, try reading from stdin
|
40
|
+
if !$stdin.tty?
|
41
|
+
source = begin
|
42
|
+
StringIO.new($stdin.read)
|
43
|
+
rescue
|
44
|
+
nil
|
42
45
|
end
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
return_error "No CSV data to validate" if !options[:schema] && source.nil?
|
47
|
+
end
|
48
|
+
else
|
49
|
+
# If the source isn't a URL, it's a file
|
50
|
+
unless /^http(s)?/.match?(source)
|
51
|
+
begin
|
52
|
+
source = File.new(source)
|
53
|
+
rescue Errno::ENOENT
|
54
|
+
return_error "#{source} not found"
|
51
55
|
end
|
52
56
|
end
|
53
|
-
|
54
|
-
source
|
55
57
|
end
|
56
58
|
|
57
|
-
|
58
|
-
|
59
|
-
schema = Csvlint::Schema.load_from_json(schema, false)
|
60
|
-
rescue Csvlint::Csvw::MetadataError => e
|
61
|
-
return_error "invalid metadata: #{e.message}#{" at " + e.path if e.path}"
|
62
|
-
rescue OpenURI::HTTPError, Errno::ENOENT
|
63
|
-
return_error "#{options[:schema]} not found"
|
64
|
-
end
|
59
|
+
source
|
60
|
+
end
|
65
61
|
|
66
|
-
|
67
|
-
|
68
|
-
|
62
|
+
def get_schema(schema)
|
63
|
+
begin
|
64
|
+
schema = Csvlint::Schema.load_from_uri(schema, false)
|
65
|
+
rescue Csvlint::Csvw::MetadataError => e
|
66
|
+
return_error "invalid metadata: #{e.message}#{" at " + e.path if e.path}"
|
67
|
+
rescue OpenURI::HTTPError, Errno::ENOENT
|
68
|
+
return_error "#{options[:schema]} not found"
|
69
|
+
end
|
69
70
|
|
70
|
-
|
71
|
+
if schema.instance_of?(Csvlint::Schema) && schema.description == "malformed"
|
72
|
+
return_error "invalid metadata: malformed JSON"
|
71
73
|
end
|
72
74
|
|
73
|
-
|
74
|
-
|
75
|
+
schema
|
76
|
+
end
|
75
77
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
78
|
+
def fetch_schema_tables(schema, options)
|
79
|
+
valid = true
|
80
|
+
|
81
|
+
unless schema.instance_of? Csvlint::Csvw::TableGroup
|
82
|
+
return_error "No CSV data to validate."
|
83
|
+
end
|
84
|
+
schema.tables.keys.each do |source|
|
85
|
+
unless /^http(s)?/.match?(source)
|
80
86
|
begin
|
81
|
-
source = source.sub("file:","")
|
82
|
-
source = File.new(
|
87
|
+
source = source.sub("file:", "")
|
88
|
+
source = File.new(source)
|
83
89
|
rescue Errno::ENOENT
|
84
90
|
return_error "#{source} not found"
|
85
|
-
end unless source =~ /^http(s)?/
|
86
|
-
valid &= validate_csv(source, schema, options[:dump_errors], nil, options[:werror])
|
87
|
-
end
|
88
|
-
|
89
|
-
exit 1 unless valid
|
90
|
-
end
|
91
|
-
|
92
|
-
def print_error(index, error, dump, color)
|
93
|
-
location = ""
|
94
|
-
location += error.row.to_s if error.row
|
95
|
-
location += "#{error.row ? "," : ""}#{error.column.to_s}" if error.column
|
96
|
-
if error.row || error.column
|
97
|
-
location = "#{error.row ? "Row" : "Column"}: #{location}"
|
98
|
-
end
|
99
|
-
output_string = "#{index+1}. "
|
100
|
-
if error.column && @schema && @schema.class == Csvlint::Schema
|
101
|
-
if @schema.fields[error.column - 1] != nil
|
102
|
-
output_string += "#{@schema.fields[error.column - 1].name}: "
|
103
91
|
end
|
104
92
|
end
|
105
|
-
|
106
|
-
|
107
|
-
output_string += ". #{error.content}" if error.content
|
93
|
+
valid &= validate_csv(source, schema, options[:dump_errors], nil, options[:werror])
|
94
|
+
end
|
108
95
|
|
109
|
-
|
110
|
-
|
111
|
-
else
|
112
|
-
puts output_string
|
113
|
-
end
|
96
|
+
exit 1 unless valid
|
97
|
+
end
|
114
98
|
|
115
|
-
|
116
|
-
|
99
|
+
def print_error(index, error, dump, color)
|
100
|
+
location = ""
|
101
|
+
location += error.row.to_s if error.row
|
102
|
+
location += "#{error.row ? "," : ""}#{error.column}" if error.column
|
103
|
+
if error.row || error.column
|
104
|
+
location = "#{error.row ? "Row" : "Column"}: #{location}"
|
105
|
+
end
|
106
|
+
output_string = "#{index + 1}. "
|
107
|
+
if error.column && @schema && @schema.instance_of?(Csvlint::Schema)
|
108
|
+
if @schema.fields[error.column - 1] != nil
|
109
|
+
output_string += "#{@schema.fields[error.column - 1].name}: "
|
117
110
|
end
|
118
111
|
end
|
112
|
+
output_string += error.type.to_s
|
113
|
+
output_string += ". #{location}" unless location.empty?
|
114
|
+
output_string += ". #{error.content}" if error.content
|
119
115
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
116
|
+
puts Rainbow(output_string).color(color)
|
117
|
+
|
118
|
+
if dump
|
119
|
+
pp error
|
124
120
|
end
|
121
|
+
end
|
125
122
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
else
|
130
|
-
puts message
|
131
|
-
end
|
132
|
-
exit 1
|
123
|
+
def print_errors(errors, dump)
|
124
|
+
if errors.size > 0
|
125
|
+
errors.each_with_index { |error, i| print_error(i, error, dump, :red) }
|
133
126
|
end
|
127
|
+
end
|
134
128
|
|
135
|
-
|
136
|
-
|
129
|
+
def return_error(message)
|
130
|
+
puts Rainbow(message).red
|
131
|
+
exit 1
|
132
|
+
end
|
137
133
|
|
138
|
-
|
139
|
-
|
140
|
-
else
|
141
|
-
validator = Csvlint::Validator.new( source, {}, schema, { lambda: report_lines } )
|
142
|
-
end
|
134
|
+
def validate_csv(source, schema, dump, json, werror)
|
135
|
+
@error_count = 0
|
143
136
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
csv = "CSV"
|
150
|
-
end
|
137
|
+
validator = if json === true
|
138
|
+
Csvlint::Validator.new(source, {}, schema)
|
139
|
+
else
|
140
|
+
Csvlint::Validator.new(source, {}, schema, {lambda: report_lines})
|
141
|
+
end
|
151
142
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
info: validator.info_messages.map { |v| hashify(v) },
|
159
|
-
}
|
160
|
-
}.to_json
|
161
|
-
print json
|
162
|
-
else
|
163
|
-
if $stdout.tty?
|
164
|
-
puts "\r\n#{csv} is #{validator.valid? ? "VALID".green : "INVALID".red}"
|
165
|
-
else
|
166
|
-
puts "\r\n#{csv} is #{validator.valid? ? "VALID" : "INVALID"}"
|
167
|
-
end
|
168
|
-
print_errors(validator.errors, dump)
|
169
|
-
print_errors(validator.warnings, dump)
|
170
|
-
end
|
171
|
-
|
172
|
-
return false if werror && validator.warnings.size > 0
|
173
|
-
return validator.valid?
|
143
|
+
csv = if source.instance_of?(String)
|
144
|
+
source
|
145
|
+
elsif source.instance_of?(File)
|
146
|
+
source.path
|
147
|
+
else
|
148
|
+
"CSV"
|
174
149
|
end
|
175
150
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
151
|
+
if json === true
|
152
|
+
json = {
|
153
|
+
validation: {
|
154
|
+
state: validator.valid? ? "valid" : "invalid",
|
155
|
+
errors: validator.errors.map { |v| hashify(v) },
|
156
|
+
warnings: validator.warnings.map { |v| hashify(v) },
|
157
|
+
info: validator.info_messages.map { |v| hashify(v) }
|
158
|
+
}
|
159
|
+
}.to_json
|
160
|
+
print json
|
161
|
+
else
|
162
|
+
puts "\r\n#{csv} is #{validator.valid? ? Rainbow("VALID").green : Rainbow("INVALID").red}"
|
163
|
+
print_errors(validator.errors, dump)
|
164
|
+
print_errors(validator.warnings, dump)
|
165
|
+
end
|
166
|
+
|
167
|
+
return false if werror && validator.warnings.size > 0
|
168
|
+
validator.valid?
|
169
|
+
end
|
189
170
|
|
190
|
-
|
171
|
+
def hashify(error)
|
172
|
+
h = {
|
173
|
+
type: error.type,
|
174
|
+
category: error.category,
|
175
|
+
row: error.row,
|
176
|
+
col: error.column
|
177
|
+
}
|
178
|
+
|
179
|
+
if error.column && @schema && @schema.instance_of?(Csvlint::Schema) && @schema.fields[error.column - 1] != nil
|
180
|
+
field = @schema.fields[error.column - 1]
|
181
|
+
h[:header] = field.name
|
182
|
+
h[:constraints] = field.constraints.map { |k, v| [k.underscore, v] }.to_h
|
191
183
|
end
|
192
184
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
185
|
+
h
|
186
|
+
end
|
187
|
+
|
188
|
+
def report_lines
|
189
|
+
lambda do |row|
|
190
|
+
new_errors = row.errors.count
|
191
|
+
if new_errors > @error_count
|
192
|
+
print Rainbow("!").red
|
193
|
+
else
|
194
|
+
print Rainbow(".").green
|
202
195
|
end
|
196
|
+
@error_count = new_errors
|
203
197
|
end
|
204
|
-
|
198
|
+
end
|
205
199
|
end
|
206
200
|
end
|