csvlint 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/CHANGELOG.md +25 -1
- data/README.md +21 -14
- data/bin/csvlint +4 -155
- data/csvlint.gemspec +3 -0
- data/features/cli.feature +207 -0
- data/features/step_definitions/cli_steps.rb +7 -0
- data/features/support/aruba.rb +56 -0
- data/features/support/env.rb +2 -1
- data/lib/csvlint/cli.rb +165 -0
- data/lib/csvlint/schema.rb +7 -5
- data/lib/csvlint/version.rb +1 -1
- metadata +37 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MmYzODNiNTQyMTQ0OTNkZTQ1NGQwMzkwNmRkYWMxODNiNTIwNjNhMA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
NzNhZDQ1NmY1Njc5MDMyZjVlYTNiOTc4Y2YxZGRlNjVmNjJiMDFlYw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MTVlYjFlN2YxYzViNzEyOTg0MTk4ZTgzY2Q3Mzk2ZGE0NTllNjFlZjE0ODg4
|
10
|
+
ZTUwZTdjNTA0MWNmOTUyMjBjZjVmNGMyYjc1MmUxYjllMDIwYTVjYTg5ZTVm
|
11
|
+
NmEyMjAzZGVhMzI2NGUwYTI2YzAzZjcyODViYmEyYzVlNWMyZmE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZTU5ZjQzNDZkMWVhZTBkODAwNGFmMzM5ODZhMmMzODQ3ZTg3YzRiMGM2YzEz
|
14
|
+
MGE4M2VjZTIzODgwODM5MTY4MjEwM2M0N2NkZjIwY2VmNTk3Y2RmMWNmMTBl
|
15
|
+
NGM4MzU1MzVhOWQyMWEzM2JkZWQ1ODlkMzgyYTZlNTE3ZWI5MTc=
|
data/CHANGELOG.md
CHANGED
@@ -2,7 +2,31 @@
|
|
2
2
|
|
3
3
|
## [Unreleased](https://github.com/theodi/csvlint.rb/tree/HEAD)
|
4
4
|
|
5
|
-
[Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.1...HEAD)
|
5
|
+
[Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.3...HEAD)
|
6
|
+
|
7
|
+
**Merged pull requests:**
|
8
|
+
|
9
|
+
- Fixes for CLI [\#164](https://github.com/theodi/csvlint.rb/pull/164) ([pezholio](https://github.com/pezholio))
|
10
|
+
|
11
|
+
## [0.2.3](https://github.com/theodi/csvlint.rb/tree/0.2.3) (2015-10-20)
|
12
|
+
|
13
|
+
[Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.2...0.2.3)
|
14
|
+
|
15
|
+
**Closed issues:**
|
16
|
+
|
17
|
+
- Include field name with error [\#161](https://github.com/theodi/csvlint.rb/issues/161)
|
18
|
+
|
19
|
+
- Refactor the binary [\#150](https://github.com/theodi/csvlint.rb/issues/150)
|
20
|
+
|
21
|
+
**Merged pull requests:**
|
22
|
+
|
23
|
+
- Refactor CLI [\#163](https://github.com/theodi/csvlint.rb/pull/163) ([pezholio](https://github.com/pezholio))
|
24
|
+
|
25
|
+
- Update schema file example to clarify type [\#162](https://github.com/theodi/csvlint.rb/pull/162) ([wachunga](https://github.com/wachunga))
|
26
|
+
|
27
|
+
## [0.2.2](https://github.com/theodi/csvlint.rb/tree/0.2.2) (2015-10-09)
|
28
|
+
|
29
|
+
[Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.1...0.2.2)
|
6
30
|
|
7
31
|
**Closed issues:**
|
8
32
|
|
data/README.md
CHANGED
@@ -162,20 +162,26 @@ An example JSON Table Schema schema file is:
|
|
162
162
|
"fields": [
|
163
163
|
{
|
164
164
|
"name": "id",
|
165
|
-
|
165
|
+
"constraints": {
|
166
|
+
"required": true,
|
167
|
+
"type": "http://www.w3.org/TR/xmlschema-2/#integer"
|
168
|
+
}
|
169
|
+
},
|
170
|
+
{
|
171
|
+
"name": "price",
|
172
|
+
"constraints": {
|
173
|
+
"required": true,
|
174
|
+
"minLength": 1
|
175
|
+
}
|
166
176
|
},
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
"pattern": "[A-Z]{1,2}[0-9][0-9A-Z]? ?[0-9][A-Z]{2}"
|
176
|
-
}
|
177
|
-
}
|
178
|
-
]
|
177
|
+
{
|
178
|
+
"name": "postcode",
|
179
|
+
"constraints": {
|
180
|
+
"required": true,
|
181
|
+
"pattern": "[A-Z]{1,2}[0-9][0-9A-Z]? ?[0-9][A-Z]{2}"
|
182
|
+
}
|
183
|
+
}
|
184
|
+
]
|
179
185
|
}
|
180
186
|
|
181
187
|
An equivalent CSV on the Web Metadata file is:
|
@@ -187,7 +193,8 @@ An equivalent CSV on the Web Metadata file is:
|
|
187
193
|
"columns": [
|
188
194
|
{
|
189
195
|
"name": "id",
|
190
|
-
"required": true
|
196
|
+
"required": true,
|
197
|
+
"datatype": { "base": "integer" }
|
191
198
|
},
|
192
199
|
{
|
193
200
|
"name": "price",
|
data/bin/csvlint
CHANGED
@@ -1,161 +1,10 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
$:.unshift File.join( File.dirname(__FILE__), "..", "lib")
|
3
3
|
|
4
|
-
require 'csvlint'
|
5
|
-
require 'colorize'
|
6
|
-
require 'json'
|
7
|
-
require 'optparse'
|
8
|
-
require 'pp'
|
4
|
+
require 'csvlint/cli'
|
9
5
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
opts.banner = "Usage: csvlint [options] [file]"
|
14
|
-
|
15
|
-
opts.on("-d", "--dump-errors", "Pretty print error and warning objects.") do |d|
|
16
|
-
options[:dump] = d
|
17
|
-
end
|
18
|
-
|
19
|
-
opts.on("-s", "--schema FILENAME", "Schema file") do |s|
|
20
|
-
options[:schema] = s
|
21
|
-
end
|
22
|
-
|
23
|
-
opts.on_tail("-h", "--help",
|
24
|
-
"Show this message") do
|
25
|
-
puts opts
|
26
|
-
exit
|
27
|
-
end
|
28
|
-
|
29
|
-
begin
|
30
|
-
opts.parse!
|
31
|
-
rescue OptionParser::InvalidOption => e
|
32
|
-
puts e
|
33
|
-
puts opts
|
34
|
-
exit(1)
|
35
|
-
end
|
36
|
-
|
37
|
-
def print_error(index, error, dump, color)
|
38
|
-
location = ""
|
39
|
-
location += error.row.to_s if error.row
|
40
|
-
location += "#{error.row ? "," : ""}#{error.column.to_s}" if error.column
|
41
|
-
if error.row || error.column
|
42
|
-
location = "#{error.row ? "Row" : "Column"}: #{location}"
|
43
|
-
end
|
44
|
-
output_string = "#{index+1}. #{error.type}"
|
45
|
-
output_string += ". #{location}" unless location.empty?
|
46
|
-
output_string += ". #{error.content}" if error.content
|
47
|
-
|
48
|
-
if $stdout.tty?
|
49
|
-
puts output_string.colorize(color)
|
50
|
-
else
|
51
|
-
puts output_string
|
52
|
-
end
|
53
|
-
|
54
|
-
if dump
|
55
|
-
pp error
|
56
|
-
end
|
57
|
-
|
58
|
-
end
|
59
|
-
|
60
|
-
def validate_csv(source, schema, dump)
|
61
|
-
@error_count = 0
|
62
|
-
report_lines = lambda do |row|
|
63
|
-
new_errors = row.errors.count
|
64
|
-
if new_errors > @error_count
|
65
|
-
print "!".red
|
66
|
-
else
|
67
|
-
print ".".green
|
68
|
-
end
|
69
|
-
@error_count = new_errors
|
70
|
-
end
|
71
|
-
validator = Csvlint::Validator.new( source, {}, schema, { lambda: report_lines } )
|
72
|
-
|
73
|
-
if $stdout.tty?
|
74
|
-
puts "\r\n#{source.path || source || "CSV"} is #{validator.valid? ? "VALID".green : "INVALID".red}"
|
75
|
-
else
|
76
|
-
puts "\r\n#{source.path || source || "CSV"} is #{validator.valid? ? "VALID" : "INVALID"}"
|
77
|
-
end
|
78
|
-
|
79
|
-
if validator.errors.size > 0
|
80
|
-
validator.errors.each_with_index do |error, i|
|
81
|
-
print_error(i, error, dump, :red)
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
if validator.warnings.size > 0
|
86
|
-
validator.warnings.each_with_index do |error, i|
|
87
|
-
print_error(i, error, dump, :yellow)
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
return validator.valid?
|
92
|
-
end
|
93
|
-
|
94
|
-
if ARGV.length == 0 && !$stdin.tty?
|
95
|
-
source = StringIO.new(ARGF.read)
|
6
|
+
if ARGV == ["help"]
|
7
|
+
Csvlint::Cli.start(["help"])
|
96
8
|
else
|
97
|
-
|
98
|
-
source = ARGV[0]
|
99
|
-
unless source =~ /^http(s)?/
|
100
|
-
begin
|
101
|
-
source = File.new( source ) unless source =~ /^http(s)?/
|
102
|
-
rescue Errno::ENOENT
|
103
|
-
puts "#{source} not found"
|
104
|
-
exit 1
|
105
|
-
end
|
106
|
-
end
|
107
|
-
elsif !options[:schema]
|
108
|
-
puts "No CSV data to validate."
|
109
|
-
puts opts
|
110
|
-
exit 1
|
111
|
-
end
|
9
|
+
Csvlint::Cli.start(ARGV.unshift("validate"))
|
112
10
|
end
|
113
|
-
|
114
|
-
schema = nil
|
115
|
-
if options[:schema]
|
116
|
-
begin
|
117
|
-
schema = Csvlint::Schema.load_from_json(options[:schema])
|
118
|
-
rescue JSON::ParserError => e
|
119
|
-
output_string = "invalid metadata: malformed JSON"
|
120
|
-
if $stdout.tty?
|
121
|
-
puts output_string.colorize(:red)
|
122
|
-
else
|
123
|
-
puts output_string
|
124
|
-
end
|
125
|
-
exit 1
|
126
|
-
rescue Csvlint::Csvw::MetadataError => e
|
127
|
-
output_string = "invalid metadata: #{e.message}#{" at " + e.path if e.path}"
|
128
|
-
if $stdout.tty?
|
129
|
-
puts output_string.colorize(:red)
|
130
|
-
else
|
131
|
-
puts output_string
|
132
|
-
end
|
133
|
-
exit 1
|
134
|
-
rescue Errno::ENOENT
|
135
|
-
puts "#{options[:schema]} not found"
|
136
|
-
exit 1
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
valid = true
|
141
|
-
if source.nil?
|
142
|
-
unless schema.instance_of? Csvlint::Csvw::TableGroup
|
143
|
-
puts "No CSV data to validate."
|
144
|
-
puts opts
|
145
|
-
exit 1
|
146
|
-
end
|
147
|
-
schema.tables.keys.each do |source|
|
148
|
-
begin
|
149
|
-
source = source.sub("file:","")
|
150
|
-
source = File.new( source )
|
151
|
-
rescue Errno::ENOENT
|
152
|
-
puts "#{source} not found"
|
153
|
-
exit 1
|
154
|
-
end unless source =~ /^http(s)?/
|
155
|
-
valid &= validate_csv(source, schema, options[:dump])
|
156
|
-
end
|
157
|
-
else
|
158
|
-
valid = validate_csv(source, schema, options[:dump])
|
159
|
-
end
|
160
|
-
|
161
|
-
exit 1 unless valid
|
data/csvlint.gemspec
CHANGED
@@ -26,6 +26,7 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.add_dependency "typhoeus"
|
27
27
|
spec.add_dependency "escape_utils"
|
28
28
|
spec.add_dependency "uri_template"
|
29
|
+
spec.add_dependency "thor"
|
29
30
|
|
30
31
|
spec.add_development_dependency "bundler", "~> 1.3"
|
31
32
|
spec.add_development_dependency "rake"
|
@@ -40,4 +41,6 @@ Gem::Specification.new do |spec|
|
|
40
41
|
spec.add_development_dependency "coveralls"
|
41
42
|
spec.add_development_dependency "pry"
|
42
43
|
spec.add_development_dependency "github_changelog_generator"
|
44
|
+
spec.add_development_dependency "aruba"
|
45
|
+
|
43
46
|
end
|
data/features/cli.feature
ADDED
@@ -0,0 +1,207 @@
|
|
1
|
+
Feature: CSVlint CLI
|
2
|
+
|
3
|
+
Scenario: Valid CSV from url
|
4
|
+
Given I have a CSV with the following content:
|
5
|
+
"""
|
6
|
+
"Foo","Bar","Baz"
|
7
|
+
"1","2","3"
|
8
|
+
"3","2","1"
|
9
|
+
"""
|
10
|
+
And it is stored at the url "http://example.com/example1.csv"
|
11
|
+
When I run `csvlint http://example.com/example1.csv`
|
12
|
+
Then the output should contain "http://example.com/example1.csv is VALID"
|
13
|
+
|
14
|
+
Scenario: Valid CSV from file
|
15
|
+
When I run `csvlint ../../features/fixtures/valid.csv`
|
16
|
+
Then the output should contain "valid.csv is VALID"
|
17
|
+
|
18
|
+
# This is a hacky way of saying to run `cat features/fixtures/valid.csv | csvlint`
|
19
|
+
Scenario: Valid CSV from pipe
|
20
|
+
Given I have stubbed ARGF to contain "features/fixtures/valid.csv"
|
21
|
+
When I run `csvlint`
|
22
|
+
Then the output should contain "CSV is VALID"
|
23
|
+
|
24
|
+
Scenario: URL that 404s
|
25
|
+
Given there is no file at the url "http://example.com/example1.csv"
|
26
|
+
And there is no file at the url "http://example.com/.well-known/csvm"
|
27
|
+
And there is no file at the url "http://example.com/example1.csv-metadata.json"
|
28
|
+
And there is no file at the url "http://example.com/csv-metadata.json"
|
29
|
+
When I run `csvlint http://example.com/example1.csv`
|
30
|
+
Then the output should contain "http://example.com/example1.csv is INVALID"
|
31
|
+
And the output should contain "not_found"
|
32
|
+
|
33
|
+
Scenario: File doesn't exist
|
34
|
+
When I run `csvlint ../../features/fixtures/non-existent-file.csv`
|
35
|
+
Then the output should contain "non-existent-file.csv not found"
|
36
|
+
|
37
|
+
Scenario: No file or URL specified
|
38
|
+
When I run `csvlint`
|
39
|
+
Then the output should contain "No CSV data to validate"
|
40
|
+
|
41
|
+
Scenario: No file or URL specified, but schema specified
|
42
|
+
Given I have a schema with the following content:
|
43
|
+
"""
|
44
|
+
{
|
45
|
+
"fields": [
|
46
|
+
{ "name": "Name", "constraints": { "required": true } },
|
47
|
+
{ "name": "Id", "constraints": { "required": true, "minLength": 1 } },
|
48
|
+
{ "name": "Email", "constraints": { "required": true } }
|
49
|
+
]
|
50
|
+
}
|
51
|
+
"""
|
52
|
+
And the schema is stored at the url "http://example.com/schema.json"
|
53
|
+
When I run `csvlint --schema http://example.com/schema.json`
|
54
|
+
Then the output should contain "No CSV data to validate"
|
55
|
+
|
56
|
+
Scenario: Invalid CSV from url
|
57
|
+
Given I have a CSV with the following content:
|
58
|
+
"""
|
59
|
+
"Foo", "Bar" , "Baz"
|
60
|
+
"""
|
61
|
+
And it is stored at the url "http://example.com/example1.csv"
|
62
|
+
When I run `csvlint http://example.com/example1.csv`
|
63
|
+
Then the output should contain "http://example.com/example1.csv is INVALID"
|
64
|
+
And the output should contain "whitespace"
|
65
|
+
|
66
|
+
Scenario: Specify schema
|
67
|
+
Given I have a CSV with the following content:
|
68
|
+
"""
|
69
|
+
"Bob","1234","bob@example.org"
|
70
|
+
"Alice","5","alice@example.com"
|
71
|
+
"""
|
72
|
+
And it is stored at the url "http://example.com/example1.csv"
|
73
|
+
And I have a schema with the following content:
|
74
|
+
"""
|
75
|
+
{
|
76
|
+
"fields": [
|
77
|
+
{ "name": "Name", "constraints": { "required": true } },
|
78
|
+
{ "name": "Id", "constraints": { "required": true, "minLength": 1 } },
|
79
|
+
{ "name": "Email", "constraints": { "required": true } }
|
80
|
+
]
|
81
|
+
}
|
82
|
+
"""
|
83
|
+
And the schema is stored at the url "http://example.com/schema.json"
|
84
|
+
When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json`
|
85
|
+
Then the output should contain "http://example.com/example1.csv is VALID"
|
86
|
+
|
87
|
+
Scenario: Schema errors
|
88
|
+
Given I have a CSV with the following content:
|
89
|
+
"""
|
90
|
+
"Bob","1234","bob@example.org"
|
91
|
+
"Alice","5","alice@example.com"
|
92
|
+
"""
|
93
|
+
And it is stored at the url "http://example.com/example1.csv"
|
94
|
+
And I have a schema with the following content:
|
95
|
+
"""
|
96
|
+
{
|
97
|
+
"fields": [
|
98
|
+
{ "name": "Name", "constraints": { "required": true } },
|
99
|
+
{ "name": "Id", "constraints": { "required": true, "minLength": 3 } },
|
100
|
+
{ "name": "Email", "constraints": { "required": true } }
|
101
|
+
]
|
102
|
+
}
|
103
|
+
"""
|
104
|
+
And the schema is stored at the url "http://example.com/schema.json"
|
105
|
+
When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json`
|
106
|
+
Then the output should contain "http://example.com/example1.csv is INVALID"
|
107
|
+
And the output should contain "1. Id: min_length. Row: 2,2. 5"
|
108
|
+
And the output should contain "1. malformed_header. Row: 1. Bob,1234,bob@example.org"
|
109
|
+
|
110
|
+
Scenario: Invalid schema
|
111
|
+
Given I have a CSV with the following content:
|
112
|
+
"""
|
113
|
+
"Bob","1234","bob@example.org"
|
114
|
+
"Alice","5","alice@example.com"
|
115
|
+
"""
|
116
|
+
And it is stored at the url "http://example.com/example1.csv"
|
117
|
+
And I have a schema with the following content:
|
118
|
+
"""
|
119
|
+
NO JSON HERE SON
|
120
|
+
"""
|
121
|
+
And the schema is stored at the url "http://example.com/schema.json"
|
122
|
+
Then nothing should be outputted to STDERR
|
123
|
+
When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json`
|
124
|
+
And the output should contain "invalid metadata: malformed JSON"
|
125
|
+
|
126
|
+
Scenario: Schema that 404s
|
127
|
+
Given I have a CSV with the following content:
|
128
|
+
"""
|
129
|
+
"Bob","1234","bob@example.org"
|
130
|
+
"Alice","5","alice@example.com"
|
131
|
+
"""
|
132
|
+
And it is stored at the url "http://example.com/example1.csv"
|
133
|
+
And there is no file at the url "http://example.com/schema404.json"
|
134
|
+
When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema404.json`
|
135
|
+
Then the output should contain "http://example.com/schema404.json not found"
|
136
|
+
|
137
|
+
Scenario: Schema that doesn't exist
|
138
|
+
Given I have a CSV with the following content:
|
139
|
+
"""
|
140
|
+
"Bob","1234","bob@example.org"
|
141
|
+
"Alice","5","alice@example.com"
|
142
|
+
"""
|
143
|
+
And it is stored at the url "http://example.com/example1.csv"
|
144
|
+
When I run `csvlint http://example.com/example1.csv --schema /fake/file/path.json`
|
145
|
+
Then the output should contain "/fake/file/path.json not found"
|
146
|
+
|
147
|
+
Scenario: Valid CSVw schema
|
148
|
+
Given I have a CSV with the following content:
|
149
|
+
"""
|
150
|
+
"Bob","1234","bob@example.org"
|
151
|
+
"Alice","5","alice@example.com"
|
152
|
+
"""
|
153
|
+
And it is stored at the url "http://example.com/example1.csv"
|
154
|
+
And I have metadata with the following content:
|
155
|
+
"""
|
156
|
+
{
|
157
|
+
"@context": "http://www.w3.org/ns/csvw",
|
158
|
+
"url": "http://example.com/example1.csv",
|
159
|
+
"dialect": { "header": false },
|
160
|
+
"tableSchema": {
|
161
|
+
"columns": [
|
162
|
+
{ "name": "Name", "required": true },
|
163
|
+
{ "name": "Id", "required": true, "datatype": { "base": "string", "minLength": 1 } },
|
164
|
+
{ "name": "Email", "required": true }
|
165
|
+
]
|
166
|
+
}
|
167
|
+
}
|
168
|
+
"""
|
169
|
+
And the schema is stored at the url "http://example.com/schema.json"
|
170
|
+
When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json`
|
171
|
+
Then the output should contain "http://example.com/example1.csv is VALID"
|
172
|
+
|
173
|
+
Scenario: CSVw schema with invalid CSV
|
174
|
+
Given I have a CSV with the following content:
|
175
|
+
"""
|
176
|
+
"Bob","1234","bob@example.org"
|
177
|
+
"Alice","5","alice@example.com"
|
178
|
+
"""
|
179
|
+
And it is stored at the url "http://example.com/example1.csv"
|
180
|
+
And I have metadata with the following content:
|
181
|
+
"""
|
182
|
+
{
|
183
|
+
"@context": "http://www.w3.org/ns/csvw",
|
184
|
+
"url": "http://example.com/example1.csv",
|
185
|
+
"dialect": { "header": false },
|
186
|
+
"tableSchema": {
|
187
|
+
"columns": [
|
188
|
+
{ "name": "Name", "required": true },
|
189
|
+
{ "name": "Id", "required": true, "datatype": { "base": "string", "minLength": 3 } },
|
190
|
+
{ "name": "Email", "required": true }
|
191
|
+
]
|
192
|
+
}
|
193
|
+
}
|
194
|
+
"""
|
195
|
+
And the schema is stored at the url "http://example.com/schema.json"
|
196
|
+
When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json`
|
197
|
+
Then the output should contain "http://example.com/example1.csv is INVALID"
|
198
|
+
And the output should contain "1. min_length. Row: 2,2. 5"
|
199
|
+
|
200
|
+
Scenario: CSVw table Schema
|
201
|
+
Given I have a metadata file called "csvw/countries.json"
|
202
|
+
And the metadata is stored at the url "http://w3c.github.io/csvw/tests/countries.json"
|
203
|
+
And I have a file called "csvw/countries.csv" at the url "http://w3c.github.io/csvw/tests/countries.csv"
|
204
|
+
And I have a file called "csvw/country_slice.csv" at the url "http://w3c.github.io/csvw/tests/country_slice.csv"
|
205
|
+
When I run `csvlint --schema http://w3c.github.io/csvw/tests/countries.json`
|
206
|
+
Then the output should contain "http://w3c.github.io/csvw/tests/countries.csv is VALID"
|
207
|
+
And the output should contain "http://w3c.github.io/csvw/tests/country_slice.csv is VALID"
|
data/features/support/aruba.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'aruba'
|
2
|
+
require 'aruba/in_process'
|
3
|
+
require 'aruba/cucumber'
|
4
|
+
|
5
|
+
require 'csvlint/cli'
|
6
|
+
|
7
|
+
module Csvlint
|
8
|
+
class CliRunner
|
9
|
+
# Allow everything fun to be injected from the outside while defaulting to normal implementations.
|
10
|
+
def initialize(argv, stdin = STDIN, stdout = STDOUT, stderr = STDERR, kernel = Kernel)
|
11
|
+
@argv, @stdin, @stdout, @stderr, @kernel = argv, stdin, stdout, stderr, kernel
|
12
|
+
end
|
13
|
+
|
14
|
+
def execute!
|
15
|
+
exit_code = begin
|
16
|
+
# Thor accesses these streams directly rather than letting them be injected, so we replace them...
|
17
|
+
$stderr = @stderr
|
18
|
+
$stdin = @stdin
|
19
|
+
$stdout = @stdout
|
20
|
+
|
21
|
+
# Run our normal Thor app the way we know and love.
|
22
|
+
Csvlint::Cli.start(@argv.dup.unshift("validate"))
|
23
|
+
|
24
|
+
# Thor::Base#start does not have a return value, assume success if no exception is raised.
|
25
|
+
0
|
26
|
+
rescue StandardError => e
|
27
|
+
# The ruby interpreter would pipe this to STDERR and exit 1 in the case of an unhandled exception
|
28
|
+
b = e.backtrace
|
29
|
+
@stderr.puts("#{b.shift}: #{e.message} (#{e.class})")
|
30
|
+
@stderr.puts(b.map{|s| "\tfrom #{s}"}.join("\n"))
|
31
|
+
1
|
32
|
+
rescue SystemExit => e
|
33
|
+
e.status
|
34
|
+
ensure
|
35
|
+
# TODO: reset your app here, free up resources, etc.
|
36
|
+
# Examples:
|
37
|
+
# MyApp.logger.flush
|
38
|
+
# MyApp.logger.close
|
39
|
+
# MyApp.logger = nil
|
40
|
+
#
|
41
|
+
# MyApp.reset_singleton_instance_variables
|
42
|
+
|
43
|
+
# ...then we put the streams back.
|
44
|
+
$stderr = STDERR
|
45
|
+
$stdin = STDIN
|
46
|
+
$stdout = STDOUT
|
47
|
+
end
|
48
|
+
|
49
|
+
# Proxy our exit code back to the injected kernel.
|
50
|
+
@kernel.exit(exit_code)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
Aruba.process = Aruba::Processes::InProcess
|
56
|
+
Aruba.process.main_class = Csvlint::CliRunner
|
data/features/support/env.rb
CHANGED
@@ -4,6 +4,7 @@ Coveralls.wear_merged!('test_frameworks')
|
|
4
4
|
$:.unshift File.join( File.dirname(__FILE__), "..", "..", "lib")
|
5
5
|
|
6
6
|
require 'rspec/expectations'
|
7
|
+
require 'cucumber/rspec/doubles'
|
7
8
|
require 'csvlint'
|
8
9
|
require 'pry'
|
9
10
|
|
@@ -22,4 +23,4 @@ end
|
|
22
23
|
|
23
24
|
World do
|
24
25
|
CustomWorld.new
|
25
|
-
end
|
26
|
+
end
|
data/lib/csvlint/cli.rb
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
require 'csvlint'
|
2
|
+
require 'colorize'
|
3
|
+
require 'json'
|
4
|
+
require 'pp'
|
5
|
+
require 'thor'
|
6
|
+
|
7
|
+
module Csvlint
|
8
|
+
class Cli < Thor
|
9
|
+
|
10
|
+
desc "myfile.csv OR csvlint http://example.com/myfile.csv", "Supports validating CSV files to check their syntax and contents"
|
11
|
+
option :dump_errors, desc: "Pretty print error and warning objects.", type: :boolean, aliases: :d
|
12
|
+
option :schema, banner: "FILENAME OR URL", desc: "Schema file", aliases: :s
|
13
|
+
def validate(source = nil)
|
14
|
+
source = read_source(source)
|
15
|
+
@schema = get_schema(options[:schema]) if options[:schema]
|
16
|
+
fetch_schema_tables(@schema, options) if source.nil?
|
17
|
+
|
18
|
+
valid = validate_csv(source, @schema, options[:dump])
|
19
|
+
exit 1 unless valid
|
20
|
+
end
|
21
|
+
|
22
|
+
def help
|
23
|
+
self.class.command_help(shell, :validate)
|
24
|
+
end
|
25
|
+
|
26
|
+
default_task :validate
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def read_source(source)
|
31
|
+
if source.nil?
|
32
|
+
# If no source is present, try reading from stdin
|
33
|
+
if !$stdin.tty?
|
34
|
+
source = StringIO.new(ARGF.read) rescue nil
|
35
|
+
return_error "No CSV data to validate" if !options[:schema] && source.nil?
|
36
|
+
end
|
37
|
+
else
|
38
|
+
# If the source isn't a URL, it's a file
|
39
|
+
unless source =~ /^http(s)?/
|
40
|
+
begin
|
41
|
+
source = File.new( source )
|
42
|
+
rescue Errno::ENOENT
|
43
|
+
return_error "#{source} not found"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
source
|
49
|
+
end
|
50
|
+
|
51
|
+
def get_schema(schema)
|
52
|
+
begin
|
53
|
+
schema = Csvlint::Schema.load_from_json(schema, false)
|
54
|
+
rescue Csvlint::Csvw::MetadataError => e
|
55
|
+
return_error "invalid metadata: #{e.message}#{" at " + e.path if e.path}"
|
56
|
+
rescue OpenURI::HTTPError, Errno::ENOENT
|
57
|
+
return_error "#{options[:schema]} not found"
|
58
|
+
end
|
59
|
+
|
60
|
+
if schema.class == Csvlint::Schema && schema.description == "malformed"
|
61
|
+
return_error "invalid metadata: malformed JSON"
|
62
|
+
end
|
63
|
+
|
64
|
+
schema
|
65
|
+
end
|
66
|
+
|
67
|
+
def fetch_schema_tables(schema, options)
|
68
|
+
valid = true
|
69
|
+
|
70
|
+
unless schema.instance_of? Csvlint::Csvw::TableGroup
|
71
|
+
return_error "No CSV data to validate."
|
72
|
+
end
|
73
|
+
schema.tables.keys.each do |source|
|
74
|
+
begin
|
75
|
+
source = source.sub("file:","")
|
76
|
+
source = File.new( source )
|
77
|
+
rescue Errno::ENOENT
|
78
|
+
return_error "#{source} not found"
|
79
|
+
end unless source =~ /^http(s)?/
|
80
|
+
valid &= validate_csv(source, schema, options[:dump])
|
81
|
+
end
|
82
|
+
|
83
|
+
exit 1 unless valid
|
84
|
+
end
|
85
|
+
|
86
|
+
def print_error(index, error, dump, color)
|
87
|
+
location = ""
|
88
|
+
location += error.row.to_s if error.row
|
89
|
+
location += "#{error.row ? "," : ""}#{error.column.to_s}" if error.column
|
90
|
+
if error.row || error.column
|
91
|
+
location = "#{error.row ? "Row" : "Column"}: #{location}"
|
92
|
+
end
|
93
|
+
output_string = "#{index+1}. "
|
94
|
+
if error.column && @schema && @schema.class == Csvlint::Schema
|
95
|
+
output_string += "#{@schema.fields[error.column - 1].name}: "
|
96
|
+
end
|
97
|
+
output_string += "#{error.type}"
|
98
|
+
output_string += ". #{location}" unless location.empty?
|
99
|
+
output_string += ". #{error.content}" if error.content
|
100
|
+
|
101
|
+
if $stdout.tty?
|
102
|
+
puts output_string.colorize(color)
|
103
|
+
else
|
104
|
+
puts output_string
|
105
|
+
end
|
106
|
+
|
107
|
+
if dump
|
108
|
+
pp error
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
def print_errors(errors, dump)
|
113
|
+
if errors.size > 0
|
114
|
+
errors.each_with_index { |error, i| print_error(i, error, dump, :red) }
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
def return_error(message)
|
119
|
+
if $stdout.tty?
|
120
|
+
puts message.colorize(:red)
|
121
|
+
else
|
122
|
+
puts message
|
123
|
+
end
|
124
|
+
exit 1
|
125
|
+
end
|
126
|
+
|
127
|
+
def validate_csv(source, schema, dump)
|
128
|
+
@error_count = 0
|
129
|
+
|
130
|
+
validator = Csvlint::Validator.new( source, {}, schema, { lambda: report_lines } )
|
131
|
+
|
132
|
+
if source.class == String
|
133
|
+
csv = source
|
134
|
+
elsif source.class == File
|
135
|
+
csv = source.path
|
136
|
+
else
|
137
|
+
csv = "CSV"
|
138
|
+
end
|
139
|
+
|
140
|
+
if $stdout.tty?
|
141
|
+
puts "\r\n#{csv} is #{validator.valid? ? "VALID".green : "INVALID".red}"
|
142
|
+
else
|
143
|
+
puts "\r\n#{csv} is #{validator.valid? ? "VALID" : "INVALID"}"
|
144
|
+
end
|
145
|
+
|
146
|
+
print_errors(validator.errors, dump)
|
147
|
+
print_errors(validator.warnings, dump)
|
148
|
+
|
149
|
+
return validator.valid?
|
150
|
+
end
|
151
|
+
|
152
|
+
def report_lines
|
153
|
+
lambda do |row|
|
154
|
+
new_errors = row.errors.count
|
155
|
+
if new_errors > @error_count
|
156
|
+
print "!".red
|
157
|
+
else
|
158
|
+
print ".".green
|
159
|
+
end
|
160
|
+
@error_count = new_errors
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
end
|
165
|
+
end
|
data/lib/csvlint/schema.rb
CHANGED
@@ -29,7 +29,7 @@ module Csvlint
|
|
29
29
|
return Csvlint::Csvw::TableGroup.from_json(uri, json)
|
30
30
|
end
|
31
31
|
|
32
|
-
def load_from_json(uri)
|
32
|
+
def load_from_json(uri, output_errors = true)
|
33
33
|
begin
|
34
34
|
json = JSON.parse( open(uri).read )
|
35
35
|
if json["@context"]
|
@@ -40,12 +40,14 @@ module Csvlint
|
|
40
40
|
end
|
41
41
|
rescue Csvlint::Csvw::MetadataError => e
|
42
42
|
raise e
|
43
|
-
rescue OpenURI::HTTPError => e
|
43
|
+
rescue OpenURI::HTTPError, Errno::ENOENT => e
|
44
44
|
raise e
|
45
45
|
rescue => e
|
46
|
-
|
47
|
-
|
48
|
-
|
46
|
+
if output_errors === true
|
47
|
+
STDERR.puts e.class
|
48
|
+
STDERR.puts e.message
|
49
|
+
STDERR.puts e.backtrace
|
50
|
+
end
|
49
51
|
return Schema.new(nil, [], "malformed", "malformed")
|
50
52
|
end
|
51
53
|
end
|
data/lib/csvlint/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvlint
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- pezholio
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-09 00:00:00.000000000 Z
|
11
|
+
date: 2015-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mime-types
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - ! '>='
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: thor
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ! '>='
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ! '>='
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
name: bundler
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -304,6 +318,20 @@ dependencies:
|
|
304
318
|
- - ! '>='
|
305
319
|
- !ruby/object:Gem::Version
|
306
320
|
version: '0'
|
321
|
+
- !ruby/object:Gem::Dependency
|
322
|
+
name: aruba
|
323
|
+
requirement: !ruby/object:Gem::Requirement
|
324
|
+
requirements:
|
325
|
+
- - ! '>='
|
326
|
+
- !ruby/object:Gem::Version
|
327
|
+
version: '0'
|
328
|
+
type: :development
|
329
|
+
prerelease: false
|
330
|
+
version_requirements: !ruby/object:Gem::Requirement
|
331
|
+
requirements:
|
332
|
+
- - ! '>='
|
333
|
+
- !ruby/object:Gem::Version
|
334
|
+
version: '0'
|
307
335
|
description: CSV Validator
|
308
336
|
email:
|
309
337
|
- pezholio@gmail.com
|
@@ -327,6 +355,7 @@ files:
|
|
327
355
|
- bin/csvlint
|
328
356
|
- csvlint.gemspec
|
329
357
|
- features/check_format.feature
|
358
|
+
- features/cli.feature
|
330
359
|
- features/csv_options.feature
|
331
360
|
- features/csvupload.feature
|
332
361
|
- features/csvw_schema_validation.feature
|
@@ -347,6 +376,7 @@ files:
|
|
347
376
|
- features/parse_csv.feature
|
348
377
|
- features/schema_validation.feature
|
349
378
|
- features/sources.feature
|
379
|
+
- features/step_definitions/cli_steps.rb
|
350
380
|
- features/step_definitions/csv_options_steps.rb
|
351
381
|
- features/step_definitions/information_steps.rb
|
352
382
|
- features/step_definitions/parse_csv_steps.rb
|
@@ -355,6 +385,7 @@ files:
|
|
355
385
|
- features/step_definitions/validation_errors_steps.rb
|
356
386
|
- features/step_definitions/validation_info_steps.rb
|
357
387
|
- features/step_definitions/validation_warnings_steps.rb
|
388
|
+
- features/support/aruba.rb
|
358
389
|
- features/support/env.rb
|
359
390
|
- features/support/load_tests.rb
|
360
391
|
- features/support/webmock.rb
|
@@ -362,6 +393,7 @@ files:
|
|
362
393
|
- features/validation_info.feature
|
363
394
|
- features/validation_warnings.feature
|
364
395
|
- lib/csvlint.rb
|
396
|
+
- lib/csvlint/cli.rb
|
365
397
|
- lib/csvlint/csvw/column.rb
|
366
398
|
- lib/csvlint/csvw/date_format.rb
|
367
399
|
- lib/csvlint/csvw/metadata_error.rb
|
@@ -410,6 +442,7 @@ specification_version: 4
|
|
410
442
|
summary: CSV Validator
|
411
443
|
test_files:
|
412
444
|
- features/check_format.feature
|
445
|
+
- features/cli.feature
|
413
446
|
- features/csv_options.feature
|
414
447
|
- features/csvupload.feature
|
415
448
|
- features/csvw_schema_validation.feature
|
@@ -430,6 +463,7 @@ test_files:
|
|
430
463
|
- features/parse_csv.feature
|
431
464
|
- features/schema_validation.feature
|
432
465
|
- features/sources.feature
|
466
|
+
- features/step_definitions/cli_steps.rb
|
433
467
|
- features/step_definitions/csv_options_steps.rb
|
434
468
|
- features/step_definitions/information_steps.rb
|
435
469
|
- features/step_definitions/parse_csv_steps.rb
|
@@ -438,6 +472,7 @@ test_files:
|
|
438
472
|
- features/step_definitions/validation_errors_steps.rb
|
439
473
|
- features/step_definitions/validation_info_steps.rb
|
440
474
|
- features/step_definitions/validation_warnings_steps.rb
|
475
|
+
- features/support/aruba.rb
|
441
476
|
- features/support/env.rb
|
442
477
|
- features/support/load_tests.rb
|
443
478
|
- features/support/webmock.rb
|