csvlint 0.2.2 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/CHANGELOG.md +25 -1
- data/README.md +21 -14
- data/bin/csvlint +4 -155
- data/csvlint.gemspec +3 -0
- data/features/cli.feature +207 -0
- data/features/step_definitions/cli_steps.rb +7 -0
- data/features/support/aruba.rb +56 -0
- data/features/support/env.rb +2 -1
- data/lib/csvlint/cli.rb +165 -0
- data/lib/csvlint/schema.rb +7 -5
- data/lib/csvlint/version.rb +1 -1
- metadata +37 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MmYzODNiNTQyMTQ0OTNkZTQ1NGQwMzkwNmRkYWMxODNiNTIwNjNhMA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
NzNhZDQ1NmY1Njc5MDMyZjVlYTNiOTc4Y2YxZGRlNjVmNjJiMDFlYw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MTVlYjFlN2YxYzViNzEyOTg0MTk4ZTgzY2Q3Mzk2ZGE0NTllNjFlZjE0ODg4
|
10
|
+
ZTUwZTdjNTA0MWNmOTUyMjBjZjVmNGMyYjc1MmUxYjllMDIwYTVjYTg5ZTVm
|
11
|
+
NmEyMjAzZGVhMzI2NGUwYTI2YzAzZjcyODViYmEyYzVlNWMyZmE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZTU5ZjQzNDZkMWVhZTBkODAwNGFmMzM5ODZhMmMzODQ3ZTg3YzRiMGM2YzEz
|
14
|
+
MGE4M2VjZTIzODgwODM5MTY4MjEwM2M0N2NkZjIwY2VmNTk3Y2RmMWNmMTBl
|
15
|
+
NGM4MzU1MzVhOWQyMWEzM2JkZWQ1ODlkMzgyYTZlNTE3ZWI5MTc=
|
data/CHANGELOG.md
CHANGED
@@ -2,7 +2,31 @@
|
|
2
2
|
|
3
3
|
## [Unreleased](https://github.com/theodi/csvlint.rb/tree/HEAD)
|
4
4
|
|
5
|
-
[Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.1...HEAD)
|
5
|
+
[Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.3...HEAD)
|
6
|
+
|
7
|
+
**Merged pull requests:**
|
8
|
+
|
9
|
+
- Fixes for CLI [\#164](https://github.com/theodi/csvlint.rb/pull/164) ([pezholio](https://github.com/pezholio))
|
10
|
+
|
11
|
+
## [0.2.3](https://github.com/theodi/csvlint.rb/tree/0.2.3) (2015-10-20)
|
12
|
+
|
13
|
+
[Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.2...0.2.3)
|
14
|
+
|
15
|
+
**Closed issues:**
|
16
|
+
|
17
|
+
- Include field name with error [\#161](https://github.com/theodi/csvlint.rb/issues/161)
|
18
|
+
|
19
|
+
- Refactor the binary [\#150](https://github.com/theodi/csvlint.rb/issues/150)
|
20
|
+
|
21
|
+
**Merged pull requests:**
|
22
|
+
|
23
|
+
- Refactor CLI [\#163](https://github.com/theodi/csvlint.rb/pull/163) ([pezholio](https://github.com/pezholio))
|
24
|
+
|
25
|
+
- Update schema file example to clarify type [\#162](https://github.com/theodi/csvlint.rb/pull/162) ([wachunga](https://github.com/wachunga))
|
26
|
+
|
27
|
+
## [0.2.2](https://github.com/theodi/csvlint.rb/tree/0.2.2) (2015-10-09)
|
28
|
+
|
29
|
+
[Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.1...0.2.2)
|
6
30
|
|
7
31
|
**Closed issues:**
|
8
32
|
|
data/README.md
CHANGED
@@ -162,20 +162,26 @@ An example JSON Table Schema schema file is:
|
|
162
162
|
"fields": [
|
163
163
|
{
|
164
164
|
"name": "id",
|
165
|
-
|
165
|
+
"constraints": {
|
166
|
+
"required": true,
|
167
|
+
"type": "http://www.w3.org/TR/xmlschema-2/#integer"
|
168
|
+
}
|
169
|
+
},
|
170
|
+
{
|
171
|
+
"name": "price",
|
172
|
+
"constraints": {
|
173
|
+
"required": true,
|
174
|
+
"minLength": 1
|
175
|
+
}
|
166
176
|
},
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
"pattern": "[A-Z]{1,2}[0-9][0-9A-Z]? ?[0-9][A-Z]{2}"
|
176
|
-
}
|
177
|
-
}
|
178
|
-
]
|
177
|
+
{
|
178
|
+
"name": "postcode",
|
179
|
+
"constraints": {
|
180
|
+
"required": true,
|
181
|
+
"pattern": "[A-Z]{1,2}[0-9][0-9A-Z]? ?[0-9][A-Z]{2}"
|
182
|
+
}
|
183
|
+
}
|
184
|
+
]
|
179
185
|
}
|
180
186
|
|
181
187
|
An equivalent CSV on the Web Metadata file is:
|
@@ -187,7 +193,8 @@ An equivalent CSV on the Web Metadata file is:
|
|
187
193
|
"columns": [
|
188
194
|
{
|
189
195
|
"name": "id",
|
190
|
-
"required": true
|
196
|
+
"required": true,
|
197
|
+
"datatype": { "base": "integer" }
|
191
198
|
},
|
192
199
|
{
|
193
200
|
"name": "price",
|
data/bin/csvlint
CHANGED
@@ -1,161 +1,10 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
$:.unshift File.join( File.dirname(__FILE__), "..", "lib")
|
3
3
|
|
4
|
-
require 'csvlint'
|
5
|
-
require 'colorize'
|
6
|
-
require 'json'
|
7
|
-
require 'optparse'
|
8
|
-
require 'pp'
|
4
|
+
require 'csvlint/cli'
|
9
5
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
opts.banner = "Usage: csvlint [options] [file]"
|
14
|
-
|
15
|
-
opts.on("-d", "--dump-errors", "Pretty print error and warning objects.") do |d|
|
16
|
-
options[:dump] = d
|
17
|
-
end
|
18
|
-
|
19
|
-
opts.on("-s", "--schema FILENAME", "Schema file") do |s|
|
20
|
-
options[:schema] = s
|
21
|
-
end
|
22
|
-
|
23
|
-
opts.on_tail("-h", "--help",
|
24
|
-
"Show this message") do
|
25
|
-
puts opts
|
26
|
-
exit
|
27
|
-
end
|
28
|
-
|
29
|
-
begin
|
30
|
-
opts.parse!
|
31
|
-
rescue OptionParser::InvalidOption => e
|
32
|
-
puts e
|
33
|
-
puts opts
|
34
|
-
exit(1)
|
35
|
-
end
|
36
|
-
|
37
|
-
def print_error(index, error, dump, color)
|
38
|
-
location = ""
|
39
|
-
location += error.row.to_s if error.row
|
40
|
-
location += "#{error.row ? "," : ""}#{error.column.to_s}" if error.column
|
41
|
-
if error.row || error.column
|
42
|
-
location = "#{error.row ? "Row" : "Column"}: #{location}"
|
43
|
-
end
|
44
|
-
output_string = "#{index+1}. #{error.type}"
|
45
|
-
output_string += ". #{location}" unless location.empty?
|
46
|
-
output_string += ". #{error.content}" if error.content
|
47
|
-
|
48
|
-
if $stdout.tty?
|
49
|
-
puts output_string.colorize(color)
|
50
|
-
else
|
51
|
-
puts output_string
|
52
|
-
end
|
53
|
-
|
54
|
-
if dump
|
55
|
-
pp error
|
56
|
-
end
|
57
|
-
|
58
|
-
end
|
59
|
-
|
60
|
-
def validate_csv(source, schema, dump)
|
61
|
-
@error_count = 0
|
62
|
-
report_lines = lambda do |row|
|
63
|
-
new_errors = row.errors.count
|
64
|
-
if new_errors > @error_count
|
65
|
-
print "!".red
|
66
|
-
else
|
67
|
-
print ".".green
|
68
|
-
end
|
69
|
-
@error_count = new_errors
|
70
|
-
end
|
71
|
-
validator = Csvlint::Validator.new( source, {}, schema, { lambda: report_lines } )
|
72
|
-
|
73
|
-
if $stdout.tty?
|
74
|
-
puts "\r\n#{source.path || source || "CSV"} is #{validator.valid? ? "VALID".green : "INVALID".red}"
|
75
|
-
else
|
76
|
-
puts "\r\n#{source.path || source || "CSV"} is #{validator.valid? ? "VALID" : "INVALID"}"
|
77
|
-
end
|
78
|
-
|
79
|
-
if validator.errors.size > 0
|
80
|
-
validator.errors.each_with_index do |error, i|
|
81
|
-
print_error(i, error, dump, :red)
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
if validator.warnings.size > 0
|
86
|
-
validator.warnings.each_with_index do |error, i|
|
87
|
-
print_error(i, error, dump, :yellow)
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
return validator.valid?
|
92
|
-
end
|
93
|
-
|
94
|
-
if ARGV.length == 0 && !$stdin.tty?
|
95
|
-
source = StringIO.new(ARGF.read)
|
6
|
+
if ARGV == ["help"]
|
7
|
+
Csvlint::Cli.start(["help"])
|
96
8
|
else
|
97
|
-
|
98
|
-
source = ARGV[0]
|
99
|
-
unless source =~ /^http(s)?/
|
100
|
-
begin
|
101
|
-
source = File.new( source ) unless source =~ /^http(s)?/
|
102
|
-
rescue Errno::ENOENT
|
103
|
-
puts "#{source} not found"
|
104
|
-
exit 1
|
105
|
-
end
|
106
|
-
end
|
107
|
-
elsif !options[:schema]
|
108
|
-
puts "No CSV data to validate."
|
109
|
-
puts opts
|
110
|
-
exit 1
|
111
|
-
end
|
9
|
+
Csvlint::Cli.start(ARGV.unshift("validate"))
|
112
10
|
end
|
113
|
-
|
114
|
-
schema = nil
|
115
|
-
if options[:schema]
|
116
|
-
begin
|
117
|
-
schema = Csvlint::Schema.load_from_json(options[:schema])
|
118
|
-
rescue JSON::ParserError => e
|
119
|
-
output_string = "invalid metadata: malformed JSON"
|
120
|
-
if $stdout.tty?
|
121
|
-
puts output_string.colorize(:red)
|
122
|
-
else
|
123
|
-
puts output_string
|
124
|
-
end
|
125
|
-
exit 1
|
126
|
-
rescue Csvlint::Csvw::MetadataError => e
|
127
|
-
output_string = "invalid metadata: #{e.message}#{" at " + e.path if e.path}"
|
128
|
-
if $stdout.tty?
|
129
|
-
puts output_string.colorize(:red)
|
130
|
-
else
|
131
|
-
puts output_string
|
132
|
-
end
|
133
|
-
exit 1
|
134
|
-
rescue Errno::ENOENT
|
135
|
-
puts "#{options[:schema]} not found"
|
136
|
-
exit 1
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
valid = true
|
141
|
-
if source.nil?
|
142
|
-
unless schema.instance_of? Csvlint::Csvw::TableGroup
|
143
|
-
puts "No CSV data to validate."
|
144
|
-
puts opts
|
145
|
-
exit 1
|
146
|
-
end
|
147
|
-
schema.tables.keys.each do |source|
|
148
|
-
begin
|
149
|
-
source = source.sub("file:","")
|
150
|
-
source = File.new( source )
|
151
|
-
rescue Errno::ENOENT
|
152
|
-
puts "#{source} not found"
|
153
|
-
exit 1
|
154
|
-
end unless source =~ /^http(s)?/
|
155
|
-
valid &= validate_csv(source, schema, options[:dump])
|
156
|
-
end
|
157
|
-
else
|
158
|
-
valid = validate_csv(source, schema, options[:dump])
|
159
|
-
end
|
160
|
-
|
161
|
-
exit 1 unless valid
|
data/csvlint.gemspec
CHANGED
@@ -26,6 +26,7 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.add_dependency "typhoeus"
|
27
27
|
spec.add_dependency "escape_utils"
|
28
28
|
spec.add_dependency "uri_template"
|
29
|
+
spec.add_dependency "thor"
|
29
30
|
|
30
31
|
spec.add_development_dependency "bundler", "~> 1.3"
|
31
32
|
spec.add_development_dependency "rake"
|
@@ -40,4 +41,6 @@ Gem::Specification.new do |spec|
|
|
40
41
|
spec.add_development_dependency "coveralls"
|
41
42
|
spec.add_development_dependency "pry"
|
42
43
|
spec.add_development_dependency "github_changelog_generator"
|
44
|
+
spec.add_development_dependency "aruba"
|
45
|
+
|
43
46
|
end
|
data/features/cli.feature
ADDED
@@ -0,0 +1,207 @@
|
|
1
|
+
Feature: CSVlint CLI
|
2
|
+
|
3
|
+
Scenario: Valid CSV from url
|
4
|
+
Given I have a CSV with the following content:
|
5
|
+
"""
|
6
|
+
"Foo","Bar","Baz"
|
7
|
+
"1","2","3"
|
8
|
+
"3","2","1"
|
9
|
+
"""
|
10
|
+
And it is stored at the url "http://example.com/example1.csv"
|
11
|
+
When I run `csvlint http://example.com/example1.csv`
|
12
|
+
Then the output should contain "http://example.com/example1.csv is VALID"
|
13
|
+
|
14
|
+
Scenario: Valid CSV from file
|
15
|
+
When I run `csvlint ../../features/fixtures/valid.csv`
|
16
|
+
Then the output should contain "valid.csv is VALID"
|
17
|
+
|
18
|
+
# This is a hacky way of saying to run `cat features/fixtures/valid.csv | csvlint`
|
19
|
+
Scenario: Valid CSV from pipe
|
20
|
+
Given I have stubbed ARGF to contain "features/fixtures/valid.csv"
|
21
|
+
When I run `csvlint`
|
22
|
+
Then the output should contain "CSV is VALID"
|
23
|
+
|
24
|
+
Scenario: URL that 404s
|
25
|
+
Given there is no file at the url "http://example.com/example1.csv"
|
26
|
+
And there is no file at the url "http://example.com/.well-known/csvm"
|
27
|
+
And there is no file at the url "http://example.com/example1.csv-metadata.json"
|
28
|
+
And there is no file at the url "http://example.com/csv-metadata.json"
|
29
|
+
When I run `csvlint http://example.com/example1.csv`
|
30
|
+
Then the output should contain "http://example.com/example1.csv is INVALID"
|
31
|
+
And the output should contain "not_found"
|
32
|
+
|
33
|
+
Scenario: File doesn't exist
|
34
|
+
When I run `csvlint ../../features/fixtures/non-existent-file.csv`
|
35
|
+
Then the output should contain "non-existent-file.csv not found"
|
36
|
+
|
37
|
+
Scenario: No file or URL specified
|
38
|
+
When I run `csvlint`
|
39
|
+
Then the output should contain "No CSV data to validate"
|
40
|
+
|
41
|
+
Scenario: No file or URL specified, but schema specified
|
42
|
+
Given I have a schema with the following content:
|
43
|
+
"""
|
44
|
+
{
|
45
|
+
"fields": [
|
46
|
+
{ "name": "Name", "constraints": { "required": true } },
|
47
|
+
{ "name": "Id", "constraints": { "required": true, "minLength": 1 } },
|
48
|
+
{ "name": "Email", "constraints": { "required": true } }
|
49
|
+
]
|
50
|
+
}
|
51
|
+
"""
|
52
|
+
And the schema is stored at the url "http://example.com/schema.json"
|
53
|
+
When I run `csvlint --schema http://example.com/schema.json`
|
54
|
+
Then the output should contain "No CSV data to validate"
|
55
|
+
|
56
|
+
Scenario: Invalid CSV from url
|
57
|
+
Given I have a CSV with the following content:
|
58
|
+
"""
|
59
|
+
"Foo", "Bar" , "Baz"
|
60
|
+
"""
|
61
|
+
And it is stored at the url "http://example.com/example1.csv"
|
62
|
+
When I run `csvlint http://example.com/example1.csv`
|
63
|
+
Then the output should contain "http://example.com/example1.csv is INVALID"
|
64
|
+
And the output should contain "whitespace"
|
65
|
+
|
66
|
+
Scenario: Specify schema
|
67
|
+
Given I have a CSV with the following content:
|
68
|
+
"""
|
69
|
+
"Bob","1234","bob@example.org"
|
70
|
+
"Alice","5","alice@example.com"
|
71
|
+
"""
|
72
|
+
And it is stored at the url "http://example.com/example1.csv"
|
73
|
+
And I have a schema with the following content:
|
74
|
+
"""
|
75
|
+
{
|
76
|
+
"fields": [
|
77
|
+
{ "name": "Name", "constraints": { "required": true } },
|
78
|
+
{ "name": "Id", "constraints": { "required": true, "minLength": 1 } },
|
79
|
+
{ "name": "Email", "constraints": { "required": true } }
|
80
|
+
]
|
81
|
+
}
|
82
|
+
"""
|
83
|
+
And the schema is stored at the url "http://example.com/schema.json"
|
84
|
+
When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json`
|
85
|
+
Then the output should contain "http://example.com/example1.csv is VALID"
|
86
|
+
|
87
|
+
Scenario: Schema errors
|
88
|
+
Given I have a CSV with the following content:
|
89
|
+
"""
|
90
|
+
"Bob","1234","bob@example.org"
|
91
|
+
"Alice","5","alice@example.com"
|
92
|
+
"""
|
93
|
+
And it is stored at the url "http://example.com/example1.csv"
|
94
|
+
And I have a schema with the following content:
|
95
|
+
"""
|
96
|
+
{
|
97
|
+
"fields": [
|
98
|
+
{ "name": "Name", "constraints": { "required": true } },
|
99
|
+
{ "name": "Id", "constraints": { "required": true, "minLength": 3 } },
|
100
|
+
{ "name": "Email", "constraints": { "required": true } }
|
101
|
+
]
|
102
|
+
}
|
103
|
+
"""
|
104
|
+
And the schema is stored at the url "http://example.com/schema.json"
|
105
|
+
When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json`
|
106
|
+
Then the output should contain "http://example.com/example1.csv is INVALID"
|
107
|
+
And the output should contain "1. Id: min_length. Row: 2,2. 5"
|
108
|
+
And the output should contain "1. malformed_header. Row: 1. Bob,1234,bob@example.org"
|
109
|
+
|
110
|
+
Scenario: Invalid schema
|
111
|
+
Given I have a CSV with the following content:
|
112
|
+
"""
|
113
|
+
"Bob","1234","bob@example.org"
|
114
|
+
"Alice","5","alice@example.com"
|
115
|
+
"""
|
116
|
+
And it is stored at the url "http://example.com/example1.csv"
|
117
|
+
And I have a schema with the following content:
|
118
|
+
"""
|
119
|
+
NO JSON HERE SON
|
120
|
+
"""
|
121
|
+
And the schema is stored at the url "http://example.com/schema.json"
|
122
|
+
Then nothing should be outputted to STDERR
|
123
|
+
When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json`
|
124
|
+
And the output should contain "invalid metadata: malformed JSON"
|
125
|
+
|
126
|
+
Scenario: Schema that 404s
|
127
|
+
Given I have a CSV with the following content:
|
128
|
+
"""
|
129
|
+
"Bob","1234","bob@example.org"
|
130
|
+
"Alice","5","alice@example.com"
|
131
|
+
"""
|
132
|
+
And it is stored at the url "http://example.com/example1.csv"
|
133
|
+
And there is no file at the url "http://example.com/schema404.json"
|
134
|
+
When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema404.json`
|
135
|
+
Then the output should contain "http://example.com/schema404.json not found"
|
136
|
+
|
137
|
+
Scenario: Schema that doesn't exist
|
138
|
+
Given I have a CSV with the following content:
|
139
|
+
"""
|
140
|
+
"Bob","1234","bob@example.org"
|
141
|
+
"Alice","5","alice@example.com"
|
142
|
+
"""
|
143
|
+
And it is stored at the url "http://example.com/example1.csv"
|
144
|
+
When I run `csvlint http://example.com/example1.csv --schema /fake/file/path.json`
|
145
|
+
Then the output should contain "/fake/file/path.json not found"
|
146
|
+
|
147
|
+
Scenario: Valid CSVw schema
|
148
|
+
Given I have a CSV with the following content:
|
149
|
+
"""
|
150
|
+
"Bob","1234","bob@example.org"
|
151
|
+
"Alice","5","alice@example.com"
|
152
|
+
"""
|
153
|
+
And it is stored at the url "http://example.com/example1.csv"
|
154
|
+
And I have metadata with the following content:
|
155
|
+
"""
|
156
|
+
{
|
157
|
+
"@context": "http://www.w3.org/ns/csvw",
|
158
|
+
"url": "http://example.com/example1.csv",
|
159
|
+
"dialect": { "header": false },
|
160
|
+
"tableSchema": {
|
161
|
+
"columns": [
|
162
|
+
{ "name": "Name", "required": true },
|
163
|
+
{ "name": "Id", "required": true, "datatype": { "base": "string", "minLength": 1 } },
|
164
|
+
{ "name": "Email", "required": true }
|
165
|
+
]
|
166
|
+
}
|
167
|
+
}
|
168
|
+
"""
|
169
|
+
And the schema is stored at the url "http://example.com/schema.json"
|
170
|
+
When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json`
|
171
|
+
Then the output should contain "http://example.com/example1.csv is VALID"
|
172
|
+
|
173
|
+
Scenario: CSVw schema with invalid CSV
|
174
|
+
Given I have a CSV with the following content:
|
175
|
+
"""
|
176
|
+
"Bob","1234","bob@example.org"
|
177
|
+
"Alice","5","alice@example.com"
|
178
|
+
"""
|
179
|
+
And it is stored at the url "http://example.com/example1.csv"
|
180
|
+
And I have metadata with the following content:
|
181
|
+
"""
|
182
|
+
{
|
183
|
+
"@context": "http://www.w3.org/ns/csvw",
|
184
|
+
"url": "http://example.com/example1.csv",
|
185
|
+
"dialect": { "header": false },
|
186
|
+
"tableSchema": {
|
187
|
+
"columns": [
|
188
|
+
{ "name": "Name", "required": true },
|
189
|
+
{ "name": "Id", "required": true, "datatype": { "base": "string", "minLength": 3 } },
|
190
|
+
{ "name": "Email", "required": true }
|
191
|
+
]
|
192
|
+
}
|
193
|
+
}
|
194
|
+
"""
|
195
|
+
And the schema is stored at the url "http://example.com/schema.json"
|
196
|
+
When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json`
|
197
|
+
Then the output should contain "http://example.com/example1.csv is INVALID"
|
198
|
+
And the output should contain "1. min_length. Row: 2,2. 5"
|
199
|
+
|
200
|
+
Scenario: CSVw table Schema
|
201
|
+
Given I have a metadata file called "csvw/countries.json"
|
202
|
+
And the metadata is stored at the url "http://w3c.github.io/csvw/tests/countries.json"
|
203
|
+
And I have a file called "csvw/countries.csv" at the url "http://w3c.github.io/csvw/tests/countries.csv"
|
204
|
+
And I have a file called "csvw/country_slice.csv" at the url "http://w3c.github.io/csvw/tests/country_slice.csv"
|
205
|
+
When I run `csvlint --schema http://w3c.github.io/csvw/tests/countries.json`
|
206
|
+
Then the output should contain "http://w3c.github.io/csvw/tests/countries.csv is VALID"
|
207
|
+
And the output should contain "http://w3c.github.io/csvw/tests/country_slice.csv is VALID"
|
data/features/support/aruba.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'aruba'
|
2
|
+
require 'aruba/in_process'
|
3
|
+
require 'aruba/cucumber'
|
4
|
+
|
5
|
+
require 'csvlint/cli'
|
6
|
+
|
7
|
+
module Csvlint
|
8
|
+
class CliRunner
|
9
|
+
# Allow everything fun to be injected from the outside while defaulting to normal implementations.
|
10
|
+
def initialize(argv, stdin = STDIN, stdout = STDOUT, stderr = STDERR, kernel = Kernel)
|
11
|
+
@argv, @stdin, @stdout, @stderr, @kernel = argv, stdin, stdout, stderr, kernel
|
12
|
+
end
|
13
|
+
|
14
|
+
def execute!
|
15
|
+
exit_code = begin
|
16
|
+
# Thor accesses these streams directly rather than letting them be injected, so we replace them...
|
17
|
+
$stderr = @stderr
|
18
|
+
$stdin = @stdin
|
19
|
+
$stdout = @stdout
|
20
|
+
|
21
|
+
# Run our normal Thor app the way we know and love.
|
22
|
+
Csvlint::Cli.start(@argv.dup.unshift("validate"))
|
23
|
+
|
24
|
+
# Thor::Base#start does not have a return value, assume success if no exception is raised.
|
25
|
+
0
|
26
|
+
rescue StandardError => e
|
27
|
+
# The ruby interpreter would pipe this to STDERR and exit 1 in the case of an unhandled exception
|
28
|
+
b = e.backtrace
|
29
|
+
@stderr.puts("#{b.shift}: #{e.message} (#{e.class})")
|
30
|
+
@stderr.puts(b.map{|s| "\tfrom #{s}"}.join("\n"))
|
31
|
+
1
|
32
|
+
rescue SystemExit => e
|
33
|
+
e.status
|
34
|
+
ensure
|
35
|
+
# TODO: reset your app here, free up resources, etc.
|
36
|
+
# Examples:
|
37
|
+
# MyApp.logger.flush
|
38
|
+
# MyApp.logger.close
|
39
|
+
# MyApp.logger = nil
|
40
|
+
#
|
41
|
+
# MyApp.reset_singleton_instance_variables
|
42
|
+
|
43
|
+
# ...then we put the streams back.
|
44
|
+
$stderr = STDERR
|
45
|
+
$stdin = STDIN
|
46
|
+
$stdout = STDOUT
|
47
|
+
end
|
48
|
+
|
49
|
+
# Proxy our exit code back to the injected kernel.
|
50
|
+
@kernel.exit(exit_code)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
Aruba.process = Aruba::Processes::InProcess
|
56
|
+
Aruba.process.main_class = Csvlint::CliRunner
|
data/features/support/env.rb
CHANGED
@@ -4,6 +4,7 @@ Coveralls.wear_merged!('test_frameworks')
|
|
4
4
|
$:.unshift File.join( File.dirname(__FILE__), "..", "..", "lib")
|
5
5
|
|
6
6
|
require 'rspec/expectations'
|
7
|
+
require 'cucumber/rspec/doubles'
|
7
8
|
require 'csvlint'
|
8
9
|
require 'pry'
|
9
10
|
|
@@ -22,4 +23,4 @@ end
|
|
22
23
|
|
23
24
|
World do
|
24
25
|
CustomWorld.new
|
25
|
-
end
|
26
|
+
end
|
data/lib/csvlint/cli.rb
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
require 'csvlint'
|
2
|
+
require 'colorize'
|
3
|
+
require 'json'
|
4
|
+
require 'pp'
|
5
|
+
require 'thor'
|
6
|
+
|
7
|
+
module Csvlint
|
8
|
+
class Cli < Thor
|
9
|
+
|
10
|
+
desc "myfile.csv OR csvlint http://example.com/myfile.csv", "Supports validating CSV files to check their syntax and contents"
|
11
|
+
option :dump_errors, desc: "Pretty print error and warning objects.", type: :boolean, aliases: :d
|
12
|
+
option :schema, banner: "FILENAME OR URL", desc: "Schema file", aliases: :s
|
13
|
+
def validate(source = nil)
|
14
|
+
source = read_source(source)
|
15
|
+
@schema = get_schema(options[:schema]) if options[:schema]
|
16
|
+
fetch_schema_tables(@schema, options) if source.nil?
|
17
|
+
|
18
|
+
valid = validate_csv(source, @schema, options[:dump])
|
19
|
+
exit 1 unless valid
|
20
|
+
end
|
21
|
+
|
22
|
+
def help
|
23
|
+
self.class.command_help(shell, :validate)
|
24
|
+
end
|
25
|
+
|
26
|
+
default_task :validate
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def read_source(source)
|
31
|
+
if source.nil?
|
32
|
+
# If no source is present, try reading from stdin
|
33
|
+
if !$stdin.tty?
|
34
|
+
source = StringIO.new(ARGF.read) rescue nil
|
35
|
+
return_error "No CSV data to validate" if !options[:schema] && source.nil?
|
36
|
+
end
|
37
|
+
else
|
38
|
+
# If the source isn't a URL, it's a file
|
39
|
+
unless source =~ /^http(s)?/
|
40
|
+
begin
|
41
|
+
source = File.new( source )
|
42
|
+
rescue Errno::ENOENT
|
43
|
+
return_error "#{source} not found"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
source
|
49
|
+
end
|
50
|
+
|
51
|
+
def get_schema(schema)
|
52
|
+
begin
|
53
|
+
schema = Csvlint::Schema.load_from_json(schema, false)
|
54
|
+
rescue Csvlint::Csvw::MetadataError => e
|
55
|
+
return_error "invalid metadata: #{e.message}#{" at " + e.path if e.path}"
|
56
|
+
rescue OpenURI::HTTPError, Errno::ENOENT
|
57
|
+
return_error "#{options[:schema]} not found"
|
58
|
+
end
|
59
|
+
|
60
|
+
if schema.class == Csvlint::Schema && schema.description == "malformed"
|
61
|
+
return_error "invalid metadata: malformed JSON"
|
62
|
+
end
|
63
|
+
|
64
|
+
schema
|
65
|
+
end
|
66
|
+
|
67
|
+
def fetch_schema_tables(schema, options)
|
68
|
+
valid = true
|
69
|
+
|
70
|
+
unless schema.instance_of? Csvlint::Csvw::TableGroup
|
71
|
+
return_error "No CSV data to validate."
|
72
|
+
end
|
73
|
+
schema.tables.keys.each do |source|
|
74
|
+
begin
|
75
|
+
source = source.sub("file:","")
|
76
|
+
source = File.new( source )
|
77
|
+
rescue Errno::ENOENT
|
78
|
+
return_error "#{source} not found"
|
79
|
+
end unless source =~ /^http(s)?/
|
80
|
+
valid &= validate_csv(source, schema, options[:dump])
|
81
|
+
end
|
82
|
+
|
83
|
+
exit 1 unless valid
|
84
|
+
end
|
85
|
+
|
86
|
+
def print_error(index, error, dump, color)
|
87
|
+
location = ""
|
88
|
+
location += error.row.to_s if error.row
|
89
|
+
location += "#{error.row ? "," : ""}#{error.column.to_s}" if error.column
|
90
|
+
if error.row || error.column
|
91
|
+
location = "#{error.row ? "Row" : "Column"}: #{location}"
|
92
|
+
end
|
93
|
+
output_string = "#{index+1}. "
|
94
|
+
if error.column && @schema && @schema.class == Csvlint::Schema
|
95
|
+
output_string += "#{@schema.fields[error.column - 1].name}: "
|
96
|
+
end
|
97
|
+
output_string += "#{error.type}"
|
98
|
+
output_string += ". #{location}" unless location.empty?
|
99
|
+
output_string += ". #{error.content}" if error.content
|
100
|
+
|
101
|
+
if $stdout.tty?
|
102
|
+
puts output_string.colorize(color)
|
103
|
+
else
|
104
|
+
puts output_string
|
105
|
+
end
|
106
|
+
|
107
|
+
if dump
|
108
|
+
pp error
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
def print_errors(errors, dump)
|
113
|
+
if errors.size > 0
|
114
|
+
errors.each_with_index { |error, i| print_error(i, error, dump, :red) }
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
def return_error(message)
|
119
|
+
if $stdout.tty?
|
120
|
+
puts message.colorize(:red)
|
121
|
+
else
|
122
|
+
puts message
|
123
|
+
end
|
124
|
+
exit 1
|
125
|
+
end
|
126
|
+
|
127
|
+
def validate_csv(source, schema, dump)
|
128
|
+
@error_count = 0
|
129
|
+
|
130
|
+
validator = Csvlint::Validator.new( source, {}, schema, { lambda: report_lines } )
|
131
|
+
|
132
|
+
if source.class == String
|
133
|
+
csv = source
|
134
|
+
elsif source.class == File
|
135
|
+
csv = source.path
|
136
|
+
else
|
137
|
+
csv = "CSV"
|
138
|
+
end
|
139
|
+
|
140
|
+
if $stdout.tty?
|
141
|
+
puts "\r\n#{csv} is #{validator.valid? ? "VALID".green : "INVALID".red}"
|
142
|
+
else
|
143
|
+
puts "\r\n#{csv} is #{validator.valid? ? "VALID" : "INVALID"}"
|
144
|
+
end
|
145
|
+
|
146
|
+
print_errors(validator.errors, dump)
|
147
|
+
print_errors(validator.warnings, dump)
|
148
|
+
|
149
|
+
return validator.valid?
|
150
|
+
end
|
151
|
+
|
152
|
+
def report_lines
|
153
|
+
lambda do |row|
|
154
|
+
new_errors = row.errors.count
|
155
|
+
if new_errors > @error_count
|
156
|
+
print "!".red
|
157
|
+
else
|
158
|
+
print ".".green
|
159
|
+
end
|
160
|
+
@error_count = new_errors
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
end
|
165
|
+
end
|
data/lib/csvlint/schema.rb
CHANGED
@@ -29,7 +29,7 @@ module Csvlint
|
|
29
29
|
return Csvlint::Csvw::TableGroup.from_json(uri, json)
|
30
30
|
end
|
31
31
|
|
32
|
-
def load_from_json(uri)
|
32
|
+
def load_from_json(uri, output_errors = true)
|
33
33
|
begin
|
34
34
|
json = JSON.parse( open(uri).read )
|
35
35
|
if json["@context"]
|
@@ -40,12 +40,14 @@ module Csvlint
|
|
40
40
|
end
|
41
41
|
rescue Csvlint::Csvw::MetadataError => e
|
42
42
|
raise e
|
43
|
-
rescue OpenURI::HTTPError => e
|
43
|
+
rescue OpenURI::HTTPError, Errno::ENOENT => e
|
44
44
|
raise e
|
45
45
|
rescue => e
|
46
|
-
|
47
|
-
|
48
|
-
|
46
|
+
if output_errors === true
|
47
|
+
STDERR.puts e.class
|
48
|
+
STDERR.puts e.message
|
49
|
+
STDERR.puts e.backtrace
|
50
|
+
end
|
49
51
|
return Schema.new(nil, [], "malformed", "malformed")
|
50
52
|
end
|
51
53
|
end
|
data/lib/csvlint/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvlint
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- pezholio
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-09 00:00:00.000000000 Z
|
11
|
+
date: 2015-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mime-types
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - ! '>='
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: thor
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ! '>='
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ! '>='
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
name: bundler
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -304,6 +318,20 @@ dependencies:
|
|
304
318
|
- - ! '>='
|
305
319
|
- !ruby/object:Gem::Version
|
306
320
|
version: '0'
|
321
|
+
- !ruby/object:Gem::Dependency
|
322
|
+
name: aruba
|
323
|
+
requirement: !ruby/object:Gem::Requirement
|
324
|
+
requirements:
|
325
|
+
- - ! '>='
|
326
|
+
- !ruby/object:Gem::Version
|
327
|
+
version: '0'
|
328
|
+
type: :development
|
329
|
+
prerelease: false
|
330
|
+
version_requirements: !ruby/object:Gem::Requirement
|
331
|
+
requirements:
|
332
|
+
- - ! '>='
|
333
|
+
- !ruby/object:Gem::Version
|
334
|
+
version: '0'
|
307
335
|
description: CSV Validator
|
308
336
|
email:
|
309
337
|
- pezholio@gmail.com
|
@@ -327,6 +355,7 @@ files:
|
|
327
355
|
- bin/csvlint
|
328
356
|
- csvlint.gemspec
|
329
357
|
- features/check_format.feature
|
358
|
+
- features/cli.feature
|
330
359
|
- features/csv_options.feature
|
331
360
|
- features/csvupload.feature
|
332
361
|
- features/csvw_schema_validation.feature
|
@@ -347,6 +376,7 @@ files:
|
|
347
376
|
- features/parse_csv.feature
|
348
377
|
- features/schema_validation.feature
|
349
378
|
- features/sources.feature
|
379
|
+
- features/step_definitions/cli_steps.rb
|
350
380
|
- features/step_definitions/csv_options_steps.rb
|
351
381
|
- features/step_definitions/information_steps.rb
|
352
382
|
- features/step_definitions/parse_csv_steps.rb
|
@@ -355,6 +385,7 @@ files:
|
|
355
385
|
- features/step_definitions/validation_errors_steps.rb
|
356
386
|
- features/step_definitions/validation_info_steps.rb
|
357
387
|
- features/step_definitions/validation_warnings_steps.rb
|
388
|
+
- features/support/aruba.rb
|
358
389
|
- features/support/env.rb
|
359
390
|
- features/support/load_tests.rb
|
360
391
|
- features/support/webmock.rb
|
@@ -362,6 +393,7 @@ files:
|
|
362
393
|
- features/validation_info.feature
|
363
394
|
- features/validation_warnings.feature
|
364
395
|
- lib/csvlint.rb
|
396
|
+
- lib/csvlint/cli.rb
|
365
397
|
- lib/csvlint/csvw/column.rb
|
366
398
|
- lib/csvlint/csvw/date_format.rb
|
367
399
|
- lib/csvlint/csvw/metadata_error.rb
|
@@ -410,6 +442,7 @@ specification_version: 4
|
|
410
442
|
summary: CSV Validator
|
411
443
|
test_files:
|
412
444
|
- features/check_format.feature
|
445
|
+
- features/cli.feature
|
413
446
|
- features/csv_options.feature
|
414
447
|
- features/csvupload.feature
|
415
448
|
- features/csvw_schema_validation.feature
|
@@ -430,6 +463,7 @@ test_files:
|
|
430
463
|
- features/parse_csv.feature
|
431
464
|
- features/schema_validation.feature
|
432
465
|
- features/sources.feature
|
466
|
+
- features/step_definitions/cli_steps.rb
|
433
467
|
- features/step_definitions/csv_options_steps.rb
|
434
468
|
- features/step_definitions/information_steps.rb
|
435
469
|
- features/step_definitions/parse_csv_steps.rb
|
@@ -438,6 +472,7 @@ test_files:
|
|
438
472
|
- features/step_definitions/validation_errors_steps.rb
|
439
473
|
- features/step_definitions/validation_info_steps.rb
|
440
474
|
- features/step_definitions/validation_warnings_steps.rb
|
475
|
+
- features/support/aruba.rb
|
441
476
|
- features/support/env.rb
|
442
477
|
- features/support/load_tests.rb
|
443
478
|
- features/support/webmock.rb
|