csvlint 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitignore +22 -0
  4. data/.travis.yml +10 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.md +22 -0
  7. data/README.md +214 -0
  8. data/Rakefile +17 -0
  9. data/bin/create_schema +32 -0
  10. data/bin/csvlint +52 -0
  11. data/csvlint.gemspec +39 -0
  12. data/features/check_format.feature +46 -0
  13. data/features/csv_options.feature +35 -0
  14. data/features/fixtures/cr-line-endings.csv +1 -0
  15. data/features/fixtures/crlf-line-endings.csv +3 -0
  16. data/features/fixtures/inconsistent-line-endings.csv +2 -0
  17. data/features/fixtures/invalid-byte-sequence.csv +24 -0
  18. data/features/fixtures/lf-line-endings.csv +3 -0
  19. data/features/fixtures/spreadsheet.xls +0 -0
  20. data/features/fixtures/title-row.csv +4 -0
  21. data/features/fixtures/valid.csv +3 -0
  22. data/features/fixtures/windows-line-endings.csv +2 -0
  23. data/features/information.feature +22 -0
  24. data/features/parse_csv.feature +90 -0
  25. data/features/schema_validation.feature +63 -0
  26. data/features/sources.feature +18 -0
  27. data/features/step_definitions/csv_options_steps.rb +19 -0
  28. data/features/step_definitions/information_steps.rb +13 -0
  29. data/features/step_definitions/parse_csv_steps.rb +30 -0
  30. data/features/step_definitions/schema_validation_steps.rb +7 -0
  31. data/features/step_definitions/sources_steps.rb +7 -0
  32. data/features/step_definitions/validation_errors_steps.rb +43 -0
  33. data/features/step_definitions/validation_info_steps.rb +18 -0
  34. data/features/step_definitions/validation_warnings_steps.rb +46 -0
  35. data/features/support/env.rb +30 -0
  36. data/features/support/webmock.rb +1 -0
  37. data/features/validation_errors.feature +151 -0
  38. data/features/validation_info.feature +24 -0
  39. data/features/validation_warnings.feature +74 -0
  40. data/lib/csvlint.rb +13 -0
  41. data/lib/csvlint/error_collector.rb +43 -0
  42. data/lib/csvlint/error_message.rb +15 -0
  43. data/lib/csvlint/field.rb +102 -0
  44. data/lib/csvlint/schema.rb +69 -0
  45. data/lib/csvlint/types.rb +113 -0
  46. data/lib/csvlint/validate.rb +253 -0
  47. data/lib/csvlint/version.rb +3 -0
  48. data/lib/csvlint/wrapped_io.rb +39 -0
  49. data/spec/field_spec.rb +247 -0
  50. data/spec/schema_spec.rb +149 -0
  51. data/spec/spec_helper.rb +20 -0
  52. data/spec/validator_spec.rb +279 -0
  53. metadata +367 -0
@@ -0,0 +1,247 @@
1
+ require 'spec_helper'
2
+
3
+ describe Csvlint::Field do
4
+
5
+ it "should validate required fields" do # with "required", nil and "" are both rejected
6
+ field = Csvlint::Field.new("test", { "required" => true } )
7
+ expect( field.validate_column( nil ) ).to be(false)
8
+ expect( field.errors.first.category ).to be(:schema) # the failure is recorded on field.errors
9
+ expect( field.validate_column( "" ) ).to be(false)
10
+ expect( field.validate_column( "data" ) ).to be(true)
11
+ end
12
+
13
+ it "should include the failed constraints" do # the error carries the constraint hash that failed
14
+ field = Csvlint::Field.new("test", { "required" => true } )
15
+ expect( field.validate_column( nil ) ).to be(false)
16
+ expect( field.errors.first.constraints ).to eql( { "required" => true } )
17
+ end
18
+
19
+ it "should validate minimum length" do # boundary is inclusive: exactly 3 characters passes
20
+ field = Csvlint::Field.new("test", { "minLength" => 3 } )
21
+ expect( field.validate_column( nil ) ).to be(false) # nil fails minLength
22
+ expect( field.validate_column( "" ) ).to be(false)
23
+ expect( field.validate_column( "ab" ) ).to be(false)
24
+ expect( field.validate_column( "abc" ) ).to be(true)
25
+ expect( field.validate_column( "abcd" ) ).to be(true)
26
+ end
27
+
28
+ it "should validate maximum length" do # boundary is inclusive: exactly 3 characters passes
29
+ field = Csvlint::Field.new("test", { "maxLength" => 3 } )
30
+ expect( field.validate_column( nil ) ).to be(true) # unlike minLength, nil is accepted here
31
+ expect( field.validate_column( "" ) ).to be(true)
32
+ expect( field.validate_column( "ab" ) ).to be(true)
33
+ expect( field.validate_column( "abc" ) ).to be(true)
34
+ expect( field.validate_column( "abcd" ) ).to be(false)
35
+ end
36
+
37
+ it "should validate against regex" do # "pattern" is supplied as a string; here it describes a brace-wrapped GUID
38
+ field = Csvlint::Field.new("test", { "pattern" => "\{[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}\}"} ) # in a double-quoted string "\{" is just "{"
39
+ expect( field.validate_column( "abc") ).to be(false)
40
+ expect( field.validate_column( "{3B0DA29C-C89A-4FAA-918A-0000074FA0E0}") ).to be(true)
41
+ end
42
+
43
+ it "should apply combinations of constraints" do # each failure reports only the constraint that was violated
44
+ field = Csvlint::Field.new("test", { "required"=>true, "pattern" => "\{[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}\}"} )
45
+ expect( field.validate_column( "abc") ).to be(false)
46
+ expect( field.errors.first.constraints ).to eql( { "pattern" => "\{[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}\}" } )
47
+ # After the next call errors.first reflects that call, not the earlier pattern failure.
48
+ expect( field.validate_column( nil ) ).to be(false)
49
+ expect( field.errors.first.constraints ).to eql( { "required"=>true } )
50
+
51
+ expect( field.validate_column( "{3B0DA29C-C89A-4FAA-918A-0000074FA0E0}") ).to be(true)
52
+
53
+ end
54
+
55
+ it "should enforce uniqueness for a column" do # the same Field instance is reused, so it sees both values
56
+ field = Csvlint::Field.new("test", { "unique" => true } )
57
+ expect( field.validate_column( "abc") ).to be(true)
58
+ expect( field.validate_column( "abc") ).to be(false) # second occurrence of the same value is rejected
59
+ expect( field.errors.first.category ).to be(:schema)
60
+ expect( field.errors.first.type ).to be(:unique)
61
+ end
62
+
63
+ context "it should validate correct types" do # "type" is given as an XML Schema datatype URI
64
+ it "skips empty fields" do # "" passes even under an int type
65
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#int" })
66
+ expect( field.validate_column("")).to be(true)
67
+ end
68
+
69
+ it "validates strings" do
70
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#string" })
71
+ expect( field.validate_column("42")).to be(true)
72
+ expect( field.validate_column("forty-two")).to be(true)
73
+ end
74
+
75
+ it "validates ints" do
76
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#int" })
77
+ expect( field.validate_column("42")).to be(true)
78
+ expect( field.validate_column("forty-two")).to be(false)
79
+ end
80
+
81
+ it "validates integers" do
82
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#integer" })
83
+ expect( field.validate_column("42")).to be(true)
84
+ expect( field.validate_column("forty-two")).to be(false)
85
+ end
86
+ # "42" (no decimal point) is also accepted as a float.
87
+ it "validates floats" do
88
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#float" })
89
+ expect(field.validate_column("42.0")).to be(true)
90
+ expect(field.validate_column("42")).to be(true)
91
+ expect(field.validate_column("forty-two")).to be(false)
92
+ end
93
+ # http and https URLs pass; a bare number does not.
94
+ it "validates URIs" do
95
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#anyURI" })
96
+ expect(field.validate_column("http://theodi.org/team")).to be(true)
97
+ expect(field.validate_column("https://theodi.org/team")).to be(true)
98
+ expect(field.validate_column("42.0")).to be(false)
99
+ end
100
+ # Both the word forms ("true"/"false") and the digit forms ("1"/"0") are accepted.
101
+ it "validates booleans" do
102
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#boolean" })
103
+ expect(field.validate_column("true")).to be(true)
104
+ expect(field.validate_column("1")).to be(true)
105
+ expect(field.validate_column("false")).to be(true)
106
+ expect(field.validate_column("0")).to be(true)
107
+ expect(field.validate_column("derp")).to be(false)
108
+ end
109
+ # Sign-restricted integer types; "0" is the boundary case each example pins down.
110
+ context "it should validate all kinds of integers" do
111
+ it "validates a non-positive integer" do
112
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" })
113
+ expect(field.validate_column("0")).to be(true)
114
+ expect(field.validate_column("-1")).to be(true)
115
+ expect(field.validate_column("1")).to be(false)
116
+ end
117
+
118
+ it "validates a negative integer" do
119
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#negativeInteger" })
120
+ expect(field.validate_column("0")).to be(false)
121
+ expect(field.validate_column("-1")).to be(true)
122
+ expect(field.validate_column("1")).to be(false)
123
+ end
124
+
125
+ it "validates a non-negative integer" do
126
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" })
127
+ expect(field.validate_column("0")).to be(true)
128
+ expect(field.validate_column("-1")).to be(false)
129
+ expect(field.validate_column("1")).to be(true)
130
+ end
131
+
132
+ it "validates a positive integer" do
133
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#positiveInteger" })
134
+ expect(field.validate_column("0")).to be(false)
135
+ expect(field.validate_column("-1")).to be(false)
136
+ expect(field.errors.first.constraints).to eql( { "type" => "http://www.w3.org/2001/XMLSchema#positiveInteger" } ) # a type failure reports the type constraint
137
+ expect(field.validate_column("1")).to be(true)
138
+ end
139
+ end
140
+ # "minimum"/"maximum" are passed as strings next to the type.
141
+ context "when validating ranges" do
142
+
143
+ it "should enforce minimum values" do
144
+ field = Csvlint::Field.new("test", {
145
+ "type" => "http://www.w3.org/2001/XMLSchema#int",
146
+ "minimum" => "40"
147
+ })
148
+ expect( field.validate_column("42")).to be(true)
149
+ # A fresh Field is built for the failing value.
150
+ field = Csvlint::Field.new("test", {
151
+ "type" => "http://www.w3.org/2001/XMLSchema#int",
152
+ "minimum" => "40"
153
+ })
154
+ expect( field.validate_column("39")).to be(false)
155
+ expect( field.errors.first.type ).to eql(:below_minimum)
156
+ end
157
+
158
+ it "should enforce maximum values" do
159
+ field = Csvlint::Field.new("test", {
160
+ "type" => "http://www.w3.org/2001/XMLSchema#int",
161
+ "maximum" => "40"
162
+ })
163
+ expect( field.validate_column("39")).to be(true)
164
+ # A fresh Field is built for the failing value.
165
+ field = Csvlint::Field.new("test", {
166
+ "type" => "http://www.w3.org/2001/XMLSchema#int",
167
+ "maximum" => "40"
168
+ })
169
+ expect( field.validate_column("41")).to be(false)
170
+ expect( field.errors.first.type ).to eql(:above_maximum)
171
+
172
+ end
173
+ end
174
+ # Date/time types: each example also feeds a value in another type's format and expects rejection.
175
+ context "when validating dates" do
176
+ it "should validate a date time" do
177
+ field = Csvlint::Field.new("test", {
178
+ "type" => "http://www.w3.org/2001/XMLSchema#dateTime"
179
+ })
180
+ expect( field.validate_column("2014-02-17T11:09:00Z")).to be(true)
181
+ expect( field.validate_column("invalid-date")).to be(false)
182
+ expect( field.validate_column("2014-02-17")).to be(false) # a date without a time part is not a dateTime
183
+ end
184
+ it "should validate a date" do
185
+ field = Csvlint::Field.new("test", {
186
+ "type" => "http://www.w3.org/2001/XMLSchema#date"
187
+ })
188
+ expect( field.validate_column("2014-02-17T11:09:00Z")).to be(false)
189
+ expect( field.validate_column("invalid-date")).to be(false)
190
+ expect( field.validate_column("2014-02-17")).to be(true)
191
+ end
192
+ it "should validate a time" do
193
+ field = Csvlint::Field.new("test", {
194
+ "type" => "http://www.w3.org/2001/XMLSchema#time"
195
+ })
196
+ expect( field.validate_column("11:09:00")).to be(true)
197
+ expect( field.validate_column("2014-02-17T11:09:00Z")).to be(false)
198
+ expect( field.validate_column("not-a-time")).to be(false)
199
+ expect( field.validate_column("27:97:00")).to be(false) # right shape, but hour and minute are out of range
200
+ end
201
+ it "should validate a year" do
202
+ field = Csvlint::Field.new("test", {
203
+ "type" => "http://www.w3.org/2001/XMLSchema#gYear"
204
+ })
205
+ expect( field.validate_column("1999")).to be(true)
206
+ expect( field.validate_column("2525")).to be(true)
207
+ expect( field.validate_column("0001")).to be(true)
208
+ expect( field.validate_column("2014-02-17T11:09:00Z")).to be(false)
209
+ expect( field.validate_column("not-a-time")).to be(false)
210
+ expect( field.validate_column("27:97:00")).to be(false)
211
+ end
212
+ it "should validate a year-month" do
213
+ field = Csvlint::Field.new("test", {
214
+ "type" => "http://www.w3.org/2001/XMLSchema#gYearMonth"
215
+ })
216
+ expect( field.validate_column("1999-12")).to be(true)
217
+ expect( field.validate_column("2525-01")).to be(true)
218
+ expect( field.validate_column("2014-02-17T11:09:00Z")).to be(false)
219
+ expect( field.validate_column("not-a-time")).to be(false)
220
+ expect( field.validate_column("27:97:00")).to be(false)
221
+ end
222
+ it "should allow user to specify custom date time pattern" do # "datePattern" uses strftime-style directives
223
+ field = Csvlint::Field.new("test", {
224
+ "type" => "http://www.w3.org/2001/XMLSchema#dateTime",
225
+ "datePattern" => "%Y-%m-%d %H:%M:%S"
226
+ })
227
+ expect( field.validate_column("1999-12-01 10:00:00")).to be(true)
228
+ expect( field.validate_column("invalid-date")).to be(false)
229
+ expect( field.validate_column("2014-02-17")).to be(false)
230
+ expect( field.errors.first.constraints ).to eql( { # the error reports both the type and the custom pattern
231
+ "type" => "http://www.w3.org/2001/XMLSchema#dateTime",
232
+ "datePattern" => "%Y-%m-%d %H:%M:%S"
233
+ })
234
+
235
+ end
236
+ it "should allow user to compare dates" do # "minimum" is written in the same datePattern as the data
237
+ field = Csvlint::Field.new("test", {
238
+ "type" => "http://www.w3.org/2001/XMLSchema#dateTime",
239
+ "datePattern" => "%Y-%m-%d %H:%M:%S",
240
+ "minimum" => "1990-01-01 10:00:00"
241
+ })
242
+ expect( field.validate_column("1999-12-01 10:00:00")).to be(true)
243
+ expect( field.validate_column("1989-12-01 10:00:00")).to be(false)
244
+ end
245
+ end
246
+ end
247
+ end
@@ -0,0 +1,149 @@
1
+ require 'spec_helper'
2
+
3
+ describe Csvlint::Schema do
4
+
5
+ it "should tolerate missing fields" do # an empty JSON hash still yields a schema, with no fields
6
+ schema = Csvlint::Schema.from_json_table("http://example.org", {})
7
+ expect( schema ).to_not be(nil)
8
+ expect( schema.fields.empty? ).to eql(true)
9
+ end
10
+
11
+ it "should tolerate fields with no constraints" do # a missing "constraints" key becomes an empty hash
12
+ schema = Csvlint::Schema.from_json_table("http://example.org", {
13
+ "fields" => [ { "name" => "test" } ]
14
+ })
15
+ expect( schema ).to_not be(nil)
16
+ expect( schema.fields[0].name ).to eql("test")
17
+ expect( schema.fields[0].constraints ).to eql({})
18
+ end
19
+
20
+ it "should validate against the schema" do # row cells are checked against the fields by position
21
+ field = Csvlint::Field.new("test", { "required" => true } )
22
+ field2 = Csvlint::Field.new("test", { "minLength" => 3 } )
23
+ schema = Csvlint::Schema.new("http://example.org", [field, field2] )
24
+ # Both cells are invalid here, so two errors are expected.
25
+ expect( schema.validate_row( ["", "x"] ) ).to eql(false)
26
+ expect( schema.errors.size ).to eql(2)
27
+ expect( schema.errors.first.type).to eql(:missing_value)
28
+ expect( schema.errors.first.category).to eql(:schema)
29
+ expect( schema.errors.first.column).to eql(1) # column numbers are 1-based
30
+ expect( schema.validate_row( ["abc", "1234"] ) ).to eql(true)
31
+
32
+ end
33
+
34
+ it "should include validations for missing columns" do # a field with no cell in the row is still validated
35
+ minimum = Csvlint::Field.new("test", { "minLength" => 3 } )
36
+ required = Csvlint::Field.new("test2", { "required" => true } )
37
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
38
+
39
+ expect( schema.validate_row( ["abc", "x"] ) ).to eql(true)
40
+ # The second cell is absent, so the "required" field reports :missing_value.
41
+ expect( schema.validate_row( ["abc"] ) ).to eql(false)
42
+ expect( schema.errors.size ).to eql(1)
43
+ expect( schema.errors.first.type).to eql(:missing_value)
44
+ # With the field order swapped, the absent cell now falls under minLength instead.
45
+ schema = Csvlint::Schema.new("http://example.org", [required, minimum] )
46
+ expect( schema.validate_row( ["abc"] ) ).to eql(false)
47
+ expect( schema.errors.size ).to eql(1)
48
+ expect( schema.errors.first.type).to eql(:min_length)
49
+ end
50
+
51
+ it "should warn if the data has fewer columns" do # a short row yields a warning, not an error
52
+ minimum = Csvlint::Field.new("test", { "minLength" => 3 } )
53
+ maximum = Csvlint::Field.new("test2", { "maxLength" => 5 } ) # named after the constraint it actually carries
54
+ schema = Csvlint::Schema.new("http://example.org", [minimum, maximum] )
55
+ # The second argument to validate_row is the row number reported on the warning.
56
+ expect( schema.validate_row( ["abc"], 1 ) ).to eql(true)
57
+ expect( schema.warnings.size ).to eql(1)
58
+ expect( schema.warnings.first.type).to eql(:missing_column)
59
+ expect( schema.warnings.first.category).to eql(:schema)
60
+ expect( schema.warnings.first.row).to eql(1)
61
+ expect( schema.warnings.first.column).to eql(2)
62
+
63
+ #no ragged row error
64
+ expect( schema.errors.size ).to eql(0)
65
+ end
66
+
67
+ it "should warn if the data has additional columns" do # one :extra_column warning per surplus cell
68
+ minimum = Csvlint::Field.new("test", { "minLength" => 3 } )
69
+ required = Csvlint::Field.new("test2", { "required" => true } )
70
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
71
+ # Two fields, four cells: columns 3 and 4 are the extras.
72
+ expect( schema.validate_row( ["abc", "x", "more", "columns"], 1 ) ).to eql(true)
73
+ expect( schema.warnings.size ).to eql(2)
74
+ expect( schema.warnings.first.type).to eql(:extra_column)
75
+ expect( schema.warnings.first.category).to eql(:schema)
76
+ expect( schema.warnings.first.row).to eql(1)
77
+ expect( schema.warnings.first.column).to eql(3)
78
+
79
+ expect( schema.warnings[1].type).to eql(:extra_column)
80
+ expect( schema.warnings[1].column).to eql(4)
81
+
82
+ #no ragged row error
83
+ expect( schema.errors.size ).to eql(0)
84
+ end
85
+
86
+ context "when validating header" do # header mismatches are warnings; validate_header still returns true
87
+ it "should warn if column names are different to field names" do
88
+ minimum = Csvlint::Field.new("minimum", { "minLength" => 3 } )
89
+ required = Csvlint::Field.new("required", { "required" => true } )
90
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
91
+
92
+ expect( schema.validate_header(["minimum", "required"]) ).to eql(true)
93
+ expect( schema.warnings.size ).to eql(0)
94
+ # The warning records the offending header text and its 1-based column.
95
+ expect( schema.validate_header(["wrong", "required"]) ).to eql(true)
96
+ expect( schema.warnings.size ).to eql(1)
97
+ expect( schema.warnings.first.type).to eql(:header_name)
98
+ expect( schema.warnings.first.content).to eql("wrong")
99
+ expect( schema.warnings.first.column).to eql(1)
100
+ expect( schema.warnings.first.category).to eql(:schema)
101
+ # NOTE(review): size is 1 again here; this spec alone cannot tell whether "Required" is flagged (case-sensitive match, warnings reset per call) or tolerated (earlier warning carried over) - confirm against Schema#validate_header.
102
+ expect( schema.validate_header(["minimum", "Required"]) ).to eql(true)
103
+ expect( schema.warnings.size ).to eql(1)
104
+
105
+ end
106
+ end
107
+
108
+ context "when parsing JSON Tables" do
109
+ # Fixture: a JSON Table document with three fields, kept as a string and also served through a WebMock stub.
110
+ before(:each) do
111
+ @example=<<-EOL
112
+ {
113
+ "title": "Schema title",
114
+ "description": "schema",
115
+ "fields": [
116
+ { "name": "ID", "constraints": { "required": true }, "title": "id", "description": "house identifier" },
117
+ { "name": "Price", "constraints": { "required": true, "minLength": 1 } },
118
+ { "name": "Postcode", "constraints": { "required": true, "pattern": "[A-Z]{1,2}[0-9][0-9A-Z]? ?[0-9][A-Z]{2}" } }
119
+ ]
120
+ }
121
+ EOL
122
+ stub_request(:get, "http://example.com/example.json").to_return(:status => 200, :body => @example)
123
+ end
124
+ # from_json_table takes an already-parsed hash; the URI argument is stored as schema.uri.
125
+ it "should create a schema from a pre-parsed JSON table" do
126
+ json = JSON.parse( @example )
127
+ schema = Csvlint::Schema.from_json_table("http://example.org", json)
128
+
129
+ expect( schema.uri ).to eql("http://example.org")
130
+ expect( schema.title ).to eql("Schema title")
131
+ expect( schema.description ).to eql("schema")
132
+ expect( schema.fields.length ).to eql(3)
133
+ expect( schema.fields[0].name ).to eql("ID")
134
+ expect( schema.fields[0].constraints["required"] ).to eql(true)
135
+ expect( schema.fields[0].title ).to eql("id")
136
+ expect( schema.fields[0].description ).to eql("house identifier")
137
+ end
138
+ # load_from_json_table is given only the URL; the body comes from the stub registered above.
139
+ it "should create a schema from a JSON Table URL" do
140
+ schema = Csvlint::Schema.load_from_json_table("http://example.com/example.json")
141
+ expect( schema.uri ).to eql("http://example.com/example.json")
142
+ expect( schema.fields.length ).to eql(3)
143
+ expect( schema.fields[0].name ).to eql("ID")
144
+ expect( schema.fields[0].constraints["required"] ).to eql(true)
145
+
146
+ end
147
+ end
148
+
149
+ end
@@ -0,0 +1,20 @@
1
+ require 'simplecov'
2
+ require 'simplecov-rcov'
3
+ require 'csvlint'
4
+ require 'pry'
5
+ require 'webmock/rspec'
6
+ require 'coveralls'
7
+
8
+ Coveralls.wear_merged!
9
+
10
+ RSpec.configure do |config|
11
+ config.treat_symbols_as_metadata_keys_with_true_values = true
12
+ config.run_all_when_everything_filtered = true
13
+ config.filter_run :focus
14
+
15
+ # Run specs in random order to surface order dependencies. If you find an
16
+ # order dependency and want to debug it, you can fix the order by providing
17
+ # the seed, which is printed after each run.
18
+ # --seed 1234
19
+ config.order = 'random'
20
+ end
@@ -0,0 +1,279 @@
1
+ require 'spec_helper'
2
+
3
+ describe Csvlint::Validator do
4
+
5
+ before do # default stub for every example: example.csv answers 200 with an empty body unless an example re-stubs it
6
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :body => "")
7
+ end
8
+
9
+ context "csv dialect" do
10
+ it "should provide sensible defaults for CSV parsing" do
11
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
12
+ opts = validator.instance_variable_get("@csv_options")
13
+ opts.should include({
14
+ :col_sep => ",",
15
+ :row_sep => :auto,
16
+ :quote_char => '"',
17
+ :skip_blanks => false
18
+ })
19
+ end
20
+
21
+ it "should map CSV DDF to correct values" do
22
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
23
+ opts = validator.dialect_to_csv_options( {
24
+ "lineTerminator" => "\n",
25
+ "delimiter" => "\t",
26
+ "quoteChar" => "'"
27
+ })
28
+ opts.should include({
29
+ :col_sep => "\t",
30
+ :row_sep => "\n",
31
+ :quote_char => "'",
32
+ :skip_blanks => false
33
+ })
34
+ end
35
+
36
+ end
37
+
38
+ context "when detecting headers" do # header? can be driven by the dialect hash or by the Content-Type response header
39
+ it "should default to expecting a header" do
40
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
41
+ expect( validator.header? ).to eql(true)
42
+ end
43
+ # The second constructor argument is the dialect; its "header" key is honoured.
44
+ it "should look in CSV options to detect header" do
45
+ opts = {
46
+ "header" => true
47
+ }
48
+ validator = Csvlint::Validator.new("http://example.com/example.csv", opts)
49
+ expect( validator.header? ).to eql(true)
50
+ opts = {
51
+ "header" => false
52
+ }
53
+ validator = Csvlint::Validator.new("http://example.com/example.csv", opts)
54
+ expect( validator.header? ).to eql(false)
55
+ end
56
+ # With no dialect, the "header=absent|present" parameter of the served Content-Type decides.
57
+ it "should look in content-type for header" do
58
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv; header=absent"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
59
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
60
+ expect( validator.header? ).to eql(false)
61
+
62
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv; header=present"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
63
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
64
+ expect( validator.header? ).to eql(true)
65
+ end
66
+
67
+ end
68
+
69
+ context "when validating headers" do # header problems are warnings; an undeclared header on remote data is an error
70
+ it "should warn if column names aren't unique" do
71
+ data = StringIO.new( "minimum, minimum" )
72
+ validator = Csvlint::Validator.new(data)
73
+ expect( validator.validate_header(["minimum", "minimum"]) ).to eql(true)
74
+ expect( validator.warnings.size ).to eql(1)
75
+ expect( validator.warnings.first.type).to eql(:duplicate_column_name)
76
+ expect( validator.warnings.first.category).to eql(:schema)
77
+ end
78
+
79
+ it "should warn if column names are blank" do
80
+ data = StringIO.new( "minimum," )
81
+ validator = Csvlint::Validator.new(data)
82
+
83
+ expect( validator.validate_header(["minimum", ""]) ).to eql(true)
84
+ expect( validator.warnings.size ).to eql(1)
85
+ expect( validator.warnings.first.type).to eql(:empty_column_name)
86
+ expect( validator.warnings.first.category).to eql(:schema)
87
+ end
88
+ # Local (StringIO) data with no dialect: still valid, but an :assumed_header info message is recorded.
89
+ it "should include info message about missing header when we have assumed a header" do
90
+ data = StringIO.new( "1,2,3\r\n" )
91
+ validator = Csvlint::Validator.new(data)
92
+
93
+ expect( validator.valid? ).to eql(true)
94
+ expect( validator.info_messages.size ).to eql(1)
95
+ expect( validator.info_messages.first.type).to eql(:assumed_header)
96
+ expect( validator.info_messages.first.category).to eql(:structure)
97
+ end
98
+ # Same data, but the dialect states "header"=>false, so nothing is assumed and no info message appears.
99
+ it "should not include info message about missing header when we are told about the header" do
100
+ data = StringIO.new( "1,2,3\r\n" )
101
+ validator = Csvlint::Validator.new(data, "header"=>false)
102
+
103
+ expect( validator.valid? ).to eql(true)
104
+ expect( validator.info_messages.size ).to eql(0)
105
+ end
106
+ # Remote data, no dialect, no response headers at all: expected to be invalid.
107
+ it "should be an error if we have assumed a header, there is no dialect and there's no content-type" do
108
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
109
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
110
+ expect( validator.valid? ).to eql(false)
111
+ end
112
+ # Remote data served as plain "text/csv" (no header= parameter) and no dialect: also invalid.
113
+ it "should be an error if we have assumed a header, there is no dialect and content-type doesn't declare header" do
114
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
115
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
116
+ expect( validator.valid? ).to eql(false)
117
+ end
118
+ # Supplying any dialect hash - empty, "header"=>true or "header"=>false - makes the same response valid.
119
+ it "should be valid if we have a dialect and the data is from the web" do
120
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
121
+ #header defaults to true in csv dialect, so this is valid
122
+ validator = Csvlint::Validator.new("http://example.com/example.csv", {})
123
+ expect( validator.valid? ).to eql(true)
124
+
125
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
126
+ validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>true})
127
+ expect( validator.valid? ).to eql(true)
128
+
129
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
130
+ validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>false})
131
+ expect( validator.valid? ).to eql(true)
132
+ end
133
+
134
+ end
135
+
136
+ context "build_formats" do
137
+
138
+ {
139
+ "string" => "foo",
140
+ "numeric" => "1",
141
+ "uri" => "http://www.example.com",
142
+ "dateTime_iso8601" => "2013-01-01T13:00:00Z",
143
+ "date_db" => "2013-01-01",
144
+ "dateTime_hms" => "13:00:00"
145
+ }.each do |type, content|
146
+ it "should return the format of #{type} correctly" do
147
+ row = [content]
148
+
149
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
150
+ validator.build_formats(row, 1)
151
+ formats = validator.instance_variable_get("@formats")
152
+
153
+ formats[0].first.should == type
154
+ end
155
+ end
156
+
157
+ it "treats floats and ints the same" do
158
+ row = ["12", "3.1476"]
159
+
160
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
161
+ validator.build_formats(row, 1)
162
+ formats = validator.instance_variable_get("@formats")
163
+
164
+ formats[0].first.should == "numeric"
165
+ formats[1].first.should == "numeric"
166
+ end
167
+
168
+ it "should ignore blank arrays" do
169
+ row = []
170
+
171
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
172
+ validator.build_formats(row, 1)
173
+ formats = validator.instance_variable_get("@formats")
174
+ formats.should == []
175
+ end
176
+
177
+ it "should work correctly for single columns" do
178
+ rows = [
179
+ ["foo"],
180
+ ["bar"],
181
+ ["baz"]
182
+ ]
183
+
184
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
185
+
186
+ rows.each_with_index do |row, i|
187
+ validator.build_formats(row, i)
188
+ end
189
+
190
+ formats = validator.instance_variable_get("@formats")
191
+
192
+ formats.should == [
193
+ ["string",
194
+ "string",
195
+ "string"]
196
+ ]
197
+ end
198
+
199
+ it "should return formats correctly if a row is blank" do
200
+ rows = [
201
+ [],
202
+ ["foo","1","$2345"]
203
+ ]
204
+
205
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
206
+
207
+ rows.each_with_index do |row, i|
208
+ validator.build_formats(row, i)
209
+ end
210
+
211
+ formats = validator.instance_variable_get("@formats")
212
+
213
+ formats.should == [
214
+ ["string"],
215
+ ["numeric"],
216
+ ["string"]
217
+ ]
218
+ end
219
+
220
+ end
221
+
222
+ context "check_consistency" do
223
+
224
+ it "should return a warning if columns have inconsistent values" do
225
+ formats = [
226
+ ["string", "string", "string"],
227
+ ["string", "numeric", "string"],
228
+ ["numeric", "numeric", "numeric"],
229
+ ]
230
+
231
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
232
+ validator.instance_variable_set("@formats", formats)
233
+ validator.check_consistency
234
+
235
+ warnings = validator.instance_variable_get("@warnings")
236
+ warnings.delete_if { |h| h.type != :inconsistent_values }
237
+
238
+ warnings.count.should == 1
239
+ end
240
+
241
+ end
242
+
243
+ context "accessing metadata" do
244
+
245
+ before :all do
246
+ stub_request(:get, "http://example.com/crlf.csv").to_return(:status => 200, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','windows-line-endings.csv')))
247
+ end
248
+
249
+ it "can get line break symbol" do
250
+
251
+ validator = Csvlint::Validator.new("http://example.com/crlf.csv")
252
+ validator.line_breaks.should == "\r\n"
253
+
254
+ end
255
+
256
+ end
257
+
258
+ it "should give access to the complete CSV data file" do # validator.data exposes the parsed rows as arrays of strings
259
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
260
+ :headers=>{"Content-Type" => "text/csv; header=present"},
261
+ :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
262
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
263
+ expect( validator.valid? ).to eql(true)
264
+ data = validator.data
265
+ expect( data.count ).to eql 4
266
+ expect( data[0] ).to eql ['Foo','Bar','Baz'] # the header row is part of data
267
+ expect( data[2] ).to eql ['3','2','1']
268
+ end
269
+
270
+ it "should follow redirects to SSL" do # a 301 from http:// to https:// must be followed to reach the CSV
271
+ stub_request(:get, "http://example.com/redirect").to_return(:status => 301, :headers=>{"Location" => "https://example.com/example.csv"})
272
+ stub_request(:get, "https://example.com/example.csv").to_return(:status => 200,
273
+ :headers=>{"Content-Type" => "text/csv; header=present"},
274
+ :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
275
+
276
+ validator = Csvlint::Validator.new("http://example.com/redirect")
277
+ expect( validator.valid? ).to eql(true)
278
+ end
279
+ end