csvlint 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitignore +22 -0
  4. data/.travis.yml +10 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.md +22 -0
  7. data/README.md +214 -0
  8. data/Rakefile +17 -0
  9. data/bin/create_schema +32 -0
  10. data/bin/csvlint +52 -0
  11. data/csvlint.gemspec +39 -0
  12. data/features/check_format.feature +46 -0
  13. data/features/csv_options.feature +35 -0
  14. data/features/fixtures/cr-line-endings.csv +1 -0
  15. data/features/fixtures/crlf-line-endings.csv +3 -0
  16. data/features/fixtures/inconsistent-line-endings.csv +2 -0
  17. data/features/fixtures/invalid-byte-sequence.csv +24 -0
  18. data/features/fixtures/lf-line-endings.csv +3 -0
  19. data/features/fixtures/spreadsheet.xls +0 -0
  20. data/features/fixtures/title-row.csv +4 -0
  21. data/features/fixtures/valid.csv +3 -0
  22. data/features/fixtures/windows-line-endings.csv +2 -0
  23. data/features/information.feature +22 -0
  24. data/features/parse_csv.feature +90 -0
  25. data/features/schema_validation.feature +63 -0
  26. data/features/sources.feature +18 -0
  27. data/features/step_definitions/csv_options_steps.rb +19 -0
  28. data/features/step_definitions/information_steps.rb +13 -0
  29. data/features/step_definitions/parse_csv_steps.rb +30 -0
  30. data/features/step_definitions/schema_validation_steps.rb +7 -0
  31. data/features/step_definitions/sources_steps.rb +7 -0
  32. data/features/step_definitions/validation_errors_steps.rb +43 -0
  33. data/features/step_definitions/validation_info_steps.rb +18 -0
  34. data/features/step_definitions/validation_warnings_steps.rb +46 -0
  35. data/features/support/env.rb +30 -0
  36. data/features/support/webmock.rb +1 -0
  37. data/features/validation_errors.feature +151 -0
  38. data/features/validation_info.feature +24 -0
  39. data/features/validation_warnings.feature +74 -0
  40. data/lib/csvlint.rb +13 -0
  41. data/lib/csvlint/error_collector.rb +43 -0
  42. data/lib/csvlint/error_message.rb +15 -0
  43. data/lib/csvlint/field.rb +102 -0
  44. data/lib/csvlint/schema.rb +69 -0
  45. data/lib/csvlint/types.rb +113 -0
  46. data/lib/csvlint/validate.rb +253 -0
  47. data/lib/csvlint/version.rb +3 -0
  48. data/lib/csvlint/wrapped_io.rb +39 -0
  49. data/spec/field_spec.rb +247 -0
  50. data/spec/schema_spec.rb +149 -0
  51. data/spec/spec_helper.rb +20 -0
  52. data/spec/validator_spec.rb +279 -0
  53. metadata +367 -0
@@ -0,0 +1,247 @@
1
+ require 'spec_helper'
2
+
3
+ describe Csvlint::Field do
4
+
5
+ it "should validate required fields" do
6
+ field = Csvlint::Field.new("test", { "required" => true } )
7
+ expect( field.validate_column( nil ) ).to be(false)
8
+ expect( field.errors.first.category ).to be(:schema)
9
+ expect( field.validate_column( "" ) ).to be(false)
10
+ expect( field.validate_column( "data" ) ).to be(true)
11
+ end
12
+
13
+ it "should include the failed constraints" do
14
+ field = Csvlint::Field.new("test", { "required" => true } )
15
+ expect( field.validate_column( nil ) ).to be(false)
16
+ expect( field.errors.first.constraints ).to eql( { "required" => true } )
17
+ end
18
+
19
+ it "should validate minimum length" do
20
+ field = Csvlint::Field.new("test", { "minLength" => 3 } )
21
+ expect( field.validate_column( nil ) ).to be(false)
22
+ expect( field.validate_column( "" ) ).to be(false)
23
+ expect( field.validate_column( "ab" ) ).to be(false)
24
+ expect( field.validate_column( "abc" ) ).to be(true)
25
+ expect( field.validate_column( "abcd" ) ).to be(true)
26
+ end
27
+
28
+ it "should validate maximum length" do
29
+ field = Csvlint::Field.new("test", { "maxLength" => 3 } )
30
+ expect( field.validate_column( nil ) ).to be(true)
31
+ expect( field.validate_column( "" ) ).to be(true)
32
+ expect( field.validate_column( "ab" ) ).to be(true)
33
+ expect( field.validate_column( "abc" ) ).to be(true)
34
+ expect( field.validate_column( "abcd" ) ).to be(false)
35
+ end
36
+
37
+ it "should validate against regex" do
38
+ field = Csvlint::Field.new("test", { "pattern" => "\{[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}\}"} )
39
+ expect( field.validate_column( "abc") ).to be(false)
40
+ expect( field.validate_column( "{3B0DA29C-C89A-4FAA-918A-0000074FA0E0}") ).to be(true)
41
+ end
42
+
43
+ it "should apply combinations of constraints" do
44
+ field = Csvlint::Field.new("test", { "required"=>true, "pattern" => "\{[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}\}"} )
45
+ expect( field.validate_column( "abc") ).to be(false)
46
+ expect( field.errors.first.constraints ).to eql( { "pattern" => "\{[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}\}" } )
47
+
48
+ expect( field.validate_column( nil ) ).to be(false)
49
+ expect( field.errors.first.constraints ).to eql( { "required"=>true } )
50
+
51
+ expect( field.validate_column( "{3B0DA29C-C89A-4FAA-918A-0000074FA0E0}") ).to be(true)
52
+
53
+ end
54
+
55
+ it "should enforce uniqueness for a column" do
56
+ field = Csvlint::Field.new("test", { "unique" => true } )
57
+ expect( field.validate_column( "abc") ).to be(true)
58
+ expect( field.validate_column( "abc") ).to be(false)
59
+ expect( field.errors.first.category ).to be(:schema)
60
+ expect( field.errors.first.type ).to be(:unique)
61
+ end
62
+
63
+ context "it should validate correct types" do
64
+ it "skips empty fields" do
65
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#int" })
66
+ expect( field.validate_column("")).to be(true)
67
+ end
68
+
69
+ it "validates strings" do
70
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#string" })
71
+ expect( field.validate_column("42")).to be(true)
72
+ expect( field.validate_column("forty-two")).to be(true)
73
+ end
74
+
75
+ it "validates ints" do
76
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#int" })
77
+ expect( field.validate_column("42")).to be(true)
78
+ expect( field.validate_column("forty-two")).to be(false)
79
+ end
80
+
81
+ it "validates integers" do
82
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#integer" })
83
+ expect( field.validate_column("42")).to be(true)
84
+ expect( field.validate_column("forty-two")).to be(false)
85
+ end
86
+
87
+ it "validates floats" do
88
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#float" })
89
+ expect(field.validate_column("42.0")).to be(true)
90
+ expect(field.validate_column("42")).to be(true)
91
+ expect(field.validate_column("forty-two")).to be(false)
92
+ end
93
+
94
+ it "validates URIs" do
95
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#anyURI" })
96
+ expect(field.validate_column("http://theodi.org/team")).to be(true)
97
+ expect(field.validate_column("https://theodi.org/team")).to be(true)
98
+ expect(field.validate_column("42.0")).to be(false)
99
+ end
100
+
101
+ it "validates booleans" do
102
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#boolean" })
103
+ expect(field.validate_column("true")).to be(true)
104
+ expect(field.validate_column("1")).to be(true)
105
+ expect(field.validate_column("false")).to be(true)
106
+ expect(field.validate_column("0")).to be(true)
107
+ expect(field.validate_column("derp")).to be(false)
108
+ end
109
+
110
+ context "it should validate all kinds of integers" do
111
+ it "validates a non-positive integer" do
112
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" })
113
+ expect(field.validate_column("0")).to be(true)
114
+ expect(field.validate_column("-1")).to be(true)
115
+ expect(field.validate_column("1")).to be(false)
116
+ end
117
+
118
+ it "validates a negative integer" do
119
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#negativeInteger" })
120
+ expect(field.validate_column("0")).to be(false)
121
+ expect(field.validate_column("-1")).to be(true)
122
+ expect(field.validate_column("1")).to be(false)
123
+ end
124
+
125
+ it "validates a non-negative integer" do
126
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" })
127
+ expect(field.validate_column("0")).to be(true)
128
+ expect(field.validate_column("-1")).to be(false)
129
+ expect(field.validate_column("1")).to be(true)
130
+ end
131
+
132
+ it "validates a positive integer" do
133
+ field = Csvlint::Field.new("test", { "type" => "http://www.w3.org/2001/XMLSchema#positiveInteger" })
134
+ expect(field.validate_column("0")).to be(false)
135
+ expect(field.validate_column("-1")).to be(false)
136
+ expect(field.errors.first.constraints).to eql( { "type" => "http://www.w3.org/2001/XMLSchema#positiveInteger" } )
137
+ expect(field.validate_column("1")).to be(true)
138
+ end
139
+ end
140
+
141
+ context "when validating ranges" do
142
+
143
+ it "should enforce minimum values" do
144
+ field = Csvlint::Field.new("test", {
145
+ "type" => "http://www.w3.org/2001/XMLSchema#int",
146
+ "minimum" => "40"
147
+ })
148
+ expect( field.validate_column("42")).to be(true)
149
+
150
+ field = Csvlint::Field.new("test", {
151
+ "type" => "http://www.w3.org/2001/XMLSchema#int",
152
+ "minimum" => "40"
153
+ })
154
+ expect( field.validate_column("39")).to be(false)
155
+ expect( field.errors.first.type ).to eql(:below_minimum)
156
+ end
157
+
158
+ it "should enforce maximum values" do
159
+ field = Csvlint::Field.new("test", {
160
+ "type" => "http://www.w3.org/2001/XMLSchema#int",
161
+ "maximum" => "40"
162
+ })
163
+ expect( field.validate_column("39")).to be(true)
164
+
165
+ field = Csvlint::Field.new("test", {
166
+ "type" => "http://www.w3.org/2001/XMLSchema#int",
167
+ "maximum" => "40"
168
+ })
169
+ expect( field.validate_column("41")).to be(false)
170
+ expect( field.errors.first.type ).to eql(:above_maximum)
171
+
172
+ end
173
+ end
174
+
175
+ context "when validating dates" do
176
+ it "should validate a date time" do
177
+ field = Csvlint::Field.new("test", {
178
+ "type" => "http://www.w3.org/2001/XMLSchema#dateTime"
179
+ })
180
+ expect( field.validate_column("2014-02-17T11:09:00Z")).to be(true)
181
+ expect( field.validate_column("invalid-date")).to be(false)
182
+ expect( field.validate_column("2014-02-17")).to be(false)
183
+ end
184
+ it "should validate a date" do
185
+ field = Csvlint::Field.new("test", {
186
+ "type" => "http://www.w3.org/2001/XMLSchema#date"
187
+ })
188
+ expect( field.validate_column("2014-02-17T11:09:00Z")).to be(false)
189
+ expect( field.validate_column("invalid-date")).to be(false)
190
+ expect( field.validate_column("2014-02-17")).to be(true)
191
+ end
192
+ it "should validate a time" do
193
+ field = Csvlint::Field.new("test", {
194
+ "type" => "http://www.w3.org/2001/XMLSchema#time"
195
+ })
196
+ expect( field.validate_column("11:09:00")).to be(true)
197
+ expect( field.validate_column("2014-02-17T11:09:00Z")).to be(false)
198
+ expect( field.validate_column("not-a-time")).to be(false)
199
+ expect( field.validate_column("27:97:00")).to be(false)
200
+ end
201
+ it "should validate a year" do
202
+ field = Csvlint::Field.new("test", {
203
+ "type" => "http://www.w3.org/2001/XMLSchema#gYear"
204
+ })
205
+ expect( field.validate_column("1999")).to be(true)
206
+ expect( field.validate_column("2525")).to be(true)
207
+ expect( field.validate_column("0001")).to be(true)
208
+ expect( field.validate_column("2014-02-17T11:09:00Z")).to be(false)
209
+ expect( field.validate_column("not-a-time")).to be(false)
210
+ expect( field.validate_column("27:97:00")).to be(false)
211
+ end
212
+ it "should validate a year-month" do
213
+ field = Csvlint::Field.new("test", {
214
+ "type" => "http://www.w3.org/2001/XMLSchema#gYearMonth"
215
+ })
216
+ expect( field.validate_column("1999-12")).to be(true)
217
+ expect( field.validate_column("2525-01")).to be(true)
218
+ expect( field.validate_column("2014-02-17T11:09:00Z")).to be(false)
219
+ expect( field.validate_column("not-a-time")).to be(false)
220
+ expect( field.validate_column("27:97:00")).to be(false)
221
+ end
222
+ it "should allow user to specify custom date time pattern" do
223
+ field = Csvlint::Field.new("test", {
224
+ "type" => "http://www.w3.org/2001/XMLSchema#dateTime",
225
+ "datePattern" => "%Y-%m-%d %H:%M:%S"
226
+ })
227
+ expect( field.validate_column("1999-12-01 10:00:00")).to be(true)
228
+ expect( field.validate_column("invalid-date")).to be(false)
229
+ expect( field.validate_column("2014-02-17")).to be(false)
230
+ expect( field.errors.first.constraints ).to eql( {
231
+ "type" => "http://www.w3.org/2001/XMLSchema#dateTime",
232
+ "datePattern" => "%Y-%m-%d %H:%M:%S"
233
+ })
234
+
235
+ end
236
+ it "should allow user to compare dates" do
237
+ field = Csvlint::Field.new("test", {
238
+ "type" => "http://www.w3.org/2001/XMLSchema#dateTime",
239
+ "datePattern" => "%Y-%m-%d %H:%M:%S",
240
+ "minimum" => "1990-01-01 10:00:00"
241
+ })
242
+ expect( field.validate_column("1999-12-01 10:00:00")).to be(true)
243
+ expect( field.validate_column("1989-12-01 10:00:00")).to be(false)
244
+ end
245
+ end
246
+ end
247
+ end
@@ -0,0 +1,149 @@
1
+ require 'spec_helper'
2
+
3
+ describe Csvlint::Schema do
4
+
5
+ it "should tolerate missing fields" do
6
+ schema = Csvlint::Schema.from_json_table("http://example.org", {})
7
+ expect( schema ).to_not be(nil)
8
+ expect( schema.fields.empty? ).to eql(true)
9
+ end
10
+
11
+ it "should tolerate fields with no constraints" do
12
+ schema = Csvlint::Schema.from_json_table("http://example.org", {
13
+ "fields" => [ { "name" => "test" } ]
14
+ })
15
+ expect( schema ).to_not be(nil)
16
+ expect( schema.fields[0].name ).to eql("test")
17
+ expect( schema.fields[0].constraints ).to eql({})
18
+ end
19
+
20
+ it "should validate against the schema" do
21
+ field = Csvlint::Field.new("test", { "required" => true } )
22
+ field2 = Csvlint::Field.new("test", { "minLength" => 3 } )
23
+ schema = Csvlint::Schema.new("http://example.org", [field, field2] )
24
+
25
+ expect( schema.validate_row( ["", "x"] ) ).to eql(false)
26
+ expect( schema.errors.size ).to eql(2)
27
+ expect( schema.errors.first.type).to eql(:missing_value)
28
+ expect( schema.errors.first.category).to eql(:schema)
29
+ expect( schema.errors.first.column).to eql(1)
30
+ expect( schema.validate_row( ["abc", "1234"] ) ).to eql(true)
31
+
32
+ end
33
+
34
+ it "should include validations for missing columns" do
35
+ minimum = Csvlint::Field.new("test", { "minLength" => 3 } )
36
+ required = Csvlint::Field.new("test2", { "required" => true } )
37
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
38
+
39
+ expect( schema.validate_row( ["abc", "x"] ) ).to eql(true)
40
+
41
+ expect( schema.validate_row( ["abc"] ) ).to eql(false)
42
+ expect( schema.errors.size ).to eql(1)
43
+ expect( schema.errors.first.type).to eql(:missing_value)
44
+
45
+ schema = Csvlint::Schema.new("http://example.org", [required, minimum] )
46
+ expect( schema.validate_row( ["abc"] ) ).to eql(false)
47
+ expect( schema.errors.size ).to eql(1)
48
+ expect( schema.errors.first.type).to eql(:min_length)
49
+ end
50
+
51
+ it "should warn if the data has fewer columns" do
52
+ minimum = Csvlint::Field.new("test", { "minLength" => 3 } )
53
+ maximum = Csvlint::Field.new("test2", { "maxLength" => 5 } ) # deliberately not a required field: only the missing-column warning is expected
54
+ schema = Csvlint::Schema.new("http://example.org", [minimum, maximum] )
55
+
56
+ expect( schema.validate_row( ["abc"], 1 ) ).to eql(true)
57
+ expect( schema.warnings.size ).to eql(1)
58
+ expect( schema.warnings.first.type).to eql(:missing_column)
59
+ expect( schema.warnings.first.category).to eql(:schema)
60
+ expect( schema.warnings.first.row).to eql(1)
61
+ expect( schema.warnings.first.column).to eql(2)
62
+
63
+ #no ragged row error
64
+ expect( schema.errors.size ).to eql(0)
65
+ end
66
+
67
+ it "should warn if the data has additional columns" do
68
+ minimum = Csvlint::Field.new("test", { "minLength" => 3 } )
69
+ required = Csvlint::Field.new("test2", { "required" => true } )
70
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
71
+
72
+ expect( schema.validate_row( ["abc", "x", "more", "columns"], 1 ) ).to eql(true)
73
+ expect( schema.warnings.size ).to eql(2)
74
+ expect( schema.warnings.first.type).to eql(:extra_column)
75
+ expect( schema.warnings.first.category).to eql(:schema)
76
+ expect( schema.warnings.first.row).to eql(1)
77
+ expect( schema.warnings.first.column).to eql(3)
78
+
79
+ expect( schema.warnings[1].type).to eql(:extra_column)
80
+ expect( schema.warnings[1].column).to eql(4)
81
+
82
+ #no ragged row error
83
+ expect( schema.errors.size ).to eql(0)
84
+ end
85
+
86
+ context "when validating header" do
87
+ it "should warn if column names are different to field names" do
88
+ minimum = Csvlint::Field.new("minimum", { "minLength" => 3 } )
89
+ required = Csvlint::Field.new("required", { "required" => true } )
90
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
91
+
92
+ expect( schema.validate_header(["minimum", "required"]) ).to eql(true)
93
+ expect( schema.warnings.size ).to eql(0)
94
+
95
+ expect( schema.validate_header(["wrong", "required"]) ).to eql(true)
96
+ expect( schema.warnings.size ).to eql(1)
97
+ expect( schema.warnings.first.type).to eql(:header_name)
98
+ expect( schema.warnings.first.content).to eql("wrong")
99
+ expect( schema.warnings.first.column).to eql(1)
100
+ expect( schema.warnings.first.category).to eql(:schema)
101
+
102
+ expect( schema.validate_header(["minimum", "Required"]) ).to eql(true)
103
+ expect( schema.warnings.size ).to eql(1)
104
+
105
+ end
106
+ end
107
+
108
+ context "when parsing JSON Tables" do
109
+
110
+ before(:each) do
111
+ @example=<<-EOL
112
+ {
113
+ "title": "Schema title",
114
+ "description": "schema",
115
+ "fields": [
116
+ { "name": "ID", "constraints": { "required": true }, "title": "id", "description": "house identifier" },
117
+ { "name": "Price", "constraints": { "required": true, "minLength": 1 } },
118
+ { "name": "Postcode", "constraints": { "required": true, "pattern": "[A-Z]{1,2}[0-9][0-9A-Z]? ?[0-9][A-Z]{2}" } }
119
+ ]
120
+ }
121
+ EOL
122
+ stub_request(:get, "http://example.com/example.json").to_return(:status => 200, :body => @example)
123
+ end
124
+
125
+ it "should create a schema from a pre-parsed JSON table" do
126
+ json = JSON.parse( @example )
127
+ schema = Csvlint::Schema.from_json_table("http://example.org", json)
128
+
129
+ expect( schema.uri ).to eql("http://example.org")
130
+ expect( schema.title ).to eql("Schema title")
131
+ expect( schema.description ).to eql("schema")
132
+ expect( schema.fields.length ).to eql(3)
133
+ expect( schema.fields[0].name ).to eql("ID")
134
+ expect( schema.fields[0].constraints["required"] ).to eql(true)
135
+ expect( schema.fields[0].title ).to eql("id")
136
+ expect( schema.fields[0].description ).to eql("house identifier")
137
+ end
138
+
139
+ it "should create a schema from a JSON Table URL" do
140
+ schema = Csvlint::Schema.load_from_json_table("http://example.com/example.json")
141
+ expect( schema.uri ).to eql("http://example.com/example.json")
142
+ expect( schema.fields.length ).to eql(3)
143
+ expect( schema.fields[0].name ).to eql("ID")
144
+ expect( schema.fields[0].constraints["required"] ).to eql(true)
145
+
146
+ end
147
+ end
148
+
149
+ end
@@ -0,0 +1,20 @@
1
+ require 'simplecov'
2
+ require 'simplecov-rcov'
3
+ require 'csvlint'
4
+ require 'pry'
5
+ require 'webmock/rspec'
6
+ require 'coveralls'
7
+
8
+ Coveralls.wear_merged!
9
+
10
+ RSpec.configure do |config|
11
+ config.treat_symbols_as_metadata_keys_with_true_values = true
12
+ config.run_all_when_everything_filtered = true
13
+ config.filter_run :focus
14
+
15
+ # Run specs in random order to surface order dependencies. If you find an
16
+ # order dependency and want to debug it, you can fix the order by providing
17
+ # the seed, which is printed after each run.
18
+ # --seed 1234
19
+ config.order = 'random'
20
+ end
@@ -0,0 +1,279 @@
1
+ require 'spec_helper'
2
+
3
+ describe Csvlint::Validator do
4
+
5
+ before do
6
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :body => "")
7
+ end
8
+
9
+ context "csv dialect" do
10
+ it "should provide sensible defaults for CSV parsing" do
11
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
12
+ opts = validator.instance_variable_get("@csv_options")
13
+ opts.should include({
14
+ :col_sep => ",",
15
+ :row_sep => :auto,
16
+ :quote_char => '"',
17
+ :skip_blanks => false
18
+ })
19
+ end
20
+
21
+ it "should map CSV DDF to correct values" do
22
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
23
+ opts = validator.dialect_to_csv_options( {
24
+ "lineTerminator" => "\n",
25
+ "delimiter" => "\t",
26
+ "quoteChar" => "'"
27
+ })
28
+ opts.should include({
29
+ :col_sep => "\t",
30
+ :row_sep => "\n",
31
+ :quote_char => "'",
32
+ :skip_blanks => false
33
+ })
34
+ end
35
+
36
+ end
37
+
38
+ context "when detecting headers" do
39
+ it "should default to expecting a header" do
40
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
41
+ expect( validator.header? ).to eql(true)
42
+ end
43
+
44
+ it "should look in CSV options to detect header" do
45
+ opts = {
46
+ "header" => true
47
+ }
48
+ validator = Csvlint::Validator.new("http://example.com/example.csv", opts)
49
+ expect( validator.header? ).to eql(true)
50
+ opts = {
51
+ "header" => false
52
+ }
53
+ validator = Csvlint::Validator.new("http://example.com/example.csv", opts)
54
+ expect( validator.header? ).to eql(false)
55
+ end
56
+
57
+ it "should look in content-type for header" do
58
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv; header=absent"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
59
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
60
+ expect( validator.header? ).to eql(false)
61
+
62
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv; header=present"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
63
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
64
+ expect( validator.header? ).to eql(true)
65
+ end
66
+
67
+ end
68
+
69
+ context "when validating headers" do
70
+ it "should warn if column names aren't unique" do
71
+ data = StringIO.new( "minimum, minimum" )
72
+ validator = Csvlint::Validator.new(data)
73
+ expect( validator.validate_header(["minimum", "minimum"]) ).to eql(true)
74
+ expect( validator.warnings.size ).to eql(1)
75
+ expect( validator.warnings.first.type).to eql(:duplicate_column_name)
76
+ expect( validator.warnings.first.category).to eql(:schema)
77
+ end
78
+
79
+ it "should warn if column names are blank" do
80
+ data = StringIO.new( "minimum," )
81
+ validator = Csvlint::Validator.new(data)
82
+
83
+ expect( validator.validate_header(["minimum", ""]) ).to eql(true)
84
+ expect( validator.warnings.size ).to eql(1)
85
+ expect( validator.warnings.first.type).to eql(:empty_column_name)
86
+ expect( validator.warnings.first.category).to eql(:schema)
87
+ end
88
+
89
+ it "should include info message about missing header when we have assumed a header" do
90
+ data = StringIO.new( "1,2,3\r\n" )
91
+ validator = Csvlint::Validator.new(data)
92
+
93
+ expect( validator.valid? ).to eql(true)
94
+ expect( validator.info_messages.size ).to eql(1)
95
+ expect( validator.info_messages.first.type).to eql(:assumed_header)
96
+ expect( validator.info_messages.first.category).to eql(:structure)
97
+ end
98
+
99
+ it "should not include info message about missing header when we are told about the header" do
100
+ data = StringIO.new( "1,2,3\r\n" )
101
+ validator = Csvlint::Validator.new(data, "header"=>false)
102
+
103
+ expect( validator.valid? ).to eql(true)
104
+ expect( validator.info_messages.size ).to eql(0)
105
+ end
106
+
107
+ it "should be an error if we have assumed a header, there is no dialect and there's no content-type" do
108
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
109
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
110
+ expect( validator.valid? ).to eql(false)
111
+ end
112
+
113
+ it "should be an error if we have assumed a header, there is no dialect and content-type doesn't declare header" do
114
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
115
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
116
+ expect( validator.valid? ).to eql(false)
117
+ end
118
+
119
+ it "should be valid if we have a dialect and the data is from the web" do
120
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
121
+ #header defaults to true in csv dialect, so this is valid
122
+ validator = Csvlint::Validator.new("http://example.com/example.csv", {})
123
+ expect( validator.valid? ).to eql(true)
124
+
125
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
126
+ validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>true})
127
+ expect( validator.valid? ).to eql(true)
128
+
129
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
130
+ validator = Csvlint::Validator.new("http://example.com/example.csv", {"header"=>false})
131
+ expect( validator.valid? ).to eql(true)
132
+ end
133
+
134
+ end
135
+
136
+ context "build_formats" do
137
+
138
+ {
139
+ "string" => "foo",
140
+ "numeric" => "1",
141
+ "uri" => "http://www.example.com",
142
+ "dateTime_iso8601" => "2013-01-01T13:00:00Z",
143
+ "date_db" => "2013-01-01",
144
+ "dateTime_hms" => "13:00:00"
145
+ }.each do |type, content|
146
+ it "should return the format of #{type} correctly" do
147
+ row = [content]
148
+
149
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
150
+ validator.build_formats(row, 1)
151
+ formats = validator.instance_variable_get("@formats")
152
+
153
+ formats[0].first.should == type
154
+ end
155
+ end
156
+
157
+ it "treats floats and ints the same" do
158
+ row = ["12", "3.1476"]
159
+
160
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
161
+ validator.build_formats(row, 1)
162
+ formats = validator.instance_variable_get("@formats")
163
+
164
+ formats[0].first.should == "numeric"
165
+ formats[1].first.should == "numeric"
166
+ end
167
+
168
+ it "should ignore blank arrays" do
169
+ row = []
170
+
171
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
172
+ validator.build_formats(row, 1)
173
+ formats = validator.instance_variable_get("@formats")
174
+ formats.should == []
175
+ end
176
+
177
+ it "should work correctly for single columns" do
178
+ rows = [
179
+ ["foo"],
180
+ ["bar"],
181
+ ["baz"]
182
+ ]
183
+
184
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
185
+
186
+ rows.each_with_index do |row, i|
187
+ validator.build_formats(row, i)
188
+ end
189
+
190
+ formats = validator.instance_variable_get("@formats")
191
+
192
+ formats.should == [
193
+ ["string",
194
+ "string",
195
+ "string"]
196
+ ]
197
+ end
198
+
199
+ it "should return formats correctly if a row is blank" do
200
+ rows = [
201
+ [],
202
+ ["foo","1","$2345"]
203
+ ]
204
+
205
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
206
+
207
+ rows.each_with_index do |row, i|
208
+ validator.build_formats(row, i)
209
+ end
210
+
211
+ formats = validator.instance_variable_get("@formats")
212
+
213
+ formats.should == [
214
+ ["string"],
215
+ ["numeric"],
216
+ ["string"]
217
+ ]
218
+ end
219
+
220
+ end
221
+
222
+ context "check_consistency" do
223
+
224
+ it "should return a warning if columns have inconsistent values" do
225
+ formats = [
226
+ ["string", "string", "string"],
227
+ ["string", "numeric", "string"],
228
+ ["numeric", "numeric", "numeric"],
229
+ ]
230
+
231
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
232
+ validator.instance_variable_set("@formats", formats)
233
+ validator.check_consistency
234
+
235
+ warnings = validator.instance_variable_get("@warnings")
236
+ warnings.delete_if { |h| h.type != :inconsistent_values }
237
+
238
+ warnings.count.should == 1
239
+ end
240
+
241
+ end
242
+
243
+ context "accessing metadata" do
244
+
245
+ before :all do
246
+ stub_request(:get, "http://example.com/crlf.csv").to_return(:status => 200, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','windows-line-endings.csv')))
247
+ end
248
+
249
+ it "can get line break symbol" do
250
+
251
+ validator = Csvlint::Validator.new("http://example.com/crlf.csv")
252
+ validator.line_breaks.should == "\r\n"
253
+
254
+ end
255
+
256
+ end
257
+
258
+ it "should give access to the complete CSV data file" do
259
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200,
260
+ :headers=>{"Content-Type" => "text/csv; header=present"},
261
+ :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
262
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
263
+ expect( validator.valid? ).to eql(true)
264
+ data = validator.data
265
+ expect( data.count ).to eql 4
266
+ expect( data[0] ).to eql ['Foo','Bar','Baz']
267
+ expect( data[2] ).to eql ['3','2','1']
268
+ end
269
+
270
+ it "should follow redirects to SSL" do
271
+ stub_request(:get, "http://example.com/redirect").to_return(:status => 301, :headers=>{"Location" => "https://example.com/example.csv"})
272
+ stub_request(:get, "https://example.com/example.csv").to_return(:status => 200,
273
+ :headers=>{"Content-Type" => "text/csv; header=present"},
274
+ :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
275
+
276
+ validator = Csvlint::Validator.new("http://example.com/redirect")
277
+ expect( validator.valid? ).to eql(true)
278
+ end
279
+ end