wjordan213-csvlint 0.2.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitattributes +2 -0
  4. data/.gitignore +28 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +32 -0
  7. data/CHANGELOG.md +361 -0
  8. data/Gemfile +7 -0
  9. data/LICENSE.md +22 -0
  10. data/README.md +328 -0
  11. data/Rakefile +17 -0
  12. data/bin/create_schema +32 -0
  13. data/bin/csvlint +10 -0
  14. data/features/check_format.feature +46 -0
  15. data/features/cli.feature +210 -0
  16. data/features/csv_options.feature +35 -0
  17. data/features/csvupload.feature +145 -0
  18. data/features/csvw_schema_validation.feature +127 -0
  19. data/features/fixtures/cr-line-endings.csv +0 -0
  20. data/features/fixtures/crlf-line-endings.csv +0 -0
  21. data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
  22. data/features/fixtures/inconsistent-line-endings.csv +0 -0
  23. data/features/fixtures/invalid-byte-sequence.csv +0 -0
  24. data/features/fixtures/invalid_many_rows.csv +0 -0
  25. data/features/fixtures/lf-line-endings.csv +0 -0
  26. data/features/fixtures/spreadsheet.xls +0 -0
  27. data/features/fixtures/spreadsheet.xlsx +0 -0
  28. data/features/fixtures/title-row.csv +0 -0
  29. data/features/fixtures/valid.csv +0 -0
  30. data/features/fixtures/valid_many_rows.csv +0 -0
  31. data/features/fixtures/windows-line-endings.csv +0 -0
  32. data/features/information.feature +22 -0
  33. data/features/parse_csv.feature +90 -0
  34. data/features/schema_validation.feature +105 -0
  35. data/features/sources.feature +17 -0
  36. data/features/step_definitions/cli_steps.rb +11 -0
  37. data/features/step_definitions/csv_options_steps.rb +24 -0
  38. data/features/step_definitions/information_steps.rb +13 -0
  39. data/features/step_definitions/parse_csv_steps.rb +42 -0
  40. data/features/step_definitions/schema_validation_steps.rb +33 -0
  41. data/features/step_definitions/sources_steps.rb +7 -0
  42. data/features/step_definitions/validation_errors_steps.rb +90 -0
  43. data/features/step_definitions/validation_info_steps.rb +22 -0
  44. data/features/step_definitions/validation_warnings_steps.rb +60 -0
  45. data/features/support/aruba.rb +56 -0
  46. data/features/support/env.rb +26 -0
  47. data/features/support/load_tests.rb +114 -0
  48. data/features/support/webmock.rb +1 -0
  49. data/features/validation_errors.feature +147 -0
  50. data/features/validation_info.feature +16 -0
  51. data/features/validation_warnings.feature +86 -0
  52. data/lib/csvlint.rb +27 -0
  53. data/lib/csvlint/cli.rb +165 -0
  54. data/lib/csvlint/csvw/column.rb +359 -0
  55. data/lib/csvlint/csvw/date_format.rb +182 -0
  56. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  57. data/lib/csvlint/csvw/number_format.rb +211 -0
  58. data/lib/csvlint/csvw/property_checker.rb +761 -0
  59. data/lib/csvlint/csvw/table.rb +204 -0
  60. data/lib/csvlint/csvw/table_group.rb +165 -0
  61. data/lib/csvlint/error_collector.rb +27 -0
  62. data/lib/csvlint/error_message.rb +15 -0
  63. data/lib/csvlint/field.rb +196 -0
  64. data/lib/csvlint/schema.rb +92 -0
  65. data/lib/csvlint/validate.rb +599 -0
  66. data/lib/csvlint/version.rb +3 -0
  67. data/spec/csvw/column_spec.rb +112 -0
  68. data/spec/csvw/date_format_spec.rb +49 -0
  69. data/spec/csvw/number_format_spec.rb +417 -0
  70. data/spec/csvw/table_group_spec.rb +143 -0
  71. data/spec/csvw/table_spec.rb +90 -0
  72. data/spec/field_spec.rb +252 -0
  73. data/spec/schema_spec.rb +211 -0
  74. data/spec/spec_helper.rb +17 -0
  75. data/spec/validator_spec.rb +619 -0
  76. data/wjordan213_csvlint.gemspec +46 -0
  77. metadata +490 -0
@@ -0,0 +1,211 @@
1
+ require 'spec_helper'
2
+
3
+ describe Csvlint::Schema do
4
+
5
+ it "should tolerate missing fields" do
6
+ schema = Csvlint::Schema.from_json_table("http://example.org", {})
7
+ expect( schema ).to_not be(nil)
8
+ expect( schema.fields.empty? ).to eql(true)
9
+ end
10
+
11
+ it "should tolerate fields with no constraints" do
12
+ schema = Csvlint::Schema.from_json_table("http://example.org", {
13
+ "fields" => [ { "name" => "test" } ]
14
+ })
15
+ expect( schema ).to_not be(nil)
16
+ expect( schema.fields[0].name ).to eql("test")
17
+ expect( schema.fields[0].constraints ).to eql({})
18
+ end
19
+
20
+ it "should validate against the schema" do
21
+ field = Csvlint::Field.new("test", { "required" => true } )
22
+ field2 = Csvlint::Field.new("test", { "minLength" => 3 } )
23
+ schema = Csvlint::Schema.new("http://example.org", [field, field2] )
24
+
25
+ expect( schema.validate_row( ["", "x"] ) ).to eql(false)
26
+ expect( schema.errors.size ).to eql(2)
27
+ expect( schema.errors.first.type).to eql(:missing_value)
28
+ expect( schema.errors.first.category).to eql(:schema)
29
+ expect( schema.errors.first.column).to eql(1)
30
+ expect( schema.validate_row( ["abc", "1234"] ) ).to eql(true)
31
+
32
+ end
33
+
34
+ it "should include validations for missing columns" do
35
+ minimum = Csvlint::Field.new("test", { "minLength" => 3 } )
36
+ required = Csvlint::Field.new("test2", { "required" => true } )
37
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
38
+
39
+ expect( schema.validate_row( ["abc", "x"] ) ).to eql(true)
40
+
41
+ expect( schema.validate_row( ["abc"] ) ).to eql(false)
42
+ expect( schema.errors.size ).to eql(1)
43
+ expect( schema.errors.first.type).to eql(:missing_value)
44
+
45
+ schema = Csvlint::Schema.new("http://example.org", [required, minimum] )
46
+ expect( schema.validate_row( ["abc"] ) ).to eql(false)
47
+ expect( schema.errors.size ).to eql(1)
48
+ expect( schema.errors.first.type).to eql(:min_length)
49
+ end
50
+
51
+ it "should warn if the data has fewer columns" do
52
+ minimum = Csvlint::Field.new("test", { "minLength" => 3 } )
53
+ required = Csvlint::Field.new("test2", { "maxLength" => 5 } )
54
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
55
+
56
+ expect( schema.validate_row( ["abc"], 1 ) ).to eql(true)
57
+ expect( schema.warnings.size ).to eql(1)
58
+ expect( schema.warnings.first.type).to eql(:missing_column)
59
+ expect( schema.warnings.first.category).to eql(:schema)
60
+ expect( schema.warnings.first.row).to eql(1)
61
+ expect( schema.warnings.first.column).to eql(2)
62
+
63
+ #no ragged row error
64
+ expect( schema.errors.size ).to eql(0)
65
+ end
66
+
67
+ it "should warn if the data has additional columns" do
68
+ minimum = Csvlint::Field.new("test", { "minLength" => 3 } )
69
+ required = Csvlint::Field.new("test2", { "required" => true } )
70
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
71
+
72
+ expect( schema.validate_row( ["abc", "x", "more", "columns"], 1 ) ).to eql(true)
73
+ expect( schema.warnings.size ).to eql(2)
74
+ expect( schema.warnings.first.type).to eql(:extra_column)
75
+ expect( schema.warnings.first.category).to eql(:schema)
76
+ expect( schema.warnings.first.row).to eql(1)
77
+ expect( schema.warnings.first.column).to eql(3)
78
+
79
+ expect( schema.warnings[1].type).to eql(:extra_column)
80
+ expect( schema.warnings[1].column).to eql(4)
81
+
82
+ #no ragged row error
83
+ expect( schema.errors.size ).to eql(0)
84
+ end
85
+
86
+ context "when validating header" do
87
+ it "should warn if column names are different to field names" do
88
+ minimum = Csvlint::Field.new("minimum", { "minLength" => 3 } )
89
+ required = Csvlint::Field.new("required", { "required" => true } )
90
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
91
+
92
+ expect( schema.validate_header(["minimum", "required"]) ).to eql(true)
93
+ expect( schema.warnings.size ).to eql(0)
94
+
95
+ expect( schema.validate_header(["wrong", "required"]) ).to eql(true)
96
+ expect( schema.warnings.size ).to eql(1)
97
+ expect( schema.warnings.first.row ).to eql(1)
98
+ expect( schema.warnings.first.type ).to eql(:malformed_header)
99
+ expect( schema.warnings.first.content ).to eql('wrong,required')
100
+ expect( schema.warnings.first.column ).to eql(nil)
101
+ expect( schema.warnings.first.category ).to eql(:schema)
102
+ expect schema.warnings.first.constraints.has_value?('minimum,required')
103
+ # expect( schema.warnings.first.constraints.values ).to eql(['minimum,required'])
104
+ expect( schema.validate_header(["minimum", "Required"]) ).to eql(true)
105
+ expect( schema.warnings.size ).to eql(1)
106
+
107
+ end
108
+
109
+ it "should warn if column count is less than field count" do
110
+ minimum = Csvlint::Field.new("minimum", { "minLength" => 3 } )
111
+ required = Csvlint::Field.new("required", { "required" => true } )
112
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
113
+
114
+ expect( schema.validate_header(["minimum"]) ).to eql(true)
115
+ expect( schema.warnings.size ).to eql(1)
116
+ expect( schema.warnings.first.row ).to eql(1)
117
+ expect( schema.warnings.first.type ).to eql(:malformed_header)
118
+ expect( schema.warnings.first.content ).to eql("minimum")
119
+ expect( schema.warnings.first.column ).to eql(nil)
120
+ expect( schema.warnings.first.category ).to eql(:schema)
121
+ expect schema.warnings.first.constraints.has_value?('minimum,required')
122
+ # expect( schema.warnings.first.constraints.values ).to eql(['minimum,required'])
123
+
124
+ end
125
+
126
+ it "should warn if column count is more than field count" do
127
+ minimum = Csvlint::Field.new("minimum", { "minLength" => 3 } )
128
+ schema = Csvlint::Schema.new("http://example.org", [minimum] )
129
+
130
+ expect( schema.validate_header(["wrong", "required"]) ).to eql(true)
131
+ expect( schema.warnings.size ).to eql(1)
132
+ expect( schema.warnings.first.row ).to eql(1)
133
+ expect( schema.warnings.first.type ).to eql(:malformed_header)
134
+ expect( schema.warnings.first.content ).to eql("wrong,required")
135
+ expect( schema.warnings.first.column ).to eql(nil)
136
+ expect( schema.warnings.first.category ).to eql(:schema)
137
+ # expect( schema.warnings.first.constraints.values ).to eql('minimum')
138
+ expect( schema.warnings.first.constraints.has_value?('minimum'))
139
+
140
+ end
141
+
142
+ end
143
+
144
+ context "when parsing JSON Tables" do
145
+
146
+ before(:each) do
147
+ @example=<<-EOL
148
+ {
149
+ "title": "Schema title",
150
+ "description": "schema",
151
+ "fields": [
152
+ { "name": "ID", "constraints": { "required": true }, "title": "id", "description": "house identifier" },
153
+ { "name": "Price", "constraints": { "required": true, "minLength": 1 } },
154
+ { "name": "Postcode", "constraints": { "required": true, "pattern": "[A-Z]{1,2}[0-9][0-9A-Z]? ?[0-9][A-Z]{2}" } }
155
+ ]
156
+ }
157
+ EOL
158
+ stub_request(:get, "http://example.com/example.json").to_return(:status => 200, :body => @example)
159
+ end
160
+
161
+ it "should create a schema from a pre-parsed JSON table" do
162
+ json = JSON.parse( @example )
163
+ schema = Csvlint::Schema.from_json_table("http://example.org", json)
164
+
165
+ expect( schema.uri ).to eql("http://example.org")
166
+ expect( schema.title ).to eql("Schema title")
167
+ expect( schema.description ).to eql("schema")
168
+ expect( schema.fields.length ).to eql(3)
169
+ expect( schema.fields[0].name ).to eql("ID")
170
+ expect( schema.fields[0].constraints["required"] ).to eql(true)
171
+ expect( schema.fields[0].title ).to eql("id")
172
+ expect( schema.fields[0].description ).to eql("house identifier")
173
+ expect( schema.fields[2].constraints["pattern"]).to eql("[A-Z]{1,2}[0-9][0-9A-Z]? ?[0-9][A-Z]{2}")
174
+ end
175
+
176
+ it "should create a schema from a JSON Table URL" do
177
+ schema = Csvlint::Schema.load_from_json("http://example.com/example.json")
178
+ expect( schema.uri ).to eql("http://example.com/example.json")
179
+ expect( schema.fields.length ).to eql(3)
180
+ expect( schema.fields[0].name ).to eql("ID")
181
+ expect( schema.fields[0].constraints["required"] ).to eql(true)
182
+
183
+ end
184
+ end
185
+
186
+ context "when parsing CSVW metadata" do
187
+
188
+ before(:each) do
189
+ @example=<<-EOL
190
+ {
191
+ "@context": "http://www.w3.org/ns/csvw",
192
+ "url": "http://example.com/example1.csv",
193
+ "tableSchema": {
194
+ "columns": [
195
+ { "name": "Name", "required": true, "datatype": { "base": "string", "format": ".+" } },
196
+ { "name": "Id", "required": true, "datatype": { "base": "string", "minLength": 3 } },
197
+ { "name": "Email", "required": true }
198
+ ]
199
+ }
200
+ }
201
+ EOL
202
+ stub_request(:get, "http://example.com/metadata.json").to_return(:status => 200, :body => @example)
203
+ end
204
+
205
+ it "should create a table group from a CSVW metadata URL" do
206
+ schema = Csvlint::Schema.load_from_json("http://example.com/metadata.json")
207
+ expect( schema.class ).to eq(Csvlint::Csvw::TableGroup)
208
+ end
209
+ end
210
+
211
+ end
@@ -0,0 +1,17 @@
1
+ require 'coveralls'
2
+ Coveralls.wear_merged!('test_frameworks')
3
+
4
+ require 'csvlint'
5
+ require 'pry'
6
+ require 'webmock/rspec'
7
+
8
+ RSpec.configure do |config|
9
+ config.run_all_when_everything_filtered = true
10
+ config.filter_run :focus
11
+
12
+ # Run specs in random order to surface order dependencies. If you find an
13
+ # order dependency and want to debug it, you can fix the order by providing
14
+ # the seed, which is printed after each run.
15
+ # --seed 1234
16
+ config.order = 'random'
17
+ end
@@ -0,0 +1,619 @@
1
+ require 'spec_helper'
2
+
3
+ describe Csvlint::Validator do
4
+
5
+ before do
6
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :body => "")
7
+ stub_request(:get, "http://example.com/.well-known/csvm").to_return(:status => 404)
8
+ stub_request(:get, "http://example.com/example.csv-metadata.json").to_return(:status => 404)
9
+ stub_request(:get, "http://example.com/csv-metadata.json").to_return(:status => 404)
10
+ end
11
+
12
+ it "should validate from a URL" do
13
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
14
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
15
+
16
+ expect(validator.valid?).to eql(true)
17
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
18
+ expect(validator.instance_variable_get("@col_counts").count).to eql(3)
19
+ expect(validator.data.size).to eql(3)
20
+ end
21
+
22
+ it "should validate from a file path" do
23
+ validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
24
+
25
+ expect(validator.valid?).to eql(true)
26
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
27
+ expect(validator.instance_variable_get("@col_counts").count).to eql(3)
28
+ expect(validator.data.size).to eql(3)
29
+ end
30
+
31
+ context "multi line CSV validation with included schema" do
32
+
33
+ end
34
+
35
+ context "single line row validation with included schema" do
36
+
37
+ end
38
+
39
+ context "validation with multiple lines: " do
40
+
41
+ # TODO multiple lines permits testing of warnings
42
+ # TODO need more assertions in each test IE @formats
43
+ # TODO the phrasing of col_counts if only consulting specs might be confusing
44
+ # TODO ^-> col_counts and data.size should be equivalent, but only data is populated outside of if row.nil?
45
+ # TODO ^- -> and it's less the size of col_counts than the homogeneity of its contents which is important
46
+
47
+ it ".each() -> parse_contents method validates a well formed CSV" do
48
+ # when invoking parse contents
49
+ data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\"")
50
+
51
+ validator = Csvlint::Validator.new(data)
52
+
53
+ expect(validator.valid?).to eql(true)
54
+ # TODO would be beneficial to know how formats functions WRT to headers - check_format.feature:17 returns 3 rows total
55
+ # TODO in its formats object but is provided with 5 rows (with one nil row) [uses validation_warnings_steps.rb]
56
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
57
+ expect(validator.instance_variable_get("@col_counts").count).to eql(4)
58
+ expect(validator.data.size).to eql(4)
59
+
60
+ end
61
+
62
+ it ".each() -> `parse_contents` parses malformed CSV and catches unclosed quote" do
63
+ # doesn't build warnings because check_consistency isn't invoked
64
+ # TODO below is trailing whitespace but is interpreted as an unclosed quote
65
+ data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\" ")
66
+
67
+ validator = Csvlint::Validator.new(data)
68
+
69
+ expect(validator.valid?).to eql(false)
70
+ expect(validator.errors.first.type).to eql(:unclosed_quote)
71
+ expect(validator.errors.count).to eql(1)
72
+ end
73
+
74
+ it ".each() -> `parse_contents` parses malformed CSV and catches whitespace and edge case" do
75
+ # when this data gets passed the header it rescues a whitespace error, resulting in the header row being discarded
76
+ # TODO - check if this is an edge case, currently passing because it requires advice on how to specify
77
+ data = StringIO.new(" \"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"Foo\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\" ")
78
+
79
+ validator = Csvlint::Validator.new(data)
80
+
81
+ expect(validator.valid?).to eql(false)
82
+ expect(validator.errors.first.type).to eql(:whitespace)
83
+ expect(validator.errors.count).to eql(2)
84
+ end
85
+
86
+ it "handles line breaks within a cell" do
87
+ data = StringIO.new("\"a\",\"b\",\"c\"\r\n\"d\",\"e\",\"this is\r\nvalid\"\r\n\"a\",\"b\",\"c\"")
88
+ validator = Csvlint::Validator.new(data)
89
+ expect(validator.valid?).to eql(true)
90
+ end
91
+
92
+ it "handles multiple line breaks within a cell" do
93
+ data = StringIO.new("\"a\",\"b\",\"c\"\r\n\"d\",\"this is\r\n valid\",\"as is this\r\n too\"")
94
+ validator = Csvlint::Validator.new(data)
95
+ expect(validator.valid?).to eql(true)
96
+ end
97
+ end
98
+
99
+ context "csv dialect" do
100
+ it "should provide sensible defaults for CSV parsing" do
101
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
102
+ opts = validator.instance_variable_get("@csv_options")
103
+ expect(opts).to include({
104
+ :col_sep => ",",
105
+ :row_sep => :auto,
106
+ :quote_char => '"',
107
+ :skip_blanks => false
108
+ })
109
+ end
110
+
111
+ it "should map CSV DDF to correct values" do
112
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
113
+ opts = validator.dialect_to_csv_options( {
114
+ "lineTerminator" => "\n",
115
+ "delimiter" => "\t",
116
+ "quoteChar" => "'"
117
+ })
118
+ expect(opts).to include({
119
+ :col_sep => "\t",
120
+ :row_sep => "\n",
121
+ :quote_char => "'",
122
+ :skip_blanks => false
123
+ })
124
+ end
125
+
126
+ it ".each() -> `validate` to pass input in streaming fashion" do
127
+ # warnings are built when validate is used to call all three methods
128
+ data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\"")
129
+ validator = Csvlint::Validator.new(data)
130
+
131
+ expect(validator.valid?).to eql(true)
132
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
133
+ expect(validator.instance_variable_get("@col_counts").count).to eql(4)
134
+ expect(validator.data.size).to eql(4)
135
+ expect(validator.info_messages.count).to eql(1)
136
+ end
137
+
138
+ it ".each() -> `validate` parses malformed CSV, populates errors, warnings & info_msgs,invokes finish()" do
139
+ data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"two\",\"3\"\r\n\"3\",\"2\", \"1\"")
140
+
141
+ validator = Csvlint::Validator.new(data)
142
+
143
+ expect(validator.valid?).to eql(false)
144
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
145
+ expect(validator.instance_variable_get("@col_counts").count).to eql(4)
146
+ expect(validator.data.size).to eql(5)
147
+ expect(validator.info_messages.count).to eql(1)
148
+ expect(validator.errors.count).to eql(1)
149
+ expect(validator.errors.first.type).to eql(:whitespace)
150
+ expect(validator.warnings.count).to eql(1)
151
+ expect(validator.warnings.first.type).to eql(:inconsistent_values)
152
+ end
153
+
154
+ it "File.open.each_line -> `validate` passes a valid csv" do
155
+ filename = 'valid_many_rows.csv'
156
+ file = File.join(File.expand_path(Dir.pwd), "features", "fixtures", filename)
157
+ validator = Csvlint::Validator.new(File.new(file))
158
+
159
+ expect(validator.valid?).to eql(true)
160
+ expect(validator.info_messages.size).to eql(1)
161
+ expect(validator.info_messages.first.type).to eql(:assumed_header)
162
+ expect(validator.info_messages.first.category).to eql(:structure)
163
+ end
164
+
165
+ end
166
+
167
+ context "with a single row" do
168
+
169
+ it "validates correctly" do
170
+ stream = "\"a\",\"b\",\"c\"\r\n"
171
+ validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
172
+ expect(validator.valid?).to eql(true)
173
+ end
174
+
175
+ it "checks for non rfc line breaks" do
176
+ stream = "\"a\",\"b\",\"c\"\n"
177
+ validator = Csvlint::Validator.new(StringIO.new(stream), {"header" => false})
178
+ expect(validator.valid?).to eql(true)
179
+ expect(validator.info_messages.count).to eq(1)
180
+ expect(validator.info_messages.first.type).to eql(:nonrfc_line_breaks)
181
+ end
182
+
183
+ it "checks for blank rows" do
184
+ data = StringIO.new('"","",')
185
+ validator = Csvlint::Validator.new(data, "header" => false)
186
+
187
+ expect(validator.valid?).to eql(false)
188
+ expect(validator.errors.count).to eq(1)
189
+ expect(validator.errors.first.type).to eql(:blank_rows)
190
+ end
191
+
192
+ it "returns the content of the string with the error" do
193
+ stream = "\"\",\"\",\"\"\r\n"
194
+ validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
195
+ expect(validator.errors.first.content).to eql("\"\",\"\",\"\"\r\n")
196
+ end
197
+
198
+ it "should presume a header unless told otherwise" do
199
+ stream = "1,2,3\r\n"
200
+ validator = Csvlint::Validator.new(StringIO.new(stream))
201
+
202
+ expect( validator.valid? ).to eql(true)
203
+ expect( validator.info_messages.size ).to eql(1)
204
+ expect( validator.info_messages.first.type).to eql(:assumed_header)
205
+ expect( validator.info_messages.first.category).to eql(:structure)
206
+ end
207
+
208
+ it "should evaluate the row as 'row 2' when stipulated" do
209
+ stream = "1,2,3\r\n"
210
+ validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
211
+ validator.validate
212
+ expect(validator.valid?).to eql(true)
213
+ expect(validator.info_messages.size).to eql(0)
214
+ end
215
+
216
+ end
217
+
218
+ context "it returns the correct error from ERROR_MATCHES" do
219
+
220
+ it "checks for unclosed quotes" do
221
+ stream = "\"a,\"b\",\"c\"\n"
222
+ validator = Csvlint::Validator.new(StringIO.new(stream))
223
+ expect(validator.valid?).to eql(false)
224
+ expect(validator.errors.count).to eq(1)
225
+ expect(validator.errors.first.type).to eql(:unclosed_quote)
226
+ end
227
+
228
+
229
+ # TODO stray quotes is not covered in any spec in this library
230
+ # it "checks for stray quotes" do
231
+ # stream = "\"a\",“b“,\"c\"" "\r\n"
232
+ # validator = Csvlint::Validator.new(stream)
233
+ # validator.validate # implicitly invokes parse_contents(stream)
234
+ # expect(validator.valid?).to eql(false)
235
+ # expect(validator.errors.count).to eq(1)
236
+ # expect(validator.errors.first.type).to eql(:stray_quote)
237
+ # end
238
+
239
+ it "checks for whitespace" do
240
+ stream = " \"a\",\"b\",\"c\"\r\n"
241
+ validator = Csvlint::Validator.new(StringIO.new(stream))
242
+
243
+ expect(validator.valid?).to eql(false)
244
+ expect(validator.errors.count).to eq(1)
245
+ expect(validator.errors.first.type).to eql(:whitespace)
246
+ end
247
+
248
+ it "returns line break errors if incorrectly specified" do
249
+ # TODO the logic for catching this error message is very esoteric
250
+ stream = "\"a\",\"b\",\"c\"\n"
251
+ validator = Csvlint::Validator.new(StringIO.new(stream), {"lineTerminator" => "\r\n"})
252
+ expect(validator.valid?).to eql(false)
253
+ expect(validator.errors.count).to eq(1)
254
+ expect(validator.errors.first.type).to eql(:line_breaks)
255
+ end
256
+
257
+ end
258
+
259
+ context "when validating headers" do
260
+
261
+ it "should warn if column names aren't unique" do
262
+ data = StringIO.new( "minimum, minimum" )
263
+ validator = Csvlint::Validator.new(data)
264
+ validator.reset
265
+ expect( validator.validate_header(["minimum", "minimum"]) ).to eql(true)
266
+ expect( validator.warnings.size ).to eql(1)
267
+ expect( validator.warnings.first.type).to eql(:duplicate_column_name)
268
+ expect( validator.warnings.first.category).to eql(:schema)
269
+ end
270
+
271
+ it "should warn if column names are blank" do
272
+ data = StringIO.new( "minimum," )
273
+ validator = Csvlint::Validator.new(data)
274
+
275
+ expect( validator.validate_header(["minimum", ""]) ).to eql(true)
276
+ expect( validator.warnings.size ).to eql(1)
277
+ expect( validator.warnings.first.type).to eql(:empty_column_name)
278
+ expect( validator.warnings.first.category).to eql(:schema)
279
+ end
280
+
281
+ it "should include info message about missing header when we have assumed a header" do
282
+ data = StringIO.new( "1,2,3\r\n" )
283
+ validator = Csvlint::Validator.new(data)
284
+ expect( validator.valid? ).to eql(true)
285
+ expect( validator.info_messages.size ).to eql(1)
286
+ expect( validator.info_messages.first.type).to eql(:assumed_header)
287
+ expect( validator.info_messages.first.category).to eql(:structure)
288
+ end
289
+
290
+ it "should not include info message about missing header when we are told about the header" do
291
+ data = StringIO.new( "1,2,3\r\n" )
292
+ validator = Csvlint::Validator.new(data, "header" => false)
293
+ expect( validator.valid? ).to eql(true)
294
+ expect( validator.info_messages.size ).to eql(0)
295
+ end
296
+ end
297
+
298
+ context "build_formats" do
299
+
300
+ {
301
+ :string => "foo",
302
+ :numeric => "1",
303
+ :uri => "http://www.example.com",
304
+ :dateTime_iso8601 => "2013-01-01T13:00:00Z",
305
+ :date_db => "2013-01-01",
306
+ :dateTime_hms => "13:00:00"
307
+ }.each do |type, content|
308
+ it "should return the format of #{type} correctly" do
309
+ row = [content]
310
+
311
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
312
+ validator.build_formats(row)
313
+ formats = validator.instance_variable_get("@formats")
314
+
315
+ expect(formats[0].keys.first).to eql type
316
+ end
317
+ end
318
+
319
+ it "treats floats and ints the same" do
320
+ row = ["12", "3.1476"]
321
+
322
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
323
+ validator.build_formats(row)
324
+ formats = validator.instance_variable_get("@formats")
325
+
326
+ expect(formats[0].keys.first).to eql :numeric
327
+ expect(formats[1].keys.first).to eql :numeric
328
+ end
329
+
330
+ it "should ignore blank arrays" do
331
+ row = []
332
+
333
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
334
+ validator.build_formats(row)
335
+
336
+ formats = validator.instance_variable_get("@formats")
337
+ expect(formats).to eql []
338
+ end
339
+
340
+ it "should work correctly for single columns" do
341
+ rows = [
342
+ ["foo"],
343
+ ["bar"],
344
+ ["baz"]
345
+ ]
346
+
347
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
348
+
349
+ rows.each_with_index do |row, i|
350
+ validator.build_formats(row)
351
+ end
352
+
353
+ formats = validator.instance_variable_get("@formats")
354
+ expect(formats).to eql [{:string => 3}]
355
+ end
356
+
357
+ it "should return formats correctly if a row is blank" do
358
+ rows = [
359
+ [],
360
+ ["foo", "1", "$2345"]
361
+ ]
362
+
363
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
364
+
365
+ rows.each_with_index do |row, i|
366
+ validator.build_formats(row)
367
+ end
368
+
369
+ formats = validator.instance_variable_get("@formats")
370
+
371
+ expect(formats).to eql [
372
+ {:string => 1},
373
+ {:numeric => 1},
374
+ {:string => 1},
375
+ ]
376
+ end
377
+
378
+ end
379
+
380
+ context "csv dialect" do
381
+ it "should provide sensible defaults for CSV parsing" do
382
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
383
+ opts = validator.instance_variable_get("@csv_options")
384
+ expect(opts).to include({
385
+ :col_sep => ",",
386
+ :row_sep => :auto,
387
+ :quote_char => '"',
388
+ :skip_blanks => false
389
+ })
390
+ end
391
+
392
+ it "should map CSV DDF to correct values" do
393
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
394
+ opts = validator.dialect_to_csv_options({
395
+ "lineTerminator" => "\n",
396
+ "delimiter" => "\t",
397
+ "quoteChar" => "'"
398
+ })
399
+ expect(opts).to include({
400
+ :col_sep => "\t",
401
+ :row_sep => "\n",
402
+ :quote_char => "'",
403
+ :skip_blanks => false
404
+ })
405
+ end
406
+
407
+ end
408
+
409
+ context "check_consistency" do
410
+
411
+ it "should return a warning if columns have inconsistent values" do
412
+ formats = [
413
+ {:string => 3},
414
+ {:string => 2, :numeric => 1},
415
+ {:numeric => 3},
416
+ ]
417
+
418
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
419
+ validator.instance_variable_set("@formats", formats)
420
+ validator.check_consistency
421
+
422
+ warnings = validator.instance_variable_get("@warnings")
423
+ warnings.delete_if { |h| h.type != :inconsistent_values }
424
+
425
+ expect(warnings.count).to eql 1
426
+ end
427
+
428
+ end
429
+
430
+ #TODO the below tests are all the remaining tests from validator_spec.rb, annotations indicate their status HOWEVER these tests may be best refactored into client specs
431
# Header detection: checks what Validator#header? reports when it is given
# no hints, explicit dialect options, or a Content-Type on the HTTP response.
context "when detecting headers" do
  # Stubs a GET of the example URL so it answers 200 with the valid.csv
  # fixture as its body, served under the given Content-Type header.
  def stub_example_csv(content_type)
    fixture = File.join(File.dirname(__FILE__), '..', 'features', 'fixtures', 'valid.csv')
    response = {
      status: 200,
      headers: { "Content-Type" => content_type },
      body: File.read(fixture)
    }
    stub_request(:get, "http://example.com/example.csv").to_return(response)
  end

  it "should default to expecting a header" do
    linter = Csvlint::Validator.new("http://example.com/example.csv")
    expect(linter.header?).to eql(true)
  end

  it "should look in CSV options to detect header" do
    # The "header" dialect option is expected to be reflected verbatim.
    [true, false].each do |flag|
      dialect = { "header" => flag }
      linter = Csvlint::Validator.new("http://example.com/example.csv", dialect)
      expect(linter.header?).to eql(flag)
    end
  end

  it "should look in content-type for header=absent" do
    stub_example_csv("text/csv; header=absent")
    linter = Csvlint::Validator.new("http://example.com/example.csv")
    expect(linter.header?).to eql(false)
    expect(linter.errors.size).to eql(0)
  end

  it "should look in content-type for header=present" do
    stub_example_csv("text/csv; header=present")
    linter = Csvlint::Validator.new("http://example.com/example.csv")
    expect(linter.header?).to eql(true)
    expect(linter.errors.size).to eql(0)
  end

  it "assume header present if not specified in content type" do
    stub_example_csv("text/csv")
    linter = Csvlint::Validator.new("http://example.com/example.csv")
    expect(linter.header?).to eql(true)
    expect(linter.errors.size).to eql(0)
    # The assumption is surfaced as exactly one info message.
    expect(linter.info_messages.size).to eql(1)
    expect(linter.info_messages.first.type).to eql(:assumed_header)
  end

  it "give wrong content type error if content type is wrong" do
    stub_example_csv("text/html")
    linter = Csvlint::Validator.new("http://example.com/example.csv")
    expect(linter.header?).to eql(true)
    expect(linter.errors.size).to eql(1)
    expect(linter.errors[0].type).to eql(:wrong_content_type)
  end
end
482
+
483
# Header validation: warnings for duplicate or blank column names, and the
# info message emitted when the presence of a header row was only assumed.
context "when validating headers" do
  # Stubs a GET of the example URL so it answers 200 with the valid.csv
  # fixture, served as plain "text/csv" (no header parameter).
  def stub_plain_csv
    fixture = File.join(File.dirname(__FILE__), '..', 'features', 'fixtures', 'valid.csv')
    response = {
      status: 200,
      headers: { "Content-Type" => "text/csv" },
      body: File.read(fixture)
    }
    stub_request(:get, "http://example.com/example.csv").to_return(response)
  end

  it "should warn if column names aren't unique" do
    source = StringIO.new("minimum, minimum")
    linter = Csvlint::Validator.new(source)
    expect(linter.warnings.size).to eql(1)
    expect(linter.warnings.first.type).to eql(:duplicate_column_name)
    expect(linter.warnings.first.category).to eql(:schema)
  end

  it "should warn if column names are blank" do
    source = StringIO.new("minimum,")
    linter = Csvlint::Validator.new(source)

    # validate_header is invoked directly with a blank second column name.
    header_ok = linter.validate_header(["minimum", ""])
    expect(header_ok).to eql(true)
    expect(linter.warnings.size).to eql(1)
    expect(linter.warnings.first.type).to eql(:empty_column_name)
    expect(linter.warnings.first.category).to eql(:schema)
  end

  it "should include info message about missing header when we have assumed a header" do
    source = StringIO.new("1,2,3\r\n")
    linter = Csvlint::Validator.new(source)

    expect(linter.valid?).to eql(true)
    expect(linter.info_messages.size).to eql(1)
    expect(linter.info_messages.first.type).to eql(:assumed_header)
    expect(linter.info_messages.first.category).to eql(:structure)
  end

  it "should not include info message about missing header when we are told about the header" do
    source = StringIO.new("1,2,3\r\n")
    linter = Csvlint::Validator.new(source, "header"=>false)
    expect(linter.valid?).to eql(true)
    expect(linter.info_messages.size).to eql(0)
  end

  it "should not be an error if we have assumed a header, there is no dialect and content-type doesn't declare header, as we assume header=present" do
    stub_plain_csv
    linter = Csvlint::Validator.new("http://example.com/example.csv")
    expect(linter.valid?).to eql(true)
  end

  it "should be valid if we have a dialect and the data is from the web" do
    # The empty dialect is valid too: header defaults to true in a CSV dialect.
    [{}, { "header" => true }, { "header" => false }].each do |dialect|
      stub_plain_csv
      linter = Csvlint::Validator.new("http://example.com/example.csv", dialect)
      expect(linter.valid?).to eql(true)
    end
  end
end
541
+
542
# Metadata the validator derives from a fetched file; here, the line-break
# sequence reported for a CRLF-terminated fixture.
context "accessing metadata" do

  # Registered per example rather than once per group: WebMock's RSpec
  # integration resets all stubs after every example, so stubs created in a
  # `before :all` hook would be missing for any example after the first.
  before :each do
    stub_request(:get, "http://example.com/crlf.csv").to_return(:status => 200, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','windows-line-endings.csv')))
    # The metadata lookup next to the CSV is answered with "not found".
    stub_request(:get, "http://example.com/crlf.csv-metadata.json").to_return(:status => 404)
  end

  it "can get line break symbol" do
    validator = Csvlint::Validator.new("http://example.com/crlf.csv")
    expect(validator.line_breaks).to eql "\r\n"
  end

end
555
+
556
# After validating a stubbed download of valid.csv, Validator#data should
# expose all three parsed rows.
it "should give access to the complete CSV data file" do
  fixture = File.join(File.dirname(__FILE__), '..', 'features', 'fixtures', 'valid.csv')
  response = {
    status: 200,
    headers: { "Content-Type" => "text/csv; header=present" },
    body: File.read(fixture)
  }
  stub_request(:get, "http://example.com/example.csv").to_return(response)

  linter = Csvlint::Validator.new("http://example.com/example.csv")
  expect(linter.valid?).to eql(true)

  rows = linter.data
  expect(rows.count).to eql 3
  expect(rows[0]).to eql ['Foo','Bar','Baz']
  expect(rows[2]).to eql ['3','2','1']
end
568
+
569
# Validator#row_count should report every row read from the stubbed
# download of valid.csv.
it "should count the total number of rows read" do
  fixture = File.join(File.dirname(__FILE__), '..', 'features', 'fixtures', 'valid.csv')
  response = {
    status: 200,
    headers: { "Content-Type" => "text/csv; header=present" },
    body: File.read(fixture)
  }
  stub_request(:get, "http://example.com/example.csv").to_return(response)

  linter = Csvlint::Validator.new("http://example.com/example.csv")
  expect(linter.row_count).to eq(3)
end
576
+
577
# With limit_lines: 2 the validator should expose only the first two rows
# of the stubbed download of valid.csv.
it "should limit number of lines read" do
  fixture = File.join(File.dirname(__FILE__), '..', 'features', 'fixtures', 'valid.csv')
  response = {
    status: 200,
    headers: { "Content-Type" => "text/csv; header=present" },
    body: File.read(fixture)
  }
  stub_request(:get, "http://example.com/example.csv").to_return(response)

  linter = Csvlint::Validator.new("http://example.com/example.csv", {}, nil, limit_lines: 2)
  expect(linter.valid?).to eql(true)

  rows = linter.data
  expect(rows.count).to eql 2
  expect(rows[0]).to eql ['Foo','Bar','Baz']
end
587
+
588
# A lambda passed in the validator options is expected to be invoked once per
# line of the input while the validator is being constructed.
context "with a lambda" do

  # Location of the fixture read by both examples below.
  def valid_csv_path
    File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')
  end

  it "should call a lambda for each line" do
    @count = 0
    mylambda = lambda { |_row| @count = @count + 1 }
    # Block-form File.open closes the handle once validation has run;
    # the validator instance itself is not needed by this example.
    File.open(valid_csv_path) do |file|
      Csvlint::Validator.new(file, {}, nil, { lambda: mylambda })
    end
    expect(@count).to eq(3)
  end

  it "reports back the status of each line" do
    @results = []
    mylambda = lambda { |row| @results << row.current_line }
    File.open(valid_csv_path) do |file|
      Csvlint::Validator.new(file, {}, nil, { lambda: mylambda })
    end
    # One entry per line, carrying line numbers 1 through 3 in order.
    expect(@results).to eq([1, 2, 3])
  end

end
608
+
609
+ # Commented out because there is currently no way to mock redirects with Typhoeus and WebMock - see https://github.com/bblimke/webmock/issues/237
610
+ # it "should follow redirects to SSL" do
611
+ # stub_request(:get, "http://example.com/redirect").to_return(:status => 301, :headers=>{"Location" => "https://example.com/example.csv"})
612
+ # stub_request(:get, "https://example.com/example.csv").to_return(:status => 200,
613
+ # :headers=>{"Content-Type" => "text/csv; header=present"},
614
+ # :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
615
+ #
616
+ # validator = Csvlint::Validator.new("http://example.com/redirect")
617
+ # expect( validator.valid? ).to eql(true)
618
+ # end
619
+ end