wjordan213-csvlint 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitattributes +2 -0
  4. data/.gitignore +28 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +32 -0
  7. data/CHANGELOG.md +361 -0
  8. data/Gemfile +7 -0
  9. data/LICENSE.md +22 -0
  10. data/README.md +328 -0
  11. data/Rakefile +17 -0
  12. data/bin/create_schema +32 -0
  13. data/bin/csvlint +10 -0
  14. data/features/check_format.feature +46 -0
  15. data/features/cli.feature +210 -0
  16. data/features/csv_options.feature +35 -0
  17. data/features/csvupload.feature +145 -0
  18. data/features/csvw_schema_validation.feature +127 -0
  19. data/features/fixtures/cr-line-endings.csv +0 -0
  20. data/features/fixtures/crlf-line-endings.csv +0 -0
  21. data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
  22. data/features/fixtures/inconsistent-line-endings.csv +0 -0
  23. data/features/fixtures/invalid-byte-sequence.csv +0 -0
  24. data/features/fixtures/invalid_many_rows.csv +0 -0
  25. data/features/fixtures/lf-line-endings.csv +0 -0
  26. data/features/fixtures/spreadsheet.xls +0 -0
  27. data/features/fixtures/spreadsheet.xlsx +0 -0
  28. data/features/fixtures/title-row.csv +0 -0
  29. data/features/fixtures/valid.csv +0 -0
  30. data/features/fixtures/valid_many_rows.csv +0 -0
  31. data/features/fixtures/windows-line-endings.csv +0 -0
  32. data/features/information.feature +22 -0
  33. data/features/parse_csv.feature +90 -0
  34. data/features/schema_validation.feature +105 -0
  35. data/features/sources.feature +17 -0
  36. data/features/step_definitions/cli_steps.rb +11 -0
  37. data/features/step_definitions/csv_options_steps.rb +24 -0
  38. data/features/step_definitions/information_steps.rb +13 -0
  39. data/features/step_definitions/parse_csv_steps.rb +42 -0
  40. data/features/step_definitions/schema_validation_steps.rb +33 -0
  41. data/features/step_definitions/sources_steps.rb +7 -0
  42. data/features/step_definitions/validation_errors_steps.rb +90 -0
  43. data/features/step_definitions/validation_info_steps.rb +22 -0
  44. data/features/step_definitions/validation_warnings_steps.rb +60 -0
  45. data/features/support/aruba.rb +56 -0
  46. data/features/support/env.rb +26 -0
  47. data/features/support/load_tests.rb +114 -0
  48. data/features/support/webmock.rb +1 -0
  49. data/features/validation_errors.feature +147 -0
  50. data/features/validation_info.feature +16 -0
  51. data/features/validation_warnings.feature +86 -0
  52. data/lib/csvlint.rb +27 -0
  53. data/lib/csvlint/cli.rb +165 -0
  54. data/lib/csvlint/csvw/column.rb +359 -0
  55. data/lib/csvlint/csvw/date_format.rb +182 -0
  56. data/lib/csvlint/csvw/metadata_error.rb +13 -0
  57. data/lib/csvlint/csvw/number_format.rb +211 -0
  58. data/lib/csvlint/csvw/property_checker.rb +761 -0
  59. data/lib/csvlint/csvw/table.rb +204 -0
  60. data/lib/csvlint/csvw/table_group.rb +165 -0
  61. data/lib/csvlint/error_collector.rb +27 -0
  62. data/lib/csvlint/error_message.rb +15 -0
  63. data/lib/csvlint/field.rb +196 -0
  64. data/lib/csvlint/schema.rb +92 -0
  65. data/lib/csvlint/validate.rb +599 -0
  66. data/lib/csvlint/version.rb +3 -0
  67. data/spec/csvw/column_spec.rb +112 -0
  68. data/spec/csvw/date_format_spec.rb +49 -0
  69. data/spec/csvw/number_format_spec.rb +417 -0
  70. data/spec/csvw/table_group_spec.rb +143 -0
  71. data/spec/csvw/table_spec.rb +90 -0
  72. data/spec/field_spec.rb +252 -0
  73. data/spec/schema_spec.rb +211 -0
  74. data/spec/spec_helper.rb +17 -0
  75. data/spec/validator_spec.rb +619 -0
  76. data/wjordan213_csvlint.gemspec +46 -0
  77. metadata +490 -0
@@ -0,0 +1,211 @@
1
+ require 'spec_helper'
2
+
3
+ describe Csvlint::Schema do
4
+
5
+ it "should tolerate missing fields" do
6
+ schema = Csvlint::Schema.from_json_table("http://example.org", {})
7
+ expect( schema ).to_not be(nil)
8
+ expect( schema.fields.empty? ).to eql(true)
9
+ end
10
+
11
+ it "should tolerate fields with no constraints" do
12
+ schema = Csvlint::Schema.from_json_table("http://example.org", {
13
+ "fields" => [ { "name" => "test" } ]
14
+ })
15
+ expect( schema ).to_not be(nil)
16
+ expect( schema.fields[0].name ).to eql("test")
17
+ expect( schema.fields[0].constraints ).to eql({})
18
+ end
19
+
20
+ it "should validate against the schema" do
21
+ field = Csvlint::Field.new("test", { "required" => true } )
22
+ field2 = Csvlint::Field.new("test", { "minLength" => 3 } )
23
+ schema = Csvlint::Schema.new("http://example.org", [field, field2] )
24
+
25
+ expect( schema.validate_row( ["", "x"] ) ).to eql(false)
26
+ expect( schema.errors.size ).to eql(2)
27
+ expect( schema.errors.first.type).to eql(:missing_value)
28
+ expect( schema.errors.first.category).to eql(:schema)
29
+ expect( schema.errors.first.column).to eql(1)
30
+ expect( schema.validate_row( ["abc", "1234"] ) ).to eql(true)
31
+
32
+ end
33
+
34
+ it "should include validations for missing columns" do
35
+ minimum = Csvlint::Field.new("test", { "minLength" => 3 } )
36
+ required = Csvlint::Field.new("test2", { "required" => true } )
37
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
38
+
39
+ expect( schema.validate_row( ["abc", "x"] ) ).to eql(true)
40
+
41
+ expect( schema.validate_row( ["abc"] ) ).to eql(false)
42
+ expect( schema.errors.size ).to eql(1)
43
+ expect( schema.errors.first.type).to eql(:missing_value)
44
+
45
+ schema = Csvlint::Schema.new("http://example.org", [required, minimum] )
46
+ expect( schema.validate_row( ["abc"] ) ).to eql(false)
47
+ expect( schema.errors.size ).to eql(1)
48
+ expect( schema.errors.first.type).to eql(:min_length)
49
+ end
50
+
51
+ it "should warn if the data has fewer columns" do
52
+ minimum = Csvlint::Field.new("test", { "minLength" => 3 } )
53
+ required = Csvlint::Field.new("test2", { "maxLength" => 5 } )
54
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
55
+
56
+ expect( schema.validate_row( ["abc"], 1 ) ).to eql(true)
57
+ expect( schema.warnings.size ).to eql(1)
58
+ expect( schema.warnings.first.type).to eql(:missing_column)
59
+ expect( schema.warnings.first.category).to eql(:schema)
60
+ expect( schema.warnings.first.row).to eql(1)
61
+ expect( schema.warnings.first.column).to eql(2)
62
+
63
+ #no ragged row error
64
+ expect( schema.errors.size ).to eql(0)
65
+ end
66
+
67
+ it "should warn if the data has additional columns" do
68
+ minimum = Csvlint::Field.new("test", { "minLength" => 3 } )
69
+ required = Csvlint::Field.new("test2", { "required" => true } )
70
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
71
+
72
+ expect( schema.validate_row( ["abc", "x", "more", "columns"], 1 ) ).to eql(true)
73
+ expect( schema.warnings.size ).to eql(2)
74
+ expect( schema.warnings.first.type).to eql(:extra_column)
75
+ expect( schema.warnings.first.category).to eql(:schema)
76
+ expect( schema.warnings.first.row).to eql(1)
77
+ expect( schema.warnings.first.column).to eql(3)
78
+
79
+ expect( schema.warnings[1].type).to eql(:extra_column)
80
+ expect( schema.warnings[1].column).to eql(4)
81
+
82
+ #no ragged row error
83
+ expect( schema.errors.size ).to eql(0)
84
+ end
85
+
86
+ context "when validating header" do
87
+ it "should warn if column names are different to field names" do
88
+ minimum = Csvlint::Field.new("minimum", { "minLength" => 3 } )
89
+ required = Csvlint::Field.new("required", { "required" => true } )
90
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
91
+
92
+ expect( schema.validate_header(["minimum", "required"]) ).to eql(true)
93
+ expect( schema.warnings.size ).to eql(0)
94
+
95
+ expect( schema.validate_header(["wrong", "required"]) ).to eql(true)
96
+ expect( schema.warnings.size ).to eql(1)
97
+ expect( schema.warnings.first.row ).to eql(1)
98
+ expect( schema.warnings.first.type ).to eql(:malformed_header)
99
+ expect( schema.warnings.first.content ).to eql('wrong,required')
100
+ expect( schema.warnings.first.column ).to eql(nil)
101
+ expect( schema.warnings.first.category ).to eql(:schema)
102
+ expect schema.warnings.first.constraints.has_value?('minimum,required')
103
+ # expect( schema.warnings.first.constraints.values ).to eql(['minimum,required'])
104
+ expect( schema.validate_header(["minimum", "Required"]) ).to eql(true)
105
+ expect( schema.warnings.size ).to eql(1)
106
+
107
+ end
108
+
109
+ it "should warn if column count is less than field count" do
110
+ minimum = Csvlint::Field.new("minimum", { "minLength" => 3 } )
111
+ required = Csvlint::Field.new("required", { "required" => true } )
112
+ schema = Csvlint::Schema.new("http://example.org", [minimum, required] )
113
+
114
+ expect( schema.validate_header(["minimum"]) ).to eql(true)
115
+ expect( schema.warnings.size ).to eql(1)
116
+ expect( schema.warnings.first.row ).to eql(1)
117
+ expect( schema.warnings.first.type ).to eql(:malformed_header)
118
+ expect( schema.warnings.first.content ).to eql("minimum")
119
+ expect( schema.warnings.first.column ).to eql(nil)
120
+ expect( schema.warnings.first.category ).to eql(:schema)
121
+ expect schema.warnings.first.constraints.has_value?('minimum,required')
122
+ # expect( schema.warnings.first.constraints.values ).to eql(['minimum,required'])
123
+
124
+ end
125
+
126
+ it "should warn if column count is more than field count" do
127
+ minimum = Csvlint::Field.new("minimum", { "minLength" => 3 } )
128
+ schema = Csvlint::Schema.new("http://example.org", [minimum] )
129
+
130
+ expect( schema.validate_header(["wrong", "required"]) ).to eql(true)
131
+ expect( schema.warnings.size ).to eql(1)
132
+ expect( schema.warnings.first.row ).to eql(1)
133
+ expect( schema.warnings.first.type ).to eql(:malformed_header)
134
+ expect( schema.warnings.first.content ).to eql("wrong,required")
135
+ expect( schema.warnings.first.column ).to eql(nil)
136
+ expect( schema.warnings.first.category ).to eql(:schema)
137
+ # expect( schema.warnings.first.constraints.values ).to eql('minimum')
138
+ expect( schema.warnings.first.constraints.has_value?('minimum'))
139
+
140
+ end
141
+
142
+ end
143
+
144
+ context "when parsing JSON Tables" do
145
+
146
+ before(:each) do
147
+ @example=<<-EOL
148
+ {
149
+ "title": "Schema title",
150
+ "description": "schema",
151
+ "fields": [
152
+ { "name": "ID", "constraints": { "required": true }, "title": "id", "description": "house identifier" },
153
+ { "name": "Price", "constraints": { "required": true, "minLength": 1 } },
154
+ { "name": "Postcode", "constraints": { "required": true, "pattern": "[A-Z]{1,2}[0-9][0-9A-Z]? ?[0-9][A-Z]{2}" } }
155
+ ]
156
+ }
157
+ EOL
158
+ stub_request(:get, "http://example.com/example.json").to_return(:status => 200, :body => @example)
159
+ end
160
+
161
+ it "should create a schema from a pre-parsed JSON table" do
162
+ json = JSON.parse( @example )
163
+ schema = Csvlint::Schema.from_json_table("http://example.org", json)
164
+
165
+ expect( schema.uri ).to eql("http://example.org")
166
+ expect( schema.title ).to eql("Schema title")
167
+ expect( schema.description ).to eql("schema")
168
+ expect( schema.fields.length ).to eql(3)
169
+ expect( schema.fields[0].name ).to eql("ID")
170
+ expect( schema.fields[0].constraints["required"] ).to eql(true)
171
+ expect( schema.fields[0].title ).to eql("id")
172
+ expect( schema.fields[0].description ).to eql("house identifier")
173
+ expect( schema.fields[2].constraints["pattern"]).to eql("[A-Z]{1,2}[0-9][0-9A-Z]? ?[0-9][A-Z]{2}")
174
+ end
175
+
176
+ it "should create a schema from a JSON Table URL" do
177
+ schema = Csvlint::Schema.load_from_json("http://example.com/example.json")
178
+ expect( schema.uri ).to eql("http://example.com/example.json")
179
+ expect( schema.fields.length ).to eql(3)
180
+ expect( schema.fields[0].name ).to eql("ID")
181
+ expect( schema.fields[0].constraints["required"] ).to eql(true)
182
+
183
+ end
184
+ end
185
+
186
+ context "when parsing CSVW metadata" do
187
+
188
+ before(:each) do
189
+ @example=<<-EOL
190
+ {
191
+ "@context": "http://www.w3.org/ns/csvw",
192
+ "url": "http://example.com/example1.csv",
193
+ "tableSchema": {
194
+ "columns": [
195
+ { "name": "Name", "required": true, "datatype": { "base": "string", "format": ".+" } },
196
+ { "name": "Id", "required": true, "datatype": { "base": "string", "minLength": 3 } },
197
+ { "name": "Email", "required": true }
198
+ ]
199
+ }
200
+ }
201
+ EOL
202
+ stub_request(:get, "http://example.com/metadata.json").to_return(:status => 200, :body => @example)
203
+ end
204
+
205
+ it "should create a table group from a CSVW metadata URL" do
206
+ schema = Csvlint::Schema.load_from_json("http://example.com/metadata.json")
207
+ expect( schema.class ).to eq(Csvlint::Csvw::TableGroup)
208
+ end
209
+ end
210
+
211
+ end
@@ -0,0 +1,17 @@
1
+ require 'coveralls'
2
+ Coveralls.wear_merged!('test_frameworks')
3
+
4
+ require 'csvlint'
5
+ require 'pry'
6
+ require 'webmock/rspec'
7
+
8
+ RSpec.configure do |config|
9
+ config.run_all_when_everything_filtered = true
10
+ config.filter_run :focus
11
+
12
+ # Run specs in random order to surface order dependencies. If you find an
13
+ # order dependency and want to debug it, you can fix the order by providing
14
+ # the seed, which is printed after each run.
15
+ # --seed 1234
16
+ config.order = 'random'
17
+ end
@@ -0,0 +1,619 @@
1
+ require 'spec_helper'
2
+
3
+ describe Csvlint::Validator do
4
+
5
+ before do
6
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :body => "")
7
+ stub_request(:get, "http://example.com/.well-known/csvm").to_return(:status => 404)
8
+ stub_request(:get, "http://example.com/example.csv-metadata.json").to_return(:status => 404)
9
+ stub_request(:get, "http://example.com/csv-metadata.json").to_return(:status => 404)
10
+ end
11
+
12
+ it "should validate from a URL" do
13
+ stub_request(:get, "http://example.com/example.csv").to_return(:status => 200, :headers=>{"Content-Type" => "text/csv"}, :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
14
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
15
+
16
+ expect(validator.valid?).to eql(true)
17
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
18
+ expect(validator.instance_variable_get("@col_counts").count).to eql(3)
19
+ expect(validator.data.size).to eql(3)
20
+ end
21
+
22
+ it "should validate from a file path" do
23
+ validator = Csvlint::Validator.new(File.new(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
24
+
25
+ expect(validator.valid?).to eql(true)
26
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
27
+ expect(validator.instance_variable_get("@col_counts").count).to eql(3)
28
+ expect(validator.data.size).to eql(3)
29
+ end
30
+
31
+ context "multi line CSV validation with included schema" do
32
+
33
+ end
34
+
35
+ context "single line row validation with included schema" do
36
+
37
+ end
38
+
39
+ context "validation with multiple lines: " do
40
+
41
+ # TODO multiple lines permits testing of warnings
42
+ # TODO need more assertions in each test IE @formats
43
+ # TODO the phrasing of col_counts if only consulting specs might be confusing
44
+ # TODO ^-> col_counts and data.size should be equivalent, but only data is populated outside of if row.nil?
45
+ # TODO ^- -> and it's less the size of col_counts than the homogeneity of its contents that is important
46
+
47
+ it ".each() -> parse_contents method validates a well formed CSV" do
48
+ # when invoking parse contents
49
+ data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\"")
50
+
51
+ validator = Csvlint::Validator.new(data)
52
+
53
+ expect(validator.valid?).to eql(true)
54
+ # TODO would be beneficial to know how formats functions with respect to headers - check_format.feature:17 returns 3 rows total
55
+ # TODO in its formats object but is provided with 5 rows (with one nil row) [uses validation_warnings_steps.rb]
56
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
57
+ expect(validator.instance_variable_get("@col_counts").count).to eql(4)
58
+ expect(validator.data.size).to eql(4)
59
+
60
+ end
61
+
62
+ it ".each() -> `parse_contents` parses malformed CSV and catches unclosed quote" do
63
+ # doesn't build warnings because check_consistency isn't invoked
64
+ # TODO below is trailing whitespace but is interpreted as an unclosed quote
65
+ data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\" ")
66
+
67
+ validator = Csvlint::Validator.new(data)
68
+
69
+ expect(validator.valid?).to eql(false)
70
+ expect(validator.errors.first.type).to eql(:unclosed_quote)
71
+ expect(validator.errors.count).to eql(1)
72
+ end
73
+
74
+ it ".each() -> `parse_contents` parses malformed CSV and catches whitespace and edge case" do
75
+ # when this data gets past the header it rescues a whitespace error, resulting in the header row being discarded
76
+ # TODO - check if this is an edge case, currently passing because it requires advice on how to specify
77
+ data = StringIO.new(" \"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"Foo\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\" ")
78
+
79
+ validator = Csvlint::Validator.new(data)
80
+
81
+ expect(validator.valid?).to eql(false)
82
+ expect(validator.errors.first.type).to eql(:whitespace)
83
+ expect(validator.errors.count).to eql(2)
84
+ end
85
+
86
+ it "handles line breaks within a cell" do
87
+ data = StringIO.new("\"a\",\"b\",\"c\"\r\n\"d\",\"e\",\"this is\r\nvalid\"\r\n\"a\",\"b\",\"c\"")
88
+ validator = Csvlint::Validator.new(data)
89
+ expect(validator.valid?).to eql(true)
90
+ end
91
+
92
+ it "handles multiple line breaks within a cell" do
93
+ data = StringIO.new("\"a\",\"b\",\"c\"\r\n\"d\",\"this is\r\n valid\",\"as is this\r\n too\"")
94
+ validator = Csvlint::Validator.new(data)
95
+ expect(validator.valid?).to eql(true)
96
+ end
97
+ end
98
+
99
+ context "csv dialect" do
100
+ it "should provide sensible defaults for CSV parsing" do
101
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
102
+ opts = validator.instance_variable_get("@csv_options")
103
+ expect(opts).to include({
104
+ :col_sep => ",",
105
+ :row_sep => :auto,
106
+ :quote_char => '"',
107
+ :skip_blanks => false
108
+ })
109
+ end
110
+
111
+ it "should map CSV DDF to correct values" do
112
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
113
+ opts = validator.dialect_to_csv_options( {
114
+ "lineTerminator" => "\n",
115
+ "delimiter" => "\t",
116
+ "quoteChar" => "'"
117
+ })
118
+ expect(opts).to include({
119
+ :col_sep => "\t",
120
+ :row_sep => "\n",
121
+ :quote_char => "'",
122
+ :skip_blanks => false
123
+ })
124
+ end
125
+
126
+ it ".each() -> `validate` to pass input in streaming fashion" do
127
+ # warnings are built when validate is used to call all three methods
128
+ data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"3\",\"2\",\"1\"")
129
+ validator = Csvlint::Validator.new(data)
130
+
131
+ expect(validator.valid?).to eql(true)
132
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
133
+ expect(validator.instance_variable_get("@col_counts").count).to eql(4)
134
+ expect(validator.data.size).to eql(4)
135
+ expect(validator.info_messages.count).to eql(1)
136
+ end
137
+
138
+ it ".each() -> `validate` parses malformed CSV, populates errors, warnings & info_msgs,invokes finish()" do
139
+ data = StringIO.new("\"Foo\",\"Bar\",\"Baz\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"2\",\"3\"\r\n\"1\",\"two\",\"3\"\r\n\"3\",\"2\", \"1\"")
140
+
141
+ validator = Csvlint::Validator.new(data)
142
+
143
+ expect(validator.valid?).to eql(false)
144
+ expect(validator.instance_variable_get("@expected_columns")).to eql(3)
145
+ expect(validator.instance_variable_get("@col_counts").count).to eql(4)
146
+ expect(validator.data.size).to eql(5)
147
+ expect(validator.info_messages.count).to eql(1)
148
+ expect(validator.errors.count).to eql(1)
149
+ expect(validator.errors.first.type).to eql(:whitespace)
150
+ expect(validator.warnings.count).to eql(1)
151
+ expect(validator.warnings.first.type).to eql(:inconsistent_values)
152
+ end
153
+
154
+ it "File.open.each_line -> `validate` passes a valid csv" do
155
+ filename = 'valid_many_rows.csv'
156
+ file = File.join(File.expand_path(Dir.pwd), "features", "fixtures", filename)
157
+ validator = Csvlint::Validator.new(File.new(file))
158
+
159
+ expect(validator.valid?).to eql(true)
160
+ expect(validator.info_messages.size).to eql(1)
161
+ expect(validator.info_messages.first.type).to eql(:assumed_header)
162
+ expect(validator.info_messages.first.category).to eql(:structure)
163
+ end
164
+
165
+ end
166
+
167
+ context "with a single row" do
168
+
169
+ it "validates correctly" do
170
+ stream = "\"a\",\"b\",\"c\"\r\n"
171
+ validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
172
+ expect(validator.valid?).to eql(true)
173
+ end
174
+
175
+ it "checks for non rfc line breaks" do
176
+ stream = "\"a\",\"b\",\"c\"\n"
177
+ validator = Csvlint::Validator.new(StringIO.new(stream), {"header" => false})
178
+ expect(validator.valid?).to eql(true)
179
+ expect(validator.info_messages.count).to eq(1)
180
+ expect(validator.info_messages.first.type).to eql(:nonrfc_line_breaks)
181
+ end
182
+
183
+ it "checks for blank rows" do
184
+ data = StringIO.new('"","",')
185
+ validator = Csvlint::Validator.new(data, "header" => false)
186
+
187
+ expect(validator.valid?).to eql(false)
188
+ expect(validator.errors.count).to eq(1)
189
+ expect(validator.errors.first.type).to eql(:blank_rows)
190
+ end
191
+
192
+ it "returns the content of the string with the error" do
193
+ stream = "\"\",\"\",\"\"\r\n"
194
+ validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
195
+ expect(validator.errors.first.content).to eql("\"\",\"\",\"\"\r\n")
196
+ end
197
+
198
+ it "should presume a header unless told otherwise" do
199
+ stream = "1,2,3\r\n"
200
+ validator = Csvlint::Validator.new(StringIO.new(stream))
201
+
202
+ expect( validator.valid? ).to eql(true)
203
+ expect( validator.info_messages.size ).to eql(1)
204
+ expect( validator.info_messages.first.type).to eql(:assumed_header)
205
+ expect( validator.info_messages.first.category).to eql(:structure)
206
+ end
207
+
208
+ it "should evaluate the row as 'row 2' when stipulated" do
209
+ stream = "1,2,3\r\n"
210
+ validator = Csvlint::Validator.new(StringIO.new(stream), "header" => false)
211
+ validator.validate
212
+ expect(validator.valid?).to eql(true)
213
+ expect(validator.info_messages.size).to eql(0)
214
+ end
215
+
216
+ end
217
+
218
+ context "it returns the correct error from ERROR_MATCHES" do
219
+
220
+ it "checks for unclosed quotes" do
221
+ stream = "\"a,\"b\",\"c\"\n"
222
+ validator = Csvlint::Validator.new(StringIO.new(stream))
223
+ expect(validator.valid?).to eql(false)
224
+ expect(validator.errors.count).to eq(1)
225
+ expect(validator.errors.first.type).to eql(:unclosed_quote)
226
+ end
227
+
228
+
229
+ # TODO stray quotes is not covered in any spec in this library
230
+ # it "checks for stray quotes" do
231
+ # stream = "\"a\",“b“,\"c\"" "\r\n"
232
+ # validator = Csvlint::Validator.new(stream)
233
+ # validator.validate # implicitly invokes parse_contents(stream)
234
+ # expect(validator.valid?).to eql(false)
235
+ # expect(validator.errors.count).to eq(1)
236
+ # expect(validator.errors.first.type).to eql(:stray_quote)
237
+ # end
238
+
239
+ it "checks for whitespace" do
240
+ stream = " \"a\",\"b\",\"c\"\r\n"
241
+ validator = Csvlint::Validator.new(StringIO.new(stream))
242
+
243
+ expect(validator.valid?).to eql(false)
244
+ expect(validator.errors.count).to eq(1)
245
+ expect(validator.errors.first.type).to eql(:whitespace)
246
+ end
247
+
248
+ it "returns line break errors if incorrectly specified" do
249
+ # TODO the logic for catching this error message is very esoteric
250
+ stream = "\"a\",\"b\",\"c\"\n"
251
+ validator = Csvlint::Validator.new(StringIO.new(stream), {"lineTerminator" => "\r\n"})
252
+ expect(validator.valid?).to eql(false)
253
+ expect(validator.errors.count).to eq(1)
254
+ expect(validator.errors.first.type).to eql(:line_breaks)
255
+ end
256
+
257
+ end
258
+
259
+ context "when validating headers" do
260
+
261
+ it "should warn if column names aren't unique" do
262
+ data = StringIO.new( "minimum, minimum" )
263
+ validator = Csvlint::Validator.new(data)
264
+ validator.reset
265
+ expect( validator.validate_header(["minimum", "minimum"]) ).to eql(true)
266
+ expect( validator.warnings.size ).to eql(1)
267
+ expect( validator.warnings.first.type).to eql(:duplicate_column_name)
268
+ expect( validator.warnings.first.category).to eql(:schema)
269
+ end
270
+
271
+ it "should warn if column names are blank" do
272
+ data = StringIO.new( "minimum," )
273
+ validator = Csvlint::Validator.new(data)
274
+
275
+ expect( validator.validate_header(["minimum", ""]) ).to eql(true)
276
+ expect( validator.warnings.size ).to eql(1)
277
+ expect( validator.warnings.first.type).to eql(:empty_column_name)
278
+ expect( validator.warnings.first.category).to eql(:schema)
279
+ end
280
+
281
+ it "should include info message about missing header when we have assumed a header" do
282
+ data = StringIO.new( "1,2,3\r\n" )
283
+ validator = Csvlint::Validator.new(data)
284
+ expect( validator.valid? ).to eql(true)
285
+ expect( validator.info_messages.size ).to eql(1)
286
+ expect( validator.info_messages.first.type).to eql(:assumed_header)
287
+ expect( validator.info_messages.first.category).to eql(:structure)
288
+ end
289
+
290
+ it "should not include info message about missing header when we are told about the header" do
291
+ data = StringIO.new( "1,2,3\r\n" )
292
+ validator = Csvlint::Validator.new(data, "header" => false)
293
+ expect( validator.valid? ).to eql(true)
294
+ expect( validator.info_messages.size ).to eql(0)
295
+ end
296
+ end
297
+
298
+ context "build_formats" do
299
+
300
+ {
301
+ :string => "foo",
302
+ :numeric => "1",
303
+ :uri => "http://www.example.com",
304
+ :dateTime_iso8601 => "2013-01-01T13:00:00Z",
305
+ :date_db => "2013-01-01",
306
+ :dateTime_hms => "13:00:00"
307
+ }.each do |type, content|
308
+ it "should return the format of #{type} correctly" do
309
+ row = [content]
310
+
311
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
312
+ validator.build_formats(row)
313
+ formats = validator.instance_variable_get("@formats")
314
+
315
+ expect(formats[0].keys.first).to eql type
316
+ end
317
+ end
318
+
319
+ it "treats floats and ints the same" do
320
+ row = ["12", "3.1476"]
321
+
322
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
323
+ validator.build_formats(row)
324
+ formats = validator.instance_variable_get("@formats")
325
+
326
+ expect(formats[0].keys.first).to eql :numeric
327
+ expect(formats[1].keys.first).to eql :numeric
328
+ end
329
+
330
+ it "should ignore blank arrays" do
331
+ row = []
332
+
333
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
334
+ validator.build_formats(row)
335
+
336
+ formats = validator.instance_variable_get("@formats")
337
+ expect(formats).to eql []
338
+ end
339
+
340
+ it "should work correctly for single columns" do
341
+ rows = [
342
+ ["foo"],
343
+ ["bar"],
344
+ ["baz"]
345
+ ]
346
+
347
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
348
+
349
+ rows.each_with_index do |row, i|
350
+ validator.build_formats(row)
351
+ end
352
+
353
+ formats = validator.instance_variable_get("@formats")
354
+ expect(formats).to eql [{:string => 3}]
355
+ end
356
+
357
+ it "should return formats correctly if a row is blank" do
358
+ rows = [
359
+ [],
360
+ ["foo", "1", "$2345"]
361
+ ]
362
+
363
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
364
+
365
+ rows.each_with_index do |row, i|
366
+ validator.build_formats(row)
367
+ end
368
+
369
+ formats = validator.instance_variable_get("@formats")
370
+
371
+ expect(formats).to eql [
372
+ {:string => 1},
373
+ {:numeric => 1},
374
+ {:string => 1},
375
+ ]
376
+ end
377
+
378
+ end
379
+
380
+ context "csv dialect" do
381
+ it "should provide sensible defaults for CSV parsing" do
382
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
383
+ opts = validator.instance_variable_get("@csv_options")
384
+ expect(opts).to include({
385
+ :col_sep => ",",
386
+ :row_sep => :auto,
387
+ :quote_char => '"',
388
+ :skip_blanks => false
389
+ })
390
+ end
391
+
392
+ it "should map CSV DDF to correct values" do
393
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
394
+ opts = validator.dialect_to_csv_options({
395
+ "lineTerminator" => "\n",
396
+ "delimiter" => "\t",
397
+ "quoteChar" => "'"
398
+ })
399
+ expect(opts).to include({
400
+ :col_sep => "\t",
401
+ :row_sep => "\n",
402
+ :quote_char => "'",
403
+ :skip_blanks => false
404
+ })
405
+ end
406
+
407
+ end
408
+
409
+ context "check_consistency" do
410
+
411
+ it "should return a warning if columns have inconsistent values" do
412
+ formats = [
413
+ {:string => 3},
414
+ {:string => 2, :numeric => 1},
415
+ {:numeric => 3},
416
+ ]
417
+
418
+ validator = Csvlint::Validator.new("http://example.com/example.csv")
419
+ validator.instance_variable_set("@formats", formats)
420
+ validator.check_consistency
421
+
422
+ warnings = validator.instance_variable_get("@warnings")
423
+ warnings.delete_if { |h| h.type != :inconsistent_values }
424
+
425
+ expect(warnings.count).to eql 1
426
+ end
427
+
428
+ end
429
+
430
+ # TODO: the tests below are all the remaining tests from validator_spec.rb; annotations indicate their status. However, these tests may be best refactored into client specs
431
context "when detecting headers" do
  # Stubs GET http://example.com/example.csv to answer 200 with the valid.csv
  # fixture as its body and the supplied Content-Type header.
  def stub_example_csv(content_type)
    fixture = File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')
    stub_request(:get, "http://example.com/example.csv").to_return(
      status: 200,
      headers: { "Content-Type" => content_type },
      body: File.read(fixture)
    )
  end

  it "should default to expecting a header" do
    checker = Csvlint::Validator.new("http://example.com/example.csv")
    expect(checker.header?).to eql(true)
  end

  it "should look in CSV options to detect header" do
    # The dialect option is honoured in both directions.
    with_header = Csvlint::Validator.new("http://example.com/example.csv", { "header" => true })
    expect(with_header.header?).to eql(true)

    without_header = Csvlint::Validator.new("http://example.com/example.csv", { "header" => false })
    expect(without_header.header?).to eql(false)
  end

  it "should look in content-type for header=absent" do
    stub_example_csv("text/csv; header=absent")
    checker = Csvlint::Validator.new("http://example.com/example.csv")
    expect(checker.header?).to eql(false)
    expect(checker.errors.size).to eql(0)
  end

  it "should look in content-type for header=present" do
    stub_example_csv("text/csv; header=present")
    checker = Csvlint::Validator.new("http://example.com/example.csv")
    expect(checker.header?).to eql(true)
    expect(checker.errors.size).to eql(0)
  end

  it "assume header present if not specified in content type" do
    stub_example_csv("text/csv")
    checker = Csvlint::Validator.new("http://example.com/example.csv")
    expect(checker.header?).to eql(true)
    expect(checker.errors.size).to eql(0)
    # The assumption is reported as a single informational message.
    expect(checker.info_messages.size).to eql(1)
    expect(checker.info_messages.first.type).to eql(:assumed_header)
  end

  it "give wrong content type error if content type is wrong" do
    stub_example_csv("text/html")
    checker = Csvlint::Validator.new("http://example.com/example.csv")
    expect(checker.header?).to eql(true)
    expect(checker.errors.size).to eql(1)
    expect(checker.errors[0].type).to eql(:wrong_content_type)
  end
end
482
+
483
context "when validating headers" do
  # Stubs GET http://example.com/example.csv to answer 200 with a plain
  # "text/csv" Content-Type and the valid.csv fixture as its body.
  def stub_plain_csv
    fixture = File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')
    stub_request(:get, "http://example.com/example.csv").to_return(
      status: 200,
      headers: { "Content-Type" => "text/csv" },
      body: File.read(fixture)
    )
  end

  it "should warn if column names aren't unique" do
    csv = StringIO.new( "minimum, minimum" )
    checker = Csvlint::Validator.new(csv)
    expect(checker.warnings.size).to eql(1)
    expect(checker.warnings.first.type).to eql(:duplicate_column_name)
    expect(checker.warnings.first.category).to eql(:schema)
  end

  it "should warn if column names are blank" do
    csv = StringIO.new( "minimum," )
    checker = Csvlint::Validator.new(csv)

    # The header is still accepted; the blank name only produces a warning.
    expect(checker.validate_header(["minimum", ""])).to eql(true)
    expect(checker.warnings.size).to eql(1)
    expect(checker.warnings.first.type).to eql(:empty_column_name)
    expect(checker.warnings.first.category).to eql(:schema)
  end

  it "should include info message about missing header when we have assumed a header" do
    csv = StringIO.new( "1,2,3\r\n" )
    checker = Csvlint::Validator.new(csv)

    expect(checker.valid?).to eql(true)
    expect(checker.info_messages.size).to eql(1)
    expect(checker.info_messages.first.type).to eql(:assumed_header)
    expect(checker.info_messages.first.category).to eql(:structure)
  end

  it "should not include info message about missing header when we are told about the header" do
    csv = StringIO.new( "1,2,3\r\n" )
    checker = Csvlint::Validator.new(csv, "header"=>false)
    expect(checker.valid?).to eql(true)
    expect(checker.info_messages.size).to eql(0)
  end

  it "should not be an error if we have assumed a header, there is no dialect and content-type doesn't declare header, as we assume header=present" do
    stub_plain_csv
    checker = Csvlint::Validator.new("http://example.com/example.csv")
    expect(checker.valid?).to eql(true)
  end

  it "should be valid if we have a dialect and the data is from the web" do
    # An empty dialect comes first: header defaults to true in the csv
    # dialect, so it is valid just like the two explicit settings.
    [{}, {"header"=>true}, {"header"=>false}].each do |dialect|
      stub_plain_csv
      checker = Csvlint::Validator.new("http://example.com/example.csv", dialect)
      expect(checker.valid?).to eql(true)
    end
  end
end
541
+
542
context "accessing metadata" do

  # Registered before every example rather than once per context.
  # NOTE(review): assumes the suite loads webmock/rspec, whose integration
  # resets all stubs after each example; a stub created in `before :all`
  # would then only be available to the first example of this group.
  before :each do
    fixture = File.join(File.dirname(__FILE__),'..','features','fixtures','windows-line-endings.csv')
    stub_request(:get, "http://example.com/crlf.csv").to_return(:status => 200, :body => File.read(fixture))
    # The metadata lookup for the same URL is stubbed to answer 404.
    stub_request(:get, "http://example.com/crlf.csv-metadata.json").to_return(:status => 404)
  end

  it "can get line break symbol" do
    validator = Csvlint::Validator.new("http://example.com/crlf.csv")
    expect(validator.line_breaks).to eql "\r\n"
  end

end
555
+
556
# Validates the stubbed valid.csv and checks that the parsed rows are exposed
# through Validator#data.
it "should give access to the complete CSV data file" do
  fixture_path = File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')
  stub_request(:get, "http://example.com/example.csv").to_return(
    status: 200,
    headers: { "Content-Type" => "text/csv; header=present" },
    body: File.read(fixture_path)
  )

  checker = Csvlint::Validator.new("http://example.com/example.csv")
  expect(checker.valid?).to eql(true)

  rows = checker.data
  expect(rows.count).to eql 3
  expect(rows[0]).to eql ['Foo','Bar','Baz']
  expect(rows[2]).to eql ['3','2','1']
end
568
+
569
# Validates the stubbed valid.csv and checks the reported row count.
it "should count the total number of rows read" do
  fixture_path = File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')
  stub_request(:get, "http://example.com/example.csv").to_return(
    status: 200,
    headers: { "Content-Type" => "text/csv; header=present" },
    body: File.read(fixture_path)
  )

  checker = Csvlint::Validator.new("http://example.com/example.csv")
  expect(checker.row_count).to eq(3)
end
576
+
577
# Validates the stubbed valid.csv with limit_lines: 2 and checks that only two
# rows are exposed through Validator#data.
it "should limit number of lines read" do
  fixture_path = File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')
  stub_request(:get, "http://example.com/example.csv").to_return(
    status: 200,
    headers: { "Content-Type" => "text/csv; header=present" },
    body: File.read(fixture_path)
  )

  checker = Csvlint::Validator.new("http://example.com/example.csv", {}, nil, limit_lines: 2)
  expect(checker.valid?).to eql(true)

  rows = checker.data
  expect(rows.count).to eql 2
  expect(rows[0]).to eql ['Foo','Bar','Baz']
end
587
+
588
context "with a lambda" do

  # Both examples hand the validator an open handle on the valid.csv fixture.
  # The expectations run immediately after construction, so the handle is only
  # needed for the duration of `Validator.new`; the block form of File.open
  # closes it afterwards instead of leaking it.

  it "should call a lambda for each line" do
    @count = 0
    mylambda = lambda { |_row| @count += 1 }
    fixture_path = File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')
    File.open(fixture_path) do |csv_file|
      Csvlint::Validator.new(csv_file, {}, nil, { lambda: mylambda })
    end
    expect(@count).to eq(3)
  end

  it "reports back the status of each line" do
    @results = []
    # Records the value of `current_line` on the object passed to each call.
    mylambda = lambda { |row| @results << row.current_line }
    fixture_path = File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')
    File.open(fixture_path) do |csv_file|
      Csvlint::Validator.new(csv_file, {}, nil, { lambda: mylambda })
    end
    expect(@results.count).to eq(3)
    expect(@results[0]).to eq(1)
    expect(@results[1]).to eq(2)
    expect(@results[2]).to eq(3)
  end

end
608
+
609
+ # Commented out because there is currently no way to mock redirects with Typhoeus and WebMock - see https://github.com/bblimke/webmock/issues/237
610
+ # it "should follow redirects to SSL" do
611
+ # stub_request(:get, "http://example.com/redirect").to_return(:status => 301, :headers=>{"Location" => "https://example.com/example.csv"})
612
+ # stub_request(:get, "https://example.com/example.csv").to_return(:status => 200,
613
+ # :headers=>{"Content-Type" => "text/csv; header=present"},
614
+ # :body => File.read(File.join(File.dirname(__FILE__),'..','features','fixtures','valid.csv')))
615
+ #
616
+ # validator = Csvlint::Validator.new("http://example.com/redirect")
617
+ # expect( validator.valid? ).to eql(true)
618
+ # end
619
+ end