csvlint 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitignore +22 -0
  4. data/.travis.yml +10 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.md +22 -0
  7. data/README.md +214 -0
  8. data/Rakefile +17 -0
  9. data/bin/create_schema +32 -0
  10. data/bin/csvlint +52 -0
  11. data/csvlint.gemspec +39 -0
  12. data/features/check_format.feature +46 -0
  13. data/features/csv_options.feature +35 -0
  14. data/features/fixtures/cr-line-endings.csv +1 -0
  15. data/features/fixtures/crlf-line-endings.csv +3 -0
  16. data/features/fixtures/inconsistent-line-endings.csv +2 -0
  17. data/features/fixtures/invalid-byte-sequence.csv +24 -0
  18. data/features/fixtures/lf-line-endings.csv +3 -0
  19. data/features/fixtures/spreadsheet.xls +0 -0
  20. data/features/fixtures/title-row.csv +4 -0
  21. data/features/fixtures/valid.csv +3 -0
  22. data/features/fixtures/windows-line-endings.csv +2 -0
  23. data/features/information.feature +22 -0
  24. data/features/parse_csv.feature +90 -0
  25. data/features/schema_validation.feature +63 -0
  26. data/features/sources.feature +18 -0
  27. data/features/step_definitions/csv_options_steps.rb +19 -0
  28. data/features/step_definitions/information_steps.rb +13 -0
  29. data/features/step_definitions/parse_csv_steps.rb +30 -0
  30. data/features/step_definitions/schema_validation_steps.rb +7 -0
  31. data/features/step_definitions/sources_steps.rb +7 -0
  32. data/features/step_definitions/validation_errors_steps.rb +43 -0
  33. data/features/step_definitions/validation_info_steps.rb +18 -0
  34. data/features/step_definitions/validation_warnings_steps.rb +46 -0
  35. data/features/support/env.rb +30 -0
  36. data/features/support/webmock.rb +1 -0
  37. data/features/validation_errors.feature +151 -0
  38. data/features/validation_info.feature +24 -0
  39. data/features/validation_warnings.feature +74 -0
  40. data/lib/csvlint.rb +13 -0
  41. data/lib/csvlint/error_collector.rb +43 -0
  42. data/lib/csvlint/error_message.rb +15 -0
  43. data/lib/csvlint/field.rb +102 -0
  44. data/lib/csvlint/schema.rb +69 -0
  45. data/lib/csvlint/types.rb +113 -0
  46. data/lib/csvlint/validate.rb +253 -0
  47. data/lib/csvlint/version.rb +3 -0
  48. data/lib/csvlint/wrapped_io.rb +39 -0
  49. data/spec/field_spec.rb +247 -0
  50. data/spec/schema_spec.rb +149 -0
  51. data/spec/spec_helper.rb +20 -0
  52. data/spec/validator_spec.rb +279 -0
  53. metadata +367 -0
@@ -0,0 +1,30 @@
1
# Cucumber support file: puts the gem's lib/ directory on the load path,
# starts coverage reporting, loads the library under test and registers the
# World object whose methods are available to step definitions.
$:.unshift File.join(File.dirname(__FILE__), "..", "..", "lib")

require 'simplecov'
require 'simplecov-rcov'
require 'rspec/expectations'
require 'coveralls'
require 'pry'

Coveralls.wear_merged!

SimpleCov.formatter = SimpleCov::Formatter::RcovFormatter
SimpleCov.start

# Load the library only once coverage tracking has been started. SimpleCov's
# documentation requires `SimpleCov.start` to be issued before any application
# code is required, otherwise that code is missing from the report.
require 'csvlint'

require 'spork'

Spork.each_run do
  require 'csvlint'
end

# World object handed to Cucumber; step definitions can call its methods.
class CustomWorld
  # Default CSV options for scenarios: an empty Hash.
  def default_csv_options
    {}
  end
end

World do
  CustomWorld.new
end
@@ -0,0 +1 @@
1
+ require 'webmock/cucumber'
@@ -0,0 +1,151 @@
1
+ Feature: Get validation errors
2
+
3
+ Scenario: CSV with ragged rows
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "col1","col2","col3"
7
+ "1","2","3"
8
+ "4","5"
9
+ """
10
+ And it is stored at the url "http://example.com/example1.csv"
11
+ When I ask if there are errors
12
+ Then there should be 1 error
13
+ And that error should have the type "ragged_rows"
14
+ And that error should have the row "3"
15
+ And that error should have the content ""4","5""
16
+
17
+ Scenario: CSV with incorrect quoting
18
+ Given I have a CSV with the following content:
19
+ """
20
+ "col1","col2","col3"
21
+ "Foo","Bar","Baz
22
+ """
23
+ And it is stored at the url "http://example.com/example1.csv"
24
+ When I ask if there are errors
25
+ Then there should be 1 error
26
+ And that error should have the type "unclosed_quote"
27
+ And that error should have the row "2"
28
+ And that error should have the content ""Foo","Bar","Baz"
29
+
30
+ Scenario: Successfully report a CSV with incorrect whitespace
31
+ Given I have a CSV with the following content:
32
+ """
33
+ "col1","col2","col3"
34
+ "Foo","Bar", "Baz"
35
+ """
36
+ And it is stored at the url "http://example.com/example1.csv"
37
+ When I ask if there are errors
38
+ Then there should be 1 error
39
+ And that error should have the type "whitespace"
40
+ And that error should have the row "2"
41
+ And that error should have the content ""Foo","Bar", "Baz""
42
+
43
+ Scenario: Successfully report a CSV with blank rows
44
+ Given I have a CSV with the following content:
45
+ """
46
+ "col1","col2","col3"
47
+ "Foo","Bar","Baz"
48
+ "","",
49
+ "Baz","Bar","Foo"
50
+ """
51
+ And it is stored at the url "http://example.com/example1.csv"
52
+ When I ask if there are errors
53
+ Then there should be 1 error
54
+ And that error should have the type "blank_rows"
55
+ And that error should have the row "3"
56
+ And that error should have the content ""","","
57
+
58
+ Scenario: Successfully report a CSV with multiple trailing empty rows
59
+ Given I have a CSV with the following content:
60
+ """
61
+ "col1","col2","col3"
62
+ "Foo","Bar","Baz"
63
+ "Foo","Bar","Baz"
64
+
65
+
66
+ """
67
+ And it is stored at the url "http://example.com/example1.csv"
68
+ When I ask if there are errors
69
+ Then there should be 1 error
70
+ And that error should have the type "blank_rows"
71
+ And that error should have the row "4"
72
+
73
+ Scenario: Successfully report a CSV with an empty row
74
+ Given I have a CSV with the following content:
75
+ """
76
+ "col1","col2","col3"
77
+ "Foo","Bar","Baz"
78
+
79
+ "Foo","Bar","Baz"
80
+ """
81
+ And it is stored at the url "http://example.com/example1.csv"
82
+ When I ask if there are errors
83
+ Then there should be 1 error
84
+ And that error should have the type "blank_rows"
85
+ And that error should have the row "3"
86
+
87
+ Scenario: Report invalid Encoding
88
+ Given I have a CSV file called "invalid-byte-sequence.csv"
89
+ And I set an encoding header of "UTF-8"
90
+ And it is stored at the url "http://example.com/example1.csv"
91
+ When I ask if there are errors
92
+ Then there should be 1 error
93
+ And that error should have the type "invalid_encoding"
94
+
95
+ Scenario: Correctly handle different encodings
96
+ Given I have a CSV file called "invalid-byte-sequence.csv"
97
+ And I set an encoding header of "ISO-8859-1"
98
+ And it is stored at the url "http://example.com/example1.csv"
99
+ When I ask if there are errors
100
+ Then there should be no "invalid_encoding" errors
101
+
102
+ Scenario: Report invalid file
103
+
104
+ Given I have a CSV file called "spreadsheet.xls"
105
+ And it is stored at the url "http://example.com/example1.csv"
106
+ When I ask if there are errors
107
+ Then there should be 1 error
108
+ And that error should have the type "invalid_encoding"
109
+
110
+ Scenario: Incorrect content type
111
+ Given I have a CSV with the following content:
112
+ """
113
+ "abc","2","3"
114
+ """
115
+ And the content type is set to "application/excel"
116
+ And it is stored at the url "http://example.com/example1.xls"
117
+ And I ask if there are errors
118
+ Then there should be 1 error
119
+ And that error should have the type "wrong_content_type"
120
+
121
+ Scenario: Incorrect extension
122
+ Given I have a CSV with the following content:
123
+ """
124
+ "abc","2","3"
125
+ """
126
+ And the content type is set to "application/excel"
127
+ And it is stored at the url "http://example.com/example1.csv"
128
+ And I ask if there are errors
129
+ Then there should be 1 error
130
+ And that error should have the type "wrong_content_type"
131
+
132
+ Scenario: Handles urls that 404
133
+ Given I have a CSV that doesn't exist
134
+ When I ask if there are errors
135
+ Then there should be 1 error
136
+ And that error should have the type "not_found"
137
+
138
+ Scenario: Incorrect line endings specified in settings
139
+ Given I have a CSV file called "cr-line-endings.csv"
140
+ And I set the line endings to linefeed
141
+ And it is stored at the url "http://example.com/example1.csv"
142
+ And I ask if there are errors
143
+ Then there should be 1 error
144
+ And that error should have the type "line_breaks"
145
+
146
+ Scenario: inconsistent line endings in file cause an error
147
+ Given I have a CSV file called "inconsistent-line-endings.csv"
148
+ And it is stored at the url "http://example.com/example1.csv"
149
+ And I ask if there are errors
150
+ Then there should be 1 error
151
+ And that error should have the type "line_breaks"
@@ -0,0 +1,24 @@
1
+ Feature: Get validation information messages
2
+
3
+ Scenario: LF line endings in file give an info message
4
+ Given I have a CSV file called "lf-line-endings.csv"
5
+ And it is stored at the url "http://example.com/example1.csv"
6
+ And I set header to "true"
7
+ And I ask if there are info messages
8
+ Then there should be 1 info message
9
+ And that message should have the type "nonrfc_line_breaks"
10
+
11
+ Scenario: CR line endings in file give an info message
12
+ Given I have a CSV file called "cr-line-endings.csv"
13
+ And it is stored at the url "http://example.com/example1.csv"
14
+ And I set header to "true"
15
+ And I ask if there are info messages
16
+ Then there should be 1 info message
17
+ And that message should have the type "nonrfc_line_breaks"
18
+
19
+ Scenario: CRLF line endings in file produces no info messages
20
+ Given I have a CSV file called "crlf-line-endings.csv"
21
+ And it is stored at the url "http://example.com/example1.csv"
22
+ And I set header to "true"
23
+ And I ask if there are info messages
24
+ Then there should be 0 info messages
@@ -0,0 +1,74 @@
1
+ Feature: Validation warnings
2
+
3
+ Scenario: UTF-8 Encoding
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "col1","col2","col3"
7
+ "abc","2","3"
8
+ """
9
+ And it is encoded as "utf-8"
10
+ And it is stored at the url "http://example.com/example1.csv"
11
+ When I ask if there are warnings
12
+ Then there should be 0 warnings
13
+
14
+ Scenario: ISO-8859-1 Encoding
15
+ Given I have a CSV with the following content:
16
+ """
17
+ "col1","col2","col3"
18
+ "1","2","3"
19
+ """
20
+ And it is encoded as "iso-8859-1"
21
+ And it is stored at the url "http://example.com/example1.csv"
22
+ When I ask if there are warnings
23
+ Then there should be 1 warnings
24
+
25
+ Scenario: Correct content type
26
+ Given I have a CSV with the following content:
27
+ """
28
+ "col1","col2","col3"
29
+ "abc","2","3"
30
+ """
31
+ And the content type is set to "text/csv"
32
+ And it is stored at the url "http://example.com/example1.csv"
33
+ And I ask if there are warnings
34
+ Then there should be 0 warnings
35
+
36
+ Scenario: No extension
37
+ Given I have a CSV with the following content:
38
+ """
39
+ "col1","col2","col3"
40
+ "abc","2","3"
41
+ """
42
+ And the content type is set to "text/csv"
43
+ And it is stored at the url "http://example.com/example1"
44
+ And I ask if there are warnings
45
+ Then there should be 0 warnings
46
+
47
+ Scenario: Allow query params after extension
48
+ Given I have a CSV with the following content:
49
+ """
50
+ "col1","col2","col3"
51
+ "abc","2","3"
52
+ """
53
+ And the content type is set to "text/csv"
54
+ And it is stored at the url "http://example.com/example1.csv?query=param"
55
+ And I ask if there are warnings
56
+ Then there should be 0 warnings
57
+
58
+ Scenario: User doesn't supply encoding
59
+ Given I have a CSV with the following content:
60
+ """
61
+ "col1","col2","col3"
62
+ "abc","2","3"
63
+ """
64
+ And it is stored at the url "http://example.com/example1.csv" with no character set
65
+ When I ask if there are warnings
66
+ Then there should be 1 warnings
67
+ And that warning should have the type "no_encoding"
68
+
69
+ Scenario: Title rows
70
+ Given I have a CSV file called "title-row.csv"
71
+ And it is stored at the url "http://example.com/example1.csv"
72
+ And I ask if there are warnings
73
+ Then there should be 1 warnings
74
+ And that warning should have the type "title_row"
@@ -0,0 +1,13 @@
1
+ require "csvlint/version"
2
+ require 'csv'
3
+ require 'open-uri'
4
+ require 'mime/types'
5
+ require 'tempfile'
6
+
7
+ require 'csvlint/types'
8
+ require 'csvlint/error_message'
9
+ require 'csvlint/error_collector'
10
+ require 'csvlint/validate'
11
+ require 'csvlint/wrapped_io'
12
+ require 'csvlint/field'
13
+ require 'csvlint/schema'
@@ -0,0 +1,43 @@
1
module Csvlint

  # Mixin that gives the including object three message lists (errors,
  # warnings and info_messages) plus a builder method for each list.
  #
  # For every level in MESSAGE_LEVELS the module defines:
  # * a reader with the level's name (e.g. +errors+) returning the list;
  # * a +build_<level>+ method (e.g. +build_errors+) that creates a
  #   Csvlint::ErrorMessage and appends it to that list.
  #
  # The lists are created on first use, so the builders and +valid?+ work even
  # when the including class has not called +reset+ yet.
  module ErrorCollector

    # Names of the message lists managed by this mixin.
    MESSAGE_LEVELS = [
      :errors,
      :warnings,
      :info_messages
    ].freeze

    # Builds a Csvlint::ErrorMessage from the given attributes.
    # The message is returned, not stored.
    def build_message(type, category, row, column, content, constraints)
      Csvlint::ErrorMessage.new({
        :type => type,
        :category => category,
        :row => row,
        :column => column,
        :content => content,
        :constraints => constraints
      })
    end

    MESSAGE_LEVELS.each do |level|
      ivar = "@#{level}"

      # Reader for the list; creates an empty list the first time it is
      # needed instead of returning nil.
      define_method(level) do
        current = instance_variable_defined?(ivar) ? instance_variable_get(ivar) : nil
        current || instance_variable_set(ivar, [])
      end

      # Appends a new message to the list and returns the list.
      define_method("build_#{level}") do |type, category = nil, row = nil, column = nil, content = nil, constraints = {}|
        send(level) << build_message(type, category, row, column, content, constraints)
      end
    end

    # True when no errors have been collected (warnings and info messages
    # are not considered).
    def valid?
      errors.empty?
    end

    # Replaces every message list with a new empty array.
    def reset
      MESSAGE_LEVELS.each do |level|
        instance_variable_set("@#{level}", [])
      end
    end

  end
end
@@ -0,0 +1,15 @@
1
module Csvlint

  # Value object describing a single validation message.
  #
  # Readers are provided for +type+, +category+, +row+, +column+, +content+
  # and +constraints+; any attribute that was not supplied reads as nil.
  class ErrorMessage

    attr_reader :type, :category, :row, :column, :content, :constraints

    # params - Hash mapping attribute names to values. Each pair is stored in
    #          the instance variable named after its key. May be omitted or
    #          nil, in which case every attribute is nil.
    def initialize(params = {})
      (params || {}).each do |key, value|
        instance_variable_set("@#{key}", value)
      end
    end

  end

end
@@ -0,0 +1,102 @@
1
# Set is used for the uniqueness check below; require it explicitly because
# it is not loaded automatically on every Ruby version.
require 'set'

module Csvlint

  # A single schema field: a name, optional title/description and a Hash of
  # constraints against which individual cell values are validated.
  #
  # Constraint keys read by this class (all Strings): "required",
  # "minLength", "maxLength", "pattern", "unique", "type", "datePattern",
  # "minimum" and "maximum".
  #
  # Problems are recorded as errors through Csvlint::ErrorCollector.
  class Field
    include Csvlint::ErrorCollector
    include Csvlint::Types

    attr_reader :name, :constraints, :title, :description

    # name        - field name.
    # constraints - Hash of constraints; nil is treated as an empty Hash.
    # title       - optional title.
    # description - optional description.
    def initialize(name, constraints={}, title=nil, description=nil)
      @name = name
      @constraints = constraints || {}
      # Values already seen by this field, used for the "unique" constraint.
      # Not cleared by validate_column, so it accumulates across calls.
      @uniques = Set.new
      @title = title
      @description = description
      reset
    end

    # Validates one value against every constraint of the field.
    #
    # Previously collected messages are discarded first, so after the call
    # +errors+ describes this value only.
    #
    # value  - the cell content (String or nil).
    # row    - row number recorded on any error (optional).
    # column - column number recorded on any error (optional).
    #
    # Returns true when the value produced no errors.
    def validate_column(value, row=nil, column=nil)
      reset
      validate_length(value, row, column)
      validate_values(value, row, column)
      parsed = validate_type(value, row, column)
      # Compare with nil explicitly: a successfully parsed value may be false.
      validate_range(parsed, row, column) unless parsed.nil?
      valid?
    end

    private

      # Checks the "required", "minLength" and "maxLength" constraints.
      def validate_length(value, row, column)
        if constraints["required"] == true
          if value.nil? || value.length == 0
            build_errors(:missing_value, :schema, row, column, value,
              { "required" => true })
          end
        end
        if constraints["minLength"]
          # A nil value is treated as shorter than any minimum.
          if value.nil? || value.length < constraints["minLength"]
            build_errors(:min_length, :schema, row, column, value,
              { "minLength" => constraints["minLength"] })
          end
        end
        if constraints["maxLength"]
          if !value.nil? && value.length > constraints["maxLength"]
            build_errors(:max_length, :schema, row, column, value,
              { "maxLength" => constraints["maxLength"] })
          end
        end
      end

      # Checks the "pattern" and "unique" constraints.
      def validate_values(value, row, column)
        if constraints["pattern"]
          if !value.nil? && !value.match( constraints["pattern"] )
            build_errors(:pattern, :schema, row, column, value,
              { "pattern" => constraints["pattern"] })
          end
        end
        if constraints["unique"] == true
          if @uniques.include? value
            build_errors(:unique, :schema, row, column, value, { "unique" => true })
          else
            @uniques << value
          end
        end
      end

      # Converts the value to the type named by the "type" constraint.
      #
      # Missing values (nil or "") are not type-checked, which matches the
      # explicit nil guards in the length and pattern checks and avoids
      # handing nil to a converter.
      #
      # Returns the converted value, or nil when there is no "type"
      # constraint, the value is missing, or the conversion failed (in which
      # case an :invalid_type error is recorded).
      def validate_type(value, row, column)
        return nil unless constraints["type"]
        return nil if value.nil? || value == ""

        parsed = convert_to_type(value)
        if parsed.nil?
          failed = { "type" => constraints["type"] }
          failed["datePattern"] = constraints["datePattern"] if constraints["datePattern"]
          build_errors(:invalid_type, :schema, row, column, value, failed)
          return nil
        end
        parsed
      end

      # Checks an already converted value against the "minimum" and
      # "maximum" constraints, which are converted with the same type
      # converter before comparison.
      def validate_range(value, row, column)
        #TODO: we're ignoring issues with converting ranges to actual types, maybe we
        #should generate a warning? The schema is invalid
        if constraints["minimum"]
          minimum_value = convert_to_type( constraints["minimum"] )
          if minimum_value
            build_errors(:below_minimum, :schema, row, column, value,
              { "minimum" => constraints["minimum"] }) unless value >= minimum_value
          end
        end
        if constraints["maximum"]
          maximum_value = convert_to_type( constraints["maximum"] )
          if maximum_value
            build_errors(:above_maximum, :schema, row, column, value,
              { "maximum" => constraints["maximum"] }) unless value <= maximum_value
          end
        end
      end

      # Looks up the converter for the "type" constraint in TYPE_VALIDATIONS
      # (not defined in this class; presumably supplied by Csvlint::Types)
      # and calls it with the value and the constraints.
      #
      # Returns the converter's result, or nil when no converter is
      # registered for the type or the converter raised ArgumentError.
      def convert_to_type(value)
        parsed = nil
        converter = TYPE_VALIDATIONS[constraints["type"]]
        if converter
          begin
            parsed = converter.call value, constraints
          rescue ArgumentError
            # Conversion failure is reported to the caller as nil.
          end
        end
        parsed
      end
  end
end