csvlint 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitignore +22 -0
  4. data/.travis.yml +10 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.md +22 -0
  7. data/README.md +214 -0
  8. data/Rakefile +17 -0
  9. data/bin/create_schema +32 -0
  10. data/bin/csvlint +52 -0
  11. data/csvlint.gemspec +39 -0
  12. data/features/check_format.feature +46 -0
  13. data/features/csv_options.feature +35 -0
  14. data/features/fixtures/cr-line-endings.csv +1 -0
  15. data/features/fixtures/crlf-line-endings.csv +3 -0
  16. data/features/fixtures/inconsistent-line-endings.csv +2 -0
  17. data/features/fixtures/invalid-byte-sequence.csv +24 -0
  18. data/features/fixtures/lf-line-endings.csv +3 -0
  19. data/features/fixtures/spreadsheet.xls +0 -0
  20. data/features/fixtures/title-row.csv +4 -0
  21. data/features/fixtures/valid.csv +3 -0
  22. data/features/fixtures/windows-line-endings.csv +2 -0
  23. data/features/information.feature +22 -0
  24. data/features/parse_csv.feature +90 -0
  25. data/features/schema_validation.feature +63 -0
  26. data/features/sources.feature +18 -0
  27. data/features/step_definitions/csv_options_steps.rb +19 -0
  28. data/features/step_definitions/information_steps.rb +13 -0
  29. data/features/step_definitions/parse_csv_steps.rb +30 -0
  30. data/features/step_definitions/schema_validation_steps.rb +7 -0
  31. data/features/step_definitions/sources_steps.rb +7 -0
  32. data/features/step_definitions/validation_errors_steps.rb +43 -0
  33. data/features/step_definitions/validation_info_steps.rb +18 -0
  34. data/features/step_definitions/validation_warnings_steps.rb +46 -0
  35. data/features/support/env.rb +30 -0
  36. data/features/support/webmock.rb +1 -0
  37. data/features/validation_errors.feature +151 -0
  38. data/features/validation_info.feature +24 -0
  39. data/features/validation_warnings.feature +74 -0
  40. data/lib/csvlint.rb +13 -0
  41. data/lib/csvlint/error_collector.rb +43 -0
  42. data/lib/csvlint/error_message.rb +15 -0
  43. data/lib/csvlint/field.rb +102 -0
  44. data/lib/csvlint/schema.rb +69 -0
  45. data/lib/csvlint/types.rb +113 -0
  46. data/lib/csvlint/validate.rb +253 -0
  47. data/lib/csvlint/version.rb +3 -0
  48. data/lib/csvlint/wrapped_io.rb +39 -0
  49. data/spec/field_spec.rb +247 -0
  50. data/spec/schema_spec.rb +149 -0
  51. data/spec/spec_helper.rb +20 -0
  52. data/spec/validator_spec.rb +279 -0
  53. metadata +367 -0
@@ -0,0 +1,30 @@
# Cucumber support environment: puts the gem's lib directory on the load
# path, configures coverage reporting, and only then loads the library that
# the features exercise.
$:.unshift File.join( File.dirname(__FILE__), "..", "..", "lib")

require 'simplecov'
require 'simplecov-rcov'
require 'rspec/expectations'
require 'coveralls'
require 'pry'

Coveralls.wear_merged!

SimpleCov.formatter = SimpleCov::Formatter::RcovFormatter
SimpleCov.start

# Deliberately required after SimpleCov.start: SimpleCov only tracks files
# that are loaded once coverage has been started, so requiring the library
# any earlier leaves it out of the coverage report.
require 'csvlint'

require 'spork'

# Make sure the library is loaded for every Spork run as well.
Spork.each_run do
  require 'csvlint'
end
20
+
# World object handed to Cucumber; step definitions run in its context.
class CustomWorld
  # Baseline CSV parsing options for a scenario. Nothing is preset, so a
  # fresh empty hash is returned on every call.
  def default_csv_options
    {}
  end
end
27
+
# Register CustomWorld as the object Cucumber builds for each scenario.
World { CustomWorld.new }
@@ -0,0 +1 @@
1
+ require 'webmock/cucumber'
@@ -0,0 +1,151 @@
1
+ Feature: Get validation errors
2
+
3
+ Scenario: CSV with ragged rows
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "col1","col2","col3"
7
+ "1","2","3"
8
+ "4","5"
9
+ """
10
+ And it is stored at the url "http://example.com/example1.csv"
11
+ When I ask if there are errors
12
+ Then there should be 1 error
13
+ And that error should have the type "ragged_rows"
14
+ And that error should have the row "3"
15
+ And that error should have the content ""4","5""
16
+
17
+ Scenario: CSV with incorrect quoting
18
+ Given I have a CSV with the following content:
19
+ """
20
+ "col1","col2","col3"
21
+ "Foo","Bar","Baz
22
+ """
23
+ And it is stored at the url "http://example.com/example1.csv"
24
+ When I ask if there are errors
25
+ Then there should be 1 error
26
+ And that error should have the type "unclosed_quote"
27
+ And that error should have the row "2"
28
+ And that error should have the content ""Foo","Bar","Baz"
29
+
30
+ Scenario: Successfully report a CSV with incorrect whitespace
31
+ Given I have a CSV with the following content:
32
+ """
33
+ "col1","col2","col3"
34
+ "Foo","Bar", "Baz"
35
+ """
36
+ And it is stored at the url "http://example.com/example1.csv"
37
+ When I ask if there are errors
38
+ Then there should be 1 error
39
+ And that error should have the type "whitespace"
40
+ And that error should have the row "2"
41
+ And that error should have the content ""Foo","Bar", "Baz""
42
+
43
+ Scenario: Successfully report a CSV with blank rows
44
+ Given I have a CSV with the following content:
45
+ """
46
+ "col1","col2","col3"
47
+ "Foo","Bar","Baz"
48
+ "","",
49
+ "Baz","Bar","Foo"
50
+ """
51
+ And it is stored at the url "http://example.com/example1.csv"
52
+ When I ask if there are errors
53
+ Then there should be 1 error
54
+ And that error should have the type "blank_rows"
55
+ And that error should have the row "3"
56
+ And that error should have the content ""","","
57
+
58
+ Scenario: Successfully report a CSV with multiple trailing empty rows
59
+ Given I have a CSV with the following content:
60
+ """
61
+ "col1","col2","col3"
62
+ "Foo","Bar","Baz"
63
+ "Foo","Bar","Baz"
64
+
65
+
66
+ """
67
+ And it is stored at the url "http://example.com/example1.csv"
68
+ When I ask if there are errors
69
+ Then there should be 1 error
70
+ And that error should have the type "blank_rows"
71
+ And that error should have the row "4"
72
+
73
+ Scenario: Successfully report a CSV with an empty row
74
+ Given I have a CSV with the following content:
75
+ """
76
+ "col1","col2","col3"
77
+ "Foo","Bar","Baz"
78
+
79
+ "Foo","Bar","Baz"
80
+ """
81
+ And it is stored at the url "http://example.com/example1.csv"
82
+ When I ask if there are errors
83
+ Then there should be 1 error
84
+ And that error should have the type "blank_rows"
85
+ And that error should have the row "3"
86
+
87
+ Scenario: Report invalid Encoding
88
+ Given I have a CSV file called "invalid-byte-sequence.csv"
89
+ And I set an encoding header of "UTF-8"
90
+ And it is stored at the url "http://example.com/example1.csv"
91
+ When I ask if there are errors
92
+ Then there should be 1 error
93
+ And that error should have the type "invalid_encoding"
94
+
95
+ Scenario: Correctly handle different encodings
96
+ Given I have a CSV file called "invalid-byte-sequence.csv"
97
+ And I set an encoding header of "ISO-8859-1"
98
+ And it is stored at the url "http://example.com/example1.csv"
99
+ When I ask if there are errors
100
+ Then there should be no "invalid_encoding" errors
101
+
102
+ Scenario: Report invalid file
103
+
104
+ Given I have a CSV file called "spreadsheet.xls"
105
+ And it is stored at the url "http://example.com/example1.csv"
106
+ When I ask if there are errors
107
+ Then there should be 1 error
108
+ And that error should have the type "invalid_encoding"
109
+
110
+ Scenario: Incorrect content type
111
+ Given I have a CSV with the following content:
112
+ """
113
+ "abc","2","3"
114
+ """
115
+ And the content type is set to "application/excel"
116
+ And it is stored at the url "http://example.com/example1.xls"
117
+ And I ask if there are errors
118
+ Then there should be 1 error
119
+ And that error should have the type "wrong_content_type"
120
+
121
+ Scenario: Incorrect extension
122
+ Given I have a CSV with the following content:
123
+ """
124
+ "abc","2","3"
125
+ """
126
+ And the content type is set to "application/excel"
127
+ And it is stored at the url "http://example.com/example1.csv"
128
+ And I ask if there are errors
129
+ Then there should be 1 error
130
+ And that error should have the type "wrong_content_type"
131
+
132
+ Scenario: Handles urls that 404
133
+ Given I have a CSV that doesn't exist
134
+ When I ask if there are errors
135
+ Then there should be 1 error
136
+ And that error should have the type "not_found"
137
+
138
+ Scenario: Incorrect line endings specified in settings
139
+ Given I have a CSV file called "cr-line-endings.csv"
140
+ And I set the line endings to linefeed
141
+ And it is stored at the url "http://example.com/example1.csv"
142
+ And I ask if there are errors
143
+ Then there should be 1 error
144
+ And that error should have the type "line_breaks"
145
+
146
+ Scenario: inconsistent line endings in file cause an error
147
+ Given I have a CSV file called "inconsistent-line-endings.csv"
148
+ And it is stored at the url "http://example.com/example1.csv"
149
+ And I ask if there are errors
150
+ Then there should be 1 error
151
+ And that error should have the type "line_breaks"
@@ -0,0 +1,24 @@
1
+ Feature: Get validation information messages
2
+
3
+ Scenario: LF line endings in file give an info message
4
+ Given I have a CSV file called "lf-line-endings.csv"
5
+ And it is stored at the url "http://example.com/example1.csv"
6
+ And I set header to "true"
7
+ And I ask if there are info messages
8
+ Then there should be 1 info message
9
+ And that message should have the type "nonrfc_line_breaks"
10
+
11
+ Scenario: CR line endings in file give an info message
12
+ Given I have a CSV file called "cr-line-endings.csv"
13
+ And it is stored at the url "http://example.com/example1.csv"
14
+ And I set header to "true"
15
+ And I ask if there are info messages
16
+ Then there should be 1 info message
17
+ And that message should have the type "nonrfc_line_breaks"
18
+
19
+ Scenario: CRLF line endings in file produces no info messages
20
+ Given I have a CSV file called "crlf-line-endings.csv"
21
+ And it is stored at the url "http://example.com/example1.csv"
22
+ And I set header to "true"
23
+ And I ask if there are info messages
24
+ Then there should be 0 info messages
@@ -0,0 +1,74 @@
1
+ Feature: Validation warnings
2
+
3
+ Scenario: UTF-8 Encoding
4
+ Given I have a CSV with the following content:
5
+ """
6
+ "col1","col2","col3"
7
+ "abc","2","3"
8
+ """
9
+ And it is encoded as "utf-8"
10
+ And it is stored at the url "http://example.com/example1.csv"
11
+ When I ask if there are warnings
12
+ Then there should be 0 warnings
13
+
14
+ Scenario: ISO-8859-1 Encoding
15
+ Given I have a CSV with the following content:
16
+ """
17
+ "col1","col2","col3"
18
+ "1","2","3"
19
+ """
20
+ And it is encoded as "iso-8859-1"
21
+ And it is stored at the url "http://example.com/example1.csv"
22
+ When I ask if there are warnings
23
+ Then there should be 1 warnings
24
+
25
+ Scenario: Correct content type
26
+ Given I have a CSV with the following content:
27
+ """
28
+ "col1","col2","col3"
29
+ "abc","2","3"
30
+ """
31
+ And the content type is set to "text/csv"
32
+ And it is stored at the url "http://example.com/example1.csv"
33
+ And I ask if there are warnings
34
+ Then there should be 0 warnings
35
+
36
+ Scenario: No extension
37
+ Given I have a CSV with the following content:
38
+ """
39
+ "col1","col2","col3"
40
+ "abc","2","3"
41
+ """
42
+ And the content type is set to "text/csv"
43
+ And it is stored at the url "http://example.com/example1"
44
+ And I ask if there are warnings
45
+ Then there should be 0 warnings
46
+
47
+ Scenario: Allow query params after extension
48
+ Given I have a CSV with the following content:
49
+ """
50
+ "col1","col2","col3"
51
+ "abc","2","3"
52
+ """
53
+ And the content type is set to "text/csv"
54
+ And it is stored at the url "http://example.com/example1.csv?query=param"
55
+ And I ask if there are warnings
56
+ Then there should be 0 warnings
57
+
58
+ Scenario: User doesn't supply encoding
59
+ Given I have a CSV with the following content:
60
+ """
61
+ "col1","col2","col3"
62
+ "abc","2","3"
63
+ """
64
+ And it is stored at the url "http://example.com/example1.csv" with no character set
65
+ When I ask if there are warnings
66
+ Then there should be 1 warnings
67
+ And that warning should have the type "no_encoding"
68
+
69
+ Scenario: Title rows
70
+ Given I have a CSV file called "title-row.csv"
71
+ And it is stored at the url "http://example.com/example1.csv"
72
+ And I ask if there are warnings
73
+ Then there should be 1 warnings
74
+ And that warning should have the type "title_row"
@@ -0,0 +1,13 @@
1
+ require "csvlint/version"
2
+ require 'csv'
3
+ require 'open-uri'
4
+ require 'mime/types'
5
+ require 'tempfile'
6
+
7
+ require 'csvlint/types'
8
+ require 'csvlint/error_message'
9
+ require 'csvlint/error_collector'
10
+ require 'csvlint/validate'
11
+ require 'csvlint/wrapped_io'
12
+ require 'csvlint/field'
13
+ require 'csvlint/schema'
@@ -0,0 +1,43 @@
module Csvlint

  # Mixin that gives an object three message collections (errors, warnings
  # and info messages) together with helpers to append to them.
  #
  # For every level in MESSAGE_LEVELS the including class gains:
  #   * a reader named after the level (e.g. +errors+)
  #   * a +build_<level>+ method (e.g. +build_errors+) that appends a new
  #     Csvlint::ErrorMessage to that collection
  module ErrorCollector

    # Severity levels tracked by the collector.
    MESSAGE_LEVELS = [
      :errors,
      :warnings,
      :info_messages
    ].freeze

    # Wraps the details of one finding in a Csvlint::ErrorMessage.
    def build_message(type, category, row, column, content, constraints)
      Csvlint::ErrorMessage.new({
        :type => type,
        :category => category,
        :row => row,
        :column => column,
        :content => content,
        :constraints => constraints
      })
    end

    MESSAGE_LEVELS.each do |level|
      ivar = "@#{level}"

      # Reader for the collection. The array is created on first access so
      # the mixin also works when the including class has not called #reset
      # yet (previously this returned nil and callers hit NoMethodError).
      define_method level do
        instance_variable_get(ivar) || instance_variable_set(ivar, [])
      end

      # Appends a message of the given type to this level's collection and
      # returns the collection.
      define_method "build_#{level}" do |type, category = nil, row = nil, column = nil, content = nil, constraints = {}|
        send(level) << build_message(type, category, row, column, content, constraints)
      end

    end

    # True when no errors have been recorded; warnings and info messages do
    # not affect validity.
    def valid?
      errors.empty?
    end

    # Replaces every collection with a new empty array.
    def reset
      MESSAGE_LEVELS.each do |level|
        instance_variable_set("@#{level}", [])
      end
    end

  end
end
@@ -0,0 +1,15 @@
module Csvlint

  # Value object describing one validation finding. Every key of the params
  # hash is stored as an instance variable of the same name; the attributes
  # listed below are exposed through readers.
  class ErrorMessage

    attr_reader :type, :category, :row, :column, :content, :constraints

    # params - enumerable of key/value pairs, e.g. { :type => :ragged_rows }
    def initialize(params)
      params.each { |attribute, setting| instance_variable_set(:"@#{attribute}", setting) }
    end

  end

end
@@ -0,0 +1,102 @@
# Set is not loaded automatically on Rubies older than 3.2.
require 'set'

module Csvlint

  # One column definition from a schema: a name, an optional title and
  # description, and a hash of constraints that each value in the column is
  # checked against.
  #
  # Constraint keys read by this class: "required", "minLength", "maxLength",
  # "pattern", "unique", "type", "datePattern", "minimum" and "maximum".
  class Field
    include Csvlint::ErrorCollector
    include Csvlint::Types

    attr_reader :name, :constraints, :title, :description

    # name        - column name
    # constraints - hash of constraint name => setting (nil is treated as {})
    # title       - optional human readable title
    # description - optional description
    def initialize(name, constraints={}, title=nil, description=nil)
      @name = name
      @constraints = constraints || {}
      # Values already seen, used to enforce the "unique" constraint. This is
      # kept across validate_column calls on purpose.
      @uniques = Set.new
      @title = title
      @description = description
      reset
    end

    # Validates a single cell against every configured constraint.
    #
    # value  - the cell content (String, or nil for a missing cell)
    # row    - row number reported in any error
    # column - column number reported in any error
    #
    # Messages from a previous call are discarded first. Returns true when
    # this value produced no errors.
    def validate_column(value, row=nil, column=nil)
      reset
      validate_length(value, row, column)
      validate_values(value, row, column)
      parsed = validate_type(value, row, column)
      validate_range(parsed, row, column) unless parsed.nil?
      valid?
    end

    private

      # Checks the "required", "minLength" and "maxLength" constraints.
      # A nil value counts as empty for "required" and "minLength".
      def validate_length(value, row, column)
        if constraints["required"] == true
          build_errors(:missing_value, :schema, row, column, value,
            { "required" => true }) if value.nil? || value.length == 0
        end
        if constraints["minLength"]
          build_errors(:min_length, :schema, row, column, value,
            { "minLength" => constraints["minLength"] }) if value.nil? || value.length < constraints["minLength"]
        end
        if constraints["maxLength"]
          build_errors(:max_length, :schema, row, column, value,
            { "maxLength" => constraints["maxLength"] } ) if !value.nil? && value.length > constraints["maxLength"]
        end
      end

      # Checks the "pattern" and "unique" constraints.
      def validate_values(value, row, column)
        if constraints["pattern"]
          build_errors(:pattern, :schema, row, column, value,
            { "pattern" => constraints["pattern"] } ) if !value.nil? && !value.match( constraints["pattern"] )
        end
        if constraints["unique"] == true
          if @uniques.include? value
            build_errors(:unique, :schema, row, column, value, { "unique" => true })
          else
            @uniques << value
          end
        end
      end

      # Converts the value to the schema's "type" when one is configured.
      #
      # Returns the converted value, or nil when no type is configured, the
      # value is missing/empty, or the conversion failed (in which case an
      # :invalid_type error is recorded).
      def validate_type(value, row, column)
        return nil unless constraints["type"]
        # A missing cell (nil) is treated like an empty one: there is nothing
        # to convert, and handing nil to a type converter may raise instead
        # of reporting an error.
        return nil if value.nil? || value == ""

        parsed = convert_to_type(value)
        if parsed.nil?
          failed = { "type" => constraints["type"] }
          failed["datePattern"] = constraints["datePattern"] if constraints["datePattern"]
          build_errors(:invalid_type, :schema, row, column, value, failed)
        end
        parsed
      end

      # Checks an already converted value against "minimum" and "maximum".
      def validate_range(value, row, column)
        #TODO: we're ignoring issues with converting ranges to actual types, maybe we
        #should generate a warning? The schema is invalid
        if constraints["minimum"]
          minimum_value = convert_to_type( constraints["minimum"] )
          if minimum_value
            build_errors(:below_minimum, :schema, row, column, value,
              { "minimum" => constraints["minimum"] }) unless value >= minimum_value
          end
        end
        if constraints["maximum"]
          maximum_value = convert_to_type( constraints["maximum"] )
          if maximum_value
            build_errors(:above_maximum, :schema, row, column, value,
              { "maximum" => constraints["maximum"] }) unless value <= maximum_value
          end
        end
      end

      # Runs the converter registered in TYPE_VALIDATIONS for the configured
      # "type". Returns nil when there is no converter for that type or the
      # converter rejects the value.
      def convert_to_type(value)
        converter = TYPE_VALIDATIONS[constraints["type"]]
        return nil unless converter

        begin
          converter.call value, constraints
        rescue ArgumentError, TypeError
          # TypeError is treated like ArgumentError so that a value of an
          # unexpected class (for example a non-String bound taken from the
          # schema) counts as "could not be converted" instead of aborting
          # validation.
          nil
        end
      end
  end
end