csv_schema 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
22
+ spec/tmp/*
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 jconley
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,3 @@
1
+ csv validations:
2
+
3
+ correct line feeds (mac vs. unix)?
data/README.rdoc ADDED
@@ -0,0 +1,17 @@
1
+ = csv_schema
2
+
3
+ Description goes here.
4
+
5
+ == Note on Patches/Pull Requests
6
+
7
+ * Fork the project.
8
+ * Make your feature addition or bug fix.
9
+ * Add tests for it. This is important so I don't break it in a
10
+ future version unintentionally.
11
+ * Commit, do not mess with rakefile, version, or history.
12
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
13
+ * Send me a pull request. Bonus points for topic branches.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2010 jconley. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,46 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
# Gem packaging tasks. Jeweler is optional: when it is missing we print a hint
# and carry on so the spec/rdoc tasks below remain usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "csv_schema"
    gem.summary = %Q{This gem validates the format of csv data for ETL tools}
    gem.description = %Q{This validator is intended to be run on csv files that are updated and received on a regular basis. It allows you to specify required headers and the following data restrictions: columns that must be unique, columns that must be restricted to certain values, or columns that can't allow nil values. This gem checks that the newly received file is consistent with the specified 'csv schema'. This frees you from having to manually check that the new file has not changed and reduces the possibility that an unnoticed change will cause any subsequent analyses to be incorrect. }
    gem.email = ""
    gem.homepage = "http://github.com/jconley88/csv_schema"
    gem.authors = ["jconley"]
    gem.add_development_dependency "rspec", "~> 1.3.1"
    gem.add_dependency "fastercsv", "~> 1.5.3"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Spec tasks (RSpec 1.x rake integration).
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# :check_dependencies is only defined once Jeweler has loaded. Depending on it
# unconditionally makes `rake spec` abort with an unknown-task error in the
# LoadError path handled above, so only wire it up when it exists.
task :spec => :check_dependencies if Rake::Task.task_defined?('check_dependencies')

task :default => :spec

# RDoc generation; the title carries the VERSION file contents when present.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  version = File.exist?('VERSION') ? File.read('VERSION') : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "csv_schema #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
@@ -0,0 +1,59 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{csv_schema}
8
+ s.version = "0.0.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["jconley"]
12
+ s.date = %q{2010-12-19}
13
+ s.description = %q{This validator is intended to be run on csv files that are updated and received on a regular basis. It allows you to specify required headers the following data restrictions: columns that must be unique, columns that must be restricted to certain values, or columns that can't allow nil values. This gem checks that the newly received file is consistent with the specified 'csv schema'. This frees you form having to manually check that the new file has not changed and reduces the possibility that an unnoticed change will cause any subsequent analyses to be incorrect. }
14
+ s.email = %q{}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README",
18
+ "README.rdoc"
19
+ ]
20
+ s.files = [
21
+ ".document",
22
+ ".gitignore",
23
+ "LICENSE",
24
+ "README",
25
+ "README.rdoc",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "csv_schema.gemspec",
29
+ "lib/csv_schema.rb",
30
+ "spec/csv_schema_spec.rb",
31
+ "spec/spec_helper.rb"
32
+ ]
33
+ s.homepage = %q{http://github.com/jconley88/csv_schema}
34
+ s.rdoc_options = ["--charset=UTF-8"]
35
+ s.require_paths = ["lib"]
36
+ s.rubygems_version = %q{1.3.7}
37
+ s.summary = %q{This gem validates the format of csv data for ETL tools}
38
+ s.test_files = [
39
+ "spec/csv_schema_spec.rb",
40
+ "spec/spec_helper.rb"
41
+ ]
42
+
43
+ if s.respond_to? :specification_version then
44
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
45
+ s.specification_version = 3
46
+
47
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
48
+ s.add_development_dependency(%q<rspec>, ["~> 1.3.1"])
49
+ s.add_runtime_dependency(%q<fastercsv>, ["~> 1.5.3"])
50
+ else
51
+ s.add_dependency(%q<rspec>, ["~> 1.3.1"])
52
+ s.add_dependency(%q<fastercsv>, ["~> 1.5.3"])
53
+ end
54
+ else
55
+ s.add_dependency(%q<rspec>, ["~> 1.3.1"])
56
+ s.add_dependency(%q<fastercsv>, ["~> 1.5.3"])
57
+ end
58
+ end
59
+
data/lib/csv_schema.rb ADDED
@@ -0,0 +1,133 @@
1
+ require 'fastercsv'
2
+
3
# Validates a CSV file against a declarative set of header and field rules.
#
# The first row of the file is treated as the header row. Every violation is
# reported by raising a RuntimeError whose message names the offending header,
# value and/or row number (rows are numbered from 1, the header being row 1).
#
# Example:
#
#   CSVSchema.new(:file => 'people.csv',
#                 :required_headers => ['id', 'state'],
#                 :field_requirements => {
#                   'id'    => {:unique => true, :cant_be_nil => true},
#                   'state' => {:restrict_values => ['NY', 'NJ']}
#                 }).validate
class CSVSchema
  # args - Hash of options:
  #   :file                          - path of the CSV file (required).
  #   :allow_duplicate_headers       - skip the duplicate-header check (default false).
  #   :headers_transform             - optional callable applied to each header
  #                                    before duplicates are detected.
  #   :allow_blank_headers           - skip the blank-header check (default false).
  #   :required_headers              - optional Array of headers that must be present.
  #   :allow_blank_rows              - skip the blank-row check (default false).
  #   :allow_different_field_counts  - skip the field-count check (default false).
  #   :field_requirements            - optional Hash of header => requirements, where
  #                                    requirements may contain :unique, :cant_be_nil
  #                                    and :restrict_values (Array of allowed values).
  #
  # Raises RuntimeError when :file is not supplied.
  def initialize(args)
    @file = args[:file] || raise(":file argument is required")

    @allow_duplicate_headers = option_or_false(args, :allow_duplicate_headers)
    @headers_transform = args[:headers_transform]
    @allow_blank_headers = option_or_false(args, :allow_blank_headers)
    @required_headers = args[:required_headers]
    @allow_blank_rows = option_or_false(args, :allow_blank_rows)
    @allow_different_field_counts = option_or_false(args, :allow_different_field_counts)

    # Requirements are kept keyed by header NAME and are never mutated after
    # construction; #validate derives fresh column-number lookups on each run.
    @unique_fields = []
    @cant_be_nil_fields = []
    @restrict_value_fields = {}
    (args[:field_requirements] || []).each do |field, requirements|
      @unique_fields << field if requirements[:unique]
      @cant_be_nil_fields << field if requirements[:cant_be_nil]
      @restrict_value_fields[field] = requirements[:restrict_values] if requirements[:restrict_values]
    end
  end

  # Reads the file row by row and applies every enabled check.
  #
  # May be called repeatedly on the same instance; all per-run state is reset
  # at the start of each call.
  #
  # Raises RuntimeError on the first violation found, when the file does not
  # exist, or when :field_requirements names a header the file does not have.
  def validate()
    raise "#{@file} cannot be found" unless File.exist?(@file)
    reset_run_state
    FasterCSV.foreach(@file) do |row|
      if @current_row == 1
        validate_header_row(row)
      else
        validate_data_row(row)
      end
      # The field-count check applies to the header row as well as data rows.
      validate_different_field_counts(row) unless @allow_different_field_counts
      @current_row += 1
    end
    validate_unique_fields
  end

  private

  # Returns the option's value, or false when the option was not supplied.
  def option_or_false(args, key)
    args[key].nil? ? false : args[key]
  end

  # Clears everything a previous #validate run may have left behind.
  def reset_run_state
    @current_row = 1
    @field_count = nil
    @headers = []
    @cant_be_nil_columns = []
    @restrict_value_columns = {}
    @unique_columns = {}
  end

  # Runs the header-only checks, then resolves field requirements to columns.
  def validate_header_row(row)
    @headers = row
    validate_duplicate_headers(row, @headers_transform) unless @allow_duplicate_headers
    validate_blank_headers(row) unless @allow_blank_headers
    validate_required_headers(row) if @required_headers
    index_field_requirements_by_column_number
  end

  # Runs the per-row checks that apply to every non-header row.
  def validate_data_row(row)
    validate_blank_rows(row) unless @allow_blank_rows
    validate_restrict_value_fields(row) unless @restrict_value_columns.empty?
    validate_cant_be_nil_fields(row) unless @cant_be_nil_columns.empty?
    add_to_uniqueness_validator(row) unless @unique_columns.empty?
  end

  # Translates the name-keyed requirements into column-number-keyed lookups
  # for the current file. When a header occurs more than once, the last
  # occurrence is the one the requirement applies to.
  def index_field_requirements_by_column_number
    column_for = {}
    @headers.each_with_index do |header, column_number|
      column_for[header] = column_number
    end

    # Without this guard an unknown header would resolve to a nil column and
    # surface later as an opaque TypeError from row[nil].
    constrained = (@cant_be_nil_fields + @restrict_value_fields.keys + @unique_fields).uniq
    unknown = constrained.reject { |field| column_for.key?(field) }
    unless unknown.empty?
      raise "#{File.basename(@file)} is missing headers named in :field_requirements: #{unknown.inspect}"
    end

    @cant_be_nil_columns = @cant_be_nil_fields.map { |field| column_for[field] }

    @restrict_value_columns = {}
    @restrict_value_fields.each do |field, allowed_values|
      @restrict_value_columns[column_for[field]] = allowed_values
    end

    @unique_columns = {}
    @unique_fields.each do |field|
      @unique_columns[column_for[field]] = []
    end
  end

  def validate_duplicate_headers(row, transform = nil)
    headers = transform ? row.map { |header| transform.call(header) } : row
    dups = duplicates(headers)
    raise "Duplicate headers exist: #{dups.inspect}" unless dups.empty?
  end

  def validate_blank_headers(row)
    raise "There are illegal blank headers. If this is allowed, set :allow_blank_headers => true" if row.any? { |h| h.nil? || h == '' }
  end

  def validate_required_headers(row)
    missing = @required_headers - row
    raise "#{File.basename(@file)} is missing headers: #{missing.inspect}" unless missing.empty?
  end

  def validate_blank_rows(row)
    raise "Row #{@current_row} in #{File.basename(@file)} is blank" if row.all? { |f| f.nil? || f == '' }
  end

  # The first row seen in a run fixes the expected field count.
  def validate_different_field_counts(row)
    @field_count ||= row.size
    raise "Row #{@current_row} has a different number of fields than all the rows preceding it" if @field_count != row.size
  end

  def validate_restrict_value_fields(row)
    @restrict_value_columns.each do |col_num, allowed_values|
      raise "The '#{header_name(col_num)}' column contains an illegal value: '#{row[col_num]}' in row #{@current_row}" unless allowed_values.include?(row[col_num])
    end
  end

  def validate_cant_be_nil_fields(row)
    @cant_be_nil_columns.each do |col_num|
      next unless row[col_num].nil?
      raise "The '#{header_name(col_num)}' column contains an illegal nil value in row #{@current_row}"
    end
  end

  # Collects each unique-constrained column's value; duplicates are reported
  # once the whole file has been read (see validate_unique_fields).
  def add_to_uniqueness_validator(row)
    @unique_columns.each do |col_num, seen_values|
      seen_values << row[col_num]
    end
  end

  def validate_unique_fields
    @unique_columns.each do |col_num, values|
      dups = duplicates(values)
      raise "The '#{header_name(col_num)}' column contains illegal duplicate values: #{dups.inspect}" unless dups.empty?
    end
  end

  # Returns the distinct values that occur more than once in +values+.
  def duplicates(values)
    counts = values.inject(Hash.new(0)) { |tally, value| tally[value] += 1; tally }
    counts.reject { |_value, count| count == 1 }.keys
  end

  def header_name(col_num)
    @headers[col_num]
  end
end
@@ -0,0 +1,223 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'tempfile'
3
+ require 'fileutils'
4
+ include FileUtils
5
+
6
+ describe CSVSchema do
7
+ before :each do
8
+ @lenient_options = {:allow_blank_headers => true,
9
+ :allow_duplicate_headers => true,
10
+ :allow_blank_rows => true,
11
+ :allow_different_field_counts => true}
12
+ end
13
+
14
+ describe "initialize" do
15
+ it "should raise if the FILE argument is not supplied" do
16
+ lambda{CSVSchema.new()}.should raise_error
17
+ end
18
+
19
+ it "the ALLOW_DUPLICATE_HEADERS flag should default to false when not set" do
20
+ csv_schema = CSVSchema.new(:file => '')
21
+ csv_schema.instance_variable_get('@allow_duplicate_headers').should be_false
22
+ end
23
+
24
+ it "the ALLOW_BLANK_HEADERS flag should default to false when not set" do
25
+ csv_schema = CSVSchema.new(:file => '')
26
+ csv_schema.instance_variable_get('@allow_blank_headers').should be_false
27
+ end
28
+
29
# Reads the instance variable CSVSchema#initialize actually assigns
# (@allow_different_field_counts). The previous name was never set, so the
# example passed vacuously on nil.
it "the ALLOW_DIFFERENT_FIELD_COUNTS flag should default to false when not set" do
  csv_schema = CSVSchema.new(:file => '')
  csv_schema.instance_variable_get('@allow_different_field_counts').should be_false
end
33
+
34
+ it "the ALLOW_BLANK_ROWS flag should default to false when not set" do
35
+ csv_schema = CSVSchema.new(:file => '')
36
+ csv_schema.instance_variable_get('@allow_blank_rows').should be_false
37
+ end
38
+ end
39
+
40
+ describe ".validate" do
41
+ before :each do
42
+ @headers = ['header_1', 'header_2', 'header_3']
43
+ @rows = [
44
+ ['value_1', 'value_2', 'value_3']
45
+ ]
46
+ end
47
+
48
+ it "should raise if an invalid FILE argument was supplied" do
49
+ lambda { CSVSchema.new(@lenient_options.merge(:file => '')).validate}.should raise_error
50
+ end
51
+
52
+ describe "duplicate headers" do
53
+ it "should raise when duplicate headers exist and the ALLOW_DUPLICATE_HEADERS flag is FALSE" do
54
+ @headers = ['header_1', 'header_1']
55
+ lambda{CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_duplicate_headers => false)).validate}.should raise_error(StandardError, /header_1/)
56
+ end
57
+
58
+ it "should raise when duplicate headers exist according to the HEADERS_TRANSFORM proc and the ALLOW_DUPLICATE_HEADERS flag is FALSE" do
59
+ @headers = ['header_1', 'HEADER 1']
60
+ transform = FasterCSV::HeaderConverters[:symbol]
61
+ lambda{CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_duplicate_headers => false, :headers_transform => transform)).validate}.should raise_error
62
+ end
63
+
64
+ it "should NOT raise when NO duplicate headers exist and the ALLOW_DUPLICATE_HEADERS flag is FALSE" do
65
+ lambda { CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_duplicate_headers => false)).validate}.should_not raise_error
66
+ end
67
+
68
+ it "should NOT raise when duplicate headers exist and the ALLOW_DUPLICATE_HEADERS flag is TRUE" do
69
+ @headers = ['header_1', 'header_1']
70
+ lambda { CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_duplicate_headers => true)).validate}.should_not raise_error
71
+ end
72
+ end
73
+
74
+ describe "blank_headers" do
75
+ it "should NOT raise when there are NO blank headers and the ALLOW_BLANK_HEADERS flag is true" do
76
+ lambda {CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_blank_headers => true)).validate}.should_not raise_error
77
+ end
78
+
79
+ it "should NOT raise when there are blank headers and the ALLOW_BLANK_HEADERS flag is true" do
80
+ @headers = ['header_1', '', 'header_2']
81
+ lambda {CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_blank_headers => true)).validate}.should_not raise_error
82
+ end
83
+
84
+ it "should NOT raise when there are NO blank headers and the ALLOW_BLANK_HEADERS flag is false" do
85
+ lambda {CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_blank_headers => false)).validate}.should_not raise_error
86
+ end
87
+
88
+ it "should raise when there are blank headers and the ALLOW_BLANK_HEADERS flag is false" do
89
+ @headers = ['header_1', '', 'header_2']
90
+ lambda {CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_blank_headers => false)).validate}.should raise_error
91
+ end
92
+ end
93
+
94
+ describe "required_headers" do
95
+ it "should NOT raise when all REQUIRED HEADERS exist" do
96
+ lambda { CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :required_headers => ['header_1'])).validate }.should_not raise_error
97
+ end
98
+
99
+ it "should raise when any REQUIRED HEADERS do NOT exist" do
100
+ missing_header = 'doesnt_exist'
101
+ lambda { CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :required_headers => [missing_header])).validate }.should raise_error(StandardError, /#{missing_header}/)
102
+ end
103
+ end
104
+
105
+ describe "allow_blank_rows" do
106
+ it "should NOT raise when there are NO blank rows and the ALLOW_BLANK_ROWS flag is true" do
107
+ lambda { CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_blank_rows => true)).validate }.should_not raise_error
108
+ end
109
+
110
+ it "should NOT raise when there are blank rows and the ALLOW_BLANK_ROWS flag is true" do
111
+ @rows << ['', '', '']
112
+ lambda { CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_blank_rows => true)).validate }.should_not raise_error
113
+ end
114
+
115
+ it "should NOT raise when there are NO blank rows and the ALLOW_BLANK_ROWS flag is false" do
116
+ lambda { CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_blank_rows => false)).validate }.should_not raise_error
117
+ end
118
+
119
+ it "should raise when there are blank rows and the ALLOW_BLANK_ROWS flag is false" do
120
+ @rows << ['', '', '']
121
+ lambda { CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_blank_rows => false)).validate }.should raise_error(StandardError, /3/)
122
+ end
123
+ end
124
+
125
+ describe "allow_different_field_counts" do
126
+ it "should NOT raise when all field counts are the same and the ALLOW_DIFFERENT_FIELD_COUNTS flag is true" do
127
+ lambda { CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_different_field_counts => true)).validate }.should_not raise_error
128
+ end
129
+
130
+ it "should NOT raise when the field count in a header row differs from a data row and the ALLOW_DIFFERENT_FIELD_COUNTS flag is true" do
131
+ @headers << 'header_4'
132
+ lambda { CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_different_field_counts => true)).validate }.should_not raise_error
133
+ end
134
+
135
it "should NOT raise when the field counts in two data rows differ and the ALLOW_DIFFERENT_FIELD_COUNTS flag is true" do
  # dup first: appending to @rows.first in place would lengthen BOTH rows
  # (the same array would be listed twice), so the counts would not differ.
  @rows << (@rows.first.dup << 'value_4')
  lambda { CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_different_field_counts => true)).validate }.should_not raise_error
end
139
+
140
+ it "should NOT raise when all field counts are the same and the ALLOW_DIFFERENT_FIELD_COUNTS flag is false" do
141
+ lambda { CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_different_field_counts => false)).validate }.should_not raise_error
142
+ end
143
+
144
+ it "should raise when the field count in a header row differs from a data row and the ALLOW_DIFFERENT_FIELD_COUNTS flag is false" do
145
+ @headers << 'header_4'
146
+ lambda { CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_different_field_counts => false)).validate }.should raise_error(StandardError, /2/)
147
+ end
148
+
149
+ it "should raise when the field counts in two data rows differ and the ALLOW_DIFFERENT_FIELD_COUNTS flag is false" do
150
+ @rows << (@rows.first.dup << 'value_4')
151
+ lambda { CSVSchema.new(@lenient_options.merge(:file => generate_csv_file.path, :allow_different_field_counts => false)).validate }.should raise_error(StandardError, /3/)
152
+ end
153
+ end
154
+
155
+ describe "field_requirements" do
156
+ describe "restrict_values" do
157
+ it "should NOT raise if all field values appear in the RESTRICT_VALUES array for the specified field" do
158
+ options = {:file => generate_csv_file.path, :field_requirements => {'header_1' => {:restrict_values => ['value_1']}}}
159
+ lambda { CSVSchema.new(@lenient_options.merge(options)).validate }.should_not raise_error
160
+ end
161
+
162
+ it "should raise if any field values do NOT appear in the RESTRICT_VALUES array for the specified field" do
163
+ options = {:file => generate_csv_file.path, :field_requirements => {'header_1' => {:restrict_values => []}}}
164
+ lambda { CSVSchema.new(@lenient_options.merge(options)).validate }.should raise_error(StandardError, /header_1.*value_1.*2/)
165
+ end
166
+ end
167
+
168
+ describe "cant_be_nil" do
169
+ it "should NOT raise if all values for the specified field are populated" do
170
+ options = {:file => generate_csv_file.path, :field_requirements => {'header_1' => {:cant_be_nil => true}}}
171
+ lambda { CSVSchema.new(@lenient_options.merge(options)).validate }.should_not raise_error
172
+ end
173
+
174
+ it "should NOT raise if values in fields other than the specified one have nil values" do
175
+ @rows << ['value_1', nil, '']
176
+ options = {:file => generate_csv_file.path, :field_requirements => {'header_1' => {:cant_be_nil => true}}}
177
+ lambda { CSVSchema.new(@lenient_options.merge(options)).validate }.should_not raise_error
178
+ end
179
+
180
+ it "should raise with field name and column number if a value in the specified field is nil" do
181
+ @rows << ['', 'value_2', 'value_3']
182
+ options = {:file => generate_csv_file.path, :field_requirements => {'header_1' => {:cant_be_nil => true}}}
183
+ lambda { CSVSchema.new(@lenient_options.merge(options)).validate }.should raise_error(StandardError, /header_1.*3/)
184
+ end
185
+ end
186
+
187
+ describe 'unique' do
188
+ it "should NOT raise if all of the values are unique" do
189
+ @rows << ['v1']
190
+ @rows << ['v2']
191
+ @rows << ['v3']
192
+ options = {:file => generate_csv_file.path, :field_requirements => {'header_1' => {:unique => true}}}
193
+ lambda { CSVSchema.new(@lenient_options.merge(options)).validate }.should_not raise_error
194
+ end
195
+
196
+ it "should not raise if other fields have non-unique values" do
197
+ @rows << ['v1','v1']
198
+ @rows << ['v2','v1']
199
+ @rows << ['v3','v1']
200
+ options = {:file => generate_csv_file.path, :field_requirements => {'header_1' => {:unique => true}}}
201
+ lambda { CSVSchema.new(@lenient_options.merge(options)).validate }.should_not raise_error
202
+ end
203
+
204
+ it "should raise if there are any duplicate values" do
205
+ @rows << ['v1']
206
+ @rows << ['v1']
207
+ @rows << ['v3']
208
+ options = {:file => generate_csv_file.path, :field_requirements => {'header_1' => {:unique => true}}}
209
+ lambda { CSVSchema.new(@lenient_options.merge(options)).validate }.should raise_error(StandardError, /header_1.*v1/)
210
+ end
211
+ end
212
+ end
213
+ end
214
+ end
215
+
216
# Writes @headers followed by each of @rows (comma-joined, one per line) to a
# new temp file under spec/tmp (relative to the current directory) and returns
# the Tempfile. Callers typically only use its #path.
def generate_csv_file()
  tmp_dir = File.expand_path('spec/tmp')
  # spec/tmp is gitignored, so it does not exist on a fresh checkout.
  FileUtils.mkdir_p(tmp_dir)

  csv_file = Tempfile.new('example_csv', tmp_dir)
  ([@headers] + @rows).each do |fields|
    csv_file << (fields.join(',') + "\n")
  end
  # Make the content visible to readers that open the path separately.
  csv_file.flush

  # Callers keep only the path; hold a reference so the Tempfile is not
  # garbage-collected (and its file unlinked) before the file is read.
  (@generated_csv_files ||= []) << csv_file
  csv_file
end
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'csv_schema'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,111 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: csv_schema
3
+ version: !ruby/object:Gem::Version
4
+ hash: 31
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 0
10
+ version: 0.0.0
11
+ platform: ruby
12
+ authors:
13
+ - jconley
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-12-19 00:00:00 -05:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rspec
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ hash: 25
30
+ segments:
31
+ - 1
32
+ - 3
33
+ - 1
34
+ version: 1.3.1
35
+ type: :development
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: fastercsv
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ hash: 5
46
+ segments:
47
+ - 1
48
+ - 5
49
+ - 3
50
+ version: 1.5.3
51
+ type: :runtime
52
+ version_requirements: *id002
53
+ description: "This validator is intended to be run on csv files that are updated and received on a regular basis. It allows you to specify required headers the following data restrictions: columns that must be unique, columns that must be restricted to certain values, or columns that can't allow nil values. This gem checks that the newly received file is consistent with the specified 'csv schema'. This frees you form having to manually check that the new file has not changed and reduces the possibility that an unnoticed change will cause any subsequent analyses to be incorrect. "
54
+ email: ""
55
+ executables: []
56
+
57
+ extensions: []
58
+
59
+ extra_rdoc_files:
60
+ - LICENSE
61
+ - README
62
+ - README.rdoc
63
+ files:
64
+ - .document
65
+ - .gitignore
66
+ - LICENSE
67
+ - README
68
+ - README.rdoc
69
+ - Rakefile
70
+ - VERSION
71
+ - csv_schema.gemspec
72
+ - lib/csv_schema.rb
73
+ - spec/csv_schema_spec.rb
74
+ - spec/spec_helper.rb
75
+ has_rdoc: true
76
+ homepage: http://github.com/jconley88/csv_schema
77
+ licenses: []
78
+
79
+ post_install_message:
80
+ rdoc_options:
81
+ - --charset=UTF-8
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ hash: 3
90
+ segments:
91
+ - 0
92
+ version: "0"
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ hash: 3
99
+ segments:
100
+ - 0
101
+ version: "0"
102
+ requirements: []
103
+
104
+ rubyforge_project:
105
+ rubygems_version: 1.3.7
106
+ signing_key:
107
+ specification_version: 3
108
+ summary: This gem validates the format of csv data for ETL tools
109
+ test_files:
110
+ - spec/csv_schema_spec.rb
111
+ - spec/spec_helper.rb