csv-check 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in csv-check.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,45 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ csv-check (0.0.1)
5
+ fastercsv
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ aruba (0.4.11)
11
+ childprocess (>= 0.2.3)
12
+ cucumber (>= 1.1.1)
13
+ ffi (>= 1.0.11)
14
+ rspec (>= 2.7.0)
15
+ builder (3.1.3)
16
+ childprocess (0.3.5)
17
+ ffi (~> 1.0, >= 1.0.6)
18
+ cucumber (1.2.1)
19
+ builder (>= 2.1.2)
20
+ diff-lcs (>= 1.1.3)
21
+ gherkin (~> 2.11.0)
22
+ json (>= 1.4.6)
23
+ diff-lcs (1.1.3)
24
+ fastercsv (1.5.5)
25
+ ffi (1.1.5)
26
+ gherkin (2.11.2)
27
+ json (>= 1.4.6)
28
+ json (1.7.5)
29
+ rspec (2.11.0)
30
+ rspec-core (~> 2.11.0)
31
+ rspec-expectations (~> 2.11.0)
32
+ rspec-mocks (~> 2.11.0)
33
+ rspec-core (2.11.1)
34
+ rspec-expectations (2.11.3)
35
+ diff-lcs (~> 1.1.3)
36
+ rspec-mocks (2.11.3)
37
+
38
+ PLATFORMS
39
+ ruby
40
+
41
+ DEPENDENCIES
42
+ aruba
43
+ csv-check!
44
+ cucumber
45
+ rspec (~> 2.6)
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 rory
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # CsvCheck
2
+
3
+ Format checker for CSV files
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'csv-check'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install csv-check
18
+
19
+ ## Usage
20
+
21
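+ Run `csv-check [INPUT] [OPTIONS]`, for example `csv-check -k --map "0:integer,1:date('%d/%m/%Y')" data.csv`. `-k` skips the header row, `-s` sets the separator, and `--map` assigns a type (`integer`, `float`, `string`, `date` or `any`) to each zero-based column index. Input is read from standard input when no file is given.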
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/csv-check ADDED
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "rubygems"
4
+ require 'optparse'
5
+ require 'ostruct'
6
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'csvchecker.rb')
7
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typemapper.rb')
8
+
9
# Command-line entry point for csv-check.
#
# Parses the command-line options, opens the input and output streams,
# builds the column type map and hands everything to CsvChecker.check.
module CsvCheckRunner

  # --version prints CsvCheck::VERSION, so the version file has to be loaded.
  require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'csv-check', 'version.rb')
  # StringIO wraps standard input when no input file is named.
  require 'stringio'

  # command-line parsing
  COMMAND = File.basename($0)
  USAGE = "Usage: #{COMMAND} [INPUT] [OPTIONS]"

  options = OpenStruct.new
  options.output = "-"
  options.separator = ","
  options.skip_first_row = false

  opts = OptionParser.new do |o|
    o.banner = USAGE
    o.separator ""
    o.separator "Specific options:"

    o.on("-k", "--skip-headers-from-file", "Ignore the headers (first line) in the file") do
      options.skip_first_row = true
    end

    o.on("-s", "--separator SEP", "Set separator character surrounded by single quotes (default is ',')") do |sep|
      options.separator = sep
    end

    o.on("-m", "--map \"<type map>\"", "Set column types as comma-separated index:type pairs, e.g. \"0:integer,1:date('%d/%m/%Y')\"") do |map|
      options.mappings = map
    end

    o.on("-o", "--output FILE", "Write output to a file") do |fn|
      options.output = fn
    end

    o.on_tail("-h", "--help", "Show this message") do
      puts o
      exit
    end

    o.on_tail("-v", "--version", "Show version") do
      puts CsvCheck::VERSION
      exit
    end
  end

  begin
    opts.parse!(ARGV)
  rescue OptionParser::ParseError => e
    # Report the problem and the usage text instead of a stack trace.
    $stderr.puts "Unable to parse options: #{e.message}"
    $stderr.puts opts
    exit 1
  end

  # initialize input handle first, so that a bad input path does not
  # create or truncate the output file
  if ARGV.size > 0
    begin
      IN = File.open(ARGV[0], "r")
    rescue SystemCallError, IOError
      $stderr.print "Unable to open input file #{ARGV[0]}\n"
      exit 1
    end
  else
    IN = StringIO.new($stdin.read)
  end

  # initialize output handle
  if options.output == "-"
    OUT = $stdout.clone
  else
    OUT = File.open(options.output, "w")
  end

  # An absent --map means "no column is type-checked".
  type_mappings = options.mappings ? TypeMapper.new.to_map(options.mappings) : {}

  begin
    # run the command
    CsvChecker.check(IN, OUT, type_mappings, {:col_sep => options.separator}, options.skip_first_row)
  ensure
    # CsvChecker.check ends the process with exit, so the flush has to live
    # in an ensure clause to be reached at all.
    OUT.flush
  end
end
data/csv-check.gemspec ADDED
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'csv-check/version'
5
+
6
# Gem specification for csv-check: identity, packaged files and dependencies.
Gem::Specification.new do |gem|
  gem.name          = "csv-check"
  gem.version       = CsvCheck::VERSION
  gem.authors       = ["rory"]
  gem.email         = ["rory@deadcrow.net"]
  gem.description   = %q{Lets you verify that cells in a CSV file match the formats you expect}
  gem.summary       = %q{Allows you to specify formats (integer, float, string, date with an optional date format string) for columns in CSV data, read from standard input or from files}
  gem.homepage      = "http://github.com/rorygibson/csv-check"

  # The packaged file list is whatever git tracks; executables and test files
  # are derived from it by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency "fastercsv", ">= 0"

  gem.add_development_dependency "rspec", "~> 2.6"
  gem.add_development_dependency "cucumber"
  gem.add_development_dependency "aruba"
end
@@ -0,0 +1,47 @@
1
+ @any
2
+ Feature: Checking that a column can contain any type of value
3
+
4
+ Scenario: Any can substitute for integer
5
+ Given a file named "test.csv" with:
6
+ """
7
+ a,b,c,d
8
+ 1,2,3,4
9
+ """
10
+ When I run `csv-check -k --map "0:any" test.csv`
11
+ Then the output should contain "Found 0 errors"
12
+ And the exit status should be 0
13
+
14
+
15
+ Scenario: Any can substitute for float
16
+ Given a file named "test.csv" with:
17
+ """
18
+ a,b,c,d
19
+ 1.0, 2, 3, 4
20
+ """
21
+ When I run `csv-check -k --map "0:any" test.csv`
22
+ Then the output should contain "Found 0 errors"
23
+ And the exit status should be 0
24
+
25
+
26
+ Scenario: Any can substitute for string
27
+ Given a file named "test.csv" with:
28
+ """
29
+ a,b,c,d
30
+ x, 2, 3, 4
31
+ """
32
+ When I run `csv-check -k --map "0:any" test.csv`
33
+ Then the output should contain "Found 0 errors"
34
+ And the exit status should be 0
35
+
36
+
37
+ Scenario: Any doesn't substitute for nil
38
+ Given a file named "test.csv" with:
39
+ """
40
+ a,b,c,d
41
+ , 2, 3, 4
42
+ """
43
+ When I run `csv-check -k --map "0:any" test.csv`
44
+ Then the output should contain "Found 1 errors"
45
+ And the exit status should be 1
46
+
47
+
@@ -0,0 +1,22 @@
1
+ Feature: Skip first row
2
+ In order to work with files with or without headers
3
+ As a CLI
4
+ I want to be able to skip the first row of input
5
+
6
+ Scenario: Skip first row
7
+ Given a file named "test.csv" with:
8
+ """
9
+ a,b,c,d
10
+ 1,2,3,4
11
+ """
12
+ When I run `csv-check -k -m "0:integer" test.csv`
13
+ Then the output should contain "Total number of lines checked: 1"
14
+
15
+ Scenario: Don't skip first row
16
+ Given a file named "test.csv" with:
17
+ """
18
+ a,b,c,d
19
+ 1,2,3,4
20
+ """
21
+ When I run `csv-check -m "0:integer" test.csv`
22
+ Then the output should contain "Total number of lines checked: 2"
@@ -0,0 +1,46 @@
1
+ @date
2
+ Feature: Checking that a column can contain only date values
3
+
4
+ Scenario: Check for presence of date in a column - successful - one row
5
+ Given a file named "test.csv" with:
6
+ """
7
+ a,b,c,d
8
+ 01/01/1980,2,3,4
9
+ """
10
+ When I run `csv-check -k --map "0:date" test.csv`
11
+ Then the output should contain "Found 0 errors"
12
+ And the exit status should be 0
13
+
14
+
15
+ Scenario: Specifying date format - success
16
+ Given a file named "test.csv" with:
17
+ """
18
+ a,b,c,d
19
+ 01/01/1980 12:50,2,3,4
20
+ """
21
+ When I run `csv-check -k --map "0:date('%d/%m/%Y %H:%M')" test.csv`
22
+ Then the output should contain "Found 0 errors"
23
+ And the exit status should be 0
24
+
25
+
26
+ Scenario: Specifying date format - failure
27
+ Given a file named "test.csv" with:
28
+ """
29
+ a,b,c,d
30
+ 01/01/1980 12:50,2,3,4
31
+ """
32
+ When I run `csv-check -k --map "0:date('%d/%m/%Y')" test.csv`
33
+ Then the output should contain "Found 1 errors"
34
+ And the exit status should be 1
35
+
36
+
37
+ Scenario: Check for presence of date in a column - failure - one row
38
+ Given a file named "test.csv" with:
39
+ """
40
+ a,b,c,d
41
+ 1,2,x,4
42
+ """
43
+ When I run `csv-check -k --map "2:date" test.csv`
44
+ Then the output should contain "Found 1 errors"
45
+ And the exit status should be 1
46
+
@@ -0,0 +1,34 @@
1
+ @float
2
+ Feature: Checking that a column can contain only float values
3
+
4
+ Scenario: Check for presence of float in a column - successful - one row
5
+ Given a file named "test.csv" with:
6
+ """
7
+ a,b,c,d
8
+ 1.0, 2, 3, 4
9
+ """
10
+ When I run `csv-check -k --map "0:float" test.csv`
11
+ Then the output should contain "Found 0 errors"
12
+ And the exit status should be 0
13
+
14
+ Scenario: Check for presence of float in a column - failure - one row
15
+ Given a file named "test.csv" with:
16
+ """
17
+ a,b,c,d
18
+ 1.0, 2.0, x, 4.0
19
+ """
20
+ When I run `csv-check -k --map "2:float" test.csv`
21
+ Then the output should contain "Found 1 errors"
22
+ And the exit status should be 1
23
+
24
+ Scenario: Check for presence of float in a column - multiple rows and mapping elements
25
+ Given a file named "test.csv" with:
26
+ """
27
+ a,b,c,d
28
+ 1.0, 2.0, 3.0, 4.0
29
+ 5.0, 6.0, 7.0, 8.0
30
+ 4.0, 3.0, 2.0, 1.0
31
+ """
32
+ When I run `csv-check -k --map "0:float,1:float,2:float,3:float" test.csv`
33
+ Then the output should contain "Found 0 errors"
34
+ And the exit status should be 0
@@ -0,0 +1,54 @@
1
+ @integer
2
+ Feature: Checking that a column can contain only integer values
3
+
4
+ Scenario: Check for presence of integer in a column - successful - one row
5
+ Given a file named "test.csv" with:
6
+ """
7
+ a,b,c,d
8
+ 1,2,3,4
9
+ """
10
+ When I run `csv-check -k --map "0:integer" test.csv`
11
+ Then the output should contain "Found 0 errors"
12
+ And the exit status should be 0
13
+
14
+ Scenario: Check for presence of integer in a column - failure - one row
15
+ Given a file named "test.csv" with:
16
+ """
17
+ a,b,c,d
18
+ 1,2,x,4
19
+ """
20
+ When I run `csv-check -k --map "2:integer" test.csv`
21
+ Then the output should contain "Found 1 errors"
22
+ And the exit status should be 1
23
+
24
+ Scenario: Quoted integers are allowed
25
+ Given a file named "test.csv" with:
26
+ """
27
+ a,b,c,d
28
+ "1000","2000","1000","4000"
29
+ """
30
+ When I run `csv-check -k --map "2:integer" test.csv`
31
+ Then the output should contain "Found 0 errors"
32
+ And the exit status should be 0
33
+
34
+ Scenario: Comma-separated numbers aren't allowed
35
+ Given a file named "test.csv" with:
36
+ """
37
+ a,b,c,d
38
+ "1000","2000","1,000","4000"
39
+ """
40
+ When I run `csv-check -k --map "2:integer" test.csv`
41
+ Then the output should contain "Found 1 errors"
42
+ And the exit status should be 1
43
+
44
+ Scenario: Check for presence of integer in a column - multiple rows and mapping elements
45
+ Given a file named "test.csv" with:
46
+ """
47
+ a,b,c,d
48
+ 1,2,3,4
49
+ 5,6,7,8
50
+ 4,3,2,1
51
+ """
52
+ When I run `csv-check -k --map "0:integer,1:integer,2:integer,3:integer" test.csv`
53
+ Then the output should contain "Found 0 errors"
54
+ And the exit status should be 0
@@ -0,0 +1,13 @@
1
+ Feature: Parsing is tolerant of spaces in cells and at the end of lines
2
+
3
+ Scenario: End of line has spaces
4
+ Given a file named "test.csv" with:
5
+ """
6
+ a,b,c,d
7
+ 1,2,3,4
8
+ 5,6,7,8
9
+ 4,3,2,1
10
+ """
11
+ When I run `csv-check -k --map "0:integer,1:integer,2:integer,3:integer" test.csv`
12
+ Then the output should contain "Found 0 errors"
13
+ And the exit status should be 0
@@ -0,0 +1,42 @@
1
+ @real
2
+ Feature: Test against a real example
3
+
4
+ Scenario: Test against a real example, row is OK
5
+ Given a file named "test.csv" with:
6
+ """
7
+ 1359515,7102,129,11/09/2012 00:00,IP12 7QN,X21356,RM,16 Long Road,2000,200000,7831060
8
+ 1359524,7158,129,11/09/2012 00:00,E14 3NU,X22366,SS,57 Wide Road,34000,0,734820
9
+ 1359534,8073,136,11/09/2012 00:00,S11 3XX,X22373,DP,27 Sally Road,8000,85000,559974/1
10
+ 1359590,7073,240,11/09/2012 00:00,RM5 5UB,X22419,IL,9 Green Green,130000,144000,A6229012190-1
11
+ 1359647,6937,353,11/09/2012 00:00,GU4 0SN,X22474,S2,50 Owls'n'Blades Road,250000,250000,9455066180 040305
12
+ 1359670,20,301,11/09/2012 00:00,DN17 5AD,X22898,FA,19 Olive Oyl,200000,200000,70727669
13
+ 1359682,3228,6,11/09/2012 00:00,AB1 8FD,X22811,RM,7 Middleearth Crescent,225000,500000,9453390 036541705
14
+ """
15
+ When I run `csv-check -k --map "0:integer,1:integer,2:integer,3:string,4:string,5:string,6:string,7:string,8:integer,9:integer,10:any" test.csv`
16
+ Then the output should contain "Found 0 errors"
17
+ And the exit status should be 0
18
+
19
+
20
+ Scenario: Test against a real example, row has errors
21
+ Given a file named "test.csv" with:
22
+ """
23
+ 6856,Mr,"Christie, Stephen",1985,0072613
24
+ 6857,Ms,"Parkes, Sirah",2001,0854757
25
+ 6859,Mr,"Dyce, John",Barr Brady,0096518
26
+ """
27
+ When I run `csv-check -k --map "0:integer,1:string,2:string,3:integer,4:integer" test.csv`
28
+ Then the output should contain "Found 1 errors"
29
+ And the exit status should be 1
30
+
31
+
32
+ Scenario: Check for presence of string in a column - multiple rows and mapping elements
33
+ Given a file named "test.csv" with:
34
+ """
35
+ a,b,c,d
36
+ e,f,g,h
37
+ i,j,k,l
38
+ m,n,o,p
39
+ """
40
+ When I run `csv-check -k --map "0:string,1:string,2:string,3:string" test.csv`
41
+ Then the output should contain "Found 0 errors"
42
+ And the exit status should be 0
@@ -0,0 +1,36 @@
1
+ @string
2
+ Feature: Checking that a column can contain only string values
3
+
4
+ Scenario: Check for presence of string in a column - successful - one row
5
+ Given a file named "test.csv" with:
6
+ """
7
+ a,b,c,d
8
+ w,x,y,z
9
+ """
10
+ When I run `csv-check -k --map "0:string" test.csv`
11
+ Then the output should contain "Found 0 errors"
12
+ And the exit status should be 0
13
+
14
+
15
+ Scenario: Check for presence of string in a column - failure - one row
16
+ Given a file named "test.csv" with:
17
+ """
18
+ a,b,c,d
19
+ w,x,1,z
20
+ """
21
+ When I run `csv-check -k --map "2:string" test.csv`
22
+ Then the output should contain "Found 1 errors"
23
+ And the exit status should be 1
24
+
25
+
26
+ Scenario: Check for presence of string in a column - multiple rows and mapping elements
27
+ Given a file named "test.csv" with:
28
+ """
29
+ a,b,c,d
30
+ e,f,g,h
31
+ i,j,k,l
32
+ m,n,o,p
33
+ """
34
+ When I run `csv-check -k --map "0:string,1:string,2:string,3:string" test.csv`
35
+ Then the output should contain "Found 0 errors"
36
+ And the exit status should be 0
@@ -0,0 +1 @@
1
+ require 'aruba/cucumber'
@@ -0,0 +1,24 @@
1
+ @integer
2
+ Feature: Should work the same regardless of number of rows
3
+
4
+ Scenario: Works for header + one row
5
+ Given a file named "test.csv" with:
6
+ """
7
+ a,b,c,d
8
+ 1,2,3,4
9
+ """
10
+ When I run `csv-check -k --map "0:integer" test.csv`
11
+ Then the output should contain "Found 0 errors"
12
+ And the exit status should be 0
13
+
14
+ Scenario: Works for header + many rows
15
+ Given a file named "test.csv" with:
16
+ """
17
+ a,b,c,d
18
+ 1,2,3,4
19
+ 5,6,7,8
20
+ 4,3,2,1
21
+ """
22
+ When I run `csv-check -k --map "0:integer,1:integer,2:integer,3:integer" test.csv`
23
+ Then the output should contain "Found 0 errors"
24
+ And the exit status should be 0
@@ -0,0 +1,3 @@
1
# Namespace holding the gem's version number.
module CsvCheck
  # Released version of the gem; frozen so it cannot be modified in place.
  VERSION = "0.0.1".freeze
end
data/lib/csvchecker.rb ADDED
@@ -0,0 +1,111 @@
1
+ require 'rubygems'
2
+ require 'fastercsv'
3
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typechecker.rb')
4
+
5
# Validates the cells of CSV input against a per-column type mapping.
#
# A mapping is a Hash from a zero-based column index (as a String) to a type
# name: "integer", "float", "string", "any", or "date" optionally followed by
# a quoted strftime pattern, e.g. "date('%d/%m/%Y %H:%M')".
module CsvChecker

  # Date pattern used when a column is mapped to "date" without a pattern.
  DEFAULT_DATE_FORMAT = "%d/%m/%Y"

  # Checks every row of the input and writes a report.
  #
  # input       - object handed to FasterCSV.new as the data source
  # output      - object responding to print that receives the error lines
  #               and the summary; nil falls back to $stdout
  # mappings    - column index => type name Hash; nil is treated as empty
  # csv_options - options Hash handed to FasterCSV.new
  # skip_first  - when true, the first row is neither checked nor counted
  #
  # Does not return: ends the process with exit status 1 when at least one
  # cell failed its check and 0 otherwise.
  def check input, output, mappings={}, csv_options={}, skip_first=false
    output ||= $stdout
    mappings ||= {}
    lines_scanned = 0
    num_errors = 0

    FasterCSV.new(input, csv_options).each do |row|
      if skip_first
        skip_first = false
        next
      end

      # Rows are numbered from zero, not counting a skipped first row.
      num_errors += check_row(lines_scanned, row, mappings, output) unless row.empty?
      lines_scanned += 1
    end

    output.print "Total number of lines checked: #{lines_scanned}\n"
    output.print "Found #{num_errors} errors\n"
    # exit follows immediately, so push buffered text out now.
    output.flush if output.respond_to?(:flush)

    exit(num_errors > 0 ? 1 : 0)
  end


  # Checks one row against the mappings and reports each failing cell.
  #
  # row_num  - number used for this row in error messages
  # row      - enumerable of cells
  # mappings - column index => type name Hash; unmapped columns are skipped
  # output   - object responding to print that receives the error lines
  #
  # Returns the number of cells that failed their check.
  # Raises RuntimeError when mappings or row is nil.
  def check_row row_num, row, mappings, output=$stdout
    raise "Nil mappings" if mappings.nil?
    raise "Nil row" if row.nil?

    errors = 0
    row.each_with_index do |item, column|
      type = mappings[column.to_s]
      next unless type
      next if is_valid(item, type)

      output.print "Error at row #{row_num} column #{column}\n"
      errors += 1
    end

    errors
  end


  # Tells whether a cell satisfies a type name (matched case-insensitively).
  #
  # Returns the result of the matching TypeChecker predicate.
  # Raises RuntimeError for a nil, empty or unrecognised type.
  def is_valid cell, type
    raise 'Nil type' unless type
    raise 'Empty type' unless type.size > 0

    type_selector = type.downcase
    # Anything starting with "date" is a date type, possibly carrying a pattern.
    type_selector = "date" if type_selector =~ /\Adate/

    checker = TypeChecker.new
    case type_selector
    when 'integer'
      checker.is_integer?(cell)
    when 'float'
      checker.is_float?(cell)
    when 'string'
      checker.is_string?(cell)
    when 'date'
      checker.is_date?(cell, dateFormatFrom(type))
    when 'any'
      checker.is_any?(cell)
    else
      raise "Unrecognised column type [#{type_selector}]"
    end
  end

  # Extracts the date pattern from a type name such as "date('%d-%m-%Y')".
  #
  # Returns the text between the first and the last single quote, or
  # DEFAULT_DATE_FORMAT when the type name is not a date type or carries no
  # quoted pattern.
  def dateFormatFrom str
    return DEFAULT_DATE_FORMAT unless str =~ /\Adate/i

    str[ /'(.*)'/ , 1 ] || DEFAULT_DATE_FORMAT
  end

  module_function :check
  module_function :is_valid
  module_function :check_row
  module_function :dateFormatFrom
end
@@ -0,0 +1,55 @@
1
require 'date'

# Predicates that decide whether a CSV cell looks like a given type.
# None of them modifies the value it is given.
class TypeChecker

  # Optional sign followed by one or more digits, anchored to the whole text.
  INTEGER_PATTERN = /\A[-+]?[0-9]+\z/
  # Optional sign, optional integer part, a dot and at least one digit.
  FLOAT_PATTERN = /\A[-+]?[0-9]*\.[0-9]+\z/

  # Returns true when the value, converted to text and stripped of
  # surrounding whitespace, is an optionally signed run of digits.
  def is_integer?(str)
    !(str.to_s.strip =~ INTEGER_PATTERN).nil?
  end

  # Returns true when the value, converted to text and stripped of
  # surrounding whitespace, is an optionally signed decimal number with at
  # least one digit after the decimal point.
  def is_float?(str)
    !(str.to_s.strip =~ FLOAT_PATTERN).nil?
  end

  # Returns true when data is a String that parses under the strftime-style
  # format and is reproduced exactly when formatted back with that format.
  # Anything else, including nil, yields false.
  def is_date?(data, format)
    return false unless data.is_a?(String)

    parsed = DateTime.strptime(data, format)
    # The round trip rejects trailing text and non-canonical spellings.
    parsed.strftime(format) == data
  rescue StandardError
    # Any failure to parse means the cell is not a date in this format.
    false
  end

  # Returns true when the value's text form is exactly one space.
  def is_space?(thing)
    thing.to_s == " "
  end

  # Returns true when the value is nil.
  def is_nil?(thing)
    thing.nil?
  end

  # Returns true for a single space or any String that is neither an
  # integer nor a float; false for nil, numbers and non-String values.
  def is_string?(thing)
    return false if is_nil?(thing)
    return false if is_integer?(thing)
    return false if is_float?(thing)
    return true if is_space?(thing)

    thing.is_a?(String)
  end

  # Returns true for anything that is an integer, a float, a space or a
  # String; false for nil and for other values.
  def is_any?(thing)
    return false if is_nil?(thing)
    return true if is_integer?(thing)
    return true if is_float?(thing)
    return true if is_space?(thing)

    thing.is_a?(String)
  end

end
data/lib/typemapper.rb ADDED
@@ -0,0 +1,17 @@
1
# Turns a mapping string such as "0:integer,1:date('%d/%m/%Y')" into a Hash
# from column index to type name.
class TypeMapper

  # Parses a comma-separated list of index:type pairs.
  #
  # str - the mapping string; whitespace around each index and type is
  #       ignored, and commas inside single quotes do not end a pair
  #
  # Returns a Hash whose keys and values are Strings.
  # Raises RuntimeError when str is nil or empty, or when a pair lacks an
  # index or a type.
  def to_map str
    raise 'Nil map string' unless str
    raise 'Empty map string' if str.length == 0

    map = {}
    # A pair is a run of quoted sections and non-comma characters, so a date
    # pattern containing a comma stays in one piece.
    str.scan(/(?:'[^']*'|[^,])+/).each do |pair|
      # Limit of 2 keeps colons inside the type, e.g. in "%H:%M".
      column, type = pair.split(":", 2).map { |part| part.strip }
      if column.nil? || column.empty? || type.nil? || type.empty?
        raise "Malformed mapping [#{pair}]"
      end
      map[column] = type
    end
    map
  end

end
@@ -0,0 +1,17 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'csvchecker.rb')
2
+
3
# Examples for pulling the date pattern out of a "date(...)" column type.
describe CsvChecker, "#dateFormatFrom" do

  # A bare date pattern is returned without the surrounding date('...').
  it "should extract a simple format" do
    extracted = CsvChecker.dateFormatFrom("date('%d-%m-%Y')")
    extracted.should eq("%d-%m-%Y")
  end

  # Patterns may contain spaces and colons.
  it "should extract a timestamp format" do
    extracted = CsvChecker.dateFormatFrom("date('%d-%m-%Y %H:%M')")
    extracted.should eq("%d-%m-%Y %H:%M")
  end

  # A plain "date" type falls back to the default pattern.
  it "should supply the default format if no format is supplied" do
    extracted = CsvChecker.dateFormatFrom("date")
    extracted.should eq("%d/%m/%Y")
  end

end
@@ -0,0 +1,25 @@
1
+ require 'csvchecker'
2
+
3
# Examples for dispatching a cell to the check named by its column type.
describe CsvChecker, "#is_valid" do

  # Type names outside the supported set are rejected with an exception.
  it "should raise an error for an unknown type" do
    lookup = lambda { CsvChecker.is_valid(1, 'SOMETHING_ELSE') }
    expect { lookup.call }.to raise_error
  end

  it "should pass when an integer is found" do
    verdict = CsvChecker.is_valid(1, 'integer')
    verdict.should eq(true)
  end

  it "should allow types to be specific case-insensitively" do
    verdict = CsvChecker.is_valid(1, 'InTeGeR')
    verdict.should eq(true)
  end

  it "should allow cells to have spaces at the end" do
    verdict = CsvChecker.is_valid("1 ", 'integer')
    verdict.should eq(true)
  end

  it "should allow cells to have spaces at the start" do
    verdict = CsvChecker.is_valid(" 1 ", 'integer')
    verdict.should eq(true)
  end

end
@@ -0,0 +1,25 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typechecker.rb')
2
+
3
# Examples for TypeChecker#is_any?, which accepts every non-nil cell value
# exercised here.
describe TypeChecker, "#is_any" do

  it "should allow a float" do
    TypeChecker.new.is_any?("1.0").should eq(true)
  end

  it "should allow an integer" do
    TypeChecker.new.is_any?("1").should eq(true)
  end

  it "should allow a string" do
    TypeChecker.new.is_any?("x").should eq(true)
  end

  it "should allow a space" do
    TypeChecker.new.is_any?(" ").should eq(true)
  end

  it "should not allow a nil" do
    TypeChecker.new.is_any?(nil).should eq(false)
  end

end
@@ -0,0 +1,17 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typechecker.rb')
2
+
3
# Examples for TypeChecker#is_date?, which compares a cell with a date pattern.
describe TypeChecker, "#is_date" do

  let(:checker) { TypeChecker.new }

  it "should allow a date that matches the date format" do
    checker.is_date?("01/01/2012", "%d/%m/%Y").should eq(true)
  end

  # Dashes in the cell do not match the slashes in the pattern.
  it "should work with custom formats" do
    checker.is_date?("01-01-2012", "%d/%m/%Y").should eq(false)
  end

  it "should allow timestamps if used with custom format" do
    checker.is_date?("01-01-2012 12:50", "%d-%m-%Y %H:%M").should eq(true)
  end

end
@@ -0,0 +1,29 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typechecker.rb')
2
+
3
# Examples for TypeChecker#is_float?: signs, zero padding and the digits
# required around the decimal point.
describe TypeChecker, "#is_float" do

  it "should allow a negative float" do
    TypeChecker.new.is_float?("-1.0").should eq(true)
  end

  it "should allow a positive signed float" do
    TypeChecker.new.is_float?("+1.0").should eq(true)
  end

  it "should allow a zero-padded float" do
    TypeChecker.new.is_float?("001.0").should eq(true)
  end

  it "should allow more than one digit after the decimal point" do
    TypeChecker.new.is_float?("2.000").should eq(true)
  end

  it "should require numbers after the decimal point" do
    TypeChecker.new.is_float?("1.").should eq(false)
  end

  it "should not require numbers before the decimal point" do
    TypeChecker.new.is_float?(".22").should eq(true)
  end

end
@@ -0,0 +1,21 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typechecker.rb')
2
+
3
# Examples for TypeChecker#is_integer?: only whole numbers qualify.
describe TypeChecker, "#is_integer" do

  let(:checker) { TypeChecker.new }

  it "should be true for an Integer" do
    checker.is_integer?("1").should eq(true)
  end

  it "should be false for a string" do
    checker.is_integer?("a string").should eq(false)
  end

  it "should be false for a space" do
    checker.is_integer?(" ").should eq(false)
  end

  it "should be false for a float" do
    checker.is_integer?("1.0").should eq(false)
  end

end
@@ -0,0 +1,25 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typechecker.rb')
2
+
3
# Examples for TypeChecker#is_string?: text qualifies, numbers do not.
describe TypeChecker, "#is_string" do

  let(:checker) { TypeChecker.new }

  it "should be true for a string" do
    checker.is_string?("some string").should eq(true)
  end

  it "should be false for an integer" do
    checker.is_string?("1").should eq(false)
  end

  # Digits embedded in other text do not make the cell numeric.
  it "should be true for a string with an integer in it" do
    checker.is_string?("abc1def").should eq(true)
  end

  it "should be true for a space" do
    checker.is_string?(" ").should eq(true)
  end

  it "should be false for a float" do
    checker.is_string?("1.0").should eq(false)
  end

end
@@ -0,0 +1,9 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typechecker.rb')
2
+
3
# Examples for the small helper predicates on TypeChecker.
describe TypeChecker, "#misc" do

  # The example name states what the assertion checks: is_nil? answers true.
  it "should be true for nil" do
    TypeChecker.new.is_nil?(nil).should eq(true)
  end

end
@@ -0,0 +1,21 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typemapper.rb')
2
+
3
# Examples for TypeMapper#to_map, which parses "index:type" pairs into a Hash.
describe TypeMapper, "#to_map" do

  let(:mapper) { TypeMapper.new }

  it "should create the map" do
    parsed = mapper.to_map("0:integer,1:string")
    parsed.should == {"1"=>"string", "0"=>"integer"}
  end

  it "should raise error for a nil string" do
    expect { mapper.to_map(nil) }.to raise_error
  end

  it "should raise error for an empty string" do
    expect { mapper.to_map("") }.to raise_error
  end

  # Only the first colon separates index from type; later ones belong to the type.
  it "should allow type names with colons in (for date format)" do
    parsed = mapper.to_map("0:integer,1:date('%d/%m/%Y %H:%M')")
    parsed.should == { "0"=>"integer", "1"=>"date('%d/%m/%Y %H:%M')" }
  end

end
data/test.csv ADDED
@@ -0,0 +1,2 @@
1
+ a,b,c,d
2
+ 1,2,3,4
metadata ADDED
@@ -0,0 +1,171 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: csv-check
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - rory
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-10-09 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: fastercsv
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: rspec
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ hash: 15
43
+ segments:
44
+ - 2
45
+ - 6
46
+ version: "2.6"
47
+ type: :development
48
+ version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ name: cucumber
51
+ prerelease: false
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ type: :development
62
+ version_requirements: *id003
63
+ - !ruby/object:Gem::Dependency
64
+ name: aruba
65
+ prerelease: false
66
+ requirement: &id004 !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ hash: 3
72
+ segments:
73
+ - 0
74
+ version: "0"
75
+ type: :development
76
+ version_requirements: *id004
77
+ description: Lets you verify that cells in a CSV files match formats you expect
78
+ email:
79
+ - rory@deadcrow.net
80
+ executables:
81
+ - csv-check
82
+ extensions: []
83
+
84
+ extra_rdoc_files: []
85
+
86
+ files:
87
+ - .gitignore
88
+ - Gemfile
89
+ - Gemfile.lock
90
+ - LICENSE.txt
91
+ - README.md
92
+ - Rakefile
93
+ - bin/csv-check
94
+ - csv-check.gemspec
95
+ - features/any.feature
96
+ - features/cli.feature
97
+ - features/date.feature
98
+ - features/float.feature
99
+ - features/integer.feature
100
+ - features/parsing-tolerance.feature
101
+ - features/real-example.feature
102
+ - features/string.feature
103
+ - features/support/setup.rb
104
+ - features/works-one-one-row-or-many.feature
105
+ - lib/csv-check/version.rb
106
+ - lib/csvchecker.rb
107
+ - lib/typechecker.rb
108
+ - lib/typemapper.rb
109
+ - spec/csvchecker_date_format_extraction.rb
110
+ - spec/csvchecker_is_valid.rb
111
+ - spec/typechecker_is_any.rb
112
+ - spec/typechecker_is_date.rb
113
+ - spec/typechecker_is_float.rb
114
+ - spec/typechecker_is_integer.rb
115
+ - spec/typechecker_is_string.rb
116
+ - spec/typechecker_misc.rb
117
+ - spec/typemapper.rb
118
+ - test.csv
119
+ homepage: http://github.com/rorygibson/csv-check
120
+ licenses: []
121
+
122
+ post_install_message:
123
+ rdoc_options: []
124
+
125
+ require_paths:
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ hash: 3
133
+ segments:
134
+ - 0
135
+ version: "0"
136
+ required_rubygems_version: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ hash: 3
142
+ segments:
143
+ - 0
144
+ version: "0"
145
+ requirements: []
146
+
147
+ rubyforge_project:
148
+ rubygems_version: 1.8.24
149
+ signing_key:
150
+ specification_version: 3
151
+ summary: Allows you to specify formats (integer, float, string, date (with checking aginst date format strings) for columns in CSV data, both std in and files)
152
+ test_files:
153
+ - features/any.feature
154
+ - features/cli.feature
155
+ - features/date.feature
156
+ - features/float.feature
157
+ - features/integer.feature
158
+ - features/parsing-tolerance.feature
159
+ - features/real-example.feature
160
+ - features/string.feature
161
+ - features/support/setup.rb
162
+ - features/works-one-one-row-or-many.feature
163
+ - spec/csvchecker_date_format_extraction.rb
164
+ - spec/csvchecker_is_valid.rb
165
+ - spec/typechecker_is_any.rb
166
+ - spec/typechecker_is_date.rb
167
+ - spec/typechecker_is_float.rb
168
+ - spec/typechecker_is_integer.rb
169
+ - spec/typechecker_is_string.rb
170
+ - spec/typechecker_misc.rb
171
+ - spec/typemapper.rb