csv-check 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in csv-check.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,45 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ csv-check (0.0.1)
5
+ fastercsv
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ aruba (0.4.11)
11
+ childprocess (>= 0.2.3)
12
+ cucumber (>= 1.1.1)
13
+ ffi (>= 1.0.11)
14
+ rspec (>= 2.7.0)
15
+ builder (3.1.3)
16
+ childprocess (0.3.5)
17
+ ffi (~> 1.0, >= 1.0.6)
18
+ cucumber (1.2.1)
19
+ builder (>= 2.1.2)
20
+ diff-lcs (>= 1.1.3)
21
+ gherkin (~> 2.11.0)
22
+ json (>= 1.4.6)
23
+ diff-lcs (1.1.3)
24
+ fastercsv (1.5.5)
25
+ ffi (1.1.5)
26
+ gherkin (2.11.2)
27
+ json (>= 1.4.6)
28
+ json (1.7.5)
29
+ rspec (2.11.0)
30
+ rspec-core (~> 2.11.0)
31
+ rspec-expectations (~> 2.11.0)
32
+ rspec-mocks (~> 2.11.0)
33
+ rspec-core (2.11.1)
34
+ rspec-expectations (2.11.3)
35
+ diff-lcs (~> 1.1.3)
36
+ rspec-mocks (2.11.3)
37
+
38
+ PLATFORMS
39
+ ruby
40
+
41
+ DEPENDENCIES
42
+ aruba
43
+ csv-check!
44
+ cucumber
45
+ rspec (~> 2.6)
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 rory
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # CsvCheck
2
+
3
+ Format checker for CSV files
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'csv-check'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install csv-check
18
+
19
+ ## Usage
20
+
21
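+ Run `csv-check [INPUT] [OPTIONS]`, for example `csv-check -k --map "0:integer,1:date('%d/%m/%Y')" data.csv`; `-k` skips the header row, `-s SEP` sets the separator character, and `--help` lists every option. When INPUT is omitted the CSV is read from standard input.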
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/csv-check ADDED
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "rubygems"
4
+ require 'optparse'
5
+ require 'ostruct'
6
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'csvchecker.rb')
7
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typemapper.rb')
8
+
9
# Command-line entry point for csv-check.
#
# Parses the options, opens the input and output streams, converts the
# --map string into a column => type hash and hands everything to
# CsvChecker.check, which prints the report and terminates the process
# with status 0 (no errors) or 1 (errors found).
module CsvCheckRunner

  # Required here so the script does not rely on another file having loaded
  # them: StringIO wraps piped stdin, and the version file defines
  # CsvCheck::VERSION, which --version prints.
  require 'stringio'
  require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'csv-check', 'version.rb')

  # command-line parsing
  COMMAND = File.basename($0)
  USAGE = "Usage: #{COMMAND} [INPUT] [OPTIONS]"

  options = OpenStruct.new
  options.output = "-"
  options.separator = ","
  options.mappings = nil
  options.skip_first_row = false

  opts = OptionParser.new do |o|
    o.banner = USAGE
    o.separator ""
    o.separator "Specific options:"

    o.on("-k", "--skip-headers-from-file", "Ignore the headers (first line) in the file") do
      options.skip_first_row = true
    end

    o.on("-s", "--separator SEP", "Set separator character surrounded by single quotes (default is ',')") do |sep|
      options.separator = sep
    end

    o.on("-m", "--map \"<type map>\"", "Map column numbers to types, e.g. \"0:integer,1:date('%d/%m/%Y')\"") do |map|
      options.mappings = map
    end

    o.on("-o", "--output FILE", "Write output to a file") do |fn|
      options.output = fn
    end

    o.on_tail("-h", "--help", "Show this message") do
      puts o
      exit
    end

    o.on_tail("-v", "--version", "Show version") do
      puts CsvCheck::VERSION
      exit
    end
  end

  # A mistyped option is a user error: report it with the usage text
  # instead of dumping a stack trace.
  begin
    opts.parse!(ARGV)
  rescue OptionParser::ParseError => e
    $stderr.puts "Unable to parse options: #{e.message}"
    $stderr.puts opts
    exit 1
  end

  # Build the mappings before touching any file so that a malformed map
  # cannot leave a truncated output file behind. No --map means "check
  # nothing", expressed as an empty hash rather than nil.
  typeMappings = options.mappings ? TypeMapper.new.to_map(options.mappings) : {}

  # initialize input handle (a file argument, or everything piped on stdin)
  if ARGV.size > 0
    begin
      IN = File.open(ARGV[0], "r")
    rescue StandardError
      print "Unable to open input file #{ARGV[0]}\n"
      exit 1
    end
  else
    IN = StringIO.new($stdin.read)
  end

  # initialize output handle ("-" means standard output)
  OUT = options.output == "-" ? $stdout.clone : File.open(options.output, "w")

  # run the command
  begin
    CsvChecker.check(IN, OUT, typeMappings, {:col_sep => options.separator}, options.skip_first_row)
  ensure
    # CsvChecker.check leaves via exit (SystemExit), so the cleanup has to
    # live in an ensure clause to run at all.
    OUT.flush
    OUT.close unless options.output == "-"
    IN.close
  end
end
data/csv-check.gemspec ADDED
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'csv-check/version'
5
+
6
# Package definition for the csv-check gem. The file list is taken from git,
# so the gem must be built from inside a git checkout.
Gem::Specification.new do |gem|
  gem.name          = "csv-check"
  gem.version       = CsvCheck::VERSION
  gem.authors       = ["rory"]
  gem.email         = ["rory@deadcrow.net"]
  gem.description   = %q{Lets you verify that cells in a CSV file match the formats you expect}
  gem.summary       = %q{Allows you to specify formats (integer, float, string, date (with checking against date format strings)) for columns in CSV data, both stdin and files}
  gem.homepage      = "http://github.com/rorygibson/csv-check"
  # Declares the licence shipped in LICENSE.txt so it shows up in the
  # published gem metadata.
  gem.license       = "MIT"

  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency "fastercsv", ">= 0"

  gem.add_development_dependency "rspec", "~> 2.6"
  gem.add_development_dependency "cucumber"
  gem.add_development_dependency "aruba"
end
@@ -0,0 +1,47 @@
1
+ @any
2
+ Feature: Checking that a column can contain any type of value
3
+
4
+ Scenario: Any can substitute for integer
5
+ Given a file named "test.csv" with:
6
+ """
7
+ a,b,c,d
8
+ 1,2,3,4
9
+ """
10
+ When I run `csv-check -k --map "0:any" test.csv`
11
+ Then the output should contain "Found 0 errors"
12
+ And the exit status should be 0
13
+
14
+
15
+ Scenario: Any can substitute for float
16
+ Given a file named "test.csv" with:
17
+ """
18
+ a,b,c,d
19
+ 1.0, 2, 3, 4
20
+ """
21
+ When I run `csv-check -k --map "0:any" test.csv`
22
+ Then the output should contain "Found 0 errors"
23
+ And the exit status should be 0
24
+
25
+
26
+ Scenario: Any can substitute for string
27
+ Given a file named "test.csv" with:
28
+ """
29
+ a,b,c,d
30
+ x, 2, 3, 4
31
+ """
32
+ When I run `csv-check -k --map "0:any" test.csv`
33
+ Then the output should contain "Found 0 errors"
34
+ And the exit status should be 0
35
+
36
+
37
+ Scenario: Any doesn't substitute for nil
38
+ Given a file named "test.csv" with:
39
+ """
40
+ a,b,c,d
41
+ , 2, 3, 4
42
+ """
43
+ When I run `csv-check -k --map "0:any" test.csv`
44
+ Then the output should contain "Found 1 errors"
45
+ And the exit status should be 1
46
+
47
+
@@ -0,0 +1,22 @@
1
+ Feature: Skip first row
2
+ In order to work with files with or without headers
3
+ As a CLI
4
+ I want to be able to skip the first row of input
5
+
6
+ Scenario: Skip first row
7
+ Given a file named "test.csv" with:
8
+ """
9
+ a,b,c,d
10
+ 1,2,3,4
11
+ """
12
+ When I run `csv-check -k -m "0:integer" test.csv`
13
+ Then the output should contain "Total number of lines checked: 1"
14
+
15
+ Scenario: Don't skip first row
16
+ Given a file named "test.csv" with:
17
+ """
18
+ a,b,c,d
19
+ 1,2,3,4
20
+ """
21
+ When I run `csv-check -m "0:integer" test.csv`
22
+ Then the output should contain "Total number of lines checked: 2"
@@ -0,0 +1,46 @@
1
+ @date
2
+ Feature: Checking that a column can contain only date values
3
+
4
+ Scenario: Check for presence of date in a column - successful - one row
5
+ Given a file named "test.csv" with:
6
+ """
7
+ a,b,c,d
8
+ 01/01/1980,2,3,4
9
+ """
10
+ When I run `csv-check -k --map "0:date" test.csv`
11
+ Then the output should contain "Found 0 errors"
12
+ And the exit status should be 0
13
+
14
+
15
+ Scenario: Specifying date format - success
16
+ Given a file named "test.csv" with:
17
+ """
18
+ a,b,c,d
19
+ 01/01/1980 12:50,2,3,4
20
+ """
21
+ When I run `csv-check -k --map "0:date('%d/%m/%Y %H:%M')" test.csv`
22
+ Then the output should contain "Found 0 errors"
23
+ And the exit status should be 0
24
+
25
+
26
+ Scenario: Specifying date format - failure
27
+ Given a file named "test.csv" with:
28
+ """
29
+ a,b,c,d
30
+ 01/01/1980 12:50,2,3,4
31
+ """
32
+ When I run `csv-check -k --map "0:date('%d/%m/%Y')" test.csv`
33
+ Then the output should contain "Found 1 errors"
34
+ And the exit status should be 1
35
+
36
+
37
+ Scenario: Check for presence of date in a column - failure - one row
38
+ Given a file named "test.csv" with:
39
+ """
40
+ a,b,c,d
41
+ 1,2,x,4
42
+ """
43
+ When I run `csv-check -k --map "2:date" test.csv`
44
+ Then the output should contain "Found 1 errors"
45
+ And the exit status should be 1
46
+
@@ -0,0 +1,34 @@
1
+ @float
2
+ Feature: Checking that a column can contain only float values
3
+
4
+ Scenario: Check for presence of float in a column - successful - one row
5
+ Given a file named "test.csv" with:
6
+ """
7
+ a,b,c,d
8
+ 1.0, 2, 3, 4
9
+ """
10
+ When I run `csv-check -k --map "0:float" test.csv`
11
+ Then the output should contain "Found 0 errors"
12
+ And the exit status should be 0
13
+
14
+ Scenario: Check for presence of float in a column - failure - one row
15
+ Given a file named "test.csv" with:
16
+ """
17
+ a,b,c,d
18
+ 1.0, 2.0, x, 4.0
19
+ """
20
+ When I run `csv-check -k --map "2:float" test.csv`
21
+ Then the output should contain "Found 1 errors"
22
+ And the exit status should be 1
23
+
24
+ Scenario: Check for presence of float in a column - multiple rows and mapping elements
25
+ Given a file named "test.csv" with:
26
+ """
27
+ a,b,c,d
28
+ 1.0, 2.0, 3.0, 4.0
29
+ 5.0, 6.0, 7.0, 8.0
30
+ 4.0, 3.0, 2.0, 1.0
31
+ """
32
+ When I run `csv-check -k --map "0:float,1:float,2:float,3:float" test.csv`
33
+ Then the output should contain "Found 0 errors"
34
+ And the exit status should be 0
@@ -0,0 +1,54 @@
1
+ @integer
2
+ Feature: Checking that a column can contain only integer values
3
+
4
+ Scenario: Check for presence of integer in a column - successful - one row
5
+ Given a file named "test.csv" with:
6
+ """
7
+ a,b,c,d
8
+ 1,2,3,4
9
+ """
10
+ When I run `csv-check -k --map "0:integer" test.csv`
11
+ Then the output should contain "Found 0 errors"
12
+ And the exit status should be 0
13
+
14
+ Scenario: Check for presence of integer in a column - failure - one row
15
+ Given a file named "test.csv" with:
16
+ """
17
+ a,b,c,d
18
+ 1,2,x,4
19
+ """
20
+ When I run `csv-check -k --map "2:integer" test.csv`
21
+ Then the output should contain "Found 1 errors"
22
+ And the exit status should be 1
23
+
24
+ Scenario: Quoted integers are allowed
25
+ Given a file named "test.csv" with:
26
+ """
27
+ a,b,c,d
28
+ "1000","2000","1000","4000"
29
+ """
30
+ When I run `csv-check -k --map "2:integer" test.csv`
31
+ Then the output should contain "Found 0 errors"
32
+ And the exit status should be 0
33
+
34
+ Scenario: Comma-separated numbers aren't allowed
35
+ Given a file named "test.csv" with:
36
+ """
37
+ a,b,c,d
38
+ "1000","2000","1,000","4000"
39
+ """
40
+ When I run `csv-check -k --map "2:integer" test.csv`
41
+ Then the output should contain "Found 1 errors"
42
+ And the exit status should be 1
43
+
44
+ Scenario: Check for presence of integer in a column - multiple rows and mapping elements
45
+ Given a file named "test.csv" with:
46
+ """
47
+ a,b,c,d
48
+ 1,2,3,4
49
+ 5,6,7,8
50
+ 4,3,2,1
51
+ """
52
+ When I run `csv-check -k --map "0:integer,1:integer,2:integer,3:integer" test.csv`
53
+ Then the output should contain "Found 0 errors"
54
+ And the exit status should be 0
@@ -0,0 +1,13 @@
1
+ Feature: Parsing is tolerant of spaces in cells and at the end of lines
2
+
3
+ Scenario: End of line has spaces
4
+ Given a file named "test.csv" with:
5
+ """
6
+ a,b,c,d
7
+ 1,2,3,4
8
+ 5,6,7,8
9
+ 4,3,2,1
10
+ """
11
+ When I run `csv-check -k --map "0:integer,1:integer,2:integer,3:integer" test.csv`
12
+ Then the output should contain "Found 0 errors"
13
+ And the exit status should be 0
@@ -0,0 +1,42 @@
1
+ @real
2
+ Feature: Test against a real example
3
+
4
+ Scenario: Test against a real example, row is OK
5
+ Given a file named "test.csv" with:
6
+ """
7
+ 1359515,7102,129,11/09/2012 00:00,IP12 7QN,X21356,RM,16 Long Road,2000,200000,7831060
8
+ 1359524,7158,129,11/09/2012 00:00,E14 3NU,X22366,SS,57 Wide Road,34000,0,734820
9
+ 1359534,8073,136,11/09/2012 00:00,S11 3XX,X22373,DP,27 Sally Road,8000,85000,559974/1
10
+ 1359590,7073,240,11/09/2012 00:00,RM5 5UB,X22419,IL,9 Green Green,130000,144000,A6229012190-1
11
+ 1359647,6937,353,11/09/2012 00:00,GU4 0SN,X22474,S2,50 Owls'n'Blades Road,250000,250000,9455066180 040305
12
+ 1359670,20,301,11/09/2012 00:00,DN17 5AD,X22898,FA,19 Olive Oyl,200000,200000,70727669
13
+ 1359682,3228,6,11/09/2012 00:00,AB1 8FD,X22811,RM,7 Middleearth Crescent,225000,500000,9453390 036541705
14
+ """
15
+ When I run `csv-check -k --map "0:integer,1:integer,2:integer,3:string,4:string,5:string,6:string,7:string,8:integer,9:integer,10:any" test.csv`
16
+ Then the output should contain "Found 0 errors"
17
+ And the exit status should be 0
18
+
19
+
20
+ Scenario: Test against a real example, row has errors
21
+ Given a file named "test.csv" with:
22
+ """
23
+ 6856,Mr,"Christie, Stephen",1985,0072613
24
+ 6857,Ms,"Parkes, Sirah",2001,0854757
25
+ 6859,Mr,"Dyce, John",Barr Brady,0096518
26
+ """
27
+ When I run `csv-check -k --map "0:integer,1:string,2:string,3:integer,4:integer" test.csv`
28
+ Then the output should contain "Found 1 errors"
29
+ And the exit status should be 1
30
+
31
+
32
+ Scenario: Check for presence of string in a column - multiple rows and mapping elements
33
+ Given a file named "test.csv" with:
34
+ """
35
+ a,b,c,d
36
+ e,f,g,h
37
+ i,j,k,l
38
+ m,n,o,p
39
+ """
40
+ When I run `csv-check -k --map "0:string,1:string,2:string,3:string" test.csv`
41
+ Then the output should contain "Found 0 errors"
42
+ And the exit status should be 0
@@ -0,0 +1,36 @@
1
+ @string
2
+ Feature: Checking that a column can contain only string values
3
+
4
+ Scenario: Check for presence of string in a column - successful - one row
5
+ Given a file named "test.csv" with:
6
+ """
7
+ a,b,c,d
8
+ w,x,y,z
9
+ """
10
+ When I run `csv-check -k --map "0:string" test.csv`
11
+ Then the output should contain "Found 0 errors"
12
+ And the exit status should be 0
13
+
14
+
15
+ Scenario: Check for presence of string in a column - failure - one row
16
+ Given a file named "test.csv" with:
17
+ """
18
+ a,b,c,d
19
+ w,x,1,z
20
+ """
21
+ When I run `csv-check -k --map "2:string" test.csv`
22
+ Then the output should contain "Found 1 errors"
23
+ And the exit status should be 1
24
+
25
+
26
+ Scenario: Check for presence of string in a column - multiple rows and mapping elements
27
+ Given a file named "test.csv" with:
28
+ """
29
+ a,b,c,d
30
+ e,f,g,h
31
+ i,j,k,l
32
+ m,n,o,p
33
+ """
34
+ When I run `csv-check -k --map "0:string,1:string,2:string,3:string" test.csv`
35
+ Then the output should contain "Found 0 errors"
36
+ And the exit status should be 0
@@ -0,0 +1 @@
1
+ require 'aruba/cucumber'
@@ -0,0 +1,24 @@
1
+ @integer
2
+ Feature: Should work the same regardless of number of rows
3
+
4
+ Scenario: Works for header + one row
5
+ Given a file named "test.csv" with:
6
+ """
7
+ a,b,c,d
8
+ 1,2,3,4
9
+ """
10
+ When I run `csv-check -k --map "0:integer" test.csv`
11
+ Then the output should contain "Found 0 errors"
12
+ And the exit status should be 0
13
+
14
+ Scenario: Works for header + many rows
15
+ Given a file named "test.csv" with:
16
+ """
17
+ a,b,c,d
18
+ 1,2,3,4
19
+ 5,6,7,8
20
+ 4,3,2,1
21
+ """
22
+ When I run `csv-check -k --map "0:integer,1:integer,2:integer,3:integer" test.csv`
23
+ Then the output should contain "Found 0 errors"
24
+ And the exit status should be 0
@@ -0,0 +1,3 @@
1
# Namespace that carries the gem's release information.
module CsvCheck
  # Version string read by the gemspec and printed by `csv-check --version`.
  VERSION = '0.0.1'
end
data/lib/csvchecker.rb ADDED
@@ -0,0 +1,111 @@
1
+ require 'rubygems'
2
+ require 'fastercsv'
3
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typechecker.rb')
4
+
5
# Validates the cells of CSV input against a per-column type mapping.
#
# A mapping is a hash from column index (as a String, e.g. "0") to a type
# name: "integer", "float", "string", "any", "date" or "date('<strftime
# format>')". Columns without a mapping are not checked.
module CsvChecker

  # Format assumed for "date" columns that do not carry an explicit one.
  DEFAULT_DATE_FORMAT = "%d/%m/%Y"

  # Checks every row of +input+ and writes the report to +output+.
  #
  # input       - IO-like object holding the CSV data.
  # output      - IO-like object receiving the report (nil means $stdout).
  # mappings    - column index => type hash (nil is treated as empty).
  # csv_options - options passed straight to FasterCSV.new.
  # skip_first  - when true, the first row is treated as a header and skipped.
  #
  # This method does not return: it ends the process with exit status 1 when
  # at least one cell failed validation and 0 otherwise.
  def check(input, output, mappings={}, csv_options={}, skip_first=false)
    output ||= $stdout
    mappings ||= {}
    lines_scanned = 0
    num_errors = 0

    FasterCSV.new(input, csv_options).each do |row|
      if skip_first
        skip_first = false
        next
      end

      # Rows are numbered from 0, counting only rows that were not skipped.
      num_errors += check_row(lines_scanned, row, mappings, output) unless row.empty?
      lines_scanned += 1
    end

    output.print "Total number of lines checked: #{lines_scanned}\n"
    output.print "Found #{num_errors} errors"
    output.print "\n"
    # exit follows immediately, so push the report out explicitly.
    output.flush

    exit(num_errors > 0 ? 1 : 0)
  end

  # Validates one row and returns the number of cells that failed.
  #
  # row_num  - row index used in error messages.
  # row      - enumerable of cells.
  # mappings - column index => type hash; must not be nil.
  # output   - IO-like object receiving progress and error lines.
  #
  # Raises RuntimeError when +mappings+ or +row+ is nil.
  def check_row(row_num, row, mappings, output=$stdout)
    raise "Nil mappings" if mappings.nil?
    raise "Nil row" if row.nil?

    errors = 0
    row.each_with_index do |item, column|
      type = mappings[column.to_s]
      next unless type

      output.print "Checking [#{item}] against [#{type}]\n"
      next if is_valid(item, type)

      output.print "Error at row #{row_num} column #{column}\n"
      errors += 1
    end

    errors
  end

  # Returns whether +cell+ satisfies the column type +type+.
  #
  # Type names are case-insensitive. Anything starting with "date" is a date
  # type, optionally carrying its own format.
  #
  # Raises RuntimeError when +type+ is nil, empty or not a known type.
  def is_valid(cell, type)
    raise 'Nil type' unless type
    raise 'Empty type' if type.empty?

    # Only the selector is normalised; the original string is kept for the
    # date format because strftime directives are case-sensitive.
    type_selector = (type =~ /\Adate/i) ? "date" : type.downcase

    case type_selector
    when 'integer'
      TypeChecker.new.is_integer?(cell)
    when 'float'
      TypeChecker.new.is_float?(cell)
    when 'string'
      TypeChecker.new.is_string?(cell)
    when 'date'
      TypeChecker.new.is_date?(cell, dateFormatFrom(type))
    when 'any'
      TypeChecker.new.is_any?(cell)
    else
      raise "Unrecognised column type [#{type_selector}]"
    end
  end

  # Extracts the strftime format from a date type such as
  # "date('%d-%m-%Y')". Returns DEFAULT_DATE_FORMAT when +str+ is not a date
  # type or carries no single-quoted format.
  def dateFormatFrom(str)
    return DEFAULT_DATE_FORMAT unless str =~ /\Adate/i

    str[/'(.*)'/, 1] || DEFAULT_DATE_FORMAT
  end

  module_function :check
  module_function :is_valid
  module_function :check_row
  module_function :dateFormatFrom
end
@@ -0,0 +1,55 @@
1
+ class TypeChecker
2
+
3
+ def is_integer?(str)
4
+ str = str.to_s
5
+ str.strip!
6
+
7
+ match_pattern = /^[-+]?[0-9][0-9]*?$/
8
+ return !str.match(match_pattern).nil?
9
+ end
10
+
11
+ def is_float?(str)
12
+ str = str.to_s
13
+ str.strip!
14
+
15
+ match_pattern = /^[-+]?[0-9]*\.[0-9]+?$/
16
+ return !str.match(match_pattern).nil?
17
+ end
18
+
19
+ def is_date?(data, format)
20
+ parsed = nil
21
+ begin
22
+ parsed = DateTime.parse(data)
23
+ rescue
24
+ end
25
+ output = parsed.strftime(format) if parsed
26
+ matches = (output == data)
27
+
28
+ return matches
29
+ end
30
+
31
+ def is_space?(thing)
32
+ return thing.to_s == " "
33
+ end
34
+
35
+ def is_nil?(thing)
36
+ return thing.nil?
37
+ end
38
+
39
+ def is_string?(thing)
40
+ return false if is_nil?(thing)
41
+ return false if is_integer?(thing)
42
+ return false if is_float?(thing)
43
+ return true if is_space?(thing)
44
+ true if thing.is_a? String
45
+ end
46
+
47
+ def is_any?(thing)
48
+ return false if is_nil?(thing)
49
+ return true if is_integer?(thing)
50
+ return true if is_float?(thing)
51
+ return true if is_space?(thing)
52
+ true if thing.is_a? String
53
+ end
54
+
55
+ end
data/lib/typemapper.rb ADDED
@@ -0,0 +1,17 @@
1
+ class TypeMapper
2
+
3
+ def to_map str
4
+ raise 'Nil map string' unless str
5
+ raise 'Empty map string' if str.length == 0
6
+
7
+ map = Hash.new
8
+ str.split(",").each {|pair|
9
+ split_pair = pair.split(":", 2)
10
+ k = split_pair[0]
11
+ v = split_pair[1]
12
+ map[k] = v
13
+ }
14
+ map
15
+ end
16
+
17
+ end
@@ -0,0 +1,17 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'csvchecker.rb')
2
+
3
# Covers how CsvChecker.dateFormatFrom derives a strftime format from a
# "date" column type.
describe CsvChecker, "#dateFormatFrom" do

  # Shorthand for the call under test.
  def format_for(type)
    CsvChecker.dateFormatFrom(type)
  end

  it("should extract a simple format") { format_for("date('%d-%m-%Y')").should eq("%d-%m-%Y") }

  it("should extract a timestamp format") { format_for("date('%d-%m-%Y %H:%M')").should eq("%d-%m-%Y %H:%M") }

  it("should supply the default format if no format is supplied") { format_for("date").should eq("%d/%m/%Y") }

end
@@ -0,0 +1,25 @@
1
+ require 'csvchecker'
2
+
3
# Covers type dispatch in CsvChecker.is_valid, using integer cells.
describe CsvChecker, "#is_valid" do

  # Shorthand for the call under test.
  def validity_of(cell, type)
    CsvChecker.is_valid(cell, type)
  end

  it("should raise an error for an unknown type") { expect { validity_of(1, 'SOMETHING_ELSE') }.to raise_error }

  it("should pass when an integer is found") { validity_of(1, 'integer').should eq(true) }

  it("should allow types to be specific case-insensitively") { validity_of(1, 'InTeGeR').should eq(true) }

  it("should allow cells to have spaces at the end") { validity_of("1 ", 'integer').should eq(true) }

  it("should allow cells to have spaces at the start") { validity_of(" 1 ", 'integer').should eq(true) }

end
@@ -0,0 +1,25 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typechecker.rb')
2
+
3
# Covers TypeChecker#is_any?, which accepts every non-nil cell.
# (The group was previously labelled "#is_string" although every example
# exercises is_any?.)
describe TypeChecker, "#is_any" do

  it "should allow a float" do
    TypeChecker.new.is_any?("1.0").should eq(true)
  end

  it "should allow an integer" do
    TypeChecker.new.is_any?("1").should eq(true)
  end

  it "should allow a string" do
    TypeChecker.new.is_any?("x").should eq(true)
  end

  it "should allow a space" do
    TypeChecker.new.is_any?(" ").should eq(true)
  end

  it "should not allow a nil" do
    TypeChecker.new.is_any?(nil).should eq(false)
  end

end
@@ -0,0 +1,17 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typechecker.rb')
2
+
3
# Covers TypeChecker#is_date?, which compares a cell against a strftime
# format.
describe TypeChecker, "#is_date" do

  it "should allow a date that matches the date format" do
    TypeChecker.new.is_date?("01/01/2012", "%d/%m/%Y").should eq(true)
  end

  # Renamed from "should work with custom formats": the example asserts that
  # a cell using different separators than the format is rejected.
  it "should reject a date that does not match the date format" do
    TypeChecker.new.is_date?("01-01-2012", "%d/%m/%Y").should eq(false)
  end

  it "should allow timestamps if used with custom format" do
    TypeChecker.new.is_date?("01-01-2012 12:50", "%d-%m-%Y %H:%M").should eq(true)
  end

end
@@ -0,0 +1,29 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typechecker.rb')
2
+
3
# Covers TypeChecker#is_float?.
# (The group was previously labelled "#is_string" although every example
# exercises is_float?.)
describe TypeChecker, "#is_float" do

  it "should allow a negative float" do
    TypeChecker.new.is_float?("-1.0").should eq(true)
  end

  it "should allow a positive signed float" do
    TypeChecker.new.is_float?("+1.0").should eq(true)
  end

  it "should allow a zero-padded float" do
    TypeChecker.new.is_float?("001.0").should eq(true)
  end

  it "should allow more than one digit after the decimal point" do
    TypeChecker.new.is_float?("2.000").should eq(true)
  end

  it "should require numbers after the decimal point" do
    TypeChecker.new.is_float?("1.").should eq(false)
  end

  it "should not require numbers before the decimal point" do
    TypeChecker.new.is_float?(".22").should eq(true)
  end

end
@@ -0,0 +1,21 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typechecker.rb')
2
+
3
# Covers TypeChecker#is_integer? for whole numbers, text, blanks and floats.
describe TypeChecker, "#is_integer" do

  # A fresh checker for every example.
  let(:checker) { TypeChecker.new }

  it("should be true for an Integer") { checker.is_integer?("1").should eq(true) }

  it("should be false for a string") { checker.is_integer?("a string").should eq(false) }

  it("should be false for a space") { checker.is_integer?(" ").should eq(false) }

  it("should be false for a float") { checker.is_integer?("1.0").should eq(false) }

end
@@ -0,0 +1,25 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typechecker.rb')
2
+
3
# Covers TypeChecker#is_string?: text passes, purely numeric cells do not.
describe TypeChecker, "#is_string" do

  # A fresh checker for every example.
  let(:checker) { TypeChecker.new }

  it("should be true for a string") { checker.is_string?("some string").should eq(true) }

  it("should be false for an integer") { checker.is_string?("1").should eq(false) }

  it("should be true for a string with an integer in it") { checker.is_string?("abc1def").should eq(true) }

  it("should be true for a space") { checker.is_string?(" ").should eq(true) }

  it("should be false for a float") { checker.is_string?("1.0").should eq(false) }

end
@@ -0,0 +1,9 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typechecker.rb')
2
+
3
# Covers the small helper predicates of TypeChecker.
describe TypeChecker, "#misc" do

  # Title corrected: the example asserts that is_nil? returns true for nil.
  it "should be true for nil" do
    TypeChecker.new.is_nil?(nil).should eq(true)
  end

  it "should be false for a non-nil value" do
    TypeChecker.new.is_nil?("x").should eq(false)
  end

end
@@ -0,0 +1,21 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib', 'typemapper.rb')
2
+
3
# Covers TypeMapper#to_map, which parses the --map command-line string.
describe TypeMapper, "#to_map" do

  # A fresh mapper for every example.
  let(:mapper) { TypeMapper.new }

  it("should create the map") do
    expected = {"1"=>"string", "0"=>"integer"}
    mapper.to_map("0:integer,1:string").should == expected
  end

  it("should raise error for a nil string") { expect { mapper.to_map(nil) }.to raise_error }

  it("should raise error for an empty string") { expect { mapper.to_map("") }.to raise_error }

  it("should allow type names with colons in (for date format)") do
    expected = { "0"=>"integer", "1"=>"date('%d/%m/%Y %H:%M')" }
    mapper.to_map("0:integer,1:date('%d/%m/%Y %H:%M')").should == expected
  end

end
data/test.csv ADDED
@@ -0,0 +1,2 @@
1
+ a,b,c,d
2
+ 1,2,3,4
metadata ADDED
@@ -0,0 +1,171 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: csv-check
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - rory
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-10-09 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: fastercsv
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: rspec
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ hash: 15
43
+ segments:
44
+ - 2
45
+ - 6
46
+ version: "2.6"
47
+ type: :development
48
+ version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ name: cucumber
51
+ prerelease: false
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ type: :development
62
+ version_requirements: *id003
63
+ - !ruby/object:Gem::Dependency
64
+ name: aruba
65
+ prerelease: false
66
+ requirement: &id004 !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ hash: 3
72
+ segments:
73
+ - 0
74
+ version: "0"
75
+ type: :development
76
+ version_requirements: *id004
77
+ description: Lets you verify that cells in a CSV files match formats you expect
78
+ email:
79
+ - rory@deadcrow.net
80
+ executables:
81
+ - csv-check
82
+ extensions: []
83
+
84
+ extra_rdoc_files: []
85
+
86
+ files:
87
+ - .gitignore
88
+ - Gemfile
89
+ - Gemfile.lock
90
+ - LICENSE.txt
91
+ - README.md
92
+ - Rakefile
93
+ - bin/csv-check
94
+ - csv-check.gemspec
95
+ - features/any.feature
96
+ - features/cli.feature
97
+ - features/date.feature
98
+ - features/float.feature
99
+ - features/integer.feature
100
+ - features/parsing-tolerance.feature
101
+ - features/real-example.feature
102
+ - features/string.feature
103
+ - features/support/setup.rb
104
+ - features/works-one-one-row-or-many.feature
105
+ - lib/csv-check/version.rb
106
+ - lib/csvchecker.rb
107
+ - lib/typechecker.rb
108
+ - lib/typemapper.rb
109
+ - spec/csvchecker_date_format_extraction.rb
110
+ - spec/csvchecker_is_valid.rb
111
+ - spec/typechecker_is_any.rb
112
+ - spec/typechecker_is_date.rb
113
+ - spec/typechecker_is_float.rb
114
+ - spec/typechecker_is_integer.rb
115
+ - spec/typechecker_is_string.rb
116
+ - spec/typechecker_misc.rb
117
+ - spec/typemapper.rb
118
+ - test.csv
119
+ homepage: http://github.com/rorygibson/csv-check
120
+ licenses: []
121
+
122
+ post_install_message:
123
+ rdoc_options: []
124
+
125
+ require_paths:
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ hash: 3
133
+ segments:
134
+ - 0
135
+ version: "0"
136
+ required_rubygems_version: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ hash: 3
142
+ segments:
143
+ - 0
144
+ version: "0"
145
+ requirements: []
146
+
147
+ rubyforge_project:
148
+ rubygems_version: 1.8.24
149
+ signing_key:
150
+ specification_version: 3
151
+ summary: Allows you to specify formats (integer, float, string, date (with checking aginst date format strings) for columns in CSV data, both std in and files)
152
+ test_files:
153
+ - features/any.feature
154
+ - features/cli.feature
155
+ - features/date.feature
156
+ - features/float.feature
157
+ - features/integer.feature
158
+ - features/parsing-tolerance.feature
159
+ - features/real-example.feature
160
+ - features/string.feature
161
+ - features/support/setup.rb
162
+ - features/works-one-one-row-or-many.feature
163
+ - spec/csvchecker_date_format_extraction.rb
164
+ - spec/csvchecker_is_valid.rb
165
+ - spec/typechecker_is_any.rb
166
+ - spec/typechecker_is_date.rb
167
+ - spec/typechecker_is_float.rb
168
+ - spec/typechecker_is_integer.rb
169
+ - spec/typechecker_is_string.rb
170
+ - spec/typechecker_misc.rb
171
+ - spec/typemapper.rb