csv-import-analyzer 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/README.md +8 -1
  4. data/csv-import-analyzer.gemspec +1 -1
  5. data/lib/csv-import-analyzer.rb +6 -4
  6. data/lib/csv-import-analyzer/analyzer/csv_check_bounds.rb +30 -19
  7. data/lib/csv-import-analyzer/analyzer/delimiter_identifier.rb +44 -24
  8. data/lib/csv-import-analyzer/analyzer/file_type_assertion.rb +1 -5
  9. data/lib/csv-import-analyzer/csv_datatype_analysis.rb +25 -9
  10. data/lib/csv-import-analyzer/csv_sanitizer.rb +67 -17
  11. data/lib/csv-import-analyzer/export/metadata_analysis.rb +63 -7
  12. data/lib/csv-import-analyzer/helpers/common_functions.rb +4 -0
  13. data/lib/csv-import-analyzer/helpers/datatype_validation.rb +6 -6
  14. data/lib/csv-import-analyzer/helpers/string_class_extensions.rb +9 -3
  15. data/lib/csv-import-analyzer/query_builder/mysql_query_helper.rb +2 -2
  16. data/lib/csv-import-analyzer/query_builder/pg_query_helper.rb +1 -2
  17. data/lib/csv-import-analyzer/query_builder/query_helper.rb +2 -2
  18. data/lib/csv-import-analyzer/sql_query_builder.rb +27 -12
  19. data/lib/csv-import-analyzer/version.rb +1 -1
  20. data/spec/csv-import-analyzer/analyzer/csv_check_bounds_spec.rb +8 -8
  21. data/spec/csv-import-analyzer/analyzer/delimiter_identifier_spec.rb +13 -13
  22. data/spec/csv-import-analyzer/csv_sanitizer_spec.rb +10 -7
  23. data/spec/csv-import-analyzer/helpers/common_functions_spec.rb +20 -19
  24. data/spec/csv-import-analyzer/helpers/datatype_validation_spec.rb +28 -28
  25. data/spec/csv-import-analyzer/helpers/string_class_extension_spec.rb +6 -6
  26. data/spec/csv-import-analyzer/query_builder/mysql_query_helper_spec.rb +13 -13
  27. data/spec/csv-import-analyzer/query_builder/pg_query_helper_spec.rb +16 -16
  28. data/spec/csv-import-analyzer_spec.rb +3 -6
  29. data/spec/fixtures/sample.csv +2 -2
  30. data/spec/spec_helper.rb +3 -0
  31. metadata +17 -6
  32. data/lib/csv-import-analyzer/sampleTab.csv +0 -5
  33. data/samples/metadata_output.json +0 -70
  34. data/spec/csv-import-analyzer/csv_datatype_analysis_spec.rb +0 -1
@@ -1,24 +1,27 @@
1
1
  # require 'spec_helper'
2
2
 
3
- require 'pry'
4
3
  describe CsvImportAnalyzer::CsvSanitizer do
4
+ # May be I should really use subject here
5
+ # why?
5
6
  let (:csv_sanitizer) { CsvImportAnalyzer::CsvSanitizer.new }
6
- it 'should handle file not found issue' do
7
+ it "handles file not found issue - when given a invalid file" do
7
8
  expect(csv_sanitizer.process("sample.csv", options = {})).to be_instance_of(FileNotFound)
8
9
  end
9
- #Testing private methods - Although one should really have to test private methods, it's
10
- context 'testing private methods' do
10
+
11
+ # Testing private methods - Although one shouldn't really have to test private methods
12
+ # Testing here to make sure the private methods are doing what they are supposed to
13
+ context "testing private methods" do
11
14
  let (:test) {"\"t1\", 't2', \"t3\""}
12
15
  let (:res) {"\"t1\", \"t2\", \"t3\""}
13
- xit 'should replace single quotes to double' do
16
+ xit "replaces single quotes to double quotes" do
14
17
  binding.pry
15
18
  expect(csv_sanitizer.send(:replace_line_single_quotes, test, ",")).to eq(res)
16
19
  end
17
20
  let (:test) {["t1","t2","",nil,"t3"]}
18
21
  let (:res) {["t1","t2","NULL","NULL","t3"]}
19
- it 'should replace null values' do
22
+ it "replaces nil or empty values to NULL" do
20
23
  expect(csv_sanitizer.send(:replace_null_values, test)).to eq(res)
21
24
  end
22
25
  end
23
26
 
24
- end
27
+ end
@@ -3,29 +3,30 @@
3
3
  class DummyClass
4
4
  end
5
5
 
6
- describe 'null_like?' do
7
-
8
- before(:each) do
9
- @dummy_class = DummyClass.new
10
- @dummy_class.extend(CsvImportAnalyzer::Helper)
11
- end
12
-
13
- context 'when called on null like objects' do
14
- it 'returns NULL as null type' do
15
- expect(@dummy_class.null_like?('NULL')).to eq(true)
6
+ describe CsvImportAnalyzer::Helper do
7
+ describe "#null_like?" do
8
+ before(:each) do
9
+ @dummy_class = DummyClass.new
10
+ @dummy_class.extend(CsvImportAnalyzer::Helper)
16
11
  end
17
12
 
18
- it 'returns \\N as null type' do
19
- expect(@dummy_class.null_like?('\N')).to eq(true)
20
- end
21
- end
13
+ context "when called on null like objects" do
14
+ it "returns NULL as null type" do
15
+ expect(@dummy_class.null_like?("NULL")).to eq(true)
16
+ end
22
17
 
23
- context 'when called on non-null objects' do
24
- it 'returns hello as not null' do
25
- expect(@dummy_class.null_like?('Hello')).to eq(false)
18
+ it "returns \\N as null type" do
19
+ expect(@dummy_class.null_like?('\N')).to eq(true)
20
+ end
26
21
  end
27
- it 'returns Fixnum(3) as not null' do
28
- expect(@dummy_class.null_like?(3)).to eq(false)
22
+
23
+ context "when called on non-null objects" do
24
+ it "returns hello as not null" do
25
+ expect(@dummy_class.null_like?("Hello")).to eq(false)
26
+ end
27
+ it "returns Fixnum(3) as not null" do
28
+ expect(@dummy_class.null_like?(3)).to eq(false)
29
+ end
29
30
  end
30
31
  end
31
32
  end
@@ -3,70 +3,70 @@ require 'date'
3
3
  class DummyClass
4
4
  end
5
5
 
6
- describe '#validate_field' do
6
+ describe "#validate_field" do
7
7
 
8
8
  before(:each) do
9
9
  @dummy_class = DummyClass.new
10
10
  @dummy_class.extend(CsvImportAnalyzer::DatatypeValidator)
11
11
  end
12
12
 
13
- context 'knows what an integer looks like' do
13
+ context "knows what an integer looks like" do
14
14
 
15
- it 'returns Fixnum type as integer' do
15
+ it "returns Fixnum type as integer" do
16
16
  expect(@dummy_class.validate_field(10)).to eq("int")
17
17
  end
18
- it 'returns Fixnum type with spaces as integer' do
19
- expect(@dummy_class.validate_field(' 10 ')).to eq("int")
18
+ it "returns Fixnum type with spaces as integer" do
19
+ expect(@dummy_class.validate_field(" 10 ")).to eq("int")
20
20
  end
21
- it 'returns Fixnum type with comma as integer' do
22
- expect(@dummy_class.validate_field('1,000')).to eq("int")
21
+ it "returns Fixnum type with comma as integer" do
22
+ expect(@dummy_class.validate_field("1,000")).to eq("int")
23
23
  end
24
- it 'returns Fixnum type negative number as integer' do
24
+ it "returns Fixnum type negative number as integer" do
25
25
  expect(@dummy_class.validate_field(-3)).to eq("int")
26
26
  end
27
27
 
28
28
  end
29
29
 
30
- context 'knows what an Float looks like' do
30
+ context "knows what an Float looks like" do
31
31
 
32
- it 'returns Float type as float' do
32
+ it "returns Float type as float" do
33
33
  expect(@dummy_class.validate_field(10.0)).to eq("float")
34
34
  end
35
- it 'returns Float type with spaces as float' do
36
- expect(@dummy_class.validate_field(' 10.01 ')).to eq("float")
35
+ it "returns Float type with spaces as float" do
36
+ expect(@dummy_class.validate_field(" 10.01 ")).to eq("float")
37
37
  end
38
- it 'returns Float type with comma as float' do
39
- expect(@dummy_class.validate_field('1,000.01')).to eq("float")
38
+ it "returns Float type with comma as float" do
39
+ expect(@dummy_class.validate_field("1,000.01")).to eq("float")
40
40
  end
41
- it 'returns Float type negative number as float' do
41
+ it "returns Float type negative number as float" do
42
42
  expect(@dummy_class.validate_field(-3.3)).to eq("float")
43
43
  end
44
44
 
45
45
  end
46
46
 
47
- context 'it knows what a date looks like' do
48
- it 'return true for a valid date type - dd/mm/yyyy' do
49
- expect(@dummy_class.validate_field('31/12/2014')).to eq("date")
47
+ context "it knows what a date looks like" do
48
+ it "return true for a valid date type - dd/mm/yyyy" do
49
+ expect(@dummy_class.validate_field("31/12/2014")).to eq("date")
50
50
  end
51
- it 'return true for a valid date type - mm/dd/yyyy' do
52
- expect(@dummy_class.validate_field('12/31/2014')).to eq("date")
51
+ it "return true for a valid date type - mm/dd/yyyy" do
52
+ expect(@dummy_class.validate_field("12/31/2014")).to eq("date")
53
53
  end
54
- it 'return true for a valid date type - mm-dd-yyyy' do
55
- expect(@dummy_class.validate_field('12-31-2014')).to eq("date")
54
+ it "return true for a valid date type - mm-dd-yyyy" do
55
+ expect(@dummy_class.validate_field("12-31-2014")).to eq("date")
56
56
  end
57
- it 'return true for a valid date type - mm dd yyyy' do
58
- expect(@dummy_class.validate_field('12 31 2014')).to eq("date")
57
+ it "return true for a valid date type - mm dd yyyy" do
58
+ expect(@dummy_class.validate_field("12 31 2014")).to eq("date")
59
59
  end
60
60
  end
61
61
 
62
- context 'it knows what a String looks like' do
63
- it 'default to String type' do
62
+ context "it knows what a String looks like" do
63
+ it "default to String type" do
64
64
  expect(@dummy_class.validate_field("100 testingNow:)")).to eq("string")
65
65
  end
66
- it 'returns String type as string' do
66
+ it "returns String type as string" do
67
67
  expect(@dummy_class.validate_field("Hello")).to eq("string")
68
68
  end
69
- it 'returns String type of dates as string' do
69
+ it "returns String type of dates as string" do
70
70
  expect(@dummy_class.validate_field("12 31 2014312")).to eq("string")
71
71
  expect(@dummy_class.validate_field("12-31-2014312")).to eq("string")
72
72
  expect(@dummy_class.validate_field("12/31/2014312")).to eq("string")
@@ -1,17 +1,17 @@
1
1
  # require 'spec_helper'
2
2
 
3
- describe 'substr_count' do
4
- context 'different possible delimiters' do
5
- it 'returns count of commas as delimiter in a string' do
3
+ describe "substr_count" do
4
+ context "different possible delimiters" do
5
+ it "returns count of commas as delimiter in a string" do
6
6
  expect("hello, hi, how, are you?".substr_count(",")).to eq(3)
7
7
  end
8
- it 'returns count of semi-colons as delimiter in a string' do
8
+ it "returns count of semi-colons as delimiter in a string" do
9
9
  expect("hello; hi, how, are you?".substr_count(";")).to eq(1)
10
10
  end
11
- it 'returns count of pipe as delimiter in a string' do
11
+ it "returns count of pipe as delimiter in a string" do
12
12
  expect("hello, hi| how| are you?".substr_count("|")).to eq(2)
13
13
  end
14
- it 'returns count of tab as delimiter in a string' do
14
+ it "returns count of tab as delimiter in a string" do
15
15
  expect("hello\thi\thow| are you?".substr_count("\t")).to eq(2)
16
16
  end
17
17
  end
@@ -1,53 +1,53 @@
1
1
  # require 'spec_helper'
2
2
  class DummyClass
3
3
  end
4
- describe '#form_query_for_datatype' do
4
+ describe "#form_query_for_datatype" do
5
5
  before(:each) do
6
6
  @dummy_class = DummyClass.new
7
7
  @dummy_class.extend(CsvImportAnalyzer::MysqlQueryHelper)
8
8
  end
9
- context 'expected arguments are not set' do
9
+ context "when expected arguments are not set" do
10
10
  let(:args) {Hash[:header => :test]}
11
11
  let(:args1) {Hash[:datatype, :test]}
12
- it ' returns missing arguments error' do
12
+ it "returns missing arguments error" do
13
13
  expect(@dummy_class.form_query_for_datatype(args)).to be_instance_of(MissingRequiredArguments)
14
14
  end
15
- it 'returns invalid if set to nil' do
15
+ it "returns invalid if set to nil" do
16
16
  expect(@dummy_class.form_query_for_datatype(args1)).to be_instance_of(MissingRequiredArguments)
17
17
  end
18
18
  end
19
19
 
20
- context 'expected arguments are set' do
20
+ context "when expected arguments are set" do
21
21
  let(:args) {Hash[:header => :test, :datatype => :string]}
22
22
  let(:args1) {Hash[:header => :test, :datatype => :integer]}
23
- it 'returns expected sql query for string' do
23
+ it "returns expected sql query for string" do
24
24
  expect(@dummy_class.form_query_for_datatype(args)).to eq("test varchar(255)")
25
25
  end
26
- it 'returns expected sql query for numeric' do
26
+ it "returns expected sql query for numeric" do
27
27
  expect(@dummy_class.form_query_for_datatype(args1)).to eq("test integer")
28
28
  end
29
29
  end
30
30
 
31
31
  end
32
- describe '#import_csv' do
32
+ describe "#import_csv" do
33
33
  before(:each) do
34
34
  @dummy_class = DummyClass.new
35
35
  @dummy_class.extend(CsvImportAnalyzer::MysqlQueryHelper)
36
36
  end
37
- context 'expected arguments are not set' do
37
+ context "when expected arguments are not set" do
38
38
  let(:args) {Hash[:tablename => "test", :delimiter => ","]}
39
39
  let(:args1) {Hash[:filename => "test"]}
40
- it ' return SqlQueryErrror' do
40
+ it "returns SqlQueryErrror" do
41
41
  expect(@dummy_class.import_csv(args)).to be_instance_of(MissingRequiredArguments)
42
42
  end
43
- it 'should return SqlQueryErrror' do
43
+ it "returns SqlQueryErrror" do
44
44
  expect(@dummy_class.import_csv(args1)).to be_instance_of(MissingRequiredArguments)
45
45
  end
46
46
  end
47
47
 
48
- context 'expected arguments are set' do
48
+ context "when expected arguments are set" do
49
49
  let(:args) {Hash[:tablename => "test", :delimiter => ",", :filename => "sample.csv"]}
50
- it 'returns expected import query' do
50
+ it "returns expected import query" do
51
51
  expect(@dummy_class.import_csv(args)).to eq("LOAD DATA INFILE sample.csv INTO TABLE test FIELDS TERMINATED BY ',' ENCLOSED BY '\"' LINES TERMINATED BY '\\n' IGNORE 1 LINES;")
52
52
  end
53
53
  end
@@ -1,55 +1,55 @@
1
1
  # require 'spec_helper'
2
2
  class DummyClass
3
3
  end
4
- describe '#form_query_for_datatype' do
4
+ describe "#form_query_for_datatype" do
5
5
  before(:each) do
6
+ # Creating a dummy object to test the modules.
7
+ # Extending with dummy object by adding module methods to it using extend
6
8
  @dummy_class = DummyClass.new
7
9
  @dummy_class.extend(CsvImportAnalyzer::PgQueryHelper)
8
10
  end
9
- context 'expected arguments are not set' do
11
+ context "when expected arguments are not set" do
10
12
  let(:args) {Hash[:header => :test]}
11
13
  let(:args1) {Hash[:datatype, :test]}
12
- it ' returns missing arguments error' do
14
+ it "returns missing arguments error" do
13
15
  expect(@dummy_class.form_query_for_datatype(args)).to be_instance_of(MissingRequiredArguments)
14
16
  end
15
- it 'returns invalid if set to nil' do
17
+ it "returns invalid if set to nil" do
16
18
  expect(@dummy_class.form_query_for_datatype(args1)).to be_instance_of(MissingRequiredArguments)
17
19
  end
18
20
  end
19
-
20
- context 'expected arguments are set' do
21
+ context "when expected arguments are set" do
21
22
  let(:args) {Hash[:header => :test, :datatype => :string]}
22
23
  let(:args1) {Hash[:header => :test, :datatype => :integer]}
23
- it 'returns expected sql query for string' do
24
+ it "returns expected sql query for string" do
24
25
  expect(@dummy_class.form_query_for_datatype(args)).to eq("test varchar(255)")
25
26
  end
26
- it 'returns expected sql query for numeric' do
27
+ it "returns expected sql query for numeric" do
27
28
  expect(@dummy_class.form_query_for_datatype(args1)).to eq("test integer")
28
29
  end
29
30
  end
30
-
31
31
  end
32
- describe '#import_csv' do
32
+ describe "#import_csv" do
33
33
  before(:each) do
34
34
  @dummy_class = DummyClass.new
35
35
  @dummy_class.extend(CsvImportAnalyzer::PgQueryHelper)
36
36
  end
37
- context 'expected arguments are not set' do
37
+ context "when expected arguments are not set" do
38
38
  let(:args) {Hash[:tablename => "test", :delimiter => ","]}
39
39
  let(:args1) {Hash[:filename => "test"]}
40
- it ' return SqlQueryErrror' do
40
+ it "returns SqlQueryErrror" do
41
41
  expect(@dummy_class.import_csv(args)).to be_instance_of(MissingRequiredArguments)
42
42
  end
43
- it 'should return SqlQueryErrror' do
43
+ it "returns SqlQueryErrror" do
44
44
  expect(@dummy_class.import_csv(args1)).to be_instance_of(MissingRequiredArguments)
45
45
  end
46
46
  end
47
47
 
48
- context 'expected arguments are set' do
48
+ context "when expected arguments are set" do
49
49
  let(:args) {Hash[:tablename => "test", :delimiter => ",", :filename => "filename"]}
50
- it 'returns expected import query' do
50
+ it "returns expected import query" do
51
51
  expect(@dummy_class.import_csv(args)).to eq("COPY test FROM 'filename' HEADER DELIMITER ',' CSV NULL AS 'NULL';")
52
52
  end
53
53
  end
54
54
 
55
- end
55
+ end
@@ -1,14 +1,11 @@
1
1
  # require 'spec_helper'
2
2
 
3
- # CsvImportAnalyzer.process("sampleTab.csv", {:metadata_output => true})
4
-
5
-
6
3
  describe CsvImportAnalyzer do
7
4
  include CsvImportAnalyzer
8
- it 'should return invalid file as file not found' do
5
+ it "return invalid file as file not found" do
9
6
  expect(CsvImportAnalyzer.process("sample.csv")).to be_instance_of(FileNotFound)
10
7
  end
11
- it 'should be able to process a valid file' do
8
+ it "processes a valid file" do
12
9
  expect(CsvImportAnalyzer.process($sample_csv_path)).not_to be_instance_of(FileNotFound)
13
10
  end
14
- end
11
+ end
@@ -1,5 +1,5 @@
1
1
  Year ID,Make ID,Model ID,Description ID,Price ID
2
- 1997,Ford,,"ac, abs, moon","3000"
3
- 1999,Chevy,"Venture ""Extended Edition""","",4900.00
2
+ 1997,Ford,,"ac, abs, moon","3000
3
+ 1999,Chevy,"Venture ""Extended Edition""",",4900.00
4
4
  1999,'Chevy',"Venture ""Extended Edition, Very Large""",,5000.00
5
5
  1996,Jeep,Grand Che'rokee,"MUST SELL!air, moon roof, loaded",4799.00
data/spec/spec_helper.rb CHANGED
@@ -1,6 +1,9 @@
1
1
  require 'simplecov'
2
+ # Must be ahead of requiring core library to start the code coverage.
2
3
  SimpleCov.start
3
4
  require 'csv-import-analyzer'
5
+
6
+ # Path to sample files in the fixtures folder that are made available to all specs for testing
4
7
  $sample_csv_path = "/home/avinash/Desktop/csv-import-analyzer/spec/fixtures/sample.csv"
5
8
  $sample_ssv_path = "/home/avinash/Desktop/csv-import-analyzer/spec/fixtures/semicolon-sample.csv"
6
9
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-import-analyzer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Avinash Vallabhaneni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-09 00:00:00.000000000 Z
11
+ date: 2014-10-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -85,6 +85,9 @@ dependencies:
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.0'
90
+ - - ">="
88
91
  - !ruby/object:Gem::Version
89
92
  version: 1.0.17
90
93
  type: :runtime
@@ -92,6 +95,9 @@ dependencies:
92
95
  version_requirements: !ruby/object:Gem::Requirement
93
96
  requirements:
94
97
  - - "~>"
98
+ - !ruby/object:Gem::Version
99
+ version: '1.0'
100
+ - - ">="
95
101
  - !ruby/object:Gem::Version
96
102
  version: 1.0.17
97
103
  description: Santize large csv files and help in predicting datatypes including min
@@ -123,15 +129,21 @@ files:
123
129
  - lib/csv-import-analyzer/query_builder/mysql_query_helper.rb
124
130
  - lib/csv-import-analyzer/query_builder/pg_query_helper.rb
125
131
  - lib/csv-import-analyzer/query_builder/query_helper.rb
126
- - lib/csv-import-analyzer/sampleTab.csv
127
132
  - lib/csv-import-analyzer/sql_query_builder.rb
128
133
  - lib/csv-import-analyzer/version.rb
129
- - samples/metadata_output.json
134
+ - samples/27_sweep_net_arthropods_1.csv
135
+ - samples/584_ccs_survey_data_2010.csv
136
+ - samples/591_fluxtower_data_corrected_2012.csv
137
+ - samples/5_photos_1.csv
138
+ - samples/80_water_chemistry_1.csv
139
+ - samples/86_dbg_irrigation_rates_1.csv
140
+ - samples/AllstarFull.csv
141
+ - samples/LDP_TenMin.dat
130
142
  - samples/sampleTab.csv
143
+ - samples/sampleTab.tsv
131
144
  - spec/csv-import-analyzer/analyzer/csv_check_bounds_spec.rb
132
145
  - spec/csv-import-analyzer/analyzer/delimiter_identifier_spec.rb
133
146
  - spec/csv-import-analyzer/analyzer/file_type_assertion_spec.rb
134
- - spec/csv-import-analyzer/csv_datatype_analysis_spec.rb
135
147
  - spec/csv-import-analyzer/csv_sanitizer_spec.rb
136
148
  - spec/csv-import-analyzer/export/metadata_analysis_spec.rb
137
149
  - spec/csv-import-analyzer/helpers/common_functions_spec.rb
@@ -176,7 +188,6 @@ test_files:
176
188
  - spec/csv-import-analyzer/analyzer/csv_check_bounds_spec.rb
177
189
  - spec/csv-import-analyzer/analyzer/delimiter_identifier_spec.rb
178
190
  - spec/csv-import-analyzer/analyzer/file_type_assertion_spec.rb
179
- - spec/csv-import-analyzer/csv_datatype_analysis_spec.rb
180
191
  - spec/csv-import-analyzer/csv_sanitizer_spec.rb
181
192
  - spec/csv-import-analyzer/export/metadata_analysis_spec.rb
182
193
  - spec/csv-import-analyzer/helpers/common_functions_spec.rb