csv-import-analyzer 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/README.md +8 -1
  4. data/csv-import-analyzer.gemspec +1 -1
  5. data/lib/csv-import-analyzer.rb +6 -4
  6. data/lib/csv-import-analyzer/analyzer/csv_check_bounds.rb +30 -19
  7. data/lib/csv-import-analyzer/analyzer/delimiter_identifier.rb +44 -24
  8. data/lib/csv-import-analyzer/analyzer/file_type_assertion.rb +1 -5
  9. data/lib/csv-import-analyzer/csv_datatype_analysis.rb +25 -9
  10. data/lib/csv-import-analyzer/csv_sanitizer.rb +67 -17
  11. data/lib/csv-import-analyzer/export/metadata_analysis.rb +63 -7
  12. data/lib/csv-import-analyzer/helpers/common_functions.rb +4 -0
  13. data/lib/csv-import-analyzer/helpers/datatype_validation.rb +6 -6
  14. data/lib/csv-import-analyzer/helpers/string_class_extensions.rb +9 -3
  15. data/lib/csv-import-analyzer/query_builder/mysql_query_helper.rb +2 -2
  16. data/lib/csv-import-analyzer/query_builder/pg_query_helper.rb +1 -2
  17. data/lib/csv-import-analyzer/query_builder/query_helper.rb +2 -2
  18. data/lib/csv-import-analyzer/sql_query_builder.rb +27 -12
  19. data/lib/csv-import-analyzer/version.rb +1 -1
  20. data/spec/csv-import-analyzer/analyzer/csv_check_bounds_spec.rb +8 -8
  21. data/spec/csv-import-analyzer/analyzer/delimiter_identifier_spec.rb +13 -13
  22. data/spec/csv-import-analyzer/csv_sanitizer_spec.rb +10 -7
  23. data/spec/csv-import-analyzer/helpers/common_functions_spec.rb +20 -19
  24. data/spec/csv-import-analyzer/helpers/datatype_validation_spec.rb +28 -28
  25. data/spec/csv-import-analyzer/helpers/string_class_extension_spec.rb +6 -6
  26. data/spec/csv-import-analyzer/query_builder/mysql_query_helper_spec.rb +13 -13
  27. data/spec/csv-import-analyzer/query_builder/pg_query_helper_spec.rb +16 -16
  28. data/spec/csv-import-analyzer_spec.rb +3 -6
  29. data/spec/fixtures/sample.csv +2 -2
  30. data/spec/spec_helper.rb +3 -0
  31. metadata +17 -6
  32. data/lib/csv-import-analyzer/sampleTab.csv +0 -5
  33. data/samples/metadata_output.json +0 -70
  34. data/spec/csv-import-analyzer/csv_datatype_analysis_spec.rb +0 -1
@@ -1,24 +1,27 @@
1
1
  # require 'spec_helper'
2
2
 
3
- require 'pry'
4
3
  describe CsvImportAnalyzer::CsvSanitizer do
4
+ # May be I should really use subject here
5
+ # why?
5
6
  let (:csv_sanitizer) { CsvImportAnalyzer::CsvSanitizer.new }
6
- it 'should handle file not found issue' do
7
+ it "handles file not found issue - when given a invalid file" do
7
8
  expect(csv_sanitizer.process("sample.csv", options = {})).to be_instance_of(FileNotFound)
8
9
  end
9
- #Testing private methods - Although one should really have to test private methods, it's
10
- context 'testing private methods' do
10
+
11
+ # Testing private methods - Although one shouldn't really have to test private methods
12
+ # Testing here to make sure the private methods are doing what they are supposed to
13
+ context "testing private methods" do
11
14
  let (:test) {"\"t1\", 't2', \"t3\""}
12
15
  let (:res) {"\"t1\", \"t2\", \"t3\""}
13
- xit 'should replace single quotes to double' do
16
+ xit "replaces single quotes to double quotes" do
14
17
  binding.pry
15
18
  expect(csv_sanitizer.send(:replace_line_single_quotes, test, ",")).to eq(res)
16
19
  end
17
20
  let (:test) {["t1","t2","",nil,"t3"]}
18
21
  let (:res) {["t1","t2","NULL","NULL","t3"]}
19
- it 'should replace null values' do
22
+ it "replaces nil or empty values to NULL" do
20
23
  expect(csv_sanitizer.send(:replace_null_values, test)).to eq(res)
21
24
  end
22
25
  end
23
26
 
24
- end
27
+ end
@@ -3,29 +3,30 @@
3
3
  class DummyClass
4
4
  end
5
5
 
6
- describe 'null_like?' do
7
-
8
- before(:each) do
9
- @dummy_class = DummyClass.new
10
- @dummy_class.extend(CsvImportAnalyzer::Helper)
11
- end
12
-
13
- context 'when called on null like objects' do
14
- it 'returns NULL as null type' do
15
- expect(@dummy_class.null_like?('NULL')).to eq(true)
6
+ describe CsvImportAnalyzer::Helper do
7
+ describe "#null_like?" do
8
+ before(:each) do
9
+ @dummy_class = DummyClass.new
10
+ @dummy_class.extend(CsvImportAnalyzer::Helper)
16
11
  end
17
12
 
18
- it 'returns \\N as null type' do
19
- expect(@dummy_class.null_like?('\N')).to eq(true)
20
- end
21
- end
13
+ context "when called on null like objects" do
14
+ it "returns NULL as null type" do
15
+ expect(@dummy_class.null_like?("NULL")).to eq(true)
16
+ end
22
17
 
23
- context 'when called on non-null objects' do
24
- it 'returns hello as not null' do
25
- expect(@dummy_class.null_like?('Hello')).to eq(false)
18
+ it "returns \\N as null type" do
19
+ expect(@dummy_class.null_like?('\N')).to eq(true)
20
+ end
26
21
  end
27
- it 'returns Fixnum(3) as not null' do
28
- expect(@dummy_class.null_like?(3)).to eq(false)
22
+
23
+ context "when called on non-null objects" do
24
+ it "returns hello as not null" do
25
+ expect(@dummy_class.null_like?("Hello")).to eq(false)
26
+ end
27
+ it "returns Fixnum(3) as not null" do
28
+ expect(@dummy_class.null_like?(3)).to eq(false)
29
+ end
29
30
  end
30
31
  end
31
32
  end
@@ -3,70 +3,70 @@ require 'date'
3
3
  class DummyClass
4
4
  end
5
5
 
6
- describe '#validate_field' do
6
+ describe "#validate_field" do
7
7
 
8
8
  before(:each) do
9
9
  @dummy_class = DummyClass.new
10
10
  @dummy_class.extend(CsvImportAnalyzer::DatatypeValidator)
11
11
  end
12
12
 
13
- context 'knows what an integer looks like' do
13
+ context "knows what an integer looks like" do
14
14
 
15
- it 'returns Fixnum type as integer' do
15
+ it "returns Fixnum type as integer" do
16
16
  expect(@dummy_class.validate_field(10)).to eq("int")
17
17
  end
18
- it 'returns Fixnum type with spaces as integer' do
19
- expect(@dummy_class.validate_field(' 10 ')).to eq("int")
18
+ it "returns Fixnum type with spaces as integer" do
19
+ expect(@dummy_class.validate_field(" 10 ")).to eq("int")
20
20
  end
21
- it 'returns Fixnum type with comma as integer' do
22
- expect(@dummy_class.validate_field('1,000')).to eq("int")
21
+ it "returns Fixnum type with comma as integer" do
22
+ expect(@dummy_class.validate_field("1,000")).to eq("int")
23
23
  end
24
- it 'returns Fixnum type negative number as integer' do
24
+ it "returns Fixnum type negative number as integer" do
25
25
  expect(@dummy_class.validate_field(-3)).to eq("int")
26
26
  end
27
27
 
28
28
  end
29
29
 
30
- context 'knows what an Float looks like' do
30
+ context "knows what an Float looks like" do
31
31
 
32
- it 'returns Float type as float' do
32
+ it "returns Float type as float" do
33
33
  expect(@dummy_class.validate_field(10.0)).to eq("float")
34
34
  end
35
- it 'returns Float type with spaces as float' do
36
- expect(@dummy_class.validate_field(' 10.01 ')).to eq("float")
35
+ it "returns Float type with spaces as float" do
36
+ expect(@dummy_class.validate_field(" 10.01 ")).to eq("float")
37
37
  end
38
- it 'returns Float type with comma as float' do
39
- expect(@dummy_class.validate_field('1,000.01')).to eq("float")
38
+ it "returns Float type with comma as float" do
39
+ expect(@dummy_class.validate_field("1,000.01")).to eq("float")
40
40
  end
41
- it 'returns Float type negative number as float' do
41
+ it "returns Float type negative number as float" do
42
42
  expect(@dummy_class.validate_field(-3.3)).to eq("float")
43
43
  end
44
44
 
45
45
  end
46
46
 
47
- context 'it knows what a date looks like' do
48
- it 'return true for a valid date type - dd/mm/yyyy' do
49
- expect(@dummy_class.validate_field('31/12/2014')).to eq("date")
47
+ context "it knows what a date looks like" do
48
+ it "return true for a valid date type - dd/mm/yyyy" do
49
+ expect(@dummy_class.validate_field("31/12/2014")).to eq("date")
50
50
  end
51
- it 'return true for a valid date type - mm/dd/yyyy' do
52
- expect(@dummy_class.validate_field('12/31/2014')).to eq("date")
51
+ it "return true for a valid date type - mm/dd/yyyy" do
52
+ expect(@dummy_class.validate_field("12/31/2014")).to eq("date")
53
53
  end
54
- it 'return true for a valid date type - mm-dd-yyyy' do
55
- expect(@dummy_class.validate_field('12-31-2014')).to eq("date")
54
+ it "return true for a valid date type - mm-dd-yyyy" do
55
+ expect(@dummy_class.validate_field("12-31-2014")).to eq("date")
56
56
  end
57
- it 'return true for a valid date type - mm dd yyyy' do
58
- expect(@dummy_class.validate_field('12 31 2014')).to eq("date")
57
+ it "return true for a valid date type - mm dd yyyy" do
58
+ expect(@dummy_class.validate_field("12 31 2014")).to eq("date")
59
59
  end
60
60
  end
61
61
 
62
- context 'it knows what a String looks like' do
63
- it 'default to String type' do
62
+ context "it knows what a String looks like" do
63
+ it "default to String type" do
64
64
  expect(@dummy_class.validate_field("100 testingNow:)")).to eq("string")
65
65
  end
66
- it 'returns String type as string' do
66
+ it "returns String type as string" do
67
67
  expect(@dummy_class.validate_field("Hello")).to eq("string")
68
68
  end
69
- it 'returns String type of dates as string' do
69
+ it "returns String type of dates as string" do
70
70
  expect(@dummy_class.validate_field("12 31 2014312")).to eq("string")
71
71
  expect(@dummy_class.validate_field("12-31-2014312")).to eq("string")
72
72
  expect(@dummy_class.validate_field("12/31/2014312")).to eq("string")
@@ -1,17 +1,17 @@
1
1
  # require 'spec_helper'
2
2
 
3
- describe 'substr_count' do
4
- context 'different possible delimiters' do
5
- it 'returns count of commas as delimiter in a string' do
3
+ describe "substr_count" do
4
+ context "different possible delimiters" do
5
+ it "returns count of commas as delimiter in a string" do
6
6
  expect("hello, hi, how, are you?".substr_count(",")).to eq(3)
7
7
  end
8
- it 'returns count of semi-colons as delimiter in a string' do
8
+ it "returns count of semi-colons as delimiter in a string" do
9
9
  expect("hello; hi, how, are you?".substr_count(";")).to eq(1)
10
10
  end
11
- it 'returns count of pipe as delimiter in a string' do
11
+ it "returns count of pipe as delimiter in a string" do
12
12
  expect("hello, hi| how| are you?".substr_count("|")).to eq(2)
13
13
  end
14
- it 'returns count of tab as delimiter in a string' do
14
+ it "returns count of tab as delimiter in a string" do
15
15
  expect("hello\thi\thow| are you?".substr_count("\t")).to eq(2)
16
16
  end
17
17
  end
@@ -1,53 +1,53 @@
1
1
  # require 'spec_helper'
2
2
  class DummyClass
3
3
  end
4
- describe '#form_query_for_datatype' do
4
+ describe "#form_query_for_datatype" do
5
5
  before(:each) do
6
6
  @dummy_class = DummyClass.new
7
7
  @dummy_class.extend(CsvImportAnalyzer::MysqlQueryHelper)
8
8
  end
9
- context 'expected arguments are not set' do
9
+ context "when expected arguments are not set" do
10
10
  let(:args) {Hash[:header => :test]}
11
11
  let(:args1) {Hash[:datatype, :test]}
12
- it ' returns missing arguments error' do
12
+ it "returns missing arguments error" do
13
13
  expect(@dummy_class.form_query_for_datatype(args)).to be_instance_of(MissingRequiredArguments)
14
14
  end
15
- it 'returns invalid if set to nil' do
15
+ it "returns invalid if set to nil" do
16
16
  expect(@dummy_class.form_query_for_datatype(args1)).to be_instance_of(MissingRequiredArguments)
17
17
  end
18
18
  end
19
19
 
20
- context 'expected arguments are set' do
20
+ context "when expected arguments are set" do
21
21
  let(:args) {Hash[:header => :test, :datatype => :string]}
22
22
  let(:args1) {Hash[:header => :test, :datatype => :integer]}
23
- it 'returns expected sql query for string' do
23
+ it "returns expected sql query for string" do
24
24
  expect(@dummy_class.form_query_for_datatype(args)).to eq("test varchar(255)")
25
25
  end
26
- it 'returns expected sql query for numeric' do
26
+ it "returns expected sql query for numeric" do
27
27
  expect(@dummy_class.form_query_for_datatype(args1)).to eq("test integer")
28
28
  end
29
29
  end
30
30
 
31
31
  end
32
- describe '#import_csv' do
32
+ describe "#import_csv" do
33
33
  before(:each) do
34
34
  @dummy_class = DummyClass.new
35
35
  @dummy_class.extend(CsvImportAnalyzer::MysqlQueryHelper)
36
36
  end
37
- context 'expected arguments are not set' do
37
+ context "when expected arguments are not set" do
38
38
  let(:args) {Hash[:tablename => "test", :delimiter => ","]}
39
39
  let(:args1) {Hash[:filename => "test"]}
40
- it ' return SqlQueryErrror' do
40
+ it "returns SqlQueryErrror" do
41
41
  expect(@dummy_class.import_csv(args)).to be_instance_of(MissingRequiredArguments)
42
42
  end
43
- it 'should return SqlQueryErrror' do
43
+ it "returns SqlQueryErrror" do
44
44
  expect(@dummy_class.import_csv(args1)).to be_instance_of(MissingRequiredArguments)
45
45
  end
46
46
  end
47
47
 
48
- context 'expected arguments are set' do
48
+ context "when expected arguments are set" do
49
49
  let(:args) {Hash[:tablename => "test", :delimiter => ",", :filename => "sample.csv"]}
50
- it 'returns expected import query' do
50
+ it "returns expected import query" do
51
51
  expect(@dummy_class.import_csv(args)).to eq("LOAD DATA INFILE sample.csv INTO TABLE test FIELDS TERMINATED BY ',' ENCLOSED BY '\"' LINES TERMINATED BY '\\n' IGNORE 1 LINES;")
52
52
  end
53
53
  end
@@ -1,55 +1,55 @@
1
1
  # require 'spec_helper'
2
2
  class DummyClass
3
3
  end
4
- describe '#form_query_for_datatype' do
4
+ describe "#form_query_for_datatype" do
5
5
  before(:each) do
6
+ # Creating a dummy object to test the modules.
7
+ # Extending with dummy object by adding module methods to it using extend
6
8
  @dummy_class = DummyClass.new
7
9
  @dummy_class.extend(CsvImportAnalyzer::PgQueryHelper)
8
10
  end
9
- context 'expected arguments are not set' do
11
+ context "when expected arguments are not set" do
10
12
  let(:args) {Hash[:header => :test]}
11
13
  let(:args1) {Hash[:datatype, :test]}
12
- it ' returns missing arguments error' do
14
+ it "returns missing arguments error" do
13
15
  expect(@dummy_class.form_query_for_datatype(args)).to be_instance_of(MissingRequiredArguments)
14
16
  end
15
- it 'returns invalid if set to nil' do
17
+ it "returns invalid if set to nil" do
16
18
  expect(@dummy_class.form_query_for_datatype(args1)).to be_instance_of(MissingRequiredArguments)
17
19
  end
18
20
  end
19
-
20
- context 'expected arguments are set' do
21
+ context "when expected arguments are set" do
21
22
  let(:args) {Hash[:header => :test, :datatype => :string]}
22
23
  let(:args1) {Hash[:header => :test, :datatype => :integer]}
23
- it 'returns expected sql query for string' do
24
+ it "returns expected sql query for string" do
24
25
  expect(@dummy_class.form_query_for_datatype(args)).to eq("test varchar(255)")
25
26
  end
26
- it 'returns expected sql query for numeric' do
27
+ it "returns expected sql query for numeric" do
27
28
  expect(@dummy_class.form_query_for_datatype(args1)).to eq("test integer")
28
29
  end
29
30
  end
30
-
31
31
  end
32
- describe '#import_csv' do
32
+ describe "#import_csv" do
33
33
  before(:each) do
34
34
  @dummy_class = DummyClass.new
35
35
  @dummy_class.extend(CsvImportAnalyzer::PgQueryHelper)
36
36
  end
37
- context 'expected arguments are not set' do
37
+ context "when expected arguments are not set" do
38
38
  let(:args) {Hash[:tablename => "test", :delimiter => ","]}
39
39
  let(:args1) {Hash[:filename => "test"]}
40
- it ' return SqlQueryErrror' do
40
+ it "returns SqlQueryErrror" do
41
41
  expect(@dummy_class.import_csv(args)).to be_instance_of(MissingRequiredArguments)
42
42
  end
43
- it 'should return SqlQueryErrror' do
43
+ it "returns SqlQueryErrror" do
44
44
  expect(@dummy_class.import_csv(args1)).to be_instance_of(MissingRequiredArguments)
45
45
  end
46
46
  end
47
47
 
48
- context 'expected arguments are set' do
48
+ context "when expected arguments are set" do
49
49
  let(:args) {Hash[:tablename => "test", :delimiter => ",", :filename => "filename"]}
50
- it 'returns expected import query' do
50
+ it "returns expected import query" do
51
51
  expect(@dummy_class.import_csv(args)).to eq("COPY test FROM 'filename' HEADER DELIMITER ',' CSV NULL AS 'NULL';")
52
52
  end
53
53
  end
54
54
 
55
- end
55
+ end
@@ -1,14 +1,11 @@
1
1
  # require 'spec_helper'
2
2
 
3
- # CsvImportAnalyzer.process("sampleTab.csv", {:metadata_output => true})
4
-
5
-
6
3
  describe CsvImportAnalyzer do
7
4
  include CsvImportAnalyzer
8
- it 'should return invalid file as file not found' do
5
+ it "return invalid file as file not found" do
9
6
  expect(CsvImportAnalyzer.process("sample.csv")).to be_instance_of(FileNotFound)
10
7
  end
11
- it 'should be able to process a valid file' do
8
+ it "processes a valid file" do
12
9
  expect(CsvImportAnalyzer.process($sample_csv_path)).not_to be_instance_of(FileNotFound)
13
10
  end
14
- end
11
+ end
@@ -1,5 +1,5 @@
1
1
  Year ID,Make ID,Model ID,Description ID,Price ID
2
- 1997,Ford,,"ac, abs, moon","3000"
3
- 1999,Chevy,"Venture ""Extended Edition""","",4900.00
2
+ 1997,Ford,,"ac, abs, moon","3000
3
+ 1999,Chevy,"Venture ""Extended Edition""",",4900.00
4
4
  1999,'Chevy',"Venture ""Extended Edition, Very Large""",,5000.00
5
5
  1996,Jeep,Grand Che'rokee,"MUST SELL!air, moon roof, loaded",4799.00
data/spec/spec_helper.rb CHANGED
@@ -1,6 +1,9 @@
1
1
  require 'simplecov'
2
+ # Must be ahead of requiring core library to start the code coverage.
2
3
  SimpleCov.start
3
4
  require 'csv-import-analyzer'
5
+
6
+ # Path to sample files in the fixtures folder that are made available to all specs for testing
4
7
  $sample_csv_path = "/home/avinash/Desktop/csv-import-analyzer/spec/fixtures/sample.csv"
5
8
  $sample_ssv_path = "/home/avinash/Desktop/csv-import-analyzer/spec/fixtures/semicolon-sample.csv"
6
9
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-import-analyzer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Avinash Vallabhaneni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-09 00:00:00.000000000 Z
11
+ date: 2014-10-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -85,6 +85,9 @@ dependencies:
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.0'
90
+ - - ">="
88
91
  - !ruby/object:Gem::Version
89
92
  version: 1.0.17
90
93
  type: :runtime
@@ -92,6 +95,9 @@ dependencies:
92
95
  version_requirements: !ruby/object:Gem::Requirement
93
96
  requirements:
94
97
  - - "~>"
98
+ - !ruby/object:Gem::Version
99
+ version: '1.0'
100
+ - - ">="
95
101
  - !ruby/object:Gem::Version
96
102
  version: 1.0.17
97
103
  description: Santize large csv files and help in predicting datatypes including min
@@ -123,15 +129,21 @@ files:
123
129
  - lib/csv-import-analyzer/query_builder/mysql_query_helper.rb
124
130
  - lib/csv-import-analyzer/query_builder/pg_query_helper.rb
125
131
  - lib/csv-import-analyzer/query_builder/query_helper.rb
126
- - lib/csv-import-analyzer/sampleTab.csv
127
132
  - lib/csv-import-analyzer/sql_query_builder.rb
128
133
  - lib/csv-import-analyzer/version.rb
129
- - samples/metadata_output.json
134
+ - samples/27_sweep_net_arthropods_1.csv
135
+ - samples/584_ccs_survey_data_2010.csv
136
+ - samples/591_fluxtower_data_corrected_2012.csv
137
+ - samples/5_photos_1.csv
138
+ - samples/80_water_chemistry_1.csv
139
+ - samples/86_dbg_irrigation_rates_1.csv
140
+ - samples/AllstarFull.csv
141
+ - samples/LDP_TenMin.dat
130
142
  - samples/sampleTab.csv
143
+ - samples/sampleTab.tsv
131
144
  - spec/csv-import-analyzer/analyzer/csv_check_bounds_spec.rb
132
145
  - spec/csv-import-analyzer/analyzer/delimiter_identifier_spec.rb
133
146
  - spec/csv-import-analyzer/analyzer/file_type_assertion_spec.rb
134
- - spec/csv-import-analyzer/csv_datatype_analysis_spec.rb
135
147
  - spec/csv-import-analyzer/csv_sanitizer_spec.rb
136
148
  - spec/csv-import-analyzer/export/metadata_analysis_spec.rb
137
149
  - spec/csv-import-analyzer/helpers/common_functions_spec.rb
@@ -176,7 +188,6 @@ test_files:
176
188
  - spec/csv-import-analyzer/analyzer/csv_check_bounds_spec.rb
177
189
  - spec/csv-import-analyzer/analyzer/delimiter_identifier_spec.rb
178
190
  - spec/csv-import-analyzer/analyzer/file_type_assertion_spec.rb
179
- - spec/csv-import-analyzer/csv_datatype_analysis_spec.rb
180
191
  - spec/csv-import-analyzer/csv_sanitizer_spec.rb
181
192
  - spec/csv-import-analyzer/export/metadata_analysis_spec.rb
182
193
  - spec/csv-import-analyzer/helpers/common_functions_spec.rb