csv-import-analyzer 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/CsvImportAnalyzer +59 -0
- data/csv-import-analyzer.gemspec +1 -1
- data/lib/csv-import-analyzer.rb +1 -0
- data/lib/csv-import-analyzer/csv_sanitizer.rb +1 -1
- data/lib/csv-import-analyzer/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7ff8951a5c2ea050bfbbebf39617d1d735a4f6ea
|
4
|
+
data.tar.gz: f00f91d6123a435c54a5585a7a3d014cae46c67b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 474adc7c9f7e0946c2c86a6c14395569c9574fd4865bc907f43614df39386336959f3cc1b3b71e51a7fa6d2844e8426bf8be22475345a5bceb153cf7473662e5
|
7
|
+
data.tar.gz: b87c638ac9a94fee9fcc357566c50517db1dd30e3bf3c3d158eab05d128d4e17f2965f1a1a37472e7846c807dd4030805e4ac3ea8f6bf8c543327ab02142d53a
|
@@ -0,0 +1,59 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
3
|
+
require 'csv-import-analyzer'
|
4
|
+
|
5
|
+
# Default options for the executable
|
6
|
+
options = {:input => nil, :metadata_output => nil, :processed_input => nil, :unique => 5,
|
7
|
+
:chunk => 200, :database => [:mysql], :quote_convert => true, :replace_nulls => true, :check_bounds => true}
|
8
|
+
|
9
|
+
# Parse the options using optparse
|
10
|
+
# Parses the input given through the command line and sets the respective option
|
11
|
+
# E.g. CsvImportAnalyzer -i "test.csv"
|
12
|
+
# ==> options[:input] = "test.csv"
|
13
|
+
parser = OptionParser.new do |opts|
|
14
|
+
opts.banner = "Usage: CsvImportAnalyzer [options]"
|
15
|
+
|
16
|
+
opts.on('-i', '--input filename', 'Input file name') do |input|
|
17
|
+
options[:input] = input # todo: be able to handle files not in the current directory
|
18
|
+
end
|
19
|
+
opts.on('-m', '--output-structure filename', 'Output the metadata of file') do |metadata_output|
|
20
|
+
options[:metadata_output] = metadata_output
|
21
|
+
end
|
22
|
+
opts.on('-o', '--output-cleaned filename', 'Output the cleaned csv file name, defaults to current driectory proccessed_(filename).csv ') do |processed_input|
|
23
|
+
options[:processed_input] = processed_input
|
24
|
+
end
|
25
|
+
opts.on('-u', '--unique unique', 'No of Unique values you need, default: 10') do |unique|
|
26
|
+
options[:unique] = unique
|
27
|
+
end
|
28
|
+
opts.on('-c', '--chunk size', 'Chunk size for predecting datatypes, default: 200') do |chunk|
|
29
|
+
options[:chunk] = chunk
|
30
|
+
end
|
31
|
+
# opts.on('-s', '--skip lines', 'skip the number of lines at the top, default: 0') do |skip|
|
32
|
+
# options[:skip] = skip
|
33
|
+
# end
|
34
|
+
opts.on('-d', '--database type', 'MySQL or Postgres, Options: M or P, default: nil(print nothing)') do |database_type|
|
35
|
+
options[:database] = [database_type.upcase]
|
36
|
+
end
|
37
|
+
opts.on('-q', '--quotes conversion', 'Convert single quotes to double quotes, options: true or false, default: true') do |quote_convert|
|
38
|
+
options[:quote_convert] = quote_convert.upcase
|
39
|
+
end
|
40
|
+
opts.on('-r', '--replace nulls', 'replace empty, Null\'s, \N, NAN with NULL, options: true or false, default: true') do |replace_nulls|
|
41
|
+
options[:replace_nulls] = replace_nulls.upcase
|
42
|
+
end
|
43
|
+
opts.on('-h', '--help', 'Displays Help') do
|
44
|
+
puts opts
|
45
|
+
exit
|
46
|
+
end
|
47
|
+
end
|
48
|
+
parser.parse!
|
49
|
+
|
50
|
+
# Input validations
|
51
|
+
# Make sure a filename is given to the executable
|
52
|
+
filename = nil
|
53
|
+
if options[:input] == nil
|
54
|
+
print " Requires a valid input file name! \n"
|
55
|
+
puts parser
|
56
|
+
exit
|
57
|
+
end
|
58
|
+
|
59
|
+
puts CsvImportAnalyzer.process(options[:input], options)
|
data/csv-import-analyzer.gemspec
CHANGED
@@ -23,6 +23,6 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.add_development_dependency "pry", "~> 0.10"
|
24
24
|
spec.add_development_dependency "rspec", "~> 3.0"
|
25
25
|
spec.add_development_dependency "simplecov", "~> 0.9"
|
26
|
-
|
26
|
+
|
27
27
|
spec.add_runtime_dependency "smarter_csv", "~> 1.0", ">= 1.0.17"
|
28
28
|
end
|
data/lib/csv-import-analyzer.rb
CHANGED
@@ -11,6 +11,7 @@ module CsvImportAnalyzer
|
|
11
11
|
# returns FileNotFound if given file is invalid
|
12
12
|
###
|
13
13
|
def process(filename, options = {})
|
14
|
+
return ArgumentError.new("A valid file needed to process") if filename.nil?
|
14
15
|
if File::exist?(filename)
|
15
16
|
CsvImportAnalyzer::CsvSanitizer.new().process(File.absolute_path(filename), options)
|
16
17
|
else
|
@@ -69,7 +69,7 @@ module CsvImportAnalyzer
|
|
69
69
|
{
|
70
70
|
:metadata_output => nil, # To be set if metadata needs to be printed to a file
|
71
71
|
:processed_input => nil, # To be set if processed input is needed
|
72
|
-
:unique =>
|
72
|
+
:unique => 5, # Threshold for the number of default values that need to be identified
|
73
73
|
:check_bounds => true, # Option to check for min - max bounds for each column [true => find the bounds]
|
74
74
|
:datatype_analysis => 200, # Number of rows to be sampled for datatype analysis
|
75
75
|
:chunk => 200, # Chunk size (no. of rows) that needs to be processed in-memory [Important not to load the entire file into memory]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-import-analyzer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Avinash Vallabhaneni
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-11-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -104,7 +104,8 @@ description: Santize large csv files and help in predicting datatypes including
|
|
104
104
|
max values for easy import to SQL
|
105
105
|
email:
|
106
106
|
- avinash.vallab@gmail.com
|
107
|
-
executables:
|
107
|
+
executables:
|
108
|
+
- CsvImportAnalyzer
|
108
109
|
extensions: []
|
109
110
|
extra_rdoc_files: []
|
110
111
|
files:
|
@@ -114,6 +115,7 @@ files:
|
|
114
115
|
- LICENSE.txt
|
115
116
|
- README.md
|
116
117
|
- Rakefile
|
118
|
+
- bin/CsvImportAnalyzer
|
117
119
|
- csv-import-analyzer.gemspec
|
118
120
|
- lib/csv-import-analyzer.rb
|
119
121
|
- lib/csv-import-analyzer/analyzer/csv_check_bounds.rb
|