csv-import-analyzer 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 40f1ef2bfdbd829eaa64dfa88f360ee722d60228
4
- data.tar.gz: c0beeb4085de093f7d41b79e3688cfe36a2b6811
3
+ metadata.gz: 7ff8951a5c2ea050bfbbebf39617d1d735a4f6ea
4
+ data.tar.gz: f00f91d6123a435c54a5585a7a3d014cae46c67b
5
5
  SHA512:
6
- metadata.gz: 94a4839a40f22301b36776b6155855f5bc49a162f3f6c3d9706c46b1105ecf4598c65969d76bc4fade7f1a2d9250fc32fe4fb3ce121657f18fd661bf71cf9ca5
7
- data.tar.gz: e359dd8f516b2a96d3f799477975c2006639229ee40a9a9cccf18cca8893e16217c6f7ef1349df4ddd448dbbb7d2964c57854a5d7094808ab81b14fdebe8a4cf
6
+ metadata.gz: 474adc7c9f7e0946c2c86a6c14395569c9574fd4865bc907f43614df39386336959f3cc1b3b71e51a7fa6d2844e8426bf8be22475345a5bceb153cf7473662e5
7
+ data.tar.gz: b87c638ac9a94fee9fcc357566c50517db1dd30e3bf3c3d158eab05d128d4e17f2965f1a1a37472e7846c807dd4030805e4ac3ea8f6bf8c543327ab02142d53a
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+ require 'optparse'
3
+ require 'csv-import-analyzer'
4
+
5
+ # Default options for the executable
6
+ options = {:input => nil, :metadata_output => nil, :processed_input => nil, :unique => 5,
7
+ :chunk => 200, :database => [:mysql], :quote_convert => true, :replace_nulls => true, :check_bounds => true}
8
+
9
+ # Parse the options using optparse
10
+ # Parses the input given on the command line and sets the respective option
11
+ # E.g. CsvImportAnalyzer -i "test.csv"
12
+ # ==> options[:input] = "test.csv"
13
+ parser = OptionParser.new do |opts|
14
+ opts.banner = "Usage: CsvImportAnalyzer [options]"
15
+
16
+ opts.on('-i', '--input filename', 'Input file name') do |input|
17
+ options[:input] = input # todo: be able to handle files not in the current directory
18
+ end
19
+ opts.on('-m', '--output-structure filename', 'Output the metadata of file') do |metadata_output|
20
+ options[:metadata_output] = metadata_output
21
+ end
22
+ opts.on('-o', '--output-cleaned filename', 'Output the cleaned csv file name, defaults to current driectory proccessed_(filename).csv ') do |processed_input|
23
+ options[:processed_input] = processed_input
24
+ end
25
+ opts.on('-u', '--unique unique', 'No of Unique values you need, default: 10') do |unique|
26
+ options[:unique] = unique
27
+ end
28
+ opts.on('-c', '--chunk size', 'Chunk size for predecting datatypes, default: 200') do |chunk|
29
+ options[:chunk] = chunk
30
+ end
31
+ # opts.on('-s', '--skip lines', 'skip the number of lines at the top, default: 0') do |skip|
32
+ # options[:skip] = skip
33
+ # end
34
+ opts.on('-d', '--database type', 'MySQL or Postgres, Options: M or P, default: nil(print nothing)') do |database_type|
35
+ options[:database] = [database_type.upcase]
36
+ end
37
+ opts.on('-q', '--quotes conversion', 'Convert single quotes to double quotes, options: true or false, default: true') do |quote_convert|
38
+ options[:quote_convert] = quote_convert.upcase
39
+ end
40
+ opts.on('-r', '--replace nulls', 'replace empty, Null\'s, \N, NAN with NULL, options: true or false, default: true') do |replace_nulls|
41
+ options[:replace_nulls] = replace_nulls.upcase
42
+ end
43
+ opts.on('-h', '--help', 'Displays Help') do
44
+ puts opts
45
+ exit
46
+ end
47
+ end
48
+ parser.parse!
49
+
50
+ # Input validations
51
+ # Make sure a filename is given to the executable
52
+ filename = nil
53
+ if options[:input] == nil
54
+ print " Requires a valid input file name! \n"
55
+ puts parser
56
+ exit
57
+ end
58
+
59
+ puts CsvImportAnalyzer.process(options[:input], options)
@@ -23,6 +23,6 @@ Gem::Specification.new do |spec|
23
23
  spec.add_development_dependency "pry", "~> 0.10"
24
24
  spec.add_development_dependency "rspec", "~> 3.0"
25
25
  spec.add_development_dependency "simplecov", "~> 0.9"
26
-
26
+
27
27
  spec.add_runtime_dependency "smarter_csv", "~> 1.0", ">= 1.0.17"
28
28
  end
@@ -11,6 +11,7 @@ module CsvImportAnalyzer
11
11
  # returns FileNotFound if given file is invalid
12
12
  ###
13
13
  def process(filename, options = {})
14
+ return ArgumentError.new("A valid file needed to process") if filename.nil?
14
15
  if File::exist?(filename)
15
16
  CsvImportAnalyzer::CsvSanitizer.new().process(File.absolute_path(filename), options)
16
17
  else
@@ -69,7 +69,7 @@ module CsvImportAnalyzer
69
69
  {
70
70
  :metadata_output => nil, # To be set if metadata needs to be printed to a file
71
71
  :processed_input => nil, # To be set if processed input is needed
72
- :unique => 2, # Threshold for number of defaults values that needs to identified
72
+ :unique => 5, # Threshold for number of defaults values that needs to identified
73
73
  :check_bounds => true, # Option to check for min - max bounds for each column [true => find the bounds]
74
74
  :datatype_analysis => 200, # Number of rows to be sampled for datatype analysis
75
75
  :chunk => 200, # Chunk size (no of rows) that needs to processed in-memory [Important not to load entire file into memory]
@@ -1,5 +1,5 @@
1
1
  module CsvImportAnalyzer
2
2
  module Version
3
- VERSION = "0.0.6"
3
+ VERSION = "0.0.7"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-import-analyzer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Avinash Vallabhaneni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-30 00:00:00.000000000 Z
11
+ date: 2014-11-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -104,7 +104,8 @@ description: Santize large csv files and help in predicting datatypes including
104
104
  max values for easy import to SQL
105
105
  email:
106
106
  - avinash.vallab@gmail.com
107
- executables: []
107
+ executables:
108
+ - CsvImportAnalyzer
108
109
  extensions: []
109
110
  extra_rdoc_files: []
110
111
  files:
@@ -114,6 +115,7 @@ files:
114
115
  - LICENSE.txt
115
116
  - README.md
116
117
  - Rakefile
118
+ - bin/CsvImportAnalyzer
117
119
  - csv-import-analyzer.gemspec
118
120
  - lib/csv-import-analyzer.rb
119
121
  - lib/csv-import-analyzer/analyzer/csv_check_bounds.rb