csv-import-analyzer 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 40f1ef2bfdbd829eaa64dfa88f360ee722d60228
4
- data.tar.gz: c0beeb4085de093f7d41b79e3688cfe36a2b6811
3
+ metadata.gz: 7ff8951a5c2ea050bfbbebf39617d1d735a4f6ea
4
+ data.tar.gz: f00f91d6123a435c54a5585a7a3d014cae46c67b
5
5
  SHA512:
6
- metadata.gz: 94a4839a40f22301b36776b6155855f5bc49a162f3f6c3d9706c46b1105ecf4598c65969d76bc4fade7f1a2d9250fc32fe4fb3ce121657f18fd661bf71cf9ca5
7
- data.tar.gz: e359dd8f516b2a96d3f799477975c2006639229ee40a9a9cccf18cca8893e16217c6f7ef1349df4ddd448dbbb7d2964c57854a5d7094808ab81b14fdebe8a4cf
6
+ metadata.gz: 474adc7c9f7e0946c2c86a6c14395569c9574fd4865bc907f43614df39386336959f3cc1b3b71e51a7fa6d2844e8426bf8be22475345a5bceb153cf7473662e5
7
+ data.tar.gz: b87c638ac9a94fee9fcc357566c50517db1dd30e3bf3c3d158eab05d128d4e17f2965f1a1a37472e7846c807dd4030805e4ac3ea8f6bf8c543327ab02142d53a
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+ require 'optparse'
3
+ require 'csv-import-analyzer'
4
+
5
+ # Default options for the executable
6
+ options = {:input => nil, :metadata_output => nil, :processed_input => nil, :unique => 5,
7
+ :chunk => 200, :database => [:mysql], :quote_convert => true, :replace_nulls => true, :check_bounds => true}
8
+
9
+ # Parse the options using optparse
10
+ # Parses the input given on the command line and sets the respective option
11
+ # E.g. CsvImportAnalyzer -i "test.csv"
12
+ # ==> options[:input] = "test.csv"
13
+ parser = OptionParser.new do |opts|
14
+ opts.banner = "Usage: CsvImportAnalyzer [options]"
15
+
16
+ opts.on('-i', '--input filename', 'Input file name') do |input|
17
+ options[:input] = input # todo: be able to handle files not in the current directory
18
+ end
19
+ opts.on('-m', '--output-structure filename', 'Output the metadata of file') do |metadata_output|
20
+ options[:metadata_output] = metadata_output
21
+ end
22
+ opts.on('-o', '--output-cleaned filename', 'Output the cleaned csv file name, defaults to current driectory proccessed_(filename).csv ') do |processed_input|
23
+ options[:processed_input] = processed_input
24
+ end
25
+ opts.on('-u', '--unique unique', 'No of Unique values you need, default: 10') do |unique|
26
+ options[:unique] = unique
27
+ end
28
+ opts.on('-c', '--chunk size', 'Chunk size for predecting datatypes, default: 200') do |chunk|
29
+ options[:chunk] = chunk
30
+ end
31
+ # opts.on('-s', '--skip lines', 'skip the number of lines at the top, default: 0') do |skip|
32
+ # options[:skip] = skip
33
+ # end
34
+ opts.on('-d', '--database type', 'MySQL or Postgres, Options: M or P, default: nil(print nothing)') do |database_type|
35
+ options[:database] = [database_type.upcase]
36
+ end
37
+ opts.on('-q', '--quotes conversion', 'Convert single quotes to double quotes, options: true or false, default: true') do |quote_convert|
38
+ options[:quote_convert] = quote_convert.upcase
39
+ end
40
+ opts.on('-r', '--replace nulls', 'replace empty, Null\'s, \N, NAN with NULL, options: true or false, default: true') do |replace_nulls|
41
+ options[:replace_nulls] = replace_nulls.upcase
42
+ end
43
+ opts.on('-h', '--help', 'Displays Help') do
44
+ puts opts
45
+ exit
46
+ end
47
+ end
48
+ parser.parse!
49
+
50
+ # Input validations
51
+ # Make sure a filename is given to the executable
52
+ filename = nil
53
+ if options[:input] == nil
54
+ print " Requires a valid input file name! \n"
55
+ puts parser
56
+ exit
57
+ end
58
+
59
+ puts CsvImportAnalyzer.process(options[:input], options)
@@ -23,6 +23,6 @@ Gem::Specification.new do |spec|
23
23
  spec.add_development_dependency "pry", "~> 0.10"
24
24
  spec.add_development_dependency "rspec", "~> 3.0"
25
25
  spec.add_development_dependency "simplecov", "~> 0.9"
26
-
26
+
27
27
  spec.add_runtime_dependency "smarter_csv", "~> 1.0", ">= 1.0.17"
28
28
  end
@@ -11,6 +11,7 @@ module CsvImportAnalyzer
11
11
  # returns FileNotFound if given file is invalid
12
12
  ###
13
13
  def process(filename, options = {})
14
+ return ArgumentError.new("A valid file needed to process") if filename.nil?
14
15
  if File::exist?(filename)
15
16
  CsvImportAnalyzer::CsvSanitizer.new().process(File.absolute_path(filename), options)
16
17
  else
@@ -69,7 +69,7 @@ module CsvImportAnalyzer
69
69
  {
70
70
  :metadata_output => nil, # To be set if metadata needs to be printed to a file
71
71
  :processed_input => nil, # To be set if processed input is needed
72
- :unique => 2, # Threshold for number of defaults values that needs to identified
72
+ :unique => 5, # Threshold for number of defaults values that needs to identified
73
73
  :check_bounds => true, # Option to check for min - max bounds for each column [true => find the bounds]
74
74
  :datatype_analysis => 200, # Number of rows to be sampled for datatype analysis
75
75
  :chunk => 200, # Chunk size (no of rows) that needs to processed in-memory [Important not to load entire file into memory]
@@ -1,5 +1,5 @@
1
1
  module CsvImportAnalyzer
2
2
  module Version
3
- VERSION = "0.0.6"
3
+ VERSION = "0.0.7"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-import-analyzer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Avinash Vallabhaneni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-30 00:00:00.000000000 Z
11
+ date: 2014-11-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -104,7 +104,8 @@ description: Santize large csv files and help in predicting datatypes including
104
104
  max values for easy import to SQL
105
105
  email:
106
106
  - avinash.vallab@gmail.com
107
- executables: []
107
+ executables:
108
+ - CsvImportAnalyzer
108
109
  extensions: []
109
110
  extra_rdoc_files: []
110
111
  files:
@@ -114,6 +115,7 @@ files:
114
115
  - LICENSE.txt
115
116
  - README.md
116
117
  - Rakefile
118
+ - bin/CsvImportAnalyzer
117
119
  - csv-import-analyzer.gemspec
118
120
  - lib/csv-import-analyzer.rb
119
121
  - lib/csv-import-analyzer/analyzer/csv_check_bounds.rb