idata 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 61fc4d12442729bbdf41101831b69d10227bf8dd
4
- data.tar.gz: 6614f00c45bf15d9e7daaa7f661180b1a5215794
3
+ metadata.gz: da7b55510deb9dc064fdbb66ae899b7c01d47379
4
+ data.tar.gz: 9af104c24dd81b1e4d4347f717d360ddd4ba639c
5
5
  SHA512:
6
- metadata.gz: 7e98710d6ca73f91e52a31e7c9f17d8d3e235155ab194a51943f245e305de71c8021cf8dbf2ccc0848f2814e714d3381178ee9eed66d1a9e11f67ac9a4f8b2e6
7
- data.tar.gz: bb32b89fa342436dcff667a460a4e2aab1b5e39db3e9d98e1dc0aa42410e6bf9233c4e3eb89a4764893841f35bc6cb241e1eda37c4cbfbcc3df91ee51739e25c
6
+ metadata.gz: 231e0051b69e0a402b3f606f08ae69b351ca63116db4d0bbe835b22ac9aaf268ea563135931c13b9f795c9d78090d446c4188e2175e2192840792e6c4ef0067e
7
+ data.tar.gz: 7592dbb8b9c6dda8e7756c7c36f17543b39e094f44f06d587a11e0141ccf06029ffc23966a2361475ffbe51082fff2afff01a61325b0936b33d836d77f713c07
data/bin/iload CHANGED
@@ -8,9 +8,21 @@
8
8
  # a corresponding table in the specified database
9
9
  # Issue ruby load.rb --help for guideline/examples
10
10
  #
11
+
12
+ require 'idata'
11
13
  require 'optparse'
12
14
  require 'csv'
13
- require 'active_record'
15
+
16
+ begin
17
+ require 'active_record'
18
+ rescue Exception => ex
19
+ end
20
+
21
+ begin
22
+ require 'activerecord'
23
+ rescue Exception => ex
24
+ end
25
+
14
26
  require 'rubygems'
15
27
  require 'digest/sha1'
16
28
  require 'fileutils'
@@ -29,9 +41,13 @@ end
29
41
 
30
42
  $options = {}
31
43
  parser = OptionParser.new("", 24) do |opts|
32
- opts.banner = "\nProgram: Data Loader\nAuthor: MCKi Team\nDescription: the program takes an input text file and creates a corresponding data table\n\n"
44
+ opts.banner = "Program: iload #{Idata::VERSION}\nAuthor: Gaugau\n\nUsage: iload <client name> [option]\n iload [options]\n"
45
+ opts.version = Idata::VERSION
33
46
 
34
- opts.on("-i", "--input INPUT", "Input text file") do |v|
47
+ opts.separator ""
48
+ opts.separator "Command options:"
49
+
50
+ opts.on("-i", "--input INPUT", "Input file") do |v|
35
51
  $options[:input] = v
36
52
  end
37
53
 
@@ -51,10 +67,6 @@ parser = OptionParser.new("", 24) do |opts|
51
67
  $options[:quote] = v
52
68
  end
53
69
 
54
- # opts.on("-o", "--output CSV", "Temporary CSV output file") do |v|
55
- # $options[:output] = v
56
- # end
57
-
58
70
  opts.on("-t", "--table TABLE", "Table name to be created") do |v|
59
71
  $options[:table] = v
60
72
  end
@@ -62,6 +74,9 @@ parser = OptionParser.new("", 24) do |opts|
62
74
  opts.on("--drop", "") do |v|
63
75
  $options[:drop] = v
64
76
  end
77
+
78
+ opts.separator ""
79
+ opts.separator "Connection options, can be ommited if <client name> is already specified:"
65
80
 
66
81
  opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
67
82
  $options[:host] = v
@@ -82,25 +97,41 @@ parser = OptionParser.new("", 24) do |opts|
82
97
  opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
83
98
  $options[:listen] = v
84
99
  end
100
+
101
+ opts.separator ""
102
+ opts.separator "Common options:"
85
103
 
86
104
  opts.on_tail('--help', 'Displays this help') do
87
- puts opts, "", help
105
+ puts opts, "", help
88
106
  exit
89
- end
107
+ end
108
+
90
109
  end
91
110
 
92
111
  def help
93
112
  return <<-eos
94
-
95
- EXAMPLES
113
+ Example:
96
114
  -------------------------------------------------------
97
- Load data from text file and store to a table name "vendors"
115
+ Load data from text file and store to a table name "items"
116
+
117
+ iload --host=localhost --username=postgres --password=postgres --database=db
118
+ --table=items --input=/user/items.csv
119
+
120
+ In short form, with client-name specified:
121
+
122
+ iload maricopa --table=items --input=/user/items.csv
98
123
 
99
- iload --host=localhost --username=postgres --password=postgres \\
100
- --database=db --table=vendors \\
101
- --input=/home/administrator/VendorMaster.txt \\
102
- --format=CSV --delim=$'\\t'
124
+ To use the short form, the following environment variables must be present:
103
125
 
126
+ MAINDBHOST
127
+ MAINDBSER
128
+ MAINDBNAME
129
+ MAINDBPORT
130
+
131
+ Column delimiter will be auto-detected, in case you want to overwrite the default, use --format/-f.
132
+ For example, tell the program to use TAB as delimiter:
133
+
134
+ iload maricopa --table=items --input=/user/items.csv --format=$'\\t'
104
135
 
105
136
  eos
106
137
  end
@@ -124,7 +155,6 @@ $options[:format].upcase! if $options[:format]
124
155
  $options[:format] ||= CSV_DEFAULT_FORMAT
125
156
  $options[:listen] ||= POSTGRESQL_PORT unless $options[:client]
126
157
  $options[:username] ||= POSTGRESQL_USERNAME unless $options[:client]
127
- $options[:delim] ||= CSV_DEFAULT_DELIMITER
128
158
  $options[:quote] ||= CSV_DEFAULT_QUOTE
129
159
  $options[:drop] ||= false
130
160
 
@@ -141,6 +171,9 @@ unless File.exists?($options[:input])
141
171
  error "file `#{$options[:input]}` not found!"
142
172
  end
143
173
 
174
+ # auto detect delimiter
175
+ $options[:delim] ||= Idata::Detector::new($options[:input]).find
176
+
144
177
  if !SUPPORTED_INPUT_FORMATS.include?($options[:format])
145
178
  error "invalid input file format, supported formats are: #{SUPPORTED_INPUT_FORMATS.join(', ')}"
146
179
  end
@@ -347,12 +380,15 @@ class MyParser
347
380
  end
348
381
 
349
382
  # Execute
350
- `#{insert_data_sql}`
383
+ `#{insert_data_sql} > /dev/null`
351
384
 
352
- # Clean up
353
- File.delete(csv_path) if File.exists?(csv_path)
354
-
355
- puts "Table `#{$options[:table]}` loaded\n"
385
+ if $?.exitstatus == 0
386
+ # Clean up
387
+ File.delete(csv_path) if File.exists?(csv_path)
388
+ puts "Table `#{$options[:table]}` loaded\n"
389
+ else
390
+ puts "Something went wrong!"
391
+ end
356
392
  end
357
393
 
358
394
  private
data/lib/idata.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  require "idata/version"
2
+ require "idata/detector"
2
3
 
3
4
  module Idata
4
5
  # Your code goes here...
5
6
  end
7
+
8
+
@@ -0,0 +1,60 @@
1
+ require 'csv'
2
module Idata
  # Guesses the column delimiter of a delimited text file by inspecting its
  # first SAMPLE_SIZE lines.
  #
  # Example:
  #   Idata::Detector.new("/path/to/items.csv").find  # => ",", "|", "\t" or ";"
  class Detector
    DEFAULT_DELIMITER = ","
    COMMON_DELIMITERS = [DEFAULT_DELIMITER, "|", "\t", ";"].freeze
    SAMPLE_SIZE = 100

    # Reads the sample and counts how often each common delimiter occurs in it.
    # Delimiters that never occur are dropped from the candidate list.
    #
    # @param file [String] path of the file to sample
    def initialize(file)
      @file = file
      @sample = read_sample
      @sample_lines = @sample.split(/[\r\n]+/)
      @candidates = COMMON_DELIMITERS.map { |delim|
        [delim, @sample.scan(delim).count]
      }.to_h.select { |_delim, count| count > 0 }
    end

    # Runs the heuristics from most to least confident and returns the first
    # answer; falls back to DEFAULT_DELIMITER when none of them is conclusive.
    #
    # @return [String] the detected delimiter
    def find
      # No candidate at all, for example a file with a single-column header only.
      return DEFAULT_DELIMITER if @candidates.empty?

      find_same_occurence || find_valid || find_max_occurence || DEFAULT_DELIMITER
    end

    # Lowest bar: the sample parses as CSV with the delimiter.
    #
    # @return [String, nil] the delimiter, or nil when the result is ambiguous
    def find_valid
      pick(@candidates.keys.select { |delim| parsed_rows(delim) })
    end

    # High confidence: the sample parses and every non-empty row has the same
    # number of columns.
    #
    # @return [String, nil] the delimiter, or nil when the result is ambiguous
    def find_same_occurence
      consistent = @candidates.keys.select do |delim|
        rows = parsed_rows(delim)
        rows && rows.reject(&:empty?).map(&:count).uniq.count == 1
      end

      pick(consistent)
    end

    # Last resort: the delimiter that occurs most often in the sample.
    #
    # @return [String, nil] the delimiter, or nil when the result is ambiguous
    def find_max_occurence
      # Compare each count with the highest count (not with a [delim, count]
      # pair, which can never equal an Integer).
      highest = @candidates.values.max
      pick(@candidates.select { |_delim, count| count == highest }.keys)
    end

    private

    # Returns the first SAMPLE_SIZE lines of the file as one string. The file is
    # read directly instead of shelling out, so paths containing spaces or shell
    # metacharacters are handled safely. An unreadable file yields an empty
    # sample (and a warning on stderr), which makes #find return the default.
    def read_sample
      File.foreach(@file).first(SAMPLE_SIZE).join
    rescue SystemCallError => e
      warn "idata: cannot read `#{@file}`: #{e.message}"
      ""
    end

    # Parses the sample with the given delimiter, caching the outcome so each
    # candidate is parsed only once. Returns nil when parsing fails.
    def parsed_rows(delim)
      @parsed ||= {}
      return @parsed[delim] if @parsed.key?(delim)

      @parsed[delim] = begin
        CSV.parse(@sample, col_sep: delim)
      rescue StandardError
        nil
      end
    end

    # Shared decision rule: a single survivor wins; otherwise prefer the default
    # delimiter if it is among the survivors; otherwise give no answer (nil).
    def pick(selected)
      return selected.first if selected.count == 1

      DEFAULT_DELIMITER if selected.include?(DEFAULT_DELIMITER)
    end
  end
end
60
+
data/lib/idata/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Idata
2
- VERSION = "1.0.0"
2
+ VERSION = "1.1.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: idata
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nghi Pham
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-17 00:00:00.000000000 Z
11
+ date: 2016-03-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -86,6 +86,7 @@ files:
86
86
  - full.sh
87
87
  - idata.gemspec
88
88
  - lib/idata.rb
89
+ - lib/idata/detector.rb
89
90
  - lib/idata/version.rb
90
91
  - sample.sh
91
92
  homepage: https://github.com/minhnghivn/idata