idata 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 61fc4d12442729bbdf41101831b69d10227bf8dd
4
- data.tar.gz: 6614f00c45bf15d9e7daaa7f661180b1a5215794
3
+ metadata.gz: da7b55510deb9dc064fdbb66ae899b7c01d47379
4
+ data.tar.gz: 9af104c24dd81b1e4d4347f717d360ddd4ba639c
5
5
  SHA512:
6
- metadata.gz: 7e98710d6ca73f91e52a31e7c9f17d8d3e235155ab194a51943f245e305de71c8021cf8dbf2ccc0848f2814e714d3381178ee9eed66d1a9e11f67ac9a4f8b2e6
7
- data.tar.gz: bb32b89fa342436dcff667a460a4e2aab1b5e39db3e9d98e1dc0aa42410e6bf9233c4e3eb89a4764893841f35bc6cb241e1eda37c4cbfbcc3df91ee51739e25c
6
+ metadata.gz: 231e0051b69e0a402b3f606f08ae69b351ca63116db4d0bbe835b22ac9aaf268ea563135931c13b9f795c9d78090d446c4188e2175e2192840792e6c4ef0067e
7
+ data.tar.gz: 7592dbb8b9c6dda8e7756c7c36f17543b39e094f44f06d587a11e0141ccf06029ffc23966a2361475ffbe51082fff2afff01a61325b0936b33d836d77f713c07
data/bin/iload CHANGED
@@ -8,9 +8,21 @@
8
8
  # a corresponding table in the specified database
9
9
  # Issue ruby load.rb --help for guideline/examples
10
10
  #
11
+
12
+ require 'idata'
11
13
  require 'optparse'
12
14
  require 'csv'
13
- require 'active_record'
15
+
16
+ begin
17
+ require 'active_record'
18
+ rescue Exception => ex
19
+ end
20
+
21
+ begin
22
+ require 'activerecord'
23
+ rescue Exception => ex
24
+ end
25
+
14
26
  require 'rubygems'
15
27
  require 'digest/sha1'
16
28
  require 'fileutils'
@@ -29,9 +41,13 @@ end
29
41
 
30
42
  $options = {}
31
43
  parser = OptionParser.new("", 24) do |opts|
32
- opts.banner = "\nProgram: Data Loader\nAuthor: MCKi Team\nDescription: the program takes an input text file and creates a corresponding data table\n\n"
44
+ opts.banner = "Program: iload #{Idata::VERSION}\nAuthor: Gaugau\n\nUsage: iload <client name> [option]\n iload [options]\n"
45
+ opts.version = Idata::VERSION
33
46
 
34
- opts.on("-i", "--input INPUT", "Input text file") do |v|
47
+ opts.separator ""
48
+ opts.separator "Command options:"
49
+
50
+ opts.on("-i", "--input INPUT", "Input file") do |v|
35
51
  $options[:input] = v
36
52
  end
37
53
 
@@ -51,10 +67,6 @@ parser = OptionParser.new("", 24) do |opts|
51
67
  $options[:quote] = v
52
68
  end
53
69
 
54
- # opts.on("-o", "--output CSV", "Temporary CSV output file") do |v|
55
- # $options[:output] = v
56
- # end
57
-
58
70
  opts.on("-t", "--table TABLE", "Table name to be created") do |v|
59
71
  $options[:table] = v
60
72
  end
@@ -62,6 +74,9 @@ parser = OptionParser.new("", 24) do |opts|
62
74
  opts.on("--drop", "") do |v|
63
75
  $options[:drop] = v
64
76
  end
77
+
78
+ opts.separator ""
79
+ opts.separator "Connection options, can be ommited if <client name> is already specified:"
65
80
 
66
81
  opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
67
82
  $options[:host] = v
@@ -82,25 +97,41 @@ parser = OptionParser.new("", 24) do |opts|
82
97
  opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
83
98
  $options[:listen] = v
84
99
  end
100
+
101
+ opts.separator ""
102
+ opts.separator "Common options:"
85
103
 
86
104
  opts.on_tail('--help', 'Displays this help') do
87
- puts opts, "", help
105
+ puts opts, "", help
88
106
  exit
89
- end
107
+ end
108
+
90
109
  end
91
110
 
92
111
  def help
93
112
  return <<-eos
94
-
95
- EXAMPLES
113
+ Example:
96
114
  -------------------------------------------------------
97
- Load data from text file and store to a table name "vendors"
115
+ Load data from text file and store to a table name "items"
116
+
117
+ iload --host=localhost --username=postgres --password=postgres --database=db
118
+ --table=items --input=/user/items.csv
119
+
120
+ In short form, with client-name specified:
121
+
122
+ iload maricopa --table=items --input=/user/items.csv
98
123
 
99
- iload --host=localhost --username=postgres --password=postgres \\
100
- --database=db --table=vendors \\
101
- --input=/home/administrator/VendorMaster.txt \\
102
- --format=CSV --delim=$'\\t'
124
+ To use the short form, the following environment variables must be present:
103
125
 
126
+ MAINDBHOST
127
+ MAINDBSER
128
+ MAINDBNAME
129
+ MAINDBPORT
130
+
131
+ Column delimiter will be auto-detected, in case you want to overwrite the default, use --format/-f.
132
+ For example, tell the program to use TAB as delimiter:
133
+
134
+ iload maricopa --table=items --input=/user/items.csv --format=$'\\t'
104
135
 
105
136
  eos
106
137
  end
@@ -124,7 +155,6 @@ $options[:format].upcase! if $options[:format]
124
155
  $options[:format] ||= CSV_DEFAULT_FORMAT
125
156
  $options[:listen] ||= POSTGRESQL_PORT unless $options[:client]
126
157
  $options[:username] ||= POSTGRESQL_USERNAME unless $options[:client]
127
- $options[:delim] ||= CSV_DEFAULT_DELIMITER
128
158
  $options[:quote] ||= CSV_DEFAULT_QUOTE
129
159
  $options[:drop] ||= false
130
160
 
@@ -141,6 +171,9 @@ unless File.exists?($options[:input])
141
171
  error "file `#{$options[:input]}` not found!"
142
172
  end
143
173
 
174
+ # auto detect delimiter
175
+ $options[:delim] ||= Idata::Detector::new($options[:input]).find
176
+
144
177
  if !SUPPORTED_INPUT_FORMATS.include?($options[:format])
145
178
  error "invalid input file format, supported formats are: #{SUPPORTED_INPUT_FORMATS.join(', ')}"
146
179
  end
@@ -347,12 +380,15 @@ class MyParser
347
380
  end
348
381
 
349
382
  # Execute
350
- `#{insert_data_sql}`
383
+ `#{insert_data_sql} > /dev/null`
351
384
 
352
- # Clean up
353
- File.delete(csv_path) if File.exists?(csv_path)
354
-
355
- puts "Table `#{$options[:table]}` loaded\n"
385
+ if $?.exitstatus == 0
386
+ # Clean up
387
+ File.delete(csv_path) if File.exists?(csv_path)
388
+ puts "Table `#{$options[:table]}` loaded\n"
389
+ else
390
+ puts "Something went wrong!"
391
+ end
356
392
  end
357
393
 
358
394
  private
data/lib/idata.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  require "idata/version"
2
+ require "idata/detector"
2
3
 
3
4
  module Idata
4
5
  # Your code goes here...
5
6
  end
7
+
8
+
@@ -0,0 +1,60 @@
require 'csv'

module Idata
  # Guesses the column delimiter of a delimited text file by inspecting the
  # first SAMPLE_SIZE lines of it.
  #
  # Three strategies are tried in order of decreasing confidence (see #find):
  # uniform column count, "parses without error", and raw occurrence count.
  # Each strategy returns a delimiter when it has exactly one winner, falls
  # back to DEFAULT_DELIMITER when the default is among several winners, and
  # returns nil otherwise so the next strategy can be tried.
  class Detector
    DEFAULT_DELIMITER = ","
    COMMON_DELIMITERS = [DEFAULT_DELIMITER, "|", "\t", ";"].freeze
    SAMPLE_SIZE = 100

    # @param file [String] path of the file to inspect
    def initialize(file)
      @file = file
      @sample = read_sample(file)
      @parsed = {}

      # delimiter => number of occurrences in the sample; delimiters that
      # never occur are not candidates at all.
      @candidates = COMMON_DELIMITERS.each_with_object({}) do |delim, counts|
        occurrences = @sample.scan(delim).size
        counts[delim] = occurrences if occurrences > 0
      end
    end

    # @return [String] the detected delimiter, or DEFAULT_DELIMITER when the
    #   sample gives no usable hint (e.g. a file with a single header column)
    def find
      return DEFAULT_DELIMITER if @candidates.empty?

      find_same_occurence || find_valid || find_max_occurence || DEFAULT_DELIMITER
    end

    # Low confidence: delimiters for which the sample parses as CSV at all.
    #
    # @return [String, nil]
    def find_valid
      pick(@candidates.keys.select { |delim| parsed_rows(delim) })
    end

    # High confidence: delimiters for which every non-empty row of the sample
    # has the same number of columns.
    #
    # @return [String, nil]
    def find_same_occurence
      uniform = @candidates.keys.select do |delim|
        rows = parsed_rows(delim)
        rows && rows.reject(&:empty?).map(&:size).uniq.size == 1
      end

      pick(uniform)
    end

    # Last resort: the delimiter(s) occurring most often in the sample.
    #
    # @return [String, nil]
    def find_max_occurence
      # Compare counts with the highest *count*; comparing a count with a
      # [delimiter, count] pair can never match and would make this strategy
      # always return nil.
      highest = @candidates.values.max
      pick(@candidates.select { |_delim, count| count == highest }.keys)
    end

    private

    # Reads at most SAMPLE_SIZE lines directly instead of shelling out to
    # `head`, so paths containing spaces or shell metacharacters are handled
    # safely. An unreadable file yields an empty sample, which makes #find
    # return DEFAULT_DELIMITER (the same outcome as a failed `head` call).
    def read_sample(path)
      File.foreach(path).first(SAMPLE_SIZE).join
    rescue SystemCallError
      ""
    end

    # Parses the sample with the given delimiter, at most once per delimiter.
    # Returns the rows, or nil when the sample cannot be parsed that way.
    def parsed_rows(delim)
      return @parsed[delim] if @parsed.key?(delim)

      @parsed[delim] =
        begin
          CSV.parse(@sample, col_sep: delim)
        rescue StandardError
          # Malformed CSV or undecodable bytes simply disqualify the delimiter.
          nil
        end
    end

    # Shared tie-breaking rule: a single winner wins; several winners resolve
    # to the default delimiter if it is one of them; otherwise undecided (nil).
    def pick(selected)
      return selected.first if selected.size == 1

      DEFAULT_DELIMITER if selected.include?(DEFAULT_DELIMITER)
    end
  end
end
data/lib/idata/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Idata
2
- VERSION = "1.0.0"
2
+ VERSION = "1.1.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: idata
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nghi Pham
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-17 00:00:00.000000000 Z
11
+ date: 2016-03-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -86,6 +86,7 @@ files:
86
86
  - full.sh
87
87
  - idata.gemspec
88
88
  - lib/idata.rb
89
+ - lib/idata/detector.rb
89
90
  - lib/idata/version.rb
90
91
  - sample.sh
91
92
  homepage: https://github.com/minhnghivn/idata