idata 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/iload +58 -22
- data/lib/idata.rb +3 -0
- data/lib/idata/detector.rb +60 -0
- data/lib/idata/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: da7b55510deb9dc064fdbb66ae899b7c01d47379
|
4
|
+
data.tar.gz: 9af104c24dd81b1e4d4347f717d360ddd4ba639c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 231e0051b69e0a402b3f606f08ae69b351ca63116db4d0bbe835b22ac9aaf268ea563135931c13b9f795c9d78090d446c4188e2175e2192840792e6c4ef0067e
|
7
|
+
data.tar.gz: 7592dbb8b9c6dda8e7756c7c36f17543b39e094f44f06d587a11e0141ccf06029ffc23966a2361475ffbe51082fff2afff01a61325b0936b33d836d77f713c07
|
data/bin/iload
CHANGED
@@ -8,9 +8,21 @@
|
|
8
8
|
# a corresponding table in the specified database
|
9
9
|
# Issue ruby load.rb --help for guideline/examples
|
10
10
|
#
|
11
|
+
|
12
|
+
require 'idata'
|
11
13
|
require 'optparse'
|
12
14
|
require 'csv'
|
13
|
-
|
15
|
+
|
16
|
+
begin
|
17
|
+
require 'active_record'
|
18
|
+
rescue Exception => ex
|
19
|
+
end
|
20
|
+
|
21
|
+
begin
|
22
|
+
require 'activerecord'
|
23
|
+
rescue Exception => ex
|
24
|
+
end
|
25
|
+
|
14
26
|
require 'rubygems'
|
15
27
|
require 'digest/sha1'
|
16
28
|
require 'fileutils'
|
@@ -29,9 +41,13 @@ end
|
|
29
41
|
|
30
42
|
$options = {}
|
31
43
|
parser = OptionParser.new("", 24) do |opts|
|
32
|
-
opts.banner = "
|
44
|
+
opts.banner = "Program: iload #{Idata::VERSION}\nAuthor: Gaugau\n\nUsage: iload <client name> [option]\n iload [options]\n"
|
45
|
+
opts.version = Idata::VERSION
|
33
46
|
|
34
|
-
opts.
|
47
|
+
opts.separator ""
|
48
|
+
opts.separator "Command options:"
|
49
|
+
|
50
|
+
opts.on("-i", "--input INPUT", "Input file") do |v|
|
35
51
|
$options[:input] = v
|
36
52
|
end
|
37
53
|
|
@@ -51,10 +67,6 @@ parser = OptionParser.new("", 24) do |opts|
|
|
51
67
|
$options[:quote] = v
|
52
68
|
end
|
53
69
|
|
54
|
-
# opts.on("-o", "--output CSV", "Temporary CSV output file") do |v|
|
55
|
-
# $options[:output] = v
|
56
|
-
# end
|
57
|
-
|
58
70
|
opts.on("-t", "--table TABLE", "Table name to be created") do |v|
|
59
71
|
$options[:table] = v
|
60
72
|
end
|
@@ -62,6 +74,9 @@ parser = OptionParser.new("", 24) do |opts|
|
|
62
74
|
opts.on("--drop", "") do |v|
|
63
75
|
$options[:drop] = v
|
64
76
|
end
|
77
|
+
|
78
|
+
opts.separator ""
|
79
|
+
opts.separator "Connection options, can be ommited if <client name> is already specified:"
|
65
80
|
|
66
81
|
opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
|
67
82
|
$options[:host] = v
|
@@ -82,25 +97,41 @@ parser = OptionParser.new("", 24) do |opts|
|
|
82
97
|
opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
|
83
98
|
$options[:listen] = v
|
84
99
|
end
|
100
|
+
|
101
|
+
opts.separator ""
|
102
|
+
opts.separator "Common options:"
|
85
103
|
|
86
104
|
opts.on_tail('--help', 'Displays this help') do
|
87
|
-
|
105
|
+
puts opts, "", help
|
88
106
|
exit
|
89
|
-
|
107
|
+
end
|
108
|
+
|
90
109
|
end
|
91
110
|
|
92
111
|
def help
|
93
112
|
return <<-eos
|
94
|
-
|
95
|
-
EXAMPLES
|
113
|
+
Example:
|
96
114
|
-------------------------------------------------------
|
97
|
-
Load data from text file and store to a table name "
|
115
|
+
Load data from text file and store to a table name "items"
|
116
|
+
|
117
|
+
iload --host=localhost --username=postgres --password=postgres --database=db
|
118
|
+
--table=items --input=/user/items.csv
|
119
|
+
|
120
|
+
In short form, with client-name specified:
|
121
|
+
|
122
|
+
iload maricopa --table=items --input=/user/items.csv
|
98
123
|
|
99
|
-
|
100
|
-
--database=db --table=vendors \\
|
101
|
-
--input=/home/administrator/VendorMaster.txt \\
|
102
|
-
--format=CSV --delim=$'\\t'
|
124
|
+
To use the short form, the following environment variables must be present:
|
103
125
|
|
126
|
+
MAINDBHOST
|
127
|
+
MAINDBSER
|
128
|
+
MAINDBNAME
|
129
|
+
MAINDBPORT
|
130
|
+
|
131
|
+
Column delimiter will be auto-detected, in case you want to overwrite the default, use --format/-f.
|
132
|
+
For example, tell the program to use TAB as delimiter:
|
133
|
+
|
134
|
+
iload maricopa --table=items --input=/user/items.csv --format=$'\\t'
|
104
135
|
|
105
136
|
eos
|
106
137
|
end
|
@@ -124,7 +155,6 @@ $options[:format].upcase! if $options[:format]
|
|
124
155
|
$options[:format] ||= CSV_DEFAULT_FORMAT
|
125
156
|
$options[:listen] ||= POSTGRESQL_PORT unless $options[:client]
|
126
157
|
$options[:username] ||= POSTGRESQL_USERNAME unless $options[:client]
|
127
|
-
$options[:delim] ||= CSV_DEFAULT_DELIMITER
|
128
158
|
$options[:quote] ||= CSV_DEFAULT_QUOTE
|
129
159
|
$options[:drop] ||= false
|
130
160
|
|
@@ -141,6 +171,9 @@ unless File.exists?($options[:input])
|
|
141
171
|
error "file `#{$options[:input]}` not found!"
|
142
172
|
end
|
143
173
|
|
174
|
+
# auto detect delimiter
|
175
|
+
$options[:delim] ||= Idata::Detector::new($options[:input]).find
|
176
|
+
|
144
177
|
if !SUPPORTED_INPUT_FORMATS.include?($options[:format])
|
145
178
|
error "invalid input file format, supported formats are: #{SUPPORTED_INPUT_FORMATS.join(', ')}"
|
146
179
|
end
|
@@ -347,12 +380,15 @@ class MyParser
|
|
347
380
|
end
|
348
381
|
|
349
382
|
# Execute
|
350
|
-
`#{insert_data_sql}`
|
383
|
+
`#{insert_data_sql} > /dev/null`
|
351
384
|
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
385
|
+
if $?.exitstatus == 0
|
386
|
+
# Clean up
|
387
|
+
File.delete(csv_path) if File.exists?(csv_path)
|
388
|
+
puts "Table `#{$options[:table]}` loaded\n"
|
389
|
+
else
|
390
|
+
puts "Something went wrong!"
|
391
|
+
end
|
356
392
|
end
|
357
393
|
|
358
394
|
private
|
data/lib/idata.rb
CHANGED
data/lib/idata/detector.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'csv'
|
2
|
+
module Idata
|
3
|
+
class Detector
|
4
|
+
DEFAULT_DELIMITER = ","
|
5
|
+
COMMON_DELIMITERS = [DEFAULT_DELIMITER, "|", "\t", ";"]
|
6
|
+
SAMPLE_SIZE = 100
|
7
|
+
|
8
|
+
def initialize(file)
|
9
|
+
@file = file
|
10
|
+
@sample = `head -n #{SAMPLE_SIZE} #{@file}`
|
11
|
+
@sample_lines = @sample.split(/[\r\n]+/)
|
12
|
+
@candidates = COMMON_DELIMITERS.map { |delim|
|
13
|
+
[delim, @sample.scan(delim).count]
|
14
|
+
}.to_h.select{|k,v| v > 0}
|
15
|
+
end
|
16
|
+
|
17
|
+
def find
|
18
|
+
return DEFAULT_DELIMITER if @candidates.empty? # for example, file with only one header
|
19
|
+
return find_same_occurence || find_valid || find_max_occurence || DEFAULT_DELIMITER
|
20
|
+
end
|
21
|
+
|
22
|
+
# just work
|
23
|
+
def find_valid
|
24
|
+
selected = @candidates.select { |delim, count|
|
25
|
+
begin
|
26
|
+
CSV.parse(@sample, col_sep: delim)
|
27
|
+
true
|
28
|
+
rescue Exception => ex
|
29
|
+
false
|
30
|
+
end
|
31
|
+
}.keys
|
32
|
+
|
33
|
+
return selected.first if selected.count == 1
|
34
|
+
return DEFAULT_DELIMITER if selected.include?(DEFAULT_DELIMITER)
|
35
|
+
end
|
36
|
+
|
37
|
+
# high confident level
|
38
|
+
def find_same_occurence
|
39
|
+
selected = @candidates.select { |delim, count|
|
40
|
+
begin
|
41
|
+
CSV.parse(@sample, col_sep: delim).select{|e| !e.empty? }.map{|e| e.count}.uniq.count == 1
|
42
|
+
rescue Exception => ex
|
43
|
+
false
|
44
|
+
end
|
45
|
+
}.keys
|
46
|
+
|
47
|
+
return selected.first if selected.count == 1
|
48
|
+
return DEFAULT_DELIMITER if selected.include?(DEFAULT_DELIMITER)
|
49
|
+
end
|
50
|
+
|
51
|
+
# most occurence
|
52
|
+
def find_max_occurence
|
53
|
+
selected = @candidates.select{|k,v| v == @candidates.sort_by(&:last).last }.keys
|
54
|
+
|
55
|
+
return selected.first if selected.count == 1
|
56
|
+
return DEFAULT_DELIMITER if selected.include?(DEFAULT_DELIMITER)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
data/lib/idata/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: idata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nghi Pham
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-03-
|
11
|
+
date: 2016-03-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -86,6 +86,7 @@ files:
|
|
86
86
|
- full.sh
|
87
87
|
- idata.gemspec
|
88
88
|
- lib/idata.rb
|
89
|
+
- lib/idata/detector.rb
|
89
90
|
- lib/idata/version.rb
|
90
91
|
- sample.sh
|
91
92
|
homepage: https://github.com/minhnghivn/idata
|