idata 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/iload +58 -22
- data/lib/idata.rb +3 -0
- data/lib/idata/detector.rb +60 -0
- data/lib/idata/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: da7b55510deb9dc064fdbb66ae899b7c01d47379
|
4
|
+
data.tar.gz: 9af104c24dd81b1e4d4347f717d360ddd4ba639c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 231e0051b69e0a402b3f606f08ae69b351ca63116db4d0bbe835b22ac9aaf268ea563135931c13b9f795c9d78090d446c4188e2175e2192840792e6c4ef0067e
|
7
|
+
data.tar.gz: 7592dbb8b9c6dda8e7756c7c36f17543b39e094f44f06d587a11e0141ccf06029ffc23966a2361475ffbe51082fff2afff01a61325b0936b33d836d77f713c07
|
data/bin/iload
CHANGED
@@ -8,9 +8,21 @@
|
|
8
8
|
# a corresponding table in the specified database
|
9
9
|
# Issue ruby load.rb --help for guideline/examples
|
10
10
|
#
|
11
|
+
|
12
|
+
require 'idata'
|
11
13
|
require 'optparse'
|
12
14
|
require 'csv'
|
13
|
-
|
15
|
+
|
16
|
+
begin
|
17
|
+
require 'active_record'
|
18
|
+
rescue Exception => ex
|
19
|
+
end
|
20
|
+
|
21
|
+
begin
|
22
|
+
require 'activerecord'
|
23
|
+
rescue Exception => ex
|
24
|
+
end
|
25
|
+
|
14
26
|
require 'rubygems'
|
15
27
|
require 'digest/sha1'
|
16
28
|
require 'fileutils'
|
@@ -29,9 +41,13 @@ end
|
|
29
41
|
|
30
42
|
$options = {}
|
31
43
|
parser = OptionParser.new("", 24) do |opts|
|
32
|
-
opts.banner = "
|
44
|
+
opts.banner = "Program: iload #{Idata::VERSION}\nAuthor: Gaugau\n\nUsage: iload <client name> [option]\n iload [options]\n"
|
45
|
+
opts.version = Idata::VERSION
|
33
46
|
|
34
|
-
opts.
|
47
|
+
opts.separator ""
|
48
|
+
opts.separator "Command options:"
|
49
|
+
|
50
|
+
opts.on("-i", "--input INPUT", "Input file") do |v|
|
35
51
|
$options[:input] = v
|
36
52
|
end
|
37
53
|
|
@@ -51,10 +67,6 @@ parser = OptionParser.new("", 24) do |opts|
|
|
51
67
|
$options[:quote] = v
|
52
68
|
end
|
53
69
|
|
54
|
-
# opts.on("-o", "--output CSV", "Temporary CSV output file") do |v|
|
55
|
-
# $options[:output] = v
|
56
|
-
# end
|
57
|
-
|
58
70
|
opts.on("-t", "--table TABLE", "Table name to be created") do |v|
|
59
71
|
$options[:table] = v
|
60
72
|
end
|
@@ -62,6 +74,9 @@ parser = OptionParser.new("", 24) do |opts|
|
|
62
74
|
opts.on("--drop", "") do |v|
|
63
75
|
$options[:drop] = v
|
64
76
|
end
|
77
|
+
|
78
|
+
opts.separator ""
|
79
|
+
opts.separator "Connection options, can be ommited if <client name> is already specified:"
|
65
80
|
|
66
81
|
opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
|
67
82
|
$options[:host] = v
|
@@ -82,25 +97,41 @@ parser = OptionParser.new("", 24) do |opts|
|
|
82
97
|
opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
|
83
98
|
$options[:listen] = v
|
84
99
|
end
|
100
|
+
|
101
|
+
opts.separator ""
|
102
|
+
opts.separator "Common options:"
|
85
103
|
|
86
104
|
opts.on_tail('--help', 'Displays this help') do
|
87
|
-
|
105
|
+
puts opts, "", help
|
88
106
|
exit
|
89
|
-
|
107
|
+
end
|
108
|
+
|
90
109
|
end
|
91
110
|
|
92
111
|
def help
|
93
112
|
return <<-eos
|
94
|
-
|
95
|
-
EXAMPLES
|
113
|
+
Example:
|
96
114
|
-------------------------------------------------------
|
97
|
-
Load data from text file and store to a table name "
|
115
|
+
Load data from text file and store to a table name "items"
|
116
|
+
|
117
|
+
iload --host=localhost --username=postgres --password=postgres --database=db
|
118
|
+
--table=items --input=/user/items.csv
|
119
|
+
|
120
|
+
In short form, with client-name specified:
|
121
|
+
|
122
|
+
iload maricopa --table=items --input=/user/items.csv
|
98
123
|
|
99
|
-
|
100
|
-
--database=db --table=vendors \\
|
101
|
-
--input=/home/administrator/VendorMaster.txt \\
|
102
|
-
--format=CSV --delim=$'\\t'
|
124
|
+
To use the short form, the following environment variables must be present:
|
103
125
|
|
126
|
+
MAINDBHOST
|
127
|
+
MAINDBSER
|
128
|
+
MAINDBNAME
|
129
|
+
MAINDBPORT
|
130
|
+
|
131
|
+
Column delimiter will be auto-detected, in case you want to overwrite the default, use --format/-f.
|
132
|
+
For example, tell the program to use TAB as delimiter:
|
133
|
+
|
134
|
+
iload maricopa --table=items --input=/user/items.csv --format=$'\\t'
|
104
135
|
|
105
136
|
eos
|
106
137
|
end
|
@@ -124,7 +155,6 @@ $options[:format].upcase! if $options[:format]
|
|
124
155
|
$options[:format] ||= CSV_DEFAULT_FORMAT
|
125
156
|
$options[:listen] ||= POSTGRESQL_PORT unless $options[:client]
|
126
157
|
$options[:username] ||= POSTGRESQL_USERNAME unless $options[:client]
|
127
|
-
$options[:delim] ||= CSV_DEFAULT_DELIMITER
|
128
158
|
$options[:quote] ||= CSV_DEFAULT_QUOTE
|
129
159
|
$options[:drop] ||= false
|
130
160
|
|
@@ -141,6 +171,9 @@ unless File.exists?($options[:input])
|
|
141
171
|
error "file `#{$options[:input]}` not found!"
|
142
172
|
end
|
143
173
|
|
174
|
+
# auto detect delimiter
|
175
|
+
$options[:delim] ||= Idata::Detector::new($options[:input]).find
|
176
|
+
|
144
177
|
if !SUPPORTED_INPUT_FORMATS.include?($options[:format])
|
145
178
|
error "invalid input file format, supported formats are: #{SUPPORTED_INPUT_FORMATS.join(', ')}"
|
146
179
|
end
|
@@ -347,12 +380,15 @@ class MyParser
|
|
347
380
|
end
|
348
381
|
|
349
382
|
# Execute
|
350
|
-
`#{insert_data_sql}`
|
383
|
+
`#{insert_data_sql} > /dev/null`
|
351
384
|
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
385
|
+
if $?.exitstatus == 0
|
386
|
+
# Clean up
|
387
|
+
File.delete(csv_path) if File.exists?(csv_path)
|
388
|
+
puts "Table `#{$options[:table]}` loaded\n"
|
389
|
+
else
|
390
|
+
puts "Something went wrong!"
|
391
|
+
end
|
356
392
|
end
|
357
393
|
|
358
394
|
private
|
data/lib/idata.rb
CHANGED
data/lib/idata/detector.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'csv'
|
2
|
+
module Idata
  # Guesses the column delimiter of a delimited text file.
  #
  # The first SAMPLE_SIZE lines of the file are read as a sample. Every entry
  # of COMMON_DELIMITERS that occurs at least once in the sample becomes a
  # candidate, and #find applies three heuristics in decreasing order of
  # confidence to choose among the candidates.
  class Detector
    DEFAULT_DELIMITER = ",".freeze
    COMMON_DELIMITERS = [DEFAULT_DELIMITER, "|", "\t", ";"].freeze
    SAMPLE_SIZE = 100

    # file - path of the text file to inspect.
    #
    # An unreadable or missing file yields an empty sample, so #find falls
    # back to DEFAULT_DELIMITER instead of raising.
    def initialize(file)
      @file = file
      @sample = read_sample(@file)
      # Hash of delimiter => number of occurrences in the sample, restricted
      # to delimiters that actually occur.
      @candidates = COMMON_DELIMITERS
        .map { |delim| [delim, @sample.scan(delim).count] }
        .to_h
        .select { |_delim, count| count > 0 }
    end

    # Returns the detected delimiter (a String); DEFAULT_DELIMITER when no
    # heuristic reaches a decision.
    def find
      return DEFAULT_DELIMITER if @candidates.empty? # e.g. a file with a single one-column header

      find_same_occurence || find_valid || find_max_occurence || DEFAULT_DELIMITER
    end

    # Lowest-effort heuristic: keep the candidates for which the sample parses
    # as CSV without error. Returns a delimiter or nil when undecided.
    def find_valid
      pick(@candidates.keys.select { |delim| parse(delim) })
    end

    # High-confidence heuristic: keep the candidates for which every non-empty
    # parsed row has the same number of columns. Returns a delimiter or nil.
    def find_same_occurence
      consistent = @candidates.keys.select do |delim|
        rows = parse(delim)
        rows && rows.reject(&:empty?).map(&:count).uniq.count == 1
      end
      pick(consistent)
    end

    # Last-resort heuristic: keep the candidate(s) occurring most often in the
    # sample. Returns a delimiter or nil when there is an unresolvable tie.
    def find_max_occurence
      # Compare against the highest count itself (not a [delim, count] pair).
      highest = @candidates.values.max
      pick(@candidates.select { |_delim, count| count == highest }.keys)
    end

    private

    # Reads up to SAMPLE_SIZE lines of +path+ without spawning a shell, so
    # paths containing spaces or shell metacharacters are handled safely.
    def read_sample(path)
      File.foreach(path).first(SAMPLE_SIZE).join
    rescue SystemCallError, IOError
      "" # best effort: treat an unreadable file as an empty sample
    end

    # Parses the sample with +delim+ as column separator. Returns the rows, or
    # nil when the sample cannot be parsed with that delimiter.
    def parse(delim)
      CSV.parse(@sample, col_sep: delim)
    rescue StandardError
      nil
    end

    # Shared decision rule: a single survivor wins; with several survivors the
    # default delimiter wins if it is among them; otherwise nil (undecided).
    def pick(selected)
      return selected.first if selected.count == 1

      DEFAULT_DELIMITER if selected.include?(DEFAULT_DELIMITER)
    end
  end
end
|
60
|
+
|
data/lib/idata/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: idata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nghi Pham
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-03-
|
11
|
+
date: 2016-03-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -86,6 +86,7 @@ files:
|
|
86
86
|
- full.sh
|
87
87
|
- idata.gemspec
|
88
88
|
- lib/idata.rb
|
89
|
+
- lib/idata/detector.rb
|
89
90
|
- lib/idata/version.rb
|
90
91
|
- sample.sh
|
91
92
|
homepage: https://github.com/minhnghivn/idata
|