idata 1.2.3 → 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/icsvutils +220 -0
- data/lib/idata/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: be645ca0daedeec45060ae7434adbf7d3c63a3f9
|
4
|
+
data.tar.gz: 8558bb7c6773bcec21ec21f0fc16581a82000774
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e3ba4939323b57b1c6b6183879f07a9c8643ec7a671b2a7d3e5c84c6a826ac42a9e497b6722406722c9d794f1628f09b18ce1b73993b7174555a0eb944fa20fb
|
7
|
+
data.tar.gz: c506d99a5159751d6ef1664168222a18f152861bca68e7c9cafb01f183a8ea4026c8db976ac530fc0306205b7f042fc2fc26e7da85af1b95bb1785a88d7d516d
|
data/bin/icsvutils
ADDED
@@ -0,0 +1,220 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# DATA LOADER
|
3
|
+
#
|
4
|
+
# @author Nghi Pham
|
5
|
+
# @date April 2014
|
6
|
+
#
|
7
|
+
|
8
|
+
require '../lib/idata/detector'
|
9
|
+
require 'optparse'
|
10
|
+
require 'csv'
|
11
|
+
require 'sqlite3'
|
12
|
+
|
13
|
+
begin
|
14
|
+
require 'active_record'
|
15
|
+
rescue Exception => ex
|
16
|
+
end
|
17
|
+
|
18
|
+
begin
|
19
|
+
require 'activerecord'
|
20
|
+
rescue Exception => ex
|
21
|
+
end
|
22
|
+
|
23
|
+
require 'rubygems'
|
24
|
+
require 'fileutils'
|
25
|
+
|
26
|
+
# Report a fatal error and terminate the script.
#
# The message is written to standard error so that it can never be mixed
# into data the script prints on standard output. The process then exits
# with a non-zero status.
#
# @param msg [#to_s] human-readable description of the failure
# @return [void] never returns normally; `exit` raises SystemExit
def error(msg)
  $stderr.puts "Error: #{msg}"
  exit(-1)
end
|
30
|
+
|
31
|
+
# Global option store shared with MyParser. :actions collects the
# transformations requested on the command line (:set_delim, :set_encoding);
# the remaining keys are filled in further down.
$options = {:actions => {}}

parser = OptionParser.new("", 24) do |opts|
  opts.banner = "Program: icsvutils 1\nAuthor: Gaugau\n\nUsage: icsvutils file [options]\n"
  opts.version = 1

  opts.separator ""
  opts.separator "Command options:"

  opts.on("--set-delim DELIMITER", "Set file delimiter") do |v|
    $options[:actions][:set_delim] = v
  end

  # NOTE(review): :set_encoding is recorded here but nothing visible in this
  # script acts on it yet - confirm whether the feature is still pending.
  opts.on("--set-encoding ENCODING", "Set file encoding. Valid values include: UTF8, UTF16, ASCII, ISO, etc.") do |v|
    $options[:actions][:set_encoding] = v
  end

  opts.separator ""
  opts.separator "Common options:"

  # `opts` renders the banner plus every option description by itself. The
  # previous handler also referenced an undefined `help` and so raised
  # NameError instead of printing anything.
  opts.on_tail('--help', 'Displays this help') do
    puts opts
    exit
  end
end

# Turn bad command-line input into a regular error message instead of an
# uncaught exception with a backtrace.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  error e.message
end

# The first remaining (non-option) argument is the file to convert.
$options[:input] = ARGV[0]

CSV_DEFAULT_DELIMITER = ','
CSV_DEFAULT_QUOTE = '"'

# Validate the input path. File.exist? replaces File.exists?, which was
# removed in Ruby 3.2.
error "Please specify input file" if $options[:input].nil?
error "file `#{$options[:input]}` not found!" unless File.exist?($options[:input])
error "`#{$options[:input]}` is a directory! input must be a file" if File.directory?($options[:input])

if $options[:actions].empty?
  error "Please specify at least one action: --set-delim / --set-encoding"
end

# Defaults. The current delimiter of the input is obtained from
# Idata::Detector; the target delimiter falls back to a comma.
$options[:delim] ||= Idata::Detector.new($options[:input]).find
$options[:format] ||= 'CSV'
$options[:quote] ||= CSV_DEFAULT_QUOTE
$options[:table] ||= 'items'
$options[:actions][:set_delim] ||= CSV_DEFAULT_DELIMITER

# Dump the effective options only when Ruby runs in debug mode (ruby -d).
p $options if $DEBUG

# Scratch CSV used by MyParser. SecureRandom gives a name that is unique in
# practice and, unlike the previous Digest::SHA1 call, only needs a library
# that is explicitly loaded here.
require 'securerandom'
$tmpfile = "/tmp/#{SecureRandom.hex(20)}.csv"
|
89
|
+
|
90
|
+
class String
  # Build an identifier-like token from the string.
  #
  # Surrounding whitespace is stripped and every run of characters other than
  # lowercase ASCII letters and digits is replaced by a single underscore.
  # Uppercase letters count as "other" characters too, so callers that want to
  # keep them must downcase first. The receiver is not modified.
  #
  # NOTE(review): this reopens the core String class. ActiveSupport defines a
  # String#underscore of its own and this script tries to load ActiveRecord -
  # confirm that overriding it is intended.
  #
  # @return [String] a new, sanitized string
  def underscore
    strip.gsub(/[^a-z0-9]+/, "_")
  end
end
|
96
|
+
|
97
|
+
# Rewrites the file named by $options[:input] with a new delimiter.
#
# The input is first normalised into a scratch CSV ($tmpfile), which is used to
# derive the column names. The sqlite3 command-line shell then imports the
# original file with its current delimiter ($options[:delim]) and writes it
# back in place using $options[:actions][:set_delim].
#
# Relies on names defined by the surrounding script: $options, $tmpfile,
# $csv_converters, CSV_DEFAULT_DELIMITER, CSV_DEFAULT_QUOTE and error.
class MyParser
  # A throw-away in-memory database. A database file shared between runs
  # would keep the table from the previous run, so every later run would
  # append to - and write out - rows that belong to other files.
  SQLITE_DATABASE = ':memory:'

  # Dispatch on $options[:format] ('FX'/'RPT' are fixed-width, 'CSV' is
  # delimited). Any other format is silently ignored.
  def run
    case $options[:format]
    when 'FX', 'RPT' then load_fx
    when 'CSV' then load_csv
    end
  ensure
    # Never leave the scratch file behind, even when loading fails half-way.
    File.delete($tmpfile) if $tmpfile && File.exist?($tmpfile)
  end

  # Copy the delimited input into $tmpfile using the standard delimiter and
  # quote character, skipping empty rows, then hand over to sqlite.
  #
  # Rows are streamed from reader to writer so the whole file never has to be
  # held in memory.
  def load_csv
    CSV.open($tmpfile, "wb", :col_sep => CSV_DEFAULT_DELIMITER, :quote_char => CSV_DEFAULT_QUOTE) do |writer|
      CSV.foreach($options[:input], :col_sep => $options[:delim], :quote_char => $options[:quote], :converters => $csv_converters) do |row|
        writer << row unless row.empty?
      end
    end

    create_table_from_csv($tmpfile)
  end

  # Convert a fixed-width (FX/RPT) input into $tmpfile, then hand over to
  # sqlite. Column widths are taken from the header line: each column spans its
  # name plus the whitespace that follows it.
  #
  # @raise [RuntimeError] when the input contains no header line
  def load_fx
    content = IO.read($options[:input])

    # Remove the Byte Order Mark (U+FEFF) that usually exists in .RPT files.
    content.gsub!("\uFEFF", '')
    lines = content.split(/[\r\n]+/)

    # Note: the shifts below must happen in this order.
    header = lines.shift
    raise "File Empty!!!" if header.nil?

    # In case of RPT, drop the separator line that only contains dashes.
    if $options[:format] == 'RPT'
      lines.shift if lines[0] =~ /^[\-\s]*$/
    end

    # NOTE(review): a final header column that is not followed by whitespace
    # does not match this pattern and is dropped - confirm that real inputs
    # always pad the last column.
    columns = header.scan(/[^\s]+\s+/)
    ranges = columns.map { |column| "a#{column.size}" }.join
    headers = columns.map { |column| column.downcase.strip }

    # $csv_converters is not assigned anywhere in this script; treat a missing
    # value as "no converters" instead of crashing on nil.
    converters = Array($csv_converters)

    CSV.open($tmpfile, "wb", :col_sep => CSV_DEFAULT_DELIMITER, :quote_char => CSV_DEFAULT_QUOTE) do |csv|
      csv << headers
      lines.each do |line|
        record = line.unpack(ranges).map { |field| field.strip }

        # Take advantage of the CSV converters.
        converters.each do |name|
          record.map!(&CSV::Converters[name])
        end

        csv << record
      end
    end

    create_table_from_csv($tmpfile)
  end

  # Derive column names from the normalised CSV, then let sqlite3 import the
  # original input and write it back to the same path with the new delimiter.
  #
  # The file at csv_path is deleted afterwards, whether or not the conversion
  # succeeded.
  #
  # @param csv_path [String] path of the normalised CSV (header row first)
  # @raise [RuntimeError] when the CSV has no data row
  def create_table_from_csv(csv_path)
    # Block form so the file handle is closed as soon as the first row is read.
    first = nil
    CSV.open(csv_path, :headers => true, :col_sep => CSV_DEFAULT_DELIMITER, :quote_char => CSV_DEFAULT_QUOTE) do |csv|
      first = csv.first
    end
    raise "File Empty!!!" unless first

    # Sanitize: unnamed columns become column_N, every name is lower-cased and
    # reduced to [a-z0-9_].
    headers = first.headers.each_with_index.map do |name, index|
      name = "column_#{index + 1}" if name.nil? || name.empty?
      name.downcase.strip.gsub(/[^a-z0-9]+/, "_")
    end

    # Every field name must be unique.
    if headers.count != headers.uniq.count
      error "duplicate field name [#{headers.sort.join(', ')}]"
    end

    table = $options[:table]
    column_defs = headers.map { |name| "\"#{name}\" text" }.join(",")
    create_table_sql = "create table #{table}( #{column_defs} );"
    puts create_table_sql

    input = sqlite_quote($options[:input])
    script = [
      create_table_sql,
      ".headers off",
      ".mode csv",
      ".separator \"#{$options[:delim]}\"",
      ".import #{input} #{table}",
      ".separator \"#{$options[:actions][:set_delim]}\"",
      ".output #{input}",
      "SELECT * FROM #{table};"
    ].join("\n") + "\n"

    puts "Something went wrong!" unless run_sqlite(script)
  ensure
    # Clean up
    File.delete(csv_path) if File.exist?(csv_path)
  end

  private

  # Feed a script to the sqlite3 shell on its standard input.
  #
  # sqlite3 is started directly, without a shell in between, so characters
  # such as `$` or backticks in file names are never expanded or executed.
  # Its standard output is discarded.
  #
  # @param script [String] SQL statements and dot-commands, one per line
  # @return [Boolean] true when sqlite3 ran and exited with status 0
  def run_sqlite(script)
    IO.popen(['sqlite3', SQLITE_DATABASE], 'w', :out => File::NULL) do |sqlite|
      sqlite.write(script)
    end
    $?.exitstatus == 0
  rescue Errno::ENOENT, Errno::EPIPE
    # sqlite3 is not installed, or it went away before reading the script.
    false
  end

  # Quote a value for use as a sqlite3 dot-command argument so that paths
  # containing whitespace stay a single argument.
  #
  # @param value [#to_s] raw argument, e.g. a file path
  # @return [String] the value in double quotes with `"` and `\` escaped
  def sqlite_quote(value)
    escaped = value.to_s.gsub(/["\\]/) { |char| "\\#{char}" }
    "\"#{escaped}\""
  end
end
|
216
|
+
|
217
|
+
# Entry point: build the parser and perform the conversion selected by $options.
MyParser.new.run
|
220
|
+
|
data/lib/idata/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: idata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.3
|
4
|
+
version: 1.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nghi Pham
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -87,6 +87,7 @@ files:
|
|
87
87
|
- README.md
|
88
88
|
- README2.md
|
89
89
|
- Rakefile
|
90
|
+
- bin/icsvutils
|
90
91
|
- bin/ieval
|
91
92
|
- bin/iexport
|
92
93
|
- bin/iload
|