idata 1.2.3 → 1.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/bin/icsvutils +220 -0
  3. data/lib/idata/version.rb +1 -1
  4. metadata +3 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 79ed8b5197ad7ba0a4bbb0906b3317c194d631b0
4
- data.tar.gz: a6c339fe9f9814e22cc93b496f58e2f865f56e5f
3
+ metadata.gz: be645ca0daedeec45060ae7434adbf7d3c63a3f9
4
+ data.tar.gz: 8558bb7c6773bcec21ec21f0fc16581a82000774
5
5
  SHA512:
6
- metadata.gz: bd3a547f42022ec3ab9f986952a71c090aea052a5247e36bf4e52788f7ad10e403dce2880b73efee57c66d841aea1e14cad80dc9e1eb3987646243873a635b0d
7
- data.tar.gz: 24703e1ed62d85b53f7dd017f08814952a1239f23b138ecafc1cf059a98837e7afd82a67d0f0efd6ab186300355fc4a508d654a6a78fe14c40e13d621ffeea64
6
+ metadata.gz: e3ba4939323b57b1c6b6183879f07a9c8643ec7a671b2a7d3e5c84c6a826ac42a9e497b6722406722c9d794f1628f09b18ce1b73993b7174555a0eb944fa20fb
7
+ data.tar.gz: c506d99a5159751d6ef1664168222a18f152861bca68e7c9cafb01f183a8ea4026c8db976ac530fc0306205b7f042fc2fc26e7da85af1b95bb1785a88d7d516d
data/bin/icsvutils ADDED
@@ -0,0 +1,220 @@
1
+ #!/usr/bin/env ruby
2
+ # DATA LOADER
3
+ #
4
+ # @author Nghi Pham
5
+ # @date April 2014
6
+ #
7
+
8
+ require '../lib/idata/detector'
9
+ require 'optparse'
10
+ require 'csv'
11
+ require 'sqlite3'
12
+
13
+ begin
14
+ require 'active_record'
15
+ rescue Exception => ex
16
+ end
17
+
18
+ begin
19
+ require 'activerecord'
20
+ rescue Exception => ex
21
+ end
22
+
23
+ require 'rubygems'
24
+ require 'fileutils'
25
+
26
# Reports a fatal problem and terminates the script.
#
# The message is written to standard error (not standard output) so that
# diagnostics never get mixed into output that a caller may be piping or
# redirecting.
#
# @param msg [String] human-readable description of the problem
# @return [void] never returns; raises SystemExit with a non-zero status
def error(msg)
  $stderr.puts "Error: #{msg}"
  exit(-1)
end
30
+
31
require 'securerandom'

# Command-line state shared with MyParser through a global hash.
# :actions collects the requested transformations (:set_delim, :set_encoding).
$options = {:actions => {}}
parser = OptionParser.new("", 24) do |opts|
  opts.banner = "Program: icsvutils 1\nAuthor: Gaugau\n\nUsage: icsvutils file [options]\n"
  opts.version = 1

  opts.separator ""
  opts.separator "Command options:"

  opts.on("--set-delim DELIMITER", "Set file delimiter") do |v|
    $options[:actions][:set_delim] = v
  end

  # NOTE(review): the value is stored, but nothing visible in this script
  # reads :set_encoding afterwards -- confirm whether it is meant to be applied.
  opts.on("--set-encoding ENCODING", "Set file encoding. Valid values include: UTF8, UTF16, ASCII, ISO, etc.") do |v|
    $options[:actions][:set_encoding] = v
  end

  opts.separator ""
  opts.separator "Common options:"

  opts.on_tail('--help', 'Displays this help') do
    # Only the option summary is printed; there is no separate help text.
    puts opts
    exit
  end
end

# Report unknown or malformed flags as a normal error instead of a backtrace.
begin
  parser.parse!
rescue OptionParser::ParseError => ex
  error ex.message
end

# After parse! the first remaining argument is the input file.
$options[:input] = ARGV[0]

CSV_DEFAULT_DELIMITER = ','
CSV_DEFAULT_QUOTE = '"'

# Validate the input path before doing any work.
if $options[:input].nil?
  error "Please specify input file"
end

if File.directory?($options[:input])
  error "`#{$options[:input]}` is a directory! input must be a file"
end

unless File.exist?($options[:input])
  error "file `#{$options[:input]}` not found!"
end

if $options[:actions].empty?
  error "Please specify at least one action: --set-delim / --set-encoding"
end

# Fill in defaults. The delimiter comes from Idata::Detector
# (presumably sniffed from the file contents -- verify against the detector).
$options[:delim] ||= Idata::Detector.new($options[:input]).find
$options[:format] ||= 'CSV'
$options[:quote] ||= CSV_DEFAULT_QUOTE
$options[:table] ||= 'items'
$options[:actions][:set_delim] ||= CSV_DEFAULT_DELIMITER

# Scratch file for the normalized CSV; a random name avoids collisions
# between concurrent runs.
$tmpfile = "/tmp/#{SecureRandom.hex(20)}.csv"
89
+
90
# Core extension used to turn CSV header text into safe column names.
# NOTE(review): ActiveSupport also defines String#underscore with different
# semantics; this definition replaces it if active_record was loaded -- confirm
# that is intended.
class String
  # Returns a column-name-safe copy of the string: surrounding whitespace is
  # removed, letters are lower-cased, and every run of characters other than
  # a-z / 0-9 is collapsed into a single underscore.
  #
  # Lower-casing happens here so the result is correct even when the caller
  # has not downcased the string first.
  #
  # @return [String] a new string; the receiver is not modified
  def underscore
    strip.downcase.gsub(/[^a-z0-9]+/, "_")
  end
end
96
+
97
# Rewrites the input file in place using a new column delimiter.
#
# Configuration comes from the global $options hash (:input, :format, :delim,
# :quote, :table, :actions) and the scratch path in $tmpfile. The input is
# first normalized into a standard CSV at $tmpfile to discover the column
# names; the original file is then imported into a throw-away SQLite table and
# written back over itself with the delimiter from $options[:actions][:set_delim].
class MyParser
  # Picks the loader that matches $options[:format]; other formats are ignored.
  def run
    case $options[:format]
    when 'FX', 'RPT' then load_fx
    when 'CSV' then load_csv
    end
  end

  # Normalizes a delimited input file into $tmpfile (standard delimiter and
  # quote), skipping empty rows, then hands the result to create_table_from_csv.
  def load_csv
    CSV.open($tmpfile, "wb", :col_sep => CSV_DEFAULT_DELIMITER, :quote_char => CSV_DEFAULT_QUOTE) do |writer|
      # Rows are streamed straight through instead of being buffered in memory.
      CSV.foreach($options[:input], :col_sep => $options[:delim], :quote_char => $options[:quote], :converters => $csv_converters) do |row|
        writer << row unless row.empty?
      end
    end

    create_table_from_csv($tmpfile)
  ensure
    remove_file($tmpfile)
  end

  # Normalizes a fixed-width (FX / RPT) input file into $tmpfile, then hands
  # the result to create_table_from_csv. Column widths are taken from the
  # header line: each header token plus its trailing whitespace is one column.
  #
  # NOTE(review): a final header token with no trailing whitespace is not
  # matched by the scan below -- confirm inputs always pad the last column.
  # NOTE(review): create_table_from_csv later imports the raw input file using
  # $options[:delim], which does not look meaningful for fixed-width data.
  #
  # @raise [RuntimeError] when the input file has no header line
  def load_fx
    data = IO.read($options[:input])

    # Remove the "FEFF" char (Byte Order Mark), which usually exists in .RPT files
    data.gsub!(["feff".hex].pack('U*'), '')
    data = data.split(/[\r\n]+/)

    # Note: shifts must be made in order (header first, then the dash line)
    header = data.shift
    raise "File Empty!!!" unless header

    # In case of RPT, drop the line that only contains dashes / whitespace
    if $options[:format] == 'RPT'
      data.shift if data[0] =~ /^[\-\s]*$/
    end

    headers = header.scan(/[^\s]+\s+/)

    # Build an unpack template such as "a10a25a8" from the column widths
    ranges = headers.map { |s| "a#{s.size}" }.join("")
    headers.map! { |s| s.downcase.strip }

    # $csv_converters is not assigned anywhere visible in this script, so it
    # may be nil; Array() turns that into "no converters" instead of a crash.
    converters = Array($csv_converters).map { |name| CSV::Converters[name] }

    CSV.open($tmpfile, "wb", :col_sep => CSV_DEFAULT_DELIMITER, :quote_char => CSV_DEFAULT_QUOTE) do |csv|
      csv << headers
      data.each do |line|
        record = line.unpack(ranges).map { |e| e.strip }

        # take advantage of CSV converters
        converters.each { |converter| record.map!(&converter) }

        csv << record
      end
    end

    create_table_from_csv($tmpfile)
  ensure
    remove_file($tmpfile)
  end

  # Derives column names from the header of +csv_path+, then uses the sqlite3
  # command-line shell to import $options[:input] into a table with those
  # columns and write it back to the same path with the new delimiter.
  # +csv_path+ is always deleted afterwards.
  #
  # @param csv_path [String] normalized CSV whose first row holds the headers
  # @raise [RuntimeError] when +csv_path+ has no data row
  def create_table_from_csv(csv_path)
    # Block form closes the file handle as soon as the first row is read
    first = CSV.open(csv_path, :headers => true, :col_sep => CSV_DEFAULT_DELIMITER, :quote_char => CSV_DEFAULT_QUOTE) do |csv|
      csv.first
    end
    raise "File Empty!!!" unless first

    # sanitize: blank headers become column_N, everything becomes [a-z0-9_]
    headers = first.headers.each_with_index.map do |name, index|
      name = "column_#{index + 1}" if name.nil? || name.empty?
      name.downcase.underscore
    end

    # check if every field name is unique
    if headers.count != headers.uniq.count
      error "duplicate field name [#{headers.sort.join(', ')}]"
    end

    table = $options[:table]
    input = sqlite_arg($options[:input])

    create_table_sql = headers.map { |e| "\"#{e}\" text" }.join(",")
    create_table_sql = "create table #{table}( #{create_table_sql} );"
    puts create_table_sql

    # The same table name is used for create / import / select.
    script = [
      create_table_sql,
      ".headers off",
      ".mode csv",
      ".separator \"#{$options[:delim]}\"",
      ".import #{input} #{table}",
      ".separator \"#{$options[:actions][:set_delim]}\"",
      ".output #{input}",
      "SELECT * FROM #{table};",
      ""
    ].join("\n")

    puts "Something went wrong!" unless run_sqlite(script)
  ensure
    # Clean up, also when an error or exit interrupts the method
    remove_file(csv_path)
  end

  private

  # Feeds +script+ to the sqlite3 shell on its standard input.
  #
  # An in-memory database is used so nothing survives between runs, and the
  # program is started without a shell so file names are never subject to
  # shell expansion.
  #
  # @param script [String] SQL and dot-commands, one per line
  # @return [Boolean] true when sqlite3 ran and exited with status 0
  def run_sqlite(script)
    IO.popen(['sqlite3', ':memory:'], 'r+') do |sqlite|
      sqlite.write(script)
      sqlite.close_write
      sqlite.read # drain output so the child can finish
    end
    $?.success? ? true : false
  rescue Errno::ENOENT
    # sqlite3 executable is not installed / not on PATH
    false
  end

  # Quotes +value+ as a double-quoted sqlite3 dot-command argument, escaping
  # backslashes and double quotes so paths with spaces stay one argument.
  def sqlite_arg(value)
    '"' + value.to_s.gsub(/["\\]/) { |ch| "\\#{ch}" } + '"'
  end

  # Deletes +path+ if it is set and still exists.
  def remove_file(path)
    File.delete(path) if path && File.exist?(path)
  end
end
216
+
217
# Script entry point: build the parser and execute it against $options.
MyParser.new.run
220
+
data/lib/idata/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Idata
2
- VERSION = "1.2.3"
2
+ VERSION = "1.2.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: idata
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.3
4
+ version: 1.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nghi Pham
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-15 00:00:00.000000000 Z
11
+ date: 2016-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -87,6 +87,7 @@ files:
87
87
  - README.md
88
88
  - README2.md
89
89
  - Rakefile
90
+ - bin/icsvutils
90
91
  - bin/ieval
91
92
  - bin/iexport
92
93
  - bin/iload