idata 1.2.3 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/bin/icsvutils +220 -0
  3. data/lib/idata/version.rb +1 -1
  4. metadata +3 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 79ed8b5197ad7ba0a4bbb0906b3317c194d631b0
4
- data.tar.gz: a6c339fe9f9814e22cc93b496f58e2f865f56e5f
3
+ metadata.gz: be645ca0daedeec45060ae7434adbf7d3c63a3f9
4
+ data.tar.gz: 8558bb7c6773bcec21ec21f0fc16581a82000774
5
5
  SHA512:
6
- metadata.gz: bd3a547f42022ec3ab9f986952a71c090aea052a5247e36bf4e52788f7ad10e403dce2880b73efee57c66d841aea1e14cad80dc9e1eb3987646243873a635b0d
7
- data.tar.gz: 24703e1ed62d85b53f7dd017f08814952a1239f23b138ecafc1cf059a98837e7afd82a67d0f0efd6ab186300355fc4a508d654a6a78fe14c40e13d621ffeea64
6
+ metadata.gz: e3ba4939323b57b1c6b6183879f07a9c8643ec7a671b2a7d3e5c84c6a826ac42a9e497b6722406722c9d794f1628f09b18ce1b73993b7174555a0eb944fa20fb
7
+ data.tar.gz: c506d99a5159751d6ef1664168222a18f152861bca68e7c9cafb01f183a8ea4026c8db976ac530fc0306205b7f042fc2fc26e7da85af1b95bb1785a88d7d516d
data/bin/icsvutils ADDED
@@ -0,0 +1,220 @@
1
+ #!/usr/bin/env ruby
2
+ # DATA LOADER
3
+ #
4
+ # @author Nghi Pham
5
+ # @date April 2014
6
+ #
7
+
8
+ require '../lib/idata/detector'
9
+ require 'optparse'
10
+ require 'csv'
11
+ require 'sqlite3'
12
+
13
+ begin
14
+ require 'active_record'
15
+ rescue Exception => ex
16
+ end
17
+
18
+ begin
19
+ require 'activerecord'
20
+ rescue Exception => ex
21
+ end
22
+
23
+ require 'rubygems'
24
+ require 'fileutils'
25
+
26
# Reports a fatal error and terminates the script.
#
# The message is written to standard error so that it is never mixed into
# anything a caller captures from standard output.
#
# @param msg [#to_s] human-readable description of the failure
# @return [void] does not return; raises SystemExit with status -1
def error(msg)
  # $stderr.puts rather than Kernel#warn: warn is silenced when Ruby runs with -W0.
  $stderr.puts "Error: #{msg}"
  exit(-1)
end
30
+
31
# Command-line settings shared by the rest of the script.
# :actions maps each requested action (:set_delim, :set_encoding) to the
# argument supplied on the command line.
$options = { :actions => {} }

# The second constructor argument (24) is the width of the option summary column.
parser = OptionParser.new("", 24) do |opts|
  opts.banner = "Program: icsvutils 1\nAuthor: Gaugau\n\nUsage: icsvutils file [options]\n"
  opts.version = 1

  opts.separator ""
  opts.separator "Command options:"

  opts.on("--set-delim DELIMITER", "Set file delimiter") do |v|
    $options[:actions][:set_delim] = v
  end

  opts.on("--set-encoding ENCODING", "Set file encoding. Valid values include: UTF8, UTF16, ASCII, ISO, etc.") do |v|
    $options[:actions][:set_encoding] = v
  end

  opts.separator ""
  opts.separator "Common options:"

  opts.on_tail('--help', 'Displays this help') do
    # Printing the parser itself renders the banner plus the option summary.
    puts opts
    exit
  end
end

# Consumes the recognised switches from ARGV, leaving positional arguments behind.
parser.parse!
58
+
59
require 'securerandom'
require 'tmpdir'

# The first positional argument left after option parsing is the file to rewrite.
$options[:input] = ARGV[0]

CSV_DEFAULT_DELIMITER = ','.freeze
CSV_DEFAULT_QUOTE = '"'.freeze

# Validate the input path before doing any work; each failure ends the script via `error`.
input_path = $options[:input]
if input_path.nil?
  error "Please specify input file"
elsif File.directory?(input_path)
  error "`#{input_path}` is a directory! input must be a file"
elsif !File.exist?(input_path)
  error "file `#{input_path}` not found!"
end

if $options[:actions].empty?
  error "Please specify at least one action: --set-delim / --set-encoding"
end

# Fill in every setting that was not provided explicitly.
# The current delimiter of the input is detected from the file itself.
$options[:delim] ||= Idata::Detector.new($options[:input]).find
$options[:format] ||= 'CSV'
$options[:quote] ||= CSV_DEFAULT_QUOTE
$options[:table] ||= 'items'
$options[:actions][:set_delim] ||= CSV_DEFAULT_DELIMITER

# Diagnostic dump of the effective settings.
p $options

# Scratch CSV used while converting. The name is random and lives in the
# system temporary directory instead of a hard-coded /tmp path.
$tmpfile = File.join(Dir.tmpdir, "#{SecureRandom.hex(20)}.csv")
89
+
90
class String
  # Builds an identifier-friendly copy of the string.
  #
  # Surrounding whitespace is stripped, then every run of characters other
  # than lowercase a-z and digits 0-9 is collapsed into a single underscore.
  # Uppercase letters are outside that set and are therefore replaced too, so
  # callers that want to keep them should downcase first.
  #
  # @return [String] a new string; the receiver is not modified
  def underscore
    strip.gsub(/[^a-z0-9]+/, "_")
  end
end
96
+
97
require 'open3'
require 'tmpdir'

# Rewrites the input file ($options[:input]) with a new delimiter.
#
# The input is first normalised into a scratch CSV ($tmpfile) so its header
# row can be inspected, then the sqlite3 command-line tool imports the
# original file and writes it back using $options[:actions][:set_delim].
#
# Reads the globals $options, $tmpfile and $csv_converters, and the constants
# CSV_DEFAULT_DELIMITER / CSV_DEFAULT_QUOTE.
class MyParser
  # Dispatches on $options[:format]: 'FX' and 'RPT' are handled as
  # fixed-width text, 'CSV' as delimited text. Other values do nothing.
  def run
    case $options[:format]
    when 'FX', 'RPT' then load_fx
    when 'CSV' then load_csv
    end
  end

  # Copies the delimited input into $tmpfile using the default delimiter and
  # quote character, skipping empty rows, then hands the result to
  # #create_table_from_csv. Rows are streamed rather than buffered in memory.
  def load_csv
    CSV.open($tmpfile, "wb", :col_sep => CSV_DEFAULT_DELIMITER, :quote_char => CSV_DEFAULT_QUOTE) do |writer|
      CSV.foreach($options[:input], :col_sep => $options[:delim], :quote_char => $options[:quote], :converters => $csv_converters) do |row|
        writer << row unless row.empty?
      end
    end

    create_table_from_csv($tmpfile)
  end

  # Converts a fixed-width file into $tmpfile and hands the result to
  # #create_table_from_csv. Column widths are taken from the header line:
  # each column spans a header word plus the whitespace that follows it.
  #
  # @raise [RuntimeError] when the input contains no lines at all
  def load_fx
    data = File.read($options[:input])

    # Remove the U+FEFF byte order mark, which commonly appears in .RPT files.
    data.gsub!(["feff".hex].pack('U*'), '')
    lines = data.split(/[\r\n]+/)

    # The header must be taken off before the optional dash line below.
    header = lines.shift
    raise "File Empty!!!" if header.nil?

    # RPT files may carry a separator line made only of dashes and spaces.
    if $options[:format] == 'RPT'
      lines.shift if lines[0] =~ /^[\-\s]*$/
    end

    # NOTE: a final header word with no trailing whitespace is not matched by this pattern.
    columns = header.scan(/[^\s]+\s+/)
    widths = columns.map { |column| "a#{column.size}" }.join("")
    names = columns.map { |column| column.downcase.strip }

    CSV.open($tmpfile, "wb", :col_sep => CSV_DEFAULT_DELIMITER, :quote_char => CSV_DEFAULT_QUOTE) do |csv|
      csv << names
      lines.each do |line|
        record = line.unpack(widths).map { |field| field.strip }

        # Apply the named CSV converters, if any were configured.
        # $csv_converters may be unset, so nil is treated as "no converters".
        Array($csv_converters).each do |converter|
          record.map!(&CSV::Converters[converter])
        end

        csv << record
      end
    end

    create_table_from_csv($tmpfile)
  end

  # Derives column names from the header row of +csv_path+, then has sqlite3
  # import $options[:input] and write it back with the requested delimiter.
  # +csv_path+ is deleted afterwards, including when an error occurs.
  #
  # @param csv_path [String] scratch CSV written with the default delimiter and quote
  # @raise [RuntimeError] when +csv_path+ has no data row after its header
  def create_table_from_csv(csv_path)
    # Block form closes the file handle as soon as the first row is read.
    first = CSV.open(csv_path, :headers => true, :col_sep => CSV_DEFAULT_DELIMITER, :quote_char => CSV_DEFAULT_QUOTE) do |csv|
      csv.first
    end
    raise "File Empty!!!" unless first

    headers = sanitize_headers(first.headers)

    # Column names must be unique to form a valid table definition.
    if headers.count != headers.uniq.count
      error "duplicate field name [#{headers.sort.join(', ')}]"
    end

    table = $options[:table]
    column_sql = headers.map { |name| "\"#{name}\" text" }.join(",")
    create_table_sql = "create table #{table}( #{column_sql} );"
    puts create_table_sql

    input = sqlite_quote($options[:input])
    script = [
      create_table_sql,
      ".headers off",
      ".mode csv",
      ".separator \"#{$options[:delim]}\"",
      ".import #{input} #{table}",
      ".separator \"#{$options[:actions][:set_delim]}\"",
      ".output #{input}",
      "SELECT * FROM #{table};",
      ""
    ].join("\n")

    puts "Something went wrong!" unless run_sqlite(script)
  ensure
    File.delete(csv_path) if File.exist?(csv_path)
  end

  private

  # Replaces blank header cells with "column_N" (1-based position) and
  # normalises every name with downcase + String#underscore.
  def sanitize_headers(raw_headers)
    raw_headers.each_with_index.map do |name, index|
      name = "column_#{index + 1}" if name.nil? || name.empty?
      name.downcase.underscore
    end
  end

  # Wraps a value in double quotes for use as a sqlite3 dot-command argument,
  # so that paths containing spaces are passed as a single argument.
  def sqlite_quote(value)
    escaped = value.to_s.gsub(/["\\]/) { |char| "\\#{char}" }
    "\"#{escaped}\""
  end

  # Feeds +script+ to the sqlite3 command-line tool on standard input.
  #
  # The script is not passed through a shell, so characters such as `$` or
  # backticks in file names are not expanded. A fresh database inside a
  # temporary directory is used for every call, so nothing is left over from
  # one run to the next.
  #
  # @return [Boolean] true when sqlite3 exited with status 0
  def run_sqlite(script)
    Dir.mktmpdir("icsvutils") do |dir|
      _output, status = Open3.capture2("sqlite3", File.join(dir, "import.sqlite3"), :stdin_data => script)
      status.success? ? true : false
    end
  rescue Errno::ENOENT => ex
    error "could not run sqlite3: #{ex.message}"
  end
end
216
+
217
+ # Run!
218
# Build the parser and perform the conversion selected by $options[:format].
MyParser.new.run
220
+
data/lib/idata/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Idata
2
- VERSION = "1.2.3"
2
+ VERSION = "1.2.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: idata
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.3
4
+ version: 1.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nghi Pham
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-15 00:00:00.000000000 Z
11
+ date: 2016-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -87,6 +87,7 @@ files:
87
87
  - README.md
88
88
  - README2.md
89
89
  - Rakefile
90
+ - bin/icsvutils
90
91
  - bin/ieval
91
92
  - bin/iexport
92
93
  - bin/iload