idata 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 27e594b49b95b536a5d01ed6f90155751104ec1a
4
+ data.tar.gz: a4e52832bccdc2ab55ae33e7e6f659857b800991
5
+ SHA512:
6
+ metadata.gz: f55f15be3fbc959bcfacdb68f2b45e5b97fa560ee953890a66eb5a4715366019240e61c1b9ef36df9ed77d0ba396d6e38b32177a555d75aeb4f0deb04ae99757
7
+ data.tar.gz: 0e8061590a8d5da84ee9f74d137c2f6d022a58009d9d56ca965241c10e064a9cde59fb1f0d784cab643d88d19c1cdbe3a2f688ce2bbfcc42f4cd50b14e2bcfda
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in idata.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 TODO: Write your name
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Idata
2
+
3
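+ Command-line tools for importing data from raw files (fixed-width text or CSV) into PostgreSQL, and for evaluating, patching, validating and exporting the loaded tables.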
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'idata'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install idata
18
+
19
+ ## Usage
20
+
21
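+ Installing the gem provides six commands: `iload`, `ieval`, `ipatch`, `ivalidate`, `iexport` and `isanitize`. Run `iload`, `ieval`, `ipatch`, `ivalidate` or `iexport` with `--help` to list their options and examples; `isanitize FILE` rewrites FILE in place, dropping the bytes that cannot be converted to UTF-8.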
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/ieval ADDED
@@ -0,0 +1,222 @@
1
+ # DATA EVALUATOR
2
+ #
3
+ # @author Nghi Pham
4
+ # @date April 2014
5
+ #
6
+ # The script runs a Ruby snippet against every record of a table and writes
7
+ # the fields it changed back to that table in the specified database
8
+ # Issue ruby eval.rb --help for guideline/examples
9
+ #
10
+ # @todo Make the script OOP
11
+ # @todo Constantize default values
12
+ require 'optparse'
13
+ require 'csv'
14
+ require 'active_record'
15
+ require 'rubygems'
16
+ require 'digest/sha1'
17
+ require 'fileutils'
18
+
19
+ # Default configuration settings
20
+ POSTGRESQL_PORT = 5432
21
+
22
+ # Parse script inputs
23
+ $options = {}
24
+ parser = OptionParser.new("", 24) do |opts|
25
+ opts.banner = "\nProgram: Data Loader\nAuthor: MCKI\n\n"
26
+
27
+ opts.on("-e", "--eval SCRIPT", "Ruby SCRIPT to execute") do |v|
28
+ $options[:eval] = v
29
+ end
30
+
31
+ opts.on("-t", "--table TABLE", "Table name to be created") do |v|
32
+ $options[:table] = v
33
+ end
34
+
35
+ opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
36
+ $options[:host] = v
37
+ end
38
+
39
+ opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
40
+ $options[:database] = v
41
+ end
42
+
43
+ opts.on("-u", "--username USER", "PostgreSQL username") do |v|
44
+ $options[:username] = v
45
+ end
46
+
47
+ opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
48
+ $options[:password] = v
49
+ end
50
+
51
+ opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
52
+ $options[:listen] = v
53
+ end
54
+
55
+ opts.on_tail('--help', 'Displays this help') do
56
+ puts opts, "", help
57
+ exit
58
+ end
59
+ end
60
+
61
+ # Help message
62
+ def help
63
+ return <<-eos
64
+
65
+ EXAMPLES
66
+ -------------------------------------------------------
67
+ Run a custom Ruby command on the every record represented by the `item` variable
68
+
69
+ ruby eval.rb --host=localhost --username=postgres --password=postgres \\
70
+ --table=vendors --database=db \\
71
+ --eval="item.last_name = item.full_name.split(/\\s+/).last"
72
+
73
+ eos
74
+ end
75
+
76
+ begin
77
+ parser.parse!
78
+ rescue SystemExit => ex
79
+ exit
80
+ end
81
+
82
+ # Certain arguments are required
83
+ if $options[:eval].nil?
84
+ puts "\nPlease specify Ruby script to execute: -e\n\n"
85
+ exit
86
+ end
87
+
88
+ if $options[:table].nil?
89
+ puts "\nPlease specify SQL table name: -t\n\n"
90
+ exit
91
+ end
92
+
93
+ if $options[:host].nil?
94
+ puts "\nPlease specify host name: -h\n\n"
95
+ exit
96
+ end
97
+
98
+ if $options[:database].nil?
99
+ puts "\nPlease specify PostgreSQL database name: -d\n\n"
100
+ exit
101
+ end
102
+
103
+ if $options[:username].nil?
104
+ puts "\nPlease specify PostgreSQL username: -d\n\n"
105
+ exit
106
+ end
107
+
108
+ # Default in case arguments are not provided
109
+ $options[:listen] ||= POSTGRESQL_PORT
110
+
111
+ # Connect to PostgreSQL
112
+ # $options = {host: 'localhost', database: 'db', username: 'postgres', password: 'postgres', table: 'products', listen: 5432}
113
+ ActiveRecord::Base.establish_connection(
114
+ 'adapter' => 'postgresql',
115
+ 'host' => $options[:host],
116
+ 'database' => $options[:database],
117
+ 'username' => $options[:username],
118
+ 'password' => $options[:password],
119
+ 'port' => $options[:listen],
120
+ 'timeout' => 15000
121
+ )
122
+
123
+ class Product < ActiveRecord::Base
124
+ self.primary_key = :id
125
+ self.table_name = $options[:table]
126
+ end
127
+
128
+ # Temp file & temp table name
129
+ $tmpfile = "/tmp/#{Digest::SHA1.hexdigest(rand(100000).to_s)}.csv"
130
+ $tmpname = "_tmp_#{Digest::SHA1.hexdigest(rand(100000).to_s)}"
131
+
132
+ # Check if PostgreSQL host is local
133
+ def local?
134
+ return ['localhost', '127.0.0.1'].include?($options[:host])
135
+ end
136
+
137
+ # Execute query
138
+ def query(*query_str)
139
+ ActiveRecord::Base.connection.execute(query_str.join("; "))
140
+ end
141
+
142
+ # Create table +name+ from the CSV file at +csv_path+ and bulk-load the file into it.
143
+ # Any existing table called +name+ is dropped first. Raises RuntimeError when the
144
+ # file has no data row, when header names clash, or when the remote psql load fails.
145
+ def create_table_from_csv(name, csv_path)
146
+   # Only the first row is needed for the headers; the block form closes the file
147
+   first = CSV.open(csv_path, :headers => true) { |csv| csv.first }
148
+   raise "File Empty!!!" unless first
149
+
150
+   # sanitize: blank headers are named after their position, then all are normalised
151
+   headers = first.headers
152
+   headers.each_with_index do |e, index|
153
+     headers[index] = "column_#{index + 1}" if e.nil? || e.empty?
154
+   end
155
+   headers.map! { |e| e.downcase.underscore }
156
+
157
+   # check if every field name is unique
158
+   if headers.count != headers.uniq.count
159
+     raise "Field name must be UNIQUE: \nPlease check your input headers: [#{headers.join(', ')}]"
160
+   end
161
+
162
+   # Create table: "id" becomes an integer column, every other header a text column
163
+   create_table_sql = headers.map { |e| e == 'id' ? "\"#{e}\" integer" : "\"#{e}\" text" }.join(",")
164
+   create_table_sql = "drop table if exists #{name}; create table #{name}( #{create_table_sql} );"
165
+   query(create_table_sql)
166
+
167
+   # Dump data
168
+   insert_data_sql = headers.map { |e| "\"#{e}\"" }.join(",")
169
+   insert_data_sql = "COPY #{name}( #{insert_data_sql} ) FROM '#{csv_path}' DELIMITER ',' CSV HEADER;"
170
+
171
+   # Change output file permission so that postgres user can read it
172
+   begin
173
+     FileUtils.chmod 0755, csv_path
174
+   rescue StandardError # best effort only; rescuing Exception would also swallow Interrupt/SystemExit
175
+     puts "Error while changing file permission"
176
+   end
177
+
178
+   if local?
179
+     query(insert_data_sql)
180
+   else
181
+     puts "\nWARNING: pushing data to remote server [#{$options[:host]}].\nBe sure you have the correct version of `psql` command installed\n\n"
182
+     # The leading backslash turns the statement into psql's client-side \copy. Passing an
183
+     # argument array plus an env hash bypasses the shell, so no value needs quoting.
184
+     psql_args = ['-U', $options[:username], '-h', $options[:host], '-p', $options[:listen].to_s,
185
+                  $options[:database], '-c', "\\#{insert_data_sql}"]
186
+     # PGPASSWORD must carry the password (it used to be given the username)
187
+     loaded = system({ 'PGPASSWORD' => $options[:password].to_s }, 'psql', *psql_args)
188
+     raise "psql failed to load #{csv_path} into #{name}" unless loaded
189
+   end
190
+ end
191
+
192
+ if !local? and $options[:password].nil?
193
+ puts "\n\nERROR: You are connecting to a remote server\nPlease make sure you specify SQL password: --password \n\n"
194
+ exit
195
+ end
196
+
197
+ fields_changed = []
198
+
199
+ CSV.open($tmpfile, "wb", :col_sep => ',') do |csv|
200
+ csv << Product.first.attributes.keys
201
+
202
+ Product.all.each do |item|
203
+ # Execute the script provided on every record
204
+ eval($options[:eval])
205
+ fields_changed += item.changes.keys
206
+ fields_changed.uniq!
207
+ csv << item.attributes.values
208
+ end
209
+ end
210
+
211
+ unless fields_changed.empty?
212
+ create_table_from_csv($tmpname, $tmpfile)
213
+ update_sql = fields_changed.map{|f| "\"#{f}\" = tmp.\"#{f}\""}.join(", ")
214
+ update_sql = "UPDATE #{$options[:table]} origin SET #{update_sql} FROM #{$tmpname} tmp WHERE origin.id = tmp.id"
215
+ query(update_sql)
216
+
217
+ cleanup_sql = "DROP TABLE #{$tmpname};"
218
+ query(cleanup_sql)
219
+
220
+ puts "\nDone\n\n"
221
+ end
222
+
data/bin/iexport ADDED
@@ -0,0 +1,163 @@
1
+ # DATA EXPORTER
2
+ #
3
+ # @author Nghi Pham
4
+ # @date April 2014
5
+ #
6
+ # The script exports the selected fields and rows of a table in the
7
+ # specified database to a CSV file
8
+ # Issue ruby export.rb --help for guideline/examples
9
+ #
10
+ require 'optparse'
11
+ require 'csv'
12
+ require 'active_record'
13
+ require 'rubygems'
14
+ require 'digest/sha1'
15
+ require 'fileutils'
16
+
17
+ SUPPORTED_INPUT_FORMATS = ['CSV']
18
+ POSTGRESQL_PORT = 5432
19
+ CSV_DEFAULT_DELIMITER = ','
20
+
21
+ $options = {}
22
+ parser = OptionParser.new("", 24) do |opts|
23
+ opts.banner = "\nProgram: Data Exporter\nAuthor: MCKI\n\n"
24
+
25
+ opts.on("-f", "--format FORMAT", "Output file format") do |v|
26
+ $options[:format] = v
27
+ end
28
+
29
+ opts.on("--delim DELIMITER", "CSV delimiter") do |v|
30
+ $options[:delim] = v
31
+ end
32
+
33
+ opts.on("-t", "--table TABLE", "Table name to be created") do |v|
34
+ $options[:table] = v
35
+ end
36
+
37
+ opts.on("-s", "--select FIELDS", "Fields to export") do |v|
38
+ $options[:select] = v
39
+ end
40
+
41
+ opts.on("-w", "--where CONDITION", "CONDITION") do |v|
42
+ $options[:where] = v
43
+ end
44
+
45
+ opts.on("-o", "--output FILE", "Output file") do |v|
46
+ $options[:output] = v
47
+ end
48
+
49
+ opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
50
+ $options[:host] = v
51
+ end
52
+
53
+ opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
54
+ $options[:database] = v
55
+ end
56
+
57
+ opts.on("-u", "--username USER", "PostgreSQL username") do |v|
58
+ $options[:username] = v
59
+ end
60
+
61
+ opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
62
+ $options[:password] = v
63
+ end
64
+
65
+ opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
66
+ $options[:listen] = v
67
+ end
68
+
69
+ opts.on_tail('--help', 'Displays this help') do
70
+ puts opts, "", help
71
+ exit
72
+ end
73
+ end
74
+
75
+ def help
76
+ return <<-eos
77
+
78
+ EXAMPLES
79
+ -------------------------------------------------------
80
+
81
+ ruby export.rb --host=localhost --username=postgres --database=db --table=items \\
82
+ --output=/tmp/data.csv --format=csv --delim="\\t" \\
83
+ --select="id, name AS vendor_name, age" --where="active = 1"
84
+
85
+ eos
86
+ end
87
+
88
+ begin
89
+ parser.parse!
90
+ rescue SystemExit => ex
91
+ exit
92
+ end
93
+
94
+ if $options[:format].nil?
95
+ puts "\nPlease specify input file format: -f\n\n"
96
+ exit
97
+ end
98
+
99
+ # downcase for consistency
100
+ $options[:format].upcase!
101
+
102
+ if !SUPPORTED_INPUT_FORMATS.include?($options[:format])
103
+ puts "\nInvalid input file format, supported formats are: #{SUPPORTED_INPUT_FORMATS.join(', ')}\n\n"
104
+ exit
105
+ end
106
+
107
+ if $options[:table].nil?
108
+ puts "\nPlease specify SQL table name: -t\n\n"
109
+ exit
110
+ end
111
+
112
+ if $options[:output].nil?
113
+ puts "\nPlease specify output file: -o\n\n"
114
+ exit
115
+ end
116
+
117
+ if $options[:host].nil?
118
+ puts "\nPlease specify host name: -h\n\n"
119
+ exit
120
+ end
121
+
122
+ if $options[:database].nil?
123
+ puts "\nPlease specify PostgreSQL database name: -d\n\n"
124
+ exit
125
+ end
126
+
127
+ if $options[:username].nil?
128
+ puts "\nPlease specify PostgreSQL username: -d\n\n"
129
+ exit
130
+ end
131
+
132
+ # Default value
133
+ $options[:listen] ||= POSTGRESQL_PORT
134
+ $options[:delim] ||= CSV_DEFAULT_DELIMITER
135
+ $options[:delim] = "\t" if $options[:delim] == '\t'
136
+ $options[:select] ||= '*'
137
+ $options[:where] ||= 'true'
138
+
139
+ #$options = {host: 'localhost', database: 'db', username: 'postgres', password: 'postgres', table: 'products', listen: 5432}
140
+ ActiveRecord::Base.establish_connection(
141
+ 'adapter' => 'postgresql',
142
+ 'host' => $options[:host],
143
+ 'database' => $options[:database],
144
+ 'username' => $options[:username],
145
+ 'password' => $options[:password],
146
+ 'port' => $options[:listen],
147
+ 'timeout' => 15000
148
+ )
149
+
150
+ class Product < ActiveRecord::Base
151
+ self.table_name = $options[:table]
152
+ end
153
+
154
+ CSV.open($options[:output], "wb", :col_sep => $options[:delim]) do |csv|
155
+ scope = Product.select($options[:select]).where($options[:where])
156
+ csv << scope.first.attributes.keys
157
+
158
+ scope.each do |item|
159
+ csv << item.attributes.values
160
+ end
161
+ end
162
+
163
+ puts "\nFile #{$options[:output]} created!\n\n"
data/bin/iload ADDED
@@ -0,0 +1,276 @@
1
+ # DATA LOADER
2
+ #
3
+ # @author Nghi Pham
4
+ # @date April 2014
5
+ #
6
+ # The script loads data from a fixed-width text file or a CSV file and fills in
7
+ # a corresponding table in the specified database
8
+ # Issue ruby load.rb --help for guideline/examples
9
+ #
10
+ require 'optparse'
11
+ require 'csv'
12
+ require 'active_record'
13
+ require 'rubygems'
14
+ require 'digest/sha1'
15
+ require 'fileutils'
16
+
17
+ SUPPORTED_INPUT_FORMATS = ['CSV', 'FX']
18
+ POSTGRESQL_PORT = 5432
19
+ CSV_DEFAULT_DELIMITER = ','
20
+
21
+ $options = {}
22
+ parser = OptionParser.new("", 24) do |opts|
23
+ opts.banner = "\nProgram: Data Loader\nAuthor: MCKI\n\n"
24
+
25
+ opts.on("-i", "--input INPUT", "INPUT text file (fixed-width)") do |v|
26
+ $options[:input] = v
27
+ end
28
+
29
+ opts.on("-f", "--format FORMAT", "Input file format, available values include (CSV|FX) ") do |v|
30
+ $options[:format] = v
31
+ end
32
+
33
+ opts.on("--delim DELIMITER", "Field DELIMITER (for CSV format only - default to COMMA ',')") do |v|
34
+ $options[:delim] = v
35
+ end
36
+
37
+ # opts.on("-o", "--output CSV", "Temporary CSV output file") do |v|
38
+ # $options[:output] = v
39
+ # end
40
+
41
+ opts.on("-t", "--table TABLE", "Table name to be created") do |v|
42
+ $options[:table] = v
43
+ end
44
+
45
+ opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
46
+ $options[:host] = v
47
+ end
48
+
49
+ opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
50
+ $options[:database] = v
51
+ end
52
+
53
+ opts.on("-u", "--username USER", "PostgreSQL username") do |v|
54
+ $options[:username] = v
55
+ end
56
+
57
+ opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
58
+ $options[:password] = v
59
+ end
60
+
61
+ opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
62
+ $options[:listen] = v
63
+ end
64
+
65
+ opts.on_tail('--help', 'Displays this help') do
66
+ puts opts, "", help
67
+ exit
68
+ end
69
+ end
70
+
71
+ def help
72
+ return <<-eos
73
+
74
+ EXAMPLES
75
+ -------------------------------------------------------
76
+ Load data from text file and store to a table name "vendors"
77
+
78
+ ruby load.rb --host=localhost --username=postgres --password=postgres \\
79
+ --input=/home/administrator/VendorMaster.txt --format=fx \\
80
+ --table=vendors --database=db
81
+
82
+
83
+ The same usage in short form:
84
+
85
+ ruby load.rb -h localhost -u postgres -p postgres \\
86
+ -i /home/administrator/VendorMaster.txt -f fx \\
87
+ -t vendors -d db
88
+
89
+ eos
90
+ end
91
+
92
+ begin
93
+ parser.parse!
94
+ rescue SystemExit => ex
95
+ exit
96
+ end
97
+
98
+ if $options[:input].nil?
99
+ puts "\nPlease specify input file: -i\n\n"
100
+ exit
101
+ end
102
+
103
+ if $options[:format].nil?
104
+ puts "\nPlease specify input file format: -f\n\n"
105
+ exit
106
+ end
107
+
108
+ # upcase for consistency with the entries of SUPPORTED_INPUT_FORMATS
109
+ $options[:format].upcase!
110
+
111
+ if !SUPPORTED_INPUT_FORMATS.include?($options[:format])
112
+ puts "\nInvalid input file format, supported formats are: #{SUPPORTED_INPUT_FORMATS.join(', ')}\n\n"
113
+ exit
114
+ end
115
+
116
+ if $options[:table].nil?
117
+ puts "\nPlease specify table name: -t\n\n"
118
+ exit
119
+ end
120
+
121
+ unless File.exists?($options[:input])
122
+ puts "\nFile does not exist"
123
+ exit
124
+ end
125
+
126
+ if $options[:host].nil?
127
+ puts "\nPlease specify host name: -h\n\n"
128
+ exit
129
+ end
130
+
131
+ if $options[:database].nil?
132
+ puts "\nPlease specify PostgreSQL database name: -d\n\n"
133
+ exit
134
+ end
135
+
136
+ if $options[:username].nil?
137
+ puts "\nPlease specify PostgreSQL username: -d\n\n"
138
+ exit
139
+ end
140
+
141
+ # Default value
142
+ $options[:listen] ||= POSTGRESQL_PORT
143
+ $options[:delim] ||= CSV_DEFAULT_DELIMITER
144
+ $options[:delim] = "\t" if $options[:delim] == '\t'
145
+ $tmpfile = "/tmp/#{Digest::SHA1.hexdigest(rand(100000).to_s)}.csv"
146
+
147
+ # Database dump
148
+ ActiveRecord::Base.establish_connection(
149
+ 'adapter' => 'postgresql',
150
+ 'host' => $options[:host],
151
+ 'database' => $options[:database],
152
+ 'username' => $options[:username],
153
+ 'password' => $options[:password],
154
+ 'port' => $options[:listen],
155
+ 'timeout' => 15000
156
+ )
157
+
158
+ class String
159
+ def underscore
160
+ return self if self.nil?
161
+ return self.strip.gsub(/[^a-z0-9]+/, "_")
162
+ end
163
+ end
164
+
165
+ class MyParser
166
+ def initialize
167
+ # remote server always requires password
168
+ if !local? and $options[:password].nil?
169
+ raise "You are connecting to a remote server\nPlease make sure you specify SQL password: --password "
170
+ end
171
+ end
172
+
173
+ def run
174
+ load_fx if $options[:format] == 'FX'
175
+ load_csv if $options[:format] == 'CSV'
176
+ end
177
+
178
+ def load_csv
179
+ # Copy file to /tmp/ folder to prevent Permission Error
180
+ FileUtils.cp $options[:input], $tmpfile
181
+ create_table_from_csv($tmpfile)
182
+ end
183
+
184
+ def load_fx
185
+ # Load data
186
+ data = IO.read($options[:input]).split("\n")
187
+ header = data.shift
188
+ headers = header.scan(/[^\s]+\s+/)
189
+
190
+ # Parse
191
+ ranges = headers.map{|s| "a#{s.size}"}.join("")
192
+ headers.map!{|s| s.downcase.strip }
193
+
194
+ # Write
195
+ CSV.open($tmpfile, "wb", :col_sep => CSV_DEFAULT_DELIMITER, quote_char: "\b") do |csv|
196
+ csv << headers
197
+
198
+ data.each_with_index{|s, index|
199
+ record = s.unpack(ranges).map{|e| e.strip}
200
+ csv << record
201
+ }
202
+ end
203
+
204
+ create_table_from_csv($tmpfile)
205
+ end
206
+ # Create table $options[:table] from the CSV at csv_path and bulk-load the file into it.
207
+ # Raises RuntimeError when the file has no data row, headers clash or the psql load fails.
208
+ def create_table_from_csv(csv_path)
209
+   # Only the first row is needed for the headers; the block form closes the file.
210
+   # NOTE(review): "\b" as quote_char presumably keeps quote characters in the data literal - confirm
211
+   first = CSV.open(csv_path, :headers => true, :col_sep => $options[:delim], quote_char: "\b") { |csv| csv.first }
212
+   raise "File Empty!!!" unless first
213
+
214
+   # sanitize: blank headers are named after their position, then all are normalised
215
+   headers = first.headers
216
+   headers.each_with_index do |e, index|
217
+     headers[index] = "column_#{index + 1}" if e.nil? || e.empty?
218
+   end
219
+   headers.map! { |e| e.downcase.underscore }
220
+
221
+   # check if every field name is unique
222
+   if headers.count != headers.uniq.count
223
+     raise "Field name must be UNIQUE: \nPlease check your input headers: [#{headers.sort.join(', ')}]"
224
+   end
225
+
226
+   # Create table: a serial "id" plus one text column per header
227
+   create_table_sql = headers.map { |e| "\"#{e}\" text" }.join(",")
228
+   create_table_sql = "drop table if exists #{$options[:table]}; create table #{$options[:table]}( id serial not null, #{create_table_sql} );"
229
+   query(create_table_sql)
230
+
231
+   # Dump data. DELIMITER always needs a literal: it used to be left empty for every
232
+   # delimiter other than TAB, which produced an invalid COPY statement.
233
+   pg_delim = $options[:delim] == "\t" ? "E'\t'" : "'#{$options[:delim].gsub("'", "''")}'"
234
+   insert_data_sql = headers.map { |e| "\"#{e}\"" }.join(",")
235
+   insert_data_sql = "COPY #{$options[:table]}( #{insert_data_sql} ) FROM '#{csv_path}' DELIMITER #{pg_delim} CSV HEADER QUOTE E'\b';"
236
+
237
+   # Change output file permission so that postgres user can read it
238
+   begin
239
+     FileUtils.chmod 0755, csv_path
240
+   rescue StandardError # best effort only; rescuing Exception would also swallow Interrupt/SystemExit
241
+     puts "Error while changing file permission"
242
+   end
243
+
244
+   if local?
245
+     query(insert_data_sql)
246
+   else
247
+     puts "\nWARNING: pushing data to remote server [#{$options[:host]}].\nBe sure you have the correct version of `psql` command installed\n\n"
248
+     # The leading backslash turns the statement into psql's client-side \copy. Passing an
249
+     # argument array plus an env hash bypasses the shell, so no value needs quoting.
250
+     psql_args = ['-U', $options[:username], '-h', $options[:host], '-p', $options[:listen].to_s,
251
+                  $options[:database], '-c', "\\#{insert_data_sql}"]
252
+     # PGPASSWORD must carry the password (it used to be given the username)
253
+     loaded = system({ 'PGPASSWORD' => $options[:password].to_s }, 'psql', *psql_args)
254
+     raise "psql failed to load #{csv_path} into #{$options[:table]}" unless loaded
255
+   end
256
+
257
+   puts "\nTable `#{$options[:table]}` loaded \n\n"
258
+ end
259
+
260
+ private
261
+ def query(*query_str)
262
+ ActiveRecord::Base.connection.execute(query_str.join("; "))
263
+ end
264
+
265
+ def local?
266
+ return ['localhost', '127.0.0.1'].include?($options[:host])
267
+ end
268
+ end
269
+
270
+ begin
271
+ e = MyParser.new
272
+ e.run
273
+ rescue Exception => ex
274
+ puts "\n\n*********** ERROR ***********\n#{ex.message}\n\n"
275
+ exit
276
+ end
data/bin/ipatch ADDED
@@ -0,0 +1,107 @@
1
+ # DATA PATCHER
2
+ #
3
+ # @author Nghi Pham
4
+ # @date April 2014
5
+ #
6
+ # Any custom patches to the database go here
7
+ #
8
+ require 'optparse'
9
+ require 'csv'
10
+ require 'active_record'
11
+ require 'rubygems'
12
+
13
+ # Default values
14
+ POSTGRESQL_PORT = 5432
15
+
16
+ # User input
17
+ $options = {}
18
+ parser = OptionParser.new("", 24) do |opts|
19
+ opts.banner = "\nProgram: Data Patcher\nAuthor: MCKI\n\n"
20
+
21
+ opts.on("-q", "--query QUERY", "Custom query") do |v|
22
+ $options[:query] = v
23
+ end
24
+
25
+ opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
26
+ $options[:host] = v
27
+ end
28
+
29
+ opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
30
+ $options[:database] = v
31
+ end
32
+
33
+ opts.on("-u", "--username USER", "PostgreSQL username") do |v|
34
+ $options[:username] = v
35
+ end
36
+
37
+ opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
38
+ $options[:password] = v
39
+ end
40
+
41
+ opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
42
+ $options[:listen] = v
43
+ end
44
+
45
+ opts.on_tail('--help', 'Displays this help') do
46
+ puts opts, "", help
47
+ exit
48
+ end
49
+ end
50
+
51
+ def help
52
+ return <<-eos
53
+
54
+ EXAMPLES
55
+ -------------------------------------------------------
56
+ Send custom SQL to a data table
57
+
58
+ ruby patch.rb --host=localhost --username=postgres --password=postgres --database=db \\
59
+ --query="ALTER TABLE vendors ADD COLUMN tmp text; UPDATE vendors SET tmp = vendor_number"
60
+
61
+ eos
62
+ end
63
+
64
+ begin
65
+ parser.parse!
66
+ rescue SystemExit => ex
67
+ exit
68
+ end
69
+
70
+ if $options[:query].nil?
71
+ puts "\nPlease specify SQL query: -q\n\n"
72
+ exit
73
+ end
74
+
75
+ if $options[:host].nil?
76
+ puts "\nPlease specify host name: -h\n\n"
77
+ exit
78
+ end
79
+
80
+ if $options[:database].nil?
81
+ puts "\nPlease specify PostgreSQL database name: -d\n\n"
82
+ exit
83
+ end
84
+
85
+ if $options[:username].nil?
86
+ puts "\nPlease specify PostgreSQL username: -d\n\n"
87
+ exit
88
+ end
89
+
90
+ # Default value
91
+ $options[:listen] ||= POSTGRESQL_PORT
92
+
93
+ # Database
94
+ ActiveRecord::Base.establish_connection(
95
+ 'adapter' => 'postgresql',
96
+ 'host' => $options[:host],
97
+ 'database' => $options[:database],
98
+ 'username' => $options[:username],
99
+ 'password' => $options[:password],
100
+ 'port' => $options[:listen],
101
+ 'timeout' => 15000
102
+ )
103
+
104
+ # Execute the query, use it at your own risk!!!
105
+ ActiveRecord::Base.connection.execute($options[:query])
106
+
107
+ puts "\nDone!\n\n"
data/bin/isanitize ADDED
@@ -0,0 +1,4 @@
1
+ file = ARGV[0]
2
+ s = IO.read(file)
3
+ s = s.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
4
+ File.open(file, 'wb') {|f| f.write(s)}
data/bin/ivalidate ADDED
@@ -0,0 +1,258 @@
1
+ # DATA VALIDATOR
2
+ #
3
+ # @author Nghi Pham
4
+ # @date April 2014
5
+ #
6
+ # Data validation includes:
7
+ # * Uniqueness
8
+ # * Integrity (cross reference)
9
+ # * Data type: numeric, text, enum, etc.
10
+ # * Data format: text size, text values, enum, inclusion, exclusion, etc.
11
+ #
12
+ # Issue ruby validate.rb --help for guideline/examples
13
+ #
14
+ require 'optparse'
15
+ require 'csv'
16
+ require 'active_record'
17
+ require 'rubygems'
18
+ require 'digest/sha1'
19
+
20
+ $options = {
21
+ :unique => [],
22
+ :not_null => [],
23
+ :match => [],
24
+ :not_match => [],
25
+ :cross_reference => []
26
+ }
27
+ parser = OptionParser.new("", 24) do |opts|
28
+ opts.banner = "\nProgram: Data Validator\nAuthor: MCKI\n\n"
29
+
30
+ opts.on("--unique FIELD", "Check if FIELD is unique") do |v|
31
+ $options[:unique] << v
32
+ end
33
+
34
+ opts.on("--not-null FIELD", "Check if FIELD is not null or empty") do |v|
35
+ $options[:not_null] << v
36
+ end
37
+
38
+ opts.on("--match REGEXP", "Check if FIELD matches REGEXP") do |v|
39
+ $options[:match] << v
40
+ end
41
+
42
+ opts.on("--not-match REGEXP", "Check if FIELD does not match REGEXP") do |v|
43
+ $options[:not_match] << v
44
+ end
45
+
46
+ opts.on("--cross-reference EXPR", "Reference") do |v|
47
+ $options[:cross_reference] << v
48
+ end
49
+
50
+ opts.on("-t", "--table TABLE", "Table to verify") do |v|
51
+ $options[:table] = v
52
+ end
53
+
54
+ opts.on("--log-to FIELD", "Field to log error to") do |v|
55
+ $options[:log_to] = v
56
+ end
57
+
58
+ opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
59
+ $options[:host] = v
60
+ end
61
+
62
+ opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
63
+ $options[:database] = v
64
+ end
65
+
66
+ opts.on("-u", "--username USER", "PostgreSQL username") do |v|
67
+ $options[:username] = v
68
+ end
69
+
70
+ opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
71
+ $options[:password] = v
72
+ end
73
+
74
+ opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
75
+ $options[:listen] = v
76
+ end
77
+
78
+ opts.on_tail('--help', 'Displays this help') do
79
+ puts opts, "", help
80
+ exit
81
+ end
82
+ end
83
+
84
+ def help
85
+ return <<-eos
86
+
87
+ EXAMPLES
88
+ -------------------------------------------------------
89
+ Validate `mfrs` table against several rules and write error logs to errors field:
90
+
91
+ ruby validate.rb -h localhost -u postgres -p postgres -d db \\
92
+ --table=mfrs \\
93
+ --log-to=errors \\
94
+ --unique="mfr_lic" \\
95
+ --unique="mfr_number" \\
96
+ --not-null="mfr_number" \\
97
+ --not-null="mfr_lic" \\
98
+ --match="mfr_number/^2.*/" \\
99
+ --match="mfr_name/^[a-zA-Z]+/" \\
100
+ --not-match="status/(failed|inactive|wrong)/"
101
+
102
+ eos
103
+ end
104
+
105
+ begin
106
+ parser.parse!
107
+ rescue SystemExit => ex
108
+ exit
109
+ end
110
+
111
+ if $options[:table].nil?
112
+ puts "\nPlease specify table name: -t\n\n"
113
+ exit
114
+ end
115
+
116
+ if $options[:log_to].nil?
117
+ puts "\nPlease specify field to log errors: --log-to\n\n"
118
+ exit
119
+ end
120
+
121
+ if $options[:host].nil?
122
+ puts "\nPlease specify host name: -h\n\n"
123
+ exit
124
+ end
125
+
126
+ if $options[:database].nil?
127
+ puts "\nPlease specify PostgreSQL database name: -d\n\n"
128
+ exit
129
+ end
130
+
131
+ if $options[:username].nil?
132
+ puts "\nPlease specify PostgreSQL username: -d\n\n"
133
+ exit
134
+ end
135
+
136
+ # Default value
137
+ $options[:listen] ||= 5432
138
+ $options[:output] ||= "/tmp/#{Digest::SHA1.hexdigest(rand(100000).to_s)}.csv"
139
+
140
+ # Database dump
141
+ ActiveRecord::Base.establish_connection(
142
+ 'adapter' => 'postgresql',
143
+ 'host' => $options[:host],
144
+ 'database' => $options[:database],
145
+ 'username' => $options[:username],
146
+ 'password' => $options[:password],
147
+ 'port' => $options[:listen],
148
+ 'timeout' => 15000
149
+ )
150
+
151
+ # --------------------------------------------------------------------
152
+ # Preliminary check
153
+ # Add column errors if not yet exists
154
+ # --------------------------------------------------------------------
155
+ pre_sql = <<-eos
156
+ DO $$
157
+ BEGIN
158
+ BEGIN
159
+ ALTER TABLE #{$options[:table]} ADD COLUMN #{$options[:log_to]} text default '';
160
+ EXCEPTION
161
+ WHEN duplicate_column THEN RAISE NOTICE 'column #{$options[:log_to]} already exists';
162
+ END;
163
+ END;
164
+ $$
165
+ eos
166
+
167
+ ActiveRecord::Base.connection.execute(pre_sql)
168
+
169
+ # --------------------------------------------------------------------
170
+ # Check unique field
171
+ # --------------------------------------------------------------------
172
+ $options[:unique].each do |field|
173
+ puts "Checking unique fields: #{field}"
174
+
175
+ uniq_sql = <<-eos
176
+ UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{field}-not-unique', ' || '), ' || ')
177
+ WHERE #{field} IN (
178
+ SELECT #{field} FROM #{$options[:table]} GROUP BY #{field}
179
+ HAVING count(*) > 1
180
+ ) AND #{field} IS NOT NULL AND length(trim(#{field})) <> 0;
181
+ eos
182
+
183
+ ActiveRecord::Base.connection.execute(uniq_sql)
184
+ end
185
+
186
+ # --------------------------------------------------------------------
187
+ # Check not-null field
188
+ # --------------------------------------------------------------------
189
+ $options[:not_null].each do |field|
190
+ puts "Checking not-null fields: #{field}"
191
+
192
+ not_null_sql = <<-eos
193
+ UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{field}-null', ' || '), ' || ')
194
+ WHERE #{field} IS NULL OR length(trim(#{field})) = 0;
195
+ eos
196
+
197
+ ActiveRecord::Base.connection.execute(not_null_sql)
198
+ end
199
+
200
+ # --------------------------------------------------------------------
201
+ # Check field satisfying --match=FIELD/REGEXP/
202
+ # --------------------------------------------------------------------
203
+ $options[:match].each do |value|
204
+ field = value[/^[^\/]+/]
205
+ regexp = value[/(?<=\/).*(?=\/)/]
206
+ puts "Checking REGEXP matching: #{regexp}"
207
+
208
+ match_sql = <<-eos
209
+ UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{regexp.gsub("'", "''")}-not-matched', ' || '), ' || ')
210
+ WHERE #{field} IS NOT NULL AND length(trim(#{field})) <> 0 AND #{field} !~ '#{regexp}';
211
+ eos
212
+
213
+ ActiveRecord::Base.connection.execute(match_sql)
214
+ end
215
+
216
+ # --------------------------------------------------------------------
217
+ # Check field satisfying --not-match=FIELD/REGEXP/
218
+ # --------------------------------------------------------------------
219
+ $options[:not_match].each do |value|
220
+ field = value[/^[^\/]+/]
221
+ regexp = value[/(?<=\/).*(?=\/)/]
222
+ puts "Checking REGEXP not matching: #{regexp}"
223
+
224
+ not_match_sql = <<-eos
225
+ UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{regexp.gsub("'", "''")}-unexpectedly-matched', ' || '), ' || ')
226
+ WHERE #{field} IS NOT NULL AND length(trim(#{field})) <> 0 AND #{field} ~ '#{regexp}';
227
+ eos
228
+
229
+ ActiveRecord::Base.connection.execute(not_match_sql)
230
+ end
231
+
232
+ # --------------------------------------------------------------------
233
+ # Check field satisfying --cross-reference
234
+ # --------------------------------------------------------------------
235
+ # Check --cross-reference: each value is split on "|" or "." into FIELD, REF_TABLE and
236
+ # REF_FIELD; rows whose non-blank FIELD is missing from REF_TABLE.REF_FIELD get logged
237
+ $options[:cross_reference].each do |value|
238
+   values = value.split(/[|\.]/)
239
+
240
+   # raise already terminates the script; the exit(0) that used to follow it was unreachable
241
+   raise "Error: Wrong argument for --cross-reference switch" if values.size != 3
242
+
243
+   field, ref_table, ref_field = values
244
+
245
+   puts "Checking data integrity: #{value}"
246
+
247
+   # @todo: poor performance here, think of a better SQL!!!
248
+   # NULLs are filtered out of the sub-select: a single NULL there would make NOT IN
249
+   # evaluate to NULL for every row, so no row would ever be flagged
250
+   ref_sql = <<-eos
251
+     UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{field}-not-referenced-to-#{ref_table}.#{ref_field}', ' || '), ' || ')
252
+     WHERE #{field} NOT IN (
253
+       SELECT #{ref_field} FROM #{ref_table} WHERE #{ref_field} IS NOT NULL
254
+     ) AND #{field} IS NOT NULL AND length(trim(#{field})) <> 0;
255
+   eos
256
+
257
+   ActiveRecord::Base.connection.execute(ref_sql)
258
+ end
data/idata.gemspec ADDED
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'idata/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "idata"
8
+ spec.version = Idata::VERSION
9
+ spec.authors = ["Nghi Pham"]
10
+ spec.email = ["minhnghivn@gmail.com"]
11
+ spec.description = %q{Tools for importing data from raw files}
12
+ spec.summary = %q{Tools for importing data from raw files}
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ #spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.executables = ["iload", "ieval", "ipatch", "ivalidate", "iexport", "isanitize"]
19
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
+ spec.require_paths = ["lib"]
21
+
22
+ spec.add_development_dependency "bundler", ">= 1.3"
23
+ spec.add_development_dependency "rake", ">= 0.9"
24
+
25
+ spec.add_dependency "rails", ">= 4.0"
26
+ spec.add_dependency "pg"
27
+ end
@@ -0,0 +1,3 @@
1
+ module Idata
2
+ VERSION = "0.0.1" # gem version string; idata.gemspec assigns it to spec.version
3
+ end
data/lib/idata.rb ADDED
@@ -0,0 +1,5 @@
1
+ require "idata/version"
2
+
3
+ module Idata
4
+ # Namespace for the idata gem. It currently holds only VERSION (loaded from idata/version above); the packaged file list shows no other library code under lib/.
5
+ end
metadata ADDED
@@ -0,0 +1,120 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: idata
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Nghi Pham
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0.9'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0.9'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rails
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '4.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '4.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pg
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Tools for importing data from raw files
70
+ email:
71
+ - minhnghivn@gmail.com
72
+ executables:
73
+ - iload
74
+ - ieval
75
+ - ipatch
76
+ - ivalidate
77
+ - iexport
78
+ - isanitize
79
+ extensions: []
80
+ extra_rdoc_files: []
81
+ files:
82
+ - .gitignore
83
+ - Gemfile
84
+ - LICENSE.txt
85
+ - README.md
86
+ - Rakefile
87
+ - bin/ieval
88
+ - bin/iexport
89
+ - bin/iload
90
+ - bin/ipatch
91
+ - bin/isanitize
92
+ - bin/ivalidate
93
+ - idata.gemspec
94
+ - lib/idata.rb
95
+ - lib/idata/version.rb
96
+ homepage: ''
97
+ licenses:
98
+ - MIT
99
+ metadata: {}
100
+ post_install_message:
101
+ rdoc_options: []
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubyforge_project:
116
+ rubygems_version: 2.2.1
117
+ signing_key:
118
+ specification_version: 4
119
+ summary: Tools for importing data from raw files
120
+ test_files: []