idata 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 27e594b49b95b536a5d01ed6f90155751104ec1a
4
+ data.tar.gz: a4e52832bccdc2ab55ae33e7e6f659857b800991
5
+ SHA512:
6
+ metadata.gz: f55f15be3fbc959bcfacdb68f2b45e5b97fa560ee953890a66eb5a4715366019240e61c1b9ef36df9ed77d0ba396d6e38b32177a555d75aeb4f0deb04ae99757
7
+ data.tar.gz: 0e8061590a8d5da84ee9f74d137c2f6d022a58009d9d56ca965241c10e064a9cde59fb1f0d784cab643d88d19c1cdbe3a2f688ce2bbfcc42f4cd50b14e2bcfda
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in idata.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 TODO: Write your name
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Idata
2
+
3
+ TODO: Write a gem description
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'idata'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install idata
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/ieval ADDED
@@ -0,0 +1,222 @@
1
+ # DATA LOADER
2
+ #
3
+ # @author Nghi Pham
4
+ # @date April 2014
5
+ #
6
+ # The script runs a user-supplied Ruby snippet against every record of a table
7
+ # in the specified database and writes the changed fields back to that table
8
+ # Run the script with --help for guidelines/examples
9
+ #
10
+ # @todo Make the script OOP
11
+ # @todo Constantize default values
12
+ require 'optparse'
13
+ require 'csv'
14
+ require 'active_record'
15
+ require 'rubygems'
16
+ require 'digest/sha1'
17
+ require 'fileutils'
18
+
19
+ # Default configuration settings
20
+ POSTGRESQL_PORT = 5432
21
+
22
+ # Parse script inputs
23
+ $options = {}
24
+ parser = OptionParser.new("", 24) do |opts|
25
+ opts.banner = "\nProgram: Data Loader\nAuthor: MCKI\n\n"
26
+
27
+ opts.on("-e", "--eval SCRIPT", "Ruby SCRIPT to execute") do |v|
28
+ $options[:eval] = v
29
+ end
30
+
31
+ opts.on("-t", "--table TABLE", "Table name to be created") do |v|
32
+ $options[:table] = v
33
+ end
34
+
35
+ opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
36
+ $options[:host] = v
37
+ end
38
+
39
+ opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
40
+ $options[:database] = v
41
+ end
42
+
43
+ opts.on("-u", "--username USER", "PostgreSQL username") do |v|
44
+ $options[:username] = v
45
+ end
46
+
47
+ opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
48
+ $options[:password] = v
49
+ end
50
+
51
+ opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
52
+ $options[:listen] = v
53
+ end
54
+
55
+ opts.on_tail('--help', 'Displays this help') do
56
+ puts opts, "", help
57
+ exit
58
+ end
59
+ end
60
+
61
+ # Help message
62
+ def help
63
+ return <<-eos
64
+
65
+ EXAMPLES
66
+ -------------------------------------------------------
67
+ Run a custom Ruby command on the every record represented by the `item` variable
68
+
69
+ ruby eval.rb --host=localhost --username=postgres --password=postgres \\
70
+ --table=vendors --database=db \\
71
+ --eval="item.last_name = item.full_name.split(/\\s+/).last"
72
+
73
+ eos
74
+ end
75
+
76
+ begin
77
+ parser.parse!
78
+ rescue SystemExit => ex
79
+ exit
80
+ end
81
+
82
+ # Certain arguments are required
83
+ if $options[:eval].nil?
84
+ puts "\nPlease specify Ruby script to execute: -e\n\n"
85
+ exit
86
+ end
87
+
88
+ if $options[:table].nil?
89
+ puts "\nPlease specify SQL table name: -t\n\n"
90
+ exit
91
+ end
92
+
93
+ if $options[:host].nil?
94
+ puts "\nPlease specify host name: -h\n\n"
95
+ exit
96
+ end
97
+
98
+ if $options[:database].nil?
99
+ puts "\nPlease specify PostgreSQL database name: -d\n\n"
100
+ exit
101
+ end
102
+
103
+ if $options[:username].nil?
104
+ puts "\nPlease specify PostgreSQL username: -d\n\n"
105
+ exit
106
+ end
107
+
108
+ # Default in case arguments are not provided
109
+ $options[:listen] ||= POSTGRESQL_PORT
110
+
111
+ # Connect to PostgreSQL
112
+ # $options = {host: 'localhost', database: 'db', username: 'postgres', password: 'postgres', table: 'products', listen: 5432}
113
+ ActiveRecord::Base.establish_connection(
114
+ 'adapter' => 'postgresql',
115
+ 'host' => $options[:host],
116
+ 'database' => $options[:database],
117
+ 'username' => $options[:username],
118
+ 'password' => $options[:password],
119
+ 'port' => $options[:listen],
120
+ 'timeout' => 15000
121
+ )
122
+
123
+ class Product < ActiveRecord::Base
124
+ self.primary_key = :id
125
+ self.table_name = $options[:table]
126
+ end
127
+
128
+ # Temp file & temp table name
129
+ $tmpfile = "/tmp/#{Digest::SHA1.hexdigest(rand(100000).to_s)}.csv"
130
+ $tmpname = "_tmp_#{Digest::SHA1.hexdigest(rand(100000).to_s)}"
131
+
132
+ # Check if PostgreSQL host is local
133
+ def local?
134
+ return ['localhost', '127.0.0.1'].include?($options[:host])
135
+ end
136
+
137
+ # Execute query
138
+ def query(*query_str)
139
+ ActiveRecord::Base.connection.execute(query_str.join("; "))
140
+ end
141
+
142
+ # Create table from CSV
143
+ def create_table_from_csv(name, csv_path)
144
+ # Recreates table `name` with one column per CSV header ("id" integer, the rest text) and bulk-loads csv_path into it
145
+ first = CSV.open(csv_path, :headers => true) { |csv| csv.first } # block form closes the file handle
146
+ # `first` is nil when the file holds no data row
147
+ unless first
148
+ raise "File Empty!!!"
149
+ end
150
+
151
+ # sanitize: unnamed columns get a positional name, then all names are normalized
152
+ headers = first.headers
153
+ headers.each_with_index {|e, index|
154
+ if e.nil? || e.empty?
155
+ headers[index] = "column_#{index + 1}"
156
+ end
157
+ }
158
+ headers.map!{|e| e.downcase.underscore }
159
+
160
+ # check if every field name is unique
161
+ if headers.count != headers.uniq.count
162
+ raise "Field name must be UNIQUE: \nPlease check your input headers: [#{headers.join(', ')}]"
163
+ end
164
+
165
+ # Create table
166
+ create_table_sql = headers.map{|e| e == 'id' ? "\"#{e}\" integer" : "\"#{e}\" text"}.join(",")
167
+ create_table_sql = "drop table if exists #{name}; create table #{name}( #{create_table_sql} );"
168
+ query(create_table_sql)
169
+
170
+ # Dump data
171
+ insert_data_sql = headers.map{|e| "\"#{e}\""}.join(",")
172
+ insert_data_sql = "COPY #{name}( #{insert_data_sql} ) FROM '#{csv_path}' DELIMITER ',' CSV HEADER;"
173
+
174
+ # Change output file permission so that postgres user can read it
175
+ begin
176
+ FileUtils.chmod 0755, csv_path
177
+ rescue StandardError
178
+ puts "Error while changing file permission"
179
+ end
180
+
181
+ if local?
182
+ query(insert_data_sql)
183
+ else
184
+ puts "\nWARNING: pushing data to remote server [#{$options[:host]}].\nBe sure you have the correct version of `psql` command installed\n\n"
185
+ # psql reads the password from its environment; the argv form of system() bypasses the shell, so nothing needs quoting
186
+ psql_env = { 'PGPASSWORD' => $options[:password].to_s }
187
+ loaded = system(psql_env, 'psql', '-U', $options[:username], '-h', $options[:host], '-p', $options[:listen].to_s, $options[:database], '-c', "\\#{insert_data_sql}")
188
+ raise "psql could not load #{csv_path} into #{name}" unless loaded
189
+ end
190
+ end
191
+
192
+ if !local? and $options[:password].nil?
193
+ puts "\n\nERROR: You are connecting to a remote server\nPlease make sure you specify SQL password: --password \n\n"
194
+ exit
195
+ end
196
+
197
+ fields_changed = []
198
+
199
+ CSV.open($tmpfile, "wb", :col_sep => ',') do |csv|
200
+ csv << Product.first.attributes.keys
201
+
202
+ Product.all.each do |item|
203
+ # Execute the script provided on every record
204
+ eval($options[:eval])
205
+ fields_changed += item.changes.keys
206
+ fields_changed.uniq!
207
+ csv << item.attributes.values
208
+ end
209
+ end
210
+
211
+ unless fields_changed.empty?
212
+ create_table_from_csv($tmpname, $tmpfile)
213
+ update_sql = fields_changed.map{|f| "\"#{f}\" = tmp.\"#{f}\""}.join(", ")
214
+ update_sql = "UPDATE #{$options[:table]} origin SET #{update_sql} FROM #{$tmpname} tmp WHERE origin.id = tmp.id"
215
+ query(update_sql)
216
+
217
+ cleanup_sql = "DROP TABLE #{$tmpname};"
218
+ query(cleanup_sql)
219
+
220
+ puts "\nDone\n\n"
221
+ end
222
+
data/bin/iexport ADDED
@@ -0,0 +1,163 @@
1
+ # DATA LOADER
2
+ #
3
+ # @author Nghi Pham
4
+ # @date April 2014
5
+ #
6
+ # The script exports rows of a table in the specified database to a CSV file,
7
+ # optionally restricted with the --select and --where switches
8
+ # Run the script with --help for guidelines/examples
9
+ #
10
+ require 'optparse'
11
+ require 'csv'
12
+ require 'active_record'
13
+ require 'rubygems'
14
+ require 'digest/sha1'
15
+ require 'fileutils'
16
+
17
+ SUPPORTED_INPUT_FORMATS = ['CSV']
18
+ POSTGRESQL_PORT = 5432
19
+ CSV_DEFAULT_DELIMITER = ','
20
+
21
+ $options = {}
22
+ parser = OptionParser.new("", 24) do |opts|
23
+ opts.banner = "\nProgram: Data Exporter\nAuthor: MCKI\n\n"
24
+
25
+ opts.on("-f", "--format FORMAT", "Output file format") do |v|
26
+ $options[:format] = v
27
+ end
28
+
29
+ opts.on("--delim DELIMITER", "CSV delimiter") do |v|
30
+ $options[:delim] = v
31
+ end
32
+
33
+ opts.on("-t", "--table TABLE", "Table name to be created") do |v|
34
+ $options[:table] = v
35
+ end
36
+
37
+ opts.on("-s", "--select FIELDS", "Fields to export") do |v|
38
+ $options[:select] = v
39
+ end
40
+
41
+ opts.on("-w", "--where CONDITION", "CONDITION") do |v|
42
+ $options[:where] = v
43
+ end
44
+
45
+ opts.on("-o", "--output FILE", "Output file") do |v|
46
+ $options[:output] = v
47
+ end
48
+
49
+ opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
50
+ $options[:host] = v
51
+ end
52
+
53
+ opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
54
+ $options[:database] = v
55
+ end
56
+
57
+ opts.on("-u", "--username USER", "PostgreSQL username") do |v|
58
+ $options[:username] = v
59
+ end
60
+
61
+ opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
62
+ $options[:password] = v
63
+ end
64
+
65
+ opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
66
+ $options[:listen] = v
67
+ end
68
+
69
+ opts.on_tail('--help', 'Displays this help') do
70
+ puts opts, "", help
71
+ exit
72
+ end
73
+ end
74
+
75
+ def help
76
+ return <<-eos
77
+
78
+ EXAMPLES
79
+ -------------------------------------------------------
80
+
81
+ ruby export.rb --host=localhost --username=postgres --database=db --table=items \\
82
+ --output=/tmp/data.csv --format=csv --delim="\\t" \\
83
+ --select="id, name AS vendor_name, age" --where="active = 1"
84
+
85
+ eos
86
+ end
87
+
88
+ begin
89
+ parser.parse!
90
+ rescue SystemExit => ex
91
+ exit
92
+ end
93
+
94
+ if $options[:format].nil?
95
+ puts "\nPlease specify input file format: -f\n\n"
96
+ exit
97
+ end
98
+
99
+ # downcase for consistency
100
+ $options[:format].upcase!
101
+
102
+ if !SUPPORTED_INPUT_FORMATS.include?($options[:format])
103
+ puts "\nInvalid input file format, supported formats are: #{SUPPORTED_INPUT_FORMATS.join(', ')}\n\n"
104
+ exit
105
+ end
106
+
107
+ if $options[:table].nil?
108
+ puts "\nPlease specify SQL table name: -t\n\n"
109
+ exit
110
+ end
111
+
112
+ if $options[:output].nil?
113
+ puts "\nPlease specify output file: -o\n\n"
114
+ exit
115
+ end
116
+
117
+ if $options[:host].nil?
118
+ puts "\nPlease specify host name: -h\n\n"
119
+ exit
120
+ end
121
+
122
+ if $options[:database].nil?
123
+ puts "\nPlease specify PostgreSQL database name: -d\n\n"
124
+ exit
125
+ end
126
+
127
+ if $options[:username].nil?
128
+ puts "\nPlease specify PostgreSQL username: -d\n\n"
129
+ exit
130
+ end
131
+
132
+ # Default value
133
+ $options[:listen] ||= POSTGRESQL_PORT
134
+ $options[:delim] ||= CSV_DEFAULT_DELIMITER
135
+ $options[:delim] = "\t" if $options[:delim] == '\t'
136
+ $options[:select] ||= '*'
137
+ $options[:where] ||= 'true'
138
+
139
+ #$options = {host: 'localhost', database: 'db', username: 'postgres', password: 'postgres', table: 'products', listen: 5432}
140
+ ActiveRecord::Base.establish_connection(
141
+ 'adapter' => 'postgresql',
142
+ 'host' => $options[:host],
143
+ 'database' => $options[:database],
144
+ 'username' => $options[:username],
145
+ 'password' => $options[:password],
146
+ 'port' => $options[:listen],
147
+ 'timeout' => 15000
148
+ )
149
+
150
+ class Product < ActiveRecord::Base
151
+ self.table_name = $options[:table]
152
+ end
153
+
154
+ CSV.open($options[:output], "wb", :col_sep => $options[:delim]) do |csv|
155
+ scope = Product.select($options[:select]).where($options[:where])
156
+ csv << scope.first.attributes.keys
157
+
158
+ scope.each do |item|
159
+ csv << item.attributes.values
160
+ end
161
+ end
162
+
163
+ puts "\nFile #{$options[:output]} created!\n\n"
data/bin/iload ADDED
@@ -0,0 +1,276 @@
1
+ # DATA LOADER
2
+ #
3
+ # @author Nghi Pham
4
+ # @date April 2014
5
+ #
6
+ # The script loads data from a fixed-width text file or a CSV file and fills in
7
+ # a corresponding table in the specified database
8
+ # Issue ruby load.rb --help for guideline/examples
9
+ #
10
+ require 'optparse'
11
+ require 'csv'
12
+ require 'active_record'
13
+ require 'rubygems'
14
+ require 'digest/sha1'
15
+ require 'fileutils'
16
+
17
+ SUPPORTED_INPUT_FORMATS = ['CSV', 'FX']
18
+ POSTGRESQL_PORT = 5432
19
+ CSV_DEFAULT_DELIMITER = ','
20
+
21
+ $options = {}
22
+ parser = OptionParser.new("", 24) do |opts|
23
+ opts.banner = "\nProgram: Data Loader\nAuthor: MCKI\n\n"
24
+
25
+ opts.on("-i", "--input INPUT", "INPUT text file (fixed-width)") do |v|
26
+ $options[:input] = v
27
+ end
28
+
29
+ opts.on("-f", "--format FORMAT", "Input file format, available values include (CSV|FX) ") do |v|
30
+ $options[:format] = v
31
+ end
32
+
33
+ opts.on("--delim DELIMITER", "Field DELIMITER (for CSV format only - default to COMMA ',')") do |v|
34
+ $options[:delim] = v
35
+ end
36
+
37
+ # opts.on("-o", "--output CSV", "Temporary CSV output file") do |v|
38
+ # $options[:output] = v
39
+ # end
40
+
41
+ opts.on("-t", "--table TABLE", "Table name to be created") do |v|
42
+ $options[:table] = v
43
+ end
44
+
45
+ opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
46
+ $options[:host] = v
47
+ end
48
+
49
+ opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
50
+ $options[:database] = v
51
+ end
52
+
53
+ opts.on("-u", "--username USER", "PostgreSQL username") do |v|
54
+ $options[:username] = v
55
+ end
56
+
57
+ opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
58
+ $options[:password] = v
59
+ end
60
+
61
+ opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
62
+ $options[:listen] = v
63
+ end
64
+
65
+ opts.on_tail('--help', 'Displays this help') do
66
+ puts opts, "", help
67
+ exit
68
+ end
69
+ end
70
+
71
+ def help
72
+ return <<-eos
73
+
74
+ EXAMPLES
75
+ -------------------------------------------------------
76
+ Load data from text file and store to a table name "vendors"
77
+
78
+ ruby load.rb --host=localhost --username=postgres --password=postgres \\
79
+ --input=/home/administrator/VendorMaster.txt --format=fx \\
80
+ --table=vendors --database=db
81
+
82
+
83
+ The same usage in short form:
84
+
85
+ ruby load.rb -h localhost -u postgres -p postgres \\
86
+ -i /home/administrator/VendorMaster.txt -f fx \\
87
+ -t vendors -d db
88
+
89
+ eos
90
+ end
91
+
92
+ begin
93
+ parser.parse!
94
+ rescue SystemExit => ex
95
+ exit
96
+ end
97
+
98
+ if $options[:input].nil?
99
+ puts "\nPlease specify input file: -i\n\n"
100
+ exit
101
+ end
102
+
103
+ if $options[:format].nil?
104
+ puts "\nPlease specify input file format: -f\n\n"
105
+ exit
106
+ end
107
+
108
+ # upcase for consistency with the entries of SUPPORTED_INPUT_FORMATS
109
+ $options[:format].upcase!
110
+
111
+ if !SUPPORTED_INPUT_FORMATS.include?($options[:format])
112
+ puts "\nInvalid input file format, supported formats are: #{SUPPORTED_INPUT_FORMATS.join(', ')}\n\n"
113
+ exit
114
+ end
115
+
116
+ if $options[:table].nil?
117
+ puts "\nPlease specify table name: -t\n\n"
118
+ exit
119
+ end
120
+
121
+ unless File.exists?($options[:input])
122
+ puts "\nFile does not exist"
123
+ exit
124
+ end
125
+
126
+ if $options[:host].nil?
127
+ puts "\nPlease specify host name: -h\n\n"
128
+ exit
129
+ end
130
+
131
+ if $options[:database].nil?
132
+ puts "\nPlease specify PostgreSQL database name: -d\n\n"
133
+ exit
134
+ end
135
+
136
+ if $options[:username].nil?
137
+ puts "\nPlease specify PostgreSQL username: -d\n\n"
138
+ exit
139
+ end
140
+
141
+ # Default value
142
+ $options[:listen] ||= POSTGRESQL_PORT
143
+ $options[:delim] ||= CSV_DEFAULT_DELIMITER
144
+ $options[:delim] = "\t" if $options[:delim] == '\t'
145
+ $tmpfile = "/tmp/#{Digest::SHA1.hexdigest(rand(100000).to_s)}.csv"
146
+
147
+ # Database dump
148
+ ActiveRecord::Base.establish_connection(
149
+ 'adapter' => 'postgresql',
150
+ 'host' => $options[:host],
151
+ 'database' => $options[:database],
152
+ 'username' => $options[:username],
153
+ 'password' => $options[:password],
154
+ 'port' => $options[:listen],
155
+ 'timeout' => 15000
156
+ )
157
+
158
+ class String
159
+ def underscore
160
+ return self if self.nil?
161
+ return self.strip.gsub(/[^a-z0-9]+/, "_")
162
+ end
163
+ end
164
+
165
+ class MyParser # Loads the --input file (FX or CSV) into the --table table by way of a temporary CSV file
166
+ def initialize
167
+ # remote server always requires password
168
+ if !local? && $options[:password].nil?
169
+ raise "You are connecting to a remote server\nPlease make sure you specify SQL password: --password "
170
+ end
171
+ end
172
+ # Runs the loader that matches $options[:format] ('FX' or 'CSV')
173
+ def run
174
+ load_fx if $options[:format] == 'FX'
175
+ load_csv if $options[:format] == 'CSV'
176
+ end
177
+ # CSV input: loads a copy of the input file
178
+ def load_csv
179
+ # Copy file to /tmp/ folder to prevent Permission Error
180
+ FileUtils.cp $options[:input], $tmpfile
181
+ create_table_from_csv($tmpfile)
182
+ end
183
+ # Fixed-width input: column widths come from the header line; rows are rewritten as CSV and then loaded
184
+ def load_fx
185
+ # Load data
186
+ data = IO.read($options[:input]).split("\n")
187
+ header = data.shift
188
+ headers = header.scan(/[^\s]+\s+/)
189
+ # NOTE(review): the scan needs whitespace after each name, so an unpadded last column is not picked up
190
+ # Parse: each header token (name plus padding) gives the width of its column
191
+ ranges = headers.map{|s| "a#{s.size}"}.join("")
192
+ headers.map!{|s| s.downcase.strip }
193
+
194
+ # Write
195
+ CSV.open($tmpfile, "wb", :col_sep => CSV_DEFAULT_DELIMITER, quote_char: "\b") do |csv|
196
+ csv << headers
197
+
198
+ data.each do |line|
199
+ record = line.unpack(ranges).map{|e| e.strip}
200
+ csv << record
201
+ end
202
+ end
203
+ # The temp file was written with CSV_DEFAULT_DELIMITER, so it is read back with the same separator
204
+ create_table_from_csv($tmpfile, CSV_DEFAULT_DELIMITER)
205
+ end
206
+ # Recreates $options[:table] (serial id plus one text column per header) and bulk-loads csv_path; delim is the file's column separator
207
+ def create_table_from_csv(csv_path, delim = $options[:delim])
208
+ # Get headers (the block form closes the file once the first row is read)
209
+ first = CSV.open(csv_path, :headers => true, :col_sep => delim, quote_char: "\b") do |csv|
210
+ csv.first
211
+ end
212
+ unless first
213
+ raise "File Empty!!!"
214
+ end
215
+
216
+ # sanitize: unnamed columns get a positional name, then all names are normalized
217
+ headers = first.headers
218
+ headers.each_with_index {|e, index|
219
+ if e.nil? || e.empty?
220
+ headers[index] = "column_#{index + 1}"
221
+ end
222
+ }
223
+ headers.map!{|e| e.downcase.underscore }
224
+
225
+ # check if every field name is unique
226
+ if headers.count != headers.uniq.count
227
+ raise "Field name must be UNIQUE: \nPlease check your input headers: [#{headers.sort.join(', ')}]"
228
+ end
229
+
230
+ # Create table
231
+ create_table_sql = headers.map{|e| "\"#{e}\" text"}.join(",")
232
+ create_table_sql = "drop table if exists #{$options[:table]}; create table #{$options[:table]}( id serial not null, #{create_table_sql} );"
233
+ query(create_table_sql)
234
+
235
+ # Dump data; DELIMITER always gets a quoted literal (an empty DELIMITER clause is a COPY syntax error)
236
+ pg_delim = delim == "\t" ? "E'\t'" : "'#{delim.gsub("'", "''")}'"
237
+ insert_data_sql = headers.map{|e| "\"#{e}\""}.join(",")
238
+ insert_data_sql = "COPY #{$options[:table]}( #{insert_data_sql} ) FROM '#{csv_path}' DELIMITER #{pg_delim} CSV HEADER QUOTE E'\b';"
239
+
240
+ # Change output file permission so that postgres user can read it
241
+ begin
242
+ FileUtils.chmod 0755, csv_path
243
+ rescue StandardError
244
+ puts "Error while changing file permission"
245
+ end
246
+
247
+ if local?
248
+ query(insert_data_sql)
249
+ else
250
+ puts "\nWARNING: pushing data to remote server [#{$options[:host]}].\nBe sure you have the correct version of `psql` command installed\n\n"
251
+ # psql reads the password from its environment; the argv form of system() bypasses the shell, so nothing needs quoting
252
+ psql_env = { 'PGPASSWORD' => $options[:password].to_s }
253
+ loaded = system(psql_env, 'psql', '-U', $options[:username], '-h', $options[:host], '-p', $options[:listen].to_s, $options[:database], '-c', "\\#{insert_data_sql}")
254
+ raise "psql could not load #{csv_path} into #{$options[:table]}" unless loaded
255
+ end
256
+
257
+ puts "\nTable `#{$options[:table]}` loaded \n\n"
258
+ end
259
+
260
+ private
261
+ def query(*query_str)
262
+ ActiveRecord::Base.connection.execute(query_str.join("; "))
263
+ end
264
+ # True when --host is 'localhost' or '127.0.0.1'
265
+ def local?
266
+ return ['localhost', '127.0.0.1'].include?($options[:host])
267
+ end
268
+ end
269
+
270
+ begin
271
+ e = MyParser.new
272
+ e.run
273
+ rescue Exception => ex
274
+ puts "\n\n*********** ERROR ***********\n#{ex.message}\n\n"
275
+ exit
276
+ end
data/bin/ipatch ADDED
@@ -0,0 +1,107 @@
1
+ # DATA PATCHER
2
+ #
3
+ # @author Nghi Pham
4
+ # @date April 2014
5
+ #
6
+ # Any custom patches to the database goes here
7
+ #
8
+ require 'optparse'
9
+ require 'csv'
10
+ require 'active_record'
11
+ require 'rubygems'
12
+
13
+ # Default values
14
+ POSTGRESQL_PORT = 5432
15
+
16
+ # User input
17
+ $options = {}
18
+ parser = OptionParser.new("", 24) do |opts|
19
+ opts.banner = "\nProgram: Data Patcher\nAuthor: MCKI\n\n"
20
+
21
+ opts.on("-q", "--query QUERY", "Custom query") do |v|
22
+ $options[:query] = v
23
+ end
24
+
25
+ opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
26
+ $options[:host] = v
27
+ end
28
+
29
+ opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
30
+ $options[:database] = v
31
+ end
32
+
33
+ opts.on("-u", "--username USER", "PostgreSQL username") do |v|
34
+ $options[:username] = v
35
+ end
36
+
37
+ opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
38
+ $options[:password] = v
39
+ end
40
+
41
+ opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
42
+ $options[:listen] = v
43
+ end
44
+
45
+ opts.on_tail('--help', 'Displays this help') do
46
+ puts opts, "", help
47
+ exit
48
+ end
49
+ end
50
+
51
+ def help
52
+ return <<-eos
53
+
54
+ EXAMPLES
55
+ -------------------------------------------------------
56
+ Send custom SQL to a data table
57
+
58
+ ruby patch.rb --host=localhost --username=postgres --password=postgres --database=db \\
59
+ --query="ALTER TABLE vendors ADD COLUMN tmp text; UPDATE vendors SET tmp = vendor_number"
60
+
61
+ eos
62
+ end
63
+
64
+ begin
65
+ parser.parse!
66
+ rescue SystemExit => ex
67
+ exit
68
+ end
69
+
70
+ if $options[:query].nil?
71
+ puts "\nPlease specify SQL query: -q\n\n"
72
+ exit
73
+ end
74
+
75
+ if $options[:host].nil?
76
+ puts "\nPlease specify host name: -h\n\n"
77
+ exit
78
+ end
79
+
80
+ if $options[:database].nil?
81
+ puts "\nPlease specify PostgreSQL database name: -d\n\n"
82
+ exit
83
+ end
84
+
85
+ if $options[:username].nil?
86
+ puts "\nPlease specify PostgreSQL username: -d\n\n"
87
+ exit
88
+ end
89
+
90
+ # Default value
91
+ $options[:listen] ||= POSTGRESQL_PORT
92
+
93
+ # Database
94
+ ActiveRecord::Base.establish_connection(
95
+ 'adapter' => 'postgresql',
96
+ 'host' => $options[:host],
97
+ 'database' => $options[:database],
98
+ 'username' => $options[:username],
99
+ 'password' => $options[:password],
100
+ 'port' => $options[:listen],
101
+ 'timeout' => 15000
102
+ )
103
+
104
+ # Execute the query, use it at your own risk!!!
105
+ ActiveRecord::Base.connection.execute($options[:query])
106
+
107
+ puts "\nDone!\n\n"
data/bin/isanitize ADDED
@@ -0,0 +1,4 @@
1
+ file = ARGV[0]
2
+ s = IO.read(file)
3
+ s = s.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
4
+ File.open(file, 'wb') {|f| f.write(s)}
data/bin/ivalidate ADDED
@@ -0,0 +1,258 @@
1
+ # DATA VALIDATOR
2
+ #
3
+ # @author Nghi Pham
4
+ # @date April 2014
5
+ #
6
+ # Data validation includes:
7
+ # * Uniqueness
8
+ # * Integrity (cross reference)
9
+ # * Data type: numeric, text, enum, etc.
10
+ # * Data format: text size, text values, enum, inclusion, exclusion, etc.
11
+ #
12
+ # Run the script with --help for guidelines/examples
13
+ #
14
+ require 'optparse'
15
+ require 'csv'
16
+ require 'active_record'
17
+ require 'rubygems'
18
+ require 'digest/sha1'
19
+
20
+ $options = {
21
+ :unique => [],
22
+ :not_null => [],
23
+ :match => [],
24
+ :not_match => [],
25
+ :cross_reference => []
26
+ }
27
+ parser = OptionParser.new("", 24) do |opts|
28
+ opts.banner = "\nProgram: Data Validator\nAuthor: MCKI\n\n"
29
+
30
+ opts.on("--unique FIELD", "Check if FIELD is unique") do |v|
31
+ $options[:unique] << v
32
+ end
33
+
34
+ opts.on("--not-null FIELD", "Check if FIELD is not null or empty") do |v|
35
+ $options[:not_null] << v
36
+ end
37
+
38
+ opts.on("--match REGEXP", "Check if FIELD matches REGEXP") do |v|
39
+ $options[:match] << v
40
+ end
41
+
42
+ opts.on("--not-match REGEXP", "Check if FIELD does not match REGEXP") do |v|
43
+ $options[:not_match] << v
44
+ end
45
+
46
+ opts.on("--cross-reference EXPR", "Reference") do |v|
47
+ $options[:cross_reference] << v
48
+ end
49
+
50
+ opts.on("-t", "--table TABLE", "Table to verify") do |v|
51
+ $options[:table] = v
52
+ end
53
+
54
+ opts.on("--log-to FIELD", "Field to log error to") do |v|
55
+ $options[:log_to] = v
56
+ end
57
+
58
+ opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
59
+ $options[:host] = v
60
+ end
61
+
62
+ opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
63
+ $options[:database] = v
64
+ end
65
+
66
+ opts.on("-u", "--username USER", "PostgreSQL username") do |v|
67
+ $options[:username] = v
68
+ end
69
+
70
+ opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
71
+ $options[:password] = v
72
+ end
73
+
74
+ opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
75
+ $options[:listen] = v
76
+ end
77
+
78
+ opts.on_tail('--help', 'Displays this help') do
79
+ puts opts, "", help
80
+ exit
81
+ end
82
+ end
83
+
84
+ def help
85
+ return <<-eos
86
+
87
+ EXAMPLES
88
+ -------------------------------------------------------
89
+ Validate `mfrs` table against several rules and write error logs to errors field:
90
+
91
+ ruby validate.rb -h localhost -u postgres -p postgres -d db \\
92
+ --table=mfrs \\
93
+ --log-to=errors \\
94
+ --unique="mfr_lic" \\
95
+ --unique="mfr_number" \\
96
+ --not-null="mfr_number" \\
97
+ --not-null="mfr_lic" \\
98
+ --match="mfr_number/^2.*/" \\
99
+ --match="mfr_name/^[a-zA-Z]+/" \\
100
+ --not-match="status/(failed|inactive|wrong)/"
101
+
102
+ eos
103
+ end
104
+
105
+ begin
106
+ parser.parse!
107
+ rescue SystemExit => ex
108
+ exit
109
+ end
110
+
111
+ if $options[:table].nil?
112
+ puts "\nPlease specify table name: -t\n\n"
113
+ exit
114
+ end
115
+
116
+ if $options[:log_to].nil?
117
+ puts "\nPlease specify field to log errors: --log-to\n\n"
118
+ exit
119
+ end
120
+
121
+ if $options[:host].nil?
122
+ puts "\nPlease specify host name: -h\n\n"
123
+ exit
124
+ end
125
+
126
+ if $options[:database].nil?
127
+ puts "\nPlease specify PostgreSQL database name: -d\n\n"
128
+ exit
129
+ end
130
+
131
+ if $options[:username].nil?
132
+ puts "\nPlease specify PostgreSQL username: -d\n\n"
133
+ exit
134
+ end
135
+
136
+ # Default value
137
+ $options[:listen] ||= 5432
138
+ $options[:output] ||= "/tmp/#{Digest::SHA1.hexdigest(rand(100000).to_s)}.csv"
139
+
140
+ # Database dump
141
+ ActiveRecord::Base.establish_connection(
142
+ 'adapter' => 'postgresql',
143
+ 'host' => $options[:host],
144
+ 'database' => $options[:database],
145
+ 'username' => $options[:username],
146
+ 'password' => $options[:password],
147
+ 'port' => $options[:listen],
148
+ 'timeout' => 15000
149
+ )
150
+
151
+ # --------------------------------------------------------------------
152
+ # Preliminary check
153
+ # Add column errors if not yet exists
154
+ # --------------------------------------------------------------------
155
+ pre_sql = <<-eos
156
+ DO $$
157
+ BEGIN
158
+ BEGIN
159
+ ALTER TABLE #{$options[:table]} ADD COLUMN #{$options[:log_to]} text default '';
160
+ EXCEPTION
161
+ WHEN duplicate_column THEN RAISE NOTICE 'column #{$options[:log_to]} already exists';
162
+ END;
163
+ END;
164
+ $$
165
+ eos
166
+
167
+ ActiveRecord::Base.connection.execute(pre_sql)
168
+
169
+ # --------------------------------------------------------------------
170
+ # Check unique field
171
+ # --------------------------------------------------------------------
172
+ $options[:unique].each do |field|
173
+ puts "Checking unique fields: #{field}"
174
+
175
+ uniq_sql = <<-eos
176
+ UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{field}-not-unique', ' || '), ' || ')
177
+ WHERE #{field} IN (
178
+ SELECT #{field} FROM #{$options[:table]} GROUP BY #{field}
179
+ HAVING count(*) > 1
180
+ ) AND #{field} IS NOT NULL AND length(trim(#{field})) <> 0;
181
+ eos
182
+
183
+ ActiveRecord::Base.connection.execute(uniq_sql)
184
+ end
185
+
186
+ # --------------------------------------------------------------------
187
+ # Check not-null field
188
+ # --------------------------------------------------------------------
189
+ $options[:not_null].each do |field|
190
+ puts "Checking not-null fields: #{field}"
191
+
192
+ not_null_sql = <<-eos
193
+ UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{field}-null', ' || '), ' || ')
194
+ WHERE #{field} IS NULL OR length(trim(#{field})) = 0;
195
+ eos
196
+
197
+ ActiveRecord::Base.connection.execute(not_null_sql)
198
+ end
199
+
200
+ # --------------------------------------------------------------------
201
+ # Check field satisfying --match=FIELD/REGEXP/
202
+ # --------------------------------------------------------------------
203
+ $options[:match].each do |value|
204
+ field = value[/^[^\/]+/]
205
+ regexp = value[/(?<=\/).*(?=\/)/]
206
+ puts "Checking REGEXP matching: #{regexp}"
207
+
208
+ match_sql = <<-eos
209
+ UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{regexp.gsub("'", "''")}-not-matched', ' || '), ' || ')
210
+ WHERE #{field} IS NOT NULL AND length(trim(#{field})) <> 0 AND #{field} !~ '#{regexp}';
211
+ eos
212
+
213
+ ActiveRecord::Base.connection.execute(match_sql)
214
+ end
215
+
216
+ # --------------------------------------------------------------------
217
+ # Check field satisfying --not-match=FIELD/REGEXP/
218
+ # --------------------------------------------------------------------
219
+ $options[:not_match].each do |value|
220
+ field = value[/^[^\/]+/]
221
+ regexp = value[/(?<=\/).*(?=\/)/]
222
+ puts "Checking REGEXP not matching: #{regexp}"
223
+
224
+ not_match_sql = <<-eos
225
+ UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{regexp.gsub("'", "''")}-unexpectedly-matched', ' || '), ' || ')
226
+ WHERE #{field} IS NOT NULL AND length(trim(#{field})) <> 0 AND #{field} ~ '#{regexp}';
227
+ eos
228
+
229
+ ActiveRecord::Base.connection.execute(not_match_sql)
230
+ end
231
+
232
+ # --------------------------------------------------------------------
233
+ # Check field satisfying --cross-reference
234
+ # --------------------------------------------------------------------
235
+ $options[:cross_reference].each do |value|
236
+ values = value.split(/[|\.]/)
237
+
238
+ # EXPR is split on `|` and `.` and must yield exactly three parts: field, referenced table, referenced field
239
+ if values.size != 3
240
+ raise "Error: Wrong argument for --cross-reference switch"
241
+ end
242
+
243
+ field = values[0]
244
+ ref_table = values[1]
245
+ ref_field = values[2]
246
+
247
+ puts "Checking data integrity: #{value}"
248
+
249
+ # NOT EXISTS rather than NOT IN: with NOT IN a single NULL in the referenced column means no row is ever flagged
250
+ ref_sql = <<-eos
251
+ UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{field}-not-referenced-to-#{values[1]}.#{values[2]}', ' || '), ' || ')
252
+ WHERE NOT EXISTS (
253
+ SELECT 1 FROM #{ref_table} ref WHERE ref.#{ref_field} = #{$options[:table]}.#{field}
254
+ ) AND #{field} IS NOT NULL AND length(trim(#{field})) <> 0;
255
+ eos
256
+
257
+ ActiveRecord::Base.connection.execute(ref_sql)
258
+ end
data/idata.gemspec ADDED
@@ -0,0 +1,27 @@
1
# coding: utf-8
#
# Gem specification for idata. The gem's lib directory is put on the load
# path first so the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'idata/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name     = "idata"
  gem.version  = Idata::VERSION
  gem.authors  = ["Nghi Pham"]
  gem.email    = ["minhnghivn@gmail.com"]
  gem.license  = "MIT"
  gem.homepage = ""

  gem.summary     = %q{Tools for importing data from raw files}
  gem.description = %q{Tools for importing data from raw files}

  # Packaged files are whatever git tracks; test files are derived from them.
  gem.files         = `git ls-files`.split($/)
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # The executables are listed explicitly instead of being derived from bin/.
  gem.executables = %w[iload ieval ipatch ivalidate iexport isanitize]

  # Build-time dependencies
  gem.add_development_dependency "bundler", ">= 1.3"
  gem.add_development_dependency "rake", ">= 0.9"

  # Runtime dependencies
  gem.add_dependency "rails", ">= 4.0"
  gem.add_dependency "pg"
end
@@ -0,0 +1,3 @@
1
module Idata
  # Release version of the idata gem; idata.gemspec reads it as spec.version.
  # Frozen so the shared constant cannot be mutated in place by accident.
  VERSION = "0.0.1".freeze
end
data/lib/idata.rb ADDED
@@ -0,0 +1,5 @@
1
require "idata/version"

# Top-level namespace of the idata gem. Apart from the version constant
# loaded above, this file defines no library code.
module Idata
end
metadata ADDED
@@ -0,0 +1,120 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: idata
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Nghi Pham
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0.9'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0.9'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rails
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '4.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '4.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pg
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Tools for importing data from raw files
70
+ email:
71
+ - minhnghivn@gmail.com
72
+ executables:
73
+ - iload
74
+ - ieval
75
+ - ipatch
76
+ - ivalidate
77
+ - iexport
78
+ - isanitize
79
+ extensions: []
80
+ extra_rdoc_files: []
81
+ files:
82
+ - .gitignore
83
+ - Gemfile
84
+ - LICENSE.txt
85
+ - README.md
86
+ - Rakefile
87
+ - bin/ieval
88
+ - bin/iexport
89
+ - bin/iload
90
+ - bin/ipatch
91
+ - bin/isanitize
92
+ - bin/ivalidate
93
+ - idata.gemspec
94
+ - lib/idata.rb
95
+ - lib/idata/version.rb
96
+ homepage: ''
97
+ licenses:
98
+ - MIT
99
+ metadata: {}
100
+ post_install_message:
101
+ rdoc_options: []
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubyforge_project:
116
+ rubygems_version: 2.2.1
117
+ signing_key:
118
+ specification_version: 4
119
+ summary: Tools for importing data from raw files
120
+ test_files: []