idata 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/bin/ieval +222 -0
- data/bin/iexport +163 -0
- data/bin/iload +276 -0
- data/bin/ipatch +107 -0
- data/bin/isanitize +4 -0
- data/bin/ivalidate +258 -0
- data/idata.gemspec +27 -0
- data/lib/idata/version.rb +3 -0
- data/lib/idata.rb +5 -0
- metadata +120 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 27e594b49b95b536a5d01ed6f90155751104ec1a
|
4
|
+
data.tar.gz: a4e52832bccdc2ab55ae33e7e6f659857b800991
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f55f15be3fbc959bcfacdb68f2b45e5b97fa560ee953890a66eb5a4715366019240e61c1b9ef36df9ed77d0ba396d6e38b32177a555d75aeb4f0deb04ae99757
|
7
|
+
data.tar.gz: 0e8061590a8d5da84ee9f74d137c2f6d022a58009d9d56ca965241c10e064a9cde59fb1f0d784cab643d88d19c1cdbe3a2f688ce2bbfcc42f4cd50b14e2bcfda
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 TODO: Write your name
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Idata
|
2
|
+
|
3
|
+
TODO: Write a gem description
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'idata'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install idata
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
TODO: Write usage instructions here
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/ieval
ADDED
@@ -0,0 +1,222 @@
|
|
1
|
+
# DATA LOADER
|
2
|
+
#
|
3
|
+
# @author Nghi Pham
|
4
|
+
# @date April 2014
|
5
|
+
#
|
6
|
+
# The script runs a user-supplied Ruby snippet against every record of a table and
|
7
|
+
# writes the fields changed by the snippet back to that table in the specified database
|
8
|
+
# Issue ruby eval.rb --help for guideline/examples
|
9
|
+
#
|
10
|
+
# @todo Make the script OOP
|
11
|
+
# @todo Constantize default values
|
12
|
+
require 'optparse'
|
13
|
+
require 'csv'
|
14
|
+
require 'active_record'
|
15
|
+
require 'rubygems'
|
16
|
+
require 'digest/sha1'
|
17
|
+
require 'fileutils'
|
18
|
+
|
19
|
+
# Default configuration settings
|
20
|
+
POSTGRESQL_PORT = 5432
|
21
|
+
|
22
|
+
# Parse script inputs
|
23
|
+
$options = {}
|
24
|
+
parser = OptionParser.new("", 24) do |opts|
|
25
|
+
opts.banner = "\nProgram: Data Loader\nAuthor: MCKI\n\n"
|
26
|
+
|
27
|
+
opts.on("-e", "--eval SCRIPT", "Ruby SCRIPT to execute") do |v|
|
28
|
+
$options[:eval] = v
|
29
|
+
end
|
30
|
+
|
31
|
+
opts.on("-t", "--table TABLE", "Table name to be created") do |v|
|
32
|
+
$options[:table] = v
|
33
|
+
end
|
34
|
+
|
35
|
+
opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
|
36
|
+
$options[:host] = v
|
37
|
+
end
|
38
|
+
|
39
|
+
opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
|
40
|
+
$options[:database] = v
|
41
|
+
end
|
42
|
+
|
43
|
+
opts.on("-u", "--username USER", "PostgreSQL username") do |v|
|
44
|
+
$options[:username] = v
|
45
|
+
end
|
46
|
+
|
47
|
+
opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
|
48
|
+
$options[:password] = v
|
49
|
+
end
|
50
|
+
|
51
|
+
opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
|
52
|
+
$options[:listen] = v
|
53
|
+
end
|
54
|
+
|
55
|
+
opts.on_tail('--help', 'Displays this help') do
|
56
|
+
puts opts, "", help
|
57
|
+
exit
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# Help message
|
62
|
+
# Returns the EXAMPLES section appended to the option summary by --help.
#
# @return [String] multi-line usage example for the eval script
def help
  <<-eos

EXAMPLES
-------------------------------------------------------
Run a custom Ruby command on every record represented by the `item` variable

ruby eval.rb --host=localhost --username=postgres --password=postgres \\
--table=vendors --database=db \\
--eval="item.last_name = item.full_name.split(/\\s+/).last"

eos
end
|
75
|
+
|
76
|
+
begin
|
77
|
+
parser.parse!
|
78
|
+
rescue SystemExit => ex
|
79
|
+
exit
|
80
|
+
end
|
81
|
+
|
82
|
+
# Certain arguments are required
|
83
|
+
if $options[:eval].nil?
|
84
|
+
puts "\nPlease specify Ruby script to execute: -e\n\n"
|
85
|
+
exit
|
86
|
+
end
|
87
|
+
|
88
|
+
if $options[:table].nil?
|
89
|
+
puts "\nPlease specify SQL table name: -t\n\n"
|
90
|
+
exit
|
91
|
+
end
|
92
|
+
|
93
|
+
if $options[:host].nil?
|
94
|
+
puts "\nPlease specify host name: -h\n\n"
|
95
|
+
exit
|
96
|
+
end
|
97
|
+
|
98
|
+
if $options[:database].nil?
|
99
|
+
puts "\nPlease specify PostgreSQL database name: -d\n\n"
|
100
|
+
exit
|
101
|
+
end
|
102
|
+
|
103
|
+
# The PostgreSQL username is mandatory; the message names the flag that sets it (-u).
if $options[:username].nil?
  puts "\nPlease specify PostgreSQL username: -u\n\n"
  exit
end
|
107
|
+
|
108
|
+
# Default in case arguments are not provided
|
109
|
+
$options[:listen] ||= POSTGRESQL_PORT
|
110
|
+
|
111
|
+
# Connect to PostgreSQL
|
112
|
+
# $options = {host: 'localhost', database: 'db', username: 'postgres', password: 'postgres', table: 'products', listen: 5432}
|
113
|
+
ActiveRecord::Base.establish_connection(
|
114
|
+
'adapter' => 'postgresql',
|
115
|
+
'host' => $options[:host],
|
116
|
+
'database' => $options[:database],
|
117
|
+
'username' => $options[:username],
|
118
|
+
'password' => $options[:password],
|
119
|
+
'port' => $options[:listen],
|
120
|
+
'timeout' => 15000
|
121
|
+
)
|
122
|
+
|
123
|
+
class Product < ActiveRecord::Base
|
124
|
+
self.primary_key = :id
|
125
|
+
self.table_name = $options[:table]
|
126
|
+
end
|
127
|
+
|
128
|
+
# Temp file & temp table name
|
129
|
+
$tmpfile = "/tmp/#{Digest::SHA1.hexdigest(rand(100000).to_s)}.csv"
|
130
|
+
$tmpname = "_tmp_#{Digest::SHA1.hexdigest(rand(100000).to_s)}"
|
131
|
+
|
132
|
+
# Check if PostgreSQL host is local
|
133
|
+
# Tells whether the configured PostgreSQL host ($options[:host]) is one of the
# two loopback names this script treats as local.
#
# @return [Boolean]
def local?
  loopback_hosts = ['localhost', '127.0.0.1']
  loopback_hosts.include?($options[:host])
end
|
136
|
+
|
137
|
+
# Execute query
|
138
|
+
# Runs one or more SQL statements on the current ActiveRecord connection.
# Several statements are joined with "; " and sent in a single call.
#
# @param query_str [Array<String>] SQL statements
# @return the value returned by the connection's execute call
def query(*query_str)
  sql = query_str.join("; ")
  ActiveRecord::Base.connection.execute(sql)
end
|
141
|
+
|
142
|
+
# Create table from CSV
|
143
|
+
# Recreates table `name` from the header row of a CSV file and bulk-loads the
# file into it.
#
# Column names are taken from the CSV header (blank cells become column_N),
# lower-cased and passed through String#underscore. A column called "id" is
# created as integer, every other column as text. For a local host the data is
# loaded with a server-side COPY; otherwise the psql client is invoked so the
# file is read on this machine (\COPY).
#
# @param name [String] table to drop (if present) and create
# @param csv_path [String] comma-separated file with a header row
# @raise [RuntimeError] if the file has no data row, if header names are not
#   unique, or if the psql client fails
def create_table_from_csv(name, csv_path)
  # Block form so the file handle is closed once the first row has been read
  first = CSV.open(csv_path, :headers => true) { |csv| csv.first }
  raise "File Empty!!!" unless first

  # sanitize: give blank header cells a positional name
  headers = first.headers.each_with_index.map do |e, index|
    (e.nil? || e.empty?) ? "column_#{index + 1}" : e
  end
  # NOTE(review): String#underscore is not defined in this script; presumably it
  # comes from ActiveSupport via active_record -- confirm
  headers.map! { |e| e.downcase.underscore }

  # check if every field name is unique
  if headers.count != headers.uniq.count
    raise "Field name must be UNIQUE: \nPlease check your input headers: [#{headers.join(', ')}]"
  end

  # Create table
  column_defs = headers.map { |e| e == 'id' ? "\"#{e}\" integer" : "\"#{e}\" text" }.join(",")
  query("drop table if exists #{name}; create table #{name}( #{column_defs} );")

  # Dump data
  column_list = headers.map { |e| "\"#{e}\"" }.join(",")
  insert_data_sql = "COPY #{name}( #{column_list} ) FROM '#{csv_path}' DELIMITER ',' CSV HEADER;"

  # Change output file permission so that postgres user can read it.
  # Best effort only: a failure is reported but does not abort the load.
  begin
    FileUtils.chmod 0755, csv_path
  rescue StandardError => ex
    puts "Error while changing file permission: #{ex.message}"
  end

  if local?
    query(insert_data_sql)
  else
    puts "\nWARNING: pushing data to remote server [#{$options[:host]}].\nBe sure you have the correct version of `psql` command installed\n\n"

    # The password (not the username) goes into PGPASSWORD, scoped to the child
    # process only. Passing an argument vector bypasses the shell, so the double
    # quotes inside the SQL reach psql intact.
    loaded = system(
      { 'PGPASSWORD' => $options[:password].to_s },
      'psql',
      '-U', $options[:username].to_s,
      '-h', $options[:host].to_s,
      '-p', $options[:listen].to_s,
      $options[:database].to_s,
      '-c', "\\#{insert_data_sql}"
    )
    raise "Failed to load data into #{name} through psql" unless loaded
  end
end
|
191
|
+
|
192
|
+
if !local? and $options[:password].nil?
|
193
|
+
puts "\n\nERROR: You are connecting to a remote server\nPlease make sure you specify SQL password: --password \n\n"
|
194
|
+
exit
|
195
|
+
end
|
196
|
+
|
197
|
+
fields_changed = []
|
198
|
+
|
199
|
+
# Run the user script on every record, dump the resulting records to the
# temporary CSV file and remember which attributes the script modified.
CSV.open($tmpfile, "wb", :col_sep => ',') do |csv|
  first = Product.first

  # An empty table has no record to take the header from: leave the file empty
  # instead of failing on nil.attributes.
  if first
    csv << first.attributes.keys

    Product.all.each do |item|
      # SECURITY: the --eval script is executed verbatim with the privileges of
      # this process; only run scripts from a trusted source.
      # The script sees the current record as `item`.
      eval($options[:eval])
      # Set union keeps each changed attribute name once
      fields_changed |= item.changes.keys
      csv << item.attributes.values
    end
  end
end
|
210
|
+
|
211
|
+
unless fields_changed.empty?
|
212
|
+
create_table_from_csv($tmpname, $tmpfile)
|
213
|
+
update_sql = fields_changed.map{|f| "\"#{f}\" = tmp.\"#{f}\""}.join(", ")
|
214
|
+
update_sql = "UPDATE #{$options[:table]} origin SET #{update_sql} FROM #{$tmpname} tmp WHERE origin.id = tmp.id"
|
215
|
+
query(update_sql)
|
216
|
+
|
217
|
+
cleanup_sql = "DROP TABLE #{$tmpname};"
|
218
|
+
query(cleanup_sql)
|
219
|
+
|
220
|
+
puts "\nDone\n\n"
|
221
|
+
end
|
222
|
+
|
data/bin/iexport
ADDED
@@ -0,0 +1,163 @@
|
|
1
|
+
# DATA EXPORTER
|
2
|
+
#
|
3
|
+
# @author Nghi Pham
|
4
|
+
# @date April 2014
|
5
|
+
#
|
6
|
+
# The script exports rows of a table in the specified database to a CSV file,
|
7
|
+
# optionally restricted to a field list (--select) and a condition (--where)
|
8
|
+
# Issue ruby export.rb --help for guideline/examples
|
9
|
+
#
|
10
|
+
require 'optparse'
|
11
|
+
require 'csv'
|
12
|
+
require 'active_record'
|
13
|
+
require 'rubygems'
|
14
|
+
require 'digest/sha1'
|
15
|
+
require 'fileutils'
|
16
|
+
|
17
|
+
SUPPORTED_INPUT_FORMATS = ['CSV']
|
18
|
+
POSTGRESQL_PORT = 5432
|
19
|
+
CSV_DEFAULT_DELIMITER = ','
|
20
|
+
|
21
|
+
$options = {}
|
22
|
+
parser = OptionParser.new("", 24) do |opts|
|
23
|
+
opts.banner = "\nProgram: Data Exporter\nAuthor: MCKI\n\n"
|
24
|
+
|
25
|
+
opts.on("-f", "--format FORMAT", "Output file format") do |v|
|
26
|
+
$options[:format] = v
|
27
|
+
end
|
28
|
+
|
29
|
+
opts.on("--delim DELIMITER", "CSV delimiter") do |v|
|
30
|
+
$options[:delim] = v
|
31
|
+
end
|
32
|
+
|
33
|
+
opts.on("-t", "--table TABLE", "Table name to be created") do |v|
|
34
|
+
$options[:table] = v
|
35
|
+
end
|
36
|
+
|
37
|
+
opts.on("-s", "--select FIELDS", "Fields to export") do |v|
|
38
|
+
$options[:select] = v
|
39
|
+
end
|
40
|
+
|
41
|
+
opts.on("-w", "--where CONDITION", "CONDITION") do |v|
|
42
|
+
$options[:where] = v
|
43
|
+
end
|
44
|
+
|
45
|
+
opts.on("-o", "--output FILE", "Output file") do |v|
|
46
|
+
$options[:output] = v
|
47
|
+
end
|
48
|
+
|
49
|
+
opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
|
50
|
+
$options[:host] = v
|
51
|
+
end
|
52
|
+
|
53
|
+
opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
|
54
|
+
$options[:database] = v
|
55
|
+
end
|
56
|
+
|
57
|
+
opts.on("-u", "--username USER", "PostgreSQL username") do |v|
|
58
|
+
$options[:username] = v
|
59
|
+
end
|
60
|
+
|
61
|
+
opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
|
62
|
+
$options[:password] = v
|
63
|
+
end
|
64
|
+
|
65
|
+
opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
|
66
|
+
$options[:listen] = v
|
67
|
+
end
|
68
|
+
|
69
|
+
opts.on_tail('--help', 'Displays this help') do
|
70
|
+
puts opts, "", help
|
71
|
+
exit
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Builds the EXAMPLES section printed after the option summary by --help.
#
# @return [String] multi-line usage example for the export script
def help
  examples = <<-EOS

EXAMPLES
-------------------------------------------------------

ruby export.rb --host=localhost --username=postgres --database=db --table=items \\
--output=/tmp/data.csv --format=csv --delim="\\t" \\
--select="id, name AS vendor_name, age" --where="active = 1"

EOS
  examples
end
|
87
|
+
|
88
|
+
begin
|
89
|
+
parser.parse!
|
90
|
+
rescue SystemExit => ex
|
91
|
+
exit
|
92
|
+
end
|
93
|
+
|
94
|
+
if $options[:format].nil?
|
95
|
+
puts "\nPlease specify input file format: -f\n\n"
|
96
|
+
exit
|
97
|
+
end
|
98
|
+
|
99
|
+
# upcase for consistency (SUPPORTED_INPUT_FORMATS holds upper-case names)
|
100
|
+
$options[:format].upcase!
|
101
|
+
|
102
|
+
if !SUPPORTED_INPUT_FORMATS.include?($options[:format])
|
103
|
+
puts "\nInvalid input file format, supported formats are: #{SUPPORTED_INPUT_FORMATS.join(', ')}\n\n"
|
104
|
+
exit
|
105
|
+
end
|
106
|
+
|
107
|
+
if $options[:table].nil?
|
108
|
+
puts "\nPlease specify SQL table name: -t\n\n"
|
109
|
+
exit
|
110
|
+
end
|
111
|
+
|
112
|
+
if $options[:output].nil?
|
113
|
+
puts "\nPlease specify output file: -o\n\n"
|
114
|
+
exit
|
115
|
+
end
|
116
|
+
|
117
|
+
if $options[:host].nil?
|
118
|
+
puts "\nPlease specify host name: -h\n\n"
|
119
|
+
exit
|
120
|
+
end
|
121
|
+
|
122
|
+
if $options[:database].nil?
|
123
|
+
puts "\nPlease specify PostgreSQL database name: -d\n\n"
|
124
|
+
exit
|
125
|
+
end
|
126
|
+
|
127
|
+
# The PostgreSQL username is mandatory; the message names the flag that sets it (-u).
if $options[:username].nil?
  puts "\nPlease specify PostgreSQL username: -u\n\n"
  exit
end
|
131
|
+
|
132
|
+
# Default value
|
133
|
+
$options[:listen] ||= POSTGRESQL_PORT
|
134
|
+
$options[:delim] ||= CSV_DEFAULT_DELIMITER
|
135
|
+
$options[:delim] = "\t" if $options[:delim] == '\t'
|
136
|
+
$options[:select] ||= '*'
|
137
|
+
$options[:where] ||= 'true'
|
138
|
+
|
139
|
+
#$options = {host: 'localhost', database: 'db', username: 'postgres', password: 'postgres', table: 'products', listen: 5432}
|
140
|
+
ActiveRecord::Base.establish_connection(
|
141
|
+
'adapter' => 'postgresql',
|
142
|
+
'host' => $options[:host],
|
143
|
+
'database' => $options[:database],
|
144
|
+
'username' => $options[:username],
|
145
|
+
'password' => $options[:password],
|
146
|
+
'port' => $options[:listen],
|
147
|
+
'timeout' => 15000
|
148
|
+
)
|
149
|
+
|
150
|
+
class Product < ActiveRecord::Base
|
151
|
+
self.table_name = $options[:table]
|
152
|
+
end
|
153
|
+
|
154
|
+
# Write the selected rows to the output file, header row first.
CSV.open($options[:output], "wb", :col_sep => $options[:delim]) do |csv|
  scope = Product.select($options[:select]).where($options[:where])
  first = scope.first

  # scope.first is nil when no row matches the condition; write nothing
  # instead of failing on nil.attributes.
  if first
    csv << first.attributes.keys
    scope.each { |item| csv << item.attributes.values }
  end
end
|
162
|
+
|
163
|
+
puts "\nFile #{$options[:output]} created!\n\n"
|
data/bin/iload
ADDED
@@ -0,0 +1,276 @@
|
|
1
|
+
# DATA LOADER
|
2
|
+
#
|
3
|
+
# @author Nghi Pham
|
4
|
+
# @date April 2014
|
5
|
+
#
|
6
|
+
# The script loads data from a fixed-width text file or a CSV file and fills in
|
7
|
+
# a corresponding table in the specified database
|
8
|
+
# Issue ruby load.rb --help for guideline/examples
|
9
|
+
#
|
10
|
+
require 'optparse'
|
11
|
+
require 'csv'
|
12
|
+
require 'active_record'
|
13
|
+
require 'rubygems'
|
14
|
+
require 'digest/sha1'
|
15
|
+
require 'fileutils'
|
16
|
+
|
17
|
+
SUPPORTED_INPUT_FORMATS = ['CSV', 'FX']
|
18
|
+
POSTGRESQL_PORT = 5432
|
19
|
+
CSV_DEFAULT_DELIMITER = ','
|
20
|
+
|
21
|
+
$options = {}
|
22
|
+
parser = OptionParser.new("", 24) do |opts|
|
23
|
+
opts.banner = "\nProgram: Data Loader\nAuthor: MCKI\n\n"
|
24
|
+
|
25
|
+
opts.on("-i", "--input INPUT", "INPUT text file (fixed-width)") do |v|
|
26
|
+
$options[:input] = v
|
27
|
+
end
|
28
|
+
|
29
|
+
opts.on("-f", "--format FORMAT", "Input file format, available values include (CSV|FX) ") do |v|
|
30
|
+
$options[:format] = v
|
31
|
+
end
|
32
|
+
|
33
|
+
opts.on("--delim DELIMITER", "Field DELIMITER (for CSV format only - default to COMMA ',')") do |v|
|
34
|
+
$options[:delim] = v
|
35
|
+
end
|
36
|
+
|
37
|
+
# opts.on("-o", "--output CSV", "Temporary CSV output file") do |v|
|
38
|
+
# $options[:output] = v
|
39
|
+
# end
|
40
|
+
|
41
|
+
opts.on("-t", "--table TABLE", "Table name to be created") do |v|
|
42
|
+
$options[:table] = v
|
43
|
+
end
|
44
|
+
|
45
|
+
opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
|
46
|
+
$options[:host] = v
|
47
|
+
end
|
48
|
+
|
49
|
+
opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
|
50
|
+
$options[:database] = v
|
51
|
+
end
|
52
|
+
|
53
|
+
opts.on("-u", "--username USER", "PostgreSQL username") do |v|
|
54
|
+
$options[:username] = v
|
55
|
+
end
|
56
|
+
|
57
|
+
opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
|
58
|
+
$options[:password] = v
|
59
|
+
end
|
60
|
+
|
61
|
+
opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
|
62
|
+
$options[:listen] = v
|
63
|
+
end
|
64
|
+
|
65
|
+
opts.on_tail('--help', 'Displays this help') do
|
66
|
+
puts opts, "", help
|
67
|
+
exit
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# Builds the EXAMPLES section printed after the option summary by --help.
#
# @return [String] multi-line usage examples (long and short option forms)
def help
  examples = <<-EOS

EXAMPLES
-------------------------------------------------------
Load data from text file and store to a table name "vendors"

ruby load.rb --host=localhost --username=postgres --password=postgres \\
--input=/home/administrator/VendorMaster.txt --format=fx \\
--table=vendors --database=db


The same usage in short form:

ruby load.rb -h localhost -u postgres -p postgres \\
-i /home/administrator/VendorMaster.txt -f fx \\
-t vendors -d db

EOS
  examples
end
|
91
|
+
|
92
|
+
begin
|
93
|
+
parser.parse!
|
94
|
+
rescue SystemExit => ex
|
95
|
+
exit
|
96
|
+
end
|
97
|
+
|
98
|
+
if $options[:input].nil?
|
99
|
+
puts "\nPlease specify input file: -i\n\n"
|
100
|
+
exit
|
101
|
+
end
|
102
|
+
|
103
|
+
if $options[:format].nil?
|
104
|
+
puts "\nPlease specify input file format: -f\n\n"
|
105
|
+
exit
|
106
|
+
end
|
107
|
+
|
108
|
+
# upcase for consistency (SUPPORTED_INPUT_FORMATS holds upper-case names)
|
109
|
+
$options[:format].upcase!
|
110
|
+
|
111
|
+
if !SUPPORTED_INPUT_FORMATS.include?($options[:format])
|
112
|
+
puts "\nInvalid input file format, supported formats are: #{SUPPORTED_INPUT_FORMATS.join(', ')}\n\n"
|
113
|
+
exit
|
114
|
+
end
|
115
|
+
|
116
|
+
if $options[:table].nil?
|
117
|
+
puts "\nPlease specify table name: -t\n\n"
|
118
|
+
exit
|
119
|
+
end
|
120
|
+
|
121
|
+
# Abort early when the input file is missing.
# File.exist? is used because the File.exists? alias was removed in Ruby 3.2.
unless File.exist?($options[:input])
  puts "\nFile does not exist"
  exit
end
|
125
|
+
|
126
|
+
if $options[:host].nil?
|
127
|
+
puts "\nPlease specify host name: -h\n\n"
|
128
|
+
exit
|
129
|
+
end
|
130
|
+
|
131
|
+
if $options[:database].nil?
|
132
|
+
puts "\nPlease specify PostgreSQL database name: -d\n\n"
|
133
|
+
exit
|
134
|
+
end
|
135
|
+
|
136
|
+
# The PostgreSQL username is mandatory; the message names the flag that sets it (-u).
if $options[:username].nil?
  puts "\nPlease specify PostgreSQL username: -u\n\n"
  exit
end
|
140
|
+
|
141
|
+
# Default value
|
142
|
+
$options[:listen] ||= POSTGRESQL_PORT
|
143
|
+
$options[:delim] ||= CSV_DEFAULT_DELIMITER
|
144
|
+
$options[:delim] = "\t" if $options[:delim] == '\t'
|
145
|
+
$tmpfile = "/tmp/#{Digest::SHA1.hexdigest(rand(100000).to_s)}.csv"
|
146
|
+
|
147
|
+
# Database dump
|
148
|
+
ActiveRecord::Base.establish_connection(
|
149
|
+
'adapter' => 'postgresql',
|
150
|
+
'host' => $options[:host],
|
151
|
+
'database' => $options[:database],
|
152
|
+
'username' => $options[:username],
|
153
|
+
'password' => $options[:password],
|
154
|
+
'port' => $options[:listen],
|
155
|
+
'timeout' => 15000
|
156
|
+
)
|
157
|
+
|
158
|
+
class String
  # Turns the string into an identifier-like name: surrounding whitespace is
  # stripped and every run of characters other than a-z and 0-9 is replaced by
  # a single underscore. Upper-case letters are NOT kept (they count as
  # "other" characters), so callers downcase first.
  #
  # @return [String] a new string; the receiver is not modified
  def underscore
    # No nil guard needed: the receiver of an instance method is never nil
    strip.gsub(/[^a-z0-9]+/, "_")
  end
end
|
164
|
+
|
165
|
+
# Loads a CSV or fixed-width (FX) text file into a freshly created PostgreSQL
# table.
#
# Settings are read from the global $options hash (:input, :format, :delim,
# :table, :host, :listen, :database, :username, :password); the intermediate
# CSV file is written to the path held in $tmpfile.
class MyParser
  # @raise [RuntimeError] when the host is not local and no password was given
  def initialize
    # remote server always requires password
    if !local? && $options[:password].nil?
      raise "You are connecting to a remote server\nPlease make sure you specify SQL password: --password "
    end
  end

  # Runs the loader that matches $options[:format] ('FX' or 'CSV').
  def run
    load_fx if $options[:format] == 'FX'
    load_csv if $options[:format] == 'CSV'
  end

  # Loads a delimited file (delimiter taken from $options[:delim]).
  def load_csv
    # Copy file to /tmp/ folder to prevent Permission Error
    FileUtils.cp $options[:input], $tmpfile
    create_table_from_csv($tmpfile)
  end

  # Converts a fixed-width file to CSV, then loads that CSV. Column widths are
  # derived from the header line: each header token plus the whitespace that
  # follows it defines one column.
  #
  # @raise [RuntimeError] if the input file has no header line
  def load_fx
    # Load data
    data = IO.read($options[:input]).split("\n")
    header = data.shift
    raise "File Empty!!!" if header.nil?

    # NOTE(review): the pattern requires whitespace after each token, so a last
    # header with no trailing whitespace is not matched -- confirm inputs pad
    # the header line
    headers = header.scan(/[^\s]+\s+/)

    # Parse: one "aN" unpack directive per column, N = width incl. padding
    ranges = headers.map { |s| "a#{s.size}" }.join("")
    headers.map! { |s| s.downcase.strip }

    # Write
    CSV.open($tmpfile, "wb", :col_sep => CSV_DEFAULT_DELIMITER, quote_char: "\b") do |csv|
      csv << headers
      data.each { |s| csv << s.unpack(ranges).map { |e| e.strip } }
    end

    # The temporary file was written with the default delimiter, so it must be
    # read back with that delimiter, not with a user-supplied --delim.
    create_table_from_csv($tmpfile, CSV_DEFAULT_DELIMITER)
  end

  # Recreates table $options[:table] (serial id plus one text column per CSV
  # header) and bulk-loads the file into it. A local host uses a server-side
  # COPY; any other host goes through the psql client (\COPY).
  #
  # @param csv_path [String] file with a header row; backspace is the quote char
  # @param delim [String] field delimiter of csv_path (default: $options[:delim])
  # @raise [RuntimeError] if the file has no data row, if header names are not
  #   unique, or if the psql client fails
  def create_table_from_csv(csv_path, delim = $options[:delim])
    # Get headers; block form closes the handle once the first row is read
    first = CSV.open(csv_path, :headers => true, :col_sep => delim, quote_char: "\b") { |csv| csv.first }
    raise "File Empty!!!" unless first

    # sanitize: blank header cells get a positional name
    headers = first.headers.each_with_index.map do |e, index|
      (e.nil? || e.empty?) ? "column_#{index + 1}" : e
    end
    headers.map! { |e| e.downcase.underscore }

    # check if every field name is unique
    if headers.count != headers.uniq.count
      raise "Field name must be UNIQUE: \nPlease check your input headers: [#{headers.sort.join(', ')}]"
    end

    # Create table
    column_defs = headers.map { |e| "\"#{e}\" text" }.join(",")
    query("drop table if exists #{$options[:table]}; create table #{$options[:table]}( id serial not null, #{column_defs} );")

    # Dump data
    column_list = headers.map { |e| "\"#{e}\"" }.join(",")
    insert_data_sql = "COPY #{$options[:table]}( #{column_list} ) FROM '#{csv_path}' DELIMITER #{delimiter_literal(delim)} CSV HEADER QUOTE E'\b';"

    # Change output file permission so that postgres user can read it.
    # Best effort only: a failure is reported but does not abort the load.
    begin
      FileUtils.chmod 0755, csv_path
    rescue StandardError => ex
      puts "Error while changing file permission: #{ex.message}"
    end

    if local?
      query(insert_data_sql)
    else
      puts "\nWARNING: pushing data to remote server [#{$options[:host]}].\nBe sure you have the correct version of `psql` command installed\n\n"

      # The password (not the username) goes into PGPASSWORD, scoped to the
      # child process only. Passing an argument vector bypasses the shell, so
      # the quotes and control characters inside the SQL reach psql intact.
      loaded = system(
        { 'PGPASSWORD' => $options[:password].to_s },
        'psql',
        '-U', $options[:username].to_s,
        '-h', $options[:host].to_s,
        '-p', $options[:listen].to_s,
        $options[:database].to_s,
        '-c', "\\#{insert_data_sql}"
      )
      raise "Failed to load data into #{$options[:table]} through psql" unless loaded
    end

    puts "\nTable `#{$options[:table]}` loaded \n\n"
  end

  private

  # SQL literal for the COPY ... DELIMITER clause: an escape string for TAB,
  # otherwise the delimiter in single quotes (embedded quotes doubled).
  def delimiter_literal(delim)
    return "E'\t'" if delim == "\t"
    "'#{delim.to_s.gsub("'", "''")}'"
  end

  # Runs the given SQL statements (joined with "; ") on the current connection.
  def query(*query_str)
    ActiveRecord::Base.connection.execute(query_str.join("; "))
  end

  # True when $options[:host] is one of the loopback names treated as local.
  def local?
    ['localhost', '127.0.0.1'].include?($options[:host])
  end
end
|
269
|
+
|
270
|
+
# Entry point: run the loader and report any failure.
# Only StandardError is rescued so that SystemExit and interrupts (Ctrl-C) are
# not swallowed, and the script exits non-zero so callers can detect failure.
begin
  MyParser.new.run
rescue StandardError => ex
  puts "\n\n*********** ERROR ***********\n#{ex.message}\n\n"
  exit 1
end
|
data/bin/ipatch
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
# DATA PATCHER
|
2
|
+
#
|
3
|
+
# @author Nghi Pham
|
4
|
+
# @date April 2014
|
5
|
+
#
|
6
|
+
# Any custom patches to the database goes here
|
7
|
+
#
|
8
|
+
require 'optparse'
|
9
|
+
require 'csv'
|
10
|
+
require 'active_record'
|
11
|
+
require 'rubygems'
|
12
|
+
|
13
|
+
# Default values
|
14
|
+
POSTGRESQL_PORT = 5432
|
15
|
+
|
16
|
+
# User input
|
17
|
+
$options = {}
|
18
|
+
parser = OptionParser.new("", 24) do |opts|
|
19
|
+
opts.banner = "\nProgram: Data Patcher\nAuthor: MCKI\n\n"
|
20
|
+
|
21
|
+
opts.on("-q", "--query QUERY", "Custom query") do |v|
|
22
|
+
$options[:query] = v
|
23
|
+
end
|
24
|
+
|
25
|
+
opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
|
26
|
+
$options[:host] = v
|
27
|
+
end
|
28
|
+
|
29
|
+
opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
|
30
|
+
$options[:database] = v
|
31
|
+
end
|
32
|
+
|
33
|
+
opts.on("-u", "--username USER", "PostgreSQL username") do |v|
|
34
|
+
$options[:username] = v
|
35
|
+
end
|
36
|
+
|
37
|
+
opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
|
38
|
+
$options[:password] = v
|
39
|
+
end
|
40
|
+
|
41
|
+
opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
|
42
|
+
$options[:listen] = v
|
43
|
+
end
|
44
|
+
|
45
|
+
opts.on_tail('--help', 'Displays this help') do
|
46
|
+
puts opts, "", help
|
47
|
+
exit
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Builds the EXAMPLES section printed after the option summary by --help.
#
# @return [String] multi-line usage example for the patch script
def help
  examples = <<-EOS

EXAMPLES
-------------------------------------------------------
Send custom SQL to a data table

ruby patch.rb --host=localhost --username=postgres --password=postgres --database=db \\
--query="ALTER TABLE vendors ADD COLUMN tmp text; UPDATE vendors SET tmp = vendor_number"

EOS
  examples
end
|
63
|
+
|
64
|
+
begin
|
65
|
+
parser.parse!
|
66
|
+
rescue SystemExit => ex
|
67
|
+
exit
|
68
|
+
end
|
69
|
+
|
70
|
+
if $options[:query].nil?
|
71
|
+
puts "\nPlease specify SQL query: -q\n\n"
|
72
|
+
exit
|
73
|
+
end
|
74
|
+
|
75
|
+
if $options[:host].nil?
|
76
|
+
puts "\nPlease specify host name: -h\n\n"
|
77
|
+
exit
|
78
|
+
end
|
79
|
+
|
80
|
+
if $options[:database].nil?
|
81
|
+
puts "\nPlease specify PostgreSQL database name: -d\n\n"
|
82
|
+
exit
|
83
|
+
end
|
84
|
+
|
85
|
+
# The PostgreSQL username is mandatory; the message names the flag that sets it (-u).
if $options[:username].nil?
  puts "\nPlease specify PostgreSQL username: -u\n\n"
  exit
end
|
89
|
+
|
90
|
+
# Default value
|
91
|
+
$options[:listen] ||= POSTGRESQL_PORT
|
92
|
+
|
93
|
+
# Database
|
94
|
+
ActiveRecord::Base.establish_connection(
|
95
|
+
'adapter' => 'postgresql',
|
96
|
+
'host' => $options[:host],
|
97
|
+
'database' => $options[:database],
|
98
|
+
'username' => $options[:username],
|
99
|
+
'password' => $options[:password],
|
100
|
+
'port' => $options[:listen],
|
101
|
+
'timeout' => 15000
|
102
|
+
)
|
103
|
+
|
104
|
+
# Execute the query, use it at your own risk!!!
|
105
|
+
ActiveRecord::Base.connection.execute($options[:query])
|
106
|
+
|
107
|
+
puts "\nDone!\n\n"
|
data/bin/isanitize
ADDED
data/bin/ivalidate
ADDED
@@ -0,0 +1,258 @@
|
|
1
|
+
# DATA VALIDATOR
|
2
|
+
#
|
3
|
+
# @author Nghi Pham
|
4
|
+
# @date April 2014
|
5
|
+
#
|
6
|
+
# Data validation includes:
|
7
|
+
# * Uniqueness
|
8
|
+
# * Integrity (cross reference)
|
9
|
+
# * Data type: numeric, text, enum, etc.
|
10
|
+
# * Data format: text size, text values, enum, inclusion, exclusion, etc.
|
11
|
+
#
|
12
|
+
# Issue ruby validate.rb --help for guideline/examples
|
13
|
+
#
|
14
|
+
require 'optparse'
|
15
|
+
require 'csv'
|
16
|
+
require 'active_record'
|
17
|
+
require 'rubygems'
|
18
|
+
require 'digest/sha1'
|
19
|
+
|
20
|
+
$options = {
|
21
|
+
:unique => [],
|
22
|
+
:not_null => [],
|
23
|
+
:match => [],
|
24
|
+
:not_match => [],
|
25
|
+
:cross_reference => []
|
26
|
+
}
|
27
|
+
parser = OptionParser.new("", 24) do |opts|
|
28
|
+
opts.banner = "\nProgram: Data Validator\nAuthor: MCKI\n\n"
|
29
|
+
|
30
|
+
opts.on("--unique FIELD", "Check if FIELD is unique") do |v|
|
31
|
+
$options[:unique] << v
|
32
|
+
end
|
33
|
+
|
34
|
+
opts.on("--not-null FIELD", "Check if FIELD is not null or empty") do |v|
|
35
|
+
$options[:not_null] << v
|
36
|
+
end
|
37
|
+
|
38
|
+
opts.on("--match REGEXP", "Check if FIELD matches REGEXP") do |v|
|
39
|
+
$options[:match] << v
|
40
|
+
end
|
41
|
+
|
42
|
+
opts.on("--not-match REGEXP", "Check if FIELD does not match REGEXP") do |v|
|
43
|
+
$options[:not_match] << v
|
44
|
+
end
|
45
|
+
|
46
|
+
opts.on("--cross-reference EXPR", "Reference") do |v|
|
47
|
+
$options[:cross_reference] << v
|
48
|
+
end
|
49
|
+
|
50
|
+
opts.on("-t", "--table TABLE", "Table to verify") do |v|
|
51
|
+
$options[:table] = v
|
52
|
+
end
|
53
|
+
|
54
|
+
opts.on("--log-to FIELD", "Field to log error to") do |v|
|
55
|
+
$options[:log_to] = v
|
56
|
+
end
|
57
|
+
|
58
|
+
opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
|
59
|
+
$options[:host] = v
|
60
|
+
end
|
61
|
+
|
62
|
+
opts.on("-d", "--database DATABASE", "PostgreSQL database") do |v|
|
63
|
+
$options[:database] = v
|
64
|
+
end
|
65
|
+
|
66
|
+
opts.on("-u", "--username USER", "PostgreSQL username") do |v|
|
67
|
+
$options[:username] = v
|
68
|
+
end
|
69
|
+
|
70
|
+
opts.on("-p", "--password PASSWORD", "PostgreSQL password") do |v|
|
71
|
+
$options[:password] = v
|
72
|
+
end
|
73
|
+
|
74
|
+
opts.on("-l", "--listen PORT", "PostgreSQL listen port (default to 5432)") do |v|
|
75
|
+
$options[:listen] = v
|
76
|
+
end
|
77
|
+
|
78
|
+
opts.on_tail('--help', 'Displays this help') do
|
79
|
+
puts opts, "", help
|
80
|
+
exit
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
# Usage examples printed below the option summary when --help is given.
# The text is returned, not printed; the caller decides where it goes.
def help
  <<-USAGE

EXAMPLES
-------------------------------------------------------
Validate `mfrs` table against several rules and write error logs to errors field:

ruby validate.rb -h localhost -u postgres -p postgres -d db \\
--table=mfrs \\
--log-to=errors \\
--unique="mfr_lic" \\
--unique="mfr_number" \\
--not-null="mfr_number" \\
--not-null="mfr_lic" \\
--match="mfr_number/^2.*/" \\
--match="mfr_name/^[a-zA-Z]+/" \\
--not-match="status/(failed|inactive|wrong)/"

  USAGE
end
|
104
|
+
|
105
|
+
# Parse ARGV in place, filling $options through the handlers registered on
# the parser.
#
# An unknown switch or a missing argument is reported on stderr together with
# the option summary, and the script stops with a non-zero status instead of
# dying with an uncaught exception. The --help handler ends the process
# through Kernel#exit by itself, so SystemExit is deliberately not rescued.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  warn "\n#{e.message}"
  warn parser.to_s
  exit 1
end
|
110
|
+
|
111
|
+
# Stop early when a mandatory switch is missing. Entries are checked in
# order and only the first missing one is reported, paired with the exact
# hint shown to the user. The username hint names -u, the switch that
# actually sets :username.
[
  [:table,    "\nPlease specify table name: -t\n\n"],
  [:log_to,   "\nPlease specify field to log errors: --log-to\n\n"],
  [:host,     "\nPlease specify host name: -h\n\n"],
  [:database, "\nPlease specify PostgreSQL database name: -d\n\n"],
  [:username, "\nPlease specify PostgreSQL username: -u\n\n"]
].each do |key, hint|
  next unless $options[key].nil?

  puts hint
  exit
end
|
135
|
+
|
136
|
+
# Fallbacks for switches that were not supplied on the command line.
$options[:listen] = $options[:listen] || 5432
# NOTE(review): :output is not read by any of the checks below; it looks
# like a leftover from an export script - confirm before removing.
$options[:output] = $options[:output] || "/tmp/#{Digest::SHA1.hexdigest(rand(100000).to_s)}.csv"
|
139
|
+
|
140
|
+
# Open the PostgreSQL connection that every check below runs on, using the
# connection settings gathered from the command line.
db_config = {
  'adapter'  => 'postgresql',
  'host'     => $options[:host],
  'database' => $options[:database],
  'username' => $options[:username],
  'password' => $options[:password],
  'port'     => $options[:listen],
  'timeout'  => 15000
}
ActiveRecord::Base.establish_connection(db_config)
|
150
|
+
|
151
|
+
# --------------------------------------------------------------------
|
152
|
+
# Preliminary check
|
153
|
+
# Add column errors if not yet exists
|
154
|
+
# --------------------------------------------------------------------
|
155
|
+
# Add the log column (text, default '') to the validated table. The ALTER
# TABLE sits in a nested PL/pgSQL block so that a duplicate_column error is
# turned into a NOTICE and the script can be re-run on the same table.
ActiveRecord::Base.connection.execute(<<-SQL)
DO $$
BEGIN
BEGIN
ALTER TABLE #{$options[:table]} ADD COLUMN #{$options[:log_to]} text default '';
EXCEPTION
WHEN duplicate_column THEN RAISE NOTICE 'column #{$options[:log_to]} already exists';
END;
END;
$$
SQL
|
168
|
+
|
169
|
+
# --------------------------------------------------------------------
|
170
|
+
# Check unique field
|
171
|
+
# --------------------------------------------------------------------
|
172
|
+
# For every --unique column: rows whose non-blank value occurs more than once
# in the table get "<column>-not-unique" appended to the log column. Log
# entries are kept as a single string joined with ' || '.
$options[:unique].each do |column|
  puts "Checking unique fields: #{column}"

  table = $options[:table]
  log_column = $options[:log_to]

  ActiveRecord::Base.connection.execute(<<-SQL)
UPDATE #{table} SET #{log_column} = array_to_string(string_to_array(#{log_column}, ' || ') || string_to_array('#{column}-not-unique', ' || '), ' || ')
WHERE #{column} IN (
SELECT #{column} FROM #{table} GROUP BY #{column}
HAVING count(*) > 1
) AND #{column} IS NOT NULL AND length(trim(#{column})) <> 0;
  SQL
end
|
185
|
+
|
186
|
+
# --------------------------------------------------------------------
|
187
|
+
# Check not-null field
|
188
|
+
# --------------------------------------------------------------------
|
189
|
+
# For every --not-null column: rows where the value is NULL or blank after
# trimming get "<column>-null" appended to the log column.
$options[:not_null].each do |column|
  puts "Checking not-null fields: #{column}"

  table = $options[:table]
  log_column = $options[:log_to]

  ActiveRecord::Base.connection.execute(<<-SQL)
UPDATE #{table} SET #{log_column} = array_to_string(string_to_array(#{log_column}, ' || ') || string_to_array('#{column}-null', ' || '), ' || ')
WHERE #{column} IS NULL OR length(trim(#{column})) = 0;
  SQL
end
|
199
|
+
|
200
|
+
# --------------------------------------------------------------------
|
201
|
+
# Check field satisfying --match=FIELD/REGEXP/
|
202
|
+
# --------------------------------------------------------------------
|
203
|
+
# For every --match=FIELD/REGEXP/ rule: rows whose non-blank FIELD value does
# not match REGEXP get "<regexp>-not-matched" appended to the log column.
$options[:match].each do |value|
  # FIELD is everything before the first slash; REGEXP is the text between
  # the first and the last slash.
  field = value[/^[^\/]+/]
  regexp = value[/(?<=\/).*(?=\/)/]

  # Without both parts there is nothing to check; fail with a clear message
  # instead of a NoMethodError on nil further down.
  raise "Error: Wrong argument for --match switch" if field.nil? || regexp.nil?

  puts "Checking REGEXP matching: #{regexp}"

  # The pattern is embedded in SQL string literals twice (log label and the
  # !~ operand); single quotes must be doubled in both places.
  quoted_regexp = regexp.gsub("'", "''")

  match_sql = <<-eos
UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{quoted_regexp}-not-matched', ' || '), ' || ')
WHERE #{field} IS NOT NULL AND length(trim(#{field})) <> 0 AND #{field} !~ '#{quoted_regexp}';
  eos

  ActiveRecord::Base.connection.execute(match_sql)
end
|
215
|
+
|
216
|
+
# --------------------------------------------------------------------
|
217
|
+
# Check field satisfying --not-match=FIELD/REGEXP/
|
218
|
+
# --------------------------------------------------------------------
|
219
|
+
# For every --not-match=FIELD/REGEXP/ rule: rows whose non-blank FIELD value
# matches REGEXP get "<regexp>-unexpectedly-matched" appended to the log
# column.
$options[:not_match].each do |value|
  # FIELD is everything before the first slash; REGEXP is the text between
  # the first and the last slash.
  field = value[/^[^\/]+/]
  regexp = value[/(?<=\/).*(?=\/)/]

  # Without both parts there is nothing to check; fail with a clear message
  # instead of a NoMethodError on nil further down.
  raise "Error: Wrong argument for --not-match switch" if field.nil? || regexp.nil?

  puts "Checking REGEXP not matching: #{regexp}"

  # The pattern is embedded in SQL string literals twice (log label and the
  # ~ operand); single quotes must be doubled in both places.
  quoted_regexp = regexp.gsub("'", "''")

  not_match_sql = <<-eos
UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{quoted_regexp}-unexpectedly-matched', ' || '), ' || ')
WHERE #{field} IS NOT NULL AND length(trim(#{field})) <> 0 AND #{field} ~ '#{quoted_regexp}';
  eos

  ActiveRecord::Base.connection.execute(not_match_sql)
end
|
231
|
+
|
232
|
+
# --------------------------------------------------------------------
|
233
|
+
# Check field satisfying --cross-reference
|
234
|
+
# --------------------------------------------------------------------
|
235
|
+
# For every --cross-reference=FIELD|REF_TABLE.REF_FIELD rule: rows whose
# non-blank FIELD value is absent from REF_TABLE.REF_FIELD get
# "<field>-not-referenced-to-<ref_table>.<ref_field>" appended to the log
# column.
$options[:cross_reference].each do |value|
  parts = value.split(/[|\.]/)

  # Exactly three non-empty parts are required. raise already aborts the
  # script, so no exit call is needed after it.
  if parts.size != 3 || parts.any?(&:empty?)
    raise "Error: Wrong argument for --cross-reference switch"
  end

  field, ref_table, ref_field = parts

  puts "Checking data integrity: #{value}"

  # NULLs are filtered out of the reference list: `x NOT IN (..., NULL)` is
  # never true in SQL, so a single NULL in the reference column would hide
  # every violation.
  # @todo: poor performance here, think of a better SQL!!!
  ref_sql = <<-eos
UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{field}-not-referenced-to-#{ref_table}.#{ref_field}', ' || '), ' || ')
WHERE #{field} NOT IN (
SELECT #{ref_field} FROM #{ref_table} WHERE #{ref_field} IS NOT NULL
) AND #{field} IS NOT NULL AND length(trim(#{field})) <> 0;
  eos

  ActiveRecord::Base.connection.execute(ref_sql)
end
|
data/idata.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'idata/version'
|
5
|
+
|
6
|
+
# Gem definition for idata: command-line tools for importing data from raw
# files.
Gem::Specification.new do |gem|
  # Identity
  gem.name        = "idata"
  gem.version     = Idata::VERSION
  gem.authors     = ["Nghi Pham"]
  gem.email       = ["minhnghivn@gmail.com"]
  gem.description = %q{Tools for importing data from raw files}
  gem.summary     = %q{Tools for importing data from raw files}
  gem.homepage    = ""
  gem.license     = "MIT"

  # Packaged files are whatever git tracks. The executables are listed by
  # hand rather than derived from the bin/ entries of that file list.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = %w[iload ieval ipatch ivalidate iexport isanitize]
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Development-only dependencies
  gem.add_development_dependency "bundler", ">= 1.3"
  gem.add_development_dependency "rake", ">= 0.9"

  # Runtime dependencies
  gem.add_runtime_dependency "rails", ">= 4.0"
  gem.add_runtime_dependency "pg"
end
|
data/lib/idata.rb
ADDED
metadata
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: idata
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Nghi Pham
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-04-21 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.9'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.9'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rails
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '4.0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '4.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: pg
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Tools for importing data from raw files
|
70
|
+
email:
|
71
|
+
- minhnghivn@gmail.com
|
72
|
+
executables:
|
73
|
+
- iload
|
74
|
+
- ieval
|
75
|
+
- ipatch
|
76
|
+
- ivalidate
|
77
|
+
- iexport
|
78
|
+
- isanitize
|
79
|
+
extensions: []
|
80
|
+
extra_rdoc_files: []
|
81
|
+
files:
|
82
|
+
- .gitignore
|
83
|
+
- Gemfile
|
84
|
+
- LICENSE.txt
|
85
|
+
- README.md
|
86
|
+
- Rakefile
|
87
|
+
- bin/ieval
|
88
|
+
- bin/iexport
|
89
|
+
- bin/iload
|
90
|
+
- bin/ipatch
|
91
|
+
- bin/isanitize
|
92
|
+
- bin/ivalidate
|
93
|
+
- idata.gemspec
|
94
|
+
- lib/idata.rb
|
95
|
+
- lib/idata/version.rb
|
96
|
+
homepage: ''
|
97
|
+
licenses:
|
98
|
+
- MIT
|
99
|
+
metadata: {}
|
100
|
+
post_install_message:
|
101
|
+
rdoc_options: []
|
102
|
+
require_paths:
|
103
|
+
- lib
|
104
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
105
|
+
requirements:
|
106
|
+
- - '>='
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: '0'
|
109
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
110
|
+
requirements:
|
111
|
+
- - '>='
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: '0'
|
114
|
+
requirements: []
|
115
|
+
rubyforge_project:
|
116
|
+
rubygems_version: 2.2.1
|
117
|
+
signing_key:
|
118
|
+
specification_version: 4
|
119
|
+
summary: Tools for importing data from raw files
|
120
|
+
test_files: []
|