idata 0.1.28 → 0.1.29

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d40de88a668b52fa14ae83259cf63b59697a1deb
4
- data.tar.gz: 759fd49acead65db9206210d9db3054b4fe4e4a1
3
+ metadata.gz: d37af89a0b0962c026e20d63637f3a750030429f
4
+ data.tar.gz: a67cdfd07638f57788c837667c01e4f259867fa6
5
5
  SHA512:
6
- metadata.gz: bc2af89be0e37eb04be0cc237e063b5ade86c22e3028726df179ac68bd0d3fc58aab9cde81782215a89cc1d0fc8a9445ad3425da0e3c6e8e8ce690c9153001b3
7
- data.tar.gz: 349f28149ca318f7bd42fcb38685cba845992c6b0e380351392fe1b2d6b34cb982409721d44de91ce783ab4df1d103a3bd59d678fb6498f723db47bdb9d666c8
6
+ metadata.gz: 58eec49a072286d87b643f58be7c456e0d9140c7341b0816fc46b790241a9ef07fff0c2e86f72b2eda17f3c76defebb3259bd0fc50f912a9e90ff0fda2cd0511
7
+ data.tar.gz: bd83cd1f605863ed36bdf6c71236233bed41a0830013683b6906c9834fe23b12d482807eef38d318f6dec364ab14d55d8017734555ec01a63b6faa0270d67f10
data/README.md CHANGED
@@ -1,14 +1,26 @@
1
- # OVERVIEW
1
+ # Overview
2
2
  We provide some useful utilities for validating data in a PostgreSQL data table
3
3
  These utilities can be used as simple terminal commands and can be installed by:
4
4
 
5
5
  gem install idata
6
6
 
7
+ idata comes along with the following commands:
8
+ * iload
9
+ * ivalidate
10
+ * ipatch
11
+ * ieval
12
+ * iexport
13
+ * imerge
14
+ * isanitize
15
+
16
+ Run any command with the --help switch for details
17
+
7
18
  Prerequisites:
8
19
  * PostgreSQL 9.0 or above
9
20
  * Ruby 2.0 or above
21
+ * An auto ID field is required for a data table to be validated using ivalidate
10
22
 
11
- # USAGE
23
+ # Usage
12
24
  Suppose we have an `items` table, and we want to validate its records against certain criteria like:
13
25
 
14
26
  * `item_id` must not be null
@@ -65,7 +77,7 @@ For example, the following two checks are equivalent:
65
77
  Note: run `ivalidate --help` to see the full list of supported switches
66
78
 
67
79
 
68
- # PUT IT ALL TOGETHER
80
+ # Put it all together
69
81
  You can put several `ivalidate` commands (for several data tables) in one single bash/sh file.
70
82
  Besides `ivalidate`, we also support some other utilities to:
71
83
  + Load data from text files to SQL tables
data/bin/ivalidate CHANGED
@@ -73,6 +73,10 @@ parser = OptionParser.new("", 24) do |opts|
73
73
  opts.on("--pretty", "Use more human-readable error message") do |v|
74
74
  $options[:pretty] = v
75
75
  end
76
+
77
+ opts.on("--case-insensitive", "Ignore case when checking uniqueness, consistency and cross references") do |v|
78
+ $options[:case_insensitive] = v
79
+ end
76
80
 
77
81
  opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
78
82
  $options[:host] = v
@@ -185,6 +189,15 @@ class String
185
189
 
186
190
  "(#{sql})"
187
191
  end
192
+
193
+ def lower
194
+ a = self.split(/\s*,\s*/)
195
+ sql = a.map{|s|
196
+ "lower(#{s})"
197
+ }.join(",")
198
+
199
+ sql
200
+ end
188
201
  end
189
202
 
190
203
 
@@ -215,11 +228,15 @@ ActiveRecord::Base.connection.execute(pre_sql)
215
228
  $options[:unique].each do |field|
216
229
  begin
217
230
  puts "Checking uniqueness: #{field}"
218
-
231
+ if $options[:case_insensitive]
232
+ f_lower = field.lower
233
+ else
234
+ f_lower = field
235
+ end
219
236
  uniq_sql = <<-eos
220
237
  UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('[#{field}] is not unique', ' || '), ' || ')
221
238
  WHERE id IN (
222
- SELECT unnest(array_agg(id)) FROM #{$options[:table]} GROUP BY #{field}
239
+ SELECT unnest(array_agg(id)) FROM #{$options[:table]} GROUP BY #{f_lower}
223
240
  HAVING count(*) > 1
224
241
  ) AND #{field.not_null_sql};
225
242
  eos
@@ -239,19 +256,24 @@ $options[:consistent_by].each do |fields|
239
256
 
240
257
  raise "input must be in field1|field2 format" if fields.count != 2
241
258
 
242
- f1 = fields.first
243
- f2 = fields.last
259
+ f1_case = f1 = fields.first
260
+ f2_case = f2 = fields.last
261
+
262
+ if $options[:case_insensitive]
263
+ f1_case = f1_case.lower
264
+ f2_case = f2_case.lower
265
+ end
244
266
 
245
- puts "Checking uniqueness: #{f1} | #{f2}"
267
+ puts "Checking consistency: #{f1} | #{f2}"
246
268
 
247
269
  uniq_sql = <<-eos
248
- UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('same [#{f2}] but with different #{f1}', ' || '), ' || ')
270
+ UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('same [#{f2}] but with different [#{f1}]', ' || '), ' || ')
249
271
  WHERE id IN
250
272
  (
251
273
  SELECT unnest(array_agg(id)) FROM #{$options[:table]}
252
274
  WHERE #{f1.not_null_sql} AND #{f2.not_null_sql}
253
- GROUP BY #{f2}
254
- HAVING COUNT(distinct #{f1}) > 1
275
+ GROUP BY #{f2_case}
276
+ HAVING COUNT(distinct #{f1_case}) > 1
255
277
  );
256
278
  eos
257
279
 
@@ -340,19 +362,25 @@ $options[:cross_reference].each do |value|
340
362
  raise "Error: Wrong argument for --cross-reference switch"
341
363
  exit(0)
342
364
  end
343
-
365
+
344
366
  field = values[0]
345
367
  ref_table = values[1]
346
368
  ref_field = values[2]
347
369
 
348
370
  puts "Checking data integrity: #{value}"
371
+
372
+ if $options[:case_insensitive]
373
+ join_condition = "on lower(origin.#{field}) = lower(target.#{ref_field})"
374
+ else
375
+ join_condition = "on origin.#{field} = target.#{ref_field}"
376
+ end
349
377
 
350
378
  # @todo: poor performance here, think of a better SQL!!!
351
379
  ref_sql = <<-eos
352
380
  UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{field} does not reference #{values[1]}.#{values[2]}', ' || '), ' || ')
353
381
  WHERE #{field} IN (
354
382
  SELECT origin.#{field} from #{$options[:table]} origin LEFT JOIN #{ref_table} target
355
- on origin.#{field} = target.#{ref_field}
383
+ #{join_condition}
356
384
  where target.#{ref_field} is null
357
385
  ) AND #{field} IS NOT NULL AND length(trim(#{field})) <> 0;
358
386
  eos
data/lib/idata/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Idata
2
- VERSION = "0.1.28"
2
+ VERSION = "0.1.29"
3
3
  end
data/sample.sh CHANGED
@@ -94,7 +94,7 @@ ivalidate --table=$ITEM \
94
94
  # Step 3 - Generate summary report
95
95
  ###################################################################################
96
96
  # After the validation step above, an additional field named validation_errors
97
- # is added to every table. In case the record does not pass a validation creterion, a corresponding error shall be logged to this field
97
+ # is added to every table. In case the record does not pass a validation criterion, a corresponding error shall be logged to this field
98
98
  # One record may have more than one error logged
99
99
  #
100
100
  # You can simply look at the validation_errors field to see errors associated to a record
@@ -104,7 +104,6 @@ ivalidate --table=$ITEM \
104
104
  # 2 Extract the first 1000 sample records for every error
105
105
  # 3 Put all together into one single Excel report
106
106
 
107
-
108
107
  # 1) Create error summary report table and write to /tmp/summary.csv
109
108
  # This can be done using the iexport utility which can generate a CSV file from a data table or from a custom query
110
109
  # Run iexport --help for more information
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: idata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.28
4
+ version: 0.1.29
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nghi Pham
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-21 00:00:00.000000000 Z
11
+ date: 2014-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler