idata 0.1.28 → 0.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d40de88a668b52fa14ae83259cf63b59697a1deb
4
- data.tar.gz: 759fd49acead65db9206210d9db3054b4fe4e4a1
3
+ metadata.gz: d37af89a0b0962c026e20d63637f3a750030429f
4
+ data.tar.gz: a67cdfd07638f57788c837667c01e4f259867fa6
5
5
  SHA512:
6
- metadata.gz: bc2af89be0e37eb04be0cc237e063b5ade86c22e3028726df179ac68bd0d3fc58aab9cde81782215a89cc1d0fc8a9445ad3425da0e3c6e8e8ce690c9153001b3
7
- data.tar.gz: 349f28149ca318f7bd42fcb38685cba845992c6b0e380351392fe1b2d6b34cb982409721d44de91ce783ab4df1d103a3bd59d678fb6498f723db47bdb9d666c8
6
+ metadata.gz: 58eec49a072286d87b643f58be7c456e0d9140c7341b0816fc46b790241a9ef07fff0c2e86f72b2eda17f3c76defebb3259bd0fc50f912a9e90ff0fda2cd0511
7
+ data.tar.gz: bd83cd1f605863ed36bdf6c71236233bed41a0830013683b6906c9834fe23b12d482807eef38d318f6dec364ab14d55d8017734555ec01a63b6faa0270d67f10
data/README.md CHANGED
@@ -1,14 +1,26 @@
1
- # OVERVIEW
1
+ # Overview
2
2
  We provide some useful utilities for validating data in a PostgreSQL data table
3
3
  These utilities can be used as simple terminal commands and can be installed by:
4
4
 
5
5
  gem install idata
6
6
 
7
+ idata comes with the following commands:
8
+ * iload
9
+ * ivalidate
10
+ * ipatch
11
+ * ieval
12
+ * iexport
13
+ * imerge
14
+ * isanitize
15
+
16
+ Run any command with the --help switch for details
17
+
7
18
  Prerequisites:
8
19
  * PostgreSQL 9.0 or above
9
20
  * Ruby 2.0 or above
21
+ * An auto-generated ID field is required for a data table to be validated using ivalidate
10
22
 
11
- # USAGE
23
+ # Usage
12
24
  Suppose we have an `items` table, and we want to validate its records against certain criteria like:
13
25
 
14
26
  * `item_id` must not be null
@@ -65,7 +77,7 @@ For example, the following two checks are equivalent:
65
77
  Note: run `ivalidate --help` to see the full list of supported switches
66
78
 
67
79
 
68
- # PUT IT ALL TOGETHER
80
+ # Put it all together
69
81
  You can put several `ivalidate` commands (for several data tables) in one single bash/sh file.
70
82
  Besides `ivalidate`, we also support some other utilities to:
71
83
  + Load data from text files to SQL tables
data/bin/ivalidate CHANGED
@@ -73,6 +73,10 @@ parser = OptionParser.new("", 24) do |opts|
73
73
  opts.on("--pretty", "Use more human-readable error message") do |v|
74
74
  $options[:pretty] = v
75
75
  end
76
+
77
+ opts.on("--case-insensitive", "Use more human-readable error message") do |v|
78
+ $options[:case_insensitive] = v
79
+ end
76
80
 
77
81
  opts.on("-h", "--host HOST", "PostgreSQL host") do |v|
78
82
  $options[:host] = v
@@ -185,6 +189,15 @@ class String
185
189
 
186
190
  "(#{sql})"
187
191
  end
192
+
193
+ def lower
194
+ a = self.split(/\s*,\s*/)
195
+ sql = a.map{|s|
196
+ "lower(#{s})"
197
+ }.join(",")
198
+
199
+ sql
200
+ end
188
201
  end
189
202
 
190
203
 
@@ -215,11 +228,15 @@ ActiveRecord::Base.connection.execute(pre_sql)
215
228
  $options[:unique].each do |field|
216
229
  begin
217
230
  puts "Checking uniqueness: #{field}"
218
-
231
+ if $options[:case_insensitive]
232
+ f_lower = field.lower
233
+ else
234
+ f_lower = field
235
+ end
219
236
  uniq_sql = <<-eos
220
237
  UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('[#{field}] is not unique', ' || '), ' || ')
221
238
  WHERE id IN (
222
- SELECT unnest(array_agg(id)) FROM #{$options[:table]} GROUP BY #{field}
239
+ SELECT unnest(array_agg(id)) FROM #{$options[:table]} GROUP BY #{f_lower}
223
240
  HAVING count(*) > 1
224
241
  ) AND #{field.not_null_sql};
225
242
  eos
@@ -239,19 +256,24 @@ $options[:consistent_by].each do |fields|
239
256
 
240
257
  raise "input must be in field1|field2 format" if fields.count != 2
241
258
 
242
- f1 = fields.first
243
- f2 = fields.last
259
+ f1_case = f1 = fields.first
260
+ f2_case = f2 = fields.last
261
+
262
+ if $options[:case_insensitive]
263
+ f1_case = f1_case.lower
264
+ f2_case = f2_case.lower
265
+ end
244
266
 
245
- puts "Checking uniqueness: #{f1} | #{f2}"
267
+ puts "Checking consistency: #{f1} | #{f2}"
246
268
 
247
269
  uniq_sql = <<-eos
248
- UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('same [#{f2}] but with different #{f1}', ' || '), ' || ')
270
+ UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('same [#{f2}] but with different [#{f1}]', ' || '), ' || ')
249
271
  WHERE id IN
250
272
  (
251
273
  SELECT unnest(array_agg(id)) FROM #{$options[:table]}
252
274
  WHERE #{f1.not_null_sql} AND #{f2.not_null_sql}
253
- GROUP BY #{f2}
254
- HAVING COUNT(distinct #{f1}) > 1
275
+ GROUP BY #{f2_case}
276
+ HAVING COUNT(distinct #{f1_case}) > 1
255
277
  );
256
278
  eos
257
279
 
@@ -340,19 +362,25 @@ $options[:cross_reference].each do |value|
340
362
  raise "Error: Wrong argument for --cross-reference switch"
341
363
  exit(0)
342
364
  end
343
-
365
+
344
366
  field = values[0]
345
367
  ref_table = values[1]
346
368
  ref_field = values[2]
347
369
 
348
370
  puts "Checking data integrity: #{value}"
371
+
372
+ if $options[:case_insensitive]
373
+ join_condition = "on lower(origin.#{field}) = lower(target.#{ref_field})"
374
+ else
375
+ join_condition = "on origin.#{field} = target.#{ref_field}"
376
+ end
349
377
 
350
378
  # @todo: poor performance here, think of a better SQL!!!
351
379
  ref_sql = <<-eos
352
380
  UPDATE #{$options[:table]} SET #{$options[:log_to]} = array_to_string(string_to_array(#{$options[:log_to]}, ' || ') || string_to_array('#{field} does not reference #{values[1]}.#{values[2]}', ' || '), ' || ')
353
381
  WHERE #{field} IN (
354
382
  SELECT origin.#{field} from #{$options[:table]} origin LEFT JOIN #{ref_table} target
355
- on origin.#{field} = target.#{ref_field}
383
+ #{join_condition}
356
384
  where target.#{ref_field} is null
357
385
  ) AND #{field} IS NOT NULL AND length(trim(#{field})) <> 0;
358
386
  eos
data/lib/idata/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Idata
2
- VERSION = "0.1.28"
2
+ VERSION = "0.1.29"
3
3
  end
data/sample.sh CHANGED
@@ -94,7 +94,7 @@ ivalidate --table=$ITEM \
94
94
  # Step 3 - Generate summary report
95
95
  ###################################################################################
96
96
  # After the validation step above, an additional field named validation_errors
97
- # is added to every table. In case the record does not pass a validation creterion, a corresponding error shall be logged to this field
97
+ # is added to every table. In case the record does not pass a validation criterion, a corresponding error shall be logged to this field
98
98
  # One record may have more than one error logged
99
99
  #
100
100
  # You can simply look at the validation_errors field to see errors associated to a record
@@ -104,7 +104,6 @@ ivalidate --table=$ITEM \
104
104
  # 2 Extract the first 1000 sample records for every error
105
105
  # 3 Put all together into one single Excel report
106
106
 
107
-
108
107
  # 1) Create error summary report table and write to /tmp/summary.csv
109
108
  # This can be done using the iexport utility which can generate a CSV file from a data table or from a custom query
110
109
  # Run iexport --help for more information
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: idata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.28
4
+ version: 0.1.29
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nghi Pham
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-21 00:00:00.000000000 Z
11
+ date: 2014-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler