idata 0.1.33 → 0.2.1

data/full.sh CHANGED
@@ -199,6 +199,9 @@ ivalidate --case-insensitive --pretty -t $GL \
  --match="exp_acct_no/[a-zA-Z0-9]/" \
  --not-null=exp_acct_name \
  --match="exp_acct_name/[a-zA-Z0-9]/" \
+ --not-null=fq_acct_no \
+ --unique=fq_acct_no \
+ --rquery="(fq_acct_no IS NOT NULL AND corp_acct_fmt IS NOT NULL AND fq_acct_no !~ ('^' || regexp_replace(replace(corp_acct_fmt, '-', '}-'), '(?=[abcABC])[A-Z]', '[0-9]{', 'g') || '}$')) -- Invalid fq_acct_no" \
  --consistent-by="corp_acct_no|corp_acct_name" \
  --consistent-by="corp_acct_name|corp_acct_no" \
  --consistent-by="exp_acct_no|corp_acct_no, corp_acct_name, cc_acct_no, cc_acct_name, exp_acct_name" \
@@ -227,7 +230,6 @@ ivalidate --case-insensitive --pretty -t $LOCATION \
  --rquery="(loc_type ~* '^(LOC_TYPE_SUPPLY|S)$' and (corp_acct_no is null or corp_name is null or corp_id is null)) -- either corp id/name or corp_acct_no is null" \
  --not-null="active" \
  --match="active/^(Y|N|1|2|3)$/" \
- --not-null="corp_acct_no" \
  --match="corp_acct_no/[a-zA-Z0-9]/" \
  --rquery="((inventory_path_name != '' AND inventory_path_name IS NOT NULL AND lower(inventory_path_name) != 'default') AND (inventory_loc_seq_no IS NULL OR inventory_loc_seq_no = '')) -- [inventory_loc_seq_no] is null" \
  --rquery="((inventory_path_name != '' AND inventory_path_name IS NOT NULL AND lower(inventory_path_name) != 'default') AND (inventory_location_name IS NULL OR inventory_location_name = '')) -- [inventory_location_name] is null" \
@@ -238,13 +240,15 @@ ivalidate --case-insensitive --pretty -t $LOCATION \
  --consistent-by="corp_id|corp_name" \
  --consistent-by="name|facility_code, loc_id" \
  --consistent-by="loc_id|facility_code, name" \
- --cross-reference="inventory_path_name|$LOCATION.name" \
  --cross-reference="inventory_location_name|$LOCATION.name" \
  --cross-reference="corp_id|$GL.corp_acct_no" \
  --cross-reference="corp_name|$GL.corp_acct_name"


 # validate CONTRACTS ORIGINAL
+ # @note Check unique keyset with item_id included for MSCM only
+ # Accepted:
+ # --not-null=contract_gpo_name \
 ivalidate --case-insensitive --pretty -t $CONTRACTO \
  --log-to=validation_errors \
  --not-null=contract_number \
@@ -258,7 +262,6 @@ ivalidate --case-insensitive --pretty -t $CONTRACTO \
  --not-null=item_descr \
  --not-null=item_qoe \
  --not-null=contract_price \
- --not-null=contract_gpo_name \
  --not-null=contract_gpo_id \
  --match="contract_number/[a-zA-Z0-9]/" \
  --match="contract_gpo_name/[a-zA-Z0-9]/" \
@@ -268,6 +271,7 @@ ivalidate --case-insensitive --pretty -t $CONTRACTO \
  --match="vendor_name/[a-zA-Z0-9]/" \
  --match="mfr_item_id/[a-zA-Z0-9]/" \
  --match="mfr_name/[a-zA-Z0-9]/" \
+ --rquery="(lower(mfr_name) = 'unknown' AND mfr_number IS NULL) -- Unknown mfr_name" \
  --query="to_date(contract_end, 'YYYY-MM-DD') >= to_date(contract_start, 'YYYY-MM-DD') -- [contract_end] comes before [contract_start]" \
  --match="contract_status/^(1|2|3|A|I|Inactive|Active|Y)$/" \
  --match="item_status/^(1|2|3|A|I|Inactive|Active|Y)$/" \
@@ -283,7 +287,7 @@ ivalidate --case-insensitive --pretty -t $CONTRACTO \
  --match="contract_price/^[0-9]+(\.{0,1}[0-9]+|[0-9]*)$/" \
  --match="item_qoe/^[0-9]+(\.{0,1}[0-9]+|[0-9]*)$/" \
  --rquery="(item_uom NOT IN (SELECT code FROM uomstd) AND item_uom !~ '^[a-zA-Z0-9]{1,3}$') -- invalid item_uom" \
- --unique="contract_gpo_name, contract_number, contract_start, contract_end, vendor_name, mfr_item_id, mfr_name, item_uom, corp_id" \
+ --unique="contract_gpo_name, contract_number, contract_start, contract_end, vendor_name, mfr_item_id, mfr_name, item_uom, corp_id, item_id" \

 # validate ITEM
 # Accepted:
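
Per the @note above, item_id now joins the uniqueness keyset for $CONTRACTO. This diff does not show how ivalidate implements --unique, but a plain-SQL sketch of the duplicate check it implies would be:

    -- 'contracts_original' is a hypothetical stand-in for the table behind $CONTRACTO
    SELECT contract_gpo_name, contract_number, contract_start, contract_end,
           vendor_name, mfr_item_id, mfr_name, item_uom, corp_id, item_id,
           count(*) AS dup_count
    FROM contracts_original
    GROUP BY contract_gpo_name, contract_number, contract_start, contract_end,
             vendor_name, mfr_item_id, mfr_name, item_uom, corp_id, item_id
    HAVING count(*) > 1;   -- keysets that --unique would report as duplicated
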
@@ -305,6 +309,7 @@ ivalidate --case-insensitive --pretty -t $ITEM \
  --not-null="mfr_number" \
  --not-null="mfr_name" \
  --not-null="active" \
+ --rquery="(lower(mfr_name) = 'unknown' AND mfr_number IS NULL) -- Unknown mfr_name" \
  --match="corp_id/[a-zA-Z0-9]/" \
  --match="corp_name/[a-zA-Z0-9]/" \
  --match="vendor_code/[a-zA-Z0-9]/" \
@@ -362,6 +367,7 @@ ivalidate --case-insensitive --pretty -t $PO \
  --consistent-by="vendor_name|vendor_code" \
  --consistent-by="mfr_name|mfr_number" \
  --unique="po_no, po_line_number" \
+ --rquery="(lower(mfr_name) = 'unknown' AND mfr_number IS NULL) -- Unknown mfr_name" \
  --rquery="(item_id not like '%~%' and item_id not in (select item_id from items)) -- [item_id] does not reference [items.item_id]" \
  --cross-reference="vendor_code|$VENDOR.vendor_code" \
  --cross-reference="vendor_name|$VENDOR.vendor_name" \
@@ -372,7 +378,7 @@ ivalidate --case-insensitive --pretty -t $PO \
  --cross-reference="cost_center_id|$GL.cc_acct_no" \
  --cross-reference="cost_center_name|$GL.cc_acct_name" \
  --rquery="(purchase_uom NOT IN (SELECT code FROM uomstd) AND purchase_uom !~ '^[a-zA-Z0-9]{1,3}$') -- invalid [purchase_uom]" \
- --rquery="(item_id IS NOT NULL AND (vendor_code IS NOT NULL OR vendor_name IS NOT NULL) AND vendor_item_id IS NULL) -- [vendor_item_id] is null" \
+ --rquery="((item_id IS NULL OR item_id !~ '[a-zA-Z0-9]') AND (vendor_item_id IS NULL OR vendor_item_id !~ '[a-zA-Z0-9]')) -- [vendor_item_id] is either null or invalid" \
  --match="purchase_price/^[0-9]+(\.{0,1}[0-9]+|[0-9]*)$/" \
  --match="purchase_qoe/^[0-9]+(\.{0,1}[0-9]+|[0-9]*)$/"
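
The reworked check treats a value as unusable when it is NULL or contains no alphanumeric character at all, which is what the !~ '[a-zA-Z0-9]' test expresses. A quick illustration:

    SELECT v, (v IS NULL OR v !~ '[a-zA-Z0-9]') AS unusable
    FROM (VALUES (NULL), (''), ('--'), ('AB123')) AS t(v);
    -- => true, true, true, false: only 'AB123' counts as a usable identifier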
 
@@ -586,6 +592,7 @@ imerge --output=$OUTPUT_DIR/$ORGNAME.xls \
  --input="ContractMaster:$OUTPUT_DIR/$CONTRACTO.csv" \
  --input="ItemMaster:$OUTPUT_DIR/$ITEM.csv" \
  --input="MfrMaster:$OUTPUT_DIR/$MFR.csv" \
+ --input="VendorMaster:$OUTPUT_DIR/$VENDOR.csv" \
  --input="PurchaseOrder:$OUTPUT_DIR/$PO.csv" \
  --input="User:$OUTPUT_DIR/$USER.csv" \
  --input="Location:$OUTPUT_DIR/$LOCATION.csv" \
@@ -594,3 +601,66 @@ imerge --output=$OUTPUT_DIR/$ORGNAME.xls \
  --input="Inventory:$OUTPUT_DIR/$INVENTORY.csv"

 exit
+
+ ####################################################
+ # EXPORT FOR UPLOADING
+ ####################################################
+ iexport -t $ITEMCOST \
+  -o "$OUTPUT_DIR/$ITEMCOST.csv" -f csv --no-quote-empty --no-quotes --headers --delim=$'\t' \
+  --exclude="id, validation_errors"
+
+ iexport -t $CONTRACTO \
+  -o "$OUTPUT_DIR/$CONTRACTO.csv" -f csv --no-quote-empty --no-quotes --headers --delim=$'\t' \
+  --exclude="id, validation_errors"
+
+ iexport -t $VENDOR \
+  -o "$OUTPUT_DIR/$VENDOR.csv" -f csv --no-quote-empty --quotes --headers --delim=$'\t' \
+  --exclude="id, validation_errors"
+
+ iexport -t $MFR \
+  -o "$OUTPUT_DIR/$MFR.csv" -f csv --no-quote-empty --quotes --headers --delim=$'\t' \
+  --exclude="id, validation_errors"
+
+ iexport -t $GL \
+  -o "$OUTPUT_DIR/$GL.csv" -f csv --no-quote-empty --quotes --headers --delim=$'\t' \
+  --exclude="id, validation_errors"
+
+ iexport -t $PO \
+  -o "$OUTPUT_DIR/$PO.csv" -f csv --no-quote-empty --no-quotes --headers --delim=$'\t' \
+  --exclude="id, validation_errors"
+
+ iexport -t $INVENTORY \
+  -o "$OUTPUT_DIR/$INVENTORY.csv" -f csv --no-quote-empty --quotes --headers --delim=$'\t' \
+  --exclude="id, validation_errors"
+
+ iexport -t $REQ \
+  -o "$OUTPUT_DIR/$REQ.csv" -f csv --no-quote-empty --quotes --headers --delim=$'\t' \
+  --exclude="id, validation_errors"
+
+ iexport -t $ITEM \
+  -o "$OUTPUT_DIR/$ITEM.csv" -f csv --no-quote-empty --no-quotes --headers --delim=$'\t' \
+  --query="select item_id, item_descr, vendor_name, vendor_code, vendor_item_id, mfr_name, mfr_number, mfr_item_id, corp_id, corp_name, active, array_to_string(array_agg(item_uom), ',') item_uom, array_to_string(array_agg(item_qoe), ',') item_qoe, array_to_string(array_agg(item_price), ',') item_price
+  from
+  (
+    select * from items order by item_id, item_descr, vendor_name, vendor_code, vendor_item_id, mfr_name, mfr_number, mfr_item_id, corp_id, corp_name, active, item_qoe::float desc
+  ) abc
+  group by item_id, item_descr, vendor_name, vendor_code, vendor_item_id, mfr_name, mfr_number, mfr_item_id, corp_id, corp_name, active
+  " \
+  --exclude="id, validation_errors, group_index"
+
+
+ ipatch -q "
+  update users set phone = regexp_replace(phone, '[^0123456789]', '', 'g');
+  update users set phone = '1234567890' where phone is null or length(phone) < 10;
+  update users set first_name = username where length(first_name) < 2;
+  update users set last_name = username where length(last_name) < 2;
+ "
+ iexport -t $USER \
+  -o "$OUTPUT_DIR/$USER.csv" -f csv --no-quote-empty --no-quotes --no-headers --delim=',' \
+  --query="select first_name, last_name, phone, 0 as tmp1, -1 as tmp2, -1 as tmp3, -1 as tmp4, -1 as tmp5, email, '12345678' as passwd, 'Analyst' as tmp6 from users WHERE email IS NOT NULL AND length(email) > 0"
+
+ iexport -t $LOCATION \
+  -o "$OUTPUT_DIR/$LOCATION.csv" -f csv --no-quote-empty --quotes --headers --delim=$'\t' \
+  --exclude="id, validation_errors"
+
+
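
The $ITEM export above rolls the per-UOM rows of an item up into comma-joined lists. A reduced sketch of that roll-up, with 'items' standing in for the table behind $ITEM and only three of the columns kept; note that it relies on the inner ORDER BY to drive the array_agg order, which PostgreSQL honors for a pre-sorted subquery but does not strictly guarantee:

    SELECT item_id,
           array_to_string(array_agg(item_uom), ',') AS item_uom,
           array_to_string(array_agg(item_qoe), ',') AS item_qoe
    FROM (SELECT * FROM items ORDER BY item_id, item_qoe::float DESC) abc
    GROUP BY item_id;
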
data/lib/idata/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Idata
-   VERSION = "0.1.33"
+   VERSION = "0.2.1"
  end
data/sample.sh CHANGED
@@ -13,11 +13,11 @@
 # SET UP ENVIRONMENT VARIABLES
 ###################################################################################
 # Instead of passing PostgreSQL credentials as parameters to every validation command,
- # you can set the corresponding environment variables which can be used by the those commands
+ # you can set the corresponding environment variables which can be used by those commands
 export HOST="localhost"
 export USERNAME="postgres"
- export PASSWORD="postgres"
- export DATABASE="northeast_georgia"
+ export PASSWORD="t0p!Secret"
+ export DATABASE="sampledb"
 export LISTEN=5432

 # Input file paths and corresponding table names
@@ -39,6 +39,7 @@ REPORT="/tmp/report.xls"
 # Load data from VendorMaster.csv to the corresponding vendors table
 # and from ItemMaster.csv to the items table.
 # Note: instead of using the iload utility, you can use the PSQL COPY of PostgreSQL;
+ # in such case, make sure an AUTO ID (unique) field is added to every table
 iload -i "$FVENDOR" -t "$VENDOR" -f csv
 iload -i "$FITEM" -t "$ITEM" -f csv
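
The COPY route mentioned in the note would look roughly like the sketch below; the column names are illustrative, and the serial column supplies the AUTO ID the idata utilities expect:

    -- Run inside psql; real column lists come from the CSV headers
    CREATE TABLE vendors (
      id          serial PRIMARY KEY,   -- the AUTO ID (unique) field the note asks for
      vendor_code text,
      vendor_name text
    );
    \copy vendors (vendor_code, vendor_name) FROM 'VendorMaster.csv' WITH (FORMAT csv, HEADER true)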
 
@@ -76,6 +77,8 @@ ivalidate --table=$ITEM \
  --not-null="mfr_number" \
  --not-null="mfr_name" \
  --not-null="active" \
+ --rquery="cast(item_id as integer) > 1000 -- invalid item_id" \
+ --rquery="purchase_date < mfr_date -- purchase_date comes before mfr_date" \
  --match="corp_id/[a-zA-Z0-9]/" \
  --match="corp_name/[a-zA-Z0-9]/" \
  --match="vendor_code/[a-zA-Z0-9]/" \
@@ -102,7 +105,7 @@ ivalidate --table=$ITEM \
 # Just to make a MORE comprehensive report, we can:
 # 1 Create a summary table which tells us how many errors were found and how many records are associated with each...
 # 2 Extract the first 1000 sample records for every error
- # 3 Put all together into one single Excel report
+ # 3 Put them all together into one single Excel report

 # 1) Create error summary report table and write to /tmp/summary.csv
 # This can be done using the iexport utility which can generate a CSV file from a data table or from a custom query
@@ -111,8 +114,8 @@ iexport --output="$TMP/summary.csv" -f csv --no-quote-empty --quotes --headers \
  --query="(select '$FVENDOR' as input_file, unnest(string_to_array(validation_errors, ' || ')) as error, count(*), round((count(*) * 100)::numeric / (select count(*) from $VENDOR), 2)::varchar || '%' as percentage from $VENDOR group by error order by error) union
  (select '$FITEM' as input_file, unnest(string_to_array(validation_errors, ' || ')) as error, count(*), round((count(*) * 100)::numeric / (select count(*) from $ITEM), 2)::varchar || '%' as percentage from $ITEM group by error order by error)"

- # Export the first 1000 records of every error in the items table
- # Write the results to /tmp/items.csv
+ # Export the first 1000 sample records of every error in the vendors table
+ # Write the results to /tmp/vendors.csv
 iexport --table=$VENDOR --output="$TMP/$VENDOR.csv" -f csv --no-quote-empty --quotes --headers \
  --query="select * from (select ROW_NUMBER() OVER (PARTITION BY error) AS group_index, *
  FROM ( select unnest(string_to_array(validation_errors, ' || ')) as error, * from
@@ -120,8 +123,8 @@ iexport --table=$VENDOR --output="$TMP/$VENDOR.csv" -f csv --no-quote-empty --qu
  where group_index <= 1000" \
  --exclude="id, validation_errors, group_index"

- # 2) Export the first 1000 records of every error in the vendors table
- # Write the results to /tmp/vendors.csv
+ # 2) Export the first 1000 sample records for every error in the items table
+ # Write the results to /tmp/items.csv
 iexport --table=$ITEM --output="$TMP/$ITEM.csv" -f csv --no-quote-empty --quotes --headers \
  --query="select * from (select ROW_NUMBER() OVER (PARTITION BY error) AS group_index, *
  FROM ( select unnest(string_to_array(validation_errors, ' || ')) as error, * from
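
Both sample exports use the same window-function pattern: split validation_errors into one row per error, number the rows within each error group, then keep at most 1000 per group. Stripped of the export plumbing, and with 'vendors' standing in for $VENDOR, the query reads:

    SELECT * FROM (
      SELECT ROW_NUMBER() OVER (PARTITION BY error) AS group_index, t.*
      FROM (SELECT unnest(string_to_array(validation_errors, ' || ')) AS error, v.*
            FROM vendors v) t
    ) s
    WHERE group_index <= 1000;   -- first 1000 sample rows per distinct error
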
@@ -130,11 +133,11 @@ iexport --table=$ITEM --output="$TMP/$ITEM.csv" -f csv --no-quote-empty --quotes
  --exclude="id, validation_errors, group_index"

 # 3) Put the above 3 CSV files into one Excel file /tmp/report.xls
- # This can be done using imerge which takes a list of CSV files put them to corresponding sheets
+ # This can be done using the imerge utility, which takes a list of CSV files and puts them into corresponding sheets
 # of one single Excel file
 imerge --output=$REPORT \
  --input="Summary:$TMP/summary.csv" \
- --input="$FVENDOR:$TMP/$VENDOR.csv" \
+ --input="VendorMaster:$TMP/$VENDOR.csv" \
  --input="ItemMaster:$TMP/$ITEM.csv"

 # CLEANUP
metadata CHANGED
@@ -1,69 +1,69 @@
  --- !ruby/object:Gem::Specification
  name: idata
  version: !ruby/object:Gem::Version
-   version: 0.1.33
+   version: 0.2.1
  platform: ruby
  authors:
  - Nghi Pham
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2014-07-31 00:00:00.000000000 Z
+ date: 2014-09-25 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: bundler
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - ~>
+     - - "~>"
        - !ruby/object:Gem::Version
          version: '1.3'
    type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - ~>
+     - - "~>"
        - !ruby/object:Gem::Version
          version: '1.3'
  - !ruby/object:Gem::Dependency
    name: rake
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - ~>
+     - - "~>"
        - !ruby/object:Gem::Version
          version: '10.0'
    type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - ~>
+     - - "~>"
        - !ruby/object:Gem::Version
          version: '10.0'
  - !ruby/object:Gem::Dependency
    name: rails
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - ~>
+     - - "~>"
        - !ruby/object:Gem::Version
          version: '4.0'
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - ~>
+     - - "~>"
        - !ruby/object:Gem::Version
          version: '4.0'
  - !ruby/object:Gem::Dependency
    name: pg
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - ~>
+     - - "~>"
        - !ruby/object:Gem::Version
          version: '0.16'
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - ~>
+     - - "~>"
        - !ruby/object:Gem::Version
          version: '0.16'
  description: 'Included: iload, ivalidate, isanitize, ipatch, ieval, iexpor'
@@ -80,10 +80,11 @@ executables:
  extensions: []
  extra_rdoc_files: []
  files:
- - .gitignore
+ - ".gitignore"
  - Gemfile
  - LICENSE.txt
  - README.md
+ - README2.md
  - Rakefile
  - bin/ieval
  - bin/iexport
@@ -92,6 +93,8 @@ files:
  - bin/ipatch
  - bin/isanitize
  - bin/ivalidate
+ - bin/ivalidate2
+ - full-pg-lawson.sh
  - full-pg.sh
  - full.sh
  - idata.gemspec
@@ -108,18 +111,19 @@ require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
    requirements:
-   - - '>='
+   - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
    requirements:
-   - - '>='
+   - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.2.1
+ rubygems_version: 2.2.2
  signing_key:
  specification_version: 4
  summary: Data validation utilities
  test_files: []
+ has_rdoc: