dataops-testgen 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. dataops_testgen-2.2.0.dist-info/LICENSE +203 -0
  2. dataops_testgen-2.2.0.dist-info/METADATA +287 -0
  3. dataops_testgen-2.2.0.dist-info/NOTICE +5 -0
  4. dataops_testgen-2.2.0.dist-info/RECORD +270 -0
  5. dataops_testgen-2.2.0.dist-info/WHEEL +5 -0
  6. dataops_testgen-2.2.0.dist-info/entry_points.txt +2 -0
  7. dataops_testgen-2.2.0.dist-info/top_level.txt +1 -0
  8. testgen/__init__.py +0 -0
  9. testgen/__main__.py +770 -0
  10. testgen/commands/__init__.py +0 -0
  11. testgen/commands/queries/__init__.py +0 -0
  12. testgen/commands/queries/execute_cat_tests_query.py +95 -0
  13. testgen/commands/queries/execute_tests_query.py +160 -0
  14. testgen/commands/queries/generate_tests_query.py +94 -0
  15. testgen/commands/queries/profiling_query.py +366 -0
  16. testgen/commands/queries/test_parameter_validation_query.py +88 -0
  17. testgen/commands/run_execute_cat_tests.py +162 -0
  18. testgen/commands/run_execute_tests.py +168 -0
  19. testgen/commands/run_generate_tests.py +107 -0
  20. testgen/commands/run_get_entities.py +122 -0
  21. testgen/commands/run_launch_db_config.py +84 -0
  22. testgen/commands/run_observability_exporter.py +330 -0
  23. testgen/commands/run_profiling_bridge.py +495 -0
  24. testgen/commands/run_quick_start.py +168 -0
  25. testgen/commands/run_setup_profiling_tools.py +96 -0
  26. testgen/commands/run_test_definition.py +146 -0
  27. testgen/commands/run_test_parameter_validation.py +135 -0
  28. testgen/commands/run_upgrade_db_config.py +156 -0
  29. testgen/common/__init__.py +8 -0
  30. testgen/common/clean_sql.py +53 -0
  31. testgen/common/credentials.py +25 -0
  32. testgen/common/database/__init__.py +0 -0
  33. testgen/common/database/database_service.py +629 -0
  34. testgen/common/database/flavor/__init__.py +0 -0
  35. testgen/common/database/flavor/flavor_service.py +75 -0
  36. testgen/common/database/flavor/mssql_flavor_service.py +34 -0
  37. testgen/common/database/flavor/postgresql_flavor_service.py +5 -0
  38. testgen/common/database/flavor/redshift_flavor_service.py +22 -0
  39. testgen/common/database/flavor/snowflake_flavor_service.py +69 -0
  40. testgen/common/database/flavor/trino_flavor_service.py +21 -0
  41. testgen/common/date_service.py +68 -0
  42. testgen/common/display_service.py +85 -0
  43. testgen/common/docker_service.py +76 -0
  44. testgen/common/encrypt.py +55 -0
  45. testgen/common/get_pipeline_parms.py +57 -0
  46. testgen/common/logs.py +79 -0
  47. testgen/common/process_service.py +62 -0
  48. testgen/common/read_file.py +69 -0
  49. testgen/settings.py +440 -0
  50. testgen/template/dbsetup/010_create_base_schema.sql +2 -0
  51. testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +179 -0
  52. testgen/template/dbsetup/030_initialize_new_schema_structure.sql +735 -0
  53. testgen/template/dbsetup/040_populate_new_schema_project.sql +59 -0
  54. testgen/template/dbsetup/050_populate_new_schema_metadata.sql +1517 -0
  55. testgen/template/dbsetup/060_create_standard_views.sql +248 -0
  56. testgen/template/dbsetup/070_create_default_users.sql +17 -0
  57. testgen/template/dbsetup/075_grant_role_rights.sql +43 -0
  58. testgen/template/dbsetup/080_set_current_revision.sql +5 -0
  59. testgen/template/dbupgrade/0100_incremental_upgrade.sql +5 -0
  60. testgen/template/dbupgrade/0101_incremental_upgrade.sql +15 -0
  61. testgen/template/dbupgrade/0102_incremental_upgrade.sql +4 -0
  62. testgen/template/dbupgrade/0103_incremental_upgrade.sql +22 -0
  63. testgen/template/dbupgrade/0104_incremental_upgrade.sql +44 -0
  64. testgen/template/dbupgrade/0105_incremental_upgrade.sql +1 -0
  65. testgen/template/dbupgrade/0106_incremental_upgrade.sql +5 -0
  66. testgen/template/dbupgrade/0107_incremental_upgrade.sql +3 -0
  67. testgen/template/dbupgrade_helpers/get_tg_revision.sql +2 -0
  68. testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +116 -0
  69. testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql +11 -0
  70. testgen/template/exec_cat_tests/ex_cat_results_parse.sql +69 -0
  71. testgen/template/exec_cat_tests/ex_cat_retrieve_agg_test_parms.sql +6 -0
  72. testgen/template/exec_cat_tests/ex_cat_test_query.sql +8 -0
  73. testgen/template/execution/ex_finalize_test_run_results.sql +37 -0
  74. testgen/template/execution/ex_get_tests_non_cat.sql +47 -0
  75. testgen/template/execution/ex_update_test_record_in_testrun_table.sql +27 -0
  76. testgen/template/execution/ex_write_test_record_to_testrun_table.sql +6 -0
  77. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +48 -0
  78. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql +34 -0
  79. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql +49 -0
  80. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql +49 -0
  81. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +49 -0
  82. testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +39 -0
  83. testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +58 -0
  84. testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +44 -0
  85. testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +37 -0
  86. testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +53 -0
  87. testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +46 -0
  88. testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +59 -0
  89. testgen/template/flavors/generic/profiling/contingency_counts.sql +3 -0
  90. testgen/template/flavors/generic/validate_tests/ex_get_project_column_list_generic.sql +3 -0
  91. testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql +53 -0
  92. testgen/template/flavors/mssql/profiling/project_ddf_query_mssql.sql +35 -0
  93. testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +246 -0
  94. testgen/template/flavors/mssql/profiling/project_secondary_profiling_query_mssql.sql +36 -0
  95. testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql +8 -0
  96. testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql +12 -0
  97. testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql +54 -0
  98. testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql +4 -0
  99. testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql +1 -0
  100. testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql +46 -0
  101. testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql +59 -0
  102. testgen/template/flavors/postgresql/profiling/project_ddf_query_postgresql.sql +42 -0
  103. testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +225 -0
  104. testgen/template/flavors/postgresql/profiling/project_secondary_profiling_query_postgresql.sql +28 -0
  105. testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql +157 -0
  106. testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql +1 -0
  107. testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql +2 -0
  108. testgen/template/flavors/redshift/profiling/project_ddf_query_redshift.sql +38 -0
  109. testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +221 -0
  110. testgen/template/flavors/redshift/profiling/project_secondary_profiling_query_redshift.sql +29 -0
  111. testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql +115 -0
  112. testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql +1 -0
  113. testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql +2 -0
  114. testgen/template/flavors/snowflake/profiling/project_ddf_query_snowflake.sql +38 -0
  115. testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +220 -0
  116. testgen/template/flavors/snowflake/profiling/project_secondary_profiling_query_snowflake.sql +29 -0
  117. testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql +69 -0
  118. testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql +1 -0
  119. testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql +6 -0
  120. testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml +219 -0
  121. testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql +92 -0
  122. testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql +1 -0
  123. testgen/template/gen_funny_cat_tests/gen_test_constant.sql +104 -0
  124. testgen/template/gen_funny_cat_tests/gen_test_distinct_value_ct.sql +98 -0
  125. testgen/template/gen_funny_cat_tests/gen_test_row_ct.sql +57 -0
  126. testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql +59 -0
  127. testgen/template/generation/gen_delete_old_tests.sql +5 -0
  128. testgen/template/generation/gen_insert_test_suite.sql +5 -0
  129. testgen/template/generation/gen_retrieve_or_insert_test_suite.sql +58 -0
  130. testgen/template/generation/gen_standard_test_type_list.sql +13 -0
  131. testgen/template/generation/gen_standard_tests.sql +48 -0
  132. testgen/template/get_entities/get_connection.sql +21 -0
  133. testgen/template/get_entities/get_connections_list.sql +9 -0
  134. testgen/template/get_entities/get_latest.sql +4 -0
  135. testgen/template/get_entities/get_profile.sql +12 -0
  136. testgen/template/get_entities/get_profile_info.sql +17 -0
  137. testgen/template/get_entities/get_profile_list.sql +17 -0
  138. testgen/template/get_entities/get_profile_screen.sql +275 -0
  139. testgen/template/get_entities/get_project_list.sql +6 -0
  140. testgen/template/get_entities/get_table_group_list.sql +10 -0
  141. testgen/template/get_entities/get_test_generation_list.sql +18 -0
  142. testgen/template/get_entities/get_test_info.sql +41 -0
  143. testgen/template/get_entities/get_test_results_for_run_cli.sql +16 -0
  144. testgen/template/get_entities/get_test_run_list.sql +24 -0
  145. testgen/template/get_entities/get_test_suite.sql +13 -0
  146. testgen/template/get_entities/get_test_suite_list.sql +18 -0
  147. testgen/template/get_entities/list_test_types.sql +4 -0
  148. testgen/template/observability/get_event_data.sql +23 -0
  149. testgen/template/observability/get_test_results.sql +41 -0
  150. testgen/template/observability/update_test_results_exported_to_observability.sql +12 -0
  151. testgen/template/parms/parms_profiling.sql +34 -0
  152. testgen/template/parms/parms_test_execution.sql +13 -0
  153. testgen/template/parms/parms_test_gen.sql +23 -0
  154. testgen/template/profiling/contingency_columns.sql +7 -0
  155. testgen/template/profiling/datatype_suggestions.sql +56 -0
  156. testgen/template/profiling/functional_datatype.sql +523 -0
  157. testgen/template/profiling/functional_tabletype_stage.sql +48 -0
  158. testgen/template/profiling/functional_tabletype_update.sql +8 -0
  159. testgen/template/profiling/pii_flag.sql +133 -0
  160. testgen/template/profiling/profile_anomalies_screen_column.sql +22 -0
  161. testgen/template/profiling/profile_anomalies_screen_multi_column.sql +58 -0
  162. testgen/template/profiling/profile_anomalies_screen_table.sql +22 -0
  163. testgen/template/profiling/profile_anomalies_screen_table_dates.sql +30 -0
  164. testgen/template/profiling/profile_anomalies_screen_variants.sql +40 -0
  165. testgen/template/profiling/profile_anomaly_types_get.sql +3 -0
  166. testgen/template/profiling/project_get_table_sample_count.sql +22 -0
  167. testgen/template/profiling/project_profile_run_record_insert.sql +8 -0
  168. testgen/template/profiling/project_profile_run_record_update.sql +5 -0
  169. testgen/template/profiling/project_profile_run_record_update_status.sql +5 -0
  170. testgen/template/profiling/project_update_profile_results_to_estimates.sql +32 -0
  171. testgen/template/profiling/refresh_anomalies.sql +33 -0
  172. testgen/template/profiling/refresh_data_chars_from_profiling.sql +156 -0
  173. testgen/template/profiling/secondary_profiling_columns.sql +12 -0
  174. testgen/template/profiling/secondary_profiling_delete.sql +4 -0
  175. testgen/template/profiling/secondary_profiling_update.sql +18 -0
  176. testgen/template/quick_start/populate_target_data.sql +1077 -0
  177. testgen/template/quick_start/recreate_target_data_schema.sql +167 -0
  178. testgen/template/quick_start/update_target_data.sql +100 -0
  179. testgen/template/updates/create_tmp_test_definition.sql +19 -0
  180. testgen/template/updates/get_test_def_parms.sql +38 -0
  181. testgen/template/updates/populate_stg_test_definitions.sql +184 -0
  182. testgen/template/validate_tests/ex_disable_tests_test_definitions.sql +5 -0
  183. testgen/template/validate_tests/ex_flag_tests_test_definitions.sql +64 -0
  184. testgen/template/validate_tests/ex_get_project_column_list_generic.sql +3 -0
  185. testgen/template/validate_tests/ex_get_test_column_list_tg.sql +65 -0
  186. testgen/template/validate_tests/ex_write_test_val_errors.sql +22 -0
  187. testgen/ui/__init__.py +0 -0
  188. testgen/ui/app.py +98 -0
  189. testgen/ui/assets/dk_logo.svg +46 -0
  190. testgen/ui/assets/question_mark.png +0 -0
  191. testgen/ui/assets/scripts.js +68 -0
  192. testgen/ui/assets/style.css +140 -0
  193. testgen/ui/bootstrap.py +109 -0
  194. testgen/ui/components/__init__.py +0 -0
  195. testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fBBc4.woff2 +0 -0
  196. testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fChc4EsA.woff2 +0 -0
  197. testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu4mxK.woff2 +0 -0
  198. testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu7GxKOzY.woff2 +0 -0
  199. testgen/ui/components/frontend/css/material-symbols-rounded.css +24 -0
  200. testgen/ui/components/frontend/css/material-symbols-rounded.woff2 +0 -0
  201. testgen/ui/components/frontend/css/roboto-font-faces.css +35 -0
  202. testgen/ui/components/frontend/css/shared.css +36 -0
  203. testgen/ui/components/frontend/img/dk_logo.svg +46 -0
  204. testgen/ui/components/frontend/index.html +17 -0
  205. testgen/ui/components/frontend/js/components/breadcrumbs.js +86 -0
  206. testgen/ui/components/frontend/js/components/button.js +66 -0
  207. testgen/ui/components/frontend/js/components/location.js +62 -0
  208. testgen/ui/components/frontend/js/components/select.js +75 -0
  209. testgen/ui/components/frontend/js/components/sidebar.js +358 -0
  210. testgen/ui/components/frontend/js/main.js +99 -0
  211. testgen/ui/components/frontend/js/streamlit.js +19 -0
  212. testgen/ui/components/frontend/js/van.min.js +1 -0
  213. testgen/ui/components/utils/__init__.py +0 -0
  214. testgen/ui/components/utils/callbacks.py +51 -0
  215. testgen/ui/components/utils/component.py +13 -0
  216. testgen/ui/components/widgets/__init__.py +6 -0
  217. testgen/ui/components/widgets/breadcrumbs.py +32 -0
  218. testgen/ui/components/widgets/location.py +65 -0
  219. testgen/ui/components/widgets/modal.py +97 -0
  220. testgen/ui/components/widgets/sidebar.py +69 -0
  221. testgen/ui/navigation/__init__.py +0 -0
  222. testgen/ui/navigation/menu.py +42 -0
  223. testgen/ui/navigation/page.py +20 -0
  224. testgen/ui/navigation/router.py +63 -0
  225. testgen/ui/queries/__init__.py +0 -0
  226. testgen/ui/queries/authentication_queries.py +47 -0
  227. testgen/ui/queries/connection_queries.py +121 -0
  228. testgen/ui/queries/profiling_queries.py +148 -0
  229. testgen/ui/queries/project_queries.py +9 -0
  230. testgen/ui/queries/table_group_queries.py +186 -0
  231. testgen/ui/queries/test_definition_queries.py +270 -0
  232. testgen/ui/queries/test_run_queries.py +32 -0
  233. testgen/ui/queries/test_suite_queries.py +145 -0
  234. testgen/ui/scripts/__init__.py +0 -0
  235. testgen/ui/scripts/patch_streamlit.py +111 -0
  236. testgen/ui/services/__init__.py +0 -0
  237. testgen/ui/services/authentication_service.py +119 -0
  238. testgen/ui/services/connection_service.py +220 -0
  239. testgen/ui/services/database_service.py +282 -0
  240. testgen/ui/services/form_service.py +1008 -0
  241. testgen/ui/services/javascript_service.py +44 -0
  242. testgen/ui/services/query_service.py +316 -0
  243. testgen/ui/services/string_service.py +12 -0
  244. testgen/ui/services/table_group_service.py +130 -0
  245. testgen/ui/services/test_definition_service.py +117 -0
  246. testgen/ui/services/test_run_service.py +13 -0
  247. testgen/ui/services/test_suite_service.py +76 -0
  248. testgen/ui/services/toolbar_service.py +77 -0
  249. testgen/ui/session.py +46 -0
  250. testgen/ui/views/__init__.py +0 -0
  251. testgen/ui/views/app_log_modal.py +92 -0
  252. testgen/ui/views/connections.py +72 -0
  253. testgen/ui/views/connections_base.py +367 -0
  254. testgen/ui/views/login.py +40 -0
  255. testgen/ui/views/not_found.py +16 -0
  256. testgen/ui/views/overview.py +34 -0
  257. testgen/ui/views/profiling_anomalies.py +501 -0
  258. testgen/ui/views/profiling_details.py +335 -0
  259. testgen/ui/views/profiling_modal.py +40 -0
  260. testgen/ui/views/profiling_results.py +206 -0
  261. testgen/ui/views/profiling_summary.py +177 -0
  262. testgen/ui/views/project_settings.py +74 -0
  263. testgen/ui/views/table_groups.py +530 -0
  264. testgen/ui/views/test_definitions.py +1020 -0
  265. testgen/ui/views/test_results.py +908 -0
  266. testgen/ui/views/test_runs.py +195 -0
  267. testgen/ui/views/test_suites.py +545 -0
  268. testgen/utils/__init__.py +0 -0
  269. testgen/utils/plugins.py +17 -0
  270. testgen/utils/singleton.py +14 -0
@@ -0,0 +1,133 @@
1
+ -- Primary Screen: Alpha. Sets pii_flag on general_type A rows of one profiling run; the first matching WHEN below wins.
2
+ WITH screen
3
+ AS ( SELECT id AS profile_results_id,
4
+ table_name, column_name,
5
+ CASE
6
+ WHEN functional_data_type IN ('Person Full Name', 'Person Given Name', 'Person Last Name') THEN 'B/NAME/Individual'
7
+
8
+ WHEN LOWER(column_name) SIMILAR TO '%(maiden|surname)%' THEN 'B/NAME/Individual'
9
+
10
+ WHEN functional_data_type = 'Historical Date'
11
+ AND LOWER(column_name) SIMILAR TO '%(dob|birth)%' THEN 'B/DEMO/Birthdate'
12
+
13
+ WHEN LOWER(column_name)
14
+ SIMILAR TO '%(nationality|race|ethnicity|gender|sex|marital)%' THEN 'B/DEMO/Demographic'
15
+
16
+ WHEN LOWER(column_name) ILIKE '%med%record%' THEN 'A/DEMO/Medical'
17
+
18
+ WHEN LOWER(column_name) SIMILAR TO '%(password|pwd|auth)%' THEN 'A/ID/Security'
19
+
20
+ WHEN max_length < 10
21
+ AND avg_embedded_spaces < 0.1
22
+ AND (column_name ILIKE 'pin%' OR column_name ILIKE '%pin') THEN 'A/ID/Security'
23
+
24
+ WHEN std_pattern_match = 'SSN'
25
+ AND LOWER(column_name) SIMILAR TO '%(ss|soc|sec)%' THEN 'A/ID/SSN'
26
+
27
+ WHEN TRIM(fn_parsefreq(top_patterns, 1, 2)) -- presumably the most frequent value pattern; fn_parsefreq is defined elsewhere - confirm
28
+ IN ('NNNNNNNNN', 'NNN-NN-NNNN', 'NNN NN NNNN')
29
+ AND LEFT(min_text, 1) = '9'
30
+ AND avg_length BETWEEN 8.8 AND 11.2
31
+ AND LOWER(column_name) SIMILAR TO '%(tax|tin|fed)%' THEN 'A/ID/Tax'
32
+
33
+ WHEN TRIM(fn_parsefreq(top_patterns, 1, 2))
34
+ IN ('NNNNNNNNN', 'ANNNNNNNN')
35
+ AND avg_length BETWEEN 8.8 AND 9.2
36
+ AND LOWER(column_name) SIMILAR TO '%(passp|pp)%' THEN 'A/ID/Passport'
37
+
38
+ WHEN std_pattern_match = 'CREDIT_CARD'
39
+ AND LOWER(column_name) SIMILAR TO '%(credit|card|cc|acct|account)%' THEN 'A/ID/Credit'
40
+
41
+ WHEN TRIM(fn_parsefreq(top_patterns, 1, 2))
42
+ ~* '^[Aa]{6}[A-Za-z0-9]{2}N{0,3}$' -- anchored case-insensitive regex; ILIKE took the brackets and braces literally, so this rule never matched
43
+ AND TRIM(fn_parsefreq(top_patterns, 2, 2))
44
+ ~* '^[Aa]{6}[A-Za-z0-9]{2}N{0,3}$' -- same regex applied to the second pattern entry
45
+ AND avg_length BETWEEN 7.8 AND 11.2
46
+ AND LOWER(column_name) SIMILAR TO '%(swift|bic)%' THEN 'A/ID/Bank'
47
+
48
+ WHEN max_length <= 34
49
+ AND UPPER(LEFT(TRIM(fn_parsefreq(top_patterns, 1, 2)), 2))
50
+ = 'AA'
51
+ AND (column_name ILIKE 'iban%' OR column_name ILIKE '%iban') THEN 'A/ID/Bank'
52
+
53
+ WHEN avg_length BETWEEN 5 AND 20
54
+ AND LOWER(column_name) SIMILAR TO '%(bank|checking|saving|debit)%' THEN 'A/ID/Bank'
55
+
56
+ WHEN avg_embedded_spaces < 0.5
57
+ AND avg_length < 20
58
+ AND (LOWER(column_name) SIMILAR TO '%(dr|op)%lic%'
59
+ OR LOWER(column_name) SIMILAR TO '%(driver|license|operator)%') THEN 'A/ID/License'
60
+
61
+ WHEN LOWER(column_name) IN ('patient_id', 'pat_id') THEN 'A/ID/Medical'
62
+
63
+ WHEN LOWER(column_name) IN ('member_id') THEN 'B/ID/Commercial'
64
+
65
+ END AS pii_flag -- NULL when no rule matched
66
+
67
+ FROM profile_results p
68
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
69
+ AND general_type = 'A' )
70
+ UPDATE profile_results
71
+ SET pii_flag = screen.pii_flag
72
+ FROM screen
73
+ WHERE screen.pii_flag > '' -- skips rows where no rule matched (NULL) or the flag is empty
74
+ AND profile_results.id = screen.profile_results_id;
75
+
76
+ -- Secondary Screen - Alpha. Applies extra rules only to still-unflagged general_type A columns in tables that already hold more than one flagged column.
77
+ WITH table_pii_counts
78
+ AS ( SELECT table_name, COUNT(pii_flag) AS pii_ct -- COUNT ignores NULLs, so this is the number of already-flagged columns per table
79
+ FROM profile_results
80
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
81
+ GROUP BY table_name ),
82
+ screen
83
+ AS ( SELECT id AS profile_results_id,
84
+ p.table_name, p.column_name,
85
+ CASE -- first matching WHEN wins; no ELSE, so unmatched rows yield NULL
86
+ WHEN functional_data_type = 'Email' THEN 'B/CONTACT/Email'
87
+ WHEN functional_data_type IN ('Address', 'City', 'State', 'Zip')
88
+ THEN 'B/CONTACT/Address'
89
+ WHEN functional_data_type = 'Phone'
90
+ THEN 'B/CONTACT/Phone'
91
+
92
+ WHEN LOWER(column_name) SIMILAR TO '%(insur|health|med|patient)%'
93
+ THEN 'A/DEMO/Medical'
94
+
95
+ WHEN LOWER(column_name) SIMILAR TO '%(vehicle|vin|auto|car)%'
96
+ AND avg_length BETWEEN 16 AND 18
97
+ AND max_length < 20
98
+ AND TRIM(fn_parsefreq(top_patterns, 1, 2)) -- presumably the most frequent value pattern; fn_parsefreq is defined elsewhere - confirm
99
+ = 'AAANAAAAANNNNNNNN' THEN 'B/ID/Auto'
100
+
101
+ WHEN LOWER(column_name) SIMILAR TO
102
+ '%(voice|fingerprint|retina|auth|biometric|iris|face_recog)%'
103
+ THEN 'A/ID/Security'
104
+
105
+ WHEN LOWER(column_name) = 'dna'
106
+ OR LOWER(column_name) ILIKE '%\_dna' -- backslash makes the underscore literal instead of a one-character wildcard
107
+ OR LOWER(column_name) ILIKE 'dna\_%'
108
+ THEN 'A/DEMO/Demographic'
109
+
110
+ WHEN column_name ILIKE '%rout%'
111
+ AND avg_length BETWEEN 8.8 AND 11.2
112
+ AND TRIM(fn_parsefreq(top_patterns, 1, 2))
113
+ IN ('NNNNNNNNN', 'NNNN-NNNN-N') THEN 'C/ID/Bank'
114
+
115
+ WHEN LOWER(column_name) SIMILAR TO '%(salary|income|wage)%'
116
+ THEN 'B/DEMO/Financial'
117
+
118
+ WHEN LOWER(column_name) SIMILAR TO '%(user_id|userid)%'
119
+ THEN 'C/ID/Security'
120
+
121
+ END AS pii_flag
122
+ FROM profile_results p
123
+ INNER JOIN table_pii_counts t
124
+ ON (p.table_name = t.table_name) -- joined on table_name only; schema_name is not part of the key
125
+ WHERE p.profile_run_id = '{PROFILE_RUN_ID}'
126
+ AND p.general_type = 'A'
127
+ AND p.pii_flag IS NULL -- leaves flags that are already set untouched
128
+ AND t.pii_ct > 1 )
129
+ UPDATE profile_results
130
+ SET pii_flag = screen.pii_flag
131
+ FROM screen
132
+ WHERE screen.pii_flag > '' -- skips rows where no rule matched (NULL) or the flag is empty
133
+ AND profile_results.id = screen.profile_results_id;
@@ -0,0 +1,22 @@
1
+ INSERT INTO profile_anomaly_results -- column-level screen: one result row per profile_results row of the run that meets the criteria
2
+ (project_code, table_groups_id, profile_run_id, anomaly_id,
3
+ schema_name, table_name, column_name, column_type, detail)
4
+ SELECT p.project_code,
5
+ p.table_groups_id,
6
+ p.profile_run_id,
7
+ '{ANOMALY_ID}' as anomaly_id,
8
+ p.schema_name,
9
+ p.table_name,
10
+ p.column_name,
11
+ p.column_type,
12
+ {DETAIL_EXPRESSION} AS detail
13
+ FROM profile_results p
14
+ LEFT JOIN v_inactive_anomalies i
15
+ ON (p.table_groups_id = i.table_groups_id
16
+ AND p.schema_name = i.schema_name
17
+ AND p.table_name = i.table_name
18
+ AND p.column_name = i.column_name
19
+ AND '{ANOMALY_ID}' = i.anomaly_id)
20
+ WHERE p.profile_run_id = '{PROFILE_RUN_ID}'::UUID
21
+ AND i.anomaly_id IS NULL -- anti-join: drops columns that have a matching row in v_inactive_anomalies for this anomaly
22
+ AND {ANOMALY_CRITERIA}; -- criteria text is spliced into the template before execution; presumably sourced from profile_anomaly_types - confirm in caller
@@ -0,0 +1,58 @@
1
+ WITH mults AS ( SELECT p.project_code, -- mults: per-column-name statistics for names that occur in more than one profile_results row of the run
2
+ p.table_groups_id,
3
+ p.schema_name,
4
+ p.column_name,
5
+ COUNT(*) AS column_ct,
6
+ COUNT(DISTINCT p.column_type) AS type_ct,
7
+ COUNT(DISTINCT p.general_type) AS general_type_ct,
8
+ MIN(p.column_type::TEXT) AS min_type,
9
+ MAX(p.column_type::TEXT) AS max_type,
10
+ MIN(p.distinct_pattern_ct) AS min_pattern_ct,
11
+ MAX(p.distinct_pattern_ct) AS max_pattern_ct,
12
+ SUM(p.distinct_pattern_ct) AS sum_pattern_ct,
13
+ STRING_AGG(table_name, ', ' order by table_name) as table_list,
14
+ MAX(RIGHT(REPEAT('0', 20) || SPLIT_PART(p.top_patterns, '|', 1), 20) || '|' || SPLIT_PART(p.top_patterns, '|', 2) )as very_top_pattern -- zero-pads the first field to 20 chars so the text MAX compares it at fixed width; presumably that field is a count - confirm
15
+ FROM profile_results p
16
+ WHERE p.profile_run_id = '{PROFILE_RUN_ID}'::UUID
17
+ GROUP BY p.project_code, p.table_groups_id, schema_name, p.column_name
18
+ HAVING COUNT(*) > 1 ),
19
+ subset AS
20
+ (
21
+ SELECT p.project_code,
22
+ p.table_groups_id,
23
+ p.profile_run_id,
24
+ '{ANOMALY_ID}' as anomaly_id,
25
+ p.schema_name,
26
+ p.table_name,
27
+ p.column_name,
28
+ p.column_type,
29
+ p.top_patterns,
30
+ ltrim(m.very_top_pattern, '0') as very_top_pattern, -- strips the zero padding added in mults
31
+ m.table_list,
32
+ {DETAIL_EXPRESSION} AS detail
33
+ FROM profile_results p
34
+ INNER JOIN mults m
35
+ ON p.project_code = m.project_code
36
+ AND p.table_groups_id = m.table_groups_id
37
+ AND p.schema_name = m.schema_name
38
+ AND p.column_name = m.column_name
39
+ LEFT JOIN v_inactive_anomalies i
40
+ ON (p.table_groups_id = i.table_groups_id
41
+ AND p.schema_name = i.schema_name
42
+ AND p.table_name = i.table_name
43
+ AND p.column_name = i.column_name
44
+ AND '{ANOMALY_ID}' = i.anomaly_id)
45
+ WHERE p.profile_run_id = '{PROFILE_RUN_ID}'::UUID
46
+ AND i.anomaly_id IS NULL -- anti-join: drops columns that have a matching row in v_inactive_anomalies
47
+ AND {ANOMALY_CRITERIA}
48
+ )
49
+ INSERT INTO profile_anomaly_results
50
+ (project_code, table_groups_id, profile_run_id, anomaly_id,
51
+ schema_name, table_name, column_name, column_type, detail)
52
+ SELECT project_code, table_groups_id, profile_run_id, anomaly_id,
53
+ schema_name, '(multi-table)' as table_name, -- literal marker stored in place of a single table name
54
+ column_name, '(multiple)' as column_type,
55
+ detail || ' , Tables: ' || table_list AS detail
56
+ FROM subset
57
+ GROUP BY project_code, table_groups_id, profile_run_id, anomaly_id, -- collapses the per-table rows of subset into one row per distinct combination
58
+ schema_name, column_name, table_list, detail;
@@ -0,0 +1,22 @@
1
+ INSERT INTO profile_anomaly_results -- table-level screen: one result row per table of the run whose grouped rows satisfy the HAVING criteria
2
+ (project_code, table_groups_id, profile_run_id, anomaly_id,
3
+ schema_name, table_name, column_name, detail, disposition)
4
+ SELECT p.project_code,
5
+ p.table_groups_id,
6
+ p.profile_run_id,
7
+ '{ANOMALY_ID}' as anomaly_id,
8
+ p.schema_name,
9
+ p.table_name,
10
+ '(Table)' as column_name, -- literal marker stored in place of a column name
11
+ {DETAIL_EXPRESSION} AS detail,
12
+ CASE WHEN i.anomaly_id IS NULL THEN NULL ELSE 'Inactive' END as disposition -- matches in v_inactive_anomalies are kept but marked Inactive
13
+ FROM profile_results p
14
+ LEFT JOIN v_inactive_anomalies i
15
+ ON (p.table_groups_id = i.table_groups_id
16
+ AND p.schema_name = i.schema_name
17
+ AND p.table_name = i.table_name
18
+ AND '{ANOMALY_ID}' = i.anomaly_id)
19
+ WHERE p.profile_run_id = '{PROFILE_RUN_ID}'::UUID
20
+ GROUP BY p.project_code, p.table_groups_id, p.profile_run_id,
21
+ p.schema_name, p.table_name, i.anomaly_id -- i.anomaly_id is read un-aggregated by the disposition CASE, so it must be grouped; the join is per table, so groups do not split
22
+ HAVING {ANOMALY_CRITERIA};
@@ -0,0 +1,30 @@
1
+ INSERT INTO profile_anomaly_results -- table-level screen over general_type D columns: one result row per table that satisfies the HAVING criteria
2
+ (project_code, table_groups_id, profile_run_id, anomaly_id,
3
+ schema_name, table_name, column_name, detail)
4
+ SELECT p.project_code,
5
+ p.table_groups_id,
6
+ p.profile_run_id,
7
+ '{ANOMALY_ID}' as anomaly_id,
8
+ p.schema_name,
9
+ p.table_name,
10
+ CASE
11
+ WHEN COUNT(p.column_name) > 2 THEN '(multi-column)' -- more than two columns: store a marker here and list the names in detail instead
12
+ ELSE STRING_AGG(p.column_name, ', ' ORDER BY p.position)
13
+ END as column_name,
14
+ {DETAIL_EXPRESSION}
15
+ || CASE
16
+ WHEN COUNT(p.column_name) > 2 THEN ', Columns: ' || STRING_AGG(p.column_name, ', ' ORDER BY p.position)
17
+ ELSE ''
18
+ END as detail
19
+ FROM profile_results p
20
+ LEFT JOIN v_inactive_anomalies i
21
+ ON (p.table_groups_id = i.table_groups_id
22
+ AND p.schema_name = i.schema_name
23
+ AND p.table_name = i.table_name
24
+ AND '{ANOMALY_ID}' = i.anomaly_id)
25
+ WHERE p.profile_run_id = '{PROFILE_RUN_ID}'::UUID
26
+ AND i.anomaly_id IS NULL -- anti-join: drops tables that have a matching row in v_inactive_anomalies
27
+ AND p.general_type = 'D'
28
+ GROUP BY p.project_code, p.table_groups_id, p.profile_run_id,
29
+ p.schema_name, p.table_name, i.anomaly_id -- i.anomaly_id is always NULL here because of the filter above
30
+ HAVING {ANOMALY_CRITERIA};
@@ -0,0 +1,40 @@
1
+ INSERT INTO profile_anomaly_results -- variant screen: compares each column's top values against every variant_codings row
2
+ (project_code, table_groups_id, profile_run_id, anomaly_id,
3
+ schema_name, table_name, column_name, column_type, detail)
4
+ WITH all_matches
5
+ AS ( SELECT p.project_code,
6
+ p.table_groups_id,
7
+ p.profile_run_id,
8
+ p.schema_name,
9
+ p.table_name,
10
+ p.column_name,
11
+ p.column_type,
12
+ fn_extract_distinct_items(STRING_AGG(fn_extract_intersecting_items(LOWER(fn_extract_top_values(p.top_freq_values)), -- the fn_ helpers are defined elsewhere; presumably this yields the de-duplicated overlap list - confirm
13
+ v.check_values, '|'),
14
+ '|'),
15
+ '|') AS intersect_list
16
+ FROM profile_results p
17
+ CROSS JOIN variant_codings v -- every profiled column is paired with every variant_codings row
18
+ LEFT JOIN v_inactive_anomalies i
19
+ ON (p.table_groups_id = i.table_groups_id
20
+ AND p.schema_name = i.schema_name
21
+ AND p.table_name = i.table_name
22
+ AND p.column_name = i.column_name
23
+ AND '{ANOMALY_ID}' = i.anomaly_id)
24
+ WHERE p.profile_run_id = '{PROFILE_RUN_ID}'::UUID
25
+ AND {ANOMALY_CRITERIA}
26
+ AND p.top_freq_values > '' -- skips NULL or empty top_freq_values
27
+ AND i.anomaly_id IS NULL -- anti-join: drops columns that have a matching row in v_inactive_anomalies
28
+ AND fn_count_intersecting_items(LOWER(fn_extract_top_values(p.top_freq_values)), v.check_values, '|') > 1 -- keeps a pairing only when the helper reports more than one overlapping item
29
+ GROUP BY p.project_code,
30
+ p.table_groups_id,
31
+ p.profile_run_id,
32
+ p.schema_name,
33
+ p.table_name,
34
+ p.column_name,
35
+ p.column_type )
36
+ SELECT project_code, table_groups_id, profile_run_id,
37
+ '{ANOMALY_ID}' AS anomaly_id,
38
+ schema_name, table_name, column_name, column_type,
39
+ {DETAIL_EXPRESSION} AS detail
40
+ FROM all_matches;
@@ -0,0 +1,3 @@
1
+ SELECT id, anomaly_type, data_object, anomaly_criteria, detail_expression -- lists every row of profile_anomaly_types; no filter is applied
2
+ FROM profile_anomaly_types t
3
+ ORDER BY id;
@@ -0,0 +1,22 @@
1
+ SELECT '{SAMPLING_TABLE}' as schema_table, -- returns one row: the table label, a sample row count and the ratio of total rows to sampled rows
2
+ CASE
3
+ WHEN count(*) <= {PROFILE_SAMPLE_MIN_COUNT}
4
+ THEN -1 -- -1 when the table has no more rows than the minimum; presumably read by the caller as do-not-sample - confirm
5
+ ELSE
6
+ CASE
7
+ WHEN ROUND(CAST({PROFILE_SAMPLE_PERCENT} as FLOAT) * CAST(COUNT(*) as FLOAT) / 100.0, 0) > {PROFILE_SAMPLE_MIN_COUNT}
8
+ THEN LEAST(999000, ROUND(CAST({PROFILE_SAMPLE_PERCENT} as FLOAT) * CAST(COUNT(*) as FLOAT) / 100.0, 0)) -- percentage-based size, capped at 999000 rows
9
+ ELSE {PROFILE_SAMPLE_MIN_COUNT}
10
+ END
11
+ END as sample_count,
12
+ CASE
13
+ WHEN count(*) <= {PROFILE_SAMPLE_MIN_COUNT}
14
+ THEN 1
15
+ ELSE (CAST(COUNT(*) as FLOAT)
16
+ / CASE -- divisor repeats the sample_count expression above; keep the two copies in sync
17
+ WHEN ROUND(CAST({PROFILE_SAMPLE_PERCENT} as FLOAT) * CAST(COUNT(*) as FLOAT) / 100.0, 0) > {PROFILE_SAMPLE_MIN_COUNT}
18
+ THEN LEAST(999000, ROUND(CAST({PROFILE_SAMPLE_PERCENT} as FLOAT) * CAST(COUNT(*) as FLOAT) / 100.0, 0))
19
+ ELSE {PROFILE_SAMPLE_MIN_COUNT}
20
+ END )
21
+ END as sample_ratio
22
+ from {SAMPLING_TABLE};
@@ -0,0 +1,8 @@
1
+ INSERT INTO profiling_runs (id, project_code, connection_id, table_groups_id, profiling_starttime, process_id) -- inserts one profiling_runs row built from the substituted template values
2
+ (SELECT '{PROFILE_RUN_ID}' :: UUID as id,
3
+ '{PROJECT_CODE}' as project_code,
4
+ {CONNECTION_ID} as connection_id, -- spliced in unquoted, unlike the other values
5
+ '{TABLE_GROUPS_ID}' :: UUID as table_groups_id,
6
+ '{RUN_DATE}' as profiling_starttime,
7
+ '{PROCESS_ID}' as process_id
8
+ );
@@ -0,0 +1,5 @@
1
+ UPDATE profiling_runs -- closes a run: status is derived from whether an exception message was supplied
2
+ SET status = CASE WHEN length('{EXCEPTION_MESSAGE}') = 0 then 'Complete' else 'Error' end, -- empty message gives Complete, anything else gives Error
3
+ profiling_endtime = '{NOW}',
4
+ log_message = '{EXCEPTION_MESSAGE}' -- NOTE(review): message is spliced inside a quoted literal; presumably the caller escapes single quotes - confirm
5
+ where id = '{PROFILE_RUN_ID}' :: UUID;
@@ -0,0 +1,5 @@
1
+ UPDATE profiling_runs -- sets status, end time and log message of one run to the substituted values
2
+ SET status = '{STATUS}',
3
+ profiling_endtime = '{NOW}',
4
+ log_message = '{EXCEPTION_MESSAGE}' -- NOTE(review): message is spliced inside a quoted literal; presumably the caller escapes single quotes - confirm
5
+ where id = '{PROFILE_RUN_ID}' :: UUID;
@@ -0,0 +1,32 @@
-- Scale the counts gathered from a sample up to whole-table estimates for
-- one table of the given profiling run.
-- Distinct counts are left alone because a random sample should already be
-- representative for them.
-- Rows that already carry a sample_ratio are skipped, so running this twice
-- does not scale the same rows twice.
-- NOTE(review): date_days_present is scaled like a plain row count even
-- though it looks like a distinct-style measure - confirm this is intended.
UPDATE profile_results
   SET sample_ratio        = {PROFILE_SAMPLE_RATIO},
       record_ct           = ROUND(record_ct * {PROFILE_SAMPLE_RATIO}, 0),
       value_ct            = ROUND(value_ct * {PROFILE_SAMPLE_RATIO}, 0),
       null_value_ct       = ROUND(null_value_ct * {PROFILE_SAMPLE_RATIO}, 0),
       zero_value_ct       = ROUND(zero_value_ct * {PROFILE_SAMPLE_RATIO}, 0),
       lead_space_ct       = ROUND(lead_space_ct * {PROFILE_SAMPLE_RATIO}, 0),
       embedded_space_ct   = ROUND(embedded_space_ct * {PROFILE_SAMPLE_RATIO}, 0),
       includes_digit_ct   = ROUND(includes_digit_ct * {PROFILE_SAMPLE_RATIO}, 0),
       filled_value_ct     = ROUND(filled_value_ct * {PROFILE_SAMPLE_RATIO}, 0),
       numeric_ct          = ROUND(numeric_ct * {PROFILE_SAMPLE_RATIO}, 0),
       date_ct             = ROUND(date_ct * {PROFILE_SAMPLE_RATIO}, 0),
       before_1yr_date_ct  = ROUND(before_1yr_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
       before_5yr_date_ct  = ROUND(before_5yr_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
       before_20yr_date_ct = ROUND(before_20yr_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
       within_1yr_date_ct  = ROUND(within_1yr_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
       within_1mo_date_ct  = ROUND(within_1mo_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
       future_date_ct      = ROUND(future_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
       boolean_true_ct     = ROUND(boolean_true_ct * {PROFILE_SAMPLE_RATIO}, 0),
       date_days_present   = ROUND(date_days_present * {PROFILE_SAMPLE_RATIO}, 0)
 WHERE profile_run_id = '{PROFILE_RUN_ID}'
   AND schema_name = split_part('{SAMPLING_TABLE}', '.', 1)
   AND table_name = split_part('{SAMPLING_TABLE}', '.', 2)
   AND sample_ratio IS NULL;
@@ -0,0 +1,33 @@
-- Roll up table, column and anomaly totals for one profiling run and store
-- them on the run record.
WITH anomaly_totals AS (
   -- Anomaly counts for the run. There is no row when the run has none.
   SELECT profile_run_id,
          COUNT(*) AS anomaly_ct,
          COUNT(DISTINCT schema_name || '.' || table_name) AS anomaly_table_ct,
          COUNT(DISTINCT schema_name || '.' || table_name || '.' || column_name) AS anomaly_column_ct
     FROM profile_anomaly_results
    WHERE profile_run_id = CAST('{PROFILE_RUN_ID}' AS UUID)
    GROUP BY profile_run_id
),
profile_totals AS (
   -- One profile_results row per profiled column, so the row count is the
   -- column count.
   SELECT r.id AS profile_run_id,
          COUNT(DISTINCT p.schema_name || '.' || p.table_name) AS table_ct,
          COUNT(*) AS column_ct
     FROM profiling_runs r
    INNER JOIN profile_results p
       ON r.id = p.profile_run_id
    WHERE r.id = CAST('{PROFILE_RUN_ID}' AS UUID)
    GROUP BY r.id
),
run_stats AS (
   -- Left join keeps the run even when it produced no anomalies.
   SELECT pt.profile_run_id,
          table_ct,
          column_ct,
          a.anomaly_ct,
          a.anomaly_table_ct,
          a.anomaly_column_ct
     FROM profile_totals pt
     LEFT JOIN anomaly_totals a
       ON (pt.profile_run_id = a.profile_run_id)
)
UPDATE profiling_runs
   SET table_ct          = run_stats.table_ct,
       column_ct         = run_stats.column_ct,
       anomaly_ct        = COALESCE(run_stats.anomaly_ct, 0),
       anomaly_table_ct  = COALESCE(run_stats.anomaly_table_ct, 0),
       anomaly_column_ct = COALESCE(run_stats.anomaly_column_ct, 0)
  FROM run_stats
 WHERE profiling_runs.id = run_stats.profile_run_id;
@@ -0,0 +1,156 @@
1
+ -- ==============================================================================
2
+ -- | Table Characteristics
3
+ -- ==============================================================================
4
+
-- Update existing records
-- Refresh stored table characteristics from the latest profiling results
-- and clear drop_date for every table that is present in them.
WITH profiled_tables AS (
   SELECT p.table_groups_id,
          p.schema_name,
          p.table_name,
          p.functional_table_type,
          run_date AS add_date,
          MAX(record_ct) AS record_ct,
          COUNT(*) AS column_ct,
          MAX(record_ct) * COUNT(*) AS data_point_ct
     FROM v_latest_profile_results p
    WHERE p.table_groups_id = '{TABLE_GROUPS_ID}'
    GROUP BY p.table_groups_id,
             p.schema_name,
             p.table_name,
             p.functional_table_type,
             run_date
)
UPDATE data_table_chars
   SET functional_table_type = src.functional_table_type,
       record_ct             = src.record_ct,
       column_ct             = src.column_ct,
       data_point_ct         = src.data_point_ct,
       drop_date             = NULL
  FROM profiled_tables src
 INNER JOIN data_table_chars cur
    ON (src.table_groups_id = cur.table_groups_id
   AND src.schema_name = cur.schema_name
   AND src.table_name = cur.table_name)
 WHERE data_table_chars.table_id = cur.table_id;
29
+
-- Add new records
-- Insert tables found in the latest profiling results that have no
-- data_table_chars row yet for this table group.
WITH profiled_tables AS (
   SELECT p.table_groups_id,
          p.schema_name,
          p.table_name,
          p.functional_table_type,
          run_date AS add_date,
          NULL::TIMESTAMP AS drop_date,
          MAX(record_ct) AS record_ct,
          COUNT(*) AS column_ct,
          MAX(record_ct) * COUNT(*) AS data_point_ct
     FROM v_latest_profile_results p
    WHERE p.table_groups_id = '{TABLE_GROUPS_ID}'
    GROUP BY p.table_groups_id,
             p.schema_name,
             p.table_name,
             p.functional_table_type,
             run_date
)
INSERT INTO data_table_chars
       (table_groups_id, schema_name, table_name, functional_table_type, add_date,
        record_ct, column_ct, data_point_ct)
SELECT src.table_groups_id,
       src.schema_name,
       src.table_name,
       src.functional_table_type,
       src.add_date,
       src.record_ct,
       src.column_ct,
       src.data_point_ct
  FROM profiled_tables src
  LEFT JOIN data_table_chars cur
    ON (src.table_groups_id = cur.table_groups_id
   AND src.schema_name = cur.schema_name
   AND src.table_name = cur.table_name)
 -- Anti-join, keep only tables without a stored row
 WHERE cur.table_id IS NULL;
54
+
-- Mark dropped records
-- A stored table that is absent from the latest profiling results is
-- stamped with the date of the latest run as its drop_date.
-- Only rows whose drop_date is still empty are stamped, so a table keeps
-- the date it was first found missing instead of being re-stamped on
-- every later run.
WITH new_chars
   AS ( SELECT p.table_groups_id,
               p.schema_name, p.table_name
          FROM v_latest_profile_results p
         WHERE p.table_groups_id = '{TABLE_GROUPS_ID}'
         GROUP BY p.table_groups_id,
                  p.schema_name, p.table_name ),
     last_run
   AS ( SELECT table_groups_id, MAX(run_date) as last_run_date
          FROM v_latest_profile_results
         WHERE table_groups_id = '{TABLE_GROUPS_ID}'
         GROUP BY table_groups_id)
UPDATE data_table_chars
   SET drop_date = l.last_run_date
  FROM last_run l
INNER JOIN data_table_chars d
   ON (l.table_groups_id = d.table_groups_id)
LEFT JOIN new_chars n
   ON (d.table_groups_id = n.table_groups_id
  AND d.schema_name = n.schema_name
  AND d.table_name = n.table_name)
 WHERE data_table_chars.table_id = d.table_id
   -- Preserve the original drop date of tables already marked as dropped
   AND d.drop_date IS NULL
   AND n.table_name IS NULL;
79
+
80
+ -- ==============================================================================
81
+ -- | Column Characteristics
82
+ -- ==============================================================================
83
+
-- Update existing records
-- Refresh stored column characteristics from the latest profiling results.
-- last_mod_date moves to the run date only when the column type changed.
-- drop_date is cleared for every column present in the results.
WITH profiled_columns AS (
   SELECT p.table_groups_id,
          p.schema_name,
          p.table_name,
          p.column_name,
          p.general_type,
          p.column_type,
          p.functional_data_type,
          run_date
     FROM v_latest_profile_results p
    WHERE p.table_groups_id = '{TABLE_GROUPS_ID}'
)
UPDATE data_column_chars
   SET last_mod_date        = CASE
                                 WHEN src.column_type <> cur.column_type THEN src.run_date
                                 ELSE cur.last_mod_date
                              END,
       general_type         = src.general_type,
       column_type          = src.column_type,
       functional_data_type = src.functional_data_type,
       drop_date            = NULL
  FROM profiled_columns src
 INNER JOIN data_column_chars cur
    ON (src.table_groups_id = cur.table_groups_id
   AND src.schema_name = cur.schema_name
   AND src.table_name = cur.table_name
   AND src.column_name = cur.column_name)
 WHERE data_column_chars.table_id = cur.table_id
   AND data_column_chars.column_name = cur.column_name;
106
+
-- Add new records
-- Insert columns found in the latest profiling results that have no
-- data_column_chars row yet. The table_id comes from the matching
-- data_table_chars row, so a column is only added once its table exists
-- there.
WITH profiled_columns AS (
   SELECT p.table_groups_id,
          p.schema_name,
          p.table_name,
          p.column_name,
          p.general_type,
          p.column_type,
          p.functional_data_type,
          run_date AS add_date
     FROM v_latest_profile_results p
    WHERE p.table_groups_id = '{TABLE_GROUPS_ID}'
)
INSERT INTO data_column_chars
       (table_groups_id, schema_name, table_name, table_id, column_name,
        general_type, column_type, functional_data_type, add_date, last_mod_date)
SELECT src.table_groups_id,
       src.schema_name,
       src.table_name,
       dtc.table_id,
       src.column_name,
       src.general_type,
       src.column_type,
       src.functional_data_type,
       src.add_date,
       src.add_date AS last_mod_date
  FROM profiled_columns src
 INNER JOIN data_table_chars dtc
    ON (src.table_groups_id = dtc.table_groups_id
   AND src.schema_name = dtc.schema_name
   AND src.table_name = dtc.table_name)
  LEFT JOIN data_column_chars cur
    ON (src.table_groups_id = cur.table_groups_id
   AND src.schema_name = cur.schema_name
   AND src.table_name = cur.table_name
   AND src.column_name = cur.column_name)
 -- Anti-join, keep only columns without a stored row
 WHERE cur.table_id IS NULL;
132
+
-- Mark dropped records
-- A stored column that is absent from the latest profiling results is
-- stamped with the date of the latest run as its drop_date.
-- Only rows whose drop_date is still empty are stamped, so a column keeps
-- the date it was first found missing instead of being re-stamped on
-- every later run.
WITH new_chars
   AS ( SELECT p.table_groups_id,
               p.schema_name, p.table_name, p.column_name
          FROM v_latest_profile_results p
         WHERE p.table_groups_id = '{TABLE_GROUPS_ID}'),
     last_run
   AS ( SELECT table_groups_id, MAX(run_date) as last_run_date
          FROM v_latest_profile_results
         WHERE table_groups_id = '{TABLE_GROUPS_ID}'
         GROUP BY table_groups_id)
UPDATE data_column_chars
   SET drop_date = l.last_run_date
  FROM last_run l
INNER JOIN data_column_chars d
   ON (l.table_groups_id = d.table_groups_id)
LEFT JOIN new_chars n
   ON (d.table_groups_id = n.table_groups_id
  AND d.schema_name = n.schema_name
  AND d.table_name = n.table_name
  AND d.column_name = n.column_name)
 WHERE data_column_chars.table_id = d.table_id
   AND data_column_chars.column_name = d.column_name
   -- Preserve the original drop date of columns already marked as dropped
   AND d.drop_date IS NULL
   AND n.column_name IS NULL;
@@ -0,0 +1,12 @@
-- Pick the columns of one profiling run that still lack top frequency
-- values and are small enough for them to fit in the result, meaning
-- 2 to 70 distinct values and a max length of at most 70.
-- NOTE(review): general_type A presumably means character columns - confirm.
SELECT p.schema_name,
       p.table_name,
       p.column_name
  FROM profile_results AS p
 WHERE p.profile_run_id = '{PROFILE_RUN_ID}'
   AND p.top_freq_values IS NULL
   AND p.general_type = 'A'
   AND p.distinct_value_ct BETWEEN 2 AND 70
   AND p.max_length <= 70;
@@ -0,0 +1,4 @@
-- Remove the staged secondary-profiling rows for one project, schema
-- and run date.
DELETE FROM stg_secondary_profile_updates
 WHERE project_code = '{PROJECT_CODE}'
   AND schema_name = '{DATA_SCHEMA}'
   AND run_date = '{RUN_DATE}';
@@ -0,0 +1,18 @@
-- Copy the staged secondary-profiling values (top frequency values and the
-- distinct value hash) onto the matching profile_results rows for one
-- project, schema and run date.
-- The target table is joined straight to the staging table on the five
-- key columns. An extra self-join of profile_results on those same keys
-- would select the same rows while adding a redundant scan.
UPDATE profile_results
   SET top_freq_values     = u.top_freq_values,
       distinct_value_hash = u.distinct_value_hash
  FROM stg_secondary_profile_updates u
 WHERE profile_results.project_code = u.project_code
   AND profile_results.schema_name = u.schema_name
   AND profile_results.run_date = u.run_date
   AND profile_results.table_name = u.table_name
   AND profile_results.column_name = u.column_name
   AND profile_results.project_code = '{PROJECT_CODE}'
   AND profile_results.schema_name = '{DATA_SCHEMA}'
   AND profile_results.run_date = '{RUN_DATE}';