dataops-testgen 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. dataops_testgen-2.2.0.dist-info/LICENSE +203 -0
  2. dataops_testgen-2.2.0.dist-info/METADATA +287 -0
  3. dataops_testgen-2.2.0.dist-info/NOTICE +5 -0
  4. dataops_testgen-2.2.0.dist-info/RECORD +270 -0
  5. dataops_testgen-2.2.0.dist-info/WHEEL +5 -0
  6. dataops_testgen-2.2.0.dist-info/entry_points.txt +2 -0
  7. dataops_testgen-2.2.0.dist-info/top_level.txt +1 -0
  8. testgen/__init__.py +0 -0
  9. testgen/__main__.py +770 -0
  10. testgen/commands/__init__.py +0 -0
  11. testgen/commands/queries/__init__.py +0 -0
  12. testgen/commands/queries/execute_cat_tests_query.py +95 -0
  13. testgen/commands/queries/execute_tests_query.py +160 -0
  14. testgen/commands/queries/generate_tests_query.py +94 -0
  15. testgen/commands/queries/profiling_query.py +366 -0
  16. testgen/commands/queries/test_parameter_validation_query.py +88 -0
  17. testgen/commands/run_execute_cat_tests.py +162 -0
  18. testgen/commands/run_execute_tests.py +168 -0
  19. testgen/commands/run_generate_tests.py +107 -0
  20. testgen/commands/run_get_entities.py +122 -0
  21. testgen/commands/run_launch_db_config.py +84 -0
  22. testgen/commands/run_observability_exporter.py +330 -0
  23. testgen/commands/run_profiling_bridge.py +495 -0
  24. testgen/commands/run_quick_start.py +168 -0
  25. testgen/commands/run_setup_profiling_tools.py +96 -0
  26. testgen/commands/run_test_definition.py +146 -0
  27. testgen/commands/run_test_parameter_validation.py +135 -0
  28. testgen/commands/run_upgrade_db_config.py +156 -0
  29. testgen/common/__init__.py +8 -0
  30. testgen/common/clean_sql.py +53 -0
  31. testgen/common/credentials.py +25 -0
  32. testgen/common/database/__init__.py +0 -0
  33. testgen/common/database/database_service.py +629 -0
  34. testgen/common/database/flavor/__init__.py +0 -0
  35. testgen/common/database/flavor/flavor_service.py +75 -0
  36. testgen/common/database/flavor/mssql_flavor_service.py +34 -0
  37. testgen/common/database/flavor/postgresql_flavor_service.py +5 -0
  38. testgen/common/database/flavor/redshift_flavor_service.py +22 -0
  39. testgen/common/database/flavor/snowflake_flavor_service.py +69 -0
  40. testgen/common/database/flavor/trino_flavor_service.py +21 -0
  41. testgen/common/date_service.py +68 -0
  42. testgen/common/display_service.py +85 -0
  43. testgen/common/docker_service.py +76 -0
  44. testgen/common/encrypt.py +55 -0
  45. testgen/common/get_pipeline_parms.py +57 -0
  46. testgen/common/logs.py +79 -0
  47. testgen/common/process_service.py +62 -0
  48. testgen/common/read_file.py +69 -0
  49. testgen/settings.py +440 -0
  50. testgen/template/dbsetup/010_create_base_schema.sql +2 -0
  51. testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +179 -0
  52. testgen/template/dbsetup/030_initialize_new_schema_structure.sql +735 -0
  53. testgen/template/dbsetup/040_populate_new_schema_project.sql +59 -0
  54. testgen/template/dbsetup/050_populate_new_schema_metadata.sql +1517 -0
  55. testgen/template/dbsetup/060_create_standard_views.sql +248 -0
  56. testgen/template/dbsetup/070_create_default_users.sql +17 -0
  57. testgen/template/dbsetup/075_grant_role_rights.sql +43 -0
  58. testgen/template/dbsetup/080_set_current_revision.sql +5 -0
  59. testgen/template/dbupgrade/0100_incremental_upgrade.sql +5 -0
  60. testgen/template/dbupgrade/0101_incremental_upgrade.sql +15 -0
  61. testgen/template/dbupgrade/0102_incremental_upgrade.sql +4 -0
  62. testgen/template/dbupgrade/0103_incremental_upgrade.sql +22 -0
  63. testgen/template/dbupgrade/0104_incremental_upgrade.sql +44 -0
  64. testgen/template/dbupgrade/0105_incremental_upgrade.sql +1 -0
  65. testgen/template/dbupgrade/0106_incremental_upgrade.sql +5 -0
  66. testgen/template/dbupgrade/0107_incremental_upgrade.sql +3 -0
  67. testgen/template/dbupgrade_helpers/get_tg_revision.sql +2 -0
  68. testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +116 -0
  69. testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql +11 -0
  70. testgen/template/exec_cat_tests/ex_cat_results_parse.sql +69 -0
  71. testgen/template/exec_cat_tests/ex_cat_retrieve_agg_test_parms.sql +6 -0
  72. testgen/template/exec_cat_tests/ex_cat_test_query.sql +8 -0
  73. testgen/template/execution/ex_finalize_test_run_results.sql +37 -0
  74. testgen/template/execution/ex_get_tests_non_cat.sql +47 -0
  75. testgen/template/execution/ex_update_test_record_in_testrun_table.sql +27 -0
  76. testgen/template/execution/ex_write_test_record_to_testrun_table.sql +6 -0
  77. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +48 -0
  78. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql +34 -0
  79. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql +49 -0
  80. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql +49 -0
  81. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +49 -0
  82. testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +39 -0
  83. testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +58 -0
  84. testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +44 -0
  85. testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +37 -0
  86. testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +53 -0
  87. testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +46 -0
  88. testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +59 -0
  89. testgen/template/flavors/generic/profiling/contingency_counts.sql +3 -0
  90. testgen/template/flavors/generic/validate_tests/ex_get_project_column_list_generic.sql +3 -0
  91. testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql +53 -0
  92. testgen/template/flavors/mssql/profiling/project_ddf_query_mssql.sql +35 -0
  93. testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +246 -0
  94. testgen/template/flavors/mssql/profiling/project_secondary_profiling_query_mssql.sql +36 -0
  95. testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql +8 -0
  96. testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql +12 -0
  97. testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql +54 -0
  98. testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql +4 -0
  99. testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql +1 -0
  100. testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql +46 -0
  101. testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql +59 -0
  102. testgen/template/flavors/postgresql/profiling/project_ddf_query_postgresql.sql +42 -0
  103. testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +225 -0
  104. testgen/template/flavors/postgresql/profiling/project_secondary_profiling_query_postgresql.sql +28 -0
  105. testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql +157 -0
  106. testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql +1 -0
  107. testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql +2 -0
  108. testgen/template/flavors/redshift/profiling/project_ddf_query_redshift.sql +38 -0
  109. testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +221 -0
  110. testgen/template/flavors/redshift/profiling/project_secondary_profiling_query_redshift.sql +29 -0
  111. testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql +115 -0
  112. testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql +1 -0
  113. testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql +2 -0
  114. testgen/template/flavors/snowflake/profiling/project_ddf_query_snowflake.sql +38 -0
  115. testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +220 -0
  116. testgen/template/flavors/snowflake/profiling/project_secondary_profiling_query_snowflake.sql +29 -0
  117. testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql +69 -0
  118. testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql +1 -0
  119. testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql +6 -0
  120. testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml +219 -0
  121. testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql +92 -0
  122. testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql +1 -0
  123. testgen/template/gen_funny_cat_tests/gen_test_constant.sql +104 -0
  124. testgen/template/gen_funny_cat_tests/gen_test_distinct_value_ct.sql +98 -0
  125. testgen/template/gen_funny_cat_tests/gen_test_row_ct.sql +57 -0
  126. testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql +59 -0
  127. testgen/template/generation/gen_delete_old_tests.sql +5 -0
  128. testgen/template/generation/gen_insert_test_suite.sql +5 -0
  129. testgen/template/generation/gen_retrieve_or_insert_test_suite.sql +58 -0
  130. testgen/template/generation/gen_standard_test_type_list.sql +13 -0
  131. testgen/template/generation/gen_standard_tests.sql +48 -0
  132. testgen/template/get_entities/get_connection.sql +21 -0
  133. testgen/template/get_entities/get_connections_list.sql +9 -0
  134. testgen/template/get_entities/get_latest.sql +4 -0
  135. testgen/template/get_entities/get_profile.sql +12 -0
  136. testgen/template/get_entities/get_profile_info.sql +17 -0
  137. testgen/template/get_entities/get_profile_list.sql +17 -0
  138. testgen/template/get_entities/get_profile_screen.sql +275 -0
  139. testgen/template/get_entities/get_project_list.sql +6 -0
  140. testgen/template/get_entities/get_table_group_list.sql +10 -0
  141. testgen/template/get_entities/get_test_generation_list.sql +18 -0
  142. testgen/template/get_entities/get_test_info.sql +41 -0
  143. testgen/template/get_entities/get_test_results_for_run_cli.sql +16 -0
  144. testgen/template/get_entities/get_test_run_list.sql +24 -0
  145. testgen/template/get_entities/get_test_suite.sql +13 -0
  146. testgen/template/get_entities/get_test_suite_list.sql +18 -0
  147. testgen/template/get_entities/list_test_types.sql +4 -0
  148. testgen/template/observability/get_event_data.sql +23 -0
  149. testgen/template/observability/get_test_results.sql +41 -0
  150. testgen/template/observability/update_test_results_exported_to_observability.sql +12 -0
  151. testgen/template/parms/parms_profiling.sql +34 -0
  152. testgen/template/parms/parms_test_execution.sql +13 -0
  153. testgen/template/parms/parms_test_gen.sql +23 -0
  154. testgen/template/profiling/contingency_columns.sql +7 -0
  155. testgen/template/profiling/datatype_suggestions.sql +56 -0
  156. testgen/template/profiling/functional_datatype.sql +523 -0
  157. testgen/template/profiling/functional_tabletype_stage.sql +48 -0
  158. testgen/template/profiling/functional_tabletype_update.sql +8 -0
  159. testgen/template/profiling/pii_flag.sql +133 -0
  160. testgen/template/profiling/profile_anomalies_screen_column.sql +22 -0
  161. testgen/template/profiling/profile_anomalies_screen_multi_column.sql +58 -0
  162. testgen/template/profiling/profile_anomalies_screen_table.sql +22 -0
  163. testgen/template/profiling/profile_anomalies_screen_table_dates.sql +30 -0
  164. testgen/template/profiling/profile_anomalies_screen_variants.sql +40 -0
  165. testgen/template/profiling/profile_anomaly_types_get.sql +3 -0
  166. testgen/template/profiling/project_get_table_sample_count.sql +22 -0
  167. testgen/template/profiling/project_profile_run_record_insert.sql +8 -0
  168. testgen/template/profiling/project_profile_run_record_update.sql +5 -0
  169. testgen/template/profiling/project_profile_run_record_update_status.sql +5 -0
  170. testgen/template/profiling/project_update_profile_results_to_estimates.sql +32 -0
  171. testgen/template/profiling/refresh_anomalies.sql +33 -0
  172. testgen/template/profiling/refresh_data_chars_from_profiling.sql +156 -0
  173. testgen/template/profiling/secondary_profiling_columns.sql +12 -0
  174. testgen/template/profiling/secondary_profiling_delete.sql +4 -0
  175. testgen/template/profiling/secondary_profiling_update.sql +18 -0
  176. testgen/template/quick_start/populate_target_data.sql +1077 -0
  177. testgen/template/quick_start/recreate_target_data_schema.sql +167 -0
  178. testgen/template/quick_start/update_target_data.sql +100 -0
  179. testgen/template/updates/create_tmp_test_definition.sql +19 -0
  180. testgen/template/updates/get_test_def_parms.sql +38 -0
  181. testgen/template/updates/populate_stg_test_definitions.sql +184 -0
  182. testgen/template/validate_tests/ex_disable_tests_test_definitions.sql +5 -0
  183. testgen/template/validate_tests/ex_flag_tests_test_definitions.sql +64 -0
  184. testgen/template/validate_tests/ex_get_project_column_list_generic.sql +3 -0
  185. testgen/template/validate_tests/ex_get_test_column_list_tg.sql +65 -0
  186. testgen/template/validate_tests/ex_write_test_val_errors.sql +22 -0
  187. testgen/ui/__init__.py +0 -0
  188. testgen/ui/app.py +98 -0
  189. testgen/ui/assets/dk_logo.svg +46 -0
  190. testgen/ui/assets/question_mark.png +0 -0
  191. testgen/ui/assets/scripts.js +68 -0
  192. testgen/ui/assets/style.css +140 -0
  193. testgen/ui/bootstrap.py +109 -0
  194. testgen/ui/components/__init__.py +0 -0
  195. testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fBBc4.woff2 +0 -0
  196. testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fChc4EsA.woff2 +0 -0
  197. testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu4mxK.woff2 +0 -0
  198. testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu7GxKOzY.woff2 +0 -0
  199. testgen/ui/components/frontend/css/material-symbols-rounded.css +24 -0
  200. testgen/ui/components/frontend/css/material-symbols-rounded.woff2 +0 -0
  201. testgen/ui/components/frontend/css/roboto-font-faces.css +35 -0
  202. testgen/ui/components/frontend/css/shared.css +36 -0
  203. testgen/ui/components/frontend/img/dk_logo.svg +46 -0
  204. testgen/ui/components/frontend/index.html +17 -0
  205. testgen/ui/components/frontend/js/components/breadcrumbs.js +86 -0
  206. testgen/ui/components/frontend/js/components/button.js +66 -0
  207. testgen/ui/components/frontend/js/components/location.js +62 -0
  208. testgen/ui/components/frontend/js/components/select.js +75 -0
  209. testgen/ui/components/frontend/js/components/sidebar.js +358 -0
  210. testgen/ui/components/frontend/js/main.js +99 -0
  211. testgen/ui/components/frontend/js/streamlit.js +19 -0
  212. testgen/ui/components/frontend/js/van.min.js +1 -0
  213. testgen/ui/components/utils/__init__.py +0 -0
  214. testgen/ui/components/utils/callbacks.py +51 -0
  215. testgen/ui/components/utils/component.py +13 -0
  216. testgen/ui/components/widgets/__init__.py +6 -0
  217. testgen/ui/components/widgets/breadcrumbs.py +32 -0
  218. testgen/ui/components/widgets/location.py +65 -0
  219. testgen/ui/components/widgets/modal.py +97 -0
  220. testgen/ui/components/widgets/sidebar.py +69 -0
  221. testgen/ui/navigation/__init__.py +0 -0
  222. testgen/ui/navigation/menu.py +42 -0
  223. testgen/ui/navigation/page.py +20 -0
  224. testgen/ui/navigation/router.py +63 -0
  225. testgen/ui/queries/__init__.py +0 -0
  226. testgen/ui/queries/authentication_queries.py +47 -0
  227. testgen/ui/queries/connection_queries.py +121 -0
  228. testgen/ui/queries/profiling_queries.py +148 -0
  229. testgen/ui/queries/project_queries.py +9 -0
  230. testgen/ui/queries/table_group_queries.py +186 -0
  231. testgen/ui/queries/test_definition_queries.py +270 -0
  232. testgen/ui/queries/test_run_queries.py +32 -0
  233. testgen/ui/queries/test_suite_queries.py +145 -0
  234. testgen/ui/scripts/__init__.py +0 -0
  235. testgen/ui/scripts/patch_streamlit.py +111 -0
  236. testgen/ui/services/__init__.py +0 -0
  237. testgen/ui/services/authentication_service.py +119 -0
  238. testgen/ui/services/connection_service.py +220 -0
  239. testgen/ui/services/database_service.py +282 -0
  240. testgen/ui/services/form_service.py +1008 -0
  241. testgen/ui/services/javascript_service.py +44 -0
  242. testgen/ui/services/query_service.py +316 -0
  243. testgen/ui/services/string_service.py +12 -0
  244. testgen/ui/services/table_group_service.py +130 -0
  245. testgen/ui/services/test_definition_service.py +117 -0
  246. testgen/ui/services/test_run_service.py +13 -0
  247. testgen/ui/services/test_suite_service.py +76 -0
  248. testgen/ui/services/toolbar_service.py +77 -0
  249. testgen/ui/session.py +46 -0
  250. testgen/ui/views/__init__.py +0 -0
  251. testgen/ui/views/app_log_modal.py +92 -0
  252. testgen/ui/views/connections.py +72 -0
  253. testgen/ui/views/connections_base.py +367 -0
  254. testgen/ui/views/login.py +40 -0
  255. testgen/ui/views/not_found.py +16 -0
  256. testgen/ui/views/overview.py +34 -0
  257. testgen/ui/views/profiling_anomalies.py +501 -0
  258. testgen/ui/views/profiling_details.py +335 -0
  259. testgen/ui/views/profiling_modal.py +40 -0
  260. testgen/ui/views/profiling_results.py +206 -0
  261. testgen/ui/views/profiling_summary.py +177 -0
  262. testgen/ui/views/project_settings.py +74 -0
  263. testgen/ui/views/table_groups.py +530 -0
  264. testgen/ui/views/test_definitions.py +1020 -0
  265. testgen/ui/views/test_results.py +908 -0
  266. testgen/ui/views/test_runs.py +195 -0
  267. testgen/ui/views/test_suites.py +545 -0
  268. testgen/utils/__init__.py +0 -0
  269. testgen/utils/plugins.py +17 -0
  270. testgen/utils/singleton.py +14 -0
@@ -0,0 +1,523 @@
1
+ -- First Clear: reset both functional classifications for this profiling run; the later steps treat functional_data_type IS NULL as "not yet classified" --
2
+ UPDATE profile_results
3
+ SET functional_data_type = NULL,
4
+ functional_table_type = NULL
5
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'; -- {PROFILE_RUN_ID} is a template placeholder; presumably substituted by the caller before execution
6
+
7
+
8
+ -- 1. Assign CONSTANT and TBD - this is the first step of elimination
9
+ /*
10
+ TBD - If record_ct for the table is zero, or if the column has fewer than 5 values or all of its values are blank (zero-length)
11
+ Constant - If the distinct_value_ct is 1 and more than 75% of the records are filled
12
+ */
13
+
14
+ UPDATE profile_results -- flag columns that have too little data to classify
15
+ SET functional_data_type =
16
+ CASE WHEN record_ct = 0 then 'TBD (Not enough data)' -- no rows recorded at all
17
+ WHEN record_ct > 0 and ((value_ct < 5 OR zero_length_ct / nullif(value_ct, 0)::FLOAT = 1)) -- fewer than 5 values, or every value is zero-length; nullif() guards the division when value_ct = 0
18
+ THEN 'TBD (Not enough data)'
19
+ ELSE functional_data_type -- otherwise keep the current value
20
+ END
21
+ WHERE profile_run_id = '{PROFILE_RUN_ID}';
22
+
23
+
24
+ UPDATE profile_results -- flag single-valued columns as constants
25
+ SET functional_data_type =
26
+ CASE WHEN distinct_value_ct = 1
27
+ AND (((value_ct :: float - coalesce(filled_value_ct, 0::bigint) :: float)/record_ct :: float) :: float *100.00 ) > 75 -- (value_ct - filled_value_ct) as a percentage of record_ct; a NULL filled_value_ct counts as 0
28
+ -- this tells us how many actual values we have filled in; threshold -> if there is only 1 value and it covers more than 75% of the records -> then it's a constant
29
+ THEN 'Constant'
30
+ ELSE functional_data_type END
31
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
32
+ AND functional_data_type IS NULL; -- skips rows already marked TBD, which includes every record_ct = 0 row, so the division above never sees a zero record_ct
33
+
34
+ -- 1A. Assign IDs based on column-name masks
35
+ UPDATE profile_results
36
+ SET functional_data_type = 'ID-SK'
37
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
38
+ AND functional_data_type IS NULL -- only columns not yet classified
39
+ AND column_name ILIKE '{PROFILE_SK_COLUMN_MASK}'; -- case-insensitive match against the SK mask placeholder; this runs before the ID mask, so a name matching both masks is labelled 'ID-SK'
40
+
41
+ UPDATE profile_results -- label remaining columns whose name matches the ID mask
42
+ SET functional_data_type = 'ID'
43
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
44
+ AND functional_data_type IS NULL -- only columns not yet classified (including not already 'ID-SK')
45
+ AND column_name ILIKE '{PROFILE_ID_COLUMN_MASK}'; -- case-insensitive match against the ID mask placeholder
46
+
47
+ -- 2. Assign DATE
48
+ /*
49
+ . Historical Date - If at least 75% of values are counted in before_20yr_date_ct (presumably dates more than 20 years old)
50
+ . Future Date - If more than 95% of records have a future date value
51
+ . Schedule Date - If more than 50% of records have a future date present
52
+ . If we have 10-90% of records from (before 1 year ago and within a year and for future 1 year)
53
+ then, classify further as the following :-
54
+ Transactional Date - If the data has a record for everyday or at least twice a week
55
+ or we have at least 28 days of data in current year
56
+ or we have at least 28 days of data in the last 5 years
57
+ Transactional Date (Wk) - If the data available is for every week of the year or at least twice a month
58
+ or 2 weeks a month from the last 5 years
59
+ Transactional Date (Mo) - If the data available is for every month of the year or at least 5 months
60
+ or 5 month a year from the last 5 years
61
+ Transactional Date (Qtr) - If the data available is for every quarter of the year
62
+ Date (TBD) - If none of the above are satisfied
63
+ . Check varchar attributes (or attributes not given a date datatype)
64
+ Look at min_length and max_length to determine if a field is date or timestamp
65
+
66
+ */
67
+
68
+ UPDATE profile_results -- classify date-like columns; the first matching WHEN wins, so the order of the branches matters
69
+ SET functional_data_type =
70
+ CASE
71
+ WHEN before_20yr_date_ct / NULLIF(value_ct::FLOAT, 0) * 100 >= 75 THEN 'Historical Date' -- at least 75% of values counted in before_20yr_date_ct
72
+ WHEN future_date_ct / NULLIF(value_ct::FLOAT, 0) * 100 >= 95 THEN 'Future Date' -- at least 95% of values counted in future_date_ct
73
+ WHEN future_date_ct / NULLIF(value_ct::FLOAT, 0) * 100 >= 50 THEN 'Schedule Date' -- 50% up to (but not including) 95%, since the branch above is tested first
74
+ WHEN before_1yr_date_ct / NULLIF(value_ct::FLOAT, 0) * 100 BETWEEN 10 AND 90 -- mixed history: each of the three shares below must be in range
75
+ AND within_1yr_date_ct / NULLIF(value_ct::FLOAT, 0) * 100 BETWEEN 10 AND 90
76
+ AND future_date_ct / NULLIF(value_ct::FLOAT, 0) * 100 BETWEEN 0 AND 10
77
+ THEN
78
+ CASE
79
+ WHEN date_days_present = DATEDIFF('DAY', min_date, max_date) + 1 -- everyday
80
+ OR date_days_present >=
81
+ 2 * (DATEDIFF('WEEK', min_date, max_date) + 1) -- 2 days a week based on overall data
82
+ OR ROUND(within_1yr_date_ct::FLOAT / value_ct * distinct_value_ct) /
83
+ LEAST(365, NULLIF(DATEDIFF('DAY', (run_date::DATE - 365):: TIMESTAMP, max_date), 0))::FLOAT * 100 >=
84
+ 28 -- current year
85
+ OR ROUND(distinct_value_ct * (1 - before_5yr_date_ct / NULLIF(value_ct::FLOAT, 0))) /
86
+ LEAST(NULLIF(DATEDIFF('DAY', (run_date::DATE - 365 * 5)::TIMESTAMP, max_date) + 1, 0), 365 * 5) * 100 >=
87
+ 28 -- last 5 years
88
+ THEN 'Transactional Date'
89
+ WHEN date_weeks_present =
90
+ NULLIF(DATEDIFF('WEEK', min_date, max_date), 0)::FLOAT + 1 -- 1 day a week
91
+ OR
92
+ date_weeks_present >= 2 * (DATEDIFF('MONTH', min_date, max_date) + 1) -- 2 weeks a month
93
+ OR ROUND(distinct_value_ct * (1 - before_5yr_date_ct / NULLIF(value_ct::FLOAT, 0))) >=
94
+ 2 *
95
+ (DATEDIFF('MONTH', (run_date::DATE - 365)::TIMESTAMP, max_date) + 1) -- 2 weeks a month from the last 5 years to current; NOTE(review): the window here starts at run_date - 365 (1 year) while the numerator only excludes before_5yr_date_ct and the (Mo) branch uses 365*5 - confirm intended
96
+ THEN 'Transactional Date (Wk)'
97
+ WHEN date_months_present =
98
+ NULLIF(DATEDIFF('MONTH', min_date, max_date), 0)::FLOAT + 1 -- every month
99
+ OR
100
+ date_months_present >= 5 * (DATEDIFF('YEAR', min_date, max_date) + 1) -- 5 months a year
101
+ OR ROUND(distinct_value_ct * (1 - before_5yr_date_ct / NULLIF(value_ct::FLOAT, 0))) >=
102
+ 5 *
103
+ (DATEDIFF('YEAR', (run_date::DATE - 365*5)::TIMESTAMP, max_date) + 1) -- 5 months a year from the last 5 years to current
104
+ THEN 'Transactional Date (Mo)'
105
+ WHEN distinct_value_ct = DATEDIFF('QUARTER', min_date, max_date) + 1 -- every quarter
106
+ THEN 'Transactional Date (Qtr)'
107
+ ELSE 'Date (TBD)'
108
+ END
109
+ WHEN column_type = 'date' -- fallbacks when none of the distribution rules above matched
110
+ THEN 'Date Stamp'
111
+ WHEN column_type = 'timestamp'
112
+ THEN 'DateTime Stamp'
113
+ ELSE functional_data_type -- otherwise keep the current value (NULL here, given the WHERE clause)
114
+ END
115
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
116
+ AND functional_data_type IS NULL
117
+ AND (general_type = 'D' OR (value_ct = date_ct + zero_length_ct AND value_ct > 0)); -- general_type 'D' columns, or columns whose values are all either counted in date_ct or zero-length
118
+
119
+ -- Character Date: text columns whose single value pattern is NNNN-NN-NN
120
+ UPDATE profile_results
121
+ SET functional_data_type = 'Date Stamp'
122
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
123
+ AND functional_data_type IS NULL
124
+ AND distinct_pattern_ct = 1 -- exactly one pattern across the column
125
+ AND min_text >= '1900' AND max_text <= '2200' -- string comparison on the text bounds
126
+ AND TRIM(SPLIT_PART(top_patterns, '|', 2)) = 'NNNN-NN-NN'; -- second '|'-delimited field of top_patterns; presumably the most frequent pattern - confirm against the profiler output format
127
+
128
+ -- Character Timestamp: text columns whose single value pattern is NNNN-NN-NN NN:NN:NN
129
+ UPDATE profile_results
130
+ SET functional_data_type = 'DateTime Stamp'
131
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
132
+ AND functional_data_type IS NULL
133
+ AND distinct_pattern_ct = 1 -- exactly one pattern across the column
134
+ AND TRIM(SPLIT_PART(top_patterns, '|', 2)) = 'NNNN-NN-NN NN:NN:NN'; -- no min_text/max_text range check here, unlike the Character Date step
135
+
136
+ -- Assign PERIODS: Period Year, Period Qtr, Period Month, Period Week, Period DOW
137
+ UPDATE profile_results
138
+ SET functional_data_type = 'Period Year'
139
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
140
+ AND functional_data_type IS NULL
141
+ AND (column_name ILIKE '%year%' OR column_name ILIKE '%yr%') -- name must contain "year" or "yr" (case-insensitive)
142
+ AND ( (min_value >= 1900 -- numeric form: between 1900 and the current year + 20
143
+ AND max_value <= DATE_PART('YEAR', NOW()) + 20
144
+ AND COALESCE(fractional_sum, 0) = 0) -- fractional_sum must be zero or NULL
145
+ OR
146
+ (min_text >= '1900' -- text form: same range, compared as strings
147
+ AND max_text <= (DATE_PART('YEAR', NOW()) + 20)::VARCHAR
148
+ AND avg_length = 4 -- average length of exactly 4
149
+ AND avg_embedded_spaces = 0)
150
+ );
151
+
152
+ UPDATE profile_results -- quarter columns: either the numbers 1..4 or text such as year-plus-quarter
153
+ SET functional_data_type = 'Period Quarter'
154
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
155
+ AND functional_data_type IS NULL
156
+ AND (column_name ILIKE '%qtr%' or column_name ILIKE '%quarter%') -- name must contain "qtr" or "quarter" (case-insensitive)
157
+ AND ( (min_value = 1 AND max_value = 4 -- numeric form: bounds exactly 1 and 4
158
+ AND COALESCE(fractional_sum, 0) = 0) -- fractional_sum must be zero or NULL
159
+ OR
160
+ (min_text >= '1900' AND max_text <= '2200' -- text form: starts with a year-like prefix (string comparison)
161
+ AND avg_length BETWEEN 6 and 7
162
+ AND SPLIT_PART(top_patterns, '|', 2) ~ '^\s*NNNN[-_]AN\s*$') -- pattern NNNN-AN or NNNN_AN; presumably N = digit and A = letter in the profiler's pattern notation - confirm
163
+ );
164
+
165
+ UPDATE profile_results -- text columns combining a year with a month
166
+ SET functional_data_type = 'Period Year-Mon'
167
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
168
+ AND functional_data_type IS NULL
169
+ AND column_name ILIKE '%mo%' -- name must contain "mo" (case-insensitive); NOTE(review): this also matches unrelated names such as "model" or "amount"
170
+ AND min_text >= '1900' AND max_text <= '2200' -- string comparison on the text bounds
171
+ AND (
172
+ (avg_length BETWEEN 6.8 AND 7.2 -- about 7 characters: NNNN-NN or NNNN_NN
173
+ AND SPLIT_PART(top_patterns, '|', 2) ~ '^\s*NNNN[-_]NN\s*$')
174
+ OR (avg_length BETWEEN 7.8 AND 8.2 -- about 8 characters: NNNN-AAA or NNNN_AAA
175
+ AND UPPER(SPLIT_PART(top_patterns, '|', 2)) ~ '^\s*NNNN[-_]AAA\s*$') -- UPPER() lets a lower-case "a" in the pattern match as well
176
+ );
177
+
178
+ UPDATE profile_results -- month-only columns: numbers 1..12 or month names
179
+ SET functional_data_type = 'Period Month'
180
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
181
+ AND functional_data_type IS NULL
182
+ AND column_name ILIKE '%mo%' -- name must contain "mo" (case-insensitive)
183
+ AND (
184
+ (max_length = 2 AND (min_text = '01' OR min_text = '1') AND max_text = '12') -- text month numbers; NOTE(review): for unpadded text '1'..'12' the string maximum would be '9', not '12' - confirm how max_text is computed
185
+ OR (min_value = 1 AND max_value = 12 AND COALESCE(SIGN(fractional_sum), 0) = 0) -- numeric months with fractional_sum zero or NULL
186
+ OR (max_length = 9 AND min_text ILIKE 'April' AND max_text ILIKE 'SEPTEMBER') -- full English month names: April sorts first and September last alphabetically
187
+ OR (max_length = 3 AND min_text ILIKE 'APR' AND max_text ILIKE 'SEP') -- three-letter abbreviations: Apr sorts first and Sep last alphabetically
188
+ );
189
+
190
+ UPDATE profile_results -- text columns holding a month abbreviation followed by a 1-2 digit number; no column-name check in this step
191
+ SET functional_data_type = 'Period Mon-NN'
192
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
193
+ AND functional_data_type IS NULL
194
+ AND min_text ~ '(?i)^(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[\s-]?\d{1,2}$' -- (?i) makes the match case-insensitive; the separator (whitespace or hyphen) is optional here
195
+ AND max_text ~ '(?i)^(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[\s-]?\d{1,2}$'
196
+ AND avg_length BETWEEN 5.8 AND 6.2 -- about 6 characters
197
+ AND TRIM(fn_parsefreq(top_patterns, 1, 2)) ~ '(?i)AAA[\s-]NN'; -- fn_parsefreq is defined outside this view; presumably it returns the pattern text of the top entry - confirm. Unlike the two regexes above, the separator is required here
198
+
199
+ UPDATE profile_results -- week-of-year columns
200
+ SET functional_data_type = 'Period Week'
201
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
202
+ AND functional_data_type IS NULL
203
+ AND ( column_name ILIKE '%wk%' OR column_name ILIKE '%week%' ) -- name must contain "wk" or "week" (case-insensitive)
204
+ AND distinct_value_ct BETWEEN 10 AND 53 -- at least 10 and at most 53 distinct values
205
+ AND ( ( min_text IN ('1', '01') AND max_text IN ('52','53') ) -- text form; NOTE(review): for unpadded text weeks the string maximum would be '9' - confirm how max_text is computed
206
+ OR ( min_value = 1 AND max_value IN (52, 53) AND COALESCE(SIGN(fractional_sum), 0) = 0 ) ); -- numeric form with fractional_sum zero or NULL
207
+
208
+ UPDATE profile_results -- day-of-week columns
209
+ SET functional_data_type = 'Period DOW'
210
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
211
+ AND functional_data_type IS NULL
212
+ AND ( column_name ILIKE '%day%' OR column_name ILIKE '%dow%') -- name must contain "day" or "dow" (case-insensitive)
213
+ AND distinct_value_ct = 7 -- exactly seven distinct values
214
+ AND ( ( min_text = '1' AND max_text = '7' ) -- text digits 1..7
215
+ OR ( min_value = 1 AND max_value = 7 AND COALESCE(SIGN(fractional_sum), 0) = 0) -- numeric 1..7 with fractional_sum zero or NULL
216
+ OR ( min_text ILIKE 'FRIDAY' AND max_text ILIKE 'WEDNESDAY' AND max_length = 9) -- full English day names: Friday sorts first and Wednesday last alphabetically
217
+ OR ( min_text ILIKE 'FRI' AND max_text ILIKE 'WED' AND max_length = 3) ); -- three-letter abbreviations: Fri sorts first and Wed last alphabetically
218
+
219
+
220
+ -- 3. Assign ADDRESS RELATED FIELDS, PHONE AND EMAIL
221
+ /*
222
+ Zip - Length must be less than or equal to 11. We're also looking at the column name
223
+ Email - Check column name and top patterns. top_patterns must have @ and .
224
+ Phone - Length must be less than or equal to 11. We're also looking at the column name
225
+ Address - Column name check. If the field is populated then it should have at least 4 distinct pattern count
226
+ State - Column name must have 'state' in it. A valid state must have max_length greater than or equal to 2.
227
+ To avoid confusing it with a field serving a different purpose, we're checking distinct_value_ct.
228
+ Also, a valid state should not have a number in the data.
229
+
230
+ */
231
+
232
+ UPDATE profile_results -- contact and address fields; the first matching WHEN wins
233
+ SET functional_data_type =
234
+ CASE WHEN (std_pattern_match = 'ZIP_USA' AND (column_name ILIKE '%zip%' OR column_name ILIKE '%postal%')) -- needs both the ZIP_USA pattern flag and a name containing "zip" or "postal"
235
+ THEN 'Zip'
236
+ WHEN std_pattern_match = 'EMAIL' -- pattern flag alone, no column-name check
237
+ THEN 'Email'
238
+ WHEN (column_name ILIKE '%phone%' AND max_length BETWEEN 7 AND 11) -- name contains "phone" with a maximum length of 7-11 ...
239
+ OR std_pattern_match = 'PHONE_USA' -- ... or the PHONE_USA pattern flag on its own
240
+ THEN 'Phone'
241
+ WHEN (column_name ILIKE '%address' AND column_name NOT ILIKE '%email%') -- name must END in "address" (no trailing wildcard) and must not contain "email"
242
+ OR std_pattern_match = 'STREET_ADDR'
243
+ THEN 'Address'
244
+ WHEN std_pattern_match = 'STATE_USA' -- pattern flag alone, no column-name check
245
+ THEN 'State'
246
+ ELSE functional_data_type -- otherwise keep the current value (NULL here, given the WHERE clause)
247
+ END
248
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
249
+ AND functional_data_type IS NULL;
250
+
251
+ -- Update City based on position of State and Zip: a column named like "city"/"cty" that is immediately followed by a 'State' column and then a 'Zip' column in the same table
252
+ UPDATE profile_results
253
+ SET functional_data_type = 'City'
254
+ FROM profile_results c -- c = candidate city column
255
+ INNER JOIN profile_results z -- z = column two positions after c, already classified as 'Zip'
256
+ ON (c.profile_run_id = z.profile_run_id
257
+ AND c.table_name = z.table_name
258
+ AND c.position + 2 = z.position
259
+ AND 'Zip' = z.functional_data_type)
260
+ INNER JOIN profile_results s -- s = column one position after c, already classified as 'State'
261
+ ON (c.profile_run_id = s.profile_run_id
262
+ AND c.table_name = s.table_name
263
+ AND c.position + 1 = s.position
264
+ AND 'State' = s.functional_data_type)
265
+ WHERE c.profile_run_id = '{PROFILE_RUN_ID}'
266
+ AND LOWER(c.column_name) SIMILAR TO '%c(|i)ty%' -- name contains "cty" or "city"
267
+ AND (c.functional_data_type IS NULL OR c.functional_data_type NOT IN ('State', 'Zip')) -- fix: NOT IN alone yields NULL for unclassified (NULL) columns and silently excluded them
268
+ AND profile_results.id = c.id; -- ties the updated row to the candidate found through the joins
269
+
270
+ -- Assign Name: short values with roughly one or two embedded spaces, in a column whose name looks like a person-name field
271
+ UPDATE profile_results
272
+ SET functional_data_type = 'Person Full Name'
273
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
274
+ AND functional_data_type IS NULL
275
+ AND avg_length <= 20
276
+ AND avg_embedded_spaces BETWEEN 0.9 AND 2.0
277
+ AND ( column_name ~ '(approver|full|contact|emp|employee|hcp|manager|mgr_|party|person|preferred|rep|reviewer|salesperson|spouse)(_| |)(name|nm)$' -- NOTE(review): ~ is case-sensitive and column_name is not lower-cased here, unlike the given/last name steps - confirm column names are stored lower-case
278
+ OR column_name IN ('name', 'nm') ); -- or the column is called exactly "name" or "nm"
279
+
280
+ -- Assign First Name: also covers middle names and nicknames
281
+ UPDATE profile_results
282
+ SET functional_data_type = 'Person Given Name'
283
+ WHERE profile_run_id = '{PROFILE_RUN_ID}' -- NOTE(review): no functional_data_type IS NULL filter here, so this overwrites any earlier classification - confirm intended
284
+ AND avg_length <= 8
285
+ AND avg_embedded_spaces < 0.2
286
+ AND (LOWER(column_name) SIMILAR TO '%f(|i)rst(_| |)n(|a)m%%' -- "first"/"frst" + optional "_" or space + "nam"/"nm"; the doubled %% behaves like a single % wildcard (possibly an escape for a formatting layer - confirm)
287
+ OR LOWER(column_name) SIMILAR TO '%(middle|mdl)(_| |)n(|a)m%%'
288
+ OR LOWER(column_name) SIMILAR TO '%nick(_| |)n(|a)m%%');
289
+
290
+ -- Assign Last Name: also covers maiden names and surnames
291
+ UPDATE profile_results
292
+ SET functional_data_type = 'Person Last Name'
293
+ WHERE profile_run_id = '{PROFILE_RUN_ID}' -- NOTE(review): no functional_data_type IS NULL filter here, so this overwrites any earlier classification - confirm intended
294
+ AND avg_length BETWEEN 5 and 8
295
+ AND avg_embedded_spaces < 0.2
296
+ AND (LOWER(column_name) SIMILAR TO '%l(|a)st(_| |)n(|a)m%' -- "last"/"lst" + optional "_" or space + "nam"/"nm"
297
+ OR LOWER(column_name) SIMILAR TO '%maiden(_| |)n(|a)m%'
298
+ OR LOWER(column_name) SIMILAR TO '%sur(_| |)n(|a)m%');
299
+
300
+ UPDATE profile_results -- Assign Entity Name - untyped alpha columns whose name ends in an organisation-like word followed by name or nm
301
+ SET functional_data_type = 'Entity Name'
302
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
303
+ AND functional_data_type IS NULL
304
+ AND general_type = 'A'
305
+ AND LOWER(column_name) ~ '(acct|account|affiliation|branch|business|co|comp|company|corp|corporate|cust|customer|distributor|employer|entity|firm|franchise|hco|org|organization|site|supplier|vendor|hospital|practice|clinic)(_| |)(name|nm)$'; -- lower-cased first because the regex operator is case-sensitive
306
+
307
+ -- Assign Boolean
308
+ /*
309
+ Boolean - general_type B, or one or two distinct values whose min_text and max_text form a yes/no, y/n, true/false or 0/1 pair.
310
+ The numeric 0 and 1 test is skipped when column_type contains numeric. Only untyped rows of this run are touched.
311
+ */
312
+ UPDATE profile_results
313
+ SET functional_data_type =
314
+ CASE WHEN general_type = 'B'
315
+ OR (distinct_value_ct = 2 -- both values present, so min and max must be the two halves of a known pair
316
+ AND ((LOWER(min_text) = 'no' AND LOWER(max_text) = 'yes')
317
+ OR (LOWER(min_text) = 'n' AND LOWER(max_text) = 'y')
318
+ OR (LOWER(min_text) = 'false' AND LOWER(max_text) = 'true')
319
+ OR (LOWER(min_text) = '0' AND LOWER(max_text) = '1')
320
+ OR (min_value = 0 AND max_value = 1 AND lower(column_type) NOT ILIKE '%numeric%')))
321
+ THEN 'Boolean'
322
+ WHEN general_type = 'B'
323
+ OR (distinct_value_ct = 1 -- we can have only 1 value populated but it can still be boolean
324
+ AND ( (LOWER(min_text) in ('no','yes') AND LOWER(max_text) in ('no','yes'))
325
+ OR (LOWER(min_text) in ('n','y') AND LOWER(max_text) in ('n','y'))
326
+ OR (LOWER(min_text) in ('false','true') AND LOWER(max_text) in ('false','true')) -- min and max are the same value here, so both sides must accept the same list
327
+ OR (LOWER(min_text) in ('0','1') AND LOWER(max_text) in ('0','1'))
328
+ OR (min_value = 0 AND max_value = 1 AND lower(column_type) NOT ILIKE '%numeric%'))) -- NOTE(review) min 0 with max 1 needs two distinct values, so this test cannot hold in the single-value branch - confirm the intended check
329
+ THEN 'Boolean'
330
+ ELSE functional_data_type
331
+ END
332
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
333
+ AND functional_data_type IS NULL;
334
+
335
+
336
+ -- 4. Assign MEASUREMENT TEXT, ID, CODE, ATTRIBUTE, DESCRIPTION & CATEGORY
336
+ /*
337
+ Applies to untyped alpha columns (general_type A) whose datatype_suggestion contains varchar. First matching rule wins.
338
+ Measurement Text - column name starts or ends with average, avg, count, ct, sum, total or tot,
339
+ every value is numeric and there is more than one value.
340
+ ID or Code - values include digits and are either short (max 20), space-free, over 80% populated and over 95% unique,
341
+ or space-free with near-constant length. ID when distinct count is above 200, otherwise Code.
342
+ If distinct record count is more than 200 and the field has varying length,
343
+ Attribute - at most 3 embedded spaces on average, max length 30 or less, fewer than 5 spaces in max_text
344
+ Description - anything else of varying length
345
+ If distinct record count is between 2 and 200,
346
+ Code - under one embedded space on average with max length under 15, or top_patterns holds both A and N
347
+ Category - everything else
348
+ */
349
+ UPDATE profile_results
350
+ SET functional_data_type =
351
+ CASE WHEN ( lower(column_name) ~ '_(average|avg|count|ct|sum|total|tot)$'
352
+ OR lower(column_name) ~ '^(average|avg|count|ct|sum|total|tot)_' )
353
+ AND numeric_ct = value_ct
354
+ AND value_ct > 1 THEN 'Measurement Text'
355
+ WHEN includes_digit_ct > 0
356
+ AND ( (max_length <= 20 AND avg_embedded_spaces < 0.1 -- Short without spaces
357
+ AND value_ct / NULLIF(record_ct, 0)::FLOAT > 0.8 -- mostly populated, NULLIF avoids division by zero
358
+ AND distinct_value_ct / NULLIF(value_ct, 0)::FLOAT > 0.95) -- mostly unique
359
+ OR (avg_embedded_spaces < 0.1 -- id should not have spaces and have consistent length
360
+ AND (round(max_length - avg_length) <= 1 OR round(avg_length - min_length) <= 1) ) )
361
+ THEN CASE WHEN distinct_value_ct > 200 THEN 'ID'
362
+ WHEN distinct_value_ct <= 200 AND avg_embedded_spaces < 1 THEN 'Code'
363
+ ELSE functional_data_type
364
+ END
365
+ WHEN distinct_value_ct > 200
366
+ THEN CASE WHEN max_length - ROUND(avg_length) > 1 AND ROUND(avg_length) - min_length > 1 -- varies length => text
367
+ THEN CASE WHEN avg_embedded_spaces BETWEEN 0 AND 3 -- at most 3 embedded spaces on average
368
+ AND max_length <= 30 -- and shorter length
369
+ AND fn_charcount(max_text, ' ') < 5 -- presumably counts the spaces in max_text, verify against fn_charcount
370
+ THEN 'Attribute'
371
+ ELSE 'Description'
372
+ END
373
+ END -- no ELSE, so a value set without varying length yields NULL and stays untyped
374
+ WHEN distinct_value_ct BETWEEN 2 AND 200
375
+ THEN CASE WHEN (avg_embedded_spaces < 1 AND max_length < 15)
376
+ OR (fn_charcount(top_patterns, 'A') > 0 AND fn_charcount(top_patterns, 'N') > 0)
377
+ THEN 'Code'
378
+ ELSE 'Category'
379
+ END
380
+ ELSE functional_data_type
381
+ END
382
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
383
+ AND functional_data_type IS NULL
384
+ AND general_type='A'
385
+ AND LOWER(datatype_suggestion) SIMILAR TO '(%varchar%)';
387
+
388
+ -- 5. Assign FLAG
388
+ /*
389
+ Flag - assigned only to alpha columns that are still untyped and have between 3 and 5 distinct values,
390
+ where every value is 2 to 7 characters long and top_patterns contains at least one A.
391
+ */
392
+
393
+ UPDATE profile_results
394
+ SET functional_data_type =
395
+ CASE
396
+ WHEN general_type = 'A' AND distinct_value_ct BETWEEN 3 AND 5
397
+ AND (lower(column_type) NOT ILIKE '%numeric%' OR lower(datatype_suggestion) NOT ILIKE '%numeric%')-- should not be decimal. NOTE(review) with OR a row is excluded only when both fields mention numeric - confirm AND was not intended
398
+ AND (min_length > 1 AND max_length <= 7)
399
+ AND functional_data_type IS NULL
400
+ AND fn_charcount(top_patterns, 'A') > 0
401
+ THEN 'Flag'
402
+ END -- no ELSE, non-matching rows are set to NULL, which they already are given the WHERE clause
403
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
404
+ AND functional_data_type IS NULL;
406
+
407
+
408
+ -- 6. Assign the remaining types where functional data type is null - after this step no row of the run is left untyped
409
+
410
+ UPDATE profile_results
411
+ SET functional_data_type =
412
+ CASE
413
+ WHEN (max_value - min_value + 1 = distinct_value_ct) AND (fractional_sum IS NULL OR fractional_sum > 0) -- contiguous range of values. NOTE(review) comments below say fractional_sum 0 means integer, yet this accepts only NULL or above 0 - confirm
414
+ THEN 'Sequence'
415
+ WHEN general_type='N'
416
+ AND LOWER(column_name) SIMILAR TO '%(no|num|number|nbr)'
417
+ AND (column_type ILIKE '%int%'
418
+ OR
419
+ (RTRIM(SPLIT_PART(column_type, ',', 2), ')') > '0' -- text comparison on the part of column_type after the comma, presumably the declared scale
420
+ AND fractional_sum = 0) -- 0 implies integer; null is float or non-numeric
421
+ ) THEN
422
+ CASE
423
+ WHEN ROUND(100.0 * value_ct::FLOAT/NULLIF(record_ct, 0)) > 70 THEN 'ID' -- more than 70 percent populated
424
+ ELSE 'Attribute-Numeric'
425
+ END
426
+ WHEN general_type='N'
427
+ AND ( column_type ILIKE '%int%'
428
+ OR
429
+ (RTRIM(SPLIT_PART(column_type, ',', 2), ')') > '0'
430
+ AND fractional_sum = 0) -- 0 implies integer; null is float or non-numeric
431
+ ) THEN 'Measurement Discrete'
432
+ WHEN general_type='N' and distinct_value_ct > 1 and min_value < 0
433
+ then 'Measurement'
434
+ WHEN general_type='N' and distinct_value_ct > 1 and min_value >= 0
435
+ and stdev_value/nullif(avg_value,0) >= 0.10 -- standard deviation at least a tenth of the average
436
+ then 'Measurement'
437
+ ELSE 'UNKNOWN'
438
+ END
439
+
440
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
441
+ AND functional_data_type IS NULL;
442
+
443
+ -- Assign City - text-like column positioned directly before a State column. NOTE(review) no functional_data_type IS NULL guard, so any earlier type is overwritten - confirm intended
444
+ UPDATE profile_results
445
+ SET functional_data_type = 'City'
446
+ FROM ( SELECT p.id
447
+ FROM profile_results p
448
+ LEFT JOIN profile_results pn -- pn is the column at the next position in the same table and run
449
+ ON p.profile_run_id = pn.profile_run_id
450
+ AND p.table_name = pn.table_name
451
+ AND p.position = pn.position - 1
452
+ WHERE p.profile_run_id = '{PROFILE_RUN_ID}'
453
+ AND p.includes_digit_ct::FLOAT/NULLIF(p.value_ct,0)::FLOAT < 0.05 -- under 5 percent of values contain digits
454
+ AND p.numeric_ct::FLOAT/NULLIF(p.value_ct,0)::FLOAT < 0.05 -- under 5 percent numeric
455
+ AND p.date_ct::FLOAT/NULLIF(p.value_ct,0)::FLOAT < 0.05 -- under 5 percent dates
456
+ AND pn.functional_data_type = 'State' -- filtering on pn here drops unmatched rows, so the LEFT JOIN acts as an inner join
457
+ AND p.avg_length BETWEEN 7 AND 12
458
+ AND p.avg_embedded_spaces < 1
459
+ AND p.distinct_value_ct BETWEEN 15 AND 40000 ) c
460
+ WHERE profile_results.id = c.id;
461
+
462
+ -- 7. Assign 'ID-Unique' functional data type to ID columns where every record holds a distinct value and the table has more than 50 records
463
+
464
+ UPDATE profile_results
465
+ SET functional_data_type = 'ID-Unique'
466
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
467
+ AND functional_data_type IN ('ID', 'ID-Secondary')
468
+ AND record_ct = distinct_value_ct -- equality with record_ct also implies no missing values
469
+ AND record_ct > 50;
470
+
471
+ -- Reclassify remaining ID columns as ID-Secondary or ID-Group. NOTE(review) the ID-Unique update before this one also accepts ID-Secondary, which is only assigned here - confirm the intended order
472
+
473
+ UPDATE profile_results
474
+ SET functional_data_type = CASE
475
+ WHEN ROUND(100.0 * value_ct::FLOAT/NULLIF(record_ct, 0)) > 70 -- more than 70 percent populated
476
+ AND ROUND(100.0 * distinct_value_ct::FLOAT/NULLIF(value_ct, 0)) >= 75 THEN 'ID-Secondary' -- and at least 75 percent distinct
477
+ WHEN ROUND(100.0 * value_ct::FLOAT/NULLIF(record_ct, 0)) > 70
478
+ AND ROUND(100.0 * distinct_value_ct::FLOAT/NULLIF(value_ct, 0)) < 75 THEN 'ID-Group' -- well populated but repeating values
479
+ ELSE functional_data_type
480
+ END
481
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
482
+ AND functional_data_type = 'ID';
483
+
484
+ -- 8. Assign 'ID-FK' functional data type to columns that share a name with an ID-Unique column of another table in the same table group
485
+
486
+ UPDATE profile_results
487
+ SET functional_data_type = 'ID-FK'
488
+ FROM (Select table_groups_id, table_name, column_name
489
+ from profile_results
490
+ where functional_data_type = 'ID-Unique'
491
+ and profile_run_id = '{PROFILE_RUN_ID}') ui
492
+ WHERE profile_results.profile_run_id = '{PROFILE_RUN_ID}'
493
+ and profile_results.column_name = ui.column_name -- matched purely by column name, values are not compared
494
+ and profile_results.table_groups_id = ui.table_groups_id
495
+ and profile_results.table_name <> ui.table_name
496
+ and profile_results.functional_data_type <> 'ID-Unique'; -- a column that is itself unique keeps its ID-Unique type
497
+
498
+ -- Assign
499
+
500
+ -- 9. Functional Data Type: 'Measurement Pct' - numeric measurement or unknown columns named like pct or percent with all values between -200 and 200
501
+
502
+ UPDATE profile_results
503
+ SET functional_data_type = 'Measurement Pct'
504
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
505
+ AND functional_data_type IN ('Measurement', 'Measurement Discrete', 'UNKNOWN')
506
+ AND general_type = 'N'
507
+ AND min_value >= -200
508
+ AND max_value <= 200
509
+ AND (column_name ILIKE '%pct%' OR column_name ILIKE '%percent%');
510
+
511
+ UPDATE profile_results -- Measurement Pct for text columns typed as Code whose listed patterns are all digits followed by a percent sign
512
+ SET functional_data_type = 'Measurement Pct'
513
+ WHERE profile_run_id = '{PROFILE_RUN_ID}'
514
+ AND functional_data_type = 'Code'
515
+ AND distinct_pattern_ct between 1 and 3
516
+ AND value_ct = includes_digit_ct -- every value contains a digit
517
+ AND min_text >= '0' -- NOTE(review) text comparison, not numeric - confirm the intended bounds
518
+ AND max_text <= '99'
519
+ AND TRIM(SPLIT_PART(top_patterns, '|', 2)) ~ '^N{1,3}(\.N+)?%$' -- even-numbered fields of top_patterns are read as the patterns, presumably count and pattern alternate - verify format
520
+ AND (TRIM(SPLIT_PART(top_patterns, '|', 4)) ~ '^N{1,3}(\.N+)?%$' OR distinct_pattern_ct < 2) -- second pattern checked only when it exists
521
+ AND (TRIM(SPLIT_PART(top_patterns, '|', 6)) ~ '^N{1,3}(\.N+)?%$' OR distinct_pattern_ct < 3); -- third pattern checked only when it exists
522
+
523
+ --- END OF QUERY ---
@@ -0,0 +1,48 @@
1
+ WITH tablesrank AS -- profiling history for every table of the project and schema that was profiled on the given run date
2
+ (SELECT DISTINCT p.project_code,
3
+ p.schema_name,
4
+ p.table_name,
5
+ p.run_date,
6
+ p.record_ct,
7
+ p.functional_data_type,
8
+ DENSE_RANK() OVER (PARTITION BY p.schema_name, p.table_name ORDER BY p.run_date DESC) AS rnk -- rnk is not referenced by the later steps of this statement
9
+ FROM profile_results p
10
+ INNER JOIN (SELECT DISTINCT schema_name, table_name
11
+ FROM profile_results
12
+ WHERE project_code = '{PROJECT_CODE}'
13
+ AND schema_name = '{DATA_SCHEMA}'
14
+ AND run_date = '{RUN_DATE}') pt
15
+ ON (p.schema_name = pt.schema_name
16
+ AND p.table_name = pt.table_name)
17
+ WHERE p.project_code = '{PROJECT_CODE}'
18
+ AND p.schema_name = '{DATA_SCHEMA}'
19
+ ORDER BY p.schema_name, p.table_name, p.run_date DESC),
20
+ tablescount AS -- adds the record count and run date of the preceding row within each table
21
+ (SELECT *
22
+ , LAG(record_ct, 1)
23
+ OVER (PARTITION BY schema_name, table_name ORDER BY schema_name, table_name, run_date) AS prev_record_ct -- NOTE(review) tablesrank keeps one row per distinct functional type, so the preceding row may belong to the same run - confirm
24
+ , LAG(run_date, 1)
25
+ OVER (PARTITION BY schema_name, table_name ORDER BY schema_name, table_name, run_date) AS prev_run_date
26
+ FROM tablesrank
27
+ ),
28
+ tablestat AS -- one row per table with its derived period and type
29
+ (SELECT project_code,
30
+ schema_name,
31
+ table_name,
32
+ CASE
33
+ -- table period is cumulative if the record count never drops below the previous record count
34
+ WHEN SUM(CASE WHEN record_ct - prev_record_ct < 0 THEN 1 ELSE 0 END) = 0 THEN 'cumulative'
35
+ ELSE 'window' END AS table_period,
36
+ CASE
37
+ WHEN SUM(CASE WHEN functional_data_type = 'Measurement' THEN 1 ELSE 0 END) > 0 -- at least one Measurement column
38
+ AND SUM(CASE WHEN functional_data_type ILIKE '%Transactional Date%' THEN 1 ELSE 0 END) > 0 -- and at least one transactional date column
39
+ THEN 'transaction'
40
+ ELSE 'domain' END AS table_type
41
+ FROM tablescount
42
+ GROUP BY project_code, schema_name, table_name
43
+ ORDER BY project_code, schema_name, table_name)
44
+ INSERT INTO stg_functional_table_updates
45
+ (project_code, schema_name, run_date, table_name, table_period, table_type)
46
+ SELECT project_code, schema_name, '{RUN_DATE}' as run_date,
47
+ table_name, table_period, table_type
48
+ FROM tablestat;
@@ -0,0 +1,8 @@
1
+ UPDATE profile_results -- copy the staged classification onto every profile row of the table for the given run date, as period-type
2
+ SET functional_table_type = COALESCE(s.table_period)||'-'||COALESCE(s.table_type) -- NOTE(review) single-argument COALESCE returns its argument unchanged, so a NULL on either side still makes the result NULL - confirm whether a fallback was intended
3
+ FROM stg_functional_table_updates s
4
+ WHERE s.project_code = profile_results.project_code
5
+ AND s.schema_name = profile_results.schema_name
6
+ AND s.table_name = profile_results.table_name
7
+ AND s.run_date = profile_results.run_date
8
+ AND s.run_date = '{RUN_DATE}';