dataops-testgen 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. dataops_testgen-2.2.0.dist-info/LICENSE +203 -0
  2. dataops_testgen-2.2.0.dist-info/METADATA +287 -0
  3. dataops_testgen-2.2.0.dist-info/NOTICE +5 -0
  4. dataops_testgen-2.2.0.dist-info/RECORD +270 -0
  5. dataops_testgen-2.2.0.dist-info/WHEEL +5 -0
  6. dataops_testgen-2.2.0.dist-info/entry_points.txt +2 -0
  7. dataops_testgen-2.2.0.dist-info/top_level.txt +1 -0
  8. testgen/__init__.py +0 -0
  9. testgen/__main__.py +770 -0
  10. testgen/commands/__init__.py +0 -0
  11. testgen/commands/queries/__init__.py +0 -0
  12. testgen/commands/queries/execute_cat_tests_query.py +95 -0
  13. testgen/commands/queries/execute_tests_query.py +160 -0
  14. testgen/commands/queries/generate_tests_query.py +94 -0
  15. testgen/commands/queries/profiling_query.py +366 -0
  16. testgen/commands/queries/test_parameter_validation_query.py +88 -0
  17. testgen/commands/run_execute_cat_tests.py +162 -0
  18. testgen/commands/run_execute_tests.py +168 -0
  19. testgen/commands/run_generate_tests.py +107 -0
  20. testgen/commands/run_get_entities.py +122 -0
  21. testgen/commands/run_launch_db_config.py +84 -0
  22. testgen/commands/run_observability_exporter.py +330 -0
  23. testgen/commands/run_profiling_bridge.py +495 -0
  24. testgen/commands/run_quick_start.py +168 -0
  25. testgen/commands/run_setup_profiling_tools.py +96 -0
  26. testgen/commands/run_test_definition.py +146 -0
  27. testgen/commands/run_test_parameter_validation.py +135 -0
  28. testgen/commands/run_upgrade_db_config.py +156 -0
  29. testgen/common/__init__.py +8 -0
  30. testgen/common/clean_sql.py +53 -0
  31. testgen/common/credentials.py +25 -0
  32. testgen/common/database/__init__.py +0 -0
  33. testgen/common/database/database_service.py +629 -0
  34. testgen/common/database/flavor/__init__.py +0 -0
  35. testgen/common/database/flavor/flavor_service.py +75 -0
  36. testgen/common/database/flavor/mssql_flavor_service.py +34 -0
  37. testgen/common/database/flavor/postgresql_flavor_service.py +5 -0
  38. testgen/common/database/flavor/redshift_flavor_service.py +22 -0
  39. testgen/common/database/flavor/snowflake_flavor_service.py +69 -0
  40. testgen/common/database/flavor/trino_flavor_service.py +21 -0
  41. testgen/common/date_service.py +68 -0
  42. testgen/common/display_service.py +85 -0
  43. testgen/common/docker_service.py +76 -0
  44. testgen/common/encrypt.py +55 -0
  45. testgen/common/get_pipeline_parms.py +57 -0
  46. testgen/common/logs.py +79 -0
  47. testgen/common/process_service.py +62 -0
  48. testgen/common/read_file.py +69 -0
  49. testgen/settings.py +440 -0
  50. testgen/template/dbsetup/010_create_base_schema.sql +2 -0
  51. testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +179 -0
  52. testgen/template/dbsetup/030_initialize_new_schema_structure.sql +735 -0
  53. testgen/template/dbsetup/040_populate_new_schema_project.sql +59 -0
  54. testgen/template/dbsetup/050_populate_new_schema_metadata.sql +1517 -0
  55. testgen/template/dbsetup/060_create_standard_views.sql +248 -0
  56. testgen/template/dbsetup/070_create_default_users.sql +17 -0
  57. testgen/template/dbsetup/075_grant_role_rights.sql +43 -0
  58. testgen/template/dbsetup/080_set_current_revision.sql +5 -0
  59. testgen/template/dbupgrade/0100_incremental_upgrade.sql +5 -0
  60. testgen/template/dbupgrade/0101_incremental_upgrade.sql +15 -0
  61. testgen/template/dbupgrade/0102_incremental_upgrade.sql +4 -0
  62. testgen/template/dbupgrade/0103_incremental_upgrade.sql +22 -0
  63. testgen/template/dbupgrade/0104_incremental_upgrade.sql +44 -0
  64. testgen/template/dbupgrade/0105_incremental_upgrade.sql +1 -0
  65. testgen/template/dbupgrade/0106_incremental_upgrade.sql +5 -0
  66. testgen/template/dbupgrade/0107_incremental_upgrade.sql +3 -0
  67. testgen/template/dbupgrade_helpers/get_tg_revision.sql +2 -0
  68. testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +116 -0
  69. testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql +11 -0
  70. testgen/template/exec_cat_tests/ex_cat_results_parse.sql +69 -0
  71. testgen/template/exec_cat_tests/ex_cat_retrieve_agg_test_parms.sql +6 -0
  72. testgen/template/exec_cat_tests/ex_cat_test_query.sql +8 -0
  73. testgen/template/execution/ex_finalize_test_run_results.sql +37 -0
  74. testgen/template/execution/ex_get_tests_non_cat.sql +47 -0
  75. testgen/template/execution/ex_update_test_record_in_testrun_table.sql +27 -0
  76. testgen/template/execution/ex_write_test_record_to_testrun_table.sql +6 -0
  77. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +48 -0
  78. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql +34 -0
  79. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql +49 -0
  80. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql +49 -0
  81. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +49 -0
  82. testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +39 -0
  83. testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +58 -0
  84. testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +44 -0
  85. testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +37 -0
  86. testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +53 -0
  87. testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +46 -0
  88. testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +59 -0
  89. testgen/template/flavors/generic/profiling/contingency_counts.sql +3 -0
  90. testgen/template/flavors/generic/validate_tests/ex_get_project_column_list_generic.sql +3 -0
  91. testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql +53 -0
  92. testgen/template/flavors/mssql/profiling/project_ddf_query_mssql.sql +35 -0
  93. testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +246 -0
  94. testgen/template/flavors/mssql/profiling/project_secondary_profiling_query_mssql.sql +36 -0
  95. testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql +8 -0
  96. testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql +12 -0
  97. testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql +54 -0
  98. testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql +4 -0
  99. testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql +1 -0
  100. testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql +46 -0
  101. testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql +59 -0
  102. testgen/template/flavors/postgresql/profiling/project_ddf_query_postgresql.sql +42 -0
  103. testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +225 -0
  104. testgen/template/flavors/postgresql/profiling/project_secondary_profiling_query_postgresql.sql +28 -0
  105. testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql +157 -0
  106. testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql +1 -0
  107. testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql +2 -0
  108. testgen/template/flavors/redshift/profiling/project_ddf_query_redshift.sql +38 -0
  109. testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +221 -0
  110. testgen/template/flavors/redshift/profiling/project_secondary_profiling_query_redshift.sql +29 -0
  111. testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql +115 -0
  112. testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql +1 -0
  113. testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql +2 -0
  114. testgen/template/flavors/snowflake/profiling/project_ddf_query_snowflake.sql +38 -0
  115. testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +220 -0
  116. testgen/template/flavors/snowflake/profiling/project_secondary_profiling_query_snowflake.sql +29 -0
  117. testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql +69 -0
  118. testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql +1 -0
  119. testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql +6 -0
  120. testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml +219 -0
  121. testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql +92 -0
  122. testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql +1 -0
  123. testgen/template/gen_funny_cat_tests/gen_test_constant.sql +104 -0
  124. testgen/template/gen_funny_cat_tests/gen_test_distinct_value_ct.sql +98 -0
  125. testgen/template/gen_funny_cat_tests/gen_test_row_ct.sql +57 -0
  126. testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql +59 -0
  127. testgen/template/generation/gen_delete_old_tests.sql +5 -0
  128. testgen/template/generation/gen_insert_test_suite.sql +5 -0
  129. testgen/template/generation/gen_retrieve_or_insert_test_suite.sql +58 -0
  130. testgen/template/generation/gen_standard_test_type_list.sql +13 -0
  131. testgen/template/generation/gen_standard_tests.sql +48 -0
  132. testgen/template/get_entities/get_connection.sql +21 -0
  133. testgen/template/get_entities/get_connections_list.sql +9 -0
  134. testgen/template/get_entities/get_latest.sql +4 -0
  135. testgen/template/get_entities/get_profile.sql +12 -0
  136. testgen/template/get_entities/get_profile_info.sql +17 -0
  137. testgen/template/get_entities/get_profile_list.sql +17 -0
  138. testgen/template/get_entities/get_profile_screen.sql +275 -0
  139. testgen/template/get_entities/get_project_list.sql +6 -0
  140. testgen/template/get_entities/get_table_group_list.sql +10 -0
  141. testgen/template/get_entities/get_test_generation_list.sql +18 -0
  142. testgen/template/get_entities/get_test_info.sql +41 -0
  143. testgen/template/get_entities/get_test_results_for_run_cli.sql +16 -0
  144. testgen/template/get_entities/get_test_run_list.sql +24 -0
  145. testgen/template/get_entities/get_test_suite.sql +13 -0
  146. testgen/template/get_entities/get_test_suite_list.sql +18 -0
  147. testgen/template/get_entities/list_test_types.sql +4 -0
  148. testgen/template/observability/get_event_data.sql +23 -0
  149. testgen/template/observability/get_test_results.sql +41 -0
  150. testgen/template/observability/update_test_results_exported_to_observability.sql +12 -0
  151. testgen/template/parms/parms_profiling.sql +34 -0
  152. testgen/template/parms/parms_test_execution.sql +13 -0
  153. testgen/template/parms/parms_test_gen.sql +23 -0
  154. testgen/template/profiling/contingency_columns.sql +7 -0
  155. testgen/template/profiling/datatype_suggestions.sql +56 -0
  156. testgen/template/profiling/functional_datatype.sql +523 -0
  157. testgen/template/profiling/functional_tabletype_stage.sql +48 -0
  158. testgen/template/profiling/functional_tabletype_update.sql +8 -0
  159. testgen/template/profiling/pii_flag.sql +133 -0
  160. testgen/template/profiling/profile_anomalies_screen_column.sql +22 -0
  161. testgen/template/profiling/profile_anomalies_screen_multi_column.sql +58 -0
  162. testgen/template/profiling/profile_anomalies_screen_table.sql +22 -0
  163. testgen/template/profiling/profile_anomalies_screen_table_dates.sql +30 -0
  164. testgen/template/profiling/profile_anomalies_screen_variants.sql +40 -0
  165. testgen/template/profiling/profile_anomaly_types_get.sql +3 -0
  166. testgen/template/profiling/project_get_table_sample_count.sql +22 -0
  167. testgen/template/profiling/project_profile_run_record_insert.sql +8 -0
  168. testgen/template/profiling/project_profile_run_record_update.sql +5 -0
  169. testgen/template/profiling/project_profile_run_record_update_status.sql +5 -0
  170. testgen/template/profiling/project_update_profile_results_to_estimates.sql +32 -0
  171. testgen/template/profiling/refresh_anomalies.sql +33 -0
  172. testgen/template/profiling/refresh_data_chars_from_profiling.sql +156 -0
  173. testgen/template/profiling/secondary_profiling_columns.sql +12 -0
  174. testgen/template/profiling/secondary_profiling_delete.sql +4 -0
  175. testgen/template/profiling/secondary_profiling_update.sql +18 -0
  176. testgen/template/quick_start/populate_target_data.sql +1077 -0
  177. testgen/template/quick_start/recreate_target_data_schema.sql +167 -0
  178. testgen/template/quick_start/update_target_data.sql +100 -0
  179. testgen/template/updates/create_tmp_test_definition.sql +19 -0
  180. testgen/template/updates/get_test_def_parms.sql +38 -0
  181. testgen/template/updates/populate_stg_test_definitions.sql +184 -0
  182. testgen/template/validate_tests/ex_disable_tests_test_definitions.sql +5 -0
  183. testgen/template/validate_tests/ex_flag_tests_test_definitions.sql +64 -0
  184. testgen/template/validate_tests/ex_get_project_column_list_generic.sql +3 -0
  185. testgen/template/validate_tests/ex_get_test_column_list_tg.sql +65 -0
  186. testgen/template/validate_tests/ex_write_test_val_errors.sql +22 -0
  187. testgen/ui/__init__.py +0 -0
  188. testgen/ui/app.py +98 -0
  189. testgen/ui/assets/dk_logo.svg +46 -0
  190. testgen/ui/assets/question_mark.png +0 -0
  191. testgen/ui/assets/scripts.js +68 -0
  192. testgen/ui/assets/style.css +140 -0
  193. testgen/ui/bootstrap.py +109 -0
  194. testgen/ui/components/__init__.py +0 -0
  195. testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fBBc4.woff2 +0 -0
  196. testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fChc4EsA.woff2 +0 -0
  197. testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu4mxK.woff2 +0 -0
  198. testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu7GxKOzY.woff2 +0 -0
  199. testgen/ui/components/frontend/css/material-symbols-rounded.css +24 -0
  200. testgen/ui/components/frontend/css/material-symbols-rounded.woff2 +0 -0
  201. testgen/ui/components/frontend/css/roboto-font-faces.css +35 -0
  202. testgen/ui/components/frontend/css/shared.css +36 -0
  203. testgen/ui/components/frontend/img/dk_logo.svg +46 -0
  204. testgen/ui/components/frontend/index.html +17 -0
  205. testgen/ui/components/frontend/js/components/breadcrumbs.js +86 -0
  206. testgen/ui/components/frontend/js/components/button.js +66 -0
  207. testgen/ui/components/frontend/js/components/location.js +62 -0
  208. testgen/ui/components/frontend/js/components/select.js +75 -0
  209. testgen/ui/components/frontend/js/components/sidebar.js +358 -0
  210. testgen/ui/components/frontend/js/main.js +99 -0
  211. testgen/ui/components/frontend/js/streamlit.js +19 -0
  212. testgen/ui/components/frontend/js/van.min.js +1 -0
  213. testgen/ui/components/utils/__init__.py +0 -0
  214. testgen/ui/components/utils/callbacks.py +51 -0
  215. testgen/ui/components/utils/component.py +13 -0
  216. testgen/ui/components/widgets/__init__.py +6 -0
  217. testgen/ui/components/widgets/breadcrumbs.py +32 -0
  218. testgen/ui/components/widgets/location.py +65 -0
  219. testgen/ui/components/widgets/modal.py +97 -0
  220. testgen/ui/components/widgets/sidebar.py +69 -0
  221. testgen/ui/navigation/__init__.py +0 -0
  222. testgen/ui/navigation/menu.py +42 -0
  223. testgen/ui/navigation/page.py +20 -0
  224. testgen/ui/navigation/router.py +63 -0
  225. testgen/ui/queries/__init__.py +0 -0
  226. testgen/ui/queries/authentication_queries.py +47 -0
  227. testgen/ui/queries/connection_queries.py +121 -0
  228. testgen/ui/queries/profiling_queries.py +148 -0
  229. testgen/ui/queries/project_queries.py +9 -0
  230. testgen/ui/queries/table_group_queries.py +186 -0
  231. testgen/ui/queries/test_definition_queries.py +270 -0
  232. testgen/ui/queries/test_run_queries.py +32 -0
  233. testgen/ui/queries/test_suite_queries.py +145 -0
  234. testgen/ui/scripts/__init__.py +0 -0
  235. testgen/ui/scripts/patch_streamlit.py +111 -0
  236. testgen/ui/services/__init__.py +0 -0
  237. testgen/ui/services/authentication_service.py +119 -0
  238. testgen/ui/services/connection_service.py +220 -0
  239. testgen/ui/services/database_service.py +282 -0
  240. testgen/ui/services/form_service.py +1008 -0
  241. testgen/ui/services/javascript_service.py +44 -0
  242. testgen/ui/services/query_service.py +316 -0
  243. testgen/ui/services/string_service.py +12 -0
  244. testgen/ui/services/table_group_service.py +130 -0
  245. testgen/ui/services/test_definition_service.py +117 -0
  246. testgen/ui/services/test_run_service.py +13 -0
  247. testgen/ui/services/test_suite_service.py +76 -0
  248. testgen/ui/services/toolbar_service.py +77 -0
  249. testgen/ui/session.py +46 -0
  250. testgen/ui/views/__init__.py +0 -0
  251. testgen/ui/views/app_log_modal.py +92 -0
  252. testgen/ui/views/connections.py +72 -0
  253. testgen/ui/views/connections_base.py +367 -0
  254. testgen/ui/views/login.py +40 -0
  255. testgen/ui/views/not_found.py +16 -0
  256. testgen/ui/views/overview.py +34 -0
  257. testgen/ui/views/profiling_anomalies.py +501 -0
  258. testgen/ui/views/profiling_details.py +335 -0
  259. testgen/ui/views/profiling_modal.py +40 -0
  260. testgen/ui/views/profiling_results.py +206 -0
  261. testgen/ui/views/profiling_summary.py +177 -0
  262. testgen/ui/views/project_settings.py +74 -0
  263. testgen/ui/views/table_groups.py +530 -0
  264. testgen/ui/views/test_definitions.py +1020 -0
  265. testgen/ui/views/test_results.py +908 -0
  266. testgen/ui/views/test_runs.py +195 -0
  267. testgen/ui/views/test_suites.py +545 -0
  268. testgen/utils/__init__.py +0 -0
  269. testgen/utils/plugins.py +17 -0
  270. testgen/utils/singleton.py +14 -0
@@ -0,0 +1,29 @@
1
+ -- Get Freqs for selected columns: top 10 values with counts, a rollup of all other values, and an MD5 hash of the distinct values
2
+ WITH ranked_vals AS (  -- one row per distinct value greater than a single space, ranked by descending count then by value
3
+ SELECT "{COL_NAME}",
4
+ COUNT(*) AS ct,
5
+ ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC, "{COL_NAME}") AS rn
6
+ FROM {DATA_SCHEMA}.{DATA_TABLE}
7
+ WHERE "{COL_NAME}" > ' '
8
+ GROUP BY "{COL_NAME}"
9
+ ),
10
+ consol_vals AS (  -- ranks 1-10 keep their own row, ranks above 10 fall into the NULL group and are reported as Other Values
11
+ SELECT COALESCE(CASE WHEN rn <= 10 THEN '| ' || "{COL_NAME}" || ' | ' || CAST(ct AS VARCHAR)
12
+ ELSE NULL
13
+ END, '| Other Values (' || CAST(COUNT(DISTINCT "{COL_NAME}") as VARCHAR) || ') | ' || CAST(SUM(ct) as VARCHAR) ) AS val,
14
+ MIN(rn) as min_rn  -- lowest rank in the group, used below so the rollup row sorts after the top 10
15
+ FROM ranked_vals
16
+ GROUP BY CASE WHEN rn <= 10 THEN '| ' || "{COL_NAME}" || ' | ' || CAST(ct AS VARCHAR)
17
+ ELSE NULL
18
+ END
19
+ )
20
+ SELECT '{PROJECT_CODE}' as project_code,
21
+ '{DATA_SCHEMA}' as schema_name,
22
+ '{RUN_DATE}' as run_date,
23
+ '{DATA_TABLE}' as table_name,
24
+ '{COL_NAME}' as column_name,
25
+ REPLACE(LISTAGG(val, '^#^') WITHIN GROUP (ORDER BY min_rn), '^#^', CHR(10)) AS top_freq_values,  -- one entry per line, in rank order
26
+ ( SELECT MD5(LISTAGG(DISTINCT NULLIF("{COL_NAME}", ''), '|')
27
+ WITHIN GROUP (ORDER BY NULLIF("{COL_NAME}", ''))) as dvh
28
+ FROM {DATA_SCHEMA}.{DATA_TABLE} ) as distinct_value_hash  -- hash over every distinct non-empty value in the table, not only the top 10
29
+ FROM consol_vals;
@@ -0,0 +1,69 @@
1
+ CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fndk_isnum(strparm VARCHAR)  -- returns 1 when strparm looks like a number, else 0 (a NULL input falls through to 0)
2
+ RETURNS INTEGER
3
+ LANGUAGE SQL
4
+ IMMUTABLE
5
+ AS
6
+ $$
7
+ SELECT CASE  -- accepts optional surrounding whitespace, a sign, a dollar sign, comma-grouped digits, a decimal part and a trailing percent sign
8
+ WHEN REGEXP_LIKE(strparm::VARCHAR, '^\\s*[+-]?\\$?\\s*[0-9]+(,[0-9]{3})*(\\.[0-9]*)?[\\%]?\\s*$') THEN 1
9
+ ELSE 0
10
+ END
11
+ $$;
12
+
13
+
14
+ CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fndk_isdate(strparm VARCHAR)  -- returns 1 when strparm parses under one of the formats below, else 0
15
+ RETURNS INTEGER
16
+ LANGUAGE SQL
17
+ IMMUTABLE
18
+ AS
19
+ $$
20
+ SELECT CASE  -- branches are tried in order and the first match wins
21
+ -- YYYY-MM-DD HH:MM:SS SSSSSS
22
+ WHEN TRY_TO_DATE(strparm, 'YYYY-MM-DD HH:MI:SS SSSSSS') IS NOT NULL THEN 1  -- NOTE(review): Snowflake documents HH24/HH12 and FF for fractional seconds - confirm that HH and SSSSSS parse as intended
23
+
24
+ -- YYYY-MM-DD HH:MM:SS
25
+ WHEN TRY_TO_DATE(strparm, 'YYYY-MM-DD HH:MI:SS') IS NOT NULL THEN 1
26
+
27
+ -- YYYYMMDDHHMMSSSSSS
28
+ WHEN TRY_TO_DATE(strparm, 'YYYYMMDDHHMISSSSSS') IS NOT NULL THEN 1
29
+
30
+ -- YYYYMMDDHHMMSS
31
+ WHEN TRY_TO_DATE(strparm, 'YYYYMMDDHHMISS') IS NOT NULL THEN 1
32
+
33
+ -- YYYYMMDD (only tried when the input is exactly 8 characters long)
34
+ WHEN LENGTH(strparm) = 8 AND TRY_TO_DATE(strparm, 'YYYYMMDD') IS NOT NULL THEN 1
35
+
36
+ -- YYYY-MON-DD HH:MM:SS SSSSSS (branch currently disabled)
37
+ --WHEN TRY_TO_DATE(strparm, 'YYYY-MON-DD HH:MI:SS SSSSSS') IS NOT NULL THEN 1
38
+
39
+ -- YYYY-MON-DD HH:MM:SS (branch currently disabled)
40
+ --WHEN TRY_TO_DATE(strparm, 'YYYY-MON-DD HH:MI:SS') IS NOT NULL THEN 1
41
+
42
+ -- Exclude anything else long: inputs over 11 characters that matched none of the formats above are rejected before the date-only formats are tried
43
+ WHEN LENGTH(strparm) > 11 THEN 0
44
+
45
+ -- YYYY-MON-DD
46
+ WHEN TRY_TO_DATE(strparm, 'YYYY-MON-DD') IS NOT NULL THEN 1
47
+
48
+ -- YYYY-MM-DD
49
+ WHEN TRY_TO_DATE(strparm, 'YYYY-MM-DD') IS NOT NULL THEN 1
50
+
51
+ -- MM/DD/YYYY
52
+ WHEN TRY_TO_DATE(strparm, 'MM/DD/YYYY') IS NOT NULL THEN 1
53
+
54
+ -- MM/DD/YY
55
+ WHEN TRY_TO_DATE(strparm, 'MM/DD/YY') IS NOT NULL THEN 1
56
+
57
+ -- MM-DD-YYYY
58
+ WHEN TRY_TO_DATE(strparm, 'MM-DD-YYYY') IS NOT NULL THEN 1
59
+
60
+ -- MM-DD-YY
61
+ WHEN TRY_TO_DATE(strparm, 'MM-DD-YY') IS NOT NULL THEN 1
62
+
63
+ -- DD-MON-YYYY
64
+ WHEN TRY_TO_DATE(strparm, 'DD-MON-YYYY') IS NOT NULL THEN 1
65
+
66
+
67
+ ELSE 0
68
+ END
69
+ $$;
@@ -0,0 +1 @@
1
+ CREATE SCHEMA IF NOT EXISTS {DATA_QC_SCHEMA};
@@ -0,0 +1,6 @@
1
+ -- Create dk_qc_role, grant it the QC schema and both helper functions, then grant the role to the DB_USER user
2
+ CREATE ROLE IF NOT EXISTS dk_qc_role;
3
+ GRANT ALL PRIVILEGES ON SCHEMA {DATA_QC_SCHEMA} TO ROLE dk_qc_role;
4
+ GRANT USAGE ON FUNCTION {DATA_QC_SCHEMA}.fndk_isnum(VARCHAR) TO ROLE dk_qc_role;
5
+ GRANT USAGE ON FUNCTION {DATA_QC_SCHEMA}.fndk_isdate(VARCHAR) TO ROLE dk_qc_role;
6
+ GRANT ROLE dk_qc_role TO USER {DB_USER};
@@ -0,0 +1,219 @@
1
+ ---  # Trino flavor: SQL fragments for the column profiling query - presumably concatenated in key order by the profiling command (confirm against profiling_query.py)
2
+ strTemplate01_sampling: "SELECT "  # same opening keyword whether or not sampling is used
3
+ strTemplate01_else: "SELECT "
4
+ strTemplate02_all: |  # identifying literals plus record, value, distinct and null counts - the null count uses a portable CASE because NVL2 is not a Trino function
5
+ {CONNECTION_ID} as connection_id,
6
+ '{PROJECT_CODE}' as project_code,
7
+ '{TABLE_GROUPS_ID}' as table_groups_id,
8
+ '{DATA_SCHEMA}' AS schema_name,
9
+ '{RUN_DATE}' AS run_date,
10
+ '{DATA_TABLE}' AS table_name,
11
+ {COL_POS} AS position,
12
+ '{COL_NAME_SANITIZED}' AS column_name,
13
+ '{COL_TYPE}' AS column_type,
14
+ '{COL_GEN_TYPE}' AS general_type,
15
+ COUNT(*) AS record_ct,
16
+ COUNT("{COL_NAME}") AS value_ct,
17
+ COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
18
+ SUM(CASE WHEN "{COL_NAME}" IS NULL THEN 1 ELSE 0 END) AS null_value_ct,
19
+ strTemplate03_ADN: MIN(LENGTH("{COL_NAME}")) AS min_length,
20
+ MAX(LENGTH("{COL_NAME}")) AS max_length,
21
+ AVG(CAST(NULLIF(LENGTH("{COL_NAME}"), 0) AS REAL)) AS avg_length,  # zero-length values are left out of the average
22
+ strTemplate03_else: NULL as min_length,
23
+ NULL as max_length,
24
+ NULL as avg_length,
25
+ strTemplate04_A: SUM(CASE
26
+ WHEN REGEXP_LIKE(TRIM("{COL_NAME}") , '^0(\.0*)?$') = TRUE THEN 1 ELSE 0
27
+ END) AS zero_value_ct,  # text form: the trimmed value is 0 with an optional all-zero decimal part
28
+ strTemplate04_N: CAST(SUM( 1 - ABS(SIGN("{COL_NAME}")) ) AS BIGINT) AS zero_value_ct,  # numeric form: 1 - ABS(SIGN(x)) is 1 only when x is 0
29
+ strTemplate04_else: NULL as zero_value_ct,
30
+ strTemplate05_A: COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) as distinct_std_value_ct,
31
+ SUM(CASE
32
+ WHEN "{COL_NAME}" = '' THEN 1
33
+ ELSE 0
34
+ END) AS zero_length_ct,
35
+ SUM( CASE
36
+ WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1
37
+ ELSE 0
38
+ END ) AS lead_space_ct,
39
+ SUM( CASE WHEN "{COL_NAME}" LIKE '"%"' OR "{COL_NAME}" LIKE '''%''' THEN 1 ELSE 0 END ) as quoted_value_ct,
40
+ SUM( CASE WHEN REGEXP_LIKE("{COL_NAME}" , '[0-9]') = TRUE THEN 1 ELSE 0 END ) as includes_digit_ct,
41
+ SUM( CASE
42
+ WHEN "{COL_NAME}" IN ('.', '?') THEN 1
43
+ WHEN REGEXP_LIKE(LOWER("{COL_NAME}") ,'^(\.{2,}|-{2,}|0{2,}|9{2,}|x{2,}|z{2,})$') = TRUE THEN 1
44
+ WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
45
+ 'n/a','#na','none','null','unknown') THEN 1
46
+ WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
47
+ '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
48
+ WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
49
+ '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
50
+ ELSE 0
51
+ END ) AS filled_value_ct,
52
+ SUBSTRING(MIN(NULLIF("{COL_NAME}", '')), 1, 100) AS min_text,
53
+ SUBSTRING(MAX(NULLIF("{COL_NAME}", '')), 1, 100) AS max_text,
54
+ SUM(fndk_isnum(SUBSTRING("{COL_NAME}", 1, 31))) AS numeric_ct,
55
+ SUM(fndk_isdate(SUBSTRING("{COL_NAME}", 1, 26))) AS date_ct,
56
+ CASE
57
+ WHEN CAST(SUM( CASE WHEN REGEXP_LIKE("{COL_NAME}" , '^[0-9]{1,5}[a-zA-Z]?\s\w{1,5}\.?\s?\w*\s?\w*\s[a-zA-Z]{1,6}\.?\s?[0-9]{0,5}[A-Z]{0,1}$') = TRUE
58
+ THEN 1 END) AS REAL)/CAST(COUNT("{COL_NAME}") AS REAL) > 0.8 THEN 'STREET_ADDR'
59
+ WHEN CAST(SUM(CASE WHEN "{COL_NAME}" IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
60
+ THEN 1 END) AS REAL)/CAST(COUNT("{COL_NAME}") AS REAL) > 0.9 THEN 'STATE_USA'
61
+ WHEN CAST(SUM( CASE WHEN REGEXP_LIKE("{COL_NAME}" , '^([\+]1 |1-|)[\+]?[(]?[0-9]{3}[)][ ]?[-\s\.]?[0-9]{3}[-\s\.]?[0-9]{4,6}$') = TRUE
62
+ OR REGEXP_LIKE("{COL_NAME}" , '^([\+]1 |1-|)[2-9][01][0-9][-| ]?[0-9]{3}[-| ]?[0-9]{4}$') = TRUE
63
+ THEN 1 END) AS REAL)/CAST(COUNT("{COL_NAME}") AS REAL) > 0.9 THEN 'PHONE_USA'
64
+ WHEN CAST(SUM( CASE WHEN REGEXP_LIKE("{COL_NAME}" , '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$') = TRUE
65
+ THEN 1 END) AS REAL)/CAST(COUNT("{COL_NAME}") AS REAL) > 0.9 THEN 'EMAIL'
66
+ WHEN CAST(SUM( CASE WHEN TRANSLATE("{COL_NAME}",'012345678','999999999') IN ('99999', '999999999', '99999-9999')
67
+ THEN 1 END) AS REAL)/CAST(COUNT("{COL_NAME}") AS REAL) > 0.9 THEN 'ZIP_USA'
68
+ WHEN CAST(SUM( CASE WHEN REGEXP_LIKE("{COL_NAME}" , '^[\w\s\-]+\.(txt|csv|tsv|dat|doc|pdf|xlsx)$') = TRUE
69
+ THEN 1 END) AS REAL)/CAST(COUNT("{COL_NAME}") AS REAL) > 0.9 THEN 'FILE_NAME'
70
+ WHEN CAST(SUM( CASE WHEN REGEXP_LIKE("{COL_NAME}" , '^([0-9]{4}[- ]){3}[0-9]{4}$') = TRUE
71
+ THEN 1 END) AS REAL)/CAST(COUNT("{COL_NAME}") AS REAL) > 0.8 THEN 'CREDIT_CARD'
72
+ WHEN CAST(SUM( CASE WHEN REGEXP_LIKE("{COL_NAME}" , '^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$') = TRUE
73
+ AND REGEXP_LIKE("{COL_NAME}" , '\s(and|but|or|yet)\s') != TRUE
74
+ THEN 1 END) AS REAL)/CAST(COUNT("{COL_NAME}") AS REAL) > 0.8 THEN 'DELIMITED_DATA'
75
+ WHEN CAST(SUM ( CASE WHEN REGEXP_LIKE("{COL_NAME}" , '^[0-8][0-9]{2}-[0-9]{2}-[0-9]{4}$') = TRUE
76
+ AND SUBSTRING("{COL_NAME}", 1, 3) NOT BETWEEN '734' AND '749'
77
+ AND SUBSTRING("{COL_NAME}", 1, 3) <> '666' THEN 1 END) AS REAL)/CAST(COUNT("{COL_NAME}") AS REAL) > 0.9 THEN 'SSN'
78
+ END as std_pattern_match,  # text-column profile: regexes use single backslashes because Trino string literals do not unescape them, and the filler pattern is fully anchored so only all-filler values count. NOTE(review): fndk_isnum and fndk_isdate are called unqualified here - confirm how they resolve on Trino
79
+ strTemplate05_else: NULL as distinct_std_value_ct,
80
+ NULL as zero_length_ct,
81
+ NULL as lead_space_ct,
82
+ NULL as quoted_value_ct,
83
+ NULL as includes_digit_ct,
84
+ NULL as filled_value_ct,
85
+ NULL as min_text,
86
+ NULL as max_text,
87
+ NULL as numeric_ct,
88
+ NULL as date_ct,
89
+ NULL as std_pattern_match,  # NULL placeholders for the same eleven output columns that strTemplate05_A produces
90
+ strTemplate06_A_patterns: (SELECT SUBSTRING(LISTAGG(pattern, ' | ') WITHIN GROUP (ORDER BY ct DESC), 1, 1000) AS concat_pats
91
+ FROM ( SELECT CAST(COUNT(*) AS VARCHAR(10)) || ' | ' || pattern AS pattern,
92
+ COUNT(*) AS ct
93
+ FROM ( SELECT REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE(
94
+ "{COL_NAME}", '[a-z]', 'a'),
95
+ '[A-Z]', 'A'),
96
+ '[0-9]', 'N') AS pattern
97
+ FROM {DATA_SCHEMA}.{DATA_TABLE}
98
+ WHERE "{COL_NAME}" > ' ' AND (SELECT MAX(LENGTH("{COL_NAME}"))
99
+ FROM {DATA_SCHEMA}.{DATA_TABLE}) BETWEEN 3 and {PARM_MAX_PATTERN_LENGTH}) p
100
+ GROUP BY pattern
101
+ HAVING pattern > ' '
102
+ ORDER BY COUNT(*) DESC LIMIT 5) as ps) AS top_patterns,  # five most common character shapes (a = lowercase, A = uppercase, N = digit), computed only when the longest value is between 3 and PARM_MAX_PATTERN_LENGTH characters
103
+ strTemplate06_else: NULL as top_patterns,
104
+ strTemplate07_A_freq: ( SELECT SUBSTRING(LISTAGG(val, ' | ') WITHIN GROUP (ORDER BY ct DESC), 1, 1000) as concat_vals
105
+ FROM (
106
+ SELECT CAST(COUNT(*) as VARCHAR(10)) || ' | ' || "{COL_NAME}" as val, COUNT(*) as ct
107
+ FROM {DATA_SCHEMA}.{DATA_TABLE}
108
+ WHERE "{COL_NAME}" > ' '
109
+ GROUP BY "{COL_NAME}"
110
+ HAVING "{COL_NAME}" > ' '
111
+ ORDER BY COUNT(*) DESC, "{COL_NAME}"
112
+ LIMIT 10
113
+ ) ps
114
+ ) AS top_freq_values,  # ten most frequent values as count | value pairs - the inner sort is COUNT DESC so LIMIT keeps the most frequent rows, with ties broken by value
115
+ strTemplate07_else: NULL as top_freq_values,
116
+ strTemplate08_N: MIN("{COL_NAME}") AS min_value,
117
+ MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
118
+ MAX("{COL_NAME}") AS max_value,
119
+ AVG(CAST("{COL_NAME}" AS REAL)) AS avg_value,
120
+ STDDEV(CAST("{COL_NAME}" AS REAL)) AS stdev_value,
121
+ MIN(pct_25) as percentile_25,
122
+ MIN(pct_50) as percentile_50,
123
+ MIN(pct_75) as percentile_75,  # pct_25, pct_50 and pct_75 are the columns of the pctile subquery defined in strTemplate99_N
124
+ strTemplate08_else: NULL as min_value,
125
+ NULL as min_value_over_0,
126
+ NULL as max_value,
127
+ NULL as avg_value,
128
+ NULL as stdev_value,
129
+ NULL as percentile_25,
130
+ NULL as percentile_50,
131
+ NULL as percentile_75,
132
+ strTemplate10_N_dec: SUM(ROUND(MOD("{COL_NAME}", 1), 5)) as fractional_sum,  # sum of the fractional parts, each rounded to 5 decimals
133
+ strTemplate10_else: NULL as fractional_sum,
134
+ strTemplate11_D: CASE
135
+ WHEN MIN("{COL_NAME}") IS NULL THEN NULL
136
+ ELSE GREATEST(MIN("{COL_NAME}"), DATE '0001-01-01')
137
+ END as min_date,
138
+ MAX("{COL_NAME}") as max_date,
139
+ SUM(CASE
140
+ WHEN DATE_DIFF('MONTH', CAST("{COL_NAME}" AS TIMESTAMP), TIMESTAMP '{RUN_DATE}') > 12 THEN 1
141
+ ELSE 0
142
+ END) AS before_1yr_date_ct,
143
+ SUM(CASE
144
+ WHEN DATE_DIFF('MONTH', CAST("{COL_NAME}" AS TIMESTAMP), TIMESTAMP '{RUN_DATE}') > 60 THEN 1
145
+ ELSE 0
146
+ END) AS before_5yr_date_ct,
147
+ SUM(CASE
148
+ WHEN DATE_DIFF('MONTH', CAST("{COL_NAME}" AS TIMESTAMP), TIMESTAMP '{RUN_DATE}') > 240 THEN 1
149
+ ELSE 0
150
+ END) AS before_20yr_date_ct,
151
+ SUM(CASE
152
+ WHEN DATE_DIFF('DAY', CAST("{COL_NAME}" AS TIMESTAMP), TIMESTAMP '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1
153
+ ELSE 0
154
+ END) AS within_1yr_date_ct,
155
+ SUM(CASE
156
+ WHEN DATE_DIFF('DAY', CAST("{COL_NAME}" AS TIMESTAMP), TIMESTAMP '{RUN_DATE}') BETWEEN 0 AND 30 THEN 1
157
+ ELSE 0
158
+ END) AS within_1mo_date_ct,
159
+ SUM(CASE
160
+ WHEN "{COL_NAME}" > TIMESTAMP '{RUN_DATE}' THEN 1 ELSE 0
161
+ END) AS future_date_ct,
162
+ COUNT(DISTINCT DATE_DIFF('day', CAST("{COL_NAME}" AS TIMESTAMP), TIMESTAMP '{RUN_DATE}' ) ) as date_days_present,
163
+ COUNT(DISTINCT DATE_DIFF('week', CAST("{COL_NAME}" AS TIMESTAMP), TIMESTAMP '{RUN_DATE}' ) ) as date_weeks_present,
164
+ COUNT(DISTINCT DATE_DIFF('month', CAST("{COL_NAME}" AS TIMESTAMP), TIMESTAMP '{RUN_DATE}' ) ) as date_months_present,  # date-column profile: the column is CAST to TIMESTAMP because the TIMESTAMP keyword only prefixes string literals in Trino, and the comparison values are typed literals because Trino does not coerce varchar to date
165
+
166
+ strTemplate11_else: NULL as min_date,
167
+ NULL as max_date,
168
+ NULL as before_1yr_date_ct,
169
+ NULL as before_5yr_date_ct,
170
+ NULL as before_20yr_date_ct,
171
+ NULL as within_1yr_date_ct,
172
+ NULL as within_1mo_date_ct,
173
+ NULL as future_date_ct,
174
+ NULL as date_days_present,
175
+ NULL as date_weeks_present,
176
+ NULL as date_months_present,
177
+
178
+ strTemplate12_B: SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,  # sums the column after casting it to INTEGER
179
+
180
+ strTemplate12_else: NULL as boolean_true_ct,
181
+
182
+ strTemplate13_ALL: NULL AS datatype_suggestion,  # always NULL in this query
183
+ strTemplate14_A_do_patterns: ( SELECT COUNT(DISTINCT REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE(
184
+ "{COL_NAME}", '[a-z]', 'a'),
185
+ '[A-Z]', 'A'),
186
+ '[0-9]', 'N')
187
+ ) AS pattern_ct
188
+ FROM {DATA_SCHEMA}.{DATA_TABLE}
189
+ WHERE "{COL_NAME}" > ' ' ) AS distinct_pattern_ct,
190
+ SUM(CAST(SIGN(REGEXP_COUNT(TRIM("{COL_NAME}"), ' ')) AS BIGINT)) AS embedded_space_ct,
191
+ AVG(CAST(REGEXP_COUNT(TRIM("{COL_NAME}"), ' ') AS REAL)) AS avg_embedded_spaces,  # embedded_space_ct counts values that still contain a space after trimming - SIGN turns any positive space count into 1
192
+
193
+ strTemplate14_A_no_patterns: NULL as distinct_pattern_ct,
194
+ SUM(CAST(SIGN(REGEXP_COUNT(TRIM("{COL_NAME}"), ' ')) AS BIGINT)) AS embedded_space_ct,
195
+ AVG(CAST(REGEXP_COUNT(TRIM("{COL_NAME}"), ' ') AS REAL)) AS avg_embedded_spaces,  # same space metrics as the do_patterns variant, without the distinct pattern subquery
196
+
197
+ strTemplate14_else: NULL as distinct_pattern_ct,
198
+ NULL as embedded_space_ct,
199
+ NULL as avg_embedded_spaces,
200
+
201
+ strTemplate15_ALL: NULL as functional_data_type,
202
+ NULL as functional_table_type,  # both functional type columns are always NULL in this query
203
+
204
+ strTemplate16_ALL: " '{PROFILE_RUN_ID}' as profile_run_id"  # last select-list item, so it has no trailing comma
205
+
206
+ strTemplate98_sampling: ' FROM {DATA_SCHEMA}.{DATA_TABLE} '  # same table reference as the else variant, plus a trailing space
207
+
208
+ strTemplate98_else: ' FROM {DATA_SCHEMA}.{DATA_TABLE}'
209
+
210
+ strTemplate99_N: |  # extra FROM item: a one-row subquery aliased pctile that supplies pct_25, pct_50 and pct_75
211
+ , (SELECT
212
+ APPROX_PERCENTILE("{COL_NAME}", 0.25) AS pct_25,
213
+ APPROX_PERCENTILE("{COL_NAME}", 0.50) AS pct_50,
214
+ APPROX_PERCENTILE("{COL_NAME}", 0.75) AS pct_75
215
+ FROM {DATA_SCHEMA}.{DATA_TABLE} LIMIT 1) pctile
216
+
217
+ strTemplate99_else: ' '
218
+
219
+ strTemplate100_sampling: 'WHERE RAND() <= 1.0 / {PROFILE_SAMPLE_RATIO}'  # keeps each row with probability 1 / PROFILE_SAMPLE_RATIO
@@ -0,0 +1,92 @@
1
+
2
+ -- The following functions are inline functions
3
+ -- INLINE FUNCTION TO CHECK FOR A NUMBER
4
+ -- num_check(a) returns 1 for an unsigned integer or decimal with an optional single leading or trailing dollar sign, else 0. regexp_like matches anywhere in the string, so every pattern is anchored at both ends.
5
+ WITH FUNCTION num_check(a varchar)
6
+ RETURNS integer
7
+ RETURN
8
+ CASE WHEN regexp_like(a, '^[0-9]+(\.[0-9]+)?$') = TRUE THEN 1
9
+ WHEN regexp_like(a, '^\$[0-9]+(\.[0-9]+)?$') = TRUE THEN 1
10
+ WHEN regexp_like(a, '^[0-9]+(\.[0-9]+)?\$$') = TRUE THEN 1
11
+ ELSE 0
12
+ END
13
+ SELECT num_check('1234567'), num_check('$45.945843'), num_check('0.123$');
14
+
15
+
16
+ -- INLINE FUNCTION TO CHECK FOR A DATE
17
+ -- date_check(a) returns 1 when a is a calendar-plausible date in one of these layouts, else 0: YYYY-MM-DD HH:MM:SS with optional 6-digit suffix, YYYYMMDDHHMMSS plus 6 digits, YYYYMMDD, YYYY-MM-DD or YYYY-MON-DD, MM/DD/YYYY or MM-DD-YY, DD-MON-YYYY. Day is checked against month length (Feb capped at 29, no leap-year test); April, June, September and November allow 30 days in every branch.
18
+ WITH FUNCTION date_check(a varchar)
19
+ RETURNS integer
20
+ RETURN
21
+ CASE WHEN REGEXP_LIKE(a, '^(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])\s(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\s[0-9]{6})?$')
22
+ THEN CASE WHEN CAST(SUBSTRING(a, 1, 4) AS INT) BETWEEN 1800 AND 2200
23
+ AND( ( SUBSTRING(a, 6, 2) IN ('01', '03', '05', '07', '08', '10', '12')
24
+ AND CAST(SUBSTRING(a, 9, 2) AS INT) BETWEEN 1 AND 31)
25
+ OR (SUBSTRING(a, 6, 2) IN ('04', '06', '09', '11') AND CAST(SUBSTRING(a, 9, 2) AS INT) BETWEEN 1 AND 30)
26
+ OR (SUBSTRING(a, 6, 2) = '02' AND CAST(SUBSTRING(a, 9, 2) AS INT) BETWEEN 1 AND 29)
27
+ )
28
+ THEN 1
29
+ ELSE 0
30
+ END
31
+ WHEN REGEXP_LIKE(a, '^(\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])([0-5][0-9])([0-5][0-9])([0-9]{6})$')
32
+ OR REGEXP_LIKE(a, '^(\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])$')
33
+ THEN CASE WHEN CAST(SUBSTRING(a, 1, 4) AS INT) BETWEEN 1800 AND 2200
34
+ AND ( (SUBSTRING(a, 5, 2) IN ('01', '03', '05', '07', '08', '10', '12')
35
+ AND CAST(SUBSTRING(a, 7, 2) AS INT) BETWEEN 1 AND 31)
36
+ OR (SUBSTRING(a, 5, 2) IN ('04', '06', '09', '11') AND CAST(SUBSTRING(a, 7, 2) AS INT) BETWEEN 1 AND 30)
37
+ OR (SUBSTRING(a, 5, 2) = '02' AND CAST(SUBSTRING(a, 7, 2) AS INT) BETWEEN 1 AND 29)
38
+ )
39
+ THEN 1
40
+ ELSE 0
41
+ END
42
+ WHEN LENGTH(a) > 11 THEN 0
43
+ WHEN REGEXP_LIKE(REGEXP_REPLACE(UPPER(a), '(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)', '12'), '[12][09][0-9][0-9]-[0-1]?[0-9]-[0-3]?[0-9]')
44
+ THEN CASE WHEN CAST(SPLIT_PART(a, '-', 1) AS INT) BETWEEN 1800 AND 2200
45
+ AND ( (UPPER(SPLIT_PART(a, '-', 2)) IN ('01', '03', '05', '07', '08',
46
+ '1', '3', '5', '7', '8', '10', '12',
47
+ 'JAN', 'MAR', 'MAY', 'JUL', 'AUG',
48
+ 'OCT', 'DEC')
49
+ AND CAST(SPLIT_PART(a, '-', 3) AS INT) BETWEEN 1 AND 31)
50
+ OR (UPPER(SPLIT_PART(a, '-', 2)) IN ('04', '06', '09', '4', '6', '9', '11', 'APR', 'JUN', 'SEP', 'NOV')
51
+ AND CAST(SPLIT_PART(a, '-', 3) AS INT) BETWEEN 1 AND 30)
52
+ OR (UPPER(SPLIT_PART(a, '-', 2)) IN ('02', '2', 'FEB') AND CAST(SPLIT_PART(a, '-', 3) AS INT) BETWEEN 1 AND 29)
53
+ )
54
+ THEN 1
55
+ ELSE 0
56
+ END
57
+ WHEN REGEXP_LIKE(REPLACE(a, '-', '/') , '^[0-1]?[0-9]/[0-3]?[0-9]/[12][09][0-9][0-9]$')
58
+ OR REGEXP_LIKE(REPLACE(a, '-', '/') , '^[0-1]?[0-9]/[0-3]?[0-9]/[0-9][0-9]$')
59
+ THEN CASE WHEN CAST(SPLIT_PART(REPLACE(a, '-', '/'), '/', 1) AS INT) BETWEEN 1 AND 12
60
+ AND ( (CAST(SPLIT_PART(REPLACE(a, '-', '/'), '/', 1) AS INT) IN (1, 3, 5, 7, 8, 10, 12)
61
+ AND CAST(SPLIT_PART(REPLACE(a, '-', '/'), '/', 2) AS INT) BETWEEN 1 AND 31)
62
+ OR (CAST(SPLIT_PART(REPLACE(a, '-', '/'), '/', 1) AS INT) IN (4, 6, 9, 11)
63
+ AND CAST(SPLIT_PART(REPLACE(a, '-', '/'), '/', 2) AS INT) BETWEEN 1 AND 30)
64
+ OR (CAST(SPLIT_PART(REPLACE(a, '-', '/'), '/', 1) AS INT) = 2
65
+ AND CAST(SPLIT_PART(REPLACE(a, '-', '/'), '/', 2) AS INT) BETWEEN 1 AND 29)
66
+ )
67
+ AND CAST(('20' || SUBSTRING(SPLIT_PART(REPLACE(a, '-', '/'), '/', 3), -2 )) AS INT) BETWEEN 1800 AND 2200
68
+ THEN 1
69
+ ELSE 0
70
+ END
71
+ WHEN REGEXP_LIKE(UPPER(a) , '[0-3]?[0-9]-(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-[12][09][0-9][0-9]')
72
+ THEN CASE WHEN CAST(SPLIT_PART(a, '-', 3) AS INT) BETWEEN 1800 AND 2200
73
+ AND ( (UPPER(SPLIT_PART(a, '-', 2)) IN ('JAN', 'MAR', 'MAY', 'JUL', 'AUG', 'OCT', 'DEC')
74
+ AND CAST(SPLIT_PART(a, '-', 1) AS INT) BETWEEN 1 AND 31)
75
+ OR (UPPER(SPLIT_PART(a, '-', 2)) IN ('APR', 'JUN', 'SEP', 'NOV')
76
+ AND CAST(SPLIT_PART(a, '-', 1) AS INT) BETWEEN 1 AND 30)
77
+ OR (UPPER(SPLIT_PART(a, '-', 2)) = 'FEB'
78
+ AND CAST(SPLIT_PART(a, '-', 1) AS INT) BETWEEN 1 AND 29)
79
+ )
80
+ THEN 1
81
+ ELSE 0
82
+ END
83
+ ELSE 0
84
+ END
85
+ SELECT date_check('2002-02-30 12:01:35'),
86
+ date_check('2002-02-21 12:01:35 121324'),
87
+ date_check('20100314224518304596'),
88
+ date_check('20100230'),
89
+ date_check('201002301234'),
90
+ date_check('2010-03-30'), date_check('2010-MAR-30'),
91
+ date_check('05-21-22'), date_check('10/23/2023'),
92
+ date_check('10-SEP-2024');
@@ -0,0 +1 @@
1
+ CREATE SCHEMA IF NOT EXISTS {DATA_QC_SCHEMA};
@@ -0,0 +1,104 @@
1
+ -- Then insert new Constant tests where a locked test is not already present. A column qualifies when every one of its 5 most recent profiling runs (dated on or before AS_OF_DATE) shows exactly one distinct value, the same value in every run, and max_length under 100; the baseline is that value from the latest run. NOTE(review): a NULL max_length fails the under-100 check and excludes the column - confirm max_length is populated for non-text columns.
2
+ INSERT INTO test_definitions (project_code, table_groups_id, profile_run_id,
3
+ test_type, test_suite, test_suite_id,
4
+ schema_name, table_name, column_name, skip_errors,
5
+ last_auto_gen_date, test_active,
6
+ baseline_value, threshold_value, profiling_as_of_date)
7
+ WITH last_run AS (SELECT r.table_groups_id, MAX(run_date) AS last_run_date
8
+ FROM profile_results p
9
+ INNER JOIN profiling_runs r
10
+ ON (p.profile_run_id = r.id)
11
+ INNER JOIN test_suites tg
12
+ ON p.project_code = tg.project_code
13
+ AND p.connection_id = tg.connection_id
14
+ WHERE p.project_code = '{PROJECT_CODE}'
15
+ AND r.table_groups_id = '{TABLE_GROUPS_ID}'::UUID
16
+ AND tg.test_suite = '{TEST_SUITE}'
17
+ AND p.run_date::DATE <= '{AS_OF_DATE}'
18
+ GROUP BY r.table_groups_id),
19
+ curprof AS (SELECT p.*
20
+ FROM last_run lr
21
+ INNER JOIN profile_results p
22
+ ON (lr.table_groups_id = p.table_groups_id
23
+ AND lr.last_run_date = p.run_date) ),
24
+ locked AS (SELECT schema_name, table_name, column_name, test_type
25
+ FROM test_definitions
26
+ WHERE table_groups_id = '{TABLE_GROUPS_ID}'::UUID
27
+ AND test_suite = '{TEST_SUITE}'
28
+ AND lock_refresh = 'Y'),
29
+ all_runs AS ( SELECT DISTINCT p.table_groups_id, p.schema_name, p.run_date,
30
+ DENSE_RANK() OVER (PARTITION BY p.table_groups_id ORDER BY p.run_date DESC) as run_rank
31
+ FROM profile_results p
32
+ INNER JOIN test_suites ts
33
+ ON p.connection_id = ts.connection_id
34
+ AND p.project_code = ts.project_code
35
+ WHERE p.table_groups_id = '{TABLE_GROUPS_ID}'::UUID
36
+ AND ts.test_suite = '{TEST_SUITE}'
37
+ AND p.run_date::DATE <= '{AS_OF_DATE}'),
38
+ recent_runs AS (SELECT table_groups_id, schema_name, run_date, run_rank
39
+ FROM all_runs
40
+ WHERE run_rank <= 5),
41
+ rightcols as (SELECT p.schema_name, p.table_name, p.column_name,
42
+ SUM(CASE WHEN distinct_value_ct = 1 THEN 0 ELSE 1 END) as always_one_val,
43
+ COUNT(DISTINCT CASE
44
+ WHEN p.general_type = 'A' THEN min_text
45
+ WHEN p.general_type = 'N' THEN min_value::VARCHAR
46
+ WHEN p.general_type IN ('D','T') THEN min_date::VARCHAR
47
+ WHEN p.general_type = 'B'
48
+ AND boolean_true_ct = value_ct THEN 'TRUE'
49
+ WHEN p.general_type = 'B'
50
+ AND p.boolean_true_ct = 0
51
+ AND p.distinct_value_ct = 1 THEN 'FALSE'
52
+ END ) as agg_distinct_val_ct
53
+ FROM recent_runs rr
54
+ INNER JOIN profile_results p
55
+ ON (rr.table_groups_id = p.table_groups_id
56
+ AND rr.run_date = p.run_date)
57
+ GROUP BY p.schema_name, p.table_name, p.column_name
58
+ HAVING SUM(CASE WHEN distinct_value_ct = 1 THEN 0 ELSE 1 END) = 0
59
+ AND SUM(CASE WHEN max_length < 100 THEN 0 ELSE 1 END) = 0
60
+ AND COUNT(DISTINCT CASE
61
+ WHEN p.general_type = 'A' THEN min_text
62
+ WHEN p.general_type = 'N' THEN min_value::VARCHAR
63
+ WHEN p.general_type IN ('D','T') THEN min_date::VARCHAR
64
+ WHEN p.general_type = 'B'
65
+ AND boolean_true_ct = value_ct THEN 'TRUE'
66
+ WHEN p.general_type = 'B'
67
+ AND p.boolean_true_ct = 0
68
+ AND p.distinct_value_ct = 1 THEN 'FALSE'
69
+ END ) = 1 ),
70
+ newtests AS ( SELECT 'Constant'::VARCHAR AS test_type,
71
+ '{TEST_SUITE}'::VARCHAR AS test_suite,
72
+ '{TEST_SUITE_ID}'::UUID AS test_suite_id,
73
+ c.profile_run_id,
74
+ c.project_code,
75
+ c.schema_name, c.table_name, c.column_name,
76
+ c.run_date AS last_run_date,
77
+ case when general_type='A' then fn_quote_literal_escape(min_text, '{SQL_FLAVOR}')::VARCHAR
78
+ when general_type='D' then fn_quote_literal_escape(min_date :: VARCHAR, '{SQL_FLAVOR}')::VARCHAR
79
+ when general_type='N' then min_value::VARCHAR
80
+ when general_type='B' and boolean_true_ct = 0 then 'FALSE'::VARCHAR
81
+ when general_type='B' and boolean_true_ct > 0 then 'TRUE'::VARCHAR
82
+ end as baseline_value
83
+ FROM curprof c
84
+ INNER JOIN rightcols r
85
+ ON (c.schema_name = r.schema_name
86
+ AND c.table_name = r.table_name
87
+ AND c.column_name = r.column_name)
88
+ LEFT JOIN generation_sets s
89
+ ON ('Constant' = s.test_type
90
+ AND '{GENERATION_SET}' = s.generation_set)
91
+ WHERE (s.generation_set IS NOT NULL
92
+ OR '{GENERATION_SET}' = '') )
93
+ SELECT n.project_code, '{TABLE_GROUPS_ID}'::UUID as table_groups_id, n.profile_run_id,
94
+ n.test_type, n.test_suite, n.test_suite_id, n.schema_name, n.table_name, n.column_name,
95
+ 0 as skip_errors, '{RUN_DATE}'::TIMESTAMP as last_auto_gen_date,
96
+ 'Y' as test_active, COALESCE(baseline_value, '') as baseline_value,
97
+ '0' as threshold_value, '{AS_OF_DATE}'::TIMESTAMP as profiling_as_of_date
98
+ FROM newtests n
99
+ LEFT JOIN locked l
100
+ ON (n.schema_name = l.schema_name
101
+ AND n.table_name = l.table_name
102
+ AND n.column_name = l.column_name
103
+ AND n.test_type = l.test_type)
104
+ WHERE l.test_type IS NULL;
@@ -0,0 +1,98 @@
1
+ -- FIRST TYPE OF CONSTANT IS HANDLED IN SEPARATE SQL FILE gen_standard_tests.sql using generic parameters
2
+ -- Second type: constants with changing values - columns showing exactly 1 distinct value in each of the 5 most recent profiling runs (dated on or before AS_OF_DATE) while that value differs between runs. Inserts a Distinct_Value_Ct test whose baseline_value_ct and threshold_value are both the latest distinct_value_ct, skipping columns that already have a locked test of this type.
3
+ INSERT INTO test_definitions (project_code, table_groups_id, profile_run_id, test_type, test_suite, test_suite_id,
4
+ schema_name, table_name, column_name, skip_errors,
5
+ last_auto_gen_date, test_active,
6
+ baseline_value_ct, threshold_value, profiling_as_of_date)
7
+ WITH last_run AS (SELECT r.table_groups_id, MAX(run_date) AS last_run_date
8
+ FROM profile_results p
9
+ INNER JOIN profiling_runs r
10
+ ON (p.profile_run_id = r.id)
11
+ INNER JOIN test_suites tg
12
+ ON p.project_code = tg.project_code
13
+ AND p.connection_id = tg.connection_id
14
+ WHERE p.project_code = '{PROJECT_CODE}'
15
+ AND r.table_groups_id = '{TABLE_GROUPS_ID}'::UUID
16
+ AND tg.test_suite = '{TEST_SUITE}'
17
+ AND p.run_date::DATE <= '{AS_OF_DATE}'
18
+ GROUP BY r.table_groups_id),
19
+ curprof AS (SELECT p.*
20
+ FROM last_run lr
21
+ INNER JOIN profile_results p
22
+ ON (lr.table_groups_id = p.table_groups_id
23
+ AND lr.last_run_date = p.run_date) ),
24
+ locked AS (SELECT schema_name, table_name, column_name, test_type
25
+ FROM test_definitions
26
+ WHERE table_groups_id = '{TABLE_GROUPS_ID}'::UUID
27
+ AND test_suite = '{TEST_SUITE}'
28
+ AND lock_refresh = 'Y'),
29
+ all_runs AS ( SELECT DISTINCT p.table_groups_id, p.schema_name, p.run_date,
30
+ DENSE_RANK() OVER (PARTITION BY p.table_groups_id ORDER BY p.run_date DESC) as run_rank
31
+ FROM profile_results p
32
+ INNER JOIN test_suites ts
33
+ ON p.connection_id = ts.connection_id
34
+ AND p.project_code = ts.project_code
35
+ WHERE p.table_groups_id = '{TABLE_GROUPS_ID}'::UUID
36
+ AND ts.test_suite = '{TEST_SUITE}'
37
+ AND p.run_date::DATE <= '{AS_OF_DATE}'),
38
+ recent_runs AS (SELECT table_groups_id, schema_name, run_date, run_rank
39
+ FROM all_runs
40
+ WHERE run_rank <= 5),
41
+ rightcols as (SELECT p.schema_name, p.table_name, p.column_name,
42
+ SUM(CASE WHEN distinct_value_ct = 1 THEN 0 ELSE 1 END) as always_one_val,
43
+ COUNT(DISTINCT CASE
44
+ WHEN p.general_type = 'A' THEN min_text
45
+ WHEN p.general_type = 'N' THEN min_value::VARCHAR
46
+ WHEN p.general_type IN ('D','T') THEN min_date::VARCHAR
47
+ WHEN p.general_type = 'B'
48
+ AND boolean_true_ct = value_ct THEN 'TRUE'
49
+ WHEN p.general_type = 'B'
50
+ AND p.boolean_true_ct = 0
51
+ AND p.distinct_value_ct = 1 THEN 'FALSE'
52
+ END ) as agg_distinct_val_ct
53
+ FROM recent_runs rr
54
+ INNER JOIN profile_results p
55
+ ON (rr.table_groups_id = p.table_groups_id
56
+ AND rr.run_date = p.run_date)
57
+ GROUP BY p.schema_name, p.table_name, p.column_name
58
+ HAVING SUM(CASE WHEN distinct_value_ct = 1 THEN 0 ELSE 1 END) = 0
59
+ AND COUNT(DISTINCT CASE
60
+ WHEN p.general_type = 'A' THEN min_text
61
+ WHEN p.general_type = 'N' THEN min_value::VARCHAR
62
+ WHEN p.general_type IN ('D','T') THEN min_date::VARCHAR
63
+ WHEN p.general_type = 'B'
64
+ AND boolean_true_ct = value_ct THEN 'TRUE'
65
+ WHEN p.general_type = 'B'
66
+ AND p.boolean_true_ct = 0
67
+ AND p.distinct_value_ct = 1 THEN 'FALSE'
68
+ END ) > 1 ),
69
+ newtests AS ( SELECT 'Distinct_Value_Ct'::VARCHAR AS test_type,
70
+ '{TEST_SUITE}'::VARCHAR AS test_suite,
71
+ '{TEST_SUITE_ID}'::UUID AS test_suite_id,
72
+ c.project_code, c.table_groups_id, c.profile_run_id,
73
+ c.schema_name, c.table_name, c.column_name,
74
+ c.run_date AS last_run_date,
75
+ c.distinct_value_ct
76
+ FROM curprof c
77
+ INNER JOIN rightcols r
78
+ ON (c.schema_name = r.schema_name
79
+ AND c.table_name = r.table_name
80
+ AND c.column_name = r.column_name)
81
+ LEFT JOIN generation_sets s
82
+ ON ('Distinct_Value_Ct' = s.test_type
83
+ AND '{GENERATION_SET}' = s.generation_set)
84
+ WHERE (s.generation_set IS NOT NULL
85
+ OR '{GENERATION_SET}' = '') )
86
+ SELECT n.project_code, n.table_groups_id, n.profile_run_id,
87
+ n.test_type, n.test_suite, n.test_suite_id,
88
+ n.schema_name, n.table_name, n.column_name, 0 as skip_errors,
89
+ '{RUN_DATE}'::TIMESTAMP as last_auto_gen_date, 'Y' as test_active,
90
+ distinct_value_ct as baseline_value_ct, distinct_value_ct as threshold_value,
91
+ '{AS_OF_DATE}'::TIMESTAMP as profiling_as_of_date
92
+ FROM newtests n
93
+ LEFT JOIN locked l
94
+ ON (n.schema_name = l.schema_name
95
+ AND n.table_name = l.table_name
96
+ AND n.column_name = l.column_name
97
+ AND n.test_type = l.test_type)
98
+ WHERE l.test_type IS NULL;