dataops-testgen 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. dataops_testgen-2.2.0.dist-info/LICENSE +203 -0
  2. dataops_testgen-2.2.0.dist-info/METADATA +287 -0
  3. dataops_testgen-2.2.0.dist-info/NOTICE +5 -0
  4. dataops_testgen-2.2.0.dist-info/RECORD +270 -0
  5. dataops_testgen-2.2.0.dist-info/WHEEL +5 -0
  6. dataops_testgen-2.2.0.dist-info/entry_points.txt +2 -0
  7. dataops_testgen-2.2.0.dist-info/top_level.txt +1 -0
  8. testgen/__init__.py +0 -0
  9. testgen/__main__.py +770 -0
  10. testgen/commands/__init__.py +0 -0
  11. testgen/commands/queries/__init__.py +0 -0
  12. testgen/commands/queries/execute_cat_tests_query.py +95 -0
  13. testgen/commands/queries/execute_tests_query.py +160 -0
  14. testgen/commands/queries/generate_tests_query.py +94 -0
  15. testgen/commands/queries/profiling_query.py +366 -0
  16. testgen/commands/queries/test_parameter_validation_query.py +88 -0
  17. testgen/commands/run_execute_cat_tests.py +162 -0
  18. testgen/commands/run_execute_tests.py +168 -0
  19. testgen/commands/run_generate_tests.py +107 -0
  20. testgen/commands/run_get_entities.py +122 -0
  21. testgen/commands/run_launch_db_config.py +84 -0
  22. testgen/commands/run_observability_exporter.py +330 -0
  23. testgen/commands/run_profiling_bridge.py +495 -0
  24. testgen/commands/run_quick_start.py +168 -0
  25. testgen/commands/run_setup_profiling_tools.py +96 -0
  26. testgen/commands/run_test_definition.py +146 -0
  27. testgen/commands/run_test_parameter_validation.py +135 -0
  28. testgen/commands/run_upgrade_db_config.py +156 -0
  29. testgen/common/__init__.py +8 -0
  30. testgen/common/clean_sql.py +53 -0
  31. testgen/common/credentials.py +25 -0
  32. testgen/common/database/__init__.py +0 -0
  33. testgen/common/database/database_service.py +629 -0
  34. testgen/common/database/flavor/__init__.py +0 -0
  35. testgen/common/database/flavor/flavor_service.py +75 -0
  36. testgen/common/database/flavor/mssql_flavor_service.py +34 -0
  37. testgen/common/database/flavor/postgresql_flavor_service.py +5 -0
  38. testgen/common/database/flavor/redshift_flavor_service.py +22 -0
  39. testgen/common/database/flavor/snowflake_flavor_service.py +69 -0
  40. testgen/common/database/flavor/trino_flavor_service.py +21 -0
  41. testgen/common/date_service.py +68 -0
  42. testgen/common/display_service.py +85 -0
  43. testgen/common/docker_service.py +76 -0
  44. testgen/common/encrypt.py +55 -0
  45. testgen/common/get_pipeline_parms.py +57 -0
  46. testgen/common/logs.py +79 -0
  47. testgen/common/process_service.py +62 -0
  48. testgen/common/read_file.py +69 -0
  49. testgen/settings.py +440 -0
  50. testgen/template/dbsetup/010_create_base_schema.sql +2 -0
  51. testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +179 -0
  52. testgen/template/dbsetup/030_initialize_new_schema_structure.sql +735 -0
  53. testgen/template/dbsetup/040_populate_new_schema_project.sql +59 -0
  54. testgen/template/dbsetup/050_populate_new_schema_metadata.sql +1517 -0
  55. testgen/template/dbsetup/060_create_standard_views.sql +248 -0
  56. testgen/template/dbsetup/070_create_default_users.sql +17 -0
  57. testgen/template/dbsetup/075_grant_role_rights.sql +43 -0
  58. testgen/template/dbsetup/080_set_current_revision.sql +5 -0
  59. testgen/template/dbupgrade/0100_incremental_upgrade.sql +5 -0
  60. testgen/template/dbupgrade/0101_incremental_upgrade.sql +15 -0
  61. testgen/template/dbupgrade/0102_incremental_upgrade.sql +4 -0
  62. testgen/template/dbupgrade/0103_incremental_upgrade.sql +22 -0
  63. testgen/template/dbupgrade/0104_incremental_upgrade.sql +44 -0
  64. testgen/template/dbupgrade/0105_incremental_upgrade.sql +1 -0
  65. testgen/template/dbupgrade/0106_incremental_upgrade.sql +5 -0
  66. testgen/template/dbupgrade/0107_incremental_upgrade.sql +3 -0
  67. testgen/template/dbupgrade_helpers/get_tg_revision.sql +2 -0
  68. testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +116 -0
  69. testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql +11 -0
  70. testgen/template/exec_cat_tests/ex_cat_results_parse.sql +69 -0
  71. testgen/template/exec_cat_tests/ex_cat_retrieve_agg_test_parms.sql +6 -0
  72. testgen/template/exec_cat_tests/ex_cat_test_query.sql +8 -0
  73. testgen/template/execution/ex_finalize_test_run_results.sql +37 -0
  74. testgen/template/execution/ex_get_tests_non_cat.sql +47 -0
  75. testgen/template/execution/ex_update_test_record_in_testrun_table.sql +27 -0
  76. testgen/template/execution/ex_write_test_record_to_testrun_table.sql +6 -0
  77. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +48 -0
  78. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql +34 -0
  79. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql +49 -0
  80. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql +49 -0
  81. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +49 -0
  82. testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +39 -0
  83. testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +58 -0
  84. testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +44 -0
  85. testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +37 -0
  86. testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +53 -0
  87. testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +46 -0
  88. testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +59 -0
  89. testgen/template/flavors/generic/profiling/contingency_counts.sql +3 -0
  90. testgen/template/flavors/generic/validate_tests/ex_get_project_column_list_generic.sql +3 -0
  91. testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql +53 -0
  92. testgen/template/flavors/mssql/profiling/project_ddf_query_mssql.sql +35 -0
  93. testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +246 -0
  94. testgen/template/flavors/mssql/profiling/project_secondary_profiling_query_mssql.sql +36 -0
  95. testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql +8 -0
  96. testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql +12 -0
  97. testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql +54 -0
  98. testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql +4 -0
  99. testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql +1 -0
  100. testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql +46 -0
  101. testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql +59 -0
  102. testgen/template/flavors/postgresql/profiling/project_ddf_query_postgresql.sql +42 -0
  103. testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +225 -0
  104. testgen/template/flavors/postgresql/profiling/project_secondary_profiling_query_postgresql.sql +28 -0
  105. testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql +157 -0
  106. testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql +1 -0
  107. testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql +2 -0
  108. testgen/template/flavors/redshift/profiling/project_ddf_query_redshift.sql +38 -0
  109. testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +221 -0
  110. testgen/template/flavors/redshift/profiling/project_secondary_profiling_query_redshift.sql +29 -0
  111. testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql +115 -0
  112. testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql +1 -0
  113. testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql +2 -0
  114. testgen/template/flavors/snowflake/profiling/project_ddf_query_snowflake.sql +38 -0
  115. testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +220 -0
  116. testgen/template/flavors/snowflake/profiling/project_secondary_profiling_query_snowflake.sql +29 -0
  117. testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql +69 -0
  118. testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql +1 -0
  119. testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql +6 -0
  120. testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml +219 -0
  121. testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql +92 -0
  122. testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql +1 -0
  123. testgen/template/gen_funny_cat_tests/gen_test_constant.sql +104 -0
  124. testgen/template/gen_funny_cat_tests/gen_test_distinct_value_ct.sql +98 -0
  125. testgen/template/gen_funny_cat_tests/gen_test_row_ct.sql +57 -0
  126. testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql +59 -0
  127. testgen/template/generation/gen_delete_old_tests.sql +5 -0
  128. testgen/template/generation/gen_insert_test_suite.sql +5 -0
  129. testgen/template/generation/gen_retrieve_or_insert_test_suite.sql +58 -0
  130. testgen/template/generation/gen_standard_test_type_list.sql +13 -0
  131. testgen/template/generation/gen_standard_tests.sql +48 -0
  132. testgen/template/get_entities/get_connection.sql +21 -0
  133. testgen/template/get_entities/get_connections_list.sql +9 -0
  134. testgen/template/get_entities/get_latest.sql +4 -0
  135. testgen/template/get_entities/get_profile.sql +12 -0
  136. testgen/template/get_entities/get_profile_info.sql +17 -0
  137. testgen/template/get_entities/get_profile_list.sql +17 -0
  138. testgen/template/get_entities/get_profile_screen.sql +275 -0
  139. testgen/template/get_entities/get_project_list.sql +6 -0
  140. testgen/template/get_entities/get_table_group_list.sql +10 -0
  141. testgen/template/get_entities/get_test_generation_list.sql +18 -0
  142. testgen/template/get_entities/get_test_info.sql +41 -0
  143. testgen/template/get_entities/get_test_results_for_run_cli.sql +16 -0
  144. testgen/template/get_entities/get_test_run_list.sql +24 -0
  145. testgen/template/get_entities/get_test_suite.sql +13 -0
  146. testgen/template/get_entities/get_test_suite_list.sql +18 -0
  147. testgen/template/get_entities/list_test_types.sql +4 -0
  148. testgen/template/observability/get_event_data.sql +23 -0
  149. testgen/template/observability/get_test_results.sql +41 -0
  150. testgen/template/observability/update_test_results_exported_to_observability.sql +12 -0
  151. testgen/template/parms/parms_profiling.sql +34 -0
  152. testgen/template/parms/parms_test_execution.sql +13 -0
  153. testgen/template/parms/parms_test_gen.sql +23 -0
  154. testgen/template/profiling/contingency_columns.sql +7 -0
  155. testgen/template/profiling/datatype_suggestions.sql +56 -0
  156. testgen/template/profiling/functional_datatype.sql +523 -0
  157. testgen/template/profiling/functional_tabletype_stage.sql +48 -0
  158. testgen/template/profiling/functional_tabletype_update.sql +8 -0
  159. testgen/template/profiling/pii_flag.sql +133 -0
  160. testgen/template/profiling/profile_anomalies_screen_column.sql +22 -0
  161. testgen/template/profiling/profile_anomalies_screen_multi_column.sql +58 -0
  162. testgen/template/profiling/profile_anomalies_screen_table.sql +22 -0
  163. testgen/template/profiling/profile_anomalies_screen_table_dates.sql +30 -0
  164. testgen/template/profiling/profile_anomalies_screen_variants.sql +40 -0
  165. testgen/template/profiling/profile_anomaly_types_get.sql +3 -0
  166. testgen/template/profiling/project_get_table_sample_count.sql +22 -0
  167. testgen/template/profiling/project_profile_run_record_insert.sql +8 -0
  168. testgen/template/profiling/project_profile_run_record_update.sql +5 -0
  169. testgen/template/profiling/project_profile_run_record_update_status.sql +5 -0
  170. testgen/template/profiling/project_update_profile_results_to_estimates.sql +32 -0
  171. testgen/template/profiling/refresh_anomalies.sql +33 -0
  172. testgen/template/profiling/refresh_data_chars_from_profiling.sql +156 -0
  173. testgen/template/profiling/secondary_profiling_columns.sql +12 -0
  174. testgen/template/profiling/secondary_profiling_delete.sql +4 -0
  175. testgen/template/profiling/secondary_profiling_update.sql +18 -0
  176. testgen/template/quick_start/populate_target_data.sql +1077 -0
  177. testgen/template/quick_start/recreate_target_data_schema.sql +167 -0
  178. testgen/template/quick_start/update_target_data.sql +100 -0
  179. testgen/template/updates/create_tmp_test_definition.sql +19 -0
  180. testgen/template/updates/get_test_def_parms.sql +38 -0
  181. testgen/template/updates/populate_stg_test_definitions.sql +184 -0
  182. testgen/template/validate_tests/ex_disable_tests_test_definitions.sql +5 -0
  183. testgen/template/validate_tests/ex_flag_tests_test_definitions.sql +64 -0
  184. testgen/template/validate_tests/ex_get_project_column_list_generic.sql +3 -0
  185. testgen/template/validate_tests/ex_get_test_column_list_tg.sql +65 -0
  186. testgen/template/validate_tests/ex_write_test_val_errors.sql +22 -0
  187. testgen/ui/__init__.py +0 -0
  188. testgen/ui/app.py +98 -0
  189. testgen/ui/assets/dk_logo.svg +46 -0
  190. testgen/ui/assets/question_mark.png +0 -0
  191. testgen/ui/assets/scripts.js +68 -0
  192. testgen/ui/assets/style.css +140 -0
  193. testgen/ui/bootstrap.py +109 -0
  194. testgen/ui/components/__init__.py +0 -0
  195. testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fBBc4.woff2 +0 -0
  196. testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fChc4EsA.woff2 +0 -0
  197. testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu4mxK.woff2 +0 -0
  198. testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu7GxKOzY.woff2 +0 -0
  199. testgen/ui/components/frontend/css/material-symbols-rounded.css +24 -0
  200. testgen/ui/components/frontend/css/material-symbols-rounded.woff2 +0 -0
  201. testgen/ui/components/frontend/css/roboto-font-faces.css +35 -0
  202. testgen/ui/components/frontend/css/shared.css +36 -0
  203. testgen/ui/components/frontend/img/dk_logo.svg +46 -0
  204. testgen/ui/components/frontend/index.html +17 -0
  205. testgen/ui/components/frontend/js/components/breadcrumbs.js +86 -0
  206. testgen/ui/components/frontend/js/components/button.js +66 -0
  207. testgen/ui/components/frontend/js/components/location.js +62 -0
  208. testgen/ui/components/frontend/js/components/select.js +75 -0
  209. testgen/ui/components/frontend/js/components/sidebar.js +358 -0
  210. testgen/ui/components/frontend/js/main.js +99 -0
  211. testgen/ui/components/frontend/js/streamlit.js +19 -0
  212. testgen/ui/components/frontend/js/van.min.js +1 -0
  213. testgen/ui/components/utils/__init__.py +0 -0
  214. testgen/ui/components/utils/callbacks.py +51 -0
  215. testgen/ui/components/utils/component.py +13 -0
  216. testgen/ui/components/widgets/__init__.py +6 -0
  217. testgen/ui/components/widgets/breadcrumbs.py +32 -0
  218. testgen/ui/components/widgets/location.py +65 -0
  219. testgen/ui/components/widgets/modal.py +97 -0
  220. testgen/ui/components/widgets/sidebar.py +69 -0
  221. testgen/ui/navigation/__init__.py +0 -0
  222. testgen/ui/navigation/menu.py +42 -0
  223. testgen/ui/navigation/page.py +20 -0
  224. testgen/ui/navigation/router.py +63 -0
  225. testgen/ui/queries/__init__.py +0 -0
  226. testgen/ui/queries/authentication_queries.py +47 -0
  227. testgen/ui/queries/connection_queries.py +121 -0
  228. testgen/ui/queries/profiling_queries.py +148 -0
  229. testgen/ui/queries/project_queries.py +9 -0
  230. testgen/ui/queries/table_group_queries.py +186 -0
  231. testgen/ui/queries/test_definition_queries.py +270 -0
  232. testgen/ui/queries/test_run_queries.py +32 -0
  233. testgen/ui/queries/test_suite_queries.py +145 -0
  234. testgen/ui/scripts/__init__.py +0 -0
  235. testgen/ui/scripts/patch_streamlit.py +111 -0
  236. testgen/ui/services/__init__.py +0 -0
  237. testgen/ui/services/authentication_service.py +119 -0
  238. testgen/ui/services/connection_service.py +220 -0
  239. testgen/ui/services/database_service.py +282 -0
  240. testgen/ui/services/form_service.py +1008 -0
  241. testgen/ui/services/javascript_service.py +44 -0
  242. testgen/ui/services/query_service.py +316 -0
  243. testgen/ui/services/string_service.py +12 -0
  244. testgen/ui/services/table_group_service.py +130 -0
  245. testgen/ui/services/test_definition_service.py +117 -0
  246. testgen/ui/services/test_run_service.py +13 -0
  247. testgen/ui/services/test_suite_service.py +76 -0
  248. testgen/ui/services/toolbar_service.py +77 -0
  249. testgen/ui/session.py +46 -0
  250. testgen/ui/views/__init__.py +0 -0
  251. testgen/ui/views/app_log_modal.py +92 -0
  252. testgen/ui/views/connections.py +72 -0
  253. testgen/ui/views/connections_base.py +367 -0
  254. testgen/ui/views/login.py +40 -0
  255. testgen/ui/views/not_found.py +16 -0
  256. testgen/ui/views/overview.py +34 -0
  257. testgen/ui/views/profiling_anomalies.py +501 -0
  258. testgen/ui/views/profiling_details.py +335 -0
  259. testgen/ui/views/profiling_modal.py +40 -0
  260. testgen/ui/views/profiling_results.py +206 -0
  261. testgen/ui/views/profiling_summary.py +177 -0
  262. testgen/ui/views/project_settings.py +74 -0
  263. testgen/ui/views/table_groups.py +530 -0
  264. testgen/ui/views/test_definitions.py +1020 -0
  265. testgen/ui/views/test_results.py +908 -0
  266. testgen/ui/views/test_runs.py +195 -0
  267. testgen/ui/views/test_suites.py +545 -0
  268. testgen/utils/__init__.py +0 -0
  269. testgen/utils/plugins.py +17 -0
  270. testgen/utils/singleton.py +14 -0
@@ -0,0 +1,59 @@
+ -- Window match (same values) test template.
+ -- Counts the distinct COLUMN_NAME value combinations that occur in only one of
+ -- two adjacent windows of WINDOW_DAYS days, measured back from the table's
+ -- latest WINDOW_DATE_COLUMN value, and emits a single result row.
1
+ SELECT '{PROJECT_CODE}' as project_code,
2
+ '{TEST_TYPE}' as test_type,
3
+ '{TEST_DEFINITION_ID}' as test_definition_id,
4
+ '{TEST_SUITE}' as test_suite,
5
+ '{TEST_RUN_ID}' as test_run_id,
6
+ '{RUN_DATE}' as test_time,
7
+ '{START_TIME}' as starttime,
8
+ CURRENT_TIMESTAMP as endtime,
9
+ '{SCHEMA_NAME}' as schema_name,
10
+ '{TABLE_NAME}' as table_name,
11
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ -- The error allowance is reported twice: as quoted text (threshold_value)
+ -- and as a bare value (skip_errors).
12
+ '{SKIP_ERRORS}' as threshold_value,
13
+ {SKIP_ERRORS} as skip_errors,
14
+ '{INPUT_PARAMETERS}' as input_parameters,
+ -- result_code is 0 when the mismatch count exceeds the allowance, otherwise 1.
15
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ -- Message reads "<n> error(s) identified, exceeding|within limit of <allowance>."
+ -- or "No errors found." when the count is zero.
16
+ CASE
17
+ WHEN COUNT(*) > 0 THEN
18
+ CONCAT(
19
+ CONCAT( CAST(COUNT(*) AS VARCHAR), ' error(s) identified, ' ),
20
+ CONCAT(
21
+ CASE
22
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
23
+ ELSE 'within limit of '
24
+ END,
25
+ '{SKIP_ERRORS}.'
26
+ )
27
+ )
28
+ ELSE 'No errors found.'
29
+ END AS result_message,
30
+ COUNT(*) as result_measure,
31
+ '{SUBSET_DISPLAY}' as subset_condition,
32
+ NULL as result_query
33
+ FROM (
+ -- First branch: values present in the latest window (dates on or after
+ -- max date minus WINDOW_DAYS) but absent from the prior window.
+ -- NOTE(review): the MAX date subqueries read the whole table and do not apply
+ -- the subset condition - confirm this is intended.
34
+ (
35
+ SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME}
36
+ FROM {SCHEMA_NAME}.{TABLE_NAME}
37
+ WHERE {SUBSET_CONDITION}
38
+ AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}))
39
+ EXCEPT
40
+ SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME}
41
+ FROM {SCHEMA_NAME}.{TABLE_NAME}
42
+ WHERE {SUBSET_CONDITION}
43
+ AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - 2 * {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}))
44
+ AND {WINDOW_DATE_COLUMN} < DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}))
45
+ )
46
+ UNION ALL
+ -- Second branch: values present in the prior window (from 2 * WINDOW_DAYS back
+ -- up to, but excluding, WINDOW_DAYS back) but absent from the latest window.
47
+ (
48
+ SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME}
49
+ FROM {SCHEMA_NAME}.{TABLE_NAME}
50
+ WHERE {SUBSET_CONDITION}
51
+ AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - 2 * {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}))
52
+ AND {WINDOW_DATE_COLUMN} < DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}))
53
+ EXCEPT
54
+ SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME}
55
+ FROM {SCHEMA_NAME}.{TABLE_NAME}
56
+ WHERE {SUBSET_CONDITION}
57
+ AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}))
58
+ )
59
+ ) test;
@@ -0,0 +1,3 @@
+ -- Frequency table: one row per distinct combination of the contingency
+ -- columns in the target table, with its row count as freq_ct.
1
+ SELECT {CONTINGENCY_COLUMNS}, COUNT(*) as freq_ct
2
+ FROM {DATA_SCHEMA}.{DATA_TABLE}
3
+ GROUP BY {CONTINGENCY_COLUMNS};
@@ -0,0 +1,3 @@
+ -- Lists every column in the requested schemas as a single
+ -- schema.table.column string, read from information_schema.columns.
+ -- NOTE(review): CONCAT is nested two arguments at a time, presumably for
+ -- engines whose CONCAT accepts only two arguments - confirm before flattening.
1
+ select concat(concat(concat(table_schema, '.'), concat(table_name, '.')), column_name) as columns
2
+ from information_schema.columns
3
+ where table_schema in ({TEST_SCHEMAS});
@@ -0,0 +1,53 @@
1
+ -- Relative Entropy: measured by Jensen-Shannon Divergence
2
+ -- Smoothed and normalized version of KL divergence,
3
+ -- with scores between 0 (identical) and 1 (maximally different),
4
+ -- when using the base-2 logarithm. Formula is:
5
+ -- 0.5 * kl_divergence(p, m) + 0.5 * kl_divergence(q, m)
6
+ -- Log base 2 of x = LN(x)/LN(2)
+ -- (SQL Server's single-argument LOG() is the natural logarithm, so the
+ -- LOG(x)/LOG(2) expressions below compute log base 2.)
+ -- latest_ver: each category's share of rows in the table under test.
7
+ WITH latest_ver
8
+ AS ( SELECT {CONCAT_COLUMNS} as category,
9
+ CAST(COUNT(*) as FLOAT) / CAST(SUM(COUNT(*)) OVER () as FLOAT) AS pct_of_total
10
+ FROM {SCHEMA_NAME}.{TABLE_NAME} v1
11
+ WHERE {SUBSET_CONDITION}
12
+ GROUP BY {COLUMN_NAME_NO_QUOTES} ),
+ -- older_ver: the same share computed over the comparison (match) data.
+ -- NOTE(review): this reads the same table name in the match schema rather than
+ -- a separate match table name - confirm this is intended.
13
+ older_ver
14
+ AS ( SELECT {CONCAT_MATCH_GROUPBY} as category,
15
+ CAST(COUNT(*) as FLOAT) / CAST(SUM(COUNT(*)) OVER () as FLOAT) AS pct_of_total
16
+ FROM {MATCH_SCHEMA_NAME}.{TABLE_NAME} v2
17
+ WHERE {MATCH_SUBSET_CONDITION}
18
+ GROUP BY {MATCH_GROUPBY_NAMES} ),
+ -- dataset: FULL JOIN keeps categories found on either side. A category missing
+ -- from one side gets the small share 0.0000001 instead of NULL, which keeps the
+ -- ratios passed to LOG below positive. avg_pct is the midpoint of the two shares.
19
+ dataset
20
+ AS ( SELECT COALESCE(l.category, o.category) AS category,
21
+ COALESCE(o.pct_of_total, 0.0000001) AS old_pct,
22
+ COALESCE(l.pct_of_total, 0.0000001) AS new_pct,
23
+ (COALESCE(o.pct_of_total, 0.0000001)
24
+ + COALESCE(l.pct_of_total, 0.0000001))/2.0 AS avg_pct
25
+ FROM latest_ver l
26
+ FULL JOIN older_ver o
27
+ ON (l.category = o.category) )
28
+ SELECT '{PROJECT_CODE}' as project_code,
29
+ '{TEST_TYPE}' as test_type,
30
+ '{TEST_DEFINITION_ID}' as test_definition_id,
31
+ '{TEST_SUITE}' as test_suite,
32
+ '{TEST_RUN_ID}' as test_run_id,
33
+ '{RUN_DATE}' as test_time,
34
+ '{START_TIME}' as starttime,
35
+ CURRENT_TIMESTAMP as endtime,
36
+ '{SCHEMA_NAME}' as schema_name,
37
+ '{TABLE_NAME}' as table_name,
38
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
39
+ -- '{GROUPBY_NAMES}' as column_names,
40
+ '{THRESHOLD_VALUE}' as threshold_value,
41
+ NULL as skip_errors,
42
+ '{INPUT_PARAMETERS}' as input_parameters,
+ -- result_code is 0 when the divergence exceeds the threshold, otherwise 1.
43
+ CASE WHEN js_divergence > {THRESHOLD_VALUE} THEN 0 ELSE 1 END as result_code,
44
+ CONCAT('Divergence Level: ',
45
+ CONCAT(CAST(js_divergence AS VARCHAR),
46
+ ', Threshold: {THRESHOLD_VALUE}.')) as result_message,
47
+ js_divergence as result_measure,
48
+ '{SUBSET_DISPLAY}' as subset_condition,
49
+ NULL as result_query
50
+ FROM (
+ -- Half the divergence of the new shares from the midpoint plus half the
+ -- divergence of the old shares from the midpoint, both in log base 2.
51
+ SELECT 0.5 * ABS(SUM(new_pct * LOG(new_pct/avg_pct)/LOG(2)))
52
+ + 0.5 * ABS(SUM(old_pct * LOG(old_pct/avg_pct)/LOG(2))) as js_divergence
53
+ FROM dataset ) rslt;
@@ -0,0 +1,35 @@
+ -- Column dictionary query for SQL Server.
+ -- Returns one row per column of the requested schema from
+ -- information_schema.columns, with a display data type, a one-letter
+ -- general_type and an is_decimal flag, ordered by schema, table and position.
1
+ SELECT '{PROJECT_CODE}' as project_code,
2
+ CURRENT_TIMESTAMP as refresh_timestamp,
3
+ c.table_schema,
4
+ c.table_name,
5
+ c.column_name,
+ -- Display type: datetime2 is reported as datetime; varchar, char and numeric
+ -- get their length or precision and scale appended; other types pass through.
+ -- NOTE(review): SQL Server reports character_maximum_length as -1 for (max)
+ -- columns, so varchar(max) renders as 'varchar(-1)' - confirm consumers cope.
6
+ CASE
7
+ WHEN c.data_type = 'datetime' THEN 'datetime'
8
+ WHEN c.data_type = 'datetime2' THEN 'datetime'
9
+ WHEN c.data_type = 'varchar'
10
+ THEN 'varchar(' + CAST(c.character_maximum_length AS VARCHAR) + ')'
11
+ WHEN c.data_type = 'char' THEN 'char(' + CAST(c.character_maximum_length AS VARCHAR) + ')'
12
+ WHEN c.data_type = 'numeric' THEN 'numeric(' + CAST(c.numeric_precision AS VARCHAR) + ',' +
13
+ CAST(c.numeric_scale AS VARCHAR) + ')'
14
+ ELSE c.data_type END AS data_type,
15
+ c.character_maximum_length,
16
+ c.ordinal_position,
+ -- general_type: A for types containing 'char', B for bit, D for date and
+ -- datetime types, T for types starting with 'time', N for numeric types,
+ -- X for everything else.
17
+ CASE
18
+ WHEN LOWER(c.data_type) LIKE '%char%'
19
+ THEN 'A'
20
+ WHEN c.data_type = 'bit'
21
+ THEN 'B'
22
+ WHEN c.data_type = 'date'
23
+ OR c.data_type LIKE 'datetime%'
24
+ THEN 'D'
25
+ WHEN c.data_type like 'time%'
26
+ THEN 'T'
+ -- SQL Server reports int, tinyint, decimal, float, money and smallmoney under
+ -- those names, so they are listed explicitly; without them such columns fell
+ -- through to X. The ANSI spellings 'integer' and 'double precision' are kept
+ -- for backward compatibility.
27
+ WHEN c.data_type IN ('tinyint', 'smallint', 'int', 'integer', 'bigint', 'decimal', 'float', 'double precision', 'real', 'money', 'smallmoney')
28
+ OR c.data_type LIKE 'numeric%'
29
+ THEN 'N'
30
+ ELSE
31
+ 'X' END AS general_type,
+ -- is_decimal is 1 only when the column declares a scale greater than zero;
+ -- a NULL scale yields 0.
32
+ case when c.numeric_scale > 0 then 1 else 0 END as is_decimal
33
+ FROM information_schema.columns c
+ -- The table criteria placeholder is spliced in verbatim after the schema
+ -- filter (presumably extra AND conditions - verify against the caller).
34
+ WHERE c.table_schema = '{DATA_SCHEMA}' {TABLE_CRITERIA}
35
+ ORDER BY c.table_schema, c.table_name, c.ordinal_position;
@@ -0,0 +1,246 @@
1
+ ---
2
+ strTemplate01_sampling: "SELECT TOP {SAMPLE_SIZE} "
3
+ strTemplate01_else: "SELECT "
4
+ strTemplate02_all: |
5
+ {CONNECTION_ID} as connection_id,
6
+ '{PROJECT_CODE}' as project_code,
7
+ '{TABLE_GROUPS_ID}' as table_groups_id,
8
+ '{DATA_SCHEMA}' AS schema_name,
9
+ '{RUN_DATE}' AS run_date,
10
+ '{DATA_TABLE}' AS table_name,
11
+ {COL_POS} AS position,
12
+ '{COL_NAME_SANITIZED}' AS column_name,
13
+ '{COL_TYPE}' AS column_type,
14
+ '{COL_GEN_TYPE}' AS general_type,
15
+ COUNT(*) AS record_ct,
16
+ COUNT("{COL_NAME}") AS value_ct,
17
+ COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
18
+ SUM(CASE WHEN "{COL_NAME}" IS NULL THEN 1 ELSE 0 END) AS null_value_ct,
19
+ strTemplate03_ADN: MIN(LEN("{COL_NAME}")) AS min_length,
20
+ MAX(LEN("{COL_NAME}")) AS max_length,
21
+ AVG(CAST(NULLIF(LEN("{COL_NAME}"), 0) AS FLOAT)) AS avg_length,
22
+ strTemplate03_else: NULL as min_length,
23
+ NULL as max_length,
24
+ NULL as avg_length,
25
+ strTemplate04_A: SUM(CASE
26
+ WHEN LTRIM(RTRIM("{COL_NAME}")) LIKE '0([.]0*)' THEN 1 ELSE 0
27
+ END) AS zero_value_ct,
28
+ strTemplate04_N: CAST(SUM( 1 - ABS(SIGN("{COL_NAME}")))AS BIGINT ) AS zero_value_ct,
29
+ strTemplate04_else: NULL as zero_value_ct,
30
+ strTemplate05_A: COUNT(DISTINCT UPPER(REPLACE(TRANSLATE("{COL_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ',''))) as distinct_std_value_ct,
31
+ SUM(CASE
32
+ WHEN "{COL_NAME}" = '' THEN 1
33
+ ELSE 0
34
+ END) AS zero_length_ct,
35
+ SUM( CASE
36
+ WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1
37
+ ELSE 0
38
+ END ) AS lead_space_ct,
39
+ SUM( CASE WHEN "{COL_NAME}" LIKE '"%"' OR "{COL_NAME}" LIKE '''%''' THEN 1 ELSE 0 END ) as quoted_value_ct,
40
+ SUM( CASE WHEN "{COL_NAME}" LIKE '%[0-9]%' THEN 1 ELSE 0 END ) as includes_digit_ct,
41
+ SUM( CASE
42
+ WHEN "{COL_NAME}" IN ('.', '?') OR "{COL_NAME}" LIKE ' ' THEN 1
43
+ WHEN LEN("{COL_NAME}") > 1
44
+ AND ( LOWER("{COL_NAME}") LIKE '%..%' OR LOWER("{COL_NAME}") LIKE '%--%'
45
+ OR (LEN(REPLACE("{COL_NAME}", '0', ''))= 0 )
46
+ OR (LEN(REPLACE("{COL_NAME}", '9', ''))= 0 )
47
+ OR (LEN(REPLACE(LOWER("{COL_NAME}"), 'x', ''))= 0 )
48
+ OR (LEN(REPLACE(LOWER("{COL_NAME}"), 'z', ''))= 0 )
49
+ ) THEN 1
50
+ WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
51
+ 'n/a','#na','none','null','unknown') THEN 1
52
+ WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
53
+ '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
54
+ WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
55
+ '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
56
+ ELSE 0
57
+ END ) AS filled_value_ct,
58
+ LEFT(MIN(NULLIF("{COL_NAME}", '') COLLATE Latin1_General_BIN ), 100) AS min_text,
59
+ LEFT(MAX(NULLIF("{COL_NAME}", '') COLLATE Latin1_General_BIN ), 100) AS max_text,
60
+ SUM({DATA_QC_SCHEMA}.fndk_isnum(LEFT("{COL_NAME}", 31))) AS numeric_ct,
61
+ SUM({DATA_QC_SCHEMA}.fndk_isdate(LEFT("{COL_NAME}", 26))) AS date_ct,
62
+ CASE
63
+ WHEN CAST(SUM( CASE WHEN UPPER("{COL_NAME}") LIKE '[1-9]% [A-Z]% %'
64
+ AND CHARINDEX(' ', "{COL_NAME}") BETWEEN 2 and 6 THEN 1
65
+ END ) as FLOAT) /CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'STREET_ADDR'
66
+ WHEN CAST(SUM(CASE WHEN "{COL_NAME}" IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
67
+ THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'STATE_USA'
68
+ WHEN CAST(SUM( CASE WHEN ("{COL_NAME}" LIKE '[+]1%[0-9][0-9][0-9]%[-. ][0-9][0-9][0-9]%[0-9][0-9][0-9][0-9,0-9,0-9,0-9,0-9,0-9]' AND "{COL_NAME}" NOT LIKE '%[^0-9+()-]%')
69
+ OR ("{COL_NAME}" LIKE '[+]1%[0-9][0-9][0-9][- ][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9]' AND "{COL_NAME}" NOT LIKE '%[^0-9+-]%')
70
+ THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'PHONE_USA'
71
+ WHEN CAST(SUM( CASE WHEN "{COL_NAME}" LIKE '%[_a-zA-Z0-9.-]%@%[a-zA-Z0-9.-]%.[a-zA-Z][a-zA-Z]%'
72
+ THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'EMAIL'
73
+ WHEN CAST(SUM( CASE WHEN TRANSLATE("{COL_NAME}",'012345678','999999999') IN ('99999', '999999999', '99999-9999')
74
+ THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'ZIP_USA'
75
+ WHEN CAST(SUM( CASE WHEN "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS NOT LIKE ' %'
76
+ AND "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '[a-z0-9 _-]%'
77
+ AND ("{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.txt'
78
+ OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.csv'
79
+ OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.tsv'
80
+ OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.dat'
81
+ OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.doc'
82
+ OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.pdf'
83
+ OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.xlsx')
84
+ THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'FILE_NAME'
85
+ WHEN CAST(SUM( CASE WHEN "{COL_NAME}" LIKE '[0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9]'
86
+ THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'CREDIT_CARD'
87
+ WHEN CAST(SUM( CASE WHEN ( "{COL_NAME}" LIKE '%,%,%,%'
88
+ OR "{COL_NAME}" LIKE '%|%|%|%'
89
+ OR "{COL_NAME}" LIKE '%^%^%^%'
90
+ OR "{COL_NAME}" LIKE '%' + CHAR(9) + '%' + CHAR(9) + '%' + CHAR(9) + '%' )
91
+ AND NOT ( "{COL_NAME}" LIKE '% and %'
92
+ OR "{COL_NAME}" LIKE '% but %'
93
+ OR "{COL_NAME}" LIKE '% or %'
94
+ OR "{COL_NAME}" LIKE '% yet %' )
95
+ AND ISNULL(CAST(LEN("{COL_NAME}") - LEN(REPLACE("{COL_NAME}", ',', '')) as FLOAT)
96
+ / CAST(NULLIF(LEN("{COL_NAME}") - LEN(REPLACE("{COL_NAME}", ' ', '')), 0) as FLOAT), 1) > 0.6
97
+ THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'DELIMITED_DATA'
98
+ WHEN CAST(SUM ( CASE WHEN "{COL_NAME}" LIKE '[0-8][0-9][0-9][- ][0-9][0-9][- ][0-9][0-9][0-9][0-9]'
99
+ AND LEFT("{COL_NAME}", 3) NOT BETWEEN '734' AND '749'
100
+ AND LEFT("{COL_NAME}", 3) <> '666' THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'SSN'
101
+ END as std_pattern_match,
102
+ strTemplate05_else: NULL as distinct_std_value_ct,
103
+ NULL as zero_length_ct,
104
+ NULL as lead_space_ct,
105
+ NULL as quoted_value_ct,
106
+ NULL as includes_digit_ct,
107
+ NULL as filled_value_ct,
108
+ NULL as min_text,
109
+ NULL as max_text,
110
+ NULL as numeric_ct,
111
+ NULL as date_ct,
112
+ NULL as std_pattern_match,
113
# strTemplate06_A_patterns -- scalar subquery returned as top_patterns.
# TRANSLATE (under a binary collation) maps lowercase letters to 'a',
# uppercase letters to 'A' and digits to 'N'; the resulting patterns are
# counted, the TOP 5 by count are kept, and each is rendered as
# "<count> | <pattern>". Entries are joined with ' | ' in descending count
# order and the result is cut to 1000 characters.
# Only values > ' ' are considered, and only when the table-wide
# MAX(LEN(column)) lies between 3 and {PARM_MAX_PATTERN_LENGTH}.
+ strTemplate06_A_patterns: ( SELECT LEFT(STRING_AGG(pattern, ' | ') WITHIN GROUP (ORDER BY ct DESC), 1000) AS concat_pats
114
+ FROM (
115
+ SELECT TOP 5 CAST(COUNT(*) AS VARCHAR(10)) + ' | ' + pattern AS pattern,
116
+ COUNT(*) AS ct
117
+ FROM ( SELECT TRANSLATE("{COL_NAME}" COLLATE Latin1_General_BIN,
118
+ 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
119
+ 'aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAANNNNNNNNNN' )
120
+ AS pattern
121
+ FROM {DATA_SCHEMA}.{DATA_TABLE} WITH (NOLOCK)
122
+ WHERE "{COL_NAME}" > ' ' AND ((SELECT MAX(LEN("{COL_NAME}"))
123
+ FROM {DATA_SCHEMA}.{DATA_TABLE} WITH (NOLOCK)) BETWEEN 3 and {PARM_MAX_PATTERN_LENGTH})) p
124
+ GROUP BY pattern
125
+ HAVING pattern > ' '
126
+ ORDER BY COUNT(*) DESC
127
+ ) ps) AS top_patterns,
128
# strTemplate06_else -- NULL placeholder for the top_patterns column.
+ strTemplate06_else: NULL as top_patterns,
129
# strTemplate07_A_freq -- scalar subquery returned as top_freq_values.
# Takes the TOP 10 values (> ' ') by row count, renders each as
# "<count> | <value>", joins them with ' | ' ordered by count descending then
# value ascending, and cuts the result to 1000 characters.
# NOTE(review): the inner TOP 10 orders by COUNT(*) only, so which values
# survive a tie at the cut-off is not pinned down by this query.
+ strTemplate07_A_freq: ( SELECT LEFT(STRING_AGG(val, ' | ') WITHIN GROUP (ORDER BY ct DESC, val ASC), 1000) as concat_vals
130
+ FROM (
131
+ SELECT TOP 10 CAST(COUNT(*) as VARCHAR(10)) + ' | ' + "{COL_NAME}" as val,
132
+ COUNT(*) as ct
133
+ FROM {DATA_SCHEMA}.{DATA_TABLE} WITH (NOLOCK)
134
+ WHERE "{COL_NAME}" > ' '
135
+ GROUP BY "{COL_NAME}"
136
+ HAVING "{COL_NAME}" > ' '
137
+ ORDER BY COUNT(*) DESC
138
+ ) ps
139
+ ) AS top_freq_values,
140
# strTemplate07_else -- NULL placeholder for the top_freq_values column.
+ strTemplate07_else: NULL as top_freq_values,
141
# strTemplate08_N -- numeric summary columns: minimum, smallest value above
# zero, maximum, average and standard deviation (both computed on a FLOAT
# cast), plus the 25th/50th/75th percentiles.
# pct_25 / pct_50 / pct_75 are not columns of the profiled table; they match
# the aliases of the "pctile" derived table defined in strTemplate99_N, so
# that fragment is presumably appended to the same FROM clause -- verify.
+ strTemplate08_N: MIN("{COL_NAME}") AS min_value,
142
+ MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
143
+ MAX("{COL_NAME}") AS max_value,
144
+ AVG(CAST("{COL_NAME}" AS FLOAT)) AS avg_value,
145
+ STDEV(CAST("{COL_NAME}" AS FLOAT)) AS stdev_value,
146
+ MIN(pct_25) as percentile_25,
147
+ MIN(pct_50) as percentile_50,
148
+ MIN(pct_75) as percentile_75,
149
# strTemplate08_else -- NULL placeholders for the same eight numeric columns.
+ strTemplate08_else: NULL as min_value,
150
+ NULL as min_value_over_0,
151
+ NULL as max_value,
152
+ NULL as avg_value,
153
+ NULL as stdev_value,
154
+ NULL as percentile_25,
155
+ NULL as percentile_50,
156
+ NULL as percentile_75,
157
# strTemplate10_N_dec -- fractional_sum: the sum over all rows of the
# fractional part of the value (value % 1), each rounded to 5 decimals.
+ strTemplate10_N_dec: SUM(ROUND(("{COL_NAME}" % 1), 5)) as fractional_sum,
158
+
159
# strTemplate10_else -- NULL placeholder for fractional_sum.
+ strTemplate10_else: NULL as fractional_sum,
160
+
161
# strTemplate11_D -- date profile columns, all measured against '{RUN_DATE}':
#   min_date              earliest value, clamped so it is never before 0001-01-01
#   max_date              latest value
#   before_1yr/5yr/20yr   rows more than 12 / 60 / 240 month boundaries before the run date
#   within_1yr / 1mo      rows 0-365 / 0-30 day boundaries before the run date
#   future_date_ct        rows later than the run date
#   date_*_present        number of distinct day / week / month offsets observed
+ strTemplate11_D: CASE
162
+ WHEN MIN("{COL_NAME}") IS NULL THEN NULL
163
+ ELSE CASE WHEN MIN("{COL_NAME}") >= CAST('0001-01-01' as date) THEN MIN("{COL_NAME}") ELSE CAST('0001-01-01' as date) END
164
+ END as min_date,
165
+ MAX("{COL_NAME}") as max_date,
166
+ SUM(CASE
167
+ WHEN DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}') > 12 THEN 1
168
+ ELSE 0
169
+ END) AS before_1yr_date_ct,
170
+ SUM(CASE
171
+ WHEN DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}') > 60 THEN 1
172
+ ELSE 0
173
+ END) AS before_5yr_date_ct,
174
+ SUM(CASE
175
+ WHEN DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}') > 240 THEN 1
176
+ ELSE 0
177
+ END) AS before_20yr_date_ct,
178
+ SUM(CASE
179
+ WHEN DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1
180
+ ELSE 0
181
+ END) AS within_1yr_date_ct,
182
+ SUM(CASE
183
+ WHEN DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 30 THEN 1
184
+ ELSE 0
185
+ END) AS within_1mo_date_ct,
186
+ SUM(CASE
187
+ WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 ELSE 0
188
+ END) AS future_date_ct,
189
+ COUNT(DISTINCT DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}' ) ) as date_days_present,
190
+ COUNT(DISTINCT DATEDIFF(week, "{COL_NAME}", '{RUN_DATE}' ) ) as date_weeks_present,
191
+ COUNT(DISTINCT DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}' ) ) as date_months_present,
192
+
193
# strTemplate11_else -- NULL placeholders for the same eleven date columns.
+ strTemplate11_else: NULL as min_date,
194
+ NULL as max_date,
195
+ NULL as before_1yr_date_ct,
196
+ NULL as before_5yr_date_ct,
197
+ NULL as before_20yr_date_ct,
198
+ NULL as within_1yr_date_ct,
199
+ NULL as within_1mo_date_ct,
200
+ NULL as future_date_ct,
201
+ NULL as date_days_present,
202
+ NULL as date_weeks_present,
203
+ NULL as date_months_present,
204
+
205
# strTemplate12_B -- boolean_true_ct: sum of the column cast to INTEGER.
# Presumably the column is a bit/boolean so the sum counts true rows -- verify.
+ strTemplate12_B: SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
206
+
207
# strTemplate12_else -- NULL placeholder for boolean_true_ct.
+ strTemplate12_else: NULL as boolean_true_ct,
208
+
209
# strTemplate13_ALL -- datatype_suggestion is always emitted as NULL by this
# query; presumably populated by a later step -- verify.
+ strTemplate13_ALL: NULL AS datatype_suggestion,
210
# strTemplate14_A_do_patterns -- three columns:
#   distinct_pattern_ct   number of distinct letter/digit patterns (same
#                         TRANSLATE mapping to 'a' / 'A' / 'N') among values > ' '
#   embedded_space_ct     SIGN() turns the per-row count of spaces inside the
#                         trimmed value into 0/1, so the SUM counts rows that
#                         contain at least one embedded space
#   avg_embedded_spaces   average number of spaces inside the trimmed value
+ strTemplate14_A_do_patterns: ( SELECT COUNT(DISTINCT TRANSLATE("{COL_NAME}" COLLATE Latin1_General_BIN,
211
+ 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
212
+ 'aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAANNNNNNNNNN'
213
+ )
214
+ ) AS pattern_ct
215
+ FROM {DATA_SCHEMA}.{DATA_TABLE} WITH (NOLOCK)
216
+ WHERE "{COL_NAME}" > ' ' ) AS distinct_pattern_ct,
217
+ SUM(CAST(SIGN(LEN(RTRIM(LTRIM("{COL_NAME}"))) - LEN(REPLACE(RTRIM(LTRIM("{COL_NAME}")),' ',''))) AS BIGINT)) AS embedded_space_ct,
218
+ AVG(CAST(LEN(RTRIM(LTRIM("{COL_NAME}"))) - LEN(REPLACE(RTRIM(LTRIM("{COL_NAME}")),' ','')) AS FLOAT)) AS avg_embedded_spaces,
219
+
220
# strTemplate14_A_no_patterns -- same embedded-space columns, but
# distinct_pattern_ct is emitted as NULL (no pattern subquery is run).
+ strTemplate14_A_no_patterns: NULL as distinct_pattern_ct,
221
+ SUM(CAST(SIGN(LEN(RTRIM(LTRIM("{COL_NAME}"))) - LEN(REPLACE(RTRIM(LTRIM("{COL_NAME}")),' ',''))) AS BIGINT)) AS embedded_space_ct,
222
+ AVG(CAST(LEN(RTRIM(LTRIM("{COL_NAME}"))) - LEN(REPLACE(RTRIM(LTRIM("{COL_NAME}")),' ','')) AS FLOAT)) AS avg_embedded_spaces,
223
+
224
# strTemplate14_else -- NULL placeholders for all three columns.
+ strTemplate14_else: NULL as distinct_pattern_ct,
225
+ NULL as embedded_space_ct,
226
+ NULL as avg_embedded_spaces,
227
+
228
# strTemplate15_ALL -- functional_data_type and functional_table_type are
# always emitted as NULL by this query; presumably filled in later -- verify.
+ strTemplate15_ALL: NULL as functional_data_type,
229
+ NULL as functional_table_type,
230
+
231
# strTemplate16_ALL -- final select-list column (no trailing comma): the
# profiling run id as a string literal.
+ strTemplate16_ALL: " '{PROFILE_RUN_ID}' as profile_run_id"
232
+
233
# strTemplate98_sampling -- FROM clause with a NOLOCK table hint.
# NOTE(review): this string is identical to strTemplate98_else, so the FROM
# clause itself applies no sampling.
+ strTemplate98_sampling: ' FROM {DATA_SCHEMA}.{DATA_TABLE} WITH (NOLOCK)'
234
+
235
# strTemplate98_else -- FROM clause with a NOLOCK table hint.
+ strTemplate98_else: ' FROM {DATA_SCHEMA}.{DATA_TABLE} WITH (NOLOCK)'
236
+
237
# strTemplate99_N -- extra comma-joined derived table "pctile" exposing
# pct_25 / pct_50 / pct_75 for the column. PERCENTILE_CONT ... OVER () is a
# window function over the whole table, so every row carries the same three
# values and TOP 1 keeps a single row.
+ strTemplate99_N: |
238
+ , (SELECT TOP 1
239
+ PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_25,
240
+ PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_50,
241
+ PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_75
242
+ FROM {DATA_SCHEMA}.{DATA_TABLE} WITH (NOLOCK)) pctile
243
+
244
# strTemplate99_else -- a single space: nothing is appended to the FROM clause.
+ strTemplate99_else: ' '
245
+
246
# strTemplate100_sampling -- trailing ORDER BY appended to the query.
# NOTE(review): in SQL Server an unseeded RAND() is typically evaluated once
# per statement rather than once per row, so this may not shuffle rows; confirm
# whether NEWID() was intended.
+ strTemplate100_sampling: ' ORDER BY RAND()'
@@ -0,0 +1,36 @@
1
-- Get Freqs for selected columns
--
-- Returns one row for the profiled column with:
--   top_freq_values      the 10 most frequent values (> ' ') as "| value | count"
--                        lines, followed by one "| Other Values (n) | total"
--                        line that rolls up everything ranked below 10
--   distinct_value_hash  hex MD5 of all distinct non-empty values, joined with
--                        '|' in sorted order
WITH ranked_vals AS (
    -- One row per distinct value, ranked by descending frequency.
    SELECT "{COL_NAME}",
           COUNT(*) AS ct,
           ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) AS rn
      FROM {DATA_SCHEMA}.{DATA_TABLE}
     WHERE "{COL_NAME}" > ' '
     GROUP BY "{COL_NAME}"
),
consol_vals AS (
    -- Ranks 1-10 keep their own display line; all lower ranks share the NULL
    -- group key and therefore collapse into a single "Other Values" line.
    SELECT COALESCE(CASE WHEN rn <= 10 THEN '| ' + "{COL_NAME}" + ' | ' + CAST (ct AS VARCHAR)
                         ELSE NULL
                    END,
                    '| Other Values (' + CAST ( CAST(COUNT (DISTINCT CAST ("{COL_NAME}" as VARCHAR)) AS VARCHAR ) + ') | '
                                                + CAST (SUM (ct) as VARCHAR) AS VARCHAR)) AS val,
           MIN (rn) AS min_rn
      FROM ranked_vals
     GROUP BY CASE WHEN rn <= 10 THEN '| ' + "{COL_NAME}" + ' | ' + CAST (ct AS VARCHAR) ELSE NULL
              END
)
SELECT '{PROJECT_CODE}' AS project_code,
       '{DATA_SCHEMA}'  AS schema_name,
       '{RUN_DATE}'     AS run_date,
       '{DATA_TABLE}'   AS table_name,
       '{COL_NAME}'     AS column_name,
       REPLACE(STRING_AGG(CONVERT(NVARCHAR(max), val), '^#^') WITHIN GROUP (ORDER BY min_rn), '^#^', CHAR(10)) AS top_freq_values,
       -- STRING_AGG over a non-MAX string type is capped at 8000 bytes and
       -- raises an error beyond that, so the value is promoted to a MAX type
       -- first. CONCAT with an empty VARCHAR(MAX) yields VARCHAR(MAX) for
       -- varchar input and NVARCHAR(MAX) for nvarchar input, so the bytes fed
       -- to HASHBYTES stay the same as before for string columns. CONCAT turns
       -- NULL into '', which NULLIF maps back to NULL exactly as the empty
       -- string was treated before.
       (SELECT CONVERT(VARCHAR(40),
                       HASHBYTES('MD5',
                                 STRING_AGG(NULLIF(CONCAT(CAST('' AS VARCHAR(MAX)), dist_col_name), ''), '|')
                                     WITHIN GROUP (ORDER BY dist_col_name)),
                       2) AS dvh
          FROM (SELECT DISTINCT "{COL_NAME}" AS dist_col_name
                  FROM {DATA_SCHEMA}.{DATA_TABLE}) a
       ) AS distinct_value_hash
  FROM consol_vals;

-- Convert function has style = 2 : The characters 0x aren't added to the left of the converted result for style 2.
@@ -0,0 +1,8 @@
1
-- Step 1: Drop both functions if they exist
-- OBJECT_ID(..., 'FN') only resolves when a scalar function of that name
-- exists, so each DROP runs only when there is something to remove.
BEGIN
    IF OBJECT_ID('{DATA_QC_SCHEMA}.fndk_isnum', 'FN') IS NOT NULL
    BEGIN
        DROP FUNCTION {DATA_QC_SCHEMA}.fndk_isnum;
    END;

    IF OBJECT_ID('{DATA_QC_SCHEMA}.fndk_isdate', 'FN') IS NOT NULL
    BEGIN
        DROP FUNCTION {DATA_QC_SCHEMA}.fndk_isdate;
    END;
END
@@ -0,0 +1,12 @@
1
-- Step 2: Create isnum function
-- fndk_isnum(@strparm) returns 1 when the text can be cast to FLOAT and 0
-- otherwise; NULL and the empty string both return 0.
CREATE FUNCTION {DATA_QC_SCHEMA}.fndk_isnum (@strparm VARCHAR(500))
RETURNS INT
AS
BEGIN
    RETURN CASE
               WHEN TRY_CAST(NULLIF(@strparm, '') AS float) IS NOT NULL THEN 1
               ELSE 0
           END
END;
@@ -0,0 +1,54 @@
1
-- Step 3: Create isdate function
--
-- fndk_isdate(@strparm) returns 1 when the text parses as a date in one of the
-- recognised layouts AND carries a four-digit year between 1800 and 2200;
-- otherwise 0. NULL and the empty string return 0.
--
-- The year checks use TRY_CAST so that a non-numeric prefix such as '01 J'
-- (from '01 Jan 2020') or '01/1' yields NULL instead of raising a
-- varchar-to-int conversion error.
CREATE FUNCTION {DATA_QC_SCHEMA}.fndk_isdate(@strparm VARCHAR(500))
RETURNS INT
AS
BEGIN
    -- Empty string is treated the same as NULL throughout.
    DECLARE @val VARCHAR(500) = NULLIF(@strparm, '');
    -- Candidate year taken from the start / end of the text; NULL when those
    -- four characters are not an integer.
    DECLARE @year_prefix INT = TRY_CAST(LEFT(@val, 4) AS INT);
    DECLARE @year_suffix INT = TRY_CAST(RIGHT(RTRIM(@val), 4) AS INT);
    DECLARE @ret INT;

    SET @ret =
        CASE
            -- Purely numeric text that starts with a plausible year: the inner
            -- CASE alone decides, later branches are not consulted.
            WHEN TRY_CAST(@val AS float) IS NOT NULL
             AND @year_prefix BETWEEN 1800 AND 2200 THEN
                CASE
                    WHEN LEN(@val) > 11 THEN 0
                    -- YYYYMMDD
                    WHEN TRY_CONVERT(DATE, @val, 112) IS NOT NULL THEN 1

                    -- YYYY-MM-DD
                    WHEN TRY_CONVERT(DATE, @val, 23) IS NOT NULL THEN 1

                    -- MM/DD/YYYY
                    WHEN TRY_CONVERT(DATE, @val, 101) IS NOT NULL THEN 1

                    -- MM/DD/YY
                    WHEN TRY_CONVERT(DATE, @val, 1) IS NOT NULL THEN 1

                    --MM-DD-YYYY
                    WHEN TRY_CONVERT(DATE, @val, 110) IS NOT NULL THEN 1

                    --MM-DD-YY
                    WHEN TRY_CONVERT(DATE, @val, 10) IS NOT NULL THEN 1

                    ELSE 0
                END

            --DD MMM YYYY
            -- The year sits at the END of this layout, so the suffix is
            -- checked; the prefix check is kept so that inputs accepted
            -- before (year-first text) are still accepted.
            WHEN TRY_CONVERT(DATE, @val, 106) IS NOT NULL
             AND (   @year_prefix BETWEEN 1800 AND 2200
                  OR @year_suffix BETWEEN 1800 AND 2200)
                THEN 1

            -- YYYY-MM-DD HH:MM:SS SSSSSS
            WHEN TRY_CONVERT(DATETIME2, @val, 121) IS NOT NULL
             AND @year_prefix BETWEEN 1800 AND 2200
                THEN 1

            -- YYYY-MM-DD HH:MM:SS
            WHEN TRY_CONVERT(DATETIME2, @val, 120) IS NOT NULL
             AND @year_prefix BETWEEN 1800 AND 2200
                THEN 1

            ELSE 0
        END;

    RETURN @ret;

END
;
@@ -0,0 +1,4 @@
1
-- Create the QC schema only when it is not already present.
-- CREATE SCHEMA must be the first statement in its batch, hence the EXEC.
IF NOT EXISTS (SELECT 1
                 FROM sys.schemas AS s
                WHERE s.name = '{DATA_QC_SCHEMA}')
    EXEC('CREATE SCHEMA {DATA_QC_SCHEMA}');
@@ -0,0 +1 @@
1
-- Grant EXECUTE on the whole QC schema to {DB_USER}; the grant is at schema
-- scope (SCHEMA::), not per function.
+ GRANT EXECUTE ON SCHEMA::{DATA_QC_SCHEMA} TO {DB_USER};
@@ -0,0 +1,46 @@
1
-- Window test "no drops": every distinct {COLUMN_NAME_NO_QUOTES} combination
-- seen in the prior window must also appear in the latest window.
--   latest window: {WINDOW_DATE_COLUMN} >= MAX(date) - {WINDOW_DAYS}
--   prior window : MAX(date) - 2 * {WINDOW_DAYS} <= {WINDOW_DATE_COLUMN} < MAX(date) - {WINDOW_DAYS}
-- Each combination present in the prior window but missing from the latest
-- one counts as one error; the test passes (result_code = 1) while the error
-- count does not exceed {SKIP_ERRORS}.
SELECT '{PROJECT_CODE}'           AS project_code,
       '{TEST_TYPE}'              AS test_type,
       '{TEST_DEFINITION_ID}'     AS test_definition_id,
       '{TEST_SUITE}'             AS test_suite,
       '{TEST_RUN_ID}'            AS test_run_id,
       '{RUN_DATE}'               AS test_time,
       '{START_TIME}'             AS starttime,
       CURRENT_TIMESTAMP          AS endtime,
       '{SCHEMA_NAME}'            AS schema_name,
       '{TABLE_NAME}'             AS table_name,
       '{COLUMN_NAME_NO_QUOTES}'  AS column_names,
       '{SKIP_ERRORS}'            AS threshold_value,
       {SKIP_ERRORS}              AS skip_errors,
       '{INPUT_PARAMETERS}'       AS input_parameters,
       CASE WHEN COUNT(*) > {SKIP_ERRORS} THEN 0 ELSE 1 END AS result_code,
       CASE
           WHEN COUNT(*) > 0 THEN
               -- Two-argument CONCAT calls are nested, as in the original.
               CONCAT(
                   CONCAT(CAST(COUNT(*) AS VARCHAR), ' error(s) identified, '),
                   CONCAT(
                       CASE
                           WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
                           ELSE 'within limit of '
                       END,
                       '{SKIP_ERRORS}.'
                   )
               )
           ELSE 'No errors found.'
       END                        AS result_message,
       COUNT(*)                   AS result_measure,
       '{SUBSET_DISPLAY}'         AS subset_condition,
       NULL                       AS result_query
  FROM (
        -- Prior window. The subset condition is parenthesised so that a
        -- top-level OR inside it cannot swallow the date-window predicates.
        SELECT {COLUMN_NAME_NO_QUOTES}
          FROM {SCHEMA_NAME}.{TABLE_NAME}
         WHERE ({SUBSET_CONDITION})
           AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
           AND {WINDOW_DATE_COLUMN} <  (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - {WINDOW_DAYS}
         GROUP BY {COLUMN_NAME_NO_QUOTES}
        EXCEPT
        -- Latest window.
        SELECT {COLUMN_NAME_NO_QUOTES}
          FROM {SCHEMA_NAME}.{TABLE_NAME}
         WHERE ({SUBSET_CONDITION})
           AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - {WINDOW_DAYS}
         GROUP BY {COLUMN_NAME_NO_QUOTES}
       ) test;
@@ -0,0 +1,59 @@
1
-- Window test "match same": the set of {COLUMN_NAME} values must be identical
-- in the latest and the prior window.
--   latest window: {WINDOW_DATE_COLUMN} >= MAX(date) - {WINDOW_DAYS}
--   prior window : MAX(date) - 2 * {WINDOW_DAYS} <= {WINDOW_DATE_COLUMN} < MAX(date) - {WINDOW_DAYS}
-- Every value present in one window but not the other counts as one error,
-- labelled with the window it is missing from; the test passes
-- (result_code = 1) while the error count does not exceed {SKIP_ERRORS}.
SELECT '{PROJECT_CODE}'           AS project_code,
       '{TEST_TYPE}'              AS test_type,
       '{TEST_DEFINITION_ID}'     AS test_definition_id,
       '{TEST_SUITE}'             AS test_suite,
       '{TEST_RUN_ID}'            AS test_run_id,
       '{RUN_DATE}'               AS test_time,
       '{START_TIME}'             AS starttime,
       CURRENT_TIMESTAMP          AS endtime,
       '{SCHEMA_NAME}'            AS schema_name,
       '{TABLE_NAME}'             AS table_name,
       '{COLUMN_NAME_NO_QUOTES}'  AS column_names,
       '{SKIP_ERRORS}'            AS threshold_value,
       {SKIP_ERRORS}              AS skip_errors,
       '{INPUT_PARAMETERS}'       AS input_parameters,
       CASE WHEN COUNT(*) > {SKIP_ERRORS} THEN 0 ELSE 1 END AS result_code,
       CASE
           WHEN COUNT(*) > 0 THEN
               -- Two-argument CONCAT calls are nested, as in the original.
               CONCAT(
                   CONCAT(CAST(COUNT(*) AS VARCHAR), ' error(s) identified, '),
                   CONCAT(
                       CASE
                           WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
                           ELSE 'within limit of '
                       END,
                       '{SKIP_ERRORS}.'
                   )
               )
           ELSE 'No errors found.'
       END                        AS result_message,
       COUNT(*)                   AS result_measure,
       '{SUBSET_DISPLAY}'         AS subset_condition,
       NULL                       AS result_query
  FROM (
        (
         -- Values in the latest window that never occurred in the prior one.
         -- The subset condition is parenthesised so that a top-level OR
         -- inside it cannot swallow the date-window predicates.
         SELECT 'Prior Timeframe' AS missing_from, {COLUMN_NAME}
           FROM {SCHEMA_NAME}.{TABLE_NAME}
          WHERE ({SUBSET_CONDITION})
            AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - {WINDOW_DAYS}
         EXCEPT
         SELECT 'Prior Timeframe' AS missing_from, {COLUMN_NAME}
           FROM {SCHEMA_NAME}.{TABLE_NAME}
          WHERE ({SUBSET_CONDITION})
            AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
            AND {WINDOW_DATE_COLUMN} <  (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - {WINDOW_DAYS}
        )
        UNION ALL
        (
         -- Values in the prior window that no longer occur in the latest one.
         SELECT 'Latest Timeframe' AS missing_from, {COLUMN_NAME}
           FROM {SCHEMA_NAME}.{TABLE_NAME}
          WHERE ({SUBSET_CONDITION})
            AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
            AND {WINDOW_DATE_COLUMN} <  (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - {WINDOW_DAYS}
         EXCEPT
         SELECT 'Latest Timeframe' AS missing_from, {COLUMN_NAME}
           FROM {SCHEMA_NAME}.{TABLE_NAME}
          WHERE ({SUBSET_CONDITION})
            AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - {WINDOW_DAYS}
        )
       ) test;