dataops-testgen 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataops_testgen-2.2.0.dist-info/LICENSE +203 -0
- dataops_testgen-2.2.0.dist-info/METADATA +287 -0
- dataops_testgen-2.2.0.dist-info/NOTICE +5 -0
- dataops_testgen-2.2.0.dist-info/RECORD +270 -0
- dataops_testgen-2.2.0.dist-info/WHEEL +5 -0
- dataops_testgen-2.2.0.dist-info/entry_points.txt +2 -0
- dataops_testgen-2.2.0.dist-info/top_level.txt +1 -0
- testgen/__init__.py +0 -0
- testgen/__main__.py +770 -0
- testgen/commands/__init__.py +0 -0
- testgen/commands/queries/__init__.py +0 -0
- testgen/commands/queries/execute_cat_tests_query.py +95 -0
- testgen/commands/queries/execute_tests_query.py +160 -0
- testgen/commands/queries/generate_tests_query.py +94 -0
- testgen/commands/queries/profiling_query.py +366 -0
- testgen/commands/queries/test_parameter_validation_query.py +88 -0
- testgen/commands/run_execute_cat_tests.py +162 -0
- testgen/commands/run_execute_tests.py +168 -0
- testgen/commands/run_generate_tests.py +107 -0
- testgen/commands/run_get_entities.py +122 -0
- testgen/commands/run_launch_db_config.py +84 -0
- testgen/commands/run_observability_exporter.py +330 -0
- testgen/commands/run_profiling_bridge.py +495 -0
- testgen/commands/run_quick_start.py +168 -0
- testgen/commands/run_setup_profiling_tools.py +96 -0
- testgen/commands/run_test_definition.py +146 -0
- testgen/commands/run_test_parameter_validation.py +135 -0
- testgen/commands/run_upgrade_db_config.py +156 -0
- testgen/common/__init__.py +8 -0
- testgen/common/clean_sql.py +53 -0
- testgen/common/credentials.py +25 -0
- testgen/common/database/__init__.py +0 -0
- testgen/common/database/database_service.py +629 -0
- testgen/common/database/flavor/__init__.py +0 -0
- testgen/common/database/flavor/flavor_service.py +75 -0
- testgen/common/database/flavor/mssql_flavor_service.py +34 -0
- testgen/common/database/flavor/postgresql_flavor_service.py +5 -0
- testgen/common/database/flavor/redshift_flavor_service.py +22 -0
- testgen/common/database/flavor/snowflake_flavor_service.py +69 -0
- testgen/common/database/flavor/trino_flavor_service.py +21 -0
- testgen/common/date_service.py +68 -0
- testgen/common/display_service.py +85 -0
- testgen/common/docker_service.py +76 -0
- testgen/common/encrypt.py +55 -0
- testgen/common/get_pipeline_parms.py +57 -0
- testgen/common/logs.py +79 -0
- testgen/common/process_service.py +62 -0
- testgen/common/read_file.py +69 -0
- testgen/settings.py +440 -0
- testgen/template/dbsetup/010_create_base_schema.sql +2 -0
- testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +179 -0
- testgen/template/dbsetup/030_initialize_new_schema_structure.sql +735 -0
- testgen/template/dbsetup/040_populate_new_schema_project.sql +59 -0
- testgen/template/dbsetup/050_populate_new_schema_metadata.sql +1517 -0
- testgen/template/dbsetup/060_create_standard_views.sql +248 -0
- testgen/template/dbsetup/070_create_default_users.sql +17 -0
- testgen/template/dbsetup/075_grant_role_rights.sql +43 -0
- testgen/template/dbsetup/080_set_current_revision.sql +5 -0
- testgen/template/dbupgrade/0100_incremental_upgrade.sql +5 -0
- testgen/template/dbupgrade/0101_incremental_upgrade.sql +15 -0
- testgen/template/dbupgrade/0102_incremental_upgrade.sql +4 -0
- testgen/template/dbupgrade/0103_incremental_upgrade.sql +22 -0
- testgen/template/dbupgrade/0104_incremental_upgrade.sql +44 -0
- testgen/template/dbupgrade/0105_incremental_upgrade.sql +1 -0
- testgen/template/dbupgrade/0106_incremental_upgrade.sql +5 -0
- testgen/template/dbupgrade/0107_incremental_upgrade.sql +3 -0
- testgen/template/dbupgrade_helpers/get_tg_revision.sql +2 -0
- testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +116 -0
- testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql +11 -0
- testgen/template/exec_cat_tests/ex_cat_results_parse.sql +69 -0
- testgen/template/exec_cat_tests/ex_cat_retrieve_agg_test_parms.sql +6 -0
- testgen/template/exec_cat_tests/ex_cat_test_query.sql +8 -0
- testgen/template/execution/ex_finalize_test_run_results.sql +37 -0
- testgen/template/execution/ex_get_tests_non_cat.sql +47 -0
- testgen/template/execution/ex_update_test_record_in_testrun_table.sql +27 -0
- testgen/template/execution/ex_write_test_record_to_testrun_table.sql +6 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +48 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql +34 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +39 -0
- testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +58 -0
- testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +44 -0
- testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +37 -0
- testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +53 -0
- testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +46 -0
- testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +59 -0
- testgen/template/flavors/generic/profiling/contingency_counts.sql +3 -0
- testgen/template/flavors/generic/validate_tests/ex_get_project_column_list_generic.sql +3 -0
- testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql +53 -0
- testgen/template/flavors/mssql/profiling/project_ddf_query_mssql.sql +35 -0
- testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +246 -0
- testgen/template/flavors/mssql/profiling/project_secondary_profiling_query_mssql.sql +36 -0
- testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql +8 -0
- testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql +12 -0
- testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql +54 -0
- testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql +4 -0
- testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql +1 -0
- testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql +46 -0
- testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql +59 -0
- testgen/template/flavors/postgresql/profiling/project_ddf_query_postgresql.sql +42 -0
- testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +225 -0
- testgen/template/flavors/postgresql/profiling/project_secondary_profiling_query_postgresql.sql +28 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql +157 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql +1 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql +2 -0
- testgen/template/flavors/redshift/profiling/project_ddf_query_redshift.sql +38 -0
- testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +221 -0
- testgen/template/flavors/redshift/profiling/project_secondary_profiling_query_redshift.sql +29 -0
- testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql +115 -0
- testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql +1 -0
- testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql +2 -0
- testgen/template/flavors/snowflake/profiling/project_ddf_query_snowflake.sql +38 -0
- testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +220 -0
- testgen/template/flavors/snowflake/profiling/project_secondary_profiling_query_snowflake.sql +29 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql +69 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql +1 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql +6 -0
- testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml +219 -0
- testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql +92 -0
- testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql +1 -0
- testgen/template/gen_funny_cat_tests/gen_test_constant.sql +104 -0
- testgen/template/gen_funny_cat_tests/gen_test_distinct_value_ct.sql +98 -0
- testgen/template/gen_funny_cat_tests/gen_test_row_ct.sql +57 -0
- testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql +59 -0
- testgen/template/generation/gen_delete_old_tests.sql +5 -0
- testgen/template/generation/gen_insert_test_suite.sql +5 -0
- testgen/template/generation/gen_retrieve_or_insert_test_suite.sql +58 -0
- testgen/template/generation/gen_standard_test_type_list.sql +13 -0
- testgen/template/generation/gen_standard_tests.sql +48 -0
- testgen/template/get_entities/get_connection.sql +21 -0
- testgen/template/get_entities/get_connections_list.sql +9 -0
- testgen/template/get_entities/get_latest.sql +4 -0
- testgen/template/get_entities/get_profile.sql +12 -0
- testgen/template/get_entities/get_profile_info.sql +17 -0
- testgen/template/get_entities/get_profile_list.sql +17 -0
- testgen/template/get_entities/get_profile_screen.sql +275 -0
- testgen/template/get_entities/get_project_list.sql +6 -0
- testgen/template/get_entities/get_table_group_list.sql +10 -0
- testgen/template/get_entities/get_test_generation_list.sql +18 -0
- testgen/template/get_entities/get_test_info.sql +41 -0
- testgen/template/get_entities/get_test_results_for_run_cli.sql +16 -0
- testgen/template/get_entities/get_test_run_list.sql +24 -0
- testgen/template/get_entities/get_test_suite.sql +13 -0
- testgen/template/get_entities/get_test_suite_list.sql +18 -0
- testgen/template/get_entities/list_test_types.sql +4 -0
- testgen/template/observability/get_event_data.sql +23 -0
- testgen/template/observability/get_test_results.sql +41 -0
- testgen/template/observability/update_test_results_exported_to_observability.sql +12 -0
- testgen/template/parms/parms_profiling.sql +34 -0
- testgen/template/parms/parms_test_execution.sql +13 -0
- testgen/template/parms/parms_test_gen.sql +23 -0
- testgen/template/profiling/contingency_columns.sql +7 -0
- testgen/template/profiling/datatype_suggestions.sql +56 -0
- testgen/template/profiling/functional_datatype.sql +523 -0
- testgen/template/profiling/functional_tabletype_stage.sql +48 -0
- testgen/template/profiling/functional_tabletype_update.sql +8 -0
- testgen/template/profiling/pii_flag.sql +133 -0
- testgen/template/profiling/profile_anomalies_screen_column.sql +22 -0
- testgen/template/profiling/profile_anomalies_screen_multi_column.sql +58 -0
- testgen/template/profiling/profile_anomalies_screen_table.sql +22 -0
- testgen/template/profiling/profile_anomalies_screen_table_dates.sql +30 -0
- testgen/template/profiling/profile_anomalies_screen_variants.sql +40 -0
- testgen/template/profiling/profile_anomaly_types_get.sql +3 -0
- testgen/template/profiling/project_get_table_sample_count.sql +22 -0
- testgen/template/profiling/project_profile_run_record_insert.sql +8 -0
- testgen/template/profiling/project_profile_run_record_update.sql +5 -0
- testgen/template/profiling/project_profile_run_record_update_status.sql +5 -0
- testgen/template/profiling/project_update_profile_results_to_estimates.sql +32 -0
- testgen/template/profiling/refresh_anomalies.sql +33 -0
- testgen/template/profiling/refresh_data_chars_from_profiling.sql +156 -0
- testgen/template/profiling/secondary_profiling_columns.sql +12 -0
- testgen/template/profiling/secondary_profiling_delete.sql +4 -0
- testgen/template/profiling/secondary_profiling_update.sql +18 -0
- testgen/template/quick_start/populate_target_data.sql +1077 -0
- testgen/template/quick_start/recreate_target_data_schema.sql +167 -0
- testgen/template/quick_start/update_target_data.sql +100 -0
- testgen/template/updates/create_tmp_test_definition.sql +19 -0
- testgen/template/updates/get_test_def_parms.sql +38 -0
- testgen/template/updates/populate_stg_test_definitions.sql +184 -0
- testgen/template/validate_tests/ex_disable_tests_test_definitions.sql +5 -0
- testgen/template/validate_tests/ex_flag_tests_test_definitions.sql +64 -0
- testgen/template/validate_tests/ex_get_project_column_list_generic.sql +3 -0
- testgen/template/validate_tests/ex_get_test_column_list_tg.sql +65 -0
- testgen/template/validate_tests/ex_write_test_val_errors.sql +22 -0
- testgen/ui/__init__.py +0 -0
- testgen/ui/app.py +98 -0
- testgen/ui/assets/dk_logo.svg +46 -0
- testgen/ui/assets/question_mark.png +0 -0
- testgen/ui/assets/scripts.js +68 -0
- testgen/ui/assets/style.css +140 -0
- testgen/ui/bootstrap.py +109 -0
- testgen/ui/components/__init__.py +0 -0
- testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fBBc4.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fChc4EsA.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu4mxK.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu7GxKOzY.woff2 +0 -0
- testgen/ui/components/frontend/css/material-symbols-rounded.css +24 -0
- testgen/ui/components/frontend/css/material-symbols-rounded.woff2 +0 -0
- testgen/ui/components/frontend/css/roboto-font-faces.css +35 -0
- testgen/ui/components/frontend/css/shared.css +36 -0
- testgen/ui/components/frontend/img/dk_logo.svg +46 -0
- testgen/ui/components/frontend/index.html +17 -0
- testgen/ui/components/frontend/js/components/breadcrumbs.js +86 -0
- testgen/ui/components/frontend/js/components/button.js +66 -0
- testgen/ui/components/frontend/js/components/location.js +62 -0
- testgen/ui/components/frontend/js/components/select.js +75 -0
- testgen/ui/components/frontend/js/components/sidebar.js +358 -0
- testgen/ui/components/frontend/js/main.js +99 -0
- testgen/ui/components/frontend/js/streamlit.js +19 -0
- testgen/ui/components/frontend/js/van.min.js +1 -0
- testgen/ui/components/utils/__init__.py +0 -0
- testgen/ui/components/utils/callbacks.py +51 -0
- testgen/ui/components/utils/component.py +13 -0
- testgen/ui/components/widgets/__init__.py +6 -0
- testgen/ui/components/widgets/breadcrumbs.py +32 -0
- testgen/ui/components/widgets/location.py +65 -0
- testgen/ui/components/widgets/modal.py +97 -0
- testgen/ui/components/widgets/sidebar.py +69 -0
- testgen/ui/navigation/__init__.py +0 -0
- testgen/ui/navigation/menu.py +42 -0
- testgen/ui/navigation/page.py +20 -0
- testgen/ui/navigation/router.py +63 -0
- testgen/ui/queries/__init__.py +0 -0
- testgen/ui/queries/authentication_queries.py +47 -0
- testgen/ui/queries/connection_queries.py +121 -0
- testgen/ui/queries/profiling_queries.py +148 -0
- testgen/ui/queries/project_queries.py +9 -0
- testgen/ui/queries/table_group_queries.py +186 -0
- testgen/ui/queries/test_definition_queries.py +270 -0
- testgen/ui/queries/test_run_queries.py +32 -0
- testgen/ui/queries/test_suite_queries.py +145 -0
- testgen/ui/scripts/__init__.py +0 -0
- testgen/ui/scripts/patch_streamlit.py +111 -0
- testgen/ui/services/__init__.py +0 -0
- testgen/ui/services/authentication_service.py +119 -0
- testgen/ui/services/connection_service.py +220 -0
- testgen/ui/services/database_service.py +282 -0
- testgen/ui/services/form_service.py +1008 -0
- testgen/ui/services/javascript_service.py +44 -0
- testgen/ui/services/query_service.py +316 -0
- testgen/ui/services/string_service.py +12 -0
- testgen/ui/services/table_group_service.py +130 -0
- testgen/ui/services/test_definition_service.py +117 -0
- testgen/ui/services/test_run_service.py +13 -0
- testgen/ui/services/test_suite_service.py +76 -0
- testgen/ui/services/toolbar_service.py +77 -0
- testgen/ui/session.py +46 -0
- testgen/ui/views/__init__.py +0 -0
- testgen/ui/views/app_log_modal.py +92 -0
- testgen/ui/views/connections.py +72 -0
- testgen/ui/views/connections_base.py +367 -0
- testgen/ui/views/login.py +40 -0
- testgen/ui/views/not_found.py +16 -0
- testgen/ui/views/overview.py +34 -0
- testgen/ui/views/profiling_anomalies.py +501 -0
- testgen/ui/views/profiling_details.py +335 -0
- testgen/ui/views/profiling_modal.py +40 -0
- testgen/ui/views/profiling_results.py +206 -0
- testgen/ui/views/profiling_summary.py +177 -0
- testgen/ui/views/project_settings.py +74 -0
- testgen/ui/views/table_groups.py +530 -0
- testgen/ui/views/test_definitions.py +1020 -0
- testgen/ui/views/test_results.py +908 -0
- testgen/ui/views/test_runs.py +195 -0
- testgen/ui/views/test_suites.py +545 -0
- testgen/utils/__init__.py +0 -0
- testgen/utils/plugins.py +17 -0
- testgen/utils/singleton.py +14 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
SELECT '{PROJECT_CODE}' as project_code,
|
|
2
|
+
'{TEST_TYPE}' as test_type,
|
|
3
|
+
'{TEST_DEFINITION_ID}' as test_definition_id,
|
|
4
|
+
'{TEST_SUITE}' as test_suite,
|
|
5
|
+
'{TEST_RUN_ID}' as test_run_id,
|
|
6
|
+
'{RUN_DATE}' as test_time,
|
|
7
|
+
'{START_TIME}' as starttime,
|
|
8
|
+
CURRENT_TIMESTAMP as endtime,
|
|
9
|
+
'{SCHEMA_NAME}' as schema_name,
|
|
10
|
+
'{TABLE_NAME}' as table_name,
|
|
11
|
+
'{COLUMN_NAME_NO_QUOTES}' as column_names,
|
|
12
|
+
'{SKIP_ERRORS}' as threshold_value,
|
|
13
|
+
{SKIP_ERRORS} as skip_errors,
|
|
14
|
+
'{INPUT_PARAMETERS}' as input_parameters,
|
|
15
|
+
CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
|
|
16
|
+
CASE
|
|
17
|
+
WHEN COUNT(*) > 0 THEN
|
|
18
|
+
CONCAT(
|
|
19
|
+
CONCAT( CAST(COUNT(*) AS VARCHAR), ' error(s) identified, ' ),
|
|
20
|
+
CONCAT(
|
|
21
|
+
CASE
|
|
22
|
+
WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
|
|
23
|
+
ELSE 'within limit of '
|
|
24
|
+
END,
|
|
25
|
+
'{SKIP_ERRORS}.'
|
|
26
|
+
)
|
|
27
|
+
)
|
|
28
|
+
ELSE 'No errors found.'
|
|
29
|
+
END AS result_message,
|
|
30
|
+
COUNT(*) as result_measure,
|
|
31
|
+
'{SUBSET_DISPLAY}' as subset_condition,
|
|
32
|
+
NULL as result_query
|
|
33
|
+
FROM (
|
|
34
|
+
(
|
|
35
|
+
SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME}
|
|
36
|
+
FROM {SCHEMA_NAME}.{TABLE_NAME}
|
|
37
|
+
WHERE {SUBSET_CONDITION}
|
|
38
|
+
AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}))
|
|
39
|
+
EXCEPT
|
|
40
|
+
SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME}
|
|
41
|
+
FROM {SCHEMA_NAME}.{TABLE_NAME}
|
|
42
|
+
WHERE {SUBSET_CONDITION}
|
|
43
|
+
AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - 2 * {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}))
|
|
44
|
+
AND {WINDOW_DATE_COLUMN} < DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}))
|
|
45
|
+
)
|
|
46
|
+
UNION ALL
|
|
47
|
+
(
|
|
48
|
+
SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME}
|
|
49
|
+
FROM {SCHEMA_NAME}.{TABLE_NAME}
|
|
50
|
+
WHERE {SUBSET_CONDITION}
|
|
51
|
+
AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - 2 * {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}))
|
|
52
|
+
AND {WINDOW_DATE_COLUMN} < DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}))
|
|
53
|
+
EXCEPT
|
|
54
|
+
SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME}
|
|
55
|
+
FROM {SCHEMA_NAME}.{TABLE_NAME}
|
|
56
|
+
WHERE {SUBSET_CONDITION}
|
|
57
|
+
AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}))
|
|
58
|
+
)
|
|
59
|
+
) test;
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
-- Relative Entropy: measured by Jensen-Shannon Divergence
|
|
2
|
+
-- Smoothed and normalized version of KL divergence,
|
|
3
|
+
-- with scores between 0 (identical) and 1 (maximally different),
|
|
4
|
+
-- when using the base-2 logarithm. Formula is:
|
|
5
|
+
-- 0.5 * kl_divergence(p, m) + 0.5 * kl_divergence(q, m), where m = (p + q) / 2
|
|
6
|
+
-- Log base 2 of x = LN(x)/LN(2); the query writes this as LOG(x)/LOG(2), and the ratio is the same whatever base LOG uses
|
|
7
|
+
WITH latest_ver
|
|
8
|
+
AS ( SELECT {CONCAT_COLUMNS} as category,
|
|
9
|
+
CAST(COUNT(*) as FLOAT) / CAST(SUM(COUNT(*)) OVER () as FLOAT) AS pct_of_total
|
|
10
|
+
FROM {SCHEMA_NAME}.{TABLE_NAME} v1
|
|
11
|
+
WHERE {SUBSET_CONDITION}
|
|
12
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES} ),
|
|
13
|
+
older_ver
|
|
14
|
+
AS ( SELECT {CONCAT_MATCH_GROUPBY} as category,
|
|
15
|
+
CAST(COUNT(*) as FLOAT) / CAST(SUM(COUNT(*)) OVER () as FLOAT) AS pct_of_total
|
|
16
|
+
FROM {MATCH_SCHEMA_NAME}.{TABLE_NAME} v2
|
|
17
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
18
|
+
GROUP BY {MATCH_GROUPBY_NAMES} ),
|
|
19
|
+
dataset
|
|
20
|
+
AS ( SELECT COALESCE(l.category, o.category) AS category,
|
|
21
|
+
COALESCE(o.pct_of_total, 0.0000001) AS old_pct,
|
|
22
|
+
COALESCE(l.pct_of_total, 0.0000001) AS new_pct,
|
|
23
|
+
(COALESCE(o.pct_of_total, 0.0000001)
|
|
24
|
+
+ COALESCE(l.pct_of_total, 0.0000001))/2.0 AS avg_pct
|
|
25
|
+
FROM latest_ver l
|
|
26
|
+
FULL JOIN older_ver o
|
|
27
|
+
ON (l.category = o.category) )
|
|
28
|
+
SELECT '{PROJECT_CODE}' as project_code,
|
|
29
|
+
'{TEST_TYPE}' as test_type,
|
|
30
|
+
'{TEST_DEFINITION_ID}' as test_definition_id,
|
|
31
|
+
'{TEST_SUITE}' as test_suite,
|
|
32
|
+
'{TEST_RUN_ID}' as test_run_id,
|
|
33
|
+
'{RUN_DATE}' as test_time,
|
|
34
|
+
'{START_TIME}' as starttime,
|
|
35
|
+
CURRENT_TIMESTAMP as endtime,
|
|
36
|
+
'{SCHEMA_NAME}' as schema_name,
|
|
37
|
+
'{TABLE_NAME}' as table_name,
|
|
38
|
+
'{COLUMN_NAME_NO_QUOTES}' as column_names,
|
|
39
|
+
-- '{GROUPBY_NAMES}' as column_names,
|
|
40
|
+
'{THRESHOLD_VALUE}' as threshold_value,
|
|
41
|
+
NULL as skip_errors,
|
|
42
|
+
'{INPUT_PARAMETERS}' as input_parameters,
|
|
43
|
+
CASE WHEN js_divergence > {THRESHOLD_VALUE} THEN 0 ELSE 1 END as result_code,
|
|
44
|
+
CONCAT('Divergence Level: ',
|
|
45
|
+
CONCAT(CAST(js_divergence AS VARCHAR),
|
|
46
|
+
', Threshold: {THRESHOLD_VALUE}.')) as result_message,
|
|
47
|
+
js_divergence as result_measure,
|
|
48
|
+
'{SUBSET_DISPLAY}' as subset_condition,
|
|
49
|
+
NULL as result_query
|
|
50
|
+
FROM (
|
|
51
|
+
SELECT 0.5 * ABS(SUM(new_pct * LOG(new_pct/avg_pct)/LOG(2)))
|
|
52
|
+
+ 0.5 * ABS(SUM(old_pct * LOG(old_pct/avg_pct)/LOG(2))) as js_divergence
|
|
53
|
+
FROM dataset ) rslt;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
SELECT '{PROJECT_CODE}' as project_code,
|
|
2
|
+
CURRENT_TIMESTAMP as refresh_timestamp,
|
|
3
|
+
c.table_schema,
|
|
4
|
+
c.table_name,
|
|
5
|
+
c.column_name,
|
|
6
|
+
CASE
|
|
7
|
+
WHEN c.data_type = 'datetime' THEN 'datetime'
|
|
8
|
+
WHEN c.data_type = 'datetime2' THEN 'datetime'
|
|
9
|
+
WHEN c.data_type = 'varchar'
|
|
10
|
+
THEN 'varchar(' + CAST(c.character_maximum_length AS VARCHAR) + ')'
|
|
11
|
+
WHEN c.data_type = 'char' THEN 'char(' + CAST(c.character_maximum_length AS VARCHAR) + ')'
|
|
12
|
+
WHEN c.data_type = 'numeric' THEN 'numeric(' + CAST(c.numeric_precision AS VARCHAR) + ',' +
|
|
13
|
+
CAST(c.numeric_scale AS VARCHAR) + ')'
|
|
14
|
+
ELSE c.data_type END AS data_type,
|
|
15
|
+
c.character_maximum_length,
|
|
16
|
+
c.ordinal_position,
|
|
17
|
+
CASE
|
|
18
|
+
WHEN LOWER(c.data_type) LIKE '%char%'
|
|
19
|
+
THEN 'A'
|
|
20
|
+
WHEN c.data_type = 'bit'
|
|
21
|
+
THEN 'B'
|
|
22
|
+
WHEN c.data_type = 'date'
|
|
23
|
+
OR c.data_type LIKE 'datetime%'
|
|
24
|
+
THEN 'D'
|
|
25
|
+
WHEN c.data_type like 'time%'
|
|
26
|
+
THEN 'T'
|
|
27
|
+
-- SQL Server catalog type names (int, tinyint, float, decimal, money, smallmoney) differ from the ANSI spellings, so both are matched as numeric
WHEN c.data_type IN ('bigint', 'double precision', 'integer', 'smallint', 'real', 'int', 'tinyint', 'float', 'decimal', 'money', 'smallmoney')
|
|
28
|
+
OR c.data_type LIKE 'numeric%'
|
|
29
|
+
THEN 'N'
|
|
30
|
+
ELSE
|
|
31
|
+
'X' END AS general_type,
|
|
32
|
+
case when c.numeric_scale > 0 then 1 else 0 END as is_decimal
|
|
33
|
+
FROM information_schema.columns c
|
|
34
|
+
WHERE c.table_schema = '{DATA_SCHEMA}' {TABLE_CRITERIA}
|
|
35
|
+
ORDER BY c.table_schema, c.table_name, c.ordinal_position;
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
---
|
|
2
|
+
strTemplate01_sampling: "SELECT TOP {SAMPLE_SIZE} "
|
|
3
|
+
strTemplate01_else: "SELECT "
|
|
4
|
+
strTemplate02_all: |
|
|
5
|
+
{CONNECTION_ID} as connection_id,
|
|
6
|
+
'{PROJECT_CODE}' as project_code,
|
|
7
|
+
'{TABLE_GROUPS_ID}' as table_groups_id,
|
|
8
|
+
'{DATA_SCHEMA}' AS schema_name,
|
|
9
|
+
'{RUN_DATE}' AS run_date,
|
|
10
|
+
'{DATA_TABLE}' AS table_name,
|
|
11
|
+
{COL_POS} AS position,
|
|
12
|
+
'{COL_NAME_SANITIZED}' AS column_name,
|
|
13
|
+
'{COL_TYPE}' AS column_type,
|
|
14
|
+
'{COL_GEN_TYPE}' AS general_type,
|
|
15
|
+
COUNT(*) AS record_ct,
|
|
16
|
+
COUNT("{COL_NAME}") AS value_ct,
|
|
17
|
+
COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
|
|
18
|
+
SUM(CASE WHEN "{COL_NAME}" IS NULL THEN 1 ELSE 0 END) AS null_value_ct,
|
|
19
|
+
strTemplate03_ADN: MIN(LEN("{COL_NAME}")) AS min_length,
|
|
20
|
+
MAX(LEN("{COL_NAME}")) AS max_length,
|
|
21
|
+
AVG(CAST(NULLIF(LEN("{COL_NAME}"), 0) AS FLOAT)) AS avg_length,
|
|
22
|
+
strTemplate03_else: NULL as min_length,
|
|
23
|
+
NULL as max_length,
|
|
24
|
+
NULL as avg_length,
|
|
25
|
+
# Counts text values that spell zero: 0, 0. or 0.000... T-SQL LIKE has no regex groups or quantifiers, so plain wildcards are used.
strTemplate04_A: SUM(CASE
|
|
26
|
+
WHEN LTRIM(RTRIM("{COL_NAME}")) = '0' OR (LTRIM(RTRIM("{COL_NAME}")) LIKE '0[.]%' AND LTRIM(RTRIM("{COL_NAME}")) NOT LIKE '0[.]%[^0]%') THEN 1 ELSE 0
|
|
27
|
+
END) AS zero_value_ct,
|
|
28
|
+
strTemplate04_N: CAST(SUM( 1 - ABS(SIGN("{COL_NAME}")))AS BIGINT ) AS zero_value_ct,
|
|
29
|
+
strTemplate04_else: NULL as zero_value_ct,
|
|
30
|
+
strTemplate05_A: COUNT(DISTINCT UPPER(REPLACE(TRANSLATE("{COL_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ',''))) as distinct_std_value_ct,
|
|
31
|
+
SUM(CASE
|
|
32
|
+
WHEN "{COL_NAME}" = '' THEN 1
|
|
33
|
+
ELSE 0
|
|
34
|
+
END) AS zero_length_ct,
|
|
35
|
+
SUM( CASE
|
|
36
|
+
WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1
|
|
37
|
+
ELSE 0
|
|
38
|
+
END ) AS lead_space_ct,
|
|
39
|
+
SUM( CASE WHEN "{COL_NAME}" LIKE '"%"' OR "{COL_NAME}" LIKE '''%''' THEN 1 ELSE 0 END ) as quoted_value_ct,
|
|
40
|
+
SUM( CASE WHEN "{COL_NAME}" LIKE '%[0-9]%' THEN 1 ELSE 0 END ) as includes_digit_ct,
|
|
41
|
+
SUM( CASE
|
|
42
|
+
WHEN "{COL_NAME}" IN ('.', '?') OR "{COL_NAME}" LIKE ' ' THEN 1
|
|
43
|
+
WHEN LEN("{COL_NAME}") > 1
|
|
44
|
+
AND ( LOWER("{COL_NAME}") LIKE '%..%' OR LOWER("{COL_NAME}") LIKE '%--%'
|
|
45
|
+
OR (LEN(REPLACE("{COL_NAME}", '0', ''))= 0 )
|
|
46
|
+
OR (LEN(REPLACE("{COL_NAME}", '9', ''))= 0 )
|
|
47
|
+
OR (LEN(REPLACE(LOWER("{COL_NAME}"), 'x', ''))= 0 )
|
|
48
|
+
OR (LEN(REPLACE(LOWER("{COL_NAME}"), 'z', ''))= 0 )
|
|
49
|
+
) THEN 1
|
|
50
|
+
WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
|
|
51
|
+
'n/a','#na','none','null','unknown') THEN 1
|
|
52
|
+
WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
|
|
53
|
+
'(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
|
|
54
|
+
WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
|
|
55
|
+
'[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
|
|
56
|
+
ELSE 0
|
|
57
|
+
END ) AS filled_value_ct,
|
|
58
|
+
LEFT(MIN(NULLIF("{COL_NAME}", '') COLLATE Latin1_General_BIN ), 100) AS min_text,
|
|
59
|
+
LEFT(MAX(NULLIF("{COL_NAME}", '') COLLATE Latin1_General_BIN ), 100) AS max_text,
|
|
60
|
+
SUM({DATA_QC_SCHEMA}.fndk_isnum(LEFT("{COL_NAME}", 31))) AS numeric_ct,
|
|
61
|
+
SUM({DATA_QC_SCHEMA}.fndk_isdate(LEFT("{COL_NAME}", 26))) AS date_ct,
|
|
62
|
+
CASE
|
|
63
|
+
WHEN CAST(SUM( CASE WHEN UPPER("{COL_NAME}") LIKE '[1-9]% [A-Z]% %'
|
|
64
|
+
AND CHARINDEX(' ', "{COL_NAME}") BETWEEN 2 and 6 THEN 1
|
|
65
|
+
END ) as FLOAT) /CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'STREET_ADDR'
|
|
66
|
+
WHEN CAST(SUM(CASE WHEN "{COL_NAME}" IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
|
|
67
|
+
THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'STATE_USA'
|
|
68
|
+
WHEN CAST(SUM( CASE WHEN ("{COL_NAME}" LIKE '[+]1%[0-9][0-9][0-9]%[-. ][0-9][0-9][0-9]%[0-9][0-9][0-9][0-9,0-9,0-9,0-9,0-9,0-9]' AND "{COL_NAME}" NOT LIKE '%[^0-9+()-]%')
|
|
69
|
+
OR ("{COL_NAME}" LIKE '[+]1%[0-9][0-9][0-9][- ][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9]' AND "{COL_NAME}" NOT LIKE '%[^0-9+-]%')
|
|
70
|
+
THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'PHONE_USA'
|
|
71
|
+
WHEN CAST(SUM( CASE WHEN "{COL_NAME}" LIKE '%[_a-zA-Z0-9.-]%@%[a-zA-Z0-9.-]%.[a-zA-Z][a-zA-Z]%'
|
|
72
|
+
THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'EMAIL'
|
|
73
|
+
WHEN CAST(SUM( CASE WHEN TRANSLATE("{COL_NAME}",'012345678','999999999') IN ('99999', '999999999', '99999-9999')
|
|
74
|
+
THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'ZIP_USA'
|
|
75
|
+
WHEN CAST(SUM( CASE WHEN "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS NOT LIKE ' %'
|
|
76
|
+
AND "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '[a-z0-9 _-]%'
|
|
77
|
+
AND ("{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.txt'
|
|
78
|
+
OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.csv'
|
|
79
|
+
OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.tsv'
|
|
80
|
+
OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.dat'
|
|
81
|
+
OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.doc'
|
|
82
|
+
OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.pdf'
|
|
83
|
+
OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.xlsx')
|
|
84
|
+
THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'FILE_NAME'
|
|
85
|
+
WHEN CAST(SUM( CASE WHEN "{COL_NAME}" LIKE '[0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9]'
|
|
86
|
+
THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'CREDIT_CARD'
|
|
87
|
+
WHEN CAST(SUM( CASE WHEN ( "{COL_NAME}" LIKE '%,%,%,%'
|
|
88
|
+
OR "{COL_NAME}" LIKE '%|%|%|%'
|
|
89
|
+
OR "{COL_NAME}" LIKE '%^%^%^%'
|
|
90
|
+
OR "{COL_NAME}" LIKE '%' + CHAR(9) + '%' + CHAR(9) + '%' + CHAR(9) + '%' )
|
|
91
|
+
AND NOT ( "{COL_NAME}" LIKE '% and %'
|
|
92
|
+
OR "{COL_NAME}" LIKE '% but %'
|
|
93
|
+
OR "{COL_NAME}" LIKE '% or %'
|
|
94
|
+
OR "{COL_NAME}" LIKE '% yet %' )
|
|
95
|
+
AND ISNULL(CAST(LEN("{COL_NAME}") - LEN(REPLACE("{COL_NAME}", ',', '')) as FLOAT)
|
|
96
|
+
/ CAST(NULLIF(LEN("{COL_NAME}") - LEN(REPLACE("{COL_NAME}", ' ', '')), 0) as FLOAT), 1) > 0.6
|
|
97
|
+
THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'DELIMITED_DATA'
|
|
98
|
+
WHEN CAST(SUM ( CASE WHEN "{COL_NAME}" LIKE '[0-8][0-9][0-9][- ][0-9][0-9][- ][0-9][0-9][0-9][0-9]'
|
|
99
|
+
AND LEFT("{COL_NAME}", 3) NOT BETWEEN '734' AND '749'
|
|
100
|
+
AND LEFT("{COL_NAME}", 3) <> '666' THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'SSN'
|
|
101
|
+
END as std_pattern_match,
|
|
102
|
+
strTemplate05_else: NULL as distinct_std_value_ct,
|
|
103
|
+
NULL as zero_length_ct,
|
|
104
|
+
NULL as lead_space_ct,
|
|
105
|
+
NULL as quoted_value_ct,
|
|
106
|
+
NULL as includes_digit_ct,
|
|
107
|
+
NULL as filled_value_ct,
|
|
108
|
+
NULL as min_text,
|
|
109
|
+
NULL as max_text,
|
|
110
|
+
NULL as numeric_ct,
|
|
111
|
+
NULL as date_ct,
|
|
112
|
+
NULL as std_pattern_match,
|
|
113
|
+
strTemplate06_A_patterns: ( SELECT LEFT(STRING_AGG(pattern, ' | ') WITHIN GROUP (ORDER BY ct DESC), 1000) AS concat_pats
|
|
114
|
+
FROM (
|
|
115
|
+
SELECT TOP 5 CAST(COUNT(*) AS VARCHAR(10)) + ' | ' + pattern AS pattern,
|
|
116
|
+
COUNT(*) AS ct
|
|
117
|
+
FROM ( SELECT TRANSLATE("{COL_NAME}" COLLATE Latin1_General_BIN,
|
|
118
|
+
'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
|
|
119
|
+
'aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAANNNNNNNNNN' )
|
|
120
|
+
AS pattern
|
|
121
|
+
FROM {DATA_SCHEMA}.{DATA_TABLE} WITH (NOLOCK)
|
|
122
|
+
WHERE "{COL_NAME}" > ' ' AND ((SELECT MAX(LEN("{COL_NAME}"))
|
|
123
|
+
FROM {DATA_SCHEMA}.{DATA_TABLE} WITH (NOLOCK)) BETWEEN 3 and {PARM_MAX_PATTERN_LENGTH})) p
|
|
124
|
+
GROUP BY pattern
|
|
125
|
+
HAVING pattern > ' '
|
|
126
|
+
ORDER BY COUNT(*) DESC
|
|
127
|
+
) ps) AS top_patterns,
|
|
128
|
+
# Fallback: emits NULL for top_patterns.
strTemplate06_else: NULL as top_patterns,
|
|
129
|
+
# top_freq_values: the 10 most frequent non-blank values as "count | value", joined with ' | '
# (highest count first, then value ascending) and truncated to 1000 characters.
# NOTE(review): the inner TOP 10 orders by COUNT(*) only, so which values make the cut is
# arbitrary when counts tie.
strTemplate07_A_freq: ( SELECT LEFT(STRING_AGG(val, ' | ') WITHIN GROUP (ORDER BY ct DESC, val ASC), 1000) as concat_vals
|
|
130
|
+
FROM (
|
|
131
|
+
SELECT TOP 10 CAST(COUNT(*) as VARCHAR(10)) + ' | ' + "{COL_NAME}" as val,
|
|
132
|
+
COUNT(*) as ct
|
|
133
|
+
FROM {DATA_SCHEMA}.{DATA_TABLE} WITH (NOLOCK)
|
|
134
|
+
WHERE "{COL_NAME}" > ' '
|
|
135
|
+
GROUP BY "{COL_NAME}"
|
|
136
|
+
HAVING "{COL_NAME}" > ' '
|
|
137
|
+
ORDER BY COUNT(*) DESC
|
|
138
|
+
) ps
|
|
139
|
+
) AS top_freq_values,
|
|
140
|
+
# Fallback: emits NULL for top_freq_values.
strTemplate07_else: NULL as top_freq_values,
|
|
141
|
+
# Numeric statistics: min, smallest value above zero, max, and average / standard deviation
# computed on the value cast to FLOAT. pct_25 / pct_50 / pct_75 are not table columns; they are
# the aliases produced by the `pctile` derived table in strTemplate99_N (presumably appended to
# the FROM clause whenever this template is used - confirm in the query builder).
strTemplate08_N: MIN("{COL_NAME}") AS min_value,
|
|
142
|
+
MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
|
|
143
|
+
MAX("{COL_NAME}") AS max_value,
|
|
144
|
+
AVG(CAST("{COL_NAME}" AS FLOAT)) AS avg_value,
|
|
145
|
+
STDEV(CAST("{COL_NAME}" AS FLOAT)) AS stdev_value,
|
|
146
|
+
MIN(pct_25) as percentile_25,
|
|
147
|
+
MIN(pct_50) as percentile_50,
|
|
148
|
+
MIN(pct_75) as percentile_75,
|
|
149
|
+
# Fallback: emits NULL for every numeric statistic (min_value through percentile_75).
strTemplate08_else: NULL as min_value,
|
|
150
|
+
NULL as min_value_over_0,
|
|
151
|
+
NULL as max_value,
|
|
152
|
+
NULL as avg_value,
|
|
153
|
+
NULL as stdev_value,
|
|
154
|
+
NULL as percentile_25,
|
|
155
|
+
NULL as percentile_50,
|
|
156
|
+
NULL as percentile_75,
|
|
157
|
+
# fractional_sum: sum of each value's fractional part (value % 1), rounded to 5 decimal places.
strTemplate10_N_dec: SUM(ROUND(("{COL_NAME}" % 1), 5)) as fractional_sum,
|
|
158
|
+
|
|
159
|
+
# Fallback: emits NULL for fractional_sum.
strTemplate10_else: NULL as fractional_sum,
|
|
160
|
+
|
|
161
|
+
# Date statistics relative to {RUN_DATE}: min_date (floored at 0001-01-01), max_date, counts of
# values more than 12 / 60 / 240 months before the run date, counts within the preceding 365 and
# 30 days, a count of future dates, and the number of distinct day / week / month offsets present.
strTemplate11_D: CASE
|
|
162
|
+
WHEN MIN("{COL_NAME}") IS NULL THEN NULL
|
|
163
|
+
ELSE CASE WHEN MIN("{COL_NAME}") >= CAST('0001-01-01' as date) THEN MIN("{COL_NAME}") ELSE CAST('0001-01-01' as date) END
|
|
164
|
+
END as min_date,
|
|
165
|
+
MAX("{COL_NAME}") as max_date,
|
|
166
|
+
SUM(CASE
|
|
167
|
+
WHEN DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}') > 12 THEN 1
|
|
168
|
+
ELSE 0
|
|
169
|
+
END) AS before_1yr_date_ct,
|
|
170
|
+
SUM(CASE
|
|
171
|
+
WHEN DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}') > 60 THEN 1
|
|
172
|
+
ELSE 0
|
|
173
|
+
END) AS before_5yr_date_ct,
|
|
174
|
+
SUM(CASE
|
|
175
|
+
WHEN DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}') > 240 THEN 1
|
|
176
|
+
ELSE 0
|
|
177
|
+
END) AS before_20yr_date_ct,
|
|
178
|
+
SUM(CASE
|
|
179
|
+
WHEN DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1
|
|
180
|
+
ELSE 0
|
|
181
|
+
END) AS within_1yr_date_ct,
|
|
182
|
+
SUM(CASE
|
|
183
|
+
WHEN DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 30 THEN 1
|
|
184
|
+
ELSE 0
|
|
185
|
+
END) AS within_1mo_date_ct,
|
|
186
|
+
SUM(CASE
|
|
187
|
+
WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 ELSE 0
|
|
188
|
+
END) AS future_date_ct,
|
|
189
|
+
COUNT(DISTINCT DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}' ) ) as date_days_present,
|
|
190
|
+
COUNT(DISTINCT DATEDIFF(week, "{COL_NAME}", '{RUN_DATE}' ) ) as date_weeks_present,
|
|
191
|
+
COUNT(DISTINCT DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}' ) ) as date_months_present,
|
|
192
|
+
|
|
193
|
+
# Fallback: emits NULL for every date statistic (min_date through date_months_present).
strTemplate11_else: NULL as min_date,
|
|
194
|
+
NULL as max_date,
|
|
195
|
+
NULL as before_1yr_date_ct,
|
|
196
|
+
NULL as before_5yr_date_ct,
|
|
197
|
+
NULL as before_20yr_date_ct,
|
|
198
|
+
NULL as within_1yr_date_ct,
|
|
199
|
+
NULL as within_1mo_date_ct,
|
|
200
|
+
NULL as future_date_ct,
|
|
201
|
+
NULL as date_days_present,
|
|
202
|
+
NULL as date_weeks_present,
|
|
203
|
+
NULL as date_months_present,
|
|
204
|
+
|
|
205
|
+
# boolean_true_ct: sums the column cast to INTEGER, i.e. counts the rows whose value casts to 1.
strTemplate12_B: SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
|
|
206
|
+
|
|
207
|
+
# Fallback: emits NULL for boolean_true_ct.
strTemplate12_else: NULL as boolean_true_ct,
|
|
208
|
+
|
|
209
|
+
# datatype_suggestion is always NULL in this query.
# NOTE(review): presumably filled in by a later processing step - confirm.
strTemplate13_ALL: NULL AS datatype_suggestion,
|
|
210
|
+
# distinct_pattern_ct: number of distinct letter/digit patterns (letters -> a/A, digits -> N)
# among non-blank values, computed in a separate scan of the table. Also emits
# embedded_space_ct (rows whose trimmed value still contains a space, via SIGN of the space
# count) and avg_embedded_spaces (mean number of spaces inside the trimmed value).
strTemplate14_A_do_patterns: ( SELECT COUNT(DISTINCT TRANSLATE("{COL_NAME}" COLLATE Latin1_General_BIN,
|
|
211
|
+
'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
|
|
212
|
+
'aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAANNNNNNNNNN'
|
|
213
|
+
)
|
|
214
|
+
) AS pattern_ct
|
|
215
|
+
FROM {DATA_SCHEMA}.{DATA_TABLE} WITH (NOLOCK)
|
|
216
|
+
WHERE "{COL_NAME}" > ' ' ) AS distinct_pattern_ct,
|
|
217
|
+
SUM(CAST(SIGN(LEN(RTRIM(LTRIM("{COL_NAME}"))) - LEN(REPLACE(RTRIM(LTRIM("{COL_NAME}")),' ',''))) AS BIGINT)) AS embedded_space_ct,
|
|
218
|
+
AVG(CAST(LEN(RTRIM(LTRIM("{COL_NAME}"))) - LEN(REPLACE(RTRIM(LTRIM("{COL_NAME}")),' ','')) AS FLOAT)) AS avg_embedded_spaces,
|
|
219
|
+
|
|
220
|
+
# Variant without the pattern scan: distinct_pattern_ct is NULL, while embedded_space_ct and
# avg_embedded_spaces use the same expressions as strTemplate14_A_do_patterns.
strTemplate14_A_no_patterns: NULL as distinct_pattern_ct,
|
|
221
|
+
SUM(CAST(SIGN(LEN(RTRIM(LTRIM("{COL_NAME}"))) - LEN(REPLACE(RTRIM(LTRIM("{COL_NAME}")),' ',''))) AS BIGINT)) AS embedded_space_ct,
|
|
222
|
+
AVG(CAST(LEN(RTRIM(LTRIM("{COL_NAME}"))) - LEN(REPLACE(RTRIM(LTRIM("{COL_NAME}")),' ','')) AS FLOAT)) AS avg_embedded_spaces,
|
|
223
|
+
|
|
224
|
+
# Fallback: emits NULL for distinct_pattern_ct, embedded_space_ct and avg_embedded_spaces.
strTemplate14_else: NULL as distinct_pattern_ct,
|
|
225
|
+
NULL as embedded_space_ct,
|
|
226
|
+
NULL as avg_embedded_spaces,
|
|
227
|
+
|
|
228
|
+
# functional_data_type and functional_table_type are always NULL in this query.
# NOTE(review): presumably derived in a later step - confirm.
strTemplate15_ALL: NULL as functional_data_type,
|
|
229
|
+
NULL as functional_table_type,
|
|
230
|
+
|
|
231
|
+
# profile_run_id literal. This fragment carries no trailing comma, unlike the fragments above,
# so it is written to be the final item of the select list.
strTemplate16_ALL: " '{PROFILE_RUN_ID}' as profile_run_id"
|
|
232
|
+
|
|
233
|
+
# FROM clause for the sampling case. The text is identical to strTemplate98_else, so this
# fragment itself adds no sampling clause.
strTemplate98_sampling: ' FROM {DATA_SCHEMA}.{DATA_TABLE} WITH (NOLOCK)'
|
|
234
|
+
|
|
235
|
+
# FROM clause for the non-sampling case: reads the profiled table with the NOLOCK hint.
strTemplate98_else: ' FROM {DATA_SCHEMA}.{DATA_TABLE} WITH (NOLOCK)'
|
|
236
|
+
|
|
237
|
+
# Extra FROM item (note the leading comma) named `pctile`, exposing pct_25 / pct_50 / pct_75.
# PERCENTILE_CONT ... OVER () yields the same values on every row, so TOP 1 keeps a single row.
strTemplate99_N: |
|
|
238
|
+
, (SELECT TOP 1
|
|
239
|
+
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_25,
|
|
240
|
+
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_50,
|
|
241
|
+
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_75
|
|
242
|
+
FROM {DATA_SCHEMA}.{DATA_TABLE} WITH (NOLOCK)) pctile
|
|
243
|
+
|
|
244
|
+
# Fallback: a single space, i.e. no percentile derived table is added.
strTemplate99_else: ' '
|
|
245
|
+
|
|
246
|
+
# Trailing ORDER BY for the sampling case.
# NOTE(review): in SQL Server an unseeded RAND() is evaluated once per query, not once per row,
# so this may not shuffle rows; ORDER BY NEWID() is the usual idiom - confirm intent.
strTemplate100_sampling: ' ORDER BY RAND()'
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
-- Get Freqs for selected columns
-- Returns a single row for {COL_NAME}:
--   top_freq_values     : the ten most frequent non-blank values, one per line, formatted
--                         "| value | count", followed by "| Other Values (n) | total" for the
--                         remaining n distinct values.
--   distinct_value_hash : MD5 (hex, no 0x prefix) over the sorted, '|'-joined distinct values.
WITH ranked_vals
   AS
      (SELECT "{COL_NAME}",
              COUNT(*) AS ct,
              -- The column is a tiebreaker so equal counts always rank the same way.
              ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC, "{COL_NAME}") AS rn
         FROM {DATA_SCHEMA}.{DATA_TABLE}
        WHERE "{COL_NAME}" > ' '
       GROUP BY "{COL_NAME}"
      ),
consol_vals
   AS (
       -- Ranks 1-10 keep their own row; every other rank falls into the single NULL group,
       -- which COALESCE turns into the "Other Values" summary line.
       SELECT COALESCE(CASE WHEN rn <= 10 THEN '| ' + "{COL_NAME}" + ' | ' + CAST(ct AS VARCHAR)
                            ELSE NULL
                       END,
                       -- ranked_vals has one row per distinct value, so COUNT(*) is the number
                       -- of remaining distinct values. Counting DISTINCT CAST(col AS VARCHAR)
                       -- would compare only the first 30 characters of each value.
                       '| Other Values (' + CAST(CAST(COUNT(*) AS VARCHAR) + ') | '
                                                 + CAST(SUM(ct) AS VARCHAR) AS VARCHAR)) AS val,
              MIN(rn) AS min_rn
         FROM ranked_vals
       GROUP BY CASE WHEN rn <= 10 THEN '| ' + "{COL_NAME}" + ' | ' + CAST(ct AS VARCHAR)
                     ELSE NULL
                END
      )
SELECT '{PROJECT_CODE}' as project_code,
       '{DATA_SCHEMA}' as schema_name,
       '{RUN_DATE}' as run_date,
       '{DATA_TABLE}' as table_name,
       '{COL_NAME}' as column_name,
       -- Aggregate with a marker separator, then swap it for a line feed.
       REPLACE(STRING_AGG(CONVERT(NVARCHAR(max), val), '^#^') WITHIN GROUP (ORDER BY min_rn), '^#^', CHAR(10)) AS top_freq_values,
       -- Convert function has style = 2 : The characters 0x aren't added to the left of the converted result for style 2.
       -- NOTE(review): this STRING_AGG input is not converted to a MAX type, so the aggregate is
       -- limited to 8000 bytes and fails on columns with many distinct values. Converting it
       -- would change the stored hashes, so it is left as is - confirm the desired behaviour.
       (SELECT CONVERT(VARCHAR(40), HASHBYTES('MD5', STRING_AGG( NULLIF(dist_col_name,''),
                                                  '|') WITHIN GROUP (ORDER BY dist_col_name)), 2) as dvh
          FROM (SELECT DISTINCT "{COL_NAME}" as dist_col_name
                  FROM {DATA_SCHEMA}.{DATA_TABLE}) a
       ) as distinct_value_hash
  FROM consol_vals;
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
-- Step 1: Drop both functions if they exist
-- DROP FUNCTION IF EXISTS (SQL Server 2016+) drops a function of any kind with that name.
-- The former OBJECT_ID(..., 'FN') guard matched scalar functions only, so a table-valued
-- function of the same name was left in place and blocked the CREATE FUNCTION that follows.
BEGIN
    DROP FUNCTION IF EXISTS {DATA_QC_SCHEMA}.fndk_isnum;

    DROP FUNCTION IF EXISTS {DATA_QC_SCHEMA}.fndk_isdate;
END
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
-- Step 3: Create isdate function
-- fndk_isdate(@strparm) returns 1 when the text parses as a date / datetime in one of the
-- layouts tested below and carries a four-digit year between 1800 and 2200; otherwise 0.
-- NULL and empty-string input return 0.

CREATE FUNCTION {DATA_QC_SCHEMA}.fndk_isdate(@strparm VARCHAR(500))
RETURNS INT
AS
BEGIN
   DECLARE @ret INT

   -- An empty string is handled exactly like NULL.
   DECLARE @val VARCHAR(500) = NULLIF(@strparm, '')

   -- TRY_CAST yields NULL for a non-numeric prefix / suffix. Comparing the raw text with an
   -- integer range converts it implicitly and raises an error for values such as '01 J'.
   DECLARE @lead_year  INT = TRY_CAST(LEFT(@val, 4) AS INT)
   DECLARE @trail_year INT = TRY_CAST(RIGHT(RTRIM(@val), 4) AS INT)

   SET @ret =
       CASE
          -- Numeric-looking text that starts with a plausible year
          -- NOTE(review): text reaching this branch is float-castable, so the styles below
          -- that expect '-' or '/' separators look unreachable - confirm before removing.
          WHEN TRY_CAST(@val AS float) IS NOT NULL
           AND @lead_year BETWEEN 1800 AND 2200 THEN
             CASE
                WHEN LEN(@val) > 11 THEN 0
                -- YYYYMMDD
                WHEN TRY_CONVERT(DATE, @val, 112) IS NOT NULL THEN 1

                -- YYYY-MM-DD
                WHEN TRY_CONVERT(DATE, @val, 23) IS NOT NULL THEN 1

                -- MM/DD/YYYY
                WHEN TRY_CONVERT(DATE, @val, 101) IS NOT NULL THEN 1

                -- MM/DD/YY
                WHEN TRY_CONVERT(DATE, @val, 1) IS NOT NULL THEN 1

                --MM-DD-YYYY
                WHEN TRY_CONVERT(DATE, @val, 110) IS NOT NULL THEN 1

                --MM-DD-YY
                WHEN TRY_CONVERT(DATE, @val, 10) IS NOT NULL THEN 1

                ELSE 0
             END

          --DD MMM YYYY
          -- In this layout the year is the LAST four characters, so it is accepted as well.
          -- The leading-year test is kept so year-first text that used to match still does.
          WHEN TRY_CONVERT(DATE, @val, 106) IS NOT NULL
           AND (   @lead_year  BETWEEN 1800 AND 2200
                OR @trail_year BETWEEN 1800 AND 2200)
             THEN 1

          -- YYYY-MM-DD HH:MM:SS SSSSSS
          WHEN TRY_CONVERT(DATETIME2, @val, 121) IS NOT NULL
           AND @lead_year BETWEEN 1800 AND 2200
             THEN 1

          -- YYYY-MM-DD HH:MM:SS
          WHEN TRY_CONVERT(DATETIME2, @val, 120) IS NOT NULL
           AND @lead_year BETWEEN 1800 AND 2200
             THEN 1

          ELSE 0
       END

   RETURN @ret

END
;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
-- Grants {DB_USER} EXECUTE permission at schema level on {DATA_QC_SCHEMA}, covering every
-- executable object in that schema rather than individually named functions.
GRANT EXECUTE ON SCHEMA::{DATA_QC_SCHEMA} TO {DB_USER};
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
-- Window test: finds distinct {COLUMN_NAME_NO_QUOTES} combinations that occur in the prior
-- window (from 2 * {WINDOW_DAYS} up to {WINDOW_DAYS} before the latest {WINDOW_DATE_COLUMN})
-- but not in the latest window (the final {WINDOW_DAYS}). Each such combination is one error;
-- the test passes (result_code 1) while the error count does not exceed {SKIP_ERRORS}.
SELECT '{PROJECT_CODE}'           AS project_code,
       '{TEST_TYPE}'              AS test_type,
       '{TEST_DEFINITION_ID}'     AS test_definition_id,
       '{TEST_SUITE}'             AS test_suite,
       '{TEST_RUN_ID}'            AS test_run_id,
       '{RUN_DATE}'               AS test_time,
       '{START_TIME}'             AS starttime,
       CURRENT_TIMESTAMP          AS endtime,
       '{SCHEMA_NAME}'            AS schema_name,
       '{TABLE_NAME}'             AS table_name,
       '{COLUMN_NAME_NO_QUOTES}'  AS column_names,
       '{SKIP_ERRORS}'            AS threshold_value,
       {SKIP_ERRORS}              AS skip_errors,
       '{INPUT_PARAMETERS}'       AS input_parameters,
       CASE WHEN COUNT(*) > {SKIP_ERRORS} THEN 0 ELSE 1 END AS result_code,
       -- COUNT(*) is never NULL or negative, so "= 0" is the exact complement of "> 0".
       CASE
         WHEN COUNT(*) = 0 THEN 'No errors found.'
         ELSE
           CONCAT(
             CONCAT( CAST(COUNT(*) AS VARCHAR), ' error(s) identified, ' ),
             CONCAT(
               CASE
                 WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
                 ELSE 'within limit of '
               END,
               '{SKIP_ERRORS}.'
             )
           )
       END                        AS result_message,
       COUNT(*)                   AS result_measure,
       '{SUBSET_DISPLAY}'         AS subset_condition,
       NULL                       AS result_query
  FROM (
        -- Prior window: distinct combinations
        SELECT {COLUMN_NAME_NO_QUOTES}
          FROM {SCHEMA_NAME}.{TABLE_NAME}
         WHERE {SUBSET_CONDITION}
           AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
           AND {WINDOW_DATE_COLUMN} <  (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - {WINDOW_DAYS}
         GROUP BY {COLUMN_NAME_NO_QUOTES}
        EXCEPT
        -- Latest window: distinct combinations
        SELECT {COLUMN_NAME_NO_QUOTES}
          FROM {SCHEMA_NAME}.{TABLE_NAME}
         WHERE {SUBSET_CONDITION}
           AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - {WINDOW_DAYS}
         GROUP BY {COLUMN_NAME_NO_QUOTES}
       ) test;
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
-- Window test: compares the {COLUMN_NAME} values of the latest window (the final
-- {WINDOW_DAYS} up to the latest {WINDOW_DATE_COLUMN}) with those of the prior window (the
-- {WINDOW_DAYS} before that), in both directions. A value present in only one window is one
-- error, labelled with the window it is missing from; the test passes (result_code 1) while
-- the error count does not exceed {SKIP_ERRORS}.
SELECT '{PROJECT_CODE}'           AS project_code,
       '{TEST_TYPE}'              AS test_type,
       '{TEST_DEFINITION_ID}'     AS test_definition_id,
       '{TEST_SUITE}'             AS test_suite,
       '{TEST_RUN_ID}'            AS test_run_id,
       '{RUN_DATE}'               AS test_time,
       '{START_TIME}'             AS starttime,
       CURRENT_TIMESTAMP          AS endtime,
       '{SCHEMA_NAME}'            AS schema_name,
       '{TABLE_NAME}'             AS table_name,
       '{COLUMN_NAME_NO_QUOTES}'  AS column_names,
       '{SKIP_ERRORS}'            AS threshold_value,
       {SKIP_ERRORS}              AS skip_errors,
       '{INPUT_PARAMETERS}'       AS input_parameters,
       CASE WHEN COUNT(*) > {SKIP_ERRORS} THEN 0 ELSE 1 END AS result_code,
       -- COUNT(*) is never NULL or negative, so "= 0" is the exact complement of "> 0".
       CASE
         WHEN COUNT(*) = 0 THEN 'No errors found.'
         ELSE
           CONCAT(
             CONCAT( CAST(COUNT(*) AS VARCHAR), ' error(s) identified, ' ),
             CONCAT(
               CASE
                 WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
                 ELSE 'within limit of '
               END,
               '{SKIP_ERRORS}.'
             )
           )
       END                        AS result_message,
       COUNT(*)                   AS result_measure,
       '{SUBSET_DISPLAY}'         AS subset_condition,
       NULL                       AS result_query
  FROM (
        (
         -- In the latest window but not in the prior one
         SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME}
           FROM {SCHEMA_NAME}.{TABLE_NAME}
          WHERE {SUBSET_CONDITION}
            AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - {WINDOW_DAYS}
         EXCEPT
         SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME}
           FROM {SCHEMA_NAME}.{TABLE_NAME}
          WHERE {SUBSET_CONDITION}
            AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
            AND {WINDOW_DATE_COLUMN} <  (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - {WINDOW_DAYS}
        )
        UNION ALL
        (
         -- In the prior window but not in the latest one
         SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME}
           FROM {SCHEMA_NAME}.{TABLE_NAME}
          WHERE {SUBSET_CONDITION}
            AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
            AND {WINDOW_DATE_COLUMN} <  (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - {WINDOW_DAYS}
         EXCEPT
         SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME}
           FROM {SCHEMA_NAME}.{TABLE_NAME}
          WHERE {SUBSET_CONDITION}
            AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {SCHEMA_NAME}.{TABLE_NAME}) - {WINDOW_DAYS}
        )
       ) test;
|