dataops-testgen 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataops_testgen-2.2.0.dist-info/LICENSE +203 -0
- dataops_testgen-2.2.0.dist-info/METADATA +287 -0
- dataops_testgen-2.2.0.dist-info/NOTICE +5 -0
- dataops_testgen-2.2.0.dist-info/RECORD +270 -0
- dataops_testgen-2.2.0.dist-info/WHEEL +5 -0
- dataops_testgen-2.2.0.dist-info/entry_points.txt +2 -0
- dataops_testgen-2.2.0.dist-info/top_level.txt +1 -0
- testgen/__init__.py +0 -0
- testgen/__main__.py +770 -0
- testgen/commands/__init__.py +0 -0
- testgen/commands/queries/__init__.py +0 -0
- testgen/commands/queries/execute_cat_tests_query.py +95 -0
- testgen/commands/queries/execute_tests_query.py +160 -0
- testgen/commands/queries/generate_tests_query.py +94 -0
- testgen/commands/queries/profiling_query.py +366 -0
- testgen/commands/queries/test_parameter_validation_query.py +88 -0
- testgen/commands/run_execute_cat_tests.py +162 -0
- testgen/commands/run_execute_tests.py +168 -0
- testgen/commands/run_generate_tests.py +107 -0
- testgen/commands/run_get_entities.py +122 -0
- testgen/commands/run_launch_db_config.py +84 -0
- testgen/commands/run_observability_exporter.py +330 -0
- testgen/commands/run_profiling_bridge.py +495 -0
- testgen/commands/run_quick_start.py +168 -0
- testgen/commands/run_setup_profiling_tools.py +96 -0
- testgen/commands/run_test_definition.py +146 -0
- testgen/commands/run_test_parameter_validation.py +135 -0
- testgen/commands/run_upgrade_db_config.py +156 -0
- testgen/common/__init__.py +8 -0
- testgen/common/clean_sql.py +53 -0
- testgen/common/credentials.py +25 -0
- testgen/common/database/__init__.py +0 -0
- testgen/common/database/database_service.py +629 -0
- testgen/common/database/flavor/__init__.py +0 -0
- testgen/common/database/flavor/flavor_service.py +75 -0
- testgen/common/database/flavor/mssql_flavor_service.py +34 -0
- testgen/common/database/flavor/postgresql_flavor_service.py +5 -0
- testgen/common/database/flavor/redshift_flavor_service.py +22 -0
- testgen/common/database/flavor/snowflake_flavor_service.py +69 -0
- testgen/common/database/flavor/trino_flavor_service.py +21 -0
- testgen/common/date_service.py +68 -0
- testgen/common/display_service.py +85 -0
- testgen/common/docker_service.py +76 -0
- testgen/common/encrypt.py +55 -0
- testgen/common/get_pipeline_parms.py +57 -0
- testgen/common/logs.py +79 -0
- testgen/common/process_service.py +62 -0
- testgen/common/read_file.py +69 -0
- testgen/settings.py +440 -0
- testgen/template/dbsetup/010_create_base_schema.sql +2 -0
- testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +179 -0
- testgen/template/dbsetup/030_initialize_new_schema_structure.sql +735 -0
- testgen/template/dbsetup/040_populate_new_schema_project.sql +59 -0
- testgen/template/dbsetup/050_populate_new_schema_metadata.sql +1517 -0
- testgen/template/dbsetup/060_create_standard_views.sql +248 -0
- testgen/template/dbsetup/070_create_default_users.sql +17 -0
- testgen/template/dbsetup/075_grant_role_rights.sql +43 -0
- testgen/template/dbsetup/080_set_current_revision.sql +5 -0
- testgen/template/dbupgrade/0100_incremental_upgrade.sql +5 -0
- testgen/template/dbupgrade/0101_incremental_upgrade.sql +15 -0
- testgen/template/dbupgrade/0102_incremental_upgrade.sql +4 -0
- testgen/template/dbupgrade/0103_incremental_upgrade.sql +22 -0
- testgen/template/dbupgrade/0104_incremental_upgrade.sql +44 -0
- testgen/template/dbupgrade/0105_incremental_upgrade.sql +1 -0
- testgen/template/dbupgrade/0106_incremental_upgrade.sql +5 -0
- testgen/template/dbupgrade/0107_incremental_upgrade.sql +3 -0
- testgen/template/dbupgrade_helpers/get_tg_revision.sql +2 -0
- testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +116 -0
- testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql +11 -0
- testgen/template/exec_cat_tests/ex_cat_results_parse.sql +69 -0
- testgen/template/exec_cat_tests/ex_cat_retrieve_agg_test_parms.sql +6 -0
- testgen/template/exec_cat_tests/ex_cat_test_query.sql +8 -0
- testgen/template/execution/ex_finalize_test_run_results.sql +37 -0
- testgen/template/execution/ex_get_tests_non_cat.sql +47 -0
- testgen/template/execution/ex_update_test_record_in_testrun_table.sql +27 -0
- testgen/template/execution/ex_write_test_record_to_testrun_table.sql +6 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +48 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql +34 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +39 -0
- testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +58 -0
- testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +44 -0
- testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +37 -0
- testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +53 -0
- testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +46 -0
- testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +59 -0
- testgen/template/flavors/generic/profiling/contingency_counts.sql +3 -0
- testgen/template/flavors/generic/validate_tests/ex_get_project_column_list_generic.sql +3 -0
- testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql +53 -0
- testgen/template/flavors/mssql/profiling/project_ddf_query_mssql.sql +35 -0
- testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +246 -0
- testgen/template/flavors/mssql/profiling/project_secondary_profiling_query_mssql.sql +36 -0
- testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql +8 -0
- testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql +12 -0
- testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql +54 -0
- testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql +4 -0
- testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql +1 -0
- testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql +46 -0
- testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql +59 -0
- testgen/template/flavors/postgresql/profiling/project_ddf_query_postgresql.sql +42 -0
- testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +225 -0
- testgen/template/flavors/postgresql/profiling/project_secondary_profiling_query_postgresql.sql +28 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql +157 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql +1 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql +2 -0
- testgen/template/flavors/redshift/profiling/project_ddf_query_redshift.sql +38 -0
- testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +221 -0
- testgen/template/flavors/redshift/profiling/project_secondary_profiling_query_redshift.sql +29 -0
- testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql +115 -0
- testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql +1 -0
- testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql +2 -0
- testgen/template/flavors/snowflake/profiling/project_ddf_query_snowflake.sql +38 -0
- testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +220 -0
- testgen/template/flavors/snowflake/profiling/project_secondary_profiling_query_snowflake.sql +29 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql +69 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql +1 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql +6 -0
- testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml +219 -0
- testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql +92 -0
- testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql +1 -0
- testgen/template/gen_funny_cat_tests/gen_test_constant.sql +104 -0
- testgen/template/gen_funny_cat_tests/gen_test_distinct_value_ct.sql +98 -0
- testgen/template/gen_funny_cat_tests/gen_test_row_ct.sql +57 -0
- testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql +59 -0
- testgen/template/generation/gen_delete_old_tests.sql +5 -0
- testgen/template/generation/gen_insert_test_suite.sql +5 -0
- testgen/template/generation/gen_retrieve_or_insert_test_suite.sql +58 -0
- testgen/template/generation/gen_standard_test_type_list.sql +13 -0
- testgen/template/generation/gen_standard_tests.sql +48 -0
- testgen/template/get_entities/get_connection.sql +21 -0
- testgen/template/get_entities/get_connections_list.sql +9 -0
- testgen/template/get_entities/get_latest.sql +4 -0
- testgen/template/get_entities/get_profile.sql +12 -0
- testgen/template/get_entities/get_profile_info.sql +17 -0
- testgen/template/get_entities/get_profile_list.sql +17 -0
- testgen/template/get_entities/get_profile_screen.sql +275 -0
- testgen/template/get_entities/get_project_list.sql +6 -0
- testgen/template/get_entities/get_table_group_list.sql +10 -0
- testgen/template/get_entities/get_test_generation_list.sql +18 -0
- testgen/template/get_entities/get_test_info.sql +41 -0
- testgen/template/get_entities/get_test_results_for_run_cli.sql +16 -0
- testgen/template/get_entities/get_test_run_list.sql +24 -0
- testgen/template/get_entities/get_test_suite.sql +13 -0
- testgen/template/get_entities/get_test_suite_list.sql +18 -0
- testgen/template/get_entities/list_test_types.sql +4 -0
- testgen/template/observability/get_event_data.sql +23 -0
- testgen/template/observability/get_test_results.sql +41 -0
- testgen/template/observability/update_test_results_exported_to_observability.sql +12 -0
- testgen/template/parms/parms_profiling.sql +34 -0
- testgen/template/parms/parms_test_execution.sql +13 -0
- testgen/template/parms/parms_test_gen.sql +23 -0
- testgen/template/profiling/contingency_columns.sql +7 -0
- testgen/template/profiling/datatype_suggestions.sql +56 -0
- testgen/template/profiling/functional_datatype.sql +523 -0
- testgen/template/profiling/functional_tabletype_stage.sql +48 -0
- testgen/template/profiling/functional_tabletype_update.sql +8 -0
- testgen/template/profiling/pii_flag.sql +133 -0
- testgen/template/profiling/profile_anomalies_screen_column.sql +22 -0
- testgen/template/profiling/profile_anomalies_screen_multi_column.sql +58 -0
- testgen/template/profiling/profile_anomalies_screen_table.sql +22 -0
- testgen/template/profiling/profile_anomalies_screen_table_dates.sql +30 -0
- testgen/template/profiling/profile_anomalies_screen_variants.sql +40 -0
- testgen/template/profiling/profile_anomaly_types_get.sql +3 -0
- testgen/template/profiling/project_get_table_sample_count.sql +22 -0
- testgen/template/profiling/project_profile_run_record_insert.sql +8 -0
- testgen/template/profiling/project_profile_run_record_update.sql +5 -0
- testgen/template/profiling/project_profile_run_record_update_status.sql +5 -0
- testgen/template/profiling/project_update_profile_results_to_estimates.sql +32 -0
- testgen/template/profiling/refresh_anomalies.sql +33 -0
- testgen/template/profiling/refresh_data_chars_from_profiling.sql +156 -0
- testgen/template/profiling/secondary_profiling_columns.sql +12 -0
- testgen/template/profiling/secondary_profiling_delete.sql +4 -0
- testgen/template/profiling/secondary_profiling_update.sql +18 -0
- testgen/template/quick_start/populate_target_data.sql +1077 -0
- testgen/template/quick_start/recreate_target_data_schema.sql +167 -0
- testgen/template/quick_start/update_target_data.sql +100 -0
- testgen/template/updates/create_tmp_test_definition.sql +19 -0
- testgen/template/updates/get_test_def_parms.sql +38 -0
- testgen/template/updates/populate_stg_test_definitions.sql +184 -0
- testgen/template/validate_tests/ex_disable_tests_test_definitions.sql +5 -0
- testgen/template/validate_tests/ex_flag_tests_test_definitions.sql +64 -0
- testgen/template/validate_tests/ex_get_project_column_list_generic.sql +3 -0
- testgen/template/validate_tests/ex_get_test_column_list_tg.sql +65 -0
- testgen/template/validate_tests/ex_write_test_val_errors.sql +22 -0
- testgen/ui/__init__.py +0 -0
- testgen/ui/app.py +98 -0
- testgen/ui/assets/dk_logo.svg +46 -0
- testgen/ui/assets/question_mark.png +0 -0
- testgen/ui/assets/scripts.js +68 -0
- testgen/ui/assets/style.css +140 -0
- testgen/ui/bootstrap.py +109 -0
- testgen/ui/components/__init__.py +0 -0
- testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fBBc4.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fChc4EsA.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu4mxK.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu7GxKOzY.woff2 +0 -0
- testgen/ui/components/frontend/css/material-symbols-rounded.css +24 -0
- testgen/ui/components/frontend/css/material-symbols-rounded.woff2 +0 -0
- testgen/ui/components/frontend/css/roboto-font-faces.css +35 -0
- testgen/ui/components/frontend/css/shared.css +36 -0
- testgen/ui/components/frontend/img/dk_logo.svg +46 -0
- testgen/ui/components/frontend/index.html +17 -0
- testgen/ui/components/frontend/js/components/breadcrumbs.js +86 -0
- testgen/ui/components/frontend/js/components/button.js +66 -0
- testgen/ui/components/frontend/js/components/location.js +62 -0
- testgen/ui/components/frontend/js/components/select.js +75 -0
- testgen/ui/components/frontend/js/components/sidebar.js +358 -0
- testgen/ui/components/frontend/js/main.js +99 -0
- testgen/ui/components/frontend/js/streamlit.js +19 -0
- testgen/ui/components/frontend/js/van.min.js +1 -0
- testgen/ui/components/utils/__init__.py +0 -0
- testgen/ui/components/utils/callbacks.py +51 -0
- testgen/ui/components/utils/component.py +13 -0
- testgen/ui/components/widgets/__init__.py +6 -0
- testgen/ui/components/widgets/breadcrumbs.py +32 -0
- testgen/ui/components/widgets/location.py +65 -0
- testgen/ui/components/widgets/modal.py +97 -0
- testgen/ui/components/widgets/sidebar.py +69 -0
- testgen/ui/navigation/__init__.py +0 -0
- testgen/ui/navigation/menu.py +42 -0
- testgen/ui/navigation/page.py +20 -0
- testgen/ui/navigation/router.py +63 -0
- testgen/ui/queries/__init__.py +0 -0
- testgen/ui/queries/authentication_queries.py +47 -0
- testgen/ui/queries/connection_queries.py +121 -0
- testgen/ui/queries/profiling_queries.py +148 -0
- testgen/ui/queries/project_queries.py +9 -0
- testgen/ui/queries/table_group_queries.py +186 -0
- testgen/ui/queries/test_definition_queries.py +270 -0
- testgen/ui/queries/test_run_queries.py +32 -0
- testgen/ui/queries/test_suite_queries.py +145 -0
- testgen/ui/scripts/__init__.py +0 -0
- testgen/ui/scripts/patch_streamlit.py +111 -0
- testgen/ui/services/__init__.py +0 -0
- testgen/ui/services/authentication_service.py +119 -0
- testgen/ui/services/connection_service.py +220 -0
- testgen/ui/services/database_service.py +282 -0
- testgen/ui/services/form_service.py +1008 -0
- testgen/ui/services/javascript_service.py +44 -0
- testgen/ui/services/query_service.py +316 -0
- testgen/ui/services/string_service.py +12 -0
- testgen/ui/services/table_group_service.py +130 -0
- testgen/ui/services/test_definition_service.py +117 -0
- testgen/ui/services/test_run_service.py +13 -0
- testgen/ui/services/test_suite_service.py +76 -0
- testgen/ui/services/toolbar_service.py +77 -0
- testgen/ui/session.py +46 -0
- testgen/ui/views/__init__.py +0 -0
- testgen/ui/views/app_log_modal.py +92 -0
- testgen/ui/views/connections.py +72 -0
- testgen/ui/views/connections_base.py +367 -0
- testgen/ui/views/login.py +40 -0
- testgen/ui/views/not_found.py +16 -0
- testgen/ui/views/overview.py +34 -0
- testgen/ui/views/profiling_anomalies.py +501 -0
- testgen/ui/views/profiling_details.py +335 -0
- testgen/ui/views/profiling_modal.py +40 -0
- testgen/ui/views/profiling_results.py +206 -0
- testgen/ui/views/profiling_summary.py +177 -0
- testgen/ui/views/project_settings.py +74 -0
- testgen/ui/views/table_groups.py +530 -0
- testgen/ui/views/test_definitions.py +1020 -0
- testgen/ui/views/test_results.py +908 -0
- testgen/ui/views/test_runs.py +195 -0
- testgen/ui/views/test_suites.py +545 -0
- testgen/utils/__init__.py +0 -0
- testgen/utils/plugins.py +17 -0
- testgen/utils/singleton.py +14 -0
|
@@ -0,0 +1,1517 @@
|
|
|
1
|
+
-- ==============================================================================
|
|
2
|
+
-- | This refreshes static metadata on new and existing databases.
|
|
3
|
+
-- | Nothing should be here that can't run on an existing database.
|
|
4
|
+
-- ==============================================================================
|
|
5
|
+
|
|
6
|
+
-- Make the target schema the default for the unqualified table names used below.
-- NOTE(review): SCHEMA_NAME in braces looks like a template placeholder filled in before execution - confirm against the caller.
SET search_path = {SCHEMA_NAME};
|
|
7
|
+
|
|
8
|
+
-- Drop constraints that prohibit record deletion, so the static metadata
-- rows can be removed and reloaded by the statements that follow.
-- IF EXISTS keeps this step re-runnable: if a constraint is already absent
-- (for example after an earlier run that stopped partway), the statement is
-- skipped with a notice instead of aborting the script.
-- NOTE(review): presumably these constraints are re-created later in this script - confirm.
|
|
9
|
+
ALTER TABLE test_templates DROP CONSTRAINT IF EXISTS test_templates_test_types_test_type_fk;
|
|
10
|
+
ALTER TABLE test_results DROP CONSTRAINT IF EXISTS test_results_test_types_test_type_fk;
|
|
11
|
+
ALTER TABLE cat_test_conditions DROP CONSTRAINT IF EXISTS cat_test_conditions_cat_tests_test_type_fk;
|
|
12
|
+
|
|
13
|
+
-- Empty the anomaly-type catalog so the INSERT that follows reloads it from scratch.
TRUNCATE profile_anomaly_types;
|
|
14
|
+
|
|
15
|
+
INSERT INTO profile_anomaly_types (id, anomaly_type, data_object, anomaly_name, anomaly_description, anomaly_criteria, detail_expression, issue_likelihood, suggested_action)
|
|
16
|
+
VALUES ('1001', 'Suggested_Type', 'Column', 'Suggested Data Type', 'Data stored as text all meets criteria for a more suitable type. ', '(functional_data_type NOT IN (''Boolean'', ''Flag'') ) AND (column_type ILIKE ''%ch
|
|
17
|
+
ar%'' OR column_type ILIKE ''text'') AND NOT (datatype_suggestion ILIKE ''%char%'' OR datatype_suggestion ILIKE ''text'')', 'p.datatype_suggestion::VARCHAR(200)', 'Likely', 'Consider changing the column data type to tighte
|
|
18
|
+
n controls over data ingested and to make values more efficient, consistent and suitable for downstream analysis.'),
|
|
19
|
+
('1002', 'Non_Standard_Blanks', 'Column', 'Non-Standard Blank Values', 'Values representing missing data may be unexpected or inconsistent. Non-standard values may include empty strings as opposed to nulls, dummy entries such as "MISSING" or repeated characters that may have been used to bypass entry requirements, processing artifacts such as "NULL", or spreadsheet artifacts such as "NA", "ERROR".', 'p.filled_value_ct > 0 OR p.zero_length_ct > 0', '''Filled Values: '' || p.filled_value_ct::VARCHAR || '', Empty String: '' || p.zero_length_ct::VARCHAR || '', Null: '' || p.null_value_ct::VARCHAR || '', Records: '' || p.record_ct::VARCHAR', 'Definite', 'Consider cleansing the column upon ingestion to replace all variants of missing data with a standard designation, like Null.'),
|
|
20
|
+
('1003', 'Invalid_Zip_USA', 'Column', 'Invalid USA Zip Code Format', 'Some values present do not conform with the expected format of USA Zip Codes.', 'p.std_pattern_match = ''ZIP_USA'' AND (p.general_type <> ''A'' OR p.filled_value_ct > 0 OR p.min_length >= 1 AND p.min_length <= 4 OR p.max_length > 10)', 'CASE WHEN p.general_type = ''N'' THEN ''Type: '' || p.column_type || '', '' ELSE '''' END || ''Min Length: '' || p.min_length::VARCHAR || '', Max Length: '' || p.max_length::VARCHAR || '', Filled Values: '' || p.filled_value_ct::VARCHAR', 'Definite', 'Consider correcting invalid column values or changing them to indicate a missing value if corrections cannot be made.'),
|
|
21
|
+
('1004', 'Multiple_Types_Minor', 'Multi-Col', 'Multiple Data Types per Column Name - Minor', 'Columns with the same name have the same general type across tables, but the types do not exactly match. Truncation issues may result if columns are commingled and assumed to be the same format.', 'm.general_type_ct = 1 AND m.type_ct > 1', '''Found '' || m.column_ct::VARCHAR || '' columns, '' || m.type_ct::VARCHAR(10) || '' types, '' || m.min_type || '' to '' || m.max_type', 'Possible', 'Consider changing the column data types to be fully consistent. This will tighten your standards at ingestion and assure that data is consistent between tables.'),
|
|
22
|
+
('1005', 'Multiple_Types_Major', 'Multi-Col', 'Multiple Data Types per Column Name - Major', 'Columns with the same name have broadly different types across tables. Differences could be significant enough to cause errors in downstream analysis, extra steps resulting in divergent business logic and inconsistencies in results.', 'm.general_type_ct > 1', '''Found '' || m.column_ct::VARCHAR || '' columns, '' || m.type_ct::VARCHAR(10) || '' types, '' || m.min_type || '' to '' || m.max_type', 'Likely', 'Ideally, you should change the column data types to be fully consistent. If the data is meant to be different, you should change column names so downstream users aren''t led astray.'),
|
|
23
|
+
('1006', 'No_Values', 'Column', 'No Column Values Present', 'This column is present in the table, but no values have been ingested or assigned in any records. This could indicate missing data or a processing error. Note that this considers dummy values and zero-length values as missing data. ', '(p.null_value_ct + p.filled_value_ct + p.zero_length_ct) = p.record_ct', '''Null: '' || p.null_value_ct::VARCHAR(10) || '', Filled: '' || p.filled_value_ct::VARCHAR(10) || '', Zero Len: '' || p.zero_length_ct::VARCHAR(10)', 'Possible', 'Review your source data, ingestion process, and any processing steps that update this column.'),
|
|
24
|
+
('1007', 'Column_Pattern_Mismatch', 'Column', 'Pattern Inconsistency Within Column', 'Alpha-numeric string data within this column conforms to 2-4 different patterns, with 95% matching the first pattern. This could indicate data errors in the remaining values. ', 'p.general_type = ''A''
|
|
25
|
+
AND p.max_length > 3
|
|
26
|
+
AND p.value_ct > (p.numeric_ct + p.filled_value_ct)
|
|
27
|
+
AND p.distinct_pattern_ct BETWEEN 2 AND 4
|
|
28
|
+
AND STRPOS(p.top_patterns, ''N'') > 0
|
|
29
|
+
AND (
|
|
30
|
+
( (STRPOS(p.top_patterns, ''A'') > 0 OR STRPOS(p.top_patterns, ''a'') > 0)
|
|
31
|
+
AND SPLIT_PART(p.top_patterns, ''|'', 3)::NUMERIC / SPLIT_PART(p.top_patterns, ''|'', 1)::NUMERIC < 0.05)
|
|
32
|
+
OR
|
|
33
|
+
SPLIT_PART(p.top_patterns, ''|'', 3)::NUMERIC / SPLIT_PART(p.top_patterns, ''|'', 1)::NUMERIC < 0.1
|
|
34
|
+
)', '''Patterns: '' || p.top_patterns', 'Likely', 'Review the values for any data that doesn''t conform to the most common pattern and correct any data errors.'),
|
|
35
|
+
('1008', 'Table_Pattern_Mismatch', 'Multi-Col', 'Pattern Inconsistency Across Tables', 'Alpha-numeric string data within this column matches a single pattern, but other columns with the same name have data that matches a different single pattern. Inconsistent formatting may contradict user assumptions and cause downstream errors, extra steps and inconsistent business logic.', 'p.general_type = ''A''
|
|
36
|
+
AND p.max_length > 3
|
|
37
|
+
AND p.value_ct > (p.numeric_ct + p.filled_value_ct)
|
|
38
|
+
AND m.max_pattern_ct = 1
|
|
39
|
+
AND m.column_ct > 1
|
|
40
|
+
AND SPLIT_PART(p.top_patterns, ''|'', 2) <> SPLIT_PART(m.very_top_pattern, ''|'', 2)
|
|
41
|
+
AND SPLIT_PART(p.top_patterns, ''|'', 1)::NUMERIC / SPLIT_PART(m.very_top_pattern, ''|'', 1)::NUMERIC < 0.1', '''Patterns: '' || SPLIT_PART(p.top_patterns, ''|'', 2) || '', '' || SPLIT_PART(ltrim(m.very_top_pattern, ''0''), ''|'', 2)', 'Likely', 'Review the profiled patterns for the same column in other tables. You may want to add a hygiene step to your processing to make patterns consistent.'),
|
|
42
|
+
('1009', 'Leading_Spaces', 'Column', 'Leading Spaces Found in Column Values', 'Spaces were found before data at the front of column string values. This likely contradicts user expectations and could be a sign of broader ingestion or processing errors.', 'p.lead_space_ct > 0', '''Cases Found: '' || p.lead_space_ct::VARCHAR(10)', 'Likely', 'Review your source data, ingestion process, and any processing steps that update this column.'),
|
|
43
|
+
('1010', 'Quoted_Values', 'Column', 'Quoted Values Found in Column Values', 'Column values were found within quotes. This likely contradicts user expectations and could be a sign of broader ingestion or processing errors.', 'p.quoted_value_ct > 0', '''Cases Found: '' || p.quoted_value_ct::VARCHAR(10)', 'Likely', 'Review your source data, ingestion process, and any processing steps that update this column.'),
|
|
44
|
+
('1011', 'Char_Column_Number_Values', 'Column', 'Character Column with Mostly Numeric Values', 'This column is defined as alpha, but more than 95% of its values are numeric. Numbers in alpha columns won''t sort correctly, and might contradict user expectations downstream. It''s also possible that more than one type of information is stored in the column, making it harder to retrieve.', 'p.general_type = ''A''
|
|
45
|
+
AND p.column_name NOT ILIKE ''%zip%''
|
|
46
|
+
AND p.functional_data_type NOT ILIKE ''id%''
|
|
47
|
+
AND p.value_ct > p.numeric_ct
|
|
48
|
+
AND p.numeric_ct::NUMERIC > (0.95 * p.value_ct::NUMERIC)', '''Numeric Ct: '' || p.numeric_ct || '' of '' || p.value_ct || '' (Numeric Percent: '' || ROUND(100.0 * p.numeric_ct::NUMERIC(18, 5) / p.value_ct::NUMERIC(18, 5), 2) || '' )''::VARCHAR(200)', 'Likely', 'Review your source data and ingestion process. Consider whether it might be better to store the numeric data in a numeric column. If the alpha data is significant, you could store it in a different column.'),
|
|
49
|
+
('1012', 'Char_Column_Date_Values', 'Column', 'Character Column with Mostly Date Values', 'This column is defined as alpha, but more than 95% of its values are dates. Dates in alpha columns might not sort correctly, and might contradict user expectations downstream. It''s also possible that more than one type of information is stored in the column, making it harder to retrieve. ', 'p.general_type = ''A''
|
|
50
|
+
AND p.value_ct > p.date_ct
|
|
51
|
+
AND p.date_ct::NUMERIC > (0.95 * p.value_ct::NUMERIC)', ''' Date Ct: '' || p.date_ct || '' of '' || p.value_ct || '' (Date Percent: '' || ROUND(100.0 * p.date_ct::NUMERIC(18, 5) / p.value_ct::NUMERIC(18, 5), 2) || '' )''::VARCHAR(200)' , 'Likely', 'Review your source data and ingestion process. Consider whether it might be better to store the date values as a date or datetime column. If the alpha data is also significant, you could store it in a different column.'),
|
|
52
|
+
('1013', 'Small Missing Value Ct', 'Column', 'Small Percentage of Missing Values Found', 'Under 3% of values in this column were found to be null, zero-length or dummy values, but values are not universally present. This could indicate unexpected missing values in a required column.', '(p.value_ct - p.zero_length_ct - p.filled_value_ct)::FLOAT / p.record_ct::FLOAT > 0.97
|
|
53
|
+
AND (p.value_ct - p.zero_length_ct - p.filled_value_ct) < p.record_ct', '(p.record_ct - (p.value_ct - p.zero_length_ct - p.filled_value_ct))::VARCHAR(20) ||
|
|
54
|
+
'' of '' || p.record_ct::VARCHAR(20) || '' blank values: '' ||
|
|
55
|
+
ROUND(100.0 * (p.record_ct - (p.value_ct - p.zero_length_ct - p.filled_value_ct))::NUMERIC(18, 5)
|
|
56
|
+
/ NULLIF(p.value_ct, 0)::NUMERIC(18, 5), 2)::VARCHAR(40) || ''%''', 'Possible', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected, supplemented or excluded.'),
|
|
57
|
+
('1014', 'Small Divergent Value Ct', 'Column', 'Small Percentage of Divergent Values Found', 'Under 3% of values in this column were found to be different from the most common value. This could indicate a data error.', '(100.0 * fn_parsefreq(p.top_freq_values, 1, 2)::FLOAT /
|
|
58
|
+
p.value_ct::FLOAT) > 97::FLOAT
|
|
59
|
+
AND (100.0 * fn_parsefreq(p.top_freq_values, 1, 2)::FLOAT /
|
|
60
|
+
NULLIF(p.value_ct, 0)::FLOAT) < 100::FLOAT', '''Single Value Pct: '' || ROUND(100.0 * fn_parsefreq(p.top_freq_values, 1, 2)::FLOAT
|
|
61
|
+
/ NULLIF(p.value_ct, 0)::FLOAT)::VARCHAR(40)
|
|
62
|
+
|| '', Value | Freq: '' || top_freq_values', 'Possible', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected.'),
|
|
63
|
+
('1015', 'Boolean_Value_Mismatch', 'Column', 'Unexpected Boolean Values Found', 'This column appears to contain boolean (True/False) data, but unexpected values were found. This could indicate inconsistent coding for the same intended values, potentially leading to downstream errors or inconsistent business logic. ', '(distinct_value_ct > 1 AND
|
|
64
|
+
((lower(top_freq_values) ILIKE ''| true |%'' OR lower(top_freq_values) ILIKE ''| false |%'') AND NOT (lower(top_freq_values) ILIKE ''%| true |%'' AND lower(top_freq_values) ILIKE ''%| false |%''))
|
|
65
|
+
OR ((lower(top_freq_values) ILIKE ''| yes |%'' OR lower(top_freq_values) ILIKE ''| no |%'' ) AND NOT (lower(top_freq_values) ILIKE ''%| yes |%'' AND lower(top_freq_values) ILIKE ''%| no |%'')) )', 'CASE WHEN p.top_freq_values IS NULL THEN ''Min: '' || p.min_text || '', Max: '' || p.max_text
|
|
66
|
+
ELSE ''Top Freq: '' || p.top_freq_values END', 'Likely', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected. '),
|
|
67
|
+
('1016', 'Potential_Duplicates', 'Column', 'Potential Duplicate Values Found', 'This column is largely unique, but some duplicate values are present. This pattern is uncommon and could indicate inadvertant duplication. ', 'p.distinct_value_ct > 1000
|
|
68
|
+
AND fn_parsefreq(p.top_freq_values, 1, 2)::BIGINT BETWEEN 2 AND 4', '''Top Freq: '' || p.top_freq_values', 'Possible', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected. '),
|
|
69
|
+
('1017', 'Standardized_Value_Matches', 'Column', 'Similar Values Match When Standardized', 'When column values are standardized (removing spaces, single-quotes, periods and dashes), matching values are found in other records. This may indicate that formats should be further standardized to allow consistent comparisons for merges, joins and roll-ups. It could also indicate the presence of unintended duplicates.', 'p.general_type = ''A'' AND p.distinct_std_value_ct <> p.distinct_value_ct', '''Distinct Values: '' || p.distinct_value_ct::VARCHAR
|
|
70
|
+
|| '', Standardized: '' || p.distinct_std_value_ct::VARCHAR', 'Likely', 'Review standardized vs. raw data values for all matches. Correct data if values should be consistent.'),
|
|
71
|
+
('1018', 'Unlikely_Date_Values', 'Column', 'Unlikely Dates out of Typical Range', 'Some date values in this column are earlier than 1900-01-01 or later than 30 years after Profiling date.', 'p.general_type = ''D''
|
|
72
|
+
AND (p.min_date BETWEEN ''0001-01-02''::DATE AND ''1900-01-01''::DATE
|
|
73
|
+
OR p.max_date > CURRENT_DATE + INTERVAL ''30 year'')', '''Date Range: '' || p.min_date::VARCHAR || '' thru '' || p.max_date::VARCHAR', 'Likely', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected or removed.'),
|
|
74
|
+
('1019', 'Recency_One_Year', 'Dates', 'Recency - No Table Dates within 1 Year', 'Among all date columns present in the table, none fall inside of one year from Profile date.', 'MAX(p.max_date) < CURRENT_DATE - INTERVAL ''1 year''', '''Most Recent Date: '' || MAX(p.max_date)::VARCHAR', 'Possible', 'Review your source data and follow-up with data owners to determine whether dates in table should be more recent.'),
|
|
75
|
+
('1020', 'Recency_Six_Months', 'Dates', 'Recency - No Table Dates within 6 Months', 'Among all date columns present in the table, the most recent date falls 6 months to 1 year back from Profile date. ', 'MAX(p.max_date) >= CURRENT_DATE - INTERVAL ''1 year'' AND MAX(p.max_date) < CURRENT_DATE - INTERVAL ''6 months''', '''Most Recent Date: '' || MAX(p.max_date)::VARCHAR', 'Possible', 'Review your source data and follow-up with data owners to determine whether dates in table should be more recent.'),
|
|
76
|
+
('1021', 'Unexpected US States', 'Column', 'Unexpected Column Contains US States', 'This column is not labeled as a state, but contains mostly US State abbreviations. This could indicate shifted or switched source data columns.', 'p.std_pattern_match = ''STATE_USA''
|
|
77
|
+
AND p.distinct_value_ct > 5
|
|
78
|
+
AND NOT (p.column_name ILIKE ''%state%'' OR p.column_name ILIKE ''%_st'')', '''Value Range: '' || p.min_text || '' thru '' || max_text || CASE WHEN p.top_freq_values > '''' THEN ''Top Freq Values: '' || REPLACE(p.top_freq_values, CHR(10), '' ; '') ELSE '''' END ', 'Possible', 'Review your source data and follow-up with data owners to determine whether column should be populated with US states.'),
|
|
79
|
+
('1022', 'Unexpected Emails', 'Column', 'Unexpected Column Contains Emails', 'This column is not labeled as email, but contains mostly email addresses. This could indicate shifted or switched source data columns.', 'p.std_pattern_match = ''EMAIL''
|
|
80
|
+
AND NOT (p.column_name ILIKE ''%email%'' OR p.column_name ILIKE ''%addr%'')', '''Value Range: '' || p.min_text || '' thru '' || max_text', 'Possible', 'Review your source data and follow-up with data owners to determine whether column should be populated with email addresses.'),
|
|
81
|
+
('1023', 'Small_Numeric_Value_Ct', 'Column', 'Unexpected Numeric Values Found',
|
|
82
|
+
'Under 3% of values in this column were found to be numeric. This could indicate a data error.', e'p.general_type = \'A\'
|
|
83
|
+
AND p.numeric_ct::FLOAT/NULLIF(p.record_ct, 0)::FLOAT < 0.03
|
|
84
|
+
AND p.numeric_ct > 0',
|
|
85
|
+
'''Numeric Ct: '' || p.numeric_ct || '' of '' || p.value_ct || '' (Numeric Percent: '' || ROUND(100.0 * p.numeric_ct::NUMERIC(18, 5)/NULLIF(p.value_ct, 0)::NUMERIC(18, 5), 2) || '' )''::VARCHAR(200)',
|
|
86
|
+
'Likely', 'Review your source data and follow-up with data owners to determine whether numeric values are invalid entries here.'),
|
|
87
|
+
('1024', 'Invalid_Zip3_USA', 'Column', 'Invalid USA ZIP-3 Format', 'The majority of values in this column are 3-digit zips, but divergent patterns were found. This could indicate an incorrect roll-up category or a PII concern.', 'p.distinct_pattern_ct > 1
|
|
88
|
+
AND (p.column_name ilike ''%zip%'' OR p.column_name ILIKE ''%postal%'')
|
|
89
|
+
AND SPLIT_PART(p.top_patterns, '' | '', 2) = ''NNN''
|
|
90
|
+
AND SPLIT_PART(p.top_patterns, '' | '', 1)::FLOAT/NULLIF(value_ct, 0)::FLOAT > 0.50', '''Pattern: '' || p.top_patterns', 'Definite', 'Review your source data, ingestion process, and any processing steps that update this column.'),
|
|
91
|
+
('1025', 'Delimited_Data_Embedded', 'Column', 'Delimited Data Embedded in Column', 'Delimited data, separated by a common delimiter (comma, tab, pipe or caret) is present in over 80% of column values. This could indicate data that was incorrectly ingested, or data that would be better represented in parsed form.', 'p.std_pattern_match = ''DELIMITED_DATA''', 'CASE WHEN p.top_freq_values IS NULL THEN ''Min: '' || p.min_text || '', Max: '' || p.max_text ELSE ''Top Freq: '' || p.top_freq_values END', 'Likely', 'Review your source data and follow-up with data consumers to determine the most useful representation of this data.'),
|
|
92
|
+
('1026', 'Char_Column_Number_Units', 'Column', 'Character Column with Numbers and Units',
|
|
93
|
+
'This column is defined as alpha, but values include numbers with percents or common units. Embedded measures in alpha columns are harder to access, won''t sort correctly, and might contradict user expectations downstream. Consider parsing into numeric and UOM columns to improve usability.',
|
|
94
|
+
'p.includes_digit_ct::FLOAT/NULLIF(p.value_ct, 0)::FLOAT > 0.5 AND TRIM(fn_parsefreq(p.top_freq_values, 1, 1)) ~ ''(?i)^[0-9]+(\.[0-9]+)? ?(%|lb|oz|kg|g|mg|km|m|cm|mm|mi|ft|in)$''',
|
|
95
|
+
'''Top Freq: '' || p.top_freq_values', 'Possible',
|
|
96
|
+
'Review your source data and ingestion process. Consider whether it might be better to parse the numeric and unit data and store in separate columns.'),
|
|
97
|
+
('1027', 'Variant_Coded_Values', 'Variant', 'Variant Codings for Same Values', 'This column contains more than one common variant that represents a single value or state. This can occur when data is integrated from multiple sources with different standards, or when free entry is permitted without validation. The variations can cause confusion and error for downstream data users and multiple versions of the truth. ', 'p.distinct_value_ct <= 20', '''Variants Found: '' || intersect_list', 'Definite', 'Review your source data and ingestion process. Consider cleansing this data to standardize on a single set of definitive codes.'),
|
|
98
|
+
('1100', 'Potential_PII', 'Column', 'Personally Identifiable Information', 'This column contains data that could be Personally Identifiable Information (PII)', 'p.pii_flag > ''''', '''Risk: '' || CASE LEFT(p.pii_flag, 1) WHEN ''A'' THEN ''HIGH'' WHEN ''B'' THEN ''MODERATE'' WHEN ''C'' THEN ''LOW'' END || '', PII Type: '' || SUBSTRING(p.pii_flag, 3)', 'Potential PII', 'PII may require steps to ensure data security and compliance with relevant privacy regulations and legal requirements. You may have to classify and inventory PII, implement appropriate access controls, encrypt data, and monitor for unauthorized access. Your organization might be required to update privacy policies and train staff on data protection practices. Note that PII that is lower-risk in isolation might be high-risk in conjunction with other data.');
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
-- Remove every existing row from test_types; the INSERT INTO test_types
-- statement that follows repopulates the table from scratch.
TRUNCATE TABLE test_types;
|
|
102
|
+
|
|
103
|
+
INSERT INTO test_types
|
|
104
|
+
(id, test_type, test_name_short, test_name_long, test_description, except_message, measure_uom, measure_uom_description, selection_criteria, column_name_prompt, column_name_help, default_parm_columns, default_parm_values, default_parm_prompts, default_parm_help, default_severity, run_type, test_scope, dq_dimension, health_dimension, threshold_description, usage_notes, active)
|
|
105
|
+
VALUES ('1004', 'Alpha_Trunc', 'Alpha Truncation', 'Maximum character count consistent', 'Tests that the maximum count of characters in a column value has not dropped vs. baseline data', 'Maximum length of values has dropped from prior expected length.', 'Values over max', NULL, 'general_type =''A'' AND max_length > 0 AND ( (min_length = avg_length AND max_length = avg_length) OR (numeric_ct <> value_ct ) ) AND functional_table_type NOT LIKE ''%window%'' /* The conditions below are to eliminate overlap with : LOV_Match (excluded selection criteria for this test_type), Pattern_Match (excluded selection criteria for this test_type), Constant (excluded functional_data_type Constant and Boolean) */ AND ( (distinct_value_ct NOT BETWEEN 2 AND 10 AND functional_data_type NOT IN ( ''Constant'', ''Boolean'') ) AND NOT ( fn_charcount(top_patterns, E'' \| '' ) = 1 AND fn_charcount(top_patterns, E'' \| '' ) IS NOT NULL AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > ''''))', NULL, NULL, 'threshold_value', 'max_length', 'Maximum String Length at Baseline', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Maximum length expected', 'Alpha Truncation tests that the longest text value in a column hasn''t become shorter than the longest value at baseline. This could indicate a problem in a cumulative dataset, where prior values should still exist unchanged. A failure here would suggest that some process changed data that you would still expect to be present and matching its value when the column was profiled. This test would not be appropriate for an incremental or windowed dataset.', 'Y'),
|
|
106
|
+
('1005', 'Avg_Shift', 'Average Shift', 'Column mean is consistent with reference', 'Tests for statistically-significant shift in mean value for column from average calculated at baseline.', 'Standardized difference between averages is over the selected threshold level.', 'Difference Measure', 'Cohen''s D Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'general_type=''N'' AND distinct_value_ct > 10 AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%''', NULL, NULL, 'baseline_value_ct,baseline_avg,baseline_sd,threshold_value', 'value_ct,avg_value,stdev_value,0.5::VARCHAR', 'Value Ct at Baseline,Mean at Baseline,Std Deviation at Baseline,Threshold Difference Measure ', NULL, 'Warning', 'CAT', 'column', 'Consistency', 'Data Drift', 'Standardized Difference Measure', 'Average Shift tests that the average of a numeric column has not significantly changed since baseline, when profiling was done. A significant shift may indicate errors in processing, differences in source data, or valid changes that may nevertheless impact assumptions in downstream data products. The test uses Cohen''s D, a statistical technique to identify significant shifts in a value. Cohen''s D measures the difference between the two averages, reporting results on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. Depending on your data, some difference may be expected, so it''s reasonable to adjust the threshold value that triggers test failure. This test works well for measures, or even for identifiers if you expect them to increment consistently. You may want to periodically adjust the expected threshold, or even the expected average value if you expect shifting over time. Consider this test along with Variability Increase. If variability rises too, process or measurement flaws could be at work. 
If variability remains consistent, the issue is more likely to be with the source data itself. ', 'Y'),
|
|
107
|
+
('1007', 'Constant', 'Constant Match', 'All column values match constant value', 'Tests that all values in the column match the constant value identified in baseline data', 'A constant value is expected for this column.', 'Mismatched values', NULL, 'TEMPLATE', NULL, NULL, 'baseline_value,threshold_value', NULL, 'Constant Value at Baseline,Threshold Error Count', 'The single, unchanging value of the column, per baseline|The number of errors that are acceptable before test fails.', 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Count of records with unexpected values', 'Constant Match tests that a single value determined to be a constant in baseline profiling is still the only value for the column that appears in subsequent versions of the dataset. Sometimes new data or business knowledge may reveal that the value is not a constant at all, even though only one value was present at profiling. In this case, you will want to disable this test. Alternatively, you can use the Value Match test to provide a limited number of valid values for the column.', 'Y'),
|
|
108
|
+
('1009', 'Daily_Record_Ct', 'Daily Records', 'All dates present within date range', 'Tests for presence of every calendar date within min/max date range, per baseline data', 'Not every date value between min and max dates is present, unlike at baseline.', 'Missing dates', NULL, 'general_type= ''D'' AND date_days_present > 21 AND date_days_present - (DATEDIFF(''day'', ''1800-01-05''::DATE, max_date) - DATEDIFF(''day'', ''1800-01-05''::DATE, min_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Calendar Days', NULL, 'Warning', 'CAT', 'column', 'Completeness', 'Volume', 'Missing calendar days within min/max range', 'Daily Records tests that at least one record is present for every day within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each day. A failure here would suggest missing records for the number of days identified without data. You can adjust the threshold to accept a number of days that you know legitimately have no records. ', 'Y'),
|
|
109
|
+
('1011', 'Dec_Trunc', 'Decimal Truncation', 'Sum of fractional values at or above reference', 'Tests for decimal truncation by confirming that the sum of fractional values in data is no less than the sum at baseline', 'The sum of fractional values is under baseline, which may indicate decimal truncation', 'Fractional sum', 'The sum of all decimal values from all data for this column', 'fractional_sum IS NOT NULL AND functional_table_type LIKE''%cumulative%''', NULL, NULL, 'threshold_value', 'ROUND(fractional_sum, 0)', 'Sum of Fractional Values at Baseline', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Minimum expected sum of all fractional values', 'Decimal Truncation tests that the fractional (decimal) part of a numeric column has not been truncated since Baseline. This works by summing all the fractional values after the decimal point and confirming that the total is at least equal to the fractional total at baseline. This could indicate a problem in a cumulative dataset, where prior values should still exist unchanged. A failure here would suggest that some process changed data that you would still expect to be present and matching its value when the column was profiled. This test would not be appropriate for an incremental or windowed dataset.', 'Y'),
|
|
110
|
+
('1012', 'Distinct_Date_Ct', 'Date Count', 'Count of distinct dates at or above reference', 'Tests that the count of distinct dates referenced in the column has not dropped vs. baseline data', 'Drop in count of unique dates recorded in column.', 'Unique dates', 'Count of unique dates in transactional date column', 'general_type=''D'' and date_days_present IS NOT NULL AND functional_table_type NOT LIKE ''%window%''', NULL, NULL, 'baseline_value,threshold_value', 'date_days_present,date_days_present', 'Distinct Date Count at Baseline,Min Expected Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Minimum distinct date count expected', 'Date Count tests that the count of distinct dates present in the column has not dropped since baseline. The test is relevant for cumulative datasets, where old records are retained. A failure here would indicate missing records, which could be caused by a processing error or changed upstream data sources.', 'Y'),
|
|
111
|
+
('1013', 'Distinct_Value_Ct', 'Value Count', 'Count of distinct values has not dropped', 'Tests that the count of unique values in the column has not changed from baseline.', 'Count of unique values in column has changed from baseline.', 'Unique Values', NULL, 'distinct_value_ct between 2 and 10 AND value_ct > 0 AND NOT (coalesce(top_freq_values,'''') > '''' AND distinct_value_ct BETWEEN 2 and 10) AND NOT (lower(functional_data_type) LIKE ''%sequence%'' OR lower(functional_data_type) LIKE ''%measurement%'' OR functional_data_type LIKE ''%date%'' OR general_type = ''D'')', NULL, NULL, 'baseline_value_ct,threshold_value', 'distinct_value_ct,distinct_value_ct', 'Distinct Value Count at Baseline,Min Expected Value Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected distinct value count', 'Value Count tests that the count of unique values present in the column has not dropped since baseline. The test is relevant for cumulative datasets, where old records are retained, or for any dataset where you would expect a set number of distinct values should be present. A failure here would indicate missing records or a change in categories or value assignment.', 'Y'),
|
|
112
|
+
('1014', 'Email_Format', 'Email Format', 'Email is correctly formatted', 'Tests that non-blank, non-empty email addresses match the standard format', 'Invalid email address formats found.', 'Invalid emails', 'Number of emails that do not match standard format', 'std_pattern_match=''EMAIL''', NULL, NULL, 'threshold_value', '0', 'Maximum Invalid Email Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of invalid email addresses', NULL, 'Y'),
|
|
113
|
+
('1015', 'Future_Date', 'Past Dates', 'Latest date is prior to test run date', 'Tests that the maximum date referenced in the column is no greater than the test date, consistent with baseline data', 'Future date found when absent in baseline data.', 'Future dates', NULL, 'general_type=''D''AND future_date_ct = 0', NULL, NULL, 'threshold_value', '0', 'Maximum Future Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected count of future dates', NULL, 'Y'),
|
|
114
|
+
('1016', 'Future_Date_1Y', 'Future Year', 'Future dates within year of test run date', 'Tests that the maximum date referenced in the column is no greater than one year beyond the test date, consistent with baseline data', 'Future date beyond one-year found when absent in baseline.', 'Future dates post 1 year', NULL, 'general_type=''D''AND future_date_ct > 0 AND max_date <=''{AS_OF_DATE}''::DATE + INTERVAL''365 DAYS''', NULL, NULL, 'threshold_value', '0', 'Maximum Post 1-Year Future Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected count of future dates beyond one year', 'Future Year looks for date values in the column that extend beyond one year after the test date. This would be appropriate for transactional dates where you would expect to find dates in the near future, but not beyond one year ahead. Errors could indicate invalid entries or possibly dummy dates representing blank values.', 'Y'),
|
|
115
|
+
('1017', 'Incr_Avg_Shift', 'New Shift', 'New record mean is consistent with reference', 'Tests for statistically-significant shift in mean of new values for column compared to average calculated at baseline.', 'Significant shift in average of new values vs. baseline avg', 'Z-score of mean shift', 'Absolute Z-score (number of SD''s outside mean) of prior avg - incremental avg', 'general_type=''N'' AND distinct_value_ct > 10 AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%''', NULL, NULL, 'baseline_value_ct,baseline_sum,baseline_avg,baseline_sd,threshold_value', 'value_ct,(avg_value * value_ct)::FLOAT,avg_value,stdev_value,2', 'Value Count at Baseline,Sum at Baseline,Mean Value at Baseline,Std Deviation at Baseline,Threshold Max Z-Score', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Maximum Z-Score (number of SD''s beyond mean) expected', 'This is a more sensitive test than Average Shift, because it calculates an incremental difference in the average of new values compared to the average of values at baseline. This is appropriate for a cumulative dataset only, because it calculates the average of new entries based on the assumption that the count and average of records present at baseline are still present at the time of the test. This test compares the mean of new values with the standard deviation of the baseline average to calculate a Z-score. If the new mean falls outside the Z-score threshold, a shift is detected. Potential Z-score thresholds may range from 0 to 3, depending on the sensitivity you prefer. A failed test could indicate a quality issue or a legitimate shift in new data that should be noted and assessed by business users. Consider this test along with Variability Increase. If variability rises too, process, methodology or measurement flaws could be at issue. 
If variability remains consistent, the problem is more likely to be with the source data itself.', 'Y'),
|
|
116
|
+
('1018', 'LOV_All', 'Value Match All', 'List of expected values all present in column', 'Tests that all values match a pipe-delimited list of expected values and that all expected values are present', 'Column values found don''t exactly match the expected list of values', 'Values found', NULL, NULL, NULL, NULL, 'threshold_value', NULL, 'List of Expected Values', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'List of values expected, in form (''Val1'',''Val2'')', 'This is a more restrictive form of Value Match, testing that all values in the dataset match the list provided, and also that all values present in the list appear at least once in the dataset. This would be appropriate for tables where all category values in the column are represented at least once.', 'Y'),
|
|
117
|
+
('1019', 'LOV_Match', 'Value Match', 'All column values present in expected list', 'Tests that all values in the column match the list-of-values identified in baseline data.', 'Values not matching expected List-of-Values from baseline.', 'Non-matching records', NULL, 'top_freq_values > '''' AND distinct_value_ct BETWEEN 2 and 10 AND NOT (functional_data_type LIKE ''%date%'' OR lower(datatype_suggestion) LIKE ''%date%'' OR general_type = ''D'' OR lower(column_name) IN (''file_name'', ''filename''))', NULL, NULL, 'baseline_value,threshold_value', '''('' || SUBSTRING( CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 2) > '''' THEN '','''''' || TRIM( REPLACE ( SPLIT_PART(top_freq_values, ''|'' , 2), '''''''' , '''''''''''' ) ) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 4) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 4), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 6) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 6), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 8) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 8), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 10) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 10), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 12) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 12), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 14) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 14), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 16) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 16), 
'''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 18) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 18), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 20) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 20), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END, 2, 999) || '')'',0', 'List of Expected Values,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'List of values expected, in form (''Val1'',''Val2)', 'This tests that all values in the column match the hard-coded list provided. This is relevant when the list of allowable values is small and not expected to change often. Even if new values might occasionally be added, this test is useful for downstream data products to provide warning that assumptions and logic may need to change.', 'Y'),
|
|
118
|
+
('1020', 'Min_Date', 'Minimum Date', 'All dates on or after set minimum', 'Tests that the earliest date referenced in the column is no earlier than baseline data', 'The earliest date value found is before the earliest value at baseline.', 'Dates prior to limit', NULL, 'general_type=''D''and min_date IS NOT NULL AND distinct_value_ct > 1', NULL, NULL, 'baseline_value,threshold_value', 'min_date,0', 'Minimum Date at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of dates prior to minimum', 'This test is appropriate for a cumulative dataset only, because it assumes all prior values are still present. It''s appropriate where new records are added with more recent dates, but old dates do not change.', 'Y'),
|
|
119
|
+
('1021', 'Min_Val', 'Minimum Value', 'All values at or above set minimum', 'Tests that the minimum value present in the column is no lower than the minimum value in baseline data', 'Minimum column value less than baseline.', 'Values under limit', NULL, 'general_type=''N''and min_value IS NOT NULL AND (distinct_value_ct >= 2 OR (distinct_value_ct=2 and min_value<>0 and max_value<>1))', NULL, NULL, 'baseline_value,threshold_value', 'min_value,0', 'Minimum Value at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of values under limit', 'This test is appropriate for a cumulative dataset only, assuming all prior values are still present. It is also appropriate for any measure that has an absolute, definable minimum value, or a heuristic that makes sense for valid data.', 'Y'),
|
|
120
|
+
('1022', 'Missing_Pct', 'Percent Missing', 'Consistent ratio of missing values', 'Tests for statistically-significant shift in percentage of missing values in column vs. baseline data', 'Significant shift in percent of missing values vs. baseline.', 'Difference measure', 'Cohen''s H Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'record_ct <> value_ct', NULL, NULL, 'baseline_ct,baseline_value_ct,threshold_value', 'record_ct,value_ct,2::VARCHAR(10)', 'Baseline Record Count,Baseline Value Count,Standardized Difference Measure', NULL, 'Warning', 'CAT', 'column', 'Completeness', 'Data Drift', 'Expected maximum Cohen''s H Difference', 'This test uses Cohen''s H, a statistical test to identify a significant difference between two ratios. Results are reported on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. An uptick in missing data may indicate a collection issue at the source. A larger change may indicate a processing failure. A drop in missing data may also be significant, if it affects assumptions built into analytic products downstream. You can refine the expected threshold value as you view legitimate results of the measure over time.', 'Y'),
|
|
121
|
+
('1023', 'Monthly_Rec_Ct', 'Monthly Records', 'At least one date per month present within date range', 'Tests for presence of at least one date per calendar month within min/max date range, per baseline data', 'At least one date per month expected in min/max date range.', 'Missing months', 'Calendar months without date values present', 'general_type= ''D'' AND date_days_present > 1 AND date_months_present > 2 AND date_months_present - (datediff( ''MON'' , min_date, max_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', NULL, NULL, 'threshold_value', '0', 'Threshold Count of Months without Dates', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Volume', 'Expected maximum count of calendar months without dates present', 'Monthly Records tests that at least one record is present for every calendar month within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each month. A failure here would suggest missing records for the number of months identified without data. You can adjust the threshold to accept a number of months that you know legitimately have no records.', 'Y'),
|
|
122
|
+
('1024', 'Outlier_Pct_Above', 'Outliers Above', 'Consistent outlier counts over 2 SD above mean', 'Tests that percent of outliers over 2 SD above Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD above the mean is greater than expected threshold.', 'Pct records over limit', NULL, 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over upper 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations above the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. ', 'Y'),
|
|
123
|
+
('1025', 'Outlier_Pct_Below', 'Outliers Below', 'Consistent outlier counts under 2 SD below mean', 'Tests that percent of outliers over 2 SD below Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD below the mean is greater than expected threshold.', 'Pct records under limit', NULL, 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over lower 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations below the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. ', 'Y'),
|
|
124
|
+
('1026', 'Pattern_Match', 'Pattern Match', 'Column values match alpha-numeric pattern', 'Tests that all values in the column match the same alpha-numeric pattern identified in baseline data', 'Alpha values do not match consistent pattern in baseline.', 'Pattern Mismatches', NULL, 'fn_charcount(top_patterns, E'' \| '' ) = 1 AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > '''' AND distinct_value_ct > 10', NULL, NULL, 'baseline_value,threshold_value', 'trim(REPLACE(REPLACE(REPLACE(SPLIT_PART(top_patterns, '' | '', 2), ''A'', ''[A-Z]''), ''N'', ''[0-9]''), ''a'', ''[a-z]'')),0', 'Pattern at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of pattern mismatches', 'This test is appropriate for character fields that are expected to appear in a consistent format. It uses pattern matching syntax as appropriate for your database: REGEX matching if available, otherwise LIKE expressions. The expected threshold is the number of records that fail to match the defined pattern.', 'Y'),
|
|
125
|
+
('1028', 'Recency', 'Recency', 'Latest date within expected range of test date', 'Tests that the latest date in column is within a set number of days of the test date', 'Most recent date value not within expected days of test date.', 'Days before test', 'Number of days that most recent date precedes the date of test', 'general_type= ''D'' AND max_date <= run_date AND NOT column_name IN ( ''filedate'' , ''file_date'' ) AND NOT functional_data_type IN (''Future Date'', ''Schedule Date'') AND DATEDIFF( ''DAY'' , max_date, run_date) <= 62', NULL, NULL, 'threshold_value', 'CASE WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 3 THEN DATEDIFF(''DAY'', max_date, run_date) + 3 WHEN DATEDIFF(''DAY'', max_date, run_date) <= 7 then DATEDIFF(''DAY'', max_date, run_date) + 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 7.0) * 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) > 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 30.0) * 30 END', 'Threshold Maximum Days before Test', NULL, 'Warning', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected maximum count of days preceding test date', 'This test evaluates recency based on the latest referenced dates in the column. The test is appropriate for transactional dates and timestamps. The test can be especially valuable because timely data deliveries themselves may not assure that the most recent data is present. You can adjust the expected threshold to the maximum number of days that you expect the data to age before the dataset is refreshed. ', 'Y'),
|
|
126
|
+
('1030', 'Required', 'Required Entry', 'Required non-null value present', 'Tests that a non-null value is present in each record for the column, consistent with baseline data', 'Every record for this column is expected to be filled, but some are missing.', 'Missing values', NULL, 'record_ct = value_ct', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Value Count', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Schema Drift', 'Expected count of missing values', NULL, 'Y'),
|
|
127
|
+
('1033', 'Street_Addr_Pattern', 'Street Address', 'Enough street address entries match defined pattern', 'Tests for percent of records matching standard street address pattern.', 'Percent of values matching standard street address format is under expected threshold.', 'Percent matches', 'Percent of records that match street address pattern', '(std_pattern_match=''STREET_ADDR'') AND (avg_length <> round(avg_length)) AND (avg_embedded_spaces BETWEEN 2 AND 6) AND (avg_length < 35)', NULL, NULL, 'threshold_value', '75', 'Threshold Pct that Match Address Pattern', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected percent of records that match standard street address pattern', 'The street address pattern used in this test should match the vast majority of USA addresses. You can adjust the threshold percent of matches based on the results you are getting -- you may well want to tighten it to make the test more sensitive to invalid entries.', 'Y'),
|
|
128
|
+
('1034', 'Unique', 'Unique Values', 'Each column value is unique', 'Tests that no values for the column are repeated in multiple records.', 'Column values should be unique per row.', 'Duplicate values', 'Count of non-unique values', 'record_ct > 500 and record_ct = distinct_value_ct and value_ct > 0', NULL, NULL, 'threshold_value', '0', 'Threshold Duplicate Value Count', NULL, 'Fail', 'CAT', 'column', 'Uniqueness', 'Schema Drift', 'Expected count of duplicate values', 'This test is ideal when the database itself does not enforce a primary key constraint on the table. It serves as an independent check on uniqueness. It''s also useful when there are a small number of exceptions to uniqueness, which can be reflected in the expected threshold count of duplicates.', 'Y'),
|
|
129
|
+
('1035', 'Unique_Pct', 'Percent Unique', 'Consistent ratio of unique values', 'Tests for statistically-significant shift in percentage of unique values vs. baseline data.', 'Significant shift in percent of unique values vs. baseline.', 'Difference measure', 'Cohen''s H Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'distinct_value_ct > 10', NULL, NULL, 'baseline_value_ct,baseline_unique_ct,threshold_value', 'value_ct,distinct_value_ct,0.5', 'Value Count at Baseline,Distinct Value Count at Baseline,Standardized Difference Measure (0 to 1)', NULL, 'Warning', 'CAT', 'column', 'Uniqueness', 'Data Drift', 'Expected maximum Cohen''s H Difference', 'You can think of this as a test of similarity that measures whether the percentage of unique values is consistent with the percentage at baseline. A significant change might indicate duplication or a telling shift in cardinality between entities. The test uses Cohen''s H, a statistical test to identify a significant difference between two ratios. Results are reported on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. You can refine the expected threshold value as you view legitimate results of the measure over time.', 'Y'),
|
|
130
|
+
('1036', 'US_State', 'US State', 'Column value is two-letter US state code', 'Tests that the recorded column value is a valid US state.', 'Column Value is not a valid US state.', 'Not US States', 'Values that do not match 2-character US state abbreviations.', 'general_type= ''A'' AND column_name ILIKE ''%state%'' AND distinct_value_ct < 70 AND max_length = 2', NULL, NULL, 'threshold_value', '0', 'Threshold Count not Matching State Abbreviations', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of values that are not US state abbreviations', 'This test validates entries against a fixed list of two-character US state codes and related Armed Forces codes.', 'Y'),
|
|
131
|
+
('1037', 'Weekly_Rec_Ct', 'Weekly Records', 'At least one date per week present within date range', 'Tests for presence of at least one date per calendar week within min/max date range, per baseline data', 'At least one date per week expected in min/max date range.', 'Missing weeks', 'Calendar weeks without date values present', 'general_type= ''D'' AND date_days_present > 1 AND date_weeks_present > 3 AND date_weeks_present - (DATEDIFF(''week'', ''1800-01-05''::DATE, max_date) - DATEDIFF(''week'', ''1800-01-05''::DATE, min_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', NULL, NULL, 'threshold_value', '0', 'Threshold Weeks without Dates', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Volume', 'Expected maximum count of calendar weeks without dates present', 'Weekly Records tests that at least one record is present for every calendar week within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each week. A failure here would suggest missing records for the number of weeks identified without data. You can adjust the threshold to accept a number of weeks that you know legitimately have no records.', 'Y'),
|
|
132
|
+
('1040', 'Variability_Increase', 'Variability Increase', 'Variability has increased above threshold', 'Tests that the spread or dispersion of column values has increased significantly over baseline, indicating a drop in stability of the measure.', 'The Standard Deviation of the measure has increased beyond the defined threshold. This could signal a change in a process or a data quality issue.', 'Pct SD shift', 'Percent of baseline Standard Deviation', 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', NULL, NULL, 'baseline_sd,threshold_value', 'stdev_value,120', 'Std Deviation at Baseline,Expected Maximum Percent', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct of baseline Standard Deviation (SD)', 'This test looks for percent shifts in standard deviation as a measure of the stability of a measure over time. A significant change could indicate that new values are erroneous, or that the cohort being evaluated is significantly different from baseline. An increase in particular could mark new problems in measurement, a more heterogeneous cohort, or that significant outliers have been introduced. Consider this test along with Average Shift and New Shift. If the average shifts as well, there may be a fundamental shift in the dataset or process used to collect the data point. This might suggest a data shift that should be noted and assessed by business users. If the average does not shift, this may point to a data quality or data collection problem. ', 'Y'),
|
|
133
|
+
('1041', 'Variability_Decrease', 'Variability Decrease', 'Variability has decreased below threshold', 'Tests that the spread or dispersion of column values has decreased significantly over baseline, indicating a shift in stability of the measure. This could signal a change in a process or a data quality issue.', 'The Standard Deviation of the measure has decreased below the defined threshold. This could signal a change in a process or a data quality issue.', 'Pct SD shift', 'Percent of baseline Standard Deviation', 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', NULL, NULL, 'baseline_sd,threshold_value', 'stdev_value, 80', 'Std Deviation at Baseline,Expected Minimum Percent', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected minimum pct of baseline Standard Deviation (SD)', 'This test looks for percent shifts in standard deviation as a measure of the stability of a measure over time. A significant change could indicate that new values are erroneous, or that the cohort being evaluated is significantly different from baseline. A decrease in particular could indicate an improved process, better precision in measurement, the elimination of outliers, or a more homogeneous cohort. ', 'Y'),
|
|
134
|
+
('1042', 'Valid_Month', 'Valid Month', 'Valid calendar month in expected format', 'Tests for the presence of a valid representation of a calendar month consistent with the format at baseline.', 'Column values are not a valid representation of a calendar month consistent with the format at baseline.', 'Invalid months', NULL, 'functional_data_type = ''Period Month''', NULL, NULL, 'threshold_value,baseline_value', '0,CASE WHEN max_length > 3 AND initcap(min_text) = min_text THEN ''''''January'''',''''February'''',''''March'''',''''April'''',''''May'''',''''June'''',''''July'''',''''August'''',''''September'''',''''October'''',''''November'''',''''December'''''' WHEN max_length > 3 AND upper(min_text) = min_text THEN ''''''JANUARY'''',''''FEBRUARY'''',''''MARCH'''',''''APRIL'''',''''MAY'''',''''JUNE'''',''''JULY'''',''''AUGUST'''',''''SEPTEMBER'''',''''OCTOBER'''',''''NOVEMBER'''',''''DECEMBER'''''' WHEN max_length > 3 AND lower(min_text) = min_text THEN ''''''january'''',''''february'''',''''march'''',''''april'''',''''may'''',''''june'''',''''july'''',''''august'''',''''september'''',''''october'''',''''november'''',''''december'''''' WHEN max_length = 3 AND initcap(min_text) = min_text THEN ''''''Jan'''',''''Feb'''',''''Mar'''',''''Apr'''',''''May'''',''''Jun'''',''''Jul'''',''''Aug'''',''''Sep'''',''''Oct'''',''''Nov'''',''''Dec'''''' WHEN max_length = 3 AND upper(min_text) = min_text THEN ''''''JAN'''',''''FEB'''',''''MAR'''',''''APR'''',''''MAY'''',''''JUN'''',''''JUL'''',''''AUG'''',''''SEP'''',''''OCT'''',''''NOV'''',''''DEC'''''' WHEN max_length = 3 AND lower(min_text) = min_text THEN ''''''jan'''',''''feb'''',''''mar'''',''''apr'''',''''may'''',''''jun'''',''''jul'''',''''aug'''',''''sep'''',''''oct'''',''''nov'''',''''dec'''''' WHEN max_length = 2 AND min_text = ''01'' THEN ''''''01'''',''''02'''',''''03'''',''''04'''',''''05'''',''''06'''',''''07'''',''''08'''',''''09'''',''''10'''',''''11'''',''''12'''''' WHEN max_length = 2 AND min_text = ''1'' THEN 
''''''1'''',''''2'''',''''3'''',''''4'''',''''5'''',''''6'''',''''7'''',''''8'''',''''9'''',''''10'''',''''11'''',''''12'''''' WHEN min_value = 1 THEN ''1,2,3,4,5,6,7,8,9,10,11,12'' ELSE ''NULL'' END', 'Threshold Invalid Months,Valid Month List', 'The acceptable number of records with invalid months present.|List of valid month values for this field, in quotes if field is numeric, separated by commas.', 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of invalid months', NULL, 'N'),
|
|
135
|
+
('1043', 'Valid_Characters', 'Valid Characters', 'Column contains no invalid characters', 'Tests for the presence of non-printing characters, leading spaces, or surrounding quotes.', 'Invalid characters, such as non-printing characters, leading spaces, or surrounding quotes, were found.', 'Invalid records', 'Expected count of values with invalid characters', 'general_type = ''A''', NULL, NULL, 'threshold_value', '0', NULL, 'The acceptable number of records with invalid character values present.', 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Value Count', 'This test looks for the presence of non-printing ASCII characters that are considered non-standard in basic text processing. It also identifies leading spaces and values enclosed in quotes. Values that fail this test may be artifacts of data conversion, or just more difficult to process or analyze downstream.', 'N'),
|
|
136
|
+
('1044', 'Valid_US_Zip', 'Valid US Zip', 'Valid USA Postal Codes', 'Tests that postal codes match the 5 or 9 digit standard US format', 'Invalid US Zip Code formats found.', 'Invalid Zip Codes', 'Expected count of values with invalid Zip Codes', 'functional_data_type = ''Zip''', NULL, NULL, 'threshold_value', '0', NULL, NULL, 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Value Count', NULL, 'Y'),
|
|
137
|
+
('1045', 'Valid_US_Zip3', 'Valid US Zip-3 ', 'Valid USA Zip-3 Prefix', 'Tests that postal codes match the 3 digit format of a regional prefix.', 'Invalid 3-digit US Zip Code regional prefix formats found.', 'Invalid Zip-3 Prefix', 'Expected count of values with invalid Zip-3 Prefix Codes', 'functional_data_type = ''Zip3''', NULL, NULL, 'threshold_value', '0', NULL, NULL, 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Zip3 Count', 'This test looks for the presence of values that fail to match the three-digit numeric code expected for US Zip Code regional prefixes. These prefixes are often used to roll up Zip Code data to a regional level, and may be critical to anonymize detailed data and protect PID. Depending on your needs and regulatory requirements, longer zip codes could place PID at risk.', 'Y'),
|
|
138
|
+
('1006', 'Condition_Flag', 'Custom Condition', 'Column values match pre-defined condition', 'Tests that each record in the table matches a pre-defined, custom condition', 'Value(s) found not matching defined condition.', 'Values Failing', NULL, NULL, 'Test Focus', 'Specify a brief descriptor of the focus of this test that is unique within this Test Suite for the Table and Test Type. This distinguishes this test from others of the same type on the same table. Example: `Quantity Consistency` if you are testing that quantity ordered matches quantity shipped.', 'threshold_value,custom_query', NULL, 'Threshold Error Count,Custom SQL Expression (TRUE on error)', 'The number of errors that are acceptable before test fails.|Expression should evaluate to TRUE to register an error or FALSE if no error. An expression can reference only columns in the selected table.', 'Fail', 'CAT', 'custom', 'Validity', 'Schema Drift', 'Count of records that don''t meet test condition', 'Custom Condition is a business-rule test for a user-defined error condition based on the value of one or more columns. The condition is applied to each record within the table, and the count of records failing the condition is added up. If that count exceeds a threshold of errors, the test as a whole is failed. This test is ideal for error conditions that TestGen cannot automatically infer, and any condition that involves the values of more than one column in the same record. Performance of this test is fast, since it is performed together with other aggregate tests. Interpretation is based on the user-defined meaning of the test.', 'Y'),
|
|
139
|
+
|
|
140
|
+
('1031', 'Row_Ct', 'Row Count', 'Number of rows is at or above threshold', 'Tests that the count of records has not decreased from the baseline count.', 'Row count less than baseline count.', 'Row count', NULL, 'TEMPLATE', NULL, NULL, 'threshold_value', NULL, 'Threshold Minimum Record Count', NULL, 'Fail', 'CAT', 'table', 'Completeness', 'Volume', 'Expected minimum row count', 'Because this tests the row count against a constant minimum threshold, it''s appropriate for any dataset, as long as the number of rows doesn''t radically change from refresh to refresh. But it''s not responsive to change over time. You may want to adjust the threshold periodically if you are dealing with a cumulative dataset.', 'Y'),
|
|
141
|
+
('1032', 'Row_Ct_Pct', 'Row Range', 'Number of rows within percent range of threshold', 'Tests that the count of records is within a percentage above or below the baseline count.', 'Row Count is outside of threshold percent of baseline count.', 'Percent of baseline', 'Row count percent above or below baseline', 'TEMPLATE', NULL, NULL, 'baseline_ct,threshold_value', NULL, 'Baseline Record Count,Threshold Pct Above or Below Baseline', NULL, 'Fail', 'CAT', 'table', 'Completeness', 'Volume', 'Expected percent window below or above baseline', 'This test is better than Row Count for an incremental or windowed dataset where you would expect the row count to range within a percentage of baseline.', 'Y'),
|
|
142
|
+
|
|
143
|
+
('1008', 'CUSTOM', 'Custom Test', 'Custom-defined business rule', 'Custom SQL Query Test', 'Errors were detected according to test definition.', 'Errors found', 'Count of errors identified by query', NULL, 'Test Focus', 'Specify a brief descriptor of the focus of this test that is unique within this Test Suite for the Table and Test Type. This distinguishes this test from others of the same type on the same table. Example: `Order Total Matches Detail` if you are testing that the total in one table matches the sum of lines in another.', 'custom_query', NULL, 'Custom SQL Query Returning Error Records', 'Query should return records indicating one or more errors. The test passes if no records are returned. Results of the query will be shown when you click `Review Source Data` for a failed test, so be sure to include enough data in your results to follow-up. \n\nA query can refer to any tables in the database. You must hard-code the schema or use `{DATA_SCHEMA}` to represent the schema defined for the Table Group.', 'Fail', 'QUERY', 'custom', 'Accuracy', 'Data Drift', 'Expected count of errors found by custom query', 'This business-rule test is highly flexible, covering any error state that can be expressed by a SQL query against one or more tables in the database. In operation, the user-defined query is embedded within a parent query returning the count of error rows identified. Any row returned by the query is interpreted as a single error condition in the test. Note that this query is run independently of other tests, and that performance will be slower, depending in large part on the efficiency of the query you write. Interpretation is based on the user-defined meaning of the test. Your query might be written to return errors in individual rows identified by joining tables. Or it might return an error based on a multi-column aggregate condition returning a single row if an error is found. 
This query is run separately when you click `Review Source Data` from Test Results, so be sure to include enough data in your results to follow-up. Interpretation is based on the user-defined meaning of the test.', 'Y'),
|
|
144
|
+
|
|
145
|
+
('1500', 'Aggregate_Balance', 'Aggregate Balance', 'Aggregate values per group match reference', 'Tests for exact match in aggregate values for each set of column values vs. reference dataset', 'Aggregate measure per set of column values does not exactly match reference dataset.', 'Mismatched measures', NULL, NULL, 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of matching table|Matching table name|Aggregate column expression in matching table: one of `SUM([column_name])` or `COUNT([column_name])`|Condition defining a subset of records in matching table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in matching table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in matching table (e.g. HAVING clause) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of group totals not matching aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations across two different tables. Both tables must be accessible at the same time. 
It''s ideal for confirming that two datasets exactly match -- that the sum of a measure or count of a value hasn''t changed or shifted between categories. Use this test to compare a raw and processed version of the same dataset, or to confirm that an aggregated table exactly matches the detail table that it''s built from. An error here means that one or more value combinations fail to match. New categories or combinations will cause failure.', 'Y'),
|
|
146
|
+
('1501', 'Aggregate_Minimum', 'Aggregate Minimum', 'Aggregate values per group are at or above reference', 'Tests that aggregate values for each set of column values are at least the same as reference dataset', 'Aggregate measure per set of column values is not at least the same as reference dataset.', 'Mismatched measures', NULL, NULL, 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of reference table|Reference table name|Aggregate column expression in reference table (e.g. `SUM(sales)`)|Condition defining a subset of records in reference table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in reference table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in reference table (e.g. HAVING clause) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Accuracy', 'Data Drift', 'Expected count of group totals below aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations, but requires a match or increase in the aggregate value, rather than an exact match, across two different tables. Both tables must be accessible at the same time. 
Use this to confirm that aggregate values have not dropped for any set of categories, even if some values may rise. This test is useful to compare an older and newer version of a cumulative dataset. An error here means that one or more values per category set fail to match or exceed the prior dataset. New categories or combinations are allowed (but can be restricted independently with a Combo_Match test). Both tables must be present to run this test.', 'Y'),
|
|
147
|
+
('1502', 'Combo_Match', 'Reference Match', 'Column values or combinations found in reference', 'Tests for the presence of one or a set of column values in a reference table', 'Column value combinations are not found in reference table values.', 'Missing values', NULL, NULL, 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. \n\nDo not use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'subset_condition,having_condition,match_schema_name,match_table_name,match_groupby_names,match_subset_condition,match_having_condition', NULL, 'Record Subset Condition,Group Subset Condition,Reference Schema Name,Reference Table Name,Matching Columns,Matching Record Subset Condition,Matching Group Subset Condition', 'Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL|Condition based on aggregate expression used to exclude value combinations in source table, written like a condition within a SQL HAVING clause (e.g. `SUM(sales) < 100`) - OPTIONAL|Schema location of matching table|Matching table name|Column Names in reference table used to validate source table values (separated by commas)|Condition defining a subset of records in reference table to match against, written like a condition within a SQL WHERE clause - OPTIONAL|Condition based on aggregate expression used to exclude value combinations in reference table, written like a condition within a SQL HAVING clause (e.g. `SUM(sales) < 100`) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Validity', 'Schema Drift', 'Expected count of non-matching value combinations', 'This test verifies that values, or combinations of values, that are present in the main table are also found in a reference table. This is a useful test for referential integrity between fact and dimension tables. 
You can also use it to confirm the validity of a code or category, or of combinations of values that should only be found together within each record, such as product/size/color. An error here means that one or more category combinations in the main table are not found in the reference table. Both tables must be present to run this test.', 'Y'),
|
|
148
|
+
('1503', 'Distribution_Shift', 'Distribution Shift', 'Probability distribution consistent with reference', 'Tests the closeness of match between two distributions of aggregate measures across combinations of column values, using Jensen-Shannon Divergence test', 'Divergence between two distributions exceeds specified threshold.', 'Divergence level (0-1)', 'Jensen-Shannon Divergence, from 0 (identical distributions), to 1.0 (max divergence)', NULL, 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Do not use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'subset_condition,match_schema_name,match_table_name,match_groupby_names,match_subset_condition', NULL, 'Record Subset Condition,Reference Schema Name,Reference Table Name,Matching Columns to Compare,Matching Record Subset Condition', 'Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL|Schema location of matching table|Matching table name|Column Names in reference table used to compare counts with source table values (separated by commas)|Condition defining a subset of records in reference table to match against, written like a condition within a SQL WHERE clause - OPTIONAL', 'Warning', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected maximum divergence level between 0 and 1', 'This test measures the similarity of two sets of counts per categories, by using their proportional counts as probability distributions. Using Jensen-Shannon divergence, a measure of relative entropy or difference between two distributions, the test assigns a score ranging from 0, meaning that the distributions are identical, to 1, meaning that the distributions are completely unrelated. This test can be used to compare datasets that may not match exactly, but should have similar distributions. 
For example, it is a useful sanity check for data from different sources that you would expect to have a consistent spread, such as shipment of building materials per state and construction projects by state. Scores can be compared over time even if the distributions are not identical -- a dataset can be expected to maintain a comparable divergence score with a reference dataset over time. Both tables must be present to run this test.', 'Y'),
|
|
149
|
+
('1508', 'Timeframe_Combo_Gain', 'Timeframe No Drops', 'Latest timeframe has at least all value combinations from prior period', 'Tests that column values in most recent time-window include at least same as prior time window', 'Column values in most recent time-window don''t include all values in prior window.', 'Mismatched values', NULL, NULL, 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Make sure not to use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', 'The date column used to define the time windows. This must be a DATE or DATETIME type.|Length in days of the time window. The test will compare the most recent period of days to the prior period of the same duration.|Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of missing value combinations', 'This test checks a single transactional table to verify that categorical values or combinations that are present in the most recent time window you define include at least all those found in the prior time window of the same duration. Missing values in the latest time window will trigger the test to fail. New values are permitted. Use this test to confirm that codes or categories are not lost across successive time periods in a transactional table.', 'Y'),
|
|
150
|
+
('1509', 'Timeframe_Combo_Match', 'Timeframe Match', 'Column value combinations from latest timeframe same as prior period', 'Tests for presence of same column values in most recent time-window vs. prior time window', 'Column values don''t match in most recent time-windows.', 'Mismatched values', NULL, NULL, 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Do not use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', NULL, 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of non-matching value combinations', 'This test checks a single transactional table (such as a fact table) to verify that categorical values or combinations that are present in the most recent time window you define match those found in the prior time window of the same duration. New or missing values in the latest time window will trigger the test to fail. Use this test to confirm the consistency in the occurrence of codes or categories across successive time periods in a transactional table.', 'Y'),
|
|
151
|
+
|
|
152
|
+
('1504', 'Aggregate_Pct_Above', 'Aggregate Pct Above', 'Aggregate values per group exceed reference', 'Tests that aggregate values for each set of column values exceed values for reference dataset', 'Aggregate measure per set of column values fails to exceed the reference dataset.', 'Mismatched measures', NULL, NULL, 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_column_names,match_schema_name,match_table_name,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'TODO Fill in default_parm_prompts match_schema_name,TODO Fill in default_parm_prompts match_table_name,TODO Fill in default_parm_prompts match_column_names,TODO Fill in default_parm_prompts match_subset_condition,TODO Fill in default_parm_prompts match_groupby_names,TODO Fill in default_parm_prompts match_having_condition,TODO Fill in default_parm_prompts subset_condition,TODO Fill in default_parm_prompts groupby_names,TODO Fill in default_parm_prompts having_condition', NULL, 'Fail', 'QUERY', 'referential', 'Accuracy', 'Data Drift', 'Expected count of group totals with not exceeding aggregate measure', NULL, 'N'),
|
|
153
|
+
('1505', 'Aggregate_Pct_Within', 'Aggregate Pct Within', 'Aggregate values per group exceed reference', 'Tests that aggregate values for each set of column values exceed values for reference dataset', 'Aggregate measure per set of column values fails to exceed the reference dataset.', 'Mismatched measures', NULL, NULL, 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_column_names,match_schema_name,match_table_name,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'TODO Fill in default_parm_prompts match_schema_name,TODO Fill in default_parm_prompts match_table_name,TODO Fill in default_parm_prompts match_column_names,TODO Fill in default_parm_prompts match_subset_condition,TODO Fill in default_parm_prompts match_groupby_names,TODO Fill in default_parm_prompts match_having_condition,TODO Fill in default_parm_prompts subset_condition,TODO Fill in default_parm_prompts groupby_names,TODO Fill in default_parm_prompts having_condition', NULL, 'Fail', 'QUERY', 'referential', 'Accuracy', 'Data Drift', 'Expected count of group totals with not exceeding aggregate measure', NULL, 'N'),
|
|
154
|
+
('1506', 'Aggregate_Increase', 'Aggregate Increase', 'Aggregate values per group exceed reference', 'Tests that aggregate values for each set of column values exceed values for reference dataset', 'Aggregate measure per set of column values fails to exceed the reference dataset.', 'Mismatched measures', NULL, NULL, 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_column_names,match_schema_name,match_table_name,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'TODO Fill in default_parm_prompts match_schema_name,TODO Fill in default_parm_prompts match_table_name,TODO Fill in default_parm_prompts match_column_names,TODO Fill in default_parm_prompts match_subset_condition,TODO Fill in default_parm_prompts match_groupby_names,TODO Fill in default_parm_prompts match_having_condition,TODO Fill in default_parm_prompts subset_condition,TODO Fill in default_parm_prompts groupby_names,TODO Fill in default_parm_prompts having_condition', NULL, 'Fail', 'QUERY', 'referential', 'Accuracy', 'Data Drift', 'Expected count of group totals below reference value', NULL, 'N')
|
|
155
|
+
;
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
-- Reset the generation_sets lookup table and reload its seed rows.
-- Every row pairs a generation-set name with one test type; the only
-- set populated here is 'Monitor'.
TRUNCATE TABLE generation_sets;

INSERT INTO generation_sets
    (generation_set, test_type)
VALUES
    ('Monitor', 'Recency'),
    ('Monitor', 'Row_Ct'),
    ('Monitor', 'Row_Ct_Pct'),
    ('Monitor', 'Daily_Record_Ct'),
    ('Monitor', 'Monthly_Rec_Ct'),
    ('Monitor', 'Weekly_Rec_Ct');
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
-- Reset the test_templates lookup table and reload its seed rows.
-- Each row maps a (test_type, sql_flavor) pair to the SQL template file
-- used for that combination. Most flavors share the *_generic.sql
-- templates; flavor-specific file names appear only for mssql
-- (Distribution_Shift) and postgresql (the two Timeframe_Combo tests).
TRUNCATE TABLE test_templates;

INSERT INTO test_templates
    (id, test_type, sql_flavor, template_name)
VALUES
    -- redshift
    ('2001', 'Combo_Match',           'redshift',   'ex_data_match_generic.sql'),
    ('2002', 'Aggregate_Minimum',     'redshift',   'ex_aggregate_match_no_drops_generic.sql'),
    ('2003', 'Distribution_Shift',    'redshift',   'ex_relative_entropy_generic.sql'),
    ('2004', 'CUSTOM',                'redshift',   'ex_custom_query_generic.sql'),
    ('2006', 'Aggregate_Balance',     'redshift',   'ex_aggregate_match_same_generic.sql'),
    ('2007', 'Timeframe_Combo_Gain',  'redshift',   'ex_window_match_no_drops_generic.sql'),
    ('2008', 'Timeframe_Combo_Match', 'redshift',   'ex_window_match_same_generic.sql'),
    ('2009', 'Aggregate_Increase',    'redshift',   'ex_aggregate_match_num_incr_generic.sql'),

    -- snowflake
    ('2101', 'Combo_Match',           'snowflake',  'ex_data_match_generic.sql'),
    ('2102', 'Aggregate_Minimum',     'snowflake',  'ex_aggregate_match_no_drops_generic.sql'),
    ('2103', 'Distribution_Shift',    'snowflake',  'ex_relative_entropy_generic.sql'),
    ('2104', 'CUSTOM',                'snowflake',  'ex_custom_query_generic.sql'),
    ('2106', 'Aggregate_Balance',     'snowflake',  'ex_aggregate_match_same_generic.sql'),
    ('2107', 'Timeframe_Combo_Gain',  'snowflake',  'ex_window_match_no_drops_generic.sql'),
    ('2108', 'Timeframe_Combo_Match', 'snowflake',  'ex_window_match_same_generic.sql'),
    ('2109', 'Aggregate_Increase',    'snowflake',  'ex_aggregate_match_num_incr_generic.sql'),

    -- mssql
    ('2201', 'Combo_Match',           'mssql',      'ex_data_match_generic.sql'),
    ('2202', 'Aggregate_Minimum',     'mssql',      'ex_aggregate_match_no_drops_generic.sql'),
    ('2203', 'Distribution_Shift',    'mssql',      'ex_relative_entropy_mssql.sql'),
    ('2204', 'CUSTOM',                'mssql',      'ex_custom_query_generic.sql'),
    ('2206', 'Aggregate_Balance',     'mssql',      'ex_aggregate_match_same_generic.sql'),
    ('2207', 'Timeframe_Combo_Gain',  'mssql',      'ex_window_match_no_drops_generic.sql'),
    ('2208', 'Timeframe_Combo_Match', 'mssql',      'ex_window_match_same_generic.sql'),
    ('2209', 'Aggregate_Increase',    'mssql',      'ex_aggregate_match_num_incr_generic.sql'),

    -- postgresql
    ('2301', 'Combo_Match',           'postgresql', 'ex_data_match_generic.sql'),
    ('2302', 'Aggregate_Minimum',     'postgresql', 'ex_aggregate_match_no_drops_generic.sql'),
    ('2303', 'Distribution_Shift',    'postgresql', 'ex_relative_entropy_generic.sql'),
    ('2304', 'CUSTOM',                'postgresql', 'ex_custom_query_generic.sql'),
    ('2306', 'Aggregate_Balance',     'postgresql', 'ex_aggregate_match_same_generic.sql'),
    ('2307', 'Timeframe_Combo_Gain',  'postgresql', 'ex_window_match_no_drops_postgresql.sql'),
    ('2308', 'Timeframe_Combo_Match', 'postgresql', 'ex_window_match_same_postgresql.sql'),
    ('2309', 'Aggregate_Increase',    'postgresql', 'ex_aggregate_match_num_incr_generic.sql');
|
|
207
|
+
|
|
208
|
+
-- Remove every existing cat_test_conditions row; the INSERT that follows reloads the table.
TRUNCATE TABLE cat_test_conditions;
|
|
209
|
+
|
|
210
|
+
INSERT INTO cat_test_conditions (id, test_type, sql_flavor, measure, test_operator, test_condition)
|
|
211
|
+
VALUES ('1001', 'Alpha_Trunc', 'redshift', 'MAX(LENGTH({COLUMN_NAME}))', '<', '{THRESHOLD_VALUE}'),
|
|
212
|
+
('1002', 'Avg_Shift', 'redshift', 'ABS( (AVG({COLUMN_NAME}::FLOAT) - {BASELINE_AVG}) / SQRT(((COUNT({COLUMN_NAME})::FLOAT-1)*STDDEV({COLUMN_NAME})^2 + ({BASELINE_VALUE_CT}::FLOAT-1) * {BASELINE_SD}::FLOAT^2) /NULLIF(COUNT({COLUMN_NAME})::FLOAT + {BASELINE_VALUE_CT}::FLOAT, 0) ))', '>=', '{THRESHOLD_VALUE}'),
|
|
213
|
+
('1003', 'Condition_Flag', 'redshift', 'SUM(CASE WHEN {CUSTOM_QUERY} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
214
|
+
('1004', 'Constant', 'redshift', 'SUM(CASE WHEN {COLUMN_NAME} <> {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
215
|
+
('1005', 'Daily_Record_Ct', 'redshift', 'DATEDIFF(''DAY'', MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'),
|
|
216
|
+
('1006', 'Dec_Trunc', 'redshift', 'ROUND(SUM(ABS({COLUMN_NAME})::DECIMAL(18,4) % 1), 0)', '<', '{THRESHOLD_VALUE}'),
|
|
217
|
+
('1007', 'Distinct_Date_Ct', 'redshift', 'COUNT(DISTINCT {COLUMN_NAME})', '<', '{THRESHOLD_VALUE}'),
|
|
218
|
+
('1008', 'Distinct_Value_Ct', 'redshift', 'COUNT(DISTINCT {COLUMN_NAME})', '<>', '{THRESHOLD_VALUE}'),
|
|
219
|
+
('1009', 'Email_Format', 'redshift', 'SUM(CASE WHEN {COLUMN_NAME} !~ ''^[A-Za-z0-9._''''%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
220
|
+
('1010', 'Future_Date', 'redshift', 'SUM(GREATEST(0, SIGN({COLUMN_NAME}::DATE - ''{RUN_DATE}''::DATE)))', '>', '{THRESHOLD_VALUE}'),
|
|
221
|
+
('1011', 'Future_Date_1Y', 'redshift', 'SUM(GREATEST(0, SIGN({COLUMN_NAME}::DATE - (''{RUN_DATE}''::DATE+365))))', '>', '{THRESHOLD_VALUE}'),
|
|
222
|
+
('1012', 'Incr_Avg_Shift', 'redshift', 'NVL(ABS( ({BASELINE_AVG} - (SUM({COLUMN_NAME}) - {BASELINE_SUM}) / NULLIF(COUNT({COLUMN_NAME})::FLOAT - {BASELINE_VALUE_CT}, 0)) / {BASELINE_SD} ), 0)', '>=', '{THRESHOLD_VALUE}'),
|
|
223
|
+
('1013', 'LOV_All', 'redshift', 'LISTAGG(DISTINCT {COLUMN_NAME}, ''|'') WITHIN GROUP (ORDER BY {COLUMN_NAME})', '<>', '{THRESHOLD_VALUE}'),
|
|
224
|
+
('1014', 'LOV_Match', 'redshift', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
225
|
+
('1015', 'Min_Date', 'redshift', 'SUM(CASE WHEN {COLUMN_NAME} < ''{BASELINE_VALUE}'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
226
|
+
('1016', 'Min_Val', 'redshift', 'SUM(CASE WHEN {COLUMN_NAME} < {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
227
|
+
('1017', 'Missing_Pct', 'redshift', 'ABS( 2.0 * ASIN( SQRT( {BASELINE_VALUE_CT}::FLOAT / {BASELINE_CT}::FLOAT ) ) - 2 * ASIN( SQRT( COUNT( {COLUMN_NAME} )::FLOAT / NULLIF(COUNT(*), 0)::FLOAT )) )', '>=', '{THRESHOLD_VALUE}'),
|
|
228
|
+
('1018', 'Monthly_Rec_Ct', 'redshift', '(MAX(DATEDIFF(month, {COLUMN_NAME}, ''{RUN_DATE}''::DATE)) - MIN(DATEDIFF(month, {COLUMN_NAME}, ''{RUN_DATE}''::DATE)) + 1) - COUNT(DISTINCT DATEDIFF(month, {COLUMN_NAME}, ''{RUN_DATE}''::DATE))', '>', '{THRESHOLD_VALUE}'),
|
|
229
|
+
('1019', 'Outlier_Pct_Above', 'redshift', 'SUM(CASE WHEN {COLUMN_NAME}::FLOAT > {BASELINE_AVG}+(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT({COLUMN_NAME}), 0)::FLOAT', '>', '{THRESHOLD_VALUE}'),
|
|
230
|
+
('1020', 'Outlier_Pct_Below', 'redshift', 'SUM(CASE WHEN {COLUMN_NAME}::FLOAT < {BASELINE_AVG}-(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT({COLUMN_NAME}), 0)::FLOAT', '>', '{THRESHOLD_VALUE}'),
|
|
231
|
+
('1021', 'Pattern_Match', 'redshift', 'COUNT(NULLIF({COLUMN_NAME}, '''')) - SUM((NULLIF({COLUMN_NAME}, '''') SIMILAR TO ''{BASELINE_VALUE}'')::BIGINT)', '>', '{THRESHOLD_VALUE}'),
|
|
232
|
+
('1022', 'Recency', 'redshift', 'DATEDIFF(''D'', MAX({COLUMN_NAME}), ''{RUN_DATE}''::DATE)', '>', '{THRESHOLD_VALUE}'),
|
|
233
|
+
('1023', 'Required', 'redshift', 'COUNT(*) - COUNT( {COLUMN_NAME} )', '>', '{THRESHOLD_VALUE}'),
|
|
234
|
+
('1024', 'Row_Ct', 'redshift', 'COUNT(*)', '<', '{THRESHOLD_VALUE}'),
|
|
235
|
+
('1025', 'Row_Ct_Pct', 'redshift', 'ABS(ROUND(100.0 * (COUNT(*) - {BASELINE_CT})::FLOAT / {BASELINE_CT}::FLOAT, 2))', '>', '{THRESHOLD_VALUE}'),
|
|
236
|
+
('1026', 'Street_Addr_Pattern', 'redshift', '100.0*SUM(({COLUMN_NAME} ~ ''^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$'')::BIGINT)::FLOAT / NULLIF(COUNT({COLUMN_NAME}), 0)::FLOAT', '<', '{THRESHOLD_VALUE}'),
|
|
237
|
+
('1027', 'US_State', 'redshift', 'SUM(CASE WHEN {COLUMN_NAME} NOT IN ('''',''AL'',''AK'',''AS'',''AZ'',''AR'',''CA'',''CO'',''CT'',''DE'',''DC'',''FM'',''FL'',''GA'',''GU'',''HI'',''ID'',''IL'',''IN'',''IA'',''KS'',''KY'',''LA'',''ME'',''MH'',''MD'',''MA'',''MI'',''MN'',''MS'',''MO'',''MT'',''NE'',''NV'',''NH'',''NJ'',''NM'',''NY'',''NC'',''ND'',''MP'',''OH'',''OK'',''OR'',''PW'',''PA'',''PR'',''RI'',''SC'',''SD'',''TN'',''TX'',''UT'',''VT'',''VI'',''VA'',''WA'',''WV'',''WI'',''WY'',''AE'',''AP'',''AA'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
238
|
+
('1028', 'Unique', 'redshift', 'COUNT(*) - COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'),
|
|
239
|
+
('1029', 'Unique_Pct', 'redshift', 'ABS( 2.0 * ASIN( SQRT({BASELINE_UNIQUE_CT}::FLOAT / {BASELINE_VALUE_CT}::FLOAT ) ) - 2 * ASIN( SQRT( COUNT( DISTINCT {COLUMN_NAME} )::FLOAT / NULLIF(COUNT( {COLUMN_NAME} ), 0)::FLOAT )) )', '>=', '{THRESHOLD_VALUE}'),
|
|
240
|
+
('1030', 'Weekly_Rec_Ct', 'redshift', 'MAX(DATEDIFF(week, ''1800-01-01''::DATE, {COLUMN_NAME})) - MIN(DATEDIFF(week, ''1800-01-01''::DATE, {COLUMN_NAME}))+1 - COUNT(DISTINCT DATEDIFF(week, ''1800-01-01''::DATE, {COLUMN_NAME}))', '>', '{THRESHOLD_VALUE}'),
|
|
241
|
+
('2001', 'Alpha_Trunc', 'snowflake', 'MAX(LENGTH({COLUMN_NAME}))', '<', '{THRESHOLD_VALUE}'),
|
|
242
|
+
('2002', 'Avg_Shift', 'snowflake', 'ABS( (AVG({COLUMN_NAME}::FLOAT) - {BASELINE_AVG}) / SQRT(((COUNT({COLUMN_NAME})::FLOAT-1)*POWER(STDDEV({COLUMN_NAME}),2) + ({BASELINE_VALUE_CT}::FLOAT-1) * POWER({BASELINE_SD}::FLOAT,2)) /NULLIF(COUNT({COLUMN_NAME})::FLOAT + {BASELINE_VALUE_CT}::FLOAT, 0) ))', '>=', '{THRESHOLD_VALUE}'),
|
|
243
|
+
('2003', 'Condition_Flag', 'snowflake', 'SUM(CASE WHEN {CUSTOM_QUERY} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
244
|
+
('2004', 'Constant', 'snowflake', 'SUM(CASE WHEN {COLUMN_NAME} <> {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
245
|
+
('2005', 'Daily_Record_Ct', 'snowflake', 'DATEDIFF(day, MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'),
|
|
246
|
+
('2006', 'Dec_Trunc', 'snowflake', 'ROUND(SUM(ABS({COLUMN_NAME})::DECIMAL(18,4) % 1), 0)', '<', '{THRESHOLD_VALUE}'),
|
|
247
|
+
('2007', 'Distinct_Date_Ct', 'snowflake', 'COUNT(DISTINCT {COLUMN_NAME})', '<', '{THRESHOLD_VALUE}'),
|
|
248
|
+
('2008', 'Distinct_Value_Ct', 'snowflake', 'COUNT(DISTINCT {COLUMN_NAME})', '<>', '{THRESHOLD_VALUE}'),
|
|
249
|
+
('2009', 'Email_Format', 'snowflake', 'SUM(CASE WHEN NOT REGEXP_LIKE({COLUMN_NAME}::VARCHAR, ''^[A-Za-z0-9._''''%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
250
|
+
('2010', 'Future_Date', 'snowflake', 'SUM(GREATEST(0, SIGN({COLUMN_NAME}::DATE - ''{RUN_DATE}''::DATE)))', '>', '{THRESHOLD_VALUE}'),
|
|
251
|
+
('2011', 'Future_Date_1Y', 'snowflake', 'SUM(GREATEST(0, SIGN({COLUMN_NAME}::DATE - (''{RUN_DATE}''::DATE+365))))', '>', '{THRESHOLD_VALUE}'),
|
|
252
|
+
('2012', 'Incr_Avg_Shift', 'snowflake', 'COALESCE(ABS( ({BASELINE_AVG} - (SUM({COLUMN_NAME}) - {BASELINE_SUM}) / NULLIF(COUNT({COLUMN_NAME})::FLOAT - {BASELINE_VALUE_CT}, 0)) / {BASELINE_SD} ), 0)', '>=', '{THRESHOLD_VALUE}'),
|
|
253
|
+
('2013', 'LOV_All', 'snowflake', 'LISTAGG(DISTINCT {COLUMN_NAME}, ''|'') WITHIN GROUP (ORDER BY {COLUMN_NAME})', '<>', '{THRESHOLD_VALUE}'),
|
|
254
|
+
('2014', 'LOV_Match', 'snowflake', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
255
|
+
('2015', 'Min_Date', 'snowflake', 'SUM(CASE WHEN {COLUMN_NAME} < ''{BASELINE_VALUE}'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
256
|
+
('2016', 'Min_Val', 'snowflake', 'SUM(CASE WHEN {COLUMN_NAME} < {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
257
|
+
('2017', 'Missing_Pct', 'snowflake', 'ABS( 2.0 * ASIN( SQRT( {BASELINE_VALUE_CT}::FLOAT / {BASELINE_CT}::FLOAT ) ) - 2 * ASIN( SQRT( COUNT( {COLUMN_NAME} )::FLOAT / NULLIF(COUNT(*), 0)::FLOAT )) )', '>=', '{THRESHOLD_VALUE}'),
|
|
258
|
+
('2018', 'Monthly_Rec_Ct', 'snowflake', '(MAX(DATEDIFF(month, {COLUMN_NAME}, ''{RUN_DATE}''::DATE)) - MIN(DATEDIFF(month, {COLUMN_NAME}, ''{RUN_DATE}''::DATE)) + 1) - COUNT(DISTINCT DATEDIFF(month, {COLUMN_NAME}, ''{RUN_DATE}''::DATE))', '>', '{THRESHOLD_VALUE}'),
|
|
259
|
+
('2019', 'Outlier_Pct_Above', 'snowflake', 'SUM(CASE WHEN {COLUMN_NAME}::FLOAT > {BASELINE_AVG}+(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT({COLUMN_NAME}), 0)::FLOAT', '>', '{THRESHOLD_VALUE}'),
|
|
260
|
+
('2020', 'Outlier_Pct_Below', 'snowflake', 'SUM(CASE WHEN {COLUMN_NAME}::FLOAT < {BASELINE_AVG}-(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT({COLUMN_NAME}), 0)::FLOAT', '>', '{THRESHOLD_VALUE}'),
|
|
261
|
+
('2021', 'Pattern_Match', 'snowflake', 'COUNT(NULLIF({COLUMN_NAME}, '''')) - SUM(REGEXP_LIKE(NULLIF({COLUMN_NAME}::VARCHAR, ''''), ''{BASELINE_VALUE}'')::BIGINT)', '>', '{THRESHOLD_VALUE}'),
|
|
262
|
+
('2022', 'Recency', 'snowflake', 'DATEDIFF(''D'', MAX({COLUMN_NAME}), ''{RUN_DATE}''::DATE)', '>', '{THRESHOLD_VALUE}'),
|
|
263
|
+
('2023', 'Required', 'snowflake', 'COUNT(*) - COUNT( {COLUMN_NAME} )', '>', '{THRESHOLD_VALUE}'),
|
|
264
|
+
('2024', 'Row_Ct', 'snowflake', 'COUNT(*)', '<', '{THRESHOLD_VALUE}'),
|
|
265
|
+
('2025', 'Row_Ct_Pct', 'snowflake', 'ABS(ROUND(100.0 * (COUNT(*) - {BASELINE_CT})::FLOAT / {BASELINE_CT}::FLOAT, 2))', '>', '{THRESHOLD_VALUE}'),
|
|
266
|
+
('2026', 'Street_Addr_Pattern', 'snowflake', '100.0*SUM((regexp_like({COLUMN_NAME}::VARCHAR, ''^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$''))::BIGINT)::FLOAT / NULLIF(COUNT({COLUMN_NAME}), 0)::FLOAT', '<', '{THRESHOLD_VALUE}'),
|
|
267
|
+
('2027', 'US_State', 'snowflake', 'SUM(CASE WHEN {COLUMN_NAME} NOT IN ('''',''AL'',''AK'',''AS'',''AZ'',''AR'',''CA'',''CO'',''CT'',''DE'',''DC'',''FM'',''FL'',''GA'',''GU'',''HI'',''ID'',''IL'',''IN'',''IA'',''KS'',''KY'',''LA'',''ME'',''MH'',''MD'',''MA'',''MI'',''MN'',''MS'',''MO'',''MT'',''NE'',''NV'',''NH'',''NJ'',''NM'',''NY'',''NC'',''ND'',''MP'',''OH'',''OK'',''OR'',''PW'',''PA'',''PR'',''RI'',''SC'',''SD'',''TN'',''TX'',''UT'',''VT'',''VI'',''VA'',''WA'',''WV'',''WI'',''WY'',''AE'',''AP'',''AA'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
268
|
+
('2028', 'Unique', 'snowflake', 'COUNT(*) - COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'),
|
|
269
|
+
('2029', 'Unique_Pct', 'snowflake', 'ABS( 2.0 * ASIN( SQRT({BASELINE_UNIQUE_CT}::FLOAT / {BASELINE_VALUE_CT}::FLOAT ) ) - 2 * ASIN( SQRT( COUNT( DISTINCT {COLUMN_NAME} )::FLOAT / NULLIF(COUNT( {COLUMN_NAME} ), 0)::FLOAT )) )', '>=', '{THRESHOLD_VALUE}'),
|
|
270
|
+
('2030', 'Weekly_Rec_Ct', 'snowflake', 'MAX(DATEDIFF(week, ''1800-01-01''::DATE, {COLUMN_NAME})) - MIN(DATEDIFF(week, ''1800-01-01''::DATE, {COLUMN_NAME}))+1 - COUNT(DISTINCT DATEDIFF(week, ''1800-01-01''::DATE, {COLUMN_NAME}))', '>', '{THRESHOLD_VALUE}'),
|
|
271
|
+
('3001', 'Alpha_Trunc', 'mssql', 'MAX(LEN({COLUMN_NAME}))', '<', '{THRESHOLD_VALUE}'),
|
|
272
|
+
('3002', 'Avg_Shift', 'mssql', 'ABS( (AVG(CAST({COLUMN_NAME} AS FLOAT)) - {BASELINE_AVG}) / SQRT(((COUNT({COLUMN_NAME})-1)*POWER(STDEV({COLUMN_NAME}), 2) + ({BASELINE_VALUE_CT}-1) * POWER({BASELINE_SD}, 2)) /NULLIF(COUNT({COLUMN_NAME}) + {BASELINE_VALUE_CT}, 0) ))', '>=', '{THRESHOLD_VALUE}'),
|
|
273
|
+
('3003', 'Condition_Flag', 'mssql', 'SUM(CASE WHEN {CUSTOM_QUERY} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
274
|
+
('3004', 'Constant', 'mssql', 'SUM(CASE WHEN {COLUMN_NAME} <> {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
275
|
+
('3005', 'Daily_Record_Ct', 'mssql', 'DATEDIFF(day, MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'),
|
|
276
|
+
('3006', 'Dec_Trunc', 'mssql', 'ROUND(SUM(ABS(CAST({COLUMN_NAME} AS DECIMAL(18,4))) % 1), 0)', '<', '{THRESHOLD_VALUE}'),
|
|
277
|
+
('3007', 'Distinct_Date_Ct', 'mssql', 'COUNT(DISTINCT {COLUMN_NAME})', '<', '{THRESHOLD_VALUE}'),
|
|
278
|
+
('3008', 'Distinct_Value_Ct', 'mssql', 'COUNT(DISTINCT {COLUMN_NAME})', '<>', '{THRESHOLD_VALUE}'),
|
|
279
|
+
('3009', 'Email_Format', 'mssql', 'SUM(CASE WHEN {COLUMN_NAME} NOT LIKE ''[A-Za-z0-9._''''%+-]%@[A-Za-z0-9.-]%.[A-Za-z][A-Za-z]%'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
280
|
+
('3010', 'Future_Date', 'mssql', 'SUM(CASE WHEN CAST({COLUMN_NAME} AS DATE) >= CONVERT(DATE, ''{RUN_DATE}'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
281
|
+
('3011', 'Future_Date_1Y', 'mssql', 'SUM(CASE WHEN CAST({COLUMN_NAME} AS DATE) >= DATEADD(DAY, 365, CONVERT(DATE, ''{RUN_DATE}'')) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
282
|
+
('3012', 'Incr_Avg_Shift', 'mssql', 'COALESCE(ABS( ({BASELINE_AVG} - (SUM({COLUMN_NAME}) - {BASELINE_SUM}) / NULLIF(CAST(COUNT({COLUMN_NAME}) AS FLOAT) - {BASELINE_VALUE_CT}, 0)) / {BASELINE_SD} ), 0)', '>=', '{THRESHOLD_VALUE}'),
|
|
283
|
+
('3013', 'LOV_All', 'mssql', 'STRING_AGG(DISTINCT {COLUMN_NAME}, ''|'') WITHIN GROUP (ORDER BY {COLUMN_NAME})', '<>', '{THRESHOLD_VALUE}'),
|
|
284
|
+
('3014', 'LOV_Match', 'mssql', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
285
|
+
('3015', 'Min_Date', 'mssql', 'SUM(CASE WHEN {COLUMN_NAME} < ''{BASELINE_VALUE}'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
286
|
+
('3016', 'Min_Val', 'mssql', 'SUM(CASE WHEN {COLUMN_NAME} < {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
287
|
+
('3017', 'Missing_Pct', 'mssql', 'ABS( 2.0 * ASIN( SQRT( CAST({BASELINE_VALUE_CT} AS FLOAT) / CAST({BASELINE_CT} AS FLOAT) ) ) - 2 * ASIN( SQRT( CAST(COUNT( {COLUMN_NAME} ) AS FLOAT) / CAST(NULLIF(COUNT(*), 0) AS FLOAT) )) )', '>=', '{THRESHOLD_VALUE}'),
|
|
288
|
+
('3018', 'Monthly_Rec_Ct', 'mssql', '(MAX(DATEDIFF(month, {COLUMN_NAME}, CAST(''{RUN_DATE}''AS DATE))) - MIN(DATEDIFF(month, {COLUMN_NAME}, CAST(''{RUN_DATE}'' AS DATE))) + 1) - COUNT(DISTINCT DATEDIFF(month, {COLUMN_NAME}, CAST(''{RUN_DATE}''AS DATE)))', '>', '{THRESHOLD_VALUE}'),
|
|
289
|
+
('3019', 'Outlier_Pct_Above', 'mssql', 'CAST(SUM(CASE WHEN CAST({COLUMN_NAME} AS FLOAT) > {BASELINE_AVG}+(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END) AS FLOAT) / CAST(NULLIF(COUNT({COLUMN_NAME}), 0) AS FLOAT)', '>', '{THRESHOLD_VALUE}'),
|
|
290
|
+
('3020', 'Outlier_Pct_Below', 'mssql', 'CAST(SUM(CASE WHEN CAST( {COLUMN_NAME} AS FLOAT) < {BASELINE_AVG}-(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END) AS FLOAT) / CAST(NULLIF(COUNT({COLUMN_NAME}), 0) AS FLOAT)', '>', '{THRESHOLD_VALUE}'),
|
|
291
|
+
('3021', 'Pattern_Match', 'mssql', 'COUNT(NULLIF({COLUMN_NAME}, '''')) - CAST(SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') LIKE ''{BASELINE_VALUE}'' THEN 1 ELSE 0 END) AS BIGINT)', '>', '{THRESHOLD_VALUE}'),
|
|
292
|
+
('3022', 'Recency', 'mssql', 'DATEDIFF(day, MAX({COLUMN_NAME}), CAST(''{RUN_DATE}''AS DATE))', '>', '{THRESHOLD_VALUE}'),
|
|
293
|
+
('3023', 'Required', 'mssql', 'COUNT(*) - COUNT( {COLUMN_NAME} )', '>', '{THRESHOLD_VALUE}'),
|
|
294
|
+
('3024', 'Row_Ct', 'mssql', 'COUNT(*)', '<', '{THRESHOLD_VALUE}'),
|
|
295
|
+
('3025', 'Row_Ct_Pct', 'mssql', 'ABS(ROUND(100.0 * CAST((COUNT(*) - {BASELINE_CT} ) AS FLOAT)/ CAST({BASELINE_CT} AS FLOAT), 2))', '>', '{THRESHOLD_VALUE}'),
|
|
296
|
+
('3026', 'Street_Addr_Pattern', 'mssql', 'CAST(100.0*SUM(CASE WHEN UPPER({COLUMN_NAME}) LIKE ''[1-9]% [A-Z]% %'' AND CHARINDEX('' '', {COLUMN_NAME}) BETWEEN 2 AND 6 THEN 1 ELSE 0 END) as FLOAT) /CAST(NULLIF(COUNT({COLUMN_NAME}), 0) AS FLOAT)', '<', '{THRESHOLD_VALUE}'),
|
|
297
|
+
('3027', 'US_State', 'mssql', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN (''AL'',''AK'',''AS'',''AZ'',''AR'',''CA'',''CO'',''CT'',''DE'',''DC'',''FM'',''FL'',''GA'',''GU'',''HI'',''ID'',''IL'',''IN'',''IA'',''KS'',''KY'',''LA'',''ME'',''MH'',''MD'',''MA'',''MI'',''MN'',''MS'',''MO'',''MT'',''NE'',''NV'',''NH'',''NJ'',''NM'',''NY'',''NC'',''ND'',''MP'',''OH'',''OK'',''OR'',''PW'',''PA'',''PR'',''RI'',''SC'',''SD'',''TN'',''TX'',''UT'',''VT'',''VI'',''VA'',''WA'',''WV'',''WI'',''WY'',''AE'',''AP'',''AA'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
298
|
+
('3028', 'Unique', 'mssql', 'COUNT(*) - COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'),
|
|
299
|
+
('3029', 'Unique_Pct', 'mssql', 'ABS( 2.0 * ASIN( SQRT(CAST({BASELINE_UNIQUE_CT} AS FLOAT) / CAST({BASELINE_VALUE_CT} AS FLOAT) ) ) - 2 * ASIN( SQRT( CAST(COUNT( DISTINCT {COLUMN_NAME} ) AS FLOAT) / CAST(NULLIF(COUNT( {COLUMN_NAME} ), 0) AS FLOAT) )) )', '>=', '{THRESHOLD_VALUE}'),
|
|
300
|
+
('3030', 'Weekly_Rec_Ct', 'mssql', 'MAX(DATEDIFF(week, CAST(''1800-01-01'' AS DATE), "{COLUMN_NAME}")) - MIN(DATEDIFF(week, CAST(''1800-01-01'' AS DATE), "{COLUMN_NAME}"))+1 - COUNT(DISTINCT DATEDIFF(week, CAST(''1800-01-01'' AS DATE), "{COLUMN_NAME}"))', '>', '{THRESHOLD_VALUE}'),
|
|
301
|
+
('4001', 'Alpha_Trunc', 'postgresql', 'MAX(LENGTH({COLUMN_NAME}))', '<', '{THRESHOLD_VALUE}'),
|
|
302
|
+
('4002', 'Avg_Shift', 'postgresql', 'ABS( (AVG({COLUMN_NAME}::FLOAT) - {BASELINE_AVG}) / SQRT(((COUNT({COLUMN_NAME})::FLOAT-1)*STDDEV({COLUMN_NAME})^2 + ({BASELINE_VALUE_CT}::FLOAT-1) * {BASELINE_SD}::FLOAT^2) /NULLIF(COUNT({COLUMN_NAME})::FLOAT + {BASELINE_VALUE_CT}::FLOAT, 0) ))', '>=', '{THRESHOLD_VALUE}'),
|
|
303
|
+
('4003', 'Condition_Flag', 'postgresql', 'SUM(CASE WHEN {CUSTOM_QUERY} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
304
|
+
('4004', 'Constant', 'postgresql', 'SUM(CASE WHEN {COLUMN_NAME} <> {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
305
|
+
('4005', 'Daily_Record_Ct', 'postgresql', '{DATA_QC_SCHEMA}.DATEDIFF(''DAY'', MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'),
|
|
306
|
+
('4006', 'Dec_Trunc', 'postgresql', 'ROUND(SUM(ABS({COLUMN_NAME})::DECIMAL(18,4) % 1), 0)', '<', '{THRESHOLD_VALUE}'),
|
|
307
|
+
('4007', 'Distinct_Date_Ct', 'postgresql', 'COUNT(DISTINCT {COLUMN_NAME})', '<', '{THRESHOLD_VALUE}'),
|
|
308
|
+
('4008', 'Distinct_Value_Ct', 'postgresql', 'COUNT(DISTINCT {COLUMN_NAME})', '<>', '{THRESHOLD_VALUE}'),
|
|
309
|
+
('4009', 'Email_Format', 'postgresql', 'SUM(CASE WHEN {COLUMN_NAME} !~ ''^[A-Za-z0-9._''''%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
310
|
+
('4010', 'Future_Date', 'postgresql', 'SUM(GREATEST(0, SIGN({COLUMN_NAME}::DATE - ''{RUN_DATE}''::DATE)))', '>', '{THRESHOLD_VALUE}'),
|
|
311
|
+
('4011', 'Future_Date_1Y', 'postgresql', 'SUM(GREATEST(0, SIGN({COLUMN_NAME}::DATE - (''{RUN_DATE}''::DATE+365))))', '>', '{THRESHOLD_VALUE}'),
|
|
312
|
+
('4012', 'Incr_Avg_Shift', 'postgresql', 'COALESCE(ABS( ({BASELINE_AVG} - (SUM({COLUMN_NAME}) - {BASELINE_SUM}) / NULLIF(COUNT({COLUMN_NAME})::FLOAT - {BASELINE_VALUE_CT}, 0)) / {BASELINE_SD} ), 0)', '>=', '{THRESHOLD_VALUE}'),
|
|
313
|
+
('4013', 'LOV_All', 'postgresql', 'STRING_AGG(DISTINCT {COLUMN_NAME}, ''|'' ORDER BY {COLUMN_NAME})', '<>', '{THRESHOLD_VALUE}'),
|
|
314
|
+
('4014', 'LOV_Match', 'postgresql', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
315
|
+
('4015', 'Min_Date', 'postgresql', 'SUM(CASE WHEN {COLUMN_NAME} < ''{BASELINE_VALUE}'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
316
|
+
('4016', 'Min_Val', 'postgresql', 'SUM(CASE WHEN {COLUMN_NAME} < {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
317
|
+
('4017', 'Missing_Pct', 'postgresql', 'ABS( 2.0 * ASIN( SQRT( {BASELINE_VALUE_CT}::FLOAT / {BASELINE_CT}::FLOAT ) ) - 2 * ASIN( SQRT( COUNT( {COLUMN_NAME} )::FLOAT / NULLIF(COUNT(*), 0)::FLOAT )) )', '>=', '{THRESHOLD_VALUE}'),
|
|
318
|
+
('4018', 'Monthly_Rec_Ct', 'postgresql', '(MAX({DATA_QC_SCHEMA}.DATEDIFF(''MON'', {COLUMN_NAME}, ''{RUN_DATE}''::DATE)) - MIN({DATA_QC_SCHEMA}.DATEDIFF(''MON'', {COLUMN_NAME}, ''{RUN_DATE}''::DATE)) + 1) - COUNT(DISTINCT {DATA_QC_SCHEMA}.DATEDIFF(''MON'', {COLUMN_NAME}, ''{RUN_DATE}''::DATE))', '>', '{THRESHOLD_VALUE}'),
|
|
319
|
+
('4019', 'Outlier_Pct_Above', 'postgresql', 'SUM(CASE WHEN {COLUMN_NAME}::FLOAT > {BASELINE_AVG}+(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT({COLUMN_NAME}), 0)::FLOAT', '>', '{THRESHOLD_VALUE}'),
|
|
320
|
+
('4020', 'Outlier_Pct_Below', 'postgresql', 'SUM(CASE WHEN {COLUMN_NAME}::FLOAT < {BASELINE_AVG}-(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT({COLUMN_NAME}), 0)::FLOAT', '>', '{THRESHOLD_VALUE}'),
|
|
321
|
+
('4021', 'Pattern_Match', 'postgresql', 'COUNT(NULLIF({COLUMN_NAME}, '''')) - SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') ~ ''{BASELINE_VALUE}'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
322
|
+
('4022', 'Recency', 'postgresql', '{DATA_QC_SCHEMA}.DATEDIFF(''DAY'', MAX({COLUMN_NAME}), ''{RUN_DATE}''::DATE)', '>', '{THRESHOLD_VALUE}'),
|
|
323
|
+
('4023', 'Required', 'postgresql', 'COUNT(*) - COUNT({COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'),
|
|
324
|
+
('4024', 'Row_Ct', 'postgresql', 'COUNT(*)', '<', '{THRESHOLD_VALUE}'),
|
|
325
|
+
('4025', 'Row_Ct_Pct', 'postgresql', 'ABS(ROUND(100.0 * (COUNT(*) - {BASELINE_CT})::DECIMAL(18,4) / {BASELINE_CT}::DECIMAL(18,4), 2))', '>', '{THRESHOLD_VALUE}'),
|
|
326
|
+
('4026', 'Street_Addr_Pattern', 'postgresql', '100.0*SUM(CASE WHEN {COLUMN_NAME} ~ ''^[0-9]{1,5}[a-zA-Z]?\s\w{1,5}\.?\s?\w*\s?\w*\s[a-zA-Z]{1,6}\.?\s?[0-9]{0,5}[A-Z]{0,1}$'' THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT({COLUMN_NAME}), 0)::FLOAT', '<', '{THRESHOLD_VALUE}'),
|
|
327
|
+
('4027', 'US_State', 'postgresql', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN (''AL'',''AK'',''AS'',''AZ'',''AR'',''CA'',''CO'',''CT'',''DE'',''DC'',''FM'',''FL'',''GA'',''GU'',''HI'',''ID'',''IL'',''IN'',''IA'',''KS'',''KY'',''LA'',''ME'',''MH'',''MD'',''MA'',''MI'',''MN'',''MS'',''MO'',''MT'',''NE'',''NV'',''NH'',''NJ'',''NM'',''NY'',''NC'',''ND'',''MP'',''OH'',''OK'',''OR'',''PW'',''PA'',''PR'',''RI'',''SC'',''SD'',''TN'',''TX'',''UT'',''VT'',''VI'',''VA'',''WA'',''WV'',''WI'',''WY'',''AE'',''AP'',''AA'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
328
|
+
('4028', 'Unique', 'postgresql', 'COUNT(*) - COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'),
|
|
329
|
+
('4029', 'Unique_Pct', 'postgresql', 'ABS( 2.0 * ASIN( SQRT({BASELINE_UNIQUE_CT}::FLOAT / {BASELINE_VALUE_CT}::FLOAT ) ) - 2 * ASIN( SQRT( COUNT( DISTINCT {COLUMN_NAME} )::FLOAT / NULLIF(COUNT( {COLUMN_NAME} ), 0)::FLOAT )) )', '>=', '{THRESHOLD_VALUE}'),
|
|
330
|
+
('4030', 'Weekly_Rec_Ct', 'postgresql', 'MAX({DATA_QC_SCHEMA}.DATEDIFF(''WEEK'', ''1800-01-01''::DATE, {COLUMN_NAME})) - MIN({DATA_QC_SCHEMA}.DATEDIFF(''WEEK'', ''1800-01-01''::DATE, {COLUMN_NAME}))+1 - COUNT(DISTINCT {DATA_QC_SCHEMA}.DATEDIFF(''WEEK'', ''1800-01-01''::DATE, {COLUMN_NAME}))', '>', '{THRESHOLD_VALUE}'),
|
|
331
|
+
('1031', 'Variability_Increase', 'redshift', '100.0*STDDEV(CAST("{COLUMN_NAME}" AS FLOAT))/{BASELINE_SD}', '>', '{THRESHOLD_VALUE}'),
|
|
332
|
+
('1032', 'Variability_Decrease', 'redshift', '100.0*STDDEV(CAST("{COLUMN_NAME}" AS FLOAT))/{BASELINE_SD}', '<', '{THRESHOLD_VALUE}'),
|
|
333
|
+
('2031', 'Variability_Increase', 'snowflake', '100.0*STDDEV(CAST("{COLUMN_NAME}" AS FLOAT))/{BASELINE_SD}', '>', '{THRESHOLD_VALUE}'),
|
|
334
|
+
('2032', 'Variability_Decrease', 'snowflake', '100.0*STDDEV(CAST("{COLUMN_NAME}" AS FLOAT))/{BASELINE_SD}', '<', '{THRESHOLD_VALUE}'),
|
|
335
|
+
('3031', 'Variability_Increase', 'mssql', '100.0*STDEV(CAST("{COLUMN_NAME}" AS FLOAT))/{BASELINE_SD}', '>', '{THRESHOLD_VALUE}'),
|
|
336
|
+
('3032', 'Variability_Decrease', 'mssql', '100.0*STDEV(CAST("{COLUMN_NAME}" AS FLOAT))/{BASELINE_SD}', '<', '{THRESHOLD_VALUE}'),
|
|
337
|
+
('4031', 'Variability_Increase', 'postgresql', '100.0*STDDEV(CAST("{COLUMN_NAME}" AS FLOAT))/{BASELINE_SD}', '>', '{THRESHOLD_VALUE}'),
|
|
338
|
+
('4032', 'Variability_Decrease', 'postgresql', '100.0*STDDEV(CAST("{COLUMN_NAME}" AS FLOAT))/{BASELINE_SD}', '<', '{THRESHOLD_VALUE}'),
|
|
339
|
+
|
|
340
|
+
('5001', 'Alpha_Trunc', 'trino', 'MAX(LENGTH({COLUMN_NAME}))', '<', '{THRESHOLD_VALUE}'),
|
|
341
|
+
('5002', 'Avg_Shift', 'trino', 'ABS( (CAST(AVG({COLUMN_NAME}) AS REAL) - {BASELINE_AVG}) / SQRT(((CAST(COUNT({COLUMN_NAME}) AS REAL)-1)*POWER(STDDEV({COLUMN_NAME}),2) + (CAST({BASELINE_VALUE_CT} AS REAL)-1) * POWER(CAST({BASELINE_SD} AS REAL),2)) /NULLIF(CAST(COUNT({COLUMN_NAME}) AS REAL) + CAST({BASELINE_VALUE_CT} AS REAL), 0) ))', '>=', '{THRESHOLD_VALUE}'),
|
|
342
|
+
('5003', 'Condition_Flag', 'trino', 'SUM(CASE WHEN {CUSTOM_QUERY} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
343
|
+
('5004', 'Constant', 'trino', 'SUM(CASE WHEN {COLUMN_NAME} <> {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
344
|
+
('5005', 'Daily_Record_Ct', 'trino', 'DATE_DIFF(''DAY'', MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'),
|
|
345
|
+
('5006', 'Dec_Trunc', 'trino', 'ROUND(SUM(ABS(CAST({COLUMN_NAME} AS DECIMAL(18,4))) % 1), 0)', '<', '{THRESHOLD_VALUE}'),
|
|
346
|
+
('5007', 'Distinct_Date_Ct', 'trino', 'COUNT(DISTINCT {COLUMN_NAME})', '<', '{THRESHOLD_VALUE}'),
|
|
347
|
+
('5008', 'Distinct_Value_Ct', 'trino', 'COUNT(DISTINCT {COLUMN_NAME})', '<>', '{THRESHOLD_VALUE}'),
|
|
348
|
+
('5009', 'Email_Format', 'trino', 'SUM(CASE WHEN REGEXP_LIKE({COLUMN_NAME} , ''^[A-Za-z0-9._''''%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'') != TRUE THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
349
|
+
('5010', 'Future_Date', 'trino', 'SUM(CASE WHEN CAST({COLUMN_NAME} AS DATE) >= CAST(''{RUN_DATE}'' AS DATE) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
350
|
+
('5011', 'Future_Date_1Y', 'trino', 'SUM(CASE WHEN CAST({COLUMN_NAME} AS DATE) >= (FROM_ISO8601_DATE(''{RUN_DATE}'') + interval ''365'' day ) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
351
|
+
('5012', 'Incr_Avg_Shift', 'trino', 'COALESCE(ABS( ({BASELINE_AVG} - (SUM({COLUMN_NAME}) - {BASELINE_SUM}) / NULLIF(CAST(COUNT({COLUMN_NAME}) AS REAL) - {BASELINE_VALUE_CT}, 0)) / {BASELINE_SD} ), 0)', '>=', '{THRESHOLD_VALUE}'),
|
|
352
|
+
('5013', 'LOV_All', 'trino', 'LISTAGG(DISTINCT {COLUMN_NAME}, ''|'') WITHIN GROUP (ORDER BY {COLUMN_NAME})', '<>', '{THRESHOLD_VALUE}'),
|
|
353
|
+
('5014', 'LOV_Match', 'trino', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
354
|
+
('5015', 'Min_Date', 'trino', 'SUM(CASE WHEN {COLUMN_NAME} < CAST(''{BASELINE_VALUE}'' AS DATE) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
355
|
+
('5016', 'Min_Val', 'trino', 'SUM(CASE WHEN {COLUMN_NAME} < {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
356
|
+
('5017', 'Missing_Pct', 'trino', 'ABS(2.0 * ASIN(SQRT(CAST({BASELINE_VALUE_CT} AS REAL) / CAST({BASELINE_CT} AS REAL))) - 2 * ASIN(SQRT(CAST(COUNT({COLUMN_NAME}) AS REAL) / CAST(NULLIF(COUNT(*), 0) AS REAL) )))', '>=', '{THRESHOLD_VALUE}'),
|
|
357
|
+
('5018', 'Monthly_Rec_Ct', 'trino', '(MAX(DATE_DIFF(''month'', {COLUMN_NAME}, CAST(''{RUN_DATE}'' AS DATE))) - MIN(DATE_DIFF(''month'', {COLUMN_NAME}, CAST(''{RUN_DATE}'' AS DATE))) + 1) - COUNT(DISTINCT DATE_DIFF(''month'', {COLUMN_NAME}, CAST(''{RUN_DATE}'' AS DATE)))', '>', '{THRESHOLD_VALUE}'),
|
|
358
|
+
('5019', 'Outlier_Pct_Above', 'trino', 'CAST(SUM(CASE WHEN CAST({COLUMN_NAME} AS REAL) > {BASELINE_AVG}+(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END) AS REAL) / CAST(COUNT({COLUMN_NAME}) AS REAL)', '>', '{THRESHOLD_VALUE}'),
|
|
359
|
+
('5020', 'Outlier_Pct_Below', 'trino', 'CAST(SUM(CASE WHEN CAST( {COLUMN_NAME} AS REAL) < {BASELINE_AVG}-(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END) AS REAL) / CAST(COUNT({COLUMN_NAME}) AS REAL)', '>', '{THRESHOLD_VALUE}'),
|
|
360
|
+
('5021', 'Pattern_Match', 'trino', 'COUNT(NULLIF({COLUMN_NAME}, '''')) - SUM(CASE WHEN REGEXP_LIKE(NULLIF({COLUMN_NAME}, '''') , ''{BASELINE_VALUE}'') = TRUE THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
361
|
+
('5022', 'Recency', 'trino', 'DATE_DIFF(''day'', MAX({COLUMN_NAME}), CAST(''{RUN_DATE}'' AS DATE))', '>', '{THRESHOLD_VALUE}'),
|
|
362
|
+
('5023', 'Required', 'trino', 'COUNT(*) - COUNT({COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'),
|
|
363
|
+
('5024', 'Row_Ct', 'trino', 'COUNT(*)', '<', '{THRESHOLD_VALUE}'),
|
|
364
|
+
('5025', 'Row_Ct_Pct', 'trino', 'ABS(ROUND(100.0 * CAST((COUNT(*) - {BASELINE_CT}) AS DECIMAL(18,4)) /CAST( {BASELINE_CT} AS DECIMAL(18,4) ), 2))', '>', '{THRESHOLD_VALUE}'),
|
|
365
|
+
('5026', 'Street_Addr_Pattern', 'trino', 'CAST(100.0*SUM(CASE WHEN REGEXP_LIKE({COLUMN_NAME} , ''^[0-9]{1,5}[a-zA-Z]?\s\w{1,5}\.?\s?\w*\s?\w*\s[a-zA-Z]{1,6}\.?\s?[0-9]{0,5}[A-Z]{0,1}$'') = TRUE THEN 1 ELSE 0 END) AS REAL )/ CAST(COUNT({COLUMN_NAME}) AS REAL)', '<', '{THRESHOLD_VALUE}'),
|
|
366
|
+
('5027', 'US_State', 'trino', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN (''AL'',''AK'',''AS'',''AZ'',''AR'',''CA'',''CO'',''CT'',''DE'',''DC'',''FM'',''FL'',''GA'',''GU'',''HI'',''ID'',''IL'',''IN'',''IA'',''KS'',''KY'',''LA'',''ME'',''MH'',''MD'',''MA'',''MI'',''MN'',''MS'',''MO'',''MT'',''NE'',''NV'',''NH'',''NJ'',''NM'',''NY'',''NC'',''ND'',''MP'',''OH'',''OK'',''OR'',''PW'',''PA'',''PR'',''RI'',''SC'',''SD'',''TN'',''TX'',''UT'',''VT'',''VI'',''VA'',''WA'',''WV'',''WI'',''WY'',''AE'',''AP'',''AA'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
367
|
+
('5028', 'Unique', 'trino', 'COUNT(*) - COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'),
|
|
368
|
+
('5029', 'Unique_Pct', 'trino', 'ABS( 2.0 * ASIN( SQRT(CAST({BASELINE_UNIQUE_CT} AS REAL) / CAST({BASELINE_VALUE_CT} AS REAL) ) ) - 2 * ASIN( SQRT( CAST(COUNT( DISTINCT {COLUMN_NAME} ) AS REAL) / CAST(NULLIF(COUNT( {COLUMN_NAME} ), 0) AS REAL) )))', '>=', '{THRESHOLD_VALUE}'),
|
|
369
|
+
('5030', 'Weekly_Rec_Ct', 'trino', 'MAX(DATE_DIFF(''week'', CAST(''1800-01-01'' AS DATE), "{COLUMN_NAME}")) - MIN(DATE_DIFF(''week'', CAST(''1800-01-01'' AS DATE), "{COLUMN_NAME}")) +1 - COUNT(DISTINCT DATE_DIFF(''week'', CAST(''1800-01-01'' AS DATE), "{COLUMN_NAME}"))', '>', '{THRESHOLD_VALUE}'),
|
|
370
|
+
('5031', 'Variability_Increase', 'trino', '100.0*STDDEV(CAST("{COLUMN_NAME}" AS REAL))/{BASELINE_SD}', '>', '{THRESHOLD_VALUE}'),
|
|
371
|
+
('5032', 'Variability_Decrease', 'trino', '100.0*STDDEV(CAST("{COLUMN_NAME}" AS REAL))/{BASELINE_SD}', '<', '{THRESHOLD_VALUE}'),
|
|
372
|
+
|
|
373
|
+
('1033', 'Valid_Month', 'redshift', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN ({BASELINE_VALUE}) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
374
|
+
('2033', 'Valid_Month', 'snowflake', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN ({BASELINE_VALUE}) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
375
|
+
('3033', 'Valid_Month', 'mssql', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN ({BASELINE_VALUE}) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
376
|
+
('4033', 'Valid_Month', 'postgresql', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN ({BASELINE_VALUE}) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
377
|
+
('5033', 'Valid_Month', 'trino', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN ({BASELINE_VALUE}) THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
378
|
+
|
|
379
|
+
('1034', 'Valid_US_Zip', 'redshift', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
380
|
+
('4034', 'Valid_US_Zip', 'postgresql', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
381
|
+
('2034', 'Valid_US_Zip', 'snowflake', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
382
|
+
('5034', 'Valid_US_Zip', 'trino', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
383
|
+
('3034', 'Valid_US_Zip', 'mssql', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
384
|
+
|
|
385
|
+
('1035', 'Valid_US_Zip3', 'redshift', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') <> ''999'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
386
|
+
('4035', 'Valid_US_Zip3', 'postgresql', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') <> ''999'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
387
|
+
('2035', 'Valid_US_Zip3', 'snowflake', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') <> ''999'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
388
|
+
('5035', 'Valid_US_Zip3', 'trino', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') <> ''999'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
389
|
+
('3035', 'Valid_US_Zip3', 'mssql', 'SUM(CASE WHEN TRANSLATE({COLUMN_NAME},''012345678'',''999999999'') <> ''999'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
390
|
+
|
|
391
|
+
('1036', 'Valid_Characters', 'redshift', 'SUM(CASE WHEN {COLUMN_NAME} ~ ''[[:cntrl:]]'' OR {COLUMN_NAME} LIKE '' %'' OR {COLUMN_NAME} LIKE ''''''%'''''' OR {COLUMN_NAME} LIKE ''"%"'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
392
|
+
-- PostgreSQL Valid_Characters: every predicate references the column placeholder, including the double-quoted-value check.
('4036', 'Valid_Characters', 'postgresql', 'SUM(CASE WHEN {COLUMN_NAME} ~ ''[[:cntrl:]]'' OR {COLUMN_NAME} LIKE '' %'' OR {COLUMN_NAME}::VARCHAR LIKE ''''''%'''''' OR {COLUMN_NAME}::VARCHAR LIKE ''"%"'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
393
|
+
('2036', 'Valid_Characters', 'snowflake', 'SUM(CASE WHEN REGEXP_LIKE({COLUMN_NAME}::VARCHAR, ''.*[[:cntrl:]].*'') OR {COLUMN_NAME} LIKE '' %'' OR {COLUMN_NAME} LIKE ''''''%'''''' OR {COLUMN_NAME} LIKE ''"%"'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
394
|
+
-- Trino Valid_Characters: every predicate references the column placeholder, including the double-quoted-value check.
('5036', 'Valid_Characters', 'trino', 'SUM(CASE WHEN REGEXP_LIKE({COLUMN_NAME}, ''[\x00-\x1F\x7F]'') OR {COLUMN_NAME} LIKE '' %'' OR {COLUMN_NAME} LIKE ''''''%'''''' OR {COLUMN_NAME} LIKE ''"%"'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'),
|
|
395
|
+
-- SQL Server Valid_Characters: every predicate references the column placeholder, including the double-quoted-value check.
('3036', 'Valid_Characters', 'mssql', 'SUM(CASE WHEN PATINDEX(''%['' + CHAR(1) + ''-'' + CHAR(8) + CHAR(11) + CHAR(12) + CHAR(14) + ''-'' + CHAR(31) + '']%'', {COLUMN_NAME}) > 0 OR {COLUMN_NAME} LIKE '' %'' OR {COLUMN_NAME} LIKE ''''''%'''''' OR {COLUMN_NAME} LIKE ''"%"'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}');
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
-- Remove all existing rows from target_data_lookups before the INSERT below repopulates it.
TRUNCATE TABLE target_data_lookups;
|
|
399
|
+
|
|
400
|
+
INSERT INTO target_data_lookups
|
|
401
|
+
(id, test_id, error_type, test_type, sql_flavor, lookup_type, lookup_query)
|
|
402
|
+
VALUES
|
|
403
|
+
('1001', '1004', 'Test Results', 'Alpha_Trunc', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", LEN("{COLUMN_NAME}") as current_max_length, {THRESHOLD_VALUE} as previous_max_length FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT MAX(LEN("{COLUMN_NAME}")) as max_length FROM {TARGET_SCHEMA}.{TABLE_NAME}) a WHERE LEN("{COLUMN_NAME}") = a.max_length AND a.max_length < {THRESHOLD_VALUE} LIMIT 500;'),
|
|
404
|
+
('1002', '1005', 'Test Results', 'Avg_Shift', 'redshift', NULL, 'SELECT AVG("{COLUMN_NAME}" :: FLOAT) AS current_average FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
405
|
+
('1003', '1006', 'Test Results', 'Condition_Flag', 'redshift', NULL, 'SELECT * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {CUSTOM_QUERY} LIMIT 500;'),
|
|
406
|
+
('1004', '1007', 'Test Results', 'Constant', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" <> {BASELINE_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
407
|
+
('1005', '1009', 'Test Results', 'Daily_Record_Ct', 'redshift', NULL, 'WITH RECURSIVE daterange(all_dates) AS (SELECT MIN("{COLUMN_NAME}") :: DATE AS all_dates FROM {TARGET_SCHEMA}.{TABLE_NAME} UNION ALL SELECT DATEADD(DAY, 1, d.all_dates) :: DATE AS all_dates FROM daterange d WHERE d.all_dates < (SELECT MAX("{COLUMN_NAME}") :: DATE FROM {TARGET_SCHEMA}.{TABLE_NAME}) ), existing_periods AS ( SELECT DISTINCT "{COLUMN_NAME}" :: DATE AS period, COUNT(1) AS period_count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" :: DATE ) SELECT d.all_dates AS missing_period, MAX(b.period) AS prior_available_date, (SELECT period_count FROM existing_periods WHERE period = MAX(b.period) ) AS prior_available_date_count, MIN(c.period) AS next_available_date, (SELECT period_count FROM existing_periods WHERE period = MIN(c.period) ) AS next_available_date_count FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates ORDER BY d.all_dates LIMIT 500;'),
|
|
408
|
+
('1006', '1011', 'Test Results', 'Dec_Trunc', 'redshift', NULL, 'SELECT DISTINCT DECIMAL_SCALE("{COLUMN_NAME}" :: SUPER) AS decimal_scale, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY DECIMAL_SCALE("{COLUMN_NAME}" :: SUPER) LIMIT 500;'),
|
|
409
|
+
('1007', '1012', 'Test Results', 'Distinct_Date_Ct', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;'),
|
|
410
|
+
('1008', '1013', 'Test Results', 'Distinct_Value_Ct', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;'),
|
|
411
|
+
('1009', '1014', 'Test Results', 'Email_Format', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" !~ ''^[A-Za-z0-9._''''%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'' GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
412
|
+
('1010', '1015', 'Test Results', 'Future_Date', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE GREATEST(0, SIGN("{COLUMN_NAME}"::DATE - ''{TEST_DATE}''::DATE)) > {THRESHOLD_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
413
|
+
('1011', '1016', 'Test Results', 'Future_Date_1Y', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE GREATEST(0, SIGN("{COLUMN_NAME}"::DATE - (''{TEST_DATE}''::DATE + 365))) > {THRESHOLD_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
414
|
+
('1012', '1017', 'Test Results', 'Incr_Avg_Shift', 'redshift', NULL, 'SELECT AVG("{COLUMN_NAME}" :: FLOAT) AS current_average, SUM("{COLUMN_NAME}" ::FLOAT) AS current_sum, NULLIF(COUNT("{COLUMN_NAME}" )::FLOAT, 0) as current_value_count FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
415
|
+
('1013', '1018', 'Test Results', 'LOV_All', 'redshift', NULL, 'SELECT LISTAGG(DISTINCT "{COLUMN_NAME}", ''|'') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") FROM {TARGET_SCHEMA}.{TABLE_NAME} HAVING LISTAGG(DISTINCT "{COLUMN_NAME}", ''|'') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") <> ''{THRESHOLD_VALUE}'' LIMIT 500;'),
|
|
416
|
+
('1014', '1019', 'Test Results', 'LOV_Match', 'redshift', NULL, 'SELECT DISTINCT NULLIF("{COLUMN_NAME}", '''') AS "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT IN {BASELINE_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
417
|
+
('1015', '1020', 'Test Results', 'Min_Date', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" :: DATE < ''{BASELINE_VALUE}'' :: DATE GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
418
|
+
('1016', '1021', 'Test Results', 'Min_Val', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", (ABS("{COLUMN_NAME}") - ABS({BASELINE_VALUE})) AS difference_from_baseline FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" < {BASELINE_VALUE} LIMIT 500;'),
|
|
419
|
+
('1017', '1022', 'Test Results', 'Missing_Pct', 'redshift', NULL, 'SELECT TOP 10 * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NULL OR "{COLUMN_NAME}" :: VARCHAR(255) = '''' ;'),
|
|
420
|
+
('1018', '1023', 'Test Results', 'Monthly_Rec_Ct', 'redshift', NULL, 'WITH RECURSIVE daterange(all_dates) AS (SELECT DATE_TRUNC(''month'', MIN("{COLUMN_NAME}")) :: DATE AS all_dates FROM {TARGET_SCHEMA}.{TABLE_NAME} UNION ALL SELECT DATEADD(MONTH, 1, d.all_dates) :: DATE AS all_dates FROM daterange d WHERE d.all_dates < (SELECT DATE_TRUNC(''month'', MAX("{COLUMN_NAME}")) :: DATE FROM {TARGET_SCHEMA}.{TABLE_NAME}) ), existing_periods AS ( SELECT DISTINCT DATE_TRUNC(''month'',"{COLUMN_NAME}") :: DATE AS period, COUNT(1) AS period_count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY DATE_TRUNC(''month'',"{COLUMN_NAME}") :: DATE ) SELECT d.all_dates as missing_period, MAX(b.period) AS prior_available_month, (SELECT period_count FROM existing_periods WHERE period = MAX(b.period) ) AS prior_available_month_count, MIN(c.period) AS next_available_month, (SELECT period_count FROM existing_periods WHERE period = MIN(c.period) ) AS next_available_month_count FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates ORDER BY d.all_dates;'),
|
|
421
|
+
('1019', '1024', 'Test Results', 'Outlier_Pct_Above', 'redshift', NULL, 'SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" :: FLOAT > ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'),
|
|
422
|
+
-- Outlier_Pct_Below lookup: the lower threshold is the baseline average minus two standard deviations, matching the test measure.
('1020', '1025', 'Test Results', 'Outlier_Pct_Below', 'redshift', NULL, 'SELECT ({BASELINE_AVG} - (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" :: FLOAT < ({BASELINE_AVG} - (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'),
|
|
423
|
+
('1021', '1026', 'Test Results', 'Pattern_Match', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT SIMILAR TO ''{BASELINE_VALUE}'' GROUP BY "{COLUMN_NAME}";'),
|
|
424
|
+
('1022', '1028', 'Test Results', 'Recency', 'redshift', NULL, 'SELECT DISTINCT col AS latest_date_available, ''{TEST_DATE}'' :: DATE as test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM {TARGET_SCHEMA}.{TABLE_NAME}) WHERE DATEDIFF(''D'', col, ''{TEST_DATE}''::DATE) > {THRESHOLD_VALUE};'),
|
|
425
|
+
('1023', '1030', 'Test Results', 'Required', 'redshift', NULL, 'SELECT * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NULL LIMIT 500;'),
|
|
426
|
+
('1024', '1031', 'Test Results', 'Row_Ct', 'redshift', NULL, 'WITH CTE AS (SELECT COUNT(*) AS current_count FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT current_count, ABS(ROUND(100 * (current_count - {THRESHOLD_VALUE}) :: FLOAT / {THRESHOLD_VALUE} :: FLOAT,2)) AS row_count_pct_decrease FROM cte WHERE current_count < {THRESHOLD_VALUE};'),
|
|
427
|
+
('1025', '1032', 'Test Results', 'Row_Ct_Pct', 'redshift', NULL, 'WITH CTE AS (SELECT COUNT(*) AS current_count FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT current_count, {BASELINE_CT} AS baseline_count, ABS(ROUND(100 * (current_count - {BASELINE_CT}) :: FLOAT / {BASELINE_CT} :: FLOAT,2)) AS row_count_pct_difference FROM cte;'),
|
|
428
|
+
('1026', '1033', 'Test Results', 'Street_Addr_Pattern', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" !~ ''^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$'' GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT 500;'),
|
|
429
|
+
('1027', '1036', 'Test Results', 'US_State', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT IN (''AL'',''AK'',''AS'',''AZ'',''AR'',''CA'',''CO'',''CT'',''DE'',''DC'',''FM'',''FL'',''GA'',''GU'',''HI'',''ID'',''IL'',''IN'',''IA'',''KS'',''KY'',''LA'',''ME'',''MH'',''MD'',''MA'',''MI'',''MN'',''MS'',''MO'',''MT'',''NE'',''NV'',''NH'',''NJ'',''NM'',''NY'',''NC'',''ND'',''MP'',''OH'',''OK'',''OR'',''PW'',''PA'',''PR'',''RI'',''SC'',''SD'',''TN'',''TX'',''UT'',''VT'',''VI'',''VA'',''WA'',''WV'',''WI'',''WY'',''AE'',''AP'',''AA'') GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
430
|
+
('1028', '1034', 'Test Results', 'Unique', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" HAVING COUNT(*) > 1 ORDER BY COUNT(*) DESC LIMIT 500;'),
|
|
431
|
+
('1029', '1035', 'Test Results', 'Unique_Pct', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT 500;'),
|
|
432
|
+
('1030', '1037', 'Test Results', 'Weekly_Rec_Ct', 'redshift', NULL, 'WITH RECURSIVE daterange(all_dates) AS (SELECT DATE_TRUNC(''week'',MIN("{COLUMN_NAME}")) :: DATE AS all_dates FROM {TARGET_SCHEMA}.{TABLE_NAME} UNION ALL SELECT (d.all_dates + INTERVAL ''1 week'' ) :: DATE AS all_dates FROM daterange d WHERE d.all_dates < (SELECT DATE_TRUNC(''week'', MAX("{COLUMN_NAME}")) :: DATE FROM {TARGET_SCHEMA}.{TABLE_NAME}) ), existing_periods AS ( SELECT DISTINCT DATE_TRUNC(''week'',"{COLUMN_NAME}") :: DATE AS period, COUNT(1) as period_count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY DATE_TRUNC(''week'',"{COLUMN_NAME}") :: DATE ) SELECT d.all_dates as missing_period, MAX(b.period) AS prior_available_week, (SELECT period_count FROM existing_periods WHERE period = MAX(b.period) ) AS prior_available_week_count, MIN(c.period) AS next_available_week, (SELECT period_count FROM existing_periods WHERE period = MIN(c.period) ) AS next_available_week_count FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates ORDER BY d.all_dates;'),
|
|
433
|
+
('1031', '1040', 'Test Results', 'Variability_Increase', 'redshift', NULL, 'SELECT STDDEV(CAST("{COLUMN_NAME}" AS FLOAT)) as current_standard_deviation FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
434
|
+
('1032', '1041', 'Test Results', 'Variability_Decrease', 'redshift', NULL, 'SELECT STDDEV(CAST("{COLUMN_NAME}" AS FLOAT)) as current_standard_deviation FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
435
|
+
|
|
436
|
+
('1033', '1001', 'Profile Anomaly' , 'Suggested_Type', 'redshift', NULL, 'SELECT TOP 20 "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC;'),
|
|
437
|
+
('1034', '1002', 'Profile Anomaly' , 'Non_Standard_Blanks', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CASE WHEN "{COLUMN_NAME}" IN (''.'', ''?'', '' '') THEN 1 WHEN LOWER("{COLUMN_NAME}") SIMILAR TO ''(^.{2,}|-{2,}|0{2,}|9{2,}|x{2,}|z{2,}$)'' THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN "{COLUMN_NAME}" = '''' THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";'),
|
|
438
|
+
('1035', '1003', 'Profile Anomaly' , 'Invalid_Zip_USA', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
439
|
+
('1036', '1004', 'Profile Anomaly' , 'Multiple_Types_Minor', 'redshift', NULL, 'SELECT DISTINCT column_name, table_name, CASE WHEN data_type = ''timestamp without time zone'' THEN ''timestamp'' WHEN data_type = ''character varying'' THEN ''varchar('' || CAST(character_maximum_length AS VARCHAR) || '')'' WHEN data_type = ''character'' THEN ''char('' || CAST(character_maximum_length AS VARCHAR) || '')'' WHEN data_type = ''numeric'' THEN ''numeric('' || CAST(numeric_precision AS VARCHAR) || '','' || CAST(numeric_scale AS VARCHAR) || '')'' ELSE data_type END AS data_type FROM information_schema.columns WHERE table_schema = ''{TARGET_SCHEMA}'' AND column_name = ''{COLUMN_NAME}'' ORDER BY data_type, table_name;'),
|
|
440
|
+
('1037', '1005', 'Profile Anomaly' , 'Multiple_Types_Major', 'redshift', NULL, 'SELECT DISTINCT column_name, table_name, CASE WHEN data_type = ''timestamp without time zone'' THEN ''timestamp'' WHEN data_type = ''character varying'' THEN ''varchar('' || CAST(character_maximum_length AS VARCHAR) || '')'' WHEN data_type = ''character'' THEN ''char('' || CAST(character_maximum_length AS VARCHAR) || '')'' WHEN data_type = ''numeric'' THEN ''numeric('' || CAST(numeric_precision AS VARCHAR) || '','' || CAST(numeric_scale AS VARCHAR) || '')'' ELSE data_type END AS data_type FROM information_schema.columns WHERE table_schema = ''{TARGET_SCHEMA}'' AND column_name = ''{COLUMN_NAME}'' ORDER BY data_type, table_name;'),
|
|
441
|
+
('1038', '1006', 'Profile Anomaly' , 'No_Values', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
442
|
+
('1039', '1007', 'Profile Anomaly' , 'Column_Pattern_Mismatch', 'redshift', NULL, 'SELECT A.* FROM ( SELECT TOP 5 DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 4)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}", ''[a-z]'', ''a''),''[A-Z]'', ''A''),''[0-9]'', ''N'') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC ) A UNION ALL SELECT B.* FROM ( SELECT TOP 5 DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 6)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}", ''[a-z]'', ''a''),''[A-Z]'', ''A''),''[0-9]'', ''N'') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC ) B UNION ALL SELECT C.* FROM ( SELECT TOP 5 DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 8)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}", ''[a-z]'', ''a''),''[A-Z]'', ''A''),''[0-9]'', ''N'') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC ) C UNION ALL SELECT D.* FROM ( SELECT TOP 5 DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 10)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}", ''[a-z]'', ''a''),''[A-Z]'', ''A''),''[0-9]'', ''N'') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC ) D ORDER BY top_pattern DESC, count DESC;' ),
|
|
443
|
+
('1040', '1008', 'Profile Anomaly' , 'Table_Pattern_Mismatch', 'redshift', NULL, 'SELECT column_name, table_name, data_type FROM information_schema.columns WHERE table_schema = ''{TARGET_SCHEMA}'' AND column_name = ''{COLUMN_NAME}'' ORDER BY data_type;' ),
|
|
444
|
+
('1041', '1009', 'Profile Anomaly' , 'Leading_Spaces', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" BETWEEN '' !'' AND ''!'' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
445
|
+
('1042', '1010', 'Profile Anomaly' , 'Quoted_Values', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" ILIKE ''"%"'' OR "{COLUMN_NAME}" ILIKE ''''''%'''''' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
446
|
+
('1043', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'redshift', NULL, 'SELECT A.* FROM ( SELECT TOP 10 DISTINCT ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM ( SELECT TOP 10 DISTINCT ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ),
|
|
447
|
+
('1044', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'redshift', NULL, 'SELECT A.* FROM ( SELECT TOP 10 DISTINCT ''Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS A UNION ALL SELECT B.* FROM ( SELECT TOP 10 DISTINCT ''Non-Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ),
|
|
448
|
+
('1045', '1013', 'Profile Anomaly' , 'Small Missing Value Ct', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" IN (''.'', ''?'', '' '') THEN 1 WHEN LOWER("{COLUMN_NAME}") SIMILAR TO ''(^.{2,}|-{2,}|0{2,}|9{2,}|x{2,}|z{2,}$)'' THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN "{COLUMN_NAME}" = '''' THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
449
|
+
('1046', '1014', 'Profile Anomaly' , 'Small Divergent Value Ct', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ),
|
|
450
|
+
('1047', '1015', 'Profile Anomaly' , 'Boolean_Value_Mismatch', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ),
|
|
451
|
+
('1048', '1016', 'Profile Anomaly' , 'Potential_Duplicates', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" HAVING COUNT(*)> 1 ORDER BY COUNT(*) DESC LIMIT 500;' ),
|
|
452
|
+
('1049', '1017', 'Profile Anomaly' , 'Standardized_Value_Matches', 'redshift', NULL, 'WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", '' '''',.-'', '''')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", '' '''',.-'', '''')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", possible_standard_value, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", '' '''',.-'', '''')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}", possible_standard_value ORDER BY possible_standard_value ASC, count DESC LIMIT 500;' ),
|
|
453
|
+
('1050', '1018', 'Profile Anomaly' , 'Unlikely_Date_Values', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", ''{PROFILE_RUN_DATE}'' :: DATE AS profile_run_date, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a WHERE ("{COLUMN_NAME}" < ''1900-01-01''::DATE) OR ("{COLUMN_NAME}" > ''{PROFILE_RUN_DATE}'' :: DATE + INTERVAL ''30 year'' ) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ),
|
|
454
|
+
('1051', '1019', 'Profile Anomaly' , 'Recency_One_Year', 'redshift', NULL, 'created_in_ui' ),
|
|
455
|
+
('1052', '1020', 'Profile Anomaly' , 'Recency_Six_Months', 'redshift', NULL, 'created_in_ui' ),
|
|
456
|
+
('1053', '1021', 'Profile Anomaly' , 'Unexpected US States', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ),
|
|
457
|
+
('1054', '1022', 'Profile Anomaly' , 'Unexpected Emails', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ),
|
|
458
|
+
('1055', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'redshift', NULL, 'SELECT A.* FROM ( SELECT TOP 10 DISTINCT ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM ( SELECT TOP 10 DISTINCT ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ),
|
|
459
|
+
('1056', '1024', 'Profile Anomaly' , 'Invalid_Zip3_USA', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') <> ''999'' GROUP BY "{COLUMN_NAME}" ORDER BY count DESC, "{COLUMN_NAME}" LIMIT 500;'),
|
|
460
|
+
('1057', '1025', 'Profile Anomaly' , 'Delimited_Data_Embedded', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" ~ ''^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'' AND "{COLUMN_NAME}" !~ ''\\s(and|but|or|yet)\\s'' GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT 500;' ),
|
|
461
|
+
|
|
462
|
+
('1058', '1001', 'Profile Anomaly' , 'Suggested_Type', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC LIMIT 20;'),
|
|
463
|
+
('1059', '1002', 'Profile Anomaly' , 'Non_Standard_Blanks', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CASE WHEN "{COLUMN_NAME}" IN (''.'', ''?'', '' '') THEN 1 WHEN LOWER("{COLUMN_NAME}") SIMILAR TO ''(^.{2,}|-{2,}|0{2,}|9{2,}|x{2,}|z{2,}$)'' THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN "{COLUMN_NAME}" = '''' THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";'),
|
|
464
|
+
('1060', '1003', 'Profile Anomaly' , 'Invalid_Zip_USA', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
465
|
+
('1061', '1004', 'Profile Anomaly' , 'Multiple_Types_Minor', 'postgresql', NULL, 'SELECT DISTINCT column_name, columns.table_name, CASE WHEN data_type = ''timestamp without time zone'' THEN ''timestamp'' WHEN data_type = ''character varying'' THEN ''varchar('' || CAST(character_maximum_length AS VARCHAR) || '')'' WHEN data_type = ''character'' THEN ''char('' || CAST(character_maximum_length AS VARCHAR) || '')'' WHEN data_type = ''numeric'' THEN ''numeric('' || CAST(numeric_precision AS VARCHAR) || '','' || CAST(numeric_scale AS VARCHAR) || '')'' ELSE data_type END AS data_type FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND UPPER(tables.table_type) = ''BASE TABLE'' ORDER BY data_type, table_name;'),
|
|
466
|
+
('1062', '1005', 'Profile Anomaly' , 'Multiple_Types_Major', 'postgresql', NULL, 'SELECT DISTINCT column_name, columns.table_name, CASE WHEN data_type = ''timestamp without time zone'' THEN ''timestamp'' WHEN data_type = ''character varying'' THEN ''varchar('' || CAST(character_maximum_length AS VARCHAR) || '')'' WHEN data_type = ''character'' THEN ''char('' || CAST(character_maximum_length AS VARCHAR) || '')'' WHEN data_type = ''numeric'' THEN ''numeric('' || CAST(numeric_precision AS VARCHAR) || '','' || CAST(numeric_scale AS VARCHAR) || '')'' ELSE data_type END AS data_type FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND UPPER(tables.table_type) = ''BASE TABLE'' ORDER BY data_type, table_name;'),
|
|
467
|
+
('1063', '1006', 'Profile Anomaly' , 'No_Values', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
468
|
+
('1064', '1007', 'Profile Anomaly' , 'Column_Pattern_Mismatch', 'postgresql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 4)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}", ''[a-z]'', ''a'', ''g''), ''[A-Z]'', ''A'', ''g''), ''[0-9]'', ''N'', ''g'') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC LIMIT 5 ) A UNION ALL SELECT B.* FROM ( SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 6)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}", ''[a-z]'', ''a'', ''g''), ''[A-Z]'', ''A'', ''g''), ''[0-9]'', ''N'', ''g'') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC LIMIT 5 ) B UNION ALL SELECT C.* FROM ( SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 8)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}", ''[a-z]'', ''a'', ''g''), ''[A-Z]'', ''A'', ''g''), ''[0-9]'', ''N'', ''g'') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC LIMIT 5 ) C UNION ALL SELECT D.* FROM ( SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 10)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}", ''[a-z]'', ''a'', ''g''), ''[A-Z]'', ''A'', ''g''), ''[0-9]'', ''N'', ''g'') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC LIMIT 5) D ORDER BY top_pattern DESC, count DESC;' ),
|
|
469
|
+
('1065', '1008', 'Profile Anomaly' , 'Table_Pattern_Mismatch', 'postgresql', NULL, 'SELECT column_name, columns.table_name FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND UPPER(tables.table_type) = ''BASE TABLE'' ORDER BY columns.table_name;' ),
|
|
470
|
+
('1066', '1009', 'Profile Anomaly' , 'Leading_Spaces', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" BETWEEN '' !'' AND ''!'' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
471
|
+
('1067', '1010', 'Profile Anomaly' , 'Quoted_Values', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" ILIKE ''"%"'' OR "{COLUMN_NAME}" ILIKE ''''''%'''''' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
472
|
+
('1068', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'postgresql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10 ) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10 ) AS B ORDER BY data_type, count DESC;' ),
|
|
473
|
+
('1069', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'postgresql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT ''Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT ''Non-Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10) AS B ORDER BY data_type, count DESC;' ),
|
|
474
|
+
('1070', '1013', 'Profile Anomaly' , 'Small Missing Value Ct', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" IN (''.'', ''?'', '' '') THEN 1 WHEN LOWER("{COLUMN_NAME}") SIMILAR TO ''(^.{2,}|-{2,}|0{2,}|9{2,}|x{2,}|z{2,}$)'' THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN "{COLUMN_NAME}" = '''' THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
475
|
+
('1071', '1014', 'Profile Anomaly' , 'Small Divergent Value Ct', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ),
|
|
476
|
+
('1072', '1015', 'Profile Anomaly' , 'Boolean_Value_Mismatch', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ),
|
|
477
|
+
('1073', '1016', 'Profile Anomaly' , 'Potential_Duplicates', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" HAVING COUNT(*)> 1 ORDER BY COUNT(*) DESC LIMIT 500;' ),
|
|
478
|
+
('1074', '1017', 'Profile Anomaly' , 'Standardized_Value_Matches', 'postgresql', NULL, 'WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", '' '''',.-'', '''')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", '' '''',.-'', '''')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", possible_standard_value, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", '' '''',.-'', '''')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}", possible_standard_value ORDER BY possible_standard_value ASC, count DESC LIMIT 500;' ),
|
|
479
|
+
('1075', '1018', 'Profile Anomaly' , 'Unlikely_Date_Values', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", ''{PROFILE_RUN_DATE}'' :: DATE AS profile_run_date, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a WHERE ("{COLUMN_NAME}" < ''1900-01-01''::DATE) OR ("{COLUMN_NAME}" > ''{PROFILE_RUN_DATE}'' :: DATE + INTERVAL ''30 year'' ) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ),
|
|
480
|
+
('1076', '1019', 'Profile Anomaly' , 'Recency_One_Year', 'postgresql', NULL, 'created_in_ui' ),
|
|
481
|
+
('1077', '1020', 'Profile Anomaly' , 'Recency_Six_Months', 'postgresql', NULL, 'created_in_ui' ),
|
|
482
|
+
('1078', '1021', 'Profile Anomaly' , 'Unexpected US States', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ),
|
|
483
|
+
('1079', '1022', 'Profile Anomaly' , 'Unexpected Emails', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ),
|
|
484
|
+
('1080', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'postgresql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10 ) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10 ) AS B ORDER BY data_type, count DESC;' ),
|
|
485
|
+
('1081', '1024', 'Profile Anomaly' , 'Invalid_Zip3_USA', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') <> ''999'' GROUP BY "{COLUMN_NAME}" ORDER BY count DESC, "{COLUMN_NAME}" LIMIT 500;'),
|
|
486
|
+
('1082', '1025', 'Profile Anomaly' , 'Delimited_Data_Embedded', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" ~ ''^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'' AND "{COLUMN_NAME}" !~ ''\s(and|but|or|yet)\s'' GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT 500;' ),
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
('1083', '1004', 'Test Results', 'Alpha_Trunc', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", LENGTH("{COLUMN_NAME}") as current_max_length, {THRESHOLD_VALUE} as previous_max_length FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT MAX(LENGTH("{COLUMN_NAME}")) as max_length FROM {TARGET_SCHEMA}.{TABLE_NAME}) a WHERE LENGTH("{COLUMN_NAME}") = a.max_length AND a.max_length < {THRESHOLD_VALUE} LIMIT 500;'),
|
|
490
|
+
('1084', '1005', 'Test Results', 'Avg_Shift', 'postgresql', NULL, 'SELECT AVG("{COLUMN_NAME}" :: FLOAT) AS current_average FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
491
|
+
('1085', '1006', 'Test Results', 'Condition_Flag', 'postgresql', NULL, 'SELECT * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {CUSTOM_QUERY} LIMIT 500;'),
|
|
492
|
+
('1086', '1007', 'Test Results', 'Constant', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" <> {BASELINE_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
493
|
+
('1087', '1009', 'Test Results', 'Daily_Record_Ct', 'postgresql', NULL, 'WITH RECURSIVE daterange(all_dates) AS (SELECT MIN("{COLUMN_NAME}") :: DATE AS all_dates FROM {TARGET_SCHEMA}.{TABLE_NAME} UNION ALL SELECT (d.all_dates :: DATE + INTERVAL ''1 day'') :: DATE AS all_dates FROM daterange d WHERE d.all_dates < (SELECT MAX("{COLUMN_NAME}") :: DATE FROM {TARGET_SCHEMA}.{TABLE_NAME}) ), existing_periods AS ( SELECT DISTINCT "{COLUMN_NAME}" :: DATE AS period, COUNT(1) AS period_count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" :: DATE ) SELECT d.all_dates AS missing_period, MAX(b.period) AS prior_available_date, (SELECT period_count FROM existing_periods WHERE period = MAX(b.period) ) AS prior_available_date_count, MIN(c.period) AS next_available_date, (SELECT period_count FROM existing_periods WHERE period = MIN(c.period) ) AS next_available_date_count FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates LIMIT 500;'),
|
|
494
|
+
('1088', '1011', 'Test Results', 'Dec_Trunc', 'postgresql', NULL, 'SELECT DISTINCT LENGTH(SPLIT_PART("{COLUMN_NAME}" :: TEXT, ''.'', 2)) AS decimal_scale, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY decimal_scale LIMIT 500;'),
|
|
495
|
+
('1089', '1012', 'Test Results', 'Distinct_Date_Ct', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;'),
|
|
496
|
+
('1090', '1013', 'Test Results', 'Distinct_Value_Ct', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;'),
|
|
497
|
+
('1091', '1014', 'Test Results', 'Email_Format', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" !~ ''^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'' GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
498
|
+
('1092', '1015', 'Test Results', 'Future_Date', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE GREATEST(0, SIGN("{COLUMN_NAME}"::DATE - ''{TEST_DATE}''::DATE)) > {THRESHOLD_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
499
|
+
('1093', '1016', 'Test Results', 'Future_Date_1Y', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE GREATEST(0, SIGN("{COLUMN_NAME}"::DATE - (''{TEST_DATE}''::DATE + 365))) > {THRESHOLD_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
500
|
+
('1094', '1017', 'Test Results', 'Incr_Avg_Shift', 'postgresql', NULL, 'SELECT AVG("{COLUMN_NAME}" :: FLOAT) AS current_average, SUM("{COLUMN_NAME}" ::FLOAT) AS current_sum, NULLIF(COUNT("{COLUMN_NAME}" )::FLOAT, 0) as current_value_count FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
501
|
+
('1095', '1018', 'Test Results', 'LOV_All', 'postgresql', NULL, 'SELECT STRING_AGG(DISTINCT "{COLUMN_NAME}", ''|'' ORDER BY "{COLUMN_NAME}" ASC) FROM {TARGET_SCHEMA}.{TABLE_NAME} HAVING STRING_AGG(DISTINCT "{COLUMN_NAME}", ''|'' ORDER BY "{COLUMN_NAME}" ASC) <> ''{THRESHOLD_VALUE}'' LIMIT 500;'),
|
|
502
|
+
('1096', '1019', 'Test Results', 'LOV_Match', 'postgresql', NULL, 'SELECT DISTINCT NULLIF("{COLUMN_NAME}", '''') AS "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT IN {BASELINE_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
503
|
+
('1097', '1020', 'Test Results', 'Min_Date', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" :: DATE < ''{BASELINE_VALUE}'' :: DATE GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
504
|
+
('1098', '1021', 'Test Results', 'Min_Val', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", (ABS("{COLUMN_NAME}") - ABS({BASELINE_VALUE})) AS difference_from_baseline FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" < {BASELINE_VALUE} LIMIT 500;'),
|
|
505
|
+
('1099', '1022', 'Test Results', 'Missing_Pct', 'postgresql', NULL, 'SELECT * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NULL OR "{COLUMN_NAME}" :: VARCHAR(255) = '''' LIMIT 10;'),
|
|
506
|
+
('1100', '1023', 'Test Results', 'Monthly_Rec_Ct', 'postgresql', NULL, 'WITH RECURSIVE daterange(all_dates) AS (SELECT DATE_TRUNC(''month'', MIN("{COLUMN_NAME}")) :: DATE AS all_dates FROM {TARGET_SCHEMA}.{TABLE_NAME} UNION ALL SELECT (d.all_dates :: DATE + INTERVAL ''1 month'') :: DATE AS all_dates FROM daterange d WHERE d.all_dates < (SELECT DATE_TRUNC(''month'', MAX("{COLUMN_NAME}")) :: DATE FROM {TARGET_SCHEMA}.{TABLE_NAME}) ), existing_periods AS ( SELECT DISTINCT DATE_TRUNC(''month'',"{COLUMN_NAME}") :: DATE AS period, COUNT(1) AS period_count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY DATE_TRUNC(''month'',"{COLUMN_NAME}") :: DATE ) SELECT d.all_dates as missing_period, MAX(b.period) AS prior_available_month, (SELECT period_count FROM existing_periods WHERE period = MAX(b.period) ) AS prior_available_month_count, MIN(c.period) AS next_available_month, (SELECT period_count FROM existing_periods WHERE period = MIN(c.period) ) AS next_available_month_count FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates ORDER BY d.all_dates;'),
|
|
507
|
+
('1101', '1024', 'Test Results', 'Outlier_Pct_Above', 'postgresql', NULL, 'SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" :: FLOAT > ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'),
|
|
508
|
+
('1102', '1025', 'Test Results', 'Outlier_Pct_Below', 'postgresql', NULL, 'SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" :: FLOAT < ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'),
|
|
509
|
+
('1103', '1026', 'Test Results', 'Pattern_Match', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT SIMILAR TO ''{BASELINE_VALUE}'' GROUP BY "{COLUMN_NAME}";'),
|
|
510
|
+
('1104', '1028', 'Test Results', 'Recency', 'postgresql', NULL, 'SELECT DISTINCT col AS latest_date_available, ''{TEST_DATE}'' :: DATE as test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM {TARGET_SCHEMA}.{TABLE_NAME}) a WHERE {DATA_QC_SCHEMA}.DATEDIFF(''day'', col, ''{TEST_DATE}''::DATE) > {THRESHOLD_VALUE};'),
|
|
511
|
+
('1105', '1030', 'Test Results', 'Required', 'postgresql', NULL, 'SELECT * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NULL LIMIT 500;'),
|
|
512
|
+
('1106', '1031', 'Test Results', 'Row_Ct', 'postgresql', NULL, 'WITH CTE AS (SELECT COUNT(*) AS current_count FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT current_count, ABS(ROUND(100 * (current_count - {THRESHOLD_VALUE}) :: NUMERIC / {THRESHOLD_VALUE} :: NUMERIC,2)) AS row_count_pct_decrease FROM cte WHERE current_count < {THRESHOLD_VALUE};'),
|
|
513
|
+
('1107', '1032', 'Test Results', 'Row_Ct_Pct', 'postgresql', NULL, 'WITH CTE AS (SELECT COUNT(*) AS current_count FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT current_count, {BASELINE_CT} AS baseline_count, ABS(ROUND(100 * (current_count - {BASELINE_CT}) :: NUMERIC / {BASELINE_CT} :: NUMERIC,2)) AS row_count_pct_difference FROM cte;'),
|
|
514
|
+
('1108', '1033', 'Test Results', 'Street_Addr_Pattern', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" !~ ''^[0-9]{1,5}[a-zA-Z]?\s\w{1,5}\.?\s?\w*\s?\w*\s[a-zA-Z]{1,6}\.?\s?[0-9]{0,5}[A-Z]{0,1}$'' GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT 500;'),
|
|
515
|
+
('1109', '1036', 'Test Results', 'US_State', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT IN (''AL'',''AK'',''AS'',''AZ'',''AR'',''CA'',''CO'',''CT'',''DE'',''DC'',''FM'',''FL'',''GA'',''GU'',''HI'',''ID'',''IL'',''IN'',''IA'',''KS'',''KY'',''LA'',''ME'',''MH'',''MD'',''MA'',''MI'',''MN'',''MS'',''MO'',''MT'',''NE'',''NV'',''NH'',''NJ'',''NM'',''NY'',''NC'',''ND'',''MP'',''OH'',''OK'',''OR'',''PW'',''PA'',''PR'',''RI'',''SC'',''SD'',''TN'',''TX'',''UT'',''VT'',''VI'',''VA'',''WA'',''WV'',''WI'',''WY'',''AE'',''AP'',''AA'') GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
516
|
+
('1110', '1034', 'Test Results', 'Unique', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" HAVING COUNT(*) > 1 ORDER BY COUNT(*) DESC LIMIT 500;'),
|
|
517
|
+
('1111', '1035', 'Test Results', 'Unique_Pct', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT 500;'),
|
|
518
|
+
('1112', '1037', 'Test Results', 'Weekly_Rec_Ct', 'postgresql', NULL, 'WITH RECURSIVE daterange(all_dates) AS (SELECT DATE_TRUNC(''week'', MIN("{COLUMN_NAME}")) :: DATE AS all_dates FROM {TARGET_SCHEMA}.{TABLE_NAME} UNION ALL SELECT (d.all_dates + INTERVAL ''1 week'' ) :: DATE AS all_dates FROM daterange d WHERE d.all_dates < (SELECT DATE_TRUNC(''week'' , MAX("{COLUMN_NAME}")) :: DATE FROM {TARGET_SCHEMA}.{TABLE_NAME}) ), existing_periods AS (SELECT DISTINCT DATE_TRUNC(''week'', "{COLUMN_NAME}") :: DATE AS period, COUNT(1) as period_count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY DATE_TRUNC(''week'', "{COLUMN_NAME}") :: DATE) SELECT d.all_dates as missing_period, MAX(b.period) AS prior_available_week, (SELECT period_count FROM existing_periods WHERE period = MAX(b.period) ) AS prior_available_week_count, MIN(c.period) AS next_available_week, (SELECT period_count FROM existing_periods WHERE period = MIN(c.period) ) AS next_available_week_count FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates ORDER BY d.all_dates;'),
|
|
519
|
+
('1113', '1040', 'Test Results', 'Variability_Increase', 'postgresql', NULL, 'SELECT STDDEV(CAST("{COLUMN_NAME}" AS FLOAT)) as current_standard_deviation FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
520
|
+
('1114', '1041', 'Test Results', 'Variability_Decrease', 'postgresql', NULL, 'SELECT STDDEV(CAST("{COLUMN_NAME}" AS FLOAT)) as current_standard_deviation FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
521
|
+
|
|
522
|
+
('1115', '1001', 'Profile Anomaly' , 'Suggested_Type', 'mssql', NULL, 'SELECT TOP 20 "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC;'),
|
|
523
|
+
('1116', '1002', 'Profile Anomaly' , 'Non_Standard_Blanks', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CASE WHEN "{COLUMN_NAME}" IN (''.'', ''?'') OR "{COLUMN_NAME}" LIKE '' '' THEN 1 WHEN LEN("{COLUMN_NAME}") > 1 AND ( LOWER("{COLUMN_NAME}") LIKE ''%..%'' OR LOWER("{COLUMN_NAME}") LIKE ''%--%'' OR (LEN(REPLACE("{COLUMN_NAME}", ''0'', ''''))= 0 ) OR (LEN(REPLACE("{COLUMN_NAME}", ''9'', ''''))= 0 ) OR (LEN(REPLACE(LOWER("{COLUMN_NAME}"), ''x'', ''''))= 0 ) OR (LEN(REPLACE(LOWER("{COLUMN_NAME}"), ''z'', ''''))= 0 ) ) THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN "{COLUMN_NAME}" = '''' THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";'),
|
|
524
|
+
('1117', '1003', 'Profile Anomaly' , 'Invalid_Zip_USA', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";'),
|
|
525
|
+
('1118', '1004', 'Profile Anomaly' , 'Multiple_Types_Minor', 'mssql', NULL, 'SELECT TOP 500 column_name, columns.table_name, CASE WHEN data_type = ''datetime'' THEN ''datetime'' WHEN data_type = ''datetime2'' THEN ''datetime'' WHEN data_type = ''varchar'' THEN ''varchar('' + CAST(character_maximum_length AS VARCHAR) + '')'' WHEN data_type = ''char'' THEN ''char('' + CAST(character_maximum_length AS VARCHAR) + '')'' WHEN data_type = ''numeric'' THEN ''numeric('' + CAST(numeric_precision AS VARCHAR) + '','' + CAST(numeric_scale AS VARCHAR) + '')'' ELSE data_type END AS data_type FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND tables.table_type = ''BASE TABLE'' ORDER BY data_type, table_name;'),
|
|
526
|
+
('1119', '1005', 'Profile Anomaly' , 'Multiple_Types_Major', 'mssql', NULL, 'SELECT TOP 500 column_name, columns.table_name, CASE WHEN data_type = ''datetime'' THEN ''datetime'' WHEN data_type = ''datetime2'' THEN ''datetime'' WHEN data_type = ''varchar'' THEN ''varchar('' + CAST(character_maximum_length AS VARCHAR) + '')'' WHEN data_type = ''char'' THEN ''char('' + CAST(character_maximum_length AS VARCHAR) + '')'' WHEN data_type = ''numeric'' THEN ''numeric('' + CAST(numeric_precision AS VARCHAR) + '','' + CAST(numeric_scale AS VARCHAR) + '')'' ELSE data_type END AS data_type FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND tables.table_type = ''BASE TABLE'' ORDER BY data_type, table_name;'),
|
|
527
|
+
('1120', '1006', 'Profile Anomaly' , 'No_Values', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
528
|
+
('1121', '1007', 'Profile Anomaly' , 'Column_Pattern_Mismatch', 'mssql', NULL, 'WITH cte AS ( SELECT TRIM(value) AS top_pattern, ROW_NUMBER() OVER (ORDER BY CHARINDEX(''| ''+ TRIM(value) + '' |'', ''| '' + ''{DETAIL_EXPRESSION}'' + '' |'' ) ASC) as row_num FROM STRING_SPLIT(''{DETAIL_EXPRESSION}'', ''|'') ) SELECT DISTINCT TOP 5 c.top_pattern, a."{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a, cte c WHERE c.row_num = 4 AND TRANSLATE(a."{COLUMN_NAME}" COLLATE Latin1_General_BIN, ''abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'', ''aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAANNNNNNNNNN'') = c.top_pattern GROUP BY c.top_pattern, a."{COLUMN_NAME}" UNION ALL SELECT DISTINCT TOP 5 c.top_pattern, a."{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a, cte c WHERE c.row_num = 6 AND TRANSLATE(a."{COLUMN_NAME}" COLLATE Latin1_General_BIN, ''abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'', ''aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAANNNNNNNNNN'') = c.top_pattern GROUP BY c.top_pattern, a."{COLUMN_NAME}" UNION ALL SELECT DISTINCT TOP 5 c.top_pattern, a."{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a, cte c WHERE c.row_num = 8 AND TRANSLATE(a."{COLUMN_NAME}" COLLATE Latin1_General_BIN, ''abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'', ''aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAANNNNNNNNNN'') = c.top_pattern GROUP BY c.top_pattern, a."{COLUMN_NAME}" UNION ALL SELECT DISTINCT TOP 5 c.top_pattern, a."{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a, cte c WHERE c.row_num = 10 AND TRANSLATE(a."{COLUMN_NAME}" COLLATE Latin1_General_BIN, ''abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'', ''aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAANNNNNNNNNN'') = c.top_pattern GROUP BY c.top_pattern, a."{COLUMN_NAME}" ORDER BY top_pattern DESC, count DESC;' ),
|
|
529
|
+
('1122', '1008', 'Profile Anomaly' , 'Table_Pattern_Mismatch', 'mssql', NULL, 'SELECT TOP 500 column_name, columns.table_name FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND UPPER(tables.table_type) = ''BASE TABLE'' ORDER BY table_name;' ),
|
|
530
|
+
('1123', '1009', 'Profile Anomaly' , 'Leading_Spaces', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" BETWEEN '' !'' AND ''!'' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
531
|
+
('1124', '1010', 'Profile Anomaly' , 'Quoted_Values', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" LIKE ''"%"'' OR "{COLUMN_NAME}" LIKE ''''''%'''''' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
532
|
+
('1125', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'mssql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT TOP 10 ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT TOP 10 ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ),
|
|
533
|
+
('1126', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'mssql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT TOP 10 ''Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT TOP 10 ''Non-Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ),
|
|
534
|
+
('1127', '1013', 'Profile Anomaly' , 'Small Missing Value Ct', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" IN (''.'', ''?'', '' '') THEN 1 WHEN LEN("{COLUMN_NAME}") > 1 AND ( LOWER("{COLUMN_NAME}") LIKE ''%..%'' OR LOWER("{COLUMN_NAME}") LIKE ''%--%'' OR (LEN(REPLACE("{COLUMN_NAME}", ''0'', ''''))= 0 ) OR (LEN(REPLACE("{COLUMN_NAME}", ''9'', ''''))= 0 ) OR (LEN(REPLACE(LOWER("{COLUMN_NAME}"), ''x'', ''''))= 0 ) OR (LEN(REPLACE(LOWER("{COLUMN_NAME}"), ''z'', ''''))= 0 ) ) THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN "{COLUMN_NAME}" = '''' THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
535
|
+
('1128', '1014', 'Profile Anomaly' , 'Small Divergent Value Ct', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ),
|
|
536
|
+
('1129', '1015', 'Profile Anomaly' , 'Boolean_Value_Mismatch', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ),
|
|
537
|
+
('1130', '1016', 'Profile Anomaly' , 'Potential_Duplicates', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" HAVING COUNT(*)> 1 ORDER BY COUNT(*) DESC;' ),
|
|
538
|
+
('1131', '1017', 'Profile Anomaly' , 'Standardized_Value_Matches', 'mssql', NULL, 'WITH CTE AS ( SELECT DISTINCT TOP 500 UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",'' '''''''',.-'',REPLICATE('' '', LEN('' '''''''',.-''))),'' '','''')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") as distinct_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",'' '''''''',.-'',REPLICATE('' '', LEN('' '''''''',.-''))),'' '','''')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", possible_standard_value, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a, cte b WHERE UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",'' '''''''',.-'',REPLICATE('' '', LEN('' '''''''',.-''))),'' '','''')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}", possible_standard_value ORDER BY possible_standard_value ASC, count DESC;' ),
|
|
539
|
+
('1132', '1018', 'Profile Anomaly' , 'Unlikely_Date_Values', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", CAST( ''{PROFILE_RUN_DATE}'' AS DATE) AS profile_run_date, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a WHERE ("{COLUMN_NAME}" < CAST(''1900-01-01'' AS DATE) ) OR ("{COLUMN_NAME}" > DATEADD(YEAR, 30, CAST(''{PROFILE_RUN_DATE}'' AS DATE ))) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;' ),
|
|
540
|
+
-- Recency_One_Year (mssql): the query slot holds the literal text created_in_ui rather than a SQL statement. NOTE(review): presumably a marker resolved by the UI layer - confirm against the code that reads this column.
('1133', '1019', 'Profile Anomaly' , 'Recency_One_Year', 'mssql', NULL, 'created_in_ui' ),
|
|
541
|
+
-- Recency_Six_Months (mssql): the query slot holds the literal text created_in_ui rather than a SQL statement. NOTE(review): presumably a marker resolved by the UI layer - confirm against the code that reads this column.
('1134', '1020', 'Profile Anomaly' , 'Recency_Six_Months', 'mssql', NULL, 'created_in_ui' ),
|
|
542
|
+
('1135', '1021', 'Profile Anomaly' , 'Unexpected US States', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;' ),
|
|
543
|
+
('1136', '1022', 'Profile Anomaly' , 'Unexpected Emails', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;' ),
|
|
544
|
+
('1137', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'mssql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT TOP 10 ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT TOP 10 ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ),
|
|
545
|
+
-- Invalid_Zip3_USA (mssql): TRANSLATE rewrites every digit 0-8 to 9, so a value made of exactly three digits collapses to 999. Any value whose mask differs from 999 is listed, most frequent first, capped at 500 rows.
('1138', '1024', 'Profile Anomaly' , 'Invalid_Zip3_USA', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') <> ''999'' GROUP BY "{COLUMN_NAME}" ORDER BY count DESC, "{COLUMN_NAME}";'),
|
|
546
|
+
('1139', '1025', 'Profile Anomaly' , 'Delimited_Data_Embedded', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE ( "{COLUMN_NAME}" LIKE ''%,%,%,%'' OR "{COLUMN_NAME}" LIKE ''%|%|%|%'' OR "{COLUMN_NAME}" LIKE ''%^%^%^%'' OR "{COLUMN_NAME}" LIKE ''%'' + CHAR(9) + ''%'' + CHAR(9) + ''%'' + CHAR(9) + ''%'' ) AND NOT ( "{COLUMN_NAME}" LIKE ''% and %'' OR "{COLUMN_NAME}" LIKE ''% but %'' OR "{COLUMN_NAME}" LIKE ''% or %'' OR "{COLUMN_NAME}" LIKE ''% yet %'' ) AND ISNULL(CAST(LEN("{COLUMN_NAME}") - LEN(REPLACE("{COLUMN_NAME}", '','', '''')) as FLOAT) / CAST(NULLIF(LEN("{COLUMN_NAME}") - LEN(REPLACE("{COLUMN_NAME}", '' '', '''')), 0) as FLOAT), 1) > 0.6 GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ),
|
|
547
|
+
|
|
548
|
+
('1140', '1004', 'Test Results', 'Alpha_Trunc', 'mssql', NULL, 'SELECT DISTINCT TOP 500 "{COLUMN_NAME}", LEN("{COLUMN_NAME}") as current_max_length, {THRESHOLD_VALUE} as previous_max_length FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT MAX(LEN("{COLUMN_NAME}")) as max_length FROM {TARGET_SCHEMA}.{TABLE_NAME}) a WHERE LEN("{COLUMN_NAME}") = a.max_length AND a.max_length < {THRESHOLD_VALUE} ;'),
|
|
549
|
+
('1141', '1005', 'Test Results', 'Avg_Shift', 'mssql', NULL, 'SELECT AVG(CAST("{COLUMN_NAME}" AS FLOAT)) AS current_average FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
550
|
+
('1142', '1006', 'Test Results', 'Condition_Flag', 'mssql', NULL, 'SELECT TOP 500 * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {CUSTOM_QUERY};'),
|
|
551
|
+
('1143', '1007', 'Test Results', 'Constant', 'mssql', NULL, 'SELECT DISTINCT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" <> {BASELINE_VALUE} GROUP BY "{COLUMN_NAME}";'),
|
|
552
|
+
('1144', '1009', 'Test Results', 'Daily_Record_Ct', 'mssql', NULL, 'WITH
|
|
553
|
+
Pass0 as (select 1 as C union all select 1), --2 rows
|
|
554
|
+
Pass1 as (select 1 as C from Pass0 as A, Pass0 as B),--4 rows
|
|
555
|
+
Pass2 as (select 1 as C from Pass1 as A, Pass1 as B),--16 rows
|
|
556
|
+
Pass3 as (select 1 as C from Pass2 as A, Pass2 as B),--256 rows
|
|
557
|
+
Pass4 as (select 1 as C from Pass3 as A, Pass3 as B),--65536 rows
|
|
558
|
+
All_Nums as (select row_number() over(order by C) as Number from Pass4),
|
|
559
|
+
tally as (SELECT Number FROM All_Nums WHERE Number <= 45000),
|
|
560
|
+
|
|
561
|
+
date_range as (SELECT CAST(DATEADD(DAY, DATEDIFF(DAY, 0, MIN("{COLUMN_NAME}")), 0) AS DATE) AS min_period,
|
|
562
|
+
CAST(DATEADD(DAY, DATEDIFF(DAY, 0, MAX("{COLUMN_NAME}")), 0) AS DATE) AS max_period,
|
|
563
|
+
DATEDIFF(DAY,
|
|
564
|
+
CAST(DATEADD(DAY, DATEDIFF(DAY, 0, MIN("{COLUMN_NAME}")), 0) AS DATE),
|
|
565
|
+
CAST(DATEADD(DAY, DATEDIFF(DAY, 0, MAX("{COLUMN_NAME}")), 0) AS DATE) ) + 1 as period_ct
|
|
566
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME} ),
|
|
567
|
+
check_periods as ( SELECT d.min_period, d.max_period, t.number,
|
|
568
|
+
DATEADD(DAY, -(t.number - 1), d.max_period) AS check_period
|
|
569
|
+
FROM date_range d
|
|
570
|
+
INNER JOIN tally t
|
|
571
|
+
ON (d.period_ct >= t.number) ),
|
|
572
|
+
data_by_period as (SELECT CAST(DATEADD(DAY, DATEDIFF(DAY, 0, "{COLUMN_NAME}"), 0) AS DATE) as data_period, COUNT(*) as record_ct
|
|
573
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
574
|
+
GROUP BY CAST(DATEADD(DAY, DATEDIFF(DAY, 0, "{COLUMN_NAME}"), 0) AS DATE) ),
|
|
575
|
+
data_by_prd_with_prior_next as (SELECT check_period,
|
|
576
|
+
RANK() OVER (ORDER BY check_period DESC) as ranked,
|
|
577
|
+
ISNULL(d.record_ct, 0) as record_ct,
|
|
578
|
+
ISNULL(LAG(d.record_ct) OVER (ORDER BY check_period), 0) as last_record_ct,
|
|
579
|
+
ISNULL(LEAD(d.record_ct) OVER (ORDER BY check_period), 0) as next_record_ct
|
|
580
|
+
FROM check_periods c
|
|
581
|
+
LEFT JOIN data_by_period d
|
|
582
|
+
ON (c.check_period = d.data_period) )
|
|
583
|
+
SELECT check_period, record_ct,
|
|
584
|
+
CASE
|
|
585
|
+
WHEN record_ct = 0 THEN ''MISSING''
|
|
586
|
+
ELSE ''Present''
|
|
587
|
+
END as status
|
|
588
|
+
FROM data_by_prd_with_prior_next
|
|
589
|
+
WHERE record_ct = 0
|
|
590
|
+
OR last_record_ct = 0
|
|
591
|
+
OR next_record_ct = 0
|
|
592
|
+
ORDER BY check_period DESC;'),
|
|
593
|
+
('1145', '1011', 'Test Results', 'Dec_Trunc', 'mssql', NULL, 'WITH CTE AS ( SELECT LEN(SUBSTRING(CAST(ABS("{COLUMN_NAME}") % 1 AS VARCHAR) , 3, LEN("{COLUMN_NAME}"))) AS decimal_scale FROM {TARGET_SCHEMA}.{TABLE_NAME} ) SELECT DISTINCT TOP 500 decimal_scale,COUNT(*) AS count FROM cte GROUP BY decimal_scale ORDER BY COUNT(*) DESC; '),
|
|
594
|
+
('1146', '1012', 'Test Results', 'Distinct_Date_Ct', 'mssql', NULL, 'SELECT DISTINCT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'),
|
|
595
|
+
('1147', '1013', 'Test Results', 'Distinct_Value_Ct', 'mssql', NULL, 'SELECT DISTINCT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'),
|
|
596
|
+
('1148', '1014', 'Test Results', 'Email_Format', 'mssql', NULL, 'SELECT DISTINCT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" NOT LIKE ''%[_a-zA-Z0-9.-]%@%[a-zA-Z0-9.-]%.[a-zA-Z][a-zA-Z]%'' GROUP BY "{COLUMN_NAME}";'),
|
|
597
|
+
('1149', '1015', 'Test Results', 'Future_Date', 'mssql', NULL, 'SELECT DISTINCT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CAST("{COLUMN_NAME}" AS DATE) >= CONVERT(DATE, ''{TEST_DATE}'') GROUP BY "{COLUMN_NAME}";'),
|
|
598
|
+
('1150', '1016', 'Test Results', 'Future_Date_1Y', 'mssql', NULL, 'SELECT DISTINCT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CAST("{COLUMN_NAME}" AS DATE) >= DATEADD(DAY, 365, CONVERT(DATE, ''{TEST_DATE}'')) GROUP BY "{COLUMN_NAME}";'),
|
|
599
|
+
('1151', '1017', 'Test Results', 'Incr_Avg_Shift', 'mssql', NULL, 'SELECT AVG(CAST("{COLUMN_NAME}" AS FLOAT)) AS current_average, SUM(CAST("{COLUMN_NAME}" AS FLOAT)) AS current_sum, NULLIF(CAST(COUNT("{COLUMN_NAME}") AS FLOAT), 0) as current_value_count FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
600
|
+
('1152', '1018', 'Test Results', 'LOV_All', 'mssql', NULL, 'WITH CTE AS (SELECT DISTINCT "{COLUMN_NAME}" FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT STRING_AGG( "{COLUMN_NAME}", ''|'' ) WITHIN GROUP (ORDER BY "{COLUMN_NAME}" ASC) FROM CTE HAVING STRING_AGG("{COLUMN_NAME}", ''|'') WITHIN GROUP (ORDER BY "{COLUMN_NAME}" ASC) <> ''{THRESHOLD_VALUE}'';'),
|
|
601
|
+
('1153', '1019', 'Test Results', 'LOV_Match', 'mssql', NULL, 'SELECT DISTINCT TOP 500 NULLIF("{COLUMN_NAME}", '''') AS "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT IN {BASELINE_VALUE} GROUP BY "{COLUMN_NAME}" ;'),
|
|
602
|
+
('1154', '1020', 'Test Results', 'Min_Date', 'mssql', NULL, 'SELECT DISTINCT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CAST("{COLUMN_NAME}" AS DATE) < CAST(''{BASELINE_VALUE}'' AS DATE) GROUP BY "{COLUMN_NAME}";'),
|
|
603
|
+
-- Min_Val (mssql): lists up to 500 distinct values that fall below the baseline minimum, with the signed gap (value minus baseline) so the result is always negative and correct for negative or mixed-sign inputs. The baseline is parenthesised so a negative literal substitutes cleanly.
('1155', '1021', 'Test Results', 'Min_Val', 'mssql', NULL, 'SELECT DISTINCT TOP 500 "{COLUMN_NAME}", ("{COLUMN_NAME}" - ({BASELINE_VALUE})) AS difference_from_baseline FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" < {BASELINE_VALUE};'),
|
|
604
|
+
('1156', '1022', 'Test Results', 'Missing_Pct', 'mssql', NULL, 'SELECT TOP 10 * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NULL OR CAST("{COLUMN_NAME}" AS VARCHAR(255)) = '''';'),
|
|
605
|
+
('1157', '1023', 'Test Results', 'Monthly_Rec_Ct', 'mssql', NULL, 'WITH
|
|
606
|
+
Pass0 as (select 1 as C union all select 1), --2 rows
|
|
607
|
+
Pass1 as (select 1 as C from Pass0 as A, Pass0 as B),--4 rows
|
|
608
|
+
Pass2 as (select 1 as C from Pass1 as A, Pass1 as B),--16 rows
|
|
609
|
+
Pass3 as (select 1 as C from Pass2 as A, Pass2 as B),--256 rows
|
|
610
|
+
Pass4 as (select 1 as C from Pass3 as A, Pass3 as B),--65536 rows
|
|
611
|
+
All_Nums as (select row_number() over(order by C) as Number from Pass4),
|
|
612
|
+
tally as (SELECT Number FROM All_Nums WHERE Number <= 45000),
|
|
613
|
+
|
|
614
|
+
date_range as (SELECT CAST(DATEADD(MONTH, DATEDIFF(MONTH, 0, MIN("{COLUMN_NAME}")), 0) AS DATE) AS min_period,
|
|
615
|
+
CAST(DATEADD(MONTH, DATEDIFF(MONTH, 0, MAX("{COLUMN_NAME}")), 0) AS DATE) AS max_period,
|
|
616
|
+
DATEDIFF(MONTH,
|
|
617
|
+
CAST(DATEADD(MONTH, DATEDIFF(MONTH, 0, MIN("{COLUMN_NAME}")), 0) AS DATE),
|
|
618
|
+
CAST(DATEADD(MONTH, DATEDIFF(MONTH, 0, MAX("{COLUMN_NAME}")), 0) AS DATE) ) + 1 as period_ct
|
|
619
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME} ),
|
|
620
|
+
check_periods as ( SELECT d.min_period, d.max_period, t.number,
|
|
621
|
+
DATEADD(MONTH, -(t.number - 1), d.max_period) AS check_period
|
|
622
|
+
FROM date_range d
|
|
623
|
+
INNER JOIN tally t
|
|
624
|
+
ON (d.period_ct >= t.number) ),
|
|
625
|
+
data_by_period as (SELECT CAST(DATEADD(MONTH, DATEDIFF(MONTH, 0, "{COLUMN_NAME}"), 0) AS DATE) as data_period, COUNT(*) as record_ct
|
|
626
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
627
|
+
GROUP BY CAST(DATEADD(MONTH, DATEDIFF(MONTH, 0, "{COLUMN_NAME}"), 0) AS DATE) ),
|
|
628
|
+
data_by_prd_with_prior_next as (SELECT check_period,
|
|
629
|
+
RANK() OVER (ORDER BY check_period DESC) as ranked,
|
|
630
|
+
ISNULL(d.record_ct, 0) as record_ct,
|
|
631
|
+
ISNULL(LAG(d.record_ct) OVER (ORDER BY check_period), 0) as last_record_ct,
|
|
632
|
+
ISNULL(LEAD(d.record_ct) OVER (ORDER BY check_period), 0) as next_record_ct
|
|
633
|
+
FROM check_periods c
|
|
634
|
+
LEFT JOIN data_by_period d
|
|
635
|
+
ON (c.check_period = d.data_period) )
|
|
636
|
+
SELECT check_period, record_ct,
|
|
637
|
+
CASE
|
|
638
|
+
WHEN record_ct = 0 THEN ''MISSING''
|
|
639
|
+
ELSE ''Present''
|
|
640
|
+
END as status
|
|
641
|
+
FROM data_by_prd_with_prior_next
|
|
642
|
+
WHERE record_ct = 0
|
|
643
|
+
OR last_record_ct = 0
|
|
644
|
+
OR next_record_ct = 0
|
|
645
|
+
ORDER BY check_period DESC;'),
|
|
646
|
+
('1158', '1024', 'Test Results', 'Outlier_Pct_Above', 'mssql', NULL, 'SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CAST("{COLUMN_NAME}" AS FLOAT) > ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'),
|
|
647
|
+
-- Outlier_Pct_Below (mssql): lists values lying more than two standard deviations BELOW the baseline mean. The threshold is AVG - 2*SD in both the projected outlier_threshold column and the filter, mirroring the AVG + 2*SD used by Outlier_Pct_Above.
('1159', '1025', 'Test Results', 'Outlier_Pct_Below', 'mssql', NULL, 'SELECT ({BASELINE_AVG} - (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CAST("{COLUMN_NAME}" AS FLOAT) < ({BASELINE_AVG} - (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'),
|
|
648
|
+
('1160', '1026', 'Test Results', 'Pattern_Match', 'mssql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT LIKE ''{BASELINE_VALUE}'' GROUP BY "{COLUMN_NAME}";'),
|
|
649
|
+
('1161', '1028', 'Test Results', 'Recency', 'mssql', NULL, 'SELECT DISTINCT col AS latest_date_available, CAST(''{TEST_DATE}'' AS DATE) AS test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM {TARGET_SCHEMA}.{TABLE_NAME}) a WHERE DATEDIFF(day, col, CAST(''{TEST_DATE}'' AS DATE)) > {THRESHOLD_VALUE};'),
|
|
650
|
+
('1162', '1030', 'Test Results', 'Required', 'mssql', NULL, 'SELECT TOP 500 * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NULL;'),
|
|
651
|
+
-- Row_Ct (mssql): reports the current row count and the percentage shortfall against the threshold when the table has fewer rows than expected. The difference is cast to NUMERIC before multiplying by 100 so int arithmetic cannot overflow on large tables, the divisor is wrapped in NULLIF to avoid a divide-by-zero error, and the CTE is referenced with the same casing it is declared with so the query also runs under case-sensitive collations.
('1163', '1031', 'Test Results', 'Row_Ct', 'mssql', NULL, 'WITH CTE AS (SELECT COUNT(*) AS current_count FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT current_count, ABS(ROUND(100 * CAST(current_count - {THRESHOLD_VALUE} AS NUMERIC) / NULLIF(CAST({THRESHOLD_VALUE} AS NUMERIC), 0) ,2)) AS row_count_pct_decrease FROM CTE WHERE current_count < {THRESHOLD_VALUE};'),
|
|
652
|
+
-- Row_Ct_Pct (mssql): reports the current row count, the baseline count and the absolute percentage difference between them. The difference is cast to NUMERIC before multiplying by 100 so int arithmetic cannot overflow on large tables, a zero baseline yields NULL instead of a divide-by-zero error, and the CTE is referenced with the same casing it is declared with so the query also runs under case-sensitive collations.
('1164', '1032', 'Test Results', 'Row_Ct_Pct', 'mssql', NULL, 'WITH CTE AS (SELECT COUNT(*) AS current_count FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT current_count, {BASELINE_CT} AS baseline_count, ABS(ROUND(100 * CAST(current_count - {BASELINE_CT} AS NUMERIC) / NULLIF(CAST({BASELINE_CT} AS NUMERIC), 0) ,2)) AS row_count_pct_difference FROM CTE;'),
|
|
653
|
+
('1165', '1033', 'Test Results', 'Street_Addr_Pattern', 'mssql', NULL, 'SELECT DISTINCT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE UPPER("{COLUMN_NAME}") NOT LIKE ''[1-9]% [A-Z]% %'' AND CHARINDEX('' '', "{COLUMN_NAME}") NOT BETWEEN 2 AND 6 GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;'),
|
|
654
|
+
('1166', '1036', 'Test Results', 'US_State', 'mssql', NULL, 'SELECT DISTINCT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT IN (''AL'',''AK'',''AS'',''AZ'',''AR'',''CA'',''CO'',''CT'',''DE'',''DC'',''FM'',''FL'',''GA'',''GU'',''HI'',''ID'',''IL'',''IN'',''IA'',''KS'',''KY'',''LA'',''ME'',''MH'',''MD'',''MA'',''MI'',''MN'',''MS'',''MO'',''MT'',''NE'',''NV'',''NH'',''NJ'',''NM'',''NY'',''NC'',''ND'',''MP'',''OH'',''OK'',''OR'',''PW'',''PA'',''PR'',''RI'',''SC'',''SD'',''TN'',''TX'',''UT'',''VT'',''VI'',''VA'',''WA'',''WV'',''WI'',''WY'',''AE'',''AP'',''AA'') GROUP BY "{COLUMN_NAME}";'),
|
|
655
|
+
('1167', '1034', 'Test Results', 'Unique', 'mssql', NULL, 'SELECT DISTINCT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" HAVING COUNT(*) > 1 ORDER BY COUNT(*) DESC;'),
|
|
656
|
+
('1168', '1035', 'Test Results', 'Unique_Pct', 'mssql', NULL, 'SELECT DISTINCT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;'),
|
|
657
|
+
('1169', '1037', 'Test Results', 'Weekly_Rec_Ct', 'mssql', NULL, 'WITH
|
|
658
|
+
Pass0 as (select 1 as C union all select 1), --2 rows
|
|
659
|
+
Pass1 as (select 1 as C from Pass0 as A, Pass0 as B),--4 rows
|
|
660
|
+
Pass2 as (select 1 as C from Pass1 as A, Pass1 as B),--16 rows
|
|
661
|
+
Pass3 as (select 1 as C from Pass2 as A, Pass2 as B),--256 rows
|
|
662
|
+
Pass4 as (select 1 as C from Pass3 as A, Pass3 as B),--65536 rows
|
|
663
|
+
All_Nums as (select row_number() over(order by C) as Number from Pass4),
|
|
664
|
+
tally as (SELECT Number FROM All_Nums WHERE Number <= 45000),
|
|
665
|
+
|
|
666
|
+
date_range as (SELECT CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, MIN("{COLUMN_NAME}")), 0) AS DATE) AS min_period,
|
|
667
|
+
CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, MAX("{COLUMN_NAME}")), 0) AS DATE) AS max_period,
|
|
668
|
+
DATEDIFF(WEEK,
|
|
669
|
+
CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, MIN("{COLUMN_NAME}")), 0) AS DATE),
|
|
670
|
+
CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, MAX("{COLUMN_NAME}")), 0) AS DATE) ) + 1 as period_ct
|
|
671
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME} ),
|
|
672
|
+
check_periods as ( SELECT d.min_period, d.max_period, t.number,
|
|
673
|
+
DATEADD(WEEK, -(t.number - 1), d.max_period) AS check_period
|
|
674
|
+
FROM date_range d
|
|
675
|
+
INNER JOIN tally t
|
|
676
|
+
ON (d.period_ct >= t.number) ),
|
|
677
|
+
data_by_period as (SELECT CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, "{COLUMN_NAME}"), 0) AS DATE) as data_period, COUNT(*) as record_ct
|
|
678
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
679
|
+
GROUP BY CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, "{COLUMN_NAME}"), 0) AS DATE) ),
|
|
680
|
+
data_by_prd_with_prior_next as (SELECT check_period,
|
|
681
|
+
RANK() OVER (ORDER BY check_period DESC) as ranked,
|
|
682
|
+
ISNULL(d.record_ct, 0) as record_ct,
|
|
683
|
+
ISNULL(LAG(d.record_ct) OVER (ORDER BY check_period), 0) as last_record_ct,
|
|
684
|
+
ISNULL(LEAD(d.record_ct) OVER (ORDER BY check_period), 0) as next_record_ct
|
|
685
|
+
FROM check_periods c
|
|
686
|
+
LEFT JOIN data_by_period d
|
|
687
|
+
ON (c.check_period = d.data_period) )
|
|
688
|
+
SELECT check_period, record_ct,
|
|
689
|
+
CASE
|
|
690
|
+
WHEN record_ct = 0 THEN ''MISSING''
|
|
691
|
+
ELSE ''Present''
|
|
692
|
+
END as status
|
|
693
|
+
FROM data_by_prd_with_prior_next
|
|
694
|
+
WHERE record_ct = 0
|
|
695
|
+
OR last_record_ct = 0
|
|
696
|
+
OR next_record_ct = 0
|
|
697
|
+
ORDER BY check_period DESC;'),
|
|
698
|
+
('1170', '1040', 'Test Results', 'Variability_Increase', 'mssql', NULL, 'SELECT STDEV(CAST("{COLUMN_NAME}" AS FLOAT)) as current_standard_deviation FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
699
|
+
('1171', '1041', 'Test Results', 'Variability_Decrease', 'mssql', NULL, 'SELECT STDEV(CAST("{COLUMN_NAME}" AS FLOAT)) as current_standard_deviation FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
700
|
+
|
|
701
|
+
('1172', '1001', 'Profile Anomaly' , 'Suggested_Type', 'snowflake', NULL, 'SELECT TOP 20 "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC;'),
|
|
702
|
+
('1173', '1002', 'Profile Anomaly' , 'Non_Standard_Blanks', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CASE WHEN "{COLUMN_NAME}" IN (''.'', ''?'', '' '') THEN 1 WHEN LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP ''-{2,}'' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP ''0{2,}'' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP ''9{2,}'' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP ''x{2,}'' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP ''z{2,}'' THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN "{COLUMN_NAME}" = '''' THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";'),
|
|
703
|
+
-- Invalid_Zip_USA (snowflake): TRANSLATE rewrites every digit 0-8 to 9, producing a digit mask. Values whose mask is not one of 99999, 999999999 or 99999-9999 are listed, ordered by value and capped at 500 rows.
('1174', '1003', 'Profile Anomaly' , 'Invalid_Zip_USA', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
704
|
+
('1175', '1004', 'Profile Anomaly' , 'Multiple_Types_Minor', 'snowflake', NULL, 'SELECT DISTINCT column_name, columns.table_name, CASE WHEN data_type ILIKE ''timestamp%'' THEN lower(data_type) WHEN data_type ILIKE ''date'' THEN lower(data_type) WHEN data_type ILIKE ''boolean'' THEN ''boolean'' WHEN data_type = ''TEXT'' THEN ''varchar('' || CAST(character_maximum_length AS VARCHAR) || '')'' WHEN data_type ILIKE ''char%'' THEN ''char('' || CAST(character_maximum_length AS VARCHAR) || '')'' WHEN data_type = ''NUMBER'' AND numeric_precision = 38 AND numeric_scale = 0 THEN ''bigint'' WHEN data_type ILIKE ''num%'' THEN ''numeric('' || CAST(numeric_precision AS VARCHAR) || '','' || CAST(numeric_scale AS VARCHAR) || '')'' ELSE data_type END AS data_type FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND tables.table_type = ''BASE TABLE'' ORDER BY data_type, table_name;'),
|
|
705
|
+
('1176', '1005', 'Profile Anomaly' , 'Multiple_Types_Major', 'snowflake', NULL, 'SELECT DISTINCT column_name, columns.table_name, CASE WHEN data_type ILIKE ''timestamp%'' THEN lower(data_type) WHEN data_type ILIKE ''date'' THEN lower(data_type) WHEN data_type ILIKE ''boolean'' THEN ''boolean'' WHEN data_type = ''TEXT'' THEN ''varchar('' || CAST(character_maximum_length AS VARCHAR) || '')'' WHEN data_type ILIKE ''char%'' THEN ''char('' || CAST(character_maximum_length AS VARCHAR) || '')'' WHEN data_type = ''NUMBER'' AND numeric_precision = 38 AND numeric_scale = 0 THEN ''bigint'' WHEN data_type ILIKE ''num%'' THEN ''numeric('' || CAST(numeric_precision AS VARCHAR) || '','' || CAST(numeric_scale AS VARCHAR) || '')'' ELSE data_type END AS data_type FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND tables.table_type = ''BASE TABLE'' ORDER BY data_type, table_name;'),
|
|
706
|
+
('1177', '1006', 'Profile Anomaly' , 'No_Values', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
707
|
+
('1178', '1007', 'Profile Anomaly' , 'Column_Pattern_Mismatch', 'snowflake', NULL, 'SELECT A.* FROM (SELECT DISTINCT TOP 5 b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 4)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}"::VARCHAR, ''[a-z]'', ''a''), ''[A-Z]'', ''A''), ''[0-9]'', ''N'') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC) A UNION ALL SELECT B.* FROM (SELECT DISTINCT TOP 5 b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 6)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}"::VARCHAR, ''[a-z]'', ''a''), ''[A-Z]'', ''A''), ''[0-9]'', ''N'') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC) B UNION ALL SELECT C.* FROM (SELECT DISTINCT TOP 5 b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 8)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}"::VARCHAR, ''[a-z]'', ''a''), ''[A-Z]'', ''A''), ''[0-9]'', ''N'') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC) C UNION ALL SELECT D.* FROM (SELECT DISTINCT TOP 5 b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT trim(split_part(''{DETAIL_EXPRESSION}'', ''|'', 10)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}"::VARCHAR, ''[a-z]'', ''a''), ''[A-Z]'', ''A''), ''[0-9]'', ''N'') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC) D ORDER BY top_pattern DESC, count DESC;' ),
|
|
708
|
+
('1179', '1008', 'Profile Anomaly' , 'Table_Pattern_Mismatch', 'snowflake', NULL, 'SELECT DISTINCT column_name, columns.table_name FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND UPPER(tables.table_type) = ''BASE TABLE'' ORDER BY table_name; ' ),
|
|
709
|
+
('1180', '1009', 'Profile Anomaly' , 'Leading_Spaces', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" BETWEEN '' !'' AND ''!'' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
710
|
+
('1181', '1010', 'Profile Anomaly' , 'Quoted_Values', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" ILIKE ''"%"'' OR "{COLUMN_NAME}" ILIKE ''''''%'''''' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
711
|
+
('1182', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'snowflake', NULL, 'SELECT A.* FROM (SELECT DISTINCT TOP 10 ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT TOP 10 ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS B ORDER BY data_type, count DESC;' ),
|
|
712
|
+
('1183', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'snowflake', NULL, 'SELECT A.* FROM (SELECT DISTINCT TOP 10 ''Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT TOP 10 ''Non-Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS B ORDER BY data_type, count DESC;' ),
|
|
713
|
+
('1184', '1013', 'Profile Anomaly' , 'Small Missing Value Ct', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" IN (''.'', ''?'', '' '') THEN 1 WHEN LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP ''-{2,}'' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP ''0{2,}'' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP ''9{2,}'' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP ''x{2,}'' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP ''z{2,}'' THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN "{COLUMN_NAME}" = '''' THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ),
|
|
714
|
+
('1185', '1014', 'Profile Anomaly' , 'Small Divergent Value Ct', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ),
|
|
715
|
+
('1186', '1015', 'Profile Anomaly' , 'Boolean_Value_Mismatch', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ),
|
|
716
|
+
('1187', '1016', 'Profile Anomaly' , 'Potential_Duplicates', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" HAVING COUNT(*)> 1 ORDER BY COUNT(*) DESC LIMIT 500;' ),
|
|
717
|
+
('1188', '1017', 'Profile Anomaly' , 'Standardized_Value_Matches', 'snowflake', NULL, 'WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", '' '''',.-'', '''')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", '' '''',.-'', '''')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", possible_standard_value, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", '' '''',.-'', '''')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}", possible_standard_value ORDER BY possible_standard_value ASC, count DESC LIMIT 500;' ),
|
|
718
|
+
('1189', '1018', 'Profile Anomaly' , 'Unlikely_Date_Values', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", ''{PROFILE_RUN_DATE}'' :: DATE AS profile_run_date, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a WHERE ("{COLUMN_NAME}" < ''1900-01-01''::DATE) OR ("{COLUMN_NAME}" > ''{PROFILE_RUN_DATE}'' :: DATE + INTERVAL ''30 year'' ) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ),
|
|
719
|
+
('1190', '1019', 'Profile Anomaly' , 'Recency_One_Year', 'snowflake', NULL, 'created_in_ui' ),
|
|
720
|
+
('1191', '1020', 'Profile Anomaly' , 'Recency_Six_Months', 'snowflake', NULL, 'created_in_ui' ),
|
|
721
|
+
('1192', '1021', 'Profile Anomaly' , 'Unexpected US States', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ),
|
|
722
|
+
('1193', '1022', 'Profile Anomaly' , 'Unexpected Emails', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ),
|
|
723
|
+
('1194', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'snowflake', NULL, 'SELECT A.* FROM (SELECT DISTINCT TOP 10 ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT TOP 10 ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS B ORDER BY data_type, count DESC;' ),
|
|
724
|
+
('1195', '1024', 'Profile Anomaly' , 'Invalid_Zip3_USA', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') <> ''999'' GROUP BY "{COLUMN_NAME}" ORDER BY count DESC, "{COLUMN_NAME}" LIMIT 500;'),
|
|
725
|
+
('1196', '1025', 'Profile Anomaly' , 'Delimited_Data_Embedded', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE REGEXP_LIKE("{COLUMN_NAME}"::VARCHAR, ''^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'') AND NOT REGEXP_LIKE("{COLUMN_NAME}"::VARCHAR, ''.*\\s(and|but|or|yet)\\s.*'') GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT 500;' ),
|
|
726
|
+
|
|
727
|
+
('1197', '1004', 'Test Results', 'Alpha_Trunc', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}" , LEN("{COLUMN_NAME}") as current_max_length, {THRESHOLD_VALUE} as previous_max_length FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT MAX(LEN("{COLUMN_NAME}")) as max_length FROM {TARGET_SCHEMA}.{TABLE_NAME}) a WHERE LEN("{COLUMN_NAME}") = a.max_length AND a.max_length < {THRESHOLD_VALUE} LIMIT 500;'),
|
|
728
|
+
('1198', '1005', 'Test Results', 'Avg_Shift', 'snowflake', NULL, 'SELECT AVG("{COLUMN_NAME}" :: FLOAT) AS current_average FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
729
|
+
('1199', '1006', 'Test Results', 'Condition_Flag', 'snowflake', NULL, 'SELECT * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {CUSTOM_QUERY} LIMIT 500;'),
|
|
730
|
+
('1200', '1007', 'Test Results', 'Constant', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" <> {BASELINE_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
731
|
+
('1201', '1009', 'Test Results', 'Daily_Record_Ct', 'snowflake', NULL, 'WITH RECURSIVE daterange(all_dates) AS (SELECT MIN("{COLUMN_NAME}") :: DATE AS all_dates FROM {TARGET_SCHEMA}.{TABLE_NAME} UNION ALL SELECT DATEADD(DAY, 1, d.all_dates) :: DATE AS all_dates FROM daterange d WHERE d.all_dates < (SELECT MAX("{COLUMN_NAME}") :: DATE FROM {TARGET_SCHEMA}.{TABLE_NAME}) ), existing_periods AS ( SELECT DISTINCT "{COLUMN_NAME}" :: DATE AS period, COUNT(1) AS period_count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" :: DATE ) SELECT p.missing_period, p.prior_available_date, e.period_count as prior_available_date_count, p.next_available_date, f.period_count as next_available_date_count FROM (SELECT d.all_dates AS missing_period, MAX(b.period) AS prior_available_date, MIN(c.period) AS next_available_date FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates) p LEFT JOIN existing_periods e ON (p.prior_available_date = e.period) LEFT JOIN existing_periods f ON (p.next_available_date = f.period) ORDER BY p.missing_period LIMIT 500;'),
|
|
732
|
+
('1202', '1011', 'Test Results', 'Dec_Trunc', 'snowflake', NULL, 'SELECT DISTINCT LENGTH(SPLIT_PART("{COLUMN_NAME}" :: TEXT, ''.'', 2)) AS decimal_scale, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY decimal_scale LIMIT 500;'),
|
|
733
|
+
('1203', '1012', 'Test Results', 'Distinct_Date_Ct', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;'),
|
|
734
|
+
('1204', '1013', 'Test Results', 'Distinct_Value_Ct', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;'),
|
|
735
|
+
('1205', '1014', 'Test Results', 'Email_Format', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE REGEXP_LIKE("{COLUMN_NAME}"::VARCHAR, ''^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$'') != 1 GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
736
|
+
('1206', '1015', 'Test Results', 'Future_Date', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE GREATEST(0, SIGN("{COLUMN_NAME}"::DATE - ''{TEST_DATE}''::DATE)) > {THRESHOLD_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
737
|
+
('1207', '1016', 'Test Results', 'Future_Date_1Y', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE GREATEST(0, SIGN("{COLUMN_NAME}"::DATE - (''{TEST_DATE}''::DATE + 365))) > {THRESHOLD_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
738
|
+
('1208', '1017', 'Test Results', 'Incr_Avg_Shift', 'snowflake', NULL, 'SELECT AVG("{COLUMN_NAME}" :: FLOAT) AS current_average, SUM("{COLUMN_NAME}" ::FLOAT) AS current_sum, NULLIF(COUNT("{COLUMN_NAME}" )::FLOAT, 0) as current_value_count FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
739
|
+
('1209', '1018', 'Test Results', 'LOV_All', 'snowflake', NULL, 'SELECT LISTAGG(DISTINCT "{COLUMN_NAME}", ''|'') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") FROM {TARGET_SCHEMA}.{TABLE_NAME} HAVING LISTAGG(DISTINCT "{COLUMN_NAME}", ''|'') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") <> ''{THRESHOLD_VALUE}'' LIMIT 500;'),
|
|
740
|
+
('1210', '1019', 'Test Results', 'LOV_Match', 'snowflake', NULL, 'SELECT DISTINCT NULLIF("{COLUMN_NAME}", '''') AS "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT IN {BASELINE_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
741
|
+
('1211', '1020', 'Test Results', 'Min_Date', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" :: DATE < ''{BASELINE_VALUE}'' :: DATE GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
742
|
+
('1212', '1021', 'Test Results', 'Min_Val', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", (ABS("{COLUMN_NAME}") - ABS({BASELINE_VALUE})) AS difference_from_baseline FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" < {BASELINE_VALUE} LIMIT 500;'),
|
|
743
|
+
('1213', '1022', 'Test Results', 'Missing_Pct', 'snowflake', NULL, 'SELECT TOP 10 * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NULL OR "{COLUMN_NAME}" :: VARCHAR(255) = '''' ;'),
|
|
744
|
+
('1214', '1023', 'Test Results', 'Monthly_Rec_Ct', 'snowflake', NULL, 'WITH RECURSIVE daterange(all_dates) AS (SELECT DATE_TRUNC(''month'', MIN("{COLUMN_NAME}")) :: DATE AS all_dates FROM {TARGET_SCHEMA}.{TABLE_NAME} UNION ALL SELECT DATEADD(MONTH, 1, d.all_dates) :: DATE AS all_dates FROM daterange d WHERE d.all_dates < (SELECT DATE_TRUNC(''month'', MAX("{COLUMN_NAME}")) :: DATE FROM {TARGET_SCHEMA}.{TABLE_NAME}) ), existing_periods AS (SELECT DISTINCT DATE_TRUNC(''month'',"{COLUMN_NAME}") :: DATE AS period, COUNT(1) AS period_count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY DATE_TRUNC(''month'',"{COLUMN_NAME}") :: DATE ) SELECT p.missing_period, p.prior_available_month, e.period_count as prior_available_month_count, p.next_available_month, f.period_count as next_available_month_count FROM (SELECT d.all_dates as missing_period, MAX(b.period) AS prior_available_month, MIN(c.period) AS next_available_month FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates) p LEFT JOIN existing_periods e ON (p.prior_available_month = e.period) LEFT JOIN existing_periods f ON (p.next_available_month = f.period) ORDER BY p.missing_period;'),
|
|
745
|
+
('1215', '1024', 'Test Results', 'Outlier_Pct_Above', 'snowflake', NULL, 'SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" :: FLOAT > ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'),
|
|
746
|
+
('1216', '1025', 'Test Results', 'Outlier_Pct_Below', 'snowflake', NULL, 'SELECT ({BASELINE_AVG} - (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" :: FLOAT < ({BASELINE_AVG} - (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'),
|
|
747
|
+
('1217', '1026', 'Test Results', 'Pattern_Match', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE REGEXP_LIKE(NULLIF("{COLUMN_NAME}"::VARCHAR, ''''),''{BASELINE_VALUE}'') != 1 GROUP BY "{COLUMN_NAME}";'),
|
|
748
|
+
('1218', '1028', 'Test Results', 'Recency', 'snowflake', NULL, 'SELECT DISTINCT col AS latest_date_available, ''{TEST_DATE}'' :: DATE as test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM {TARGET_SCHEMA}.{TABLE_NAME}) WHERE DATEDIFF(''D'', col, ''{TEST_DATE}''::DATE) > {THRESHOLD_VALUE};'),
|
|
749
|
+
('1219', '1030', 'Test Results', 'Required', 'snowflake', NULL, 'SELECT * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NULL LIMIT 500;'),
|
|
750
|
+
('1220', '1031', 'Test Results', 'Row_Ct', 'snowflake', NULL, 'WITH CTE AS (SELECT COUNT(*) AS current_count FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT current_count, ABS(ROUND(100 *(current_count - {THRESHOLD_VALUE}) :: FLOAT / {THRESHOLD_VALUE} :: FLOAT,2)) AS row_count_pct_decrease FROM cte WHERE current_count < {THRESHOLD_VALUE};'),
|
|
751
|
+
('1221', '1032', 'Test Results', 'Row_Ct_Pct', 'snowflake', NULL, 'WITH CTE AS (SELECT COUNT(*) AS current_count FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT current_count, {BASELINE_CT} AS baseline_count, ABS(ROUND(100 * (current_count - {BASELINE_CT}) :: FLOAT / {BASELINE_CT} :: FLOAT,2)) AS row_count_pct_difference FROM cte;'),
|
|
752
|
+
('1222', '1033', 'Test Results', 'Street_Addr_Pattern', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE REGEXP_LIKE("{COLUMN_NAME}"::VARCHAR, ''^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$'') != 1 GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT 500;'),
|
|
753
|
+
('1223', '1036', 'Test Results', 'US_State', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT IN (''AL'',''AK'',''AS'',''AZ'',''AR'',''CA'',''CO'',''CT'',''DE'',''DC'',''FM'',''FL'',''GA'',''GU'',''HI'',''ID'',''IL'',''IN'',''IA'',''KS'',''KY'',''LA'',''ME'',''MH'',''MD'',''MA'',''MI'',''MN'',''MS'',''MO'',''MT'',''NE'',''NV'',''NH'',''NJ'',''NM'',''NY'',''NC'',''ND'',''MP'',''OH'',''OK'',''OR'',''PW'',''PA'',''PR'',''RI'',''SC'',''SD'',''TN'',''TX'',''UT'',''VT'',''VI'',''VA'',''WA'',''WV'',''WI'',''WY'',''AE'',''AP'',''AA'') GROUP BY "{COLUMN_NAME}" LIMIT 500;'),
|
|
754
|
+
('1224', '1034', 'Test Results', 'Unique', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" HAVING COUNT(*) > 1 ORDER BY COUNT(*) DESC LIMIT 500;'),
|
|
755
|
+
('1225', '1035', 'Test Results', 'Unique_Pct', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT 500;'),
|
|
756
|
+
('1226', '1037', 'Test Results', 'Weekly_Rec_Ct', 'snowflake', NULL, 'WITH RECURSIVE daterange(all_dates) AS (SELECT DATE_TRUNC(''week'',MIN("{COLUMN_NAME}")) :: DATE AS all_dates FROM {TARGET_SCHEMA}.{TABLE_NAME} UNION ALL SELECT (d.all_dates + INTERVAL ''1 week'' ) :: DATE AS all_dates FROM daterange d WHERE d.all_dates < (SELECT DATE_TRUNC(''week'', MAX("{COLUMN_NAME}")) :: DATE FROM {TARGET_SCHEMA}.{TABLE_NAME}) ), existing_periods AS ( SELECT DISTINCT DATE_TRUNC(''week'',"{COLUMN_NAME}") :: DATE AS period, COUNT(1) as period_count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY DATE_TRUNC(''week'',"{COLUMN_NAME}") :: DATE ) SELECT p.missing_period, p.prior_available_week, e.period_count as prior_available_week_count, p.next_available_week, f.period_count as next_available_week_count FROM( SELECT d.all_dates as missing_period, MAX(b.period) AS prior_available_week, MIN(c.period) AS next_available_week FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates ) p LEFT JOIN existing_periods e ON (p.prior_available_week = e.period) LEFT JOIN existing_periods f ON (p.next_available_week = f.period) ORDER BY p.missing_period;'),
|
|
757
|
+
('1227', '1040', 'Test Results', 'Variability_Increase', 'snowflake', NULL, 'SELECT STDDEV(CAST("{COLUMN_NAME}" AS FLOAT)) as current_standard_deviation FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
758
|
+
('1228', '1041', 'Test Results', 'Variability_Decrease', 'snowflake', NULL, 'SELECT STDDEV(CAST("{COLUMN_NAME}" AS FLOAT)) as current_standard_deviation FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
|
|
759
|
+
|
|
760
|
+
('1229', '1027', 'Profile Anomaly' , 'Variant_Coded_Values', 'redshift', NULL, 'WITH val_array AS (SELECT 1 as valkey, SPLIT_TO_ARRAY(SUBSTRING (''{DETAIL_EXPRESSION}'', STRPOS(''{DETAIL_EXPRESSION}'', '':'') + 2), ''|'') vals), val_list AS ( SELECT valkey, val::VARCHAR FROM val_array v, v.vals val ) SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} t INNER JOIN val_list v ON (LOWER("{COLUMN_NAME}") = v.val) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";'),
|
|
761
|
+
('1230', '1027', 'Profile Anomaly' , 'Variant_Coded_Values', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE lower("{COLUMN_NAME}") IN (SELECT trim(value) FROM TABLE (FLATTEN(INPUT => SPLIT(SUBSTRING(''{DETAIL_EXPRESSION}'', POSITION('':'', ''{DETAIL_EXPRESSION}'') + 2), ''|''))) ) GROUP BY "{COLUMN_NAME}";'),
|
|
762
|
+
('1231', '1027', 'Profile Anomaly' , 'Variant_Coded_Values', 'mssql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE LOWER("{COLUMN_NAME}") IN (SELECT trim(value) FROM STRING_SPLIT(SUBSTRING(''{DETAIL_EXPRESSION}'', CHARINDEX('':'', ''{DETAIL_EXPRESSION}'') + 2, 999), ''|'')) GROUP BY "{COLUMN_NAME}";'),
|
|
763
|
+
('1232', '1027', 'Profile Anomaly' , 'Variant_Coded_Values', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE LOWER("{COLUMN_NAME}") = ANY(STRING_TO_ARRAY(SUBSTRING(''{DETAIL_EXPRESSION}'', STRPOS(''{DETAIL_EXPRESSION}'', '':'') + 2), ''|'')) GROUP BY "{COLUMN_NAME}";'),
|
|
764
|
+
|
|
765
|
+
('1233', '1043', 'Test Results', 'Valid_Characters', 'redshift', NULL, 'SELECT TOP 20 "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" ~ ''[[:cntrl:]]'' OR "{COLUMN_NAME}" LIKE '' %'' OR "{COLUMN_NAME}" LIKE ''''''%'''''' OR "{COLUMN_NAME}" LIKE ''"%"'' GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC;'),
|
|
766
|
+
('1234', '1043', 'Test Results', 'Valid_Characters', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" ~ ''[[:cntrl:]]'' OR "{COLUMN_NAME}" LIKE '' %'' OR "{COLUMN_NAME}" LIKE ''''''%'''''' OR "{COLUMN_NAME}" LIKE ''"%"'' GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC LIMIT 20;'),
|
|
767
|
+
('1235', '1043', 'Test Results', 'Valid_Characters', 'mssql', NULL, 'SELECT TOP 20 "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE PATINDEX(''%['' + CHAR(1) + ''-'' + CHAR(8) + CHAR(11) + CHAR(12) + CHAR(14) + ''-'' + CHAR(31) + '']%'', "{COLUMN_NAME}") > 0 OR "{COLUMN_NAME}" LIKE '' %'' OR "{COLUMN_NAME}" LIKE ''''''%'''''' OR "{COLUMN_NAME}" LIKE ''"%"'' GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC;'),
|
|
768
|
+
('1236', '1043', 'Test Results', 'Valid_Characters', 'snowflake', NULL, 'SELECT TOP 20 "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE REGEXP_LIKE("{COLUMN_NAME}", ''.*[[:cntrl:]].*'') OR "{COLUMN_NAME}"::VARCHAR LIKE '' %'' OR "{COLUMN_NAME}"::VARCHAR LIKE ''''''%'''''' OR "{COLUMN_NAME}"::VARCHAR LIKE ''"%"'' GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC;'),
|
|
769
|
+
('1237', '1044', 'Test Results', 'Valid_US_Zip', 'redshift', NULL, 'SELECT TOP 20 "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC;'),
|
|
770
|
+
('1238', '1044', 'Test Results', 'Valid_US_Zip', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC LIMIT 20;'),
|
|
771
|
+
('1239', '1044', 'Test Results', 'Valid_US_Zip', 'mssql', NULL, 'SELECT TOP 20 "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC;'),
|
|
772
|
+
('1240', '1044', 'Test Results', 'Valid_US_Zip', 'snowflake', NULL, 'SELECT TOP 20 "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC;'),
|
|
773
|
+
|
|
774
|
+
('1241', '1045', 'Test Results', 'Valid_US_Zip3', 'redshift', NULL, 'SELECT TOP 20 "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') <> ''999'' GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC;'),
|
|
775
|
+
('1242', '1045', 'Test Results', 'Valid_US_Zip3', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') <> ''999'' GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC LIMIT 20;'),
|
|
776
|
+
('1243', '1045', 'Test Results', 'Valid_US_Zip3', 'mssql', NULL, 'SELECT TOP 20 "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') <> ''999'' GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC;'),
|
|
777
|
+
('1244', '1045', 'Test Results', 'Valid_US_Zip3', 'snowflake', NULL, 'SELECT TOP 20 "{COLUMN_NAME}", COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') <> ''999'' GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC;'),
|
|
778
|
+
|
|
779
|
+
('1245', '1500', 'Test Results', 'Aggregate_Balance', 'redshift', NULL, 'SELECT *
|
|
780
|
+
FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) AS total, SUM(MATCH_TOTAL) AS MATCH_TOTAL
|
|
781
|
+
FROM
|
|
782
|
+
( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} AS total, NULL AS match_total
|
|
783
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
784
|
+
WHERE {SUBSET_CONDITION}
|
|
785
|
+
GROUP BY {GROUPBY_NAMES}
|
|
786
|
+
{HAVING_CONDITION}
|
|
787
|
+
UNION ALL
|
|
788
|
+
SELECT {MATCH_GROUPBY_NAMES}, NULL AS total, {MATCH_COLUMN_NAMES} AS match_total
|
|
789
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
|
|
790
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
791
|
+
GROUP BY {MATCH_GROUPBY_NAMES}
|
|
792
|
+
{MATCH_HAVING_CONDITION} ) a
|
|
793
|
+
GROUP BY {GROUPBY_NAMES} ) s
|
|
794
|
+
WHERE total <> match_total OR (total IS NOT NULL AND match_total IS NULL) OR (total IS NULL AND match_total IS NOT NULL)
|
|
795
|
+
ORDER BY {GROUPBY_NAMES};'),
|
|
796
|
+
('1246', '1500', 'Test Results', 'Aggregate_Balance', 'snowflake', NULL, 'SELECT *
|
|
797
|
+
FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) AS total, SUM(MATCH_TOTAL) AS MATCH_TOTAL
|
|
798
|
+
FROM
|
|
799
|
+
( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} AS total, NULL AS match_total
|
|
800
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
801
|
+
WHERE {SUBSET_CONDITION}
|
|
802
|
+
GROUP BY {GROUPBY_NAMES}
|
|
803
|
+
{HAVING_CONDITION}
|
|
804
|
+
UNION ALL
|
|
805
|
+
SELECT {MATCH_GROUPBY_NAMES}, NULL AS total, {MATCH_COLUMN_NAMES} AS match_total
|
|
806
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
|
|
807
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
808
|
+
GROUP BY {MATCH_GROUPBY_NAMES}
|
|
809
|
+
{MATCH_HAVING_CONDITION} ) a
|
|
810
|
+
GROUP BY {GROUPBY_NAMES} ) s
|
|
811
|
+
WHERE total <> match_total OR (total IS NOT NULL AND match_total IS NULL) OR (total IS NULL AND match_total IS NOT NULL)
|
|
812
|
+
ORDER BY {GROUPBY_NAMES};'),
|
|
813
|
+
('1247', '1500', 'Test Results', 'Aggregate_Balance', 'mssql', NULL, 'SELECT *
|
|
814
|
+
FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) AS total, SUM(MATCH_TOTAL) AS MATCH_TOTAL
|
|
815
|
+
FROM
|
|
816
|
+
( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} AS total, NULL AS match_total
|
|
817
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
818
|
+
WHERE {SUBSET_CONDITION}
|
|
819
|
+
GROUP BY {GROUPBY_NAMES}
|
|
820
|
+
{HAVING_CONDITION}
|
|
821
|
+
UNION ALL
|
|
822
|
+
SELECT {MATCH_GROUPBY_NAMES}, NULL AS total, {MATCH_COLUMN_NAMES} AS match_total
|
|
823
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
|
|
824
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
825
|
+
GROUP BY {MATCH_GROUPBY_NAMES}
|
|
826
|
+
{MATCH_HAVING_CONDITION} ) a
|
|
827
|
+
GROUP BY {GROUPBY_NAMES} ) s
|
|
828
|
+
WHERE total <> match_total OR (total IS NOT NULL AND match_total IS NULL) OR (total IS NULL AND match_total IS NOT NULL)
|
|
829
|
+
ORDER BY {GROUPBY_NAMES};'),
|
|
830
|
+
('1248', '1500', 'Test Results', 'Aggregate_Balance', 'postgresql', NULL, 'SELECT *
|
|
831
|
+
FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) AS total, SUM(MATCH_TOTAL) AS MATCH_TOTAL
|
|
832
|
+
FROM
|
|
833
|
+
( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} AS total, NULL AS match_total
|
|
834
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
835
|
+
WHERE {SUBSET_CONDITION}
|
|
836
|
+
GROUP BY {GROUPBY_NAMES}
|
|
837
|
+
{HAVING_CONDITION}
|
|
838
|
+
UNION ALL
|
|
839
|
+
SELECT {MATCH_GROUPBY_NAMES}, NULL AS total, {MATCH_COLUMN_NAMES} AS match_total
|
|
840
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
|
|
841
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
842
|
+
GROUP BY {MATCH_GROUPBY_NAMES}
|
|
843
|
+
{MATCH_HAVING_CONDITION} ) a
|
|
844
|
+
GROUP BY {GROUPBY_NAMES} ) s
|
|
845
|
+
WHERE total <> match_total OR (total IS NOT NULL AND match_total IS NULL) OR (total IS NULL AND match_total IS NOT NULL)
|
|
846
|
+
ORDER BY {GROUPBY_NAMES};'),
|
|
847
|
+
('1249', '1501', 'Test Results', 'Aggregate_Minimum', 'redshift', NULL, 'SELECT *
|
|
848
|
+
FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL
|
|
849
|
+
FROM
|
|
850
|
+
( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total
|
|
851
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
852
|
+
WHERE {SUBSET_CONDITION}
|
|
853
|
+
GROUP BY {GROUPBY_NAMES}
|
|
854
|
+
{HAVING_CONDITION}
|
|
855
|
+
UNION ALL
|
|
856
|
+
SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total
|
|
857
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
|
|
858
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
859
|
+
GROUP BY {MATCH_GROUPBY_NAMES}
|
|
860
|
+
{MATCH_HAVING_CONDITION} ) a
|
|
861
|
+
GROUP BY {GROUPBY_NAMES} ) s
|
|
862
|
+
WHERE total < match_total OR (total IS NULL AND match_total IS NOT NULL)
|
|
863
|
+
ORDER BY {GROUPBY_NAMES};'),
|
|
864
|
+
('1250', '1501', 'Test Results', 'Aggregate_Minimum', 'snowflake', NULL, 'SELECT *
|
|
865
|
+
FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL
|
|
866
|
+
FROM
|
|
867
|
+
( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total
|
|
868
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
869
|
+
WHERE {SUBSET_CONDITION}
|
|
870
|
+
GROUP BY {GROUPBY_NAMES}
|
|
871
|
+
{HAVING_CONDITION}
|
|
872
|
+
UNION ALL
|
|
873
|
+
SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total
|
|
874
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
|
|
875
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
876
|
+
GROUP BY {MATCH_GROUPBY_NAMES}
|
|
877
|
+
{MATCH_HAVING_CONDITION} ) a
|
|
878
|
+
GROUP BY {GROUPBY_NAMES} ) s
|
|
879
|
+
WHERE total < match_total OR (total IS NULL AND match_total IS NOT NULL)
|
|
880
|
+
ORDER BY {GROUPBY_NAMES};'),
|
|
881
|
+
('1251', '1501', 'Test Results', 'Aggregate_Minimum', 'mssql', NULL, 'SELECT *
|
|
882
|
+
FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL
|
|
883
|
+
FROM
|
|
884
|
+
( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total
|
|
885
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
886
|
+
WHERE {SUBSET_CONDITION}
|
|
887
|
+
GROUP BY {GROUPBY_NAMES}
|
|
888
|
+
{HAVING_CONDITION}
|
|
889
|
+
UNION ALL
|
|
890
|
+
SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total
|
|
891
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
|
|
892
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
893
|
+
GROUP BY {MATCH_GROUPBY_NAMES}
|
|
894
|
+
{MATCH_HAVING_CONDITION} ) a
|
|
895
|
+
GROUP BY {GROUPBY_NAMES} ) s
|
|
896
|
+
WHERE total < match_total OR (total IS NULL AND match_total IS NOT NULL)
|
|
897
|
+
ORDER BY {GROUPBY_NAMES};'),
|
|
898
|
+
('1252', '1501', 'Test Results', 'Aggregate_Minimum', 'postgresql', NULL, 'SELECT *
|
|
899
|
+
FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL
|
|
900
|
+
FROM
|
|
901
|
+
( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total
|
|
902
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
903
|
+
WHERE {SUBSET_CONDITION}
|
|
904
|
+
GROUP BY {GROUPBY_NAMES}
|
|
905
|
+
{HAVING_CONDITION}
|
|
906
|
+
UNION ALL
|
|
907
|
+
SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total
|
|
908
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
|
|
909
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
910
|
+
GROUP BY {MATCH_GROUPBY_NAMES}
|
|
911
|
+
{MATCH_HAVING_CONDITION} ) a
|
|
912
|
+
GROUP BY {GROUPBY_NAMES} ) s
|
|
913
|
+
WHERE total < match_total OR (total IS NULL AND match_total IS NOT NULL)
|
|
914
|
+
ORDER BY {GROUPBY_NAMES};'),
|
|
915
|
+
('1253', '1502', 'Test Results', 'Combo_Match', 'redshift', NULL, 'SELECT *
|
|
916
|
+
FROM ( SELECT {COLUMN_NAME_NO_QUOTES}
|
|
917
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
918
|
+
WHERE {SUBSET_CONDITION}
|
|
919
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES}
|
|
920
|
+
{HAVING_CONDITION}
|
|
921
|
+
EXCEPT
|
|
922
|
+
SELECT {MATCH_GROUPBY_NAMES}
|
|
923
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
|
|
924
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
925
|
+
GROUP BY {MATCH_GROUPBY_NAMES}
|
|
926
|
+
{MATCH_HAVING_CONDITION}
|
|
927
|
+
) test
|
|
928
|
+
ORDER BY {COLUMN_NAME_NO_QUOTES};'),
|
|
929
|
+
('1254', '1502', 'Test Results', 'Combo_Match', 'snowflake', NULL, 'SELECT *
|
|
930
|
+
FROM ( SELECT {COLUMN_NAME_NO_QUOTES}
|
|
931
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
932
|
+
WHERE {SUBSET_CONDITION}
|
|
933
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES}
|
|
934
|
+
{HAVING_CONDITION}
|
|
935
|
+
EXCEPT
|
|
936
|
+
SELECT {MATCH_GROUPBY_NAMES}
|
|
937
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
|
|
938
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
939
|
+
GROUP BY {MATCH_GROUPBY_NAMES}
|
|
940
|
+
{MATCH_HAVING_CONDITION}
|
|
941
|
+
) test
|
|
942
|
+
ORDER BY {COLUMN_NAME_NO_QUOTES};'),
|
|
943
|
+
('1255', '1502', 'Test Results', 'Combo_Match', 'mssql', NULL, 'SELECT *
|
|
944
|
+
FROM ( SELECT {COLUMN_NAME_NO_QUOTES}
|
|
945
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
946
|
+
WHERE {SUBSET_CONDITION}
|
|
947
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES}
|
|
948
|
+
{HAVING_CONDITION}
|
|
949
|
+
EXCEPT
|
|
950
|
+
SELECT {MATCH_GROUPBY_NAMES}
|
|
951
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
|
|
952
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
953
|
+
GROUP BY {MATCH_GROUPBY_NAMES}
|
|
954
|
+
{MATCH_HAVING_CONDITION}
|
|
955
|
+
) test
|
|
956
|
+
ORDER BY {COLUMN_NAME_NO_QUOTES};'),
|
|
957
|
+
('1256', '1502', 'Test Results', 'Combo_Match', 'postgresql', NULL, 'SELECT *
|
|
958
|
+
FROM ( SELECT {COLUMN_NAME_NO_QUOTES}
|
|
959
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
960
|
+
WHERE {SUBSET_CONDITION}
|
|
961
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES}
|
|
962
|
+
{HAVING_CONDITION}
|
|
963
|
+
EXCEPT
|
|
964
|
+
SELECT {MATCH_GROUPBY_NAMES}
|
|
965
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
|
|
966
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
967
|
+
GROUP BY {MATCH_GROUPBY_NAMES}
|
|
968
|
+
{MATCH_HAVING_CONDITION}
|
|
969
|
+
) test
|
|
970
|
+
ORDER BY {COLUMN_NAME_NO_QUOTES};'),
|
|
971
|
+
('1257', '1503', 'Test Results', 'Distribution_Shift', 'redshift', NULL, 'WITH latest_ver
|
|
972
|
+
AS ( SELECT {CONCAT_COLUMNS} as category,
|
|
973
|
+
COUNT(*)::FLOAT / SUM(COUNT(*)) OVER ()::FLOAT AS pct_of_total
|
|
974
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME} v1
|
|
975
|
+
WHERE {SUBSET_CONDITION}
|
|
976
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES} ),
|
|
977
|
+
older_ver
|
|
978
|
+
AS ( SELECT {CONCAT_MATCH_GROUPBY} as category,
|
|
979
|
+
COUNT(*)::FLOAT / SUM(COUNT(*)) OVER ()::FLOAT AS pct_of_total
|
|
980
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME} v2
|
|
981
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
982
|
+
GROUP BY {MATCH_GROUPBY_NAMES} )
|
|
983
|
+
SELECT COALESCE(l.category, o.category) AS category,
|
|
984
|
+
o.pct_of_total AS old_pct,
|
|
985
|
+
l.pct_of_total AS new_pct
|
|
986
|
+
FROM latest_ver l
|
|
987
|
+
FULL JOIN older_ver o
|
|
988
|
+
ON (l.category = o.category)
|
|
989
|
+
ORDER BY COALESCE(l.category, o.category)'),
|
|
990
|
+
('1258', '1503', 'Test Results', 'Distribution_Shift', 'snowflake', NULL, 'WITH latest_ver
|
|
991
|
+
AS ( SELECT {CONCAT_COLUMNS} as category,
|
|
992
|
+
COUNT(*)::FLOAT / SUM(COUNT(*)) OVER ()::FLOAT AS pct_of_total
|
|
993
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME} v1
|
|
994
|
+
WHERE {SUBSET_CONDITION}
|
|
995
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES} ),
|
|
996
|
+
older_ver
|
|
997
|
+
AS ( SELECT {CONCAT_MATCH_GROUPBY} as category,
|
|
998
|
+
COUNT(*)::FLOAT / SUM(COUNT(*)) OVER ()::FLOAT AS pct_of_total
|
|
999
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME} v2
|
|
1000
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
1001
|
+
GROUP BY {MATCH_GROUPBY_NAMES} )
|
|
1002
|
+
SELECT COALESCE(l.category, o.category) AS category,
|
|
1003
|
+
o.pct_of_total AS old_pct,
|
|
1004
|
+
l.pct_of_total AS new_pct
|
|
1005
|
+
FROM latest_ver l
|
|
1006
|
+
FULL JOIN older_ver o
|
|
1007
|
+
ON (l.category = o.category)
|
|
1008
|
+
ORDER BY COALESCE(l.category, o.category)'),
|
|
1009
|
+
('1259', '1503', 'Test Results', 'Distribution_Shift', 'mssql', NULL, 'WITH latest_ver
|
|
1010
|
+
AS ( SELECT {CONCAT_COLUMNS} as category,
|
|
1011
|
+
CAST(COUNT(*) as FLOAT) / CAST(SUM(COUNT(*)) OVER () as FLOAT) AS pct_of_total
|
|
1012
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME} v1
|
|
1013
|
+
WHERE {SUBSET_CONDITION}
|
|
1014
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES} ),
|
|
1015
|
+
older_ver
|
|
1016
|
+
AS ( SELECT {CONCAT_MATCH_GROUPBY} as category,
|
|
1017
|
+
CAST(COUNT(*) as FLOAT) / CAST(SUM(COUNT(*)) OVER () as FLOAT) AS pct_of_total
|
|
1018
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME} v2
|
|
1019
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
1020
|
+
GROUP BY {MATCH_GROUPBY_NAMES} )
|
|
1021
|
+
SELECT COALESCE(l.category, o.category) AS category,
|
|
1022
|
+
o.pct_of_total AS old_pct,
|
|
1023
|
+
l.pct_of_total AS new_pct
|
|
1024
|
+
FROM latest_ver l
|
|
1025
|
+
FULL JOIN older_ver o
|
|
1026
|
+
ON (l.category = o.category)
|
|
1027
|
+
ORDER BY COALESCE(l.category, o.category)'),
|
|
1028
|
+
('1260', '1503', 'Test Results', 'Distribution_Shift', 'postgresql', NULL, 'WITH latest_ver
|
|
1029
|
+
AS ( SELECT {CONCAT_COLUMNS} as category,
|
|
1030
|
+
COUNT(*)::FLOAT / SUM(COUNT(*)) OVER ()::FLOAT AS pct_of_total
|
|
1031
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME} v1
|
|
1032
|
+
WHERE {SUBSET_CONDITION}
|
|
1033
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES} ),
|
|
1034
|
+
older_ver
|
|
1035
|
+
AS ( SELECT {CONCAT_MATCH_GROUPBY} as category,
|
|
1036
|
+
COUNT(*)::FLOAT / SUM(COUNT(*)) OVER ()::FLOAT AS pct_of_total
|
|
1037
|
+
FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME} v2
|
|
1038
|
+
WHERE {MATCH_SUBSET_CONDITION}
|
|
1039
|
+
GROUP BY {MATCH_GROUPBY_NAMES} )
|
|
1040
|
+
SELECT COALESCE(l.category, o.category) AS category,
|
|
1041
|
+
o.pct_of_total AS old_pct,
|
|
1042
|
+
l.pct_of_total AS new_pct
|
|
1043
|
+
FROM latest_ver l
|
|
1044
|
+
FULL JOIN older_ver o
|
|
1045
|
+
ON (l.category = o.category)
|
|
1046
|
+
ORDER BY COALESCE(l.category, o.category)'),
|
|
1047
|
+
|
|
1048
|
+
('1261', '1508', 'Test Results', 'Timeframe_Combo_Gain', 'redshift', NULL, 'SELECT {COLUMN_NAME_NO_QUOTES}
|
|
1049
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1050
|
+
WHERE {SUBSET_CONDITION}
|
|
1051
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
|
|
1052
|
+
AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1053
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES}
|
|
1054
|
+
EXCEPT
|
|
1055
|
+
SELECT {COLUMN_NAME_NO_QUOTES}
|
|
1056
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1057
|
+
WHERE {SUBSET_CONDITION}
|
|
1058
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1059
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES}'),
|
|
1060
|
+
('1262', '1508', 'Test Results', 'Timeframe_Combo_Gain', 'snowflake', NULL, 'SELECT {COLUMN_NAME_NO_QUOTES}
|
|
1061
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1062
|
+
WHERE {SUBSET_CONDITION}
|
|
1063
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
|
|
1064
|
+
AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1065
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES}
|
|
1066
|
+
EXCEPT
|
|
1067
|
+
SELECT {COLUMN_NAME_NO_QUOTES}
|
|
1068
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1069
|
+
WHERE {SUBSET_CONDITION}
|
|
1070
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1071
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES}'),
|
|
1072
|
+
('1263', '1508', 'Test Results', 'Timeframe_Combo_Gain', 'mssql', NULL, 'SELECT {COLUMN_NAME_NO_QUOTES}
|
|
1073
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1074
|
+
WHERE {SUBSET_CONDITION}
|
|
1075
|
+
AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - 2 * {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}))
|
|
1076
|
+
AND {WINDOW_DATE_COLUMN} < DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}))
|
|
1077
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES}
|
|
1078
|
+
EXCEPT
|
|
1079
|
+
SELECT {COLUMN_NAME_NO_QUOTES}
|
|
1080
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1081
|
+
WHERE {SUBSET_CONDITION}
|
|
1082
|
+
AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}))
|
|
1083
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES}'),
|
|
1084
|
+
('1264', '1508', 'Test Results', 'Timeframe_Combo_Gain', 'postgresql', NULL, 'SELECT {COLUMN_NAME_NO_QUOTES}
|
|
1085
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1086
|
+
WHERE {SUBSET_CONDITION}
|
|
1087
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
|
|
1088
|
+
AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1089
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES}
|
|
1090
|
+
EXCEPT
|
|
1091
|
+
SELECT {COLUMN_NAME_NO_QUOTES}
|
|
1092
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1093
|
+
WHERE {SUBSET_CONDITION}
|
|
1094
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1095
|
+
GROUP BY {COLUMN_NAME_NO_QUOTES}'),
|
|
1096
|
+
('1265', '1509', 'Test Results', 'Timeframe_Combo_Match', 'redshift', NULL, ' (
|
|
1097
|
+
SELECT ''Prior Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1098
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1099
|
+
WHERE {SUBSET_CONDITION}
|
|
1100
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1101
|
+
EXCEPT
|
|
1102
|
+
SELECT ''Prior Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1103
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1104
|
+
WHERE {SUBSET_CONDITION}
|
|
1105
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
|
|
1106
|
+
AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1107
|
+
)
|
|
1108
|
+
UNION ALL
|
|
1109
|
+
(
|
|
1110
|
+
SELECT ''Latest Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1111
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1112
|
+
WHERE {SUBSET_CONDITION}
|
|
1113
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
|
|
1114
|
+
AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1115
|
+
EXCEPT
|
|
1116
|
+
SELECT ''Latest Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1117
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1118
|
+
WHERE {SUBSET_CONDITION}
|
|
1119
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1120
|
+
)'),
|
|
1121
|
+
('1266', '1509', 'Test Results', 'Timeframe_Combo_Match', 'snowflake', NULL, ' (
|
|
1122
|
+
SELECT ''Prior Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1123
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1124
|
+
WHERE {SUBSET_CONDITION}
|
|
1125
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1126
|
+
EXCEPT
|
|
1127
|
+
SELECT ''Prior Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1128
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1129
|
+
WHERE {SUBSET_CONDITION}
|
|
1130
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
|
|
1131
|
+
AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1132
|
+
)
|
|
1133
|
+
UNION ALL
|
|
1134
|
+
(
|
|
1135
|
+
SELECT ''Latest Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1136
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1137
|
+
WHERE {SUBSET_CONDITION}
|
|
1138
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
|
|
1139
|
+
AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1140
|
+
EXCEPT
|
|
1141
|
+
SELECT ''Latest Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1142
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1143
|
+
WHERE {SUBSET_CONDITION}
|
|
1144
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1145
|
+
)'),
|
|
1146
|
+
('1267', '1509', 'Test Results', 'Timeframe_Combo_Match', 'mssql', NULL, ' (
|
|
1147
|
+
SELECT ''Prior Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1148
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1149
|
+
WHERE {SUBSET_CONDITION}
|
|
1150
|
+
AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}))
|
|
1151
|
+
EXCEPT
|
|
1152
|
+
SELECT ''Prior Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1153
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1154
|
+
WHERE {SUBSET_CONDITION}
|
|
1155
|
+
AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - 2 * {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}))
|
|
1156
|
+
AND {WINDOW_DATE_COLUMN} < DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}))
|
|
1157
|
+
)
|
|
1158
|
+
UNION ALL
|
|
1159
|
+
(
|
|
1160
|
+
SELECT ''Latest Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1161
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1162
|
+
WHERE {SUBSET_CONDITION}
|
|
1163
|
+
AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - 2 * {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}))
|
|
1164
|
+
AND {WINDOW_DATE_COLUMN} < DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}))
|
|
1165
|
+
EXCEPT
|
|
1166
|
+
SELECT ''Latest Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1167
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1168
|
+
WHERE {SUBSET_CONDITION}
|
|
1169
|
+
AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}))
|
|
1170
|
+
)'),
|
|
1171
|
+
('1268', '1509', 'Test Results', 'Timeframe_Combo_Match', 'postgresql', NULL, ' (
|
|
1172
|
+
SELECT ''Prior Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1173
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1174
|
+
WHERE {SUBSET_CONDITION}
|
|
1175
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1176
|
+
EXCEPT
|
|
1177
|
+
SELECT ''Prior Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1178
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1179
|
+
WHERE {SUBSET_CONDITION}
|
|
1180
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
|
|
1181
|
+
AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1182
|
+
)
|
|
1183
|
+
UNION ALL
|
|
1184
|
+
(
|
|
1185
|
+
SELECT ''Latest Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1186
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1187
|
+
WHERE {SUBSET_CONDITION}
|
|
1188
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - 2 * {WINDOW_DAYS}
|
|
1189
|
+
AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1190
|
+
EXCEPT
|
|
1191
|
+
SELECT ''Latest Timeframe'' as missing_from, {COLUMN_NAME}
|
|
1192
|
+
FROM {TARGET_SCHEMA}.{TABLE_NAME}
|
|
1193
|
+
WHERE {SUBSET_CONDITION}
|
|
1194
|
+
AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {TARGET_SCHEMA}.{TABLE_NAME}) - {WINDOW_DAYS}
|
|
1195
|
+
)'),
|
|
1196
|
+
('1269', '1100', 'Profile Anomaly', 'Potential_PII', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;'),
|
|
1197
|
+
('1270', '1100', 'Profile Anomaly', 'Potential_PII', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;'),
|
|
1198
|
+
('1271', '1100', 'Profile Anomaly', 'Potential_PII', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'), -- T-SQL has no LIMIT clause, so the 500-row cap is expressed with TOP
|
|
1199
|
+
('1272', '1100', 'Profile Anomaly', 'Potential_PII', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;')
|
|
1200
|
+
;
|
|
1201
|
+
|
|
1202
|
+
|
|
1203
|
+
TRUNCATE TABLE variant_codings; -- Clears every existing row so the INSERT that follows reloads the table from scratch
|
|
1204
|
+
|
|
1205
|
+
INSERT INTO variant_codings (value_type, check_values) -- value_type is a category label and check_values is a pipe-delimited list of spellings that appear to denote the same value (presumed from the rows below, confirm against the consumer)
|
|
1206
|
+
VALUES ('measure', 'meter|m|metre'),
|
|
1207
|
+
('measure', 'centimeter|cm|centimetre'),
|
|
1208
|
+
('measure', 'millimeter|mm|millimetre'),
|
|
1209
|
+
('measure', 'kilometer|km|kilometre'),
|
|
1210
|
+
('measure', 'inches|inch|in|"'),
|
|
1211
|
+
('measure', 'foot|ft|feet|'''),
|
|
1212
|
+
('measure', 'yard|yd'),
|
|
1213
|
+
('measure', 'mile|mi|miles'),
|
|
1214
|
+
('measure', 'kilogram|kgs|kg'),
|
|
1215
|
+
('measure', 'gram|g'),
|
|
1216
|
+
('measure', 'milligram|mgs|mg'),
|
|
1217
|
+
('measure', 'pound|lb|lbs|pounds'),
|
|
1218
|
+
('measure', 'ounce|oz'),
|
|
1219
|
+
('measure', 'liter|l|litre|liters|litres'),
|
|
1220
|
+
('measure', 'milliliter|ml|millilitre'),
|
|
1221
|
+
('measure', 'cubic meter|m^3|m³|cubic metre'),
|
|
1222
|
+
('measure', 'cubic centimeter|cm^3|cm³|cubic centimetre'),
|
|
1223
|
+
('measure', 'gallon|gal|gallons'),
|
|
1224
|
+
('measure', 'quart|qt'),
|
|
1225
|
+
('measure', 'pint|pt'),
|
|
1226
|
+
('measure', 'cup|cups'),
|
|
1227
|
+
('measure', 'percent|pct|%'),
|
|
1228
|
+
('med_dose', 'fluid ounce|fl oz|fluid ounces'),
|
|
1229
|
+
('med_dose', 'tablet|tab|tabs'),
|
|
1230
|
+
('med_dose', 'capsule|cap|caps'),
|
|
1231
|
+
('med_dose', 'once daily|daily|qd'),
|
|
1232
|
+
('med_dose', 'twice daily|bid'),
|
|
1233
|
+
('med_dose', 'three times daily|tid'),
|
|
1234
|
+
('med_dose', 'four times daily|qid'),
|
|
1235
|
+
('med_dose', 'as needed|prn'),
|
|
1236
|
+
('med_dose', 'before meals|ac'),
|
|
1237
|
+
('med_dose', 'after meals|pc'),
|
|
1238
|
+
('med_dose', 'at bedtime|hs'),
|
|
1239
|
+
('med_dose', 'intravenous|iv'),
|
|
1240
|
+
('med_dose', 'subcutaneous|sc|sq'),
|
|
1241
|
+
('med_dose', 'intramuscular|im'),
|
|
1242
|
+
('med_dose', 'oral|po'),
|
|
1243
|
+
('med_dose', 'per rectum|pr'),
|
|
1244
|
+
('med_dose', 'drops|gtt|gtts'),
|
|
1245
|
+
|
|
1246
|
+
('med_tx', 'treatment|trx|tx'),
|
|
1247
|
+
('med_tx', 'new patients|new patient|new pt|nrx'),
|
|
1248
|
+
('med_tx', 'patient|pat|pt|px'),
|
|
1249
|
+
('med_tx', 'prescription|rx'),
|
|
1250
|
+
('med_tx', 'hcp|md|dr'),
|
|
1251
|
+
|
|
1252
|
+
('inv_uom', 'each|ea'),
|
|
1253
|
+
('inv_uom', 'piece|pc|pieces|pcs'),
|
|
1254
|
+
('inv_uom', 'set|sets'),
|
|
1255
|
+
('inv_uom', 'pack|pk|pks'),
|
|
1256
|
+
('inv_uom', 'box|bx|boxes'),
|
|
1257
|
+
('inv_uom', 'case|cases'),
|
|
1258
|
+
('inv_uom', 'bottle|btl|bottles|btls'),
|
|
1259
|
+
('inv_uom', 'dozen|dz'),
|
|
1260
|
+
('inv_uom', 'pair|pr|pairs'),
|
|
1261
|
+
('inv_uom', 'batch|lot|lots'),
|
|
1262
|
+
('inv_uom', 'bundle|bundles'),
|
|
1263
|
+
('inv_uom', 'units|unit|each|ea'),
|
|
1264
|
+
('inv_uom', 'carton|ctn|cartons'),
|
|
1265
|
+
('inv_uom', 'case|cs|ca'),
|
|
1266
|
+
('inv_uom', 'bag|bg|bags'),
|
|
1267
|
+
('status', 'positive|pos|p'),
|
|
1268
|
+
('status', 'negative|neg|n'),
|
|
1269
|
+
('status', 'complete|completed|comp|cmp|c'),
|
|
1270
|
+
('status', 'incomplete|incomp|inc|i'),
|
|
1271
|
+
('status', 'active|act|a'),
|
|
1272
|
+
('status', 'inactive|inact|in|ia|i'),
|
|
1273
|
+
('status', 'enabled|en'),
|
|
1274
|
+
('status', 'disabled|dis'),
|
|
1275
|
+
('status', 'open|opn|o'),
|
|
1276
|
+
('status', 'closed|cls|c'),
|
|
1277
|
+
('status', 'terminated|cancellation|cancelled|cancel|canc|cc'),
|
|
1278
|
+
('status', 'verified|confirmed|conf|cnf|cf'),
|
|
1279
|
+
('status', 'unconfirmed|unconf|ucf'),
|
|
1280
|
+
('status', 'not available|unavailable|n/a|na|unknown|unkn|unk|un'),
|
|
1281
|
+
('status', 'processed|proc|pr'),
|
|
1282
|
+
('status', 'unprocessed|unproc|upr'),
|
|
1283
|
+
('status', 'approved|accepted|accept|appr|ap'),
|
|
1284
|
+
('status', 'unapproved|unappr|uap'),
|
|
1285
|
+
('status', 'rejected|reject|rej|rj|declined'),
|
|
1286
|
+
('status', 'received|recv|rcvd|rcv'),
|
|
1287
|
+
('status', 'on hold|hold|held|paused|pause'),
|
|
1288
|
+
('status', 'not received|nrecv|nrc'),
|
|
1289
|
+
('status', 'shipped|dispatched|despatched|filled|sent|shp|s'),
|
|
1290
|
+
('status', 'not shipped|nshp|ns|unshipped'),
|
|
1291
|
+
('status', 'past due|overdue|late'),
|
|
1292
|
+
('status', 'true|yes|y'),
|
|
1293
|
+
('status', 'true|yes|t'),
|
|
1294
|
+
('status', 'false|no|n'),
|
|
1295
|
+
('status', 'false|no|f'),
|
|
1296
|
+
('status', 'pending|pend|pnd'),
|
|
1297
|
+
('status', 'in process|in progress|active'),
|
|
1298
|
+
('status', 'retain|keep'),
|
|
1299
|
+
('status', 'remove|drop|delete|del'),
|
|
1300
|
+
('status', 'low|lo|l'),
|
|
1301
|
+
('status', 'medium|moderate|med|m'),
|
|
1302
|
+
('status', 'high|hi|h'),
|
|
1303
|
+
('status', 'same|sm'),
|
|
1304
|
+
('status', 'average|mean|avg'),
|
|
1305
|
+
('status', 'decreased|decrease|decr|down|dn'),
|
|
1306
|
+
('status', 'increased|increase|incr|up'),
|
|
1307
|
+
('status', 'qualification|qual|q'),
|
|
1308
|
+
('status', 'qualified|qual|q'),
|
|
1309
|
+
('status', 'failed|fail|f'),
|
|
1310
|
+
('status', 'passed|pass|p|success'),
|
|
1311
|
+
('status', 'deferred|defer|delayed|delay'),
|
|
1312
|
+
('status', 'resolved|fixed|fx'),
|
|
1313
|
+
('crm', 'email|eml|em'),
|
|
1314
|
+
('crm', 'direct mail|mail|dm'),
|
|
1315
|
+
('crm', 'account|acct|act'),
|
|
1316
|
+
('crm', 'clinical|clinic|clin|c'),
|
|
1317
|
+
('crm', 'hospitals|hospital|hosp|hos'),
|
|
1318
|
+
('crm', 'private practice|practice|prac|clinical|clinic'),
|
|
1319
|
+
('crm', 'pharmacy|pharm|phar|rx'),
|
|
1320
|
+
('crm', 'community|comm|com'),
|
|
1321
|
+
('crm', 'academic|educational|ed'),
|
|
1322
|
+
('crm', 'government|govt|gov|gvt|federal|fed'),
|
|
1323
|
+
('demog', 'male|m'),
|
|
1324
|
+
('demog', 'female|f'),
|
|
1325
|
+
('demog', 'single|s'),
|
|
1326
|
+
('demog', 'married|m'),
|
|
1327
|
+
('demog', 'widowed|w'),
|
|
1328
|
+
('demog', 'separated|sep'),
|
|
1329
|
+
('demog', 'divorced|dvcd|div'),
|
|
1330
|
+
('demog', 'partnered|prt'),
|
|
1331
|
+
('demog', 'cohabiting|coh'),
|
|
1332
|
+
('demog', 'engaged|eng'),
|
|
1333
|
+
('demog', 'living|alive|lv'),
|
|
1334
|
+
('demog', 'deceased|dead|dec|dcd'),
|
|
1335
|
+
('demog', 'retired|ret|rt'),
|
|
1336
|
+
('demog', 'employed|emp'),
|
|
1337
|
+
('demog', 'unemployed|unemp'),
|
|
1338
|
+
('demog', 'student|stu|std|st'),
|
|
1339
|
+
('demog', 'child|ch'),
|
|
1340
|
+
('demog', 'adult|ad'),
|
|
1341
|
+
('demog', 'senior|sr'),
|
|
1342
|
+
('demog', 'veteran|vet'),
|
|
1343
|
+
('demog', 'homeowner|homeown'),
|
|
1344
|
+
('demog', 'renter|rnt'),
|
|
1345
|
+
('demog', 'urban|urb'),
|
|
1346
|
+
('demog', 'suburban|sub'),
|
|
1347
|
+
('demog', 'rural|rur'),
|
|
1348
|
+
('demog', 'cellular|cell|mobile|mob'),
|
|
1349
|
+
('chron', 'monday|mon|m'),
|
|
1350
|
+
('chron', 'tuesday|tue|tu'),
|
|
1351
|
+
('chron', 'wednesday|wed|w'),
|
|
1352
|
+
('chron', 'thursday|thu|th'),
|
|
1353
|
+
('chron', 'friday|fri|f'),
|
|
1354
|
+
('chron', 'saturday|sat|sa'),
|
|
1355
|
+
('chron', 'sunday|sun|su'),
|
|
1356
|
+
('chron', 'january|jan|01'),
|
|
1357
|
+
('chron', 'february|feb|02'),
|
|
1358
|
+
('chron', 'march|mar|03'),
|
|
1359
|
+
('chron', 'april|apr|04'),
|
|
1360
|
+
('chron', 'may|05'),
|
|
1361
|
+
('chron', 'june|jun|06'),
|
|
1362
|
+
('chron', 'july|jul|07'),
|
|
1363
|
+
('chron', 'august|aug|08'),
|
|
1364
|
+
('chron', 'september|sept|sep|09'),
|
|
1365
|
+
('chron', 'october|oct|10'),
|
|
1366
|
+
('chron', 'november|nov|11'),
|
|
1367
|
+
('chron', 'december|dec|12'),
|
|
1368
|
+
('chron', 'week-ending|week|wk|w'),
|
|
1369
|
+
('chron', 'month-ending|month-end|month|mo|m'),
|
|
1370
|
+
('chron', 'quarter|quarter-ending|quarter-end|qtr|q'),
|
|
1371
|
+
('chron', 'year|yr|fy|y'),
|
|
1372
|
+
('chron', 'year-to-date|ytd'),
|
|
1373
|
+
('currency', 'us dollars|dollars|usd|us|$'),
|
|
1374
|
+
('currency', 'euro|eur|€'),
|
|
1375
|
+
('currency', 'pound|pounds|gbp|£'),
|
|
1376
|
+
('currency', 'yen|jpy|¥'),
|
|
1377
|
+
('currency', 'yuan|cny|元'),
|
|
1378
|
+
('country', 'united states of america|united states|u.s.a.|u.s.|usa|us'),
|
|
1379
|
+
('country', 'united kingdom|great britain|england|britain|uk|gb|gbr'),
|
|
1380
|
+
('country', 'canada|ca|can'),
|
|
1381
|
+
('country', 'mexico|méxico|mx'),
|
|
1382
|
+
('country', 'australia|au|aus'),
|
|
1383
|
+
('country', 'germany|de|deu'),
|
|
1384
|
+
('country', 'france|fr|fra'),
|
|
1385
|
+
('country', 'italy|it|ita'),
|
|
1386
|
+
('country', 'japan|jp|jpn'),
|
|
1387
|
+
('country', 'china|cn|chn'),
|
|
1388
|
+
('country', 'india|in|ind'),
|
|
1389
|
+
('hr','full-time|ft'),
|
|
1390
|
+
('hr','part-time|pt'),
|
|
1391
|
+
('hr','contract|contractor'),
|
|
1392
|
+
('hr','temporary|temp|tmp'),
|
|
1393
|
+
('hr','intern|internship'),
|
|
1394
|
+
('hr','permanent|perm'),
|
|
1395
|
+
('hr','non-binary|nb'),
|
|
1396
|
+
('hr','active|employed|working'),
|
|
1397
|
+
('hr','inactive|unemployed|not working'),
|
|
1398
|
+
('hr','leave of absence|leave|loa'),
|
|
1399
|
+
('hr','maternity leave|mat leave|mat'),
|
|
1400
|
+
('hr','paternity leave|pat leave|pat'),
|
|
1401
|
+
('hr','sick leave|sick|illness'),
|
|
1402
|
+
('hr','vacation|vac|pto'),
|
|
1403
|
+
('hr','remote|work from home|home|wfh'),
|
|
1404
|
+
('hr','on-site|office based|wfo'),
|
|
1405
|
+
('hr','resigned|quit|left'),
|
|
1406
|
+
('hr','terminated|fired'),
|
|
1407
|
+
('hr','promotion|promoted'),
|
|
1408
|
+
('hr','transfer|transferred|xfer'),
|
|
1409
|
+
('hr','performance review|perf review|pr'),
|
|
1410
|
+
('hr','training|education|ed'),
|
|
1411
|
+
('hr','salary increase|increase|raise'),
|
|
1412
|
+
('hr','bonus|bon|incentive|incent'),
|
|
1413
|
+
('hr','employee referral|referral'),
|
|
1414
|
+
('hr','exit interview|exit'),
|
|
1415
|
+
('office','corporate|corp|co'),
|
|
1416
|
+
('office','headquarters|hq|head office'),
|
|
1417
|
+
('office','branch office|branch|local office'),
|
|
1418
|
+
('office','regional office|region office|regional hub|regional|region'),
|
|
1419
|
+
('office','sales office|sales|field office'),
|
|
1420
|
+
('office','distribution center|distribution hub|distribution|dist'),
|
|
1421
|
+
('office','manufacturing plant|factory|manufacturing facility|manufacturing|mfg'),
|
|
1422
|
+
('office','research and development|r&d|innovation center'),
|
|
1423
|
+
('office','customer service center|customer support center|service center'),
|
|
1424
|
+
('office','logistics center|logistics|shipping center|shipping'),
|
|
1425
|
+
('office','data center|data hub|it center'),
|
|
1426
|
+
('office','administrative office|admin office|administration|admin'),
|
|
1427
|
+
('office','call center|contact center|customer call center'),
|
|
1428
|
+
('office','warehouse|storage facility|storage|fulfillment center|fulfillment'),
|
|
1429
|
+
('office','retail store|store|retail outlet|retail'),
|
|
1430
|
+
('office','outlet store|outlet|clearance center|clearance'),
|
|
1431
|
+
('office','training center|training facility|learning center'),
|
|
1432
|
+
('office','legal office|legal department|legal|compliance office|compliance'),
|
|
1433
|
+
('office','finance department|finance office|accounting|finance'),
|
|
1434
|
+
('office','human resources|hr'),
|
|
1435
|
+
('office','marketing department|marketing|mktg|sales and marketing|s&m'),
|
|
1436
|
+
('office','operations center|operations|ops center|ops'),
|
|
1437
|
+
('office','executive office|executive suite|c-suite'),
|
|
1438
|
+
('finance','fiscal year|fy'),
|
|
1439
|
+
('finance','forecast|fcast|fore|for'),
|
|
1440
|
+
('finance','actuals|actual|act'),
|
|
1441
|
+
('finance','estimated|estimates|estimate|est'),
|
|
1442
|
+
('finance','credit|cred|cr|c'),
|
|
1443
|
+
('finance','debit|deb|db|d'),
|
|
1444
|
+
('cust_service','new inquiry|ni|new question|nq|new ticket'),
|
|
1445
|
+
('cust_service','open ticket|ot|unresolved|ur'),
|
|
1446
|
+
('cust_service','pending review|pr|awaiting response|ar'),
|
|
1447
|
+
('cust_service','resolved|res|closed|cl'),
|
|
1448
|
+
('cust_service','escalated|esc|high priority|hp'),
|
|
1449
|
+
('cust_service','customer feedback|cf|feedback|fb'),
|
|
1450
|
+
('cust_service','complaint|comp|issue reported|ir'),
|
|
1451
|
+
('cust_service','refund request|rr|refund required|rf'),
|
|
1452
|
+
('cust_service','exchange request|er|exchange required|exr'),
|
|
1453
|
+
('cust_service','return initiated|ri|return started|rs'),
|
|
1454
|
+
('cust_service','follow-up required|follow-up needed|follow-up|follow up|f/u|fu'),
|
|
1455
|
+
('cust_service','customer satisfaction|csat|satisfaction level|sl'),
|
|
1456
|
+
('cust_service','service level agreement|sla|sla compliance|slac'),
|
|
1457
|
+
('cust_service','first response time|frt|initial response time|irt'),
|
|
1458
|
+
('cust_service','average handle time|aht|average resolution time|art'),
|
|
1459
|
+
('cust_service','customer retention|cr|retention rate|rr'),
|
|
1460
|
+
('cust_service','customer churn|churn rate|churn|ch'),
|
|
1461
|
+
('cust_service','net promoter score|nps|promoter score|ps'),
|
|
1462
|
+
('cust_service','case reopened|cr|reopened ticket|rt'),
|
|
1463
|
+
('cust_service','technical support|ts|tech help|th'),
|
|
1464
|
+
('cust_service','billing inquiry|bi|billing question|bq'),
|
|
1465
|
+
('cust_service','product inquiry|pi|product question|pq'),
|
|
1466
|
+
('cust_service','service feedback|sf|service review|sr'),
|
|
1467
|
+
('cust_service','live chat|instant messaging|im'),
|
|
1468
|
+
('cust_service','imessage|text message|text|sms'),
|
|
1469
|
+
('cust_service','email support|es|email inquiry|ei'),
|
|
1470
|
+
('cust_service','phone support|ps|call center|cc'),
|
|
1471
|
+
('cust_service','social media support|sms|social inquiry|social|si'),
|
|
1472
|
+
('cust_service','self-service|help center|ss'),
|
|
1473
|
+
('cust_service','knowledge base|kb|faq|frequently asked questions'),
|
|
1474
|
+
('cust_service','ticket closed without action|tcwa|closed no action|cna'),
|
|
1475
|
+
('pharma','phase 1|phase i'),
|
|
1476
|
+
('pharma','phase 2|phase ii'),
|
|
1477
|
+
('pharma','phase 3|phase iii|late phase'),
|
|
1478
|
+
('pharma','phase 4|phase iv|post-marketing surveillance'),
|
|
1479
|
+
('pharma','clinical trial|clinical study|research|trial'),
|
|
1480
|
+
('pharma','preclinical|pre-clinical|non-clinical studies'),
|
|
1481
|
+
('pharma','in vitro studies|ivs|laboratory studies|lab studies'),
|
|
1482
|
+
('pharma','in vivo studies|animal studies|animal model studies'),
|
|
1483
|
+
('pharma','regulatory submission|rs|submission to regulatory'),
|
|
1484
|
+
('pharma','regulatory approval|ra|approved by regulatory|approval|approved'),
|
|
1485
|
+
('pharma','marketed|commercialized|launched'),
|
|
1486
|
+
('pharma','under review|review by regulatory'),
|
|
1487
|
+
('pharma','rejected by regulatory|regulatory rejection|rejected'),
|
|
1488
|
+
('pharma','drug discovery|dd|early research'),
|
|
1489
|
+
('pharma','formulation development|fd|drug formulation'),
|
|
1490
|
+
('pharma','toxicology studies|tox studies|toxicological assessment|toxo'),
|
|
1491
|
+
('pharma','bioavailability study|ba study|absorption study'),
|
|
1492
|
+
('pharma','bioequivalence study|be study|equivalence study'),
|
|
1493
|
+
('pharma','pharmacokinetics|pk|pharmacokinetic studies'),
|
|
1494
|
+
('pharma','pharmacodynamics|pd|pharmacodynamic studies'),
|
|
1495
|
+
('pharma','clinical development plan|cdp|development strategy'),
|
|
1496
|
+
('pharma','investigational new drug|ind|ind application'),
|
|
1497
|
+
('pharma','new drug application|nda|drug registration'),
|
|
1498
|
+
('pharma','biologics license application|bla|biologics application'),
|
|
1499
|
+
('pharma','orphan drug designation|odd|orphan status|orphan drug'),
|
|
1500
|
+
('pharma','breakthrough therapy designation|btd|expedited development'),
|
|
1501
|
+
('pharma','fast track designation|ftd|accelerated review'),
|
|
1502
|
+
('pharma','priority review|pr|priority assessment'),
|
|
1503
|
+
('pharma','tentative approval|ta|conditional approval'),
|
|
1504
|
+
('pharma','off-label use|off-label|olu|unapproved use|unapproved');
|
|
1505
|
+
|
|
1506
|
+
-- Replace constraints
|
|
1507
|
+
ALTER TABLE test_templates -- Adds a foreign key from test_templates.test_type to the test_types table (part of the Replace constraints section, the matching drop is presumably earlier in the script and is not visible here)
|
|
1508
|
+
ADD CONSTRAINT test_templates_test_types_test_type_fk
|
|
1509
|
+
FOREIGN KEY (test_type) REFERENCES test_types; -- No referenced column is listed, so the reference resolves to the primary key of test_types (NOTE(review): confirm that key is test_type)
|
|
1510
|
+
|
|
1511
|
+
ALTER TABLE test_results -- Adds a foreign key from test_results.test_type to the test_types table (part of the Replace constraints section, the matching drop is presumably earlier in the script and is not visible here)
|
|
1512
|
+
ADD CONSTRAINT test_results_test_types_test_type_fk
|
|
1513
|
+
FOREIGN KEY (test_type) REFERENCES test_types; -- No referenced column is listed, so the reference resolves to the primary key of test_types (NOTE(review): confirm that key is test_type)
|
|
1514
|
+
|
|
1515
|
+
ALTER TABLE cat_test_conditions -- Adds a foreign key from cat_test_conditions.test_type to the test_types table (part of the Replace constraints section, the matching drop is presumably earlier in the script and is not visible here)
|
|
1516
|
+
ADD CONSTRAINT cat_test_conditions_cat_tests_test_type_fk
|
|
1517
|
+
FOREIGN KEY (test_type) REFERENCES test_types; -- No referenced column is listed, so the reference resolves to the primary key of test_types (NOTE(review): confirm that key is test_type)
|