dataops-testgen 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataops_testgen-2.2.0.dist-info/LICENSE +203 -0
- dataops_testgen-2.2.0.dist-info/METADATA +287 -0
- dataops_testgen-2.2.0.dist-info/NOTICE +5 -0
- dataops_testgen-2.2.0.dist-info/RECORD +270 -0
- dataops_testgen-2.2.0.dist-info/WHEEL +5 -0
- dataops_testgen-2.2.0.dist-info/entry_points.txt +2 -0
- dataops_testgen-2.2.0.dist-info/top_level.txt +1 -0
- testgen/__init__.py +0 -0
- testgen/__main__.py +770 -0
- testgen/commands/__init__.py +0 -0
- testgen/commands/queries/__init__.py +0 -0
- testgen/commands/queries/execute_cat_tests_query.py +95 -0
- testgen/commands/queries/execute_tests_query.py +160 -0
- testgen/commands/queries/generate_tests_query.py +94 -0
- testgen/commands/queries/profiling_query.py +366 -0
- testgen/commands/queries/test_parameter_validation_query.py +88 -0
- testgen/commands/run_execute_cat_tests.py +162 -0
- testgen/commands/run_execute_tests.py +168 -0
- testgen/commands/run_generate_tests.py +107 -0
- testgen/commands/run_get_entities.py +122 -0
- testgen/commands/run_launch_db_config.py +84 -0
- testgen/commands/run_observability_exporter.py +330 -0
- testgen/commands/run_profiling_bridge.py +495 -0
- testgen/commands/run_quick_start.py +168 -0
- testgen/commands/run_setup_profiling_tools.py +96 -0
- testgen/commands/run_test_definition.py +146 -0
- testgen/commands/run_test_parameter_validation.py +135 -0
- testgen/commands/run_upgrade_db_config.py +156 -0
- testgen/common/__init__.py +8 -0
- testgen/common/clean_sql.py +53 -0
- testgen/common/credentials.py +25 -0
- testgen/common/database/__init__.py +0 -0
- testgen/common/database/database_service.py +629 -0
- testgen/common/database/flavor/__init__.py +0 -0
- testgen/common/database/flavor/flavor_service.py +75 -0
- testgen/common/database/flavor/mssql_flavor_service.py +34 -0
- testgen/common/database/flavor/postgresql_flavor_service.py +5 -0
- testgen/common/database/flavor/redshift_flavor_service.py +22 -0
- testgen/common/database/flavor/snowflake_flavor_service.py +69 -0
- testgen/common/database/flavor/trino_flavor_service.py +21 -0
- testgen/common/date_service.py +68 -0
- testgen/common/display_service.py +85 -0
- testgen/common/docker_service.py +76 -0
- testgen/common/encrypt.py +55 -0
- testgen/common/get_pipeline_parms.py +57 -0
- testgen/common/logs.py +79 -0
- testgen/common/process_service.py +62 -0
- testgen/common/read_file.py +69 -0
- testgen/settings.py +440 -0
- testgen/template/dbsetup/010_create_base_schema.sql +2 -0
- testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +179 -0
- testgen/template/dbsetup/030_initialize_new_schema_structure.sql +735 -0
- testgen/template/dbsetup/040_populate_new_schema_project.sql +59 -0
- testgen/template/dbsetup/050_populate_new_schema_metadata.sql +1517 -0
- testgen/template/dbsetup/060_create_standard_views.sql +248 -0
- testgen/template/dbsetup/070_create_default_users.sql +17 -0
- testgen/template/dbsetup/075_grant_role_rights.sql +43 -0
- testgen/template/dbsetup/080_set_current_revision.sql +5 -0
- testgen/template/dbupgrade/0100_incremental_upgrade.sql +5 -0
- testgen/template/dbupgrade/0101_incremental_upgrade.sql +15 -0
- testgen/template/dbupgrade/0102_incremental_upgrade.sql +4 -0
- testgen/template/dbupgrade/0103_incremental_upgrade.sql +22 -0
- testgen/template/dbupgrade/0104_incremental_upgrade.sql +44 -0
- testgen/template/dbupgrade/0105_incremental_upgrade.sql +1 -0
- testgen/template/dbupgrade/0106_incremental_upgrade.sql +5 -0
- testgen/template/dbupgrade/0107_incremental_upgrade.sql +3 -0
- testgen/template/dbupgrade_helpers/get_tg_revision.sql +2 -0
- testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +116 -0
- testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql +11 -0
- testgen/template/exec_cat_tests/ex_cat_results_parse.sql +69 -0
- testgen/template/exec_cat_tests/ex_cat_retrieve_agg_test_parms.sql +6 -0
- testgen/template/exec_cat_tests/ex_cat_test_query.sql +8 -0
- testgen/template/execution/ex_finalize_test_run_results.sql +37 -0
- testgen/template/execution/ex_get_tests_non_cat.sql +47 -0
- testgen/template/execution/ex_update_test_record_in_testrun_table.sql +27 -0
- testgen/template/execution/ex_write_test_record_to_testrun_table.sql +6 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +48 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql +34 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +39 -0
- testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +58 -0
- testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +44 -0
- testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +37 -0
- testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +53 -0
- testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +46 -0
- testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +59 -0
- testgen/template/flavors/generic/profiling/contingency_counts.sql +3 -0
- testgen/template/flavors/generic/validate_tests/ex_get_project_column_list_generic.sql +3 -0
- testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql +53 -0
- testgen/template/flavors/mssql/profiling/project_ddf_query_mssql.sql +35 -0
- testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +246 -0
- testgen/template/flavors/mssql/profiling/project_secondary_profiling_query_mssql.sql +36 -0
- testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql +8 -0
- testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql +12 -0
- testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql +54 -0
- testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql +4 -0
- testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql +1 -0
- testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql +46 -0
- testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql +59 -0
- testgen/template/flavors/postgresql/profiling/project_ddf_query_postgresql.sql +42 -0
- testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +225 -0
- testgen/template/flavors/postgresql/profiling/project_secondary_profiling_query_postgresql.sql +28 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql +157 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql +1 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql +2 -0
- testgen/template/flavors/redshift/profiling/project_ddf_query_redshift.sql +38 -0
- testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +221 -0
- testgen/template/flavors/redshift/profiling/project_secondary_profiling_query_redshift.sql +29 -0
- testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql +115 -0
- testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql +1 -0
- testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql +2 -0
- testgen/template/flavors/snowflake/profiling/project_ddf_query_snowflake.sql +38 -0
- testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +220 -0
- testgen/template/flavors/snowflake/profiling/project_secondary_profiling_query_snowflake.sql +29 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql +69 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql +1 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql +6 -0
- testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml +219 -0
- testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql +92 -0
- testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql +1 -0
- testgen/template/gen_funny_cat_tests/gen_test_constant.sql +104 -0
- testgen/template/gen_funny_cat_tests/gen_test_distinct_value_ct.sql +98 -0
- testgen/template/gen_funny_cat_tests/gen_test_row_ct.sql +57 -0
- testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql +59 -0
- testgen/template/generation/gen_delete_old_tests.sql +5 -0
- testgen/template/generation/gen_insert_test_suite.sql +5 -0
- testgen/template/generation/gen_retrieve_or_insert_test_suite.sql +58 -0
- testgen/template/generation/gen_standard_test_type_list.sql +13 -0
- testgen/template/generation/gen_standard_tests.sql +48 -0
- testgen/template/get_entities/get_connection.sql +21 -0
- testgen/template/get_entities/get_connections_list.sql +9 -0
- testgen/template/get_entities/get_latest.sql +4 -0
- testgen/template/get_entities/get_profile.sql +12 -0
- testgen/template/get_entities/get_profile_info.sql +17 -0
- testgen/template/get_entities/get_profile_list.sql +17 -0
- testgen/template/get_entities/get_profile_screen.sql +275 -0
- testgen/template/get_entities/get_project_list.sql +6 -0
- testgen/template/get_entities/get_table_group_list.sql +10 -0
- testgen/template/get_entities/get_test_generation_list.sql +18 -0
- testgen/template/get_entities/get_test_info.sql +41 -0
- testgen/template/get_entities/get_test_results_for_run_cli.sql +16 -0
- testgen/template/get_entities/get_test_run_list.sql +24 -0
- testgen/template/get_entities/get_test_suite.sql +13 -0
- testgen/template/get_entities/get_test_suite_list.sql +18 -0
- testgen/template/get_entities/list_test_types.sql +4 -0
- testgen/template/observability/get_event_data.sql +23 -0
- testgen/template/observability/get_test_results.sql +41 -0
- testgen/template/observability/update_test_results_exported_to_observability.sql +12 -0
- testgen/template/parms/parms_profiling.sql +34 -0
- testgen/template/parms/parms_test_execution.sql +13 -0
- testgen/template/parms/parms_test_gen.sql +23 -0
- testgen/template/profiling/contingency_columns.sql +7 -0
- testgen/template/profiling/datatype_suggestions.sql +56 -0
- testgen/template/profiling/functional_datatype.sql +523 -0
- testgen/template/profiling/functional_tabletype_stage.sql +48 -0
- testgen/template/profiling/functional_tabletype_update.sql +8 -0
- testgen/template/profiling/pii_flag.sql +133 -0
- testgen/template/profiling/profile_anomalies_screen_column.sql +22 -0
- testgen/template/profiling/profile_anomalies_screen_multi_column.sql +58 -0
- testgen/template/profiling/profile_anomalies_screen_table.sql +22 -0
- testgen/template/profiling/profile_anomalies_screen_table_dates.sql +30 -0
- testgen/template/profiling/profile_anomalies_screen_variants.sql +40 -0
- testgen/template/profiling/profile_anomaly_types_get.sql +3 -0
- testgen/template/profiling/project_get_table_sample_count.sql +22 -0
- testgen/template/profiling/project_profile_run_record_insert.sql +8 -0
- testgen/template/profiling/project_profile_run_record_update.sql +5 -0
- testgen/template/profiling/project_profile_run_record_update_status.sql +5 -0
- testgen/template/profiling/project_update_profile_results_to_estimates.sql +32 -0
- testgen/template/profiling/refresh_anomalies.sql +33 -0
- testgen/template/profiling/refresh_data_chars_from_profiling.sql +156 -0
- testgen/template/profiling/secondary_profiling_columns.sql +12 -0
- testgen/template/profiling/secondary_profiling_delete.sql +4 -0
- testgen/template/profiling/secondary_profiling_update.sql +18 -0
- testgen/template/quick_start/populate_target_data.sql +1077 -0
- testgen/template/quick_start/recreate_target_data_schema.sql +167 -0
- testgen/template/quick_start/update_target_data.sql +100 -0
- testgen/template/updates/create_tmp_test_definition.sql +19 -0
- testgen/template/updates/get_test_def_parms.sql +38 -0
- testgen/template/updates/populate_stg_test_definitions.sql +184 -0
- testgen/template/validate_tests/ex_disable_tests_test_definitions.sql +5 -0
- testgen/template/validate_tests/ex_flag_tests_test_definitions.sql +64 -0
- testgen/template/validate_tests/ex_get_project_column_list_generic.sql +3 -0
- testgen/template/validate_tests/ex_get_test_column_list_tg.sql +65 -0
- testgen/template/validate_tests/ex_write_test_val_errors.sql +22 -0
- testgen/ui/__init__.py +0 -0
- testgen/ui/app.py +98 -0
- testgen/ui/assets/dk_logo.svg +46 -0
- testgen/ui/assets/question_mark.png +0 -0
- testgen/ui/assets/scripts.js +68 -0
- testgen/ui/assets/style.css +140 -0
- testgen/ui/bootstrap.py +109 -0
- testgen/ui/components/__init__.py +0 -0
- testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fBBc4.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fChc4EsA.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu4mxK.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu7GxKOzY.woff2 +0 -0
- testgen/ui/components/frontend/css/material-symbols-rounded.css +24 -0
- testgen/ui/components/frontend/css/material-symbols-rounded.woff2 +0 -0
- testgen/ui/components/frontend/css/roboto-font-faces.css +35 -0
- testgen/ui/components/frontend/css/shared.css +36 -0
- testgen/ui/components/frontend/img/dk_logo.svg +46 -0
- testgen/ui/components/frontend/index.html +17 -0
- testgen/ui/components/frontend/js/components/breadcrumbs.js +86 -0
- testgen/ui/components/frontend/js/components/button.js +66 -0
- testgen/ui/components/frontend/js/components/location.js +62 -0
- testgen/ui/components/frontend/js/components/select.js +75 -0
- testgen/ui/components/frontend/js/components/sidebar.js +358 -0
- testgen/ui/components/frontend/js/main.js +99 -0
- testgen/ui/components/frontend/js/streamlit.js +19 -0
- testgen/ui/components/frontend/js/van.min.js +1 -0
- testgen/ui/components/utils/__init__.py +0 -0
- testgen/ui/components/utils/callbacks.py +51 -0
- testgen/ui/components/utils/component.py +13 -0
- testgen/ui/components/widgets/__init__.py +6 -0
- testgen/ui/components/widgets/breadcrumbs.py +32 -0
- testgen/ui/components/widgets/location.py +65 -0
- testgen/ui/components/widgets/modal.py +97 -0
- testgen/ui/components/widgets/sidebar.py +69 -0
- testgen/ui/navigation/__init__.py +0 -0
- testgen/ui/navigation/menu.py +42 -0
- testgen/ui/navigation/page.py +20 -0
- testgen/ui/navigation/router.py +63 -0
- testgen/ui/queries/__init__.py +0 -0
- testgen/ui/queries/authentication_queries.py +47 -0
- testgen/ui/queries/connection_queries.py +121 -0
- testgen/ui/queries/profiling_queries.py +148 -0
- testgen/ui/queries/project_queries.py +9 -0
- testgen/ui/queries/table_group_queries.py +186 -0
- testgen/ui/queries/test_definition_queries.py +270 -0
- testgen/ui/queries/test_run_queries.py +32 -0
- testgen/ui/queries/test_suite_queries.py +145 -0
- testgen/ui/scripts/__init__.py +0 -0
- testgen/ui/scripts/patch_streamlit.py +111 -0
- testgen/ui/services/__init__.py +0 -0
- testgen/ui/services/authentication_service.py +119 -0
- testgen/ui/services/connection_service.py +220 -0
- testgen/ui/services/database_service.py +282 -0
- testgen/ui/services/form_service.py +1008 -0
- testgen/ui/services/javascript_service.py +44 -0
- testgen/ui/services/query_service.py +316 -0
- testgen/ui/services/string_service.py +12 -0
- testgen/ui/services/table_group_service.py +130 -0
- testgen/ui/services/test_definition_service.py +117 -0
- testgen/ui/services/test_run_service.py +13 -0
- testgen/ui/services/test_suite_service.py +76 -0
- testgen/ui/services/toolbar_service.py +77 -0
- testgen/ui/session.py +46 -0
- testgen/ui/views/__init__.py +0 -0
- testgen/ui/views/app_log_modal.py +92 -0
- testgen/ui/views/connections.py +72 -0
- testgen/ui/views/connections_base.py +367 -0
- testgen/ui/views/login.py +40 -0
- testgen/ui/views/not_found.py +16 -0
- testgen/ui/views/overview.py +34 -0
- testgen/ui/views/profiling_anomalies.py +501 -0
- testgen/ui/views/profiling_details.py +335 -0
- testgen/ui/views/profiling_modal.py +40 -0
- testgen/ui/views/profiling_results.py +206 -0
- testgen/ui/views/profiling_summary.py +177 -0
- testgen/ui/views/project_settings.py +74 -0
- testgen/ui/views/table_groups.py +530 -0
- testgen/ui/views/test_definitions.py +1020 -0
- testgen/ui/views/test_results.py +908 -0
- testgen/ui/views/test_runs.py +195 -0
- testgen/ui/views/test_suites.py +545 -0
- testgen/utils/__init__.py +0 -0
- testgen/utils/plugins.py +17 -0
- testgen/utils/singleton.py +14 -0
|
@@ -0,0 +1,523 @@
|
|
|
1
|
+
-- First Clear --
|
|
2
|
+
-- Reset both functional classifications on every row of this profiling run.
-- Later statements in this script filter on functional_data_type IS NULL.
UPDATE profile_results
   SET functional_table_type = NULL,
       functional_data_type  = NULL
 WHERE profile_run_id = '{PROFILE_RUN_ID}';
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
-- 1. Assign CONSTANT and TBD - this is the first step of elimination
|
|
9
|
+
/*
|
|
10
|
+
TBD - If record_ct for the table is zero, or if the column has fewer than 5 values or all of its values are blank
|
|
11
|
+
Constant - If the distinct_value_ct is 1 and more than 75% of the records are filled
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
-- Flag columns with too little data to classify: record_ct is 0, or rows exist
-- but fewer than 5 values were counted or every counted value is zero-length.
-- All other rows keep their current classification.
UPDATE profile_results
   SET functional_data_type =
       CASE
         WHEN record_ct = 0
           OR (record_ct > 0
               AND (value_ct < 5
                    -- NULLIF avoids a divide-by-zero when value_ct is 0
                    OR zero_length_ct / NULLIF(value_ct, 0)::FLOAT = 1))
           THEN 'TBD (Not enough data)'
         ELSE functional_data_type
       END
 WHERE profile_run_id = '{PROFILE_RUN_ID}';
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
-- Mark single-valued columns as Constant. Only rows that are still
-- unclassified are examined; rows failing the test stay unclassified.
UPDATE profile_results
   SET functional_data_type =
       CASE
         WHEN distinct_value_ct = 1
          -- Percentage of records whose value is counted in value_ct but not
          -- in filled_value_ct. A lone distinct value covering more than 75%
          -- of the records is treated as a constant.
          AND (value_ct::FLOAT - COALESCE(filled_value_ct, 0::BIGINT)::FLOAT)
              / record_ct::FLOAT * 100.00 > 75
           THEN 'Constant'
         ELSE functional_data_type
       END
 WHERE functional_data_type IS NULL
   AND profile_run_id = '{PROFILE_RUN_ID}';
|
|
33
|
+
|
|
34
|
+
-- 1A. Assign IDs based on column-name masks
|
|
35
|
+
-- Unclassified columns whose name matches the configured surrogate-key mask
-- (case-insensitive) become 'ID-SK'.
UPDATE profile_results
   SET functional_data_type = 'ID-SK'
 WHERE functional_data_type IS NULL
   AND column_name ILIKE '{PROFILE_SK_COLUMN_MASK}'
   AND profile_run_id = '{PROFILE_RUN_ID}';
|
|
40
|
+
|
|
41
|
+
-- Unclassified columns whose name matches the configured ID mask
-- (case-insensitive) become 'ID'. Runs after the ID-SK step, so columns
-- already tagged 'ID-SK' are not touched.
UPDATE profile_results
   SET functional_data_type = 'ID'
 WHERE functional_data_type IS NULL
   AND column_name ILIKE '{PROFILE_ID_COLUMN_MASK}'
   AND profile_run_id = '{PROFILE_RUN_ID}';
|
|
46
|
+
|
|
47
|
+
-- 2. Assign DATE
|
|
48
|
+
/*
|
|
49
|
+
. Historical Date - If at least 75% of values are dated more than 20 years ago (before_20yr_date_ct)
|
|
50
|
+
. Future Date - If more than 95% of records have a future date value
|
|
51
|
+
. Schedule Date - If more than 50% of records have a future date present
|
|
52
|
+
. If we have 10-90% of records from (before 1 year ago and within a year and for future 1 year)
|
|
53
|
+
then, classify further as the following :-
|
|
54
|
+
Transactional Date - If the data has a record for everyday or at least twice a week
|
|
55
|
+
or we have at least 28 days of data in current year
|
|
56
|
+
or we have at least 28 days of data in the last 5 years
|
|
57
|
+
Transactional Date (Wk) - If the data available is for every week of the year or at least twice a month
|
|
58
|
+
or 2 weeks a month from the last 5 years
|
|
59
|
+
Transactional Date (Mo) - If the data available is for every month of the year or at least 5 months
|
|
60
|
+
or 5 months a year from the last 5 years
|
|
61
|
+
Transactional Date (Qtr) - If the data available is for every quarter of the year
|
|
62
|
+
Date (TBD) - If none of the above are satisfied
|
|
63
|
+
. Check varchar attributes (or attributes not given a date datatype)
|
|
64
|
+
Look at min_length and max_length to determine if a field is date or timestamp
|
|
65
|
+
|
|
66
|
+
*/
|
|
67
|
+
|
|
68
|
+
-- Classify date columns. Applies to unclassified rows whose general_type is
-- 'D', or whose value_ct is positive and equals date_ct + zero_length_ct.
-- Both CASE expressions are evaluated top-down, so the first match wins.
-- Every ratio divides by NULLIF(value_ct::FLOAT, 0); a zero value_ct makes
-- the comparison NULL and the branch is skipped.
UPDATE profile_results
   SET functional_data_type =
       CASE
         WHEN before_20yr_date_ct / NULLIF(value_ct::FLOAT, 0) * 100 >= 75
           THEN 'Historical Date'
         WHEN future_date_ct / NULLIF(value_ct::FLOAT, 0) * 100 >= 95
           THEN 'Future Date'
         WHEN future_date_ct / NULLIF(value_ct::FLOAT, 0) * 100 >= 50
           THEN 'Schedule Date'
         -- Mixed history: 10-90% older than a year, 10-90% within the last
         -- year, and at most 10% in the future.
         WHEN before_1yr_date_ct / NULLIF(value_ct::FLOAT, 0) * 100 BETWEEN 10 AND 90
          AND within_1yr_date_ct / NULLIF(value_ct::FLOAT, 0) * 100 BETWEEN 10 AND 90
          AND future_date_ct / NULLIF(value_ct::FLOAT, 0) * 100 BETWEEN 0 AND 10
           THEN
             CASE
               -- Daily grain
               WHEN date_days_present = DATEDIFF('DAY', min_date, max_date) + 1   -- every day
                 OR date_days_present >=
                    2 * (DATEDIFF('WEEK', min_date, max_date) + 1)                -- 2 days a week over the whole range
                 OR ROUND(within_1yr_date_ct::FLOAT / value_ct * distinct_value_ct) /
                    LEAST(365, NULLIF(DATEDIFF('DAY', (run_date::DATE - 365):: TIMESTAMP, max_date), 0))::FLOAT * 100 >=
                    28                                                            -- current year
                 OR ROUND(distinct_value_ct * (1 - before_5yr_date_ct / NULLIF(value_ct::FLOAT, 0))) /
                    LEAST(NULLIF(DATEDIFF('DAY', (run_date::DATE - 365 * 5)::TIMESTAMP, max_date) + 1, 0), 365 * 5) * 100 >=
                    28                                                            -- last 5 years
                 THEN 'Transactional Date'
               -- Weekly grain
               WHEN date_weeks_present =
                    NULLIF(DATEDIFF('WEEK', min_date, max_date), 0)::FLOAT + 1    -- every week
                 OR date_weeks_present >=
                    2 * (DATEDIFF('MONTH', min_date, max_date) + 1)               -- 2 weeks a month
                 -- NOTE(review): the original comment says "last 5 years to
                 -- current", but the window below starts 365 days before
                 -- run_date, not 365 * 5 -- confirm which is intended.
                 OR ROUND(distinct_value_ct * (1 - before_5yr_date_ct / NULLIF(value_ct::FLOAT, 0))) >=
                    2 *
                    (DATEDIFF('MONTH', (run_date::DATE - 365)::TIMESTAMP, max_date) + 1)
                 THEN 'Transactional Date (Wk)'
               -- Monthly grain
               WHEN date_months_present =
                    NULLIF(DATEDIFF('MONTH', min_date, max_date), 0)::FLOAT + 1   -- every month
                 OR date_months_present >=
                    5 * (DATEDIFF('YEAR', min_date, max_date) + 1)                -- 5 months a year
                 OR ROUND(distinct_value_ct * (1 - before_5yr_date_ct / NULLIF(value_ct::FLOAT, 0))) >=
                    5 *
                    (DATEDIFF('YEAR', (run_date::DATE - 365*5)::TIMESTAMP, max_date) + 1) -- 5 months a year over the last 5 years
                 THEN 'Transactional Date (Mo)'
               -- Quarterly grain
               WHEN distinct_value_ct = DATEDIFF('QUARTER', min_date, max_date) + 1 -- every quarter
                 THEN 'Transactional Date (Qtr)'
               ELSE 'Date (TBD)'
             END
         -- Fallbacks driven by the declared column type
         WHEN column_type = 'date'
           THEN 'Date Stamp'
         WHEN column_type = 'timestamp'
           THEN 'DateTime Stamp'
         ELSE functional_data_type
       END
 WHERE profile_run_id = '{PROFILE_RUN_ID}'
   AND functional_data_type IS NULL
   AND (general_type = 'D' OR (value_ct = date_ct + zero_length_ct AND value_ct > 0));
|
|
118
|
+
|
|
119
|
+
-- Character Date
|
|
120
|
+
-- Text columns holding dates: exactly one pattern, that pattern (second
-- '|'-delimited field of top_patterns) is NNNN-NN-NN, and the text range
-- sorts between '1900' and '2200'.
UPDATE profile_results
   SET functional_data_type = 'Date Stamp'
 WHERE functional_data_type IS NULL
   AND profile_run_id = '{PROFILE_RUN_ID}'
   AND TRIM(SPLIT_PART(top_patterns, '|', 2)) = 'NNNN-NN-NN'
   AND distinct_pattern_ct = 1
   AND max_text <= '2200'
   AND min_text >= '1900';
|
|
127
|
+
|
|
128
|
+
-- Character Timestamp
|
|
129
|
+
-- Text columns holding timestamps: exactly one pattern, and that pattern
-- (second '|'-delimited field of top_patterns) is NNNN-NN-NN NN:NN:NN.
UPDATE profile_results
   SET functional_data_type = 'DateTime Stamp'
 WHERE functional_data_type IS NULL
   AND profile_run_id = '{PROFILE_RUN_ID}'
   AND TRIM(SPLIT_PART(top_patterns, '|', 2)) = 'NNNN-NN-NN NN:NN:NN'
   AND distinct_pattern_ct = 1;
|
|
135
|
+
|
|
136
|
+
-- Assign PERIODS: Period Year, Period Qtr, Period Month, Period Week, Period DOW
|
|
137
|
+
-- Period Year: the column name mentions "year" or "yr" and the content looks
-- like a calendar year, either numerically or as 4-character text.
UPDATE profile_results
   SET functional_data_type = 'Period Year'
 WHERE functional_data_type IS NULL
   AND profile_run_id = '{PROFILE_RUN_ID}'
   AND (column_name ILIKE '%yr%' OR column_name ILIKE '%year%')
   AND (
         -- numeric: whole numbers from 1900 up to 20 years past the current year
         (    COALESCE(fractional_sum, 0) = 0
          AND min_value >= 1900
          AND max_value <= DATE_PART('YEAR', NOW()) + 20 )
      OR
         -- text: same range compared as strings, 4 characters, no embedded spaces
         (    avg_length = 4
          AND avg_embedded_spaces = 0
          AND min_text >= '1900'
          AND max_text <= (DATE_PART('YEAR', NOW()) + 20)::VARCHAR )
       );
|
|
151
|
+
|
|
152
|
+
-- Period Quarter: the column name mentions "qtr" or "quarter" and the content
-- is either the whole numbers 1..4 or text shaped like NNNN-AN / NNNN_AN.
UPDATE profile_results
   SET functional_data_type = 'Period Quarter'
 WHERE functional_data_type IS NULL
   AND profile_run_id = '{PROFILE_RUN_ID}'
   AND (column_name ILIKE '%quarter%' OR column_name ILIKE '%qtr%')
   AND (
         -- numeric quarter number
         (    COALESCE(fractional_sum, 0) = 0
          AND min_value = 1
          AND max_value = 4 )
      OR
         -- year plus quarter as text, 6 to 7 characters on average
         (    avg_length >= 6 AND avg_length <= 7
          AND min_text >= '1900'
          AND max_text <= '2200'
          AND SPLIT_PART(top_patterns, '|', 2) ~ '^\s*NNNN[-_]AN\s*$' )
       );
|
|
164
|
+
|
|
165
|
+
-- Period Year-Mon: the column name contains "mo", the text range sorts between
-- '1900' and '2200', and the top pattern is a year followed by a two-digit
-- month (about 7 characters) or a three-letter month (about 8 characters).
UPDATE profile_results
   SET functional_data_type = 'Period Year-Mon'
 WHERE functional_data_type IS NULL
   AND profile_run_id = '{PROFILE_RUN_ID}'
   AND column_name ILIKE '%mo%'
   AND max_text <= '2200'
   AND min_text >= '1900'
   AND (
         (    SPLIT_PART(top_patterns, '|', 2) ~ '^\s*NNNN[-_]NN\s*$'
          AND avg_length BETWEEN 6.8 AND 7.2 )
      OR (    UPPER(SPLIT_PART(top_patterns, '|', 2)) ~ '^\s*NNNN[-_]AAA\s*$'
          AND avg_length BETWEEN 7.8 AND 8.2 )
       );
|
|
177
|
+
|
|
178
|
+
-- Period Month: the column name contains "mo" and the values span a full set
-- of months in one of four encodings.
UPDATE profile_results
   SET functional_data_type = 'Period Month'
 WHERE functional_data_type IS NULL
   AND profile_run_id = '{PROFILE_RUN_ID}'
   AND column_name ILIKE '%mo%'
   AND (
         -- month number as text: '1' or '01' through '12'
         (max_length = 2 AND min_text IN ('01', '1') AND max_text = '12')
         -- month number as a whole number 1..12
      OR (min_value = 1 AND max_value = 12 AND COALESCE(SIGN(fractional_sum), 0) = 0)
         -- full month names: April and September are the alphabetical extremes
      OR (max_length = 9 AND min_text ILIKE 'April' AND max_text ILIKE 'SEPTEMBER')
         -- three-letter month names
      OR (max_length = 3 AND min_text ILIKE 'APR' AND max_text ILIKE 'SEP')
       );
|
|
189
|
+
|
|
190
|
+
-- Period Mon-NN: both text extremes are a three-letter month abbreviation
-- followed by an optional space or hyphen and one or two digits, values
-- average about 6 characters, and the pattern returned by
-- fn_parsefreq(top_patterns, 1, 2) contains AAA, a space or hyphen, then NN.
UPDATE profile_results
   SET functional_data_type = 'Period Mon-NN'
 WHERE functional_data_type IS NULL
   AND profile_run_id = '{PROFILE_RUN_ID}'
   AND avg_length BETWEEN 5.8 AND 6.2
   AND TRIM(fn_parsefreq(top_patterns, 1, 2)) ~ '(?i)AAA[\s-]NN'
   AND min_text ~ '(?i)^(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[\s-]?\d{1,2}$'
   AND max_text ~ '(?i)^(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[\s-]?\d{1,2}$';
|
|
198
|
+
|
|
199
|
+
-- Period Week: the column name mentions "wk" or "week", there are 10 to 53
-- distinct values, and the range runs from week 1 to week 52 or 53, held
-- either as text or as whole numbers.
UPDATE profile_results
   SET functional_data_type = 'Period Week'
 WHERE functional_data_type IS NULL
   AND profile_run_id = '{PROFILE_RUN_ID}'
   AND (column_name ILIKE '%week%' OR column_name ILIKE '%wk%')
   AND distinct_value_ct >= 10
   AND distinct_value_ct <= 53
   AND (
         (    (min_text = '1' OR min_text = '01')
          AND (max_text = '52' OR max_text = '53') )
      OR (    min_value = 1
          AND (max_value = 52 OR max_value = 53)
          AND COALESCE(SIGN(fractional_sum), 0) = 0 )
       );
|
|
207
|
+
|
|
208
|
+
-- Period DOW: the column name mentions "day" or "dow", there are exactly
-- 7 distinct values, and the range matches one day-of-week encoding.
UPDATE profile_results
   SET functional_data_type = 'Period DOW'
 WHERE functional_data_type IS NULL
   AND profile_run_id = '{PROFILE_RUN_ID}'
   AND (column_name ILIKE '%dow%' OR column_name ILIKE '%day%')
   AND distinct_value_ct = 7
   AND (
         -- day number as text
         (min_text = '1' AND max_text = '7')
         -- day number as a whole number
      OR (min_value = 1 AND max_value = 7 AND COALESCE(SIGN(fractional_sum), 0) = 0)
         -- full day names: Friday and Wednesday are the alphabetical extremes
      OR (max_length = 9 AND min_text ILIKE 'FRIDAY' AND max_text ILIKE 'WEDNESDAY')
         -- three-letter day names
      OR (max_length = 3 AND min_text ILIKE 'FRI' AND max_text ILIKE 'WED')
       );
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
-- 3. Assign ADDRESS RELATED FIELDS, PHONE AND EMAIL
|
|
221
|
+
/*
|
|
222
|
+
Zip - Length must be less than or equal to 11. We're also looking at the column name
|
|
223
|
+
Email - Check column name and top patterns. top_patterns must have @ and .
|
|
224
|
+
Phone - Length must be less than or equal to 11. We're also looking at the column name
|
|
225
|
+
Address - Column name check. If the field is populated then it should have at least 4 distinct pattern count
|
|
226
|
+
State - Column name must have 'state' in it. A valid state must have max_length greater than or equal to 2.
|
|
227
|
+
To avoid confusion with a field serving a different purpose, we're checking distinct_value_ct.
|
|
228
|
+
Also, a valid state should not have a number in the data.
|
|
229
|
+
|
|
230
|
+
*/
|
|
231
|
+
|
|
232
|
+
-- Classify contact-style columns (Zip, Email, Phone, Address, State) from the
-- standard pattern match and, for some types, the column name. Branches are
-- tested in the order listed, so an earlier match takes precedence. Rows that
-- match nothing stay unclassified.
UPDATE profile_results
   SET functional_data_type =
       CASE
         -- Zip needs both the pattern and a matching column name
         WHEN std_pattern_match = 'ZIP_USA'
          AND (column_name ILIKE '%zip%' OR column_name ILIKE '%postal%')
           THEN 'Zip'
         WHEN std_pattern_match = 'EMAIL'
           THEN 'Email'
         -- Phone: a "phone" column of 7-11 characters, or the phone pattern
         WHEN (column_name ILIKE '%phone%' AND max_length BETWEEN 7 AND 11)
           OR std_pattern_match = 'PHONE_USA'
           THEN 'Phone'
         -- Address: name ends in "address" (but is not an email address
         -- column), or the street-address pattern
         WHEN (column_name ILIKE '%address' AND column_name NOT ILIKE '%email%')
           OR std_pattern_match = 'STREET_ADDR'
           THEN 'Address'
         WHEN std_pattern_match = 'STATE_USA'
           THEN 'State'
         ELSE functional_data_type
       END
 WHERE functional_data_type IS NULL
   AND profile_run_id = '{PROFILE_RUN_ID}';
|
|
250
|
+
|
|
251
|
+
-- Update City based on position of State and Zip
|
|
252
|
+
-- Tag a column as City when its lower-cased name contains "city" or "cty"
-- and, within the same table and profiling run, the column at position + 1 is
-- classified State and the column at position + 2 is classified Zip.
-- The target row is tied to alias c through profile_results.id = c.id.
UPDATE profile_results
   SET functional_data_type = 'City'
  FROM profile_results c
INNER JOIN profile_results z
   ON (c.profile_run_id = z.profile_run_id
  AND  c.table_name = z.table_name
  AND  c.position + 2 = z.position
  AND  'Zip' = z.functional_data_type)
INNER JOIN profile_results s
   ON (c.profile_run_id = s.profile_run_id
  AND  c.table_name = s.table_name
  AND  c.position + 1 = s.position
  AND  'State' = s.functional_data_type)
 WHERE c.profile_run_id = '{PROFILE_RUN_ID}'
   AND LOWER(c.column_name) SIMILAR TO '%c(|i)ty%'
   -- NULL NOT IN (...) evaluates to NULL rather than TRUE, so the bare NOT IN
   -- test silently skipped every column that was still unclassified. Test for
   -- NULL explicitly so those columns can be tagged as City.
   AND (c.functional_data_type IS NULL
        OR c.functional_data_type NOT IN ('State', 'Zip'))
   AND profile_results.id = c.id;
|
|
269
|
+
|
|
270
|
+
-- Assign Name
|
|
271
|
+
-- Person Full Name: unclassified columns averaging at most 20 characters with
-- roughly one to two embedded spaces per value, whose name is exactly
-- 'name' / 'nm' or ends in a person-related prefix followed by name / nm.
-- The name tests are case-sensitive.
UPDATE profile_results
   SET functional_data_type = 'Person Full Name'
 WHERE functional_data_type IS NULL
   AND profile_run_id = '{PROFILE_RUN_ID}'
   AND avg_embedded_spaces BETWEEN 0.9 AND 2.0
   AND avg_length <= 20
   AND (   column_name = 'name'
        OR column_name = 'nm'
        OR column_name ~ '(approver|full|contact|emp|employee|hcp|manager|mgr_|party|person|preferred|rep|reviewer|salesperson|spouse)(_| |)(name|nm)$' );
|
|
279
|
+
|
|
280
|
+
-- Assign First Name
|
|
281
|
+
-- Person Given Name: short values (average length at most 8) that are almost
-- never multi-word, in columns named like first / middle / nick name.
-- NOTE(review): unlike the neighbouring statements this one has no
-- "functional_data_type IS NULL" guard, so it overwrites earlier
-- classifications -- confirm that is intended.
UPDATE profile_results
   SET functional_data_type = 'Person Given Name'
 WHERE profile_run_id = '{PROFILE_RUN_ID}'
   AND avg_embedded_spaces < 0.2
   AND avg_length <= 8
   AND (   LOWER(column_name) SIMILAR TO '%nick(_| |)n(|a)m%%'
        OR LOWER(column_name) SIMILAR TO '%(middle|mdl)(_| |)n(|a)m%%'
        OR LOWER(column_name) SIMILAR TO '%f(|i)rst(_| |)n(|a)m%%' );
|
|
289
|
+
|
|
290
|
+
-- Assign Last Name
-- Tags columns as 'Person Last Name' when the lower-cased column name contains
-- a last / maiden / sur name token and the values average 5 to 8 characters
-- with almost no embedded spaces.
-- This statement does not filter on the current functional_data_type.
UPDATE profile_results
   SET functional_data_type = 'Person Last Name'
 WHERE (   LOWER(column_name) SIMILAR TO '%l(|a)st(_| |)n(|a)m%'
        OR LOWER(column_name) SIMILAR TO '%maiden(_| |)n(|a)m%'
        OR LOWER(column_name) SIMILAR TO '%sur(_| |)n(|a)m%' )
   AND avg_embedded_spaces < 0.2
   AND avg_length BETWEEN 5 AND 8
   AND profile_run_id = '{PROFILE_RUN_ID}';
|
|
299
|
+
|
|
300
|
+
-- Assign Entity Name
-- Tags still-unclassified alpha columns (general_type 'A') as 'Entity Name'
-- when the column name ends in an organisation-related prefix followed by
-- name/nm.
UPDATE profile_results
   SET functional_data_type = 'Entity Name'
 WHERE profile_run_id = '{PROFILE_RUN_ID}'
   AND functional_data_type IS NULL
   AND general_type = 'A'
   -- Column names are lower-cased before matching, as the other name rules do,
   -- so upper- or mixed-case column names are recognised too.
   AND LOWER(column_name) ~ '(acct|account|affiliation|branch|business|co|comp|company|corp|corporate|cust|customer|distributor|employer|entity|firm|franchise|hco|org|organization|site|supplier|vendor|hospital|practice|clinic)(_| |)(name|nm)$';
|
|
306
|
+
|
|
307
|
+
-- Assign Boolean
/*
   Boolean - assigned to still-unclassified columns when general_type is 'B', or when
             distinct_value_ct is 1 or 2 and min_text / max_text (or min_value / max_value)
             fall in the recognised no/yes, n/y, false/true or 0/1 categories.
   Numeric column types are not boolean.
*/
UPDATE profile_results
   SET functional_data_type =
       CASE
          WHEN general_type = 'B'
             THEN 'Boolean'
          -- Exactly two values that form a recognised false/true pair
          WHEN distinct_value_ct = 2
           AND (   (LOWER(min_text) = 'no'    AND LOWER(max_text) = 'yes')
                OR (LOWER(min_text) = 'n'     AND LOWER(max_text) = 'y')
                OR (LOWER(min_text) = 'false' AND LOWER(max_text) = 'true')
                OR (LOWER(min_text) = '0'     AND LOWER(max_text) = '1')
                OR (min_value = 0 AND max_value = 1 AND column_type NOT ILIKE '%numeric%') )
             THEN 'Boolean'
          -- we can have only 1 value populated but it can still be boolean
          WHEN distinct_value_ct = 1
           AND (   (LOWER(min_text) IN ('no','yes') AND LOWER(max_text) IN ('no','yes'))
                OR (LOWER(min_text) IN ('n','y')    AND LOWER(max_text) IN ('n','y'))
                -- Both bounds are checked against the same spellings: testing min_text
                -- against ('false','true') but max_text against ('f','t') can never match.
                OR (    LOWER(min_text) IN ('false','true','f','t')
                    AND LOWER(max_text) IN ('false','true','f','t'))
                OR (LOWER(min_text) IN ('0','1')    AND LOWER(max_text) IN ('0','1'))
                -- NOTE(review): with a single distinct value min_value and max_value are
                -- presumably equal, so this 0/1 pair looks unreachable - confirm the intended check.
                OR (min_value = 0 AND max_value = 1 AND column_type NOT ILIKE '%numeric%') )
             THEN 'Boolean'
          ELSE functional_data_type
       END
 WHERE profile_run_id = '{PROFILE_RUN_ID}'
   AND functional_data_type IS NULL;
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
-- 4. Assign CODE, CATEGORY, ID, ATTRIBUTE & DESCRIPTION
/*
   Applies only to rows that are still unclassified, have general_type 'A' and whose
   lower-cased datatype_suggestion contains "varchar". The first matching rule wins:

   1. Measurement Text - column name starts or ends with an aggregate word
      (average, avg, count, ct, sum, total, tot), every value is numeric and
      there is more than one value.
   2. Values include digits and are either
        (max length <= 20, almost no embedded spaces, > 80% populated, > 95% unique)
        or (almost no embedded spaces and a near-constant length):
          ID   - more than 200 distinct values
          Code - 200 or fewer distinct values
   3. More than 200 distinct values and clearly varying length:
          Attribute   - 0 to 3 embedded spaces on average, max length <= 30 and
                        fn_charcount(max_text, ' ') < 5
          Description - otherwise
      When the length does not vary, the row is left unclassified.
   4. Between 2 and 200 distinct values:
          Code     - (under one embedded space on average and max length < 15),
                     or top_patterns contains both 'A' and 'N' characters
          Category - otherwise
*/
UPDATE profile_results
   SET functional_data_type =
       CASE
          -- Rule 1: numeric aggregates stored as text
          WHEN value_ct > 1
           AND numeric_ct = value_ct
           AND (   lower(column_name) ~ '^(average|avg|count|ct|sum|total|tot)_'
                OR lower(column_name) ~ '_(average|avg|count|ct|sum|total|tot)$' )
             THEN 'Measurement Text'
          -- Rule 2: identifier-like values containing digits
          WHEN includes_digit_ct > 0
           AND (   (    avg_embedded_spaces < 0.1                                   -- no spaces
                    AND max_length <= 20                                            -- short
                    AND value_ct / NULLIF(record_ct, 0)::FLOAT > 0.8                -- mostly populated
                    AND distinct_value_ct / NULLIF(value_ct, 0)::FLOAT > 0.95 )     -- mostly unique
                OR (    avg_embedded_spaces < 0.1                                   -- no spaces
                    AND (   round(avg_length - min_length) <= 1                     -- consistent length
                         OR round(max_length - avg_length) <= 1 ) ) )
             THEN CASE
                     WHEN distinct_value_ct > 200 THEN 'ID'
                     WHEN avg_embedded_spaces < 1 AND distinct_value_ct <= 200 THEN 'Code'
                     ELSE functional_data_type
                  END
          -- Rule 3: many distinct values; free text only when the length varies
          WHEN distinct_value_ct > 200
             THEN CASE
                     WHEN ROUND(avg_length) - min_length > 1
                      AND max_length - ROUND(avg_length) > 1
                        THEN CASE
                                WHEN max_length <= 30                        -- shorter length
                                 AND avg_embedded_spaces BETWEEN 0 AND 3     -- few words
                                 AND fn_charcount(max_text, ' ') < 5
                                   THEN 'Attribute'
                                ELSE 'Description'
                             END
                  END
          -- Rule 4: limited set of distinct values
          WHEN distinct_value_ct BETWEEN 2 AND 200
             THEN CASE
                     WHEN (fn_charcount(top_patterns, 'A') > 0 AND fn_charcount(top_patterns, 'N') > 0)
                       OR (max_length < 15 AND avg_embedded_spaces < 1)
                        THEN 'Code'
                     ELSE 'Category'
                  END
          ELSE functional_data_type
       END
 WHERE general_type='A'
   AND functional_data_type IS NULL
   AND LOWER(datatype_suggestion) SIMILAR TO '(%varchar%)'
   AND profile_run_id = '{PROFILE_RUN_ID}';
|
|
387
|
+
|
|
388
|
+
-- 5. Assign FLAG
/*
   Flag - considered only for rows whose functional data type is still NULL.
          Alpha columns (general_type 'A') with 3 to 5 distinct values, each
          between 2 and 7 characters long, whose top_patterns contain an 'A' character.
   Rows that do not match keep a NULL functional data type (the CASE has no ELSE).
*/
UPDATE profile_results
   SET functional_data_type =
       CASE
          WHEN functional_data_type IS NULL
           AND general_type = 'A'
           AND distinct_value_ct BETWEEN 3 AND 5
           AND min_length > 1
           AND max_length <= 7
           AND fn_charcount(top_patterns, 'A') > 0
           -- should not be decimal
           -- NOTE(review): because the two tests are OR-ed, a row is excluded only when
           -- BOTH column_type and datatype_suggestion mention "numeric" - confirm intent.
           AND (   lower(datatype_suggestion) NOT ILIKE '%numeric%'
                OR lower(column_type) NOT ILIKE '%numeric%' )
             THEN 'Flag'
       END
 WHERE functional_data_type IS NULL
   AND profile_run_id = '{PROFILE_RUN_ID}';
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
-- 6. Assign the remaining types where functional data type is null
-- Every row still unclassified for this run receives a value; the first
-- matching rule wins and anything unmatched becomes 'UNKNOWN'.
UPDATE profile_results
   SET functional_data_type =
       CASE
          -- Value range exactly as wide as the number of distinct values.
          -- NOTE(review): the fractional_sum test admits NULL or > 0, while the integer
          -- rules below treat fractional_sum = 0 as "integer" - confirm this is intended.
          WHEN (fractional_sum > 0 OR fractional_sum IS NULL)
           AND (distinct_value_ct = max_value - min_value + 1)
             THEN 'Sequence'
          -- Integer-valued numeric column whose name ends in no / num / number / nbr
          WHEN general_type='N'
           AND (   (    fractional_sum = 0   -- 0 implies integer; null is float or non-numeric
                    AND RTRIM(SPLIT_PART(column_type, ',', 2), ')') > '0' )
                OR column_type ILIKE '%int%' )
           AND LOWER(column_name) SIMILAR TO '%(no|num|number|nbr)'
             THEN CASE
                     -- more than 70% populated
                     WHEN ROUND(100.0 * value_ct::FLOAT/NULLIF(record_ct, 0)) > 70 THEN 'ID'
                     ELSE 'Attribute-Numeric'
                  END
          -- Any other integer-valued numeric column
          WHEN general_type='N'
           AND (   (    fractional_sum = 0   -- 0 implies integer; null is float or non-numeric
                    AND RTRIM(SPLIT_PART(column_type, ',', 2), ')') > '0' )
                OR column_type ILIKE '%int%' )
             THEN 'Measurement Discrete'
          -- Non-integer numerics with negative values
          WHEN general_type='N'
           AND min_value < 0
           AND distinct_value_ct > 1
             THEN 'Measurement'
          -- Non-negative numerics whose stdev is at least 10% of the average
          WHEN general_type='N'
           AND min_value >= 0
           AND distinct_value_ct > 1
           AND stdev_value/nullif(avg_value,0) >= 0.10
             THEN 'Measurement'
          ELSE 'UNKNOWN'
       END
 WHERE functional_data_type IS NULL
   AND profile_run_id = '{PROFILE_RUN_ID}';
|
|
442
|
+
|
|
443
|
+
-- Assign City
-- Tags a column as 'City' when the column at the next position in the same
-- table and run is typed 'State', and the column itself holds mostly
-- non-numeric, non-date, digit-free values averaging 7 to 12 characters with
-- under one embedded space and between 15 and 40000 distinct values.
UPDATE profile_results
   SET functional_data_type = 'City'
  FROM ( SELECT p.id
           FROM profile_results p
           -- The 'State' requirement on pn discards unmatched rows, so an inner
           -- join with the test in the ON clause selects the same rows.
           INNER JOIN profile_results pn
              ON pn.profile_run_id = p.profile_run_id
             AND pn.table_name = p.table_name
             AND pn.position - 1 = p.position
             AND pn.functional_data_type = 'State'
          WHERE p.profile_run_id = '{PROFILE_RUN_ID}'
            AND p.avg_length BETWEEN 7 AND 12
            AND p.avg_embedded_spaces < 1
            AND p.distinct_value_ct BETWEEN 15 AND 40000
            AND p.includes_digit_ct::FLOAT/NULLIF(p.value_ct,0)::FLOAT < 0.05
            AND p.numeric_ct::FLOAT/NULLIF(p.value_ct,0)::FLOAT < 0.05
            AND p.date_ct::FLOAT/NULLIF(p.value_ct,0)::FLOAT < 0.05 ) c
 WHERE profile_results.id = c.id;
|
|
461
|
+
|
|
462
|
+
-- 7. Assign 'ID-Unique' functional data type to the columns that are identity columns
-- A column typed 'ID' or 'ID-Secondary' is promoted when its distinct value
-- count equals the record count and there are more than 50 records.
UPDATE profile_results
   SET functional_data_type = 'ID-Unique'
 WHERE record_ct > 50
   AND distinct_value_ct = record_ct
   AND functional_data_type IN ('ID', 'ID-Secondary')
   AND profile_run_id = '{PROFILE_RUN_ID}';
|
|
470
|
+
|
|
471
|
+
-- Update remaining 'ID' columns to 'ID-Secondary' or 'ID-Group'
-- Only columns that are more than 70% populated are re-typed:
--   'ID-Secondary' when at least 75% of the populated values are distinct,
--   'ID-Group'     when fewer than 75% are.
-- Everything else keeps its current value.
UPDATE profile_results
   SET functional_data_type =
       CASE
          WHEN ROUND(100.0 * value_ct::FLOAT/NULLIF(record_ct, 0)) > 70
             THEN CASE
                     WHEN ROUND(100.0 * distinct_value_ct::FLOAT/NULLIF(value_ct, 0)) >= 75
                        THEN 'ID-Secondary'
                     WHEN ROUND(100.0 * distinct_value_ct::FLOAT/NULLIF(value_ct, 0)) < 75
                        THEN 'ID-Group'
                     ELSE functional_data_type
                  END
          ELSE functional_data_type
       END
 WHERE functional_data_type = 'ID'
   AND profile_run_id = '{PROFILE_RUN_ID}';
|
|
483
|
+
|
|
484
|
+
-- 8. Assign 'ID-FK' functional data type to the columns that are foreign keys of the identity columns identified in the previous step
-- A column becomes 'ID-FK' when a column with the same name in a different
-- table of the same table group and run is typed 'ID-Unique', and the column
-- itself is not 'ID-Unique'.
UPDATE profile_results
   SET functional_data_type = 'ID-FK'
  FROM profile_results ui
 WHERE ui.profile_run_id = '{PROFILE_RUN_ID}'
   AND ui.functional_data_type = 'ID-Unique'
   AND profile_results.profile_run_id = '{PROFILE_RUN_ID}'
   AND profile_results.table_groups_id = ui.table_groups_id
   AND profile_results.column_name = ui.column_name
   AND profile_results.table_name <> ui.table_name
   AND profile_results.functional_data_type <> 'ID-Unique';
|
|
497
|
+
|
|
498
|
+
-- Assign
|
|
499
|
+
|
|
500
|
+
-- 9. Functional Data Type: 'Measurement Pct'
-- Numeric columns already typed 'Measurement', 'Measurement Discrete' or
-- 'UNKNOWN' become 'Measurement Pct' when the column name contains "pct" or
-- "percent" (any case) and all values lie within -200 .. 200.
UPDATE profile_results
   SET functional_data_type = 'Measurement Pct'
 WHERE (column_name ILIKE '%percent%' OR column_name ILIKE '%pct%')
   AND max_value <= 200
   AND min_value >= -200
   AND general_type = 'N'
   AND functional_data_type IN ('Measurement', 'Measurement Discrete', 'UNKNOWN')
   AND profile_run_id = '{PROFILE_RUN_ID}';
|
|
510
|
+
|
|
511
|
+
-- Re-type 'Code' columns as 'Measurement Pct' when the values look like
-- percentages: every value includes a digit, min_text / max_text compare
-- within '0' .. '99', there are 1 to 3 distinct patterns, and each listed
-- pattern (fields 2, 4 and 6 of the '|'-separated top_patterns) is
-- 1-3 N characters, an optional ".N..." part, then a percent sign.
UPDATE profile_results
   SET functional_data_type = 'Measurement Pct'
 WHERE functional_data_type = 'Code'
   AND profile_run_id = '{PROFILE_RUN_ID}'
   AND includes_digit_ct = value_ct
   AND max_text <= '99'
   AND min_text >= '0'
   AND distinct_pattern_ct BETWEEN 1 AND 3
   -- first pattern is always checked; the 2nd / 3rd only when they exist
   AND TRIM(SPLIT_PART(top_patterns, '|', 2)) ~ '^N{1,3}(\.N+)?%$'
   AND (distinct_pattern_ct < 2 OR TRIM(SPLIT_PART(top_patterns, '|', 4)) ~ '^N{1,3}(\.N+)?%$')
   AND (distinct_pattern_ct < 3 OR TRIM(SPLIT_PART(top_patterns, '|', 6)) ~ '^N{1,3}(\.N+)?%$');
|
|
522
|
+
|
|
523
|
+
--- END OF QUERY ---
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
-- Derives a period ('cumulative' / 'window') and a type ('transaction' / 'domain')
-- for every table profiled on {RUN_DATE} in the given project and schema, using
-- all profiling rows recorded for those tables, and stages the result in
-- stg_functional_table_updates.
WITH tablesrank AS (
        -- Distinct per-run facts for each table that was profiled on {RUN_DATE}
        SELECT DISTINCT
               p.project_code,
               p.schema_name,
               p.table_name,
               p.run_date,
               p.record_ct,
               p.functional_data_type,
               DENSE_RANK() OVER (PARTITION BY p.schema_name, p.table_name
                                      ORDER BY p.run_date DESC) AS rnk
          FROM profile_results p
         INNER JOIN ( SELECT DISTINCT schema_name, table_name
                        FROM profile_results
                       WHERE run_date = '{RUN_DATE}'
                         AND schema_name = '{DATA_SCHEMA}'
                         AND project_code = '{PROJECT_CODE}' ) pt
            ON (    pt.schema_name = p.schema_name
                AND pt.table_name = p.table_name )
         WHERE p.schema_name = '{DATA_SCHEMA}'
           AND p.project_code = '{PROJECT_CODE}'
         ORDER BY p.schema_name, p.table_name, p.run_date DESC
     ),
     tablescount AS (
        -- Attach the preceding row's record count and run date within each table
        SELECT *,
               LAG(record_ct, 1) OVER (PARTITION BY schema_name, table_name
                                           ORDER BY schema_name, table_name, run_date) AS prev_record_ct,
               LAG(run_date, 1)  OVER (PARTITION BY schema_name, table_name
                                           ORDER BY schema_name, table_name, run_date) AS prev_run_date
          FROM tablesrank
     ),
     tablestat AS (
        SELECT project_code,
               schema_name,
               table_name,
               -- table period is cumulative if the record count never drops below
               -- the previous record count; otherwise it is a window
               CASE
                  WHEN SUM(CASE WHEN record_ct - prev_record_ct < 0 THEN 1 ELSE 0 END) = 0
                     THEN 'cumulative'
                  ELSE 'window'
               END AS table_period,
               -- a table is transactional when it has at least one 'Measurement'
               -- column and at least one 'Transactional Date' column
               CASE
                  WHEN SUM(CASE WHEN functional_data_type ILIKE '%Transactional Date%' THEN 1 ELSE 0 END) > 0
                   AND SUM(CASE WHEN functional_data_type = 'Measurement' THEN 1 ELSE 0 END) > 0
                     THEN 'transaction'
                  ELSE 'domain'
               END AS table_type
          FROM tablescount
         GROUP BY project_code, schema_name, table_name
         ORDER BY project_code, schema_name, table_name
     )
INSERT INTO stg_functional_table_updates
       (project_code, schema_name, run_date, table_name, table_period, table_type)
SELECT project_code,
       schema_name,
       '{RUN_DATE}' AS run_date,
       table_name,
       table_period,
       table_type
  FROM tablestat;
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
-- Stamp the profile_results rows of {RUN_DATE} with a combined
-- "<table_period>-<table_type>" label taken from the staged per-table values
-- in stg_functional_table_updates (matched on project, schema, table and run date).
UPDATE profile_results
   -- COALESCE needs an explicit fallback to have any effect; with the empty
   -- string a missing part no longer turns the whole concatenation into NULL.
   SET functional_table_type = COALESCE(s.table_period, '') || '-' || COALESCE(s.table_type, '')
  FROM stg_functional_table_updates s
 WHERE s.project_code = profile_results.project_code
   AND s.schema_name = profile_results.schema_name
   AND s.table_name = profile_results.table_name
   AND s.run_date = profile_results.run_date
   AND s.run_date = '{RUN_DATE}';
|