dataops-testgen 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataops_testgen-2.2.0.dist-info/LICENSE +203 -0
- dataops_testgen-2.2.0.dist-info/METADATA +287 -0
- dataops_testgen-2.2.0.dist-info/NOTICE +5 -0
- dataops_testgen-2.2.0.dist-info/RECORD +270 -0
- dataops_testgen-2.2.0.dist-info/WHEEL +5 -0
- dataops_testgen-2.2.0.dist-info/entry_points.txt +2 -0
- dataops_testgen-2.2.0.dist-info/top_level.txt +1 -0
- testgen/__init__.py +0 -0
- testgen/__main__.py +770 -0
- testgen/commands/__init__.py +0 -0
- testgen/commands/queries/__init__.py +0 -0
- testgen/commands/queries/execute_cat_tests_query.py +95 -0
- testgen/commands/queries/execute_tests_query.py +160 -0
- testgen/commands/queries/generate_tests_query.py +94 -0
- testgen/commands/queries/profiling_query.py +366 -0
- testgen/commands/queries/test_parameter_validation_query.py +88 -0
- testgen/commands/run_execute_cat_tests.py +162 -0
- testgen/commands/run_execute_tests.py +168 -0
- testgen/commands/run_generate_tests.py +107 -0
- testgen/commands/run_get_entities.py +122 -0
- testgen/commands/run_launch_db_config.py +84 -0
- testgen/commands/run_observability_exporter.py +330 -0
- testgen/commands/run_profiling_bridge.py +495 -0
- testgen/commands/run_quick_start.py +168 -0
- testgen/commands/run_setup_profiling_tools.py +96 -0
- testgen/commands/run_test_definition.py +146 -0
- testgen/commands/run_test_parameter_validation.py +135 -0
- testgen/commands/run_upgrade_db_config.py +156 -0
- testgen/common/__init__.py +8 -0
- testgen/common/clean_sql.py +53 -0
- testgen/common/credentials.py +25 -0
- testgen/common/database/__init__.py +0 -0
- testgen/common/database/database_service.py +629 -0
- testgen/common/database/flavor/__init__.py +0 -0
- testgen/common/database/flavor/flavor_service.py +75 -0
- testgen/common/database/flavor/mssql_flavor_service.py +34 -0
- testgen/common/database/flavor/postgresql_flavor_service.py +5 -0
- testgen/common/database/flavor/redshift_flavor_service.py +22 -0
- testgen/common/database/flavor/snowflake_flavor_service.py +69 -0
- testgen/common/database/flavor/trino_flavor_service.py +21 -0
- testgen/common/date_service.py +68 -0
- testgen/common/display_service.py +85 -0
- testgen/common/docker_service.py +76 -0
- testgen/common/encrypt.py +55 -0
- testgen/common/get_pipeline_parms.py +57 -0
- testgen/common/logs.py +79 -0
- testgen/common/process_service.py +62 -0
- testgen/common/read_file.py +69 -0
- testgen/settings.py +440 -0
- testgen/template/dbsetup/010_create_base_schema.sql +2 -0
- testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +179 -0
- testgen/template/dbsetup/030_initialize_new_schema_structure.sql +735 -0
- testgen/template/dbsetup/040_populate_new_schema_project.sql +59 -0
- testgen/template/dbsetup/050_populate_new_schema_metadata.sql +1517 -0
- testgen/template/dbsetup/060_create_standard_views.sql +248 -0
- testgen/template/dbsetup/070_create_default_users.sql +17 -0
- testgen/template/dbsetup/075_grant_role_rights.sql +43 -0
- testgen/template/dbsetup/080_set_current_revision.sql +5 -0
- testgen/template/dbupgrade/0100_incremental_upgrade.sql +5 -0
- testgen/template/dbupgrade/0101_incremental_upgrade.sql +15 -0
- testgen/template/dbupgrade/0102_incremental_upgrade.sql +4 -0
- testgen/template/dbupgrade/0103_incremental_upgrade.sql +22 -0
- testgen/template/dbupgrade/0104_incremental_upgrade.sql +44 -0
- testgen/template/dbupgrade/0105_incremental_upgrade.sql +1 -0
- testgen/template/dbupgrade/0106_incremental_upgrade.sql +5 -0
- testgen/template/dbupgrade/0107_incremental_upgrade.sql +3 -0
- testgen/template/dbupgrade_helpers/get_tg_revision.sql +2 -0
- testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +116 -0
- testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql +11 -0
- testgen/template/exec_cat_tests/ex_cat_results_parse.sql +69 -0
- testgen/template/exec_cat_tests/ex_cat_retrieve_agg_test_parms.sql +6 -0
- testgen/template/exec_cat_tests/ex_cat_test_query.sql +8 -0
- testgen/template/execution/ex_finalize_test_run_results.sql +37 -0
- testgen/template/execution/ex_get_tests_non_cat.sql +47 -0
- testgen/template/execution/ex_update_test_record_in_testrun_table.sql +27 -0
- testgen/template/execution/ex_write_test_record_to_testrun_table.sql +6 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +48 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql +34 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +39 -0
- testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +58 -0
- testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +44 -0
- testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +37 -0
- testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +53 -0
- testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +46 -0
- testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +59 -0
- testgen/template/flavors/generic/profiling/contingency_counts.sql +3 -0
- testgen/template/flavors/generic/validate_tests/ex_get_project_column_list_generic.sql +3 -0
- testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql +53 -0
- testgen/template/flavors/mssql/profiling/project_ddf_query_mssql.sql +35 -0
- testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +246 -0
- testgen/template/flavors/mssql/profiling/project_secondary_profiling_query_mssql.sql +36 -0
- testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql +8 -0
- testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql +12 -0
- testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql +54 -0
- testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql +4 -0
- testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql +1 -0
- testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql +46 -0
- testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql +59 -0
- testgen/template/flavors/postgresql/profiling/project_ddf_query_postgresql.sql +42 -0
- testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +225 -0
- testgen/template/flavors/postgresql/profiling/project_secondary_profiling_query_postgresql.sql +28 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql +157 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql +1 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql +2 -0
- testgen/template/flavors/redshift/profiling/project_ddf_query_redshift.sql +38 -0
- testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +221 -0
- testgen/template/flavors/redshift/profiling/project_secondary_profiling_query_redshift.sql +29 -0
- testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql +115 -0
- testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql +1 -0
- testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql +2 -0
- testgen/template/flavors/snowflake/profiling/project_ddf_query_snowflake.sql +38 -0
- testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +220 -0
- testgen/template/flavors/snowflake/profiling/project_secondary_profiling_query_snowflake.sql +29 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql +69 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql +1 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql +6 -0
- testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml +219 -0
- testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql +92 -0
- testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql +1 -0
- testgen/template/gen_funny_cat_tests/gen_test_constant.sql +104 -0
- testgen/template/gen_funny_cat_tests/gen_test_distinct_value_ct.sql +98 -0
- testgen/template/gen_funny_cat_tests/gen_test_row_ct.sql +57 -0
- testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql +59 -0
- testgen/template/generation/gen_delete_old_tests.sql +5 -0
- testgen/template/generation/gen_insert_test_suite.sql +5 -0
- testgen/template/generation/gen_retrieve_or_insert_test_suite.sql +58 -0
- testgen/template/generation/gen_standard_test_type_list.sql +13 -0
- testgen/template/generation/gen_standard_tests.sql +48 -0
- testgen/template/get_entities/get_connection.sql +21 -0
- testgen/template/get_entities/get_connections_list.sql +9 -0
- testgen/template/get_entities/get_latest.sql +4 -0
- testgen/template/get_entities/get_profile.sql +12 -0
- testgen/template/get_entities/get_profile_info.sql +17 -0
- testgen/template/get_entities/get_profile_list.sql +17 -0
- testgen/template/get_entities/get_profile_screen.sql +275 -0
- testgen/template/get_entities/get_project_list.sql +6 -0
- testgen/template/get_entities/get_table_group_list.sql +10 -0
- testgen/template/get_entities/get_test_generation_list.sql +18 -0
- testgen/template/get_entities/get_test_info.sql +41 -0
- testgen/template/get_entities/get_test_results_for_run_cli.sql +16 -0
- testgen/template/get_entities/get_test_run_list.sql +24 -0
- testgen/template/get_entities/get_test_suite.sql +13 -0
- testgen/template/get_entities/get_test_suite_list.sql +18 -0
- testgen/template/get_entities/list_test_types.sql +4 -0
- testgen/template/observability/get_event_data.sql +23 -0
- testgen/template/observability/get_test_results.sql +41 -0
- testgen/template/observability/update_test_results_exported_to_observability.sql +12 -0
- testgen/template/parms/parms_profiling.sql +34 -0
- testgen/template/parms/parms_test_execution.sql +13 -0
- testgen/template/parms/parms_test_gen.sql +23 -0
- testgen/template/profiling/contingency_columns.sql +7 -0
- testgen/template/profiling/datatype_suggestions.sql +56 -0
- testgen/template/profiling/functional_datatype.sql +523 -0
- testgen/template/profiling/functional_tabletype_stage.sql +48 -0
- testgen/template/profiling/functional_tabletype_update.sql +8 -0
- testgen/template/profiling/pii_flag.sql +133 -0
- testgen/template/profiling/profile_anomalies_screen_column.sql +22 -0
- testgen/template/profiling/profile_anomalies_screen_multi_column.sql +58 -0
- testgen/template/profiling/profile_anomalies_screen_table.sql +22 -0
- testgen/template/profiling/profile_anomalies_screen_table_dates.sql +30 -0
- testgen/template/profiling/profile_anomalies_screen_variants.sql +40 -0
- testgen/template/profiling/profile_anomaly_types_get.sql +3 -0
- testgen/template/profiling/project_get_table_sample_count.sql +22 -0
- testgen/template/profiling/project_profile_run_record_insert.sql +8 -0
- testgen/template/profiling/project_profile_run_record_update.sql +5 -0
- testgen/template/profiling/project_profile_run_record_update_status.sql +5 -0
- testgen/template/profiling/project_update_profile_results_to_estimates.sql +32 -0
- testgen/template/profiling/refresh_anomalies.sql +33 -0
- testgen/template/profiling/refresh_data_chars_from_profiling.sql +156 -0
- testgen/template/profiling/secondary_profiling_columns.sql +12 -0
- testgen/template/profiling/secondary_profiling_delete.sql +4 -0
- testgen/template/profiling/secondary_profiling_update.sql +18 -0
- testgen/template/quick_start/populate_target_data.sql +1077 -0
- testgen/template/quick_start/recreate_target_data_schema.sql +167 -0
- testgen/template/quick_start/update_target_data.sql +100 -0
- testgen/template/updates/create_tmp_test_definition.sql +19 -0
- testgen/template/updates/get_test_def_parms.sql +38 -0
- testgen/template/updates/populate_stg_test_definitions.sql +184 -0
- testgen/template/validate_tests/ex_disable_tests_test_definitions.sql +5 -0
- testgen/template/validate_tests/ex_flag_tests_test_definitions.sql +64 -0
- testgen/template/validate_tests/ex_get_project_column_list_generic.sql +3 -0
- testgen/template/validate_tests/ex_get_test_column_list_tg.sql +65 -0
- testgen/template/validate_tests/ex_write_test_val_errors.sql +22 -0
- testgen/ui/__init__.py +0 -0
- testgen/ui/app.py +98 -0
- testgen/ui/assets/dk_logo.svg +46 -0
- testgen/ui/assets/question_mark.png +0 -0
- testgen/ui/assets/scripts.js +68 -0
- testgen/ui/assets/style.css +140 -0
- testgen/ui/bootstrap.py +109 -0
- testgen/ui/components/__init__.py +0 -0
- testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fBBc4.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fChc4EsA.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu4mxK.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu7GxKOzY.woff2 +0 -0
- testgen/ui/components/frontend/css/material-symbols-rounded.css +24 -0
- testgen/ui/components/frontend/css/material-symbols-rounded.woff2 +0 -0
- testgen/ui/components/frontend/css/roboto-font-faces.css +35 -0
- testgen/ui/components/frontend/css/shared.css +36 -0
- testgen/ui/components/frontend/img/dk_logo.svg +46 -0
- testgen/ui/components/frontend/index.html +17 -0
- testgen/ui/components/frontend/js/components/breadcrumbs.js +86 -0
- testgen/ui/components/frontend/js/components/button.js +66 -0
- testgen/ui/components/frontend/js/components/location.js +62 -0
- testgen/ui/components/frontend/js/components/select.js +75 -0
- testgen/ui/components/frontend/js/components/sidebar.js +358 -0
- testgen/ui/components/frontend/js/main.js +99 -0
- testgen/ui/components/frontend/js/streamlit.js +19 -0
- testgen/ui/components/frontend/js/van.min.js +1 -0
- testgen/ui/components/utils/__init__.py +0 -0
- testgen/ui/components/utils/callbacks.py +51 -0
- testgen/ui/components/utils/component.py +13 -0
- testgen/ui/components/widgets/__init__.py +6 -0
- testgen/ui/components/widgets/breadcrumbs.py +32 -0
- testgen/ui/components/widgets/location.py +65 -0
- testgen/ui/components/widgets/modal.py +97 -0
- testgen/ui/components/widgets/sidebar.py +69 -0
- testgen/ui/navigation/__init__.py +0 -0
- testgen/ui/navigation/menu.py +42 -0
- testgen/ui/navigation/page.py +20 -0
- testgen/ui/navigation/router.py +63 -0
- testgen/ui/queries/__init__.py +0 -0
- testgen/ui/queries/authentication_queries.py +47 -0
- testgen/ui/queries/connection_queries.py +121 -0
- testgen/ui/queries/profiling_queries.py +148 -0
- testgen/ui/queries/project_queries.py +9 -0
- testgen/ui/queries/table_group_queries.py +186 -0
- testgen/ui/queries/test_definition_queries.py +270 -0
- testgen/ui/queries/test_run_queries.py +32 -0
- testgen/ui/queries/test_suite_queries.py +145 -0
- testgen/ui/scripts/__init__.py +0 -0
- testgen/ui/scripts/patch_streamlit.py +111 -0
- testgen/ui/services/__init__.py +0 -0
- testgen/ui/services/authentication_service.py +119 -0
- testgen/ui/services/connection_service.py +220 -0
- testgen/ui/services/database_service.py +282 -0
- testgen/ui/services/form_service.py +1008 -0
- testgen/ui/services/javascript_service.py +44 -0
- testgen/ui/services/query_service.py +316 -0
- testgen/ui/services/string_service.py +12 -0
- testgen/ui/services/table_group_service.py +130 -0
- testgen/ui/services/test_definition_service.py +117 -0
- testgen/ui/services/test_run_service.py +13 -0
- testgen/ui/services/test_suite_service.py +76 -0
- testgen/ui/services/toolbar_service.py +77 -0
- testgen/ui/session.py +46 -0
- testgen/ui/views/__init__.py +0 -0
- testgen/ui/views/app_log_modal.py +92 -0
- testgen/ui/views/connections.py +72 -0
- testgen/ui/views/connections_base.py +367 -0
- testgen/ui/views/login.py +40 -0
- testgen/ui/views/not_found.py +16 -0
- testgen/ui/views/overview.py +34 -0
- testgen/ui/views/profiling_anomalies.py +501 -0
- testgen/ui/views/profiling_details.py +335 -0
- testgen/ui/views/profiling_modal.py +40 -0
- testgen/ui/views/profiling_results.py +206 -0
- testgen/ui/views/profiling_summary.py +177 -0
- testgen/ui/views/project_settings.py +74 -0
- testgen/ui/views/table_groups.py +530 -0
- testgen/ui/views/test_definitions.py +1020 -0
- testgen/ui/views/test_results.py +908 -0
- testgen/ui/views/test_runs.py +195 -0
- testgen/ui/views/test_suites.py +545 -0
- testgen/utils/__init__.py +0 -0
- testgen/utils/plugins.py +17 -0
- testgen/utils/singleton.py +14 -0
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
-- Primary Screen: Alpha
-- Assigns a PII category code to the alpha columns (general_type A) of a single
-- profiling run, using the functional data type, keywords in the column name and
-- the observed value patterns.  Codes are three-part strings such as A/ID/SSN
-- (the leading letter presumably ranks sensitivity -- to be confirmed).
-- CASE returns the first matching branch, so branch order decides ties.
WITH screen
   AS ( SELECT id AS profile_results_id,
               table_name, column_name,
               CASE
                 -- Personal names
                 WHEN functional_data_type IN ('Person Full Name', 'Person Given Name', 'Person Last Name') THEN 'B/NAME/Individual'

                 WHEN LOWER(column_name) SIMILAR TO '%(maiden|surname)%' THEN 'B/NAME/Individual'

                 -- Demographics
                 WHEN functional_data_type = 'Historical Date'
                  AND LOWER(column_name) SIMILAR TO '%(dob|birth)%' THEN 'B/DEMO/Birthdate'

                 WHEN LOWER(column_name)
                         SIMILAR TO '%(nationality|race|ethnicity|gender|sex|marital)%' THEN 'B/DEMO/Demographic'

                 WHEN LOWER(column_name) ILIKE '%med%record%' THEN 'A/DEMO/Medical'

                 -- Credentials
                 WHEN LOWER(column_name) SIMILAR TO '%(password|pwd|auth)%' THEN 'A/ID/Security'

                 WHEN max_length < 10
                  AND avg_embedded_spaces < 0.1
                  AND (column_name ILIKE 'pin%' OR column_name ILIKE '%pin') THEN 'A/ID/Security'

                 -- Government identifiers
                 WHEN std_pattern_match = 'SSN'
                  AND LOWER(column_name) SIMILAR TO '%(ss|soc|sec)%' THEN 'A/ID/SSN'

                 WHEN TRIM(fn_parsefreq(top_patterns, 1, 2))
                         IN ('NNNNNNNNN', 'NNN-NN-NNNN', 'NNN NN NNNN')
                  AND LEFT(min_text, 1) = '9'
                  AND avg_length BETWEEN 8.8 AND 11.2
                  AND LOWER(column_name) SIMILAR TO '%(tax|tin|fed)%' THEN 'A/ID/Tax'

                 WHEN TRIM(fn_parsefreq(top_patterns, 1, 2))
                         IN ('NNNNNNNNN', 'ANNNNNNNN')
                  AND avg_length BETWEEN 8.8 AND 9.2
                  AND LOWER(column_name) SIMILAR TO '%(passp|pp)%' THEN 'A/ID/Passport'

                 -- Financial identifiers
                 WHEN std_pattern_match = 'CREDIT_CARD'
                  AND LOWER(column_name) SIMILAR TO '%(credit|card|cc|acct|account)%' THEN 'A/ID/Credit'

                 -- SWIFT/BIC codes.  Both of the two leading entries parsed from
                 -- top_patterns must have the expected shape.  The shape is a regular
                 -- expression, so it is tested with the case-insensitive POSIX match
                 -- operator and anchored to the whole value.  LIKE and ILIKE only
                 -- understand their own two wildcards and would compare the brackets
                 -- and braces as literal text, which can never match.
                 WHEN TRIM(fn_parsefreq(top_patterns, 1, 2))
                         ~* '^[Aa]{6}[A-Za-z0-9]{2}N{0,3}$'
                  AND TRIM(fn_parsefreq(top_patterns, 2, 2))
                         ~* '^[Aa]{6}[A-Za-z0-9]{2}N{0,3}$'
                  AND avg_length BETWEEN 7.8 AND 11.2
                  AND LOWER(column_name) SIMILAR TO '%(swift|bic)%' THEN 'A/ID/Bank'

                 -- IBAN.  Short enough and the top pattern starts with two letters
                 WHEN max_length <= 34
                  AND UPPER(LEFT(TRIM(fn_parsefreq(top_patterns, 1, 2)), 2))
                         = 'AA'
                  AND (column_name ILIKE 'iban%' OR column_name ILIKE '%iban') THEN 'A/ID/Bank'

                 WHEN avg_length BETWEEN 5 AND 20
                  AND LOWER(column_name) SIMILAR TO '%(bank|checking|saving|debit)%' THEN 'A/ID/Bank'

                 -- Licenses
                 WHEN avg_embedded_spaces < 0.5
                  AND avg_length < 20
                  AND (LOWER(column_name) SIMILAR TO '%(dr|op)%lic%'
                    OR LOWER(column_name) SIMILAR TO '%(driver|license|operator)%') THEN 'A/ID/License'

                 -- Exact-name matches
                 WHEN LOWER(column_name) IN ('patient_id', 'pat_id') THEN 'A/ID/Medical'

                 WHEN LOWER(column_name) IN ('member_id') THEN 'B/ID/Commercial'

               END AS pii_flag

          FROM profile_results p
         WHERE profile_run_id = '{PROFILE_RUN_ID}'
           AND general_type = 'A' )
UPDATE profile_results
   SET pii_flag = screen.pii_flag
  FROM screen
 -- The comparison with the empty string skips both NULL and empty flags
 WHERE screen.pii_flag > ''
   AND profile_results.id = screen.profile_results_id;
|
|
75
|
+
|
|
76
|
+
-- Secondary Screen - Alpha
-- Second pass over the same profiling run.  Only alpha columns that are still
-- unflagged are examined, and only in tables that already hold more than one
-- flagged column.  The first matching branch of the CASE supplies the code.
WITH table_pii_counts AS (
        -- COUNT ignores NULLs, so pii_ct is the number of flagged columns per table
        SELECT table_name,
               COUNT(pii_flag) AS pii_ct
          FROM profile_results
         WHERE profile_run_id = '{PROFILE_RUN_ID}'
         GROUP BY table_name
     ),
     screen AS (
        SELECT p.id AS profile_results_id,
               p.table_name,
               p.column_name,
               CASE
                  -- Contact details, driven by the functional data type
                  WHEN p.functional_data_type = 'Email'
                     THEN 'B/CONTACT/Email'
                  WHEN p.functional_data_type IN ('Address', 'City', 'State', 'Zip')
                     THEN 'B/CONTACT/Address'
                  WHEN p.functional_data_type = 'Phone'
                     THEN 'B/CONTACT/Phone'

                  -- Medical keywords in the column name
                  WHEN LOWER(p.column_name) SIMILAR TO '%(insur|health|med|patient)%'
                     THEN 'A/DEMO/Medical'

                  -- Vehicle identifiers, matched on name, length and top pattern
                  WHEN LOWER(p.column_name) SIMILAR TO '%(vehicle|vin|auto|car)%'
                   AND p.avg_length BETWEEN 16 AND 18
                   AND p.max_length < 20
                   AND TRIM(fn_parsefreq(p.top_patterns, 1, 2)) = 'AAANAAAAANNNNNNNN'
                     THEN 'B/ID/Auto'

                  -- Biometric and authentication keywords
                  WHEN LOWER(p.column_name) SIMILAR TO
                          '%(voice|fingerprint|retina|auth|biometric|iris|face_recog)%'
                     THEN 'A/ID/Security'

                  -- dna as the whole name, or as a leading or trailing token
                  -- set off by an underscore
                  WHEN LOWER(p.column_name) = 'dna'
                    OR LOWER(p.column_name) ILIKE '%\_dna'
                    OR LOWER(p.column_name) ILIKE 'dna\_%'
                     THEN 'A/DEMO/Demographic'

                  -- Routing-style numbers
                  WHEN p.column_name ILIKE '%rout%'
                   AND p.avg_length BETWEEN 8.8 AND 11.2
                   AND TRIM(fn_parsefreq(p.top_patterns, 1, 2)) IN ('NNNNNNNNN', 'NNNN-NNNN-N')
                     THEN 'C/ID/Bank'

                  WHEN LOWER(p.column_name) SIMILAR TO '%(salary|income|wage)%'
                     THEN 'B/DEMO/Financial'

                  WHEN LOWER(p.column_name) SIMILAR TO '%(user_id|userid)%'
                     THEN 'C/ID/Security'
               END AS pii_flag
          FROM profile_results p
               INNER JOIN table_pii_counts t
                       ON (p.table_name = t.table_name)
         WHERE p.profile_run_id = '{PROFILE_RUN_ID}'
           AND p.general_type = 'A'
           AND p.pii_flag IS NULL
           AND t.pii_ct > 1
     )
UPDATE profile_results
   SET pii_flag = screen.pii_flag
  FROM screen
 WHERE screen.pii_flag > ''
   AND profile_results.id = screen.profile_results_id;
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
-- Column-level anomaly screen.
-- Records one anomaly row for every column of the profiling run that satisfies
-- the substituted criteria, unless v_inactive_anomalies holds a row for the same
-- table group, schema, table, column and anomaly id.
INSERT INTO profile_anomaly_results
       ( project_code, table_groups_id, profile_run_id, anomaly_id,
         schema_name, table_name, column_name, column_type, detail )
SELECT p.project_code,
       p.table_groups_id,
       p.profile_run_id,
       '{ANOMALY_ID}' AS anomaly_id,
       p.schema_name,
       p.table_name,
       p.column_name,
       p.column_type,
       {DETAIL_EXPRESSION} AS detail
  FROM profile_results p
       LEFT JOIN v_inactive_anomalies i
              ON (     p.table_groups_id = i.table_groups_id
                   AND p.schema_name     = i.schema_name
                   AND p.table_name      = i.table_name
                   AND p.column_name     = i.column_name
                   AND '{ANOMALY_ID}'    = i.anomaly_id )
 WHERE p.profile_run_id = CAST('{PROFILE_RUN_ID}' AS UUID)
   -- Anti-join.  Keep only rows with no match in v_inactive_anomalies
   AND i.anomaly_id IS NULL
   AND {ANOMALY_CRITERIA};
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
-- Multi-table anomaly screen.
-- mults   one row per column name that occurs more than once within a schema
--         of the profiling run, with type and pattern statistics across the
--         occurrences.
-- subset  the individual profile rows of those columns that meet the
--         substituted criteria and have no match in v_inactive_anomalies.
-- The final insert collapses subset to distinct rows per column name.
WITH mults AS (
        SELECT p.project_code,
               p.table_groups_id,
               p.schema_name,
               p.column_name,
               COUNT(*)                            AS column_ct,
               COUNT(DISTINCT p.column_type)       AS type_ct,
               COUNT(DISTINCT p.general_type)      AS general_type_ct,
               MIN(CAST(p.column_type AS TEXT))    AS min_type,
               MAX(CAST(p.column_type AS TEXT))    AS max_type,
               MIN(p.distinct_pattern_ct)          AS min_pattern_ct,
               MAX(p.distinct_pattern_ct)          AS max_pattern_ct,
               SUM(p.distinct_pattern_ct)          AS sum_pattern_ct,
               STRING_AGG(table_name, ', ' ORDER BY table_name) AS table_list,
               -- The first pipe-delimited field of top_patterns is left-padded
               -- with zeros to 20 characters (presumably a frequency count, to
               -- be confirmed) so that the string MAX orders by that field
               MAX(    RIGHT(REPEAT('0', 20) || SPLIT_PART(p.top_patterns, '|', 1), 20)
                    || '|'
                    || SPLIT_PART(p.top_patterns, '|', 2) ) AS very_top_pattern
          FROM profile_results p
         WHERE p.profile_run_id = CAST('{PROFILE_RUN_ID}' AS UUID)
         GROUP BY p.project_code, p.table_groups_id, schema_name, p.column_name
        HAVING COUNT(*) > 1
     ),
     subset AS (
        SELECT p.project_code,
               p.table_groups_id,
               p.profile_run_id,
               '{ANOMALY_ID}' AS anomaly_id,
               p.schema_name,
               p.table_name,
               p.column_name,
               p.column_type,
               p.top_patterns,
               -- Strip the zero padding added in mults
               LTRIM(m.very_top_pattern, '0') AS very_top_pattern,
               m.table_list,
               {DETAIL_EXPRESSION} AS detail
          FROM profile_results p
               INNER JOIN mults m
                       ON p.project_code    = m.project_code
                      AND p.table_groups_id = m.table_groups_id
                      AND p.schema_name     = m.schema_name
                      AND p.column_name     = m.column_name
               LEFT JOIN v_inactive_anomalies i
                      ON (     p.table_groups_id = i.table_groups_id
                           AND p.schema_name     = i.schema_name
                           AND p.table_name      = i.table_name
                           AND p.column_name     = i.column_name
                           AND '{ANOMALY_ID}'    = i.anomaly_id )
         WHERE p.profile_run_id = CAST('{PROFILE_RUN_ID}' AS UUID)
           AND i.anomaly_id IS NULL
           AND {ANOMALY_CRITERIA}
     )
INSERT INTO profile_anomaly_results
       ( project_code, table_groups_id, profile_run_id, anomaly_id,
         schema_name, table_name, column_name, column_type, detail )
SELECT project_code,
       table_groups_id,
       profile_run_id,
       anomaly_id,
       schema_name,
       '(multi-table)' AS table_name,
       column_name,
       '(multiple)' AS column_type,
       detail || ' , Tables: ' || table_list AS detail
  FROM subset
 -- Grouping on every selected input removes the per-table duplicates
 GROUP BY project_code, table_groups_id, profile_run_id, anomaly_id,
          schema_name, column_name, table_list, detail;
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
-- Table-level anomaly screen.
-- Aggregates the profile rows of each table in the profiling run and records an
-- anomaly for every table whose aggregate satisfies the substituted HAVING
-- criteria.  Unlike the column-level screens, tables with a matching row in
-- v_inactive_anomalies are still recorded, but with disposition Inactive.
INSERT INTO profile_anomaly_results
   (project_code, table_groups_id, profile_run_id, anomaly_id,
    schema_name, table_name, column_name, detail, disposition)
SELECT p.project_code,
       p.table_groups_id,
       p.profile_run_id,
       '{ANOMALY_ID}' as anomaly_id,
       p.schema_name,
       p.table_name,
       '(Table)' as column_name,
       {DETAIL_EXPRESSION} AS detail,
       CASE WHEN i.anomaly_id IS NULL THEN NULL ELSE 'Inactive' END as disposition
  FROM profile_results p
LEFT JOIN v_inactive_anomalies i
  ON (p.table_groups_id = i.table_groups_id
 AND  p.schema_name = i.schema_name
 AND  p.table_name = i.table_name
 AND  '{ANOMALY_ID}' = i.anomaly_id)
 WHERE p.profile_run_id = '{PROFILE_RUN_ID}'::UUID
-- i.anomaly_id is read by the disposition expression above, so it has to be a
-- grouping column.  Without it PostgreSQL rejects the statement because the
-- column is neither grouped nor aggregated.  Its value is either NULL or the
-- anomaly id being screened, so each table still yields a single group.
GROUP BY p.project_code, p.table_groups_id, p.profile_run_id,
         p.schema_name, p.table_name, i.anomaly_id
HAVING {ANOMALY_CRITERIA};
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
-- Table-level anomaly screen over date columns.
-- Groups the date columns (general_type D) of each table in the profiling run
-- and records one anomaly per table that satisfies the substituted HAVING
-- criteria.  Tables matched in v_inactive_anomalies are excluded.
INSERT INTO profile_anomaly_results
       ( project_code, table_groups_id, profile_run_id, anomaly_id,
         schema_name, table_name, column_name, detail )
SELECT p.project_code,
       p.table_groups_id,
       p.profile_run_id,
       '{ANOMALY_ID}' AS anomaly_id,
       p.schema_name,
       p.table_name,
       -- Up to two columns are listed by name.  Larger sets get a placeholder
       -- here and the full list is appended to the detail text instead.
       CASE
          WHEN COUNT(p.column_name) > 2
             THEN '(multi-column)'
          ELSE STRING_AGG(p.column_name, ', ' ORDER BY p.position)
       END AS column_name,
       {DETAIL_EXPRESSION}
         || CASE
               WHEN COUNT(p.column_name) > 2
                  THEN ', Columns: ' || STRING_AGG(p.column_name, ', ' ORDER BY p.position)
               ELSE ''
            END AS detail
  FROM profile_results p
       LEFT JOIN v_inactive_anomalies i
              ON (     p.table_groups_id = i.table_groups_id
                   AND p.schema_name     = i.schema_name
                   AND p.table_name      = i.table_name
                   AND '{ANOMALY_ID}'    = i.anomaly_id )
 WHERE p.profile_run_id = CAST('{PROFILE_RUN_ID}' AS UUID)
   AND i.anomaly_id IS NULL
   AND p.general_type = 'D'
 GROUP BY p.project_code, p.table_groups_id, p.profile_run_id,
          p.schema_name, p.table_name, i.anomaly_id
HAVING {ANOMALY_CRITERIA};
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
-- Variant-coding anomaly screen.
-- Pairs every candidate column of the profiling run with every row of
-- variant_codings and keeps the pairs where more than one of the top values of
-- the column intersects the check values.  The intersecting items of all
-- surviving pairs are merged into one distinct, pipe-delimited list per column.
INSERT INTO profile_anomaly_results
       ( project_code, table_groups_id, profile_run_id, anomaly_id,
         schema_name, table_name, column_name, column_type, detail )
WITH all_matches AS (
        SELECT p.project_code,
               p.table_groups_id,
               p.profile_run_id,
               p.schema_name,
               p.table_name,
               p.column_name,
               p.column_type,
               fn_extract_distinct_items(
                  STRING_AGG(
                     fn_extract_intersecting_items(
                        LOWER(fn_extract_top_values(p.top_freq_values)),
                        v.check_values,
                        '|'),
                     '|'),
                  '|') AS intersect_list
          FROM profile_results p
               CROSS JOIN variant_codings v
               LEFT JOIN v_inactive_anomalies i
                      ON (     p.table_groups_id = i.table_groups_id
                           AND p.schema_name     = i.schema_name
                           AND p.table_name      = i.table_name
                           AND p.column_name     = i.column_name
                           AND '{ANOMALY_ID}'    = i.anomaly_id )
         WHERE p.profile_run_id = CAST('{PROFILE_RUN_ID}' AS UUID)
           AND {ANOMALY_CRITERIA}
           AND p.top_freq_values > ''
           AND i.anomaly_id IS NULL
           AND fn_count_intersecting_items(
                  LOWER(fn_extract_top_values(p.top_freq_values)),
                  v.check_values,
                  '|') > 1
         GROUP BY p.project_code,
                  p.table_groups_id,
                  p.profile_run_id,
                  p.schema_name,
                  p.table_name,
                  p.column_name,
                  p.column_type
     )
SELECT project_code,
       table_groups_id,
       profile_run_id,
       '{ANOMALY_ID}' AS anomaly_id,
       schema_name,
       table_name,
       column_name,
       column_type,
       {DETAIL_EXPRESSION} AS detail
  FROM all_matches;
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/* Sample sizing for one table.
   sample_count is -1 when the table holds no more rows than the minimum count.
   Otherwise it is the requested percentage of the row count, capped at 999000,
   or the minimum count when the percentage would not exceed that minimum.
   sample_ratio is the row count divided by that sample size, or 1 when the
   table holds no more rows than the minimum count. */
SELECT '{SAMPLING_TABLE}' AS schema_table,
       CASE
          WHEN COUNT(*) <= {PROFILE_SAMPLE_MIN_COUNT}
             THEN -1
          ELSE
             CASE
                WHEN ROUND(CAST({PROFILE_SAMPLE_PERCENT} AS FLOAT) * CAST(COUNT(*) AS FLOAT) / 100.0, 0) > {PROFILE_SAMPLE_MIN_COUNT}
                   THEN LEAST(999000, ROUND(CAST({PROFILE_SAMPLE_PERCENT} AS FLOAT) * CAST(COUNT(*) AS FLOAT) / 100.0, 0))
                ELSE {PROFILE_SAMPLE_MIN_COUNT}
             END
       END AS sample_count,
       CASE
          WHEN COUNT(*) <= {PROFILE_SAMPLE_MIN_COUNT}
             THEN 1
          ELSE ( CAST(COUNT(*) AS FLOAT)
                 / CASE
                      WHEN ROUND(CAST({PROFILE_SAMPLE_PERCENT} AS FLOAT) * CAST(COUNT(*) AS FLOAT) / 100.0, 0) > {PROFILE_SAMPLE_MIN_COUNT}
                         THEN LEAST(999000, ROUND(CAST({PROFILE_SAMPLE_PERCENT} AS FLOAT) * CAST(COUNT(*) AS FLOAT) / 100.0, 0))
                      ELSE {PROFILE_SAMPLE_MIN_COUNT}
                   END )
       END AS sample_ratio
  FROM {SAMPLING_TABLE};
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
-- Registers a new profiling run with its project, connection, table group,
-- start time and process id.
INSERT INTO profiling_runs
       ( id, project_code, connection_id, table_groups_id, profiling_starttime, process_id )
SELECT CAST('{PROFILE_RUN_ID}' AS UUID)   AS id,
       '{PROJECT_CODE}'                   AS project_code,
       {CONNECTION_ID}                    AS connection_id,
       CAST('{TABLE_GROUPS_ID}' AS UUID)  AS table_groups_id,
       '{RUN_DATE}'                       AS profiling_starttime,
       '{PROCESS_ID}'                     AS process_id;
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
|
|
2
|
+
-- Scale the counts collected from a sampled table up to full-table estimates
-- for one profiling run.
-- Only rows whose sample_ratio is still NULL are touched, and the same
-- statement fills sample_ratio in.
-- distinct_value_ct is deliberately left as profiled: distinct counts should
-- already be representative in a random sample.
-- NOTE(review): date_days_present is scaled here although it looks like a
-- distinct-style count -- confirm this is intended.
UPDATE profile_results
   SET sample_ratio        = {PROFILE_SAMPLE_RATIO},
       record_ct           = ROUND(record_ct * {PROFILE_SAMPLE_RATIO}, 0),
       value_ct            = ROUND(value_ct * {PROFILE_SAMPLE_RATIO}, 0),
       null_value_ct       = ROUND(null_value_ct * {PROFILE_SAMPLE_RATIO}, 0),
       zero_value_ct       = ROUND(zero_value_ct * {PROFILE_SAMPLE_RATIO}, 0),
       lead_space_ct       = ROUND(lead_space_ct * {PROFILE_SAMPLE_RATIO}, 0),
       embedded_space_ct   = ROUND(embedded_space_ct * {PROFILE_SAMPLE_RATIO}, 0),
       includes_digit_ct   = ROUND(includes_digit_ct * {PROFILE_SAMPLE_RATIO}, 0),
       filled_value_ct     = ROUND(filled_value_ct * {PROFILE_SAMPLE_RATIO}, 0),
       numeric_ct          = ROUND(numeric_ct * {PROFILE_SAMPLE_RATIO}, 0),
       date_ct             = ROUND(date_ct * {PROFILE_SAMPLE_RATIO}, 0),
       before_1yr_date_ct  = ROUND(before_1yr_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
       before_5yr_date_ct  = ROUND(before_5yr_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
       before_20yr_date_ct = ROUND(before_20yr_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
       within_1yr_date_ct  = ROUND(within_1yr_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
       within_1mo_date_ct  = ROUND(within_1mo_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
       future_date_ct      = ROUND(future_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
       boolean_true_ct     = ROUND(boolean_true_ct * {PROFILE_SAMPLE_RATIO}, 0),
       date_days_present   = ROUND(date_days_present * {PROFILE_SAMPLE_RATIO}, 0)
 WHERE profile_run_id = '{PROFILE_RUN_ID}'
   AND sample_ratio IS NULL
   AND schema_name = SPLIT_PART('{SAMPLING_TABLE}', '.', 1)
   AND table_name = SPLIT_PART('{SAMPLING_TABLE}', '.', 2);
|
|
31
|
+
|
|
32
|
+
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
-- Roll up table, column and anomaly counts for one profiling run and store
-- them on that run's profiling_runs row. A run with no anomaly rows gets
-- zeroes for the three anomaly counters.
WITH anomaly_totals AS (
    SELECT profile_run_id,
           COUNT(*) AS anomaly_ct,
           COUNT(DISTINCT schema_name || '.' || table_name) AS anomaly_table_ct,
           COUNT(DISTINCT schema_name || '.' || table_name || '.' || column_name) AS anomaly_column_ct
      FROM profile_anomaly_results
     WHERE profile_run_id = '{PROFILE_RUN_ID}'::UUID
     GROUP BY profile_run_id
),
profile_totals AS (
    SELECT r.id AS profile_run_id,
           COUNT(DISTINCT p.schema_name || '.' || p.table_name) AS table_ct,
           COUNT(*) AS column_ct
      FROM profiling_runs r
      JOIN profile_results p
        ON r.id = p.profile_run_id
     WHERE r.id = '{PROFILE_RUN_ID}'::UUID
     GROUP BY r.id
)
UPDATE profiling_runs
   SET table_ct          = pt.table_ct,
       column_ct         = pt.column_ct,
       anomaly_ct        = COALESCE(an.anomaly_ct, 0),
       anomaly_table_ct  = COALESCE(an.anomaly_table_ct, 0),
       anomaly_column_ct = COALESCE(an.anomaly_column_ct, 0)
  FROM profile_totals pt
  LEFT JOIN anomaly_totals an
    ON pt.profile_run_id = an.profile_run_id
 WHERE profiling_runs.id = pt.profile_run_id;
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
-- ==============================================================================
|
|
2
|
+
-- | Table Characteristics
|
|
3
|
+
-- ==============================================================================
|
|
4
|
+
|
|
5
|
+
-- Update existing records
-- Refresh the stored characteristics of every table that appears in the
-- latest profile results for the table group, and clear its drop_date.
WITH latest_tables AS (
    SELECT p.table_groups_id,
           p.schema_name,
           p.table_name,
           p.functional_table_type,
           MAX(record_ct) AS record_ct,
           COUNT(*) AS column_ct,
           MAX(record_ct) * COUNT(*) AS data_point_ct
      FROM v_latest_profile_results p
     WHERE p.table_groups_id = '{TABLE_GROUPS_ID}'
     GROUP BY p.table_groups_id,
              p.schema_name,
              p.table_name,
              p.functional_table_type,
              run_date
)
UPDATE data_table_chars
   SET functional_table_type = src.functional_table_type,
       record_ct             = src.record_ct,
       column_ct             = src.column_ct,
       data_point_ct         = src.data_point_ct,
       drop_date             = NULL
  FROM latest_tables src
  JOIN data_table_chars tgt
    ON src.table_groups_id = tgt.table_groups_id
   AND src.schema_name = tgt.schema_name
   AND src.table_name = tgt.table_name
 WHERE data_table_chars.table_id = tgt.table_id;
|
|
29
|
+
|
|
30
|
+
-- Add new records
-- Insert a row for every table in the latest profile results that has no
-- matching data_table_chars row yet (same table group, schema and table name).
WITH latest_tables AS (
    SELECT p.table_groups_id,
           p.schema_name,
           p.table_name,
           p.functional_table_type,
           run_date AS add_date,
           MAX(record_ct) AS record_ct,
           COUNT(*) AS column_ct,
           MAX(record_ct) * COUNT(*) AS data_point_ct
      FROM v_latest_profile_results p
     WHERE p.table_groups_id = '{TABLE_GROUPS_ID}'
     GROUP BY p.table_groups_id,
              p.schema_name,
              p.table_name,
              p.functional_table_type,
              run_date
)
INSERT INTO data_table_chars
   (table_groups_id, schema_name, table_name, functional_table_type, add_date,
    record_ct, column_ct, data_point_ct)
SELECT src.table_groups_id,
       src.schema_name,
       src.table_name,
       src.functional_table_type,
       src.add_date,
       src.record_ct,
       src.column_ct,
       src.data_point_ct
  FROM latest_tables src
  LEFT JOIN data_table_chars tgt
    ON src.table_groups_id = tgt.table_groups_id
   AND src.schema_name = tgt.schema_name
   AND src.table_name = tgt.table_name
 WHERE tgt.table_id IS NULL;
|
|
54
|
+
|
|
55
|
+
-- Mark dropped records
-- A table counts as dropped when it has a data_table_chars row for the table
-- group but no longer appears in the latest profile results. Such rows are
-- stamped with the most recent run date of the group.
-- Only rows whose drop_date is still NULL are stamped: without that guard
-- every later run would move the drop_date of an already-dropped table
-- forward, losing the date on which the drop was first detected.
WITH new_chars
   AS ( SELECT p.table_groups_id,
               p.schema_name, p.table_name
          FROM v_latest_profile_results p
         WHERE p.table_groups_id = '{TABLE_GROUPS_ID}'
         GROUP BY p.table_groups_id,
                  p.schema_name, p.table_name ),
     last_run
   AS ( SELECT table_groups_id, MAX(run_date) AS last_run_date
          FROM v_latest_profile_results
         WHERE table_groups_id = '{TABLE_GROUPS_ID}'
         GROUP BY table_groups_id )
UPDATE data_table_chars
   SET drop_date = l.last_run_date
  FROM last_run l
INNER JOIN data_table_chars d
   ON (l.table_groups_id = d.table_groups_id)
LEFT JOIN new_chars n
   ON (d.table_groups_id = n.table_groups_id
  AND  d.schema_name = n.schema_name
  AND  d.table_name = n.table_name)
 WHERE data_table_chars.table_id = d.table_id
   AND d.drop_date IS NULL
   AND n.table_name IS NULL;
|
|
79
|
+
|
|
80
|
+
-- ==============================================================================
|
|
81
|
+
-- | Column Characteristics
|
|
82
|
+
-- ==============================================================================
|
|
83
|
+
|
|
84
|
+
-- Update existing records
-- Refresh the type information of every column that appears in the latest
-- profile results for the table group, and clear its drop_date.
-- last_mod_date moves to the run date only when the column type changed.
-- IS DISTINCT FROM is used for that comparison so that a change from or to a
-- NULL column_type is also detected; a plain <> yields NULL in that case and
-- would silently keep the old last_mod_date.
WITH new_chars
   AS ( SELECT p.table_groups_id,
               p.schema_name, p.table_name, p.column_name,
               p.general_type, p.column_type, p.functional_data_type,
               run_date
          FROM v_latest_profile_results p
         WHERE p.table_groups_id = '{TABLE_GROUPS_ID}' )
UPDATE data_column_chars
   SET last_mod_date = CASE
                         WHEN n.column_type IS DISTINCT FROM d.column_type THEN n.run_date
                         ELSE d.last_mod_date
                       END,
       general_type = n.general_type,
       column_type = n.column_type,
       functional_data_type = n.functional_data_type,
       drop_date = NULL
  FROM new_chars n
INNER JOIN data_column_chars d
   ON (n.table_groups_id = d.table_groups_id
  AND  n.schema_name = d.schema_name
  AND  n.table_name = d.table_name
  AND  n.column_name = d.column_name)
 WHERE data_column_chars.table_id = d.table_id
   AND data_column_chars.column_name = d.column_name;
|
|
106
|
+
|
|
107
|
+
-- Add new records
-- Insert a row for every column in the latest profile results that has no
-- matching data_column_chars row yet. The owning table must already exist in
-- data_table_chars, which supplies table_id. A new column starts with
-- last_mod_date equal to add_date.
WITH latest_columns AS (
    SELECT p.table_groups_id,
           p.schema_name,
           p.table_name,
           p.column_name,
           p.general_type,
           p.column_type,
           p.functional_data_type,
           run_date AS add_date
      FROM v_latest_profile_results p
     WHERE p.table_groups_id = '{TABLE_GROUPS_ID}'
)
INSERT INTO data_column_chars
   (table_groups_id, schema_name, table_name, table_id, column_name,
    general_type, column_type, functional_data_type, add_date, last_mod_date)
SELECT src.table_groups_id,
       src.schema_name,
       src.table_name,
       tbl.table_id,
       src.column_name,
       src.general_type,
       src.column_type,
       src.functional_data_type,
       src.add_date,
       src.add_date AS last_mod_date
  FROM latest_columns src
  JOIN data_table_chars tbl
    ON src.table_groups_id = tbl.table_groups_id
   AND src.schema_name = tbl.schema_name
   AND src.table_name = tbl.table_name
  LEFT JOIN data_column_chars tgt
    ON src.table_groups_id = tgt.table_groups_id
   AND src.schema_name = tgt.schema_name
   AND src.table_name = tgt.table_name
   AND src.column_name = tgt.column_name
 WHERE tgt.table_id IS NULL;
|
|
132
|
+
|
|
133
|
+
-- Mark dropped records
-- A column counts as dropped when it has a data_column_chars row for the table
-- group but no longer appears in the latest profile results. Such rows are
-- stamped with the most recent run date of the group.
-- Only rows whose drop_date is still NULL are stamped: without that guard
-- every later run would move the drop_date of an already-dropped column
-- forward, losing the date on which the drop was first detected.
WITH new_chars
   AS ( SELECT p.table_groups_id,
               p.schema_name, p.table_name, p.column_name
          FROM v_latest_profile_results p
         WHERE p.table_groups_id = '{TABLE_GROUPS_ID}' ),
     last_run
   AS ( SELECT table_groups_id, MAX(run_date) AS last_run_date
          FROM v_latest_profile_results
         WHERE table_groups_id = '{TABLE_GROUPS_ID}'
         GROUP BY table_groups_id )
UPDATE data_column_chars
   SET drop_date = l.last_run_date
  FROM last_run l
INNER JOIN data_column_chars d
   ON (l.table_groups_id = d.table_groups_id)
LEFT JOIN new_chars n
   ON (d.table_groups_id = n.table_groups_id
  AND  d.schema_name = n.schema_name
  AND  d.table_name = n.table_name
  AND  d.column_name = n.column_name)
 WHERE data_column_chars.table_id = d.table_id
   AND data_column_chars.column_name = d.column_name
   AND d.drop_date IS NULL
   AND n.column_name IS NULL;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
-- Pick the columns of one profiling run that still need frequency values:
-- general_type 'A', no top_freq_values recorded yet, and small enough
-- (2 to 70 distinct values, max_length of at most 70) for the
-- frequency list to fit in the result.
SELECT p.schema_name,
       p.table_name,
       p.column_name
  FROM profile_results p
 WHERE p.profile_run_id = '{PROFILE_RUN_ID}'
   AND p.general_type = 'A'
   AND p.top_freq_values IS NULL
   AND p.distinct_value_ct >= 2
   AND p.distinct_value_ct <= 70
   AND p.max_length <= 70;
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
-- Copy the values staged in stg_secondary_profile_updates onto the matching
-- profile_results rows of one project, schema and run date. Rows are matched
-- on project_code, schema_name, run_date, table_name and column_name.
UPDATE profile_results
   SET top_freq_values     = stg.top_freq_values,
       distinct_value_hash = stg.distinct_value_hash
  FROM profile_results pr
  JOIN stg_secondary_profile_updates stg
    ON pr.project_code = stg.project_code
   AND pr.schema_name = stg.schema_name
   AND pr.run_date = stg.run_date
   AND pr.table_name = stg.table_name
   AND pr.column_name = stg.column_name
 WHERE pr.project_code = '{PROJECT_CODE}'
   AND pr.schema_name = '{DATA_SCHEMA}'
   AND pr.run_date = '{RUN_DATE}'
   AND pr.project_code = profile_results.project_code
   AND pr.schema_name = profile_results.schema_name
   AND pr.run_date = profile_results.run_date
   AND pr.table_name = profile_results.table_name
   AND pr.column_name = profile_results.column_name;
|