dataops-testgen 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataops_testgen-2.2.0.dist-info/LICENSE +203 -0
- dataops_testgen-2.2.0.dist-info/METADATA +287 -0
- dataops_testgen-2.2.0.dist-info/NOTICE +5 -0
- dataops_testgen-2.2.0.dist-info/RECORD +270 -0
- dataops_testgen-2.2.0.dist-info/WHEEL +5 -0
- dataops_testgen-2.2.0.dist-info/entry_points.txt +2 -0
- dataops_testgen-2.2.0.dist-info/top_level.txt +1 -0
- testgen/__init__.py +0 -0
- testgen/__main__.py +770 -0
- testgen/commands/__init__.py +0 -0
- testgen/commands/queries/__init__.py +0 -0
- testgen/commands/queries/execute_cat_tests_query.py +95 -0
- testgen/commands/queries/execute_tests_query.py +160 -0
- testgen/commands/queries/generate_tests_query.py +94 -0
- testgen/commands/queries/profiling_query.py +366 -0
- testgen/commands/queries/test_parameter_validation_query.py +88 -0
- testgen/commands/run_execute_cat_tests.py +162 -0
- testgen/commands/run_execute_tests.py +168 -0
- testgen/commands/run_generate_tests.py +107 -0
- testgen/commands/run_get_entities.py +122 -0
- testgen/commands/run_launch_db_config.py +84 -0
- testgen/commands/run_observability_exporter.py +330 -0
- testgen/commands/run_profiling_bridge.py +495 -0
- testgen/commands/run_quick_start.py +168 -0
- testgen/commands/run_setup_profiling_tools.py +96 -0
- testgen/commands/run_test_definition.py +146 -0
- testgen/commands/run_test_parameter_validation.py +135 -0
- testgen/commands/run_upgrade_db_config.py +156 -0
- testgen/common/__init__.py +8 -0
- testgen/common/clean_sql.py +53 -0
- testgen/common/credentials.py +25 -0
- testgen/common/database/__init__.py +0 -0
- testgen/common/database/database_service.py +629 -0
- testgen/common/database/flavor/__init__.py +0 -0
- testgen/common/database/flavor/flavor_service.py +75 -0
- testgen/common/database/flavor/mssql_flavor_service.py +34 -0
- testgen/common/database/flavor/postgresql_flavor_service.py +5 -0
- testgen/common/database/flavor/redshift_flavor_service.py +22 -0
- testgen/common/database/flavor/snowflake_flavor_service.py +69 -0
- testgen/common/database/flavor/trino_flavor_service.py +21 -0
- testgen/common/date_service.py +68 -0
- testgen/common/display_service.py +85 -0
- testgen/common/docker_service.py +76 -0
- testgen/common/encrypt.py +55 -0
- testgen/common/get_pipeline_parms.py +57 -0
- testgen/common/logs.py +79 -0
- testgen/common/process_service.py +62 -0
- testgen/common/read_file.py +69 -0
- testgen/settings.py +440 -0
- testgen/template/dbsetup/010_create_base_schema.sql +2 -0
- testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +179 -0
- testgen/template/dbsetup/030_initialize_new_schema_structure.sql +735 -0
- testgen/template/dbsetup/040_populate_new_schema_project.sql +59 -0
- testgen/template/dbsetup/050_populate_new_schema_metadata.sql +1517 -0
- testgen/template/dbsetup/060_create_standard_views.sql +248 -0
- testgen/template/dbsetup/070_create_default_users.sql +17 -0
- testgen/template/dbsetup/075_grant_role_rights.sql +43 -0
- testgen/template/dbsetup/080_set_current_revision.sql +5 -0
- testgen/template/dbupgrade/0100_incremental_upgrade.sql +5 -0
- testgen/template/dbupgrade/0101_incremental_upgrade.sql +15 -0
- testgen/template/dbupgrade/0102_incremental_upgrade.sql +4 -0
- testgen/template/dbupgrade/0103_incremental_upgrade.sql +22 -0
- testgen/template/dbupgrade/0104_incremental_upgrade.sql +44 -0
- testgen/template/dbupgrade/0105_incremental_upgrade.sql +1 -0
- testgen/template/dbupgrade/0106_incremental_upgrade.sql +5 -0
- testgen/template/dbupgrade/0107_incremental_upgrade.sql +3 -0
- testgen/template/dbupgrade_helpers/get_tg_revision.sql +2 -0
- testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +116 -0
- testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql +11 -0
- testgen/template/exec_cat_tests/ex_cat_results_parse.sql +69 -0
- testgen/template/exec_cat_tests/ex_cat_retrieve_agg_test_parms.sql +6 -0
- testgen/template/exec_cat_tests/ex_cat_test_query.sql +8 -0
- testgen/template/execution/ex_finalize_test_run_results.sql +37 -0
- testgen/template/execution/ex_get_tests_non_cat.sql +47 -0
- testgen/template/execution/ex_update_test_record_in_testrun_table.sql +27 -0
- testgen/template/execution/ex_write_test_record_to_testrun_table.sql +6 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +48 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql +34 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +39 -0
- testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +58 -0
- testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +44 -0
- testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +37 -0
- testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +53 -0
- testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +46 -0
- testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +59 -0
- testgen/template/flavors/generic/profiling/contingency_counts.sql +3 -0
- testgen/template/flavors/generic/validate_tests/ex_get_project_column_list_generic.sql +3 -0
- testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql +53 -0
- testgen/template/flavors/mssql/profiling/project_ddf_query_mssql.sql +35 -0
- testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +246 -0
- testgen/template/flavors/mssql/profiling/project_secondary_profiling_query_mssql.sql +36 -0
- testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql +8 -0
- testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql +12 -0
- testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql +54 -0
- testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql +4 -0
- testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql +1 -0
- testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql +46 -0
- testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql +59 -0
- testgen/template/flavors/postgresql/profiling/project_ddf_query_postgresql.sql +42 -0
- testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +225 -0
- testgen/template/flavors/postgresql/profiling/project_secondary_profiling_query_postgresql.sql +28 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql +157 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql +1 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql +2 -0
- testgen/template/flavors/redshift/profiling/project_ddf_query_redshift.sql +38 -0
- testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +221 -0
- testgen/template/flavors/redshift/profiling/project_secondary_profiling_query_redshift.sql +29 -0
- testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql +115 -0
- testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql +1 -0
- testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql +2 -0
- testgen/template/flavors/snowflake/profiling/project_ddf_query_snowflake.sql +38 -0
- testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +220 -0
- testgen/template/flavors/snowflake/profiling/project_secondary_profiling_query_snowflake.sql +29 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql +69 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql +1 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql +6 -0
- testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml +219 -0
- testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql +92 -0
- testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql +1 -0
- testgen/template/gen_funny_cat_tests/gen_test_constant.sql +104 -0
- testgen/template/gen_funny_cat_tests/gen_test_distinct_value_ct.sql +98 -0
- testgen/template/gen_funny_cat_tests/gen_test_row_ct.sql +57 -0
- testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql +59 -0
- testgen/template/generation/gen_delete_old_tests.sql +5 -0
- testgen/template/generation/gen_insert_test_suite.sql +5 -0
- testgen/template/generation/gen_retrieve_or_insert_test_suite.sql +58 -0
- testgen/template/generation/gen_standard_test_type_list.sql +13 -0
- testgen/template/generation/gen_standard_tests.sql +48 -0
- testgen/template/get_entities/get_connection.sql +21 -0
- testgen/template/get_entities/get_connections_list.sql +9 -0
- testgen/template/get_entities/get_latest.sql +4 -0
- testgen/template/get_entities/get_profile.sql +12 -0
- testgen/template/get_entities/get_profile_info.sql +17 -0
- testgen/template/get_entities/get_profile_list.sql +17 -0
- testgen/template/get_entities/get_profile_screen.sql +275 -0
- testgen/template/get_entities/get_project_list.sql +6 -0
- testgen/template/get_entities/get_table_group_list.sql +10 -0
- testgen/template/get_entities/get_test_generation_list.sql +18 -0
- testgen/template/get_entities/get_test_info.sql +41 -0
- testgen/template/get_entities/get_test_results_for_run_cli.sql +16 -0
- testgen/template/get_entities/get_test_run_list.sql +24 -0
- testgen/template/get_entities/get_test_suite.sql +13 -0
- testgen/template/get_entities/get_test_suite_list.sql +18 -0
- testgen/template/get_entities/list_test_types.sql +4 -0
- testgen/template/observability/get_event_data.sql +23 -0
- testgen/template/observability/get_test_results.sql +41 -0
- testgen/template/observability/update_test_results_exported_to_observability.sql +12 -0
- testgen/template/parms/parms_profiling.sql +34 -0
- testgen/template/parms/parms_test_execution.sql +13 -0
- testgen/template/parms/parms_test_gen.sql +23 -0
- testgen/template/profiling/contingency_columns.sql +7 -0
- testgen/template/profiling/datatype_suggestions.sql +56 -0
- testgen/template/profiling/functional_datatype.sql +523 -0
- testgen/template/profiling/functional_tabletype_stage.sql +48 -0
- testgen/template/profiling/functional_tabletype_update.sql +8 -0
- testgen/template/profiling/pii_flag.sql +133 -0
- testgen/template/profiling/profile_anomalies_screen_column.sql +22 -0
- testgen/template/profiling/profile_anomalies_screen_multi_column.sql +58 -0
- testgen/template/profiling/profile_anomalies_screen_table.sql +22 -0
- testgen/template/profiling/profile_anomalies_screen_table_dates.sql +30 -0
- testgen/template/profiling/profile_anomalies_screen_variants.sql +40 -0
- testgen/template/profiling/profile_anomaly_types_get.sql +3 -0
- testgen/template/profiling/project_get_table_sample_count.sql +22 -0
- testgen/template/profiling/project_profile_run_record_insert.sql +8 -0
- testgen/template/profiling/project_profile_run_record_update.sql +5 -0
- testgen/template/profiling/project_profile_run_record_update_status.sql +5 -0
- testgen/template/profiling/project_update_profile_results_to_estimates.sql +32 -0
- testgen/template/profiling/refresh_anomalies.sql +33 -0
- testgen/template/profiling/refresh_data_chars_from_profiling.sql +156 -0
- testgen/template/profiling/secondary_profiling_columns.sql +12 -0
- testgen/template/profiling/secondary_profiling_delete.sql +4 -0
- testgen/template/profiling/secondary_profiling_update.sql +18 -0
- testgen/template/quick_start/populate_target_data.sql +1077 -0
- testgen/template/quick_start/recreate_target_data_schema.sql +167 -0
- testgen/template/quick_start/update_target_data.sql +100 -0
- testgen/template/updates/create_tmp_test_definition.sql +19 -0
- testgen/template/updates/get_test_def_parms.sql +38 -0
- testgen/template/updates/populate_stg_test_definitions.sql +184 -0
- testgen/template/validate_tests/ex_disable_tests_test_definitions.sql +5 -0
- testgen/template/validate_tests/ex_flag_tests_test_definitions.sql +64 -0
- testgen/template/validate_tests/ex_get_project_column_list_generic.sql +3 -0
- testgen/template/validate_tests/ex_get_test_column_list_tg.sql +65 -0
- testgen/template/validate_tests/ex_write_test_val_errors.sql +22 -0
- testgen/ui/__init__.py +0 -0
- testgen/ui/app.py +98 -0
- testgen/ui/assets/dk_logo.svg +46 -0
- testgen/ui/assets/question_mark.png +0 -0
- testgen/ui/assets/scripts.js +68 -0
- testgen/ui/assets/style.css +140 -0
- testgen/ui/bootstrap.py +109 -0
- testgen/ui/components/__init__.py +0 -0
- testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fBBc4.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fChc4EsA.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu4mxK.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu7GxKOzY.woff2 +0 -0
- testgen/ui/components/frontend/css/material-symbols-rounded.css +24 -0
- testgen/ui/components/frontend/css/material-symbols-rounded.woff2 +0 -0
- testgen/ui/components/frontend/css/roboto-font-faces.css +35 -0
- testgen/ui/components/frontend/css/shared.css +36 -0
- testgen/ui/components/frontend/img/dk_logo.svg +46 -0
- testgen/ui/components/frontend/index.html +17 -0
- testgen/ui/components/frontend/js/components/breadcrumbs.js +86 -0
- testgen/ui/components/frontend/js/components/button.js +66 -0
- testgen/ui/components/frontend/js/components/location.js +62 -0
- testgen/ui/components/frontend/js/components/select.js +75 -0
- testgen/ui/components/frontend/js/components/sidebar.js +358 -0
- testgen/ui/components/frontend/js/main.js +99 -0
- testgen/ui/components/frontend/js/streamlit.js +19 -0
- testgen/ui/components/frontend/js/van.min.js +1 -0
- testgen/ui/components/utils/__init__.py +0 -0
- testgen/ui/components/utils/callbacks.py +51 -0
- testgen/ui/components/utils/component.py +13 -0
- testgen/ui/components/widgets/__init__.py +6 -0
- testgen/ui/components/widgets/breadcrumbs.py +32 -0
- testgen/ui/components/widgets/location.py +65 -0
- testgen/ui/components/widgets/modal.py +97 -0
- testgen/ui/components/widgets/sidebar.py +69 -0
- testgen/ui/navigation/__init__.py +0 -0
- testgen/ui/navigation/menu.py +42 -0
- testgen/ui/navigation/page.py +20 -0
- testgen/ui/navigation/router.py +63 -0
- testgen/ui/queries/__init__.py +0 -0
- testgen/ui/queries/authentication_queries.py +47 -0
- testgen/ui/queries/connection_queries.py +121 -0
- testgen/ui/queries/profiling_queries.py +148 -0
- testgen/ui/queries/project_queries.py +9 -0
- testgen/ui/queries/table_group_queries.py +186 -0
- testgen/ui/queries/test_definition_queries.py +270 -0
- testgen/ui/queries/test_run_queries.py +32 -0
- testgen/ui/queries/test_suite_queries.py +145 -0
- testgen/ui/scripts/__init__.py +0 -0
- testgen/ui/scripts/patch_streamlit.py +111 -0
- testgen/ui/services/__init__.py +0 -0
- testgen/ui/services/authentication_service.py +119 -0
- testgen/ui/services/connection_service.py +220 -0
- testgen/ui/services/database_service.py +282 -0
- testgen/ui/services/form_service.py +1008 -0
- testgen/ui/services/javascript_service.py +44 -0
- testgen/ui/services/query_service.py +316 -0
- testgen/ui/services/string_service.py +12 -0
- testgen/ui/services/table_group_service.py +130 -0
- testgen/ui/services/test_definition_service.py +117 -0
- testgen/ui/services/test_run_service.py +13 -0
- testgen/ui/services/test_suite_service.py +76 -0
- testgen/ui/services/toolbar_service.py +77 -0
- testgen/ui/session.py +46 -0
- testgen/ui/views/__init__.py +0 -0
- testgen/ui/views/app_log_modal.py +92 -0
- testgen/ui/views/connections.py +72 -0
- testgen/ui/views/connections_base.py +367 -0
- testgen/ui/views/login.py +40 -0
- testgen/ui/views/not_found.py +16 -0
- testgen/ui/views/overview.py +34 -0
- testgen/ui/views/profiling_anomalies.py +501 -0
- testgen/ui/views/profiling_details.py +335 -0
- testgen/ui/views/profiling_modal.py +40 -0
- testgen/ui/views/profiling_results.py +206 -0
- testgen/ui/views/profiling_summary.py +177 -0
- testgen/ui/views/project_settings.py +74 -0
- testgen/ui/views/table_groups.py +530 -0
- testgen/ui/views/test_definitions.py +1020 -0
- testgen/ui/views/test_results.py +908 -0
- testgen/ui/views/test_runs.py +195 -0
- testgen/ui/views/test_suites.py +545 -0
- testgen/utils/__init__.py +0 -0
- testgen/utils/plugins.py +17 -0
- testgen/utils/singleton.py +14 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
SELECT '{PROJECT_CODE}' as project_code,
|
|
2
|
+
CURRENT_TIMESTAMP AT TIME ZONE 'UTC' as refresh_timestamp,
|
|
3
|
+
c.table_schema,
|
|
4
|
+
c.table_name,
|
|
5
|
+
c.column_name,
|
|
6
|
+
CASE
|
|
7
|
+
WHEN c.data_type = 'timestamp without time zone' THEN 'timestamp'
|
|
8
|
+
WHEN c.data_type = 'text'
|
|
9
|
+
OR (c.data_type = 'character varying' and c.character_maximum_length is NULL) THEN 'varchar(65535)'
|
|
10
|
+
WHEN c.data_type = 'character varying'
|
|
11
|
+
THEN 'varchar(' || CAST(c.character_maximum_length AS VARCHAR) || ')'
|
|
12
|
+
WHEN c.data_type = 'character' THEN 'char(' || CAST(c.character_maximum_length AS VARCHAR) || ')'
|
|
13
|
+
WHEN c.data_type = 'numeric' THEN 'numeric'
|
|
14
|
+
|| COALESCE( '(' || CAST(c.numeric_precision AS VARCHAR) || ','
|
|
15
|
+
|| CAST(c.numeric_scale AS VARCHAR) || ')', '')
|
|
16
|
+
ELSE c.data_type
|
|
17
|
+
END AS data_type,
|
|
18
|
+
COALESCE(c.character_maximum_length, CASE WHEN c.data_type IN ('text', 'character varying') THEN 65535 END)
|
|
19
|
+
as character_maximum_length,
|
|
20
|
+
c.ordinal_position,
|
|
21
|
+
CASE
|
|
22
|
+
WHEN c.data_type ILIKE '%char%' or c.data_type = 'text'
|
|
23
|
+
THEN 'A'
|
|
24
|
+
WHEN c.data_type ILIKE 'boolean'
|
|
25
|
+
THEN 'B'
|
|
26
|
+
WHEN c.data_type ILIKE 'date'
|
|
27
|
+
OR c.data_type ILIKE 'timestamp%'
|
|
28
|
+
THEN 'D'
|
|
29
|
+
WHEN c.data_type ILIKE 'time without time zone'
|
|
30
|
+
THEN 'T'
|
|
31
|
+
WHEN LOWER(c.data_type) IN ('bigint', 'double precision', 'integer', 'smallint', 'real')
|
|
32
|
+
OR c.data_type ILIKE 'numeric%'
|
|
33
|
+
THEN 'N'
|
|
34
|
+
ELSE
|
|
35
|
+
'X' END AS general_type,
|
|
36
|
+
CASE
|
|
37
|
+
WHEN c.data_type = 'numeric' THEN COALESCE(numeric_scale, 1) > 0
|
|
38
|
+
ELSE numeric_scale > 0
|
|
39
|
+
END as is_decimal
|
|
40
|
+
FROM information_schema.columns c
|
|
41
|
+
WHERE c.table_schema = '{DATA_SCHEMA}' {TABLE_CRITERIA}
|
|
42
|
+
ORDER BY c.table_schema, c.table_name, c.ordinal_position
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
---
|
|
2
|
+
strTemplate01_sampling: "SELECT "
|
|
3
|
+
strTemplate01_else: "SELECT "
|
|
4
|
+
strTemplate02_all: |
|
|
5
|
+
{CONNECTION_ID} as connection_id,
|
|
6
|
+
'{PROJECT_CODE}' as project_code,
|
|
7
|
+
'{TABLE_GROUPS_ID}' as table_groups_id,
|
|
8
|
+
'{DATA_SCHEMA}' AS schema_name,
|
|
9
|
+
'{RUN_DATE}' AS run_date,
|
|
10
|
+
'{DATA_TABLE}' AS table_name,
|
|
11
|
+
{COL_POS} AS position,
|
|
12
|
+
'{COL_NAME_SANITIZED}' AS column_name,
|
|
13
|
+
'{COL_TYPE}' AS column_type,
|
|
14
|
+
'{COL_GEN_TYPE}' AS general_type,
|
|
15
|
+
COUNT(*) AS record_ct,
|
|
16
|
+
COUNT("{COL_NAME}") AS value_ct,
|
|
17
|
+
COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
|
|
18
|
+
SUM(CASE WHEN "{COL_NAME}" IS NULL THEN 1 ELSE 0 END) AS null_value_ct,
|
|
19
|
+
strTemplate03_ADN: MIN(LENGTH(CAST("{COL_NAME}" AS TEXT))) AS min_length,
|
|
20
|
+
MAX(LENGTH(CAST("{COL_NAME}" AS TEXT))) AS max_length,
|
|
21
|
+
AVG(NULLIF(LENGTH(CAST("{COL_NAME}" AS TEXT)), 0)::FLOAT) AS avg_length,
|
|
22
|
+
strTemplate03_else: NULL as min_length,
|
|
23
|
+
NULL as max_length,
|
|
24
|
+
NULL as avg_length,
|
|
25
|
+
strTemplate04_A: SUM(CASE
|
|
26
|
+
WHEN TRIM("{COL_NAME}") ~ '^0(\.0*)?$' THEN 1 ELSE 0
|
|
27
|
+
END) AS zero_value_ct,
|
|
28
|
+
strTemplate04_N: SUM( 1 - ABS(SIGN("{COL_NAME}")) )::BIGINT AS zero_value_ct,
|
|
29
|
+
strTemplate04_else: NULL as zero_value_ct,
|
|
30
|
+
strTemplate05_A: COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) as distinct_std_value_ct,
|
|
31
|
+
SUM(CASE
|
|
32
|
+
WHEN "{COL_NAME}" = '' THEN 1
|
|
33
|
+
ELSE 0
|
|
34
|
+
END) AS zero_length_ct,
|
|
35
|
+
SUM( CASE
|
|
36
|
+
WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1
|
|
37
|
+
ELSE 0
|
|
38
|
+
END ) AS lead_space_ct,
|
|
39
|
+
SUM( CASE WHEN "{COL_NAME}" ILIKE '"%"' OR "{COL_NAME}" ILIKE '''%''' THEN 1 ELSE 0 END ) as quoted_value_ct,
|
|
40
|
+
SUM( CASE WHEN "{COL_NAME}" ~ '[0-9]' THEN 1 ELSE 0 END ) as includes_digit_ct,
|
|
41
|
+
SUM( CASE
|
|
42
|
+
WHEN "{COL_NAME}" IN ('.', '?', ' ') THEN 1
|
|
43
|
+
WHEN LOWER("{COL_NAME}") SIMILAR TO '(^.{2,}|-{2,}|0{2,}|9{2,}|x{2,}|z{2,}$)' THEN 1
|
|
44
|
+
WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
|
|
45
|
+
'n/a','#na','none','null','unknown') THEN 1
|
|
46
|
+
WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
|
|
47
|
+
'(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
|
|
48
|
+
WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
|
|
49
|
+
'[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
|
|
50
|
+
ELSE 0
|
|
51
|
+
END ) AS filled_value_ct,
|
|
52
|
+
LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text,
|
|
53
|
+
LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text,
|
|
54
|
+
SUM({DATA_QC_SCHEMA}.fndk_isnum(LEFT("{COL_NAME}", 31))) AS numeric_ct,
|
|
55
|
+
SUM({DATA_QC_SCHEMA}.fndk_isdate(LEFT("{COL_NAME}", 26))) AS date_ct,
|
|
56
|
+
CASE
|
|
57
|
+
WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[0-9]{1,5}[a-zA-Z]?\s\w{1,5}\.?\s?\w*\s?\w*\s[a-zA-Z]{1,6}\.?\s?[0-9]{0,5}[A-Z]{0,1}$'
|
|
58
|
+
THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'STREET_ADDR'
|
|
59
|
+
WHEN SUM(CASE WHEN "{COL_NAME}" IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
|
|
60
|
+
THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'STATE_USA'
|
|
61
|
+
WHEN SUM( CASE WHEN "{COL_NAME}" SIMILAR TO '^([\+]1 |1-|)[\+]?[(]?[0-9]{3}[)][ ]?[-\s\.]?[0-9]{3}[-\s\.]?[0-9]{4,6}$'
|
|
62
|
+
OR "{COL_NAME}" SIMILAR TO '^([\+]1 |1-|)[2-9][01][0-9][-| ]?[0-9]{3}[-| ]?[0-9]{4}$'
|
|
63
|
+
THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'PHONE_USA'
|
|
64
|
+
WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'
|
|
65
|
+
THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'EMAIL'
|
|
66
|
+
WHEN SUM( CASE WHEN TRANSLATE("{COL_NAME}",'012345678','999999999') IN ('99999', '999999999', '99999-9999')
|
|
67
|
+
THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'ZIP_USA'
|
|
68
|
+
WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[\w\s\-]+(?<!\s)\.(txt|csv|tsv|dat|doc|pdf|xlsx)$'
|
|
69
|
+
THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'FILE_NAME'
|
|
70
|
+
WHEN SUM( CASE WHEN "{COL_NAME}" SIMILAR TO '^([0-9]{4}[- ]){3}[0-9]{4}$'
|
|
71
|
+
THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'CREDIT_CARD'
|
|
72
|
+
WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'
|
|
73
|
+
AND "{COL_NAME}" !~ '\s(and|but|or|yet)\s'
|
|
74
|
+
THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'DELIMITED_DATA'
|
|
75
|
+
WHEN SUM ( CASE WHEN "{COL_NAME}" SIMILAR TO '^[0-8][0-9]{2}-[0-9]{2}-[0-9]{4}$'
|
|
76
|
+
AND LEFT("{COL_NAME}", 3) NOT BETWEEN '734' AND '749'
|
|
77
|
+
AND LEFT("{COL_NAME}", 3) <> '666' THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'SSN'
|
|
78
|
+
END as std_pattern_match,
|
|
79
|
+
strTemplate05_else: NULL as distinct_std_value_ct,
|
|
80
|
+
NULL as zero_length_ct,
|
|
81
|
+
NULL as lead_space_ct,
|
|
82
|
+
NULL as quoted_value_ct,
|
|
83
|
+
NULL as includes_digit_ct,
|
|
84
|
+
NULL as filled_value_ct,
|
|
85
|
+
NULL as min_text,
|
|
86
|
+
NULL as max_text,
|
|
87
|
+
NULL as numeric_ct,
|
|
88
|
+
NULL as date_ct,
|
|
89
|
+
NULL as std_pattern_match,
|
|
90
|
+
strTemplate06_A_patterns: ( SELECT LEFT(STRING_AGG(pattern, ' | ' ORDER BY ct DESC) , 1000) AS concat_pats
|
|
91
|
+
FROM (
|
|
92
|
+
SELECT CAST(COUNT(*) AS VARCHAR(10)) || ' | ' || pattern AS pattern,
|
|
93
|
+
COUNT(*) AS ct
|
|
94
|
+
FROM ( SELECT REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE(
|
|
95
|
+
"{COL_NAME}", '[a-z]', 'a', 'g'),
|
|
96
|
+
'[A-Z]', 'A', 'g'),
|
|
97
|
+
'[0-9]', 'N', 'g') AS pattern
|
|
98
|
+
FROM {DATA_SCHEMA}.{DATA_TABLE}
|
|
99
|
+
WHERE "{COL_NAME}" > ' ' AND (SELECT MAX(LENGTH("{COL_NAME}"))
|
|
100
|
+
FROM {DATA_SCHEMA}.{DATA_TABLE}) BETWEEN 3 and {PARM_MAX_PATTERN_LENGTH}) p
|
|
101
|
+
GROUP BY pattern
|
|
102
|
+
HAVING pattern > ' '
|
|
103
|
+
ORDER BY COUNT(*) DESC
|
|
104
|
+
LIMIT 5
|
|
105
|
+
) ps) AS top_patterns,
|
|
106
|
+
strTemplate06_else: NULL as top_patterns,
|
|
107
|
+
# Ten most frequent non-blank values of the column, each formatted as
# "count | value", joined with " | " and truncated to 1000 characters.
# PostgreSQL has no TOP clause, so the row cap is a LIMIT placed after ORDER BY,
# and the inner sort is descending so the cap keeps the most frequent values.
strTemplate07_A_freq: ( SELECT LEFT(STRING_AGG(val, ' | ' ORDER BY ct DESC), 1000) as concat_vals
|
|
108
|
+
FROM (
|
|
109
|
+
SELECT CAST(COUNT(*) as VARCHAR(10)) || ' | ' || "{COL_NAME}" as val,
|
|
110
|
+
COUNT(*) as ct
|
|
111
|
+
FROM {DATA_SCHEMA}.{DATA_TABLE}
|
|
112
|
+
WHERE "{COL_NAME}" > ' '
|
|
113
|
+
GROUP BY "{COL_NAME}"
|
|
114
|
+
HAVING "{COL_NAME}" > ' '
|
|
115
|
+
ORDER BY COUNT(*) DESC, "{COL_NAME}" DESC LIMIT 10
|
|
116
|
+
) ps
|
|
117
|
+
) AS top_freq_values,
|
|
118
|
+
strTemplate07_else: NULL as top_freq_values,
|
|
119
|
+
strTemplate08_N: MIN("{COL_NAME}") AS min_value,
|
|
120
|
+
MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
|
|
121
|
+
MAX("{COL_NAME}") AS max_value,
|
|
122
|
+
AVG(CAST("{COL_NAME}" AS FLOAT)) AS avg_value,
|
|
123
|
+
STDDEV(CAST("{COL_NAME}" AS FLOAT)) AS stdev_value,
|
|
124
|
+
MIN(pct_25) as percentile_25,
|
|
125
|
+
MIN(pct_50) as percentile_50,
|
|
126
|
+
MIN(pct_75) as percentile_75,
|
|
127
|
+
strTemplate08_else: NULL as min_value,
|
|
128
|
+
NULL as min_value_over_0,
|
|
129
|
+
NULL as max_value,
|
|
130
|
+
NULL as avg_value,
|
|
131
|
+
NULL as stdev_value,
|
|
132
|
+
NULL as percentile_25,
|
|
133
|
+
NULL as percentile_50,
|
|
134
|
+
NULL as percentile_75,
|
|
135
|
+
strTemplate10_N_dec: SUM(ROUND(MOD("{COL_NAME}", 1), 5)) as fractional_sum,
|
|
136
|
+
|
|
137
|
+
strTemplate10_else: NULL as fractional_sum,
|
|
138
|
+
|
|
139
|
+
strTemplate11_D: CASE
|
|
140
|
+
WHEN MIN("{COL_NAME}") IS NULL THEN NULL
|
|
141
|
+
ELSE GREATEST(MIN("{COL_NAME}"), '0001-01-01')
|
|
142
|
+
END as min_date,
|
|
143
|
+
MAX("{COL_NAME}") as max_date,
|
|
144
|
+
SUM(CASE
|
|
145
|
+
WHEN {DATA_QC_SCHEMA}.DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 12 THEN 1
|
|
146
|
+
ELSE 0
|
|
147
|
+
END) AS before_1yr_date_ct,
|
|
148
|
+
SUM(CASE
|
|
149
|
+
WHEN {DATA_QC_SCHEMA}.DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 60 THEN 1
|
|
150
|
+
ELSE 0
|
|
151
|
+
END) AS before_5yr_date_ct,
|
|
152
|
+
SUM(CASE
|
|
153
|
+
WHEN {DATA_QC_SCHEMA}.DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 240 THEN 1
|
|
154
|
+
ELSE 0
|
|
155
|
+
END) AS before_20yr_date_ct,
|
|
156
|
+
SUM(CASE
|
|
157
|
+
WHEN {DATA_QC_SCHEMA}.DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1
|
|
158
|
+
ELSE 0
|
|
159
|
+
END) AS within_1yr_date_ct,
|
|
160
|
+
SUM(CASE
|
|
161
|
+
WHEN {DATA_QC_SCHEMA}.DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 30 THEN 1
|
|
162
|
+
ELSE 0
|
|
163
|
+
END) AS within_1mo_date_ct,
|
|
164
|
+
SUM(CASE
|
|
165
|
+
WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 ELSE 0
|
|
166
|
+
END) AS future_date_ct,
|
|
167
|
+
COUNT(DISTINCT {DATA_QC_SCHEMA}.DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}' ) ) as date_days_present,
|
|
168
|
+
COUNT(DISTINCT {DATA_QC_SCHEMA}.DATEDIFF('WEEK', "{COL_NAME}", '{RUN_DATE}' ) ) as date_weeks_present,
|
|
169
|
+
COUNT(DISTINCT {DATA_QC_SCHEMA}.DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}' ) ) as date_months_present,
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
strTemplate11_else: NULL as min_date,
|
|
173
|
+
NULL as max_date,
|
|
174
|
+
NULL as before_1yr_date_ct,
|
|
175
|
+
NULL as before_5yr_date_ct,
|
|
176
|
+
NULL as before_20yr_date_ct,
|
|
177
|
+
NULL as within_1yr_date_ct,
|
|
178
|
+
NULL as within_1mo_date_ct,
|
|
179
|
+
NULL as future_date_ct,
|
|
180
|
+
NULL as date_days_present,
|
|
181
|
+
NULL as date_weeks_present,
|
|
182
|
+
NULL as date_months_present,
|
|
183
|
+
|
|
184
|
+
strTemplate12_B: SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
|
|
185
|
+
|
|
186
|
+
strTemplate12_else: NULL as boolean_true_ct,
|
|
187
|
+
|
|
188
|
+
strTemplate13_ALL: NULL AS datatype_suggestion,
|
|
189
|
+
strTemplate14_A_do_patterns: ( SELECT COUNT(DISTINCT REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE(
|
|
190
|
+
"{COL_NAME}", '[a-z]', 'a', 'g'),
|
|
191
|
+
'[A-Z]', 'A', 'g'),
|
|
192
|
+
'[0-9]', 'N', 'g')
|
|
193
|
+
) AS pattern_ct
|
|
194
|
+
FROM {DATA_SCHEMA}.{DATA_TABLE}
|
|
195
|
+
WHERE "{COL_NAME}" > ' ' ) AS distinct_pattern_ct,
|
|
196
|
+
SUM(SIGN(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REGEXP_REPLACE(TRIM("{COL_NAME}"), ' ', '', 'g')))::BIGINT) AS embedded_space_ct,
|
|
197
|
+
AVG(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REGEXP_REPLACE(TRIM("{COL_NAME}"), ' ', '', 'g'))::FLOAT) AS avg_embedded_spaces,
|
|
198
|
+
|
|
199
|
+
strTemplate14_A_no_patterns: NULL as distinct_pattern_ct,
|
|
200
|
+
SUM(SIGN(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REGEXP_REPLACE(TRIM("{COL_NAME}"), ' ', '', 'g')))::BIGINT) AS embedded_space_ct,
|
|
201
|
+
AVG(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REGEXP_REPLACE(TRIM("{COL_NAME}"), ' ', '', 'g'))::FLOAT) AS avg_embedded_spaces,
|
|
202
|
+
|
|
203
|
+
strTemplate14_else: NULL as distinct_pattern_ct,
|
|
204
|
+
NULL as embedded_space_ct,
|
|
205
|
+
NULL as avg_embedded_spaces,
|
|
206
|
+
|
|
207
|
+
strTemplate15_ALL: NULL as functional_data_type,
|
|
208
|
+
NULL as functional_table_type,
|
|
209
|
+
|
|
210
|
+
strTemplate16_ALL: " '{PROFILE_RUN_ID}' as profile_run_id"
|
|
211
|
+
|
|
212
|
+
strTemplate98_sampling: ' FROM {DATA_SCHEMA}.{DATA_TABLE} '
|
|
213
|
+
|
|
214
|
+
strTemplate98_else: ' FROM {DATA_SCHEMA}.{DATA_TABLE} '
|
|
215
|
+
|
|
216
|
+
strTemplate99_N: |
|
|
217
|
+
, (SELECT
|
|
218
|
+
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_25,
|
|
219
|
+
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_50,
|
|
220
|
+
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_75
|
|
221
|
+
FROM {DATA_SCHEMA}.{DATA_TABLE} LIMIT 1) pctile
|
|
222
|
+
|
|
223
|
+
strTemplate99_else: ' '
|
|
224
|
+
|
|
225
|
+
# Row-sampling predicate: keeps a row when a uniform random draw in [0, 1)
# is at most 1 / PROFILE_SAMPLE_RATIO.
# PostgreSQL spells the function RANDOM(); RAND() is not defined there.
strTemplate100_sampling: 'WHERE RANDOM() <= 1.0 / {PROFILE_SAMPLE_RATIO}'
|
testgen/template/flavors/postgresql/profiling/project_secondary_profiling_query_postgresql.sql
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
-- Top-frequency profile for one column.
-- Produces a single row: the ten most frequent non-blank values (one '| value | count' line
-- each, newline-separated), an '| Other Values (n) | total' line for the remainder, and an
-- MD5 hash over the sorted distinct values of the column.
WITH ranked_vals AS (
    -- One row per distinct non-blank value, ranked by descending frequency (ties by value)
    SELECT "{COL_NAME}",
           COUNT(*) AS ct,
           ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC, "{COL_NAME}") AS rn
      FROM {DATA_SCHEMA}.{DATA_TABLE}
     WHERE "{COL_NAME}" > ' '
     GROUP BY "{COL_NAME}"
),
consol_vals AS (
    -- Ranks 1-10 keep their own label; every other value falls into the NULL group,
    -- which COALESCE turns into the consolidated "Other Values" line
    SELECT COALESCE(
               CASE WHEN rn <= 10 THEN '| ' || "{COL_NAME}" || ' | ' || ct::VARCHAR END,
               '| Other Values (' || COUNT(DISTINCT "{COL_NAME}")::VARCHAR || ') | ' || SUM(ct)::VARCHAR
           ) AS val,
           MIN(rn) AS min_rn
      FROM ranked_vals
     GROUP BY CASE WHEN rn <= 10 THEN '| ' || "{COL_NAME}" || ' | ' || ct::VARCHAR END
)
SELECT '{PROJECT_CODE}' AS project_code,
       '{DATA_SCHEMA}'  AS schema_name,
       '{RUN_DATE}'     AS run_date,
       '{DATA_TABLE}'   AS table_name,
       '{COL_NAME}'     AS column_name,
       -- Aggregate in rank order with a placeholder delimiter, then swap it for newlines
       REPLACE(STRING_AGG(val, '^#^' ORDER BY min_rn), '^#^', CHR(10)) AS top_freq_values,
       ( SELECT MD5(STRING_AGG(DISTINCT "{COL_NAME}", '|' ORDER BY "{COL_NAME}")) AS dvh
           FROM {DATA_SCHEMA}.{DATA_TABLE} ) AS distinct_value_hash
  FROM consol_vals;
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
-- DATEDIFF(difftype, firstdate, seconddate): difference seconddate - firstdate in the given unit.
--   DAY / DD / D       calendar days between the two dates (time of day ignored)
--   WEEK / WK / W      calendar days divided by 7 (integer division)
--   MON / MONTH / MM   month boundaries crossed (year and month parts only)
--   QUARTER / QTR / Q  quarter boundaries crossed
--   YEAR / YY / Y      difference of the year parts
-- Returns NULL for an unrecognized difftype, and (because the function is STRICT) for any NULL argument.
-- DATE_PART returns double precision, so every branch is cast explicitly: the CASE then
-- resolves to BIGINT and matches the declared return type without relying on an implicit
-- coercion of the function result.
CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.DATEDIFF(difftype character varying, firstdate timestamp without time zone, seconddate timestamp without time zone)
RETURNS BIGINT AS $$
   SELECT
      CASE
        WHEN UPPER(difftype) IN ('DAY', 'DD', 'D') THEN
            (DATE(seconddate) - DATE(firstdate))::BIGINT
        WHEN UPPER(difftype) IN ('WEEK','WK', 'W') THEN
            ((DATE(seconddate) - DATE(firstdate)) / 7)::BIGINT
        WHEN UPPER(difftype) IN ('MON', 'MONTH', 'MM') THEN
            ((DATE_PART('year', seconddate) - DATE_PART('year', firstdate)) * 12 + (DATE_PART('month', seconddate) - DATE_PART('month', firstdate)))::BIGINT
        WHEN UPPER(difftype) IN ('QUARTER', 'QTR', 'Q') THEN
            (((DATE_PART('year', seconddate) - DATE_PART('year', firstdate)) * 4) + (DATE_PART('quarter', seconddate) - DATE_PART('quarter', firstdate)))::BIGINT
        WHEN UPPER(difftype) IN ('YEAR', 'YY', 'Y') THEN
            (DATE_PART('year', seconddate) - DATE_PART('year', firstdate))::BIGINT
        ELSE
            NULL::BIGINT
      END;
$$ LANGUAGE sql IMMUTABLE STRICT;
|
|
19
|
+
|
|
20
|
+
-- fn_charcount(instring, searchstring): number of non-overlapping occurrences of
-- searchstring in instring, derived from how much shorter instring becomes once every
-- occurrence is removed.
-- Returns NULL when either argument is NULL or when searchstring is empty.
CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fn_charcount(instring character varying, searchstring character varying) returns bigint
    language plpgsql
as
$$
BEGIN
   -- NULLIF turns a zero-length search string into a NULL divisor, so the result is NULL
   -- instead of a division-by-zero error that would abort the calling query.
   RETURN (CHAR_LENGTH(instring) - CHAR_LENGTH(REPLACE(instring, searchstring, ''))) / NULLIF(CHAR_LENGTH(searchstring), 0);
END;
$$;
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
-- fn_parsefreq(top_freq_values, rowno, colno): extracts one field from a newline-separated
-- list of '|'-delimited lines.
--   rowno  1-based line number within top_freq_values (lines are separated by CHR(10))
--   colno  field number within that line; colno + 1 is used so that the empty field in
--          front of a leading '|' is skipped
-- The field is returned as-is (surrounding spaces are not trimmed); a line or field that
-- does not exist yields an empty string.
CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fn_parsefreq(top_freq_values VARCHAR(1000), rowno INTEGER, colno INTEGER) returns VARCHAR(1000)
AS
$$
DECLARE
   freq_line VARCHAR;
BEGIN
   -- Pick the requested line first, then the requested field within it
   freq_line := SPLIT_PART(top_freq_values, CHR(10), rowno);
   RETURN SPLIT_PART(freq_line, '|', colno + 1);
END;
$$
    LANGUAGE plpgsql;
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
-- fndk_isnum(text): 1 when the value looks like a number, otherwise 0 (including NULL input).
-- Accepted shape, with optional surrounding whitespace: optional sign, optional '$',
-- one or more digits, optional groups of ',ddd', optional decimal point with trailing
-- digits, optional '%'. At least one digit is required before the decimal point.
CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fndk_isnum(VARCHAR)
    RETURNS INTEGER
    LANGUAGE sql
    IMMUTABLE
AS
$$
    -- The regex match yields TRUE / FALSE / NULL; cast to 1 / 0 and map NULL to 0
    SELECT COALESCE(($1 ~ E'^\\s*[+-]?\\$?\\s*[0-9]+(,[0-9]{3})*(\\.[0-9]*)?[\\%]?\\s*$')::INTEGER, 0);
$$;
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
-- fndk_isdate(text): 1 when the value looks like a valid date in one of the supported
-- text formats, otherwise 0 (including NULL input). Formats are tried in order:
--   1. YYYY-MM-DD HH:MM:SS, optionally followed by a space and 6 fractional digits
--   2. YYYYMMDDHHMMSSSSSSSS-style compact timestamp (20 digits), YYYYMMDD, or YYYYMMDDHH
--   3. YYYY-MM-DD or YYYY-MMM-DD (values longer than 11 characters are rejected first)
--   4. MM/DD/YYYY or MM/DD/YY, with '-' accepted in place of '/'
--   5. DD-MMM-YYYY
-- For every format the year must fall in 1800-2200 and the day must be valid for the
-- month (February accepts up to 29; leap years are not checked).
-- Regexes containing backslash classes use E'' literals so that '\\d' / '\\s' reach the
-- regex engine as \d / \s regardless of the standard_conforming_strings setting.
CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fndk_isdate(VARCHAR)
    RETURNS INTEGER
    IMMUTABLE
AS $$
SELECT CASE
           -- YYYY-MM-DD HH:MM:SS SSSSSS or YYYY-MM-DD HH:MM:SS
           WHEN $1 ~ E'^(\\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])\\s(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\\s[0-9]{6})?$'
               THEN CASE
                        WHEN LEFT($1, 4)::INT BETWEEN 1800 AND 2200
                         AND (
                               ( SUBSTRING($1, 6, 2) IN ('01', '03', '05', '07', '08',
                                                         '10', '12')
                                 AND SUBSTRING($1, 9, 2)::INT BETWEEN 1 AND 31 )
                               -- 30-day months, including November
                            OR ( SUBSTRING($1, 6, 2) IN ('04', '06', '09', '11')
                                 AND SUBSTRING($1, 9, 2)::INT BETWEEN 1 AND 30 )
                            OR ( SUBSTRING($1, 6, 2) = '02'
                                 AND SUBSTRING($1, 9, 2)::INT BETWEEN 1 AND 29 )
                             )
                            THEN 1
                        ELSE 0
                    END
           -- YYYYMMDDHHMMSSSSSS or YYYYMMDD (a trailing 2-digit hour, YYYYMMDDHH, is still accepted)
           WHEN $1 ~ E'^(\\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])([0-5][0-9])([0-5][0-9])([0-9]{6})$'
             OR $1 ~ E'^(\\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])?$'
               THEN CASE
                        WHEN LEFT($1, 4)::INT BETWEEN 1800 AND 2200
                         AND (
                               ( SUBSTRING($1, 5, 2) IN ('01', '03', '05', '07', '08',
                                                         '10', '12')
                                 AND SUBSTRING($1, 7, 2)::INT BETWEEN 1 AND 31 )
                               -- 30-day months, including November
                            OR ( SUBSTRING($1, 5, 2) IN ('04', '06', '09', '11')
                                 AND SUBSTRING($1, 7, 2)::INT BETWEEN 1 AND 30 )
                            OR ( SUBSTRING($1, 5, 2) = '02'
                                 AND SUBSTRING($1, 7, 2)::INT BETWEEN 1 AND 29 )
                             )
                            THEN 1
                        ELSE 0
                    END
           -- Exclude anything else long
           WHEN LENGTH($1) > 11 THEN 0
           -- YYYY-MMM/MM-DD
           -- Month names are replaced by '12' only so the shape check below can be numeric.
           -- NOTE(review): this pattern is not anchored, so a value with stray leading or
           -- trailing characters can pass it and then fail in the ::INT casts - confirm
           -- whether anchoring with ^...$ is wanted.
           WHEN REGEXP_REPLACE(UPPER($1), '(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)', '12', 'g')
                    ~ '[12][09][0-9][0-9]-[0-1]?[0-9]-[0-3]?[0-9]'
               THEN CASE
                        WHEN SPLIT_PART($1, '-', 1)::INT BETWEEN 1800 AND 2200
                         AND (
                               ( UPPER(SPLIT_PART($1, '-', 2)) IN ('01', '03', '05', '07', '08',
                                                                   '1', '3', '5', '7', '8', '10', '12',
                                                                   'JAN', 'MAR', 'MAY', 'JUL', 'AUG',
                                                                   'OCT', 'DEC')
                                 AND SPLIT_PART($1, '-', 3)::INT BETWEEN 1 AND 31 )
                            OR ( UPPER(SPLIT_PART($1, '-', 2)) IN ('04', '06', '09', '4', '6', '9', '11',
                                                                   'APR', 'JUN', 'SEP', 'NOV')
                                 AND SPLIT_PART($1, '-', 3)::INT BETWEEN 1 AND 30 )
                            OR ( UPPER(SPLIT_PART($1, '-', 2)) IN ('02', '2', 'FEB')
                                 AND SPLIT_PART($1, '-', 3)::INT BETWEEN 1 AND 29 )
                             )
                            THEN 1
                        ELSE 0
                    END
           -- MM/-DD/-YY/YYYY
           WHEN REPLACE($1, '-', '/') ~ '^[0-1]?[0-9]/[0-3]?[0-9]/[12][09][0-9][0-9]$'
             OR REPLACE($1, '-', '/') ~ '^[0-1]?[0-9]/[0-3]?[0-9]/[0-9][0-9]$'
               THEN
                   CASE
                       WHEN SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT BETWEEN 1 AND 12
                        AND (
                              ( SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT IN (1, 3, 5, 7, 8, 10, 12)
                                AND SPLIT_PART(REPLACE($1, '-', '/'), '/', 2)::INT BETWEEN 1 AND 31 )
                           OR ( SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT IN (4, 6, 9, 11)
                                AND SPLIT_PART(REPLACE($1, '-', '/'), '/', 2)::INT BETWEEN 1 AND 30 )
                           OR ( SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT = 2
                                AND SPLIT_PART(REPLACE($1, '-', '/'), '/', 2)::INT BETWEEN 1 AND 29 )
                            )
                        AND
                            -- Only the last two year digits are used, prefixed with '20', so
                            -- this range check passes for every value that reached this branch
                            ('20' || RIGHT(SPLIT_PART(REPLACE($1, '-', '/'), '/', 3), 2))::INT BETWEEN 1800 AND 2200
                           THEN 1
                       ELSE 0
                   END
           -- DD-MMM-YYYY
           WHEN UPPER($1) ~ '[0-3]?[0-9]-(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-[12][09][0-9][0-9]'
               THEN
                   CASE
                       WHEN SPLIT_PART($1, '-', 3)::INT BETWEEN 1800 AND 2200
                        AND (
                              ( UPPER(SPLIT_PART($1, '-', 2)) IN ('JAN', 'MAR', 'MAY', 'JUL', 'AUG', 'OCT', 'DEC')
                                AND SPLIT_PART($1, '-', 1)::INT BETWEEN 1 AND 31 )
                           OR ( UPPER(SPLIT_PART($1, '-', 2)) IN ('APR', 'JUN', 'SEP', 'NOV')
                                AND SPLIT_PART($1, '-', 1)::INT BETWEEN 1 AND 30 )
                           OR ( UPPER(SPLIT_PART($1, '-', 2)) = 'FEB'
                                AND SPLIT_PART($1, '-', 1)::INT BETWEEN 1 AND 29 )
                            )
                           THEN 1
                       ELSE 0
                   END
           ELSE 0
       END
           as isdate
$$
    LANGUAGE sql;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
-- Create the schema that holds the data-quality helper functions; no-op when it already exists.
CREATE SCHEMA IF NOT EXISTS {DATA_QC_SCHEMA};
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
-- Column inventory for one schema, read from information_schema.columns.
-- Returns one row per column with a normalized data_type label, a one-letter general_type
-- (A = character, B = boolean, D = date/timestamp, T = time, N = numeric, X = other) and an
-- is_decimal flag. {TABLE_CRITERIA} is appended verbatim to the WHERE clause.
SELECT '{PROJECT_CODE}' as project_code,
       CURRENT_TIMESTAMP AT TIME ZONE 'UTC' as refresh_timestamp,
       c.table_schema,
       c.table_name,
       c.column_name,
       CASE
         WHEN c.data_type = 'timestamp without time zone' THEN 'timestamp'
         -- character_maximum_length is NULL when no length was declared; COALESCE drops the
         -- "(n)" suffix in that case instead of letting the whole label become NULL
         WHEN c.data_type = 'character varying'
              THEN 'varchar' || COALESCE( '(' || CAST(c.character_maximum_length AS VARCHAR) || ')', '')
         WHEN c.data_type = 'character'
              THEN 'char' || COALESCE( '(' || CAST(c.character_maximum_length AS VARCHAR) || ')', '')
         WHEN c.data_type = 'numeric' THEN 'numeric'
                           || COALESCE( '(' || CAST(c.numeric_precision AS VARCHAR) || ','
                                            || CAST(c.numeric_scale AS VARCHAR) || ')', '')
         ELSE c.data_type END AS data_type,
       c.character_maximum_length,
       c.ordinal_position,
       CASE
         WHEN c.data_type ILIKE '%char%'
              THEN 'A'
         WHEN c.data_type ILIKE 'boolean'
              THEN 'B'
         WHEN c.data_type ILIKE 'date'
              OR c.data_type ILIKE 'timestamp%'
              THEN 'D'
         WHEN c.data_type ILIKE 'time without time zone'
              THEN 'T'
         WHEN LOWER(c.data_type) IN ('bigint', 'double precision', 'integer', 'smallint', 'real')
              OR c.data_type ILIKE 'numeric%'
              THEN 'N'
         ELSE
              'X' END AS general_type,
       CASE
         -- A numeric column declared without a scale is treated as decimal
         WHEN c.data_type = 'numeric' THEN COALESCE(numeric_scale, 1) > 0
         ELSE numeric_scale > 0
       END as is_decimal
  FROM information_schema.columns c
 WHERE c.table_schema = '{DATA_SCHEMA}' {TABLE_CRITERIA}
 ORDER BY c.table_schema, c.table_name, c.ordinal_position
|