dataops-testgen 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataops_testgen-2.2.0.dist-info/LICENSE +203 -0
- dataops_testgen-2.2.0.dist-info/METADATA +287 -0
- dataops_testgen-2.2.0.dist-info/NOTICE +5 -0
- dataops_testgen-2.2.0.dist-info/RECORD +270 -0
- dataops_testgen-2.2.0.dist-info/WHEEL +5 -0
- dataops_testgen-2.2.0.dist-info/entry_points.txt +2 -0
- dataops_testgen-2.2.0.dist-info/top_level.txt +1 -0
- testgen/__init__.py +0 -0
- testgen/__main__.py +770 -0
- testgen/commands/__init__.py +0 -0
- testgen/commands/queries/__init__.py +0 -0
- testgen/commands/queries/execute_cat_tests_query.py +95 -0
- testgen/commands/queries/execute_tests_query.py +160 -0
- testgen/commands/queries/generate_tests_query.py +94 -0
- testgen/commands/queries/profiling_query.py +366 -0
- testgen/commands/queries/test_parameter_validation_query.py +88 -0
- testgen/commands/run_execute_cat_tests.py +162 -0
- testgen/commands/run_execute_tests.py +168 -0
- testgen/commands/run_generate_tests.py +107 -0
- testgen/commands/run_get_entities.py +122 -0
- testgen/commands/run_launch_db_config.py +84 -0
- testgen/commands/run_observability_exporter.py +330 -0
- testgen/commands/run_profiling_bridge.py +495 -0
- testgen/commands/run_quick_start.py +168 -0
- testgen/commands/run_setup_profiling_tools.py +96 -0
- testgen/commands/run_test_definition.py +146 -0
- testgen/commands/run_test_parameter_validation.py +135 -0
- testgen/commands/run_upgrade_db_config.py +156 -0
- testgen/common/__init__.py +8 -0
- testgen/common/clean_sql.py +53 -0
- testgen/common/credentials.py +25 -0
- testgen/common/database/__init__.py +0 -0
- testgen/common/database/database_service.py +629 -0
- testgen/common/database/flavor/__init__.py +0 -0
- testgen/common/database/flavor/flavor_service.py +75 -0
- testgen/common/database/flavor/mssql_flavor_service.py +34 -0
- testgen/common/database/flavor/postgresql_flavor_service.py +5 -0
- testgen/common/database/flavor/redshift_flavor_service.py +22 -0
- testgen/common/database/flavor/snowflake_flavor_service.py +69 -0
- testgen/common/database/flavor/trino_flavor_service.py +21 -0
- testgen/common/date_service.py +68 -0
- testgen/common/display_service.py +85 -0
- testgen/common/docker_service.py +76 -0
- testgen/common/encrypt.py +55 -0
- testgen/common/get_pipeline_parms.py +57 -0
- testgen/common/logs.py +79 -0
- testgen/common/process_service.py +62 -0
- testgen/common/read_file.py +69 -0
- testgen/settings.py +440 -0
- testgen/template/dbsetup/010_create_base_schema.sql +2 -0
- testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +179 -0
- testgen/template/dbsetup/030_initialize_new_schema_structure.sql +735 -0
- testgen/template/dbsetup/040_populate_new_schema_project.sql +59 -0
- testgen/template/dbsetup/050_populate_new_schema_metadata.sql +1517 -0
- testgen/template/dbsetup/060_create_standard_views.sql +248 -0
- testgen/template/dbsetup/070_create_default_users.sql +17 -0
- testgen/template/dbsetup/075_grant_role_rights.sql +43 -0
- testgen/template/dbsetup/080_set_current_revision.sql +5 -0
- testgen/template/dbupgrade/0100_incremental_upgrade.sql +5 -0
- testgen/template/dbupgrade/0101_incremental_upgrade.sql +15 -0
- testgen/template/dbupgrade/0102_incremental_upgrade.sql +4 -0
- testgen/template/dbupgrade/0103_incremental_upgrade.sql +22 -0
- testgen/template/dbupgrade/0104_incremental_upgrade.sql +44 -0
- testgen/template/dbupgrade/0105_incremental_upgrade.sql +1 -0
- testgen/template/dbupgrade/0106_incremental_upgrade.sql +5 -0
- testgen/template/dbupgrade/0107_incremental_upgrade.sql +3 -0
- testgen/template/dbupgrade_helpers/get_tg_revision.sql +2 -0
- testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +116 -0
- testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql +11 -0
- testgen/template/exec_cat_tests/ex_cat_results_parse.sql +69 -0
- testgen/template/exec_cat_tests/ex_cat_retrieve_agg_test_parms.sql +6 -0
- testgen/template/exec_cat_tests/ex_cat_test_query.sql +8 -0
- testgen/template/execution/ex_finalize_test_run_results.sql +37 -0
- testgen/template/execution/ex_get_tests_non_cat.sql +47 -0
- testgen/template/execution/ex_update_test_record_in_testrun_table.sql +27 -0
- testgen/template/execution/ex_write_test_record_to_testrun_table.sql +6 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +48 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql +34 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +49 -0
- testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +39 -0
- testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +58 -0
- testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +44 -0
- testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +37 -0
- testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +53 -0
- testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +46 -0
- testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +59 -0
- testgen/template/flavors/generic/profiling/contingency_counts.sql +3 -0
- testgen/template/flavors/generic/validate_tests/ex_get_project_column_list_generic.sql +3 -0
- testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql +53 -0
- testgen/template/flavors/mssql/profiling/project_ddf_query_mssql.sql +35 -0
- testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +246 -0
- testgen/template/flavors/mssql/profiling/project_secondary_profiling_query_mssql.sql +36 -0
- testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql +8 -0
- testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql +12 -0
- testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql +54 -0
- testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql +4 -0
- testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql +1 -0
- testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql +46 -0
- testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql +59 -0
- testgen/template/flavors/postgresql/profiling/project_ddf_query_postgresql.sql +42 -0
- testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +225 -0
- testgen/template/flavors/postgresql/profiling/project_secondary_profiling_query_postgresql.sql +28 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql +157 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql +1 -0
- testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql +2 -0
- testgen/template/flavors/redshift/profiling/project_ddf_query_redshift.sql +38 -0
- testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +221 -0
- testgen/template/flavors/redshift/profiling/project_secondary_profiling_query_redshift.sql +29 -0
- testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql +115 -0
- testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql +1 -0
- testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql +2 -0
- testgen/template/flavors/snowflake/profiling/project_ddf_query_snowflake.sql +38 -0
- testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +220 -0
- testgen/template/flavors/snowflake/profiling/project_secondary_profiling_query_snowflake.sql +29 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql +69 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql +1 -0
- testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql +6 -0
- testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml +219 -0
- testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql +92 -0
- testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql +1 -0
- testgen/template/gen_funny_cat_tests/gen_test_constant.sql +104 -0
- testgen/template/gen_funny_cat_tests/gen_test_distinct_value_ct.sql +98 -0
- testgen/template/gen_funny_cat_tests/gen_test_row_ct.sql +57 -0
- testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql +59 -0
- testgen/template/generation/gen_delete_old_tests.sql +5 -0
- testgen/template/generation/gen_insert_test_suite.sql +5 -0
- testgen/template/generation/gen_retrieve_or_insert_test_suite.sql +58 -0
- testgen/template/generation/gen_standard_test_type_list.sql +13 -0
- testgen/template/generation/gen_standard_tests.sql +48 -0
- testgen/template/get_entities/get_connection.sql +21 -0
- testgen/template/get_entities/get_connections_list.sql +9 -0
- testgen/template/get_entities/get_latest.sql +4 -0
- testgen/template/get_entities/get_profile.sql +12 -0
- testgen/template/get_entities/get_profile_info.sql +17 -0
- testgen/template/get_entities/get_profile_list.sql +17 -0
- testgen/template/get_entities/get_profile_screen.sql +275 -0
- testgen/template/get_entities/get_project_list.sql +6 -0
- testgen/template/get_entities/get_table_group_list.sql +10 -0
- testgen/template/get_entities/get_test_generation_list.sql +18 -0
- testgen/template/get_entities/get_test_info.sql +41 -0
- testgen/template/get_entities/get_test_results_for_run_cli.sql +16 -0
- testgen/template/get_entities/get_test_run_list.sql +24 -0
- testgen/template/get_entities/get_test_suite.sql +13 -0
- testgen/template/get_entities/get_test_suite_list.sql +18 -0
- testgen/template/get_entities/list_test_types.sql +4 -0
- testgen/template/observability/get_event_data.sql +23 -0
- testgen/template/observability/get_test_results.sql +41 -0
- testgen/template/observability/update_test_results_exported_to_observability.sql +12 -0
- testgen/template/parms/parms_profiling.sql +34 -0
- testgen/template/parms/parms_test_execution.sql +13 -0
- testgen/template/parms/parms_test_gen.sql +23 -0
- testgen/template/profiling/contingency_columns.sql +7 -0
- testgen/template/profiling/datatype_suggestions.sql +56 -0
- testgen/template/profiling/functional_datatype.sql +523 -0
- testgen/template/profiling/functional_tabletype_stage.sql +48 -0
- testgen/template/profiling/functional_tabletype_update.sql +8 -0
- testgen/template/profiling/pii_flag.sql +133 -0
- testgen/template/profiling/profile_anomalies_screen_column.sql +22 -0
- testgen/template/profiling/profile_anomalies_screen_multi_column.sql +58 -0
- testgen/template/profiling/profile_anomalies_screen_table.sql +22 -0
- testgen/template/profiling/profile_anomalies_screen_table_dates.sql +30 -0
- testgen/template/profiling/profile_anomalies_screen_variants.sql +40 -0
- testgen/template/profiling/profile_anomaly_types_get.sql +3 -0
- testgen/template/profiling/project_get_table_sample_count.sql +22 -0
- testgen/template/profiling/project_profile_run_record_insert.sql +8 -0
- testgen/template/profiling/project_profile_run_record_update.sql +5 -0
- testgen/template/profiling/project_profile_run_record_update_status.sql +5 -0
- testgen/template/profiling/project_update_profile_results_to_estimates.sql +32 -0
- testgen/template/profiling/refresh_anomalies.sql +33 -0
- testgen/template/profiling/refresh_data_chars_from_profiling.sql +156 -0
- testgen/template/profiling/secondary_profiling_columns.sql +12 -0
- testgen/template/profiling/secondary_profiling_delete.sql +4 -0
- testgen/template/profiling/secondary_profiling_update.sql +18 -0
- testgen/template/quick_start/populate_target_data.sql +1077 -0
- testgen/template/quick_start/recreate_target_data_schema.sql +167 -0
- testgen/template/quick_start/update_target_data.sql +100 -0
- testgen/template/updates/create_tmp_test_definition.sql +19 -0
- testgen/template/updates/get_test_def_parms.sql +38 -0
- testgen/template/updates/populate_stg_test_definitions.sql +184 -0
- testgen/template/validate_tests/ex_disable_tests_test_definitions.sql +5 -0
- testgen/template/validate_tests/ex_flag_tests_test_definitions.sql +64 -0
- testgen/template/validate_tests/ex_get_project_column_list_generic.sql +3 -0
- testgen/template/validate_tests/ex_get_test_column_list_tg.sql +65 -0
- testgen/template/validate_tests/ex_write_test_val_errors.sql +22 -0
- testgen/ui/__init__.py +0 -0
- testgen/ui/app.py +98 -0
- testgen/ui/assets/dk_logo.svg +46 -0
- testgen/ui/assets/question_mark.png +0 -0
- testgen/ui/assets/scripts.js +68 -0
- testgen/ui/assets/style.css +140 -0
- testgen/ui/bootstrap.py +109 -0
- testgen/ui/components/__init__.py +0 -0
- testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fBBc4.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fChc4EsA.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu4mxK.woff2 +0 -0
- testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu7GxKOzY.woff2 +0 -0
- testgen/ui/components/frontend/css/material-symbols-rounded.css +24 -0
- testgen/ui/components/frontend/css/material-symbols-rounded.woff2 +0 -0
- testgen/ui/components/frontend/css/roboto-font-faces.css +35 -0
- testgen/ui/components/frontend/css/shared.css +36 -0
- testgen/ui/components/frontend/img/dk_logo.svg +46 -0
- testgen/ui/components/frontend/index.html +17 -0
- testgen/ui/components/frontend/js/components/breadcrumbs.js +86 -0
- testgen/ui/components/frontend/js/components/button.js +66 -0
- testgen/ui/components/frontend/js/components/location.js +62 -0
- testgen/ui/components/frontend/js/components/select.js +75 -0
- testgen/ui/components/frontend/js/components/sidebar.js +358 -0
- testgen/ui/components/frontend/js/main.js +99 -0
- testgen/ui/components/frontend/js/streamlit.js +19 -0
- testgen/ui/components/frontend/js/van.min.js +1 -0
- testgen/ui/components/utils/__init__.py +0 -0
- testgen/ui/components/utils/callbacks.py +51 -0
- testgen/ui/components/utils/component.py +13 -0
- testgen/ui/components/widgets/__init__.py +6 -0
- testgen/ui/components/widgets/breadcrumbs.py +32 -0
- testgen/ui/components/widgets/location.py +65 -0
- testgen/ui/components/widgets/modal.py +97 -0
- testgen/ui/components/widgets/sidebar.py +69 -0
- testgen/ui/navigation/__init__.py +0 -0
- testgen/ui/navigation/menu.py +42 -0
- testgen/ui/navigation/page.py +20 -0
- testgen/ui/navigation/router.py +63 -0
- testgen/ui/queries/__init__.py +0 -0
- testgen/ui/queries/authentication_queries.py +47 -0
- testgen/ui/queries/connection_queries.py +121 -0
- testgen/ui/queries/profiling_queries.py +148 -0
- testgen/ui/queries/project_queries.py +9 -0
- testgen/ui/queries/table_group_queries.py +186 -0
- testgen/ui/queries/test_definition_queries.py +270 -0
- testgen/ui/queries/test_run_queries.py +32 -0
- testgen/ui/queries/test_suite_queries.py +145 -0
- testgen/ui/scripts/__init__.py +0 -0
- testgen/ui/scripts/patch_streamlit.py +111 -0
- testgen/ui/services/__init__.py +0 -0
- testgen/ui/services/authentication_service.py +119 -0
- testgen/ui/services/connection_service.py +220 -0
- testgen/ui/services/database_service.py +282 -0
- testgen/ui/services/form_service.py +1008 -0
- testgen/ui/services/javascript_service.py +44 -0
- testgen/ui/services/query_service.py +316 -0
- testgen/ui/services/string_service.py +12 -0
- testgen/ui/services/table_group_service.py +130 -0
- testgen/ui/services/test_definition_service.py +117 -0
- testgen/ui/services/test_run_service.py +13 -0
- testgen/ui/services/test_suite_service.py +76 -0
- testgen/ui/services/toolbar_service.py +77 -0
- testgen/ui/session.py +46 -0
- testgen/ui/views/__init__.py +0 -0
- testgen/ui/views/app_log_modal.py +92 -0
- testgen/ui/views/connections.py +72 -0
- testgen/ui/views/connections_base.py +367 -0
- testgen/ui/views/login.py +40 -0
- testgen/ui/views/not_found.py +16 -0
- testgen/ui/views/overview.py +34 -0
- testgen/ui/views/profiling_anomalies.py +501 -0
- testgen/ui/views/profiling_details.py +335 -0
- testgen/ui/views/profiling_modal.py +40 -0
- testgen/ui/views/profiling_results.py +206 -0
- testgen/ui/views/profiling_summary.py +177 -0
- testgen/ui/views/project_settings.py +74 -0
- testgen/ui/views/table_groups.py +530 -0
- testgen/ui/views/test_definitions.py +1020 -0
- testgen/ui/views/test_results.py +908 -0
- testgen/ui/views/test_runs.py +195 -0
- testgen/ui/views/test_suites.py +545 -0
- testgen/utils/__init__.py +0 -0
- testgen/utils/plugins.py +17 -0
- testgen/utils/singleton.py +14 -0
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import subprocess
|
|
3
|
+
import threading
|
|
4
|
+
import uuid
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
import testgen.common.process_service as process_service
|
|
9
|
+
from testgen import settings
|
|
10
|
+
from testgen.commands.queries.profiling_query import CProfilingSQL
|
|
11
|
+
from testgen.common import (
|
|
12
|
+
AssignConnectParms,
|
|
13
|
+
QuoteCSVItems,
|
|
14
|
+
RetrieveDBResultsToDictList,
|
|
15
|
+
RetrieveProfilingParms,
|
|
16
|
+
RunActionQueryList,
|
|
17
|
+
RunThreadedRetrievalQueryList,
|
|
18
|
+
WriteListToDB,
|
|
19
|
+
date_service,
|
|
20
|
+
read_template_sql_file,
|
|
21
|
+
)
|
|
22
|
+
from testgen.common.database.database_service import empty_cache
|
|
23
|
+
|
|
24
|
+
# Module-level flag. It is not read anywhere in the visible part of this module.
# NOTE(review): confirm where (or whether) it is consumed before changing or removing it.
booClean = True
|
|
25
|
+
# Logger used by the functions in this module; obtained under the name "testgen".
LOG = logging.getLogger("testgen")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def InitializeProfilingSQL(strProject, strSQLFlavor):
    """Create the profiling SQL generator for a project and SQL flavor.

    Both arguments are handed to ``CProfilingSQL`` unchanged, and the new
    instance is returned to the caller.
    """
    profiling_sql = CProfilingSQL(strProject, strSQLFlavor)
    return profiling_sql
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def CompileAnomalyTestQueries(clsProfiling):
    """Return the anomaly-test queries for every anomaly test type.

    The test types are read over the "DKTG" connection with the query that
    ``clsProfiling.GetAnomalyTestTypesQuery()`` produces. Each test type is
    then passed to ``clsProfiling.GetAnomalyTestQuery``; falsy results are
    left out of the returned list.
    """
    anomaly_types = RetrieveDBResultsToDictList("DKTG", clsProfiling.GetAnomalyTestTypesQuery())

    # One candidate query per test type, generated in retrieval order.
    candidates = map(clsProfiling.GetAnomalyTestQuery, anomaly_types)
    return [query for query in candidates if query]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _contingency_rule_rows(df_merged, threshold_ratio):
    """Build the ``profile_pair_rules`` rows for every qualifying value pair.

    Each input row describes one (first column value, second column value)
    pair. A rule row is produced for each direction whose ratio reaches
    ``threshold_ratio``: "first causes second" when ``pair_to_first_ratio``
    qualifies and "second causes first" when ``pair_to_second_ratio`` does.
    """
    column_labels = list(df_merged.columns)

    # (cause, effect) prefixes for the two directions, in the order they are saved.
    directions = (("first", "second"), ("second", "first"))

    lst_rules = []
    # name=None makes itertuples yield plain tuples. Named tuples would rename
    # any column that is not a valid identifier, is a keyword, or starts with an
    # underscore to a positional name, and the value lookup by the real column
    # name below would then fail.
    for values in df_merged.itertuples(index=False, name=None):
        row = dict(zip(column_labels, values))
        for cause, effect in directions:
            rule_ratio = row[f"pair_to_{cause}_ratio"]
            # Almost all occurrences of the cause value coincide with the effect value
            if rule_ratio >= threshold_ratio:
                cause_column_name = row[f"{cause}_column_name"]
                effect_column_name = row[f"{effect}_column_name"]
                lst_rules.append(
                    [
                        row["profiling_run_id"],
                        row["schema_name"],
                        row["table_name"],
                        cause_column_name,
                        # The value lives in the data column named by the row itself
                        row[cause_column_name],
                        effect_column_name,
                        row[effect_column_name],
                        row["pair_count"],
                        row[f"{cause}_column_overall_count"],
                        row[f"{effect}_column_overall_count"],
                        rule_ratio,
                    ]
                )
    return lst_rules


def save_contingency_rules(df_merged, threshold_ratio):
    """Write the qualifying pairwise rules in ``df_merged`` to ``profile_pair_rules``.

    Args:
        df_merged: DataFrame of value pairs. It must carry the columns
            ``profiling_run_id``, ``schema_name``, ``table_name``,
            ``first_column_name``, ``second_column_name``, ``pair_count``,
            ``first_column_overall_count``, ``second_column_overall_count``,
            ``pair_to_first_ratio`` and ``pair_to_second_ratio``, plus one data
            column for every name that appears in ``first_column_name`` or
            ``second_column_name``.
        threshold_ratio: Minimum ratio (as a fraction) a direction must reach
            to be saved as a rule.

    Raises:
        KeyError: If a required column is missing from ``df_merged``.
    """
    # Prep rows to save
    lst_rules = _contingency_rule_rows(df_merged, threshold_ratio)

    WriteListToDB(
        "DKTG",
        lst_rules,
        [
            "profile_run_id",
            "schema_name",
            "table_name",
            "cause_column_name",
            "cause_column_value",
            "effect_column_name",
            "effect_column_value",
            "pair_count",
            "cause_column_total",
            "effect_column_total",
            "rule_ratio",
        ],
        "profile_pair_rules",
    )
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _build_contingency_table(df_counts, columns, threshold_ratio):
    """Return the value pairs of one table that qualify as candidate rules.

    Args:
        df_counts: DataFrame with one column per entry in ``columns`` plus a
            ``freq_ct`` column holding the record count of each combination.
        columns: Names of the columns to compare pairwise.
        threshold_ratio: Minimum pair-to-column ratio (as a fraction).

    Returns:
        A DataFrame with one row per retained value pair, or ``None`` when
        ``columns`` contains fewer than two entries and so yields no pairs.
    """
    # Calculate overall counts for each column
    overall_counts = {col: df_counts.groupby(col)["freq_ct"].sum() for col in columns}

    # Build one long-format frame per column pair
    pair_frames = []
    for i, col1 in enumerate(columns):
        for col2 in columns[i + 1 :]:
            # Create a pivot table for each pair
            pivot = df_counts.pivot_table(index=col1, columns=col2, values="freq_ct", aggfunc="sum", fill_value=0)
            pivot = pivot.stack().reset_index()
            pivot = pivot.rename(columns={0: "pair_count"})

            # Add overall counts
            pivot["first_column_overall_count"] = pivot[col1].map(overall_counts[col1])
            pivot["second_column_overall_count"] = pivot[col2].map(overall_counts[col2])

            # Add column names
            pivot["first_column_name"] = col1
            pivot["second_column_name"] = col2

            pair_frames.append(pivot)

    if not pair_frames:
        # pd.concat raises ValueError when given an empty list
        return None

    # Combine all pairs into a single DataFrame
    contingency_table = pd.concat(pair_frames, ignore_index=True)

    # Calculate the ratios
    contingency_table["pair_to_first_ratio"] = (
        contingency_table["pair_count"] / contingency_table["first_column_overall_count"]
    )
    contingency_table["pair_to_second_ratio"] = (
        contingency_table["pair_count"] / contingency_table["second_column_overall_count"]
    )

    # Include rows where both cols meet minimum threshold count (max of 30 or 5%)
    total_observations = contingency_table["pair_count"].sum()
    threshold_min = max(total_observations * 0.05, 30)
    contingency_table = contingency_table[
        (contingency_table["first_column_overall_count"] >= threshold_min)
        & (contingency_table["second_column_overall_count"] >= threshold_min)
    ]

    # Drop rows where neither ratio meets the threshold ratio (keep if either meets it)
    # -- note we still have to check individual columns when saving pairs
    contingency_table = contingency_table[
        ~(
            (contingency_table["pair_to_first_ratio"] < threshold_ratio)
            & (contingency_table["pair_to_second_ratio"] < threshold_ratio)
        )
    ]

    # Return an independent frame so the caller can add columns to it safely
    return contingency_table.copy()


def RunPairwiseContingencyCheck(clsProfiling, threshold_ratio):
    """Identify and save pairs of values that represent IF X=A THEN Y=B rules.

    The tables and columns to examine are read over the "DKTG" connection;
    the per-combination record counts are read over the "PROJECT" connection.
    Rules found for a table are saved with ``save_contingency_rules`` before
    the next table is processed.

    Args:
        clsProfiling: Profiling SQL generator. Its ``contingency_max_values``,
            ``data_schema``, ``data_table`` and ``contingency_columns``
            attributes are set here, and its ``profile_run_id`` is stamped on
            every saved rule.
        threshold_ratio: Rule threshold expressed as a percentage (for
            example 95). A falsy value selects the default of 0.95.
    """
    # Define the threshold percent -- should be high
    threshold_ratio = threshold_ratio / 100.0 if threshold_ratio else 0.95
    str_max_values = "6"

    # Retrieve columns to include in list from profiling results
    clsProfiling.contingency_max_values = str_max_values
    str_query = clsProfiling.GetContingencyColumns()
    lst_tables = RetrieveDBResultsToDictList("DKTG", str_query)

    for dct_table in lst_tables or []:
        # Retrieve record counts per column combination
        clsProfiling.data_schema = dct_table["schema_name"]
        clsProfiling.data_table = dct_table["table_name"]
        clsProfiling.contingency_columns = QuoteCSVItems(dct_table["contingency_columns"])
        str_query = clsProfiling.GetContingencyCounts()
        lst_counts = RetrieveDBResultsToDictList("PROJECT", str_query)
        if not lst_counts:
            continue

        # Get list of columns
        columns = dct_table["contingency_columns"].lower().split(",")

        contingency_table = _build_contingency_table(pd.DataFrame(lst_counts), columns, threshold_ratio)
        if contingency_table is None or contingency_table.empty:
            continue

        # Add run and table identifiers
        contingency_table["profiling_run_id"] = clsProfiling.profile_run_id
        contingency_table["schema_name"] = dct_table["schema_name"]
        contingency_table["table_name"] = dct_table["table_name"]

        # Save per table: every table's rules are written, and the frame only
        # ever holds this table's own data columns. Concatenating frames from
        # different tables would pad the other tables' columns with NaN, which
        # can upcast integer values to float.
        save_contingency_rules(contingency_table, threshold_ratio)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def run_profiling_in_background(table_group_id):
    """Start a profiling run for a table group without waiting for it to finish.

    When ``settings.IS_DEBUG`` is set, the cache is emptied and
    ``run_profiling_queries`` is started on a new thread of this process.
    Otherwise the ``testgen run-profile -tg <table_group_id>`` command is
    launched as a separate process.
    """
    start_message = f"Starting run_profiling_in_background against table group_id: {table_group_id}"

    if not settings.IS_DEBUG:
        LOG.info(start_message)
        command = ["testgen", "run-profile", "-tg", table_group_id]
        subprocess.Popen(command)  # NOQA S603
        return

    LOG.info(start_message + ". Running in debug mode (new thread instead of new process).")
    empty_cache()
    worker = threading.Thread(target=run_profiling_queries, args=(table_group_id,))
    worker.start()
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def run_profiling_queries(strTableGroupsID, spinner=None):
|
|
236
|
+
if strTableGroupsID is None:
|
|
237
|
+
raise ValueError("Table Group ID was not specified")
|
|
238
|
+
|
|
239
|
+
booErrors = False
|
|
240
|
+
|
|
241
|
+
LOG.info("CurrentStep: Retrieving Parameters")
|
|
242
|
+
|
|
243
|
+
# Generate UUID for Profile Run ID
|
|
244
|
+
strProfileRunID = str(uuid.uuid4())
|
|
245
|
+
|
|
246
|
+
dctParms = RetrieveProfilingParms(strTableGroupsID)
|
|
247
|
+
|
|
248
|
+
LOG.info("CurrentStep: Initializing Query Generator")
|
|
249
|
+
clsProfiling = InitializeProfilingSQL(dctParms["project_code"], dctParms["sql_flavor"])
|
|
250
|
+
|
|
251
|
+
# Set Project Connection Parms in common.db_bridgers from retrieved parms
|
|
252
|
+
LOG.info("CurrentStep: Assigning Connection Parms")
|
|
253
|
+
AssignConnectParms(
|
|
254
|
+
dctParms["project_code"],
|
|
255
|
+
dctParms["connection_id"],
|
|
256
|
+
dctParms["project_host"],
|
|
257
|
+
dctParms["project_port"],
|
|
258
|
+
dctParms["project_db"],
|
|
259
|
+
dctParms["table_group_schema"],
|
|
260
|
+
dctParms["project_user"],
|
|
261
|
+
dctParms["sql_flavor"],
|
|
262
|
+
dctParms["url"],
|
|
263
|
+
dctParms["connect_by_url"],
|
|
264
|
+
dctParms["connect_by_key"],
|
|
265
|
+
dctParms["private_key"],
|
|
266
|
+
dctParms["private_key_passphrase"],
|
|
267
|
+
"PROJECT",
|
|
268
|
+
)
|
|
269
|
+
|
|
270
|
+
# Set General Parms
|
|
271
|
+
clsProfiling.table_groups_id = strTableGroupsID
|
|
272
|
+
clsProfiling.connection_id = dctParms["connection_id"]
|
|
273
|
+
clsProfiling.parm_do_sample = "N"
|
|
274
|
+
clsProfiling.parm_sample_size = 0
|
|
275
|
+
clsProfiling.parm_vldb_flag = "N"
|
|
276
|
+
clsProfiling.parm_do_freqs = "Y"
|
|
277
|
+
clsProfiling.parm_max_freq_length = 25
|
|
278
|
+
clsProfiling.parm_do_patterns = "Y"
|
|
279
|
+
clsProfiling.parm_max_pattern_length = 25
|
|
280
|
+
clsProfiling.profile_run_id = strProfileRunID
|
|
281
|
+
clsProfiling.data_qc_schema = dctParms["project_qc_schema"]
|
|
282
|
+
clsProfiling.data_schema = dctParms["table_group_schema"]
|
|
283
|
+
clsProfiling.parm_table_set = dctParms["profiling_table_set"]
|
|
284
|
+
clsProfiling.parm_table_include_mask = dctParms["profiling_include_mask"]
|
|
285
|
+
clsProfiling.parm_table_exclude_mask = dctParms["profiling_exclude_mask"]
|
|
286
|
+
clsProfiling.profile_id_column_mask = dctParms["profile_id_column_mask"]
|
|
287
|
+
clsProfiling.profile_sk_column_mask = dctParms["profile_sk_column_mask"]
|
|
288
|
+
clsProfiling.profile_use_sampling = dctParms["profile_use_sampling"]
|
|
289
|
+
clsProfiling.profile_sample_percent = dctParms["profile_sample_percent"]
|
|
290
|
+
clsProfiling.profile_sample_min_count = dctParms["profile_sample_min_count"]
|
|
291
|
+
clsProfiling.process_id = process_service.get_current_process_id()
|
|
292
|
+
|
|
293
|
+
# Add a record in profiling_runs table for the new profile
|
|
294
|
+
strProfileRunQuery = clsProfiling.GetProfileRunInfoRecordsQuery()
|
|
295
|
+
lstProfileRunQuery = [strProfileRunQuery]
|
|
296
|
+
RunActionQueryList("DKTG", lstProfileRunQuery)
|
|
297
|
+
if spinner:
|
|
298
|
+
spinner.next()
|
|
299
|
+
message = "Profiling completed "
|
|
300
|
+
try:
|
|
301
|
+
# Retrieve Column Metadata
|
|
302
|
+
LOG.info("CurrentStep: Getting DDF from project")
|
|
303
|
+
|
|
304
|
+
strQuery = clsProfiling.GetDDFQuery()
|
|
305
|
+
lstResult = RetrieveDBResultsToDictList("PROJECT", strQuery)
|
|
306
|
+
|
|
307
|
+
if len(lstResult) == 0:
|
|
308
|
+
LOG.warning("SQL retrieved 0 records")
|
|
309
|
+
|
|
310
|
+
if lstResult:
|
|
311
|
+
if clsProfiling.profile_use_sampling == "Y":
|
|
312
|
+
# Get distinct tables
|
|
313
|
+
distinct_tables = set()
|
|
314
|
+
for item in lstResult:
|
|
315
|
+
schema_name = item["table_schema"]
|
|
316
|
+
table_name = item["table_name"]
|
|
317
|
+
distinct_tables.add(f"{schema_name}.{table_name}")
|
|
318
|
+
|
|
319
|
+
# Convert the set to a list
|
|
320
|
+
distinct_tables_list = list(distinct_tables)
|
|
321
|
+
|
|
322
|
+
# Sampling tables
|
|
323
|
+
lstQueries = []
|
|
324
|
+
for parm_sampling_table in distinct_tables_list:
|
|
325
|
+
clsProfiling.sampling_table = parm_sampling_table
|
|
326
|
+
strQuery = clsProfiling.GetTableSampleCount()
|
|
327
|
+
lstQueries.append(strQuery)
|
|
328
|
+
|
|
329
|
+
lstSampleTables, _, intErrors = RunThreadedRetrievalQueryList(
|
|
330
|
+
"PROJECT", lstQueries, dctParms["max_threads"], spinner
|
|
331
|
+
)
|
|
332
|
+
dctSampleTables = {x[0]: [x[1], x[2]] for x in lstSampleTables}
|
|
333
|
+
if intErrors > 0:
|
|
334
|
+
booErrors = True
|
|
335
|
+
LOG.warning(
|
|
336
|
+
f"Errors were encountered retrieving sampling table counts. ({intErrors} errors occurred.) Please check log."
|
|
337
|
+
)
|
|
338
|
+
|
|
339
|
+
# Assemble profiling queries
|
|
340
|
+
if spinner:
|
|
341
|
+
spinner.next()
|
|
342
|
+
LOG.info("CurrentStep: Assembling profiling queries, round 1")
|
|
343
|
+
lstQueries = []
|
|
344
|
+
for dctColumnRecord in lstResult:
|
|
345
|
+
# Set Column Parms
|
|
346
|
+
clsProfiling.data_schema = dctColumnRecord["table_schema"]
|
|
347
|
+
clsProfiling.data_table = dctColumnRecord["table_name"]
|
|
348
|
+
clsProfiling.col_name = dctColumnRecord["column_name"]
|
|
349
|
+
clsProfiling.col_type = dctColumnRecord["data_type"]
|
|
350
|
+
clsProfiling.profile_run_id = strProfileRunID
|
|
351
|
+
clsProfiling.col_is_decimal = dctColumnRecord["is_decimal"]
|
|
352
|
+
clsProfiling.col_ordinal_position = dctColumnRecord["ordinal_position"]
|
|
353
|
+
clsProfiling.col_max_char_length = dctColumnRecord["character_maximum_length"]
|
|
354
|
+
clsProfiling.col_gen_type = dctColumnRecord["general_type"]
|
|
355
|
+
clsProfiling.parm_do_sample = "N"
|
|
356
|
+
|
|
357
|
+
if clsProfiling.profile_use_sampling == "Y":
|
|
358
|
+
if dctSampleTables[clsProfiling.data_schema + "." + clsProfiling.data_table][0] > -1:
|
|
359
|
+
clsProfiling.parm_sample_size = dctSampleTables[
|
|
360
|
+
clsProfiling.data_schema + "." + clsProfiling.data_table
|
|
361
|
+
][0]
|
|
362
|
+
clsProfiling.sample_ratio = dctSampleTables[
|
|
363
|
+
clsProfiling.data_schema + "." + clsProfiling.data_table
|
|
364
|
+
][1]
|
|
365
|
+
clsProfiling.parm_do_sample = clsProfiling.profile_use_sampling
|
|
366
|
+
else:
|
|
367
|
+
clsProfiling.parm_sample_size = 0
|
|
368
|
+
clsProfiling.sample_ratio = ""
|
|
369
|
+
|
|
370
|
+
strQuery = clsProfiling.GetProfilingQuery()
|
|
371
|
+
lstQueries.append(strQuery)
|
|
372
|
+
|
|
373
|
+
# Run Profiling Queries and save results
|
|
374
|
+
LOG.info("CurrentStep: Profiling Round 1")
|
|
375
|
+
LOG.debug("Running %s profiling queries", len(lstQueries))
|
|
376
|
+
|
|
377
|
+
lstProfiles, colProfileNames, intErrors = RunThreadedRetrievalQueryList(
|
|
378
|
+
"PROJECT", lstQueries, dctParms["max_threads"], spinner
|
|
379
|
+
)
|
|
380
|
+
if intErrors > 0:
|
|
381
|
+
booErrors = True
|
|
382
|
+
LOG.warning(
|
|
383
|
+
f"Errors were encountered executing profiling queries. ({intErrors} errors occurred.) Please check log."
|
|
384
|
+
)
|
|
385
|
+
LOG.info("CurrentStep: Saving Round 1 profiling results to Metadata")
|
|
386
|
+
WriteListToDB("DKTG", lstProfiles, colProfileNames, "profile_results")
|
|
387
|
+
|
|
388
|
+
if clsProfiling.profile_use_sampling == "Y":
|
|
389
|
+
lstQueries = []
|
|
390
|
+
for table_name, value in dctSampleTables.items():
|
|
391
|
+
if value[0] > -1:
|
|
392
|
+
clsProfiling.sampling_table = table_name
|
|
393
|
+
clsProfiling.sample_ratio = value[1]
|
|
394
|
+
strQuery = clsProfiling.UpdateProfileResultsToEst()
|
|
395
|
+
lstQueries.append(strQuery)
|
|
396
|
+
|
|
397
|
+
RunActionQueryList("DKTG", lstQueries)
|
|
398
|
+
|
|
399
|
+
if clsProfiling.parm_do_freqs == "Y":
|
|
400
|
+
lstUpdates = []
|
|
401
|
+
# Get secondary profiling columns
|
|
402
|
+
LOG.info("CurrentStep: Selecting columns for frequency analysis")
|
|
403
|
+
strQuery = clsProfiling.GetSecondProfilingColumnsQuery()
|
|
404
|
+
lstResult = RetrieveDBResultsToDictList("DKTG", strQuery)
|
|
405
|
+
|
|
406
|
+
if lstResult:
|
|
407
|
+
# Assemble secondary profiling queries
|
|
408
|
+
# - Freqs for columns not already freq'd, but with max actual value length under threshold
|
|
409
|
+
LOG.info("CurrentStep: Generating frequency queries")
|
|
410
|
+
lstQueries = []
|
|
411
|
+
for dctColumnRecord in lstResult:
|
|
412
|
+
clsProfiling.data_schema = dctColumnRecord["schema_name"]
|
|
413
|
+
clsProfiling.data_table = dctColumnRecord["table_name"]
|
|
414
|
+
clsProfiling.col_name = dctColumnRecord["column_name"]
|
|
415
|
+
|
|
416
|
+
strQuery = clsProfiling.GetSecondProfilingQuery()
|
|
417
|
+
lstQueries.append(strQuery)
|
|
418
|
+
# Run secondary profiling queries
|
|
419
|
+
LOG.info("CurrentStep: Retrieving %s frequency results from project", len(lstQueries))
|
|
420
|
+
lstUpdates, colProfileNames, intErrors = RunThreadedRetrievalQueryList(
|
|
421
|
+
"PROJECT", lstQueries, dctParms["max_threads"], spinner
|
|
422
|
+
)
|
|
423
|
+
if intErrors > 0:
|
|
424
|
+
booErrors = True
|
|
425
|
+
LOG.warning(
|
|
426
|
+
f"Errors were encountered executing frequency queries. ({intErrors} errors occurred.) Please check log."
|
|
427
|
+
)
|
|
428
|
+
|
|
429
|
+
if lstUpdates:
|
|
430
|
+
# Copy secondary results to DQ staging
|
|
431
|
+
LOG.info("CurrentStep: Writing frequency results to Staging")
|
|
432
|
+
WriteListToDB("DKTG", lstUpdates, colProfileNames, "stg_secondary_profile_updates")
|
|
433
|
+
|
|
434
|
+
LOG.info("CurrentStep: Generating profiling update queries")
|
|
435
|
+
|
|
436
|
+
lstQueries = []
|
|
437
|
+
|
|
438
|
+
if lstUpdates:
|
|
439
|
+
# Run single update query, then delete from staging
|
|
440
|
+
strQuery = clsProfiling.GetSecondProfilingUpdateQuery()
|
|
441
|
+
lstQueries.append(strQuery)
|
|
442
|
+
strQuery = clsProfiling.GetSecondProfilingStageDeleteQuery()
|
|
443
|
+
lstQueries.append(strQuery)
|
|
444
|
+
strQuery = clsProfiling.GetDataTypeSuggestionUpdateQuery()
|
|
445
|
+
lstQueries.append(strQuery)
|
|
446
|
+
strQuery = clsProfiling.GetFunctionalDataTypeUpdateQuery()
|
|
447
|
+
lstQueries.append(strQuery)
|
|
448
|
+
strQuery = clsProfiling.GetFunctionalTableTypeStageQuery()
|
|
449
|
+
lstQueries.append(strQuery)
|
|
450
|
+
strQuery = clsProfiling.GetFunctionalTableTypeUpdateQuery()
|
|
451
|
+
lstQueries.append(strQuery)
|
|
452
|
+
strQuery = clsProfiling.GetPIIFlagUpdateQuery()
|
|
453
|
+
lstQueries.append(strQuery)
|
|
454
|
+
lstQueries.extend(CompileAnomalyTestQueries(clsProfiling))
|
|
455
|
+
strQuery = clsProfiling.GetAnomalyRefreshQuery()
|
|
456
|
+
lstQueries.append(strQuery)
|
|
457
|
+
# Always runs last
|
|
458
|
+
strQuery = clsProfiling.GetDataCharsRefreshQuery()
|
|
459
|
+
lstQueries.append(strQuery)
|
|
460
|
+
|
|
461
|
+
LOG.info("CurrentStep: Running profiling update queries")
|
|
462
|
+
RunActionQueryList("DKTG", lstQueries)
|
|
463
|
+
|
|
464
|
+
if dctParms["profile_do_pair_rules"] == "Y":
|
|
465
|
+
LOG.info("CurrentStep: Compiling pairwise contingency rules")
|
|
466
|
+
RunPairwiseContingencyCheck(clsProfiling, dctParms["profile_pair_rule_pct"])
|
|
467
|
+
else:
|
|
468
|
+
LOG.info("No columns were selected to profile.")
|
|
469
|
+
except Exception as e:
|
|
470
|
+
booErrors = True
|
|
471
|
+
sqlsplit = e.args[0].split("[SQL", 1)
|
|
472
|
+
errorline = sqlsplit[0].replace("'", "''") if len(sqlsplit) > 0 else "unknown error"
|
|
473
|
+
clsProfiling.exception_message = f"{type(e).__name__}: {errorline}"
|
|
474
|
+
raise
|
|
475
|
+
finally:
|
|
476
|
+
LOG.info("Updating the profiling run record")
|
|
477
|
+
lstProfileRunQuery = [clsProfiling.GetProfileRunInfoRecordUpdateQuery()]
|
|
478
|
+
RunActionQueryList("DKTG", lstProfileRunQuery)
|
|
479
|
+
if booErrors:
|
|
480
|
+
str_error_status = "with errors. Check log for details."
|
|
481
|
+
else:
|
|
482
|
+
str_error_status = "successfully."
|
|
483
|
+
message += str_error_status
|
|
484
|
+
return message
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def update_profile_run_status(profile_run_id, status):
    """Render the profile-run status update SQL and execute it on "DKTG".

    The template "project_profile_run_record_update_status.sql" (from the
    "profiling" sub-directory) has its placeholders substituted one after
    another, in the order written below, and the resulting statement is
    passed to RunActionQueryList as a single-element list.

    Args:
        profile_run_id: Text substituted for ``{PROFILE_RUN_ID}``.
        status: Text substituted for ``{STATUS}``.
    """
    # {EXCEPTION_MESSAGE} is always blanked by this function.
    status_query = (
        read_template_sql_file("project_profile_run_record_update_status.sql", sub_directory="profiling")
        .replace("{STATUS}", status)
        .replace("{NOW}", date_service.get_now_as_string())
        .replace("{EXCEPTION_MESSAGE}", "")
        .replace("{PROFILE_RUN_ID}", profile_run_id)
    )

    RunActionQueryList("DKTG", [status_query])
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from testgen import settings
|
|
4
|
+
from testgen.commands.run_get_entities import run_table_group_list
|
|
5
|
+
from testgen.commands.run_launch_db_config import run_launch_db_config
|
|
6
|
+
from testgen.commands.run_setup_profiling_tools import run_setup_profiling_tools
|
|
7
|
+
from testgen.common import display_service
|
|
8
|
+
from testgen.common.database.database_service import (
|
|
9
|
+
AssignConnectParms,
|
|
10
|
+
CreateDatabaseIfNotExists,
|
|
11
|
+
RunActionQueryList,
|
|
12
|
+
replace_params,
|
|
13
|
+
)
|
|
14
|
+
from testgen.common.read_file import read_template_sql_file
|
|
15
|
+
|
|
16
|
+
# Module-level logger, obtained under the "testgen" logger name.
LOG = logging.getLogger("testgen")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _get_max_date(iteration: int):
|
|
20
|
+
if iteration == 0:
|
|
21
|
+
return "2023-05-31"
|
|
22
|
+
elif iteration == 1:
|
|
23
|
+
return "2023-06-30"
|
|
24
|
+
elif iteration == 2:
|
|
25
|
+
return "2023-07-31"
|
|
26
|
+
elif iteration == 3:
|
|
27
|
+
return "2023-08-30"
|
|
28
|
+
else:
|
|
29
|
+
raise ValueError(f"Unsupported iteration: {iteration}")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _get_max_customerid_seq(iteration: int):
|
|
33
|
+
if iteration == 0:
|
|
34
|
+
return "100501"
|
|
35
|
+
elif iteration == 1:
|
|
36
|
+
return "100508"
|
|
37
|
+
elif iteration == 2:
|
|
38
|
+
return "100523"
|
|
39
|
+
elif iteration == 3:
|
|
40
|
+
return "100527"
|
|
41
|
+
else:
|
|
42
|
+
raise ValueError(f"Unsupported iteration: {iteration}")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _get_max_supplierid_seq(iteration: int):
|
|
46
|
+
if iteration == 0:
|
|
47
|
+
return "40027"
|
|
48
|
+
elif iteration == 1:
|
|
49
|
+
return "40031"
|
|
50
|
+
elif iteration == 2:
|
|
51
|
+
return "40036"
|
|
52
|
+
elif iteration == 3:
|
|
53
|
+
return "40039"
|
|
54
|
+
else:
|
|
55
|
+
raise ValueError(f"Unsupported iteration: {iteration}")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _get_max_productid_seq(iteration: int):
|
|
59
|
+
if iteration == 0:
|
|
60
|
+
return "30041"
|
|
61
|
+
elif iteration == 1:
|
|
62
|
+
return "30045"
|
|
63
|
+
elif iteration == 2:
|
|
64
|
+
return "30049"
|
|
65
|
+
elif iteration == 3:
|
|
66
|
+
return "30054"
|
|
67
|
+
else:
|
|
68
|
+
raise ValueError(f"Unsupported iteration: {iteration}")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _prepare_connection_to_target_database(params_mapping):
    """Register the target-database connection parameters under "PROJECT".

    Reads the project key, host, port, database, schema, admin user and SQL
    flavor from ``params_mapping`` and forwards them positionally to
    AssignConnectParms, together with several literal ``None``/``False``
    placeholders and the connection name "PROJECT".

    Args:
        params_mapping: Mapping that must contain the keys "PROJECT_KEY",
            "PROJECT_DB_HOST", "PROJECT_DB_PORT", "PROJECT_DB",
            "PROJECT_SCHEMA", "TESTGEN_ADMIN_USER" and "SQL_FLAVOR".
    """
    project_key = params_mapping["PROJECT_KEY"]
    db_host = params_mapping["PROJECT_DB_HOST"]
    db_port = params_mapping["PROJECT_DB_PORT"]
    db_name = params_mapping["PROJECT_DB"]
    db_schema = params_mapping["PROJECT_SCHEMA"]
    admin_user = params_mapping["TESTGEN_ADMIN_USER"]
    sql_flavor = params_mapping["SQL_FLAVOR"]

    # NOTE(review): the meaning of the literal None/False positions is defined
    # by AssignConnectParms' signature, which is not visible here - confirm
    # before reordering anything.
    AssignConnectParms(
        project_key,
        None,
        db_host,
        db_port,
        db_name,
        db_schema,
        admin_user,
        sql_flavor,
        None,
        None,
        False,
        None,
        None,
        "PROJECT",
    )
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _get_params_mapping(iteration: int = 0) -> dict:
    """Build the placeholder-name to value mapping for the quick-start scripts.

    Args:
        iteration: Iteration number forwarded to the ``_get_max_*`` helpers
            and stored under "ITERATION_NUMBER". Defaults to 0.

    Returns:
        A dict holding the database/project values read from ``settings``
        followed by the iteration-specific values.

    Raises:
        ValueError: Propagated from the ``_get_max_*`` helpers when the
            iteration is not one they support.
    """
    # Values taken straight from the settings module.
    mapping = {
        "TESTGEN_ADMIN_USER": settings.DATABASE_ADMIN_USER,
        "TESTGEN_ADMIN_PASSWORD": settings.DATABASE_ADMIN_PASSWORD,
        "PROJECT_DB": settings.PROJECT_DATABASE_NAME,
        "PROJECT_SCHEMA": settings.PROJECT_DATABASE_SCHEMA,
        "PROJECT_KEY": settings.PROJECT_KEY,
        "PROJECT_DB_HOST": settings.PROJECT_DATABASE_HOST,
        "PROJECT_DB_PORT": settings.PROJECT_DATABASE_PORT,
        "SQL_FLAVOR": settings.PROJECT_SQL_FLAVOR,
    }

    # Values that depend on the requested iteration.
    mapping.update(
        {
            "MAX_SUPPLIER_ID_SEQ": _get_max_supplierid_seq(iteration),
            "MAX_PRODUCT_ID_SEQ": _get_max_productid_seq(iteration),
            "MAX_CUSTOMER_ID_SEQ": _get_max_customerid_seq(iteration),
            "MAX_DATE": _get_max_date(iteration),
            "ITERATION_NUMBER": iteration,
        }
    )
    return mapping
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def run_quick_start(delete_target_db: bool) -> None:
    """Set up the quick-start environment end to end.

    Steps, in order: prepare the "PROJECT" connection, create the target
    database, run the system database setup, recreate and populate the target
    data schema, look up a connection id from the table group list, and run
    the profiling tools setup with QC schema creation enabled.

    Args:
        delete_target_db: Forwarded to CreateDatabaseIfNotExists as its third
            positional argument.
    """
    # Connection parameters for the target database.
    params = _get_params_mapping()
    _prepare_connection_to_target_database(params)

    # Create the target database.
    db_name = params["PROJECT_DB"]
    display_service.echo(f"Creating target db : {db_name}")
    CreateDatabaseIfNotExists(db_name, params, delete_target_db, drop_users_and_roles=False)

    # System database setup; the echoed CLI command is informational only.
    setup_command = "testgen setup-system-db --delete-db --yes"
    display_service.echo(f"Running CLI command: {setup_command}")
    run_launch_db_config(True)

    # Recreate the schema, then load the data, each from its SQL template.
    display_service.echo(f"Populating target db : {db_name}")
    template_names = ("recreate_target_data_schema.sql", "populate_target_data.sql")
    population_queries = [
        replace_params(read_template_sql_file(template_name, "quick_start"), params)
        for template_name in template_names
    ]
    admin_user = params["TESTGEN_ADMIN_USER"]
    admin_password = params["TESTGEN_ADMIN_PASSWORD"]
    RunActionQueryList(
        "PROJECT",
        population_queries,
        user_override=admin_user,
        pwd_override=admin_password,
    )

    # The connection id is read from the third column of the first row.
    table_group_rows, _ = run_table_group_list(params["PROJECT_KEY"])
    connection_id = str(table_group_rows[0][2])

    # Profiling tools setup: positional flags are dry_run=False, create_qc_schema=True.
    qc_command = "testgen setup-target-db-functions --connection-id <CONNECTION_ID> --create-qc-schema --yes"
    display_service.echo(f"Running CLI command: {qc_command}")
    qc_schema = run_setup_profiling_tools(connection_id, False, True, admin_user, admin_password)
    display_service.echo(f"Schema {qc_schema} has been created in the target db")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def run_quick_start_increment(iteration):
    """Apply the incremental data update for one quick-start iteration.

    Prepares the "PROJECT" connection, renders "update_target_data.sql" from
    the "quick_start" templates with the iteration's parameters, and executes
    it using the admin credentials from the parameter mapping.

    Args:
        iteration: Iteration number passed to ``_get_params_mapping``.
    """
    params = _get_params_mapping(iteration)
    _prepare_connection_to_target_database(params)

    db_name = params["PROJECT_DB"]
    LOG.info(f"Incremental population of target db : {db_name}")

    update_template = read_template_sql_file("update_target_data.sql", "quick_start")
    update_queries = [replace_params(update_template, params)]

    admin_user = params["TESTGEN_ADMIN_USER"]
    admin_password = params["TESTGEN_ADMIN_PASSWORD"]
    RunActionQueryList("PROJECT", update_queries, user_override=admin_user, pwd_override=admin_password)
|