dataops-testgen 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. dataops_testgen-2.2.0.dist-info/LICENSE +203 -0
  2. dataops_testgen-2.2.0.dist-info/METADATA +287 -0
  3. dataops_testgen-2.2.0.dist-info/NOTICE +5 -0
  4. dataops_testgen-2.2.0.dist-info/RECORD +270 -0
  5. dataops_testgen-2.2.0.dist-info/WHEEL +5 -0
  6. dataops_testgen-2.2.0.dist-info/entry_points.txt +2 -0
  7. dataops_testgen-2.2.0.dist-info/top_level.txt +1 -0
  8. testgen/__init__.py +0 -0
  9. testgen/__main__.py +770 -0
  10. testgen/commands/__init__.py +0 -0
  11. testgen/commands/queries/__init__.py +0 -0
  12. testgen/commands/queries/execute_cat_tests_query.py +95 -0
  13. testgen/commands/queries/execute_tests_query.py +160 -0
  14. testgen/commands/queries/generate_tests_query.py +94 -0
  15. testgen/commands/queries/profiling_query.py +366 -0
  16. testgen/commands/queries/test_parameter_validation_query.py +88 -0
  17. testgen/commands/run_execute_cat_tests.py +162 -0
  18. testgen/commands/run_execute_tests.py +168 -0
  19. testgen/commands/run_generate_tests.py +107 -0
  20. testgen/commands/run_get_entities.py +122 -0
  21. testgen/commands/run_launch_db_config.py +84 -0
  22. testgen/commands/run_observability_exporter.py +330 -0
  23. testgen/commands/run_profiling_bridge.py +495 -0
  24. testgen/commands/run_quick_start.py +168 -0
  25. testgen/commands/run_setup_profiling_tools.py +96 -0
  26. testgen/commands/run_test_definition.py +146 -0
  27. testgen/commands/run_test_parameter_validation.py +135 -0
  28. testgen/commands/run_upgrade_db_config.py +156 -0
  29. testgen/common/__init__.py +8 -0
  30. testgen/common/clean_sql.py +53 -0
  31. testgen/common/credentials.py +25 -0
  32. testgen/common/database/__init__.py +0 -0
  33. testgen/common/database/database_service.py +629 -0
  34. testgen/common/database/flavor/__init__.py +0 -0
  35. testgen/common/database/flavor/flavor_service.py +75 -0
  36. testgen/common/database/flavor/mssql_flavor_service.py +34 -0
  37. testgen/common/database/flavor/postgresql_flavor_service.py +5 -0
  38. testgen/common/database/flavor/redshift_flavor_service.py +22 -0
  39. testgen/common/database/flavor/snowflake_flavor_service.py +69 -0
  40. testgen/common/database/flavor/trino_flavor_service.py +21 -0
  41. testgen/common/date_service.py +68 -0
  42. testgen/common/display_service.py +85 -0
  43. testgen/common/docker_service.py +76 -0
  44. testgen/common/encrypt.py +55 -0
  45. testgen/common/get_pipeline_parms.py +57 -0
  46. testgen/common/logs.py +79 -0
  47. testgen/common/process_service.py +62 -0
  48. testgen/common/read_file.py +69 -0
  49. testgen/settings.py +440 -0
  50. testgen/template/dbsetup/010_create_base_schema.sql +2 -0
  51. testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +179 -0
  52. testgen/template/dbsetup/030_initialize_new_schema_structure.sql +735 -0
  53. testgen/template/dbsetup/040_populate_new_schema_project.sql +59 -0
  54. testgen/template/dbsetup/050_populate_new_schema_metadata.sql +1517 -0
  55. testgen/template/dbsetup/060_create_standard_views.sql +248 -0
  56. testgen/template/dbsetup/070_create_default_users.sql +17 -0
  57. testgen/template/dbsetup/075_grant_role_rights.sql +43 -0
  58. testgen/template/dbsetup/080_set_current_revision.sql +5 -0
  59. testgen/template/dbupgrade/0100_incremental_upgrade.sql +5 -0
  60. testgen/template/dbupgrade/0101_incremental_upgrade.sql +15 -0
  61. testgen/template/dbupgrade/0102_incremental_upgrade.sql +4 -0
  62. testgen/template/dbupgrade/0103_incremental_upgrade.sql +22 -0
  63. testgen/template/dbupgrade/0104_incremental_upgrade.sql +44 -0
  64. testgen/template/dbupgrade/0105_incremental_upgrade.sql +1 -0
  65. testgen/template/dbupgrade/0106_incremental_upgrade.sql +5 -0
  66. testgen/template/dbupgrade/0107_incremental_upgrade.sql +3 -0
  67. testgen/template/dbupgrade_helpers/get_tg_revision.sql +2 -0
  68. testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +116 -0
  69. testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql +11 -0
  70. testgen/template/exec_cat_tests/ex_cat_results_parse.sql +69 -0
  71. testgen/template/exec_cat_tests/ex_cat_retrieve_agg_test_parms.sql +6 -0
  72. testgen/template/exec_cat_tests/ex_cat_test_query.sql +8 -0
  73. testgen/template/execution/ex_finalize_test_run_results.sql +37 -0
  74. testgen/template/execution/ex_get_tests_non_cat.sql +47 -0
  75. testgen/template/execution/ex_update_test_record_in_testrun_table.sql +27 -0
  76. testgen/template/execution/ex_write_test_record_to_testrun_table.sql +6 -0
  77. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +48 -0
  78. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql +34 -0
  79. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql +49 -0
  80. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql +49 -0
  81. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +49 -0
  82. testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +39 -0
  83. testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +58 -0
  84. testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +44 -0
  85. testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +37 -0
  86. testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +53 -0
  87. testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +46 -0
  88. testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +59 -0
  89. testgen/template/flavors/generic/profiling/contingency_counts.sql +3 -0
  90. testgen/template/flavors/generic/validate_tests/ex_get_project_column_list_generic.sql +3 -0
  91. testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql +53 -0
  92. testgen/template/flavors/mssql/profiling/project_ddf_query_mssql.sql +35 -0
  93. testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +246 -0
  94. testgen/template/flavors/mssql/profiling/project_secondary_profiling_query_mssql.sql +36 -0
  95. testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql +8 -0
  96. testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql +12 -0
  97. testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql +54 -0
  98. testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql +4 -0
  99. testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql +1 -0
  100. testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql +46 -0
  101. testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql +59 -0
  102. testgen/template/flavors/postgresql/profiling/project_ddf_query_postgresql.sql +42 -0
  103. testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +225 -0
  104. testgen/template/flavors/postgresql/profiling/project_secondary_profiling_query_postgresql.sql +28 -0
  105. testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql +157 -0
  106. testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql +1 -0
  107. testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql +2 -0
  108. testgen/template/flavors/redshift/profiling/project_ddf_query_redshift.sql +38 -0
  109. testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +221 -0
  110. testgen/template/flavors/redshift/profiling/project_secondary_profiling_query_redshift.sql +29 -0
  111. testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql +115 -0
  112. testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql +1 -0
  113. testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql +2 -0
  114. testgen/template/flavors/snowflake/profiling/project_ddf_query_snowflake.sql +38 -0
  115. testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +220 -0
  116. testgen/template/flavors/snowflake/profiling/project_secondary_profiling_query_snowflake.sql +29 -0
  117. testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql +69 -0
  118. testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql +1 -0
  119. testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql +6 -0
  120. testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml +219 -0
  121. testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql +92 -0
  122. testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql +1 -0
  123. testgen/template/gen_funny_cat_tests/gen_test_constant.sql +104 -0
  124. testgen/template/gen_funny_cat_tests/gen_test_distinct_value_ct.sql +98 -0
  125. testgen/template/gen_funny_cat_tests/gen_test_row_ct.sql +57 -0
  126. testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql +59 -0
  127. testgen/template/generation/gen_delete_old_tests.sql +5 -0
  128. testgen/template/generation/gen_insert_test_suite.sql +5 -0
  129. testgen/template/generation/gen_retrieve_or_insert_test_suite.sql +58 -0
  130. testgen/template/generation/gen_standard_test_type_list.sql +13 -0
  131. testgen/template/generation/gen_standard_tests.sql +48 -0
  132. testgen/template/get_entities/get_connection.sql +21 -0
  133. testgen/template/get_entities/get_connections_list.sql +9 -0
  134. testgen/template/get_entities/get_latest.sql +4 -0
  135. testgen/template/get_entities/get_profile.sql +12 -0
  136. testgen/template/get_entities/get_profile_info.sql +17 -0
  137. testgen/template/get_entities/get_profile_list.sql +17 -0
  138. testgen/template/get_entities/get_profile_screen.sql +275 -0
  139. testgen/template/get_entities/get_project_list.sql +6 -0
  140. testgen/template/get_entities/get_table_group_list.sql +10 -0
  141. testgen/template/get_entities/get_test_generation_list.sql +18 -0
  142. testgen/template/get_entities/get_test_info.sql +41 -0
  143. testgen/template/get_entities/get_test_results_for_run_cli.sql +16 -0
  144. testgen/template/get_entities/get_test_run_list.sql +24 -0
  145. testgen/template/get_entities/get_test_suite.sql +13 -0
  146. testgen/template/get_entities/get_test_suite_list.sql +18 -0
  147. testgen/template/get_entities/list_test_types.sql +4 -0
  148. testgen/template/observability/get_event_data.sql +23 -0
  149. testgen/template/observability/get_test_results.sql +41 -0
  150. testgen/template/observability/update_test_results_exported_to_observability.sql +12 -0
  151. testgen/template/parms/parms_profiling.sql +34 -0
  152. testgen/template/parms/parms_test_execution.sql +13 -0
  153. testgen/template/parms/parms_test_gen.sql +23 -0
  154. testgen/template/profiling/contingency_columns.sql +7 -0
  155. testgen/template/profiling/datatype_suggestions.sql +56 -0
  156. testgen/template/profiling/functional_datatype.sql +523 -0
  157. testgen/template/profiling/functional_tabletype_stage.sql +48 -0
  158. testgen/template/profiling/functional_tabletype_update.sql +8 -0
  159. testgen/template/profiling/pii_flag.sql +133 -0
  160. testgen/template/profiling/profile_anomalies_screen_column.sql +22 -0
  161. testgen/template/profiling/profile_anomalies_screen_multi_column.sql +58 -0
  162. testgen/template/profiling/profile_anomalies_screen_table.sql +22 -0
  163. testgen/template/profiling/profile_anomalies_screen_table_dates.sql +30 -0
  164. testgen/template/profiling/profile_anomalies_screen_variants.sql +40 -0
  165. testgen/template/profiling/profile_anomaly_types_get.sql +3 -0
  166. testgen/template/profiling/project_get_table_sample_count.sql +22 -0
  167. testgen/template/profiling/project_profile_run_record_insert.sql +8 -0
  168. testgen/template/profiling/project_profile_run_record_update.sql +5 -0
  169. testgen/template/profiling/project_profile_run_record_update_status.sql +5 -0
  170. testgen/template/profiling/project_update_profile_results_to_estimates.sql +32 -0
  171. testgen/template/profiling/refresh_anomalies.sql +33 -0
  172. testgen/template/profiling/refresh_data_chars_from_profiling.sql +156 -0
  173. testgen/template/profiling/secondary_profiling_columns.sql +12 -0
  174. testgen/template/profiling/secondary_profiling_delete.sql +4 -0
  175. testgen/template/profiling/secondary_profiling_update.sql +18 -0
  176. testgen/template/quick_start/populate_target_data.sql +1077 -0
  177. testgen/template/quick_start/recreate_target_data_schema.sql +167 -0
  178. testgen/template/quick_start/update_target_data.sql +100 -0
  179. testgen/template/updates/create_tmp_test_definition.sql +19 -0
  180. testgen/template/updates/get_test_def_parms.sql +38 -0
  181. testgen/template/updates/populate_stg_test_definitions.sql +184 -0
  182. testgen/template/validate_tests/ex_disable_tests_test_definitions.sql +5 -0
  183. testgen/template/validate_tests/ex_flag_tests_test_definitions.sql +64 -0
  184. testgen/template/validate_tests/ex_get_project_column_list_generic.sql +3 -0
  185. testgen/template/validate_tests/ex_get_test_column_list_tg.sql +65 -0
  186. testgen/template/validate_tests/ex_write_test_val_errors.sql +22 -0
  187. testgen/ui/__init__.py +0 -0
  188. testgen/ui/app.py +98 -0
  189. testgen/ui/assets/dk_logo.svg +46 -0
  190. testgen/ui/assets/question_mark.png +0 -0
  191. testgen/ui/assets/scripts.js +68 -0
  192. testgen/ui/assets/style.css +140 -0
  193. testgen/ui/bootstrap.py +109 -0
  194. testgen/ui/components/__init__.py +0 -0
  195. testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fBBc4.woff2 +0 -0
  196. testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fChc4EsA.woff2 +0 -0
  197. testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu4mxK.woff2 +0 -0
  198. testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu7GxKOzY.woff2 +0 -0
  199. testgen/ui/components/frontend/css/material-symbols-rounded.css +24 -0
  200. testgen/ui/components/frontend/css/material-symbols-rounded.woff2 +0 -0
  201. testgen/ui/components/frontend/css/roboto-font-faces.css +35 -0
  202. testgen/ui/components/frontend/css/shared.css +36 -0
  203. testgen/ui/components/frontend/img/dk_logo.svg +46 -0
  204. testgen/ui/components/frontend/index.html +17 -0
  205. testgen/ui/components/frontend/js/components/breadcrumbs.js +86 -0
  206. testgen/ui/components/frontend/js/components/button.js +66 -0
  207. testgen/ui/components/frontend/js/components/location.js +62 -0
  208. testgen/ui/components/frontend/js/components/select.js +75 -0
  209. testgen/ui/components/frontend/js/components/sidebar.js +358 -0
  210. testgen/ui/components/frontend/js/main.js +99 -0
  211. testgen/ui/components/frontend/js/streamlit.js +19 -0
  212. testgen/ui/components/frontend/js/van.min.js +1 -0
  213. testgen/ui/components/utils/__init__.py +0 -0
  214. testgen/ui/components/utils/callbacks.py +51 -0
  215. testgen/ui/components/utils/component.py +13 -0
  216. testgen/ui/components/widgets/__init__.py +6 -0
  217. testgen/ui/components/widgets/breadcrumbs.py +32 -0
  218. testgen/ui/components/widgets/location.py +65 -0
  219. testgen/ui/components/widgets/modal.py +97 -0
  220. testgen/ui/components/widgets/sidebar.py +69 -0
  221. testgen/ui/navigation/__init__.py +0 -0
  222. testgen/ui/navigation/menu.py +42 -0
  223. testgen/ui/navigation/page.py +20 -0
  224. testgen/ui/navigation/router.py +63 -0
  225. testgen/ui/queries/__init__.py +0 -0
  226. testgen/ui/queries/authentication_queries.py +47 -0
  227. testgen/ui/queries/connection_queries.py +121 -0
  228. testgen/ui/queries/profiling_queries.py +148 -0
  229. testgen/ui/queries/project_queries.py +9 -0
  230. testgen/ui/queries/table_group_queries.py +186 -0
  231. testgen/ui/queries/test_definition_queries.py +270 -0
  232. testgen/ui/queries/test_run_queries.py +32 -0
  233. testgen/ui/queries/test_suite_queries.py +145 -0
  234. testgen/ui/scripts/__init__.py +0 -0
  235. testgen/ui/scripts/patch_streamlit.py +111 -0
  236. testgen/ui/services/__init__.py +0 -0
  237. testgen/ui/services/authentication_service.py +119 -0
  238. testgen/ui/services/connection_service.py +220 -0
  239. testgen/ui/services/database_service.py +282 -0
  240. testgen/ui/services/form_service.py +1008 -0
  241. testgen/ui/services/javascript_service.py +44 -0
  242. testgen/ui/services/query_service.py +316 -0
  243. testgen/ui/services/string_service.py +12 -0
  244. testgen/ui/services/table_group_service.py +130 -0
  245. testgen/ui/services/test_definition_service.py +117 -0
  246. testgen/ui/services/test_run_service.py +13 -0
  247. testgen/ui/services/test_suite_service.py +76 -0
  248. testgen/ui/services/toolbar_service.py +77 -0
  249. testgen/ui/session.py +46 -0
  250. testgen/ui/views/__init__.py +0 -0
  251. testgen/ui/views/app_log_modal.py +92 -0
  252. testgen/ui/views/connections.py +72 -0
  253. testgen/ui/views/connections_base.py +367 -0
  254. testgen/ui/views/login.py +40 -0
  255. testgen/ui/views/not_found.py +16 -0
  256. testgen/ui/views/overview.py +34 -0
  257. testgen/ui/views/profiling_anomalies.py +501 -0
  258. testgen/ui/views/profiling_details.py +335 -0
  259. testgen/ui/views/profiling_modal.py +40 -0
  260. testgen/ui/views/profiling_results.py +206 -0
  261. testgen/ui/views/profiling_summary.py +177 -0
  262. testgen/ui/views/project_settings.py +74 -0
  263. testgen/ui/views/table_groups.py +530 -0
  264. testgen/ui/views/test_definitions.py +1020 -0
  265. testgen/ui/views/test_results.py +908 -0
  266. testgen/ui/views/test_runs.py +195 -0
  267. testgen/ui/views/test_suites.py +545 -0
  268. testgen/utils/__init__.py +0 -0
  269. testgen/utils/plugins.py +17 -0
  270. testgen/utils/singleton.py +14 -0
@@ -0,0 +1,501 @@
1
+ import typing
2
+
3
+ import plotly.express as px
4
+ import streamlit as st
5
+
6
+ import testgen.ui.services.database_service as db
7
+ import testgen.ui.services.form_service as fm
8
+ import testgen.ui.services.query_service as dq
9
+ import testgen.ui.services.toolbar_service as tb
10
+ from testgen.ui.components import widgets as testgen
11
+ from testgen.ui.navigation.page import Page
12
+ from testgen.ui.session import session
13
+ from testgen.ui.views.profiling_modal import view_profiling_modal
14
+
15
+
16
class ProfilingAnomaliesPage(Page):
    """Streamlit page that lists the hygiene issues recorded for a profiling run.

    The page is reached at ``profiling/hygiene``. The ``can_activate`` guard
    yields the session's authentication status, or ``"login"`` when that
    status is falsy.
    """

    path = "profiling/hygiene"
    can_activate: typing.ClassVar = [
        lambda: session.authentication_status or "login",
    ]

    def render(self) -> None:
        """Draw the header, filter toolbar, issue grid, detail pane and disposition buttons.

        The table group and profiling run are taken from the
        ``drill_profile_tg`` / ``drill_profile_run`` session-state entries set
        by the page that drilled down to this one.
        """
        export_container = fm.render_page_header(
            "Hygiene Issues",
            "https://docs.datakitchen.io/article/dataops-testgen-help/profile-anomalies",
            lst_breadcrumbs=[
                {"label": "Overview", "path": "overview"},
                {"label": "Data Profiling", "path": "profiling"},
                {"label": "Hygiene Issues", "path": None},
            ],
        )

        if "project" not in st.session_state:
            st.write("Select a Project from the Overview page.")
        else:
            str_project = st.session_state["project"]

            # Setup Toolbar
            tool_bar = tb.ToolBar(3, 1, 4, None)

            # Look for drill-down from another page
            # No need to clear -- will be sent every time page is accessed
            str_drill_tg = st.session_state.get("drill_profile_tg")
            str_drill_prun = st.session_state.get("drill_profile_run")

            with tool_bar.long_slots[0]:
                # Table Groups selection
                df_tg = get_db_table_group_choices(str_project)

                # Resolve the drill-down id to its display name. The id may be
                # absent from this project's lookup (e.g. stale session state),
                # so fall back to no default instead of indexing an empty result.
                str_drill_tg_name = None
                if str_drill_tg:
                    matching_names = df_tg.loc[df_tg["id"] == str_drill_tg, "table_groups_name"]
                    if not matching_names.empty:
                        str_drill_tg_name = matching_names.iloc[0]

                str_table_groups_id = fm.render_select(
                    "Table Group", df_tg, "table_groups_name", "id", str_default=str_drill_tg_name, boo_disabled=True
                )

            str_profile_run_id = str_drill_prun

            with tool_bar.long_slots[1]:
                # Likelihood selection - optional filter
                lst_status_options = ["All Likelihoods", "Definite", "Likely", "Possible", "Potential PII"]
                str_likelihood = st.selectbox("Issue Class", lst_status_options)

            with tool_bar.short_slots[0]:
                str_help = "Toggle on to perform actions on multiple Hygiene Issues"
                do_multi_select = st.toggle("Multi-Select", help=str_help)

            if str_table_groups_id:
                # Get summary counts
                df_sum = get_profiling_anomaly_summary(str_profile_run_id)

                # Get hygiene issue list
                df_pa = get_profiling_anomalies(str_profile_run_id, str_likelihood)

                # Retrieve disposition action (cache refreshed)
                df_action = get_anomaly_disposition(str_profile_run_id)
                # Overlay the freshly retrieved dispositions on the issue list,
                # keeping the existing action where no disposition row matches
                action_map = df_action.set_index("id")["action"].to_dict()
                df_pa["action"] = df_pa["id"].map(action_map).fillna(df_pa["action"])

                if not df_pa.empty:
                    write_summary_graph(df_sum)
                    lst_show_columns = [
                        "table_name",
                        "column_name",
                        "issue_likelihood",
                        "action",
                        "anomaly_name",
                        "detail",
                    ]
                    # TODO: Can we reintegrate percents below:
                    # tool_bar.set_prompt(
                    #     f"Hygiene Issues Found: {df_sum.at[0, 'issue_ct']} issues in {df_sum.at[0, 'column_ct']} columns, {df_sum.at[0, 'table_ct']} tables in schema {df_pa.loc[0, 'schema_name']}"
                    # )
                    # Show main grid and retrieve selections
                    selected = fm.render_grid_select(
                        df_pa, lst_show_columns, int_height=400, do_multi_select=do_multi_select
                    )

                    with export_container:
                        lst_export_columns = [
                            "schema_name",
                            "table_name",
                            "column_name",
                            "anomaly_name",
                            "issue_likelihood",
                            "anomaly_description",
                            "action",
                            "detail",
                            "suggested_action",
                        ]
                        lst_wrap_columns = ["anomaly_description", "suggested_action"]
                        fm.render_excel_export(
                            df_pa, lst_export_columns, "Hygiene Screen", "{TIMESTAMP}", lst_wrap_columns
                        )

                    # Always show details for last selected row
                    selected_row = selected[-1] if selected else None

                    # Display hygiene issue detail for selected row
                    if not selected_row:
                        st.markdown(":orange[Select a record to see more information.]")
                    else:
                        col1, col2 = st.columns([0.7, 0.3])
                        with col1:
                            fm.render_html_list(
                                selected_row,
                                [
                                    "anomaly_name",
                                    "table_name",
                                    "column_name",
                                    "column_type",
                                    "anomaly_description",
                                    "detail",
                                    "likelihood_explanation",
                                    "suggested_action",
                                ],
                                "Hygiene Issue Detail",
                                int_data_width=700,
                            )
                        with col2:
                            v_col1, v_col2 = st.columns([0.5, 0.5])
                        view_profiling_modal(
                            v_col1, selected_row["table_name"], selected_row["column_name"],
                            str_profile_run_id=str_profile_run_id
                        )
                        view_bad_data(v_col2, selected_row)

                    # Need to render toolbar buttons after grid, so selection status is maintained.
                    # Each entry is (button label, tooltip, disposition written on click);
                    # the entry's position selects the toolbar button slot.
                    lst_disposition_buttons = [
                        ("✓", "Confirm this issue as relevant for this run", "Confirmed"),
                        ("✘", "Dismiss this issue as not relevant for this run", "Dismissed"),
                        ("🔇", "Mute this test to deactivate it for future runs", "Inactive"),
                        ("↩︎", "Clear action", "No Decision"),
                    ]
                    for int_slot, (str_label, str_button_help, str_disposition) in enumerate(lst_disposition_buttons):
                        if tool_bar.button_slots[int_slot].button(
                            str_label, help=str_button_help, disabled=not selected
                        ):
                            fm.reset_post_updates(
                                do_disposition_update(selected, str_disposition),
                                as_toast=True,
                                clear_cache=True,
                                lst_cached_functions=[get_anomaly_disposition, get_profiling_anomaly_summary],
                            )
                else:
                    tool_bar.set_prompt("No Hygiene Issues Found")

            # Help Links
            st.markdown(
                "[Help on Hygiene Issues](https://docs.datakitchen.io/article/dataops-testgen-help/profile-anomalies)"
            )
195
+
196
+ # with st.sidebar:
197
+ # st.divider()
198
+
199
+
200
@st.cache_data(show_spinner=False)
def get_db_table_group_choices(str_project_code):
    """Return the table-group lookup for ``str_project_code``.

    The TestGen schema name is read from the ``dbschema`` session-state entry
    and the lookup itself is delegated to ``dq.run_table_groups_lookup_query``.
    The result is cached by Streamlit.
    """
    return dq.run_table_groups_lookup_query(st.session_state["dbschema"], str_project_code)
204
+
205
+
206
@st.cache_data(show_spinner="Retrieving Data")
def get_profiling_anomalies(str_profile_run_id, str_likelihood):
    """Return the hygiene issues of one profiling run as a DataFrame.

    ``str_likelihood`` filters on the issue likelihood. The special value
    ``"All Likelihoods"`` returns every issue except those classed as
    ``'Potential PII'``; any other value is matched exactly.

    The ``action`` column holds the disposition with ``Confirmed``,
    ``Dismissed`` and ``Inactive`` swapped for their display symbols.
    """
    schema = st.session_state["dbschema"]

    likelihood_filter = (
        " AND t.issue_likelihood <> 'Potential PII'"
        if str_likelihood == "All Likelihoods"
        else f" AND t.issue_likelihood = '{str_likelihood}'"
    )

    # First visible column must come first in the SELECT list: the grid puts
    # the multi-select checkbox in it.
    query = f"""
            SELECT r.table_name, r.column_name, r.schema_name,
                   r.column_type,t.anomaly_name, t.issue_likelihood,
                   r.disposition, null as action,
                   CASE
                     WHEN t.issue_likelihood = 'Possible' THEN 'Possible: speculative test that often identifies problems'
                     WHEN t.issue_likelihood = 'Likely'   THEN 'Likely: typically indicates a data problem'
                     WHEN t.issue_likelihood = 'Definite'  THEN 'Definite: indicates a highly-likely data problem'
                     WHEN t.issue_likelihood = 'Potential PII'
                       THEN 'Potential PII: may require privacy policies, standards and procedures for access, storage and transmission.'
                   END as likelihood_explanation,
                   t.anomaly_description, r.detail, t.suggested_action,
                   r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime
              FROM {schema}.profile_anomaly_results r
            INNER JOIN {schema}.profile_anomaly_types t
               ON r.anomaly_id = t.id
            INNER JOIN {schema}.profiling_runs p
                ON r.profile_run_id = p.id
             WHERE r.profile_run_id = '{str_profile_run_id}'
               {likelihood_filter}
            ORDER BY r.schema_name, r.table_name, r.column_name;
    """
    df_anomalies = db.retrieve_data(query)

    # Dispositions without a symbol pass through unchanged.
    disposition_symbols = {"Confirmed": "✓", "Dismissed": "✘", "Inactive": "🔇"}
    df_anomalies["action"] = df_anomalies["disposition"].replace(disposition_symbols)

    return df_anomalies
243
+
244
+
245
@st.cache_data(show_spinner="Retrieving Status")
def get_anomaly_disposition(str_profile_run_id):
    """Return the display action of every hygiene issue in a profiling run.

    The result is a two-column DataFrame: ``id`` (cast to VARCHAR by the
    query) and ``action``, which is the stored disposition with ``Confirmed``,
    ``Dismissed``, ``Inactive`` and ``Passed`` swapped for display values.
    """
    schema = st.session_state["dbschema"]
    query = f"""
            SELECT id::VARCHAR, disposition
              FROM {schema}.profile_anomaly_results s
             WHERE s.profile_run_id = '{str_profile_run_id}';
    """
    df_disposition = db.retrieve_data(query)

    display_values = {"Confirmed": "✓", "Dismissed": "✘", "Inactive": "🔇", "Passed": ""}
    df_disposition["action"] = df_disposition["disposition"].replace(display_values)

    return df_disposition[["id", "action"]]
259
+
260
+
261
@st.cache_data(show_spinner=False)
def get_profiling_anomaly_summary(str_profile_run_id):
    """Return per-schema summary counts for the hygiene issues of a profiling run.

    Each row carries distinct table and column name counts, the total issue
    count, and one count per likelihood (Definite, Likely, Possible) for
    issues whose disposition is NULL or ``'Confirmed'``. ``dismissed_ct``
    counts issues whose disposition is ``'Dismissed'`` or ``'Inactive'``.
    """
    schema = st.session_state["dbschema"]
    summary_query = f"""
            SELECT schema_name,
                   COUNT(DISTINCT s.table_name) as table_ct,
                   COUNT(DISTINCT s.column_name) as column_ct,
                   COUNT(*) as issue_ct,
                   SUM(CASE WHEN COALESCE(s.disposition, 'Confirmed') = 'Confirmed'
                             AND t.issue_likelihood = 'Definite' THEN 1 ELSE 0 END) as definite_ct,
                   SUM(CASE WHEN COALESCE(s.disposition, 'Confirmed') = 'Confirmed'
                             AND t.issue_likelihood = 'Likely' THEN 1 ELSE 0 END) as likely_ct,
                   SUM(CASE WHEN COALESCE(s.disposition, 'Confirmed') = 'Confirmed'
                             AND t.issue_likelihood = 'Possible' THEN 1 ELSE 0 END) as possible_ct,
                   SUM(CASE WHEN COALESCE(s.disposition, 'Confirmed')
                                  IN ('Dismissed', 'Inactive') THEN 1 ELSE 0 END) as dismissed_ct
              FROM {schema}.profile_anomaly_results s
            LEFT JOIN {schema}.profile_anomaly_types t
              ON (s.anomaly_id = t.id)
             WHERE s.profile_run_id = '{str_profile_run_id}'
            GROUP BY schema_name;
    """
    df_summary = db.retrieve_data(summary_query)
    return df_summary
286
+
287
+
288
@st.cache_data(show_spinner=False)
def get_bad_data(selected_row):
    """Retrieve the source data behind the hygiene issue in ``selected_row``.

    The lookup-query template and the target connection details are read from
    the TestGen schema. The template's placeholders are filled from
    ``selected_row`` and the resulting query is run against the target
    database.

    Args:
        selected_row: Mapping for the selected issue. The keys read here are
            ``table_groups_id``, ``anomaly_id``, ``table_name``,
            ``column_name``, ``detail`` and ``profiling_starttime``.

    Returns:
        A ``(status, message, df)`` tuple:
          * ``("OK", None, df)`` -- the lookup returned rows.
          * ``("ND", message, None)`` -- the lookup ran but returned no rows.
          * ``("NA", message, None)`` -- no lookup query is registered.
          * ``("ERR", message, None)`` -- building or running the lookup failed.
    """
    str_schema = st.session_state["dbschema"]
    # Lookup template plus the connection details of the issue's table group
    str_lookup_sql = f"""
            SELECT t.lookup_query, tg.table_group_schema, c.project_qc_schema,
                   c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted,
                   c.url, c.connect_by_url, c.connect_by_key, c.private_key, c.private_key_passphrase
              FROM {str_schema}.target_data_lookups t
            INNER JOIN {str_schema}.table_groups tg
               ON ('{selected_row["table_groups_id"]}'::UUID = tg.id)
            INNER JOIN {str_schema}.connections c
               ON (tg.connection_id = c.connection_id)
              AND (t.sql_flavor = c.sql_flavor)
             WHERE t.error_type = 'Profile Anomaly'
               AND t.test_id = '{selected_row["anomaly_id"]}'
               AND t.lookup_query > '';
    """

    def get_lookup_query(test_id, detail_exp, column_names):
        """Build the max-date lookup for anomaly types 1019/1020; return "" for any other type."""
        if test_id not in {"1019", "1020"}:
            return ""

        # Prefer the column list embedded in the detail text ("... Columns: a, b");
        # fall back to the row's own column name(s) when the marker is absent.
        _, str_marker, str_listed = detail_exp.partition("Columns: ")
        str_columns = str_listed if str_marker else column_names

        # Skip blank entries so a stray comma cannot produce "MAX()".
        columns = [col.strip() for col in str_columns.split(",") if col.strip()]
        if not columns:
            return ""

        queries = [
            f"SELECT '{column}' AS column_name, MAX({column}) AS max_date_available FROM {{TARGET_SCHEMA}}.{{TABLE_NAME}}"
            for column in columns
        ]
        return " UNION ALL ".join(queries) + " ORDER BY max_date_available DESC;"

    def replace_parms(dct_lookup):
        """Return the lookup query for ``dct_lookup`` with its placeholders filled in."""
        str_query = dct_lookup["lookup_query"]
        if str_query == "created_in_ui":
            str_query = get_lookup_query(
                selected_row["anomaly_id"], selected_row["detail"], selected_row["column_name"]
            )

        # Validate before substituting, so a missing query is reported with
        # this message rather than as a failure of the substitution itself.
        if not str_query:
            raise ValueError("Lookup query is not defined for this Anomoly Type.")

        dct_parms = {
            "{TARGET_SCHEMA}": dct_lookup["table_group_schema"],
            "{TABLE_NAME}": selected_row["table_name"],
            "{COLUMN_NAME}": selected_row["column_name"],
            "{DATA_QC_SCHEMA}": dct_lookup["project_qc_schema"],
            "{DETAIL_EXPRESSION}": selected_row["detail"],
            "{PROFILE_RUN_DATE}": selected_row["profiling_starttime"],
        }
        for str_placeholder, value in dct_parms.items():
            # str.replace() accepts only strings. Values are not guaranteed to
            # be strings here (NOTE(review): e.g. a null column name or a
            # timestamp -- confirm what the grid returns). A missing value
            # leaves its placeholder in place, so it can only break a query
            # that actually uses it.
            if value is not None:
                str_query = str_query.replace(str_placeholder, str(value))
        return str_query

    try:
        # Retrieve SQL for customer lookup
        lst_query = db.retrieve_data_list(str_lookup_sql)
        if not lst_query:
            return "NA", "A source data lookup for this Issue is not available.", None

        dct_lookup = lst_query[0]
        df = db.retrieve_target_db_df(
            dct_lookup["sql_flavor"],
            dct_lookup["project_host"],
            dct_lookup["project_port"],
            dct_lookup["project_db"],
            dct_lookup["project_user"],
            dct_lookup["project_pw_encrypted"],
            replace_parms(dct_lookup),
            dct_lookup["url"],
            dct_lookup["connect_by_url"],
            dct_lookup["connect_by_key"],
            dct_lookup["private_key"],
            dct_lookup["private_key_passphrase"],
        )
        if df.empty:
            return "ND", "Data that violates Hygiene Issue criteria is not present in the current dataset.", None
        return "OK", None, df

    except Exception as e:
        # Failures are reported to the caller as a status instead of being
        # raised. An exception raised without arguments has empty ``args``,
        # so fall back to its repr rather than failing inside the handler.
        str_error = e.args[0] if e.args else repr(e)
        return "ERR", f"Source data lookup query caused an error:\n\n{str_error}", None
371
+
372
+
373
def write_summary_graph(df_sum):
    """Render a compact stacked horizontal bar of issue counts by likelihood.

    Reads the ``definite_ct``, ``likely_ct``, ``possible_ct`` and
    ``dismissed_ct`` columns of ``df_sum`` (the caption uses the row labelled
    0) and draws the chart with Streamlit, with the mode bar hidden.
    """
    count_columns = ["definite_ct", "likely_ct", "possible_ct", "dismissed_ct"]
    df_graph = df_sum[count_columns]

    definite, likely, possible, dismissed = (df_sum.at[0, column] for column in count_columns)
    str_graph_caption = (
        f"<i>Definite: {definite}, Likely: {likely}, Possible: {possible}, Dismissed: {dismissed}</i>"
    )

    fig = px.bar(
        df_graph,
        orientation="h",
        title=None,
        color_discrete_sequence=["red", "orange", "yellow", "green"],
        barmode="stack",
    )
    fig.update_traces(hovertemplate="%{x}")

    # Both axes are stripped of labels, grid, zero line, axis line and title.
    bare_axis = {
        "showticklabels": False,
        "showgrid": False,
        "zeroline": False,
        "showline": False,
        "title_text": "",
    }
    layout_options = {
        "showlegend": False,
        "legend_orientation": "h",
        "legend_y": -0.2,  # This value might need to be adjusted based on other chart elements
        "legend_x": 0.5,
        "legend_xanchor": "right",
        "legend_title_text": "",
        "yaxis": dict(bare_axis),
        "xaxis": dict(bare_axis),
        "hovermode": "closest",
        "height": 100,
        "width": 800,
        "margin": {"l": 0, "r": 10, "b": 10, "t": 10},  # margins around the plot
        "paper_bgcolor": "rgba(0,0,0,0)",
        "plot_bgcolor": "rgba(0,0,0,0)",
    }
    fig.update_layout(**layout_options)

    # 'paper' coordinates are relative to the layout: (0,0) is the bottom
    # left corner and (1,1) the top right.
    fig.add_annotation(
        text=str_graph_caption,
        xref="paper",
        yref="paper",
        x=0,
        y=0,
        xanchor="left",
        yanchor="top",
        showarrow=False,
        font={"size": 15, "color": "black"},
    )

    st.plotly_chart(fig, config={"displayModeBar": False})
432
+
433
+
434
def write_frequency_graph(df_tests):
    """Draw a horizontal bar chart of how often each anomaly name occurs.

    Counts the values of the ``anomaly_name`` column of ``df_tests``, orders
    them by ascending frequency and renders the chart with Streamlit.
    """
    # One row per anomaly name with its number of occurrences.
    frequencies = df_tests["anomaly_name"].value_counts().reset_index()
    frequencies.columns = ["anomaly_name", "frequency"]
    frequencies = frequencies.sort_values(by="frequency", ascending=True)

    fig = px.bar(frequencies, x="frequency", y="anomaly_name", orientation="h", title="Issue Frequency")

    layout_options = {
        "title_font": {"color": "green"},
        "paper_bgcolor": "rgba(0,0,0,0)",
        "plot_bgcolor": "rgba(0,0,0,0)",
    }
    # Short charts get a fixed height.
    if len(frequencies) <= 5:
        layout_options["height"] = 300
    fig.update_layout(**layout_options)

    st.plotly_chart(fig)
450
+
451
+
452
def view_bad_data(button_container, selected_row):
    """Show a "Source Data" button and, when opened, a modal with source rows.

    The button is drawn inside ``button_container``. While the modal is open
    it shows the anomaly name, description and detail of ``selected_row``,
    then the outcome of ``get_bad_data(selected_row)``: an info or error
    message, or the returned dataframe.
    """
    header_text = f"Column: {selected_row['column_name']}, Table: {selected_row['table_name']}"
    modal = testgen.Modal(title=None, key="dk-anomaly-data-modal", max_width=1100)

    with button_container:
        clicked = st.button(
            ":green[Source Data →]", help="Review current source data for highlighted issue", use_container_width=True
        )
        if clicked:
            modal.open()

    if not modal.is_open():
        return

    with modal.container():
        fm.render_modal_header(selected_row["anomaly_name"], None)
        st.caption(selected_row["anomaly_description"])
        fm.show_prompt(header_text)

        # Detail line of the selected issue
        fm.render_html_list(selected_row, ["detail"], None, 700, ["Hygiene Issue Detail"])

        with st.spinner("Retrieving source data..."):
            status, message, df_bad = get_bad_data(selected_row)

            if status in {"ND", "NA"}:
                st.info(message)
            elif status == "ERR":
                st.error(message)
            elif df_bad is None:
                st.error("An unknown error was encountered.")
            else:
                if message:
                    st.info(message)
                # Friendlier headers and an explicit marker for missing values
                df_bad.columns = [name.replace("_", " ").title() for name in df_bad.columns]
                df_bad.fillna("[NULL]", inplace=True)
                st.dataframe(df_bad, height=500, width=1050, hide_index=True)
487
+
488
+
489
def do_disposition_update(selected, str_new_status):
    """Apply a new disposition status to the selected issues.

    Returns ``None`` when nothing is selected or the update succeeds, and a
    red-formatted failure message when ``dq.update_anomaly_disposition``
    reports failure. The schema is read from ``st.session_state["dbschema"]``.
    """
    if not selected:
        return None

    issue_count = len(selected)
    if issue_count > 1:
        update_scope = f"of {issue_count} issues to {str_new_status}"
    elif issue_count == 1:
        update_scope = f"of one issue to {str_new_status}"

    schema_name = st.session_state["dbschema"]
    if dq.update_anomaly_disposition(selected, schema_name, str_new_status):
        return None

    return f":red[**The update {update_scope} did not succeed.**]"