dataops-testgen 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. dataops_testgen-2.2.0.dist-info/LICENSE +203 -0
  2. dataops_testgen-2.2.0.dist-info/METADATA +287 -0
  3. dataops_testgen-2.2.0.dist-info/NOTICE +5 -0
  4. dataops_testgen-2.2.0.dist-info/RECORD +270 -0
  5. dataops_testgen-2.2.0.dist-info/WHEEL +5 -0
  6. dataops_testgen-2.2.0.dist-info/entry_points.txt +2 -0
  7. dataops_testgen-2.2.0.dist-info/top_level.txt +1 -0
  8. testgen/__init__.py +0 -0
  9. testgen/__main__.py +770 -0
  10. testgen/commands/__init__.py +0 -0
  11. testgen/commands/queries/__init__.py +0 -0
  12. testgen/commands/queries/execute_cat_tests_query.py +95 -0
  13. testgen/commands/queries/execute_tests_query.py +160 -0
  14. testgen/commands/queries/generate_tests_query.py +94 -0
  15. testgen/commands/queries/profiling_query.py +366 -0
  16. testgen/commands/queries/test_parameter_validation_query.py +88 -0
  17. testgen/commands/run_execute_cat_tests.py +162 -0
  18. testgen/commands/run_execute_tests.py +168 -0
  19. testgen/commands/run_generate_tests.py +107 -0
  20. testgen/commands/run_get_entities.py +122 -0
  21. testgen/commands/run_launch_db_config.py +84 -0
  22. testgen/commands/run_observability_exporter.py +330 -0
  23. testgen/commands/run_profiling_bridge.py +495 -0
  24. testgen/commands/run_quick_start.py +168 -0
  25. testgen/commands/run_setup_profiling_tools.py +96 -0
  26. testgen/commands/run_test_definition.py +146 -0
  27. testgen/commands/run_test_parameter_validation.py +135 -0
  28. testgen/commands/run_upgrade_db_config.py +156 -0
  29. testgen/common/__init__.py +8 -0
  30. testgen/common/clean_sql.py +53 -0
  31. testgen/common/credentials.py +25 -0
  32. testgen/common/database/__init__.py +0 -0
  33. testgen/common/database/database_service.py +629 -0
  34. testgen/common/database/flavor/__init__.py +0 -0
  35. testgen/common/database/flavor/flavor_service.py +75 -0
  36. testgen/common/database/flavor/mssql_flavor_service.py +34 -0
  37. testgen/common/database/flavor/postgresql_flavor_service.py +5 -0
  38. testgen/common/database/flavor/redshift_flavor_service.py +22 -0
  39. testgen/common/database/flavor/snowflake_flavor_service.py +69 -0
  40. testgen/common/database/flavor/trino_flavor_service.py +21 -0
  41. testgen/common/date_service.py +68 -0
  42. testgen/common/display_service.py +85 -0
  43. testgen/common/docker_service.py +76 -0
  44. testgen/common/encrypt.py +55 -0
  45. testgen/common/get_pipeline_parms.py +57 -0
  46. testgen/common/logs.py +79 -0
  47. testgen/common/process_service.py +62 -0
  48. testgen/common/read_file.py +69 -0
  49. testgen/settings.py +440 -0
  50. testgen/template/dbsetup/010_create_base_schema.sql +2 -0
  51. testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +179 -0
  52. testgen/template/dbsetup/030_initialize_new_schema_structure.sql +735 -0
  53. testgen/template/dbsetup/040_populate_new_schema_project.sql +59 -0
  54. testgen/template/dbsetup/050_populate_new_schema_metadata.sql +1517 -0
  55. testgen/template/dbsetup/060_create_standard_views.sql +248 -0
  56. testgen/template/dbsetup/070_create_default_users.sql +17 -0
  57. testgen/template/dbsetup/075_grant_role_rights.sql +43 -0
  58. testgen/template/dbsetup/080_set_current_revision.sql +5 -0
  59. testgen/template/dbupgrade/0100_incremental_upgrade.sql +5 -0
  60. testgen/template/dbupgrade/0101_incremental_upgrade.sql +15 -0
  61. testgen/template/dbupgrade/0102_incremental_upgrade.sql +4 -0
  62. testgen/template/dbupgrade/0103_incremental_upgrade.sql +22 -0
  63. testgen/template/dbupgrade/0104_incremental_upgrade.sql +44 -0
  64. testgen/template/dbupgrade/0105_incremental_upgrade.sql +1 -0
  65. testgen/template/dbupgrade/0106_incremental_upgrade.sql +5 -0
  66. testgen/template/dbupgrade/0107_incremental_upgrade.sql +3 -0
  67. testgen/template/dbupgrade_helpers/get_tg_revision.sql +2 -0
  68. testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +116 -0
  69. testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql +11 -0
  70. testgen/template/exec_cat_tests/ex_cat_results_parse.sql +69 -0
  71. testgen/template/exec_cat_tests/ex_cat_retrieve_agg_test_parms.sql +6 -0
  72. testgen/template/exec_cat_tests/ex_cat_test_query.sql +8 -0
  73. testgen/template/execution/ex_finalize_test_run_results.sql +37 -0
  74. testgen/template/execution/ex_get_tests_non_cat.sql +47 -0
  75. testgen/template/execution/ex_update_test_record_in_testrun_table.sql +27 -0
  76. testgen/template/execution/ex_write_test_record_to_testrun_table.sql +6 -0
  77. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +48 -0
  78. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql +34 -0
  79. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql +49 -0
  80. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql +49 -0
  81. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +49 -0
  82. testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +39 -0
  83. testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +58 -0
  84. testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +44 -0
  85. testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +37 -0
  86. testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +53 -0
  87. testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +46 -0
  88. testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +59 -0
  89. testgen/template/flavors/generic/profiling/contingency_counts.sql +3 -0
  90. testgen/template/flavors/generic/validate_tests/ex_get_project_column_list_generic.sql +3 -0
  91. testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql +53 -0
  92. testgen/template/flavors/mssql/profiling/project_ddf_query_mssql.sql +35 -0
  93. testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +246 -0
  94. testgen/template/flavors/mssql/profiling/project_secondary_profiling_query_mssql.sql +36 -0
  95. testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql +8 -0
  96. testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql +12 -0
  97. testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql +54 -0
  98. testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql +4 -0
  99. testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql +1 -0
  100. testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql +46 -0
  101. testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql +59 -0
  102. testgen/template/flavors/postgresql/profiling/project_ddf_query_postgresql.sql +42 -0
  103. testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +225 -0
  104. testgen/template/flavors/postgresql/profiling/project_secondary_profiling_query_postgresql.sql +28 -0
  105. testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql +157 -0
  106. testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql +1 -0
  107. testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql +2 -0
  108. testgen/template/flavors/redshift/profiling/project_ddf_query_redshift.sql +38 -0
  109. testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +221 -0
  110. testgen/template/flavors/redshift/profiling/project_secondary_profiling_query_redshift.sql +29 -0
  111. testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql +115 -0
  112. testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql +1 -0
  113. testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql +2 -0
  114. testgen/template/flavors/snowflake/profiling/project_ddf_query_snowflake.sql +38 -0
  115. testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +220 -0
  116. testgen/template/flavors/snowflake/profiling/project_secondary_profiling_query_snowflake.sql +29 -0
  117. testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql +69 -0
  118. testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql +1 -0
  119. testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql +6 -0
  120. testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml +219 -0
  121. testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql +92 -0
  122. testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql +1 -0
  123. testgen/template/gen_funny_cat_tests/gen_test_constant.sql +104 -0
  124. testgen/template/gen_funny_cat_tests/gen_test_distinct_value_ct.sql +98 -0
  125. testgen/template/gen_funny_cat_tests/gen_test_row_ct.sql +57 -0
  126. testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql +59 -0
  127. testgen/template/generation/gen_delete_old_tests.sql +5 -0
  128. testgen/template/generation/gen_insert_test_suite.sql +5 -0
  129. testgen/template/generation/gen_retrieve_or_insert_test_suite.sql +58 -0
  130. testgen/template/generation/gen_standard_test_type_list.sql +13 -0
  131. testgen/template/generation/gen_standard_tests.sql +48 -0
  132. testgen/template/get_entities/get_connection.sql +21 -0
  133. testgen/template/get_entities/get_connections_list.sql +9 -0
  134. testgen/template/get_entities/get_latest.sql +4 -0
  135. testgen/template/get_entities/get_profile.sql +12 -0
  136. testgen/template/get_entities/get_profile_info.sql +17 -0
  137. testgen/template/get_entities/get_profile_list.sql +17 -0
  138. testgen/template/get_entities/get_profile_screen.sql +275 -0
  139. testgen/template/get_entities/get_project_list.sql +6 -0
  140. testgen/template/get_entities/get_table_group_list.sql +10 -0
  141. testgen/template/get_entities/get_test_generation_list.sql +18 -0
  142. testgen/template/get_entities/get_test_info.sql +41 -0
  143. testgen/template/get_entities/get_test_results_for_run_cli.sql +16 -0
  144. testgen/template/get_entities/get_test_run_list.sql +24 -0
  145. testgen/template/get_entities/get_test_suite.sql +13 -0
  146. testgen/template/get_entities/get_test_suite_list.sql +18 -0
  147. testgen/template/get_entities/list_test_types.sql +4 -0
  148. testgen/template/observability/get_event_data.sql +23 -0
  149. testgen/template/observability/get_test_results.sql +41 -0
  150. testgen/template/observability/update_test_results_exported_to_observability.sql +12 -0
  151. testgen/template/parms/parms_profiling.sql +34 -0
  152. testgen/template/parms/parms_test_execution.sql +13 -0
  153. testgen/template/parms/parms_test_gen.sql +23 -0
  154. testgen/template/profiling/contingency_columns.sql +7 -0
  155. testgen/template/profiling/datatype_suggestions.sql +56 -0
  156. testgen/template/profiling/functional_datatype.sql +523 -0
  157. testgen/template/profiling/functional_tabletype_stage.sql +48 -0
  158. testgen/template/profiling/functional_tabletype_update.sql +8 -0
  159. testgen/template/profiling/pii_flag.sql +133 -0
  160. testgen/template/profiling/profile_anomalies_screen_column.sql +22 -0
  161. testgen/template/profiling/profile_anomalies_screen_multi_column.sql +58 -0
  162. testgen/template/profiling/profile_anomalies_screen_table.sql +22 -0
  163. testgen/template/profiling/profile_anomalies_screen_table_dates.sql +30 -0
  164. testgen/template/profiling/profile_anomalies_screen_variants.sql +40 -0
  165. testgen/template/profiling/profile_anomaly_types_get.sql +3 -0
  166. testgen/template/profiling/project_get_table_sample_count.sql +22 -0
  167. testgen/template/profiling/project_profile_run_record_insert.sql +8 -0
  168. testgen/template/profiling/project_profile_run_record_update.sql +5 -0
  169. testgen/template/profiling/project_profile_run_record_update_status.sql +5 -0
  170. testgen/template/profiling/project_update_profile_results_to_estimates.sql +32 -0
  171. testgen/template/profiling/refresh_anomalies.sql +33 -0
  172. testgen/template/profiling/refresh_data_chars_from_profiling.sql +156 -0
  173. testgen/template/profiling/secondary_profiling_columns.sql +12 -0
  174. testgen/template/profiling/secondary_profiling_delete.sql +4 -0
  175. testgen/template/profiling/secondary_profiling_update.sql +18 -0
  176. testgen/template/quick_start/populate_target_data.sql +1077 -0
  177. testgen/template/quick_start/recreate_target_data_schema.sql +167 -0
  178. testgen/template/quick_start/update_target_data.sql +100 -0
  179. testgen/template/updates/create_tmp_test_definition.sql +19 -0
  180. testgen/template/updates/get_test_def_parms.sql +38 -0
  181. testgen/template/updates/populate_stg_test_definitions.sql +184 -0
  182. testgen/template/validate_tests/ex_disable_tests_test_definitions.sql +5 -0
  183. testgen/template/validate_tests/ex_flag_tests_test_definitions.sql +64 -0
  184. testgen/template/validate_tests/ex_get_project_column_list_generic.sql +3 -0
  185. testgen/template/validate_tests/ex_get_test_column_list_tg.sql +65 -0
  186. testgen/template/validate_tests/ex_write_test_val_errors.sql +22 -0
  187. testgen/ui/__init__.py +0 -0
  188. testgen/ui/app.py +98 -0
  189. testgen/ui/assets/dk_logo.svg +46 -0
  190. testgen/ui/assets/question_mark.png +0 -0
  191. testgen/ui/assets/scripts.js +68 -0
  192. testgen/ui/assets/style.css +140 -0
  193. testgen/ui/bootstrap.py +109 -0
  194. testgen/ui/components/__init__.py +0 -0
  195. testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fBBc4.woff2 +0 -0
  196. testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fChc4EsA.woff2 +0 -0
  197. testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu4mxK.woff2 +0 -0
  198. testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu7GxKOzY.woff2 +0 -0
  199. testgen/ui/components/frontend/css/material-symbols-rounded.css +24 -0
  200. testgen/ui/components/frontend/css/material-symbols-rounded.woff2 +0 -0
  201. testgen/ui/components/frontend/css/roboto-font-faces.css +35 -0
  202. testgen/ui/components/frontend/css/shared.css +36 -0
  203. testgen/ui/components/frontend/img/dk_logo.svg +46 -0
  204. testgen/ui/components/frontend/index.html +17 -0
  205. testgen/ui/components/frontend/js/components/breadcrumbs.js +86 -0
  206. testgen/ui/components/frontend/js/components/button.js +66 -0
  207. testgen/ui/components/frontend/js/components/location.js +62 -0
  208. testgen/ui/components/frontend/js/components/select.js +75 -0
  209. testgen/ui/components/frontend/js/components/sidebar.js +358 -0
  210. testgen/ui/components/frontend/js/main.js +99 -0
  211. testgen/ui/components/frontend/js/streamlit.js +19 -0
  212. testgen/ui/components/frontend/js/van.min.js +1 -0
  213. testgen/ui/components/utils/__init__.py +0 -0
  214. testgen/ui/components/utils/callbacks.py +51 -0
  215. testgen/ui/components/utils/component.py +13 -0
  216. testgen/ui/components/widgets/__init__.py +6 -0
  217. testgen/ui/components/widgets/breadcrumbs.py +32 -0
  218. testgen/ui/components/widgets/location.py +65 -0
  219. testgen/ui/components/widgets/modal.py +97 -0
  220. testgen/ui/components/widgets/sidebar.py +69 -0
  221. testgen/ui/navigation/__init__.py +0 -0
  222. testgen/ui/navigation/menu.py +42 -0
  223. testgen/ui/navigation/page.py +20 -0
  224. testgen/ui/navigation/router.py +63 -0
  225. testgen/ui/queries/__init__.py +0 -0
  226. testgen/ui/queries/authentication_queries.py +47 -0
  227. testgen/ui/queries/connection_queries.py +121 -0
  228. testgen/ui/queries/profiling_queries.py +148 -0
  229. testgen/ui/queries/project_queries.py +9 -0
  230. testgen/ui/queries/table_group_queries.py +186 -0
  231. testgen/ui/queries/test_definition_queries.py +270 -0
  232. testgen/ui/queries/test_run_queries.py +32 -0
  233. testgen/ui/queries/test_suite_queries.py +145 -0
  234. testgen/ui/scripts/__init__.py +0 -0
  235. testgen/ui/scripts/patch_streamlit.py +111 -0
  236. testgen/ui/services/__init__.py +0 -0
  237. testgen/ui/services/authentication_service.py +119 -0
  238. testgen/ui/services/connection_service.py +220 -0
  239. testgen/ui/services/database_service.py +282 -0
  240. testgen/ui/services/form_service.py +1008 -0
  241. testgen/ui/services/javascript_service.py +44 -0
  242. testgen/ui/services/query_service.py +316 -0
  243. testgen/ui/services/string_service.py +12 -0
  244. testgen/ui/services/table_group_service.py +130 -0
  245. testgen/ui/services/test_definition_service.py +117 -0
  246. testgen/ui/services/test_run_service.py +13 -0
  247. testgen/ui/services/test_suite_service.py +76 -0
  248. testgen/ui/services/toolbar_service.py +77 -0
  249. testgen/ui/session.py +46 -0
  250. testgen/ui/views/__init__.py +0 -0
  251. testgen/ui/views/app_log_modal.py +92 -0
  252. testgen/ui/views/connections.py +72 -0
  253. testgen/ui/views/connections_base.py +367 -0
  254. testgen/ui/views/login.py +40 -0
  255. testgen/ui/views/not_found.py +16 -0
  256. testgen/ui/views/overview.py +34 -0
  257. testgen/ui/views/profiling_anomalies.py +501 -0
  258. testgen/ui/views/profiling_details.py +335 -0
  259. testgen/ui/views/profiling_modal.py +40 -0
  260. testgen/ui/views/profiling_results.py +206 -0
  261. testgen/ui/views/profiling_summary.py +177 -0
  262. testgen/ui/views/project_settings.py +74 -0
  263. testgen/ui/views/table_groups.py +530 -0
  264. testgen/ui/views/test_definitions.py +1020 -0
  265. testgen/ui/views/test_results.py +908 -0
  266. testgen/ui/views/test_runs.py +195 -0
  267. testgen/ui/views/test_suites.py +545 -0
  268. testgen/utils/__init__.py +0 -0
  269. testgen/utils/plugins.py +17 -0
  270. testgen/utils/singleton.py +14 -0
@@ -0,0 +1,335 @@
1
+ import pandas as pd
2
+ import plotly.express as px
3
+ import plotly.graph_objects as go
4
+ import streamlit as st
5
+
6
+ import testgen.ui.services.database_service as db
7
+ import testgen.ui.services.form_service as fm
8
+
9
+
10
@st.cache_data(show_spinner="Retrieving Details")
def get_profile_screen(str_profile_run_id, str_table_name, str_column_name):
    """Query the anomaly results recorded for one column of a profiling run.

    The schema name is read from ``st.session_state["dbschema"]``. Rows whose
    anomaly name is 'Suggested Data Type' are excluded and the result is
    ordered by anomaly name.

    Args:
        str_profile_run_id: Profile run id; cast to UUID inside the query.
        str_table_name: Table name to filter on.
        str_column_name: Column name to filter on.

    Returns:
        Whatever ``db.retrieve_data`` returns for the query (callers in this
        file treat it as a DataFrame).
    """
    schema = st.session_state["dbschema"]
    # NOTE(review): the arguments are interpolated directly into the SQL text.
    # Confirm they never carry untrusted input, or switch to bound parameters
    # if db.retrieve_data supports them.
    query = f"""
        SELECT pr.column_name, t.anomaly_name, replace(pr.detail, ' | ', ' ') as detail
        FROM {schema}.profile_anomaly_results pr
        INNER JOIN {schema}.profile_anomaly_types t
        ON (pr.anomaly_id = t.id)
        WHERE pr.profile_run_id = '{str_profile_run_id}'::UUID
        AND pr.table_name = '{str_table_name}'
        AND pr.column_name = '{str_column_name}'
        AND t.anomaly_name <> 'Suggested Data Type'
        ORDER BY anomaly_name;
    """
    return db.retrieve_data(query)
27
+
28
+
29
def reverse_count_category_pairs(input_str):
    """Swap every "count | category" pair in a ' | '-delimited string.

    The input is read as alternating elements: count, category, count,
    category, ... A trailing element with no partner is ignored, and an empty
    or None input yields two empty strings (previously both cases raised
    IndexError / AttributeError).

    Args:
        input_str: String of elements separated by ' | '.

    Returns:
        A ``(reversed_str, display_str)`` tuple where ``reversed_str`` is
        "category | count | category | count ..." and ``display_str`` is
        "category: count" entries joined with "<br>".
    """
    elements = input_str.split(" | ") if input_str else []

    # Even positions hold counts, odd positions hold categories; zip stops at
    # the shorter slice, so an unpaired last element is dropped safely.
    pairs = list(zip(elements[0::2], elements[1::2]))

    reversed_str = " | ".join(f"{category} | {count}" for count, category in pairs)
    display_str = "<br>".join(f"{category}: {count}" for count, category in pairs)

    return reversed_str, display_str
55
+
56
+
57
def write_profile_screen(selected_row):
    """Show the anomaly rows for the selected column in an expander.

    Looks up the rows via ``get_profile_screen`` using the row's
    ``profile_run_id``, ``table_name`` and ``column_name``; nothing is
    rendered when the lookup comes back empty.
    """
    df_issues = get_profile_screen(
        selected_row["profile_run_id"],
        selected_row["table_name"],
        selected_row["column_name"],
    )
    if df_issues.empty:
        return

    with st.expander("**Hygiene Issues**"):
        st.dataframe(df_issues, use_container_width=True, hide_index=True)
65
+
66
+
67
def write_column_header(selected_row, form_data_width):
    """Render the "Profiling Results" list: column identity and type fields."""
    fm.render_html_list(
        selected_row,
        [
            "column_name",
            "table_name",
            "schema_name",
            "general_type",
            "column_type",
            "semantic_data_type",
            "datatype_suggestion",
        ],
        "Profiling Results",
        form_data_width,
    )
79
+
80
+
81
def write_shared_header(selected_row, form_data_width):
    """Render the "Data Overview" list: record, value and distinct-value counts."""
    fields = ["record_ct", "value_ct", "distinct_value_ct"]
    fm.render_html_list(selected_row, fields, "Data Overview", form_data_width)
86
+
87
+
88
def write_alpha_missing_values(selected_row, form_data_width):
    """Render the "Missing Values" list used for alpha columns."""
    fields = ["null_value_ct", "zero_length_ct", "dummy_value_ct", "zero_value_ct"]
    fm.render_html_list(selected_row, fields, "Missing Values", form_data_width)
92
+
93
+
94
def write_numeric_missing_values(selected_row, form_data_width):
    """Render the "Missing Values" list used for numeric columns."""
    fields = ["null_value_ct", "zero_value_ct"]
    fm.render_html_list(selected_row, fields, "Missing Values", form_data_width)
98
+
99
+
100
def write_alpha_content_analysis(selected_row, form_data_width):
    """Render the "Content Analysis" list used for alpha columns."""
    fields = [
        "numeric_ct",
        "date_ct",
        "includes_digit_ct",
        "embedded_space_ct",
        "avg_embedded_spaces",
    ]
    fm.render_html_list(selected_row, fields, "Content Analysis", form_data_width)
104
+
105
+
106
def write_alpha_value_analysis(selected_row, form_data_width):
    """Render the "Value Analysis" list used for alpha columns.

    Side effect: when ``selected_row["top_patterns"]`` is truthy it is
    temporarily replaced by the "<br>"-separated display form for rendering,
    and afterwards left holding the reversed "category | count" form so the
    pattern graph can consume it.
    """
    fields = [
        "min_length",
        "max_length",
        "avg_length",
        "min_text",
        "max_text",
        "top_freq_values",
        "distinct_pattern_ct",
        "top_patterns",
        "std_pattern_match",
    ]

    has_patterns = bool(selected_row["top_patterns"])
    if has_patterns:
        # Stored order is count-then-category; flip it for both uses.
        graph_form, display_form = reverse_count_category_pairs(selected_row["top_patterns"])
        selected_row["top_patterns"] = display_form

    fm.render_html_list(selected_row, fields, "Value Analysis", form_data_width)

    if has_patterns:
        # Hand the graph-friendly form back to the row for later charting.
        selected_row["top_patterns"] = graph_form
120
+
121
+
122
def write_numeric_value_analysis(selected_row, form_data_width):
    """Render the "Values and Ranges" list used for numeric columns."""
    fields = [
        "min_value",
        "min_value_over_0",
        "max_value",
        "min_length",
        "max_length",
        "avg_length",
    ]
    fm.render_html_list(selected_row, fields, "Values and Ranges", form_data_width)
126
+
127
+
128
def write_stats_value_analysis(selected_row, form_data_width):
    """Render the "Descriptive Statistics" list used for numeric columns."""
    fields = ["avg_value", "stdev_value", "percentile_25", "percentile_50", "percentile_75"]
    fm.render_html_list(selected_row, fields, "Descriptive Statistics", form_data_width)
132
+
133
+
134
def write_date_analysis(selected_row, form_data_width):
    """Render the "Date Value Analysis" list used for date columns."""
    fields = [
        "min_date",
        "max_date",
        "before_1yr_date_ct",
        "before_5yr_date_ct",
        "within_1yr_date_ct",
        "within_1mo_date_ct",
        "future_date_ct",
    ]
    fm.render_html_list(selected_row, fields, "Date Value Analysis", form_data_width)
140
+
141
+
142
def write_boolean_analysis(selected_row, form_data_width):
    """Render the "Boolean Value Analysis" list used for boolean columns."""
    fm.render_html_list(selected_row, ["boolean_true_ct"], "Boolean Value Analysis", form_data_width)
146
+
147
+
148
def write_missing_values_graph(value_ct, null_value_ct, zero_length_ct, dummy_values_ct):
    """Draw a "Missing Values" pie chart from the supplied counts.

    "Value Present" and "Null Value" slices are always included; the
    "Zero-Length" and "Dummy Value" slices are added only when their counts
    are truthy.
    """
    slices = [("Value Present", value_ct), ("Null Value", null_value_ct)]
    if zero_length_ct:
        slices.append(("Zero-Length", zero_length_ct))
    if dummy_values_ct:
        slices.append(("Dummy Value", dummy_values_ct))

    dfg = pd.DataFrame(
        {
            "Status": [label for label, _ in slices],
            "Count": [count for _, count in slices],
        }
    )

    fig = px.pie(dfg, values="Count", names="Status", title="Missing Values")
    # Label each slice with its name and its share of the total.
    fig.update_traces(textinfo="percent+label")
    fig.update_layout(
        width=400,
        title_font={"color": "green"},
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
    )

    # Render the pie chart, stretched to the container width.
    st.plotly_chart(fig, use_container_width=True)
174
+
175
+
176
def write_top_freq_graph(input_str):
    """Draw a "Value Frequency" bar chart from newline-separated entries.

    Each line is parsed as ``<value> | <count>``, optionally prefixed by a
    leading "| " marker on the value. The count is taken from the text after
    the LAST " | " separator, so values that themselves contain " | " no
    longer shift the count out of position. Lines without a separator (for
    example blank lines) are skipped instead of raising IndexError. Nothing
    is drawn when no line can be parsed.

    Args:
        input_str: Multi-line string of value/count entries.

    Raises:
        ValueError: If the count portion of a line is not an integer.
    """
    categories = []
    frequencies = []

    for line in input_str.strip().split("\n"):
        value_part, separator, count_part = line.rpartition(" | ")
        if not separator:
            # No "value | count" structure on this line; ignore it.
            continue

        # Drop only the leading pipe marker, leaving pipes inside the value alone.
        value_part = value_part.strip()
        if value_part.startswith("| "):
            value_part = value_part[2:]

        categories.append(value_part.strip())
        frequencies.append(int(count_part))

    if not categories:
        return

    dff = pd.DataFrame({"Value": categories, "Frequency": frequencies})

    # Each bar is labelled with its share of the summed frequencies.
    total_count = dff["Frequency"].sum()
    dff["pct"] = (dff["Frequency"] / total_count * 100).round(2)

    fig = px.bar(dff, x="Value", y="Frequency", title="Value Frequency", text=dff["pct"].apply(lambda x: f"{x}%"))
    fig.update_traces(textposition="outside")
    # Force a categorical axis so numeric-looking values are not placed on a numeric scale.
    fig.update_xaxes(type="category")
    fig.update_layout(
        width=400,
        height=500,
        title_font={"color": "green"},
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
    )

    st.plotly_chart(fig)
214
+
215
+
216
def write_top_patterns_graph(input_str):
    """Draw a "Top Patterns" bar chart from a "category | count | ..." string.

    The input is read as alternating category / count elements separated by
    ' | '. An empty string or a trailing category without a count previously
    raised IndexError; now the unpaired element is ignored and nothing is
    drawn when there are no complete pairs.

    Args:
        input_str: ' | '-delimited string of category/count pairs.

    Raises:
        ValueError: If a count element is not an integer.
    """
    elements = input_str.split(" | ") if input_str else []

    # Even positions are categories, odd positions their counts; zip drops a
    # dangling final element rather than indexing past the end.
    pairs = list(zip(elements[0::2], elements[1::2]))
    if not pairs:
        return

    dff = pd.DataFrame(
        {
            "Category": [category for category, _ in pairs],
            "Frequency": [int(count) for _, count in pairs],
        }
    )

    fig = px.bar(dff, x="Category", y="Frequency", title="Top Patterns")
    fig.update_layout(
        width=400,
        title_font={"color": "green"},
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
    )

    st.plotly_chart(fig)
242
+
243
+
244
def write_box_plot(min_value, max_value, avg_value, stdev_value, percentile_25, percentile_75):
    """Draw a "Summary Stats" box plot from five summary values.

    The box is built from min, lower quartile, average, upper quartile and
    max. When a percentile is missing (None or NaN) the corresponding
    average -/+ one standard deviation is used instead. A percentile of 0 is
    a real value and is kept; the previous truthiness test discarded it.
    A dotted purple line marks average -/+ one standard deviation.

    Args:
        min_value: Minimum of the column.
        max_value: Maximum of the column.
        avg_value: Mean of the column.
        stdev_value: Standard deviation of the column.
        percentile_25: 25th percentile, or None/NaN when unavailable.
        percentile_75: 75th percentile, or None/NaN when unavailable.
    """
    stdev_low = avg_value - stdev_value
    stdev_high = avg_value + stdev_value

    # pd.isna covers both None and NaN while letting a genuine 0 through.
    iqr_25 = stdev_low if pd.isna(percentile_25) else percentile_25
    iqr_75 = stdev_high if pd.isna(percentile_75) else percentile_75

    df = pd.DataFrame(
        {
            "Value": [min_value, iqr_25, avg_value, iqr_75, max_value],
            "Category": ["Data Distribution"] * 5,
        }
    )

    fig = px.box(df, y="Value", title="Summary Stats", labels={"Value": "Value"})

    # Vertical dotted line spanning one standard deviation either side of the mean.
    fig.add_shape(
        go.layout.Shape(
            type="line",
            x0=0.5,
            x1=0.5,
            y0=stdev_low,
            y1=stdev_high,
            line={
                "color": "Purple",
                "width": 4,
                "dash": "dot",
            },
        )
    )

    fig.update_layout(
        width=400,
        title_font={"color": "green"},
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
    )
    st.plotly_chart(fig)
292
+
293
+
294
def show_profiling_detail(selected_row, form_data_width=400):
    """Lay out the full profiling detail view for one selected column.

    The anomaly expander is written first, then two equal columns: the left
    holds the textual lists chosen by ``general_type_abbr`` ("A", "N", "D" or
    "B"), the right holds whichever charts have data. The left column is
    rendered first because the alpha value analysis rewrites
    ``selected_row["top_patterns"]`` into the form the pattern chart reads.

    Args:
        selected_row: Row of profiling results, indexable by column name.
        form_data_width: Width passed through to the list renderers.
    """
    write_profile_screen(selected_row)

    text_column, chart_column = st.columns([0.5, 0.5])

    # Type-specific sections, in display order, keyed by the type abbreviation.
    sections_by_type = {
        "A": (write_alpha_missing_values, write_alpha_content_analysis, write_alpha_value_analysis),
        "N": (write_numeric_missing_values, write_numeric_value_analysis, write_stats_value_analysis),
        "D": (write_date_analysis,),
        "B": (write_boolean_analysis,),
    }

    with text_column:
        write_column_header(selected_row, form_data_width)
        write_shared_header(selected_row, form_data_width)
        for write_section in sections_by_type.get(selected_row["general_type_abbr"], ()):
            write_section(selected_row, form_data_width)

    with chart_column:
        if selected_row["avg_value"] is not None:
            write_box_plot(
                selected_row["min_value"],
                selected_row["max_value"],
                selected_row["avg_value"],
                selected_row["stdev_value"],
                selected_row["percentile_25"],
                selected_row["percentile_75"],
            )
        if selected_row["top_freq_values"] is not None:
            write_top_freq_graph(selected_row["top_freq_values"])
        if selected_row["top_patterns"] is not None:
            write_top_patterns_graph(selected_row["top_patterns"])
        write_missing_values_graph(
            selected_row["value_ct"],
            selected_row["null_value_ct"],
            selected_row["zero_length_ct"],
            selected_row["dummy_value_ct"],
        )
@@ -0,0 +1,40 @@
1
+ import logging
2
+
3
+ import streamlit as st
4
+
5
+ import testgen.ui.queries.profiling_queries as profiling_queries
6
+ import testgen.ui.services.form_service as fm
7
+ from testgen.ui.components import widgets as testgen
8
+ from testgen.ui.views.profiling_details import show_profiling_detail
9
+
10
+ LOG = logging.getLogger("testgen")
11
+
12
+ BUTTON_TEXT = ":green[Profiling →]" # Profiling ⚲
13
+ BUTTON_HELP = "Review profiling for highlighted column"
14
+ FORM_HEADER = "Profiling Results"
15
+
16
+
17
def view_profiling_modal(button_container, str_table_name, str_column_name,
                         str_profile_run_id=None, str_table_groups_id=None):
    """Place a "Profiling" button and show the column's profiling detail in a modal.

    The button is drawn inside ``button_container`` and opens the modal when
    clicked. While the modal is open, the profile run is resolved (the given
    id, otherwise the latest run looked up from ``str_table_groups_id``) and
    the first matching detail row is rendered. Nothing is rendered when no
    run id can be resolved or the detail lookup is empty.

    Args:
        button_container: Context manager in which the button is drawn.
        str_table_name: Table of the column to display.
        str_column_name: Column to display.
        str_profile_run_id: Optional explicit profile run id.
        str_table_groups_id: Optional table group id used as a fallback.
    """
    str_prompt = f"Column: {str_column_name}, Table: {str_table_name}"
    modal_viewer = testgen.Modal(title=None, key="dk-view", max_width=1100)

    with button_container:
        clicked = st.button(BUTTON_TEXT, help=BUTTON_HELP, use_container_width=True)
        if clicked:
            modal_viewer.open()

    if not modal_viewer.is_open():
        return

    with modal_viewer.container():
        run_id = str_profile_run_id
        if not run_id and str_table_groups_id:
            # No explicit run supplied: fall back to the table group's latest run.
            run_id = profiling_queries.get_latest_profile_run(str_table_groups_id)

        if run_id:
            df_detail = profiling_queries.get_profiling_detail(run_id, str_table_name, str_column_name)
            if not df_detail.empty:
                fm.render_modal_header(str_title=FORM_HEADER, str_prompt=str_prompt)
                show_profiling_detail(df_detail.iloc[0], 300)
40
+
@@ -0,0 +1,206 @@
1
+ import typing
2
+
3
+ import streamlit as st
4
+
5
+ import testgen.ui.queries.profiling_queries as profiling_queries
6
+ import testgen.ui.services.form_service as fm
7
+ import testgen.ui.services.toolbar_service as tb
8
+ from testgen.common import date_service
9
+ from testgen.ui.navigation.page import Page
10
+ from testgen.ui.session import session
11
+ from testgen.ui.views.profiling_details import show_profiling_detail
12
+
13
+ FORM_DATA_WIDTH = 400
14
+
15
+
16
class ProfilingResultsPage(Page):
    """Page registered at ``profiling/results`` that lists column profiling results."""

    path = "profiling/results"
    # Each entry returns a truthy value to allow activation, or the string "login".
    can_activate: typing.ClassVar = [
        lambda: session.authentication_status or "login",
    ]

    def render(self) -> None:
        """Draw the page: header, selection toolbar, results grid, export, and row detail.

        Requires ``"project"`` in ``st.session_state``; otherwise only a prompt
        is written. If ``"drill_profile_run"`` is present in the session state,
        it is used to pre-select the table group and profiling run.
        """
        export_container = fm.render_page_header(
            "Data Profiling Results",
            "https://docs.datakitchen.io/article/dataops-testgen-help/investigate-profiling",
            lst_breadcrumbs=[
                {"label": "Overview", "path": "overview"},
                {"label": "Data Profiling", "path": "profiling"},
                {"label": "Profiling Results", "path": None},
            ],
        )

        if "project" not in st.session_state:
            st.write("Select a Project from the Overview page.")
            return

        # SQL wildcard used to match all tables / all columns.
        sql_wildcard = "%%"

        project_code = st.session_state["project"]
        # Drill-down from another page, if any.
        drill_run_id = (
            st.session_state["drill_profile_run"] if "drill_profile_run" in st.session_state else ""
        )

        tool_bar = tb.ToolBar(4, 0, 1, None)

        # Defaults for the selectors, derived from the drilled-down run when present.
        if drill_run_id:
            default_run_date, default_table_group = profiling_queries.lookup_db_parentage_from_run(
                drill_run_id
            )
            default_run_date = date_service.get_timezoned_timestamp(st.session_state, default_run_date)
        else:
            default_run_date = ""
            default_table_group = ""

        with tool_bar.long_slots[0]:
            # Table Group selector (with passed default)
            table_groups_df = profiling_queries.run_table_groups_lookup_query(project_code)
            table_groups_id = fm.render_select(
                "Table Group", table_groups_df, "table_groups_name", "id", True, default_table_group, True
            )

        with tool_bar.long_slots[1]:
            # Profile Run selector (with passed default)
            runs_df = profiling_queries.get_db_profile_run_choices(table_groups_id)
            date_service.create_timezoned_column_in_dataframe(
                st.session_state, runs_df, "profile_run_date_with_timezone", "profile_run_date"
            )
            profile_run_id = fm.render_select(
                "Profile Run", runs_df, "profile_run_date_with_timezone", "id", True, default_run_date, True
            )

        # The "drill_profile_run" session key is not cleared here.

        with tool_bar.long_slots[2]:
            # Table Name selector
            tables_df = profiling_queries.run_table_lookup_query(table_groups_id)
            table_name = fm.render_select("Table Name", tables_df, "table_name", "table_name", False)

        with tool_bar.long_slots[3]:
            # Column Name selector, only offered once a table is chosen
            if table_name:
                columns_df = profiling_queries.run_column_lookup_query(table_groups_id, table_name)
                chosen_column = fm.render_select("Column Name", columns_df, "column_name", "column_name", False)
                column_name = chosen_column or sql_wildcard
            else:
                table_name = sql_wildcard
                column_name = sql_wildcard

        if not profile_run_id:
            st.markdown(":orange[Select a profiling run.]")
            return

        # Main results grid
        results_df = profiling_queries.get_profiling_detail(profile_run_id, table_name, column_name)
        grid_columns = [
            "schema_name",
            "table_name",
            "column_name",
            "column_type",
            "semantic_data_type",
            "anomalies",
        ]

        # CREATE script is offered only when a single table is selected and has rows.
        if len(results_df) > 0 and table_name != sql_wildcard:
            with st.expander("📜 **Table CREATE script with suggested datatypes**"):
                st.code(generate_create_script(results_df), "sql")

        selected_row = fm.render_grid_select(results_df, grid_columns)

        with export_container:
            export_columns = [
                "schema_name",
                "table_name",
                "column_name",
                "position",
                "column_type",
                "general_type",
                "semantic_table_type",
                "semantic_data_type",
                "datatype_suggestion",
                "anomalies",
                "record_ct",
                "value_ct",
                "distinct_value_ct",
                "top_freq_values",
                "null_value_ct",
                "min_length",
                "max_length",
                "avg_length",
                "distinct_std_value_ct",
                "numeric_ct",
                "date_ct",
                "dummy_value_ct",
                "zero_length_ct",
                "lead_space_ct",
                "quoted_value_ct",
                "includes_digit_ct",
                "embedded_space_ct",
                "avg_embedded_spaces",
                "min_text",
                "max_text",
                "std_pattern_match",
                "distinct_pattern_ct",
                "top_patterns",
                "distinct_value_hash",
                "min_value",
                "min_value_over_0",
                "max_value",
                "avg_value",
                "stdev_value",
                "percentile_25",
                "percentile_50",
                "percentile_75",
                "zero_value_ct",
                "fractional_sum",
                "min_date",
                "max_date",
                "before_1yr_date_ct",
                "before_5yr_date_ct",
                "within_1yr_date_ct",
                "within_1mo_date_ct",
                "future_date_ct",
                "date_days_present",
                "date_weeks_present",
                "date_months_present",
                "boolean_true_ct",
            ]
            wrap_columns = ["top_freq_values", "top_patterns"]
            caption = "{TIMESTAMP}"
            fm.render_excel_export(results_df, export_columns, "Profiling Results", caption, wrap_columns)

        # Profiling detail for the selected grid row
        if selected_row:
            show_profiling_detail(selected_row[0], FORM_DATA_WIDTH)
        else:
            st.markdown(":orange[Select a row to see profiling details.]")
185
+
186
+
187
def generate_create_script(df):
    """Build a ``CREATE TABLE`` statement using the suggested datatypes.

    Each row of ``df`` becomes one column definition. When the suggested
    datatype differs from the current ``column_type``, the definition is
    followed by a ``-- WAS <column_type>`` comment. Column names and types
    are left-aligned and padded to the longest value plus three spaces.

    Args:
        df: DataFrame with ``schema_name``, ``table_name``, ``column_name``,
            ``column_type`` and ``datatype_suggestion`` columns. Missing
            values are treated as empty strings. The schema and table names
            are taken from the first row.

    Returns:
        The script as a newline-joined string, or ``""`` if ``df`` has no rows.
    """
    ddf = df[["schema_name", "table_name", "column_name", "column_type", "datatype_suggestion"]].fillna("")
    if ddf.empty:
        return ""

    names = ddf["column_name"].tolist()
    current_types = ddf["column_type"].tolist()
    suggested_types = ddf["datatype_suggestion"].tolist()

    name_width = max(len(name) for name in names) + 3
    type_width = max(len(dtype) for dtype in suggested_types) + 3

    # Positional access: the frame's index labels need not start at 0.
    first_row = ddf.iloc[0]
    str_header = f"CREATE TABLE {first_row['schema_name']}.{first_row['table_name']} ( "

    last_pos = len(names) - 1
    col_defs = []
    for pos, (name, current, suggested) in enumerate(zip(names, current_types, suggested_types)):
        comment = f"-- WAS {current}" if current != suggested else ""
        definition = f" {name:<{name_width}} {suggested:<{type_width}}"
        if pos != last_pos:
            col_defs.append(f"{definition}, {comment}")
        elif comment:
            # Final column: no separating comma before the comment.
            col_defs.append(f"{definition} {comment}")
        else:
            # Final column without a comment: no comma and no padding.
            col_defs.append(definition.rstrip())

    str_footer = ");"
    return "\n".join([str_header, *col_defs, str_footer])