semantic-link-labs 0.12.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243)
  1. semantic_link_labs-0.12.8.dist-info/METADATA +354 -0
  2. semantic_link_labs-0.12.8.dist-info/RECORD +243 -0
  3. semantic_link_labs-0.12.8.dist-info/WHEEL +5 -0
  4. semantic_link_labs-0.12.8.dist-info/licenses/LICENSE +21 -0
  5. semantic_link_labs-0.12.8.dist-info/top_level.txt +1 -0
  6. sempy_labs/__init__.py +606 -0
  7. sempy_labs/_a_lib_info.py +2 -0
  8. sempy_labs/_ai.py +437 -0
  9. sempy_labs/_authentication.py +264 -0
  10. sempy_labs/_bpa_translation/_model/_translations_am-ET.po +869 -0
  11. sempy_labs/_bpa_translation/_model/_translations_ar-AE.po +908 -0
  12. sempy_labs/_bpa_translation/_model/_translations_bg-BG.po +968 -0
  13. sempy_labs/_bpa_translation/_model/_translations_ca-ES.po +963 -0
  14. sempy_labs/_bpa_translation/_model/_translations_cs-CZ.po +943 -0
  15. sempy_labs/_bpa_translation/_model/_translations_da-DK.po +945 -0
  16. sempy_labs/_bpa_translation/_model/_translations_de-DE.po +988 -0
  17. sempy_labs/_bpa_translation/_model/_translations_el-GR.po +993 -0
  18. sempy_labs/_bpa_translation/_model/_translations_es-ES.po +971 -0
  19. sempy_labs/_bpa_translation/_model/_translations_fa-IR.po +933 -0
  20. sempy_labs/_bpa_translation/_model/_translations_fi-FI.po +942 -0
  21. sempy_labs/_bpa_translation/_model/_translations_fr-FR.po +994 -0
  22. sempy_labs/_bpa_translation/_model/_translations_ga-IE.po +967 -0
  23. sempy_labs/_bpa_translation/_model/_translations_he-IL.po +902 -0
  24. sempy_labs/_bpa_translation/_model/_translations_hi-IN.po +944 -0
  25. sempy_labs/_bpa_translation/_model/_translations_hu-HU.po +963 -0
  26. sempy_labs/_bpa_translation/_model/_translations_id-ID.po +946 -0
  27. sempy_labs/_bpa_translation/_model/_translations_is-IS.po +939 -0
  28. sempy_labs/_bpa_translation/_model/_translations_it-IT.po +986 -0
  29. sempy_labs/_bpa_translation/_model/_translations_ja-JP.po +846 -0
  30. sempy_labs/_bpa_translation/_model/_translations_ko-KR.po +839 -0
  31. sempy_labs/_bpa_translation/_model/_translations_mt-MT.po +967 -0
  32. sempy_labs/_bpa_translation/_model/_translations_nl-NL.po +978 -0
  33. sempy_labs/_bpa_translation/_model/_translations_pl-PL.po +962 -0
  34. sempy_labs/_bpa_translation/_model/_translations_pt-BR.po +962 -0
  35. sempy_labs/_bpa_translation/_model/_translations_pt-PT.po +957 -0
  36. sempy_labs/_bpa_translation/_model/_translations_ro-RO.po +968 -0
  37. sempy_labs/_bpa_translation/_model/_translations_ru-RU.po +964 -0
  38. sempy_labs/_bpa_translation/_model/_translations_sk-SK.po +952 -0
  39. sempy_labs/_bpa_translation/_model/_translations_sl-SL.po +950 -0
  40. sempy_labs/_bpa_translation/_model/_translations_sv-SE.po +942 -0
  41. sempy_labs/_bpa_translation/_model/_translations_ta-IN.po +976 -0
  42. sempy_labs/_bpa_translation/_model/_translations_te-IN.po +947 -0
  43. sempy_labs/_bpa_translation/_model/_translations_th-TH.po +924 -0
  44. sempy_labs/_bpa_translation/_model/_translations_tr-TR.po +953 -0
  45. sempy_labs/_bpa_translation/_model/_translations_uk-UA.po +961 -0
  46. sempy_labs/_bpa_translation/_model/_translations_zh-CN.po +804 -0
  47. sempy_labs/_bpa_translation/_model/_translations_zu-ZA.po +969 -0
  48. sempy_labs/_capacities.py +1198 -0
  49. sempy_labs/_capacity_migration.py +660 -0
  50. sempy_labs/_clear_cache.py +351 -0
  51. sempy_labs/_connections.py +610 -0
  52. sempy_labs/_dashboards.py +69 -0
  53. sempy_labs/_data_access_security.py +98 -0
  54. sempy_labs/_data_pipelines.py +162 -0
  55. sempy_labs/_dataflows.py +668 -0
  56. sempy_labs/_dax.py +501 -0
  57. sempy_labs/_daxformatter.py +80 -0
  58. sempy_labs/_delta_analyzer.py +467 -0
  59. sempy_labs/_delta_analyzer_history.py +301 -0
  60. sempy_labs/_dictionary_diffs.py +221 -0
  61. sempy_labs/_documentation.py +147 -0
  62. sempy_labs/_domains.py +51 -0
  63. sempy_labs/_eventhouses.py +182 -0
  64. sempy_labs/_external_data_shares.py +230 -0
  65. sempy_labs/_gateways.py +521 -0
  66. sempy_labs/_generate_semantic_model.py +521 -0
  67. sempy_labs/_get_connection_string.py +84 -0
  68. sempy_labs/_git.py +543 -0
  69. sempy_labs/_graphQL.py +90 -0
  70. sempy_labs/_helper_functions.py +2833 -0
  71. sempy_labs/_icons.py +149 -0
  72. sempy_labs/_job_scheduler.py +609 -0
  73. sempy_labs/_kql_databases.py +149 -0
  74. sempy_labs/_kql_querysets.py +124 -0
  75. sempy_labs/_kusto.py +137 -0
  76. sempy_labs/_labels.py +124 -0
  77. sempy_labs/_list_functions.py +1720 -0
  78. sempy_labs/_managed_private_endpoints.py +253 -0
  79. sempy_labs/_mirrored_databases.py +416 -0
  80. sempy_labs/_mirrored_warehouses.py +60 -0
  81. sempy_labs/_ml_experiments.py +113 -0
  82. sempy_labs/_model_auto_build.py +140 -0
  83. sempy_labs/_model_bpa.py +557 -0
  84. sempy_labs/_model_bpa_bulk.py +378 -0
  85. sempy_labs/_model_bpa_rules.py +859 -0
  86. sempy_labs/_model_dependencies.py +343 -0
  87. sempy_labs/_mounted_data_factories.py +123 -0
  88. sempy_labs/_notebooks.py +441 -0
  89. sempy_labs/_one_lake_integration.py +151 -0
  90. sempy_labs/_onelake.py +131 -0
  91. sempy_labs/_query_scale_out.py +433 -0
  92. sempy_labs/_refresh_semantic_model.py +435 -0
  93. sempy_labs/_semantic_models.py +468 -0
  94. sempy_labs/_spark.py +455 -0
  95. sempy_labs/_sql.py +241 -0
  96. sempy_labs/_sql_audit_settings.py +207 -0
  97. sempy_labs/_sql_endpoints.py +214 -0
  98. sempy_labs/_tags.py +201 -0
  99. sempy_labs/_translations.py +43 -0
  100. sempy_labs/_user_delegation_key.py +44 -0
  101. sempy_labs/_utils.py +79 -0
  102. sempy_labs/_vertipaq.py +1021 -0
  103. sempy_labs/_vpax.py +388 -0
  104. sempy_labs/_warehouses.py +234 -0
  105. sempy_labs/_workloads.py +140 -0
  106. sempy_labs/_workspace_identity.py +72 -0
  107. sempy_labs/_workspaces.py +595 -0
  108. sempy_labs/admin/__init__.py +170 -0
  109. sempy_labs/admin/_activities.py +167 -0
  110. sempy_labs/admin/_apps.py +145 -0
  111. sempy_labs/admin/_artifacts.py +65 -0
  112. sempy_labs/admin/_basic_functions.py +463 -0
  113. sempy_labs/admin/_capacities.py +508 -0
  114. sempy_labs/admin/_dataflows.py +45 -0
  115. sempy_labs/admin/_datasets.py +186 -0
  116. sempy_labs/admin/_domains.py +522 -0
  117. sempy_labs/admin/_external_data_share.py +100 -0
  118. sempy_labs/admin/_git.py +72 -0
  119. sempy_labs/admin/_items.py +265 -0
  120. sempy_labs/admin/_labels.py +211 -0
  121. sempy_labs/admin/_reports.py +241 -0
  122. sempy_labs/admin/_scanner.py +118 -0
  123. sempy_labs/admin/_shared.py +82 -0
  124. sempy_labs/admin/_sharing_links.py +110 -0
  125. sempy_labs/admin/_tags.py +131 -0
  126. sempy_labs/admin/_tenant.py +503 -0
  127. sempy_labs/admin/_tenant_keys.py +89 -0
  128. sempy_labs/admin/_users.py +140 -0
  129. sempy_labs/admin/_workspaces.py +236 -0
  130. sempy_labs/deployment_pipeline/__init__.py +23 -0
  131. sempy_labs/deployment_pipeline/_items.py +580 -0
  132. sempy_labs/directlake/__init__.py +57 -0
  133. sempy_labs/directlake/_autosync.py +58 -0
  134. sempy_labs/directlake/_directlake_schema_compare.py +120 -0
  135. sempy_labs/directlake/_directlake_schema_sync.py +161 -0
  136. sempy_labs/directlake/_dl_helper.py +274 -0
  137. sempy_labs/directlake/_generate_shared_expression.py +94 -0
  138. sempy_labs/directlake/_get_directlake_lakehouse.py +62 -0
  139. sempy_labs/directlake/_get_shared_expression.py +34 -0
  140. sempy_labs/directlake/_guardrails.py +96 -0
  141. sempy_labs/directlake/_list_directlake_model_calc_tables.py +70 -0
  142. sempy_labs/directlake/_show_unsupported_directlake_objects.py +90 -0
  143. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +239 -0
  144. sempy_labs/directlake/_update_directlake_partition_entity.py +259 -0
  145. sempy_labs/directlake/_warm_cache.py +236 -0
  146. sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
  147. sempy_labs/environment/__init__.py +23 -0
  148. sempy_labs/environment/_items.py +212 -0
  149. sempy_labs/environment/_pubstage.py +223 -0
  150. sempy_labs/eventstream/__init__.py +37 -0
  151. sempy_labs/eventstream/_items.py +263 -0
  152. sempy_labs/eventstream/_topology.py +652 -0
  153. sempy_labs/graph/__init__.py +59 -0
  154. sempy_labs/graph/_groups.py +651 -0
  155. sempy_labs/graph/_sensitivity_labels.py +120 -0
  156. sempy_labs/graph/_teams.py +125 -0
  157. sempy_labs/graph/_user_licenses.py +96 -0
  158. sempy_labs/graph/_users.py +516 -0
  159. sempy_labs/graph_model/__init__.py +15 -0
  160. sempy_labs/graph_model/_background_jobs.py +63 -0
  161. sempy_labs/graph_model/_items.py +149 -0
  162. sempy_labs/lakehouse/__init__.py +67 -0
  163. sempy_labs/lakehouse/_blobs.py +247 -0
  164. sempy_labs/lakehouse/_get_lakehouse_columns.py +102 -0
  165. sempy_labs/lakehouse/_get_lakehouse_tables.py +274 -0
  166. sempy_labs/lakehouse/_helper.py +250 -0
  167. sempy_labs/lakehouse/_lakehouse.py +351 -0
  168. sempy_labs/lakehouse/_livy_sessions.py +143 -0
  169. sempy_labs/lakehouse/_materialized_lake_views.py +157 -0
  170. sempy_labs/lakehouse/_partitioning.py +165 -0
  171. sempy_labs/lakehouse/_schemas.py +217 -0
  172. sempy_labs/lakehouse/_shortcuts.py +440 -0
  173. sempy_labs/migration/__init__.py +35 -0
  174. sempy_labs/migration/_create_pqt_file.py +238 -0
  175. sempy_labs/migration/_direct_lake_to_import.py +105 -0
  176. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +398 -0
  177. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +148 -0
  178. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +533 -0
  179. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +172 -0
  180. sempy_labs/migration/_migration_validation.py +71 -0
  181. sempy_labs/migration/_refresh_calc_tables.py +131 -0
  182. sempy_labs/mirrored_azure_databricks_catalog/__init__.py +15 -0
  183. sempy_labs/mirrored_azure_databricks_catalog/_discover.py +213 -0
  184. sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py +45 -0
  185. sempy_labs/ml_model/__init__.py +23 -0
  186. sempy_labs/ml_model/_functions.py +427 -0
  187. sempy_labs/report/_BPAReportTemplate.json +232 -0
  188. sempy_labs/report/__init__.py +55 -0
  189. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  190. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  191. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  192. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  193. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  194. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  195. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  196. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  197. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  198. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  199. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  200. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  201. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  202. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  203. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  204. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  205. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  206. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  207. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  208. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  209. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  210. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  211. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  212. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  213. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  214. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  215. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  216. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  217. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  218. sempy_labs/report/_download_report.py +76 -0
  219. sempy_labs/report/_export_report.py +257 -0
  220. sempy_labs/report/_generate_report.py +427 -0
  221. sempy_labs/report/_paginated.py +76 -0
  222. sempy_labs/report/_report_bpa.py +354 -0
  223. sempy_labs/report/_report_bpa_rules.py +115 -0
  224. sempy_labs/report/_report_functions.py +581 -0
  225. sempy_labs/report/_report_helper.py +227 -0
  226. sempy_labs/report/_report_list_functions.py +110 -0
  227. sempy_labs/report/_report_rebind.py +149 -0
  228. sempy_labs/report/_reportwrapper.py +3100 -0
  229. sempy_labs/report/_save_report.py +147 -0
  230. sempy_labs/snowflake_database/__init__.py +10 -0
  231. sempy_labs/snowflake_database/_items.py +105 -0
  232. sempy_labs/sql_database/__init__.py +21 -0
  233. sempy_labs/sql_database/_items.py +201 -0
  234. sempy_labs/sql_database/_mirroring.py +79 -0
  235. sempy_labs/theme/__init__.py +12 -0
  236. sempy_labs/theme/_org_themes.py +129 -0
  237. sempy_labs/tom/__init__.py +3 -0
  238. sempy_labs/tom/_model.py +5977 -0
  239. sempy_labs/variable_library/__init__.py +19 -0
  240. sempy_labs/variable_library/_functions.py +403 -0
  241. sempy_labs/warehouse/__init__.py +28 -0
  242. sempy_labs/warehouse/_items.py +234 -0
  243. sempy_labs/warehouse/_restore_points.py +309 -0
sempy_labs/_vertipaq.py
@@ -0,0 +1,1021 @@
import sempy.fabric as fabric
import pandas as pd
from IPython.display import display, HTML
import zipfile
import os
import shutil
import datetime
import warnings
from sempy_labs._helper_functions import (
    format_dax_object_name,
    save_as_delta_table,
    resolve_workspace_capacity,
    _get_column_aggregate,
    resolve_workspace_name_and_id,
    resolve_dataset_name_and_id,
    _create_spark_session,
    resolve_workspace_id,
    resolve_workspace_name,
)
from sempy_labs._list_functions import list_relationships, list_tables
from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
from typing import Optional
from sempy._utils._log import log
import sempy_labs._icons as icons
from pathlib import Path
from uuid import UUID


@log
def vertipaq_analyzer(
    dataset: str | UUID,
    workspace: Optional[str | UUID] = None,
    export: Optional[str] = None,
    read_stats_from_data: bool = False,
    **kwargs,
) -> dict[str, pd.DataFrame]:
    """
    Displays an HTML visualization of the `Vertipaq Analyzer <https://www.sqlbi.com/tools/vertipaq-analyzer/>`_ statistics from a semantic model.

    `Vertipaq Analyzer <https://www.sqlbi.com/tools/vertipaq-analyzer/>`_ is an open-sourced tool built by SQLBI. It provides a detailed analysis of the VertiPaq engine, which is the in-memory engine used by Power BI and Analysis Services Tabular models.

    Parameters
    ----------
    dataset : str | uuid.UUID
        Name or ID of the semantic model.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID in which the semantic model exists.
        Defaults to None, which resolves to the workspace of the attached lakehouse
        or, if no lakehouse is attached, to the workspace of the notebook.
    export : str, default=None
        Specifying 'zip' will export the results to a zip file in your lakehouse (which can be imported using the import_vertipaq_analyzer function).
        Specifying 'table' will export the results to delta tables (appended) in your lakehouse.
        Default value: None.
    read_stats_from_data : bool, default=False
        Setting this parameter to True has the function retrieve Column Cardinality and Missing Rows using DAX (Direct Lake semantic models achieve this using a Spark query to the lakehouse).

    Returns
    -------
    dict[str, pandas.DataFrame]
        A dictionary of pandas dataframes showing the vertipaq analyzer statistics.
    """

    from sempy_labs.tom import connect_semantic_model

    if "lakehouse_workspace" in kwargs:
        print(
            f"{icons.info} The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
        )
        del kwargs["lakehouse_workspace"]

    pd.options.mode.copy_on_write = True
    warnings.filterwarnings(
        "ignore", message="createDataFrame attempted Arrow optimization*"
    )

    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

    fabric.refresh_tom_cache(workspace=workspace)

    vertipaq_map = {
        "Model": {
            "Dataset Name": [icons.data_type_string, icons.no_format],
            "Total Size": [icons.data_type_long, icons.int_format],
            "Table Count": [icons.data_type_long, icons.int_format],
            "Column Count": [icons.data_type_long, icons.int_format],
            "Compatibility Level": [icons.data_type_long, icons.no_format],
            "Default Mode": [icons.data_type_string, icons.no_format],
        },
        "Tables": {
            "Table Name": [icons.data_type_string, icons.no_format],
            "Type": [icons.data_type_string, icons.no_format],
            "Row Count": [icons.data_type_long, icons.int_format],
            "Total Size": [icons.data_type_long, icons.int_format],
            "Dictionary Size": [icons.data_type_long, icons.int_format],
            "Data Size": [icons.data_type_long, icons.int_format],
            "Hierarchy Size": [icons.data_type_long, icons.int_format],
            "Relationship Size": [icons.data_type_long, icons.int_format],
            "User Hierarchy Size": [icons.data_type_long, icons.int_format],
            "Partitions": [icons.data_type_long, icons.int_format],
            "Columns": [icons.data_type_long, icons.int_format],
            "% DB": [icons.data_type_double, icons.pct_format],
        },
        "Partitions": {
            "Table Name": [icons.data_type_string, icons.no_format],
            "Partition Name": [icons.data_type_string, icons.no_format],
            "Mode": [icons.data_type_string, icons.no_format],
            "Record Count": [icons.data_type_long, icons.int_format],
            "Segment Count": [icons.data_type_long, icons.int_format],
            "Records per Segment": [icons.data_type_double, icons.int_format],
        },
        "Columns": {
            "Table Name": [icons.data_type_string, icons.no_format],
            "Column Name": [icons.data_type_string, icons.no_format],
            "Type": [icons.data_type_string, icons.no_format],
            "Cardinality": [icons.data_type_long, icons.int_format],
            "Total Size": [icons.data_type_long, icons.int_format],
            "Data Size": [icons.data_type_long, icons.int_format],
            "Dictionary Size": [icons.data_type_long, icons.int_format],
            "Hierarchy Size": [icons.data_type_long, icons.int_format],
            "% Table": [icons.data_type_double, icons.pct_format],
            "% DB": [icons.data_type_double, icons.pct_format],
            "Data Type": [icons.data_type_string, icons.no_format],
            "Encoding": [icons.data_type_string, icons.no_format],
            "Is Resident": [icons.data_type_bool, icons.no_format],
            "Temperature": [icons.data_type_double, icons.int_format],
            "Last Accessed": [icons.data_type_timestamp, icons.no_format],
        },
        "Hierarchies": {
            "Table Name": [icons.data_type_string, icons.no_format],
            "Hierarchy Name": [icons.data_type_string, icons.no_format],
            "Used Size": [icons.data_type_long, icons.int_format],
        },
        "Relationships": {
            "From Object": [icons.data_type_string, icons.no_format],
            "To Object": [icons.data_type_string, icons.no_format],
            "Multiplicity": [icons.data_type_string, icons.no_format],
            "Used Size": [icons.data_type_long, icons.int_format],
            "Max From Cardinality": [icons.data_type_long, icons.int_format],
            "Max To Cardinality": [icons.data_type_long, icons.int_format],
            "Missing Rows": [icons.data_type_long, icons.int_format],
        },
    }

    with connect_semantic_model(
        dataset=dataset_id, workspace=workspace_id, readonly=True
    ) as tom:
        compat_level = tom.model.Model.Database.CompatibilityLevel
        is_direct_lake = tom.is_direct_lake()
        def_mode = tom.model.DefaultMode
        table_count = tom.model.Tables.Count
        column_count = len(list(tom.all_columns()))
        if table_count == 0:
            print(
                f"{icons.warning} The '{dataset_name}' semantic model within the '{workspace_name}' workspace has no tables. Vertipaq Analyzer can only be run if the semantic model has tables."
            )
            return

    dfT = list_tables(dataset=dataset_id, extended=True, workspace=workspace_id)

    dfT.rename(columns={"Name": "Table Name"}, inplace=True)
    columns_to_keep = list(vertipaq_map["Tables"].keys())
    dfT = dfT[dfT.columns.intersection(columns_to_keep)]

    dfC = fabric.list_columns(dataset=dataset_id, extended=True, workspace=workspace_id)
    dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
    dfC.rename(columns={"Column Cardinality": "Cardinality"}, inplace=True)
    dfH = fabric.list_hierarchies(
        dataset=dataset_id, extended=True, workspace=workspace_id
    )
    dfR = list_relationships(dataset=dataset_id, extended=True, workspace=workspace_id)
    dfP = fabric.list_partitions(
        dataset=dataset_id, extended=True, workspace=workspace_id
    )

    artifact_type = None
    lakehouse_workspace_id = None
    lakehouse_name = None
    # if is_direct_lake:
    #     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
    #         get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
    #     )

    dfR["Missing Rows"] = 0
    dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)

    # Direct Lake
    if read_stats_from_data:
        if is_direct_lake and artifact_type == "Lakehouse":
            dfC = pd.merge(
                dfC,
                dfP[["Table Name", "Query", "Source Type"]],
                on="Table Name",
                how="left",
            )
            dfC_flt = dfC[
                (dfC["Source Type"] == "Entity")
                & (~dfC["Column Name"].str.startswith("RowNumber-"))
            ]

            object_workspace = resolve_workspace_name(
                workspace_id=lakehouse_workspace_id
            )
            current_workspace_id = resolve_workspace_id()
            if current_workspace_id != lakehouse_workspace_id:
                lakeTables = get_lakehouse_tables(
                    lakehouse=lakehouse_name, workspace=object_workspace
                )

            sql_statements = []
            spark = _create_spark_session()
            # Loop through tables
            for lakeTName in dfC_flt["Query"].unique():
                query = "SELECT "
                columns_in_table = dfC_flt.loc[
                    dfC_flt["Query"] == lakeTName, "Source"
                ].unique()

                # Loop through columns within those tables
                for scName in columns_in_table:
                    query = query + f"COUNT(DISTINCT(`{scName}`)) AS `{scName}`, "

                query = query[:-2]
                if lakehouse_workspace_id == current_workspace_id:
                    query = query + f" FROM {lakehouse_name}.{lakeTName}"
                else:
                    lakeTables_filt = lakeTables[lakeTables["Table Name"] == lakeTName]
                    tPath = lakeTables_filt["Location"].iloc[0]

                    df = spark.read.format("delta").load(tPath)
                    tempTableName = "delta_table_" + lakeTName
                    df.createOrReplaceTempView(tempTableName)
                    query = query + f" FROM {tempTableName}"
                sql_statements.append((lakeTName, query))

            for o in sql_statements:
                tName = o[0]
                query = o[1]

                df = spark.sql(query)

                for column in df.columns:
                    x = df.collect()[0][column]
                    for i, r in dfC.iterrows():
                        if r["Query"] == tName and r["Source"] == column:
                            dfC.at[i, "Cardinality"] = x

            # Remove column added temporarily
            dfC.drop(columns=["Query", "Source Type"], inplace=True)

            # Direct Lake missing rows
            dfR = pd.merge(
                dfR,
                dfP[["Table Name", "Query"]],
                left_on="From Table",
                right_on="Table Name",
                how="left",
            )
            dfR.rename(columns={"Query": "From Lake Table"}, inplace=True)
            dfR.drop(columns=["Table Name"], inplace=True)
            dfR = pd.merge(
                dfR,
                dfP[["Table Name", "Query"]],
                left_on="To Table",
                right_on="Table Name",
                how="left",
            )
            dfR.rename(columns={"Query": "To Lake Table"}, inplace=True)
            dfR.drop(columns=["Table Name"], inplace=True)
            dfR = pd.merge(
                dfR,
                dfC[["Column Object", "Source"]],
                left_on="From Object",
                right_on="Column Object",
                how="left",
            )
            dfR.rename(columns={"Source": "From Lake Column"}, inplace=True)
            dfR.drop(columns=["Column Object"], inplace=True)
            dfR = pd.merge(
                dfR,
                dfC[["Column Object", "Source"]],
                left_on="To Object",
                right_on="Column Object",
                how="left",
            )
            dfR.rename(columns={"Source": "To Lake Column"}, inplace=True)
            dfR.drop(columns=["Column Object"], inplace=True)

            spark = _create_spark_session()
            for i, r in dfR.iterrows():
                fromTable = r["From Lake Table"]
                fromColumn = r["From Lake Column"]
                toTable = r["To Lake Table"]
                toColumn = r["To Lake Column"]

                if lakehouse_workspace_id == current_workspace_id:
                    query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"
                else:
                    tempTableFrom = f"delta_table_{fromTable}"
                    tempTableTo = f"delta_table_{toTable}"

                    query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {tempTableFrom} as f\nleft join {tempTableTo} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"

                # query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"

                df = spark.sql(query)
                missingRows = df.collect()[0][0]
                dfR.at[i, "Missing Rows"] = missingRows

            dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)
        elif not is_direct_lake:
            # Calculate missing rows using DAX for non-direct lake
            for i, r in dfR.iterrows():
                fromTable = r["From Table"]
                fromColumn = r["From Column"]
                toTable = r["To Table"]
                toColumn = r["To Column"]
                isActive = bool(r["Active"])
                fromObject = format_dax_object_name(fromTable, fromColumn)
                toObject = format_dax_object_name(toTable, toColumn)

                missingRows = 0

                query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),isblank({toObject}))\n)"

                if not isActive:
                    query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),userelationship({fromObject},{toObject}),isblank({toObject}))\n)"

                result = fabric.evaluate_dax(
                    dataset=dataset_id, dax_string=query, workspace=workspace_id
                )

                try:
                    missingRows = result.iloc[0, 0]
                except Exception:
                    pass

                dfR.at[i, "Missing Rows"] = missingRows
            dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)

    table_totals = dfC.groupby("Table Name")["Total Size"].transform("sum")
    db_total_size = dfC["Total Size"].sum()
    dfC["% Table"] = round((dfC["Total Size"] / table_totals) * 100, 2)
    dfC["% DB"] = round((dfC["Total Size"] / db_total_size) * 100, 2)
    columnList = list(vertipaq_map["Columns"].keys())

    dfC = dfC[dfC["Type"] != "RowNumber"].reset_index(drop=True)

    colSize = dfC[columnList].sort_values(by="Total Size", ascending=False)
    temp = dfC[columnList].sort_values(by="Temperature", ascending=False)
    colSize.reset_index(drop=True, inplace=True)
    temp.reset_index(drop=True, inplace=True)

    export_Col = colSize.copy()
    export_Table = dfT.copy()

    # Relationships
    dfR = pd.merge(
        dfR,
        dfC[["Column Object", "Cardinality"]],
        left_on="From Object",
        right_on="Column Object",
        how="left",
    )
    dfR.rename(columns={"Cardinality": "Max From Cardinality"}, inplace=True)
    dfR = pd.merge(
        dfR,
        dfC[["Column Object", "Cardinality"]],
        left_on="To Object",
        right_on="Column Object",
        how="left",
    )
    dfR.rename(columns={"Cardinality": "Max To Cardinality"}, inplace=True)
    dfR = dfR[
        [
            "From Object",
            "To Object",
            "Multiplicity",
            "Used Size",
            "Max From Cardinality",
            "Max To Cardinality",
            "Missing Rows",
        ]
    ].sort_values(by="Used Size", ascending=False)
    dfR.reset_index(drop=True, inplace=True)
    export_Rel = dfR.copy()

    # Partitions
    dfP = dfP[
        [
            "Table Name",
            "Partition Name",
            "Mode",
            "Record Count",
            "Segment Count",
            # "Records per Segment",
        ]
    ].sort_values(by="Record Count", ascending=False)
    dfP["Records per Segment"] = round(
        dfP["Record Count"] / dfP["Segment Count"], 2
    )  # Remove after records per segment is fixed
    dfP.reset_index(drop=True, inplace=True)
    export_Part = dfP.copy()

    # Hierarchies
    dfH_filt = dfH[dfH["Level Ordinal"] == 0]
    dfH_filt = dfH_filt[["Table Name", "Hierarchy Name", "Used Size"]].sort_values(
        by="Used Size", ascending=False
    )
    dfH_filt.reset_index(drop=True, inplace=True)
    dfH_filt.fillna({"Used Size": 0}, inplace=True)
    dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int)
    export_Hier = dfH_filt.copy()

    # Model
    # Converting to KB/MB/GB necessitates division by 1024 * 1000.
    if db_total_size >= 1000000000:
        y = db_total_size / (1024**3) * 1000000000
    elif db_total_size >= 1000000:
        y = db_total_size / (1024**2) * 1000000
    elif db_total_size >= 1000:
        y = db_total_size / (1024) * 1000
    else:
        y = db_total_size
    y = round(y)

    dfModel = pd.DataFrame(
        {
            "Dataset Name": dataset_name,
            "Total Size": y,
            "Table Count": table_count,
            "Column Count": column_count,
            "Compatibility Level": compat_level,
            "Default Mode": def_mode,
        },
        index=[0],
    )
    dfModel.reset_index(drop=True, inplace=True)
    dfModel["Default Mode"] = dfModel["Default Mode"].astype(str)
    export_Model = dfModel.copy()

    def _style_columns_based_on_types(dataframe: pd.DataFrame, column_type_mapping):
        # Define formatting functions based on the type mappings
        format_funcs = {
            "int": lambda x: "{:,}".format(x) if pd.notnull(x) else "",
            "pct": lambda x: "{:.2f}%".format(x) if pd.notnull(x) else "",
            "": lambda x: "{}".format(x),
        }

        # Apply the formatting function to each column based on its specified type
        for col, dt in column_type_mapping.items():
            if dt in format_funcs:
                dataframe[col] = dataframe[col].map(format_funcs[dt])

        return dataframe

    dfModel = _style_columns_based_on_types(
        dfModel,
        column_type_mapping={
            key: values[1] for key, values in vertipaq_map["Model"].items()
        },
    )
    dfT = _style_columns_based_on_types(
        dfT,
        column_type_mapping={
            key: values[1] for key, values in vertipaq_map["Tables"].items()
        },
    )
    dfP = _style_columns_based_on_types(
        dfP,
        column_type_mapping={
            key: values[1] for key, values in vertipaq_map["Partitions"].items()
        },
    )
    colSize = _style_columns_based_on_types(
        colSize,
        column_type_mapping={
            key: values[1] for key, values in vertipaq_map["Columns"].items()
        },
    )
    temp = _style_columns_based_on_types(
        temp,
        column_type_mapping={
            key: values[1] for key, values in vertipaq_map["Columns"].items()
        },
    )
    dfR = _style_columns_based_on_types(
        dfR,
        column_type_mapping={
            key: values[1] for key, values in vertipaq_map["Relationships"].items()
        },
    )
    dfH_filt = _style_columns_based_on_types(
        dfH_filt,
        column_type_mapping={
            key: values[1] for key, values in vertipaq_map["Hierarchies"].items()
        },
    )

    dataFrames = {
        "dfModel": dfModel,
        "dfT": dfT,
        "dfP": dfP,
        "colSize": colSize,
        "temp": temp,
        "dfR": dfR,
        "dfH_filt": dfH_filt,
    }

    dfs = {}
    for fileName, df in dataFrames.items():
        dfs[fileName] = df

    if export is None:
        visualize_vertipaq(dfs)
        return {
            "Model Summary": export_Model,
            "Tables": export_Table,
            "Partitions": export_Part,
            "Columns": export_Col,
            "Relationships": export_Rel,
            "Hierarchies": export_Hier,
        }

    # Export vertipaq to delta tables in lakehouse
    if export in ["table", "zip"]:
        if not lakehouse_attached():
            raise ValueError(
                f"{icons.red_dot} In order to save the Vertipaq Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
            )

    if export == "table":
        lakeTName = "vertipaqanalyzer_model"

        lakeT = get_lakehouse_tables()
        lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName]

        if len(lakeT_filt) == 0:
            runId = 1
        else:
            max_run_id = _get_column_aggregate(table_name=lakeTName)
            runId = max_run_id + 1

        dfMap = {
            "Columns": ["Columns", export_Col],
            "Tables": ["Tables", export_Table],
            "Partitions": ["Partitions", export_Part],
            "Relationships": ["Relationships", export_Rel],
            "Hierarchies": ["Hierarchies", export_Hier],
            "Model": ["Model", export_Model],
        }

        print(
            f"{icons.in_progress} Saving Vertipaq Analyzer to delta tables in the lakehouse...\n"
        )
        now = datetime.datetime.now()
        dfD = fabric.list_datasets(workspace=workspace_id, mode="rest")
        dfD_filt = dfD[dfD["Dataset Id"] == dataset_id]
        configured_by = dfD_filt["Configured By"].iloc[0]
        capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace_id)

        for key_name, (obj, df) in dfMap.items():
            df["Capacity Name"] = capacity_name
            df["Capacity Id"] = capacity_id
            df["Configured By"] = configured_by
            df["Workspace Name"] = workspace_name
            df["Workspace Id"] = workspace_id
            df["Dataset Name"] = dataset_name
            df["Dataset Id"] = dataset_id
            df["RunId"] = runId
            df["Timestamp"] = now

            colName = "Capacity Name"
            df.insert(0, colName, df.pop(colName))
            colName = "Capacity Id"
            df.insert(1, colName, df.pop(colName))
            colName = "Workspace Name"
            df.insert(2, colName, df.pop(colName))
            colName = "Workspace Id"
            df.insert(3, colName, df.pop(colName))
            colName = "Dataset Name"
            df.insert(4, colName, df.pop(colName))
            colName = "Dataset Id"
            df.insert(5, colName, df.pop(colName))
            colName = "Configured By"
            df.insert(6, colName, df.pop(colName))

            df.columns = df.columns.str.replace(" ", "_")

            schema = {
                "Capacity_Name": icons.data_type_string,
                "Capacity_Id": icons.data_type_string,
                "Workspace_Name": icons.data_type_string,
                "Workspace_Id": icons.data_type_string,
                "Dataset_Name": icons.data_type_string,
                "Dataset_Id": icons.data_type_string,
                "Configured_By": icons.data_type_string,
            }

            schema.update(
                {
                    key.replace(" ", "_"): value[0]
                    for key, value in vertipaq_map[key_name].items()
                }
            )
            schema["RunId"] = icons.data_type_long
            schema["Timestamp"] = icons.data_type_timestamp

            delta_table_name = f"VertipaqAnalyzer_{obj}".lower()
            save_as_delta_table(
                dataframe=df,
                delta_table_name=delta_table_name,
                write_mode="append",
                schema=schema,
                merge_schema=True,
            )

    # Export vertipaq to zip file within the lakehouse
    if export == "zip":
        dataFrames = {
            "dfModel": dfModel,
            "dfT": dfT,
            "dfP": dfP,
            "colSize": colSize,
            "temp": temp,
            "dfR": dfR,
            "dfH_filt": dfH_filt,
        }

        zipFileName = f"{workspace_name}.{dataset_name}.zip"

        folderPath = "/lakehouse/default/Files"
        subFolderPath = os.path.join(folderPath, "VertipaqAnalyzer")
        ext = ".csv"
        if not os.path.exists(subFolderPath):
            os.makedirs(subFolderPath, exist_ok=True)
        zipFilePath = os.path.join(subFolderPath, zipFileName)

        # Create CSV files based on dataframes
        for fileName, df in dataFrames.items():
            filePath = os.path.join(subFolderPath, f"{fileName}{ext}")
            df.to_csv(filePath, index=False)

        # Create a zip file and add CSV files to it
        with zipfile.ZipFile(zipFilePath, "w") as zipf:
            for fileName in dataFrames:
                filePath = os.path.join(subFolderPath, f"{fileName}{ext}")
                zipf.write(filePath, os.path.basename(filePath))

        # Clean up: remove the individual CSV files
        for fileName, df in dataFrames.items():
            filePath = os.path.join(subFolderPath, fileName) + ext
            if os.path.exists(filePath):
                os.remove(filePath)
        print(
            f"{icons.green_dot} The Vertipaq Analyzer info for the '{dataset_name}' semantic model in the '{workspace_name}' workspace has been saved "
            f"to the 'Vertipaq Analyzer/{zipFileName}' in the default lakehouse attached to this notebook."
        )


def visualize_vertipaq(dataframes):

    # Tooltips for columns within the visual
    data = [
        {"ViewName": "Model", "ColumnName": "Dataset Name", "Tooltip": "The name of the semantic model"},
        {"ViewName": "Model", "ColumnName": "Total Size", "Tooltip": "The size of the model (in bytes)"},
        {"ViewName": "Model", "ColumnName": "Table Count", "Tooltip": "The number of tables in the semantic model"},
        {"ViewName": "Model", "ColumnName": "Column Count", "Tooltip": "The number of columns in the semantic model"},
        {"ViewName": "Model", "ColumnName": "Compatibility Level", "Tooltip": "The compatibility level of the semantic model"},
        {"ViewName": "Model", "ColumnName": "Default Mode", "Tooltip": "The default query mode of the semantic model"},
        {"ViewName": "Table", "ColumnName": "Table Name", "Tooltip": "The name of the table"},
        {"ViewName": "Table", "ColumnName": "Type", "Tooltip": "The type of table"},
        {"ViewName": "Table", "ColumnName": "Row Count", "Tooltip": "The number of rows in the table"},
        {"ViewName": "Table", "ColumnName": "Total Size", "Tooltip": "Data Size + Dictionary Size + Hierarchy Size (in bytes)"},
        {"ViewName": "Table", "ColumnName": "Data Size", "Tooltip": "The size of the data for all the columns in this table (in bytes)"},
        {"ViewName": "Table", "ColumnName": "Dictionary Size", "Tooltip": "The size of the column's dictionary for all columns in this table (in bytes)"},
        {"ViewName": "Table", "ColumnName": "Hierarchy Size", "Tooltip": "The size of hierarchy structures for all columns in this table (in bytes)"},
        {"ViewName": "Table", "ColumnName": "% DB", "Tooltip": "The size of the table relative to the size of the semantic model"},
        {"ViewName": "Table", "ColumnName": "Partitions", "Tooltip": "The number of partitions in the table"},
        {"ViewName": "Table", "ColumnName": "Columns", "Tooltip": "The number of columns in the table"},
        {"ViewName": "Partition", "ColumnName": "Table Name", "Tooltip": "The name of the table"},
        {"ViewName": "Partition", "ColumnName": "Partition Name", "Tooltip": "The name of the partition within the table"},
        {"ViewName": "Partition", "ColumnName": "Mode", "Tooltip": "The query mode of the partition"},
        {"ViewName": "Partition", "ColumnName": "Record Count", "Tooltip": "The number of rows in the partition"},
        {"ViewName": "Partition", "ColumnName": "Segment Count", "Tooltip": "The number of segments within the partition"},
        {"ViewName": "Partition", "ColumnName": "Records per Segment", "Tooltip": "The number of rows per segment"},
        {"ViewName": "Column", "ColumnName": "Table Name", "Tooltip": "The name of the table"},
        {"ViewName": "Column", "ColumnName": "Column Name", "Tooltip": "The name of the column"},
        {"ViewName": "Column", "ColumnName": "Type", "Tooltip": "The type of column"},
        {"ViewName": "Column", "ColumnName": "Cardinality", "Tooltip": "The number of unique rows in the column"},
        {"ViewName": "Column", "ColumnName": "Total Size", "Tooltip": "Data Size + Dictionary Size + Hierarchy Size (in bytes)"},
        {"ViewName": "Column", "ColumnName": "Data Size", "Tooltip": "The size of the data for the column (in bytes)"},
        {"ViewName": "Column", "ColumnName": "Dictionary Size", "Tooltip": "The size of the column's dictionary (in bytes)"},
        {"ViewName": "Column", "ColumnName": "Hierarchy Size", "Tooltip": "The size of hierarchy structures (in bytes)"},
        {"ViewName": "Column", "ColumnName": "% Table", "Tooltip": "The size of the column relative to the size of the table"},
        {"ViewName": "Column", "ColumnName": "% DB", "Tooltip": "The size of the column relative to the size of the semantic model"},
        {"ViewName": "Column", "ColumnName": "Data Type", "Tooltip": "The data type of the column"},
        {"ViewName": "Column", "ColumnName": "Encoding", "Tooltip": "The encoding type for the column"},
        {"ViewName": "Column", "ColumnName": "Is Resident", "Tooltip": "Indicates whether the column is in memory or not"},
        {"ViewName": "Column", "ColumnName": "Temperature", "Tooltip": "A decimal indicating the frequency and recency of queries against the column"},
        {"ViewName": "Column", "ColumnName": "Last Accessed", "Tooltip": "The time the column was last queried"},
        {"ViewName": "Hierarchy", "ColumnName": "Table Name", "Tooltip": "The name of the table"},
        {"ViewName": "Hierarchy", "ColumnName": "Hierarchy Name", "Tooltip": "The name of the hierarchy"},
        {"ViewName": "Hierarchy", "ColumnName": "Used Size", "Tooltip": "The size of user hierarchy structures (in bytes)"},
        {"ViewName": "Relationship", "ColumnName": "From Object", "Tooltip": "The from table/column in the relationship"},
        {"ViewName": "Relationship", "ColumnName": "To Object", "Tooltip": "The to table/column in the relationship"},
        {"ViewName": "Relationship", "ColumnName": "Multiplicity", "Tooltip": "The cardinality on each side of the relationship"},
        {"ViewName": "Relationship", "ColumnName": "Used Size", "Tooltip": "The size of the relationship (in bytes)"},
        {"ViewName": "Relationship", "ColumnName": "Max From Cardinality", "Tooltip": "The number of unique values in the column used in the from side of the relationship"},
        {"ViewName": "Relationship", "ColumnName": "Max To Cardinality", "Tooltip": "The number of unique values in the column used in the to side of the relationship"},
        {"ViewName": "Relationship", "ColumnName": "Missing Rows", "Tooltip": "The number of rows in the 'from' table which do not map to the key column in the 'to' table"},
    ]

    # Create DataFrame
    tooltipDF = pd.DataFrame(data)

    # define the dictionary with {"Tab name":df}
    df_dict = {
        "Model Summary": dataframes["dfModel"],
        "Tables": dataframes["dfT"],
        "Partitions": dataframes["dfP"],
        "Columns (Total Size)": dataframes["colSize"],
        "Columns (Temperature)": dataframes["temp"],
        "Relationships": dataframes["dfR"],
        "Hierarchies": dataframes["dfH_filt"],
    }

    mapping = {
        "Model Summary": "Model",
        "Tables": "Table",
        "Partitions": "Partition",
        "Columns (Total Size)": "Column",
        "Columns (Temperature)": "Column",
        "Relationships": "Relationship",
        "Hierarchies": "Hierarchy",
    }

    # Basic styles for the tabs and tab content
    styles = """
    <style>
    .tab { overflow: hidden; border: 1px solid #ccc; background-color: #f1f1f1; }
    .tab button { background-color: inherit; float: left; border: none; outline: none; cursor: pointer; padding: 14px 16px; transition: 0.3s; }
    .tab button:hover { background-color: #ddd; }
    .tab button.active { background-color: #ccc; }
    .tabcontent { display: none; padding: 6px 12px; border: 1px solid #ccc; border-top: none; }
    </style>
    """
    # JavaScript for tab functionality
    script = """
    <script>
    function openTab(evt, tabName) {
        var i, tabcontent, tablinks;
        tabcontent = document.getElementsByClassName("tabcontent");
        for (i = 0; i < tabcontent.length; i++) {
            tabcontent[i].style.display = "none";
        }
        tablinks = document.getElementsByClassName("tablinks");
        for (i = 0; i < tablinks.length; i++) {
            tablinks[i].className = tablinks[i].className.replace(" active", "");
        }
        document.getElementById(tabName).style.display = "block";
        evt.currentTarget.className += " active";
    }
    </script>
    """

    # HTML for tabs
    tab_html = '<div class="tab">'
    content_html = ""
    for i, (title, df) in enumerate(df_dict.items()):
        tab_id = f"tab{i}"
        tab_html += f'<button class="tablinks" onclick="openTab(event, \'{tab_id}\')">{title}</button>'

        vw = mapping.get(title)

        df_html = df.to_html()
        for col in df.columns:
            tt = None
            try:
                tooltipDF_filt = tooltipDF[
                    (tooltipDF["ViewName"] == vw) & (tooltipDF["ColumnName"] == col)
                ]
                tt = tooltipDF_filt["Tooltip"].iloc[0]
            except Exception:
                pass
            df_html = df_html.replace(f"<th>{col}</th>", f'<th title="{tt}">{col}</th>')
        content_html += (
            f'<div id="{tab_id}" class="tabcontent"><h3>{title}</h3>{df_html}</div>'
        )
    tab_html += "</div>"

    # Display the tabs, tab contents, and run the script
    display(HTML(styles + tab_html + content_html + script))
    # Default to open the first tab
    display(
        HTML("<script>document.getElementsByClassName('tablinks')[0].click();</script>")
    )


@log
def import_vertipaq_analyzer(folder_path: str, file_name: str):
    """
    Imports and visualizes the vertipaq analyzer info from a saved .zip file in your lakehouse.

    Parameters
    ----------
    folder_path : str
        The folder within your lakehouse in which the .zip file containing the vertipaq analyzer info has been saved.
    file_name : str
        The file name of the file which contains the vertipaq analyzer info.

    Returns
    -------
    str
        A visualization of the Vertipaq Analyzer statistics.
    """

    pd.options.mode.copy_on_write = True

    zipFilePath = os.path.join(folder_path, file_name)
    extracted_dir = os.path.join(folder_path, "extracted_dataframes")

    with zipfile.ZipFile(zipFilePath, "r") as zip_ref:
        zip_ref.extractall(extracted_dir)

        # Read all CSV files into a dictionary of DataFrames
        dfs = {}
        for file_name in zip_ref.namelist():
            df = pd.read_csv(extracted_dir + "/" + file_name)
            file_path = Path(file_name)
            df_name = file_path.stem
            dfs[df_name] = df

    visualize_vertipaq(dfs)

    # Clean up: remove the extracted directory
    shutil.rmtree(extracted_dir)
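
Usage note (not part of the packaged file): a minimal sketch of how the functions above might be called from a Fabric notebook, based only on the signatures and file paths shown in this diff. The workspace, model, and file names are placeholder assumptions, a default lakehouse is assumed to be attached for the export and import paths, and the top-level imports assume sempy_labs/__init__.py re-exports these functions (otherwise import them from sempy_labs._vertipaq).

# Hypothetical names; adjust to your own workspace and semantic model.
from sempy_labs import vertipaq_analyzer, import_vertipaq_analyzer

# Analyze a semantic model and display the tabbed HTML report inline;
# with export=None the function also returns a dict of DataFrames.
results = vertipaq_analyzer(dataset="Sales Model", workspace="My Workspace")

# Export the statistics to a zip file under Files/VertipaqAnalyzer in the
# attached lakehouse instead of returning the DataFrames.
vertipaq_analyzer(dataset="Sales Model", workspace="My Workspace", export="zip")

# Later, re-load and visualize the saved results; the zip is named
# "{workspace_name}.{dataset_name}.zip" per the export code above.
import_vertipaq_analyzer(
    folder_path="/lakehouse/default/Files/VertipaqAnalyzer",
    file_name="My Workspace.Sales Model.zip",
)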