semantic-link-labs 0.12.8 (semantic_link_labs-0.12.8-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- semantic_link_labs-0.12.8.dist-info/METADATA +354 -0
- semantic_link_labs-0.12.8.dist-info/RECORD +243 -0
- semantic_link_labs-0.12.8.dist-info/WHEEL +5 -0
- semantic_link_labs-0.12.8.dist-info/licenses/LICENSE +21 -0
- semantic_link_labs-0.12.8.dist-info/top_level.txt +1 -0
- sempy_labs/__init__.py +606 -0
- sempy_labs/_a_lib_info.py +2 -0
- sempy_labs/_ai.py +437 -0
- sempy_labs/_authentication.py +264 -0
- sempy_labs/_bpa_translation/_model/_translations_am-ET.po +869 -0
- sempy_labs/_bpa_translation/_model/_translations_ar-AE.po +908 -0
- sempy_labs/_bpa_translation/_model/_translations_bg-BG.po +968 -0
- sempy_labs/_bpa_translation/_model/_translations_ca-ES.po +963 -0
- sempy_labs/_bpa_translation/_model/_translations_cs-CZ.po +943 -0
- sempy_labs/_bpa_translation/_model/_translations_da-DK.po +945 -0
- sempy_labs/_bpa_translation/_model/_translations_de-DE.po +988 -0
- sempy_labs/_bpa_translation/_model/_translations_el-GR.po +993 -0
- sempy_labs/_bpa_translation/_model/_translations_es-ES.po +971 -0
- sempy_labs/_bpa_translation/_model/_translations_fa-IR.po +933 -0
- sempy_labs/_bpa_translation/_model/_translations_fi-FI.po +942 -0
- sempy_labs/_bpa_translation/_model/_translations_fr-FR.po +994 -0
- sempy_labs/_bpa_translation/_model/_translations_ga-IE.po +967 -0
- sempy_labs/_bpa_translation/_model/_translations_he-IL.po +902 -0
- sempy_labs/_bpa_translation/_model/_translations_hi-IN.po +944 -0
- sempy_labs/_bpa_translation/_model/_translations_hu-HU.po +963 -0
- sempy_labs/_bpa_translation/_model/_translations_id-ID.po +946 -0
- sempy_labs/_bpa_translation/_model/_translations_is-IS.po +939 -0
- sempy_labs/_bpa_translation/_model/_translations_it-IT.po +986 -0
- sempy_labs/_bpa_translation/_model/_translations_ja-JP.po +846 -0
- sempy_labs/_bpa_translation/_model/_translations_ko-KR.po +839 -0
- sempy_labs/_bpa_translation/_model/_translations_mt-MT.po +967 -0
- sempy_labs/_bpa_translation/_model/_translations_nl-NL.po +978 -0
- sempy_labs/_bpa_translation/_model/_translations_pl-PL.po +962 -0
- sempy_labs/_bpa_translation/_model/_translations_pt-BR.po +962 -0
- sempy_labs/_bpa_translation/_model/_translations_pt-PT.po +957 -0
- sempy_labs/_bpa_translation/_model/_translations_ro-RO.po +968 -0
- sempy_labs/_bpa_translation/_model/_translations_ru-RU.po +964 -0
- sempy_labs/_bpa_translation/_model/_translations_sk-SK.po +952 -0
- sempy_labs/_bpa_translation/_model/_translations_sl-SL.po +950 -0
- sempy_labs/_bpa_translation/_model/_translations_sv-SE.po +942 -0
- sempy_labs/_bpa_translation/_model/_translations_ta-IN.po +976 -0
- sempy_labs/_bpa_translation/_model/_translations_te-IN.po +947 -0
- sempy_labs/_bpa_translation/_model/_translations_th-TH.po +924 -0
- sempy_labs/_bpa_translation/_model/_translations_tr-TR.po +953 -0
- sempy_labs/_bpa_translation/_model/_translations_uk-UA.po +961 -0
- sempy_labs/_bpa_translation/_model/_translations_zh-CN.po +804 -0
- sempy_labs/_bpa_translation/_model/_translations_zu-ZA.po +969 -0
- sempy_labs/_capacities.py +1198 -0
- sempy_labs/_capacity_migration.py +660 -0
- sempy_labs/_clear_cache.py +351 -0
- sempy_labs/_connections.py +610 -0
- sempy_labs/_dashboards.py +69 -0
- sempy_labs/_data_access_security.py +98 -0
- sempy_labs/_data_pipelines.py +162 -0
- sempy_labs/_dataflows.py +668 -0
- sempy_labs/_dax.py +501 -0
- sempy_labs/_daxformatter.py +80 -0
- sempy_labs/_delta_analyzer.py +467 -0
- sempy_labs/_delta_analyzer_history.py +301 -0
- sempy_labs/_dictionary_diffs.py +221 -0
- sempy_labs/_documentation.py +147 -0
- sempy_labs/_domains.py +51 -0
- sempy_labs/_eventhouses.py +182 -0
- sempy_labs/_external_data_shares.py +230 -0
- sempy_labs/_gateways.py +521 -0
- sempy_labs/_generate_semantic_model.py +521 -0
- sempy_labs/_get_connection_string.py +84 -0
- sempy_labs/_git.py +543 -0
- sempy_labs/_graphQL.py +90 -0
- sempy_labs/_helper_functions.py +2833 -0
- sempy_labs/_icons.py +149 -0
- sempy_labs/_job_scheduler.py +609 -0
- sempy_labs/_kql_databases.py +149 -0
- sempy_labs/_kql_querysets.py +124 -0
- sempy_labs/_kusto.py +137 -0
- sempy_labs/_labels.py +124 -0
- sempy_labs/_list_functions.py +1720 -0
- sempy_labs/_managed_private_endpoints.py +253 -0
- sempy_labs/_mirrored_databases.py +416 -0
- sempy_labs/_mirrored_warehouses.py +60 -0
- sempy_labs/_ml_experiments.py +113 -0
- sempy_labs/_model_auto_build.py +140 -0
- sempy_labs/_model_bpa.py +557 -0
- sempy_labs/_model_bpa_bulk.py +378 -0
- sempy_labs/_model_bpa_rules.py +859 -0
- sempy_labs/_model_dependencies.py +343 -0
- sempy_labs/_mounted_data_factories.py +123 -0
- sempy_labs/_notebooks.py +441 -0
- sempy_labs/_one_lake_integration.py +151 -0
- sempy_labs/_onelake.py +131 -0
- sempy_labs/_query_scale_out.py +433 -0
- sempy_labs/_refresh_semantic_model.py +435 -0
- sempy_labs/_semantic_models.py +468 -0
- sempy_labs/_spark.py +455 -0
- sempy_labs/_sql.py +241 -0
- sempy_labs/_sql_audit_settings.py +207 -0
- sempy_labs/_sql_endpoints.py +214 -0
- sempy_labs/_tags.py +201 -0
- sempy_labs/_translations.py +43 -0
- sempy_labs/_user_delegation_key.py +44 -0
- sempy_labs/_utils.py +79 -0
- sempy_labs/_vertipaq.py +1021 -0
- sempy_labs/_vpax.py +388 -0
- sempy_labs/_warehouses.py +234 -0
- sempy_labs/_workloads.py +140 -0
- sempy_labs/_workspace_identity.py +72 -0
- sempy_labs/_workspaces.py +595 -0
- sempy_labs/admin/__init__.py +170 -0
- sempy_labs/admin/_activities.py +167 -0
- sempy_labs/admin/_apps.py +145 -0
- sempy_labs/admin/_artifacts.py +65 -0
- sempy_labs/admin/_basic_functions.py +463 -0
- sempy_labs/admin/_capacities.py +508 -0
- sempy_labs/admin/_dataflows.py +45 -0
- sempy_labs/admin/_datasets.py +186 -0
- sempy_labs/admin/_domains.py +522 -0
- sempy_labs/admin/_external_data_share.py +100 -0
- sempy_labs/admin/_git.py +72 -0
- sempy_labs/admin/_items.py +265 -0
- sempy_labs/admin/_labels.py +211 -0
- sempy_labs/admin/_reports.py +241 -0
- sempy_labs/admin/_scanner.py +118 -0
- sempy_labs/admin/_shared.py +82 -0
- sempy_labs/admin/_sharing_links.py +110 -0
- sempy_labs/admin/_tags.py +131 -0
- sempy_labs/admin/_tenant.py +503 -0
- sempy_labs/admin/_tenant_keys.py +89 -0
- sempy_labs/admin/_users.py +140 -0
- sempy_labs/admin/_workspaces.py +236 -0
- sempy_labs/deployment_pipeline/__init__.py +23 -0
- sempy_labs/deployment_pipeline/_items.py +580 -0
- sempy_labs/directlake/__init__.py +57 -0
- sempy_labs/directlake/_autosync.py +58 -0
- sempy_labs/directlake/_directlake_schema_compare.py +120 -0
- sempy_labs/directlake/_directlake_schema_sync.py +161 -0
- sempy_labs/directlake/_dl_helper.py +274 -0
- sempy_labs/directlake/_generate_shared_expression.py +94 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +62 -0
- sempy_labs/directlake/_get_shared_expression.py +34 -0
- sempy_labs/directlake/_guardrails.py +96 -0
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +70 -0
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +90 -0
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +239 -0
- sempy_labs/directlake/_update_directlake_partition_entity.py +259 -0
- sempy_labs/directlake/_warm_cache.py +236 -0
- sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
- sempy_labs/environment/__init__.py +23 -0
- sempy_labs/environment/_items.py +212 -0
- sempy_labs/environment/_pubstage.py +223 -0
- sempy_labs/eventstream/__init__.py +37 -0
- sempy_labs/eventstream/_items.py +263 -0
- sempy_labs/eventstream/_topology.py +652 -0
- sempy_labs/graph/__init__.py +59 -0
- sempy_labs/graph/_groups.py +651 -0
- sempy_labs/graph/_sensitivity_labels.py +120 -0
- sempy_labs/graph/_teams.py +125 -0
- sempy_labs/graph/_user_licenses.py +96 -0
- sempy_labs/graph/_users.py +516 -0
- sempy_labs/graph_model/__init__.py +15 -0
- sempy_labs/graph_model/_background_jobs.py +63 -0
- sempy_labs/graph_model/_items.py +149 -0
- sempy_labs/lakehouse/__init__.py +67 -0
- sempy_labs/lakehouse/_blobs.py +247 -0
- sempy_labs/lakehouse/_get_lakehouse_columns.py +102 -0
- sempy_labs/lakehouse/_get_lakehouse_tables.py +274 -0
- sempy_labs/lakehouse/_helper.py +250 -0
- sempy_labs/lakehouse/_lakehouse.py +351 -0
- sempy_labs/lakehouse/_livy_sessions.py +143 -0
- sempy_labs/lakehouse/_materialized_lake_views.py +157 -0
- sempy_labs/lakehouse/_partitioning.py +165 -0
- sempy_labs/lakehouse/_schemas.py +217 -0
- sempy_labs/lakehouse/_shortcuts.py +440 -0
- sempy_labs/migration/__init__.py +35 -0
- sempy_labs/migration/_create_pqt_file.py +238 -0
- sempy_labs/migration/_direct_lake_to_import.py +105 -0
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +398 -0
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +148 -0
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +533 -0
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +172 -0
- sempy_labs/migration/_migration_validation.py +71 -0
- sempy_labs/migration/_refresh_calc_tables.py +131 -0
- sempy_labs/mirrored_azure_databricks_catalog/__init__.py +15 -0
- sempy_labs/mirrored_azure_databricks_catalog/_discover.py +213 -0
- sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py +45 -0
- sempy_labs/ml_model/__init__.py +23 -0
- sempy_labs/ml_model/_functions.py +427 -0
- sempy_labs/report/_BPAReportTemplate.json +232 -0
- sempy_labs/report/__init__.py +55 -0
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
- sempy_labs/report/_bpareporttemplate/.platform +11 -0
- sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
- sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
- sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
- sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
- sempy_labs/report/_download_report.py +76 -0
- sempy_labs/report/_export_report.py +257 -0
- sempy_labs/report/_generate_report.py +427 -0
- sempy_labs/report/_paginated.py +76 -0
- sempy_labs/report/_report_bpa.py +354 -0
- sempy_labs/report/_report_bpa_rules.py +115 -0
- sempy_labs/report/_report_functions.py +581 -0
- sempy_labs/report/_report_helper.py +227 -0
- sempy_labs/report/_report_list_functions.py +110 -0
- sempy_labs/report/_report_rebind.py +149 -0
- sempy_labs/report/_reportwrapper.py +3100 -0
- sempy_labs/report/_save_report.py +147 -0
- sempy_labs/snowflake_database/__init__.py +10 -0
- sempy_labs/snowflake_database/_items.py +105 -0
- sempy_labs/sql_database/__init__.py +21 -0
- sempy_labs/sql_database/_items.py +201 -0
- sempy_labs/sql_database/_mirroring.py +79 -0
- sempy_labs/theme/__init__.py +12 -0
- sempy_labs/theme/_org_themes.py +129 -0
- sempy_labs/tom/__init__.py +3 -0
- sempy_labs/tom/_model.py +5977 -0
- sempy_labs/variable_library/__init__.py +19 -0
- sempy_labs/variable_library/_functions.py +403 -0
- sempy_labs/warehouse/__init__.py +28 -0
- sempy_labs/warehouse/_items.py +234 -0
- sempy_labs/warehouse/_restore_points.py +309 -0
sempy_labs/_vertipaq.py
ADDED
@@ -0,0 +1,1021 @@

import sempy.fabric as fabric
import pandas as pd
from IPython.display import display, HTML
import zipfile
import os
import shutil
import datetime
import warnings
from sempy_labs._helper_functions import (
    format_dax_object_name,
    save_as_delta_table,
    resolve_workspace_capacity,
    _get_column_aggregate,
    resolve_workspace_name_and_id,
    resolve_dataset_name_and_id,
    _create_spark_session,
    resolve_workspace_id,
    resolve_workspace_name,
)
from sempy_labs._list_functions import list_relationships, list_tables
from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
from typing import Optional
from sempy._utils._log import log
import sempy_labs._icons as icons
from pathlib import Path
from uuid import UUID


@log
def vertipaq_analyzer(
    dataset: str | UUID,
    workspace: Optional[str | UUID] = None,
    export: Optional[str] = None,
    read_stats_from_data: bool = False,
    **kwargs,
) -> dict[str, pd.DataFrame]:
    """
    Displays an HTML visualization of the `Vertipaq Analyzer <https://www.sqlbi.com/tools/vertipaq-analyzer/>`_ statistics from a semantic model.

    `Vertipaq Analyzer <https://www.sqlbi.com/tools/vertipaq-analyzer/>`_ is an open-sourced tool built by SQLBI. It provides a detailed analysis of the VertiPaq engine, which is the in-memory engine used by Power BI and Analysis Services Tabular models.

    Parameters
    ----------
    dataset : str | uuid.UUID
        Name or ID of the semantic model.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID in which the semantic model exists.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    export : str, default=None
        Specifying 'zip' will export the results to a zip file in your lakehouse (which can be imported using the import_vertipaq_analyzer function).
        Specifying 'table' will export the results to delta tables (appended) in your lakehouse.
        Default value: None.
    read_stats_from_data : bool, default=False
        Setting this parameter to true has the function get Column Cardinality and Missing Rows using DAX (Direct Lake semantic models achieve this using a Spark query to the lakehouse).

    Returns
    -------
    dict[str, pandas.DataFrame]
        A dictionary of pandas dataframes showing the vertipaq analyzer statistics.
    """

    from sempy_labs.tom import connect_semantic_model

    if "lakehouse_workspace" in kwargs:
        print(
            f"{icons.info} The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
        )
        del kwargs["lakehouse_workspace"]

    pd.options.mode.copy_on_write = True
    warnings.filterwarnings(
        "ignore", message="createDataFrame attempted Arrow optimization*"
    )

    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

    fabric.refresh_tom_cache(workspace=workspace)

    vertipaq_map = {
        "Model": {
            "Dataset Name": [icons.data_type_string, icons.no_format],
            "Total Size": [icons.data_type_long, icons.int_format],
            "Table Count": [icons.data_type_long, icons.int_format],
            "Column Count": [icons.data_type_long, icons.int_format],
            "Compatibility Level": [icons.data_type_long, icons.no_format],
            "Default Mode": [icons.data_type_string, icons.no_format],
        },
        "Tables": {
            "Table Name": [icons.data_type_string, icons.no_format],
            "Type": [icons.data_type_string, icons.no_format],
            "Row Count": [icons.data_type_long, icons.int_format],
            "Total Size": [icons.data_type_long, icons.int_format],
            "Dictionary Size": [icons.data_type_long, icons.int_format],
            "Data Size": [icons.data_type_long, icons.int_format],
            "Hierarchy Size": [icons.data_type_long, icons.int_format],
            "Relationship Size": [icons.data_type_long, icons.int_format],
            "User Hierarchy Size": [icons.data_type_long, icons.int_format],
            "Partitions": [icons.data_type_long, icons.int_format],
            "Columns": [icons.data_type_long, icons.int_format],
            "% DB": [icons.data_type_double, icons.pct_format],
        },
        "Partitions": {
            "Table Name": [icons.data_type_string, icons.no_format],
            "Partition Name": [icons.data_type_string, icons.no_format],
            "Mode": [icons.data_type_string, icons.no_format],
            "Record Count": [icons.data_type_long, icons.int_format],
            "Segment Count": [icons.data_type_long, icons.int_format],
            "Records per Segment": [icons.data_type_double, icons.int_format],
        },
        "Columns": {
            "Table Name": [icons.data_type_string, icons.no_format],
            "Column Name": [icons.data_type_string, icons.no_format],
            "Type": [icons.data_type_string, icons.no_format],
            "Cardinality": [icons.data_type_long, icons.int_format],
            "Total Size": [icons.data_type_long, icons.int_format],
            "Data Size": [icons.data_type_long, icons.int_format],
            "Dictionary Size": [icons.data_type_long, icons.int_format],
            "Hierarchy Size": [icons.data_type_long, icons.int_format],
            "% Table": [icons.data_type_double, icons.pct_format],
            "% DB": [icons.data_type_double, icons.pct_format],
            "Data Type": [icons.data_type_string, icons.no_format],
            "Encoding": [icons.data_type_string, icons.no_format],
            "Is Resident": [icons.data_type_bool, icons.no_format],
            "Temperature": [icons.data_type_double, icons.int_format],
            "Last Accessed": [icons.data_type_timestamp, icons.no_format],
        },
        "Hierarchies": {
            "Table Name": [icons.data_type_string, icons.no_format],
            "Hierarchy Name": [icons.data_type_string, icons.no_format],
            "Used Size": [icons.data_type_long, icons.int_format],
        },
        "Relationships": {
            "From Object": [icons.data_type_string, icons.no_format],
            "To Object": [icons.data_type_string, icons.no_format],
            "Multiplicity": [icons.data_type_string, icons.no_format],
            "Used Size": [icons.data_type_long, icons.int_format],
            "Max From Cardinality": [icons.data_type_long, icons.int_format],
            "Max To Cardinality": [icons.data_type_long, icons.int_format],
            "Missing Rows": [icons.data_type_long, icons.int_format],
        },
    }

    with connect_semantic_model(
        dataset=dataset_id, workspace=workspace_id, readonly=True
    ) as tom:
        compat_level = tom.model.Model.Database.CompatibilityLevel
        is_direct_lake = tom.is_direct_lake()
        def_mode = tom.model.DefaultMode
        table_count = tom.model.Tables.Count
        column_count = len(list(tom.all_columns()))
        if table_count == 0:
            print(
                f"{icons.warning} The '{dataset_name}' semantic model within the '{workspace_name}' workspace has no tables. Vertipaq Analyzer can only be run if the semantic model has tables."
            )
            return

    dfT = list_tables(dataset=dataset_id, extended=True, workspace=workspace_id)

    dfT.rename(columns={"Name": "Table Name"}, inplace=True)
    columns_to_keep = list(vertipaq_map["Tables"].keys())
    dfT = dfT[dfT.columns.intersection(columns_to_keep)]

    dfC = fabric.list_columns(dataset=dataset_id, extended=True, workspace=workspace_id)
    dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
    dfC.rename(columns={"Column Cardinality": "Cardinality"}, inplace=True)
    dfH = fabric.list_hierarchies(
        dataset=dataset_id, extended=True, workspace=workspace_id
    )
    dfR = list_relationships(dataset=dataset_id, extended=True, workspace=workspace_id)
    dfP = fabric.list_partitions(
        dataset=dataset_id, extended=True, workspace=workspace_id
    )

    artifact_type = None
    lakehouse_workspace_id = None
    lakehouse_name = None
    # if is_direct_lake:
    #     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
    #         get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
    #     )

    dfR["Missing Rows"] = 0
    dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)

    # Direct Lake
    if read_stats_from_data:
        if is_direct_lake and artifact_type == "Lakehouse":
            dfC = pd.merge(
                dfC,
                dfP[["Table Name", "Query", "Source Type"]],
                on="Table Name",
                how="left",
            )
            dfC_flt = dfC[
                (dfC["Source Type"] == "Entity")
                & (~dfC["Column Name"].str.startswith("RowNumber-"))
            ]

            object_workspace = resolve_workspace_name(
                workspace_id=lakehouse_workspace_id
            )
            current_workspace_id = resolve_workspace_id()
            if current_workspace_id != lakehouse_workspace_id:
                lakeTables = get_lakehouse_tables(
                    lakehouse=lakehouse_name, workspace=object_workspace
                )

            sql_statements = []
            spark = _create_spark_session()
            # Loop through tables
            for lakeTName in dfC_flt["Query"].unique():
                query = "SELECT "
                columns_in_table = dfC_flt.loc[
                    dfC_flt["Query"] == lakeTName, "Source"
                ].unique()

                # Loop through columns within those tables
                for scName in columns_in_table:
                    query = query + f"COUNT(DISTINCT(`{scName}`)) AS `{scName}`, "

                query = query[:-2]
                if lakehouse_workspace_id == current_workspace_id:
                    query = query + f" FROM {lakehouse_name}.{lakeTName}"
                else:
                    lakeTables_filt = lakeTables[lakeTables["Table Name"] == lakeTName]
                    tPath = lakeTables_filt["Location"].iloc[0]

                    df = spark.read.format("delta").load(tPath)
                    tempTableName = "delta_table_" + lakeTName
                    df.createOrReplaceTempView(tempTableName)
                    query = query + f" FROM {tempTableName}"
                sql_statements.append((lakeTName, query))

            for o in sql_statements:
                tName = o[0]
                query = o[1]

                df = spark.sql(query)

                for column in df.columns:
                    x = df.collect()[0][column]
                    for i, r in dfC.iterrows():
                        if r["Query"] == tName and r["Source"] == column:
                            dfC.at[i, "Cardinality"] = x

            # Remove column added temporarily
            dfC.drop(columns=["Query", "Source Type"], inplace=True)

            # Direct Lake missing rows
            dfR = pd.merge(
                dfR,
                dfP[["Table Name", "Query"]],
                left_on="From Table",
                right_on="Table Name",
                how="left",
            )
            dfR.rename(columns={"Query": "From Lake Table"}, inplace=True)
            dfR.drop(columns=["Table Name"], inplace=True)
            dfR = pd.merge(
                dfR,
                dfP[["Table Name", "Query"]],
                left_on="To Table",
                right_on="Table Name",
                how="left",
            )
            dfR.rename(columns={"Query": "To Lake Table"}, inplace=True)
            dfR.drop(columns=["Table Name"], inplace=True)
            dfR = pd.merge(
                dfR,
                dfC[["Column Object", "Source"]],
                left_on="From Object",
                right_on="Column Object",
                how="left",
            )
            dfR.rename(columns={"Source": "From Lake Column"}, inplace=True)
            dfR.drop(columns=["Column Object"], inplace=True)
            dfR = pd.merge(
                dfR,
                dfC[["Column Object", "Source"]],
                left_on="To Object",
                right_on="Column Object",
                how="left",
            )
            dfR.rename(columns={"Source": "To Lake Column"}, inplace=True)
            dfR.drop(columns=["Column Object"], inplace=True)

            spark = _create_spark_session()
            for i, r in dfR.iterrows():
                fromTable = r["From Lake Table"]
                fromColumn = r["From Lake Column"]
                toTable = r["To Lake Table"]
                toColumn = r["To Lake Column"]

                if lakehouse_workspace_id == current_workspace_id:
                    query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"
                else:
                    tempTableFrom = f"delta_table_{fromTable}"
                    tempTableTo = f"delta_table_{toTable}"

                    query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {tempTableFrom} as f\nleft join {tempTableTo} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"

                # query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"

                df = spark.sql(query)
                missingRows = df.collect()[0][0]
                dfR.at[i, "Missing Rows"] = missingRows

            dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)
        elif not is_direct_lake:
            # Calculate missing rows using DAX for non-direct lake
            for i, r in dfR.iterrows():
                fromTable = r["From Table"]
                fromColumn = r["From Column"]
                toTable = r["To Table"]
                toColumn = r["To Column"]
                isActive = bool(r["Active"])
                fromObject = format_dax_object_name(fromTable, fromColumn)
                toObject = format_dax_object_name(toTable, toColumn)

                missingRows = 0

                query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),isblank({toObject}))\n)"

                if not isActive:
                    query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),userelationship({fromObject},{toObject}),isblank({toObject}))\n)"

                result = fabric.evaluate_dax(
                    dataset=dataset_id, dax_string=query, workspace=workspace_id
                )

                try:
                    missingRows = result.iloc[0, 0]
                except Exception:
                    pass

                dfR.at[i, "Missing Rows"] = missingRows
            dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)

    table_totals = dfC.groupby("Table Name")["Total Size"].transform("sum")
    db_total_size = dfC["Total Size"].sum()
    dfC["% Table"] = round((dfC["Total Size"] / table_totals) * 100, 2)
    dfC["% DB"] = round((dfC["Total Size"] / db_total_size) * 100, 2)
    columnList = list(vertipaq_map["Columns"].keys())

    dfC = dfC[dfC["Type"] != "RowNumber"].reset_index(drop=True)

    colSize = dfC[columnList].sort_values(by="Total Size", ascending=False)
    temp = dfC[columnList].sort_values(by="Temperature", ascending=False)
    colSize.reset_index(drop=True, inplace=True)
    temp.reset_index(drop=True, inplace=True)

    export_Col = colSize.copy()
    export_Table = dfT.copy()

    # Relationships
    dfR = pd.merge(
        dfR,
        dfC[["Column Object", "Cardinality"]],
        left_on="From Object",
        right_on="Column Object",
        how="left",
    )
    dfR.rename(columns={"Cardinality": "Max From Cardinality"}, inplace=True)
    dfR = pd.merge(
        dfR,
        dfC[["Column Object", "Cardinality"]],
        left_on="To Object",
        right_on="Column Object",
        how="left",
    )
    dfR.rename(columns={"Cardinality": "Max To Cardinality"}, inplace=True)
    dfR = dfR[
        [
            "From Object",
            "To Object",
            "Multiplicity",
            "Used Size",
            "Max From Cardinality",
            "Max To Cardinality",
            "Missing Rows",
        ]
    ].sort_values(by="Used Size", ascending=False)
    dfR.reset_index(drop=True, inplace=True)
    export_Rel = dfR.copy()

    # Partitions
    dfP = dfP[
        [
            "Table Name",
            "Partition Name",
            "Mode",
            "Record Count",
            "Segment Count",
            # "Records per Segment",
        ]
    ].sort_values(by="Record Count", ascending=False)
    dfP["Records per Segment"] = round(
        dfP["Record Count"] / dfP["Segment Count"], 2
    )  # Remove after records per segment is fixed
    dfP.reset_index(drop=True, inplace=True)
    export_Part = dfP.copy()

    # Hierarchies
    dfH_filt = dfH[dfH["Level Ordinal"] == 0]
    dfH_filt = dfH_filt[["Table Name", "Hierarchy Name", "Used Size"]].sort_values(
        by="Used Size", ascending=False
    )
    dfH_filt.reset_index(drop=True, inplace=True)
    dfH_filt.fillna({"Used Size": 0}, inplace=True)
    dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int)
    export_Hier = dfH_filt.copy()

    # Model
    # Converting to KB/MB/GB necessitates division by 1024 * 1000.
    if db_total_size >= 1000000000:
        y = db_total_size / (1024**3) * 1000000000
    elif db_total_size >= 1000000:
        y = db_total_size / (1024**2) * 1000000
    elif db_total_size >= 1000:
        y = db_total_size / (1024) * 1000
    else:
        y = db_total_size
    y = round(y)

    dfModel = pd.DataFrame(
        {
            "Dataset Name": dataset_name,
            "Total Size": y,
            "Table Count": table_count,
            "Column Count": column_count,
            "Compatibility Level": compat_level,
            "Default Mode": def_mode,
        },
        index=[0],
    )
    dfModel.reset_index(drop=True, inplace=True)
    dfModel["Default Mode"] = dfModel["Default Mode"].astype(str)
    export_Model = dfModel.copy()

    def _style_columns_based_on_types(dataframe: pd.DataFrame, column_type_mapping):
        # Define formatting functions based on the type mappings
        format_funcs = {
            "int": lambda x: "{:,}".format(x) if pd.notnull(x) else "",
            "pct": lambda x: "{:.2f}%".format(x) if pd.notnull(x) else "",
            "": lambda x: "{}".format(x),
        }

        # Apply the formatting function to each column based on its specified type
        for col, dt in column_type_mapping.items():
            if dt in format_funcs:
                dataframe[col] = dataframe[col].map(format_funcs[dt])

        return dataframe

    dfModel = _style_columns_based_on_types(
        dfModel,
        column_type_mapping={
            key: values[1] for key, values in vertipaq_map["Model"].items()
        },
    )
    dfT = _style_columns_based_on_types(
        dfT,
        column_type_mapping={
            key: values[1] for key, values in vertipaq_map["Tables"].items()
        },
    )
    dfP = _style_columns_based_on_types(
        dfP,
        column_type_mapping={
            key: values[1] for key, values in vertipaq_map["Partitions"].items()
        },
    )
    colSize = _style_columns_based_on_types(
        colSize,
        column_type_mapping={
            key: values[1] for key, values in vertipaq_map["Columns"].items()
        },
    )
    temp = _style_columns_based_on_types(
        temp,
        column_type_mapping={
            key: values[1] for key, values in vertipaq_map["Columns"].items()
        },
    )
    dfR = _style_columns_based_on_types(
        dfR,
        column_type_mapping={
            key: values[1] for key, values in vertipaq_map["Relationships"].items()
        },
    )
    dfH_filt = _style_columns_based_on_types(
        dfH_filt,
        column_type_mapping={
            key: values[1] for key, values in vertipaq_map["Hierarchies"].items()
        },
    )

    dataFrames = {
        "dfModel": dfModel,
        "dfT": dfT,
        "dfP": dfP,
        "colSize": colSize,
        "temp": temp,
        "dfR": dfR,
        "dfH_filt": dfH_filt,
    }

    dfs = {}
    for fileName, df in dataFrames.items():
        dfs[fileName] = df

    if export is None:
        visualize_vertipaq(dfs)
        return {
            "Model Summary": export_Model,
            "Tables": export_Table,
            "Partitions": export_Part,
            "Columns": export_Col,
            "Relationships": export_Rel,
            "Hierarchies": export_Hier,
        }

    # Export vertipaq to delta tables in lakehouse
    if export in ["table", "zip"]:
        if not lakehouse_attached():
            raise ValueError(
                f"{icons.red_dot} In order to save the Vertipaq Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
            )

    if export == "table":
        lakeTName = "vertipaqanalyzer_model"

        lakeT = get_lakehouse_tables()
        lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName]

        if len(lakeT_filt) == 0:
            runId = 1
        else:
            max_run_id = _get_column_aggregate(table_name=lakeTName)
            runId = max_run_id + 1

        dfMap = {
            "Columns": ["Columns", export_Col],
            "Tables": ["Tables", export_Table],
            "Partitions": ["Partitions", export_Part],
            "Relationships": ["Relationships", export_Rel],
            "Hierarchies": ["Hierarchies", export_Hier],
            "Model": ["Model", export_Model],
        }

        print(
            f"{icons.in_progress} Saving Vertipaq Analyzer to delta tables in the lakehouse...\n"
        )
        now = datetime.datetime.now()
        dfD = fabric.list_datasets(workspace=workspace_id, mode="rest")
        dfD_filt = dfD[dfD["Dataset Id"] == dataset_id]
        configured_by = dfD_filt["Configured By"].iloc[0]
        capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace_id)

        for key_name, (obj, df) in dfMap.items():
            df["Capacity Name"] = capacity_name
            df["Capacity Id"] = capacity_id
            df["Configured By"] = configured_by
            df["Workspace Name"] = workspace_name
            df["Workspace Id"] = workspace_id
            df["Dataset Name"] = dataset_name
            df["Dataset Id"] = dataset_id
            df["RunId"] = runId
            df["Timestamp"] = now

            colName = "Capacity Name"
            df.insert(0, colName, df.pop(colName))
            colName = "Capacity Id"
            df.insert(1, colName, df.pop(colName))
            colName = "Workspace Name"
            df.insert(2, colName, df.pop(colName))
            colName = "Workspace Id"
            df.insert(3, colName, df.pop(colName))
            colName = "Dataset Name"
            df.insert(4, colName, df.pop(colName))
            colName = "Dataset Id"
            df.insert(5, colName, df.pop(colName))
            colName = "Configured By"
            df.insert(6, colName, df.pop(colName))

            df.columns = df.columns.str.replace(" ", "_")

            schema = {
                "Capacity_Name": icons.data_type_string,
                "Capacity_Id": icons.data_type_string,
                "Workspace_Name": icons.data_type_string,
                "Workspace_Id": icons.data_type_string,
                "Dataset_Name": icons.data_type_string,
                "Dataset_Id": icons.data_type_string,
                "Configured_By": icons.data_type_string,
            }

            schema.update(
                {
                    key.replace(" ", "_"): value[0]
                    for key, value in vertipaq_map[key_name].items()
                }
            )
            schema["RunId"] = icons.data_type_long
            schema["Timestamp"] = icons.data_type_timestamp

            delta_table_name = f"VertipaqAnalyzer_{obj}".lower()
            save_as_delta_table(
                dataframe=df,
                delta_table_name=delta_table_name,
                write_mode="append",
                schema=schema,
                merge_schema=True,
            )

    # Export vertipaq to zip file within the lakehouse
    if export == "zip":
        dataFrames = {
            "dfModel": dfModel,
            "dfT": dfT,
            "dfP": dfP,
            "colSize": colSize,
            "temp": temp,
            "dfR": dfR,
            "dfH_filt": dfH_filt,
        }

        zipFileName = f"{workspace_name}.{dataset_name}.zip"

        folderPath = "/lakehouse/default/Files"
        subFolderPath = os.path.join(folderPath, "VertipaqAnalyzer")
        ext = ".csv"
        if not os.path.exists(subFolderPath):
            os.makedirs(subFolderPath, exist_ok=True)
        zipFilePath = os.path.join(subFolderPath, zipFileName)

        # Create CSV files based on dataframes
        for fileName, df in dataFrames.items():
            filePath = os.path.join(subFolderPath, f"{fileName}{ext}")
            df.to_csv(filePath, index=False)

        # Create a zip file and add CSV files to it
        with zipfile.ZipFile(zipFilePath, "w") as zipf:
            for fileName in dataFrames:
                filePath = os.path.join(subFolderPath, f"{fileName}{ext}")
                zipf.write(filePath, os.path.basename(filePath))

        # Clean up: remove the individual CSV files
        for fileName, df in dataFrames.items():
            filePath = os.path.join(subFolderPath, fileName) + ext
            if os.path.exists(filePath):
                os.remove(filePath)
        print(
            f"{icons.green_dot} The Vertipaq Analyzer info for the '{dataset_name}' semantic model in the '{workspace_name}' workspace has been saved "
            f"to the 'Vertipaq Analyzer/{zipFileName}' in the default lakehouse attached to this notebook."
        )


def visualize_vertipaq(dataframes):

    # Tooltips for columns within the visual
    data = [
        {
            "ViewName": "Model",
            "ColumnName": "Dataset Name",
            "Tooltip": "The name of the semantic model",
        },
        {
            "ViewName": "Model",
            "ColumnName": "Total Size",
            "Tooltip": "The size of the model (in bytes)",
        },
        {
            "ViewName": "Model",
            "ColumnName": "Table Count",
            "Tooltip": "The number of tables in the semantic model",
        },
        {
            "ViewName": "Model",
            "ColumnName": "Column Count",
            "Tooltip": "The number of columns in the semantic model",
        },
        {
            "ViewName": "Model",
            "ColumnName": "Compatibility Level",
            "Tooltip": "The compatibility level of the semantic model",
        },
        {
            "ViewName": "Model",
            "ColumnName": "Default Mode",
            "Tooltip": "The default query mode of the semantic model",
        },
        {
            "ViewName": "Table",
            "ColumnName": "Table Name",
            "Tooltip": "The name of the table",
        },
        {"ViewName": "Table", "ColumnName": "Type", "Tooltip": "The type of table"},
        {
            "ViewName": "Table",
            "ColumnName": "Row Count",
            "Tooltip": "The number of rows in the table",
        },
        {
            "ViewName": "Table",
            "ColumnName": "Total Size",
            "Tooltip": "Data Size + Dictionary Size + Hierarchy Size (in bytes)",
        },
        {
            "ViewName": "Table",
            "ColumnName": "Data Size",
            "Tooltip": "The size of the data for all the columns in this table (in bytes)",
        },
        {
            "ViewName": "Table",
            "ColumnName": "Dictionary Size",
            "Tooltip": "The size of the column's dictionary for all columns in this table (in bytes)",
        },
        {
            "ViewName": "Table",
            "ColumnName": "Hierarchy Size",
            "Tooltip": "The size of hierarchy structures for all columns in this table (in bytes)",
        },
        {
            "ViewName": "Table",
            "ColumnName": "% DB",
            "Tooltip": "The size of the table relative to the size of the semantic model",
        },
        {
            "ViewName": "Table",
            "ColumnName": "Partitions",
            "Tooltip": "The number of partitions in the table",
        },
        {
            "ViewName": "Table",
            "ColumnName": "Columns",
            "Tooltip": "The number of columns in the table",
        },
        {
            "ViewName": "Partition",
            "ColumnName": "Table Name",
            "Tooltip": "The name of the table",
        },
        {
            "ViewName": "Partition",
            "ColumnName": "Partition Name",
            "Tooltip": "The name of the partition within the table",
        },
        {
            "ViewName": "Partition",
            "ColumnName": "Mode",
            "Tooltip": "The query mode of the partition",
        },
        {
            "ViewName": "Partition",
            "ColumnName": "Record Count",
            "Tooltip": "The number of rows in the partition",
        },
        {
            "ViewName": "Partition",
            "ColumnName": "Segment Count",
            "Tooltip": "The number of segments within the partition",
        },
        {
            "ViewName": "Partition",
            "ColumnName": "Records per Segment",
            "Tooltip": "The number of rows per segment",
        },
        {
            "ViewName": "Column",
            "ColumnName": "Table Name",
            "Tooltip": "The name of the table",
        },
        {
            "ViewName": "Column",
            "ColumnName": "Column Name",
            "Tooltip": "The name of the column",
        },
        {
            "ViewName": "Column",
            "ColumnName": "Type",
            "Tooltip": "The type of column",
        },
        {
            "ViewName": "Column",
            "ColumnName": "Cardinality",
            "Tooltip": "The number of unique rows in the column",
        },
        {
            "ViewName": "Column",
            "ColumnName": "Total Size",
            "Tooltip": "Data Size + Dictionary Size + Hierarchy Size (in bytes)",
        },
        {
            "ViewName": "Column",
            "ColumnName": "Data Size",
            "Tooltip": "The size of the data for the column (in bytes)",
        },
        {
            "ViewName": "Column",
            "ColumnName": "Dictionary Size",
            "Tooltip": "The size of the column's dictionary (in bytes)",
        },
        {
            "ViewName": "Column",
            "ColumnName": "Hierarchy Size",
            "Tooltip": "The size of hierarchy structures (in bytes)",
        },
        {
            "ViewName": "Column",
            "ColumnName": "% Table",
            "Tooltip": "The size of the column relative to the size of the table",
        },
        {
            "ViewName": "Column",
            "ColumnName": "% DB",
            "Tooltip": "The size of the column relative to the size of the semantic model",
        },
        {
            "ViewName": "Column",
            "ColumnName": "Data Type",
            "Tooltip": "The data type of the column",
        },
        {
            "ViewName": "Column",
            "ColumnName": "Encoding",
            "Tooltip": "The encoding type for the column",
        },
        {
            "ViewName": "Column",
            "ColumnName": "Is Resident",
            "Tooltip": "Indicates whether the column is in memory or not",
        },
        {
            "ViewName": "Column",
            "ColumnName": "Temperature",
            "Tooltip": "A decimal indicating the frequency and recency of queries against the column",
        },
        {
            "ViewName": "Column",
            "ColumnName": "Last Accessed",
            "Tooltip": "The time the column was last queried",
        },
        {
            "ViewName": "Hierarchy",
            "ColumnName": "Table Name",
            "Tooltip": "The name of the table",
        },
        {
            "ViewName": "Hierarchy",
            "ColumnName": "Hierarchy Name",
            "Tooltip": "The name of the hierarchy",
        },
        {
            "ViewName": "Hierarchy",
            "ColumnName": "Used Size",
            "Tooltip": "The size of user hierarchy structures (in bytes)",
        },
        {
            "ViewName": "Relationship",
            "ColumnName": "From Object",
            "Tooltip": "The from table/column in the relationship",
        },
        {
            "ViewName": "Relationship",
            "ColumnName": "To Object",
            "Tooltip": "The to table/column in the relationship",
        },
        {
            "ViewName": "Relationship",
            "ColumnName": "Multiplicity",
            "Tooltip": "The cardinality on each side of the relationship",
        },
        {
            "ViewName": "Relationship",
            "ColumnName": "Used Size",
            "Tooltip": "The size of the relationship (in bytes)",
        },
        {
            "ViewName": "Relationship",
            "ColumnName": "Max From Cardinality",
            "Tooltip": "The number of unique values in the column used in the from side of the relationship",
        },
        {
            "ViewName": "Relationship",
            "ColumnName": "Max To Cardinality",
            "Tooltip": "The number of unique values in the column used in the to side of the relationship",
        },
        {
            "ViewName": "Relationship",
            "ColumnName": "Missing Rows",
            "Tooltip": "The number of rows in the 'from' table which do not map to the key column in the 'to' table",
        },
    ]

    # Create DataFrame
    tooltipDF = pd.DataFrame(data)

    # define the dictionary with {"Tab name":df}
    df_dict = {
        "Model Summary": dataframes["dfModel"],
        "Tables": dataframes["dfT"],
        "Partitions": dataframes["dfP"],
        "Columns (Total Size)": dataframes["colSize"],
        "Columns (Temperature)": dataframes["temp"],
        "Relationships": dataframes["dfR"],
        "Hierarchies": dataframes["dfH_filt"],
    }

    mapping = {
        "Model Summary": "Model",
        "Tables": "Table",
        "Partitions": "Partition",
        "Columns (Total Size)": "Column",
        "Columns (Temperature)": "Column",
        "Relationships": "Relationship",
        "Hierarchies": "Hierarchy",
    }

    # Basic styles for the tabs and tab content
    styles = """
    <style>
    .tab { overflow: hidden; border: 1px solid #ccc; background-color: #f1f1f1; }
    .tab button { background-color: inherit; float: left; border: none; outline: none; cursor: pointer; padding: 14px 16px; transition: 0.3s; }
    .tab button:hover { background-color: #ddd; }
    .tab button.active { background-color: #ccc; }
    .tabcontent { display: none; padding: 6px 12px; border: 1px solid #ccc; border-top: none; }
    </style>
    """
    # JavaScript for tab functionality
    script = """
    <script>
    function openTab(evt, tabName) {
        var i, tabcontent, tablinks;
        tabcontent = document.getElementsByClassName("tabcontent");
        for (i = 0; i < tabcontent.length; i++) {
            tabcontent[i].style.display = "none";
        }
        tablinks = document.getElementsByClassName("tablinks");
        for (i = 0; i < tablinks.length; i++) {
            tablinks[i].className = tablinks[i].className.replace(" active", "");
        }
        document.getElementById(tabName).style.display = "block";
        evt.currentTarget.className += " active";
    }
    </script>
    """

    # HTML for tabs
    tab_html = '<div class="tab">'
    content_html = ""
    for i, (title, df) in enumerate(df_dict.items()):
        tab_id = f"tab{i}"
        tab_html += f'<button class="tablinks" onclick="openTab(event, \'{tab_id}\')">{title}</button>'

        vw = mapping.get(title)

        df_html = df.to_html()
        for col in df.columns:
            tt = None
            try:
                tooltipDF_filt = tooltipDF[
                    (tooltipDF["ViewName"] == vw) & (tooltipDF["ColumnName"] == col)
                ]
                tt = tooltipDF_filt["Tooltip"].iloc[0]
            except Exception:
                pass
            df_html = df_html.replace(f"<th>{col}</th>", f'<th title="{tt}">{col}</th>')
        content_html += (
            f'<div id="{tab_id}" class="tabcontent"><h3>{title}</h3>{df_html}</div>'
        )
    tab_html += "</div>"

    # Display the tabs, tab contents, and run the script
    display(HTML(styles + tab_html + content_html + script))
    # Default to open the first tab
    display(
        HTML("<script>document.getElementsByClassName('tablinks')[0].click();</script>")
    )


@log
def import_vertipaq_analyzer(folder_path: str, file_name: str):
    """
    Imports and visualizes the vertipaq analyzer info from a saved .zip file in your lakehouse.

    Parameters
    ----------
    folder_path : str
        The folder within your lakehouse in which the .zip file containing the vertipaq analyzer info has been saved.
    file_name : str
        The file name of the file which contains the vertipaq analyzer info.

    Returns
    -------
    str
        A visualization of the Vertipaq Analyzer statistics.
    """

    pd.options.mode.copy_on_write = True

    zipFilePath = os.path.join(folder_path, file_name)
    extracted_dir = os.path.join(folder_path, "extracted_dataframes")

    with zipfile.ZipFile(zipFilePath, "r") as zip_ref:
        zip_ref.extractall(extracted_dir)

        # Read all CSV files into a dictionary of DataFrames
        dfs = {}
        for file_name in zip_ref.namelist():
            df = pd.read_csv(extracted_dir + "/" + file_name)
            file_path = Path(file_name)
            df_name = file_path.stem
            dfs[df_name] = df

    visualize_vertipaq(dfs)

    # Clean up: remove the extracted directory
    shutil.rmtree(extracted_dir)