semantic-link-labs 0.9.9__py3-none-any.whl → 0.9.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)
  1. {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/METADATA +30 -22
  2. {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/RECORD +47 -40
  3. {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +28 -1
  5. sempy_labs/_clear_cache.py +12 -0
  6. sempy_labs/_dax.py +8 -2
  7. sempy_labs/_delta_analyzer.py +17 -26
  8. sempy_labs/_environments.py +19 -1
  9. sempy_labs/_generate_semantic_model.py +7 -8
  10. sempy_labs/_helper_functions.py +351 -151
  11. sempy_labs/_kql_databases.py +18 -0
  12. sempy_labs/_kusto.py +137 -0
  13. sempy_labs/_list_functions.py +18 -36
  14. sempy_labs/_model_bpa_rules.py +13 -3
  15. sempy_labs/_notebooks.py +44 -11
  16. sempy_labs/_semantic_models.py +93 -1
  17. sempy_labs/_sql.py +3 -2
  18. sempy_labs/_tags.py +194 -0
  19. sempy_labs/_variable_libraries.py +89 -0
  20. sempy_labs/_vertipaq.py +6 -6
  21. sempy_labs/_vpax.py +386 -0
  22. sempy_labs/_warehouses.py +3 -3
  23. sempy_labs/admin/__init__.py +14 -0
  24. sempy_labs/admin/_artifacts.py +3 -3
  25. sempy_labs/admin/_capacities.py +161 -1
  26. sempy_labs/admin/_dataflows.py +45 -0
  27. sempy_labs/admin/_items.py +16 -11
  28. sempy_labs/admin/_tags.py +126 -0
  29. sempy_labs/admin/_tenant.py +5 -5
  30. sempy_labs/directlake/_generate_shared_expression.py +29 -26
  31. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +55 -5
  32. sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
  33. sempy_labs/lakehouse/__init__.py +16 -0
  34. sempy_labs/lakehouse/_blobs.py +115 -63
  35. sempy_labs/lakehouse/_get_lakehouse_columns.py +41 -18
  36. sempy_labs/lakehouse/_get_lakehouse_tables.py +62 -47
  37. sempy_labs/lakehouse/_helper.py +211 -0
  38. sempy_labs/lakehouse/_lakehouse.py +45 -36
  39. sempy_labs/lakehouse/_livy_sessions.py +137 -0
  40. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +7 -12
  41. sempy_labs/migration/_refresh_calc_tables.py +7 -6
  42. sempy_labs/report/_download_report.py +1 -1
  43. sempy_labs/report/_generate_report.py +5 -1
  44. sempy_labs/report/_reportwrapper.py +31 -18
  45. sempy_labs/tom/_model.py +104 -35
  46. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +0 -9
  47. sempy_labs/report/_bpareporttemplate/.platform +0 -11
  48. {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/licenses/LICENSE +0 -0
  49. {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/top_level.txt +0 -0
sempy_labs/_tags.py ADDED
@@ -0,0 +1,194 @@
+from sempy_labs._helper_functions import (
+    _base_api,
+    _create_dataframe,
+    _update_dataframe_datatypes,
+    resolve_item_name_and_id,
+    resolve_workspace_name_and_id,
+    _is_valid_uuid,
+)
+import pandas as pd
+from typing import Optional, List
+from uuid import UUID
+import sempy_labs._icons as icons
+
+
+def list_tags() -> pd.DataFrame:
+    """
+    Shows a list of all the tenant's tags.
+
+    This is a wrapper function for the following API: `Tags - List Tags <https://learn.microsoft.com/rest/api/fabric/core/tags/list-tags>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of all the tenant's tags.
+    """
+
+    columns = {
+        "Tag Name": "string",
+        "Tag Id": "string",
+    }
+    df = _create_dataframe(columns=columns)
+
+    responses = _base_api(
+        request="/v1/tags",
+        uses_pagination=True,
+        client="fabric_sp",
+    )
+
+    dfs = []
+
+    for r in responses:
+        for v in r.get("value", []):
+            new_data = {
+                "Tag Name": v.get("displayName"),
+                "Tag Id": v.get("id"),
+            }
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+    return df
+
+
+def resolve_tags(tags: str | List[str]) -> List[str]:
+    """
+    Resolves the tags to a list of strings.
+
+    Parameters
+    ----------
+    tags : str | List[str]
+        The tags to resolve.
+
+    Returns
+    -------
+    List[str]
+        A list of resolved tags.
+    """
+
+    if isinstance(tags, str):
+        tags = [tags]
+
+    if all(_is_valid_uuid(tag) for tag in tags):
+        return tags
+
+    df = list_tags()
+
+    tag_list = []
+    for tag in tags:
+        if _is_valid_uuid(tag):
+            tag_list.append(tag)
+        else:
+            df_filt = df[df["Tag Name"] == tag]
+            if df_filt.empty:
+                raise ValueError(f"Tag '{tag}' not found in the tenant's tags.")
+            tag_id = df_filt["Tag Id"].iloc[0]
+            tag_list.append(tag_id)
+
+    return tag_list
+
+
+def apply_tags(
+    item: str | UUID,
+    type: str,
+    tags: str | UUID | List[str | UUID],
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Applies one or more tags to an item.
+
+    This is a wrapper function for the following API: `Tags - Apply Tags <https://learn.microsoft.com/rest/api/fabric/core/tags/apply-tags>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    item : str | uuid.UUID
+        The name or ID of the item to apply tags to.
+    type : str
+        The type of the item to apply tags to. For example: "Lakehouse".
+    tags : str | uuid.UUID | List[str | uuid.UUID]
+        The name or ID of the tag(s) to apply to the item.
+    workspace : str | uuid.UUID, default=None
+        The workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (item_name, item_id) = resolve_item_name_and_id(item, type, workspace_id)
+
+    if isinstance(tags, str):
+        tags = [tags]
+
+    tag_list = resolve_tags(tags)
+
+    payload = {
+        "tags": tag_list,
+    }
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/items/{item_id}/applyTags",
+        client="fabric_sp",
+        method="post",
+        payload=payload,
+    )
+
+    print(
+        f"{icons.green_dot} Tags {tags} applied to the '{item_name}' {type.lower()} within the '{workspace_name}' workspace"
+    )
+
+
+def unapply_tags(
+    item: str | UUID,
+    type: str,
+    tags: str | UUID | List[str | UUID],
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Removes (unapplies) one or more tags from an item.
+
+    This is a wrapper function for the following API: `Tags - Unapply Tags <https://learn.microsoft.com/rest/api/fabric/core/tags/unapply-tags>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    item : str | uuid.UUID
+        The name or ID of the item to remove tags from.
+    type : str
+        The type of the item to remove tags from. For example: "Lakehouse".
+    tags : str | uuid.UUID | List[str | uuid.UUID]
+        The name or ID of the tag(s) to remove from the item.
+    workspace : str | uuid.UUID, default=None
+        The workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (item_name, item_id) = resolve_item_name_and_id(item, type, workspace_id)
+
+    if isinstance(tags, str):
+        tags = [tags]
+
+    tag_list = resolve_tags(tags)
+
+    payload = {
+        "tags": tag_list,
+    }
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/items/{item_id}/unapplyTags",
+        client="fabric_sp",
+        method="post",
+        payload=payload,
+    )
+
+    print(
+        f"{icons.green_dot} Tags {tags} removed from the '{item_name}' {type.lower()} within the '{workspace_name}' workspace"
+    )
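For orientation, here is a minimal usage sketch of the new tagging helpers defined above. It imports directly from sempy_labs._tags as shown in this file; the workspace, item, and tag names are placeholders that must exist in your tenant.

from sempy_labs._tags import list_tags, apply_tags, unapply_tags

# Enumerate the tenant's tags as a pandas DataFrame
df_tags = list_tags()

# Apply a tag to a lakehouse by name, then remove it again (placeholder names)
apply_tags(item="Sales Lakehouse", type="Lakehouse", tags="Finance", workspace="Analytics")
unapply_tags(item="Sales Lakehouse", type="Lakehouse", tags="Finance", workspace="Analytics")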
sempy_labs/_variable_libraries.py ADDED
@@ -0,0 +1,89 @@
+from sempy_labs._helper_functions import (
+    resolve_workspace_name_and_id,
+    resolve_workspace_id,
+    _base_api,
+    _create_dataframe,
+    _update_dataframe_datatypes,
+    delete_item,
+)
+import pandas as pd
+from typing import Optional
+from uuid import UUID
+
+
+def list_variable_libraries(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
+    """
+    Shows the variable libraries within a workspace.
+
+    This is a wrapper function for the following API: `Items - List Variable Libraries <https://learn.microsoft.com/rest/api/fabric/variablelibrary/items/list-variable-libraries>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the variable libraries within a workspace.
+    """
+
+    columns = {
+        "Variable Library Name": "string",
+        "Variable Library Id": "string",
+        "Description": "string",
+        "Active Value Set Name": "string",
+    }
+    df = _create_dataframe(columns=columns)
+
+    workspace_id = resolve_workspace_id(workspace)
+
+    responses = _base_api(
+        request=f"/v1/workspaces/{workspace_id}/VariableLibraries",
+        uses_pagination=True,
+        client="fabric_sp",
+    )
+
+    dfs = []
+    for r in responses:
+        for v in r.get("value", []):
+            prop = v.get("properties", {})
+
+            new_data = {
+                "Variable Library Name": v.get("displayName"),
+                "Variable Library Id": v.get("id"),
+                "Description": v.get("description"),
+                "Active Value Set Name": prop.get("activeValueSetName"),
+            }
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+    return df
+
+
+def delete_variable_library(
+    variable_library: str | UUID, workspace: Optional[str | UUID] = None
+):
+    """
+    Deletes a variable library.
+
+    This is a wrapper function for the following API: `Items - Delete Variable Library <https://learn.microsoft.com/rest/api/fabric/variablelibrary/items/delete-variable-library>`_.
+
+    Parameters
+    ----------
+    variable_library : str | uuid.UUID
+        Name or ID of the variable library.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    delete_item(item=variable_library, type="VariableLibrary", workspace=workspace)
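A minimal sketch of the new variable-library helpers, assuming a workspace named "Analytics" and a library named "MyVariableLibrary" (both placeholders):

from sempy_labs._variable_libraries import list_variable_libraries, delete_variable_library

# List the variable libraries in the workspace, including each library's active value set
df = list_variable_libraries(workspace="Analytics")
print(df[["Variable Library Name", "Active Value Set Name"]])

# Delete a variable library by name
delete_variable_library(variable_library="MyVariableLibrary", workspace="Analytics")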
sempy_labs/_vertipaq.py CHANGED
@@ -8,7 +8,6 @@ import datetime
 import warnings
 from sempy_labs._helper_functions import (
     format_dax_object_name,
-    resolve_lakehouse_name,
     save_as_delta_table,
     resolve_workspace_capacity,
     _get_column_aggregate,
@@ -20,7 +19,6 @@ from sempy_labs._helper_functions import (
 )
 from sempy_labs._list_functions import list_relationships, list_tables
 from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
-from sempy_labs.directlake import get_direct_lake_source
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
@@ -176,10 +174,12 @@ def vertipaq_analyzer(
     )

     artifact_type = None
-    if is_direct_lake:
-        artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
-            get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
-        )
+    lakehouse_workspace_id = None
+    lakehouse_name = None
+    # if is_direct_lake:
+    #     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+    #         get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
+    #     )

     dfR["Missing Rows"] = 0
     dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)
sempy_labs/_vpax.py ADDED
@@ -0,0 +1,386 @@
+import sempy
+import re
+from urllib.parse import urlparse
+import sempy.fabric as fabric
+import sys
+from pathlib import Path
+from typing import Optional
+from uuid import UUID
+from sempy_labs._helper_functions import (
+    resolve_workspace_name_and_id,
+    resolve_dataset_name_and_id,
+    resolve_lakehouse_name_and_id,
+    _mount,
+    _get_column_aggregate,
+    resolve_item_type,
+    file_exists,
+    create_abfss_path_from_path,
+)
+import sempy_labs._icons as icons
+import zipfile
+import requests
+
+
+VPA_VERSION = "1.10.0"
+NUGET_BASE_URL = "https://www.nuget.org/api/v2/package"
+ASSEMBLIES = [
+    "Dax.Metadata",
+    "Dax.Model.Extractor",
+    "Dax.ViewVpaExport",
+    "Dax.Vpax",
+]
+
+_vpa_initialized = False
+current_dir = Path(__file__).parent
+nuget_dir = current_dir / "nuget_dlls"
+
+
+def find_lib_folder(pkg_folder: Path) -> Path:
+    lib_base = pkg_folder / "lib"
+    if not lib_base.exists():
+        raise FileNotFoundError(f"No 'lib' directory in package {pkg_folder}")
+
+    # Prefer netstandard2.0 if available
+    candidates = sorted(lib_base.iterdir())
+    for preferred in ["netstandard2.0", "net6.0", "net5.0", "netcoreapp3.1", "net472"]:
+        if (lib_base / preferred).exists():
+            return lib_base / preferred
+
+    # Fallback: first available folder
+    for candidate in candidates:
+        if candidate.is_dir():
+            return candidate
+
+    raise FileNotFoundError(f"No usable framework folder found in {lib_base}")
+
+
+def download_and_extract_package(
+    package_name: str, version: str, target_dir: Path
+) -> Path:
+    nupkg_url = f"{NUGET_BASE_URL}/{package_name}/{version}"
+    nupkg_path = target_dir / f"{package_name}.{version}.nupkg"
+
+    if not nupkg_path.exists():
+        r = requests.get(nupkg_url)
+        r.raise_for_status()
+        target_dir.mkdir(parents=True, exist_ok=True)
+        with open(nupkg_path, "wb") as f:
+            f.write(r.content)
+
+    extract_path = target_dir / f"{package_name}_{version}"
+    if not extract_path.exists():
+        with zipfile.ZipFile(nupkg_path, "r") as zip_ref:
+            zip_ref.extractall(extract_path)
+    return extract_path
+
+
+def download_and_load_nuget_package(
+    package_name, version, target_dir: Path = None, load_assembly=True
+):
+
+    from System.Reflection import Assembly
+
+    if target_dir is None:
+        target_dir = nuget_dir
+
+    # Download and extract
+    pkg_folder = download_and_extract_package(package_name, version, target_dir)
+    lib_folder = find_lib_folder(pkg_folder)
+
+    dll_path = lib_folder / f"{package_name}.dll"
+    if not dll_path.exists():
+        raise FileNotFoundError(f"{dll_path} not found")
+
+    sys.path.append(str(lib_folder))
+    if load_assembly:
+        Assembly.LoadFile(str(dll_path))
+
+
+def init_vertipaq_analyzer():
+    global _vpa_initialized
+    if _vpa_initialized:
+        return
+
+    from clr_loader import get_coreclr
+    from pythonnet import set_runtime
+
+    # Load the runtime and set it BEFORE importing clr
+    runtime_config_path = current_dir / "dotnet_lib" / "dotnet.runtime.config.json"
+    rt = get_coreclr(runtime_config=str(runtime_config_path))
+    set_runtime(rt)
+
+    sempy.fabric._client._utils._init_analysis_services()
+
+    from System.Reflection import Assembly
+
+    for name in ASSEMBLIES:
+        download_and_load_nuget_package(
+            name, VPA_VERSION, nuget_dir, load_assembly=False
+        )
+
+    download_and_load_nuget_package("Newtonsoft.Json", "13.0.1")
+    download_and_load_nuget_package("System.IO.Packaging", "7.0.0")
+
+    # For an unknown reason, these assemblies must be loaded here rather than inside download_and_load_nuget_package
+    dll_paths = [
+        f"{nuget_dir}/Dax.Model.Extractor_1.10.0/lib/net6.0/Dax.Model.Extractor.dll",
+        f"{nuget_dir}/Dax.Metadata_1.10.0/lib/netstandard2.0/Dax.Metadata.dll",
+        f"{nuget_dir}/Dax.ViewVpaExport_1.10.0/lib/netstandard2.0/Dax.ViewVpaExport.dll",
+        f"{nuget_dir}/Dax.Vpax_1.10.0/lib/net6.0/Dax.Vpax.dll",
+    ]
+    for dll_path in dll_paths:
+        Assembly.LoadFile(dll_path)
+
+    _vpa_initialized = True
+
+
+def create_vpax(
+    dataset: str | UUID,
+    workspace: Optional[str | UUID] = None,
+    lakehouse: Optional[str | UUID] = None,
+    lakehouse_workspace: Optional[str | UUID] = None,
+    file_path: Optional[str] = None,
+    read_stats_from_data: bool = False,
+    read_direct_query_stats: bool = False,
+    direct_lake_stats_mode: str = "ResidentOnly",
+    overwrite: bool = False,
+):
+    """
+    Creates a .vpax file for a semantic model and saves it to a lakehouse. This is based on `SQL BI's VertiPaq Analyzer <https://www.sqlbi.com/tools/vertipaq-analyzer/>`_.
+
+    Parameters
+    ----------
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
+    workspace : str | uuid.UUID, default=None
+        The workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    lakehouse : str | uuid.UUID, default=None
+        The lakehouse name or ID.
+        Defaults to None which resolves to the attached lakehouse.
+    lakehouse_workspace : str | uuid.UUID, default=None
+        The workspace name or ID of the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse.
+    file_path : str, default=None
+        The path where the .vpax file will be saved in the lakehouse.
+        Defaults to None which resolves to the dataset name.
+    read_stats_from_data : bool, default=False
+        Whether to read statistics from the data.
+    read_direct_query_stats : bool, default=False
+        Whether to analyze DirectQuery tables.
+    direct_lake_stats_mode : str, default='ResidentOnly'
+        The Direct Lake extraction mode. Options are 'ResidentOnly' or 'Full'. This parameter is ignored if read_stats_from_data is False. This parameter is only relevant for tables which use Direct Lake mode.
+        If set to 'ResidentOnly', column statistics are obtained only for the columns which are in memory.
+        If set to 'Full', column statistics are obtained for all columns - pending the proper identification of the Direct Lake source.
+    overwrite : bool, default=False
+        Whether to overwrite the .vpax file if it already exists in the lakehouse.
+    """
+
+    init_vertipaq_analyzer()
+
+    import notebookutils
+    from Dax.Metadata import DirectLakeExtractionMode
+    from Dax.Model.Extractor import TomExtractor
+    from Dax.Vpax.Tools import VpaxTools
+    from Dax.ViewVpaExport import Model
+    from System.IO import MemoryStream, FileMode, FileStream, FileAccess, FileShare
+
+    direct_lake_stats_mode = direct_lake_stats_mode.capitalize()
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
+    (lakehouse_workspace_name, lakehouse_workspace_id) = resolve_workspace_name_and_id(
+        lakehouse_workspace
+    )
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse=lakehouse, workspace=lakehouse_workspace_id
+    )
+
+    local_path = _mount(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id)
+    if file_path is None:
+        file_path = dataset_name
+
+    if file_path.endswith(".vpax"):
+        file_path = file_path[:-5]
+    save_location = f"Files/{file_path}.vpax"
+    path = f"{local_path}/{save_location}"
+
+    # Check if the .vpax file already exists in the lakehouse
+    if not overwrite:
+        new_path = create_abfss_path_from_path(
+            lakehouse_id, lakehouse_workspace_id, save_location
+        )
+        if file_exists(new_path):
+            print(
+                f"{icons.warning} The {save_location} file already exists in the '{lakehouse_name}' lakehouse. Set overwrite=True to overwrite the file."
+            )
+            return
+
+    vpax_stream = MemoryStream()
+    extractor_app_name = "VPAX Notebook"
+    extractor_app_version = "1.0"
+    column_batch_size = 50
+    token = notebookutils.credentials.getToken("pbi")
+    connection_string = f"data source=powerbi://api.powerbi.com/v1.0/myorg/{workspace_name};initial catalog={dataset_name};User ID=;Password={token};Persist Security Info=True;Impersonation Level=Impersonate"
+
+    print(f"{icons.in_progress} Extracting .vpax metadata...")
+
+    # Get stats for the model; for direct lake only get is_resident
+    dax_model = TomExtractor.GetDaxModel(
+        connection_string,
+        extractor_app_name,
+        extractor_app_version,
+        read_stats_from_data,
+        0,
+        read_direct_query_stats,
+        DirectLakeExtractionMode.ResidentOnly,
+        column_batch_size,
+    )
+    vpa_model = Model(dax_model)
+    tom_database = TomExtractor.GetDatabase(connection_string)
+
+    # Calculate Direct Lake stats for columns which are IsResident=False
+    from sempy_labs.tom import connect_semantic_model
+
+    with connect_semantic_model(dataset=dataset, workspace=workspace) as tom:
+        is_direct_lake = tom.is_direct_lake()
+        if read_stats_from_data and is_direct_lake and direct_lake_stats_mode == "Full":
+
+            df_not_resident = fabric.evaluate_dax(
+                dataset=dataset,
+                workspace=workspace,
+                dax_string=""" SELECT [DIMENSION_NAME] AS [TableName], [ATTRIBUTE_NAME] AS [ColumnName] FROM $SYSTEM.DISCOVER_STORAGE_TABLE_COLUMNS WHERE NOT [ISROWNUMBER] AND NOT [DICTIONARY_ISRESIDENT]""",
+            )
+
+            import Microsoft.AnalysisServices.Tabular as TOM
+
+            print(f"{icons.in_progress} Calculating Direct Lake statistics...")
+
+            # For SQL endpoints (do once)
+            dfI = fabric.list_items(workspace=workspace)
+            # Get list of tables in Direct Lake mode which have columns that are not resident
+            tbls = [
+                t
+                for t in tom.model.Tables
+                if t.Name in df_not_resident["TableName"].values
+                and any(p.Mode == TOM.ModeType.DirectLake for p in t.Partitions)
+            ]
+            for t in tbls:
+                column_cardinalities = {}
+                table_name = t.Name
+                partition = next(p for p in t.Partitions)
+                entity_name = partition.Source.EntityName
+                schema_name = partition.Source.SchemaName
+                if len(schema_name) == 0 or schema_name == "dbo":
+                    schema_name = None
+                expr_name = partition.Source.ExpressionSource.Name
+                expr = tom.model.Expressions[expr_name].Expression
+                item_id = None
+                if "Sql.Database(" in expr:
+                    matches = re.findall(r'"([^"]+)"', expr)
+                    sql_endpoint_id = matches[1]
+                    dfI_filt = dfI[dfI["Id"] == sql_endpoint_id]
+                    item_name = (
+                        dfI_filt["Display Name"].iloc[0] if not dfI_filt.empty else None
+                    )
+                    dfI_filt2 = dfI[
+                        (dfI["Display Name"] == item_name)
+                        & (dfI["Type"].isin(["Lakehouse", "Warehouse"]))
+                    ]
+                    item_id = dfI_filt2["Id"].iloc[0]
+                    item_type = dfI_filt2["Type"].iloc[0]
+                    item_workspace_id = workspace_id
+                elif "AzureStorage.DataLake(" in expr:
+                    match = re.search(r'AzureStorage\.DataLake\("([^"]+)"', expr)
+                    if match:
+                        url = match.group(1)
+                        path_parts = urlparse(url).path.strip("/").split("/")
+                        if len(path_parts) >= 2:
+                            item_workspace_id, item_id = (
+                                path_parts[0],
+                                path_parts[1],
+                            )
+                        item_type = resolve_item_type(
+                            item_id=item_id, workspace=workspace_id
+                        )
+                else:
+                    raise NotImplementedError(
+                        f"Direct Lake source '{expr}' is not supported. Please report this issue on GitHub (https://github.com/microsoft/semantic-link-labs/issues)."
+                    )
+
+                if not item_id:
+                    print(
+                        f"{icons.info} Cannot determine the Direct Lake source of the '{table_name}' table."
+                    )
+                elif item_type == "Warehouse":
+                    print(
+                        f"{icons.info} The '{table_name}' table references a warehouse. Warehouses are not yet supported for this method."
+                    )
+                else:
+                    df_not_resident_cols = df_not_resident[
+                        df_not_resident["TableName"] == table_name
+                    ]
+                    col_dict = {
+                        c.Name: c.SourceColumn
+                        for c in t.Columns
+                        if c.Type != TOM.ColumnType.RowNumber
+                        and c.Name in df_not_resident_cols["ColumnName"].values
+                    }
+                    col_agg = _get_column_aggregate(
+                        lakehouse=item_id,
+                        workspace=item_workspace_id,
+                        table_name=entity_name,
+                        schema_name=schema_name,
+                        column_name=list(col_dict.values()),
+                        function="distinct",
+                    )
+                    column_cardinalities = {
+                        column_name: col_agg[source_column]
+                        for column_name, source_column in col_dict.items()
+                        if source_column in col_agg
+                    }
+
+                    # Update the dax_model file with column cardinalities
+                    tbl = next(
+                        table
+                        for table in dax_model.Tables
+                        if str(table.TableName) == table_name
+                    )
+                    # print(
+                    #     f"{icons.in_progress} Calculating column cardinalities for the '{table_name}' table..."
+                    # )
+                    cols = [
+                        col
+                        for col in tbl.Columns
+                        if str(col.ColumnType) != "RowNumber"
+                        and str(col.ColumnName) in column_cardinalities
+                    ]
+                    for col in cols:
+                        # print(str(col.ColumnName), col.ColumnCardinality)
+                        col.ColumnCardinality = column_cardinalities.get(
+                            str(col.ColumnName)
+                        )
+
+        VpaxTools.ExportVpax(vpax_stream, dax_model, vpa_model, tom_database)
+
+    print(f"{icons.in_progress} Exporting .vpax file...")
+
+    mode = FileMode.Create
+    file_stream = FileStream(path, mode, FileAccess.Write, FileShare.Read)
+    vpax_stream.CopyTo(file_stream)
+    file_stream.Close()
+
+    print(
+        f"{icons.green_dot} The {file_path}.vpax file has been saved in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace_name}' workspace."
+    )
+
+
+def _dax_distinctcount(table_name, columns):
+
+    dax = "EVALUATE\nROW("
+    for c in columns:
+        full_name = f"'{table_name}'[{c}]"
+        dax += f"""\n"{c}", DISTINCTCOUNT({full_name}),"""
+
+    return f"{dax.rstrip(',')}\n)"