semantic-link-labs 0.9.10__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/METADATA +28 -21
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/RECORD +38 -31
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +26 -1
- sempy_labs/_delta_analyzer.py +9 -8
- sempy_labs/_dictionary_diffs.py +221 -0
- sempy_labs/_environments.py +19 -1
- sempy_labs/_generate_semantic_model.py +1 -1
- sempy_labs/_helper_functions.py +358 -134
- sempy_labs/_kusto.py +25 -23
- sempy_labs/_list_functions.py +13 -35
- sempy_labs/_model_bpa_rules.py +13 -3
- sempy_labs/_notebooks.py +44 -11
- sempy_labs/_semantic_models.py +93 -1
- sempy_labs/_sql.py +4 -3
- sempy_labs/_tags.py +194 -0
- sempy_labs/_user_delegation_key.py +42 -0
- sempy_labs/_variable_libraries.py +89 -0
- sempy_labs/_vpax.py +388 -0
- sempy_labs/admin/__init__.py +8 -0
- sempy_labs/admin/_tags.py +126 -0
- sempy_labs/directlake/_generate_shared_expression.py +5 -1
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +55 -5
- sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
- sempy_labs/lakehouse/__init__.py +14 -0
- sempy_labs/lakehouse/_blobs.py +100 -85
- sempy_labs/lakehouse/_get_lakehouse_tables.py +1 -13
- sempy_labs/lakehouse/_helper.py +211 -0
- sempy_labs/lakehouse/_lakehouse.py +1 -1
- sempy_labs/lakehouse/_livy_sessions.py +137 -0
- sempy_labs/report/__init__.py +2 -0
- sempy_labs/report/_download_report.py +1 -1
- sempy_labs/report/_generate_report.py +5 -1
- sempy_labs/report/_report_helper.py +27 -128
- sempy_labs/report/_reportwrapper.py +1903 -1165
- sempy_labs/tom/_model.py +83 -21
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +0 -9
- sempy_labs/report/_bpareporttemplate/.platform +0 -11
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/top_level.txt +0 -0
sempy_labs/_variable_libraries.py
ADDED
@@ -0,0 +1,89 @@
+from sempy_labs._helper_functions import (
+    resolve_workspace_name_and_id,
+    resolve_workspace_id,
+    _base_api,
+    _create_dataframe,
+    _update_dataframe_datatypes,
+    delete_item,
+)
+import pandas as pd
+from typing import Optional
+from uuid import UUID
+
+
+def list_variable_libraries(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
+    """
+    Shows the variable libraries within a workspace.
+
+    This is a wrapper function for the following API: `Items - List Variable Libraries <https://learn.microsoft.com/rest/api/fabric/variablelibrary/items/list-variable-libraries>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the variable libraries within a workspace.
+    """
+
+    columns = {
+        "Variable Library Name": "string",
+        "Variable Library Id": "string",
+        "Description": "string",
+        "Active Value Set Name": "string",
+    }
+    df = _create_dataframe(columns=columns)
+
+    workspace_id = resolve_workspace_id(workspace)
+
+    responses = _base_api(
+        request=f"/v1/workspaces/{workspace_id}/VariableLibraries",
+        uses_pagination=True,
+        client="fabric_sp",
+    )
+
+    dfs = []
+    for r in responses:
+        for v in r.get("value", []):
+            prop = v.get("properties", {})
+
+            new_data = {
+                "Variable Library Name": v.get("displayName"),
+                "Variable Library Id": v.get("id"),
+                "Description": v.get("description"),
+                "Active Value Set Name": prop.get("activeValueSetName"),
+            }
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+    return df
+
+
+def delete_variable_library(
+    variable_library: str | UUID, workspace: Optional[str | UUID] = None
+):
+    """
+    Deletes a variable library.
+
+    This is a wrapper function for the following API: `Items - Delete Variable Library <https://learn.microsoft.com/rest/api/fabric/warehouse/items/delete-variable-library>`_.
+
+    Parameters
+    ----------
+    variable_library : str | uuid.UUID
+        Name or ID of the variable library.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    delete_item(item=variable_library, type="VariableLibrary", workspace=workspace)
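The new _variable_libraries module is a thin wrapper over the Fabric Variable Libraries REST API. A minimal usage sketch (illustrative only; the workspace and library names are placeholders, and the functions are assumed to be re-exported from the package root via the __init__.py change listed above — otherwise import them from sempy_labs._variable_libraries):

import sempy_labs as labs

# List the variable libraries in a workspace (name or UUID); returns a pandas DataFrame.
df = labs.list_variable_libraries(workspace="My Workspace")

# Delete a variable library by name or ID.
labs.delete_variable_library(variable_library="My Variable Library", workspace="My Workspace")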
sempy_labs/_vpax.py
ADDED
@@ -0,0 +1,388 @@
+import sempy
+import re
+from urllib.parse import urlparse
+import sempy.fabric as fabric
+import sys
+from pathlib import Path
+from typing import Optional
+from uuid import UUID
+from sempy_labs._helper_functions import (
+    resolve_workspace_name_and_id,
+    resolve_dataset_name_and_id,
+    resolve_lakehouse_name_and_id,
+    _mount,
+    _get_column_aggregate,
+    resolve_item_type,
+    file_exists,
+    create_abfss_path_from_path,
+)
+from sempy._utils._log import log
+import sempy_labs._icons as icons
+import zipfile
+import requests
+
+
+VPA_VERSION = "1.10.0"
+NUGET_BASE_URL = "https://www.nuget.org/api/v2/package"
+ASSEMBLIES = [
+    "Dax.Metadata",
+    "Dax.Model.Extractor",
+    "Dax.ViewVpaExport",
+    "Dax.Vpax",
+]
+
+_vpa_initialized = False
+current_dir = Path(__file__).parent
+nuget_dir = current_dir / "nuget_dlls"
+
+
+def find_lib_folder(pkg_folder: Path) -> Path:
+    lib_base = pkg_folder / "lib"
+    if not lib_base.exists():
+        raise FileNotFoundError(f"No 'lib' directory in package {pkg_folder}")
+
+    # Prefer netstandard2.0 if available
+    candidates = sorted(lib_base.iterdir())
+    for preferred in ["netstandard2.0", "net6.0", "net5.0", "netcoreapp3.1", "net472"]:
+        if (lib_base / preferred).exists():
+            return lib_base / preferred
+
+    # Fallback: first available folder
+    for candidate in candidates:
+        if candidate.is_dir():
+            return candidate
+
+    raise FileNotFoundError(f"No usable framework folder found in {lib_base}")
+
+
+def download_and_extract_package(
+    package_name: str, version: str, target_dir: Path
+) -> Path:
+    nupkg_url = f"{NUGET_BASE_URL}/{package_name}/{version}"
+    nupkg_path = target_dir / f"{package_name}.{version}.nupkg"
+
+    if not nupkg_path.exists():
+        r = requests.get(nupkg_url)
+        r.raise_for_status()
+        target_dir.mkdir(parents=True, exist_ok=True)
+        with open(nupkg_path, "wb") as f:
+            f.write(r.content)
+
+    extract_path = target_dir / f"{package_name}_{version}"
+    if not extract_path.exists():
+        with zipfile.ZipFile(nupkg_path, "r") as zip_ref:
+            zip_ref.extractall(extract_path)
+    return extract_path
+
+
+def download_and_load_nuget_package(
+    package_name, version, target_dir: Path = None, load_assembly=True
+):
+
+    from System.Reflection import Assembly
+
+    if target_dir is None:
+        target_dir = nuget_dir
+
+    # Download and extract
+    pkg_folder = download_and_extract_package(package_name, version, target_dir)
+    lib_folder = find_lib_folder(pkg_folder)
+
+    dll_path = lib_folder / f"{package_name}.dll"
+    if not dll_path.exists():
+        raise FileNotFoundError(f"{dll_path} not found")
+
+    sys.path.append(str(lib_folder))
+    if load_assembly:
+        Assembly.LoadFile(str(dll_path))
+
+
+def init_vertipaq_analyzer():
+    global _vpa_initialized
+    if _vpa_initialized:
+        return
+
+    from clr_loader import get_coreclr
+    from pythonnet import set_runtime
+
+    # Load the runtime and set it BEFORE importing clr
+    runtime_config_path = current_dir / "dotnet_lib" / "dotnet.runtime.config.json"
+    rt = get_coreclr(runtime_config=str(runtime_config_path))
+    set_runtime(rt)
+
+    sempy.fabric._client._utils._init_analysis_services()
+
+    from System.Reflection import Assembly
+
+    for name in ASSEMBLIES:
+        download_and_load_nuget_package(
+            name, VPA_VERSION, nuget_dir, load_assembly=False
+        )
+
+    download_and_load_nuget_package("Newtonsoft.Json", "13.0.1")
+    download_and_load_nuget_package("System.IO.Packaging", "7.0.0")
+
+    # For some reason I have to load these after and not inside the download_and_load_nuget_package function
+    dll_paths = [
+        f"{nuget_dir}/Dax.Model.Extractor_1.10.0/lib/net6.0/Dax.Model.Extractor.dll",
+        f"{nuget_dir}/Dax.Metadata_1.10.0/lib/netstandard2.0/Dax.Metadata.dll",
+        f"{nuget_dir}/Dax.ViewVpaExport_1.10.0/lib/netstandard2.0/Dax.ViewVpaExport.dll",
+        f"{nuget_dir}/Dax.Vpax_1.10.0/lib/net6.0/Dax.Vpax.dll",
+    ]
+    for dll_path in dll_paths:
+        Assembly.LoadFile(dll_path)
+
+    _vpa_initialized = True
+
+
+@log
+def create_vpax(
+    dataset: str | UUID,
+    workspace: Optional[str | UUID] = None,
+    lakehouse: Optional[str | UUID] = None,
+    lakehouse_workspace: Optional[str | UUID] = None,
+    file_path: Optional[str] = None,
+    read_stats_from_data: bool = False,
+    read_direct_query_stats: bool = False,
+    direct_lake_stats_mode: str = "ResidentOnly",
+    overwrite: bool = False,
+):
+    """
+    Creates a .vpax file for a semantic model and saves it to a lakehouse. This is based on `SQL BI's VertiPaq Analyzer <https://www.sqlbi.com/tools/vertipaq-analyzer/>`_.
+
+    Parameters
+    ----------
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
+    workspace : str | uuid.UUID, default=None
+        The workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    lakehouse : str | uuid.UUID, default=None
+        The lakehouse name or ID.
+        Defaults to None which resolves to the attached lakehouse.
+    lakehouse_workspace : str | uuid.UUID, default=None
+        The workspace name or ID of the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse.
+    file_path : str, default=None
+        The path where the .vpax file will be saved in the lakehouse.
+        Defaults to None which resolves to the dataset name.
+    read_stats_from_data : bool, default=False
+        Whether to read statistics from the data.
+    read_direct_query_stats : bool, default=False
+        Whether to analyze DirectQuery tables.
+    direct_lake_stats_mode : str, default='ResidentOnly'
+        The Direct Lake extraction mode. Options are 'ResidentOnly' or 'Full'. This parameter is ignored if read_stats_from_data is False. This parameter is only relevant for tables which use Direct Lake mode.
+        If set to 'ResidentOnly', column statistics are obtained only for the columns which are in memory.
+        If set to 'Full', column statistics are obtained for all columns - pending the proper identification of the Direct Lake source.
+    overwrite : bool, default=False
+        Whether to overwrite the .vpax file if it already exists in the lakehouse.
+    """
+
+    init_vertipaq_analyzer()
+
+    import notebookutils
+    from Dax.Metadata import DirectLakeExtractionMode
+    from Dax.Model.Extractor import TomExtractor
+    from Dax.Vpax.Tools import VpaxTools
+    from Dax.ViewVpaExport import Model
+    from System.IO import MemoryStream, FileMode, FileStream, FileAccess, FileShare
+
+    direct_lake_stats_mode = direct_lake_stats_mode.capitalize()
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
+    (lakehouse_workspace_name, lakehouse_workspace_id) = resolve_workspace_name_and_id(
+        lakehouse_workspace
+    )
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse=lakehouse, workspace=lakehouse_workspace_id
+    )
+
+    local_path = _mount(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id)
+    if file_path is None:
+        file_path = dataset_name
+
+    if file_path.endswith(".vpax"):
+        file_path = file_path[:-5]
+    save_location = f"Files/{file_path}.vpax"
+    path = f"{local_path}/{save_location}"
+
+    # Check if the .vpax file already exists in the lakehouse
+    if not overwrite:
+        new_path = create_abfss_path_from_path(
+            lakehouse_id, lakehouse_workspace_id, save_location
+        )
+        if file_exists(new_path):
+            print(
+                f"{icons.warning} The {save_location} file already exists in the '{lakehouse_name}' lakehouse. Set overwrite=True to overwrite the file."
+            )
+            return
+
+    vpax_stream = MemoryStream()
+    extractor_app_name = "VPAX Notebook"
+    extractor_app_version = "1.0"
+    column_batch_size = 50
+    token = notebookutils.credentials.getToken("pbi")
+    connection_string = f"data source=powerbi://api.powerbi.com/v1.0/myorg/{workspace_name};initial catalog={dataset_name};User ID=;Password={token};Persist Security Info=True;Impersonation Level=Impersonate"
+
+    print(f"{icons.in_progress} Extracting .vpax metadata...")
+
+    # Get stats for the model; for direct lake only get is_resident
+    dax_model = TomExtractor.GetDaxModel(
+        connection_string,
+        extractor_app_name,
+        extractor_app_version,
+        read_stats_from_data,
+        0,
+        read_direct_query_stats,
+        DirectLakeExtractionMode.ResidentOnly,
+        column_batch_size,
+    )
+    vpa_model = Model(dax_model)
+    tom_database = TomExtractor.GetDatabase(connection_string)
+
+    # Calculate Direct Lake stats for columns which are IsResident=False
+    from sempy_labs.tom import connect_semantic_model
+
+    with connect_semantic_model(dataset=dataset, workspace=workspace) as tom:
+        is_direct_lake = tom.is_direct_lake()
+        if read_stats_from_data and is_direct_lake and direct_lake_stats_mode == "Full":
+
+            df_not_resident = fabric.evaluate_dax(
+                dataset=dataset,
+                workspace=workspace,
+                dax_string=""" SELECT [DIMENSION_NAME] AS [TableName], [ATTRIBUTE_NAME] AS [ColumnName] FROM $SYSTEM.DISCOVER_STORAGE_TABLE_COLUMNS WHERE NOT [ISROWNUMBER] AND NOT [DICTIONARY_ISRESIDENT]""",
+            )
+
+            import Microsoft.AnalysisServices.Tabular as TOM
+
+            print(f"{icons.in_progress} Calculating Direct Lake statistics...")
+
+            # For SQL endpoints (do once)
+            dfI = fabric.list_items(workspace=workspace)
+            # Get list of tables in Direct Lake mode which have columns that are not resident
+            tbls = [
+                t
+                for t in tom.model.Tables
+                if t.Name in df_not_resident["TableName"].values
+                and any(p.Mode == TOM.ModeType.DirectLake for p in t.Partitions)
+            ]
+            for t in tbls:
+                column_cardinalities = {}
+                table_name = t.Name
+                partition = next(p for p in t.Partitions)
+                entity_name = partition.Source.EntityName
+                schema_name = partition.Source.SchemaName
+                if len(schema_name) == 0 or schema_name == "dbo":
+                    schema_name = None
+                expr_name = partition.Source.ExpressionSource.Name
+                expr = tom.model.Expressions[expr_name].Expression
+                item_id = None
+                if "Sql.Database(" in expr:
+                    matches = re.findall(r'"([^"]+)"', expr)
+                    sql_endpoint_id = matches[1]
+                    dfI_filt = dfI[dfI["Id"] == sql_endpoint_id]
+                    item_name = (
+                        dfI_filt["Display Name"].iloc[0] if not dfI_filt.empty else None
+                    )
+                    dfI_filt2 = dfI[
+                        (dfI["Display Name"] == item_name)
+                        & (dfI["Type"].isin(["Lakehouse", "Warehouse"]))
+                    ]
+                    item_id = dfI_filt2["Id"].iloc[0]
+                    item_type = dfI_filt2["Type"].iloc[0]
+                    item_workspace_id = workspace_id
+                elif "AzureStorage.DataLake(" in expr:
+                    match = re.search(r'AzureStorage\.DataLake\("([^"]+)"', expr)
+                    if match:
+                        url = match.group(1)
+                        path_parts = urlparse(url).path.strip("/").split("/")
+                        if len(path_parts) >= 2:
+                            item_workspace_id, item_id = (
+                                path_parts[0],
+                                path_parts[1],
+                            )
+                            item_type = resolve_item_type(
+                                item_id=item_id, workspace=workspace_id
+                            )
+                else:
+                    raise NotImplementedError(
+                        f"Direct Lake source '{expr}' is not supported. Please report this issue on GitHub (https://github.com/microsoft/semantic-link-labs/issues)."
+                    )
+
+                if not item_id:
+                    print(
+                        f"{icons.info} Cannot determine the Direct Lake source of the '{table_name}' table."
+                    )
+                elif item_type == "Warehouse":
+                    print(
+                        f"{icons.info} The '{table_name}' table references a warehouse. Warehouses are not yet supported for this method."
+                    )
+                else:
+                    df_not_resident_cols = df_not_resident[
+                        df_not_resident["TableName"] == table_name
+                    ]
+                    col_dict = {
+                        c.Name: c.SourceColumn
+                        for c in t.Columns
+                        if c.Type != TOM.ColumnType.RowNumber
+                        and c.Name in df_not_resident_cols["ColumnName"].values
+                    }
+                    col_agg = _get_column_aggregate(
+                        lakehouse=item_id,
+                        workspace=item_workspace_id,
+                        table_name=entity_name,
+                        schema_name=schema_name,
+                        column_name=list(col_dict.values()),
+                        function="distinct",
+                    )
+                    column_cardinalities = {
+                        column_name: col_agg[source_column]
+                        for column_name, source_column in col_dict.items()
+                        if source_column in col_agg
+                    }
+
+                    # Update the dax_model file with column cardinalities
+                    tbl = next(
+                        table
+                        for table in dax_model.Tables
+                        if str(table.TableName) == table_name
+                    )
+                    # print(
+                    #     f"{icons.in_progress} Calculating column cardinalities for the '{table_name}' table..."
+                    # )
+                    cols = [
+                        col
+                        for col in tbl.Columns
+                        if str(col.ColumnType) != "RowNumber"
+                        and str(col.ColumnName) in column_cardinalities
+                    ]
+                    for col in cols:
+                        # print(str(col.ColumnName), col.ColumnCardinality)
+                        col.ColumnCardinality = column_cardinalities.get(
+                            str(col.ColumnName)
+                        )
+
+    VpaxTools.ExportVpax(vpax_stream, dax_model, vpa_model, tom_database)
+
+    print(f"{icons.in_progress} Exporting .vpax file...")
+
+    mode = FileMode.Create
+    file_stream = FileStream(path, mode, FileAccess.Write, FileShare.Read)
+    vpax_stream.CopyTo(file_stream)
+    file_stream.Close()
+
+    print(
+        f"{icons.green_dot} The {file_path}.vpax file has been saved in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace_name}' workspace."
+    )
+
+
+def _dax_distinctcount(table_name, columns):
+
+    dax = "EVALUATE\nROW("
+    for c in columns:
+        full_name = f"'{table_name}'[{c}]"
+        dax += f"""\n"{c}", DISTINCTCOUNT({full_name}),"""
+
+    return f"{dax.rstrip(',')}\n)"
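The new _vpax module bootstraps the VertiPaq Analyzer .NET assemblies from NuGet at runtime and then exports a .vpax file for a semantic model into a lakehouse. A minimal usage sketch (illustrative only; the dataset name is a placeholder, and create_vpax is assumed to be exposed from the package root per the __init__.py change — otherwise import it from sempy_labs._vpax):

from sempy_labs import create_vpax

create_vpax(
    dataset="Sales Model",            # semantic model name or UUID
    read_stats_from_data=True,        # also collect column statistics from the data
    direct_lake_stats_mode="Full",    # scan non-resident Direct Lake columns as well
    overwrite=True,                   # replace an existing Files/<dataset>.vpax in the attached lakehouse
)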
sempy_labs/admin/__init__.py
CHANGED
@@ -84,6 +84,11 @@ from sempy_labs.admin._git import (
 from sempy_labs.admin._dataflows import (
     export_dataflow,
 )
+from sempy_labs.admin._tags import (
+    list_tags,
+    create_tags,
+    delete_tag,
+)

 __all__ = [
     "list_items",
@@ -139,4 +144,7 @@ __all__ = [
     "list_report_subscriptions",
     "get_refreshables",
     "export_dataflow",
+    "list_tags",
+    "create_tags",
+    "delete_tag",
 ]
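With these imports and __all__ entries in place, the new admin tag helpers become importable directly from the admin subpackage, for example:

from sempy_labs.admin import list_tags, create_tags, delete_tag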
sempy_labs/admin/_tags.py
ADDED
@@ -0,0 +1,126 @@
+from sempy_labs._helper_functions import (
+    _base_api,
+    _is_valid_uuid,
+)
+from uuid import UUID
+from sempy_labs._tags import list_tags
+import sempy_labs._icons as icons
+from typing import List
+
+
+def resolve_tag_id(tag: str | UUID):
+
+    if _is_valid_uuid(tag):
+        tag_id = tag
+    else:
+        df = list_tags()
+        df = df[df["Tag Name"] == tag]
+        if df.empty:
+            raise ValueError(f"{icons.red_dot} The '{tag}' tag does not exist.")
+        tag_id = df.iloc[0]["Tag Id"]
+
+    return tag_id
+
+
+def create_tags(tags: str | List[str]):
+    """
+    Creates a new tag or tags.
+
+    This is a wrapper function for the following API: `Tags - Bulk Create Tags <https://learn.microsoft.com/rest/api/fabric/admin/tags/bulk-create-tags>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    tags : str | List[str]
+        The name of the tag or tags to create.
+    """
+
+    if isinstance(tags, str):
+        tags = [tags]
+
+    # Check the length of the tags
+    for tag in tags:
+        if len(tag) > 40:
+            raise ValueError(
+                f"{icons.red_dot} The '{tag}' tag name is too long. It must be 40 characters or less."
+            )
+
+    # Check if the tags already exist
+    df = list_tags()
+    existing_names = df["Tag Name"].tolist()
+    existing_ids = df["Tag Id"].tolist()
+
+    available_tags = [
+        tag for tag in tags if tag not in existing_names and tag not in existing_ids
+    ]
+    unavailable_tags = [
+        tag for tag in tags if tag in existing_names or tag in existing_ids
+    ]
+
+    print(f"{icons.warning} The following tags already exist: {unavailable_tags}")
+    if not available_tags:
+        print(f"{icons.info} No new tags to create.")
+        return
+
+    payload = [{"displayName": name} for name in available_tags]
+
+    for tag in tags:
+        _base_api(
+            request="/v1/admin/bulkCreateTags",
+            client="fabric_sp",
+            method="post",
+            payload=payload,
+            status_codes=201,
+        )
+
+    print(f"{icons.green_dot} The '{available_tags}' tag(s) have been created.")
+
+
+def delete_tag(tag: str | UUID):
+    """
+    Deletes a tag.
+
+    This is a wrapper function for the following API: `Tags - Delete Tag <https://learn.microsoft.com/rest/api/fabric/admin/tags/delete-tag>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    tag : str | uuid.UUID
+        The name or ID of the tag to delete.
+    """
+
+    tag_id = resolve_tag_id(tag)
+
+    _base_api(request=f"/v1/admin/tags/{tag_id}", client="fabric_sp", method="delete")
+
+    print(f"{icons.green_dot} The '{tag}' tag has been deleted.")
+
+
+def update_tag(name: str, tag: str | UUID):
+    """
+    Updates the name of a tag.
+
+    This is a wrapper function for the following API: `Tags - Update Tag <https://learn.microsoft.com/rest/api/fabric/admin/tags/update-tag>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    name : str
+        The new name of the tag.
+    tag : str | uuid.UUID
+        The name or ID of the tag to update.
+    """
+
+    tag_id = resolve_tag_id(tag)
+
+    _base_api(
+        request=f"/v1/admin/tags/{tag_id}",
+        client="fabric_sp",
+        method="patch",
+        payload={"displayName": name},
+    )
+
+    print(f"{icons.green_dot} The '{tag}' tag has been renamed to '{name}'.")
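A minimal usage sketch for the new admin tag helpers (tag names below are placeholders; create_tags accepts a single name or a list, names are capped at 40 characters, and update_tag is defined in the module but not yet exported from sempy_labs.admin):

from sempy_labs.admin import create_tags, delete_tag, list_tags

create_tags(["Certified", "Finance"])   # bulk-creates only the tags that do not already exist
print(list_tags())                      # existing tags as a pandas DataFrame (re-exported from sempy_labs._tags)
delete_tag("Finance")                   # accepts a tag name or its UUID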
sempy_labs/directlake/_generate_shared_expression.py
CHANGED
@@ -3,6 +3,7 @@ from sempy_labs._helper_functions import (
     _base_api,
     resolve_lakehouse_name_and_id,
     resolve_item_name_and_id,
+    _get_fabric_context_setting,
 )
 from typing import Optional
 import sempy_labs._icons as icons
@@ -85,4 +86,7 @@ def generate_shared_expression(
         return f"{start_expr}{mid_expr}{end_expr}"
     else:
         # Build DL/OL expression
-
+        env = _get_fabric_context_setting("spark.trident.pbienv").lower()
+        env = "" if env == "prod" else f"{env}-"
+
+        return f"""let\n\tSource = AzureStorage.DataLake("https://{env}onelake.dfs.fabric.microsoft.com/{workspace_id}/{item_id}")\nin\n\tSource"""
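For the DL/OL (Direct Lake on OneLake) branch, generate_shared_expression now reads the Fabric environment from the Spark context setting spark.trident.pbienv and, outside of prod, prefixes the OneLake host with the environment name and a hyphen. Illustratively, in a prod tenant the returned M expression expands to the following, with the workspace and item IDs (placeholders below) filled in from the resolved items:

let
    Source = AzureStorage.DataLake("https://onelake.dfs.fabric.microsoft.com/<workspace_id>/<item_id>")
in
    Source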