semantic-link-labs 0.9.9__py3-none-any.whl → 0.9.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/METADATA +30 -22
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/RECORD +47 -40
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +28 -1
- sempy_labs/_clear_cache.py +12 -0
- sempy_labs/_dax.py +8 -2
- sempy_labs/_delta_analyzer.py +17 -26
- sempy_labs/_environments.py +19 -1
- sempy_labs/_generate_semantic_model.py +7 -8
- sempy_labs/_helper_functions.py +351 -151
- sempy_labs/_kql_databases.py +18 -0
- sempy_labs/_kusto.py +137 -0
- sempy_labs/_list_functions.py +18 -36
- sempy_labs/_model_bpa_rules.py +13 -3
- sempy_labs/_notebooks.py +44 -11
- sempy_labs/_semantic_models.py +93 -1
- sempy_labs/_sql.py +3 -2
- sempy_labs/_tags.py +194 -0
- sempy_labs/_variable_libraries.py +89 -0
- sempy_labs/_vertipaq.py +6 -6
- sempy_labs/_vpax.py +386 -0
- sempy_labs/_warehouses.py +3 -3
- sempy_labs/admin/__init__.py +14 -0
- sempy_labs/admin/_artifacts.py +3 -3
- sempy_labs/admin/_capacities.py +161 -1
- sempy_labs/admin/_dataflows.py +45 -0
- sempy_labs/admin/_items.py +16 -11
- sempy_labs/admin/_tags.py +126 -0
- sempy_labs/admin/_tenant.py +5 -5
- sempy_labs/directlake/_generate_shared_expression.py +29 -26
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +55 -5
- sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
- sempy_labs/lakehouse/__init__.py +16 -0
- sempy_labs/lakehouse/_blobs.py +115 -63
- sempy_labs/lakehouse/_get_lakehouse_columns.py +41 -18
- sempy_labs/lakehouse/_get_lakehouse_tables.py +62 -47
- sempy_labs/lakehouse/_helper.py +211 -0
- sempy_labs/lakehouse/_lakehouse.py +45 -36
- sempy_labs/lakehouse/_livy_sessions.py +137 -0
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +7 -12
- sempy_labs/migration/_refresh_calc_tables.py +7 -6
- sempy_labs/report/_download_report.py +1 -1
- sempy_labs/report/_generate_report.py +5 -1
- sempy_labs/report/_reportwrapper.py +31 -18
- sempy_labs/tom/_model.py +104 -35
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +0 -9
- sempy_labs/report/_bpareporttemplate/.platform +0 -11
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/licenses/LICENSE +0 -0
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/top_level.txt +0 -0
sempy_labs/_tags.py
ADDED
@@ -0,0 +1,194 @@
+from sempy_labs._helper_functions import (
+    _base_api,
+    _create_dataframe,
+    _update_dataframe_datatypes,
+    resolve_item_name_and_id,
+    resolve_workspace_name_and_id,
+    _is_valid_uuid,
+)
+import pandas as pd
+from typing import Optional, List
+from uuid import UUID
+import sempy_labs._icons as icons
+
+
+def list_tags() -> pd.DataFrame:
+    """
+    Shows a list of all the tenant's tags.
+
+    This is a wrapper function for the following API: `Tags - List Tags <https://learn.microsoft.com/rest/api/fabric/core/tags/list-tags>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of all the tenant's tags.
+    """
+
+    columns = {
+        "Tag Name": "string",
+        "Tag Id": "string",
+    }
+    df = _create_dataframe(columns=columns)
+
+    responses = _base_api(
+        request="/v1/tags",
+        uses_pagination=True,
+        client="fabric_sp",
+    )
+
+    dfs = []
+
+    for r in responses:
+        for v in r.get("value", []):
+            new_data = {
+                "Tag Name": v.get("displayName"),
+                "Tag Id": v.get("id"),
+            }
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+    return df
+
+
+def resolve_tags(tags: str | List[str]) -> List[str]:
+    """
+    Resolves the tags to a list of strings.
+
+    Parameters
+    ----------
+    tags : str | List[str]
+        The tags to resolve.
+
+    Returns
+    -------
+    List[str]
+        A list of resolved tags.
+    """
+
+    if isinstance(tags, str):
+        tags = [tags]
+
+    if all(_is_valid_uuid(tag) for tag in tags):
+        return tags
+
+    df = list_tags()
+
+    tag_list = []
+    for tag in tags:
+        if _is_valid_uuid(tag):
+            tag_list.append(tag)
+        else:
+            df_filt = df[df["Tag Name"] == tag]
+            if df_filt.empty:
+                raise ValueError(f"Tag '{tag}' not found in the tenant's tags.")
+            tag_id = df_filt["Tag Id"].iloc[0]
+            tag_list.append(tag_id)
+
+    return tag_list
+
+
+def apply_tags(
+    item: str | UUID,
+    type: str,
+    tags: str | UUID | List[str | UUID],
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Applies the specified tag(s) to an item.
+
+    This is a wrapper function for the following API: `Tags - Apply Tags <https://learn.microsoft.com/rest/api/fabric/core/tags/apply-tags>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    item : str | uuid.UUID
+        The name or ID of the item to apply tags to.
+    type : str
+        The type of the item to apply tags to. For example: "Lakehouse".
+    tags : str | uuid.UUID | List[str | uuid.UUID]
+        The name or ID of the tag(s) to apply to the item.
+    workspace : str | uuid.UUID, default=None
+        The workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (item_name, item_id) = resolve_item_name_and_id(item, type, workspace_id)
+
+    if isinstance(tags, str):
+        tags = [tags]
+
+    tag_list = resolve_tags(tags)
+
+    payload = {
+        "tags": tag_list,
+    }
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/items/{item_id}/applyTags",
+        client="fabric_sp",
+        method="post",
+        payload=payload,
+    )
+
+    print(
+        f"{icons.green_dot} Tags {tags} applied to the '{item_name}' {type.lower()} within the '{workspace_name}' workspace"
+    )
+
+
+def unapply_tags(
+    item: str | UUID,
+    type: str,
+    tags: str | UUID | List[str | UUID],
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Removes (unapplies) the specified tag(s) from an item.
+
+    This is a wrapper function for the following API: `Tags - Unapply Tags <https://learn.microsoft.com/rest/api/fabric/core/tags/unapply-tags>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    item : str | uuid.UUID
+        The name or ID of the item to unapply tags from.
+    type : str
+        The type of the item to unapply tags from. For example: "Lakehouse".
+    tags : str | uuid.UUID | List[str | uuid.UUID]
+        The name or ID of the tag(s) to unapply from the item.
+    workspace : str | uuid.UUID, default=None
+        The workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (item_name, item_id) = resolve_item_name_and_id(item, type, workspace_id)
+
+    if isinstance(tags, str):
+        tags = [tags]
+
+    tag_list = resolve_tags(tags)
+
+    payload = {
+        "tags": tag_list,
+    }
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/items/{item_id}/unapplyTags",
+        client="fabric_sp",
+        method="post",
+        payload=payload,
+    )
+
+    print(
+        f"{icons.green_dot} Tags {tags} removed from the '{item_name}' {type.lower()} within the '{workspace_name}' workspace"
+    )
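As a usage reference for the new module above, here is a minimal sketch (not part of the package diff) of how the tag helpers might be called from a Fabric notebook. The imports target the module path shown in this diff; the item, tag, and workspace names are hypothetical placeholders.

    from sempy_labs._tags import list_tags, apply_tags, unapply_tags

    # Enumerate the tenant's tags; returns a DataFrame with 'Tag Name' and 'Tag Id' columns.
    tags_df = list_tags()

    # Apply a tag to a lakehouse, then remove it again.
    # "Sales", "Certified" and "My Workspace" are hypothetical placeholder names.
    apply_tags(item="Sales", type="Lakehouse", tags="Certified", workspace="My Workspace")
    unapply_tags(item="Sales", type="Lakehouse", tags="Certified", workspace="My Workspace")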
sempy_labs/_variable_libraries.py
ADDED
@@ -0,0 +1,89 @@
+from sempy_labs._helper_functions import (
+    resolve_workspace_name_and_id,
+    resolve_workspace_id,
+    _base_api,
+    _create_dataframe,
+    _update_dataframe_datatypes,
+    delete_item,
+)
+import pandas as pd
+from typing import Optional
+from uuid import UUID
+
+
+def list_variable_libraries(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
+    """
+    Shows the variable libraries within a workspace.
+
+    This is a wrapper function for the following API: `Items - List Variable Libraries <https://learn.microsoft.com/rest/api/fabric/variablelibrary/items/list-variable-libraries>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the variable libraries within a workspace.
+    """
+
+    columns = {
+        "Variable Library Name": "string",
+        "Variable Library Id": "string",
+        "Description": "string",
+        "Active Value Set Name": "string",
+    }
+    df = _create_dataframe(columns=columns)
+
+    workspace_id = resolve_workspace_id(workspace)
+
+    responses = _base_api(
+        request=f"/v1/workspaces/{workspace_id}/VariableLibraries",
+        uses_pagination=True,
+        client="fabric_sp",
+    )
+
+    dfs = []
+    for r in responses:
+        for v in r.get("value", []):
+            prop = v.get("properties", {})
+
+            new_data = {
+                "Variable Library Name": v.get("displayName"),
+                "Variable Library Id": v.get("id"),
+                "Description": v.get("description"),
+                "Active Value Set Name": prop.get("activeValueSetName"),
+            }
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+    return df
+
+
+def delete_variable_library(
+    variable_library: str | UUID, workspace: Optional[str | UUID] = None
+):
+    """
+    Deletes a variable library.
+
+    This is a wrapper function for the following API: `Items - Delete Variable Library <https://learn.microsoft.com/rest/api/fabric/warehouse/items/delete-variable-library>`_.
+
+    Parameters
+    ----------
+    variable_library : str | uuid.UUID
+        Name or ID of the variable library.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    delete_item(item=variable_library, type="VariableLibrary", workspace=workspace)
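As a usage reference for the new module above, a minimal sketch (not part of the package diff) of how these functions might be called; the workspace and library names are hypothetical placeholders.

    from sempy_labs._variable_libraries import (
        list_variable_libraries,
        delete_variable_library,
    )

    # List the variable libraries in a workspace and inspect the active value set.
    df = list_variable_libraries(workspace="My Workspace")
    print(df[["Variable Library Name", "Active Value Set Name"]])

    # Delete a variable library by name or ID (hypothetical placeholder name).
    delete_variable_library(variable_library="MyVariableLibrary", workspace="My Workspace")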
sempy_labs/_vertipaq.py
CHANGED
@@ -8,7 +8,6 @@ import datetime
 import warnings
 from sempy_labs._helper_functions import (
     format_dax_object_name,
-    resolve_lakehouse_name,
     save_as_delta_table,
     resolve_workspace_capacity,
     _get_column_aggregate,
@@ -20,7 +19,6 @@ from sempy_labs._helper_functions import (
 )
 from sempy_labs._list_functions import list_relationships, list_tables
 from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
-from sempy_labs.directlake import get_direct_lake_source
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
@@ -176,10 +174,12 @@ def vertipaq_analyzer(
     )
 
     artifact_type = None
-    if is_direct_lake:
-        artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
-            get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
-        )
+    lakehouse_workspace_id = None
+    lakehouse_name = None
+    # if is_direct_lake:
+    #     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+    #         get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
+    #     )
 
     dfR["Missing Rows"] = 0
     dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)
sempy_labs/_vpax.py
ADDED
@@ -0,0 +1,386 @@
+import sempy
+import re
+from urllib.parse import urlparse
+import sempy.fabric as fabric
+import sys
+from pathlib import Path
+from typing import Optional
+from uuid import UUID
+from sempy_labs._helper_functions import (
+    resolve_workspace_name_and_id,
+    resolve_dataset_name_and_id,
+    resolve_lakehouse_name_and_id,
+    _mount,
+    _get_column_aggregate,
+    resolve_item_type,
+    file_exists,
+    create_abfss_path_from_path,
+)
+import sempy_labs._icons as icons
+import zipfile
+import requests
+
+
+VPA_VERSION = "1.10.0"
+NUGET_BASE_URL = "https://www.nuget.org/api/v2/package"
+ASSEMBLIES = [
+    "Dax.Metadata",
+    "Dax.Model.Extractor",
+    "Dax.ViewVpaExport",
+    "Dax.Vpax",
+]
+
+_vpa_initialized = False
+current_dir = Path(__file__).parent
+nuget_dir = current_dir / "nuget_dlls"
+
+
+def find_lib_folder(pkg_folder: Path) -> Path:
+    lib_base = pkg_folder / "lib"
+    if not lib_base.exists():
+        raise FileNotFoundError(f"No 'lib' directory in package {pkg_folder}")
+
+    # Prefer netstandard2.0 if available
+    candidates = sorted(lib_base.iterdir())
+    for preferred in ["netstandard2.0", "net6.0", "net5.0", "netcoreapp3.1", "net472"]:
+        if (lib_base / preferred).exists():
+            return lib_base / preferred
+
+    # Fallback: first available folder
+    for candidate in candidates:
+        if candidate.is_dir():
+            return candidate
+
+    raise FileNotFoundError(f"No usable framework folder found in {lib_base}")
+
+
+def download_and_extract_package(
+    package_name: str, version: str, target_dir: Path
+) -> Path:
+    nupkg_url = f"{NUGET_BASE_URL}/{package_name}/{version}"
+    nupkg_path = target_dir / f"{package_name}.{version}.nupkg"
+
+    if not nupkg_path.exists():
+        r = requests.get(nupkg_url)
+        r.raise_for_status()
+        target_dir.mkdir(parents=True, exist_ok=True)
+        with open(nupkg_path, "wb") as f:
+            f.write(r.content)
+
+    extract_path = target_dir / f"{package_name}_{version}"
+    if not extract_path.exists():
+        with zipfile.ZipFile(nupkg_path, "r") as zip_ref:
+            zip_ref.extractall(extract_path)
+    return extract_path
+
+
+def download_and_load_nuget_package(
+    package_name, version, target_dir: Path = None, load_assembly=True
+):
+
+    from System.Reflection import Assembly
+
+    if target_dir is None:
+        target_dir = nuget_dir
+
+    # Download and extract
+    pkg_folder = download_and_extract_package(package_name, version, target_dir)
+    lib_folder = find_lib_folder(pkg_folder)
+
+    dll_path = lib_folder / f"{package_name}.dll"
+    if not dll_path.exists():
+        raise FileNotFoundError(f"{dll_path} not found")
+
+    sys.path.append(str(lib_folder))
+    if load_assembly:
+        Assembly.LoadFile(str(dll_path))
+
+
+def init_vertipaq_analyzer():
+    global _vpa_initialized
+    if _vpa_initialized:
+        return
+
+    from clr_loader import get_coreclr
+    from pythonnet import set_runtime
+
+    # Load the runtime and set it BEFORE importing clr
+    runtime_config_path = current_dir / "dotnet_lib" / "dotnet.runtime.config.json"
+    rt = get_coreclr(runtime_config=str(runtime_config_path))
+    set_runtime(rt)
+
+    sempy.fabric._client._utils._init_analysis_services()
+
+    from System.Reflection import Assembly
+
+    for name in ASSEMBLIES:
+        download_and_load_nuget_package(
+            name, VPA_VERSION, nuget_dir, load_assembly=False
+        )
+
+    download_and_load_nuget_package("Newtonsoft.Json", "13.0.1")
+    download_and_load_nuget_package("System.IO.Packaging", "7.0.0")
+
+    # For some reason I have to load these after and not inside the download_and_load_nuget_package function
+    dll_paths = [
+        f"{nuget_dir}/Dax.Model.Extractor_1.10.0/lib/net6.0/Dax.Model.Extractor.dll",
+        f"{nuget_dir}/Dax.Metadata_1.10.0/lib/netstandard2.0/Dax.Metadata.dll",
+        f"{nuget_dir}/Dax.ViewVpaExport_1.10.0/lib/netstandard2.0/Dax.ViewVpaExport.dll",
+        f"{nuget_dir}/Dax.Vpax_1.10.0/lib/net6.0/Dax.Vpax.dll",
+    ]
+    for dll_path in dll_paths:
+        Assembly.LoadFile(dll_path)
+
+    _vpa_initialized = True
+
+
+def create_vpax(
+    dataset: str | UUID,
+    workspace: Optional[str | UUID] = None,
+    lakehouse: Optional[str | UUID] = None,
+    lakehouse_workspace: Optional[str | UUID] = None,
+    file_path: Optional[str] = None,
+    read_stats_from_data: bool = False,
+    read_direct_query_stats: bool = False,
+    direct_lake_stats_mode: str = "ResidentOnly",
+    overwrite: bool = False,
+):
+    """
+    Creates a .vpax file for a semantic model and saves it to a lakehouse. This is based on `SQL BI's VertiPaq Analyzer <https://www.sqlbi.com/tools/vertipaq-analyzer/>`_.
+
+    Parameters
+    ----------
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
+    workspace : str | uuid.UUID, default=None
+        The workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    lakehouse : str | uuid.UUID, default=None
+        The lakehouse name or ID.
+        Defaults to None which resolves to the attached lakehouse.
+    lakehouse_workspace : str | uuid.UUID, default=None
+        The workspace name or ID of the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse.
+    file_path : str, default=None
+        The path where the .vpax file will be saved in the lakehouse.
+        Defaults to None which resolves to the dataset name.
+    read_stats_from_data : bool, default=False
+        Whether to read statistics from the data.
+    read_direct_query_stats : bool, default=False
+        Whether to analyze DirectQuery tables.
+    direct_lake_stats_mode : str, default='ResidentOnly'
+        The Direct Lake extraction mode. Options are 'ResidentOnly' or 'Full'. This parameter is ignored if read_stats_from_data is False. This parameter is only relevant for tables which use Direct Lake mode.
+        If set to 'ResidentOnly', column statistics are obtained only for the columns which are in memory.
+        If set to 'Full', column statistics are obtained for all columns - pending the proper identification of the Direct Lake source.
+    overwrite : bool, default=False
+        Whether to overwrite the .vpax file if it already exists in the lakehouse.
+    """
+
+    init_vertipaq_analyzer()
+
+    import notebookutils
+    from Dax.Metadata import DirectLakeExtractionMode
+    from Dax.Model.Extractor import TomExtractor
+    from Dax.Vpax.Tools import VpaxTools
+    from Dax.ViewVpaExport import Model
+    from System.IO import MemoryStream, FileMode, FileStream, FileAccess, FileShare
+
+    direct_lake_stats_mode = direct_lake_stats_mode.capitalize()
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
+    (lakehouse_workspace_name, lakehouse_workspace_id) = resolve_workspace_name_and_id(
+        lakehouse_workspace
+    )
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse=lakehouse, workspace=lakehouse_workspace_id
+    )
+
+    local_path = _mount(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id)
+    if file_path is None:
+        file_path = dataset_name
+
+    if file_path.endswith(".vpax"):
+        file_path = file_path[:-5]
+    save_location = f"Files/{file_path}.vpax"
+    path = f"{local_path}/{save_location}"
+
+    # Check if the .vpax file already exists in the lakehouse
+    if not overwrite:
+        new_path = create_abfss_path_from_path(
+            lakehouse_id, lakehouse_workspace_id, save_location
+        )
+        if file_exists(new_path):
+            print(
+                f"{icons.warning} The {save_location} file already exists in the '{lakehouse_name}' lakehouse. Set overwrite=True to overwrite the file."
+            )
+            return
+
+    vpax_stream = MemoryStream()
+    extractor_app_name = "VPAX Notebook"
+    extractor_app_version = "1.0"
+    column_batch_size = 50
+    token = notebookutils.credentials.getToken("pbi")
+    connection_string = f"data source=powerbi://api.powerbi.com/v1.0/myorg/{workspace_name};initial catalog={dataset_name};User ID=;Password={token};Persist Security Info=True;Impersonation Level=Impersonate"
+
+    print(f"{icons.in_progress} Extracting .vpax metadata...")
+
+    # Get stats for the model; for direct lake only get is_resident
+    dax_model = TomExtractor.GetDaxModel(
+        connection_string,
+        extractor_app_name,
+        extractor_app_version,
+        read_stats_from_data,
+        0,
+        read_direct_query_stats,
+        DirectLakeExtractionMode.ResidentOnly,
+        column_batch_size,
+    )
+    vpa_model = Model(dax_model)
+    tom_database = TomExtractor.GetDatabase(connection_string)
+
+    # Calculate Direct Lake stats for columns which are IsResident=False
+    from sempy_labs.tom import connect_semantic_model
+
+    with connect_semantic_model(dataset=dataset, workspace=workspace) as tom:
+        is_direct_lake = tom.is_direct_lake()
+        if read_stats_from_data and is_direct_lake and direct_lake_stats_mode == "Full":
+
+            df_not_resident = fabric.evaluate_dax(
+                dataset=dataset,
+                workspace=workspace,
+                dax_string=""" SELECT [DIMENSION_NAME] AS [TableName], [ATTRIBUTE_NAME] AS [ColumnName] FROM $SYSTEM.DISCOVER_STORAGE_TABLE_COLUMNS WHERE NOT [ISROWNUMBER] AND NOT [DICTIONARY_ISRESIDENT]""",
+            )
+
+            import Microsoft.AnalysisServices.Tabular as TOM
+
+            print(f"{icons.in_progress} Calculating Direct Lake statistics...")
+
+            # For SQL endpoints (do once)
+            dfI = fabric.list_items(workspace=workspace)
+            # Get list of tables in Direct Lake mode which have columns that are not resident
+            tbls = [
+                t
+                for t in tom.model.Tables
+                if t.Name in df_not_resident["TableName"].values
+                and any(p.Mode == TOM.ModeType.DirectLake for p in t.Partitions)
+            ]
+            for t in tbls:
+                column_cardinalities = {}
+                table_name = t.Name
+                partition = next(p for p in t.Partitions)
+                entity_name = partition.Source.EntityName
+                schema_name = partition.Source.SchemaName
+                if len(schema_name) == 0 or schema_name == "dbo":
+                    schema_name = None
+                expr_name = partition.Source.ExpressionSource.Name
+                expr = tom.model.Expressions[expr_name].Expression
+                item_id = None
+                if "Sql.Database(" in expr:
+                    matches = re.findall(r'"([^"]+)"', expr)
+                    sql_endpoint_id = matches[1]
+                    dfI_filt = dfI[dfI["Id"] == sql_endpoint_id]
+                    item_name = (
+                        dfI_filt["Display Name"].iloc[0] if not dfI_filt.empty else None
+                    )
+                    dfI_filt2 = dfI[
+                        (dfI["Display Name"] == item_name)
+                        & (dfI["Type"].isin(["Lakehouse", "Warehouse"]))
+                    ]
+                    item_id = dfI_filt2["Id"].iloc[0]
+                    item_type = dfI_filt2["Type"].iloc[0]
+                    item_workspace_id = workspace_id
+                elif "AzureStorage.DataLake(" in expr:
+                    match = re.search(r'AzureStorage\.DataLake\("([^"]+)"', expr)
+                    if match:
+                        url = match.group(1)
+                        path_parts = urlparse(url).path.strip("/").split("/")
+                        if len(path_parts) >= 2:
+                            item_workspace_id, item_id = (
+                                path_parts[0],
+                                path_parts[1],
+                            )
+                            item_type = resolve_item_type(
+                                item_id=item_id, workspace=workspace_id
+                            )
+                else:
+                    raise NotImplementedError(
+                        f"Direct Lake source '{expr}' is not supported. Please report this issue on GitHub (https://github.com/microsoft/semantic-link-labs/issues)."
+                    )
+
+                if not item_id:
+                    print(
+                        f"{icons.info} Cannot determine the Direct Lake source of the '{table_name}' table."
+                    )
+                elif item_type == "Warehouse":
+                    print(
+                        f"{icons.info} The '{table_name}' table references a warehouse. Warehouses are not yet supported for this method."
+                    )
+                else:
+                    df_not_resident_cols = df_not_resident[
+                        df_not_resident["TableName"] == table_name
+                    ]
+                    col_dict = {
+                        c.Name: c.SourceColumn
+                        for c in t.Columns
+                        if c.Type != TOM.ColumnType.RowNumber
+                        and c.Name in df_not_resident_cols["ColumnName"].values
+                    }
+                    col_agg = _get_column_aggregate(
+                        lakehouse=item_id,
+                        workspace=item_workspace_id,
+                        table_name=entity_name,
+                        schema_name=schema_name,
+                        column_name=list(col_dict.values()),
+                        function="distinct",
+                    )
+                    column_cardinalities = {
+                        column_name: col_agg[source_column]
+                        for column_name, source_column in col_dict.items()
+                        if source_column in col_agg
+                    }
+
+                # Update the dax_model file with column cardinalities
+                tbl = next(
+                    table
+                    for table in dax_model.Tables
+                    if str(table.TableName) == table_name
+                )
+                # print(
+                #     f"{icons.in_progress} Calculating column cardinalities for the '{table_name}' table..."
+                # )
+                cols = [
+                    col
+                    for col in tbl.Columns
+                    if str(col.ColumnType) != "RowNumber"
+                    and str(col.ColumnName) in column_cardinalities
+                ]
+                for col in cols:
+                    # print(str(col.ColumnName), col.ColumnCardinality)
+                    col.ColumnCardinality = column_cardinalities.get(
+                        str(col.ColumnName)
+                    )
+
+    VpaxTools.ExportVpax(vpax_stream, dax_model, vpa_model, tom_database)
+
+    print(f"{icons.in_progress} Exporting .vpax file...")
+
+    mode = FileMode.Create
+    file_stream = FileStream(path, mode, FileAccess.Write, FileShare.Read)
+    vpax_stream.CopyTo(file_stream)
+    file_stream.Close()
+
+    print(
+        f"{icons.green_dot} The {file_path}.vpax file has been saved in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace_name}' workspace."
+    )
+
+
+def _dax_distinctcount(table_name, columns):
+
+    dax = "EVALUATE\nROW("
+    for c in columns:
+        full_name = f"'{table_name}'[{c}]"
+        dax += f"""\n"{c}", DISTINCTCOUNT({full_name}),"""
+
+    return f"{dax.rstrip(',')}\n)"