semantic-link-labs 0.9.10__py3-none-any.whl → 0.9.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of semantic-link-labs might be problematic.
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.9.11.dist-info}/METADATA +27 -21
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.9.11.dist-info}/RECORD +34 -29
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.9.11.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +22 -1
- sempy_labs/_delta_analyzer.py +9 -8
- sempy_labs/_environments.py +19 -1
- sempy_labs/_generate_semantic_model.py +1 -1
- sempy_labs/_helper_functions.py +193 -134
- sempy_labs/_kusto.py +25 -23
- sempy_labs/_list_functions.py +13 -35
- sempy_labs/_model_bpa_rules.py +13 -3
- sempy_labs/_notebooks.py +44 -11
- sempy_labs/_semantic_models.py +93 -1
- sempy_labs/_sql.py +3 -2
- sempy_labs/_tags.py +194 -0
- sempy_labs/_variable_libraries.py +89 -0
- sempy_labs/_vpax.py +386 -0
- sempy_labs/admin/__init__.py +8 -0
- sempy_labs/admin/_tags.py +126 -0
- sempy_labs/directlake/_generate_shared_expression.py +5 -1
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +55 -5
- sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
- sempy_labs/lakehouse/__init__.py +16 -0
- sempy_labs/lakehouse/_blobs.py +115 -63
- sempy_labs/lakehouse/_get_lakehouse_tables.py +1 -13
- sempy_labs/lakehouse/_helper.py +211 -0
- sempy_labs/lakehouse/_lakehouse.py +1 -1
- sempy_labs/lakehouse/_livy_sessions.py +137 -0
- sempy_labs/report/_download_report.py +1 -1
- sempy_labs/report/_generate_report.py +5 -1
- sempy_labs/report/_reportwrapper.py +31 -18
- sempy_labs/tom/_model.py +83 -21
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +0 -9
- sempy_labs/report/_bpareporttemplate/.platform +0 -11
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.9.11.dist-info}/licenses/LICENSE +0 -0
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.9.11.dist-info}/top_level.txt +0 -0
sempy_labs/lakehouse/_blobs.py
CHANGED
@@ -11,6 +11,7 @@ from typing import Optional, List
 import sempy_labs._icons as icons
 import xml.etree.ElementTree as ET
 import pandas as pd
+from sempy.fabric.exceptions import FabricHTTPException


 def _request_blob_api(
@@ -18,6 +19,7 @@ def _request_blob_api(
     method: str = "get",
     payload: Optional[dict] = None,
     status_codes: int | List[int] = 200,
+    uses_pagination: bool = False,
 ):

     import requests
@@ -31,21 +33,41 @@ def _request_blob_api(

     headers = {
         "Authorization": f"Bearer {token}",
-        "Content-Type": "application/
+        "Content-Type": "application/xml",
         "x-ms-version": "2025-05-05",
     }

-
-
-
-
-
-
+    base_url = "https://onelake.blob.fabric.microsoft.com/"
+    full_url = f"{base_url}{request}"
+    results = []
+
+    while True:
+        response = requests.request(
+            method.upper(),
+            full_url,
+            headers=headers,
+            data=payload if method.lower() != "get" else None,
+        )
+
+        if response.status_code not in status_codes:
+            raise FabricHTTPException(response)
+
+        if not uses_pagination:
+            return response
+
+        # Parse XML to find blobs and NextMarker
+        root = ET.fromstring(response.content)
+        results.append(root)
+
+        next_marker = root.findtext(".//NextMarker")
+        if not next_marker:
+            break  # No more pages

-
-
+        # Append the marker to the original request (assuming query string format)
+        delimiter = "&" if "?" in request else "?"
+        full_url = f"{base_url}{request}{delimiter}marker={next_marker}"

-    return
+    return results


 @log
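The loop added above implements the standard Blob Storage list-marker pattern: fetch a page, read `NextMarker` from the XML body, and re-request with `marker=<value>` until no marker comes back. A minimal standalone sketch of that pattern (the endpoint, token, and request string are placeholders, not values from this package):

import requests
import xml.etree.ElementTree as ET


def fetch_all_pages(base_url: str, request: str, token: str) -> list:
    """Illustrative NextMarker pagination loop; not the package's internal helper."""
    headers = {"Authorization": f"Bearer {token}", "x-ms-version": "2025-05-05"}
    pages = []
    full_url = f"{base_url}{request}"
    while True:
        response = requests.get(full_url, headers=headers)
        response.raise_for_status()
        root = ET.fromstring(response.content)
        pages.append(root)
        next_marker = root.findtext(".//NextMarker")
        if not next_marker:
            break  # an empty or missing NextMarker means this was the last page
        delimiter = "&" if "?" in request else "?"
        full_url = f"{base_url}{request}{delimiter}marker={next_marker}"
    return pages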
@@ -90,12 +112,6 @@ def list_blobs(
     )
     path_prefix = f"{workspace_id}/{lakehouse_id}/{container}"

-    response = _request_blob_api(
-        request=f"{path_prefix}?restype=container&comp=list&include=deleted"
-    )
-    root = ET.fromstring(response.content)
-    response_json = _xml_to_dict(root)
-
     columns = {
         "Blob Name": "str",
         "Is Deleted": "bool",
@@ -122,37 +138,55 @@ def list_blobs(

     df = _create_dataframe(columns=columns)

-
-
-
-
-
-
-
-
-
-
-
-
-"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    url = f"{path_prefix}?restype=container&comp=list&include=deleted"
+
+    responses = _request_blob_api(
+        request=url,
+        uses_pagination=True,
+    )
+
+    dfs = []
+    for root in responses:
+        response_json = _xml_to_dict(root)
+
+        blobs = (
+            response_json.get("EnumerationResults", {}).get("Blobs", {}).get("Blob", [])
+        )
+
+        if isinstance(blobs, dict):
+            blobs = [blobs]
+
+        for blob in blobs:
+            p = blob.get("Properties", {})
+            new_data = {
+                "Blob Name": blob.get("Name"),
+                "Is Deleted": blob.get("Deleted", False),
+                "Deletion Id": blob.get("DeletionId"),
+                "Creation Time": p.get("Creation-Time"),
+                "Expiry Time": p.get("Expiry-Time"),
+                "Etag": p.get("Etag"),
+                "Resource Type": p.get("ResourceType"),
+                "Content Length": p.get("Content-Length"),
+                "Content Type": p.get("Content-Type"),
+                "Content Encoding": p.get("Content-Encoding"),
+                "Content Language": p.get("Content-Language"),
+                "Content CRC64": p.get("Content-CRC64"),
+                "Content MD5": p.get("Content-MD5"),
+                "Cache Control": p.get("Cache-Control"),
+                "Content Disposition": p.get("Content-Disposition"),
+                "Blob Type": p.get("BlobType"),
+                "Access Tier": p.get("AccessTier"),
+                "Access Tier Inferred": p.get("AccessTierInferred"),
+                "Server Encrypted": p.get("ServerEncrypted"),
+                "Deleted Time": p.get("DeletedTime"),
+                "Remaining Retention Days": p.get("RemainingRetentionDays"),
+            }
+
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)

     return df

@@ -182,7 +216,7 @@ def recover_lakehouse_object(
     workspace_id = resolve_workspace_id(workspace)
     lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)

-
+    blob_name = f"{lakehouse_id}/{file_path}"

     container = file_path.split("/")[0]
     if container not in ["Tables", "Files"]:
@@ -190,29 +224,45 @@ def recover_lakehouse_object(
             f"{icons.red_dot} Invalid container '{container}' within the file_path parameter. Expected 'Tables' or 'Files'."
         )

-
+    # Undelete the blob
+    print(f"{icons.in_progress} Attempting to recover the '{blob_name}' blob...")

-
-
-
-
-
-
-
-
+    try:
+        _request_blob_api(
+            request=f"{workspace_id}/{lakehouse_id}/{file_path}?comp=undelete",
+            method="put",
+        )
+        print(
+            f"{icons.green_dot} The '{blob_name}' blob recover attempt was successful."
+        )
+    except FabricHTTPException as e:
+        if e.status_code == 404:
+            print(
+                f"{icons.warning} The '{blob_name}' blob was not found. No action taken."
+            )
+        else:
+            print(
+                f"{icons.red_dot} An error occurred while recovering the '{blob_name}' blob: {e}"
            )
-    print(f"{icons.green_dot} The '{blob_name}' blob has been restored.")


-def
+def get_user_delegation_key():
+    """
+    Gets a key that can be used to sign a user delegation SAS (shared access signature). A user delegation SAS grants access to Azure Blob Storage resources by using Microsoft Entra credentials.

-
+    This is a wrapper function for the following API: `Get User Delegation Key <https://learn.microsoft.com/rest/api/storageservices/get-user-delegation-key>`_.
+
+    Returns
+    -------
+    str
+        The user delegation key value.
+    """

     from datetime import datetime, timedelta, timezone

     utc_now = datetime.now(timezone.utc)
     start_time = utc_now + timedelta(minutes=2)
-    expiry_time = start_time + timedelta(minutes=
+    expiry_time = start_time + timedelta(minutes=60)
     start_str = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
     expiry_str = expiry_time.strftime("%Y-%m-%dT%H:%M:%SZ")

@@ -223,9 +273,11 @@ def _get_user_delegation_key():
     </KeyInfo>"""

     response = _request_blob_api(
-        request="restype=service&comp=userdelegationkey",
+        request="?restype=service&comp=userdelegationkey",
         method="post",
         payload=payload,
     )

-
+    root = ET.fromstring(response.content)
+    response_json = _xml_to_dict(root)
+    return response_json.get("UserDelegationKey", {}).get("Value", None)
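Taken together, these changes let `list_blobs` page through large containers, make `recover_lakehouse_object` tolerate missing blobs (404), and promote the delegation-key helper to a public `get_user_delegation_key`. A minimal usage sketch; the keyword names for `list_blobs` and `recover_lakehouse_object` are assumptions inferred from the function bodies above, and the lakehouse/workspace names are placeholders:

# Hypothetical usage sketch; argument names are inferred from the bodies above.
from sempy_labs.lakehouse._blobs import (
    list_blobs,
    recover_lakehouse_object,
    get_user_delegation_key,
)

# List every blob, including soft-deleted ones, across all result pages.
blobs_df = list_blobs(lakehouse="MyLakehouse", workspace="MyWorkspace")
deleted_blobs = blobs_df[blobs_df["Is Deleted"]]

# Try to undelete a soft-deleted file; a 404 now prints a warning instead of raising.
recover_lakehouse_object(
    file_path="Files/raw/sales.parquet",
    lakehouse="MyLakehouse",
    workspace="MyWorkspace",
)

# Fetch a user delegation key (valid for roughly an hour with the new default).
key_value = get_user_delegation_key()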
sempy_labs/lakehouse/_get_lakehouse_tables.py
CHANGED

@@ -9,8 +9,6 @@ from sempy_labs._helper_functions import (
     save_as_delta_table,
     _base_api,
     _create_dataframe,
-    resolve_workspace_id,
-    resolve_lakehouse_id,
     _read_delta_table,
     _get_delta_table,
     _mount,
@@ -85,16 +83,6 @@ def get_lakehouse_tables(
     if count_rows:  # Setting countrows defaults to extended=True
         extended = True

-    if (
-        workspace_id != resolve_workspace_id()
-        and lakehouse_id != resolve_lakehouse_id()
-        and count_rows
-    ):
-        raise ValueError(
-            f"{icons.red_dot} If 'count_rows' is set to True, you must run this function against the default lakehouse attached to the notebook. "
-            "Count rows runs a spark query and cross-workspace spark queries are currently not supported."
-        )
-
     responses = _base_api(
         request=f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables",
         uses_pagination=True,
@@ -123,7 +111,7 @@ def get_lakehouse_tables(
     if extended:
         sku_value = get_sku_size(workspace_id)
         guardrail = get_directlake_guardrails_for_sku(sku_value)
-        local_path = _mount()
+        local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id)

         df["Files"], df["Row Groups"], df["Table Size"] = None, None, None
         if count_rows:
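Removing the guard means `count_rows=True` is no longer limited to the notebook's default lakehouse, and `_mount` is now pointed at the resolved lakehouse explicitly. A minimal usage sketch (keyword names follow the pattern of the sibling lakehouse functions and are assumptions, not shown in this hunk):

# Hypothetical usage sketch; keyword names are assumptions based on sibling functions.
from sempy_labs.lakehouse import get_lakehouse_tables

# Extended details and row counts for a lakehouse other than the attached default.
tables_df = get_lakehouse_tables(
    lakehouse="SalesLakehouse",
    workspace="Analytics",
    extended=True,
    count_rows=True,
)
print(tables_df.head())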
sempy_labs/lakehouse/_helper.py
ADDED

@@ -0,0 +1,211 @@
+from uuid import UUID
+from typing import Optional, Literal
+import pyarrow.dataset as ds
+from sempy_labs._helper_functions import (
+    _mount,
+    delete_item,
+    _base_api,
+    resolve_workspace_name_and_id,
+    resolve_lakehouse_name_and_id,
+)
+from sempy._utils._log import log
+import sempy_labs._icons as icons
+import os
+
+
+@log
+def is_v_ordered(
+    table_name: str,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+    schema: Optional[str] = None,
+) -> bool:
+    """
+    Checks if a delta table is v-ordered.
+
+    Parameters
+    ----------
+    table_name : str
+        The name of the table to check.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    schema : str, optional
+        The schema of the table to check. If not provided, the default schema is used.
+
+    Returns
+    -------
+    bool
+        True if the table is v-ordered, False otherwise.
+    """
+
+    local_path = _mount(lakehouse=lakehouse, workspace=workspace)
+    table_path = (
+        f"{local_path}/Tables/{schema}/{table_name}"
+        if schema
+        else f"{local_path}/Tables/{table_name}"
+    )
+    ds_schema = ds.dataset(table_path).schema.metadata
+
+    return any(b"vorder" in key for key in ds_schema.keys())
+
+
+def delete_lakehouse(
+    lakehouse: str | UUID, workspace: Optional[str | UUID] = None
+) -> None:
+    """
+    Deletes a lakehouse.
+
+    This is a wrapper function for the following API: `Items - Delete Lakehouse <https://learn.microsoft.com/rest/api/fabric/lakehouse/items/delete-lakehouse>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    lakehouse : str | uuid.UUID
+        The name or ID of the lakehouse to delete.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    delete_item(item=lakehouse, item_type="lakehouse", workspace=workspace)
+
+
+def update_lakehouse(
+    name: Optional[str] = None,
+    description: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Updates a lakehouse.
+
+    This is a wrapper function for the following API: `Items - Update Lakehouse <https://learn.microsoft.com/rest/api/fabric/lakehouse/items/update-lakehouse>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    name: str, default=None
+        The new name of the lakehouse.
+        Defaults to None which does not update the name.
+    description: str, default=None
+        The new description of the lakehouse.
+        Defaults to None which does not update the description.
+    lakehouse : str | uuid.UUID, default=None
+        The name or ID of the lakehouse to update.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    if not name and not description:
+        raise ValueError(
+            f"{icons.red_dot} Either name or description must be provided."
+        )
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse, workspace_id
+    )
+
+    payload = {}
+    if name:
+        payload["displayName"] = name
+    if description:
+        payload["description"] = description
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}",
+        method="patch",
+        client="fabric_sp",
+        payload=payload,
+    )
+
+    print(
+        f"{icons.green_dot} The '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been updated accordingly."
+    )
+
+
+@log
+def load_table(
+    table_name: str,
+    file_path: str,
+    mode: Literal["Overwrite", "Append"],
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Loads a table into a lakehouse. Currently only files are supported, not folders.
+
+    This is a wrapper function for the following API: `Tables - Load Table <https://learn.microsoft.com/rest/api/fabric/lakehouse/tables/load-table>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    table_name : str
+        The name of the table to load.
+    file_path : str
+        The path to the data to load.
+    mode : Literal["Overwrite", "Append"]
+        The mode to use when loading the data.
+        "Overwrite" will overwrite the existing data.
+        "Append" will append the data to the existing data.
+    lakehouse : str | uuid.UUID, default=None
+        The name or ID of the lakehouse to load the table into.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse, workspace_id
+    )
+
+    file_extension = os.path.splitext(file_path)[1]
+
+    payload = {
+        "relativePath": file_path,
+        "pathType": "File",
+        "mode": mode,
+        "formatOptions": {},
+    }
+
+    if file_extension == ".csv":
+        payload["formatOptions"] = {"format": "Csv", "header": True, "delimiter": ","}
+    elif file_extension == ".parquet":
+        payload["formatOptions"] = {
+            "format": "Parquet",
+            "header": True,
+        }
+    # Solve for loading folders
+    # elif file_extension == '':
+    #     payload['pathType'] = "Folder"
+    #     payload["recursive"] = recursive
+    #     payload['formatOptions']
+    else:
+        raise NotImplementedError()
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables/{table_name}/load",
+        client="fabric_sp",
+        method="post",
+        status_codes=202,
+        lro_return_status_code=True,
+    )
+
+    print(
+        f"{icons.green_dot} The '{table_name}' table has been loaded into the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace."
+    )
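The new `_helper.py` module adds v-order inspection plus delete/update/load operations. A minimal usage sketch built from the signatures above; importing from the private module path is an assumption (the `sempy_labs/lakehouse/__init__.py` changes that re-export these are not shown here), and the item names are placeholders:

# Hypothetical usage sketch; signatures come from the new module, names are placeholders.
from sempy_labs.lakehouse._helper import is_v_ordered, update_lakehouse, load_table

# Check whether a Delta table's parquet metadata carries the v-order marker.
if is_v_ordered(table_name="FactSales", lakehouse="SalesLakehouse", workspace="Analytics"):
    print("FactSales is v-ordered")

# Rename a lakehouse and refresh its description.
update_lakehouse(
    name="SalesLakehouseV2",
    description="Curated sales data",
    lakehouse="SalesLakehouse",
    workspace="Analytics",
)

# Load a CSV file from the Files area into a managed table.
load_table(
    table_name="RawOrders",
    file_path="Files/raw/orders.csv",
    mode="Overwrite",
    lakehouse="SalesLakehouseV2",
    workspace="Analytics",
)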
sempy_labs/lakehouse/_lakehouse.py
CHANGED

@@ -113,7 +113,7 @@ def vacuum_lakehouse_tables(
     Parameters
     ----------
     tables : str | List[str] | None
-        The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be
+        The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be vacuumed.
     lakehouse : str | uuid.UUID, default=None
         The Fabric lakehouse name or ID.
         Defaults to None which resolves to the lakehouse attached to the notebook.
sempy_labs/lakehouse/_livy_sessions.py
ADDED

@@ -0,0 +1,137 @@
+from sempy_labs._helper_functions import (
+    resolve_workspace_id,
+    resolve_lakehouse_id,
+    _base_api,
+    _create_dataframe,
+    _update_dataframe_datatypes,
+)
+import pandas as pd
+from typing import Optional
+from uuid import UUID
+
+
+def list_livy_sessions(
+    lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
+) -> pd.DataFrame:
+    """
+    Shows a list of livy sessions from the specified item identifier.
+
+    This is a wrapper function for the following API: `Livy Sessions - List Livy Sessions <https://learn.microsoft.com/rest/api/fabric/lakehouse/livy-sessions/list-livy-sessions>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of livy sessions from the specified item identifier.
+    """
+
+    columns = {
+        "Spark Application Id": "string",
+        "State:": "string",
+        "Livy Id": "string",
+        "Origin": "string",
+        "Attempt Number": "int",
+        "Max Number Of Attempts": "int",
+        "Livy Name": "string",
+        "Submitter Id": "string",
+        "Submitter Type": "string",
+        "Item Workspace Id": "string",
+        "Item Id": "string",
+        "Item Reference Type": "string",
+        "Item Name": "string",
+        "Item Type": "string",
+        "Job Type": "string",
+        "Submitted Date Time": "str",
+        "Start Date Time": "str",
+        "End Date Time": "string",
+        "Queued Duration Value": "int",
+        "Queued Duration Time Unit": "string",
+        "Running Duration Value": "int",
+        "Running Duration Time Unit": "string",
+        "Total Duration Value": "int",
+        "Total Duration Time Unit": "string",
+        "Job Instance Id": "string",
+        "Creator Item Workspace Id": "string",
+        "Creator Item Id": "string",
+        "Creator Item Reference Type": "string",
+        "Creator Item Name": "string",
+        "Creator Item Type": "string",
+        "Cancellation Reason": "string",
+        "Capacity Id": "string",
+        "Operation Name": "string",
+        "Runtime Version": "string",
+        "Livy Session Item Resource Uri": "string",
+    }
+    df = _create_dataframe(columns=columns)
+
+    workspace_id = resolve_workspace_id(workspace)
+    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
+
+    responses = _base_api(
+        request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/livySessions",
+        uses_pagination=True,
+        client="fabric_sp",
+    )
+
+    dfs = []
+
+    for r in responses:
+        for v in r.get("value", []):
+            queued_duration = v.get("queuedDuration", {})
+            running_duration = v.get("runningDuration", {})
+            total_duration = v.get("totalDuration", {})
+            new_data = {
+                "Spark Application Id": v.get("sparkApplicationId"),
+                "State:": v.get("state"),
+                "Livy Id": v.get("livyId"),
+                "Origin": v.get("origin"),
+                "Attempt Number": v.get("attemptNumber"),
+                "Max Number Of Attempts": v.get("maxNumberOfAttempts"),
+                "Livy Name": v.get("livyName"),
+                "Submitter Id": v["submitter"].get("id"),
+                "Submitter Type": v["submitter"].get("type"),
+                "Item Workspace Id": v["item"].get("workspaceId"),
+                "Item Id": v["item"].get("itemId"),
+                "Item Reference Type": v["item"].get("referenceType"),
+                "Item Name": v.get("itemName"),
+                "Item Type": v.get("itemType"),
+                "Job Type": v.get("jobType"),
+                "Submitted Date Time": v.get("submittedDateTime"),
+                "Start Date Time": v.get("startDateTime"),
+                "End Date Time": v.get("endDateTime"),
+                "Queued Duration Value": queued_duration.get("value"),
+                "Queued Duration Time Unit": queued_duration.get("timeUnit"),
+                "Running Duration Value": running_duration.get("value"),
+                "Running Duration Time Unit": running_duration.get("timeUnit"),
+                "Total Duration Value": total_duration.get("value"),
+                "Total Duration Time Unit": total_duration.get("timeUnit"),
+                "Job Instance Id": v.get("jobInstanceId"),
+                "Creator Item Workspace Id": v["creatorItem"].get("workspaceId"),
+                "Creator Item Id": v["creatorItem"].get("itemId"),
+                "Creator Item Reference Type": v["creatorItem"].get("referenceType"),
+                "Creator Item Name": v.get("creatorItemName"),
+                "Creator Item Type": v.get("creatorItemType"),
+                "Cancellation Reason": v.get("cancellationReason"),
+                "Capacity Id": v.get("capacityId"),
+                "Operation Name": v.get("operationName"),
+                "Runtime Version": v.get("runtimeVersion"),
+                "Livy Session Item Resource Uri": v.get("livySessionItemResourceUri"),
+            }
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+    return df
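A minimal usage sketch for the new `list_livy_sessions`; the private-module import path is taken from the file location, the lakehouse/workspace names are placeholders, and the column names come from the dataframe defined above:

# Hypothetical usage sketch; lakehouse/workspace names are placeholders.
from sempy_labs.lakehouse._livy_sessions import list_livy_sessions

sessions_df = list_livy_sessions(lakehouse="SalesLakehouse", workspace="Analytics")

# Most recently submitted sessions and their states.
print(
    sessions_df[["Livy Id", "State:", "Job Type", "Submitted Date Time"]]
    .sort_values("Submitted Date Time", ascending=False)
    .head()
)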
sempy_labs/report/_download_report.py
CHANGED

@@ -22,7 +22,7 @@ def download_report(
     """
     Downloads the specified report from the specified workspace to a Power BI .pbix file.

-    This is a wrapper function for the following API: `Reports - Export Report In Group <https://learn.microsoft.com/rest/api/power-bi/reports/export-report-in-group
+    This is a wrapper function for the following API: `Reports - Export Report In Group <https://learn.microsoft.com/rest/api/power-bi/reports/export-report-in-group>`_.

     Parameters
     ----------
sempy_labs/report/_generate_report.py
CHANGED

@@ -178,6 +178,7 @@ def update_report_from_reportjson(
     )


+@log
 def get_report_definition(
     report: str | UUID,
     workspace: Optional[str | UUID] = None,
@@ -206,7 +207,10 @@ def get_report_definition(
     """

     return get_item_definition(
-        item=report,
+        item=report,
+        type="Report",
+        workspace=workspace,
+        return_dataframe=return_dataframe,
     )
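With the added keywords, `get_report_definition` now forwards the item type, workspace, and dataframe flag to `get_item_definition`. A minimal usage sketch; the import path and the availability of `return_dataframe` on the public signature are assumptions (the hunk only shows them being forwarded), and the report/workspace names are placeholders:

# Hypothetical usage sketch; import path and keyword availability are assumptions.
from sempy_labs.report import get_report_definition

# Return the report definition parts as a dataframe for inspection.
definition_df = get_report_definition(
    report="Sales Overview",
    workspace="Analytics",
    return_dataframe=True,
)
print(definition_df.head())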