semantic-link-labs 0.9.10__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of semantic-link-labs might be problematic.
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/METADATA +28 -21
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/RECORD +38 -31
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +26 -1
- sempy_labs/_delta_analyzer.py +9 -8
- sempy_labs/_dictionary_diffs.py +221 -0
- sempy_labs/_environments.py +19 -1
- sempy_labs/_generate_semantic_model.py +1 -1
- sempy_labs/_helper_functions.py +358 -134
- sempy_labs/_kusto.py +25 -23
- sempy_labs/_list_functions.py +13 -35
- sempy_labs/_model_bpa_rules.py +13 -3
- sempy_labs/_notebooks.py +44 -11
- sempy_labs/_semantic_models.py +93 -1
- sempy_labs/_sql.py +4 -3
- sempy_labs/_tags.py +194 -0
- sempy_labs/_user_delegation_key.py +42 -0
- sempy_labs/_variable_libraries.py +89 -0
- sempy_labs/_vpax.py +388 -0
- sempy_labs/admin/__init__.py +8 -0
- sempy_labs/admin/_tags.py +126 -0
- sempy_labs/directlake/_generate_shared_expression.py +5 -1
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +55 -5
- sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
- sempy_labs/lakehouse/__init__.py +14 -0
- sempy_labs/lakehouse/_blobs.py +100 -85
- sempy_labs/lakehouse/_get_lakehouse_tables.py +1 -13
- sempy_labs/lakehouse/_helper.py +211 -0
- sempy_labs/lakehouse/_lakehouse.py +1 -1
- sempy_labs/lakehouse/_livy_sessions.py +137 -0
- sempy_labs/report/__init__.py +2 -0
- sempy_labs/report/_download_report.py +1 -1
- sempy_labs/report/_generate_report.py +5 -1
- sempy_labs/report/_report_helper.py +27 -128
- sempy_labs/report/_reportwrapper.py +1903 -1165
- sempy_labs/tom/_model.py +83 -21
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +0 -9
- sempy_labs/report/_bpareporttemplate/.platform +0 -11
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,7 @@ from sempy_labs._helper_functions import (
 )
 from sempy._utils._log import log
 from sempy_labs.tom import connect_semantic_model
-from typing import Optional
+from typing import Optional, List
 import sempy_labs._icons as icons
 from uuid import UUID
 import re
@@ -19,7 +19,9 @@ def _extract_expression_list(expression):
     """
 
     pattern_sql = r'Sql\.Database\s*\(\s*"([^"]+)"\s*,\s*"([^"]+)"\s*\)'
-    pattern_no_sql =
+    pattern_no_sql = (
+        r'AzureStorage\.DataLake\(".*?/([0-9a-fA-F\-]{36})/([0-9a-fA-F\-]{36})"'
+    )
 
     match_sql = re.search(pattern_sql, expression)
     match_no_sql = re.search(pattern_no_sql, expression)
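For context, the reworked pattern_no_sql regex captures the two 36-character GUIDs embedded in a Direct Lake over OneLake expression (presumably the workspace and item IDs). A minimal sketch of what a match yields; the AzureStorage.DataLake URL and GUIDs below are illustrative, not taken from the package:

import re

pattern_no_sql = (
    r'AzureStorage\.DataLake\(".*?/([0-9a-fA-F\-]{36})/([0-9a-fA-F\-]{36})"'
)

# Illustrative expression only; real expressions come from the semantic model.
expression = (
    'let Source = AzureStorage.DataLake("https://onelake.dfs.fabric.microsoft.com/'
    '11111111-2222-3333-4444-555555555555/66666666-7777-8888-9999-000000000000") in Source'
)

match = re.search(pattern_no_sql, expression)
if match:
    first_id, second_id = match.groups()  # the two captured GUIDs
    print(first_id, second_id)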
@@ -102,6 +104,7 @@ def update_direct_lake_model_connection(
     source_type: str = "Lakehouse",
     source_workspace: Optional[str | UUID] = None,
     use_sql_endpoint: bool = True,
+    tables: Optional[str | List[str]] = None,
 ):
     """
     Remaps a Direct Lake semantic model's SQL Endpoint connection to a new lakehouse/warehouse.
@@ -126,12 +129,19 @@ def update_direct_lake_model_connection(
     use_sql_endpoint : bool, default=True
         If True, the SQL Endpoint will be used for the connection.
         If False, Direct Lake over OneLake will be used.
+    tables : str | List[str], default=None
+        The name(s) of the table(s) to update in the Direct Lake semantic model.
+        If None, all tables will be updated (if there is only one expression).
+        If multiple tables are specified, they must be provided as a list.
     """
     if use_sql_endpoint:
         icons.sll_tags.append("UpdateDLConnection_SQL")
     else:
         icons.sll_tags.append("UpdateDLConnection_DLOL")
 
+    if isinstance(tables, str):
+        tables = [tables]
+
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
     (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
 
@@ -174,7 +184,12 @@ def update_direct_lake_model_connection(
         )
 
         # Update the single connection expression
-        if len(expressions)
+        if len(expressions) > 1 and not tables:
+            print(
+                f"{icons.info} Multiple expressions found in the model. Please specify the tables to update using the 'tables parameter."
+            )
+            return
+        elif len(expressions) == 1 and not tables:
             expr = expressions[0]
             tom.model.Expressions[expr].Expression = shared_expression
 
@@ -182,6 +197,41 @@ def update_direct_lake_model_connection(
                 f"{icons.green_dot} The expression in the '{dataset_name}' semantic model within the '{workspace_name}' workspace has been updated to point to the '{source}' {source_type.lower()} in the '{source_workspace}' workspace."
             )
         else:
-
-
+            import sempy
+
+            sempy.fabric._client._utils._init_analysis_services()
+            import Microsoft.AnalysisServices.Tabular as TOM
+
+            expr_list = _extract_expression_list(shared_expression)
+
+            expr_name = next(
+                (name for name, exp in expression_dict.items() if exp == expr_list),
+                None,
             )
+
+            # If the expression does not already exist, create it
+            def generate_unique_name(existing_names):
+                i = 1
+                while True:
+                    candidate = f"DatabaseQuery{i}"
+                    if candidate not in existing_names:
+                        return candidate
+                    i += 1
+
+            if not expr_name:
+                expr_name = generate_unique_name(expressions)
+                tom.add_expression(name=expr_name, expression=shared_expression)
+
+            all_tables = [t.Name for t in tom.model.Tables]
+            for t_name in tables:
+                if t_name not in all_tables:
+                    raise ValueError(
+                        f"{icons.red_dot} The table '{t_name}' does not exist in the '{dataset_name}' semantic model within the '{workspace_name}' workspace."
+                    )
+                p = next(p for p in tom.model.Tables[t_name].Partitions)
+                if p.Mode != TOM.ModeType.DirectLake:
+                    raise ValueError(
+                        f"{icons.red_dot} The table '{t_name}' in the '{dataset_name}' semantic model within the '{workspace_name}' workspace is not in Direct Lake mode. This function is only applicable to Direct Lake tables."
+                    )
+
+                p.Source.ExpressionSource = tom.model.Expressions[expr_name]
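For models that contain more than one shared expression, the new tables parameter selects which Direct Lake partitions get repointed. A usage sketch, assuming the function is exported from sempy_labs.directlake and that the existing dataset/source/workspace parameters keep the names seen in the diff; all item names below are placeholders:

from sempy_labs.directlake import update_direct_lake_model_connection

# Placeholder names; repoints only two tables at the new lakehouse expression.
update_direct_lake_model_connection(
    dataset="Sales Model",
    source="SalesLakehouse",
    source_type="Lakehouse",
    source_workspace="Analytics",
    use_sql_endpoint=False,  # Direct Lake over OneLake
    tables=["DimDate", "FactSales"],
    workspace="Analytics",
)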
sempy_labs/lakehouse/__init__.py CHANGED
@@ -21,6 +21,15 @@ from sempy_labs.lakehouse._blobs import (
     recover_lakehouse_object,
     list_blobs,
 )
+from sempy_labs.lakehouse._livy_sessions import (
+    list_livy_sessions,
+)
+from sempy_labs.lakehouse._helper import (
+    is_v_ordered,
+    delete_lakehouse,
+    update_lakehouse,
+    load_table,
+)
 
 __all__ = [
     "get_lakehouse_columns",
@@ -36,4 +45,9 @@ __all__ = [
     "list_shortcuts",
     "recover_lakehouse_object",
     "list_blobs",
+    "list_livy_sessions",
+    "is_v_ordered",
+    "delete_lakehouse",
+    "update_lakehouse",
+    "load_table",
 ]
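With these re-exports in place, the new helpers can be imported from the subpackage rather than from the private modules, for example:

from sempy_labs.lakehouse import list_livy_sessions, is_v_ordered, load_table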
sempy_labs/lakehouse/_blobs.py CHANGED
@@ -11,6 +11,7 @@ from typing import Optional, List
 import sempy_labs._icons as icons
 import xml.etree.ElementTree as ET
 import pandas as pd
+from sempy.fabric.exceptions import FabricHTTPException
 
 
 def _request_blob_api(
@@ -18,6 +19,7 @@ def _request_blob_api(
     method: str = "get",
     payload: Optional[dict] = None,
     status_codes: int | List[int] = 200,
+    uses_pagination: bool = False,
 ):
 
     import requests
@@ -31,21 +33,41 @@ def _request_blob_api(
 
     headers = {
         "Authorization": f"Bearer {token}",
-        "Content-Type": "application/
+        "Content-Type": "application/xml",
        "x-ms-version": "2025-05-05",
     }
 
-
-
-
-
-
-
+    base_url = "https://onelake.blob.fabric.microsoft.com/"
+    full_url = f"{base_url}{request}"
+    results = []
+
+    while True:
+        response = requests.request(
+            method.upper(),
+            full_url,
+            headers=headers,
+            data=payload if method.lower() != "get" else None,
+        )
+
+        if response.status_code not in status_codes:
+            raise FabricHTTPException(response)
+
+        if not uses_pagination:
+            return response
+
+        # Parse XML to find blobs and NextMarker
+        root = ET.fromstring(response.content)
+        results.append(root)
+
+        next_marker = root.findtext(".//NextMarker")
+        if not next_marker:
+            break  # No more pages
 
-
-
+        # Append the marker to the original request (assuming query string format)
+        delimiter = "&" if "?" in request else "?"
+        full_url = f"{base_url}{request}{delimiter}marker={next_marker}"
 
-    return
+    return results
 
 
 @log
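The new pagination loop follows the Azure Blob Storage listing convention: a truncated listing carries a NextMarker element whose value is sent back as the marker query parameter on the next request, and an empty or missing NextMarker ends the loop. A self-contained sketch of that mechanism against hard-coded sample responses (the XML below is illustrative, not captured from OneLake):

import xml.etree.ElementTree as ET

# Illustrative, abridged List Blobs responses; the real helper fetches these over HTTP.
pages = [
    "<EnumerationResults><Blobs><Blob><Name>Tables/dim_date</Name></Blob></Blobs>"
    "<NextMarker>page-2-token</NextMarker></EnumerationResults>",
    "<EnumerationResults><Blobs><Blob><Name>Tables/fact_sales</Name></Blob></Blobs>"
    "<NextMarker /></EnumerationResults>",
]

names, page = [], 0
while True:
    root = ET.fromstring(pages[page])        # stand-in for one HTTP response body
    names += [b.findtext("Name") for b in root.iter("Blob")]
    marker = root.findtext(".//NextMarker")  # same lookup the helper performs
    if not marker:
        break                                # empty/absent marker means no more pages
    page += 1                                # real code re-requests with ?marker=<value>

print(names)  # ['Tables/dim_date', 'Tables/fact_sales']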
@@ -90,12 +112,6 @@ def list_blobs(
     )
     path_prefix = f"{workspace_id}/{lakehouse_id}/{container}"
 
-    response = _request_blob_api(
-        request=f"{path_prefix}?restype=container&comp=list&include=deleted"
-    )
-    root = ET.fromstring(response.content)
-    response_json = _xml_to_dict(root)
-
     columns = {
         "Blob Name": "str",
         "Is Deleted": "bool",
@@ -122,37 +138,55 @@ def list_blobs(
 
     df = _create_dataframe(columns=columns)
 
-
-
-
-
-
-
-
-
-
-
-
-
-            "
-
-
-
+    url = f"{path_prefix}?restype=container&comp=list&include=deleted"
+
+    responses = _request_blob_api(
+        request=url,
+        uses_pagination=True,
+    )
+
+    dfs = []
+    for root in responses:
+        response_json = _xml_to_dict(root)
+
+        blobs = (
+            response_json.get("EnumerationResults", {}).get("Blobs", {}).get("Blob", [])
+        )
+
+        if isinstance(blobs, dict):
+            blobs = [blobs]
+
+        for blob in blobs:
+            p = blob.get("Properties", {})
+            new_data = {
+                "Blob Name": blob.get("Name"),
+                "Is Deleted": blob.get("Deleted", False),
+                "Deletion Id": blob.get("DeletionId"),
+                "Creation Time": p.get("Creation-Time"),
+                "Expiry Time": p.get("Expiry-Time"),
+                "Etag": p.get("Etag"),
+                "Resource Type": p.get("ResourceType"),
+                "Content Length": p.get("Content-Length"),
+                "Content Type": p.get("Content-Type"),
+                "Content Encoding": p.get("Content-Encoding"),
+                "Content Language": p.get("Content-Language"),
+                "Content CRC64": p.get("Content-CRC64"),
+                "Content MD5": p.get("Content-MD5"),
+                "Cache Control": p.get("Cache-Control"),
+                "Content Disposition": p.get("Content-Disposition"),
+                "Blob Type": p.get("BlobType"),
+                "Access Tier": p.get("AccessTier"),
+                "Access Tier Inferred": p.get("AccessTierInferred"),
+                "Server Encrypted": p.get("ServerEncrypted"),
+                "Deleted Time": p.get("DeletedTime"),
+                "Remaining Retention Days": p.get("RemainingRetentionDays"),
+            }
+
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)
 
     return df
 
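With pagination handled inside the helper, list_blobs now returns every blob regardless of how many pages the service sends back. A usage sketch, assuming list_blobs accepts lakehouse/workspace arguments matching the identifiers it resolves internally (names are placeholders):

from sempy_labs.lakehouse import list_blobs

# Placeholder names; returns a pandas DataFrame with the columns listed above.
df = list_blobs(lakehouse="SalesLakehouse", workspace="Analytics")

# Soft-deleted blobs still within the retention window.
deleted = df[df["Is Deleted"]]
print(deleted[["Blob Name", "Deleted Time", "Remaining Retention Days"]])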
@@ -182,7 +216,7 @@ def recover_lakehouse_object(
     workspace_id = resolve_workspace_id(workspace)
     lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
 
-
+    blob_name = f"{lakehouse_id}/{file_path}"
 
     container = file_path.split("/")[0]
     if container not in ["Tables", "Files"]:
@@ -190,42 +224,23 @@ def recover_lakehouse_object(
             f"{icons.red_dot} Invalid container '{container}' within the file_path parameter. Expected 'Tables' or 'Files'."
         )
 
-
+    # Undelete the blob
+    print(f"{icons.in_progress} Attempting to recover the '{blob_name}' blob...")
 
-
-
-
-
-
-
-
-
+    try:
+        _request_blob_api(
+            request=f"{workspace_id}/{lakehouse_id}/{file_path}?comp=undelete",
+            method="put",
+        )
+        print(
+            f"{icons.green_dot} The '{blob_name}' blob recover attempt was successful."
+        )
+    except FabricHTTPException as e:
+        if e.status_code == 404:
+            print(
+                f"{icons.warning} The '{blob_name}' blob was not found. No action taken."
+            )
+        else:
+            print(
+                f"{icons.red_dot} An error occurred while recovering the '{blob_name}' blob: {e}"
            )
-    print(f"{icons.green_dot} The '{blob_name}' blob has been restored.")
-
-
-def _get_user_delegation_key():
-
-    # https://learn.microsoft.com/rest/api/storageservices/get-user-delegation-key
-
-    from datetime import datetime, timedelta, timezone
-
-    utc_now = datetime.now(timezone.utc)
-    start_time = utc_now + timedelta(minutes=2)
-    expiry_time = start_time + timedelta(minutes=45)
-    start_str = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
-    expiry_str = expiry_time.strftime("%Y-%m-%dT%H:%M:%SZ")
-
-    payload = f"""<?xml version="1.0" encoding="utf-8"?>
-    <KeyInfo>
-    <Start>{start_str}</Start>
-    <Expiry>{expiry_str}</Expiry>
-    </KeyInfo>"""
-
-    response = _request_blob_api(
-        request="restype=service&comp=userdelegationkey",
-        method="post",
-        payload=payload,
-    )
-
-    return response.content
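The recovery path now goes through the Blob service undelete operation and degrades gracefully when the blob no longer exists. A usage sketch, assuming the file_path/lakehouse/workspace parameter names seen in the diff (item names are placeholders):

from sempy_labs.lakehouse import recover_lakehouse_object

# Placeholder names; issues PUT <workspace>/<lakehouse>/<path>?comp=undelete against the OneLake blob endpoint.
recover_lakehouse_object(
    file_path="Tables/dim_date",
    lakehouse="SalesLakehouse",
    workspace="Analytics",
)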
sempy_labs/lakehouse/_get_lakehouse_tables.py CHANGED
 
@@ -9,8 +9,6 @@ from sempy_labs._helper_functions import (
     save_as_delta_table,
     _base_api,
     _create_dataframe,
-    resolve_workspace_id,
-    resolve_lakehouse_id,
     _read_delta_table,
     _get_delta_table,
     _mount,
@@ -85,16 +83,6 @@ def get_lakehouse_tables(
     if count_rows:  # Setting countrows defaults to extended=True
         extended = True
 
-    if (
-        workspace_id != resolve_workspace_id()
-        and lakehouse_id != resolve_lakehouse_id()
-        and count_rows
-    ):
-        raise ValueError(
-            f"{icons.red_dot} If 'count_rows' is set to True, you must run this function against the default lakehouse attached to the notebook. "
-            "Count rows runs a spark query and cross-workspace spark queries are currently not supported."
-        )
-
     responses = _base_api(
         request=f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables",
         uses_pagination=True,
@@ -123,7 +111,7 @@ def get_lakehouse_tables(
     if extended:
         sku_value = get_sku_size(workspace_id)
         guardrail = get_directlake_guardrails_for_sku(sku_value)
-        local_path = _mount()
+        local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id)
 
         df["Files"], df["Row Groups"], df["Table Size"] = None, None, None
         if count_rows:
sempy_labs/lakehouse/_helper.py ADDED
 
@@ -0,0 +1,211 @@
+from uuid import UUID
+from typing import Optional, Literal
+import pyarrow.dataset as ds
+from sempy_labs._helper_functions import (
+    _mount,
+    delete_item,
+    _base_api,
+    resolve_workspace_name_and_id,
+    resolve_lakehouse_name_and_id,
+)
+from sempy._utils._log import log
+import sempy_labs._icons as icons
+import os
+
+
+@log
+def is_v_ordered(
+    table_name: str,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+    schema: Optional[str] = None,
+) -> bool:
+    """
+    Checks if a delta table is v-ordered.
+
+    Parameters
+    ----------
+    table_name : str
+        The name of the table to check.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    schema : str, optional
+        The schema of the table to check. If not provided, the default schema is used.
+
+    Returns
+    -------
+    bool
+        True if the table is v-ordered, False otherwise.
+    """
+
+    local_path = _mount(lakehouse=lakehouse, workspace=workspace)
+    table_path = (
+        f"{local_path}/Tables/{schema}/{table_name}"
+        if schema
+        else f"{local_path}/Tables/{table_name}"
+    )
+    ds_schema = ds.dataset(table_path).schema.metadata
+
+    return any(b"vorder" in key for key in ds_schema.keys())
+
+
+def delete_lakehouse(
+    lakehouse: str | UUID, workspace: Optional[str | UUID] = None
+) -> None:
+    """
+    Deletes a lakehouse.
+
+    This is a wrapper function for the following API: `Items - Delete Lakehouse <https://learn.microsoft.com/rest/api/fabric/lakehouse/items/delete-lakehouse>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    lakehouse : str | uuid.UUID
+        The name or ID of the lakehouse to delete.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    delete_item(item=lakehouse, item_type="lakehouse", workspace=workspace)
+
+
+def update_lakehouse(
+    name: Optional[str] = None,
+    description: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Updates a lakehouse.
+
+    This is a wrapper function for the following API: `Items - Update Lakehouse <https://learn.microsoft.com/rest/api/fabric/lakehouse/items/update-lakehouse>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    name: str, default=None
+        The new name of the lakehouse.
+        Defaults to None which does not update the name.
+    description: str, default=None
+        The new description of the lakehouse.
+        Defaults to None which does not update the description.
+    lakehouse : str | uuid.UUID, default=None
+        The name or ID of the lakehouse to update.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    if not name and not description:
+        raise ValueError(
+            f"{icons.red_dot} Either name or description must be provided."
+        )
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse, workspace_id
+    )
+
+    payload = {}
+    if name:
+        payload["displayName"] = name
+    if description:
+        payload["description"] = description
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}",
+        method="patch",
+        client="fabric_sp",
+        payload=payload,
+    )
+
+    print(
+        f"{icons.green_dot} The '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been updated accordingly."
+    )
+
+
+@log
+def load_table(
+    table_name: str,
+    file_path: str,
+    mode: Literal["Overwrite", "Append"],
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Loads a table into a lakehouse. Currently only files are supported, not folders.
+
+    This is a wrapper function for the following API: `Tables - Load Table <https://learn.microsoft.com/rest/api/fabric/lakehouse/tables/load-table>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    table_name : str
+        The name of the table to load.
+    file_path : str
+        The path to the data to load.
+    mode : Literal["Overwrite", "Append"]
+        The mode to use when loading the data.
+        "Overwrite" will overwrite the existing data.
+        "Append" will append the data to the existing data.
+    lakehouse : str | uuid.UUID, default=None
+        The name or ID of the lakehouse to load the table into.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse, workspace_id
+    )
+
+    file_extension = os.path.splitext(file_path)[1]
+
+    payload = {
+        "relativePath": file_path,
+        "pathType": "File",
+        "mode": mode,
+        "formatOptions": {},
+    }
+
+    if file_extension == ".csv":
+        payload["formatOptions"] = {"format": "Csv", "header": True, "delimiter": ","}
+    elif file_extension == ".parquet":
+        payload["formatOptions"] = {
+            "format": "Parquet",
+            "header": True,
+        }
+    # Solve for loading folders
+    # elif file_extension == '':
+    #     payload['pathType'] = "Folder"
+    #     payload["recursive"] = recursive
+    #     payload['formatOptions']
+    else:
+        raise NotImplementedError()
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables/{table_name}/load",
+        client="fabric_sp",
+        method="post",
+        status_codes=202,
+        lro_return_status_code=True,
+    )
+
+    print(
+        f"{icons.green_dot} The '{table_name}' table has been loaded into the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace."
+    )
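Taken together, the new helper module rounds out basic lakehouse lifecycle operations. A usage sketch, assuming the functions are re-exported from sempy_labs.lakehouse as the __init__.py diff above shows; the lakehouse, workspace, table, and file path values are placeholders, and the Files-relative path format for file_path is an assumption:

from sempy_labs.lakehouse import is_v_ordered, load_table, update_lakehouse

# Placeholder names throughout.
update_lakehouse(
    description="Curated sales data",
    lakehouse="SalesLakehouse",
    workspace="Analytics",
)

# Load a CSV file into a managed delta table (the API accepts the request with a 202 and runs it as a long-running operation).
load_table(
    table_name="dim_customer",
    file_path="Files/raw/dim_customer.csv",  # placeholder path; relative to the lakehouse
    mode="Overwrite",
    lakehouse="SalesLakehouse",
    workspace="Analytics",
)

if is_v_ordered(table_name="dim_customer", lakehouse="SalesLakehouse", workspace="Analytics"):
    print("dim_customer's parquet files carry V-Order metadata.")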
sempy_labs/lakehouse/_lakehouse.py CHANGED
 
@@ -113,7 +113,7 @@ def vacuum_lakehouse_tables(
     Parameters
     ----------
     tables : str | List[str] | None
-        The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be
+        The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be vacuumed.
     lakehouse : str | uuid.UUID, default=None
         The Fabric lakehouse name or ID.
         Defaults to None which resolves to the lakehouse attached to the notebook.