semantic-link-labs 0.11.2__py3-none-any.whl → 0.11.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of semantic-link-labs might be problematic.
- {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/METADATA +4 -4
- {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/RECORD +26 -24
- sempy_labs/__init__.py +12 -18
- sempy_labs/_a_lib_info.py +1 -1
- sempy_labs/_external_data_shares.py +55 -1
- sempy_labs/_helper_functions.py +169 -5
- sempy_labs/_labels.py +126 -0
- sempy_labs/_list_functions.py +1 -1
- sempy_labs/_notebooks.py +152 -3
- sempy_labs/directlake/_dl_helper.py +4 -1
- sempy_labs/graph/_users.py +3 -5
- sempy_labs/lakehouse/_helper.py +18 -9
- sempy_labs/lakehouse/_lakehouse.py +18 -9
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +38 -47
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +12 -22
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +7 -11
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +14 -23
- sempy_labs/ml_model/__init__.py +23 -0
- sempy_labs/ml_model/_functions.py +427 -0
- sempy_labs/report/_reportwrapper.py +1 -1
- sempy_labs/tom/_model.py +8 -3
- sempy_labs/variable_library/__init__.py +19 -0
- sempy_labs/variable_library/_functions.py +403 -0
- sempy_labs/_dax_query_view.py +0 -57
- sempy_labs/_ml_models.py +0 -111
- sempy_labs/_variable_libraries.py +0 -92
- {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/WHEEL +0 -0
- {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/licenses/LICENSE +0 -0
- {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/top_level.txt +0 -0
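One structural change worth noting from the list above: the flat modules _dax_query_view.py, _ml_models.py, and _variable_libraries.py were removed, and ml_model and variable_library now ship as subpackages. A minimal import sketch under that assumption (this diff does not show which functions those subpackages export, so none are named):

import sempy_labs
from sempy_labs import ml_model, variable_library  # new subpackages in 0.11.3

# The removed flat modules (e.g. sempy_labs._ml_models and
# sempy_labs._variable_libraries) are no longer part of the wheel after upgrading.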
sempy_labs/_labels.py
ADDED
@@ -0,0 +1,126 @@
+import sempy.fabric as fabric
+import requests
+import pandas as pd
+from typing import Optional, Union
+from uuid import UUID
+from sempy.fabric.exceptions import FabricHTTPException
+from sempy._utils._log import log
+
+
+@log
+def list_item_labels(workspace: Optional[Union[str, UUID]] = None) -> pd.DataFrame:
+    """
+    List all items within a workspace and shows their sensitivity labels.
+
+    NOTE: This function uses an internal API and is subject to change/break without notice.
+
+    Parameters
+    ----------
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of all items within a workspace and their sensitivity labels.
+    """
+
+    import notebookutils
+
+    token = notebookutils.credentials.getToken("pbi")
+    headers = {"Authorization": f"Bearer {token}"}
+
+    # Item types handled in special payload fields
+    grouped_types = {
+        "dashboards": "Dashboard",
+        "reports": "Report",
+        "models": "SemanticModel",
+        "dataflows": "Dataflow",
+        "datamarts": "Datamart",
+    }
+
+    # All other item types go into 'artifacts'
+    fabric_items = [
+        "Datamart",
+        "Lakehouse",
+        "Eventhouse",
+        "Environment",
+        "KQLDatabase",
+        "KQLQueryset",
+        "KQLDashboard",
+        "DataPipeline",
+        "Notebook",
+        "SparkJobDefinition",
+        "MLExperiment",
+        "MLModel",
+        "Warehouse",
+        "Eventstream",
+        "SQLEndpoint",
+        "MirroredWarehouse",
+        "MirroredDatabase",
+        "Reflex",
+        "GraphQLApi",
+        "MountedDataFactory",
+        "SQLDatabase",
+        "CopyJob",
+        "VariableLibrary",
+        "Dataflow",
+        "ApacheAirflowJob",
+        "WarehouseSnapshot",
+        "DigitalTwinBuilder",
+        "DigitalTwinBuilderFlow",
+        "MirroredAzureDatabricksCatalog",
+        "DataAgent",
+        "UserDataFunction",
+    ]
+
+    dfI = fabric.list_items(workspace=workspace)
+
+    payload = {
+        key: [{"artifactId": i} for i in dfI[dfI["Type"] == value]["Id"].tolist()]
+        for key, value in grouped_types.items()
+    }
+
+    # Add generic artifact types
+    artifact_ids = dfI[dfI["Type"].isin(fabric_items)]["Id"].tolist()
+    if artifact_ids:
+        payload["artifacts"] = [{"artifactId": i} for i in artifact_ids]
+
+    client = fabric.PowerBIRestClient()
+    response = client.get("/v1.0/myorg/capacities")
+    if response.status_code != 200:
+        raise FabricHTTPException("Failed to retrieve URL prefix.")
+    context = response.json().get("@odata.context")
+    prefix = context.split("/v1.0")[0]
+
+    response = requests.post(
+        f"{prefix}/metadata/informationProtection/artifacts",
+        json=payload,
+        headers=headers,
+    )
+    if response.status_code != 200:
+        raise FabricHTTPException(f"Failed to retrieve labels: {response.text}")
+    result = response.json()
+
+    label_keys = [
+        "artifactInformationProtections",
+        "datasetInformationProtections",
+        "reportInformationProtections",
+        "dashboardInformationProtections",
+    ]
+
+    rows = [
+        {
+            "Id": item.get("artifactObjectId"),
+            "Label Id": item.get("labelId"),
+            "Label Name": item.get("name"),
+            "Parent Label Name": item.get("parent", {}).get("name"),
+            "Label Description": item.get("tooltip"),
+        }
+        for key in label_keys
+        for item in result.get(key, [])
+    ]
+
+    df_labels = pd.DataFrame(rows)
+    return dfI.merge(df_labels, on="Id", how="left")
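A minimal usage sketch of the new helper above; the workspace name is a placeholder, and the call must run inside a Fabric notebook because the token is acquired through notebookutils:

from sempy_labs._labels import list_item_labels

# Placeholder workspace name; the result is the fabric.list_items() frame for the
# workspace with the label columns ("Label Id", "Label Name", "Parent Label Name",
# "Label Description") merged on by item Id.
df = list_item_labels(workspace="Sales Analytics")
print(df[["Id", "Type", "Label Name"]].head())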
sempy_labs/_list_functions.py
CHANGED
@@ -1131,7 +1131,7 @@ def list_reports_using_semantic_model(
     dataset: str | UUID, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
-    Shows a list of all the reports
+    Shows a list of all the reports which use a given semantic model. This is limited to the reports which are in the same workspace as the semantic model.

     Parameters
     ----------
sempy_labs/_notebooks.py
CHANGED
@@ -1,20 +1,21 @@
 import sempy.fabric as fabric
 import pandas as pd
 import sempy_labs._icons as icons
-from typing import Optional
+from typing import Optional, List
 import base64
 import requests
 from sempy._utils._log import log
-from ._helper_functions import (
+from sempy_labs._helper_functions import (
     resolve_workspace_name_and_id,
     resolve_workspace_id,
     _decode_b64,
     _base_api,
     resolve_item_id,
     create_item,
+    _create_dataframe,
 )
 from sempy.fabric.exceptions import FabricHTTPException
-import
+from os import PathLike
 from uuid import UUID

 _notebook_prefix = "notebook-content."
@@ -114,6 +115,7 @@ def import_notebook_from_web(
     description: Optional[str] = None,
     workspace: Optional[str | UUID] = None,
     overwrite: bool = False,
+    folder: Optional[str | PathLike] = None,
 ):
     """
     Creates a new notebook within a workspace based on a Jupyter notebook hosted in the web.
@@ -136,6 +138,9 @@ def import_notebook_from_web(
         or if no lakehouse attached, resolves to the workspace of the notebook.
     overwrite : bool, default=False
         If set to True, overwrites the existing notebook in the workspace if it exists.
+    folder : str | os.PathLike, default=None
+        The folder within the workspace where the notebook will be created.
+        Defaults to None which places the notebook in the root of the workspace.
     """

     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
@@ -161,6 +166,7 @@ def import_notebook_from_web(
             workspace=workspace_id,
             description=description,
             format="ipynb",
+            folder=folder,
         )
     elif len(dfI_filt) > 0 and overwrite:
         print(f"{icons.info} Overwrite of notebooks is currently not supported.")
@@ -181,6 +187,7 @@ def create_notebook(
     description: Optional[str] = None,
     workspace: Optional[str | UUID] = None,
     format: Optional[str] = None,
+    folder: Optional[str | PathLike] = None,
 ):
     """
     Creates a new notebook with a definition within a workspace.
@@ -203,6 +210,9 @@ def create_notebook(
     format : str, default=None
         If 'ipynb' is provided than notebook_content should be standard ipynb format
         otherwise notebook_content should be GIT friendly format
+    folder : str | os.PathLike, default=None
+        The folder within the workspace where the notebook will be created.
+        Defaults to None which places the notebook in the root of the workspace.
     """

     notebook_payload = base64.b64encode(notebook_content).decode("utf-8")
@@ -226,6 +236,7 @@ def create_notebook(
         workspace=workspace,
         description=description,
         definition=definition_payload,
+        folder=folder,
     )


@@ -287,3 +298,141 @@ def update_notebook_definition(
     print(
         f"{icons.green_dot} The '{name}' notebook was updated within the '{workspace_name}' workspace."
     )
+
+
+@log
+def list_notebooks(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
+    """
+    Shows the notebooks within a workspace.
+
+    Parameters
+    ----------
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the SQL endpoints within a workspace.
+    """
+
+    columns = {
+        "Notebook Id": "string",
+        "Notebook Name": "string",
+        "Description": "string",
+    }
+    df = _create_dataframe(columns=columns)
+
+    workspace_id = resolve_workspace_id(workspace)
+
+    responses = _base_api(
+        request=f"/v1/workspaces/{workspace_id}/notebooks", uses_pagination=True
+    )
+
+    rows = []
+    for r in responses:
+        for v in r.get("value", []):
+            rows.append(
+                {
+                    "Notebook Id": v.get("id"),
+                    "Notebook Name": v.get("displayName"),
+                    "Description": v.get("description"),
+                }
+            )
+
+    if rows:
+        df = pd.DataFrame(rows, columns=list(columns.keys()))
+
+    return df
+
+
+@log
+def search_notebooks(
+    search_string: str,
+    notebook: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID | List[str | UUID]] = None,
+) -> pd.DataFrame:
+    """
+    Searches notebooks within a workspace or across multiple workspaces for a given search string.
+
+    Parameters
+    ----------
+    search_string : str
+        The string to search for within the notebook definitions.
+    notebook : str | uuid.UUID, default=None
+        The name or ID of a specific notebook to search within.
+        Defaults to None which searches across all notebooks in the specified workspace(s).
+    workspace : str | uuid.UUID | list, default=None
+        The name or ID of the workspace or a list of workspaces to search within.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+        If a list is provided, it should contain workspace names or IDs.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the notebooks that contain the search string in their definitions.
+        The dataframe includes the workspace name, workspace ID, notebook name, and notebook ID.
+    """
+
+    if not workspace:
+        workspace_id = resolve_workspace_id(workspace)
+        workspace_ids = [workspace_id]
+    elif isinstance(workspace, str):
+        workspace_id = resolve_workspace_id(workspace)
+        workspace_ids = [workspace_id]
+    elif isinstance(workspace, list):
+        workspace_ids = [resolve_workspace_id(ws) for ws in workspace]
+    else:
+        raise ValueError(
+            "Workspace must be a string, UUID, or a list of strings/UUIDs."
+        )
+
+    dfW = fabric.list_workspaces()
+    dfW_filt = dfW[dfW["Id"].isin(workspace_ids)]
+
+    columns = {
+        "Workspace Name": "string",
+        "Workspace Id": "string",
+        "Notebook Name": "string",
+        "Notebook Id": "string",
+    }
+    df = _create_dataframe(columns=columns)
+
+    rows = []
+    for _, r in dfW_filt.iterrows():
+        w_id = r["Id"]
+        w_name = r["Name"]
+        dfN = list_notebooks(workspace=w_id)
+        if notebook is not None:
+            item_id = resolve_item_id(item=notebook, type="Notebook", workspace=w_id)
+            dfN = dfN[dfN["Notebook Id"] == item_id]
+        for _, n in dfN.iterrows():
+            notebook_id = n["Notebook Id"]
+            notebook_name = n["Notebook Name"]
+            definition = _base_api(
+                request=f"v1/workspaces/{w_id}/notebooks/{notebook_id}/getDefinition",
+                method="post",
+                client="fabric_sp",
+                status_codes=None,
+                lro_return_json=True,
+            )
+            for part in definition.get("definition").get("parts"):
+                payload = _decode_b64(part["payload"])
+                if part["path"] == "notebook-content.py":
+                    if search_string in payload:
+                        rows.append(
+                            {
+                                "Workspace Name": w_name,
+                                "Workspace Id": w_id,
+                                "Notebook Name": notebook_name,
+                                "Notebook Id": notebook_id,
+                            }
+                        )
+
+    if rows:
+        df = pd.DataFrame(rows, columns=list(columns.keys()))
+
+    return df
sempy_labs/directlake/_dl_helper.py
CHANGED
@@ -225,7 +225,10 @@ def get_direct_lake_source(
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
     sql_endpoint_id = get_direct_lake_sql_endpoint(dataset=dataset, workspace=workspace)
     dfI = fabric.list_items(workspace=workspace)
-    dfI_filt = dfI[
+    dfI_filt = dfI[
+        (dfI["Id"] == sql_endpoint_id)
+        & (dfI["Type"].isin(["SQLEndpoint", "Warehouse"]))
+    ]

     artifact_type, artifact_name, artifact_id = None, None, None

sempy_labs/graph/_users.py
CHANGED
@@ -137,7 +137,6 @@ def send_mail(
     cc_recipients: Optional[str | List[str]] = None,
     bcc_recipients: Optional[str | List[str]] = None,
     priority: Literal["Normal", "High", "Low"] = "Normal",
-    follow_up_flag: bool = False,
     attachments: Optional[str | List[str]] = None,
 ):
     """
@@ -165,8 +164,6 @@ def send_mail(
         The email address of the BCC recipients.
     priority : Literal["Normal", "High", "Low"], default="Normal"
         The email priority.
-    follow_up_flag : bool, default=False
-        Whether to set a follow-up flag for the email.
     attachments : str | List[str], default=None
         The abfss path or a list of the abfss paths of the attachments to include in the email.
     """
@@ -220,8 +217,8 @@ def send_mail(
     if bcc_email_addresses:
         payload["message"]["bccRecipients"] = bcc_email_addresses

-    if follow_up_flag:
-
+    # if follow_up_flag:
+    #     payload["message"]["flag"] = {"flagStatus": "flagged"}

     content_types = {
         ".txt": "text/plain",
@@ -244,6 +241,7 @@ def send_mail(
         ".pbip": "application/vnd.ms-powerbi.report",
         ".pbit": "application/vnd.ms-powerbi.report",
         ".vpax": "application/zip",
+        ".geojson": "application/geo+json",
     }

     def file_path_to_content_bytes(file_path):
sempy_labs/lakehouse/_helper.py
CHANGED
@@ -1,7 +1,7 @@
 from uuid import UUID
 from typing import Optional, Literal
 import pyarrow.dataset as ds
-from
+from sempy_labs._helper_functions import (
     _mount,
     delete_item,
     _base_api,
@@ -68,14 +68,23 @@ def is_v_ordered(
     latest_file = os.path.join(delta_log_path, json_files[0])

     with open(latest_file, "r") as f:
-        (8 removed lines; their content is not rendered in this diff view)
+        all_data = [
+            json.loads(line) for line in f if line.strip()
+        ]  # one dict per line
+        for data in all_data:
+            if "metaData" in data:
+                return (
+                    data.get("metaData", {})
+                    .get("configuration", {})
+                    .get("delta.parquet.vorder.enabled", "false")
+                    == "true"
+                )
+
+        # If no metaData, fall back to commitInfo
+        for data in all_data:
+            if "commitInfo" in data:
+                tags = data["commitInfo"].get("tags", {})
+                return tags.get("VORDER", "false").lower() == "true"

     return False  # Default if not found

sempy_labs/lakehouse/_lakehouse.py
CHANGED
@@ -2,7 +2,7 @@ from tqdm.auto import tqdm
 from typing import List, Optional, Union
 from sempy._utils._log import log
 from uuid import UUID
-from
+from sempy_labs._helper_functions import (
     _base_api,
     resolve_lakehouse_name_and_id,
     resolve_workspace_name_and_id,
@@ -13,7 +13,7 @@ import sempy_labs._icons as icons
 import re
 import time
 import pandas as pd
-from
+from sempy_labs._job_scheduler import (
     _get_item_job_instance,
 )

@@ -100,11 +100,15 @@ def optimize_lakehouse_tables(
         tables = [tables]

     df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta
+    df_tables.reset_index(drop=True, inplace=True)

-
+    total = len(df_tables)
+    for idx, r in (bar := tqdm(df_tables.iterrows(), total=total, bar_format="{desc}")):
         table_name = r["Table Name"]
         path = r["Location"]
-        bar.set_description(
+        bar.set_description(
+            f"Optimizing the '{table_name}' table ({idx + 1}/{total})..."
+        )
         _optimize_table(path=path)


@@ -145,11 +149,13 @@ def vacuum_lakehouse_tables(
         tables = [tables]

     df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta
+    df_tables.reset_index(drop=True, inplace=True)

-
+    total = len(df_tables)
+    for idx, r in (bar := tqdm(df_tables.iterrows(), total=total, bar_format="{desc}")):
         table_name = r["Table Name"]
         path = r["Location"]
-        bar.set_description(f"Vacuuming the '{table_name}' table...")
+        bar.set_description(f"Vacuuming the '{table_name}' table ({idx}/{total})...")
         _vacuum_table(path=path, retain_n_hours=retain_n_hours)


@@ -231,7 +237,7 @@ def run_table_maintenance(
     if optimize:
         payload["executionData"]["optimizeSettings"] = {}
     if v_order:
-        payload["executionData"]["optimizeSettings"] = {"
+        payload["executionData"]["optimizeSettings"] = {"vOrder": True}
     if vacuum:
         payload["executionData"]["vacuumSettings"] = {}
     if vacuum and retention_period is not None:
@@ -242,16 +248,19 @@ def run_table_maintenance(
         method="post",
         payload=payload,
         status_codes=202,
+        client="fabric_sp",
    )

-
+    print(
+        f"{icons.in_progress} The table maintenance job for the '{table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been initiated."
+    )

     status_url = response.headers.get("Location").split("fabric.microsoft.com")[1]
     status = None
     while status not in ["Completed", "Failed"]:
         response = _base_api(request=status_url)
         status = response.json().get("status")
-        time.sleep(
+        time.sleep(3)

     df = _get_item_job_instance(url=status_url)

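A few hedged usage sketches for the additions above. First, the new notebook helpers from sempy_labs/_notebooks.py; the workspace names are placeholders, and the module path is imported directly rather than assuming a re-export from the package root:

from sempy_labs._notebooks import list_notebooks, search_notebooks

# List notebooks in one workspace (placeholder name).
df_nb = list_notebooks(workspace="Sales Analytics")

# Find notebooks whose notebook-content.py definition contains a string,
# searching two workspaces (placeholder names).
hits = search_notebooks(
    search_string="CREATE TABLE",
    workspace=["Sales Analytics", "Finance"],
)
print(hits[["Workspace Name", "Notebook Name"]])

Second, a standalone sketch of the new V-Order check in sempy_labs/lakehouse/_helper.py: each line of a Delta log commit file is one JSON document, and the metaData configuration is preferred over the commitInfo tags. The sample log lines are illustrative only:

import json

log_lines = [
    '{"commitInfo": {"tags": {"VORDER": "true"}}}',
    '{"metaData": {"configuration": {"delta.parquet.vorder.enabled": "true"}}}',
]

entries = [json.loads(line) for line in log_lines if line.strip()]

v_ordered = False
for entry in entries:
    if "metaData" in entry:
        # Prefer the table property written in the metaData action.
        v_ordered = (
            entry["metaData"].get("configuration", {}).get("delta.parquet.vorder.enabled", "false")
            == "true"
        )
        break
else:
    # No metaData entry found: fall back to the commitInfo tags, as the new code does.
    for entry in entries:
        if "commitInfo" in entry:
            v_ordered = entry["commitInfo"].get("tags", {}).get("VORDER", "false").lower() == "true"
            break

print(v_ordered)  # True for the sample lines above

Finally, the progress-bar changes in sempy_labs/lakehouse/_lakehouse.py only affect how optimize_lakehouse_tables and vacuum_lakehouse_tables report progress, so a typical call is unchanged. Table names are placeholders, the lakehouse/workspace arguments are left at their defaults (the attached lakehouse), and retain_n_hours is assumed to be a public parameter based on the internal _vacuum_table call shown in the hunk above:

from sempy_labs.lakehouse import optimize_lakehouse_tables, vacuum_lakehouse_tables

optimize_lakehouse_tables(tables=["DimDate", "FactSales"])
vacuum_lakehouse_tables(tables="FactSales", retain_n_hours=168)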