semantic-link-labs 0.11.2__py3-none-any.whl → 0.11.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of semantic-link-labs might be problematic.

Files changed (29)
  1. {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/METADATA +4 -4
  2. {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/RECORD +26 -24
  3. sempy_labs/__init__.py +12 -18
  4. sempy_labs/_a_lib_info.py +1 -1
  5. sempy_labs/_external_data_shares.py +55 -1
  6. sempy_labs/_helper_functions.py +169 -5
  7. sempy_labs/_labels.py +126 -0
  8. sempy_labs/_list_functions.py +1 -1
  9. sempy_labs/_notebooks.py +152 -3
  10. sempy_labs/directlake/_dl_helper.py +4 -1
  11. sempy_labs/graph/_users.py +3 -5
  12. sempy_labs/lakehouse/_helper.py +18 -9
  13. sempy_labs/lakehouse/_lakehouse.py +18 -9
  14. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +38 -47
  15. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +12 -22
  16. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +7 -11
  17. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +14 -23
  18. sempy_labs/ml_model/__init__.py +23 -0
  19. sempy_labs/ml_model/_functions.py +427 -0
  20. sempy_labs/report/_reportwrapper.py +1 -1
  21. sempy_labs/tom/_model.py +8 -3
  22. sempy_labs/variable_library/__init__.py +19 -0
  23. sempy_labs/variable_library/_functions.py +403 -0
  24. sempy_labs/_dax_query_view.py +0 -57
  25. sempy_labs/_ml_models.py +0 -111
  26. sempy_labs/_variable_libraries.py +0 -92
  27. {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/WHEEL +0 -0
  28. {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/licenses/LICENSE +0 -0
  29. {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/top_level.txt +0 -0
sempy_labs/_labels.py ADDED
@@ -0,0 +1,126 @@
+ import sempy.fabric as fabric
+ import requests
+ import pandas as pd
+ from typing import Optional, Union
+ from uuid import UUID
+ from sempy.fabric.exceptions import FabricHTTPException
+ from sempy._utils._log import log
+
+
+ @log
+ def list_item_labels(workspace: Optional[Union[str, UUID]] = None) -> pd.DataFrame:
+     """
+     List all items within a workspace and shows their sensitivity labels.
+
+     NOTE: This function uses an internal API and is subject to change/break without notice.
+
+     Parameters
+     ----------
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing a list of all items within a workspace and their sensitivity labels.
+     """
+
+     import notebookutils
+
+     token = notebookutils.credentials.getToken("pbi")
+     headers = {"Authorization": f"Bearer {token}"}
+
+     # Item types handled in special payload fields
+     grouped_types = {
+         "dashboards": "Dashboard",
+         "reports": "Report",
+         "models": "SemanticModel",
+         "dataflows": "Dataflow",
+         "datamarts": "Datamart",
+     }
+
+     # All other item types go into 'artifacts'
+     fabric_items = [
+         "Datamart",
+         "Lakehouse",
+         "Eventhouse",
+         "Environment",
+         "KQLDatabase",
+         "KQLQueryset",
+         "KQLDashboard",
+         "DataPipeline",
+         "Notebook",
+         "SparkJobDefinition",
+         "MLExperiment",
+         "MLModel",
+         "Warehouse",
+         "Eventstream",
+         "SQLEndpoint",
+         "MirroredWarehouse",
+         "MirroredDatabase",
+         "Reflex",
+         "GraphQLApi",
+         "MountedDataFactory",
+         "SQLDatabase",
+         "CopyJob",
+         "VariableLibrary",
+         "Dataflow",
+         "ApacheAirflowJob",
+         "WarehouseSnapshot",
+         "DigitalTwinBuilder",
+         "DigitalTwinBuilderFlow",
+         "MirroredAzureDatabricksCatalog",
+         "DataAgent",
+         "UserDataFunction",
+     ]
+
+     dfI = fabric.list_items(workspace=workspace)
+
+     payload = {
+         key: [{"artifactId": i} for i in dfI[dfI["Type"] == value]["Id"].tolist()]
+         for key, value in grouped_types.items()
+     }
+
+     # Add generic artifact types
+     artifact_ids = dfI[dfI["Type"].isin(fabric_items)]["Id"].tolist()
+     if artifact_ids:
+         payload["artifacts"] = [{"artifactId": i} for i in artifact_ids]
+
+     client = fabric.PowerBIRestClient()
+     response = client.get("/v1.0/myorg/capacities")
+     if response.status_code != 200:
+         raise FabricHTTPException("Failed to retrieve URL prefix.")
+     context = response.json().get("@odata.context")
+     prefix = context.split("/v1.0")[0]
+
+     response = requests.post(
+         f"{prefix}/metadata/informationProtection/artifacts",
+         json=payload,
+         headers=headers,
+     )
+     if response.status_code != 200:
+         raise FabricHTTPException(f"Failed to retrieve labels: {response.text}")
+     result = response.json()
+
+     label_keys = [
+         "artifactInformationProtections",
+         "datasetInformationProtections",
+         "reportInformationProtections",
+         "dashboardInformationProtections",
+     ]
+
+     rows = [
+         {
+             "Id": item.get("artifactObjectId"),
+             "Label Id": item.get("labelId"),
+             "Label Name": item.get("name"),
+             "Parent Label Name": item.get("parent", {}).get("name"),
+             "Label Description": item.get("tooltip"),
+         }
+         for key in label_keys
+         for item in result.get(key, [])
+     ]
+
+     df_labels = pd.DataFrame(rows)
+     return dfI.merge(df_labels, on="Id", how="left")
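For context, a minimal usage sketch of the new function, assuming it runs inside a Microsoft Fabric notebook (it relies on `notebookutils`); the import path mirrors the file added above, and the workspace name is a placeholder:

```python
# Minimal usage sketch (assumes a Fabric notebook session; the workspace
# name below is a placeholder).
from sempy_labs._labels import list_item_labels

# Current workspace: attached lakehouse, or the notebook's own workspace
df_labels = list_item_labels()

# A specific workspace by name or ID
df_labels = list_item_labels(workspace="My Workspace")
df_labels.head()
```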
sempy_labs/_list_functions.py CHANGED
@@ -1131,7 +1131,7 @@ def list_reports_using_semantic_model(
      dataset: str | UUID, workspace: Optional[str | UUID] = None
  ) -> pd.DataFrame:
      """
-     Shows a list of all the reports (in all workspaces) which use a given semantic model.
+     Shows a list of all the reports which use a given semantic model. This is limited to the reports which are in the same workspace as the semantic model.
 
      Parameters
      ----------
sempy_labs/_notebooks.py CHANGED
@@ -1,20 +1,21 @@
  import sempy.fabric as fabric
  import pandas as pd
  import sempy_labs._icons as icons
- from typing import Optional
+ from typing import Optional, List
  import base64
  import requests
  from sempy._utils._log import log
- from ._helper_functions import (
+ from sempy_labs._helper_functions import (
      resolve_workspace_name_and_id,
      resolve_workspace_id,
      _decode_b64,
      _base_api,
      resolve_item_id,
      create_item,
+     _create_dataframe,
  )
  from sempy.fabric.exceptions import FabricHTTPException
- import os
+ from os import PathLike
  from uuid import UUID
 
  _notebook_prefix = "notebook-content."
@@ -114,6 +115,7 @@ def import_notebook_from_web(
      description: Optional[str] = None,
      workspace: Optional[str | UUID] = None,
      overwrite: bool = False,
+     folder: Optional[str | PathLike] = None,
  ):
      """
      Creates a new notebook within a workspace based on a Jupyter notebook hosted in the web.
@@ -136,6 +138,9 @@ def import_notebook_from_web(
          or if no lakehouse attached, resolves to the workspace of the notebook.
      overwrite : bool, default=False
          If set to True, overwrites the existing notebook in the workspace if it exists.
+     folder : str | os.PathLike, default=None
+         The folder within the workspace where the notebook will be created.
+         Defaults to None which places the notebook in the root of the workspace.
      """
 
      (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
@@ -161,6 +166,7 @@ def import_notebook_from_web(
              workspace=workspace_id,
              description=description,
              format="ipynb",
+             folder=folder,
          )
      elif len(dfI_filt) > 0 and overwrite:
          print(f"{icons.info} Overwrite of notebooks is currently not supported.")
@@ -181,6 +187,7 @@ def create_notebook(
      description: Optional[str] = None,
      workspace: Optional[str | UUID] = None,
      format: Optional[str] = None,
+     folder: Optional[str | PathLike] = None,
  ):
      """
      Creates a new notebook with a definition within a workspace.
@@ -203,6 +210,9 @@ def create_notebook(
      format : str, default=None
          If 'ipynb' is provided than notebook_content should be standard ipynb format
          otherwise notebook_content should be GIT friendly format
+     folder : str | os.PathLike, default=None
+         The folder within the workspace where the notebook will be created.
+         Defaults to None which places the notebook in the root of the workspace.
      """
 
      notebook_payload = base64.b64encode(notebook_content).decode("utf-8")
@@ -226,6 +236,7 @@ def create_notebook(
          workspace=workspace,
          description=description,
          definition=definition_payload,
+         folder=folder,
      )
 
 
@@ -287,3 +298,141 @@ def update_notebook_definition(
      print(
          f"{icons.green_dot} The '{name}' notebook was updated within the '{workspace_name}' workspace."
      )
+
+
+ @log
+ def list_notebooks(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
+     """
+     Shows the notebooks within a workspace.
+
+     Parameters
+     ----------
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing the SQL endpoints within a workspace.
+     """
+
+     columns = {
+         "Notebook Id": "string",
+         "Notebook Name": "string",
+         "Description": "string",
+     }
+     df = _create_dataframe(columns=columns)
+
+     workspace_id = resolve_workspace_id(workspace)
+
+     responses = _base_api(
+         request=f"/v1/workspaces/{workspace_id}/notebooks", uses_pagination=True
+     )
+
+     rows = []
+     for r in responses:
+         for v in r.get("value", []):
+             rows.append(
+                 {
+                     "Notebook Id": v.get("id"),
+                     "Notebook Name": v.get("displayName"),
+                     "Description": v.get("description"),
+                 }
+             )
+
+     if rows:
+         df = pd.DataFrame(rows, columns=list(columns.keys()))
+
+     return df
+
+
+ @log
+ def search_notebooks(
+     search_string: str,
+     notebook: Optional[str | UUID] = None,
+     workspace: Optional[str | UUID | List[str | UUID]] = None,
+ ) -> pd.DataFrame:
+     """
+     Searches notebooks within a workspace or across multiple workspaces for a given search string.
+
+     Parameters
+     ----------
+     search_string : str
+         The string to search for within the notebook definitions.
+     notebook : str | uuid.UUID, default=None
+         The name or ID of a specific notebook to search within.
+         Defaults to None which searches across all notebooks in the specified workspace(s).
+     workspace : str | uuid.UUID | list, default=None
+         The name or ID of the workspace or a list of workspaces to search within.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+         If a list is provided, it should contain workspace names or IDs.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing the notebooks that contain the search string in their definitions.
+         The dataframe includes the workspace name, workspace ID, notebook name, and notebook ID.
+     """
+
+     if not workspace:
+         workspace_id = resolve_workspace_id(workspace)
+         workspace_ids = [workspace_id]
+     elif isinstance(workspace, str):
+         workspace_id = resolve_workspace_id(workspace)
+         workspace_ids = [workspace_id]
+     elif isinstance(workspace, list):
+         workspace_ids = [resolve_workspace_id(ws) for ws in workspace]
+     else:
+         raise ValueError(
+             "Workspace must be a string, UUID, or a list of strings/UUIDs."
+         )
+
+     dfW = fabric.list_workspaces()
+     dfW_filt = dfW[dfW["Id"].isin(workspace_ids)]
+
+     columns = {
+         "Workspace Name": "string",
+         "Workspace Id": "string",
+         "Notebook Name": "string",
+         "Notebook Id": "string",
+     }
+     df = _create_dataframe(columns=columns)
+
+     rows = []
+     for _, r in dfW_filt.iterrows():
+         w_id = r["Id"]
+         w_name = r["Name"]
+         dfN = list_notebooks(workspace=w_id)
+         if notebook is not None:
+             item_id = resolve_item_id(item=notebook, type="Notebook", workspace=w_id)
+             dfN = dfN[dfN["Notebook Id"] == item_id]
+         for _, n in dfN.iterrows():
+             notebook_id = n["Notebook Id"]
+             notebook_name = n["Notebook Name"]
+             definition = _base_api(
+                 request=f"v1/workspaces/{w_id}/notebooks/{notebook_id}/getDefinition",
+                 method="post",
+                 client="fabric_sp",
+                 status_codes=None,
+                 lro_return_json=True,
+             )
+             for part in definition.get("definition").get("parts"):
+                 payload = _decode_b64(part["payload"])
+                 if part["path"] == "notebook-content.py":
+                     if search_string in payload:
+                         rows.append(
+                             {
+                                 "Workspace Name": w_name,
+                                 "Workspace Id": w_id,
+                                 "Notebook Name": notebook_name,
+                                 "Notebook Id": notebook_id,
+                             }
+                         )
+
+     if rows:
+         df = pd.DataFrame(rows, columns=list(columns.keys()))
+
+     return df
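A short usage sketch of the two helpers added above, assuming a Fabric notebook session; the module import mirrors this file, while the workspace names and search string are placeholders:

```python
# Usage sketch for the new notebook helpers (Fabric notebook session
# assumed; workspace names and the search string are placeholders).
from sempy_labs._notebooks import list_notebooks, search_notebooks

# All notebooks in the current workspace
df_notebooks = list_notebooks()

# Notebooks whose definition contains a given string, across two workspaces
df_hits = search_notebooks(
    search_string="FactSales",
    workspace=["Sales Analytics", "Finance"],
)
print(df_hits[["Workspace Name", "Notebook Name"]])
```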
sempy_labs/directlake/_dl_helper.py CHANGED
@@ -225,7 +225,10 @@ def get_direct_lake_source(
      (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
      sql_endpoint_id = get_direct_lake_sql_endpoint(dataset=dataset, workspace=workspace)
      dfI = fabric.list_items(workspace=workspace)
-     dfI_filt = dfI[(dfI["Id"] == sql_endpoint_id) & (dfI["Type"] == "SQLEndpoint")]
+     dfI_filt = dfI[
+         (dfI["Id"] == sql_endpoint_id)
+         & (dfI["Type"].isin(["SQLEndpoint", "Warehouse"]))
+     ]
 
      artifact_type, artifact_name, artifact_id = None, None, None
 
sempy_labs/graph/_users.py CHANGED
@@ -137,7 +137,6 @@ def send_mail(
      cc_recipients: Optional[str | List[str]] = None,
      bcc_recipients: Optional[str | List[str]] = None,
      priority: Literal["Normal", "High", "Low"] = "Normal",
-     follow_up_flag: bool = False,
      attachments: Optional[str | List[str]] = None,
  ):
      """
@@ -165,8 +164,6 @@
          The email address of the BCC recipients.
      priority : Literal["Normal", "High", "Low"], default="Normal"
          The email priority.
-     follow_up_flag : bool, default=False
-         Whether to set a follow-up flag for the email.
      attachments : str | List[str], default=None
          The abfss path or a list of the abfss paths of the attachments to include in the email.
      """
@@ -220,8 +217,8 @@
      if bcc_email_addresses:
          payload["message"]["bccRecipients"] = bcc_email_addresses
 
-     if follow_up_flag:
-         payload["message"]["flag"] = {"flagStatus": "flagged"}
+     # if follow_up_flag:
+     #     payload["message"]["flag"] = {"flagStatus": "flagged"}
 
      content_types = {
          ".txt": "text/plain",
@@ -244,6 +241,7 @@
          ".pbip": "application/vnd.ms-powerbi.report",
          ".pbit": "application/vnd.ms-powerbi.report",
          ".vpax": "application/zip",
+         ".geojson": "application/geo+json",
      }
 
      def file_path_to_content_bytes(file_path):
sempy_labs/lakehouse/_helper.py CHANGED
@@ -1,7 +1,7 @@
  from uuid import UUID
  from typing import Optional, Literal
  import pyarrow.dataset as ds
- from .._helper_functions import (
+ from sempy_labs._helper_functions import (
      _mount,
      delete_item,
      _base_api,
@@ -68,14 +68,23 @@ def is_v_ordered(
      latest_file = os.path.join(delta_log_path, json_files[0])
 
      with open(latest_file, "r") as f:
-         for line in f:
-             try:
-                 data = json.loads(line)
-                 if "commitInfo" in data:
-                     tags = data["commitInfo"].get("tags", {})
-                     return tags.get("VORDER", "false").lower() == "true"
-             except json.JSONDecodeError:
-                 continue  # Skip malformed lines
+         all_data = [
+             json.loads(line) for line in f if line.strip()
+         ]  # one dict per line
+         for data in all_data:
+             if "metaData" in data:
+                 return (
+                     data.get("metaData", {})
+                     .get("configuration", {})
+                     .get("delta.parquet.vorder.enabled", "false")
+                     == "true"
+                 )
+
+         # If no metaData, fall back to commitInfo
+         for data in all_data:
+             if "commitInfo" in data:
+                 tags = data["commitInfo"].get("tags", {})
+                 return tags.get("VORDER", "false").lower() == "true"
 
      return False  # Default if not found
 
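The reworked `is_v_ordered` logic above reads every JSON entry in the latest `_delta_log` file, prefers the table-level `delta.parquet.vorder.enabled` setting in `metaData`, and only then falls back to the `VORDER` tag in `commitInfo`. A standalone sketch of that decision order, with a placeholder log-file path:

```python
# Standalone sketch of the v-order check above: metaData configuration
# wins; commitInfo tags are only a fallback. The log file path is a
# placeholder.
import json


def check_v_order(log_file: str) -> bool:
    with open(log_file, "r") as f:
        entries = [json.loads(line) for line in f if line.strip()]

    # Table-level setting takes precedence if present
    for entry in entries:
        if "metaData" in entry:
            config = entry["metaData"].get("configuration", {})
            return config.get("delta.parquet.vorder.enabled", "false") == "true"

    # Otherwise fall back to the commitInfo tag written by the engine
    for entry in entries:
        if "commitInfo" in entry:
            tags = entry["commitInfo"].get("tags", {})
            return tags.get("VORDER", "false").lower() == "true"

    return False


# check_v_order("/lakehouse/default/Tables/Sales/_delta_log/00000000000000000000.json")
```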
sempy_labs/lakehouse/_lakehouse.py CHANGED
@@ -2,7 +2,7 @@ from tqdm.auto import tqdm
  from typing import List, Optional, Union
  from sempy._utils._log import log
  from uuid import UUID
- from .._helper_functions import (
+ from sempy_labs._helper_functions import (
      _base_api,
      resolve_lakehouse_name_and_id,
      resolve_workspace_name_and_id,
@@ -13,7 +13,7 @@ import sempy_labs._icons as icons
  import re
  import time
  import pandas as pd
- from .._job_scheduler import (
+ from sempy_labs._job_scheduler import (
      _get_item_job_instance,
  )
 
@@ -100,11 +100,15 @@ def optimize_lakehouse_tables(
          tables = [tables]
 
      df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta
+     df_tables.reset_index(drop=True, inplace=True)
 
-     for _, r in (bar := tqdm(df_tables.iterrows())):
+     total = len(df_tables)
+     for idx, r in (bar := tqdm(df_tables.iterrows(), total=total, bar_format="{desc}")):
          table_name = r["Table Name"]
          path = r["Location"]
-         bar.set_description(f"Optimizing the '{table_name}' table...")
+         bar.set_description(
+             f"Optimizing the '{table_name}' table ({idx + 1}/{total})..."
+         )
          _optimize_table(path=path)
 
 
@@ -145,11 +149,13 @@
          tables = [tables]
 
      df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta
+     df_tables.reset_index(drop=True, inplace=True)
 
-     for _, r in (bar := tqdm(df_tables.iterrows())):
+     total = len(df_tables)
+     for idx, r in (bar := tqdm(df_tables.iterrows(), total=total, bar_format="{desc}")):
          table_name = r["Table Name"]
          path = r["Location"]
-         bar.set_description(f"Vacuuming the '{table_name}' table...")
+         bar.set_description(f"Vacuuming the '{table_name}' table ({idx}/{total})...")
          _vacuum_table(path=path, retain_n_hours=retain_n_hours)
 
 
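Both loops above switch to `bar_format="{desc}"`, which suppresses the bar itself and shows only a per-table description. A small standalone illustration of that pattern; the table names are placeholders and `_optimize_table` is only referenced in a comment:

```python
# Sketch of the progress pattern above: bar_format="{desc}" displays only
# the description text, updated on each iteration. Table names are placeholders.
import pandas as pd
from tqdm.auto import tqdm

df_tables = pd.DataFrame({"Table Name": ["Sales", "Customers", "Orders"]})
total = len(df_tables)

for idx, r in (bar := tqdm(df_tables.iterrows(), total=total, bar_format="{desc}")):
    bar.set_description(f"Optimizing the '{r['Table Name']}' table ({idx + 1}/{total})...")
    # ... the real code calls _optimize_table(path=r["Location"]) here ...
```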
@@ -231,7 +237,7 @@ def run_table_maintenance(
      if optimize:
          payload["executionData"]["optimizeSettings"] = {}
      if v_order:
-         payload["executionData"]["optimizeSettings"] = {"vorder": True}
+         payload["executionData"]["optimizeSettings"] = {"vOrder": True}
      if vacuum:
          payload["executionData"]["vacuumSettings"] = {}
      if vacuum and retention_period is not None:
@@ -242,16 +248,19 @@
          method="post",
          payload=payload,
          status_codes=202,
+         client="fabric_sp",
      )
 
-     f"{icons.in_progress} The table maintenance job for the '{table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been initiated."
+     print(
+         f"{icons.in_progress} The table maintenance job for the '{table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been initiated."
+     )
 
      status_url = response.headers.get("Location").split("fabric.microsoft.com")[1]
      status = None
      while status not in ["Completed", "Failed"]:
          response = _base_api(request=status_url)
          status = response.json().get("status")
-         time.sleep(10)
+         time.sleep(3)
 
      df = _get_item_job_instance(url=status_url)
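The tail of `run_table_maintenance` above follows a simple long-running-operation pattern: submit the job, then poll its status URL until it reports Completed or Failed (now every 3 seconds instead of 10). A generic sketch of that loop, with `poll` standing in for the library's `_base_api(request=status_url)` call:

```python
# Generic sketch of the polling loop above; `poll` is a stand-in for the
# library's status request and the 3-second interval mirrors this release.
import time


def wait_for_job(status_url, poll, interval_seconds=3):
    status = None
    while status not in ("Completed", "Failed"):
        response = poll(status_url)
        status = response.json().get("status")
        time.sleep(interval_seconds)
    return status
```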