semantic-link-labs 0.9.10__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (40)
  1. {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/METADATA +28 -21
  2. {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/RECORD +38 -31
  3. {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +26 -1
  5. sempy_labs/_delta_analyzer.py +9 -8
  6. sempy_labs/_dictionary_diffs.py +221 -0
  7. sempy_labs/_environments.py +19 -1
  8. sempy_labs/_generate_semantic_model.py +1 -1
  9. sempy_labs/_helper_functions.py +358 -134
  10. sempy_labs/_kusto.py +25 -23
  11. sempy_labs/_list_functions.py +13 -35
  12. sempy_labs/_model_bpa_rules.py +13 -3
  13. sempy_labs/_notebooks.py +44 -11
  14. sempy_labs/_semantic_models.py +93 -1
  15. sempy_labs/_sql.py +4 -3
  16. sempy_labs/_tags.py +194 -0
  17. sempy_labs/_user_delegation_key.py +42 -0
  18. sempy_labs/_variable_libraries.py +89 -0
  19. sempy_labs/_vpax.py +388 -0
  20. sempy_labs/admin/__init__.py +8 -0
  21. sempy_labs/admin/_tags.py +126 -0
  22. sempy_labs/directlake/_generate_shared_expression.py +5 -1
  23. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +55 -5
  24. sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
  25. sempy_labs/lakehouse/__init__.py +14 -0
  26. sempy_labs/lakehouse/_blobs.py +100 -85
  27. sempy_labs/lakehouse/_get_lakehouse_tables.py +1 -13
  28. sempy_labs/lakehouse/_helper.py +211 -0
  29. sempy_labs/lakehouse/_lakehouse.py +1 -1
  30. sempy_labs/lakehouse/_livy_sessions.py +137 -0
  31. sempy_labs/report/__init__.py +2 -0
  32. sempy_labs/report/_download_report.py +1 -1
  33. sempy_labs/report/_generate_report.py +5 -1
  34. sempy_labs/report/_report_helper.py +27 -128
  35. sempy_labs/report/_reportwrapper.py +1903 -1165
  36. sempy_labs/tom/_model.py +83 -21
  37. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +0 -9
  38. sempy_labs/report/_bpareporttemplate/.platform +0 -11
  39. {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/licenses/LICENSE +0 -0
  40. {semantic_link_labs-0.9.10.dist-info → semantic_link_labs-0.10.0.dist-info}/top_level.txt +0 -0
sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py

@@ -7,7 +7,7 @@ from sempy_labs._helper_functions import (
 )
 from sempy._utils._log import log
 from sempy_labs.tom import connect_semantic_model
-from typing import Optional
+from typing import Optional, List
 import sempy_labs._icons as icons
 from uuid import UUID
 import re
@@ -19,7 +19,9 @@ def _extract_expression_list(expression):
     """

     pattern_sql = r'Sql\.Database\s*\(\s*"([^"]+)"\s*,\s*"([^"]+)"\s*\)'
-    pattern_no_sql = r'AzureDataLakeStorage\s*\{\s*"server".*?:\s*onelake\.dfs\.fabric\.microsoft\.com"\s*,\s*"path"\s*:\s*"/([\da-fA-F-]+)\s*/\s*([\da-fA-F-]+)\s*/"\s*\}'
+    pattern_no_sql = (
+        r'AzureStorage\.DataLake\(".*?/([0-9a-fA-F\-]{36})/([0-9a-fA-F\-]{36})"'
+    )

     match_sql = re.search(pattern_sql, expression)
     match_no_sql = re.search(pattern_no_sql, expression)
@@ -102,6 +104,7 @@ def update_direct_lake_model_connection(
     source_type: str = "Lakehouse",
     source_workspace: Optional[str | UUID] = None,
     use_sql_endpoint: bool = True,
+    tables: Optional[str | List[str]] = None,
 ):
     """
     Remaps a Direct Lake semantic model's SQL Endpoint connection to a new lakehouse/warehouse.
@@ -126,12 +129,19 @@ def update_direct_lake_model_connection(
     use_sql_endpoint : bool, default=True
         If True, the SQL Endpoint will be used for the connection.
         If False, Direct Lake over OneLake will be used.
+    tables : str | List[str], default=None
+        The name(s) of the table(s) to update in the Direct Lake semantic model.
+        If None, all tables will be updated (if there is only one expression).
+        If multiple tables are specified, they must be provided as a list.
     """
     if use_sql_endpoint:
        icons.sll_tags.append("UpdateDLConnection_SQL")
     else:
        icons.sll_tags.append("UpdateDLConnection_DLOL")

+    if isinstance(tables, str):
+        tables = [tables]
+
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
     (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

@@ -174,7 +184,12 @@ def update_direct_lake_model_connection(
         )

     # Update the single connection expression
-    if len(expressions) == 1:
+    if len(expressions) > 1 and not tables:
+        print(
+            f"{icons.info} Multiple expressions found in the model. Please specify the tables to update using the 'tables parameter."
+        )
+        return
+    elif len(expressions) == 1 and not tables:
         expr = expressions[0]
         tom.model.Expressions[expr].Expression = shared_expression

@@ -182,6 +197,41 @@ def update_direct_lake_model_connection(
             f"{icons.green_dot} The expression in the '{dataset_name}' semantic model within the '{workspace_name}' workspace has been updated to point to the '{source}' {source_type.lower()} in the '{source_workspace}' workspace."
         )
     else:
-        print(
-            f"{icons.info} Multiple expressions found in the model. Please use the update_direct_lake_partition_entity function to update specific tables."
+        import sempy
+
+        sempy.fabric._client._utils._init_analysis_services()
+        import Microsoft.AnalysisServices.Tabular as TOM
+
+        expr_list = _extract_expression_list(shared_expression)
+
+        expr_name = next(
+            (name for name, exp in expression_dict.items() if exp == expr_list),
+            None,
         )
+
+        # If the expression does not already exist, create it
+        def generate_unique_name(existing_names):
+            i = 1
+            while True:
+                candidate = f"DatabaseQuery{i}"
+                if candidate not in existing_names:
+                    return candidate
+                i += 1
+
+        if not expr_name:
+            expr_name = generate_unique_name(expressions)
+            tom.add_expression(name=expr_name, expression=shared_expression)
+
+        all_tables = [t.Name for t in tom.model.Tables]
+        for t_name in tables:
+            if t_name not in all_tables:
+                raise ValueError(
+                    f"{icons.red_dot} The table '{t_name}' does not exist in the '{dataset_name}' semantic model within the '{workspace_name}' workspace."
+                )
+            p = next(p for p in tom.model.Tables[t_name].Partitions)
+            if p.Mode != TOM.ModeType.DirectLake:
+                raise ValueError(
+                    f"{icons.red_dot} The table '{t_name}' in the '{dataset_name}' semantic model within the '{workspace_name}' workspace is not in Direct Lake mode. This function is only applicable to Direct Lake tables."
+                )
+
+            p.Source.ExpressionSource = tom.model.Expressions[expr_name]
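A minimal usage sketch for the new `tables` parameter, assuming the function is exposed as `sempy_labs.directlake.update_direct_lake_model_connection` with the parameters visible in the hunks above; the workspace, model, and lakehouse names are placeholders, not values from the diff:

```python
import sempy_labs.directlake as directlake

directlake.update_direct_lake_model_connection(
    dataset="Sales Model",            # semantic model to remap
    workspace="Analytics",            # workspace of the semantic model
    source="SalesLakehouse",          # new lakehouse/warehouse to point at
    source_type="Lakehouse",
    source_workspace="Analytics",
    use_sql_endpoint=False,           # Direct Lake over OneLake, per the docstring
    tables=["DimDate", "FactSales"],  # only these tables are bound to the new expression
)
```

Per the updated logic, a model with more than one expression now requires `tables`; with a single expression and no `tables`, the whole expression is rewritten as before.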
sempy_labs/dotnet_lib/dotnet.runtime.config.json

@@ -0,0 +1,10 @@
+{
+  "runtimeOptions": {
+    "tfm": "net6.0",
+    "framework": {
+      "name": "Microsoft.NETCore.App",
+      "version": "6.0.0"
+    },
+    "rollForward": "Major"
+  }
+}
sempy_labs/lakehouse/__init__.py

@@ -21,6 +21,15 @@ from sempy_labs.lakehouse._blobs import (
     recover_lakehouse_object,
     list_blobs,
 )
+from sempy_labs.lakehouse._livy_sessions import (
+    list_livy_sessions,
+)
+from sempy_labs.lakehouse._helper import (
+    is_v_ordered,
+    delete_lakehouse,
+    update_lakehouse,
+    load_table,
+)

 __all__ = [
     "get_lakehouse_columns",
@@ -36,4 +45,9 @@ __all__ = [
     "list_shortcuts",
     "recover_lakehouse_object",
     "list_blobs",
+    "list_livy_sessions",
+    "is_v_ordered",
+    "delete_lakehouse",
+    "update_lakehouse",
+    "load_table",
 ]
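The newly exported helpers can be imported directly from `sempy_labs.lakehouse`; a minimal sketch assuming the 0.10.0 wheel is installed, with placeholder lakehouse and workspace names:

```python
from sempy_labs.lakehouse import (
    list_livy_sessions,
    is_v_ordered,
    delete_lakehouse,
    update_lakehouse,
    load_table,
)

# Checks whether the table's parquet files carry V-Order metadata (names are placeholders).
print(is_v_ordered(table_name="FactSales", lakehouse="SalesLakehouse", workspace="Analytics"))
```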
sempy_labs/lakehouse/_blobs.py

@@ -11,6 +11,7 @@ from typing import Optional, List
 import sempy_labs._icons as icons
 import xml.etree.ElementTree as ET
 import pandas as pd
+from sempy.fabric.exceptions import FabricHTTPException


 def _request_blob_api(
@@ -18,6 +19,7 @@ def _request_blob_api(
     method: str = "get",
     payload: Optional[dict] = None,
     status_codes: int | List[int] = 200,
+    uses_pagination: bool = False,
 ):

     import requests
@@ -31,21 +33,41 @@ def _request_blob_api(

     headers = {
         "Authorization": f"Bearer {token}",
-        "Content-Type": "application/json",
+        "Content-Type": "application/xml",
         "x-ms-version": "2025-05-05",
     }

-    response = requests.request(
-        method.upper(),
-        f"https://onelake.blob.fabric.microsoft.com/{request}",
-        headers=headers,
-        json=payload,
-    )
+    base_url = "https://onelake.blob.fabric.microsoft.com/"
+    full_url = f"{base_url}{request}"
+    results = []
+
+    while True:
+        response = requests.request(
+            method.upper(),
+            full_url,
+            headers=headers,
+            data=payload if method.lower() != "get" else None,
+        )
+
+        if response.status_code not in status_codes:
+            raise FabricHTTPException(response)
+
+        if not uses_pagination:
+            return response
+
+        # Parse XML to find blobs and NextMarker
+        root = ET.fromstring(response.content)
+        results.append(root)
+
+        next_marker = root.findtext(".//NextMarker")
+        if not next_marker:
+            break  # No more pages

-    if response.status_code not in status_codes:
-        raise FabricHTTPException(response)
+        # Append the marker to the original request (assuming query string format)
+        delimiter = "&" if "?" in request else "?"
+        full_url = f"{base_url}{request}{delimiter}marker={next_marker}"

-    return response
+    return results


 @log
@@ -90,12 +112,6 @@ def list_blobs(
     )
     path_prefix = f"{workspace_id}/{lakehouse_id}/{container}"

-    response = _request_blob_api(
-        request=f"{path_prefix}?restype=container&comp=list&include=deleted"
-    )
-    root = ET.fromstring(response.content)
-    response_json = _xml_to_dict(root)
-
     columns = {
         "Blob Name": "str",
         "Is Deleted": "bool",
@@ -122,37 +138,55 @@ def list_blobs(

     df = _create_dataframe(columns=columns)

-    for blob in (
-        response_json.get("EnumerationResults", {}).get("Blobs", {}).get("Blob", {})
-    ):
-        p = blob.get("Properties", {})
-        new_data = {
-            "Blob Name": blob.get("Name"),
-            "Is Deleted": blob.get("Deleted", False),
-            "Deletion Id": blob.get("DeletionId"),
-            "Creation Time": p.get("Creation-Time"),
-            "Expiry Time": p.get("Expiry-Time"),
-            "Etag": p.get("Etag"),
-            "Resource Type": p.get("ResourceType"),
-            "Content Length": p.get("Content-Length"),
-            "Content Type": p.get("Content-Type"),
-            "Content Encoding": p.get("Content-Encoding"),
-            "Content Language": p.get("Content-Language"),
-            "Content CRC64": p.get("Content-CRC64"),
-            "Content MD5": p.get("Content-MD5"),
-            "Cache Control": p.get("Cache-Control"),
-            "Content Disposition": p.get("Content-Disposition"),
-            "Blob Type": p.get("BlobType"),
-            "Access Tier": p.get("AccessTier"),
-            "Access Tier Inferred": p.get("AccessTierInferred"),
-            "Server Encrypted": p.get("ServerEncrypted"),
-            "Deleted Time": p.get("DeletedTime"),
-            "Remaining Retention Days": p.get("RemainingRetentionDays"),
-        }
-
-        df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
-
-    _update_dataframe_datatypes(dataframe=df, column_map=columns)
+    url = f"{path_prefix}?restype=container&comp=list&include=deleted"
+
+    responses = _request_blob_api(
+        request=url,
+        uses_pagination=True,
+    )
+
+    dfs = []
+    for root in responses:
+        response_json = _xml_to_dict(root)
+
+        blobs = (
+            response_json.get("EnumerationResults", {}).get("Blobs", {}).get("Blob", [])
+        )
+
+        if isinstance(blobs, dict):
+            blobs = [blobs]
+
+        for blob in blobs:
+            p = blob.get("Properties", {})
+            new_data = {
+                "Blob Name": blob.get("Name"),
+                "Is Deleted": blob.get("Deleted", False),
+                "Deletion Id": blob.get("DeletionId"),
+                "Creation Time": p.get("Creation-Time"),
+                "Expiry Time": p.get("Expiry-Time"),
+                "Etag": p.get("Etag"),
+                "Resource Type": p.get("ResourceType"),
+                "Content Length": p.get("Content-Length"),
+                "Content Type": p.get("Content-Type"),
+                "Content Encoding": p.get("Content-Encoding"),
+                "Content Language": p.get("Content-Language"),
+                "Content CRC64": p.get("Content-CRC64"),
+                "Content MD5": p.get("Content-MD5"),
+                "Cache Control": p.get("Cache-Control"),
+                "Content Disposition": p.get("Content-Disposition"),
+                "Blob Type": p.get("BlobType"),
+                "Access Tier": p.get("AccessTier"),
+                "Access Tier Inferred": p.get("AccessTierInferred"),
+                "Server Encrypted": p.get("ServerEncrypted"),
+                "Deleted Time": p.get("DeletedTime"),
+                "Remaining Retention Days": p.get("RemainingRetentionDays"),
+            }
+
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)

     return df

@@ -182,7 +216,7 @@ def recover_lakehouse_object(
     workspace_id = resolve_workspace_id(workspace)
     lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)

-    blob_path_prefix = f"{lakehouse_id}/{file_path}"
+    blob_name = f"{lakehouse_id}/{file_path}"

     container = file_path.split("/")[0]
     if container not in ["Tables", "Files"]:
@@ -190,42 +224,23 @@ def recover_lakehouse_object(
             f"{icons.red_dot} Invalid container '{container}' within the file_path parameter. Expected 'Tables' or 'Files'."
         )

-    df = list_blobs(lakehouse=lakehouse, workspace=workspace, container=container)
+    # Undelete the blob
+    print(f"{icons.in_progress} Attempting to recover the '{blob_name}' blob...")

-    for _, r in df.iterrows():
-        blob_name = r.get("Blob Name")
-        is_deleted = r.get("Is Deleted")
-        if blob_name.startswith(blob_path_prefix) and is_deleted:
-            print(f"{icons.in_progress} Restoring the '{blob_name}' blob...")
-            _request_blob_api(
-                request=f"{workspace_id}/{lakehouse_id}/{file_path}?comp=undelete",
-                method="put",
+    try:
+        _request_blob_api(
+            request=f"{workspace_id}/{lakehouse_id}/{file_path}?comp=undelete",
+            method="put",
+        )
+        print(
+            f"{icons.green_dot} The '{blob_name}' blob recover attempt was successful."
+        )
+    except FabricHTTPException as e:
+        if e.status_code == 404:
+            print(
+                f"{icons.warning} The '{blob_name}' blob was not found. No action taken."
+            )
+        else:
+            print(
+                f"{icons.red_dot} An error occurred while recovering the '{blob_name}' blob: {e}"
             )
-            print(f"{icons.green_dot} The '{blob_name}' blob has been restored.")
-
-
-def _get_user_delegation_key():
-
-    # https://learn.microsoft.com/rest/api/storageservices/get-user-delegation-key
-
-    from datetime import datetime, timedelta, timezone
-
-    utc_now = datetime.now(timezone.utc)
-    start_time = utc_now + timedelta(minutes=2)
-    expiry_time = start_time + timedelta(minutes=45)
-    start_str = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
-    expiry_str = expiry_time.strftime("%Y-%m-%dT%H:%M:%SZ")
-
-    payload = f"""<?xml version="1.0" encoding="utf-8"?>
-    <KeyInfo>
-        <Start>{start_str}</Start>
-        <Expiry>{expiry_str}</Expiry>
-    </KeyInfo>"""
-
-    response = _request_blob_api(
-        request="restype=service&comp=userdelegationkey",
-        method="post",
-        payload=payload,
-    )
-
-    return response.content
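A sketch of how the reworked blob helpers might be used, assuming `list_blobs` and `recover_lakehouse_object` keep the signatures shown in the hunks above; lakehouse, workspace, and file path values are placeholders. `list_blobs` now follows the `NextMarker` continuation token across pages, and `recover_lakehouse_object` issues the undelete call directly instead of enumerating blobs first:

```python
from sempy_labs.lakehouse import list_blobs, recover_lakehouse_object

# All pages of the container listing are collected into a single DataFrame.
df = list_blobs(lakehouse="SalesLakehouse", workspace="Analytics", container="Files")
deleted = df[df["Is Deleted"]]  # soft-deleted blobs, per the "Is Deleted" column

# Attempt to undelete one object; per the new code path, a 404 is reported
# as a warning rather than raised.
recover_lakehouse_object(
    lakehouse="SalesLakehouse",
    workspace="Analytics",
    file_path="Files/raw/sales_2024.parquet",
)
```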
sempy_labs/lakehouse/_get_lakehouse_tables.py

@@ -9,8 +9,6 @@ from sempy_labs._helper_functions import (
     save_as_delta_table,
     _base_api,
     _create_dataframe,
-    resolve_workspace_id,
-    resolve_lakehouse_id,
     _read_delta_table,
     _get_delta_table,
     _mount,
@@ -85,16 +83,6 @@ def get_lakehouse_tables(
     if count_rows:  # Setting countrows defaults to extended=True
         extended = True

-    if (
-        workspace_id != resolve_workspace_id()
-        and lakehouse_id != resolve_lakehouse_id()
-        and count_rows
-    ):
-        raise ValueError(
-            f"{icons.red_dot} If 'count_rows' is set to True, you must run this function against the default lakehouse attached to the notebook. "
-            "Count rows runs a spark query and cross-workspace spark queries are currently not supported."
-        )
-
     responses = _base_api(
         request=f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables",
         uses_pagination=True,
@@ -123,7 +111,7 @@ def get_lakehouse_tables(
     if extended:
         sku_value = get_sku_size(workspace_id)
         guardrail = get_directlake_guardrails_for_sku(sku_value)
-        local_path = _mount()
+        local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id)

         df["Files"], df["Row Groups"], df["Table Size"] = None, None, None
         if count_rows:
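Dropping the default-lakehouse guard and passing the target lakehouse and workspace into `_mount` suggests that extended statistics and row counts can now be gathered for a non-default lakehouse. A hedged sketch; the names are placeholders and the `lakehouse`/`workspace` keyword names are assumed from the library's usual conventions:

```python
from sempy_labs.lakehouse import get_lakehouse_tables

df = get_lakehouse_tables(
    lakehouse="SalesLakehouse",  # assumed parameter name
    workspace="Analytics",       # assumed parameter name
    extended=True,
    count_rows=True,             # previously restricted to the attached default lakehouse
)
```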
sempy_labs/lakehouse/_helper.py

@@ -0,0 +1,211 @@
+from uuid import UUID
+from typing import Optional, Literal
+import pyarrow.dataset as ds
+from sempy_labs._helper_functions import (
+    _mount,
+    delete_item,
+    _base_api,
+    resolve_workspace_name_and_id,
+    resolve_lakehouse_name_and_id,
+)
+from sempy._utils._log import log
+import sempy_labs._icons as icons
+import os
+
+
+@log
+def is_v_ordered(
+    table_name: str,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+    schema: Optional[str] = None,
+) -> bool:
+    """
+    Checks if a delta table is v-ordered.
+
+    Parameters
+    ----------
+    table_name : str
+        The name of the table to check.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    schema : str, optional
+        The schema of the table to check. If not provided, the default schema is used.
+
+    Returns
+    -------
+    bool
+        True if the table is v-ordered, False otherwise.
+    """
+
+    local_path = _mount(lakehouse=lakehouse, workspace=workspace)
+    table_path = (
+        f"{local_path}/Tables/{schema}/{table_name}"
+        if schema
+        else f"{local_path}/Tables/{table_name}"
+    )
+    ds_schema = ds.dataset(table_path).schema.metadata
+
+    return any(b"vorder" in key for key in ds_schema.keys())
+
+
+def delete_lakehouse(
+    lakehouse: str | UUID, workspace: Optional[str | UUID] = None
+) -> None:
+    """
+    Deletes a lakehouse.
+
+    This is a wrapper function for the following API: `Items - Delete Lakehouse <https://learn.microsoft.com/rest/api/fabric/lakehouse/items/delete-lakehouse>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    lakehouse : str | uuid.UUID
+        The name or ID of the lakehouse to delete.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    delete_item(item=lakehouse, item_type="lakehouse", workspace=workspace)
+
+
+def update_lakehouse(
+    name: Optional[str] = None,
+    description: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Updates a lakehouse.
+
+    This is a wrapper function for the following API: `Items - Update Lakehouse <https://learn.microsoft.com/rest/api/fabric/lakehouse/items/update-lakehouse>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    name: str, default=None
+        The new name of the lakehouse.
+        Defaults to None which does not update the name.
+    description: str, default=None
+        The new description of the lakehouse.
+        Defaults to None which does not update the description.
+    lakehouse : str | uuid.UUID, default=None
+        The name or ID of the lakehouse to update.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    if not name and not description:
+        raise ValueError(
+            f"{icons.red_dot} Either name or description must be provided."
+        )
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse, workspace_id
+    )
+
+    payload = {}
+    if name:
+        payload["displayName"] = name
+    if description:
+        payload["description"] = description
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}",
+        method="patch",
+        client="fabric_sp",
+        payload=payload,
+    )
+
+    print(
+        f"{icons.green_dot} The '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been updated accordingly."
+    )
+
+
+@log
+def load_table(
+    table_name: str,
+    file_path: str,
+    mode: Literal["Overwrite", "Append"],
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Loads a table into a lakehouse. Currently only files are supported, not folders.
+
+    This is a wrapper function for the following API: `Tables - Load Table <https://learn.microsoft.com/rest/api/fabric/lakehouse/tables/load-table>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    table_name : str
+        The name of the table to load.
+    file_path : str
+        The path to the data to load.
+    mode : Literal["Overwrite", "Append"]
+        The mode to use when loading the data.
+        "Overwrite" will overwrite the existing data.
+        "Append" will append the data to the existing data.
+    lakehouse : str | uuid.UUID, default=None
+        The name or ID of the lakehouse to load the table into.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse, workspace_id
+    )
+
+    file_extension = os.path.splitext(file_path)[1]
+
+    payload = {
+        "relativePath": file_path,
+        "pathType": "File",
+        "mode": mode,
+        "formatOptions": {},
+    }
+
+    if file_extension == ".csv":
+        payload["formatOptions"] = {"format": "Csv", "header": True, "delimiter": ","}
+    elif file_extension == ".parquet":
+        payload["formatOptions"] = {
+            "format": "Parquet",
+            "header": True,
+        }
+    # Solve for loading folders
+    # elif file_extension == '':
+    #     payload['pathType'] = "Folder"
+    #     payload["recursive"] = recursive
+    #     payload['formatOptions']
+    else:
+        raise NotImplementedError()
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables/{table_name}/load",
+        client="fabric_sp",
+        method="post",
+        status_codes=202,
+        lro_return_status_code=True,
+    )
+
+    print(
+        f"{icons.green_dot} The '{table_name}' table has been loaded into the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace."
+    )
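A short usage sketch for the new helpers in `_helper.py`, following the signatures shown above; the item names and the file path are placeholders:

```python
from sempy_labs.lakehouse import update_lakehouse, load_table

# Rename a lakehouse and refresh its description via the Update Lakehouse API.
update_lakehouse(
    name="SalesLakehouseV2",
    description="Curated sales data",
    lakehouse="SalesLakehouse",
    workspace="Analytics",
)

# Load a single CSV file from the Files area into a managed Delta table
# (only .csv and .parquet files are handled; folders are not yet supported).
load_table(
    table_name="StagingSales",
    file_path="Files/raw/sales_2024.csv",
    mode="Overwrite",
    lakehouse="SalesLakehouseV2",
    workspace="Analytics",
)
```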
sempy_labs/lakehouse/_lakehouse.py

@@ -113,7 +113,7 @@ def vacuum_lakehouse_tables(
     Parameters
     ----------
     tables : str | List[str] | None
-        The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be optimized.
+        The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be vacuumed.
     lakehouse : str | uuid.UUID, default=None
         The Fabric lakehouse name or ID.
         Defaults to None which resolves to the lakehouse attached to the notebook.