semantic-link-labs 0.12.3 → 0.12.4 (py3-none-any.whl)

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

This version of semantic-link-labs has been flagged as potentially problematic.

Files changed (36)
  1. {semantic_link_labs-0.12.3.dist-info → semantic_link_labs-0.12.4.dist-info}/METADATA +4 -3
  2. {semantic_link_labs-0.12.3.dist-info → semantic_link_labs-0.12.4.dist-info}/RECORD +35 -29
  3. sempy_labs/__init__.py +10 -8
  4. sempy_labs/_a_lib_info.py +1 -1
  5. sempy_labs/_authentication.py +1 -1
  6. sempy_labs/_capacities.py +1 -1
  7. sempy_labs/_git.py +1 -1
  8. sempy_labs/_helper_functions.py +27 -4
  9. sempy_labs/_list_functions.py +55 -5
  10. sempy_labs/_managed_private_endpoints.py +1 -1
  11. sempy_labs/_notebooks.py +4 -2
  12. sempy_labs/_sql_audit_settings.py +208 -0
  13. sempy_labs/_sql_endpoints.py +18 -3
  14. sempy_labs/_utils.py +2 -0
  15. sempy_labs/admin/__init__.py +6 -0
  16. sempy_labs/admin/_items.py +3 -3
  17. sempy_labs/admin/_labels.py +211 -0
  18. sempy_labs/directlake/_warm_cache.py +3 -1
  19. sempy_labs/eventstream/__init__.py +37 -0
  20. sempy_labs/eventstream/_items.py +263 -0
  21. sempy_labs/eventstream/_topology.py +652 -0
  22. sempy_labs/graph/__init__.py +8 -0
  23. sempy_labs/graph/_groups.py +60 -53
  24. sempy_labs/graph/_sensitivity_labels.py +39 -0
  25. sempy_labs/graph/_teams.py +19 -18
  26. sempy_labs/graph/_user_licenses.py +96 -0
  27. sempy_labs/graph/_users.py +23 -16
  28. sempy_labs/lakehouse/_get_lakehouse_tables.py +33 -1
  29. sempy_labs/lakehouse/_lakehouse.py +6 -2
  30. sempy_labs/lakehouse/_partitioning.py +165 -0
  31. sempy_labs/report/_reportwrapper.py +15 -5
  32. sempy_labs/tom/_model.py +81 -4
  33. sempy_labs/_eventstreams.py +0 -123
  34. {semantic_link_labs-0.12.3.dist-info → semantic_link_labs-0.12.4.dist-info}/WHEEL +0 -0
  35. {semantic_link_labs-0.12.3.dist-info → semantic_link_labs-0.12.4.dist-info}/licenses/LICENSE +0 -0
  36. {semantic_link_labs-0.12.3.dist-info → semantic_link_labs-0.12.4.dist-info}/top_level.txt +0 -0
sempy_labs/graph/_groups.py

@@ -55,7 +55,7 @@ def list_groups() -> pd.DataFrame:
         A pandas dataframe showing a list of groups and their properties.
     """
 
-    result = _base_api(request="groups", client="graph").json()
+    result = _base_api(request="groups", client="graph", uses_pagination=True)
 
     columns = {
         "Group Id": "string",
@@ -76,24 +76,25 @@ def list_groups() -> pd.DataFrame:
     df = _create_dataframe(columns=columns)
 
     rows = []
-    for v in result.get("value"):
-        rows.append(
-            {
-                "Group Id": v.get("id"),
-                "Group Name": v.get("displayName"),
-                "Mail": v.get("mail"),
-                "Description": v.get("description"),
-                "Classification": v.get("classification"),
-                "Mail Enabled": v.get("mailEnabled"),
-                "Security Enabled": v.get("securityEnabled"),
-                "Created Date Time": v.get("createdDateTime"),
-                "Expiration Date Time": v.get("expirationDateTime"),
-                "Renewed Date Time": v.get("renewedDateTime"),
-                "Deleted Date Time": v.get("deletedDateTime"),
-                "Visibility": v.get("visibility"),
-                "Security Identifier": v.get("securityIdentifier"),
-            }
-        )
+    for r in result:
+        for v in r.get("value", []):
+            rows.append(
+                {
+                    "Group Id": v.get("id"),
+                    "Group Name": v.get("displayName"),
+                    "Mail": v.get("mail"),
+                    "Description": v.get("description"),
+                    "Classification": v.get("classification"),
+                    "Mail Enabled": v.get("mailEnabled"),
+                    "Security Enabled": v.get("securityEnabled"),
+                    "Created Date Time": v.get("createdDateTime"),
+                    "Expiration Date Time": v.get("expirationDateTime"),
+                    "Renewed Date Time": v.get("renewedDateTime"),
+                    "Deleted Date Time": v.get("deletedDateTime"),
+                    "Visibility": v.get("visibility"),
+                    "Security Identifier": v.get("securityIdentifier"),
+                }
+            )
 
     if rows:
         df = pd.DataFrame(rows, columns=list(columns.keys()))
@@ -190,7 +191,9 @@ def list_group_members(group: str | UUID) -> pd.DataFrame:
 
     group_id = resolve_group_id(group)
 
-    result = _base_api(request=f"groups/{group_id}/members", client="graph").json()
+    result = _base_api(
+        request=f"groups/{group_id}/members", client="graph", uses_pagination=True
+    )
 
     columns = {
         "Member Id": "string",
@@ -209,22 +212,23 @@ def list_group_members(group: str | UUID) -> pd.DataFrame:
     df = _create_dataframe(columns=columns)
 
     rows = []
-    for v in result.get("value"):
-        rows.append(
-            {
-                "Member Id": v.get("id"),
-                "Member Name": v.get("displayName"),
-                "User Principal Name": v.get("userPrincipalName"),
-                "Mail": v.get("mail"),
-                "Job Title": v.get("jobTitle"),
-                "Office Location": v.get("officeLocation"),
-                "Mobile Phone": v.get("mobilePhone"),
-                "Business Phones": str(v.get("businessPhones")),
-                "Preferred Language": v.get("preferredLanguage"),
-                "Given Name": v.get("givenName"),
-                "Surname": v.get("surname"),
-            }
-        )
+    for r in result:
+        for v in r.get("value", []):
+            rows.append(
+                {
+                    "Member Id": v.get("id"),
+                    "Member Name": v.get("displayName"),
+                    "User Principal Name": v.get("userPrincipalName"),
+                    "Mail": v.get("mail"),
+                    "Job Title": v.get("jobTitle"),
+                    "Office Location": v.get("officeLocation"),
+                    "Mobile Phone": v.get("mobilePhone"),
+                    "Business Phones": str(v.get("businessPhones")),
+                    "Preferred Language": v.get("preferredLanguage"),
+                    "Given Name": v.get("givenName"),
+                    "Surname": v.get("surname"),
+                }
+            )
 
     if rows:
         df = pd.DataFrame(rows, columns=list(columns.keys()))
@@ -254,7 +258,9 @@ def list_group_owners(group: str | UUID) -> pd.DataFrame:
 
     group_id = resolve_group_id(group)
 
-    result = _base_api(request=f"groups/{group_id}/owners", client="graph").json()
+    result = _base_api(
+        request=f"groups/{group_id}/owners", client="graph", uses_pagination=True
+    )
 
     columns = {
         "Owner Id": "string",
@@ -273,22 +279,23 @@ def list_group_owners(group: str | UUID) -> pd.DataFrame:
     df = _create_dataframe(columns=columns)
 
     rows = []
-    for v in result.get("value"):
-        rows.append(
-            {
-                "Owner Id": v.get("id"),
-                "Owner Name": v.get("displayName"),
-                "User Principal Name": v.get("userPrincipalName"),
-                "Mail": v.get("mail"),
-                "Job Title": v.get("jobTitle"),
-                "Office Location": v.get("officeLocation"),
-                "Mobile Phone": v.get("mobilePhone"),
-                "Business Phones": str(v.get("businessPhones")),
-                "Preferred Language": v.get("preferredLanguage"),
-                "Given Name": v.get("givenName"),
-                "Surname": v.get("surname"),
-            }
-        )
+    for r in result:
+        for v in r.get("value", []):
+            rows.append(
+                {
+                    "Owner Id": v.get("id"),
+                    "Owner Name": v.get("displayName"),
+                    "User Principal Name": v.get("userPrincipalName"),
+                    "Mail": v.get("mail"),
+                    "Job Title": v.get("jobTitle"),
+                    "Office Location": v.get("officeLocation"),
+                    "Mobile Phone": v.get("mobilePhone"),
+                    "Business Phones": str(v.get("businessPhones")),
+                    "Preferred Language": v.get("preferredLanguage"),
+                    "Given Name": v.get("givenName"),
+                    "Surname": v.get("surname"),
+                }
+            )
 
     if rows:
         df = pd.DataFrame(rows, columns=list(columns.keys()))
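
The same page-then-items pattern recurs in _teams.py and _users.py below. The old code called .json() on a single response and so read only the first page (typically 100 objects for these Graph endpoints), silently truncating larger tenants; the pagination flag is the actual fix. A minimal sketch of what the new loops assume, namely that _base_api(..., uses_pagination=True) returns a list of parsed page payloads (the page contents here are invented):

    # Hypothetical pages, shaped like Graph API list responses: {"value": [...]}.
    pages = [
        {"value": [{"id": "1", "displayName": "Group A"}]},
        {"value": [{"id": "2", "displayName": "Group B"}]},
        {},  # a page without "value" is tolerated via .get("value", [])
    ]

    rows = []
    for r in pages:                   # one iteration per page
        for v in r.get("value", []):  # one iteration per item on the page
            rows.append({"Group Id": v.get("id"), "Group Name": v.get("displayName")})

    assert [row["Group Id"] for row in rows] == ["1", "2"]
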
sempy_labs/graph/_sensitivity_labels.py

@@ -5,6 +5,7 @@ from sempy_labs._helper_functions import (
     _base_api,
     _create_dataframe,
     _update_dataframe_datatypes,
+    _is_valid_uuid,
 )
 from sempy._utils._log import log
 
@@ -79,3 +80,41 @@ def list_sensitivity_labels(user: Optional[str | UUID] = None) -> pd.DataFrame:
     _update_dataframe_datatypes(dataframe=df, column_map=columns)
 
     return df
+
+
+@log
+def resolve_sensitivity_label_id(
+    label: str | UUID, user: Optional[str | UUID] = None
+) -> UUID | None:
+    """
+    Resolve a sensitivity label name or ID to its corresponding sensitivity label ID.
+
+    Service Principal Authentication is required (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    label : str | uuid.UUID
+        The name or ID of the sensitivity label.
+    user : str | uuid.UUID, default=None
+        The user ID or user principal name.
+
+    Returns
+    -------
+    uuid.UUID | None
+        The ID of the sensitivity label if found, otherwise None.
+    """
+
+    if _is_valid_uuid(label):
+        return str(label)
+
+    df = list_sensitivity_labels(user=user)
+
+    if df.empty:
+        return None
+
+    # Try to find the label by name
+    label_row = df[df["Sensitivity Label Name"] == label]
+    if not label_row.empty:
+        return label_row["Sensitivity Label Id"].iloc[0]
+
+    return None
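
A usage sketch for the new resolver. The label name, user, and GUID below are made up, and the import assumes the function is re-exported from sempy_labs.graph, consistent with the graph/__init__.py +8 entry above:

    from sempy_labs.graph import resolve_sensitivity_label_id

    # By name: lists the user's labels and matches on "Sensitivity Label Name".
    label_id = resolve_sensitivity_label_id("Confidential", user="user@contoso.com")

    # By ID: _is_valid_uuid() short-circuits before any API call. Note that despite
    # the UUID | None annotation, this branch returns str(label), and the name branch
    # returns a DataFrame cell, so callers get a string (or None) in practice.
    label_id = resolve_sensitivity_label_id("5f7a2a6c-3d4e-4b8a-9c1d-2e3f4a5b6c7d")
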
sempy_labs/graph/_teams.py

@@ -23,7 +23,7 @@ def list_teams() -> pd.DataFrame:
         A pandas dataframe showing a list of teams and their properties.
     """
 
-    result = _base_api(request="teams", client="graph").json()
+    result = _base_api(request="teams", client="graph", uses_pagination=True)
 
     columns = {
         "Team Id": "str",
@@ -43,23 +43,24 @@ def list_teams() -> pd.DataFrame:
     df = _create_dataframe(columns=columns)
 
     rows = []
-    for v in result.get("value"):
-        rows.append(
-            {
-                "Team Id": v.get("id"),
-                "Team Name": v.get("displayName"),
-                "Description": v.get("description"),
-                "Creation Date Time": v.get("createdDateTime"),
-                "Classification": v.get("classification"),
-                "Specialization": v.get("specialization"),
-                "Visibility": v.get("visibility"),
-                "Web Url": v.get("webUrl"),
-                "Archived": v.get("isArchived"),
-                "Favorite By Me": v.get("isFavoriteByMe"),
-                "Discoverable By Me": v.get("isDiscoverableByMe"),
-                "Member Count": v.get("memberCount"),
-            }
-        )
+    for r in result:
+        for v in r.get("value", []):
+            rows.append(
+                {
+                    "Team Id": v.get("id"),
+                    "Team Name": v.get("displayName"),
+                    "Description": v.get("description"),
+                    "Creation Date Time": v.get("createdDateTime"),
+                    "Classification": v.get("classification"),
+                    "Specialization": v.get("specialization"),
+                    "Visibility": v.get("visibility"),
+                    "Web Url": v.get("webUrl"),
+                    "Archived": v.get("isArchived"),
+                    "Favorite By Me": v.get("isFavoriteByMe"),
+                    "Discoverable By Me": v.get("isDiscoverableByMe"),
+                    "Member Count": v.get("memberCount"),
+                }
+            )
 
     if rows:
         df = pd.DataFrame(rows, columns=list(columns.keys()))
sempy_labs/graph/_user_licenses.py (new file)

@@ -0,0 +1,96 @@
+from uuid import UUID
+import sempy_labs._icons as icons
+from typing import List, Optional
+from sempy_labs._helper_functions import (
+    _base_api,
+)
+from sempy._utils._log import log
+from sempy_labs.graph._users import resolve_user_id
+
+
+@log
+def add_user_license(
+    user: str | UUID, sku_id: UUID, disabled_plans: Optional[UUID | List[UUID]] = None
+):
+    """
+    Assigns a license to a user.
+
+    This is a wrapper function for the following API: `user: assignLicense <https://learn.microsoft.com/graph/api/user-assignlicense>`_.
+
+    Service Principal Authentication is required (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    user : str | uuid.UUID
+        The user ID or user principal name.
+    sku_id : uuid.UUID
+        The SKU ID of the license to assign.
+    disabled_plans : Optional[uuid.UUID | List[uuid.UUID]], default=None
+        A single service plan ID or a list of service plan IDs to disable within the assigned license.
+    """
+
+    user_id = resolve_user_id(user)
+
+    payload = {
+        "addLicenses": [
+            {
+                "skuId": sku_id,
+            },
+        ],
+        "removeLicenses": [],
+    }
+
+    if disabled_plans:
+        if isinstance(disabled_plans, str):
+            disabled_plans = [disabled_plans]
+        payload["addLicenses"][0]["disabledPlans"] = disabled_plans
+
+    _base_api(
+        request=f"users/{user_id}/assignLicense",
+        client="graph",
+        method="post",
+        payload=payload,
+    )
+
+    print(
+        f"{icons.green_dot} The '{sku_id}' license has been assigned to the user '{user}'."
+    )
+
+
+@log
+def remove_user_license(user: str | UUID, sku_ids: UUID | List[UUID]):
+    """
+    Removes a license from a user.
+
+    This is a wrapper function for the following API: `user: assignLicense <https://learn.microsoft.com/graph/api/user-assignlicense>`_.
+
+    Service Principal Authentication is required (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    user : str | uuid.UUID
+        The user ID or user principal name.
+    sku_ids : uuid.UUID | List[uuid.UUID]
+        The SKU ID(s) of the license(s) to remove.
+    """
+
+    user_id = resolve_user_id(user)
+
+    if isinstance(sku_ids, str):
+        sku_ids = [sku_ids]
+
+    payload = {
+        "addLicenses": [],
+        "removeLicenses": sku_ids,
+    }
+
+    _base_api(
+        request=f"users/{user_id}/assignLicense",
+        client="graph",
+        method="post",
+        payload=payload,
+    )
+
+    print(
+        f"{icons.green_dot} The '{', '.join([str(s) for s in sku_ids])}' license(s) have been removed from the user '{user}'."
+    )
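
A usage sketch for the two new functions. Every GUID below is a placeholder; real skuId and service-plan IDs come from your tenant (for example via Graph's subscribedSkus endpoint), and the import assumes re-export via graph/__init__.py:

    from sempy_labs.graph import add_user_license, remove_user_license

    SKU_ID = "00000000-0000-0000-0000-000000000000"   # placeholder skuId
    PLAN_ID = "11111111-1111-1111-1111-111111111111"  # placeholder service plan to disable

    # Assign the license; a single disabled plan may be passed bare, since the
    # function wraps a str in a list before adding it to the payload.
    add_user_license("user@contoso.com", sku_id=SKU_ID, disabled_plans=PLAN_ID)

    # Remove it again; sku_ids likewise accepts a single value or a list.
    remove_user_license("user@contoso.com", sku_ids=SKU_ID)
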
sempy_labs/graph/_users.py

@@ -8,6 +8,7 @@ from .._helper_functions import (
     _is_valid_uuid,
     _base_api,
     _create_dataframe,
+    _update_dataframe_datatypes,
     _mount,
 )
 from sempy._utils._log import log
@@ -91,7 +92,7 @@ def list_users() -> pd.DataFrame:
         A pandas dataframe showing a list of users and their properties.
     """
 
-    result = _base_api(request="users", client="graph").json()
+    result = _base_api(request="users", client="graph", uses_pagination=True)
 
     columns = {
         "User Id": "string",
@@ -108,21 +109,27 @@ def list_users() -> pd.DataFrame:
 
     df = _create_dataframe(columns=columns)
 
-    for v in result.get("value"):
-        new_data = {
-            "User Id": v.get("id"),
-            "User Principal Name": v.get("userPrincipalName"),
-            "User Name": v.get("displayName"),
-            "Mail": v.get("mail"),
-            "Job Title": v.get("jobTitle"),
-            "Office Location": v.get("officeLocation"),
-            "Mobile Phone": v.get("mobilePhone"),
-            "Business Phones": str(v.get("businessPhones")),
-            "Preferred Language": v.get("preferredLanguage"),
-            "Surname": v.get("surname"),
-        }
-
-        df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+    rows = []
+    for r in result:
+        for v in r.get("value", []):
+            rows.append(
+                {
+                    "User Id": v.get("id"),
+                    "User Principal Name": v.get("userPrincipalName"),
+                    "User Name": v.get("displayName"),
+                    "Mail": v.get("mail"),
+                    "Job Title": v.get("jobTitle"),
+                    "Office Location": v.get("officeLocation"),
+                    "Mobile Phone": v.get("mobilePhone"),
+                    "Business Phones": str(v.get("businessPhones")),
+                    "Preferred Language": v.get("preferredLanguage"),
+                    "Surname": v.get("surname"),
+                }
+            )
+
+    if rows:
+        df = pd.DataFrame(rows, columns=list(columns.keys()))
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)
 
     return df
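Beyond pagination, list_users also drops the per-row pd.concat in favor of accumulating plain dicts and building the DataFrame once; repeated pd.concat in a loop is quadratic in the number of rows. The rewrite additionally applies _update_dataframe_datatypes (newly imported above), so the declared column types are actually enforced, matching the other Graph listers in this release.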
 
sempy_labs/lakehouse/_get_lakehouse_tables.py

@@ -33,6 +33,7 @@ def get_lakehouse_tables(
     extended: bool = False,
     count_rows: bool = False,
     export: bool = False,
+    exclude_shortcuts: bool = False,
 ) -> pd.DataFrame:
     """
     Shows the tables of a lakehouse and their respective properties. Option to include additional properties relevant to Direct Lake guardrails.
@@ -60,6 +61,8 @@ def get_lakehouse_tables(
         Obtains a row count for each lakehouse table.
     export : bool, default=False
         Exports the resulting dataframe to a delta table in the lakehouse.
+    exclude_shortcuts : bool, default=False
+        If True, excludes shortcuts.
 
     Returns
     -------
@@ -83,6 +86,9 @@ def get_lakehouse_tables(
         lakehouse=lakehouse, workspace=workspace_id
    )
 
+    # Test if valid lakehouse:
+    x = _base_api(f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}")
+
    if count_rows:  # Setting countrows defaults to extended=True
        extended = True
 
@@ -94,7 +100,7 @@ def get_lakehouse_tables(
            client="fabric_sp",
        )
 
-    except Exception:
+    except Exception:
        API_called = False
 
    rows = []
@@ -246,6 +252,32 @@ def get_lakehouse_tables(
        df["Row Count"] = df["Row Count"].astype(int)
        df["Row Count Guardrail Hit"] = df["Row Count"] > df["Row Count Guardrail"]
 
+    if exclude_shortcuts:
+        from sempy_labs.lakehouse._shortcuts import list_shortcuts
+
+        # Exclude shortcuts
+        shortcuts = (
+            list_shortcuts(lakehouse=lakehouse, workspace=workspace)
+            .query("`Shortcut Path`.str.startswith('/Tables')", engine="python")
+            .assign(
+                FullPath=lambda df: df["Shortcut Path"].str.rstrip("/")
+                + "/"
+                + df["Shortcut Name"]
+            )["FullPath"]
+            .tolist()
+        )
+
+        df["FullPath"] = df.apply(
+            lambda x: (
+                f"/Tables/{x['Table Name']}"
+                if pd.isna(x["Schema Name"]) or x["Schema Name"] == ""
+                else f"/Tables/{x['Schema Name']}/{x['Table Name']}"
+            ),
+            axis=1,
+        )
+
+        df = df[~df["FullPath"].isin(shortcuts)].reset_index(drop=True)
+
    if export:
        if not lakehouse_attached():
            raise ValueError(
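
The exclude_shortcuts branch works by normalizing both sides to a /Tables[/<schema>]/<table> path and anti-joining. A self-contained sketch of just that matching step, with invented frames standing in for the list_shortcuts() output and the table listing:

    import pandas as pd

    shortcuts = pd.DataFrame({
        "Shortcut Path": ["/Tables", "/Tables/sales", "/Files"],
        "Shortcut Name": ["dim_date", "fact_orders", "raw_dump"],
    })
    tables = pd.DataFrame({
        "Schema Name": [None, "sales", None],
        "Table Name": ["dim_date", "fact_orders", "local_table"],
    })

    # Keep only /Tables shortcuts and build their full paths, as the new code does.
    shortcut_paths = (
        shortcuts.query("`Shortcut Path`.str.startswith('/Tables')", engine="python")
        .assign(FullPath=lambda df: df["Shortcut Path"].str.rstrip("/") + "/" + df["Shortcut Name"])
        ["FullPath"].tolist()
    )  # ['/Tables/dim_date', '/Tables/sales/fact_orders']

    # Build the same shape of path for each table, schema-aware.
    tables["FullPath"] = tables.apply(
        lambda x: f"/Tables/{x['Table Name']}"
        if pd.isna(x["Schema Name"]) or x["Schema Name"] == ""
        else f"/Tables/{x['Schema Name']}/{x['Table Name']}",
        axis=1,
    )

    # Anti-join: only the non-shortcut table survives.
    print(tables[~tables["FullPath"].isin(shortcut_paths)]["Table Name"].tolist())  # ['local_table']
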
sempy_labs/lakehouse/_lakehouse.py

@@ -93,7 +93,9 @@ def optimize_lakehouse_tables(
 
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
 
-    df = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+    df = get_lakehouse_tables(
+        lakehouse=lakehouse, workspace=workspace, exclude_shortcuts=True
+    )
     df_delta = df[df["Format"] == "delta"]
 
     if isinstance(tables, str):
@@ -142,7 +144,9 @@ def vacuum_lakehouse_tables(
 
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
 
-    df = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+    df = get_lakehouse_tables(
+        lakehouse=lakehouse, workspace=workspace, exclude_shortcuts=True
+    )
     df_delta = df[df["Format"] == "delta"]
 
     if isinstance(tables, str):
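Both maintenance helpers now pass exclude_shortcuts=True, presumably so that OPTIMIZE and VACUUM are no longer attempted against shortcut-backed tables, whose underlying delta files live in (and should be maintained by) the source lakehouse or external store rather than the lakehouse being optimized.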
sempy_labs/lakehouse/_partitioning.py (new file)

@@ -0,0 +1,165 @@
+from typing import Optional, List
+from uuid import UUID
+from sempy_labs._helper_functions import (
+    _create_spark_session,
+    create_abfss_path,
+    resolve_workspace_id,
+    resolve_lakehouse_id,
+    _get_delta_table,
+)
+from sempy._utils._log import log
+
+
+@log
+def _get_partitions(
+    table_name: str,
+    schema_name: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+):
+
+    workspace_id = resolve_workspace_id(workspace)
+    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+    path = create_abfss_path(lakehouse_id, workspace_id, table_name, schema_name)
+
+    delta_table = _get_delta_table(path)
+    details_df = delta_table.detail()
+
+    return details_df.collect()[0].asDict()
+
+
+@log
+def is_partitioned(
+    table: str,
+    schema: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+) -> bool:
+    """
+    Checks if a delta table is partitioned.
+
+    Parameters
+    ----------
+    table : str
+        The name of the delta table.
+    schema : str, optional
+        The schema of the table to check. If not provided, the default schema is used.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    bool
+        True if the table is partitioned, False otherwise.
+    """
+
+    details = _get_partitions(
+        table_name=table, schema_name=schema, lakehouse=lakehouse, workspace=workspace
+    )
+    return len(details["partitionColumns"]) > 0
+
+
+@log
+def list_partitioned_columns(
+    table: str,
+    schema: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+) -> List[str]:
+    """
+    Lists the partitioned columns of a delta table.
+
+    Parameters
+    ----------
+    table : str
+        The name of the delta table.
+    schema : str, optional
+        The schema of the table to check. If not provided, the default schema is used.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    List[str]
+        The list of partitioned columns.
+    """
+
+    details = _get_partitions(
+        table_name=table, schema_name=schema, lakehouse=lakehouse, workspace=workspace
+    )
+
+    return details["partitionColumns"]
+
+
+@log
+def is_over_partitioned(
+    table: str,
+    schema: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+    total_table_size_gb: int = 1000,
+    average_partition_size_gb: int = 1,
+) -> bool:
+    """
+    Checks if a delta table is over-partitioned.
+
+    Parameters
+    ----------
+    table : str
+        The name of the delta table.
+    schema : str, optional
+        The schema of the table to check. If not provided, the default schema is used.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    total_table_size_gb : int, default=1000
+        Threshold for total table size in GB (default 1TB).
+    average_partition_size_gb : int, default=1
+        Threshold for average partition size in GB.
+
+    Returns
+    -------
+    bool
+        True if the table is over-partitioned, False otherwise.
+    """
+
+    workspace_id = resolve_workspace_id(workspace)
+    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+    path = create_abfss_path(lakehouse_id, workspace_id, table, schema)
+    # Get DeltaTable details
+    spark = _create_spark_session()
+    details_df = spark.sql(f"DESCRIBE DETAIL delta.`{path}`")
+    details = details_df.collect()[0].asDict()
+
+    # Extract relevant fields
+    size_bytes = details["sizeInBytes"]
+    partition_cols = details["partitionColumns"]
+    num_files = details["numFiles"]
+
+    total_size_gb = size_bytes / (1024**3)
+
+    # Only check if the table is partitioned
+    if len(partition_cols) > 0 and num_files > 0:
+        avg_partition_size_gb = total_size_gb / num_files
+
+        if (
+            total_size_gb < total_table_size_gb
+            or avg_partition_size_gb < average_partition_size_gb
+        ):
+            return True
+
+    return False
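
To make the is_over_partitioned heuristic concrete, a worked example with invented numbers. Note that the "average partition size" is really sizeInBytes / numFiles, that is, average file size, not size per distinct partition value:

    # Values as DESCRIBE DETAIL would report them (invented for illustration).
    size_bytes = 500 * 1024**3          # a 500 GB table
    num_files = 2_000
    partition_cols = ["order_date"]     # partitioned, so the check applies

    total_size_gb = size_bytes / (1024**3)             # 500.0
    avg_partition_size_gb = total_size_gb / num_files  # 0.25

    # Defaults: total_table_size_gb=1000, average_partition_size_gb=1.
    # The two conditions are OR-ed, so 500 < 1000 alone already flags the table;
    # with the defaults, any partitioned table under 1 TB is reported as
    # over-partitioned regardless of its average file size.
    flagged = total_size_gb < 1000 or avg_partition_size_gb < 1
    assert flagged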