semantic-link-labs 0.12.3 → 0.12.4 (py3-none-any.whl)

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

This version of semantic-link-labs has been flagged as potentially problematic.

Files changed (36)
  1. {semantic_link_labs-0.12.3.dist-info → semantic_link_labs-0.12.4.dist-info}/METADATA +4 -3
  2. {semantic_link_labs-0.12.3.dist-info → semantic_link_labs-0.12.4.dist-info}/RECORD +35 -29
  3. sempy_labs/__init__.py +10 -8
  4. sempy_labs/_a_lib_info.py +1 -1
  5. sempy_labs/_authentication.py +1 -1
  6. sempy_labs/_capacities.py +1 -1
  7. sempy_labs/_git.py +1 -1
  8. sempy_labs/_helper_functions.py +27 -4
  9. sempy_labs/_list_functions.py +55 -5
  10. sempy_labs/_managed_private_endpoints.py +1 -1
  11. sempy_labs/_notebooks.py +4 -2
  12. sempy_labs/_sql_audit_settings.py +208 -0
  13. sempy_labs/_sql_endpoints.py +18 -3
  14. sempy_labs/_utils.py +2 -0
  15. sempy_labs/admin/__init__.py +6 -0
  16. sempy_labs/admin/_items.py +3 -3
  17. sempy_labs/admin/_labels.py +211 -0
  18. sempy_labs/directlake/_warm_cache.py +3 -1
  19. sempy_labs/eventstream/__init__.py +37 -0
  20. sempy_labs/eventstream/_items.py +263 -0
  21. sempy_labs/eventstream/_topology.py +652 -0
  22. sempy_labs/graph/__init__.py +8 -0
  23. sempy_labs/graph/_groups.py +60 -53
  24. sempy_labs/graph/_sensitivity_labels.py +39 -0
  25. sempy_labs/graph/_teams.py +19 -18
  26. sempy_labs/graph/_user_licenses.py +96 -0
  27. sempy_labs/graph/_users.py +23 -16
  28. sempy_labs/lakehouse/_get_lakehouse_tables.py +33 -1
  29. sempy_labs/lakehouse/_lakehouse.py +6 -2
  30. sempy_labs/lakehouse/_partitioning.py +165 -0
  31. sempy_labs/report/_reportwrapper.py +15 -5
  32. sempy_labs/tom/_model.py +81 -4
  33. sempy_labs/_eventstreams.py +0 -123
  34. {semantic_link_labs-0.12.3.dist-info → semantic_link_labs-0.12.4.dist-info}/WHEEL +0 -0
  35. {semantic_link_labs-0.12.3.dist-info → semantic_link_labs-0.12.4.dist-info}/licenses/LICENSE +0 -0
  36. {semantic_link_labs-0.12.3.dist-info → semantic_link_labs-0.12.4.dist-info}/top_level.txt +0 -0
sempy_labs/graph/_groups.py

@@ -55,7 +55,7 @@ def list_groups() -> pd.DataFrame:
         A pandas dataframe showing a list of groups and their properties.
     """
 
-    result = _base_api(request="groups", client="graph").json()
+    result = _base_api(request="groups", client="graph", uses_pagination=True)
 
     columns = {
         "Group Id": "string",
@@ -76,24 +76,25 @@ def list_groups() -> pd.DataFrame:
     df = _create_dataframe(columns=columns)
 
     rows = []
-    for v in result.get("value"):
-        rows.append(
-            {
-                "Group Id": v.get("id"),
-                "Group Name": v.get("displayName"),
-                "Mail": v.get("mail"),
-                "Description": v.get("description"),
-                "Classification": v.get("classification"),
-                "Mail Enabled": v.get("mailEnabled"),
-                "Security Enabled": v.get("securityEnabled"),
-                "Created Date Time": v.get("createdDateTime"),
-                "Expiration Date Time": v.get("expirationDateTime"),
-                "Renewed Date Time": v.get("renewedDateTime"),
-                "Deleted Date Time": v.get("deletedDateTime"),
-                "Visibility": v.get("visibility"),
-                "Security Identifier": v.get("securityIdentifier"),
-            }
-        )
+    for r in result:
+        for v in r.get("value", []):
+            rows.append(
+                {
+                    "Group Id": v.get("id"),
+                    "Group Name": v.get("displayName"),
+                    "Mail": v.get("mail"),
+                    "Description": v.get("description"),
+                    "Classification": v.get("classification"),
+                    "Mail Enabled": v.get("mailEnabled"),
+                    "Security Enabled": v.get("securityEnabled"),
+                    "Created Date Time": v.get("createdDateTime"),
+                    "Expiration Date Time": v.get("expirationDateTime"),
+                    "Renewed Date Time": v.get("renewedDateTime"),
+                    "Deleted Date Time": v.get("deletedDateTime"),
+                    "Visibility": v.get("visibility"),
+                    "Security Identifier": v.get("securityIdentifier"),
+                }
+            )
 
     if rows:
         df = pd.DataFrame(rows, columns=list(columns.keys()))
@@ -190,7 +191,9 @@ def list_group_members(group: str | UUID) -> pd.DataFrame:
 
     group_id = resolve_group_id(group)
 
-    result = _base_api(request=f"groups/{group_id}/members", client="graph").json()
+    result = _base_api(
+        request=f"groups/{group_id}/members", client="graph", uses_pagination=True
+    )
 
     columns = {
         "Member Id": "string",
@@ -209,22 +212,23 @@ def list_group_members(group: str | UUID) -> pd.DataFrame:
     df = _create_dataframe(columns=columns)
 
     rows = []
-    for v in result.get("value"):
-        rows.append(
-            {
-                "Member Id": v.get("id"),
-                "Member Name": v.get("displayName"),
-                "User Principal Name": v.get("userPrincipalName"),
-                "Mail": v.get("mail"),
-                "Job Title": v.get("jobTitle"),
-                "Office Location": v.get("officeLocation"),
-                "Mobile Phone": v.get("mobilePhone"),
-                "Business Phones": str(v.get("businessPhones")),
-                "Preferred Language": v.get("preferredLanguage"),
-                "Given Name": v.get("givenName"),
-                "Surname": v.get("surname"),
-            }
-        )
+    for r in result:
+        for v in r.get("value", []):
+            rows.append(
+                {
+                    "Member Id": v.get("id"),
+                    "Member Name": v.get("displayName"),
+                    "User Principal Name": v.get("userPrincipalName"),
+                    "Mail": v.get("mail"),
+                    "Job Title": v.get("jobTitle"),
+                    "Office Location": v.get("officeLocation"),
+                    "Mobile Phone": v.get("mobilePhone"),
+                    "Business Phones": str(v.get("businessPhones")),
+                    "Preferred Language": v.get("preferredLanguage"),
+                    "Given Name": v.get("givenName"),
+                    "Surname": v.get("surname"),
+                }
+            )
 
     if rows:
         df = pd.DataFrame(rows, columns=list(columns.keys()))
@@ -254,7 +258,9 @@ def list_group_owners(group: str | UUID) -> pd.DataFrame:
 
     group_id = resolve_group_id(group)
 
-    result = _base_api(request=f"groups/{group_id}/owners", client="graph").json()
+    result = _base_api(
+        request=f"groups/{group_id}/owners", client="graph", uses_pagination=True
+    )
 
     columns = {
         "Owner Id": "string",
@@ -273,22 +279,23 @@ def list_group_owners(group: str | UUID) -> pd.DataFrame:
     df = _create_dataframe(columns=columns)
 
     rows = []
-    for v in result.get("value"):
-        rows.append(
-            {
-                "Owner Id": v.get("id"),
-                "Owner Name": v.get("displayName"),
-                "User Principal Name": v.get("userPrincipalName"),
-                "Mail": v.get("mail"),
-                "Job Title": v.get("jobTitle"),
-                "Office Location": v.get("officeLocation"),
-                "Mobile Phone": v.get("mobilePhone"),
-                "Business Phones": str(v.get("businessPhones")),
-                "Preferred Language": v.get("preferredLanguage"),
-                "Given Name": v.get("givenName"),
-                "Surname": v.get("surname"),
-            }
-        )
+    for r in result:
+        for v in r.get("value", []):
+            rows.append(
+                {
+                    "Owner Id": v.get("id"),
+                    "Owner Name": v.get("displayName"),
+                    "User Principal Name": v.get("userPrincipalName"),
+                    "Mail": v.get("mail"),
+                    "Job Title": v.get("jobTitle"),
+                    "Office Location": v.get("officeLocation"),
+                    "Mobile Phone": v.get("mobilePhone"),
+                    "Business Phones": str(v.get("businessPhones")),
+                    "Preferred Language": v.get("preferredLanguage"),
+                    "Given Name": v.get("givenName"),
+                    "Surname": v.get("surname"),
+                }
+            )
 
     if rows:
         df = pd.DataFrame(rows, columns=list(columns.keys()))
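
The same page-then-items pattern recurs in _teams.py and _users.py below. The old code called .json() on a single response and so read only the first page (typically 100 objects for these Graph endpoints), silently truncating larger tenants; the pagination flag is the actual fix. A minimal sketch of what the new loops assume, namely that _base_api(..., uses_pagination=True) returns a list of parsed page payloads (the page contents here are invented):

    # Hypothetical pages, shaped like Graph API list responses: {"value": [...]}.
    pages = [
        {"value": [{"id": "1", "displayName": "Group A"}]},
        {"value": [{"id": "2", "displayName": "Group B"}]},
        {},  # a page without "value" is tolerated via .get("value", [])
    ]

    rows = []
    for r in pages:                   # one iteration per page
        for v in r.get("value", []):  # one iteration per item on the page
            rows.append({"Group Id": v.get("id"), "Group Name": v.get("displayName")})

    assert [row["Group Id"] for row in rows] == ["1", "2"]
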
sempy_labs/graph/_sensitivity_labels.py

@@ -5,6 +5,7 @@ from sempy_labs._helper_functions import (
     _base_api,
     _create_dataframe,
     _update_dataframe_datatypes,
+    _is_valid_uuid,
 )
 from sempy._utils._log import log
 
@@ -79,3 +80,41 @@ def list_sensitivity_labels(user: Optional[str | UUID] = None) -> pd.DataFrame:
     _update_dataframe_datatypes(dataframe=df, column_map=columns)
 
     return df
+
+
+@log
+def resolve_sensitivity_label_id(
+    label: str | UUID, user: Optional[str | UUID] = None
+) -> UUID | None:
+    """
+    Resolve a sensitivity label name or ID to its corresponding sensitivity label ID.
+
+    Service Principal Authentication is required (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    label : str | uuid.UUID
+        The name or ID of the sensitivity label.
+    user : str | uuid.UUID, default=None
+        The user ID or user principal name.
+
+    Returns
+    -------
+    uuid.UUID | None
+        The ID of the sensitivity label if found, otherwise None.
+    """
+
+    if _is_valid_uuid(label):
+        return str(label)
+
+    df = list_sensitivity_labels(user=user)
+
+    if df.empty:
+        return None
+
+    # Try to find the label by name
+    label_row = df[df["Sensitivity Label Name"] == label]
+    if not label_row.empty:
+        return label_row["Sensitivity Label Id"].iloc[0]
+
+    return None
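
A usage sketch for the new resolver. The label name, user, and GUID below are made up, and the import assumes the function is re-exported from sempy_labs.graph, consistent with the graph/__init__.py +8 entry above:

    from sempy_labs.graph import resolve_sensitivity_label_id

    # By name: lists the user's labels and matches on "Sensitivity Label Name".
    label_id = resolve_sensitivity_label_id("Confidential", user="user@contoso.com")

    # By ID: _is_valid_uuid() short-circuits before any API call. Note that despite
    # the UUID | None annotation, this branch returns str(label), and the name branch
    # returns a DataFrame cell, so callers get a string (or None) in practice.
    label_id = resolve_sensitivity_label_id("5f7a2a6c-3d4e-4b8a-9c1d-2e3f4a5b6c7d")
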
sempy_labs/graph/_teams.py

@@ -23,7 +23,7 @@ def list_teams() -> pd.DataFrame:
         A pandas dataframe showing a list of teams and their properties.
     """
 
-    result = _base_api(request="teams", client="graph").json()
+    result = _base_api(request="teams", client="graph", uses_pagination=True)
 
     columns = {
         "Team Id": "str",
@@ -43,23 +43,24 @@ def list_teams() -> pd.DataFrame:
     df = _create_dataframe(columns=columns)
 
     rows = []
-    for v in result.get("value"):
-        rows.append(
-            {
-                "Team Id": v.get("id"),
-                "Team Name": v.get("displayName"),
-                "Description": v.get("description"),
-                "Creation Date Time": v.get("createdDateTime"),
-                "Classification": v.get("classification"),
-                "Specialization": v.get("specialization"),
-                "Visibility": v.get("visibility"),
-                "Web Url": v.get("webUrl"),
-                "Archived": v.get("isArchived"),
-                "Favorite By Me": v.get("isFavoriteByMe"),
-                "Discoverable By Me": v.get("isDiscoverableByMe"),
-                "Member Count": v.get("memberCount"),
-            }
-        )
+    for r in result:
+        for v in r.get("value", []):
+            rows.append(
+                {
+                    "Team Id": v.get("id"),
+                    "Team Name": v.get("displayName"),
+                    "Description": v.get("description"),
+                    "Creation Date Time": v.get("createdDateTime"),
+                    "Classification": v.get("classification"),
+                    "Specialization": v.get("specialization"),
+                    "Visibility": v.get("visibility"),
+                    "Web Url": v.get("webUrl"),
+                    "Archived": v.get("isArchived"),
+                    "Favorite By Me": v.get("isFavoriteByMe"),
+                    "Discoverable By Me": v.get("isDiscoverableByMe"),
+                    "Member Count": v.get("memberCount"),
+                }
+            )
 
     if rows:
         df = pd.DataFrame(rows, columns=list(columns.keys()))
sempy_labs/graph/_user_licenses.py (new file)

@@ -0,0 +1,96 @@
+from uuid import UUID
+import sempy_labs._icons as icons
+from typing import List, Optional
+from sempy_labs._helper_functions import (
+    _base_api,
+)
+from sempy._utils._log import log
+from sempy_labs.graph._users import resolve_user_id
+
+
+@log
+def add_user_license(
+    user: str | UUID, sku_id: UUID, disabled_plans: Optional[UUID | List[UUID]] = None
+):
+    """
+    Assigns a license to a user.
+
+    This is a wrapper function for the following API: `user: assignLicense <https://learn.microsoft.com/graph/api/user-assignlicense>`_.
+
+    Service Principal Authentication is required (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    user : str | uuid.UUID
+        The user ID or user principal name.
+    sku_id : uuid.UUID
+        The SKU ID of the license to assign.
+    disabled_plans : Optional[uuid.UUID | List[uuid.UUID]], default=None
+        A single service plan ID or a list of service plan IDs to disable within the assigned license.
+    """
+
+    user_id = resolve_user_id(user)
+
+    payload = {
+        "addLicenses": [
+            {
+                "skuId": sku_id,
+            },
+        ],
+        "removeLicenses": [],
+    }
+
+    if disabled_plans:
+        if isinstance(disabled_plans, str):
+            disabled_plans = [disabled_plans]
+        payload["addLicenses"][0]["disabledPlans"] = disabled_plans
+
+    _base_api(
+        request=f"users/{user_id}/assignLicense",
+        client="graph",
+        method="post",
+        payload=payload,
+    )
+
+    print(
+        f"{icons.green_dot} The '{sku_id}' license has been assigned to the user '{user}'."
+    )
+
+
+@log
+def remove_user_license(user: str | UUID, sku_ids: UUID | List[UUID]):
+    """
+    Removes a license from a user.
+
+    This is a wrapper function for the following API: `user: assignLicense <https://learn.microsoft.com/graph/api/user-assignlicense>`_.
+
+    Service Principal Authentication is required (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    user : str | uuid.UUID
+        The user ID or user principal name.
+    sku_ids : uuid.UUID | List[uuid.UUID]
+        The SKU ID(s) of the license(s) to remove.
+    """
+
+    user_id = resolve_user_id(user)
+
+    if isinstance(sku_ids, str):
+        sku_ids = [sku_ids]
+
+    payload = {
+        "addLicenses": [],
+        "removeLicenses": sku_ids,
+    }
+
+    _base_api(
+        request=f"users/{user_id}/assignLicense",
+        client="graph",
+        method="post",
+        payload=payload,
+    )
+
+    print(
+        f"{icons.green_dot} The '{', '.join([str(s) for s in sku_ids])}' license(s) have been removed from the user '{user}'."
+    )
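
A usage sketch for the two new functions. Every GUID below is a placeholder; real skuId and service-plan IDs come from your tenant (for example via Graph's subscribedSkus endpoint), and the import assumes re-export via graph/__init__.py:

    from sempy_labs.graph import add_user_license, remove_user_license

    SKU_ID = "00000000-0000-0000-0000-000000000000"   # placeholder skuId
    PLAN_ID = "11111111-1111-1111-1111-111111111111"  # placeholder service plan to disable

    # Assign the license; a single disabled plan may be passed bare, since the
    # function wraps a str in a list before adding it to the payload.
    add_user_license("user@contoso.com", sku_id=SKU_ID, disabled_plans=PLAN_ID)

    # Remove it again; sku_ids likewise accepts a single value or a list.
    remove_user_license("user@contoso.com", sku_ids=SKU_ID)
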
sempy_labs/graph/_users.py

@@ -8,6 +8,7 @@ from .._helper_functions import (
     _is_valid_uuid,
     _base_api,
     _create_dataframe,
+    _update_dataframe_datatypes,
     _mount,
 )
 from sempy._utils._log import log
@@ -91,7 +92,7 @@ def list_users() -> pd.DataFrame:
         A pandas dataframe showing a list of users and their properties.
     """
 
-    result = _base_api(request="users", client="graph").json()
+    result = _base_api(request="users", client="graph", uses_pagination=True)
 
     columns = {
         "User Id": "string",
@@ -108,21 +109,27 @@ def list_users() -> pd.DataFrame:
 
     df = _create_dataframe(columns=columns)
 
-    for v in result.get("value"):
-        new_data = {
-            "User Id": v.get("id"),
-            "User Principal Name": v.get("userPrincipalName"),
-            "User Name": v.get("displayName"),
-            "Mail": v.get("mail"),
-            "Job Title": v.get("jobTitle"),
-            "Office Location": v.get("officeLocation"),
-            "Mobile Phone": v.get("mobilePhone"),
-            "Business Phones": str(v.get("businessPhones")),
-            "Preferred Language": v.get("preferredLanguage"),
-            "Surname": v.get("surname"),
-        }
-
-        df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+    rows = []
+    for r in result:
+        for v in r.get("value", []):
+            rows.append(
+                {
+                    "User Id": v.get("id"),
+                    "User Principal Name": v.get("userPrincipalName"),
+                    "User Name": v.get("displayName"),
+                    "Mail": v.get("mail"),
+                    "Job Title": v.get("jobTitle"),
+                    "Office Location": v.get("officeLocation"),
+                    "Mobile Phone": v.get("mobilePhone"),
+                    "Business Phones": str(v.get("businessPhones")),
+                    "Preferred Language": v.get("preferredLanguage"),
+                    "Surname": v.get("surname"),
+                }
+            )
+
+    if rows:
+        df = pd.DataFrame(rows, columns=list(columns.keys()))
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)
 
     return df
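Beyond pagination, list_users also drops the per-row pd.concat in favor of accumulating plain dicts and building the DataFrame once; repeated pd.concat in a loop is quadratic in the number of rows. The rewrite additionally applies _update_dataframe_datatypes (newly imported above), so the declared column types are actually enforced, matching the other Graph listers in this release.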
 
sempy_labs/lakehouse/_get_lakehouse_tables.py

@@ -33,6 +33,7 @@ def get_lakehouse_tables(
     extended: bool = False,
     count_rows: bool = False,
     export: bool = False,
+    exclude_shortcuts: bool = False,
 ) -> pd.DataFrame:
     """
     Shows the tables of a lakehouse and their respective properties. Option to include additional properties relevant to Direct Lake guardrails.
@@ -60,6 +61,8 @@ def get_lakehouse_tables(
         Obtains a row count for each lakehouse table.
     export : bool, default=False
         Exports the resulting dataframe to a delta table in the lakehouse.
+    exclude_shortcuts : bool, default=False
+        If True, excludes shortcuts.
 
     Returns
     -------
@@ -83,6 +86,9 @@ def get_lakehouse_tables(
         lakehouse=lakehouse, workspace=workspace_id
    )
 
+    # Test if valid lakehouse:
+    x = _base_api(f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}")
+
    if count_rows:  # Setting countrows defaults to extended=True
        extended = True
 
@@ -94,7 +100,7 @@ def get_lakehouse_tables(
            client="fabric_sp",
        )
 
-    except Exception:
+    except Exception:
        API_called = False
 
    rows = []
@@ -246,6 +252,32 @@ def get_lakehouse_tables(
        df["Row Count"] = df["Row Count"].astype(int)
        df["Row Count Guardrail Hit"] = df["Row Count"] > df["Row Count Guardrail"]
 
+    if exclude_shortcuts:
+        from sempy_labs.lakehouse._shortcuts import list_shortcuts
+
+        # Exclude shortcuts
+        shortcuts = (
+            list_shortcuts(lakehouse=lakehouse, workspace=workspace)
+            .query("`Shortcut Path`.str.startswith('/Tables')", engine="python")
+            .assign(
+                FullPath=lambda df: df["Shortcut Path"].str.rstrip("/")
+                + "/"
+                + df["Shortcut Name"]
+            )["FullPath"]
+            .tolist()
+        )
+
+        df["FullPath"] = df.apply(
+            lambda x: (
+                f"/Tables/{x['Table Name']}"
+                if pd.isna(x["Schema Name"]) or x["Schema Name"] == ""
+                else f"/Tables/{x['Schema Name']}/{x['Table Name']}"
+            ),
+            axis=1,
+        )
+
+        df = df[~df["FullPath"].isin(shortcuts)].reset_index(drop=True)
+
    if export:
        if not lakehouse_attached():
            raise ValueError(
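
The exclude_shortcuts branch works by normalizing both sides to a /Tables[/<schema>]/<table> path and anti-joining. A self-contained sketch of just that matching step, with invented frames standing in for the list_shortcuts() output and the table listing:

    import pandas as pd

    shortcuts = pd.DataFrame({
        "Shortcut Path": ["/Tables", "/Tables/sales", "/Files"],
        "Shortcut Name": ["dim_date", "fact_orders", "raw_dump"],
    })
    tables = pd.DataFrame({
        "Schema Name": [None, "sales", None],
        "Table Name": ["dim_date", "fact_orders", "local_table"],
    })

    # Keep only /Tables shortcuts and build their full paths, as the new code does.
    shortcut_paths = (
        shortcuts.query("`Shortcut Path`.str.startswith('/Tables')", engine="python")
        .assign(FullPath=lambda df: df["Shortcut Path"].str.rstrip("/") + "/" + df["Shortcut Name"])
        ["FullPath"].tolist()
    )  # ['/Tables/dim_date', '/Tables/sales/fact_orders']

    # Build the same shape of path for each table, schema-aware.
    tables["FullPath"] = tables.apply(
        lambda x: f"/Tables/{x['Table Name']}"
        if pd.isna(x["Schema Name"]) or x["Schema Name"] == ""
        else f"/Tables/{x['Schema Name']}/{x['Table Name']}",
        axis=1,
    )

    # Anti-join: only the non-shortcut table survives.
    print(tables[~tables["FullPath"].isin(shortcut_paths)]["Table Name"].tolist())  # ['local_table']
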
sempy_labs/lakehouse/_lakehouse.py

@@ -93,7 +93,9 @@ def optimize_lakehouse_tables(
 
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
 
-    df = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+    df = get_lakehouse_tables(
+        lakehouse=lakehouse, workspace=workspace, exclude_shortcuts=True
+    )
     df_delta = df[df["Format"] == "delta"]
 
     if isinstance(tables, str):
@@ -142,7 +144,9 @@ def vacuum_lakehouse_tables(
 
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
 
-    df = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+    df = get_lakehouse_tables(
+        lakehouse=lakehouse, workspace=workspace, exclude_shortcuts=True
+    )
     df_delta = df[df["Format"] == "delta"]
 
     if isinstance(tables, str):
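Both maintenance helpers now pass exclude_shortcuts=True, presumably so that OPTIMIZE and VACUUM are no longer attempted against shortcut-backed tables, whose underlying delta files live in (and should be maintained by) the source lakehouse or external store rather than the lakehouse being optimized.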
sempy_labs/lakehouse/_partitioning.py (new file)

@@ -0,0 +1,165 @@
+from typing import Optional, List
+from uuid import UUID
+from sempy_labs._helper_functions import (
+    _create_spark_session,
+    create_abfss_path,
+    resolve_workspace_id,
+    resolve_lakehouse_id,
+    _get_delta_table,
+)
+from sempy._utils._log import log
+
+
+@log
+def _get_partitions(
+    table_name: str,
+    schema_name: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+):
+
+    workspace_id = resolve_workspace_id(workspace)
+    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+    path = create_abfss_path(lakehouse_id, workspace_id, table_name, schema_name)
+
+    delta_table = _get_delta_table(path)
+    details_df = delta_table.detail()
+
+    return details_df.collect()[0].asDict()
+
+
+@log
+def is_partitioned(
+    table: str,
+    schema: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+) -> bool:
+    """
+    Checks if a delta table is partitioned.
+
+    Parameters
+    ----------
+    table : str
+        The name of the delta table.
+    schema : str, optional
+        The schema of the table to check. If not provided, the default schema is used.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    bool
+        True if the table is partitioned, False otherwise.
+    """
+
+    details = _get_partitions(
+        table_name=table, schema_name=schema, lakehouse=lakehouse, workspace=workspace
+    )
+    return len(details["partitionColumns"]) > 0
+
+
+@log
+def list_partitioned_columns(
+    table: str,
+    schema: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+) -> List[str]:
+    """
+    Lists the partitioned columns of a delta table.
+
+    Parameters
+    ----------
+    table : str
+        The name of the delta table.
+    schema : str, optional
+        The schema of the table to check. If not provided, the default schema is used.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    List[str]
+        The list of partitioned columns.
+    """
+
+    details = _get_partitions(
+        table_name=table, schema_name=schema, lakehouse=lakehouse, workspace=workspace
+    )
+
+    return details["partitionColumns"]
+
+
+@log
+def is_over_partitioned(
+    table: str,
+    schema: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+    total_table_size_gb: int = 1000,
+    average_partition_size_gb: int = 1,
+) -> bool:
+    """
+    Checks if a delta table is over-partitioned.
+
+    Parameters
+    ----------
+    table : str
+        The name of the delta table.
+    schema : str, optional
+        The schema of the table to check. If not provided, the default schema is used.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    total_table_size_gb : int, default=1000
+        Threshold for total table size in GB (default 1TB).
+    average_partition_size_gb : int, default=1
+        Threshold for average partition size in GB.
+
+    Returns
+    -------
+    bool
+        True if the table is over-partitioned, False otherwise.
+    """
+
+    workspace_id = resolve_workspace_id(workspace)
+    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+    path = create_abfss_path(lakehouse_id, workspace_id, table, schema)
+    # Get DeltaTable details
+    spark = _create_spark_session()
+    details_df = spark.sql(f"DESCRIBE DETAIL delta.`{path}`")
+    details = details_df.collect()[0].asDict()
+
+    # Extract relevant fields
+    size_bytes = details["sizeInBytes"]
+    partition_cols = details["partitionColumns"]
+    num_files = details["numFiles"]
+
+    total_size_gb = size_bytes / (1024**3)
+
+    # Only check if the table is partitioned
+    if len(partition_cols) > 0 and num_files > 0:
+        avg_partition_size_gb = total_size_gb / num_files
+
+        if (
+            total_size_gb < total_table_size_gb
+            or avg_partition_size_gb < average_partition_size_gb
+        ):
+            return True
+
+    return False
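
To make the is_over_partitioned heuristic concrete, a worked example with invented numbers. Note that the "average partition size" is really sizeInBytes / numFiles, that is, average file size, not size per distinct partition value:

    # Values as DESCRIBE DETAIL would report them (invented for illustration).
    size_bytes = 500 * 1024**3          # a 500 GB table
    num_files = 2_000
    partition_cols = ["order_date"]     # partitioned, so the check applies

    total_size_gb = size_bytes / (1024**3)             # 500.0
    avg_partition_size_gb = total_size_gb / num_files  # 0.25

    # Defaults: total_table_size_gb=1000, average_partition_size_gb=1.
    # The two conditions are OR-ed, so 500 < 1000 alone already flags the table;
    # with the defaults, any partitioned table under 1 TB is reported as
    # over-partitioned regardless of its average file size.
    flagged = total_size_gb < 1000 or avg_partition_size_gb < 1
    assert flagged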