semantic-link-labs 0.9.9__py3-none-any.whl → 0.9.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of semantic-link-labs might be problematic.

@@ -5,6 +5,7 @@ from typing import Optional, Tuple
 from sempy._utils._log import log
 from sempy_labs._helper_functions import (
     _base_api,
+    _build_url,
     _create_dataframe,
     _update_dataframe_datatypes,
     _is_valid_uuid,
@@ -57,6 +58,24 @@ def _resolve_capacity_name_and_id(
     return capacity_name, capacity_id


+def _resolve_capacity_id(
+    capacity: str | UUID,
+) -> UUID:
+
+    if _is_valid_uuid(capacity):
+        capacity_id = capacity
+    else:
+        dfC = list_capacities(capacity=capacity)
+        if dfC.empty:
+            raise ValueError(
+                f"{icons.red_dot} The '{capacity}' capacity was not found."
+            )
+
+        capacity_id = dfC["Capacity Id"].iloc[0]
+
+    return capacity_id
+
+
 def _list_capacities_meta() -> pd.DataFrame:
     """
     Shows a list of capacities and their properties. This function is the admin version.
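
Not part of the diff: a minimal sketch of how the new name-or-UUID resolution helper is used. The module path below is an assumption (the diff viewer does not show file names), and the function is private, so this is illustration only rather than documented API.

```python
from sempy_labs.admin._capacities import _resolve_capacity_id  # module path assumed

# A UUID is returned unchanged; a name is resolved through list_capacities(capacity=...),
# and a ValueError is raised when no matching capacity is found.
capacity_id = _resolve_capacity_id(capacity="My Capacity")   # name lookup
same_id = _resolve_capacity_id(capacity=capacity_id)         # UUID passthrough
```
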
@@ -221,7 +240,7 @@ def list_capacities(
         "Sku": "string",
         "Region": "string",
         "State": "string",
-        "Admins": "string",
+        "Admins": "list",
     }
     df = _create_dataframe(columns=columns)

@@ -309,3 +328,144 @@ def list_capacity_users(capacity: str | UUID) -> pd.DataFrame:
     _update_dataframe_datatypes(dataframe=df, column_map=columns)

     return df
+
+
+@log
+def get_refreshables(
+    top: Optional[int] = None,
+    expand: Optional[str] = None,
+    filter: Optional[str] = None,
+    skip: Optional[int] = None,
+    capacity: Optional[str | UUID] = None,
+) -> pd.DataFrame | dict:
+    """
+    Returns a list of refreshables for the organization within a capacity.
+
+    Power BI retains a seven-day refresh history for each dataset, up to a maximum of sixty refreshes.
+
+    This is a wrapper function for the following API: `Admin - Get Refreshables <https://learn.microsoft.com/rest/api/power-bi/admin/get-refreshables>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    top : int, default=None
+        Returns only the first n results.
+    expand : str, default=None
+        Accepts a comma-separated list of data types, which will be expanded inline in the response. Supports capacities and groups.
+    filter : str, default=None
+        Returns a subset of results based on an OData filter query parameter condition.
+    skip : int, default=None
+        Skips the first n results. Use with top to fetch results beyond the first 1000.
+    capacity : str | uuid.UUID, default=None
+        The capacity name or ID to filter. If None, all capacities are returned.
+
+    Returns
+    -------
+    pandas.DataFrame
+        Returns a list of refreshables for the organization within a capacity.
+    """
+
+    columns = {
+        "Workspace Id": "string",
+        "Workspace Name": "string",
+        "Item Id": "string",
+        "Item Name": "string",
+        "Item Kind": "string",
+        "Capacity Id": "string",
+        "Capacity Name": "string",
+        "Capacity SKU": "string",
+        "Refresh Count": "int",
+        "Refresh Failures": "int",
+        "Average Duration": "float",
+        "Median Duration": "float",
+        "Refreshes Per Day": "int",
+        "Refresh Type": "string",
+        "Start Time": "string",
+        "End Time": "string",
+        "Status": "string",
+        "Request Id": "string",
+        "Service Exception Json": "string",
+        "Extended Status": "dict",
+        "Refresh Attempts": "list",
+        "Refresh Schedule Days": "list",
+        "Refresh Schedule Times": "list",
+        "Refresh Schedule Enabled": "bool",
+        "Refresh Schedule Local Timezone Id": "string",
+        "Refresh Schedule Notify Option": "string",
+        "Configured By": "list",
+    }
+
+    df = _create_dataframe(columns=columns)
+
+    params = {}
+    url = (
+        "/v1.0/myorg/admin/capacities/refreshables"
+        if capacity is None
+        else f"/v1.0/myorg/admin/capacities/{_resolve_capacity_id(capacity=capacity)}/refreshables"
+    )
+
+    if top is not None:
+        params["$top"] = top
+
+    if expand is not None:
+        params["$expand"] = expand
+
+    if filter is not None:
+        params["$filter"] = filter
+
+    if skip is not None:
+        params["$skip"] = skip
+
+    url = _build_url(url, params)
+
+    responses = _base_api(request=url, client="fabric_sp")
+
+    refreshables = []
+
+    for i in responses.json().get("value", []):
+        last_refresh = i.get("lastRefresh", {})
+        refresh_schedule = i.get("refreshSchedule", {})
+        new_data = {
+            "Workspace Id": i.get("group", {}).get("id"),
+            "Workspace Name": i.get("group", {}).get("name"),
+            "Item Id": i.get("id"),
+            "Item Name": i.get("name"),
+            "Item Kind": i.get("kind"),
+            "Capacity Id": (
+                i.get("capacity", {}).get("id").lower()
+                if i.get("capacity", {}).get("id")
+                else None
+            ),
+            "Capacity Name": i.get("capacity", {}).get("displayName"),
+            "Capacity SKU": i.get("capacity", {}).get("sku"),
+            "Refresh Count": i.get("refreshCount", 0),
+            "Refresh Failures": i.get("refreshFailures", 0),
+            "Average Duration": i.get("averageDuration", 0),
+            "Median Duration": i.get("medianDuration", 0),
+            "Refreshes Per Day": i.get("refreshesPerDay", 0),
+            "Refresh Type": last_refresh.get("refreshType"),
+            "Start Time": last_refresh.get("startTime"),
+            "End Time": last_refresh.get("endTime"),
+            "Status": last_refresh.get("status"),
+            "Request Id": last_refresh.get("requestId"),
+            "Service Exception Json": last_refresh.get("serviceExceptionJson"),
+            "Extended Status": last_refresh.get("extendedStatus"),
+            "Refresh Attempts": last_refresh.get("refreshAttempts"),
+            "Refresh Schedule Days": refresh_schedule.get("days"),
+            "Refresh Schedule Times": refresh_schedule.get("times"),
+            "Refresh Schedule Enabled": refresh_schedule.get("enabled"),
+            "Refresh Schedule Local Timezone Id": refresh_schedule.get(
+                "localTimeZoneId"
+            ),
+            "Refresh Schedule Notify Option": refresh_schedule.get("notifyOption"),
+            "Configured By": i.get("configuredBy"),
+        }
+
+        refreshables.append(new_data)
+
+    if len(refreshables) > 0:
+        df = pd.DataFrame(refreshables)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+    return df
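
Not part of the diff: a hedged usage sketch for the new `get_refreshables` wrapper, assuming it is surfaced through the `sempy_labs.admin` namespace like the other admin functions in this module, and that the `$expand` literal follows the underlying API's wording.

```python
import sempy_labs.admin as admin  # exported name assumed

# All capacities, expanding the capacity and group objects so the
# "Capacity Name" / "Workspace Name" columns are populated.
df_all = admin.get_refreshables(expand="capacity,group")  # expand literal assumed

# Scope to a single capacity (name or UUID) and page with $top / $skip,
# mirroring the Power BI admin Get Refreshables API.
df_cap = admin.get_refreshables(capacity="My Capacity", top=100, skip=0)

print(df_cap[["Item Name", "Refresh Count", "Refresh Failures", "Average Duration"]])
```
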
@@ -0,0 +1,45 @@
+from typing import Optional
+from sempy_labs._helper_functions import (
+    _base_api,
+)
+from sempy_labs.admin._items import (
+    _resolve_item_id,
+)
+from uuid import UUID
+from sempy._utils._log import log
+
+
+@log
+def export_dataflow(
+    dataflow: str | UUID,
+    workspace: Optional[str | UUID] = None,
+) -> dict:
+    """
+    Exports the specified dataflow definition as JSON.
+
+    This is a wrapper function for the following API: `Admin - Dataflows ExportDataflowAsAdmin <https://learn.microsoft.com/rest/api/power-bi/admin/dataflows-export-dataflow-as-admin>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    dataflow : str | UUID, default=None
+        The dataflow Name or Id.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or id.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+        Only used if given a dataflow name and not an id.
+
+    Returns
+    -------
+    dict
+        Exported Json file.
+    """
+    dataflow_id = _resolve_item_id(item=dataflow, type="dataflow", workspace=workspace)
+
+    url = f"/v1.0/myorg/admin/dataflows/{dataflow_id}/export"
+
+    response = _base_api(request=url, client="fabric_sp")
+
+    return response.json()
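
Not part of the diff: a usage sketch for the new `export_dataflow` admin wrapper, assuming it is exposed via `sempy_labs.admin` and that persisting the returned dictionary is left to the caller (the path below assumes a lakehouse is attached to the notebook).

```python
import json
import sempy_labs.admin as admin  # exported name assumed

# By name (workspace disambiguates) or directly by dataflow Id.
definition = admin.export_dataflow(dataflow="My Dataflow", workspace="Sales")

# The wrapper returns the exported definition as a dict; write it out yourself.
with open("/lakehouse/default/Files/my_dataflow.json", "w") as f:
    json.dump(definition, f, indent=2)
```
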
@@ -17,20 +17,26 @@ from sempy_labs._helper_functions import (


 def _resolve_item_id(
-    item_name: str,
+    item: str,
     type: Optional[str] = None,
     workspace: Optional[str | UUID] = None,
 ) -> UUID:
+    if _is_valid_uuid(item):
+        item_id = item

-    dfI = list_items(workspace=workspace, type=type)
-    dfI_filt = dfI[dfI["Item Name"] == item_name]
+    else:
+        workspace_id = _resolve_workspace_name_and_id(workspace)[1]
+        dfI = list_items(workspace=workspace_id, type=type)
+        dfI_filt = dfI[dfI["Item Name"] == item]

-    if len(dfI_filt) == 0:
-        raise ValueError(
-            f"The '{item_name}' {type} does not exist within the '{workspace}' workspace or is not of type '{type}'."
-        )
+        if len(dfI_filt) == 0:
+            raise ValueError(
+                f"The '{item}' {type} does not exist within the '{workspace}' workspace or is not of type '{type}'."
+            )
+
+        item_id = dfI_filt["Item Id"].iloc[0]

-    return dfI_filt["Item Id"].iloc[0]
+    return item_id


 def _resolve_item_name_and_id(
@@ -84,9 +90,8 @@ def list_items(
     capacity : str | uuid.UUID, default=None
         The capacity name or id.
     workspace : str | uuid.UUID, default=None
-        The Fabric workspace name.
-        Defaults to None which resolves to the workspace of the attached lakehouse
-        or if no lakehouse attached, resolves to the workspace of the notebook.
+        The Fabric workspace name or id.
+        Defaults to None which looks into all the workspaces.
     state : str, default=None
         The item state.
     type : str, default=None
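
Not part of the diff: per the updated docstring, `list_items` now scans all workspaces when `workspace=None`. A short sketch, assuming the function is exposed via `sempy_labs.admin`.

```python
import sempy_labs.admin as admin  # exported name assumed

# Across every workspace the caller can see (workspace=None is the documented default).
all_lakehouses = admin.list_items(type="Lakehouse")

# Or narrowed to one workspace, by name or by UUID.
sales_reports = admin.list_items(workspace="Sales", type="Report")
```
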
@@ -32,7 +32,7 @@ def list_tenant_settings() -> pd.DataFrame:
         "Enabled": "bool",
         "Can Specify Security Groups": "bool",
         "Tenant Setting Group": "string",
-        "Enabled Security Groups": "string",
+        "Enabled Security Groups": "list",
     }
     df = _create_dataframe(columns=columns)

@@ -86,9 +86,9 @@ def list_capacity_tenant_settings_overrides(
         "Setting Title": "string",
         "Setting Enabled": "bool",
         "Can Specify Security Groups": "bool",
-        "Enabled Security Groups": "string",
+        "Enabled Security Groups": "list",
         "Tenant Setting Group": "string",
-        "Tenant Setting Properties": "string",
+        "Tenant Setting Properties": "list",
         "Delegate to Workspace": "bool",
         "Delegated From": "string",
     }
@@ -395,7 +395,7 @@ def list_workspaces_tenant_settings_overrides() -> pd.DataFrame:
         "Title": "string",
         "Enabled": "bool",
         "Can Specify Security Groups": "bool",
-        "Enabled Security Groups": "string",
+        "Enabled Security Groups": "list",
         "Tenant Setting Group": "string",
         "Delegated From": "string",
     }
@@ -454,7 +454,7 @@ def list_domain_tenant_settings_overrides() -> pd.DataFrame:
         "Title": "string",
         "Enabled": "bool",
         "Can Specify Security Groups": "bool",
-        "Enabled Security Groups": "string",
+        "Enabled Security Groups": "list",
         "Tenant Setting Group": "string",
         "Delegated To Workspace": "bool",
         "Delegated From": "string",
@@ -56,34 +56,33 @@ def generate_shared_expression(
         item=item_name, type=item_type, workspace=workspace_id
     )

-    item_type_rest = f"{item_type.lower()}s"
-    response = _base_api(
-        request=f"/v1/workspaces/{workspace_id}/{item_type_rest}/{item_id}"
-    )
+    if use_sql_endpoint:
+        item_type_rest = f"{item_type.lower()}s"
+        response = _base_api(
+            request=f"/v1/workspaces/{workspace_id}/{item_type_rest}/{item_id}"
+        )

-    prop = response.json().get("properties")
+        prop = response.json().get("properties")

-    if item_type == "Lakehouse":
-        sqlprop = prop.get("sqlEndpointProperties")
-        sqlEPCS = sqlprop.get("connectionString")
-        sqlepid = sqlprop.get("id")
-        provStatus = sqlprop.get("provisioningStatus")
-    elif item_type == "Warehouse":
-        sqlEPCS = prop.get("connectionString")
-        sqlepid = item_id
-        provStatus = None
+        if item_type == "Lakehouse":
+            sqlprop = prop.get("sqlEndpointProperties")
+            sqlEPCS = sqlprop.get("connectionString")
+            sqlepid = sqlprop.get("id")
+            provStatus = sqlprop.get("provisioningStatus")
+        elif item_type == "Warehouse":
+            sqlEPCS = prop.get("connectionString")
+            sqlepid = item_id
+            provStatus = None

-    if provStatus == "InProgress":
-        raise ValueError(
-            f"{icons.red_dot} The SQL Endpoint for the '{item_name}' lakehouse within the '{workspace_name}' workspace has not yet been provisioned. Please wait until it has been provisioned."
-        )
+        if provStatus == "InProgress":
+            raise ValueError(
+                f"{icons.red_dot} The SQL Endpoint for the '{item_name}' {item_type.lower()} within the '{workspace_name}' workspace has not yet been provisioned. Please wait until it has been provisioned."
+            )

-    start_expr = "let\n\tdatabase = "
-    end_expr = "\nin\n\tdatabase"
-    mid_expr = f'Sql.Database("{sqlEPCS}", "{sqlepid}")'
-
-    # Build DL/OL expression
-    if not use_sql_endpoint and item_type == "Lakehouse":
-        return f'AzureDataLakeStorage{{"server":"onelake.dfs.fabric.microsoft.com","path":"/{workspace_id}/{item_id}/"}}'
-    else:
+        start_expr = "let\n\tdatabase = "
+        end_expr = "\nin\n\tdatabase"
+        mid_expr = f'Sql.Database("{sqlEPCS}", "{sqlepid}")'
         return f"{start_expr}{mid_expr}{end_expr}"
+    else:
+        # Build DL/OL expression
+        return f"""let\n\tSource = AzureStorage.DataLake("onelake.dfs.fabric.microsoft.com/{workspace_id}/{item_id}")\nin\n\tSource"""
@@ -1,14 +1,17 @@
 import pandas as pd
+import re
 from sempy_labs._helper_functions import (
     format_dax_object_name,
     resolve_workspace_name_and_id,
     resolve_lakehouse_name_and_id,
     _create_dataframe,
-    _create_spark_session,
+    _get_delta_table,
+    _pure_python_notebook,
 )
 from typing import Optional
 from sempy._utils._log import log
 from uuid import UUID
+import sempy_labs._icons as icons


 @log
@@ -16,7 +19,9 @@ def get_lakehouse_columns(
     lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
-    Shows the tables and columns of a lakehouse and their respective properties.
+    Shows the tables and columns of a lakehouse and their respective properties. This function can be executed in either a PySpark or pure Python notebook. Note that data types may show differently when using PySpark vs pure Python.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).

     Parameters
     ----------
@@ -34,7 +39,6 @@ def get_lakehouse_columns(
         Shows the tables/columns within a lakehouse and their properties.
     """
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
-    from delta import DeltaTable

     columns = {
         "Workspace Name": "string",
@@ -51,29 +55,48 @@ def get_lakehouse_columns(
         lakehouse=lakehouse, workspace=workspace_id
     )

-    spark = _create_spark_session()
-
     tables = get_lakehouse_tables(
         lakehouse=lakehouse_id, workspace=workspace_id, extended=False, count_rows=False
     )
     tables_filt = tables[tables["Format"] == "delta"]

-    for _, r in tables_filt.iterrows():
-        table_name = r["Table Name"]
-        path = r["Location"]
-        delta_table = DeltaTable.forPath(spark, path)
-        sparkdf = delta_table.toDF()
-
-        for col_name, data_type in sparkdf.dtypes:
-            full_column_name = format_dax_object_name(table_name, col_name)
-            new_data = {
+    def add_column_metadata(table_name, col_name, data_type):
+        new_rows.append(
+            {
                 "Workspace Name": workspace_name,
-                "Lakehouse Name": lakehouse,
+                "Lakehouse Name": lakehouse_name,
                 "Table Name": table_name,
                 "Column Name": col_name,
-                "Full Column Name": full_column_name,
+                "Full Column Name": format_dax_object_name(table_name, col_name),
                 "Data Type": data_type,
             }
-            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+        )
+
+    new_rows = []
+
+    for _, r in tables_filt.iterrows():
+        table_name = r["Table Name"]
+        path = r["Location"]
+
+        if _pure_python_notebook():
+            from deltalake import DeltaTable
+
+            table_schema = DeltaTable(path).schema()
+
+            for field in table_schema.fields:
+                col_name = field.name
+                match = re.search(r'"(.*?)"', str(field.type))
+                if not match:
+                    raise ValueError(
+                        f"{icons.red_dot} Could not find data type for column {col_name}."
+                    )
+                data_type = match.group(1)
+                add_column_metadata(table_name, col_name, data_type)
+        else:
+            delta_table = _get_delta_table(path=path)
+            table_df = delta_table.toDF()
+
+            for col_name, data_type in table_df.dtypes:
+                add_column_metadata(table_name, col_name, data_type)

-    return df
+    return pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
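
Not part of the diff: a usage sketch for the reworked `get_lakehouse_columns`, which now uses `deltalake` in pure Python notebooks and Spark otherwise. The import path is assumed to be `sempy_labs.lakehouse`.

```python
from sempy_labs.lakehouse import get_lakehouse_columns  # import path assumed

# Works in both PySpark and pure Python notebooks; the "Data Type" strings may
# differ between the two engines, as noted in the updated docstring.
df = get_lakehouse_columns(lakehouse="MyLakehouse", workspace="Sales")
print(df[["Table Name", "Column Name", "Data Type"]].head())
```
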
@@ -1,7 +1,7 @@
-import sempy.fabric as fabric
+import os
 import pandas as pd
 import pyarrow.parquet as pq
-import datetime
+from datetime import datetime
 from sempy_labs._helper_functions import (
     _get_column_aggregate,
     resolve_workspace_name_and_id,
@@ -9,7 +9,13 @@ from sempy_labs._helper_functions import (
     save_as_delta_table,
     _base_api,
     _create_dataframe,
-    _create_spark_session,
+    resolve_workspace_id,
+    resolve_lakehouse_id,
+    _read_delta_table,
+    _get_delta_table,
+    _mount,
+    create_abfss_path,
+    _pure_python_notebook,
 )
 from sempy_labs.directlake._guardrails import (
     get_sku_size,
@@ -33,8 +39,12 @@ def get_lakehouse_tables(
     """
     Shows the tables of a lakehouse and their respective properties. Option to include additional properties relevant to Direct Lake guardrails.

+    This function can be executed in either a PySpark or pure Python notebook.
+
     This is a wrapper function for the following API: `Tables - List Tables <https://learn.microsoft.com/rest/api/fabric/lakehouse/tables/list-tables>`_ plus extended capabilities.

+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     lakehouse : str | uuid.UUID, default=None
@@ -76,8 +86,8 @@ def get_lakehouse_tables(
         extended = True

     if (
-        workspace_id != fabric.get_workspace_id()
-        and lakehouse_id != fabric.get_lakehouse_id()
+        workspace_id != resolve_workspace_id()
+        and lakehouse_id != resolve_lakehouse_id()
         and count_rows
     ):
         raise ValueError(
@@ -88,6 +98,7 @@ def get_lakehouse_tables(
     responses = _base_api(
         request=f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables",
         uses_pagination=True,
+        client="fabric_sp",
     )

     if not responses[0].get("data"):
@@ -112,40 +123,59 @@ def get_lakehouse_tables(
     if extended:
         sku_value = get_sku_size(workspace_id)
         guardrail = get_directlake_guardrails_for_sku(sku_value)
-        spark = _create_spark_session()
-        df["Files"] = None
-        df["Row Groups"] = None
-        df["Table Size"] = None
+        local_path = _mount()
+
+        df["Files"], df["Row Groups"], df["Table Size"] = None, None, None
         if count_rows:
             df["Row Count"] = None
+
         for i, r in df.iterrows():
-            tName = r["Table Name"]
+            table_name = r["Table Name"]
            if r["Type"] == "Managed" and r["Format"] == "delta":
-                detail_df = spark.sql(f"DESCRIBE DETAIL `{tName}`").collect()[0]
-                num_files = detail_df.numFiles
-                size_in_bytes = detail_df.sizeInBytes
-
-                delta_table_path = f"Tables/{tName}"
-                latest_files = (
-                    spark.read.format("delta").load(delta_table_path).inputFiles()
+                delta_table_path = create_abfss_path(
+                    lakehouse_id, workspace_id, table_name
                 )
-                file_paths = [f.split("/")[-1] for f in latest_files]

-                # Handle FileNotFoundError
+                if _pure_python_notebook():
+                    from deltalake import DeltaTable
+
+                    delta_table = DeltaTable(delta_table_path)
+                    latest_files = [
+                        file["path"]
+                        for file in delta_table.get_add_actions().to_pylist()
+                    ]
+                    size_in_bytes = 0
+                    for f in latest_files:
+                        local_file_path = os.path.join(
+                            local_path, "Tables", table_name, os.path.basename(f)
+                        )
+                        if os.path.exists(local_file_path):
+                            size_in_bytes += os.path.getsize(local_file_path)
+                    num_latest_files = len(latest_files)
+                else:
+                    delta_table = _get_delta_table(delta_table_path)
+                    latest_files = _read_delta_table(delta_table_path).inputFiles()
+                    table_df = delta_table.toDF()
+                    table_details = delta_table.detail().collect()[0].asDict()
+                    num_latest_files = table_details.get("numFiles", 0)
+                    size_in_bytes = table_details.get("sizeInBytes", 0)
+
+                table_path = os.path.join(local_path, "Tables", table_name)
+                file_paths = [os.path.basename(f) for f in latest_files]
+
                 num_rowgroups = 0
                 for filename in file_paths:
-                    try:
-                        num_rowgroups += pq.ParquetFile(
-                            f"/lakehouse/default/{delta_table_path}/{filename}"
-                        ).num_row_groups
-                    except FileNotFoundError:
-                        continue
-                df.at[i, "Files"] = num_files
+                    parquet_file = pq.ParquetFile(f"{table_path}/{filename}")
+                    num_rowgroups += parquet_file.num_row_groups
+                df.at[i, "Files"] = num_latest_files
                 df.at[i, "Row Groups"] = num_rowgroups
                 df.at[i, "Table Size"] = size_in_bytes
             if count_rows:
-                num_rows = spark.table(tName).count()
-                df.at[i, "Row Count"] = num_rows
+                if _pure_python_notebook():
+                    row_count = delta_table.to_pyarrow_table().num_rows
+                else:
+                    row_count = table_df.count()
+                df.at[i, "Row Count"] = row_count

     if extended:
         intColumns = ["Files", "Row Groups", "Table Size"]
@@ -168,19 +198,16 @@ def get_lakehouse_tables(
     if export:
         if not lakehouse_attached():
             raise ValueError(
-                f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
+                f"{icons.red_dot} In order to save the dataframe, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
             )

-        (current_lakehouse_name, current_lakehouse_id) = resolve_lakehouse_name_and_id()
-        lakeTName = "lakehouse_table_details"
-        lakeT_filt = df[df["Table Name"] == lakeTName]
+        lake_table_name = "lakehouse_table_details"
+        df_filt = df[df["Table Name"] == lake_table_name]

-        if len(lakeT_filt) == 0:
+        if df_filt.empty:
             run_id = 1
         else:
-            max_run_id = _get_column_aggregate(
-                lakehouse=current_lakehouse_name, table_name=lakeTName
-            )
+            max_run_id = _get_column_aggregate(table_name=lake_table_name)
             run_id = max_run_id + 1

         export_df = df.copy()
@@ -224,13 +251,13 @@ def get_lakehouse_tables(
             export_df[c] = export_df[c].astype(bool)

        print(
-            f"{icons.in_progress} Saving Lakehouse table properties to the '{lakeTName}' table in the lakehouse...\n"
+            f"{icons.in_progress} Saving Lakehouse table properties to the '{lake_table_name}' table in the lakehouse...\n"
        )
-        export_df["Timestamp"] = datetime.datetime.now()
+        export_df["Timestamp"] = datetime.now()
        export_df["RunId"] = run_id

        save_as_delta_table(
-            dataframe=export_df, delta_table_name=lakeTName, write_mode="append"
+            dataframe=export_df, delta_table_name=lake_table_name, write_mode="append"
        )

    return df
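
Not part of the diff: a usage sketch for the updated `get_lakehouse_tables`, assuming the `sempy_labs.lakehouse` import path. Per the diff, counting rows still requires running against the currently attached lakehouse and workspace, so the example below relies on the defaults.

```python
from sempy_labs.lakehouse import get_lakehouse_tables  # import path assumed

# Extended properties (file and row-group counts, table size) now work in both
# PySpark and pure Python notebooks; count_rows=True only works against the
# attached lakehouse/workspace, hence no explicit lakehouse/workspace arguments.
df = get_lakehouse_tables(extended=True, count_rows=True)
print(df[["Table Name", "Files", "Row Groups", "Table Size", "Row Count"]])
```
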