semantic-link-labs 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of semantic-link-labs might be problematic.

Files changed (103)
  1. semantic_link_labs-0.7.0.dist-info/METADATA +148 -0
  2. semantic_link_labs-0.7.0.dist-info/RECORD +111 -0
  3. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +26 -2
  5. sempy_labs/_ai.py +3 -65
  6. sempy_labs/_bpa_translation/_translations_am-ET.po +828 -0
  7. sempy_labs/_bpa_translation/_translations_ar-AE.po +860 -0
  8. sempy_labs/_bpa_translation/_translations_cs-CZ.po +894 -0
  9. sempy_labs/_bpa_translation/_translations_da-DK.po +894 -0
  10. sempy_labs/_bpa_translation/_translations_de-DE.po +933 -0
  11. sempy_labs/_bpa_translation/_translations_el-GR.po +936 -0
  12. sempy_labs/_bpa_translation/_translations_es-ES.po +915 -0
  13. sempy_labs/_bpa_translation/_translations_fa-IR.po +883 -0
  14. sempy_labs/_bpa_translation/_translations_fr-FR.po +938 -0
  15. sempy_labs/_bpa_translation/_translations_ga-IE.po +912 -0
  16. sempy_labs/_bpa_translation/_translations_he-IL.po +855 -0
  17. sempy_labs/_bpa_translation/_translations_hi-IN.po +892 -0
  18. sempy_labs/_bpa_translation/_translations_hu-HU.po +910 -0
  19. sempy_labs/_bpa_translation/_translations_is-IS.po +887 -0
  20. sempy_labs/_bpa_translation/_translations_it-IT.po +931 -0
  21. sempy_labs/_bpa_translation/_translations_ja-JP.po +805 -0
  22. sempy_labs/_bpa_translation/_translations_nl-NL.po +924 -0
  23. sempy_labs/_bpa_translation/_translations_pl-PL.po +913 -0
  24. sempy_labs/_bpa_translation/_translations_pt-BR.po +909 -0
  25. sempy_labs/_bpa_translation/_translations_pt-PT.po +904 -0
  26. sempy_labs/_bpa_translation/_translations_ru-RU.po +909 -0
  27. sempy_labs/_bpa_translation/_translations_ta-IN.po +922 -0
  28. sempy_labs/_bpa_translation/_translations_te-IN.po +896 -0
  29. sempy_labs/_bpa_translation/_translations_th-TH.po +873 -0
  30. sempy_labs/_bpa_translation/_translations_zh-CN.po +767 -0
  31. sempy_labs/_bpa_translation/_translations_zu-ZA.po +916 -0
  32. sempy_labs/_clear_cache.py +9 -4
  33. sempy_labs/_generate_semantic_model.py +30 -56
  34. sempy_labs/_helper_functions.py +358 -14
  35. sempy_labs/_icons.py +10 -1
  36. sempy_labs/_list_functions.py +478 -237
  37. sempy_labs/_model_bpa.py +194 -18
  38. sempy_labs/_model_bpa_bulk.py +363 -0
  39. sempy_labs/_model_bpa_rules.py +4 -4
  40. sempy_labs/_model_dependencies.py +12 -10
  41. sempy_labs/_one_lake_integration.py +7 -7
  42. sempy_labs/_query_scale_out.py +45 -66
  43. sempy_labs/_refresh_semantic_model.py +7 -0
  44. sempy_labs/_translations.py +154 -1
  45. sempy_labs/_vertipaq.py +103 -90
  46. sempy_labs/directlake/__init__.py +5 -1
  47. sempy_labs/directlake/_directlake_schema_compare.py +27 -31
  48. sempy_labs/directlake/_directlake_schema_sync.py +55 -66
  49. sempy_labs/directlake/_dl_helper.py +233 -0
  50. sempy_labs/directlake/_get_directlake_lakehouse.py +6 -7
  51. sempy_labs/directlake/_get_shared_expression.py +1 -1
  52. sempy_labs/directlake/_guardrails.py +17 -13
  53. sempy_labs/directlake/_update_directlake_partition_entity.py +54 -30
  54. sempy_labs/directlake/_warm_cache.py +1 -1
  55. sempy_labs/lakehouse/_get_lakehouse_tables.py +61 -69
  56. sempy_labs/lakehouse/_lakehouse.py +3 -2
  57. sempy_labs/lakehouse/_shortcuts.py +1 -1
  58. sempy_labs/migration/_create_pqt_file.py +174 -182
  59. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +236 -268
  60. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +75 -73
  61. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +442 -426
  62. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +91 -97
  63. sempy_labs/migration/_refresh_calc_tables.py +92 -101
  64. sempy_labs/report/_BPAReportTemplate.json +232 -0
  65. sempy_labs/report/__init__.py +6 -2
  66. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  67. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  68. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  69. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  70. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  71. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  72. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  73. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  74. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  75. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  76. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  77. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  78. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  79. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  80. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  81. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  82. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  83. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  84. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  85. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  86. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  87. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  88. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  89. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  90. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  91. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  92. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  93. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  94. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  95. sempy_labs/report/_generate_report.py +255 -139
  96. sempy_labs/report/_report_functions.py +26 -33
  97. sempy_labs/report/_report_rebind.py +31 -26
  98. sempy_labs/tom/_model.py +75 -58
  99. semantic_link_labs-0.6.0.dist-info/METADATA +0 -22
  100. semantic_link_labs-0.6.0.dist-info/RECORD +0 -54
  101. sempy_labs/directlake/_fallback.py +0 -60
  102. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/LICENSE +0 -0
  103. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/top_level.txt +0 -0
sempy_labs/directlake/_dl_helper.py (new file)
@@ -0,0 +1,233 @@
+ import sempy.fabric as fabric
+ import numpy as np
+ import pandas as pd
+ from typing import Optional, List, Union, Tuple
+ from uuid import UUID
+ import sempy_labs._icons as icons
+ from sempy._utils._log import log
+ from sempy_labs._helper_functions import retry, resolve_dataset_id
+
+
+ def check_fallback_reason(
+     dataset: str, workspace: Optional[str] = None
+ ) -> pd.DataFrame:
+     """
+     Shows the reason a table in a Direct Lake semantic model would fallback to DirectQuery.
+
+     Parameters
+     ----------
+     dataset : str
+         Name of the semantic model.
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         The tables in the semantic model and their fallback reason.
+     """
+
+     workspace = fabric.resolve_workspace_name(workspace)
+
+     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+     dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
+
+     if len(dfP_filt) == 0:
+         raise ValueError(
+             f"{icons.red_dot} The '{dataset}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models."
+         )
+
+     df = fabric.evaluate_dax(
+         dataset=dataset,
+         workspace=workspace,
+         dax_string="""
+         SELECT [TableName] AS [Table Name],[FallbackReason] AS [FallbackReasonID]
+         FROM $SYSTEM.TMSCHEMA_DELTA_TABLE_METADATA_STORAGES
+         """,
+     )
+
+     value_mapping = {
+         0: "No reason for fallback",
+         1: "This table is not framed",
+         2: "This object is a view in the lakehouse",
+         3: "The table does not exist in the lakehouse",
+         4: "Transient error",
+         5: "Using OLS will result in fallback to DQ",
+         6: "Using RLS will result in fallback to DQ",
+     }
+
+     # Create a new column based on the mapping
+     df["Fallback Reason Detail"] = np.vectorize(value_mapping.get)(
+         df["FallbackReasonID"]
+     )
+
+     return df
+
+
+ @log
+ def generate_direct_lake_semantic_model(
+     dataset: str,
+     lakehouse_tables: Union[str, List[str]],
+     workspace: Optional[str] = None,
+     lakehouse: Optional[str] = None,
+     lakehouse_workspace: Optional[str] = None,
+     overwrite: Optional[bool] = False,
+     refresh: Optional[bool] = True,
+ ):
+     """
+     Dynamically generates a Direct Lake semantic model based on tables in a Fabric lakehouse.
+
+     Parameters
+     ----------
+     dataset : str
+         Name of the semantic model to be created.
+     lakehouse_tables : str | List[str]
+         The table(s) within the Fabric lakehouse to add to the semantic model. All columns from these tables will be added to the semantic model.
+     workspace : str, default=None
+         The Fabric workspace name in which the semantic model will reside.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+     lakehouse : str, default=None
+         The lakehouse which stores the delta tables which will feed the Direct Lake semantic model.
+         Defaults to None which resolves to the attached lakehouse.
+     lakehouse_workspace : str, default=None
+         The Fabric workspace in which the lakehouse resides.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+     overwrite : bool, default=False
+         If set to True, overwrites the existing semantic model if it already exists.
+     refresh: bool, default=True
+         If True, refreshes the newly created semantic model after it is created.
+
+     Returns
+     -------
+     """
+
+     from sempy_labs.lakehouse import get_lakehouse_tables, get_lakehouse_columns
+     from sempy_labs import create_blank_semantic_model, refresh_semantic_model
+     from sempy_labs.tom import connect_semantic_model
+     from sempy_labs.directlake import get_shared_expression
+
+     if isinstance(lakehouse_tables, str):
+         lakehouse_tables = [lakehouse_tables]
+
+     dfLT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lakehouse_workspace)
+
+     # Validate lakehouse tables
+     for t in lakehouse_tables:
+         if t not in dfLT["Table Name"].values:
+             raise ValueError(
+                 f"{icons.red_dot} The '{t}' table does not exist as a delta table in the '{lakehouse}' within the '{workspace}' workspace."
+             )
+
+     dfLC = get_lakehouse_columns(lakehouse=lakehouse, workspace=lakehouse_workspace)
+     expr = get_shared_expression(lakehouse=lakehouse, workspace=lakehouse_workspace)
+     dfD = fabric.list_datasets(workspace=workspace)
+     dfD_filt = dfD[dfD["Dataset Name"] == dataset]
+     dfD_filt_len = len(dfD_filt)
+
+     if dfD_filt_len > 0 and overwrite is False:
+         raise ValueError(
+             f"{icons.red_dot} The '{dataset}' semantic model within the '{workspace}' workspace already exists. Overwrite is set to False so the new semantic model has not been created."
+         )
+     if dfD_filt_len > 0 and overwrite:
+         print(
+             f"{icons.warning} Overwriting the existing '{dataset}' semantic model within the '{workspace}' workspace."
+         )
+
+     create_blank_semantic_model(dataset=dataset, workspace=workspace)
+
+     @retry(
+         sleep_time=1,
+         timeout_error_message=f"{icons.red_dot} Function timed out after 1 minute",
+     )
+     def dyn_connect():
+         with connect_semantic_model(
+             dataset=dataset, readonly=True, workspace=workspace
+         ) as tom:
+
+             tom.model
+
+     dyn_connect()
+
+     expression_name = "DatabaseQuery"
+     with connect_semantic_model(
+         dataset=dataset, workspace=workspace, readonly=False
+     ) as tom:
+         if not any(e.Name == expression_name for e in tom.model.Expressions):
+             tom.add_expression(name=expression_name, expression=expr)
+
+         for t in lakehouse_tables:
+             tom.add_table(name=t)
+             tom.add_entity_partition(table_name=t, entity_name=t)
+             dfLC_filt = dfLC[dfLC["Table Name"] == t]
+             for i, r in dfLC_filt.iterrows():
+                 lakeCName = r["Column Name"]
+                 dType = r["Data Type"]
+                 dt = icons.data_type_mapping.get(dType)
+                 tom.add_data_column(
+                     table_name=t,
+                     column_name=lakeCName,
+                     source_column=lakeCName,
+                     data_type=dt,
+                 )
+
+     if refresh:
+         refresh_semantic_model(dataset=dataset, workspace=workspace)
+
+
+ def get_direct_lake_source(
+     dataset: str, workspace: Optional[str] = None
+ ) -> Tuple[str, str, UUID, UUID]:
+     """
+     Obtains the source information for a direct lake semantic model.
+
+     Parameters
+     ----------
+     dataset : str
+         The name of the semantic model.
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     Tuple[str, str, UUID, UUID]
+         If the source of the direct lake semantic model is a lakehouse this will return: 'Lakehouse', Lakehouse Name, SQL Endpoint Id, Workspace Id
+         If the source of the direct lake semantic model is a warehouse this will return: 'Warehouse', Warehouse Name, Warehouse Id, Workspace Id
+         If the semantic model is not a Direct Lake semantic model, it will return None, None, None.
+     """
+
+     workspace = fabric.resolve_workspace_name(workspace)
+     dataset_id = resolve_dataset_id(dataset, workspace)
+     client = fabric.PowerBIRestClient()
+     request_body = {
+         "artifacts": [
+             {
+                 "objectId": dataset_id,
+                 "type": "dataset",
+             }
+         ]
+     }
+     response = client.post(
+         "metadata/relations/upstream?apiVersion=3", json=request_body
+     )
+     artifacts = response.json().get("artifacts", [])
+     sql_id, sql_object_name, sql_workspace_id, artifact_type = None, None, None, None
+
+     for artifact in artifacts:
+         object_type = artifact.get("typeName")
+         display_name = artifact.get("displayName")
+         if object_type in ["Datawarehouse", "Lakewarehouse"]:
+             artifact_type = (
+                 "Warehouse" if object_type == "Datawarehouse" else "Lakehouse"
+             )
+             sql_id = artifact.get("objectId")
+             sql_workspace_id = artifact.get("workspace", {}).get("objectId")
+             sql_object_name = display_name
+             break
+
+     return artifact_type, sql_object_name, sql_id, sql_workspace_id
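
A minimal usage sketch of the new helpers (the model and workspace names below are placeholders, not part of the package; the import paths follow the module location shown in this diff, and the public re-exports under sempy_labs.directlake are assumed to match):

from sempy_labs.directlake._dl_helper import (
    check_fallback_reason,
    get_direct_lake_source,
)

# Why would tables in a Direct Lake model fall back to DirectQuery?
fallback_df = check_fallback_reason(dataset="Sales", workspace="My Workspace")

# Which lakehouse or warehouse feeds the model?
artifact_type, source_name, source_id, source_workspace_id = get_direct_lake_source(
    dataset="Sales", workspace="My Workspace"
)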
sempy_labs/directlake/_get_directlake_lakehouse.py
@@ -6,7 +6,6 @@ from sempy_labs._helper_functions import (
  )
  from typing import Optional, Tuple
  from uuid import UUID
- import sempy_labs._icons as icons


  def get_direct_lake_lakehouse(
@@ -49,13 +48,13 @@ def get_direct_lake_lakehouse(
          lakehouse_id = fabric.get_lakehouse_id()
          lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)

-     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
-     dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
+     # dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+     # dfP_filt = dfP[dfP["Mode"] == "DirectLake"]

-     if len(dfP_filt) == 0:
-         raise ValueError(
-             f"{icons.red_dot} The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode."
-         )
+     # if len(dfP_filt) == 0:
+     #     raise ValueError(
+     #         f"{icons.red_dot} The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode."
+     #     )

      sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)

sempy_labs/directlake/_get_shared_expression.py
@@ -7,7 +7,7 @@ import sempy_labs._icons as icons

  def get_shared_expression(
      lakehouse: Optional[str] = None, workspace: Optional[str] = None
- ):
+ ) -> str:
      """
      Dynamically generates the M expression used by a Direct Lake model for a given lakehouse.

sempy_labs/directlake/_guardrails.py
@@ -1,6 +1,7 @@
  import sempy.fabric as fabric
  import pandas as pd
  from typing import Optional
+ import sempy_labs._icons as icons


  def get_direct_lake_guardrails() -> pd.DataFrame:
@@ -27,14 +28,14 @@ def get_direct_lake_guardrails() -> pd.DataFrame:
      return df


- def get_sku_size(workspace: Optional[str] = None):
+ def get_sku_size(workspace: Optional[str] = None) -> str:
      """
      Shows the SKU size for a workspace.

      Parameters
      ----------
      workspace : str, default=None
-         The Fabric workspace.
+         The Fabric workspace name.
          Defaults to None which resolves to the workspace of the attached lakehouse
          or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -46,18 +47,21 @@ def get_sku_size(workspace: Optional[str] = None):

      workspace = fabric.resolve_workspace_name(workspace)

+     dfW = fabric.list_workspaces(filter=f"name eq '{workspace}'")
+
+     if len(dfW) == 0:
+         raise ValueError(f"{icons.red_dot} The '{workspace}' is not a valid workspace.")
+
+     capacity_id = dfW["Capacity Id"].iloc[0]
      dfC = fabric.list_capacities()
-     dfW = fabric.list_workspaces().sort_values(by="Name", ascending=True)
-     dfC.rename(columns={"Id": "Capacity Id"}, inplace=True)
-     dfCW = pd.merge(
-         dfW,
-         dfC[["Capacity Id", "Sku", "Region", "State"]],
-         on="Capacity Id",
-         how="inner",
-     )
-     sku_value = dfCW.loc[dfCW["Name"] == workspace, "Sku"].iloc[0]
-
-     return sku_value
+     dfC_filt = dfC[dfC["Id"] == capacity_id]
+
+     if len(dfC_filt) == 0:
+         raise ValueError(
+             f"{icons.red_dot} The '{capacity_id}' Id is not a valid capacity Id."
+         )
+
+     return dfC_filt["Sku"].iloc[0]


  def get_directlake_guardrails_for_sku(sku_size: str) -> pd.DataFrame:
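
get_sku_size now resolves the capacity via a filtered workspace lookup and raises on an invalid workspace or capacity Id. The two guardrail helpers are chained the same way the get_lakehouse_tables change further down uses them; a short sketch (the workspace name is a placeholder):

from sempy_labs.directlake._guardrails import (
    get_sku_size,
    get_directlake_guardrails_for_sku,
)

sku_size = get_sku_size(workspace="My Workspace")
guardrail = get_directlake_guardrails_for_sku(sku_size)  # Direct Lake limits for that SKU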
sempy_labs/directlake/_update_directlake_partition_entity.py
@@ -1,7 +1,8 @@
+ import sempy
  import sempy.fabric as fabric
  from sempy_labs.tom import connect_semantic_model
- from sempy_labs._helper_functions import resolve_lakehouse_name
  from sempy_labs._refresh_semantic_model import refresh_semantic_model
+ from sempy_labs.directlake._dl_helper import get_direct_lake_source
  from typing import List, Optional, Union
  import sempy_labs._icons as icons

@@ -11,8 +12,7 @@ def update_direct_lake_partition_entity(
      table_name: Union[str, List[str]],
      entity_name: Union[str, List[str]],
      workspace: Optional[str] = None,
-     lakehouse: Optional[str] = None,
-     lakehouse_workspace: Optional[str] = None,
+     **kwargs,
  ):
      """
      Remaps a table (or tables) in a Direct Lake semantic model to a table in a lakehouse.
@@ -29,23 +29,30 @@ def update_direct_lake_partition_entity(
          The Fabric workspace name in which the semantic model exists.
          Defaults to None which resolves to the workspace of the attached lakehouse
          or if no lakehouse attached, resolves to the workspace of the notebook.
-     lakehouse : str, default=None
-         The Fabric lakehouse used by the Direct Lake semantic model.
-         Defaults to None which resolves to the lakehouse attached to the notebook.
-     lakehouse_workspace : str, default=None
-         The Fabric workspace used by the lakehouse.
-         Defaults to None which resolves to the workspace of the attached lakehouse
-         or if no lakehouse attached, resolves to the workspace of the notebook.
      """

+     if "lakehouse" in kwargs:
+         print(
+             "The 'lakehouse' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+         )
+         del kwargs["lakehouse"]
+     if "lakehouse_workspace" in kwargs:
+         print(
+             "The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+         )
+         del kwargs["lakehouse_workspace"]
+
      workspace = fabric.resolve_workspace_name(workspace)

-     if lakehouse_workspace is None:
-         lakehouse_workspace = workspace
+     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+         get_direct_lake_source(dataset=dataset, workspace=workspace)
+     )

-     if lakehouse is None:
-         lakehouse_id = fabric.get_lakehouse_id()
-         lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
+     if artifact_type == "Warehouse":
+         raise ValueError(
+             f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from lakehouses, not warehouses."
+         )
+     lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)

      # Support both str & list types
      if isinstance(table_name, str):
@@ -70,7 +77,7 @@ def update_direct_lake_partition_entity(
          for tName in table_name:
              i = table_name.index(tName)
              eName = entity_name[i]
-             part_name = (
+             part_name = next(
                  p.Name
                  for t in tom.model.Tables
                  for p in t.Partitions
@@ -85,7 +92,7 @@ def update_direct_lake_partition_entity(
              tom.model.Tables[tName].Partitions[part_name].EntityName = eName
              print(
                  f"{icons.green_dot} The '{tName}' table in the '{dataset}' semantic model has been updated to point to the '{eName}' table "
-                 f"in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+                 f"in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
              )


@@ -93,7 +100,8 @@ def add_table_to_direct_lake_semantic_model(
      dataset: str,
      table_name: str,
      lakehouse_table_name: str,
-     workspace: Optional[str | None] = None,
+     refresh: Optional[bool] = True,
+     workspace: Optional[str] = None,
  ):
      """
      Adds a table and all of its columns to a Direct Lake semantic model, based on a Fabric lakehouse table.
@@ -106,6 +114,8 @@ def add_table_to_direct_lake_semantic_model(
          Name of the table in the semantic model.
      lakehouse_table_name : str
          The name of the Fabric lakehouse table.
+     refresh : bool, default=True
+         Refreshes the table after it is added to the semantic model.
      workspace : str, default=None
          The name of the Fabric workspace in which the semantic model resides.
          Defaults to None which resolves to the workspace of the attached lakehouse
@@ -115,22 +125,33 @@ def add_table_to_direct_lake_semantic_model(
      -------
      """

+     sempy.fabric._client._utils._init_analysis_services()
      import Microsoft.AnalysisServices.Tabular as TOM
      from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns
      from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
-     from sempy_labs.directlake._get_directlake_lakehouse import (
-         get_direct_lake_lakehouse,
-     )

      workspace = fabric.resolve_workspace_name(workspace)

+     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+         get_direct_lake_source(dataset=dataset, workspace=workspace)
+     )
+
+     if artifact_type == "Warehouse":
+         raise ValueError(
+             f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
+         )
+
+     lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
+
      with connect_semantic_model(
          dataset=dataset, readonly=False, workspace=workspace
      ) as tom:

-         if tom.is_direct_lake() is False:
+         table_count = tom.model.Tables.Count
+
+         if tom.is_direct_lake() is False and table_count > 0:
              raise ValueError(
-                 "This function is only valid for Direct Lake semantic models."
+                 "This function is only valid for Direct Lake semantic models or semantic models with no tables."
              )

          if any(
@@ -154,19 +175,19 @@ def add_table_to_direct_lake_semantic_model(
                  f"The '{table_name}' table already exists in the '{dataset}' semantic model within the '{workspace}' workspace."
              )

-         lake_name, lake_id = get_direct_lake_lakehouse(
-             dataset=dataset, workspace=workspace
+         dfL = get_lakehouse_tables(
+             lakehouse=lakehouse_name, workspace=lakehouse_workspace
          )
-
-         dfL = get_lakehouse_tables(lakehouse=lake_name, workspace=workspace)
          dfL_filt = dfL[dfL["Table Name"] == lakehouse_table_name]

          if len(dfL_filt) == 0:
              raise ValueError(
-                 f"The '{lakehouse_table_name}' table does not exist in the '{lake_name}' lakehouse within the '{workspace}' workspace."
+                 f"The '{lakehouse_table_name}' table does not exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
              )

-         dfLC = get_lakehouse_columns(lakehouse=lake_name, workspace=workspace)
+         dfLC = get_lakehouse_columns(
+             lakehouse=lakehouse_name, workspace=lakehouse_workspace
+         )
          dfLC_filt = dfLC[dfLC["Table Name"] == lakehouse_table_name]

          tom.add_table(name=table_name)
@@ -194,4 +215,7 @@ def add_table_to_direct_lake_semantic_model(
                  f"{icons.green_dot} The '{lakeCName}' column has been added to the '{table_name}' table as a '{dt}' data type in the '{dataset}' semantic model within the '{workspace}' workspace."
              )

-     refresh_semantic_model(dataset=dataset, tables=table_name, workspace=workspace)
+     if refresh:
+         refresh_semantic_model(
+             dataset=dataset, tables=table_name, workspace=workspace
+         )
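
update_direct_lake_partition_entity now resolves its lakehouse through get_direct_lake_source, so the old lakehouse/lakehouse_workspace arguments only trigger a deprecation message. A minimal sketch of the new call shape (the model, table, and workspace names are placeholders; the re-export from sempy_labs.directlake is assumed):

from sempy_labs.directlake import update_direct_lake_partition_entity

update_direct_lake_partition_entity(
    dataset="Sales",
    table_name=["DimDate", "FactSales"],
    entity_name=["dim_date", "fact_sales"],
    workspace="My Workspace",
)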
sempy_labs/directlake/_warm_cache.py
@@ -17,7 +17,7 @@ def warm_direct_lake_cache_perspective(
      perspective: str,
      add_dependencies: Optional[bool] = False,
      workspace: Optional[str] = None,
- ):
+ ) -> pd.DataFrame:
      """
      Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective.

sempy_labs/lakehouse/_get_lakehouse_tables.py
@@ -16,6 +16,7 @@ from sempy_labs.lakehouse._lakehouse import lakehouse_attached
  from typing import Optional
  import sempy_labs._icons as icons
  from sempy._utils._log import log
+ from sempy.fabric.exceptions import FabricHTTPException


  @log
@@ -51,6 +52,8 @@ def get_lakehouse_tables(
          Shows the tables/columns within a lakehouse and their properties.
      """

+     from sempy_labs._helper_functions import pagination
+
      df = pd.DataFrame(
          columns=[
              "Workspace Name",
@@ -73,34 +76,52 @@ def get_lakehouse_tables(
      if count_rows:  # Setting countrows defaults to extended=True
          extended = True

+     if (
+         workspace_id != fabric.get_workspace_id()
+         and lakehouse_id != fabric.get_lakehouse_id()
+         and count_rows
+     ):
+         raise ValueError(
+             f"{icons.red_dot} If 'count_rows' is set to True, you must run this function against the default lakehouse attached to the notebook. "
+             "Count rows runs a spark query and cross-workspace spark queries are currently not supported."
+         )
+
      client = fabric.FabricRestClient()
      response = client.get(
          f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables"
      )

-     for i in response.json()["data"]:
-         tName = i["name"]
-         tType = i["type"]
-         tFormat = i["format"]
-         tLocation = i["location"]
-         if not extended:
+     if response.status_code != 200:
+         raise FabricHTTPException(response)
+
+     responses = pagination(client, response)
+
+     dfs = []
+     for r in responses:
+         for i in r.get("data", []):
              new_data = {
                  "Workspace Name": workspace,
                  "Lakehouse Name": lakehouse,
-                 "Table Name": tName,
-                 "Format": tFormat,
-                 "Type": tType,
-                 "Location": tLocation,
+                 "Table Name": i.get("name"),
+                 "Format": i.get("format"),
+                 "Type": i.get("type"),
+                 "Location": i.get("location"),
              }
-             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
-         else:
-             sku_value = get_sku_size(workspace)
-             guardrail = get_directlake_guardrails_for_sku(sku_value)
-
-             spark = SparkSession.builder.getOrCreate()
+             dfs.append(pd.DataFrame(new_data, index=[0]))
+     df = pd.concat(dfs, ignore_index=True)

-             intColumns = ["Files", "Row Groups", "Table Size"]
-             if tType == "Managed" and tFormat == "delta":
+     if extended:
+         sku_value = get_sku_size(workspace)
+         guardrail = get_directlake_guardrails_for_sku(sku_value)
+         spark = SparkSession.builder.getOrCreate()
+         df["Files"] = None
+         df["Row Groups"] = None
+         df["Table Size"] = None
+         if count_rows:
+             df["Row Count"] = None
+         for i, r in df.iterrows():
+             tName = r["Table Name"]
+             if r["Type"] == "Managed" and r["Format"] == "delta":
                  detail_df = spark.sql(f"DESCRIBE DETAIL `{tName}`").collect()[0]
                  num_files = detail_df.numFiles
                  size_in_bytes = detail_df.sizeInBytes
@@ -120,60 +141,31 @@
                  ).num_row_groups
              except FileNotFoundError:
                  continue
-
-             if count_rows:
-                 num_rows = spark.table(tName).count()
-                 intColumns.append("Row Count")
-                 new_data = {
-                     "Workspace Name": workspace,
-                     "Lakehouse Name": lakehouse,
-                     "Table Name": tName,
-                     "Format": tFormat,
-                     "Type": tType,
-                     "Location": tLocation,
-                     "Files": num_files,
-                     "Row Groups": num_rowgroups,
-                     "Row Count": num_rows,
-                     "Table Size": size_in_bytes,
-                 }
-             else:
-                 new_data = {
-                     "Workspace Name": workspace,
-                     "Lakehouse Name": lakehouse,
-                     "Table Name": tName,
-                     "Format": tFormat,
-                     "Type": tType,
-                     "Location": tLocation,
-                     "Files": num_files,
-                     "Row Groups": num_rowgroups,
-                     "Table Size": size_in_bytes,
-                 }
-
-             df = pd.concat(
-                 [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
-             )
-         df[intColumns] = df[intColumns].astype(int)
-
-         df["SKU"] = guardrail["Fabric SKUs"].iloc[0]
-         df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0]
-         df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0]
-         df["Row Count Guardrail"] = (
-             guardrail["Rows per table (millions)"].iloc[0] * 1000000
-         )
-
-         df["Parquet File Guardrail Hit"] = (
-             df["Files"] > df["Parquet File Guardrail"]
-         )
-         df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"]
-
+                 df.at[i, "Files"] = num_files
+                 df.at[i, "Row Groups"] = num_rowgroups
+                 df.at[i, "Table Size"] = size_in_bytes
              if count_rows:
-                 df["Row Count Guardrail Hit"] = (
-                     df["Row Count"] > df["Row Count Guardrail"]
-                 )
+                 num_rows = spark.table(tName).count()
+                 df.at[i, "Row Count"] = num_rows
+
+     if extended:
+         intColumns = ["Files", "Row Groups", "Table Size"]
+         df[intColumns] = df[intColumns].astype(int)
+         df["SKU"] = guardrail["Fabric SKUs"].iloc[0]
+         df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0]
+         df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0]
+         df["Row Count Guardrail"] = (
+             guardrail["Rows per table (millions)"].iloc[0] * 1000000
+         )
+
+         df["Parquet File Guardrail Hit"] = df["Files"] > df["Parquet File Guardrail"]
+         df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"]
+         if count_rows:
+             df["Row Count"] = df["Row Count"].astype(int)
+             df["Row Count Guardrail Hit"] = df["Row Count"] > df["Row Count Guardrail"]

      if export:
-         lakeAttach = lakehouse_attached()
-         if lakeAttach is False:
+         if not lakehouse_attached():
              raise ValueError(
                  f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
              )
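
get_lakehouse_tables now paginates the REST response and, per the new guard, only allows count_rows=True against the lakehouse attached to the notebook. A hedged usage sketch (the lakehouse and workspace names are placeholders):

from sempy_labs.lakehouse import get_lakehouse_tables

# Row counts require the default (attached) lakehouse; cross-workspace Spark queries are not supported.
df_attached = get_lakehouse_tables(count_rows=True)

# For another lakehouse, list the table metadata without row counts.
df_other = get_lakehouse_tables(lakehouse="OtherLakehouse", workspace="Other Workspace")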