semantic-link-labs 0.5.0-py3-none-any.whl → 0.7.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of semantic-link-labs might be problematic; consult the release details for more information.
- semantic_link_labs-0.7.0.dist-info/METADATA +148 -0
- semantic_link_labs-0.7.0.dist-info/RECORD +111 -0
- {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.7.0.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +45 -15
- sempy_labs/_ai.py +42 -85
- sempy_labs/_bpa_translation/_translations_am-ET.po +828 -0
- sempy_labs/_bpa_translation/_translations_ar-AE.po +860 -0
- sempy_labs/_bpa_translation/_translations_cs-CZ.po +894 -0
- sempy_labs/_bpa_translation/_translations_da-DK.po +894 -0
- sempy_labs/_bpa_translation/_translations_de-DE.po +933 -0
- sempy_labs/_bpa_translation/_translations_el-GR.po +936 -0
- sempy_labs/_bpa_translation/_translations_es-ES.po +915 -0
- sempy_labs/_bpa_translation/_translations_fa-IR.po +883 -0
- sempy_labs/_bpa_translation/_translations_fr-FR.po +938 -0
- sempy_labs/_bpa_translation/_translations_ga-IE.po +912 -0
- sempy_labs/_bpa_translation/_translations_he-IL.po +855 -0
- sempy_labs/_bpa_translation/_translations_hi-IN.po +892 -0
- sempy_labs/_bpa_translation/_translations_hu-HU.po +910 -0
- sempy_labs/_bpa_translation/_translations_is-IS.po +887 -0
- sempy_labs/_bpa_translation/_translations_it-IT.po +931 -0
- sempy_labs/_bpa_translation/_translations_ja-JP.po +805 -0
- sempy_labs/_bpa_translation/_translations_nl-NL.po +924 -0
- sempy_labs/_bpa_translation/_translations_pl-PL.po +913 -0
- sempy_labs/_bpa_translation/_translations_pt-BR.po +909 -0
- sempy_labs/_bpa_translation/_translations_pt-PT.po +904 -0
- sempy_labs/_bpa_translation/_translations_ru-RU.po +909 -0
- sempy_labs/_bpa_translation/_translations_ta-IN.po +922 -0
- sempy_labs/_bpa_translation/_translations_te-IN.po +896 -0
- sempy_labs/_bpa_translation/_translations_th-TH.po +873 -0
- sempy_labs/_bpa_translation/_translations_zh-CN.po +767 -0
- sempy_labs/_bpa_translation/_translations_zu-ZA.po +916 -0
- sempy_labs/_clear_cache.py +12 -8
- sempy_labs/_connections.py +77 -70
- sempy_labs/_dax.py +7 -9
- sempy_labs/_generate_semantic_model.py +75 -90
- sempy_labs/_helper_functions.py +371 -20
- sempy_labs/_icons.py +23 -0
- sempy_labs/_list_functions.py +855 -427
- sempy_labs/_model_auto_build.py +4 -3
- sempy_labs/_model_bpa.py +307 -1118
- sempy_labs/_model_bpa_bulk.py +363 -0
- sempy_labs/_model_bpa_rules.py +831 -0
- sempy_labs/_model_dependencies.py +20 -16
- sempy_labs/_one_lake_integration.py +18 -12
- sempy_labs/_query_scale_out.py +116 -129
- sempy_labs/_refresh_semantic_model.py +23 -10
- sempy_labs/_translations.py +367 -288
- sempy_labs/_vertipaq.py +152 -123
- sempy_labs/directlake/__init__.py +7 -1
- sempy_labs/directlake/_directlake_schema_compare.py +33 -30
- sempy_labs/directlake/_directlake_schema_sync.py +60 -77
- sempy_labs/directlake/_dl_helper.py +233 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +7 -8
- sempy_labs/directlake/_get_shared_expression.py +5 -3
- sempy_labs/directlake/_guardrails.py +20 -16
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +17 -10
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +3 -2
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +10 -5
- sempy_labs/directlake/_update_directlake_partition_entity.py +169 -22
- sempy_labs/directlake/_warm_cache.py +7 -4
- sempy_labs/lakehouse/_get_lakehouse_columns.py +1 -1
- sempy_labs/lakehouse/_get_lakehouse_tables.py +65 -71
- sempy_labs/lakehouse/_lakehouse.py +5 -3
- sempy_labs/lakehouse/_shortcuts.py +20 -13
- sempy_labs/migration/__init__.py +1 -1
- sempy_labs/migration/_create_pqt_file.py +184 -186
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +240 -269
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +78 -77
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +444 -425
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +96 -102
- sempy_labs/migration/_migration_validation.py +2 -2
- sempy_labs/migration/_refresh_calc_tables.py +94 -100
- sempy_labs/report/_BPAReportTemplate.json +232 -0
- sempy_labs/report/__init__.py +6 -2
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
- sempy_labs/report/_bpareporttemplate/.platform +11 -0
- sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
- sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
- sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
- sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
- sempy_labs/report/_generate_report.py +260 -139
- sempy_labs/report/_report_functions.py +90 -59
- sempy_labs/report/_report_rebind.py +40 -34
- sempy_labs/tom/__init__.py +1 -4
- sempy_labs/tom/_model.py +601 -181
- semantic_link_labs-0.5.0.dist-info/METADATA +0 -22
- semantic_link_labs-0.5.0.dist-info/RECORD +0 -53
- sempy_labs/directlake/_fallback.py +0 -58
- {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.7.0.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.7.0.dist-info}/top_level.txt +0 -0
sempy_labs/_vertipaq.py
CHANGED

@@ -1,28 +1,34 @@
-import sempy
 import sempy.fabric as fabric
 import pandas as pd
 from IPython.display import display, HTML
-import zipfile
+import zipfile
+import os
+import shutil
+import datetime
+import warnings
 from pyspark.sql import SparkSession
 from sempy_labs._helper_functions import (
     format_dax_object_name,
-    get_direct_lake_sql_endpoint,
     resolve_lakehouse_name,
+    resolve_dataset_id,
+    save_as_delta_table,
+    resolve_workspace_capacity,
 )
 from sempy_labs._list_functions import list_relationships
-from sempy_labs.lakehouse
-from sempy_labs.
-from typing import
+from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
+from sempy_labs.directlake import get_direct_lake_source
+from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons

+
 @log
 def vertipaq_analyzer(
     dataset: str,
     workspace: Optional[str] = None,
     export: Optional[str] = None,
-    lakehouse_workspace: Optional[str] = None,
     read_stats_from_data: Optional[bool] = False,
+    **kwargs,
 ):
     """
     Displays an HTML visualization of the Vertipaq Analyzer statistics from a semantic model.
@@ -39,10 +45,6 @@ def vertipaq_analyzer(
         Specifying 'zip' will export the results to a zip file in your lakehouse (which can be imported using the import_vertipaq_analyzer function.
         Specifying 'table' will export the results to delta tables (appended) in your lakehouse.
         Default value: None.
-    lakehouse_workspace : str, default=None
-        The Fabric workspace used by the lakehouse (for Direct Lake semantic models).
-        Defaults to None which resolves to the workspace of the attached lakehouse
-        or if no lakehouse attached, resolves to the workspace of the notebook.
     read_stats_from_data : bool, default=False
         Setting this parameter to true has the function get Column Cardinality and Missing Rows using DAX (Direct Lake semantic models achieve this using a Spark query to the lakehouse).

@@ -51,6 +53,14 @@ def vertipaq_analyzer(

     """

+    from sempy_labs.tom import connect_semantic_model
+
+    if "lakehouse_workspace" in kwargs:
+        print(
+            f"{icons.info} The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+        )
+        del kwargs["lakehouse_workspace"]
+
     pd.options.mode.copy_on_write = True
     warnings.filterwarnings(
         "ignore", message="createDataFrame attempted Arrow optimization*"
@@ -58,9 +68,6 @@ def vertipaq_analyzer(

     workspace = fabric.resolve_workspace_name(workspace)

-    if lakehouse_workspace is None:
-        lakehouse_workspace = workspace
     dfT = fabric.list_tables(dataset=dataset, extended=True, workspace=workspace)
     dfT.rename(columns={"Name": "Table Name"}, inplace=True)
     dfC = fabric.list_columns(dataset=dataset, extended=True, workspace=workspace)
@@ -71,18 +78,24 @@ def vertipaq_analyzer(
     dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"])
     dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
     dfP = fabric.list_partitions(dataset=dataset, extended=True, workspace=workspace)
-        workspace=workspace
-        additional_xmla_properties=["CompatibilityLevel", "Model.DefaultMode"],
+    artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+        get_direct_lake_source(dataset=dataset, workspace=workspace)
     )
+
+    with connect_semantic_model(
+        dataset=dataset, readonly=True, workspace=workspace
+    ) as tom:
+        compat_level = tom.model.Model.Database.CompatibilityLevel
+        is_direct_lake = tom.is_direct_lake()
+        def_mode = tom.model.DefaultMode
+        table_count = tom.model.Tables.Count
+        column_count = len(list(tom.all_columns()))
+
     dfR["Missing Rows"] = None

     # Direct Lake
     if read_stats_from_data:
-        if
+        if is_direct_lake and artifact_type == "Lakehouse":
             dfC = pd.merge(
                 dfC,
                 dfP[["Table Name", "Query", "Source Type"]],
@@ -93,66 +106,54 @@ def vertipaq_analyzer(
                 (dfC["Source Type"] == "Entity")
                 & (~dfC["Column Name"].str.startswith("RowNumber-"))
             ]
+
+            object_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
+            current_workspace_id = fabric.get_workspace_id()
+            if current_workspace_id != lakehouse_workspace_id:
+                lakeTables = get_lakehouse_tables(
+                    lakehouse=lakehouse_name, workspace=object_workspace
+                )
+
+            sql_statements = []
+            spark = SparkSession.builder.getOrCreate()
+            # Loop through tables
+            for lakeTName in dfC_flt["Query"].unique():
+                query = "SELECT "
+                columns_in_table = dfC_flt.loc[
+                    dfC_flt["Query"] == lakeTName, "Source"
+                ].unique()
+
+                # Loop through columns within those tables
+                for scName in columns_in_table:
+                    query = query + f"COUNT(DISTINCT(`{scName}`)) AS `{scName}`, "
+
+                query = query[:-2]
+                if lakehouse_workspace_id == current_workspace_id:
+                    query = query + f" FROM {lakehouse_name}.{lakeTName}"
+                else:
+                    lakeTables_filt = lakeTables[lakeTables["Table Name"] == lakeTName]
+                    tPath = lakeTables_filt["Location"].iloc[0]
+
+                    df = spark.read.format("delta").load(tPath)
+                    tempTableName = "delta_table_" + lakeTName
+                    df.createOrReplaceTempView(tempTableName)
+                    query = query + f" FROM {tempTableName}"
+                sql_statements.append((lakeTName, query))
+
+            for o in sql_statements:
+                tName = o[0]
+                query = o[1]
+
+                df = spark.sql(query)
+
+                for column in df.columns:
+                    x = df.collect()[0][column]
+                    for i, r in dfC.iterrows():
+                        if r["Query"] == tName and r["Source"] == column:
+                            dfC.at[i, "Cardinality"] = x
+
+            # Remove column added temporarily
+            dfC.drop(columns=["Query", "Source Type"], inplace=True)

             # Direct Lake missing rows
             dfR = pd.merge(
@@ -199,11 +200,11 @@ def vertipaq_analyzer(
                 toTable = r["To Lake Table"]
                 toColumn = r["To Lake Column"]

-                if
+                if lakehouse_workspace_id == current_workspace_id:
                     query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"
                 else:
-                    tempTableFrom = "delta_table_"
-                    tempTableTo = "delta_table_"
+                    tempTableFrom = f"delta_table_{fromTable}"
+                    tempTableTo = f"delta_table_{toTable}"

                     query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {tempTableFrom} as f\nleft join {tempTableTo} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"

@@ -214,7 +215,7 @@ def vertipaq_analyzer(
                 dfR.at[i, "Missing Rows"] = missingRows

             dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)
+        elif not is_direct_lake:
             # Calculate missing rows using DAX for non-direct lake
             for i, r in dfR.iterrows():
                 fromTable = r["From Table"]
@@ -238,7 +239,7 @@ def vertipaq_analyzer(

                 try:
                     missingRows = result.iloc[0, 0]
-                except:
+                except Exception:
                     pass

                 dfR.at[i, "Missing Rows"] = missingRows
@@ -308,7 +309,6 @@ def vertipaq_analyzer(
     )
     dfTable = pd.merge(dfTable, dfTP, on="Table Name", how="left")
     dfTable = pd.merge(dfTable, dfTC, on="Table Name", how="left")
-    dfTable = dfTable.drop_duplicates()  # Drop duplicates (temporary)
     dfTable = dfTable.sort_values(by="Total Size", ascending=False)
     dfTable.reset_index(drop=True, inplace=True)
     export_Table = dfTable.copy()
@@ -318,7 +318,7 @@ def vertipaq_analyzer(
     pctList = ["% DB"]
     dfTable[pctList] = dfTable[pctList].applymap("{:.2f}%".format)

+    # Relationships
     # dfR.drop(columns=['Max From Cardinality', 'Max To Cardinality'], inplace=True)
     dfR = pd.merge(
         dfR,
@@ -359,12 +359,17 @@ def vertipaq_analyzer(
     intList.remove("Missing Rows")
     dfR[intList] = dfR[intList].applymap("{:,}".format)

+    # Partitions
     dfP = dfP[
-        [
+        [
+            "Table Name",
+            "Partition Name",
+            "Mode",
+            "Record Count",
+            "Segment Count",
+            # "Records per Segment",
+        ]
+    ].sort_values(by="Record Count", ascending=False)
     dfP["Records per Segment"] = round(
         dfP["Record Count"] / dfP["Segment Count"], 2
     )  # Remove after records per segment is fixed
@@ -373,17 +378,19 @@ def vertipaq_analyzer(
     intList = ["Record Count", "Segment Count", "Records per Segment"]
     dfP[intList] = dfP[intList].applymap("{:,}".format)

+    # Hierarchies
     dfH_filt = dfH[dfH["Level Ordinal"] == 0]
     dfH_filt = dfH_filt[["Table Name", "Hierarchy Name", "Used Size"]].sort_values(
         by="Used Size", ascending=False
     )
     dfH_filt.reset_index(drop=True, inplace=True)
+    dfH_filt.fillna({"Used Size": 0}, inplace=True)
+    dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int)
     export_Hier = dfH_filt.copy()
     intList = ["Used Size"]
     dfH_filt[intList] = dfH_filt[intList].applymap("{:,}".format)

+    # Model
     if total_size >= 1000000000:
         y = total_size / (1024**3) * 1000000000
     elif total_size >= 1000000:
@@ -392,23 +399,19 @@ def vertipaq_analyzer(
         y = total_size / (1024) * 1000
     y = round(y)

-    tblCount = len(dfT)
-    colCount = len(dfC_filt)
-    compatLevel = dfD["Compatibility Level"].iloc[0]
-    defMode = dfD["Model Default Mode"].iloc[0]
     dfModel = pd.DataFrame(
         {
             "Dataset Name": dataset,
             "Total Size": y,
-            "Table Count":
-            "Column Count":
-            "Compatibility Level":
-            "Default Mode":
+            "Table Count": table_count,
+            "Column Count": column_count,
+            "Compatibility Level": compat_level,
+            "Default Mode": def_mode,
         },
         index=[0],
     )
     dfModel.reset_index(drop=True, inplace=True)
+    dfModel["Default Mode"] = dfModel["Default Mode"].astype(str)
     export_Model = dfModel.copy()
     intList = ["Total Size", "Table Count", "Column Count"]
     dfModel[intList] = dfModel[intList].applymap("{:,}".format)
@@ -429,22 +432,24 @@ def vertipaq_analyzer(

     visualize_vertipaq(dfs)

+    # Export vertipaq to delta tables in lakehouse
     if export in ["table", "zip"]:
+        if not lakehouse_attached():
+            raise ValueError(
+                f"{icons.red_dot} In order to save the Vertipaq Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
+            )

     if export == "table":
         spark = SparkSession.builder.getOrCreate()

         lakehouse_id = fabric.get_lakehouse_id()
+        lake_workspace = fabric.resolve_workspace_name()
         lakehouse = resolve_lakehouse_name(
-            lakehouse_id=lakehouse_id, workspace=
+            lakehouse_id=lakehouse_id, workspace=lake_workspace
         )
         lakeTName = "vertipaq_analyzer_model"

-        lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=
+        lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace)
         lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName]

             query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}"
@@ -465,29 +470,52 @@ def vertipaq_analyzer(
             "export_Model": ["Model", export_Model],
         }

-        print(
+        print(
+            f"{icons.in_progress} Saving Vertipaq Analyzer to delta tables in the lakehouse...\n"
+        )
         now = datetime.datetime.now()
+        dfD = fabric.list_datasets(workspace=workspace, mode="rest")
+        dfD_filt = dfD[dfD["Dataset Name"] == dataset]
+        configured_by = dfD_filt["Configured By"].iloc[0]
+        capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace)
+
         for key, (obj, df) in dfMap.items():
-            df["
+            df["Capacity Name"] = capacity_name
+            df["Capacity Id"] = capacity_id
+            df["Configured By"] = configured_by
             df["Workspace Name"] = workspace
+            df["Workspace Id"] = fabric.resolve_workspace_id(workspace)
             df["Dataset Name"] = dataset
+            df["Dataset Id"] = resolve_dataset_id(dataset, workspace)
             df["RunId"] = runId
+            df["Timestamp"] = now

-            colName = "
+            colName = "Capacity Name"
             df.insert(0, colName, df.pop(colName))
-            colName = "
+            colName = "Capacity Id"
             df.insert(1, colName, df.pop(colName))
+            colName = "Workspace Name"
+            df.insert(2, colName, df.pop(colName))
+            colName = "Workspace Id"
+            df.insert(3, colName, df.pop(colName))
+            colName = "Dataset Name"
+            df.insert(4, colName, df.pop(colName))
+            colName = "Dataset Id"
+            df.insert(5, colName, df.pop(colName))
+            colName = "Configured By"
+            df.insert(6, colName, df.pop(colName))

             df.columns = df.columns.str.replace(" ", "_")

             delta_table_name = f"VertipaqAnalyzer_{obj}".lower()
+            save_as_delta_table(
+                dataframe=df,
+                delta_table_name=delta_table_name,
+                write_mode="append",
+                merge_schema=True,
             )

+    # Export vertipaq to zip file within the lakehouse
     if export == "zip":
         dataFrames = {
             "dfModel": dfModel,
@@ -510,13 +538,13 @@ def vertipaq_analyzer(

         # Create CSV files based on dataframes
         for fileName, df in dataFrames.items():
-            filePath = os.path.join(subFolderPath, fileName
+            filePath = os.path.join(subFolderPath, f"{fileName}{ext}")
             df.to_csv(filePath, index=False)

         # Create a zip file and add CSV files to it
         with zipfile.ZipFile(zipFilePath, "w") as zipf:
             for fileName in dataFrames:
-                filePath = os.path.join(subFolderPath, fileName
+                filePath = os.path.join(subFolderPath, f"{fileName}{ext}")
                 zipf.write(filePath, os.path.basename(filePath))

         # Clean up: remove the individual CSV files
@@ -525,7 +553,8 @@ def vertipaq_analyzer(
             if os.path.exists(filePath):
                 os.remove(filePath)
         print(
-            f"{icons.green_dot} The Vertipaq Analyzer info for the '{dataset}' semantic model in the '{workspace}' workspace has been saved
+            f"{icons.green_dot} The Vertipaq Analyzer info for the '{dataset}' semantic model in the '{workspace}' workspace has been saved "
+            f"to the 'Vertipaq Analyzer/{zipFileName}' in the default lakehouse attached to this notebook."
         )

@@ -832,7 +861,7 @@ def visualize_vertipaq(dataframes):
                 (tooltipDF["ViewName"] == vw) & (tooltipDF["ColumnName"] == col)
             ]
             tt = tooltipDF_filt["Tooltip"].iloc[0]
-        except:
+        except Exception:
             pass
         df_html = df_html.replace(f"<th>{col}</th>", f'<th title="{tt}">{col}</th>')
         content_html += (
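For orientation, here is a minimal usage sketch of the 0.7.0 signature shown above. It assumes vertipaq_analyzer is re-exported at the package top level and uses a placeholder model name; the lakehouse_workspace argument is gone because the Direct Lake source is now resolved internally via get_direct_lake_source, and passing it only triggers the deprecation notice.

from sempy_labs import vertipaq_analyzer  # top-level re-export assumed

# Analyze a semantic model and append the results to delta tables in the
# lakehouse attached to the notebook; export accepts None, 'zip' or 'table'.
vertipaq_analyzer(
    dataset="Sales Model",      # placeholder semantic model name
    workspace=None,             # None resolves to the current workspace
    export="table",
    read_stats_from_data=True,  # read cardinality/missing rows from the data
)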
sempy_labs/directlake/__init__.py
CHANGED

@@ -1,7 +1,9 @@
 from sempy_labs.directlake._directlake_schema_compare import direct_lake_schema_compare
 from sempy_labs.directlake._directlake_schema_sync import direct_lake_schema_sync
-from sempy_labs.directlake.
+from sempy_labs.directlake._dl_helper import (
     check_fallback_reason,
+    generate_direct_lake_semantic_model,
+    get_direct_lake_source,
 )
 from sempy_labs.directlake._get_directlake_lakehouse import get_direct_lake_lakehouse
 from sempy_labs.directlake._get_shared_expression import get_shared_expression
@@ -21,6 +23,7 @@ from sempy_labs.directlake._update_directlake_model_lakehouse_connection import
 )
 from sempy_labs.directlake._update_directlake_partition_entity import (
     update_direct_lake_partition_entity,
+    add_table_to_direct_lake_semantic_model,
 )
 from sempy_labs.directlake._warm_cache import (
     warm_direct_lake_cache_isresident,
@@ -42,4 +45,7 @@ __all__ = [
     "update_direct_lake_partition_entity",
     "warm_direct_lake_cache_isresident",
     "warm_direct_lake_cache_perspective",
+    "add_table_to_direct_lake_semantic_model",
+    "generate_direct_lake_semantic_model",
+    "get_direct_lake_source",
 ]
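A short sketch of the new exports listed in __all__ above (the dataset name is a placeholder). As used in the _vertipaq.py diff, get_direct_lake_source returns the artifact type plus the name, id and workspace id of the model's Direct Lake source.

from sempy_labs.directlake import (
    add_table_to_direct_lake_semantic_model,
    generate_direct_lake_semantic_model,
    get_direct_lake_source,
)

# Resolve the lakehouse (or warehouse) behind a Direct Lake model.
artifact_type, source_name, source_id, source_workspace_id = get_direct_lake_source(
    dataset="Sales Model",  # placeholder model name
    workspace=None,         # None resolves to the current workspace
)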
sempy_labs/directlake/_directlake_schema_compare.py
CHANGED

@@ -1,24 +1,22 @@
-import sempy
 import sempy.fabric as fabric
 import pandas as pd
 from sempy_labs._helper_functions import (
     format_dax_object_name,
-    resolve_lakehouse_name,
-    get_direct_lake_sql_endpoint,
 )
 from IPython.display import display
-from sempy_labs.lakehouse
+from sempy_labs.lakehouse import get_lakehouse_columns
+from sempy_labs.directlake._dl_helper import get_direct_lake_source
 from sempy_labs._list_functions import list_tables
 from typing import Optional
 import sempy_labs._icons as icons
 from sempy._utils._log import log

+
 @log
 def direct_lake_schema_compare(
     dataset: str,
     workspace: Optional[str] = None,
-    lakehouse_workspace: Optional[str] = None,
+    **kwargs,
 ):
     """
     Checks that all the tables in a Direct Lake semantic model map to tables in their corresponding lakehouse and that the columns in each table exist.
@@ -31,38 +29,41 @@ def direct_lake_schema_compare(
         The Fabric workspace name.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-    lakehouse : str, default=None
-        The Fabric lakehouse used by the Direct Lake semantic model.
-        Defaults to None which resolves to the lakehouse attached to the notebook.
-    lakehouse_workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
-        Defaults to None which resolves to the workspace of the attached lakehouse
-        or if no lakehouse attached, resolves to the workspace of the notebook.
     """

+    if "lakehouse" in kwargs:
+        print(
+            "The 'lakehouse' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+        )
+        del kwargs["lakehouse"]
+    if "lakehouse_workspace" in kwargs:
+        print(
+            "The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+        )
+        del kwargs["lakehouse_workspace"]
+
     workspace = fabric.resolve_workspace_name(workspace)

+    artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+        get_direct_lake_source(dataset=dataset, workspace=workspace)
+    )
+    lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)

+    if artifact_type == "Warehouse":
+        raise ValueError(
+            f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
+        )

     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
-    sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)
-    dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint")
-    dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]
-    if len(dfI_filt) == 0:
-        raise ValueError(f"{icons.red_dot} The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified.")

     if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
-        raise ValueError(
+        raise ValueError(
+            f"{icons.red_dot} The '{dataset}' semantic model is not in Direct Lake mode."
+        )

     dfT = list_tables(dataset, workspace)
     dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
-    lc = get_lakehouse_columns(
+    lc = get_lakehouse_columns(lakehouse_name, lakehouse_workspace)

     dfT.rename(columns={"Type": "Table Type"}, inplace=True)
     dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
@@ -88,19 +89,21 @@ def direct_lake_schema_compare(

     if len(missingtbls) == 0:
         print(
-            f"{icons.green_dot} All tables exist in the '{
+            f"{icons.green_dot} All tables exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
         )
     else:
         print(
-            f"{icons.yellow_dot} The following tables exist in the '{dataset}' semantic model within the '{workspace}' workspace
+            f"{icons.yellow_dot} The following tables exist in the '{dataset}' semantic model within the '{workspace}' workspace"
+            f" but do not exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
         )
         display(missingtbls)
     if len(missingcols) == 0:
         print(
-            f"{icons.green_dot} All columns exist in the '{
+            f"{icons.green_dot} All columns exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
        )
     else:
         print(
-            f"{icons.yellow_dot} The following columns exist in the '{dataset}' semantic model within the '{workspace}' workspace
+            f"{icons.yellow_dot} The following columns exist in the '{dataset}' semantic model within the '{workspace}' workspace "
+            f"but do not exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
         )
         display(missingcols)