semantic-link-labs 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of semantic-link-labs might be problematic.

Files changed (103)
  1. semantic_link_labs-0.7.0.dist-info/METADATA +148 -0
  2. semantic_link_labs-0.7.0.dist-info/RECORD +111 -0
  3. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +26 -2
  5. sempy_labs/_ai.py +3 -65
  6. sempy_labs/_bpa_translation/_translations_am-ET.po +828 -0
  7. sempy_labs/_bpa_translation/_translations_ar-AE.po +860 -0
  8. sempy_labs/_bpa_translation/_translations_cs-CZ.po +894 -0
  9. sempy_labs/_bpa_translation/_translations_da-DK.po +894 -0
  10. sempy_labs/_bpa_translation/_translations_de-DE.po +933 -0
  11. sempy_labs/_bpa_translation/_translations_el-GR.po +936 -0
  12. sempy_labs/_bpa_translation/_translations_es-ES.po +915 -0
  13. sempy_labs/_bpa_translation/_translations_fa-IR.po +883 -0
  14. sempy_labs/_bpa_translation/_translations_fr-FR.po +938 -0
  15. sempy_labs/_bpa_translation/_translations_ga-IE.po +912 -0
  16. sempy_labs/_bpa_translation/_translations_he-IL.po +855 -0
  17. sempy_labs/_bpa_translation/_translations_hi-IN.po +892 -0
  18. sempy_labs/_bpa_translation/_translations_hu-HU.po +910 -0
  19. sempy_labs/_bpa_translation/_translations_is-IS.po +887 -0
  20. sempy_labs/_bpa_translation/_translations_it-IT.po +931 -0
  21. sempy_labs/_bpa_translation/_translations_ja-JP.po +805 -0
  22. sempy_labs/_bpa_translation/_translations_nl-NL.po +924 -0
  23. sempy_labs/_bpa_translation/_translations_pl-PL.po +913 -0
  24. sempy_labs/_bpa_translation/_translations_pt-BR.po +909 -0
  25. sempy_labs/_bpa_translation/_translations_pt-PT.po +904 -0
  26. sempy_labs/_bpa_translation/_translations_ru-RU.po +909 -0
  27. sempy_labs/_bpa_translation/_translations_ta-IN.po +922 -0
  28. sempy_labs/_bpa_translation/_translations_te-IN.po +896 -0
  29. sempy_labs/_bpa_translation/_translations_th-TH.po +873 -0
  30. sempy_labs/_bpa_translation/_translations_zh-CN.po +767 -0
  31. sempy_labs/_bpa_translation/_translations_zu-ZA.po +916 -0
  32. sempy_labs/_clear_cache.py +9 -4
  33. sempy_labs/_generate_semantic_model.py +30 -56
  34. sempy_labs/_helper_functions.py +358 -14
  35. sempy_labs/_icons.py +10 -1
  36. sempy_labs/_list_functions.py +478 -237
  37. sempy_labs/_model_bpa.py +194 -18
  38. sempy_labs/_model_bpa_bulk.py +363 -0
  39. sempy_labs/_model_bpa_rules.py +4 -4
  40. sempy_labs/_model_dependencies.py +12 -10
  41. sempy_labs/_one_lake_integration.py +7 -7
  42. sempy_labs/_query_scale_out.py +45 -66
  43. sempy_labs/_refresh_semantic_model.py +7 -0
  44. sempy_labs/_translations.py +154 -1
  45. sempy_labs/_vertipaq.py +103 -90
  46. sempy_labs/directlake/__init__.py +5 -1
  47. sempy_labs/directlake/_directlake_schema_compare.py +27 -31
  48. sempy_labs/directlake/_directlake_schema_sync.py +55 -66
  49. sempy_labs/directlake/_dl_helper.py +233 -0
  50. sempy_labs/directlake/_get_directlake_lakehouse.py +6 -7
  51. sempy_labs/directlake/_get_shared_expression.py +1 -1
  52. sempy_labs/directlake/_guardrails.py +17 -13
  53. sempy_labs/directlake/_update_directlake_partition_entity.py +54 -30
  54. sempy_labs/directlake/_warm_cache.py +1 -1
  55. sempy_labs/lakehouse/_get_lakehouse_tables.py +61 -69
  56. sempy_labs/lakehouse/_lakehouse.py +3 -2
  57. sempy_labs/lakehouse/_shortcuts.py +1 -1
  58. sempy_labs/migration/_create_pqt_file.py +174 -182
  59. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +236 -268
  60. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +75 -73
  61. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +442 -426
  62. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +91 -97
  63. sempy_labs/migration/_refresh_calc_tables.py +92 -101
  64. sempy_labs/report/_BPAReportTemplate.json +232 -0
  65. sempy_labs/report/__init__.py +6 -2
  66. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  67. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  68. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  69. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  70. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  71. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  72. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  73. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  74. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  75. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  76. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  77. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  78. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  79. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  80. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  81. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  82. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  83. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  84. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  85. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  86. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  87. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  88. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  89. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  90. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  91. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  92. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  93. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  94. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  95. sempy_labs/report/_generate_report.py +255 -139
  96. sempy_labs/report/_report_functions.py +26 -33
  97. sempy_labs/report/_report_rebind.py +31 -26
  98. sempy_labs/tom/_model.py +75 -58
  99. semantic_link_labs-0.6.0.dist-info/METADATA +0 -22
  100. semantic_link_labs-0.6.0.dist-info/RECORD +0 -54
  101. sempy_labs/directlake/_fallback.py +0 -60
  102. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/LICENSE +0 -0
  103. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/top_level.txt +0 -0
sempy_labs/_vertipaq.py CHANGED
@@ -9,12 +9,14 @@ import warnings
  from pyspark.sql import SparkSession
  from sempy_labs._helper_functions import (
      format_dax_object_name,
-     get_direct_lake_sql_endpoint,
      resolve_lakehouse_name,
+     resolve_dataset_id,
+     save_as_delta_table,
+     resolve_workspace_capacity,
  )
  from sempy_labs._list_functions import list_relationships
- from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
- from sempy_labs.lakehouse._lakehouse import lakehouse_attached
+ from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
+ from sempy_labs.directlake import get_direct_lake_source
  from typing import Optional
  from sempy._utils._log import log
  import sempy_labs._icons as icons
@@ -25,8 +27,8 @@ def vertipaq_analyzer(
      dataset: str,
      workspace: Optional[str] = None,
      export: Optional[str] = None,
-     lakehouse_workspace: Optional[str] = None,
      read_stats_from_data: Optional[bool] = False,
+     **kwargs,
  ):
      """
      Displays an HTML visualization of the Vertipaq Analyzer statistics from a semantic model.
@@ -43,10 +45,6 @@ def vertipaq_analyzer(
          Specifying 'zip' will export the results to a zip file in your lakehouse (which can be imported using the import_vertipaq_analyzer function.
          Specifying 'table' will export the results to delta tables (appended) in your lakehouse.
          Default value: None.
-     lakehouse_workspace : str, default=None
-         The Fabric workspace used by the lakehouse (for Direct Lake semantic models).
-         Defaults to None which resolves to the workspace of the attached lakehouse
-         or if no lakehouse attached, resolves to the workspace of the notebook.
      read_stats_from_data : bool, default=False
          Setting this parameter to true has the function get Column Cardinality and Missing Rows using DAX (Direct Lake semantic models achieve this using a Spark query to the lakehouse).

@@ -57,6 +55,12 @@ def vertipaq_analyzer(

      from sempy_labs.tom import connect_semantic_model

+     if "lakehouse_workspace" in kwargs:
+         print(
+             f"{icons.info} The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+         )
+         del kwargs["lakehouse_workspace"]
+
      pd.options.mode.copy_on_write = True
      warnings.filterwarnings(
          "ignore", message="createDataFrame attempted Arrow optimization*"
@@ -64,9 +68,6 @@ def vertipaq_analyzer(

      workspace = fabric.resolve_workspace_name(workspace)

-     if lakehouse_workspace is None:
-         lakehouse_workspace = workspace
-
      dfT = fabric.list_tables(dataset=dataset, extended=True, workspace=workspace)
      dfT.rename(columns={"Name": "Table Name"}, inplace=True)
      dfC = fabric.list_columns(dataset=dataset, extended=True, workspace=workspace)
@@ -77,6 +78,9 @@ def vertipaq_analyzer(
      dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"])
      dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
      dfP = fabric.list_partitions(dataset=dataset, extended=True, workspace=workspace)
+     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+         get_direct_lake_source(dataset=dataset, workspace=workspace)
+     )

      with connect_semantic_model(
          dataset=dataset, readonly=True, workspace=workspace
@@ -91,7 +95,7 @@ def vertipaq_analyzer(

          # Direct Lake
          if read_stats_from_data:
-             if is_direct_lake:
+             if is_direct_lake and artifact_type == "Lakehouse":
                  dfC = pd.merge(
                      dfC,
                      dfP[["Table Name", "Query", "Source Type"]],
@@ -102,69 +106,54 @@ def vertipaq_analyzer(
                      (dfC["Source Type"] == "Entity")
                      & (~dfC["Column Name"].str.startswith("RowNumber-"))
                  ]
-                 sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)

-                 # Get lakehouse name from SQL Endpoint ID
-                 dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint")
-                 dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]
-
-                 if len(dfI_filt) == 0:
-                     raise ValueError(
-                         f"{icons.red_dot} The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace."
-                         "Please update the lakehouse_workspace parameter."
+                 object_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
+                 current_workspace_id = fabric.get_workspace_id()
+                 if current_workspace_id != lakehouse_workspace_id:
+                     lakeTables = get_lakehouse_tables(
+                         lakehouse=lakehouse_name, workspace=object_workspace
                      )
-                 else:
-                     lakehouseName = dfI_filt["Display Name"].iloc[0]
-
-                     current_workspace_id = fabric.get_workspace_id()
-                     current_workspace = fabric.resolve_workspace_name(current_workspace_id)
-                     if current_workspace != lakehouse_workspace:
-                         lakeTables = get_lakehouse_tables(
-                             lakehouse=lakehouseName, workspace=lakehouse_workspace
-                         )
-
-                     sql_statements = []
-                     spark = SparkSession.builder.getOrCreate()
-                     # Loop through tables
-                     for lakeTName in dfC_flt["Query"].unique():
-                         query = "SELECT "
-                         columns_in_table = dfC_flt.loc[
-                             dfC_flt["Query"] == lakeTName, "Source"
-                         ].unique()
-
-                         # Loop through columns within those tables
-                         for scName in columns_in_table:
-                             query = query + f"COUNT(DISTINCT({scName})) AS {scName}, "
-
-                         query = query[:-2]
-                         if lakehouse_workspace == current_workspace:
-                             query = query + f" FROM {lakehouseName}.{lakeTName}"
-                         else:
-                             lakeTables_filt = lakeTables[
-                                 lakeTables["Table Name"] == lakeTName
-                             ]
-                             tPath = lakeTables_filt["Location"].iloc[0]
-
-                             df = spark.read.format("delta").load(tPath)
-                             tempTableName = "delta_table_" + lakeTName
-                             df.createOrReplaceTempView(tempTableName)
-                             query = query + f" FROM {tempTableName}"
-                         sql_statements.append((lakeTName, query))
-
-                     for o in sql_statements:
-                         tName = o[0]
-                         query = o[1]
-
-                         df = spark.sql(query)
-
-                         for column in df.columns:
-                             x = df.collect()[0][column]
-                             for i, r in dfC.iterrows():
-                                 if r["Query"] == tName and r["Source"] == column:
-                                     dfC.at[i, "Cardinality"] = x
-
-                     # Remove column added temporarily
-                     dfC.drop(columns=["Query", "Source Type"], inplace=True)
+
+                 sql_statements = []
+                 spark = SparkSession.builder.getOrCreate()
+                 # Loop through tables
+                 for lakeTName in dfC_flt["Query"].unique():
+                     query = "SELECT "
+                     columns_in_table = dfC_flt.loc[
+                         dfC_flt["Query"] == lakeTName, "Source"
+                     ].unique()
+
+                     # Loop through columns within those tables
+                     for scName in columns_in_table:
+                         query = query + f"COUNT(DISTINCT(`{scName}`)) AS `{scName}`, "
+
+                     query = query[:-2]
+                     if lakehouse_workspace_id == current_workspace_id:
+                         query = query + f" FROM {lakehouse_name}.{lakeTName}"
+                     else:
+                         lakeTables_filt = lakeTables[lakeTables["Table Name"] == lakeTName]
+                         tPath = lakeTables_filt["Location"].iloc[0]
+
+                         df = spark.read.format("delta").load(tPath)
+                         tempTableName = "delta_table_" + lakeTName
+                         df.createOrReplaceTempView(tempTableName)
+                         query = query + f" FROM {tempTableName}"
+                     sql_statements.append((lakeTName, query))
+
+                 for o in sql_statements:
+                     tName = o[0]
+                     query = o[1]
+
+                     df = spark.sql(query)
+
+                     for column in df.columns:
+                         x = df.collect()[0][column]
+                         for i, r in dfC.iterrows():
+                             if r["Query"] == tName and r["Source"] == column:
+                                 dfC.at[i, "Cardinality"] = x
+
+                 # Remove column added temporarily
+                 dfC.drop(columns=["Query", "Source Type"], inplace=True)

                  # Direct Lake missing rows
                  dfR = pd.merge(
@@ -211,11 +200,11 @@ def vertipaq_analyzer(
                      toTable = r["To Lake Table"]
                      toColumn = r["To Lake Column"]

-                     if lakehouse_workspace == current_workspace:
+                     if lakehouse_workspace_id == current_workspace_id:
                          query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"
                      else:
-                         tempTableFrom = "delta_table_" + fromTable
-                         tempTableTo = "delta_table_" + toTable
+                         tempTableFrom = f"delta_table_{fromTable}"
+                         tempTableTo = f"delta_table_{toTable}"

                          query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {tempTableFrom} as f\nleft join {tempTableTo} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"

@@ -226,7 +215,7 @@ def vertipaq_analyzer(
                      dfR.at[i, "Missing Rows"] = missingRows

                  dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)
-             else:
+             elif not is_direct_lake:
                  # Calculate missing rows using DAX for non-direct lake
                  for i, r in dfR.iterrows():
                      fromTable = r["From Table"]
@@ -395,6 +384,8 @@ def vertipaq_analyzer(
          by="Used Size", ascending=False
      )
      dfH_filt.reset_index(drop=True, inplace=True)
+     dfH_filt.fillna({"Used Size": 0}, inplace=True)
+     dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int)
      export_Hier = dfH_filt.copy()
      intList = ["Used Size"]
      dfH_filt[intList] = dfH_filt[intList].applymap("{:,}".format)
@@ -420,6 +411,7 @@ def vertipaq_analyzer(
          index=[0],
      )
      dfModel.reset_index(drop=True, inplace=True)
+     dfModel["Default Mode"] = dfModel["Default Mode"].astype(str)
      export_Model = dfModel.copy()
      intList = ["Total Size", "Table Count", "Column Count"]
      dfModel[intList] = dfModel[intList].applymap("{:,}".format)
@@ -442,8 +434,7 @@ def vertipaq_analyzer(

      # Export vertipaq to delta tables in lakehouse
      if export in ["table", "zip"]:
-         lakeAttach = lakehouse_attached()
-         if lakeAttach is False:
+         if not lakehouse_attached():
              raise ValueError(
                  f"{icons.red_dot} In order to save the Vertipaq Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
              )
@@ -452,12 +443,13 @@ def vertipaq_analyzer(
          spark = SparkSession.builder.getOrCreate()

          lakehouse_id = fabric.get_lakehouse_id()
+         lake_workspace = fabric.resolve_workspace_name()
          lakehouse = resolve_lakehouse_name(
-             lakehouse_id=lakehouse_id, workspace=workspace
+             lakehouse_id=lakehouse_id, workspace=lake_workspace
          )
          lakeTName = "vertipaq_analyzer_model"

-         lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+         lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace)
          lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName]

          query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}"
@@ -482,24 +474,45 @@ def vertipaq_analyzer(
              f"{icons.in_progress} Saving Vertipaq Analyzer to delta tables in the lakehouse...\n"
          )
          now = datetime.datetime.now()
+         dfD = fabric.list_datasets(workspace=workspace, mode="rest")
+         dfD_filt = dfD[dfD["Dataset Name"] == dataset]
+         configured_by = dfD_filt["Configured By"].iloc[0]
+         capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace)
+
          for key, (obj, df) in dfMap.items():
-             df["Timestamp"] = now
+             df["Capacity Name"] = capacity_name
+             df["Capacity Id"] = capacity_id
+             df["Configured By"] = configured_by
              df["Workspace Name"] = workspace
+             df["Workspace Id"] = fabric.resolve_workspace_id(workspace)
              df["Dataset Name"] = dataset
+             df["Dataset Id"] = resolve_dataset_id(dataset, workspace)
              df["RunId"] = runId
+             df["Timestamp"] = now

-             colName = "Workspace Name"
+             colName = "Capacity Name"
              df.insert(0, colName, df.pop(colName))
-             colName = "Dataset Name"
+             colName = "Capacity Id"
              df.insert(1, colName, df.pop(colName))
+             colName = "Workspace Name"
+             df.insert(2, colName, df.pop(colName))
+             colName = "Workspace Id"
+             df.insert(3, colName, df.pop(colName))
+             colName = "Dataset Name"
+             df.insert(4, colName, df.pop(colName))
+             colName = "Dataset Id"
+             df.insert(5, colName, df.pop(colName))
+             colName = "Configured By"
+             df.insert(6, colName, df.pop(colName))

              df.columns = df.columns.str.replace(" ", "_")

              delta_table_name = f"VertipaqAnalyzer_{obj}".lower()
-             spark_df = spark.createDataFrame(df)
-             spark_df.write.mode("append").format("delta").saveAsTable(delta_table_name)
-             print(
-                 f"{icons.bullet} Vertipaq Analyzer results for '{obj}' have been appended to the '{delta_table_name}' delta table."
+             save_as_delta_table(
+                 dataframe=df,
+                 delta_table_name=delta_table_name,
+                 write_mode="append",
+                 merge_schema=True,
              )

          # Export vertipaq to zip file within the lakehouse
@@ -525,13 +538,13 @@ def vertipaq_analyzer(

          # Create CSV files based on dataframes
          for fileName, df in dataFrames.items():
-             filePath = os.path.join(subFolderPath, fileName + ext)
+             filePath = os.path.join(subFolderPath, f"{fileName}{ext}")
              df.to_csv(filePath, index=False)

          # Create a zip file and add CSV files to it
          with zipfile.ZipFile(zipFilePath, "w") as zipf:
              for fileName in dataFrames:
-                 filePath = os.path.join(subFolderPath, fileName + ext)
+                 filePath = os.path.join(subFolderPath, f"{fileName}{ext}")
                  zipf.write(filePath, os.path.basename(filePath))

          # Clean up: remove the individual CSV files
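
With 0.7.0, vertipaq_analyzer deprecates the lakehouse_workspace parameter (the Direct Lake source is now resolved via get_direct_lake_source) and enriches the exported delta tables with capacity, workspace, dataset and Configured By columns. A minimal usage sketch based on the signature shown above; the dataset name is a placeholder, and the top-level import assumes vertipaq_analyzer is re-exported from the sempy_labs package root:

    import sempy_labs as labs  # assumes vertipaq_analyzer is exposed at the package root

    # Display the Vertipaq Analyzer statistics and append them to delta tables
    # in the lakehouse attached to the notebook (export=None would only display them).
    labs.vertipaq_analyzer(
        dataset="Sales Model",        # placeholder semantic model name
        workspace=None,               # resolves to the attached lakehouse's or notebook's workspace
        export="table",
        read_stats_from_data=True,    # Direct Lake models gather cardinality via Spark
    )
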
sempy_labs/directlake/__init__.py CHANGED
@@ -1,7 +1,9 @@
  from sempy_labs.directlake._directlake_schema_compare import direct_lake_schema_compare
  from sempy_labs.directlake._directlake_schema_sync import direct_lake_schema_sync
- from sempy_labs.directlake._fallback import (
+ from sempy_labs.directlake._dl_helper import (
      check_fallback_reason,
+     generate_direct_lake_semantic_model,
+     get_direct_lake_source,
  )
  from sempy_labs.directlake._get_directlake_lakehouse import get_direct_lake_lakehouse
  from sempy_labs.directlake._get_shared_expression import get_shared_expression
@@ -44,4 +46,6 @@ __all__ = [
      "warm_direct_lake_cache_isresident",
      "warm_direct_lake_cache_perspective",
      "add_table_to_direct_lake_semantic_model",
+     "generate_direct_lake_semantic_model",
+     "get_direct_lake_source",
  ]
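
The new sempy_labs.directlake exports make the Direct Lake source lookup available directly. A short sketch of get_direct_lake_source as used in the diffs above; judging from the unpacking shown there, the return tuple is (artifact type, artifact name, artifact id, workspace id of the artifact), and the dataset and workspace names below are placeholders:

    from sempy_labs.directlake import get_direct_lake_source

    artifact_type, artifact_name, artifact_id, artifact_workspace_id = get_direct_lake_source(
        dataset="Sales Model", workspace="My Workspace"
    )
    # artifact_type is e.g. "Lakehouse" or "Warehouse"; downstream functions in this
    # release branch on it instead of requiring lakehouse/lakehouse_workspace arguments.
    print(artifact_type, artifact_name)
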
sempy_labs/directlake/_directlake_schema_compare.py CHANGED
@@ -2,11 +2,10 @@ import sempy.fabric as fabric
  import pandas as pd
  from sempy_labs._helper_functions import (
      format_dax_object_name,
-     resolve_lakehouse_name,
-     get_direct_lake_sql_endpoint,
  )
  from IPython.display import display
- from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns
+ from sempy_labs.lakehouse import get_lakehouse_columns
+ from sempy_labs.directlake._dl_helper import get_direct_lake_source
  from sempy_labs._list_functions import list_tables
  from typing import Optional
  import sempy_labs._icons as icons
@@ -17,8 +16,7 @@ from sempy._utils._log import log
  def direct_lake_schema_compare(
      dataset: str,
      workspace: Optional[str] = None,
-     lakehouse: Optional[str] = None,
-     lakehouse_workspace: Optional[str] = None,
+     **kwargs,
  ):
      """
      Checks that all the tables in a Direct Lake semantic model map to tables in their corresponding lakehouse and that the columns in each table exist.
@@ -31,35 +29,33 @@ def direct_lake_schema_compare(
          The Fabric workspace name.
          Defaults to None which resolves to the workspace of the attached lakehouse
          or if no lakehouse attached, resolves to the workspace of the notebook.
-     lakehouse : str, default=None
-         The Fabric lakehouse used by the Direct Lake semantic model.
-         Defaults to None which resolves to the lakehouse attached to the notebook.
-     lakehouse_workspace : str, default=None
-         The Fabric workspace used by the lakehouse.
-         Defaults to None which resolves to the workspace of the attached lakehouse
-         or if no lakehouse attached, resolves to the workspace of the notebook.
      """

-     workspace = fabric.resolve_workspace_name(workspace)
-
-     if lakehouse_workspace is None:
-         lakehouse_workspace = workspace
+     if "lakehouse" in kwargs:
+         print(
+             "The 'lakehouse' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+         )
+         del kwargs["lakehouse"]
+     if "lakehouse_workspace" in kwargs:
+         print(
+             "The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+         )
+         del kwargs["lakehouse_workspace"]

-     if lakehouse is None:
-         lakehouse_id = fabric.get_lakehouse_id()
-         lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
+     workspace = fabric.resolve_workspace_name(workspace)

-     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
-     sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)
-     dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint")
-     dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]
+     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+         get_direct_lake_source(dataset=dataset, workspace=workspace)
+     )
+     lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)

-     if len(dfI_filt) == 0:
+     if artifact_type == "Warehouse":
          raise ValueError(
-             f"{icons.red_dot} The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the "
-             f"'{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified."
+             f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
          )

+     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+
      if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
          raise ValueError(
              f"{icons.red_dot} The '{dataset}' semantic model is not in Direct Lake mode."
@@ -67,7 +63,7 @@ def direct_lake_schema_compare(

      dfT = list_tables(dataset, workspace)
      dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
-     lc = get_lakehouse_columns(lakehouse, lakehouse_workspace)
+     lc = get_lakehouse_columns(lakehouse_name, lakehouse_workspace)

      dfT.rename(columns={"Type": "Table Type"}, inplace=True)
      dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
@@ -93,21 +89,21 @@ def direct_lake_schema_compare(

      if len(missingtbls) == 0:
          print(
-             f"{icons.green_dot} All tables exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+             f"{icons.green_dot} All tables exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
          )
      else:
          print(
              f"{icons.yellow_dot} The following tables exist in the '{dataset}' semantic model within the '{workspace}' workspace"
-             f" but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+             f" but do not exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
          )
          display(missingtbls)
      if len(missingcols) == 0:
          print(
-             f"{icons.green_dot} All columns exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+             f"{icons.green_dot} All columns exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
          )
      else:
          print(
              f"{icons.yellow_dot} The following columns exist in the '{dataset}' semantic model within the '{workspace}' workspace "
-             f"but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+             f"but do not exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
          )
          display(missingcols)
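
Since the lakehouse and lakehouse_workspace parameters of direct_lake_schema_compare are now deprecated no-ops, a 0.7.0 call only needs the model and, optionally, its workspace; the backing lakehouse is resolved through get_direct_lake_source. A sketch with placeholder names:

    from sempy_labs.directlake import direct_lake_schema_compare

    # Passing 0.6.0-style lakehouse/lakehouse_workspace arguments would only trigger
    # the deprecation notice shown in the diff; they are no longer required.
    direct_lake_schema_compare(dataset="Sales Model", workspace="My Workspace")
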
sempy_labs/directlake/_directlake_schema_sync.py CHANGED
@@ -1,13 +1,8 @@
  import sempy
  import sempy.fabric as fabric
- import pandas as pd
- from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns
+ from sempy_labs.lakehouse import get_lakehouse_columns
+ from sempy_labs.directlake._dl_helper import get_direct_lake_source
  from sempy_labs.tom import connect_semantic_model
- from sempy_labs._helper_functions import (
-     format_dax_object_name,
-     resolve_lakehouse_name,
-     get_direct_lake_sql_endpoint,
- )
  from typing import Optional
  from sempy._utils._log import log
  import sempy_labs._icons as icons
@@ -18,8 +13,7 @@ def direct_lake_schema_sync(
      dataset: str,
      workspace: Optional[str] = None,
      add_to_model: Optional[bool] = False,
-     lakehouse: Optional[str] = None,
-     lakehouse_workspace: Optional[str] = None,
+     **kwargs,
  ):
      """
      Shows/adds columns which exist in the lakehouse but do not exist in the semantic model (only for tables in the semantic model).
@@ -34,84 +28,79 @@ def direct_lake_schema_sync(
          or if no lakehouse attached, resolves to the workspace of the notebook.
      add_to_model : bool, default=False
          If set to True, columns which exist in the lakehouse but do not exist in the semantic model are added to the semantic model. No new tables are added.
-     lakehouse : str, default=None
-         The Fabric lakehouse used by the Direct Lake semantic model.
-         Defaults to None which resolves to the lakehouse attached to the notebook.
-     lakehouse_workspace : str, default=None
-         The Fabric workspace used by the lakehouse.
-         Defaults to None which resolves to the workspace of the attached lakehouse
-         or if no lakehouse attached, resolves to the workspace of the notebook.
      """

      sempy.fabric._client._utils._init_analysis_services()
      import Microsoft.AnalysisServices.Tabular as TOM
      import System

-     workspace = fabric.resolve_workspace_name(workspace)
-
-     if lakehouse_workspace is None:
-         lakehouse_workspace = workspace
-
-     if lakehouse is None:
-         lakehouse_id = fabric.get_lakehouse_id()
-         lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
+     if "lakehouse" in kwargs:
+         print(
+             "The 'lakehouse' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+         )
+         del kwargs["lakehouse"]
+     if "lakehouse_workspace" in kwargs:
+         print(
+             "The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+         )
+         del kwargs["lakehouse_workspace"]

-     sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)
+     workspace = fabric.resolve_workspace_name(workspace)

-     dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint")
-     dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]
+     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+         get_direct_lake_source(dataset=dataset, workspace=workspace)
+     )

-     if len(dfI_filt) == 0:
+     if artifact_type == "Warehouse":
          raise ValueError(
-             f"{icons.red_dot} The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the "
-             f"'{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified."
+             f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
          )
+     lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)

-     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
-     dfP_filt = dfP[dfP["Source Type"] == "Entity"]
-     dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
-     dfC_filt = dfC[dfC["Table Name"].isin(dfP_filt["Table Name"].values)]
-     dfC_filt = pd.merge(
-         dfC_filt, dfP_filt[["Table Name", "Query"]], on="Table Name", how="left"
-     )
-     dfC_filt["Column Object"] = format_dax_object_name(
-         dfC_filt["Query"], dfC_filt["Source"]
-     )
+     if artifact_type == "Warehouse":
+         raise ValueError(
+             f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
+         )

-     lc = get_lakehouse_columns(lakehouse, lakehouse_workspace)
-     lc_filt = lc[lc["Table Name"].isin(dfP_filt["Query"].values)]
+     lc = get_lakehouse_columns(lakehouse_name, lakehouse_workspace)

      with connect_semantic_model(
          dataset=dataset, readonly=False, workspace=workspace
      ) as tom:

-         for i, r in lc_filt.iterrows():
+         for i, r in lc.iterrows():
              lakeTName = r["Table Name"]
              lakeCName = r["Column Name"]
-             fullColName = r["Full Column Name"]
              dType = r["Data Type"]

-             if fullColName not in dfC_filt["Column Object"].values:
-                 dfL = dfP_filt[dfP_filt["Query"] == lakeTName]
-                 tName = dfL["Table Name"].iloc[0]
-                 if add_to_model:
-                     col = TOM.DataColumn()
-                     col.Name = lakeCName
-                     col.SourceColumn = lakeCName
-                     dt = icons.data_type_mapping.get(dType)
-                     try:
-                         col.DataType = System.Enum.Parse(TOM.DataType, dt)
-                     except Exception as e:
-                         raise ValueError(
-                             f"{icons.red_dot} Failed to map '{dType}' data type to the semantic model data types."
-                         ) from e
-
-                     tom.model.Tables[tName].Columns.Add(col)
-                     print(
-                         f"{icons.green_dot} The '{lakeCName}' column has been added to the '{tName}' table as a '{dt}' "
-                         f"data type within the '{dataset}' semantic model within the '{workspace}' workspace."
-                     )
-                 else:
+             if any(
+                 p.Source.EntityName == lakeTName
+                 for p in tom.all_partitions()
+                 if p.SourceType == TOM.PartitionSourceType.Entity
+             ):
+                 table_name = next(
+                     t.Name
+                     for t in tom.model.Tables
+                     for p in t.Partitions
+                     if p.SourceType == TOM.PartitionSourceType.Entity
+                     and p.Source.EntityName == lakeTName
+                 )
+
+                 if not any(
+                     c.SourceColumn == lakeCName and c.Parent.Name == table_name
+                     for c in tom.all_columns()
+                 ):
                      print(
-                         f"{icons.yellow_dot} The {fullColName} column exists in the lakehouse but not in the '{tName}' table in the '{dataset}' semantic model within the '{workspace}' workspace."
+                         f"{icons.yellow_dot} The '{lakeCName}' column exists in the '{lakeTName}' lakehouse table but not in the '{dataset}' semantic model within the '{workspace}' workspace."
                      )
+                     if add_to_model:
+                         dt = icons.data_type_mapping.get(dType)
+                         tom.add_data_column(
+                             table_name=table_name,
+                             column_name=lakeCName,
+                             source_column=lakeCName,
+                             data_type=System.Enum.Parse(TOM.DataType, dt),
+                         )
+                         print(
+                             f"{icons.green_dot} The '{lakeCName}' column in the '{lakeTName}' lakehouse table was added to the '{dataset}' semantic model within the '{workspace}' workspace."
+                         )
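
direct_lake_schema_sync follows the same pattern: the backing lakehouse is resolved from the model itself, and add_to_model controls whether missing lakehouse columns are added to the corresponding tables (via tom.add_data_column in this release). A sketch with placeholder names:

    from sempy_labs.directlake import direct_lake_schema_sync

    # Report lakehouse columns that are missing from the semantic model and,
    # because add_to_model=True, add them to the matching tables.
    direct_lake_schema_sync(
        dataset="Sales Model",      # placeholder semantic model name
        workspace="My Workspace",   # placeholder workspace name
        add_to_model=True,
    )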