semantic-link-labs 0.8.10__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff covers publicly released versions of the package as they appear in their public registry and is provided for informational purposes only.
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/METADATA +6 -5
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/RECORD +81 -80
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +34 -3
- sempy_labs/_authentication.py +80 -4
- sempy_labs/_capacities.py +770 -200
- sempy_labs/_capacity_migration.py +7 -37
- sempy_labs/_clear_cache.py +37 -35
- sempy_labs/_connections.py +13 -13
- sempy_labs/_data_pipelines.py +20 -20
- sempy_labs/_dataflows.py +27 -28
- sempy_labs/_dax.py +41 -47
- sempy_labs/_deployment_pipelines.py +1 -1
- sempy_labs/_environments.py +26 -23
- sempy_labs/_eventhouses.py +16 -15
- sempy_labs/_eventstreams.py +16 -15
- sempy_labs/_external_data_shares.py +18 -20
- sempy_labs/_gateways.py +16 -14
- sempy_labs/_generate_semantic_model.py +107 -62
- sempy_labs/_git.py +105 -43
- sempy_labs/_helper_functions.py +251 -194
- sempy_labs/_job_scheduler.py +227 -0
- sempy_labs/_kql_databases.py +16 -15
- sempy_labs/_kql_querysets.py +16 -15
- sempy_labs/_list_functions.py +150 -126
- sempy_labs/_managed_private_endpoints.py +19 -17
- sempy_labs/_mirrored_databases.py +51 -48
- sempy_labs/_mirrored_warehouses.py +5 -4
- sempy_labs/_ml_experiments.py +16 -15
- sempy_labs/_ml_models.py +15 -14
- sempy_labs/_model_bpa.py +210 -207
- sempy_labs/_model_bpa_bulk.py +2 -2
- sempy_labs/_model_bpa_rules.py +3 -3
- sempy_labs/_model_dependencies.py +55 -29
- sempy_labs/_notebooks.py +29 -25
- sempy_labs/_one_lake_integration.py +23 -26
- sempy_labs/_query_scale_out.py +75 -64
- sempy_labs/_refresh_semantic_model.py +25 -26
- sempy_labs/_spark.py +33 -32
- sempy_labs/_sql.py +19 -12
- sempy_labs/_translations.py +10 -7
- sempy_labs/_vertipaq.py +38 -33
- sempy_labs/_warehouses.py +26 -25
- sempy_labs/_workspace_identity.py +11 -10
- sempy_labs/_workspaces.py +40 -33
- sempy_labs/admin/_basic_functions.py +166 -115
- sempy_labs/admin/_domains.py +7 -2
- sempy_labs/admin/_external_data_share.py +3 -3
- sempy_labs/admin/_git.py +4 -1
- sempy_labs/admin/_items.py +11 -6
- sempy_labs/admin/_scanner.py +10 -5
- sempy_labs/directlake/_directlake_schema_compare.py +25 -16
- sempy_labs/directlake/_directlake_schema_sync.py +24 -12
- sempy_labs/directlake/_dl_helper.py +74 -55
- sempy_labs/directlake/_generate_shared_expression.py +10 -9
- sempy_labs/directlake/_get_directlake_lakehouse.py +32 -36
- sempy_labs/directlake/_get_shared_expression.py +4 -3
- sempy_labs/directlake/_guardrails.py +12 -6
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +15 -9
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +16 -10
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +35 -31
- sempy_labs/directlake/_update_directlake_partition_entity.py +39 -31
- sempy_labs/directlake/_warm_cache.py +87 -65
- sempy_labs/lakehouse/_get_lakehouse_columns.py +23 -26
- sempy_labs/lakehouse/_get_lakehouse_tables.py +27 -38
- sempy_labs/lakehouse/_lakehouse.py +7 -20
- sempy_labs/lakehouse/_shortcuts.py +42 -23
- sempy_labs/migration/_create_pqt_file.py +16 -11
- sempy_labs/migration/_refresh_calc_tables.py +16 -10
- sempy_labs/report/_download_report.py +9 -8
- sempy_labs/report/_generate_report.py +85 -44
- sempy_labs/report/_paginated.py +9 -9
- sempy_labs/report/_report_bpa.py +15 -11
- sempy_labs/report/_report_functions.py +80 -91
- sempy_labs/report/_report_helper.py +8 -4
- sempy_labs/report/_report_list_functions.py +24 -13
- sempy_labs/report/_report_rebind.py +17 -16
- sempy_labs/report/_reportwrapper.py +41 -33
- sempy_labs/tom/_model.py +139 -21
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/top_level.txt +0 -0
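Before and after upgrading, the installed release can be confirmed from a notebook; a minimal sketch, assuming the distribution name `semantic-link-labs` as in the wheel filenames above:

```python
import importlib.metadata

# Prints the installed version, e.g. "0.8.10" or "0.9.0".
print(importlib.metadata.version("semantic-link-labs"))
```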
--- sempy_labs/directlake/_warm_cache.py (0.8.10)
+++ sempy_labs/directlake/_warm_cache.py (0.9.0)
@@ -3,34 +3,39 @@ import pandas as pd
 from tqdm.auto import tqdm
 import numpy as np
 import time
-from sempy_labs._helper_functions import
+from sempy_labs._helper_functions import (
+    format_dax_object_name,
+    resolve_dataset_name_and_id,
+    resolve_workspace_name_and_id,
+)
 from sempy_labs._refresh_semantic_model import refresh_semantic_model
 from sempy_labs._model_dependencies import get_measure_dependencies
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
+from uuid import UUID


 @log
 def warm_direct_lake_cache_perspective(
-    dataset: str,
+    dataset: str | UUID,
     perspective: str,
     add_dependencies: bool = False,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ) -> pd.DataFrame:
     """
     Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective.

     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
     perspective : str
         Name of the perspective which contains objects to be used for warming the cache.
     add_dependencies : bool, default=False
         Includes object dependencies in the cache warming process.
-    workspace : str, default=None
-        The Fabric workspace name.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -40,15 +45,16 @@ def warm_direct_lake_cache_perspective(
         Returns a pandas dataframe showing the columns that have been put into memory.
     """

-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

-    dfP = fabric.list_partitions(dataset=
-    if not any(r["Mode"] == "DirectLake" for
+    dfP = fabric.list_partitions(dataset=dataset_id, workspace=workspace_id)
+    if not any(r["Mode"] == "DirectLake" for _, r in dfP.iterrows()):
         raise ValueError(
-            f"{icons.red_dot} The '{
+            f"{icons.red_dot} The '{dataset_name}' semantic model in the '{workspace_name}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
         )

-    dfPersp = fabric.list_perspectives(dataset=
+    dfPersp = fabric.list_perspectives(dataset=dataset_id, workspace=workspace_id)
     dfPersp["DAX Object Name"] = format_dax_object_name(
         dfPersp["Table Name"], dfPersp["Object Name"]
     )
@@ -65,7 +71,7 @@ def warm_direct_lake_cache_perspective(

     if add_dependencies:
         # Measure dependencies
-        md = get_measure_dependencies(
+        md = get_measure_dependencies(dataset_id, workspace_id)
         md["Referenced Full Object"] = format_dax_object_name(
             md["Referenced Table"], md["Referenced Object"]
         )
@@ -78,7 +84,7 @@ def warm_direct_lake_cache_perspective(

         # Hierarchy dependencies
         dfPersp_h = dfPersp_filt[(dfPersp_filt["Object Type"] == "Hierarchy")]
-        dfH = fabric.list_hierarchies(dataset=
+        dfH = fabric.list_hierarchies(dataset=dataset_id, workspace=workspace_id)
         dfH["Hierarchy Object"] = format_dax_object_name(
             dfH["Table Name"], dfH["Hierarchy Name"]
         )
@@ -92,7 +98,7 @@ def warm_direct_lake_cache_perspective(

         # Relationship dependencies
         unique_table_names = dfPersp_filt["Table Name"].unique()
-        dfR = fabric.list_relationships(dataset=
+        dfR = fabric.list_relationships(dataset=dataset_id, workspace=workspace_id)
         dfR["From Object"] = format_dax_object_name(
             dfR["From Table"], dfR["From Column"]
         )
@@ -120,41 +126,22 @@ def warm_direct_lake_cache_perspective(
     df["Table Name"] = df["Table Name"].str[1:-1]
     df["Column Name"] = df["Column Name"].str[0:-1]

-
-
-    for tableName in (bar := tqdm(tbls)):
-        filtered_list = [
-            value for value in merged_list_unique if value.startswith(f"{tableName}[")
-        ]
-        bar.set_description(f"Warming the '{tableName}' table...")
-        css = ",".join(map(str, filtered_list))
-        dax = """EVALUATE TOPN(1,SUMMARIZECOLUMNS(""" + css + "))" ""
-        fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)
-
-    print(f"{icons.green_dot} The following columns have been put into memory:")
-
-    new_column_order = ["Table Name", "Column Name", "DAX Object Name"]
-    df = df.reindex(columns=new_column_order)
-    df = df[["Table Name", "Column Name"]].sort_values(
-        by=["Table Name", "Column Name"], ascending=True
-    )
-
-    return df
+    return _put_columns_into_memory(dataset=dataset, workspace=workspace, col_df=df)


 @log
 def warm_direct_lake_cache_isresident(
-    dataset: str, workspace: Optional[str] = None
+    dataset: str | UUID, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
     Performs a refresh on the semantic model and puts the columns which were in memory prior to the refresh back into memory.

     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
-    workspace : str, default=None
-        The Fabric workspace name.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -164,46 +151,81 @@ def warm_direct_lake_cache_isresident(
         Returns a pandas dataframe showing the columns that have been put into memory.
     """

-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

-    dfP = fabric.list_partitions(dataset=
-    if not any(r["Mode"] == "DirectLake" for
+    dfP = fabric.list_partitions(dataset=dataset_id, workspace=workspace_id)
+    if not any(r["Mode"] == "DirectLake" for _, r in dfP.iterrows()):
         raise ValueError(
-            f"{icons.red_dot} The '{
+            f"{icons.red_dot} The '{dataset_name}' semantic model in the '{workspace_name}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
         )

     # Identify columns which are currently in memory (Is Resident = True)
-    dfC = fabric.list_columns(dataset=
-    dfC["DAX Object Name"] = format_dax_object_name(
-        dfC["Table Name"], dfC["Column Name"]
-    )
+    dfC = fabric.list_columns(dataset=dataset_id, workspace=workspace_id, extended=True)
     dfC_filtered = dfC[dfC["Is Resident"] == True]

     if len(dfC_filtered) == 0:
         raise ValueError(
-            f"{icons.yellow_dot} At present, no columns are in memory in the '{
+            f"{icons.yellow_dot} At present, no columns are in memory in the '{dataset_name}' semantic model in the '{workspace_name}' workspace."
         )

     # Refresh/frame dataset
-    refresh_semantic_model(
+    refresh_semantic_model(
+        dataset=dataset_id, refresh_type="full", workspace=workspace_id
+    )
     time.sleep(2)

-
-
+    return _put_columns_into_memory(
+        dataset=dataset, workspace=workspace, col_df=dfC_filtered
+    )
+
+
+def _put_columns_into_memory(dataset, workspace, col_df, return_dataframe: bool = True):
+
+    row_limit = 1000000
+
+    dfT = fabric.list_tables(dataset=dataset, workspace=workspace, extended=True)
+    col_df = col_df.copy()
+
+    col_df["DAX Object"] = format_dax_object_name(
+        col_df["Table Name"], col_df["Column Name"]
+    )
+    tbls = col_df["Table Name"].unique()
+
     for table_name in (bar := tqdm(tbls)):
-
-
-
-.
-.
+        dfT_filt = dfT[dfT["Name"] == table_name]
+        col_df_filt = col_df[col_df["Table Name"] == table_name]
+        if not dfT_filt.empty:
+            row_count = dfT_filt["Row Count"].iloc[0]
+            bar.set_description(f"Warming the '{table_name}' table...")
+            if row_count < row_limit:
+                columns = col_df_filt["DAX Object"].tolist()
+                css = ", ".join(columns)
+                dax = f"EVALUATE TOPN(1, SELECTCOLUMNS('{table_name}', {css}))"
+                fabric.evaluate_dax(
+                    dataset=dataset, dax_string=dax, workspace=workspace
+                )
+            else:
+                for _, r in col_df_filt.iterrows():
+                    dax_object = r["DAX Object"]
+                    dax = f"""EVALUATE TOPN(1, SELECTCOLUMNS('{table_name}', {dax_object}))"""
+                    fabric.evaluate_dax(
+                        dataset=dataset, dax_string=dax, workspace=workspace
+                    )
+
+    if return_dataframe:
+        print(
+            f"{icons.green_dot} The following columns have been put into memory. Temperature indicates the current column temperature."
         )
-        dax = f"""EVALUATE TOPN(1,SUMMARIZECOLUMNS({css}))"""
-        fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)

-
-
-
+        dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True)
+        dfC["DAX Object"] = format_dax_object_name(
+            dfC["Table Name"], dfC["Column Name"]
+        )
+        dfC_filt = dfC[dfC["DAX Object"].isin(col_df["DAX Object"].values)]

-
-
-
+        return (
+            dfC_filt[["Table Name", "Column Name", "Is Resident", "Temperature"]]
+            .sort_values(by=["Table Name", "Column Name"], ascending=True)
+            .reset_index(drop=True)
+        )
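A minimal usage sketch of the widened signatures above, assuming these functions remain exposed through `sempy_labs.directlake` as in prior releases; the model name, perspective name, and workspace UUID below are placeholders:

```python
from uuid import UUID
from sempy_labs import directlake

# dataset and workspace now accept either a name or a UUID.
directlake.warm_direct_lake_cache_perspective(
    dataset="Sales Model",      # placeholder semantic model name
    perspective="Reporting",    # placeholder perspective name
    add_dependencies=True,
    workspace=UUID("00000000-0000-0000-0000-000000000000"),  # placeholder workspace ID
)

# Re-warms whichever columns were resident before a full refresh.
df = directlake.warm_direct_lake_cache_isresident(dataset="Sales Model")
```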
--- sempy_labs/lakehouse/_get_lakehouse_columns.py (0.8.10)
+++ sempy_labs/lakehouse/_get_lakehouse_columns.py (0.9.0)
@@ -1,29 +1,29 @@
-import sempy.fabric as fabric
 import pandas as pd
 from pyspark.sql import SparkSession
 from sempy_labs._helper_functions import (
-    resolve_lakehouse_name,
     format_dax_object_name,
-
+    resolve_workspace_name_and_id,
+    resolve_lakehouse_name_and_id,
 )
 from typing import Optional
 from sempy._utils._log import log
+from uuid import UUID


 @log
 def get_lakehouse_columns(
-    lakehouse: Optional[str] = None, workspace: Optional[str] = None
+    lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
     Shows the tables and columns of a lakehouse and their respective properties.

     Parameters
     ----------
-    lakehouse : str, default=None
-        The Fabric lakehouse.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    lakehouse_workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    lakehouse_workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -46,35 +46,32 @@ def get_lakehouse_columns(
         ]
     )

-
-
-
-
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
-    else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse=lakehouse, workspace=workspace_id
+    )

     spark = SparkSession.builder.getOrCreate()

     tables = get_lakehouse_tables(
-        lakehouse=
+        lakehouse=lakehouse_id, workspace=workspace_id, extended=False, count_rows=False
     )
     tables_filt = tables[tables["Format"] == "delta"]

-    for
-
-
-        delta_table = DeltaTable.forPath(spark,
+    for _, r in tables_filt.iterrows():
+        table_name = r["Table Name"]
+        path = r["Location"]
+        delta_table = DeltaTable.forPath(spark, path)
         sparkdf = delta_table.toDF()

-        for
-
+        for col_name, data_type in sparkdf.dtypes:
+            full_column_name = format_dax_object_name(table_name, col_name)
             new_data = {
-                "Workspace Name":
+                "Workspace Name": workspace_name,
                 "Lakehouse Name": lakehouse,
-                "Table Name":
-                "Column Name":
-                "Full Column Name":
+                "Table Name": table_name,
+                "Column Name": col_name,
+                "Full Column Name": full_column_name,
                 "Data Type": data_type,
             }
             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
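A short usage sketch of the updated signature, assuming `get_lakehouse_columns` is still exported from `sempy_labs.lakehouse`; the lakehouse name is a placeholder:

```python
from sempy_labs.lakehouse import get_lakehouse_columns

# Both arguments are optional and accept a name or a UUID;
# None falls back to the attached lakehouse / notebook workspace.
df_columns = get_lakehouse_columns(
    lakehouse="MyLakehouse",  # placeholder lakehouse name
    workspace=None,
)
print(df_columns.head())
```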
--- sempy_labs/lakehouse/_get_lakehouse_tables.py (0.8.10)
+++ sempy_labs/lakehouse/_get_lakehouse_tables.py (0.9.0)
@@ -4,10 +4,11 @@ from pyspark.sql import SparkSession
 import pyarrow.parquet as pq
 import datetime
 from sempy_labs._helper_functions import (
-
-    resolve_lakehouse_name,
+    _get_column_aggregate,
     resolve_workspace_name_and_id,
+    resolve_lakehouse_name_and_id,
     pagination,
+    save_as_delta_table,
 )
 from sempy_labs.directlake._guardrails import (
     get_sku_size,
@@ -18,12 +19,13 @@ from typing import Optional
 import sempy_labs._icons as icons
 from sempy._utils._log import log
 from sempy.fabric.exceptions import FabricHTTPException
+from uuid import UUID


 @log
 def get_lakehouse_tables(
-    lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
     extended: bool = False,
     count_rows: bool = False,
     export: bool = False,
@@ -35,11 +37,11 @@ def get_lakehouse_tables(

     Parameters
     ----------
-    lakehouse : str, default=None
-        The Fabric lakehouse.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     extended : bool, default=False
@@ -66,13 +68,10 @@ def get_lakehouse_tables(
         ]
     )

-    (
-
-
-
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
-    else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse=lakehouse, workspace=workspace_id
+    )

     if count_rows: # Setting countrows defaults to extended=True
         extended = True
@@ -104,8 +103,8 @@ def get_lakehouse_tables(
     for r in responses:
         for i in r.get("data", []):
             new_data = {
-                "Workspace Name":
-                "Lakehouse Name":
+                "Workspace Name": workspace_name,
+                "Lakehouse Name": lakehouse_name,
                 "Table Name": i.get("name"),
                 "Format": i.get("format"),
                 "Type": i.get("type"),
@@ -117,7 +116,7 @@ def get_lakehouse_tables(
     df = pd.concat(dfs, ignore_index=True)

     if extended:
-        sku_value = get_sku_size(
+        sku_value = get_sku_size(workspace_id)
         guardrail = get_directlake_guardrails_for_sku(sku_value)
         spark = SparkSession.builder.getOrCreate()
         df["Files"] = None
@@ -178,23 +177,17 @@ def get_lakehouse_tables(
                 f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
             )

-
-
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(
-            lakehouse_id=lakehouse_id, workspace=workspace
-        )
+        (current_lakehouse_name, current_lakehouse_id) = resolve_lakehouse_name_and_id()
        lakeTName = "lakehouse_table_details"
        lakeT_filt = df[df["Table Name"] == lakeTName]

-        query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}"
-
        if len(lakeT_filt) == 0:
-
+            run_id = 1
        else:
-
-
-
+            max_run_id = _get_column_aggregate(
+                lakehouse=current_lakehouse_name, table_name=lakeTName
+            )
+            run_id = max_run_id + 1

        export_df = df.copy()

@@ -239,15 +232,11 @@ def get_lakehouse_tables(
        print(
            f"{icons.in_progress} Saving Lakehouse table properties to the '{lakeTName}' table in the lakehouse...\n"
        )
-
-        export_df["
-        export_df["RunId"] = runId
+        export_df["Timestamp"] = datetime.datetime.now()
+        export_df["RunId"] = run_id

-
-
-        spark_df.write.mode("append").format("delta").saveAsTable(lakeTName)
-        print(
-            f"{icons.bullet} Lakehouse table properties have been saved to the '{lakeTName}' delta table."
+        save_as_delta_table(
+            dataframe=export_df, delta_table_name=lakeTName, write_mode="append"
        )

    return df
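A usage sketch for the updated function, assuming it is still exported from `sempy_labs.lakehouse`; per the diff above, `count_rows=True` implies `extended=True`, and `export=True` appends a snapshot to the `lakehouse_table_details` delta table in the attached lakehouse:

```python
from sempy_labs.lakehouse import get_lakehouse_tables

# Defaults resolve to the attached lakehouse and its workspace.
df_tables = get_lakehouse_tables(
    lakehouse=None,
    workspace=None,
    extended=True,    # adds Direct Lake guardrail information
    count_rows=False,
    export=False,     # True would append a snapshot to 'lakehouse_table_details'
)
```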
--- sempy_labs/lakehouse/_lakehouse.py (0.8.10)
+++ sempy_labs/lakehouse/_lakehouse.py (0.9.0)
@@ -1,8 +1,7 @@
-import sempy.fabric as fabric
 from tqdm.auto import tqdm
-from sempy_labs._helper_functions import resolve_lakehouse_name
 from typing import List, Optional, Union
 from sempy._utils._log import log
+from uuid import UUID


 def lakehouse_attached() -> bool:
@@ -29,7 +28,7 @@ def lakehouse_attached() -> bool:
 def optimize_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Runs the `OPTIMIZE <https://docs.delta.io/latest/optimizations-oss.html>`_ function over the specified lakehouse tables.
@@ -42,8 +41,8 @@ def optimize_lakehouse_tables(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
@@ -52,12 +51,6 @@ def optimize_lakehouse_tables(
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
     from delta import DeltaTable

-    workspace = fabric.resolve_workspace_name(workspace)
-
-    if lakehouse is None:
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
-
     lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
     lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]

@@ -83,7 +76,7 @@ def optimize_lakehouse_tables(
 def vacuum_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
     retain_n_hours: Optional[int] = None,
 ):
     """
@@ -96,8 +89,8 @@ def vacuum_lakehouse_tables(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     retain_n_hours : int, default=None
@@ -111,12 +104,6 @@ def vacuum_lakehouse_tables(
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
     from delta import DeltaTable

-    workspace = fabric.resolve_workspace_name(workspace)
-
-    if lakehouse is None:
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
-
     lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
     lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]

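A usage sketch for the two maintenance helpers, assuming they remain exported from `sempy_labs.lakehouse`; the table names and workspace ID below are placeholders:

```python
from sempy_labs.lakehouse import optimize_lakehouse_tables, vacuum_lakehouse_tables

# OPTIMIZE a couple of delta tables in the attached lakehouse.
optimize_lakehouse_tables(tables=["DimDate", "FactSales"])  # placeholder table names

# VACUUM all delta tables; workspace may now be a name or a UUID.
vacuum_lakehouse_tables(
    tables=None,
    lakehouse=None,
    workspace="00000000-0000-0000-0000-000000000000",  # placeholder workspace ID
    retain_n_hours=None,
)
```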