PyPI - semantic-link-labs - Versions diffs - 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl - Mend

semantic-link-labs 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of semantic-link-labs might be problematic. Click here for more details.

Files changed (12) hide show

{semantic_link_labs-0.7.0.dist-info → semantic_link_labs-0.7.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: semantic-link-labs
-Version: 0.7.0
+Version: 0.7.1
 Summary: Semantic Link Labs for Microsoft Fabric
 Author: Microsoft Corporation
 License: MIT License
@@ -25,7 +25,7 @@ Requires-Dist: pytest >=8.2.1 ; extra == 'test'
 # Semantic Link Labs
 [![PyPI version](https://badge.fury.io/py/semantic-link-labs.svg)](https://badge.fury.io/py/semantic-link-labs)
-[![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.7.0&style=flat)](https://readthedocs.org/projects/semantic-link-labs/)
+[![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.7.1&style=flat)](https://readthedocs.org/projects/semantic-link-labs/)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![Downloads](https://static.pepy.tech/badge/semantic-link-labs)](https://pepy.tech/project/semantic-link-labs)

{semantic_link_labs-0.7.0.dist-info → semantic_link_labs-0.7.1.dist-info}/RECORD RENAMED Viewed

@@ -4,16 +4,16 @@ sempy_labs/_clear_cache.py,sha256=NckXmtDCgRqlNL5FvLTut2XWLI0Hft3O4sAaXS1tPfo,17
 sempy_labs/_connections.py,sha256=w1dFC4WeTNFmLGD2EL_Syk0Wb1Eij18we2FVn_VaCD8,7641
 sempy_labs/_dax.py,sha256=dt1GgHceyM7f6phRBPxRKnmQy_KYKpcgFQHuOjGbpLo,2029
 sempy_labs/_generate_semantic_model.py,sha256=igKsVX-5Nqpipjg0taLFro8OsD3ogwSwKsyVAmuRwG4,8647
-sempy_labs/_helper_functions.py,sha256=rTetza9TRLtKOjEd0ZHz-xDZc1nbsN58ldHkLK8CiMo,24769
+sempy_labs/_helper_functions.py,sha256=DFfThu8nIvRTGACP8gCJ4tObyzsCrd4Ox9Tk3CmIwyk,24909
 sempy_labs/_icons.py,sha256=UK7chr_tEkZd4Y7Es_KyTc4dFgtYS4f31ggWxyqC9uY,853
-sempy_labs/_list_functions.py,sha256=CwNI7tEvn8upIpCaLDII4QLQVrJhJECPJdo6vZsg0sw,90578
+sempy_labs/_list_functions.py,sha256=MKw5JLHnRVRiOwzsvUmyesyOWUpatcPB8WHg1QXS83w,92070
 sempy_labs/_model_auto_build.py,sha256=fX3bCLFCOMQHuheKIoB48fUABG7XAT7qqsMbUiWSrY0,5071
 sempy_labs/_model_bpa.py,sha256=U9rHoGzuAmV1dtJvgSVk3BiUwDp6WTFt1l0CbkmKcdE,20439
-sempy_labs/_model_bpa_bulk.py,sha256=nvQKQ5h7Zs7rPJbybkrx1_cz3xdA-dLcZcWizIX5_oo,14702
-sempy_labs/_model_bpa_rules.py,sha256=uC2nKnT3b6lRMaGB7VokSORXVZvRSTQs2DzFSx4nIYY,47294
+sempy_labs/_model_bpa_bulk.py,sha256=b0Y6XbzKQawExSW6yEpwgBlptFe7Y7rFtY3mombBun4,15023
+sempy_labs/_model_bpa_rules.py,sha256=jujUiwUbIJW16UR3tUgxmj21PdSUxjzfiEFEEf-w40Q,48095
 sempy_labs/_model_dependencies.py,sha256=nZdqq2iMhZejnS_LCd2rpK6r1B7jWpa3URkxobRPifY,12986
 sempy_labs/_one_lake_integration.py,sha256=eIuLxlw8eXfUH2avKhsyLmXZbTllSwGsz2j_HMAikpQ,6234
-sempy_labs/_query_scale_out.py,sha256=fliTIx_POeuzjV0bhYM4-2QD74c4r3soxs0_bSaoD28,14441
+sempy_labs/_query_scale_out.py,sha256=EKU0saCRfKy2yyTpdbBVC_MwiHXxycSa_4k2O1pOqJg,13967
 sempy_labs/_refresh_semantic_model.py,sha256=2qzP9KqmwA20RuL1o6Lt9bIjC-KtdX8ZgcTvJParg-w,7157
 sempy_labs/_translations.py,sha256=BcrVIrBNSKtbFz4Y9t1Dh1SZCu0K4NHu7n01Z6O76IY,19665
 sempy_labs/_vertipaq.py,sha256=zMKtcCQ2gpgoDLisTbTjFNe60Cg2PlAQ6HvkSlbpKPo,33660
@@ -55,10 +55,10 @@ sempy_labs/directlake/_show_unsupported_directlake_objects.py,sha256=QNj2wHzFGtj
 sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py,sha256=b_Y5_GSfWC25wH6R7L37-AHO9fvKkmxRGaP6dVDC7-w,3233
 sempy_labs/directlake/_update_directlake_partition_entity.py,sha256=Pbx7LCdKyqEfX1npLvhw0WzFnOEbluwB3_xW0ELvHL4,8580
 sempy_labs/directlake/_warm_cache.py,sha256=ZgPricISRszx-yDERXihBDGVhEFB9yX-nBtLX0ZJTXI,8258
-sempy_labs/lakehouse/__init__.py,sha256=i6VRx4dR1SIN-1GxioiNwhC4FxbozRCIz5TfXjb9rKc,587
+sempy_labs/lakehouse/__init__.py,sha256=6LVQltQ3cjyiuxvjXTuNdJ163zSqi4h_tEZY4zsxuSw,647
 sempy_labs/lakehouse/_get_lakehouse_columns.py,sha256=Bb_iCTlNwl0wdN4dW_E7tVnfbHhHwQT_l0SUqvcbYpo,2582
 sempy_labs/lakehouse/_get_lakehouse_tables.py,sha256=1IXa_u1c4CJSlmP1rxBCMcOrQw-vmRXjqd5U5xsx_5c,8800
-sempy_labs/lakehouse/_lakehouse.py,sha256=5A4SwVak8AlRVBUeHg9_Zfq1Id8yInRtnimvjo8oUxY,2782
+sempy_labs/lakehouse/_lakehouse.py,sha256=qtCVr1cM0TWY6z5YS57w0nj3DEfXT5xmyDtr3676kAk,5172
 sempy_labs/lakehouse/_shortcuts.py,sha256=MT_Cqog5cTMz9fN3M_ZjAaQSjXXiyCyPWGY8LbaXZsI,6977
 sempy_labs/migration/__init__.py,sha256=w4vvGk6wTWXVfofJDmio2yIFvSSJsxOpjv6mvNGmrOI,1043
 sempy_labs/migration/_create_pqt_file.py,sha256=oYoKD78K9Ox1fqtkh-BfU_G5nUIoK_-5ChvCKDsYsWU,9257
@@ -104,8 +104,8 @@ sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.
 sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json,sha256=mlY6t9OlSe-Y6_QmXJpS1vggU6Y3FjISUKECL8FVSg8,931
 sempy_labs/tom/__init__.py,sha256=Qbs8leW0fjzvWwOjyWK3Hjeehu7IvpB1beASGsi28bk,121
 sempy_labs/tom/_model.py,sha256=M-es2bES3Usj5uVmt5vwNmtm9vWzeqtVtKREpxjnjiI,151050
-semantic_link_labs-0.7.0.dist-info/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
-semantic_link_labs-0.7.0.dist-info/METADATA,sha256=DM8hGBclkGwWLsMT2CeZWdz6OM2NA6oL8n0zWbWtHxs,11241
-semantic_link_labs-0.7.0.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
-semantic_link_labs-0.7.0.dist-info/top_level.txt,sha256=kiQX1y42Dbein1l3Q8jMUYyRulDjdlc2tMepvtrvixQ,11
-semantic_link_labs-0.7.0.dist-info/RECORD,,
+semantic_link_labs-0.7.1.dist-info/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
+semantic_link_labs-0.7.1.dist-info/METADATA,sha256=fIK5PQeOgEH9rm-JPDN3noaIiYaMidQE5AKDjdPlDrE,11241
+semantic_link_labs-0.7.1.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
+semantic_link_labs-0.7.1.dist-info/top_level.txt,sha256=kiQX1y42Dbein1l3Q8jMUYyRulDjdlc2tMepvtrvixQ,11
+semantic_link_labs-0.7.1.dist-info/RECORD,,

{semantic_link_labs-0.7.0.dist-info → semantic_link_labs-0.7.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (73.0.1)
+Generator: setuptools (74.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

sempy_labs/_helper_functions.py CHANGED Viewed

@@ -11,6 +11,7 @@ from typing import Optional, Tuple, List
 from uuid import UUID
 import sempy_labs._icons as icons
 from sempy.fabric.exceptions import FabricHTTPException
+import urllib.parse
 def create_abfss_path(
@@ -681,7 +682,8 @@ def resolve_workspace_capacity(workspace: Optional[str] = None) -> Tuple[UUID, s
     """
     workspace = fabric.resolve_workspace_name(workspace)
-    dfW = fabric.list_workspaces(filter=f"name eq '{workspace}'")
+    filter_condition = urllib.parse.quote(workspace)
+    dfW = fabric.list_workspaces(filter=f"name eq '{filter_condition}'")
     capacity_id = dfW["Capacity Id"].iloc[0]
     dfC = fabric.list_capacities()
     dfC_filt = dfC[dfC["Id"] == capacity_id]
@@ -711,7 +713,8 @@ def get_capacity_id(workspace: Optional[str] = None) -> UUID:
     """
     workspace = fabric.resolve_workspace_name(workspace)
-    dfW = fabric.list_workspaces(filter=f"name eq '{workspace}'")
+    filter_condition = urllib.parse.quote(workspace)
+    dfW = fabric.list_workspaces(filter=f"name eq '{filter_condition}'")
     if len(dfW) == 0:
         raise ValueError(f"{icons.red_dot} The '{workspace}' does not exist'.")

sempy_labs/_list_functions.py CHANGED Viewed

@@ -8,12 +8,11 @@ from sempy_labs._helper_functions import (
     _decode_b64,
     pagination,
     lro,
+    resolve_item_type,
 )
 import pandas as pd
 import base64
 import requests
-import time
-import json
 from pyspark.sql import SparkSession
 from typing import Optional
 import sempy_labs._icons as icons
@@ -1529,7 +1528,7 @@ def list_shortcuts(
     lakehouse: Optional[str] = None, workspace: Optional[str] = None
 ) -> pd.DataFrame:
     """
-    Shows all shortcuts which exist in a Fabric lakehouse.
+    Shows all shortcuts which exist in a Fabric lakehouse and their properties.
     Parameters
     ----------
@@ -1551,71 +1550,84 @@ def list_shortcuts(
     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
     else:
         lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+    client = fabric.FabricRestClient()
     df = pd.DataFrame(
         columns=[
             "Shortcut Name",
             "Shortcut Path",
-            "Source",
-            "Source Lakehouse Name",
+            "Source Type",
+            "Source Workspace Id",
             "Source Workspace Name",
-            "Source Path",
-            "Source Connection ID",
-            "Source Location",
-            "Source SubPath",
+            "Source Item Id",
+            "Source Item Name",
+            "Source Item Type",
+            "OneLake Path",
+            "Connection Id",
+            "Location",
+            "Bucket",
+            "SubPath",
         ]
     )
-    client = fabric.FabricRestClient()
     response = client.get(
         f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"
     )
     if response.status_code != 200:
         raise FabricHTTPException(response)
     responses = pagination(client, response)
     for r in responses:
-        for s in r.get("value", []):
-            shortcutName = s.get("name")
-            shortcutPath = s.get("path")
-            source = list(s["target"].keys())[0]
-            (
-                sourceLakehouseName,
-                sourceWorkspaceName,
-                sourcePath,
-                connectionId,
-                location,
-                subpath,
-            ) = (None, None, None, None, None, None)
-            if source == "oneLake":
-                sourceLakehouseId = s.get("target", {}).get(source, {}).get("itemId")
-                sourcePath = s.get("target", {}).get(source, {}).get("path")
-                sourceWorkspaceId = (
-                    s.get("target", {}).get(source, {}).get("workspaceId")
-                )
-                sourceWorkspaceName = fabric.resolve_workspace_name(sourceWorkspaceId)
-                sourceLakehouseName = resolve_lakehouse_name(
-                    sourceLakehouseId, sourceWorkspaceName
-                )
-            else:
-                connectionId = s.get("target", {}).get(source, {}).get("connectionId")
-                location = s.get("target", {}).get(source, {}).get("location")
-                subpath = s.get("target", {}).get(source, {}).get("subpath")
+        for i in r.get("value", []):
+            tgt = i.get("target", {})
+            s3_compat = tgt.get("s3Compatible", {})
+            gcs = tgt.get("googleCloudStorage", {})
+            eds = tgt.get("externalDataShare", {})
+            connection_id = (
+                s3_compat.get("connectionId")
+                or gcs.get("connectionId")
+                or eds.get("connectionId")
+                or None
+            )
+            location = s3_compat.get("location") or gcs.get("location") or None
+            sub_path = s3_compat.get("subpath") or gcs.get("subpath") or None
+            source_workspace_id = tgt.get("oneLake", {}).get("workspaceId")
+            source_item_id = tgt.get("oneLake", {}).get("itemId")
+            source_workspace_name = (
+                fabric.resolve_workspace_name(source_workspace_id)
+                if source_workspace_id is not None
+                else None
+            )
             new_data = {
-                "Shortcut Name": shortcutName,
-                "Shortcut Path": shortcutPath,
-                "Source": source,
-                "Source Lakehouse Name": sourceLakehouseName,
-                "Source Workspace Name": sourceWorkspaceName,
-                "Source Path": sourcePath,
-                "Source Connection ID": connectionId,
-                "Source Location": location,
-                "Source SubPath": subpath,
+                "Shortcut Name": i.get("name"),
+                "Shortcut Path": i.get("path"),
+                "Source Type": tgt.get("type"),
+                "Source Workspace Id": source_workspace_id,
+                "Source Workspace Name": source_workspace_name,
+                "Source Item Id": source_item_id,
+                "Source Item Name": (
+                    fabric.resolve_item_name(
+                        source_item_id, workspace=source_workspace_name
+                    )
+                    if source_item_id is not None
+                    else None
+                ),
+                "Source Item Type": (
+                    resolve_item_type(source_item_id, workspace=source_workspace_name)
+                    if source_item_id is not None
+                    else None
+                ),
+                "OneLake Path": tgt.get("oneLake", {}).get("path"),
+                "Connection Id": connection_id,
+                "Location": location,
+                "Bucket": s3_compat.get("bucket"),
+                "SubPath": sub_path,
             }
             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
@@ -1722,9 +1734,9 @@ def create_custom_pool(
     min_node_count : int
         The `minimum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
     max_node_count : int
-        The `maximum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
+        The `maximum node count <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
     min_executors : int
-        The `minimum executors <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
+        The `minimum executors <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
     max_executors : int
         The `maximum executors <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
     node_family : str, default='MemoryOptimized'
@@ -1799,10 +1811,10 @@ def update_custom_pool(
         The `minimum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
         Defaults to None which keeps the existing property setting.
     max_node_count : int, default=None
-        The `maximum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
+        The `maximum node count <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
         Defaults to None which keeps the existing property setting.
     min_executors : int, default=None
-        The `minimum executors <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
+        The `minimum executors <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
         Defaults to None which keeps the existing property setting.
     max_executors : int, default=None
         The `maximum executors <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
@@ -2092,7 +2104,7 @@ def update_spark_settings(
         `Default pool <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#poolproperties>`_ for workspace.
         Defaults to None which keeps the existing property setting.
     max_node_count : int, default=None
-        The `maximum node count <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#starterpoolproperties>`_.
+        The `maximum node count <https://learn.microsoft.com/en-us/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#starterpoolproperties>`_.
         Defaults to None which keeps the existing property setting.
     max_executors : int, default=None
         The `maximum executors <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#starterpoolproperties>`_.
@@ -2161,7 +2173,10 @@ def update_spark_settings(
 def add_user_to_workspace(
-    email_address: str, role_name: str, workspace: Optional[str] = None
+    email_address: str,
+    role_name: str,
+    principal_type: Optional[str] = "User",
+    workspace: Optional[str] = None,
 ):
     """
     Adds a user to a workspace.
@@ -2172,13 +2187,12 @@ def add_user_to_workspace(
         The email address of the user.
     role_name : str
         The `role <https://learn.microsoft.com/rest/api/power-bi/groups/add-group-user#groupuseraccessright>`_ of the user within the workspace.
+    principal_type : str, default='User'
+        The `principal type <https://learn.microsoft.com/rest/api/power-bi/groups/add-group-user#principaltype>`_.
     workspace : str, default=None
         The name of the workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-    Returns
-    -------
     """
     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
@@ -2190,10 +2204,21 @@ def add_user_to_workspace(
             f"{icons.red_dot} Invalid role. The 'role_name' parameter must be one of the following: {role_names}."
         )
     plural = "n" if role_name == "Admin" else ""
+    principal_types = ["App", "Group", "None", "User"]
+    principal_type = principal_type.capitalize()
+    if principal_type not in principal_types:
+        raise ValueError(
+            f"{icons.red_dot} Invalid princpal type. Valid options: {principal_types}."
+        )
     client = fabric.PowerBIRestClient()
-    request_body = {"emailAddress": email_address, "groupUserAccessRight": role_name}
+    request_body = {
+        "emailAddress": email_address,
+        "groupUserAccessRight": role_name,
+        "principalType": principal_type,
+        "identifier": email_address,
+    }
     response = client.post(
         f"/v1.0/myorg/groups/{workspace_id}/users", json=request_body
@@ -2236,7 +2261,10 @@ def delete_user_from_workspace(email_address: str, workspace: Optional[str] = No
 def update_workspace_user(
-    email_address: str, role_name: str, workspace: Optional[str] = None
+    email_address: str,
+    role_name: str,
+    principal_type: Optional[str] = "User",
+    workspace: Optional[str] = None,
 ):
     """
     Updates a user's role within a workspace.
@@ -2247,13 +2275,12 @@ def update_workspace_user(
         The email address of the user.
     role_name : str
         The `role <https://learn.microsoft.com/rest/api/power-bi/groups/add-group-user#groupuseraccessright>`_ of the user within the workspace.
+    principal_type : str, default='User'
+        The `principal type <https://learn.microsoft.com/rest/api/power-bi/groups/add-group-user#principaltype>`_.
     workspace : str, default=None
         The name of the workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-    Returns
-    -------
     """
     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
@@ -2264,8 +2291,19 @@ def update_workspace_user(
         raise ValueError(
             f"{icons.red_dot} Invalid role. The 'role_name' parameter must be one of the following: {role_names}."
         )
+    principal_types = ["App", "Group", "None", "User"]
+    principal_type = principal_type.capitalize()
+    if principal_type not in principal_types:
+        raise ValueError(
+            f"{icons.red_dot} Invalid princpal type. Valid options: {principal_types}."
+        )
-    request_body = {"emailAddress": email_address, "groupUserAccessRight": role_name}
+    request_body = {
+        "emailAddress": email_address,
+        "groupUserAccessRight": role_name,
+        "principalType": principal_type,
+        "identifier": email_address,
+    }
     client = fabric.PowerBIRestClient()
     response = client.put(f"/v1.0/myorg/groups/{workspace_id}/users", json=request_body)

sempy_labs/_model_bpa_bulk.py CHANGED Viewed

@@ -21,6 +21,7 @@ def run_model_bpa_bulk(
     extended: Optional[bool] = False,
     language: Optional[str] = None,
     workspace: Optional[str | List[str]] = None,
+    skip_models: Optional[str | List[str]] = ["ModelBPA", "Fabric Capacity Metrics"],
 ):
     """
     Runs the semantic model Best Practice Analyzer across all semantic models in a workspace (or all accessible workspaces).
@@ -41,18 +42,22 @@ def run_model_bpa_bulk(
     workspace : str | List[str], default=None
         The workspace or list of workspaces to scan.
         Defaults to None which scans all accessible workspaces.
-    Returns
-    -------
+    skip_models : str | List[str], default=['ModelBPA', 'Fabric Capacity Metrics']
+        The semantic models to always skip when running this analysis.
     """
     import pyspark.sql.functions as F
     if not lakehouse_attached():
         raise ValueError(
-            "No lakehouse is attached to this notebook. Must attach a lakehouse to the notebook."
+            f"{icons.red_dot} No lakehouse is attached to this notebook. Must attach a lakehouse to the notebook."
         )
+    if isinstance(skip_models, str):
+        skip_models = [skip_models]
+    skip_models.extend(["ModelBPA", "Fabric Capacity Metrics"])
     cols = [
         "Capacity Name",
         "Capacity Id",
@@ -113,8 +118,7 @@ def run_model_bpa_bulk(
                 or set(["Lakehouse", "SemanticModel"]).issubset(set(x["Type"]))
             )
             default_semantic_models = filtered_df["Display Name"].unique().tolist()
-            # Skip ModelBPA :)
-            skip_models = default_semantic_models + [icons.model_bpa_name]
+            skip_models.extend(default_semantic_models)
             dfD_filt = dfD[~dfD["Dataset Name"].isin(skip_models)]
             if len(dfD_filt) > 0:

sempy_labs/_model_bpa_rules.py CHANGED Viewed

@@ -135,6 +135,17 @@ def model_bpa_rules(
                     "Setting the 'Data Coverage Definition' property may lead to better performance because the engine knows when it can only query the import-portion of the table and when it needs to query the DirectQuery portion of the table.",
                     "https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions",
                 ),
+                (
+                    "Performance",
+                    "Model",
+                    "Warning",
+                    "Dual mode is only relevant for dimension tables if DirectQuery is used for the corresponding fact table",
+                    lambda obj: not any(
+                        p.Mode == TOM.ModeType.DirectQuery for p in tom.all_partitions()
+                    )
+                    and any(p.Mode == TOM.ModeType.Dual for p in tom.all_partitions()),
+                    "Only use Dual mode for dimension tables/partitions where a corresponding fact table is in DirectQuery. Using Dual mode in other circumstances (i.e. rest of the model is in Import mode) may lead to performance issues especially if the number of measures in the model is high.",
+                ),
                 (
                     "Performance",
                     "Table",
@@ -590,13 +601,13 @@ def model_bpa_rules(
                         re.search(
                             r"USERELATIONSHIP\s*\(\s*\'*"
                             + obj.FromTable.Name
-                            + "'*\["
+                            + r"'*\["
                             + obj.FromColumn.Name
-                            + "\]\s*,\s*'*"
+                            + r"\]\s*,\s*'*"
                             + obj.ToTable.Name
-                            + "'*\["
+                            + r"'*\["
                             + obj.ToColumn.Name
-                            + "\]",
+                            + r"\]",
                             m.Expression,
                             flags=re.IGNORECASE,
                         )

sempy_labs/_query_scale_out.py CHANGED Viewed

@@ -21,10 +21,6 @@ def qso_sync(dataset: str, workspace: Optional[str] = None):
         The Fabric workspace name.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-    Returns
-    -------
     """
     # https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/trigger-query-scale-out-sync-in-group
@@ -63,7 +59,6 @@ def qso_sync_status(
     -------
     Tuple[pandas.DataFrame, pandas.DataFrame]
         2 pandas dataframes showing the query scale-out sync status.
     """
     # https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/get-query-scale-out-sync-status-in-group
@@ -161,7 +156,6 @@ def disable_qso(dataset: str, workspace: Optional[str] = None) -> pd.DataFrame:
     -------
     pandas.DataFrame
         A pandas dataframe showing the current query scale out settings.
     """
     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
@@ -177,6 +171,7 @@ def disable_qso(dataset: str, workspace: Optional[str] = None) -> pd.DataFrame:
         raise FabricHTTPException(response)
     df = list_qso_settings(dataset=dataset, workspace=workspace)
     print(
         f"{icons.green_dot} Query scale out has been disabled for the '{dataset}' semantic model within the '{workspace}' workspace."
     )
@@ -210,7 +205,6 @@ def set_qso(
     -------
     pandas.DataFrame
         A pandas dataframe showing the current query scale-out settings.
     """
     # https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/update-dataset-in-group
@@ -225,31 +219,27 @@ def set_qso(
     request_body = {
         "queryScaleOutSettings": {
             "autoSyncReadOnlyReplicas": auto_sync,
-            "maxReadOnlyReplicas": str(max_read_only_replicas),
+            "maxReadOnlyReplicas": max_read_only_replicas,
         }
     }
-    ssm = set_semantic_model_storage_format(
+    set_semantic_model_storage_format(
         dataset=dataset, storage_format="Large", workspace=workspace
     )
-    if ssm == 200:
-        client = fabric.PowerBIRestClient()
-        response = client.patch(
-            f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}",
-            json=request_body,
-        )
-        if response.status_code != 200:
-            raise FabricHTTPException(response)
+    client = fabric.PowerBIRestClient()
+    response = client.patch(
+        f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}",
+        json=request_body,
+    )
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
-        df = list_qso_settings(dataset=dataset, workspace=workspace)
-        print(
-            f"{icons.green_dot} Query scale out has been set on the '{dataset}' semantic model within the '{workspace}' workspace."
-        )
-        return df
-    else:
-        raise ValueError(
-            f"{icons.red_dot} Failed to set the '{dataset}' semantic model within the '{workspace}' workspace to large semantic model storage format. This is a prerequisite for enabling Query Scale Out.\n\"https://learn.microsoft.com/power-bi/enterprise/service-premium-scale-out#prerequisites\""
-        )
+    df = list_qso_settings(dataset=dataset, workspace=workspace)
+    print(
+        f"{icons.green_dot} Query scale out has been set on the '{dataset}' semantic model within the '{workspace}' workspace."
+    )
+    return df
 def set_semantic_model_storage_format(
@@ -268,10 +258,6 @@ def set_semantic_model_storage_format(
         The Fabric workspace name.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-    Returns
-    -------
     """
     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)

sempy_labs/lakehouse/__init__.py CHANGED Viewed

@@ -3,6 +3,7 @@ from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
 from sempy_labs.lakehouse._lakehouse import (
     lakehouse_attached,
     optimize_lakehouse_tables,
+    vacuum_lakehouse_tables,
 )
 from sempy_labs.lakehouse._shortcuts import (
@@ -19,4 +20,5 @@ __all__ = [
     # create_shortcut,
     "create_shortcut_onelake",
     "delete_shortcut",
+    "vacuum_lakehouse_tables",
 ]

sempy_labs/lakehouse/_lakehouse.py CHANGED Viewed

@@ -69,18 +69,74 @@ def optimize_lakehouse_tables(
     else:
         tables_filt = lakeTablesDelta.copy()
-    tableCount = len(tables_filt)
     spark = SparkSession.builder.getOrCreate()
-    i = 1
     for _, r in (bar := tqdm(tables_filt.iterrows())):
         tableName = r["Table Name"]
         tablePath = r["Location"]
         bar.set_description(f"Optimizing the '{tableName}' table...")
         deltaTable = DeltaTable.forPath(spark, tablePath)
         deltaTable.optimize().executeCompaction()
-        print(
-            f"{icons.green_dot} The '{tableName}' table has been optimized. ({str(i)}/{str(tableCount)})"
-        )
-        i += 1
+@log
+def vacuum_lakehouse_tables(
+    tables: Optional[Union[str, List[str]]] = None,
+    lakehouse: Optional[str] = None,
+    workspace: Optional[str] = None,
+    retain_n_hours: Optional[int] = None,
+):
+    """
+    Runs the `VACUUM <https://docs.delta.io/latest/delta-utility.html#remove-files-no-longer-referenced-by-a-delta-table>`_ function over the specified lakehouse tables.
+    Parameters
+    ----------
+    tables : str | List[str] | None
+        The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be optimized.
+    lakehouse : str, default=None
+        The Fabric lakehouse.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str, default=None
+        The Fabric workspace used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    retain_n_hours : int, default=None
+        The number of hours to retain historical versions of Delta table files.
+        Files older than this retention period will be deleted during the vacuum operation.
+        If not specified, the default retention period configured for the Delta table will be used.
+        The default retention period is 168 hours (7 days) unless manually configured via table properties.
+    """
+    from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
+    from delta import DeltaTable
+    workspace = fabric.resolve_workspace_name(workspace)
+    if lakehouse is None:
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
+    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+    lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]
+    if isinstance(tables, str):
+        tables = [tables]
+    if tables is not None:
+        tables_filt = lakeTablesDelta[lakeTablesDelta["Table Name"].isin(tables)]
+    else:
+        tables_filt = lakeTablesDelta.copy()
+    spark = SparkSession.builder.getOrCreate()
+    spark.conf.set("spark.databricks.delta.vacuum.parallelDelete.enabled", "true")
+    for _, r in (bar := tqdm(tables_filt.iterrows())):
+        tableName = r["Table Name"]
+        tablePath = r["Location"]
+        bar.set_description(f"Vacuuming the '{tableName}' table...")
+        deltaTable = DeltaTable.forPath(spark, tablePath)
+        if retain_n_hours is None:
+            deltaTable.vacuum()
+        else:
+            deltaTable.vacuum(retain_n_hours)

{semantic_link_labs-0.7.0.dist-info → semantic_link_labs-0.7.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{semantic_link_labs-0.7.0.dist-info → semantic_link_labs-0.7.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

semantic-link-labs 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

Potentially problematic release.

semantic-link-labs 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl