semantic-link-labs 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of semantic-link-labs might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: semantic-link-labs
3
- Version: 0.7.0
3
+ Version: 0.7.1
4
4
  Summary: Semantic Link Labs for Microsoft Fabric
5
5
  Author: Microsoft Corporation
6
6
  License: MIT License
@@ -25,7 +25,7 @@ Requires-Dist: pytest >=8.2.1 ; extra == 'test'
25
25
  # Semantic Link Labs
26
26
 
27
27
  [![PyPI version](https://badge.fury.io/py/semantic-link-labs.svg)](https://badge.fury.io/py/semantic-link-labs)
28
- [![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.7.0&style=flat)](https://readthedocs.org/projects/semantic-link-labs/)
28
+ [![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.7.1&style=flat)](https://readthedocs.org/projects/semantic-link-labs/)
29
29
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
30
30
  [![Downloads](https://static.pepy.tech/badge/semantic-link-labs)](https://pepy.tech/project/semantic-link-labs)
31
31
 
@@ -4,16 +4,16 @@ sempy_labs/_clear_cache.py,sha256=NckXmtDCgRqlNL5FvLTut2XWLI0Hft3O4sAaXS1tPfo,17
4
4
  sempy_labs/_connections.py,sha256=w1dFC4WeTNFmLGD2EL_Syk0Wb1Eij18we2FVn_VaCD8,7641
5
5
  sempy_labs/_dax.py,sha256=dt1GgHceyM7f6phRBPxRKnmQy_KYKpcgFQHuOjGbpLo,2029
6
6
  sempy_labs/_generate_semantic_model.py,sha256=igKsVX-5Nqpipjg0taLFro8OsD3ogwSwKsyVAmuRwG4,8647
7
- sempy_labs/_helper_functions.py,sha256=rTetza9TRLtKOjEd0ZHz-xDZc1nbsN58ldHkLK8CiMo,24769
7
+ sempy_labs/_helper_functions.py,sha256=DFfThu8nIvRTGACP8gCJ4tObyzsCrd4Ox9Tk3CmIwyk,24909
8
8
  sempy_labs/_icons.py,sha256=UK7chr_tEkZd4Y7Es_KyTc4dFgtYS4f31ggWxyqC9uY,853
9
- sempy_labs/_list_functions.py,sha256=CwNI7tEvn8upIpCaLDII4QLQVrJhJECPJdo6vZsg0sw,90578
9
+ sempy_labs/_list_functions.py,sha256=MKw5JLHnRVRiOwzsvUmyesyOWUpatcPB8WHg1QXS83w,92070
10
10
  sempy_labs/_model_auto_build.py,sha256=fX3bCLFCOMQHuheKIoB48fUABG7XAT7qqsMbUiWSrY0,5071
11
11
  sempy_labs/_model_bpa.py,sha256=U9rHoGzuAmV1dtJvgSVk3BiUwDp6WTFt1l0CbkmKcdE,20439
12
- sempy_labs/_model_bpa_bulk.py,sha256=nvQKQ5h7Zs7rPJbybkrx1_cz3xdA-dLcZcWizIX5_oo,14702
13
- sempy_labs/_model_bpa_rules.py,sha256=uC2nKnT3b6lRMaGB7VokSORXVZvRSTQs2DzFSx4nIYY,47294
12
+ sempy_labs/_model_bpa_bulk.py,sha256=b0Y6XbzKQawExSW6yEpwgBlptFe7Y7rFtY3mombBun4,15023
13
+ sempy_labs/_model_bpa_rules.py,sha256=jujUiwUbIJW16UR3tUgxmj21PdSUxjzfiEFEEf-w40Q,48095
14
14
  sempy_labs/_model_dependencies.py,sha256=nZdqq2iMhZejnS_LCd2rpK6r1B7jWpa3URkxobRPifY,12986
15
15
  sempy_labs/_one_lake_integration.py,sha256=eIuLxlw8eXfUH2avKhsyLmXZbTllSwGsz2j_HMAikpQ,6234
16
- sempy_labs/_query_scale_out.py,sha256=fliTIx_POeuzjV0bhYM4-2QD74c4r3soxs0_bSaoD28,14441
16
+ sempy_labs/_query_scale_out.py,sha256=EKU0saCRfKy2yyTpdbBVC_MwiHXxycSa_4k2O1pOqJg,13967
17
17
  sempy_labs/_refresh_semantic_model.py,sha256=2qzP9KqmwA20RuL1o6Lt9bIjC-KtdX8ZgcTvJParg-w,7157
18
18
  sempy_labs/_translations.py,sha256=BcrVIrBNSKtbFz4Y9t1Dh1SZCu0K4NHu7n01Z6O76IY,19665
19
19
  sempy_labs/_vertipaq.py,sha256=zMKtcCQ2gpgoDLisTbTjFNe60Cg2PlAQ6HvkSlbpKPo,33660
@@ -55,10 +55,10 @@ sempy_labs/directlake/_show_unsupported_directlake_objects.py,sha256=QNj2wHzFGtj
55
55
  sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py,sha256=b_Y5_GSfWC25wH6R7L37-AHO9fvKkmxRGaP6dVDC7-w,3233
56
56
  sempy_labs/directlake/_update_directlake_partition_entity.py,sha256=Pbx7LCdKyqEfX1npLvhw0WzFnOEbluwB3_xW0ELvHL4,8580
57
57
  sempy_labs/directlake/_warm_cache.py,sha256=ZgPricISRszx-yDERXihBDGVhEFB9yX-nBtLX0ZJTXI,8258
58
- sempy_labs/lakehouse/__init__.py,sha256=i6VRx4dR1SIN-1GxioiNwhC4FxbozRCIz5TfXjb9rKc,587
58
+ sempy_labs/lakehouse/__init__.py,sha256=6LVQltQ3cjyiuxvjXTuNdJ163zSqi4h_tEZY4zsxuSw,647
59
59
  sempy_labs/lakehouse/_get_lakehouse_columns.py,sha256=Bb_iCTlNwl0wdN4dW_E7tVnfbHhHwQT_l0SUqvcbYpo,2582
60
60
  sempy_labs/lakehouse/_get_lakehouse_tables.py,sha256=1IXa_u1c4CJSlmP1rxBCMcOrQw-vmRXjqd5U5xsx_5c,8800
61
- sempy_labs/lakehouse/_lakehouse.py,sha256=5A4SwVak8AlRVBUeHg9_Zfq1Id8yInRtnimvjo8oUxY,2782
61
+ sempy_labs/lakehouse/_lakehouse.py,sha256=qtCVr1cM0TWY6z5YS57w0nj3DEfXT5xmyDtr3676kAk,5172
62
62
  sempy_labs/lakehouse/_shortcuts.py,sha256=MT_Cqog5cTMz9fN3M_ZjAaQSjXXiyCyPWGY8LbaXZsI,6977
63
63
  sempy_labs/migration/__init__.py,sha256=w4vvGk6wTWXVfofJDmio2yIFvSSJsxOpjv6mvNGmrOI,1043
64
64
  sempy_labs/migration/_create_pqt_file.py,sha256=oYoKD78K9Ox1fqtkh-BfU_G5nUIoK_-5ChvCKDsYsWU,9257
@@ -104,8 +104,8 @@ sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.
104
104
  sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json,sha256=mlY6t9OlSe-Y6_QmXJpS1vggU6Y3FjISUKECL8FVSg8,931
105
105
  sempy_labs/tom/__init__.py,sha256=Qbs8leW0fjzvWwOjyWK3Hjeehu7IvpB1beASGsi28bk,121
106
106
  sempy_labs/tom/_model.py,sha256=M-es2bES3Usj5uVmt5vwNmtm9vWzeqtVtKREpxjnjiI,151050
107
- semantic_link_labs-0.7.0.dist-info/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
108
- semantic_link_labs-0.7.0.dist-info/METADATA,sha256=DM8hGBclkGwWLsMT2CeZWdz6OM2NA6oL8n0zWbWtHxs,11241
109
- semantic_link_labs-0.7.0.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
110
- semantic_link_labs-0.7.0.dist-info/top_level.txt,sha256=kiQX1y42Dbein1l3Q8jMUYyRulDjdlc2tMepvtrvixQ,11
111
- semantic_link_labs-0.7.0.dist-info/RECORD,,
107
+ semantic_link_labs-0.7.1.dist-info/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
108
+ semantic_link_labs-0.7.1.dist-info/METADATA,sha256=fIK5PQeOgEH9rm-JPDN3noaIiYaMidQE5AKDjdPlDrE,11241
109
+ semantic_link_labs-0.7.1.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
110
+ semantic_link_labs-0.7.1.dist-info/top_level.txt,sha256=kiQX1y42Dbein1l3Q8jMUYyRulDjdlc2tMepvtrvixQ,11
111
+ semantic_link_labs-0.7.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (73.0.1)
2
+ Generator: setuptools (74.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -11,6 +11,7 @@ from typing import Optional, Tuple, List
11
11
  from uuid import UUID
12
12
  import sempy_labs._icons as icons
13
13
  from sempy.fabric.exceptions import FabricHTTPException
14
+ import urllib.parse
14
15
 
15
16
 
16
17
  def create_abfss_path(
@@ -681,7 +682,8 @@ def resolve_workspace_capacity(workspace: Optional[str] = None) -> Tuple[UUID, s
681
682
  """
682
683
 
683
684
  workspace = fabric.resolve_workspace_name(workspace)
684
- dfW = fabric.list_workspaces(filter=f"name eq '{workspace}'")
685
+ filter_condition = urllib.parse.quote(workspace)
686
+ dfW = fabric.list_workspaces(filter=f"name eq '{filter_condition}'")
685
687
  capacity_id = dfW["Capacity Id"].iloc[0]
686
688
  dfC = fabric.list_capacities()
687
689
  dfC_filt = dfC[dfC["Id"] == capacity_id]
@@ -711,7 +713,8 @@ def get_capacity_id(workspace: Optional[str] = None) -> UUID:
711
713
  """
712
714
 
713
715
  workspace = fabric.resolve_workspace_name(workspace)
714
- dfW = fabric.list_workspaces(filter=f"name eq '{workspace}'")
716
+ filter_condition = urllib.parse.quote(workspace)
717
+ dfW = fabric.list_workspaces(filter=f"name eq '{filter_condition}'")
715
718
  if len(dfW) == 0:
716
719
  raise ValueError(f"{icons.red_dot} The '{workspace}' does not exist'.")
717
720
 
@@ -8,12 +8,11 @@ from sempy_labs._helper_functions import (
8
8
  _decode_b64,
9
9
  pagination,
10
10
  lro,
11
+ resolve_item_type,
11
12
  )
12
13
  import pandas as pd
13
14
  import base64
14
15
  import requests
15
- import time
16
- import json
17
16
  from pyspark.sql import SparkSession
18
17
  from typing import Optional
19
18
  import sempy_labs._icons as icons
@@ -1529,7 +1528,7 @@ def list_shortcuts(
1529
1528
  lakehouse: Optional[str] = None, workspace: Optional[str] = None
1530
1529
  ) -> pd.DataFrame:
1531
1530
  """
1532
- Shows all shortcuts which exist in a Fabric lakehouse.
1531
+ Shows all shortcuts which exist in a Fabric lakehouse and their properties.
1533
1532
 
1534
1533
  Parameters
1535
1534
  ----------
@@ -1551,71 +1550,84 @@ def list_shortcuts(
1551
1550
 
1552
1551
  if lakehouse is None:
1553
1552
  lakehouse_id = fabric.get_lakehouse_id()
1554
- lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
1555
1553
  else:
1556
1554
  lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
1557
1555
 
1556
+ client = fabric.FabricRestClient()
1557
+
1558
1558
  df = pd.DataFrame(
1559
1559
  columns=[
1560
1560
  "Shortcut Name",
1561
1561
  "Shortcut Path",
1562
- "Source",
1563
- "Source Lakehouse Name",
1562
+ "Source Type",
1563
+ "Source Workspace Id",
1564
1564
  "Source Workspace Name",
1565
- "Source Path",
1566
- "Source Connection ID",
1567
- "Source Location",
1568
- "Source SubPath",
1565
+ "Source Item Id",
1566
+ "Source Item Name",
1567
+ "Source Item Type",
1568
+ "OneLake Path",
1569
+ "Connection Id",
1570
+ "Location",
1571
+ "Bucket",
1572
+ "SubPath",
1569
1573
  ]
1570
1574
  )
1571
1575
 
1572
- client = fabric.FabricRestClient()
1573
1576
  response = client.get(
1574
1577
  f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"
1575
1578
  )
1579
+
1576
1580
  if response.status_code != 200:
1577
1581
  raise FabricHTTPException(response)
1578
1582
 
1579
1583
  responses = pagination(client, response)
1580
1584
 
1581
1585
  for r in responses:
1582
- for s in r.get("value", []):
1583
- shortcutName = s.get("name")
1584
- shortcutPath = s.get("path")
1585
- source = list(s["target"].keys())[0]
1586
- (
1587
- sourceLakehouseName,
1588
- sourceWorkspaceName,
1589
- sourcePath,
1590
- connectionId,
1591
- location,
1592
- subpath,
1593
- ) = (None, None, None, None, None, None)
1594
- if source == "oneLake":
1595
- sourceLakehouseId = s.get("target", {}).get(source, {}).get("itemId")
1596
- sourcePath = s.get("target", {}).get(source, {}).get("path")
1597
- sourceWorkspaceId = (
1598
- s.get("target", {}).get(source, {}).get("workspaceId")
1599
- )
1600
- sourceWorkspaceName = fabric.resolve_workspace_name(sourceWorkspaceId)
1601
- sourceLakehouseName = resolve_lakehouse_name(
1602
- sourceLakehouseId, sourceWorkspaceName
1603
- )
1604
- else:
1605
- connectionId = s.get("target", {}).get(source, {}).get("connectionId")
1606
- location = s.get("target", {}).get(source, {}).get("location")
1607
- subpath = s.get("target", {}).get(source, {}).get("subpath")
1586
+ for i in r.get("value", []):
1587
+ tgt = i.get("target", {})
1588
+ s3_compat = tgt.get("s3Compatible", {})
1589
+ gcs = tgt.get("googleCloudStorage", {})
1590
+ eds = tgt.get("externalDataShare", {})
1591
+ connection_id = (
1592
+ s3_compat.get("connectionId")
1593
+ or gcs.get("connectionId")
1594
+ or eds.get("connectionId")
1595
+ or None
1596
+ )
1597
+ location = s3_compat.get("location") or gcs.get("location") or None
1598
+ sub_path = s3_compat.get("subpath") or gcs.get("subpath") or None
1599
+ source_workspace_id = tgt.get("oneLake", {}).get("workspaceId")
1600
+ source_item_id = tgt.get("oneLake", {}).get("itemId")
1601
+ source_workspace_name = (
1602
+ fabric.resolve_workspace_name(source_workspace_id)
1603
+ if source_workspace_id is not None
1604
+ else None
1605
+ )
1608
1606
 
1609
1607
  new_data = {
1610
- "Shortcut Name": shortcutName,
1611
- "Shortcut Path": shortcutPath,
1612
- "Source": source,
1613
- "Source Lakehouse Name": sourceLakehouseName,
1614
- "Source Workspace Name": sourceWorkspaceName,
1615
- "Source Path": sourcePath,
1616
- "Source Connection ID": connectionId,
1617
- "Source Location": location,
1618
- "Source SubPath": subpath,
1608
+ "Shortcut Name": i.get("name"),
1609
+ "Shortcut Path": i.get("path"),
1610
+ "Source Type": tgt.get("type"),
1611
+ "Source Workspace Id": source_workspace_id,
1612
+ "Source Workspace Name": source_workspace_name,
1613
+ "Source Item Id": source_item_id,
1614
+ "Source Item Name": (
1615
+ fabric.resolve_item_name(
1616
+ source_item_id, workspace=source_workspace_name
1617
+ )
1618
+ if source_item_id is not None
1619
+ else None
1620
+ ),
1621
+ "Source Item Type": (
1622
+ resolve_item_type(source_item_id, workspace=source_workspace_name)
1623
+ if source_item_id is not None
1624
+ else None
1625
+ ),
1626
+ "OneLake Path": tgt.get("oneLake", {}).get("path"),
1627
+ "Connection Id": connection_id,
1628
+ "Location": location,
1629
+ "Bucket": s3_compat.get("bucket"),
1630
+ "SubPath": sub_path,
1619
1631
  }
1620
1632
  df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
1621
1633
 
@@ -1722,9 +1734,9 @@ def create_custom_pool(
1722
1734
  min_node_count : int
1723
1735
  The `minimum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
1724
1736
  max_node_count : int
1725
- The `maximum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
1737
+ The `maximum node count <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
1726
1738
  min_executors : int
1727
- The `minimum executors <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
1739
+ The `minimum executors <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
1728
1740
  max_executors : int
1729
1741
  The `maximum executors <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
1730
1742
  node_family : str, default='MemoryOptimized'
@@ -1799,10 +1811,10 @@ def update_custom_pool(
1799
1811
  The `minimum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
1800
1812
  Defaults to None which keeps the existing property setting.
1801
1813
  max_node_count : int, default=None
1802
- The `maximum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
1814
+ The `maximum node count <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
1803
1815
  Defaults to None which keeps the existing property setting.
1804
1816
  min_executors : int, default=None
1805
- The `minimum executors <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
1817
+ The `minimum executors <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
1806
1818
  Defaults to None which keeps the existing property setting.
1807
1819
  max_executors : int, default=None
1808
1820
  The `maximum executors <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
@@ -2092,7 +2104,7 @@ def update_spark_settings(
2092
2104
  `Default pool <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#poolproperties>`_ for workspace.
2093
2105
  Defaults to None which keeps the existing property setting.
2094
2106
  max_node_count : int, default=None
2095
- The `maximum node count <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#starterpoolproperties>`_.
2107
+ The `maximum node count <https://learn.microsoft.com/en-us/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#starterpoolproperties>`_.
2096
2108
  Defaults to None which keeps the existing property setting.
2097
2109
  max_executors : int, default=None
2098
2110
  The `maximum executors <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#starterpoolproperties>`_.
@@ -2161,7 +2173,10 @@ def update_spark_settings(
2161
2173
 
2162
2174
 
2163
2175
  def add_user_to_workspace(
2164
- email_address: str, role_name: str, workspace: Optional[str] = None
2176
+ email_address: str,
2177
+ role_name: str,
2178
+ principal_type: Optional[str] = "User",
2179
+ workspace: Optional[str] = None,
2165
2180
  ):
2166
2181
  """
2167
2182
  Adds a user to a workspace.
@@ -2172,13 +2187,12 @@ def add_user_to_workspace(
2172
2187
  The email address of the user.
2173
2188
  role_name : str
2174
2189
  The `role <https://learn.microsoft.com/rest/api/power-bi/groups/add-group-user#groupuseraccessright>`_ of the user within the workspace.
2190
+ principal_type : str, default='User'
2191
+ The `principal type <https://learn.microsoft.com/rest/api/power-bi/groups/add-group-user#principaltype>`_.
2175
2192
  workspace : str, default=None
2176
2193
  The name of the workspace.
2177
2194
  Defaults to None which resolves to the workspace of the attached lakehouse
2178
2195
  or if no lakehouse attached, resolves to the workspace of the notebook.
2179
-
2180
- Returns
2181
- -------
2182
2196
  """
2183
2197
 
2184
2198
  (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
@@ -2190,10 +2204,21 @@ def add_user_to_workspace(
2190
2204
  f"{icons.red_dot} Invalid role. The 'role_name' parameter must be one of the following: {role_names}."
2191
2205
  )
2192
2206
  plural = "n" if role_name == "Admin" else ""
2207
+ principal_types = ["App", "Group", "None", "User"]
2208
+ principal_type = principal_type.capitalize()
2209
+ if principal_type not in principal_types:
2210
+ raise ValueError(
2211
+ f"{icons.red_dot} Invalid principal type. Valid options: {principal_types}."
2212
+ )
2193
2213
 
2194
2214
  client = fabric.PowerBIRestClient()
2195
2215
 
2196
- request_body = {"emailAddress": email_address, "groupUserAccessRight": role_name}
2216
+ request_body = {
2217
+ "emailAddress": email_address,
2218
+ "groupUserAccessRight": role_name,
2219
+ "principalType": principal_type,
2220
+ "identifier": email_address,
2221
+ }
2197
2222
 
2198
2223
  response = client.post(
2199
2224
  f"/v1.0/myorg/groups/{workspace_id}/users", json=request_body
@@ -2236,7 +2261,10 @@ def delete_user_from_workspace(email_address: str, workspace: Optional[str] = No
2236
2261
 
2237
2262
 
2238
2263
  def update_workspace_user(
2239
- email_address: str, role_name: str, workspace: Optional[str] = None
2264
+ email_address: str,
2265
+ role_name: str,
2266
+ principal_type: Optional[str] = "User",
2267
+ workspace: Optional[str] = None,
2240
2268
  ):
2241
2269
  """
2242
2270
  Updates a user's role within a workspace.
@@ -2247,13 +2275,12 @@ def update_workspace_user(
2247
2275
  The email address of the user.
2248
2276
  role_name : str
2249
2277
  The `role <https://learn.microsoft.com/rest/api/power-bi/groups/add-group-user#groupuseraccessright>`_ of the user within the workspace.
2278
+ principal_type : str, default='User'
2279
+ The `principal type <https://learn.microsoft.com/rest/api/power-bi/groups/add-group-user#principaltype>`_.
2250
2280
  workspace : str, default=None
2251
2281
  The name of the workspace.
2252
2282
  Defaults to None which resolves to the workspace of the attached lakehouse
2253
2283
  or if no lakehouse attached, resolves to the workspace of the notebook.
2254
-
2255
- Returns
2256
- -------
2257
2284
  """
2258
2285
 
2259
2286
  (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
@@ -2264,8 +2291,19 @@ def update_workspace_user(
2264
2291
  raise ValueError(
2265
2292
  f"{icons.red_dot} Invalid role. The 'role_name' parameter must be one of the following: {role_names}."
2266
2293
  )
2294
+ principal_types = ["App", "Group", "None", "User"]
2295
+ principal_type = principal_type.capitalize()
2296
+ if principal_type not in principal_types:
2297
+ raise ValueError(
2298
+ f"{icons.red_dot} Invalid principal type. Valid options: {principal_types}."
2299
+ )
2267
2300
 
2268
- request_body = {"emailAddress": email_address, "groupUserAccessRight": role_name}
2301
+ request_body = {
2302
+ "emailAddress": email_address,
2303
+ "groupUserAccessRight": role_name,
2304
+ "principalType": principal_type,
2305
+ "identifier": email_address,
2306
+ }
2269
2307
 
2270
2308
  client = fabric.PowerBIRestClient()
2271
2309
  response = client.put(f"/v1.0/myorg/groups/{workspace_id}/users", json=request_body)
@@ -21,6 +21,7 @@ def run_model_bpa_bulk(
21
21
  extended: Optional[bool] = False,
22
22
  language: Optional[str] = None,
23
23
  workspace: Optional[str | List[str]] = None,
24
+ skip_models: Optional[str | List[str]] = ["ModelBPA", "Fabric Capacity Metrics"],
24
25
  ):
25
26
  """
26
27
  Runs the semantic model Best Practice Analyzer across all semantic models in a workspace (or all accessible workspaces).
@@ -41,18 +42,22 @@ def run_model_bpa_bulk(
41
42
  workspace : str | List[str], default=None
42
43
  The workspace or list of workspaces to scan.
43
44
  Defaults to None which scans all accessible workspaces.
44
-
45
- Returns
46
- -------
45
+ skip_models : str | List[str], default=['ModelBPA', 'Fabric Capacity Metrics']
46
+ The semantic models to always skip when running this analysis.
47
47
  """
48
48
 
49
49
  import pyspark.sql.functions as F
50
50
 
51
51
  if not lakehouse_attached():
52
52
  raise ValueError(
53
- "No lakehouse is attached to this notebook. Must attach a lakehouse to the notebook."
53
+ f"{icons.red_dot} No lakehouse is attached to this notebook. Must attach a lakehouse to the notebook."
54
54
  )
55
55
 
56
+ if isinstance(skip_models, str):
57
+ skip_models = [skip_models]
58
+
59
+ skip_models.extend(["ModelBPA", "Fabric Capacity Metrics"])
60
+
56
61
  cols = [
57
62
  "Capacity Name",
58
63
  "Capacity Id",
@@ -113,8 +118,7 @@ def run_model_bpa_bulk(
113
118
  or set(["Lakehouse", "SemanticModel"]).issubset(set(x["Type"]))
114
119
  )
115
120
  default_semantic_models = filtered_df["Display Name"].unique().tolist()
116
- # Skip ModelBPA :)
117
- skip_models = default_semantic_models + [icons.model_bpa_name]
121
+ skip_models.extend(default_semantic_models)
118
122
  dfD_filt = dfD[~dfD["Dataset Name"].isin(skip_models)]
119
123
 
120
124
  if len(dfD_filt) > 0:
@@ -135,6 +135,17 @@ def model_bpa_rules(
135
135
  "Setting the 'Data Coverage Definition' property may lead to better performance because the engine knows when it can only query the import-portion of the table and when it needs to query the DirectQuery portion of the table.",
136
136
  "https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions",
137
137
  ),
138
+ (
139
+ "Performance",
140
+ "Model",
141
+ "Warning",
142
+ "Dual mode is only relevant for dimension tables if DirectQuery is used for the corresponding fact table",
143
+ lambda obj: not any(
144
+ p.Mode == TOM.ModeType.DirectQuery for p in tom.all_partitions()
145
+ )
146
+ and any(p.Mode == TOM.ModeType.Dual for p in tom.all_partitions()),
147
+ "Only use Dual mode for dimension tables/partitions where a corresponding fact table is in DirectQuery. Using Dual mode in other circumstances (i.e. rest of the model is in Import mode) may lead to performance issues especially if the number of measures in the model is high.",
148
+ ),
138
149
  (
139
150
  "Performance",
140
151
  "Table",
@@ -590,13 +601,13 @@ def model_bpa_rules(
590
601
  re.search(
591
602
  r"USERELATIONSHIP\s*\(\s*\'*"
592
603
  + obj.FromTable.Name
593
- + "'*\["
604
+ + r"'*\["
594
605
  + obj.FromColumn.Name
595
- + "\]\s*,\s*'*"
606
+ + r"\]\s*,\s*'*"
596
607
  + obj.ToTable.Name
597
- + "'*\["
608
+ + r"'*\["
598
609
  + obj.ToColumn.Name
599
- + "\]",
610
+ + r"\]",
600
611
  m.Expression,
601
612
  flags=re.IGNORECASE,
602
613
  )
@@ -21,10 +21,6 @@ def qso_sync(dataset: str, workspace: Optional[str] = None):
21
21
  The Fabric workspace name.
22
22
  Defaults to None which resolves to the workspace of the attached lakehouse
23
23
  or if no lakehouse attached, resolves to the workspace of the notebook.
24
-
25
- Returns
26
- -------
27
-
28
24
  """
29
25
 
30
26
  # https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/trigger-query-scale-out-sync-in-group
@@ -63,7 +59,6 @@ def qso_sync_status(
63
59
  -------
64
60
  Tuple[pandas.DataFrame, pandas.DataFrame]
65
61
  2 pandas dataframes showing the query scale-out sync status.
66
-
67
62
  """
68
63
 
69
64
  # https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/get-query-scale-out-sync-status-in-group
@@ -161,7 +156,6 @@ def disable_qso(dataset: str, workspace: Optional[str] = None) -> pd.DataFrame:
161
156
  -------
162
157
  pandas.DataFrame
163
158
  A pandas dataframe showing the current query scale out settings.
164
-
165
159
  """
166
160
 
167
161
  (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
@@ -177,6 +171,7 @@ def disable_qso(dataset: str, workspace: Optional[str] = None) -> pd.DataFrame:
177
171
  raise FabricHTTPException(response)
178
172
 
179
173
  df = list_qso_settings(dataset=dataset, workspace=workspace)
174
+
180
175
  print(
181
176
  f"{icons.green_dot} Query scale out has been disabled for the '{dataset}' semantic model within the '{workspace}' workspace."
182
177
  )
@@ -210,7 +205,6 @@ def set_qso(
210
205
  -------
211
206
  pandas.DataFrame
212
207
  A pandas dataframe showing the current query scale-out settings.
213
-
214
208
  """
215
209
 
216
210
  # https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/update-dataset-in-group
@@ -225,31 +219,27 @@ def set_qso(
225
219
  request_body = {
226
220
  "queryScaleOutSettings": {
227
221
  "autoSyncReadOnlyReplicas": auto_sync,
228
- "maxReadOnlyReplicas": str(max_read_only_replicas),
222
+ "maxReadOnlyReplicas": max_read_only_replicas,
229
223
  }
230
224
  }
231
225
 
232
- ssm = set_semantic_model_storage_format(
226
+ set_semantic_model_storage_format(
233
227
  dataset=dataset, storage_format="Large", workspace=workspace
234
228
  )
235
- if ssm == 200:
236
- client = fabric.PowerBIRestClient()
237
- response = client.patch(
238
- f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}",
239
- json=request_body,
240
- )
241
- if response.status_code != 200:
242
- raise FabricHTTPException(response)
229
+ client = fabric.PowerBIRestClient()
230
+ response = client.patch(
231
+ f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}",
232
+ json=request_body,
233
+ )
234
+ if response.status_code != 200:
235
+ raise FabricHTTPException(response)
243
236
 
244
- df = list_qso_settings(dataset=dataset, workspace=workspace)
245
- print(
246
- f"{icons.green_dot} Query scale out has been set on the '{dataset}' semantic model within the '{workspace}' workspace."
247
- )
248
- return df
249
- else:
250
- raise ValueError(
251
- f"{icons.red_dot} Failed to set the '{dataset}' semantic model within the '{workspace}' workspace to large semantic model storage format. This is a prerequisite for enabling Query Scale Out.\n\"https://learn.microsoft.com/power-bi/enterprise/service-premium-scale-out#prerequisites\""
252
- )
237
+ df = list_qso_settings(dataset=dataset, workspace=workspace)
238
+ print(
239
+ f"{icons.green_dot} Query scale out has been set on the '{dataset}' semantic model within the '{workspace}' workspace."
240
+ )
241
+
242
+ return df
253
243
 
254
244
 
255
245
  def set_semantic_model_storage_format(
@@ -268,10 +258,6 @@ def set_semantic_model_storage_format(
268
258
  The Fabric workspace name.
269
259
  Defaults to None which resolves to the workspace of the attached lakehouse
270
260
  or if no lakehouse attached, resolves to the workspace of the notebook.
271
-
272
- Returns
273
- -------
274
-
275
261
  """
276
262
 
277
263
  (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
@@ -3,6 +3,7 @@ from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
3
3
  from sempy_labs.lakehouse._lakehouse import (
4
4
  lakehouse_attached,
5
5
  optimize_lakehouse_tables,
6
+ vacuum_lakehouse_tables,
6
7
  )
7
8
 
8
9
  from sempy_labs.lakehouse._shortcuts import (
@@ -19,4 +20,5 @@ __all__ = [
19
20
  # create_shortcut,
20
21
  "create_shortcut_onelake",
21
22
  "delete_shortcut",
23
+ "vacuum_lakehouse_tables",
22
24
  ]
@@ -69,18 +69,74 @@ def optimize_lakehouse_tables(
69
69
  else:
70
70
  tables_filt = lakeTablesDelta.copy()
71
71
 
72
- tableCount = len(tables_filt)
73
-
74
72
  spark = SparkSession.builder.getOrCreate()
75
73
 
76
- i = 1
77
74
  for _, r in (bar := tqdm(tables_filt.iterrows())):
78
75
  tableName = r["Table Name"]
79
76
  tablePath = r["Location"]
80
77
  bar.set_description(f"Optimizing the '{tableName}' table...")
81
78
  deltaTable = DeltaTable.forPath(spark, tablePath)
82
79
  deltaTable.optimize().executeCompaction()
83
- print(
84
- f"{icons.green_dot} The '{tableName}' table has been optimized. ({str(i)}/{str(tableCount)})"
85
- )
86
- i += 1
80
+
81
+
82
+ @log
83
+ def vacuum_lakehouse_tables(
84
+ tables: Optional[Union[str, List[str]]] = None,
85
+ lakehouse: Optional[str] = None,
86
+ workspace: Optional[str] = None,
87
+ retain_n_hours: Optional[int] = None,
88
+ ):
89
+ """
90
+ Runs the `VACUUM <https://docs.delta.io/latest/delta-utility.html#remove-files-no-longer-referenced-by-a-delta-table>`_ function over the specified lakehouse tables.
91
+
92
+ Parameters
93
+ ----------
94
+ tables : str | List[str] | None
95
+ The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be vacuumed.
96
+ lakehouse : str, default=None
97
+ The Fabric lakehouse.
98
+ Defaults to None which resolves to the lakehouse attached to the notebook.
99
+ workspace : str, default=None
100
+ The Fabric workspace used by the lakehouse.
101
+ Defaults to None which resolves to the workspace of the attached lakehouse
102
+ or if no lakehouse attached, resolves to the workspace of the notebook.
103
+ retain_n_hours : int, default=None
104
+ The number of hours to retain historical versions of Delta table files.
105
+ Files older than this retention period will be deleted during the vacuum operation.
106
+ If not specified, the default retention period configured for the Delta table will be used.
107
+ The default retention period is 168 hours (7 days) unless manually configured via table properties.
108
+ """
109
+
110
+ from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
111
+ from delta import DeltaTable
112
+
113
+ workspace = fabric.resolve_workspace_name(workspace)
114
+
115
+ if lakehouse is None:
116
+ lakehouse_id = fabric.get_lakehouse_id()
117
+ lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
118
+
119
+ lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
120
+ lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]
121
+
122
+ if isinstance(tables, str):
123
+ tables = [tables]
124
+
125
+ if tables is not None:
126
+ tables_filt = lakeTablesDelta[lakeTablesDelta["Table Name"].isin(tables)]
127
+ else:
128
+ tables_filt = lakeTablesDelta.copy()
129
+
130
+ spark = SparkSession.builder.getOrCreate()
131
+ spark.conf.set("spark.databricks.delta.vacuum.parallelDelete.enabled", "true")
132
+
133
+ for _, r in (bar := tqdm(tables_filt.iterrows())):
134
+ tableName = r["Table Name"]
135
+ tablePath = r["Location"]
136
+ bar.set_description(f"Vacuuming the '{tableName}' table...")
137
+ deltaTable = DeltaTable.forPath(spark, tablePath)
138
+
139
+ if retain_n_hours is None:
140
+ deltaTable.vacuum()
141
+ else:
142
+ deltaTable.vacuum(retain_n_hours)