semantic-link-labs 0.9.9__py3-none-any.whl → 0.9.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/METADATA +30 -22
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/RECORD +47 -40
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +28 -1
- sempy_labs/_clear_cache.py +12 -0
- sempy_labs/_dax.py +8 -2
- sempy_labs/_delta_analyzer.py +17 -26
- sempy_labs/_environments.py +19 -1
- sempy_labs/_generate_semantic_model.py +7 -8
- sempy_labs/_helper_functions.py +351 -151
- sempy_labs/_kql_databases.py +18 -0
- sempy_labs/_kusto.py +137 -0
- sempy_labs/_list_functions.py +18 -36
- sempy_labs/_model_bpa_rules.py +13 -3
- sempy_labs/_notebooks.py +44 -11
- sempy_labs/_semantic_models.py +93 -1
- sempy_labs/_sql.py +3 -2
- sempy_labs/_tags.py +194 -0
- sempy_labs/_variable_libraries.py +89 -0
- sempy_labs/_vertipaq.py +6 -6
- sempy_labs/_vpax.py +386 -0
- sempy_labs/_warehouses.py +3 -3
- sempy_labs/admin/__init__.py +14 -0
- sempy_labs/admin/_artifacts.py +3 -3
- sempy_labs/admin/_capacities.py +161 -1
- sempy_labs/admin/_dataflows.py +45 -0
- sempy_labs/admin/_items.py +16 -11
- sempy_labs/admin/_tags.py +126 -0
- sempy_labs/admin/_tenant.py +5 -5
- sempy_labs/directlake/_generate_shared_expression.py +29 -26
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +55 -5
- sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
- sempy_labs/lakehouse/__init__.py +16 -0
- sempy_labs/lakehouse/_blobs.py +115 -63
- sempy_labs/lakehouse/_get_lakehouse_columns.py +41 -18
- sempy_labs/lakehouse/_get_lakehouse_tables.py +62 -47
- sempy_labs/lakehouse/_helper.py +211 -0
- sempy_labs/lakehouse/_lakehouse.py +45 -36
- sempy_labs/lakehouse/_livy_sessions.py +137 -0
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +7 -12
- sempy_labs/migration/_refresh_calc_tables.py +7 -6
- sempy_labs/report/_download_report.py +1 -1
- sempy_labs/report/_generate_report.py +5 -1
- sempy_labs/report/_reportwrapper.py +31 -18
- sempy_labs/tom/_model.py +104 -35
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +0 -9
- sempy_labs/report/_bpareporttemplate/.platform +0 -11
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/licenses/LICENSE +0 -0
- {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,7 @@ from sempy_labs._helper_functions import (
 )
 from sempy._utils._log import log
 from sempy_labs.tom import connect_semantic_model
-from typing import Optional
+from typing import Optional, List
 import sempy_labs._icons as icons
 from uuid import UUID
 import re
@@ -19,7 +19,9 @@ def _extract_expression_list(expression):
     """
 
     pattern_sql = r'Sql\.Database\s*\(\s*"([^"]+)"\s*,\s*"([^"]+)"\s*\)'
-    pattern_no_sql =
+    pattern_no_sql = (
+        r'AzureStorage\.DataLake\(".*?/([0-9a-fA-F\-]{36})/([0-9a-fA-F\-]{36})"'
+    )
 
     match_sql = re.search(pattern_sql, expression)
     match_no_sql = re.search(pattern_no_sql, expression)
@@ -102,6 +104,7 @@ def update_direct_lake_model_connection(
     source_type: str = "Lakehouse",
     source_workspace: Optional[str | UUID] = None,
     use_sql_endpoint: bool = True,
+    tables: Optional[str | List[str]] = None,
 ):
     """
     Remaps a Direct Lake semantic model's SQL Endpoint connection to a new lakehouse/warehouse.
@@ -126,12 +129,19 @@ def update_direct_lake_model_connection(
     use_sql_endpoint : bool, default=True
         If True, the SQL Endpoint will be used for the connection.
         If False, Direct Lake over OneLake will be used.
+    tables : str | List[str], default=None
+        The name(s) of the table(s) to update in the Direct Lake semantic model.
+        If None, all tables will be updated (if there is only one expression).
+        If multiple tables are specified, they must be provided as a list.
     """
     if use_sql_endpoint:
         icons.sll_tags.append("UpdateDLConnection_SQL")
     else:
         icons.sll_tags.append("UpdateDLConnection_DLOL")
 
+    if isinstance(tables, str):
+        tables = [tables]
+
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
     (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
 
@@ -174,7 +184,12 @@ def update_direct_lake_model_connection(
         )
 
         # Update the single connection expression
-        if len(expressions)
+        if len(expressions) > 1 and not tables:
+            print(
+                f"{icons.info} Multiple expressions found in the model. Please specify the tables to update using the 'tables parameter."
+            )
+            return
+        elif len(expressions) == 1 and not tables:
             expr = expressions[0]
             tom.model.Expressions[expr].Expression = shared_expression
 
@@ -182,6 +197,41 @@ def update_direct_lake_model_connection(
                 f"{icons.green_dot} The expression in the '{dataset_name}' semantic model within the '{workspace_name}' workspace has been updated to point to the '{source}' {source_type.lower()} in the '{source_workspace}' workspace."
             )
         else:
-
-
+            import sempy
+
+            sempy.fabric._client._utils._init_analysis_services()
+            import Microsoft.AnalysisServices.Tabular as TOM
+
+            expr_list = _extract_expression_list(shared_expression)
+
+            expr_name = next(
+                (name for name, exp in expression_dict.items() if exp == expr_list),
+                None,
             )
+
+            # If the expression does not already exist, create it
+            def generate_unique_name(existing_names):
+                i = 1
+                while True:
+                    candidate = f"DatabaseQuery{i}"
+                    if candidate not in existing_names:
+                        return candidate
+                    i += 1
+
+            if not expr_name:
+                expr_name = generate_unique_name(expressions)
+                tom.add_expression(name=expr_name, expression=shared_expression)
+
+            all_tables = [t.Name for t in tom.model.Tables]
+            for t_name in tables:
+                if t_name not in all_tables:
+                    raise ValueError(
+                        f"{icons.red_dot} The table '{t_name}' does not exist in the '{dataset_name}' semantic model within the '{workspace_name}' workspace."
+                    )
+                p = next(p for p in tom.model.Tables[t_name].Partitions)
+                if p.Mode != TOM.ModeType.DirectLake:
+                    raise ValueError(
+                        f"{icons.red_dot} The table '{t_name}' in the '{dataset_name}' semantic model within the '{workspace_name}' workspace is not in Direct Lake mode. This function is only applicable to Direct Lake tables."
+                    )
+
+                p.Source.ExpressionSource = tom.model.Expressions[expr_name]
sempy_labs/lakehouse/__init__.py CHANGED
@@ -20,6 +20,16 @@ from sempy_labs.lakehouse._shortcuts import (
 from sempy_labs.lakehouse._blobs import (
     recover_lakehouse_object,
     list_blobs,
+    get_user_delegation_key,
+)
+from sempy_labs.lakehouse._livy_sessions import (
+    list_livy_sessions,
+)
+from sempy_labs.lakehouse._helper import (
+    is_v_ordered,
+    delete_lakehouse,
+    update_lakehouse,
+    load_table,
 )
 
 __all__ = [
@@ -36,4 +46,10 @@ __all__ = [
     "list_shortcuts",
     "recover_lakehouse_object",
     "list_blobs",
+    "list_livy_sessions",
+    "is_v_ordered",
+    "delete_lakehouse",
+    "update_lakehouse",
+    "load_table",
+    "get_user_delegation_key",
 ]
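
These additions surface the new helpers at the package level; a short sketch of what becomes importable after the upgrade (only the import is shown, since the helper signatures live in modules outside this diff):

from sempy_labs.lakehouse import (
    list_livy_sessions,
    is_v_ordered,
    delete_lakehouse,
    update_lakehouse,
    load_table,
    get_user_delegation_key,
)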
sempy_labs/lakehouse/_blobs.py CHANGED
@@ -11,6 +11,7 @@ from typing import Optional, List
 import sempy_labs._icons as icons
 import xml.etree.ElementTree as ET
 import pandas as pd
+from sempy.fabric.exceptions import FabricHTTPException
 
 
 def _request_blob_api(
@@ -18,6 +19,7 @@ def _request_blob_api(
     method: str = "get",
     payload: Optional[dict] = None,
     status_codes: int | List[int] = 200,
+    uses_pagination: bool = False,
 ):
 
     import requests
@@ -31,21 +33,41 @@ def _request_blob_api(
 
     headers = {
         "Authorization": f"Bearer {token}",
-        "Content-Type": "application/
+        "Content-Type": "application/xml",
         "x-ms-version": "2025-05-05",
     }
 
-
-
-
-
-
-
+    base_url = "https://onelake.blob.fabric.microsoft.com/"
+    full_url = f"{base_url}{request}"
+    results = []
+
+    while True:
+        response = requests.request(
+            method.upper(),
+            full_url,
+            headers=headers,
+            data=payload if method.lower() != "get" else None,
+        )
+
+        if response.status_code not in status_codes:
+            raise FabricHTTPException(response)
+
+        if not uses_pagination:
+            return response
+
+        # Parse XML to find blobs and NextMarker
+        root = ET.fromstring(response.content)
+        results.append(root)
+
+        next_marker = root.findtext(".//NextMarker")
+        if not next_marker:
+            break  # No more pages
 
-
-
+        # Append the marker to the original request (assuming query string format)
+        delimiter = "&" if "?" in request else "?"
+        full_url = f"{base_url}{request}{delimiter}marker={next_marker}"
 
-    return
+    return results
 
 
 @log
@@ -90,12 +112,6 @@ def list_blobs(
     )
     path_prefix = f"{workspace_id}/{lakehouse_id}/{container}"
 
-    response = _request_blob_api(
-        request=f"{path_prefix}?restype=container&comp=list&include=deleted"
-    )
-    root = ET.fromstring(response.content)
-    response_json = _xml_to_dict(root)
-
     columns = {
         "Blob Name": "str",
         "Is Deleted": "bool",
@@ -122,37 +138,55 @@ def list_blobs(
 
     df = _create_dataframe(columns=columns)
 
-
-
-
-
-
-
-
-
-
-
-
-
-    "
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    url = f"{path_prefix}?restype=container&comp=list&include=deleted"
+
+    responses = _request_blob_api(
+        request=url,
+        uses_pagination=True,
+    )
+
+    dfs = []
+    for root in responses:
+        response_json = _xml_to_dict(root)
+
+        blobs = (
+            response_json.get("EnumerationResults", {}).get("Blobs", {}).get("Blob", [])
+        )
+
+        if isinstance(blobs, dict):
+            blobs = [blobs]
+
+        for blob in blobs:
+            p = blob.get("Properties", {})
+            new_data = {
+                "Blob Name": blob.get("Name"),
+                "Is Deleted": blob.get("Deleted", False),
+                "Deletion Id": blob.get("DeletionId"),
+                "Creation Time": p.get("Creation-Time"),
+                "Expiry Time": p.get("Expiry-Time"),
+                "Etag": p.get("Etag"),
+                "Resource Type": p.get("ResourceType"),
+                "Content Length": p.get("Content-Length"),
+                "Content Type": p.get("Content-Type"),
+                "Content Encoding": p.get("Content-Encoding"),
+                "Content Language": p.get("Content-Language"),
+                "Content CRC64": p.get("Content-CRC64"),
+                "Content MD5": p.get("Content-MD5"),
+                "Cache Control": p.get("Cache-Control"),
+                "Content Disposition": p.get("Content-Disposition"),
+                "Blob Type": p.get("BlobType"),
+                "Access Tier": p.get("AccessTier"),
+                "Access Tier Inferred": p.get("AccessTierInferred"),
+                "Server Encrypted": p.get("ServerEncrypted"),
+                "Deleted Time": p.get("DeletedTime"),
+                "Remaining Retention Days": p.get("RemainingRetentionDays"),
+            }
+
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)
 
     return df
 
@@ -182,7 +216,7 @@ def recover_lakehouse_object(
     workspace_id = resolve_workspace_id(workspace)
     lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
 
-
+    blob_name = f"{lakehouse_id}/{file_path}"
 
     container = file_path.split("/")[0]
     if container not in ["Tables", "Files"]:
@@ -190,29 +224,45 @@ def recover_lakehouse_object(
             f"{icons.red_dot} Invalid container '{container}' within the file_path parameter. Expected 'Tables' or 'Files'."
         )
 
-
+    # Undelete the blob
+    print(f"{icons.in_progress} Attempting to recover the '{blob_name}' blob...")
 
-
-
-
-
-
-
-
-
+    try:
+        _request_blob_api(
+            request=f"{workspace_id}/{lakehouse_id}/{file_path}?comp=undelete",
+            method="put",
+        )
+        print(
+            f"{icons.green_dot} The '{blob_name}' blob recover attempt was successful."
+        )
+    except FabricHTTPException as e:
+        if e.status_code == 404:
+            print(
+                f"{icons.warning} The '{blob_name}' blob was not found. No action taken."
+            )
+        else:
+            print(
+                f"{icons.red_dot} An error occurred while recovering the '{blob_name}' blob: {e}"
            )
-    print(f"{icons.green_dot} The '{blob_name}' blob has been restored.")
 
 
-def
+def get_user_delegation_key():
+    """
+    Gets a key that can be used to sign a user delegation SAS (shared access signature). A user delegation SAS grants access to Azure Blob Storage resources by using Microsoft Entra credentials.
 
-
+    This is a wrapper function for the following API: `Get User Delegation Key <https://learn.microsoft.com/rest/api/storageservices/get-user-delegation-key>`_.
+
+    Returns
+    -------
+    str
+        The user delegation key value.
+    """
 
     from datetime import datetime, timedelta, timezone
 
     utc_now = datetime.now(timezone.utc)
     start_time = utc_now + timedelta(minutes=2)
-    expiry_time = start_time + timedelta(minutes=
+    expiry_time = start_time + timedelta(minutes=60)
     start_str = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
     expiry_str = expiry_time.strftime("%Y-%m-%dT%H:%M:%SZ")
 
@@ -223,9 +273,11 @@ def _get_user_delegation_key():
     </KeyInfo>"""
 
     response = _request_blob_api(
-        request="restype=service&comp=userdelegationkey",
+        request="?restype=service&comp=userdelegationkey",
         method="post",
         payload=payload,
     )
 
-
+    root = ET.fromstring(response.content)
+    response_json = _xml_to_dict(root)
+    return response_json.get("UserDelegationKey", {}).get("Value", None)
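
A hedged usage sketch of the reworked blob helpers: list_blobs now follows NextMarker continuation tokens inside _request_blob_api and concatenates every page into one DataFrame, and the delegation-key helper is public as get_user_delegation_key. The list_blobs parameter names below are assumptions (they are not visible in these hunks); get_user_delegation_key takes no arguments per the hunk above:

from sempy_labs.lakehouse import list_blobs, get_user_delegation_key

# All blobs across every result page, including soft-deleted ones (include=deleted).
blobs_df = list_blobs(lakehouse="MyLakehouse", workspace="Analytics")  # parameter names assumed
deleted_blobs = blobs_df[blobs_df["Is Deleted"]]

# Key value that can be used to sign a user delegation SAS.
delegation_key = get_user_delegation_key()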
sempy_labs/lakehouse/_get_lakehouse_columns.py CHANGED
@@ -1,14 +1,17 @@
 import pandas as pd
+import re
 from sempy_labs._helper_functions import (
     format_dax_object_name,
     resolve_workspace_name_and_id,
     resolve_lakehouse_name_and_id,
     _create_dataframe,
-
+    _get_delta_table,
+    _pure_python_notebook,
 )
 from typing import Optional
 from sempy._utils._log import log
 from uuid import UUID
+import sempy_labs._icons as icons
 
 
 @log
@@ -16,7 +19,9 @@ def get_lakehouse_columns(
     lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
-    Shows the tables and columns of a lakehouse and their respective properties.
+    Shows the tables and columns of a lakehouse and their respective properties. This function can be executed in either a PySpark or pure Python notebook. Note that data types may show differently when using PySpark vs pure Python.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
 
     Parameters
     ----------
@@ -34,7 +39,6 @@ def get_lakehouse_columns(
         Shows the tables/columns within a lakehouse and their properties.
     """
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
-    from delta import DeltaTable
 
     columns = {
         "Workspace Name": "string",
@@ -51,29 +55,48 @@ def get_lakehouse_columns(
         lakehouse=lakehouse, workspace=workspace_id
     )
 
-    spark = _create_spark_session()
-
     tables = get_lakehouse_tables(
         lakehouse=lakehouse_id, workspace=workspace_id, extended=False, count_rows=False
     )
     tables_filt = tables[tables["Format"] == "delta"]
 
-
-
-
-        delta_table = DeltaTable.forPath(spark, path)
-        sparkdf = delta_table.toDF()
-
-        for col_name, data_type in sparkdf.dtypes:
-            full_column_name = format_dax_object_name(table_name, col_name)
-            new_data = {
+    def add_column_metadata(table_name, col_name, data_type):
+        new_rows.append(
+            {
                 "Workspace Name": workspace_name,
-                "Lakehouse Name":
+                "Lakehouse Name": lakehouse_name,
                 "Table Name": table_name,
                 "Column Name": col_name,
-                "Full Column Name":
+                "Full Column Name": format_dax_object_name(table_name, col_name),
                 "Data Type": data_type,
             }
-
+        )
+
+    new_rows = []
+
+    for _, r in tables_filt.iterrows():
+        table_name = r["Table Name"]
+        path = r["Location"]
+
+        if _pure_python_notebook():
+            from deltalake import DeltaTable
+
+            table_schema = DeltaTable(path).schema()
+
+            for field in table_schema.fields:
+                col_name = field.name
+                match = re.search(r'"(.*?)"', str(field.type))
+                if not match:
+                    raise ValueError(
+                        f"{icons.red_dot} Could not find data type for column {col_name}."
+                    )
+                data_type = match.group(1)
+                add_column_metadata(table_name, col_name, data_type)
+        else:
+            delta_table = _get_delta_table(path=path)
+            table_df = delta_table.toDF()
+
+            for col_name, data_type in table_df.dtypes:
+                add_column_metadata(table_name, col_name, data_type)
 
-    return df
+    return pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)