semantic-link-labs 0.11.2__py3-none-any.whl → 0.11.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of semantic-link-labs might be problematic.
- {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/METADATA +4 -4
- {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/RECORD +26 -24
- sempy_labs/__init__.py +12 -18
- sempy_labs/_a_lib_info.py +1 -1
- sempy_labs/_external_data_shares.py +55 -1
- sempy_labs/_helper_functions.py +169 -5
- sempy_labs/_labels.py +126 -0
- sempy_labs/_list_functions.py +1 -1
- sempy_labs/_notebooks.py +152 -3
- sempy_labs/directlake/_dl_helper.py +4 -1
- sempy_labs/graph/_users.py +3 -5
- sempy_labs/lakehouse/_helper.py +18 -9
- sempy_labs/lakehouse/_lakehouse.py +18 -9
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +38 -47
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +12 -22
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +7 -11
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +14 -23
- sempy_labs/ml_model/__init__.py +23 -0
- sempy_labs/ml_model/_functions.py +427 -0
- sempy_labs/report/_reportwrapper.py +1 -1
- sempy_labs/tom/_model.py +8 -3
- sempy_labs/variable_library/__init__.py +19 -0
- sempy_labs/variable_library/_functions.py +403 -0
- sempy_labs/_dax_query_view.py +0 -57
- sempy_labs/_ml_models.py +0 -111
- sempy_labs/_variable_libraries.py +0 -92
- {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/WHEEL +0 -0
- {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/licenses/LICENSE +0 -0
- {semantic_link_labs-0.11.2.dist-info → semantic_link_labs-0.11.3.dist-info}/top_level.txt +0 -0
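One structural change worth noting from the list above: the flat modules _dax_query_view.py, _ml_models.py, and _variable_libraries.py were removed, and ml_model and variable_library now ship as subpackages. A minimal import sketch under that assumption (this diff does not show which functions those subpackages export, so none are named):

import sempy_labs
from sempy_labs import ml_model, variable_library  # new subpackages in 0.11.3

# The removed flat modules (e.g. sempy_labs._ml_models and
# sempy_labs._variable_libraries) are no longer part of the wheel after upgrading.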
sempy_labs/_labels.py
ADDED
@@ -0,0 +1,126 @@
+import sempy.fabric as fabric
+import requests
+import pandas as pd
+from typing import Optional, Union
+from uuid import UUID
+from sempy.fabric.exceptions import FabricHTTPException
+from sempy._utils._log import log
+
+
+@log
+def list_item_labels(workspace: Optional[Union[str, UUID]] = None) -> pd.DataFrame:
+    """
+    List all items within a workspace and shows their sensitivity labels.
+
+    NOTE: This function uses an internal API and is subject to change/break without notice.
+
+    Parameters
+    ----------
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of all items within a workspace and their sensitivity labels.
+    """
+
+    import notebookutils
+
+    token = notebookutils.credentials.getToken("pbi")
+    headers = {"Authorization": f"Bearer {token}"}
+
+    # Item types handled in special payload fields
+    grouped_types = {
+        "dashboards": "Dashboard",
+        "reports": "Report",
+        "models": "SemanticModel",
+        "dataflows": "Dataflow",
+        "datamarts": "Datamart",
+    }
+
+    # All other item types go into 'artifacts'
+    fabric_items = [
+        "Datamart",
+        "Lakehouse",
+        "Eventhouse",
+        "Environment",
+        "KQLDatabase",
+        "KQLQueryset",
+        "KQLDashboard",
+        "DataPipeline",
+        "Notebook",
+        "SparkJobDefinition",
+        "MLExperiment",
+        "MLModel",
+        "Warehouse",
+        "Eventstream",
+        "SQLEndpoint",
+        "MirroredWarehouse",
+        "MirroredDatabase",
+        "Reflex",
+        "GraphQLApi",
+        "MountedDataFactory",
+        "SQLDatabase",
+        "CopyJob",
+        "VariableLibrary",
+        "Dataflow",
+        "ApacheAirflowJob",
+        "WarehouseSnapshot",
+        "DigitalTwinBuilder",
+        "DigitalTwinBuilderFlow",
+        "MirroredAzureDatabricksCatalog",
+        "DataAgent",
+        "UserDataFunction",
+    ]
+
+    dfI = fabric.list_items(workspace=workspace)
+
+    payload = {
+        key: [{"artifactId": i} for i in dfI[dfI["Type"] == value]["Id"].tolist()]
+        for key, value in grouped_types.items()
+    }
+
+    # Add generic artifact types
+    artifact_ids = dfI[dfI["Type"].isin(fabric_items)]["Id"].tolist()
+    if artifact_ids:
+        payload["artifacts"] = [{"artifactId": i} for i in artifact_ids]
+
+    client = fabric.PowerBIRestClient()
+    response = client.get("/v1.0/myorg/capacities")
+    if response.status_code != 200:
+        raise FabricHTTPException("Failed to retrieve URL prefix.")
+    context = response.json().get("@odata.context")
+    prefix = context.split("/v1.0")[0]
+
+    response = requests.post(
+        f"{prefix}/metadata/informationProtection/artifacts",
+        json=payload,
+        headers=headers,
+    )
+    if response.status_code != 200:
+        raise FabricHTTPException(f"Failed to retrieve labels: {response.text}")
+    result = response.json()
+
+    label_keys = [
+        "artifactInformationProtections",
+        "datasetInformationProtections",
+        "reportInformationProtections",
+        "dashboardInformationProtections",
+    ]
+
+    rows = [
+        {
+            "Id": item.get("artifactObjectId"),
+            "Label Id": item.get("labelId"),
+            "Label Name": item.get("name"),
+            "Parent Label Name": item.get("parent", {}).get("name"),
+            "Label Description": item.get("tooltip"),
+        }
+        for key in label_keys
+        for item in result.get(key, [])
+    ]
+
+    df_labels = pd.DataFrame(rows)
+    return dfI.merge(df_labels, on="Id", how="left")
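A minimal usage sketch of the new helper above; the workspace name is a placeholder, and the call must run inside a Fabric notebook because the token is acquired through notebookutils:

from sempy_labs._labels import list_item_labels

# Placeholder workspace name; the result is the fabric.list_items() frame for the
# workspace with the label columns ("Label Id", "Label Name", "Parent Label Name",
# "Label Description") merged on by item Id.
df = list_item_labels(workspace="Sales Analytics")
print(df[["Id", "Type", "Label Name"]].head())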
sempy_labs/_list_functions.py
CHANGED
@@ -1131,7 +1131,7 @@ def list_reports_using_semantic_model(
     dataset: str | UUID, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
-    Shows a list of all the reports
+    Shows a list of all the reports which use a given semantic model. This is limited to the reports which are in the same workspace as the semantic model.

     Parameters
     ----------
sempy_labs/_notebooks.py
CHANGED
@@ -1,20 +1,21 @@
 import sempy.fabric as fabric
 import pandas as pd
 import sempy_labs._icons as icons
-from typing import Optional
+from typing import Optional, List
 import base64
 import requests
 from sempy._utils._log import log
-from ._helper_functions import (
+from sempy_labs._helper_functions import (
     resolve_workspace_name_and_id,
     resolve_workspace_id,
     _decode_b64,
     _base_api,
     resolve_item_id,
     create_item,
+    _create_dataframe,
 )
 from sempy.fabric.exceptions import FabricHTTPException
-import
+from os import PathLike
 from uuid import UUID

 _notebook_prefix = "notebook-content."
@@ -114,6 +115,7 @@ def import_notebook_from_web(
     description: Optional[str] = None,
     workspace: Optional[str | UUID] = None,
     overwrite: bool = False,
+    folder: Optional[str | PathLike] = None,
 ):
     """
     Creates a new notebook within a workspace based on a Jupyter notebook hosted in the web.
@@ -136,6 +138,9 @@ def import_notebook_from_web(
         or if no lakehouse attached, resolves to the workspace of the notebook.
     overwrite : bool, default=False
         If set to True, overwrites the existing notebook in the workspace if it exists.
+    folder : str | os.PathLike, default=None
+        The folder within the workspace where the notebook will be created.
+        Defaults to None which places the notebook in the root of the workspace.
     """

     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
@@ -161,6 +166,7 @@ def import_notebook_from_web(
             workspace=workspace_id,
             description=description,
             format="ipynb",
+            folder=folder,
         )
     elif len(dfI_filt) > 0 and overwrite:
         print(f"{icons.info} Overwrite of notebooks is currently not supported.")
@@ -181,6 +187,7 @@ def create_notebook(
     description: Optional[str] = None,
     workspace: Optional[str | UUID] = None,
     format: Optional[str] = None,
+    folder: Optional[str | PathLike] = None,
 ):
     """
     Creates a new notebook with a definition within a workspace.
@@ -203,6 +210,9 @@ def create_notebook(
     format : str, default=None
         If 'ipynb' is provided than notebook_content should be standard ipynb format
         otherwise notebook_content should be GIT friendly format
+    folder : str | os.PathLike, default=None
+        The folder within the workspace where the notebook will be created.
+        Defaults to None which places the notebook in the root of the workspace.
     """

     notebook_payload = base64.b64encode(notebook_content).decode("utf-8")
@@ -226,6 +236,7 @@ def create_notebook(
         workspace=workspace,
         description=description,
         definition=definition_payload,
+        folder=folder,
     )


@@ -287,3 +298,141 @@ def update_notebook_definition(
     print(
         f"{icons.green_dot} The '{name}' notebook was updated within the '{workspace_name}' workspace."
     )
+
+
+@log
+def list_notebooks(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
+    """
+    Shows the notebooks within a workspace.
+
+    Parameters
+    ----------
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the SQL endpoints within a workspace.
+    """
+
+    columns = {
+        "Notebook Id": "string",
+        "Notebook Name": "string",
+        "Description": "string",
+    }
+    df = _create_dataframe(columns=columns)
+
+    workspace_id = resolve_workspace_id(workspace)
+
+    responses = _base_api(
+        request=f"/v1/workspaces/{workspace_id}/notebooks", uses_pagination=True
+    )
+
+    rows = []
+    for r in responses:
+        for v in r.get("value", []):
+            rows.append(
+                {
+                    "Notebook Id": v.get("id"),
+                    "Notebook Name": v.get("displayName"),
+                    "Description": v.get("description"),
+                }
+            )
+
+    if rows:
+        df = pd.DataFrame(rows, columns=list(columns.keys()))
+
+    return df
+
+
+@log
+def search_notebooks(
+    search_string: str,
+    notebook: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID | List[str | UUID]] = None,
+) -> pd.DataFrame:
+    """
+    Searches notebooks within a workspace or across multiple workspaces for a given search string.
+
+    Parameters
+    ----------
+    search_string : str
+        The string to search for within the notebook definitions.
+    notebook : str | uuid.UUID, default=None
+        The name or ID of a specific notebook to search within.
+        Defaults to None which searches across all notebooks in the specified workspace(s).
+    workspace : str | uuid.UUID | list, default=None
+        The name or ID of the workspace or a list of workspaces to search within.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+        If a list is provided, it should contain workspace names or IDs.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the notebooks that contain the search string in their definitions.
+        The dataframe includes the workspace name, workspace ID, notebook name, and notebook ID.
+    """
+
+    if not workspace:
+        workspace_id = resolve_workspace_id(workspace)
+        workspace_ids = [workspace_id]
+    elif isinstance(workspace, str):
+        workspace_id = resolve_workspace_id(workspace)
+        workspace_ids = [workspace_id]
+    elif isinstance(workspace, list):
+        workspace_ids = [resolve_workspace_id(ws) for ws in workspace]
+    else:
+        raise ValueError(
+            "Workspace must be a string, UUID, or a list of strings/UUIDs."
+        )
+
+    dfW = fabric.list_workspaces()
+    dfW_filt = dfW[dfW["Id"].isin(workspace_ids)]
+
+    columns = {
+        "Workspace Name": "string",
+        "Workspace Id": "string",
+        "Notebook Name": "string",
+        "Notebook Id": "string",
+    }
+    df = _create_dataframe(columns=columns)
+
+    rows = []
+    for _, r in dfW_filt.iterrows():
+        w_id = r["Id"]
+        w_name = r["Name"]
+        dfN = list_notebooks(workspace=w_id)
+        if notebook is not None:
+            item_id = resolve_item_id(item=notebook, type="Notebook", workspace=w_id)
+            dfN = dfN[dfN["Notebook Id"] == item_id]
+        for _, n in dfN.iterrows():
+            notebook_id = n["Notebook Id"]
+            notebook_name = n["Notebook Name"]
+            definition = _base_api(
+                request=f"v1/workspaces/{w_id}/notebooks/{notebook_id}/getDefinition",
+                method="post",
+                client="fabric_sp",
+                status_codes=None,
+                lro_return_json=True,
+            )
+            for part in definition.get("definition").get("parts"):
+                payload = _decode_b64(part["payload"])
+                if part["path"] == "notebook-content.py":
+                    if search_string in payload:
+                        rows.append(
+                            {
+                                "Workspace Name": w_name,
+                                "Workspace Id": w_id,
+                                "Notebook Name": notebook_name,
+                                "Notebook Id": notebook_id,
+                            }
+                        )
+
+    if rows:
+        df = pd.DataFrame(rows, columns=list(columns.keys()))
+
+    return df
sempy_labs/directlake/_dl_helper.py
CHANGED
@@ -225,7 +225,10 @@ def get_direct_lake_source(
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
     sql_endpoint_id = get_direct_lake_sql_endpoint(dataset=dataset, workspace=workspace)
     dfI = fabric.list_items(workspace=workspace)
-    dfI_filt = dfI[
+    dfI_filt = dfI[
+        (dfI["Id"] == sql_endpoint_id)
+        & (dfI["Type"].isin(["SQLEndpoint", "Warehouse"]))
+    ]

     artifact_type, artifact_name, artifact_id = None, None, None

sempy_labs/graph/_users.py
CHANGED
@@ -137,7 +137,6 @@ def send_mail(
     cc_recipients: Optional[str | List[str]] = None,
     bcc_recipients: Optional[str | List[str]] = None,
     priority: Literal["Normal", "High", "Low"] = "Normal",
-    follow_up_flag: bool = False,
     attachments: Optional[str | List[str]] = None,
 ):
     """
@@ -165,8 +164,6 @@ def send_mail(
         The email address of the BCC recipients.
     priority : Literal["Normal", "High", "Low"], default="Normal"
         The email priority.
-    follow_up_flag : bool, default=False
-        Whether to set a follow-up flag for the email.
     attachments : str | List[str], default=None
         The abfss path or a list of the abfss paths of the attachments to include in the email.
     """
@@ -220,8 +217,8 @@ def send_mail(
     if bcc_email_addresses:
         payload["message"]["bccRecipients"] = bcc_email_addresses

-    if follow_up_flag:
-
+    # if follow_up_flag:
+    #     payload["message"]["flag"] = {"flagStatus": "flagged"}

     content_types = {
         ".txt": "text/plain",
@@ -244,6 +241,7 @@ def send_mail(
         ".pbip": "application/vnd.ms-powerbi.report",
         ".pbit": "application/vnd.ms-powerbi.report",
         ".vpax": "application/zip",
+        ".geojson": "application/geo+json",
     }

     def file_path_to_content_bytes(file_path):
sempy_labs/lakehouse/_helper.py
CHANGED
@@ -1,7 +1,7 @@
 from uuid import UUID
 from typing import Optional, Literal
 import pyarrow.dataset as ds
-from
+from sempy_labs._helper_functions import (
     _mount,
     delete_item,
     _base_api,
@@ -68,14 +68,23 @@ def is_v_ordered(
     latest_file = os.path.join(delta_log_path, json_files[0])

     with open(latest_file, "r") as f:
-        (8 removed lines; their content is not rendered in this diff view)
+        all_data = [
+            json.loads(line) for line in f if line.strip()
+        ]  # one dict per line
+        for data in all_data:
+            if "metaData" in data:
+                return (
+                    data.get("metaData", {})
+                    .get("configuration", {})
+                    .get("delta.parquet.vorder.enabled", "false")
+                    == "true"
+                )
+
+        # If no metaData, fall back to commitInfo
+        for data in all_data:
+            if "commitInfo" in data:
+                tags = data["commitInfo"].get("tags", {})
+                return tags.get("VORDER", "false").lower() == "true"

     return False  # Default if not found

sempy_labs/lakehouse/_lakehouse.py
CHANGED
@@ -2,7 +2,7 @@ from tqdm.auto import tqdm
 from typing import List, Optional, Union
 from sempy._utils._log import log
 from uuid import UUID
-from
+from sempy_labs._helper_functions import (
     _base_api,
     resolve_lakehouse_name_and_id,
     resolve_workspace_name_and_id,
@@ -13,7 +13,7 @@ import sempy_labs._icons as icons
 import re
 import time
 import pandas as pd
-from
+from sempy_labs._job_scheduler import (
     _get_item_job_instance,
 )

@@ -100,11 +100,15 @@ def optimize_lakehouse_tables(
         tables = [tables]

     df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta
+    df_tables.reset_index(drop=True, inplace=True)

-
+    total = len(df_tables)
+    for idx, r in (bar := tqdm(df_tables.iterrows(), total=total, bar_format="{desc}")):
         table_name = r["Table Name"]
         path = r["Location"]
-        bar.set_description(
+        bar.set_description(
+            f"Optimizing the '{table_name}' table ({idx + 1}/{total})..."
+        )
         _optimize_table(path=path)


@@ -145,11 +149,13 @@ def vacuum_lakehouse_tables(
         tables = [tables]

     df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta
+    df_tables.reset_index(drop=True, inplace=True)

-
+    total = len(df_tables)
+    for idx, r in (bar := tqdm(df_tables.iterrows(), total=total, bar_format="{desc}")):
         table_name = r["Table Name"]
         path = r["Location"]
-        bar.set_description(f"Vacuuming the '{table_name}' table...")
+        bar.set_description(f"Vacuuming the '{table_name}' table ({idx}/{total})...")
         _vacuum_table(path=path, retain_n_hours=retain_n_hours)


@@ -231,7 +237,7 @@ def run_table_maintenance(
     if optimize:
         payload["executionData"]["optimizeSettings"] = {}
     if v_order:
-        payload["executionData"]["optimizeSettings"] = {"
+        payload["executionData"]["optimizeSettings"] = {"vOrder": True}
     if vacuum:
         payload["executionData"]["vacuumSettings"] = {}
     if vacuum and retention_period is not None:
@@ -242,16 +248,19 @@ def run_table_maintenance(
         method="post",
         payload=payload,
         status_codes=202,
+        client="fabric_sp",
    )

-
+    print(
+        f"{icons.in_progress} The table maintenance job for the '{table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been initiated."
+    )

     status_url = response.headers.get("Location").split("fabric.microsoft.com")[1]
     status = None
     while status not in ["Completed", "Failed"]:
         response = _base_api(request=status_url)
         status = response.json().get("status")
-        time.sleep(
+        time.sleep(3)

     df = _get_item_job_instance(url=status_url)

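A few hedged usage sketches for the additions above. First, the new notebook helpers from sempy_labs/_notebooks.py; the workspace names are placeholders, and the module path is imported directly rather than assuming a re-export from the package root:

from sempy_labs._notebooks import list_notebooks, search_notebooks

# List notebooks in one workspace (placeholder name).
df_nb = list_notebooks(workspace="Sales Analytics")

# Find notebooks whose notebook-content.py definition contains a string,
# searching two workspaces (placeholder names).
hits = search_notebooks(
    search_string="CREATE TABLE",
    workspace=["Sales Analytics", "Finance"],
)
print(hits[["Workspace Name", "Notebook Name"]])

Second, a standalone sketch of the new V-Order check in sempy_labs/lakehouse/_helper.py: each line of a Delta log commit file is one JSON document, and the metaData configuration is preferred over the commitInfo tags. The sample log lines are illustrative only:

import json

log_lines = [
    '{"commitInfo": {"tags": {"VORDER": "true"}}}',
    '{"metaData": {"configuration": {"delta.parquet.vorder.enabled": "true"}}}',
]

entries = [json.loads(line) for line in log_lines if line.strip()]

v_ordered = False
for entry in entries:
    if "metaData" in entry:
        # Prefer the table property written in the metaData action.
        v_ordered = (
            entry["metaData"].get("configuration", {}).get("delta.parquet.vorder.enabled", "false")
            == "true"
        )
        break
else:
    # No metaData entry found: fall back to the commitInfo tags, as the new code does.
    for entry in entries:
        if "commitInfo" in entry:
            v_ordered = entry["commitInfo"].get("tags", {}).get("VORDER", "false").lower() == "true"
            break

print(v_ordered)  # True for the sample lines above

Finally, the progress-bar changes in sempy_labs/lakehouse/_lakehouse.py only affect how optimize_lakehouse_tables and vacuum_lakehouse_tables report progress, so a typical call is unchanged. Table names are placeholders, the lakehouse/workspace arguments are left at their defaults (the attached lakehouse), and retain_n_hours is assumed to be a public parameter based on the internal _vacuum_table call shown in the hunk above:

from sempy_labs.lakehouse import optimize_lakehouse_tables, vacuum_lakehouse_tables

optimize_lakehouse_tables(tables=["DimDate", "FactSales"])
vacuum_lakehouse_tables(tables="FactSales", retain_n_hours=168)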