semantic-link-labs 0.12.8 (semantic_link_labs-0.12.8-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243)
  1. semantic_link_labs-0.12.8.dist-info/METADATA +354 -0
  2. semantic_link_labs-0.12.8.dist-info/RECORD +243 -0
  3. semantic_link_labs-0.12.8.dist-info/WHEEL +5 -0
  4. semantic_link_labs-0.12.8.dist-info/licenses/LICENSE +21 -0
  5. semantic_link_labs-0.12.8.dist-info/top_level.txt +1 -0
  6. sempy_labs/__init__.py +606 -0
  7. sempy_labs/_a_lib_info.py +2 -0
  8. sempy_labs/_ai.py +437 -0
  9. sempy_labs/_authentication.py +264 -0
  10. sempy_labs/_bpa_translation/_model/_translations_am-ET.po +869 -0
  11. sempy_labs/_bpa_translation/_model/_translations_ar-AE.po +908 -0
  12. sempy_labs/_bpa_translation/_model/_translations_bg-BG.po +968 -0
  13. sempy_labs/_bpa_translation/_model/_translations_ca-ES.po +963 -0
  14. sempy_labs/_bpa_translation/_model/_translations_cs-CZ.po +943 -0
  15. sempy_labs/_bpa_translation/_model/_translations_da-DK.po +945 -0
  16. sempy_labs/_bpa_translation/_model/_translations_de-DE.po +988 -0
  17. sempy_labs/_bpa_translation/_model/_translations_el-GR.po +993 -0
  18. sempy_labs/_bpa_translation/_model/_translations_es-ES.po +971 -0
  19. sempy_labs/_bpa_translation/_model/_translations_fa-IR.po +933 -0
  20. sempy_labs/_bpa_translation/_model/_translations_fi-FI.po +942 -0
  21. sempy_labs/_bpa_translation/_model/_translations_fr-FR.po +994 -0
  22. sempy_labs/_bpa_translation/_model/_translations_ga-IE.po +967 -0
  23. sempy_labs/_bpa_translation/_model/_translations_he-IL.po +902 -0
  24. sempy_labs/_bpa_translation/_model/_translations_hi-IN.po +944 -0
  25. sempy_labs/_bpa_translation/_model/_translations_hu-HU.po +963 -0
  26. sempy_labs/_bpa_translation/_model/_translations_id-ID.po +946 -0
  27. sempy_labs/_bpa_translation/_model/_translations_is-IS.po +939 -0
  28. sempy_labs/_bpa_translation/_model/_translations_it-IT.po +986 -0
  29. sempy_labs/_bpa_translation/_model/_translations_ja-JP.po +846 -0
  30. sempy_labs/_bpa_translation/_model/_translations_ko-KR.po +839 -0
  31. sempy_labs/_bpa_translation/_model/_translations_mt-MT.po +967 -0
  32. sempy_labs/_bpa_translation/_model/_translations_nl-NL.po +978 -0
  33. sempy_labs/_bpa_translation/_model/_translations_pl-PL.po +962 -0
  34. sempy_labs/_bpa_translation/_model/_translations_pt-BR.po +962 -0
  35. sempy_labs/_bpa_translation/_model/_translations_pt-PT.po +957 -0
  36. sempy_labs/_bpa_translation/_model/_translations_ro-RO.po +968 -0
  37. sempy_labs/_bpa_translation/_model/_translations_ru-RU.po +964 -0
  38. sempy_labs/_bpa_translation/_model/_translations_sk-SK.po +952 -0
  39. sempy_labs/_bpa_translation/_model/_translations_sl-SL.po +950 -0
  40. sempy_labs/_bpa_translation/_model/_translations_sv-SE.po +942 -0
  41. sempy_labs/_bpa_translation/_model/_translations_ta-IN.po +976 -0
  42. sempy_labs/_bpa_translation/_model/_translations_te-IN.po +947 -0
  43. sempy_labs/_bpa_translation/_model/_translations_th-TH.po +924 -0
  44. sempy_labs/_bpa_translation/_model/_translations_tr-TR.po +953 -0
  45. sempy_labs/_bpa_translation/_model/_translations_uk-UA.po +961 -0
  46. sempy_labs/_bpa_translation/_model/_translations_zh-CN.po +804 -0
  47. sempy_labs/_bpa_translation/_model/_translations_zu-ZA.po +969 -0
  48. sempy_labs/_capacities.py +1198 -0
  49. sempy_labs/_capacity_migration.py +660 -0
  50. sempy_labs/_clear_cache.py +351 -0
  51. sempy_labs/_connections.py +610 -0
  52. sempy_labs/_dashboards.py +69 -0
  53. sempy_labs/_data_access_security.py +98 -0
  54. sempy_labs/_data_pipelines.py +162 -0
  55. sempy_labs/_dataflows.py +668 -0
  56. sempy_labs/_dax.py +501 -0
  57. sempy_labs/_daxformatter.py +80 -0
  58. sempy_labs/_delta_analyzer.py +467 -0
  59. sempy_labs/_delta_analyzer_history.py +301 -0
  60. sempy_labs/_dictionary_diffs.py +221 -0
  61. sempy_labs/_documentation.py +147 -0
  62. sempy_labs/_domains.py +51 -0
  63. sempy_labs/_eventhouses.py +182 -0
  64. sempy_labs/_external_data_shares.py +230 -0
  65. sempy_labs/_gateways.py +521 -0
  66. sempy_labs/_generate_semantic_model.py +521 -0
  67. sempy_labs/_get_connection_string.py +84 -0
  68. sempy_labs/_git.py +543 -0
  69. sempy_labs/_graphQL.py +90 -0
  70. sempy_labs/_helper_functions.py +2833 -0
  71. sempy_labs/_icons.py +149 -0
  72. sempy_labs/_job_scheduler.py +609 -0
  73. sempy_labs/_kql_databases.py +149 -0
  74. sempy_labs/_kql_querysets.py +124 -0
  75. sempy_labs/_kusto.py +137 -0
  76. sempy_labs/_labels.py +124 -0
  77. sempy_labs/_list_functions.py +1720 -0
  78. sempy_labs/_managed_private_endpoints.py +253 -0
  79. sempy_labs/_mirrored_databases.py +416 -0
  80. sempy_labs/_mirrored_warehouses.py +60 -0
  81. sempy_labs/_ml_experiments.py +113 -0
  82. sempy_labs/_model_auto_build.py +140 -0
  83. sempy_labs/_model_bpa.py +557 -0
  84. sempy_labs/_model_bpa_bulk.py +378 -0
  85. sempy_labs/_model_bpa_rules.py +859 -0
  86. sempy_labs/_model_dependencies.py +343 -0
  87. sempy_labs/_mounted_data_factories.py +123 -0
  88. sempy_labs/_notebooks.py +441 -0
  89. sempy_labs/_one_lake_integration.py +151 -0
  90. sempy_labs/_onelake.py +131 -0
  91. sempy_labs/_query_scale_out.py +433 -0
  92. sempy_labs/_refresh_semantic_model.py +435 -0
  93. sempy_labs/_semantic_models.py +468 -0
  94. sempy_labs/_spark.py +455 -0
  95. sempy_labs/_sql.py +241 -0
  96. sempy_labs/_sql_audit_settings.py +207 -0
  97. sempy_labs/_sql_endpoints.py +214 -0
  98. sempy_labs/_tags.py +201 -0
  99. sempy_labs/_translations.py +43 -0
  100. sempy_labs/_user_delegation_key.py +44 -0
  101. sempy_labs/_utils.py +79 -0
  102. sempy_labs/_vertipaq.py +1021 -0
  103. sempy_labs/_vpax.py +388 -0
  104. sempy_labs/_warehouses.py +234 -0
  105. sempy_labs/_workloads.py +140 -0
  106. sempy_labs/_workspace_identity.py +72 -0
  107. sempy_labs/_workspaces.py +595 -0
  108. sempy_labs/admin/__init__.py +170 -0
  109. sempy_labs/admin/_activities.py +167 -0
  110. sempy_labs/admin/_apps.py +145 -0
  111. sempy_labs/admin/_artifacts.py +65 -0
  112. sempy_labs/admin/_basic_functions.py +463 -0
  113. sempy_labs/admin/_capacities.py +508 -0
  114. sempy_labs/admin/_dataflows.py +45 -0
  115. sempy_labs/admin/_datasets.py +186 -0
  116. sempy_labs/admin/_domains.py +522 -0
  117. sempy_labs/admin/_external_data_share.py +100 -0
  118. sempy_labs/admin/_git.py +72 -0
  119. sempy_labs/admin/_items.py +265 -0
  120. sempy_labs/admin/_labels.py +211 -0
  121. sempy_labs/admin/_reports.py +241 -0
  122. sempy_labs/admin/_scanner.py +118 -0
  123. sempy_labs/admin/_shared.py +82 -0
  124. sempy_labs/admin/_sharing_links.py +110 -0
  125. sempy_labs/admin/_tags.py +131 -0
  126. sempy_labs/admin/_tenant.py +503 -0
  127. sempy_labs/admin/_tenant_keys.py +89 -0
  128. sempy_labs/admin/_users.py +140 -0
  129. sempy_labs/admin/_workspaces.py +236 -0
  130. sempy_labs/deployment_pipeline/__init__.py +23 -0
  131. sempy_labs/deployment_pipeline/_items.py +580 -0
  132. sempy_labs/directlake/__init__.py +57 -0
  133. sempy_labs/directlake/_autosync.py +58 -0
  134. sempy_labs/directlake/_directlake_schema_compare.py +120 -0
  135. sempy_labs/directlake/_directlake_schema_sync.py +161 -0
  136. sempy_labs/directlake/_dl_helper.py +274 -0
  137. sempy_labs/directlake/_generate_shared_expression.py +94 -0
  138. sempy_labs/directlake/_get_directlake_lakehouse.py +62 -0
  139. sempy_labs/directlake/_get_shared_expression.py +34 -0
  140. sempy_labs/directlake/_guardrails.py +96 -0
  141. sempy_labs/directlake/_list_directlake_model_calc_tables.py +70 -0
  142. sempy_labs/directlake/_show_unsupported_directlake_objects.py +90 -0
  143. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +239 -0
  144. sempy_labs/directlake/_update_directlake_partition_entity.py +259 -0
  145. sempy_labs/directlake/_warm_cache.py +236 -0
  146. sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
  147. sempy_labs/environment/__init__.py +23 -0
  148. sempy_labs/environment/_items.py +212 -0
  149. sempy_labs/environment/_pubstage.py +223 -0
  150. sempy_labs/eventstream/__init__.py +37 -0
  151. sempy_labs/eventstream/_items.py +263 -0
  152. sempy_labs/eventstream/_topology.py +652 -0
  153. sempy_labs/graph/__init__.py +59 -0
  154. sempy_labs/graph/_groups.py +651 -0
  155. sempy_labs/graph/_sensitivity_labels.py +120 -0
  156. sempy_labs/graph/_teams.py +125 -0
  157. sempy_labs/graph/_user_licenses.py +96 -0
  158. sempy_labs/graph/_users.py +516 -0
  159. sempy_labs/graph_model/__init__.py +15 -0
  160. sempy_labs/graph_model/_background_jobs.py +63 -0
  161. sempy_labs/graph_model/_items.py +149 -0
  162. sempy_labs/lakehouse/__init__.py +67 -0
  163. sempy_labs/lakehouse/_blobs.py +247 -0
  164. sempy_labs/lakehouse/_get_lakehouse_columns.py +102 -0
  165. sempy_labs/lakehouse/_get_lakehouse_tables.py +274 -0
  166. sempy_labs/lakehouse/_helper.py +250 -0
  167. sempy_labs/lakehouse/_lakehouse.py +351 -0
  168. sempy_labs/lakehouse/_livy_sessions.py +143 -0
  169. sempy_labs/lakehouse/_materialized_lake_views.py +157 -0
  170. sempy_labs/lakehouse/_partitioning.py +165 -0
  171. sempy_labs/lakehouse/_schemas.py +217 -0
  172. sempy_labs/lakehouse/_shortcuts.py +440 -0
  173. sempy_labs/migration/__init__.py +35 -0
  174. sempy_labs/migration/_create_pqt_file.py +238 -0
  175. sempy_labs/migration/_direct_lake_to_import.py +105 -0
  176. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +398 -0
  177. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +148 -0
  178. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +533 -0
  179. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +172 -0
  180. sempy_labs/migration/_migration_validation.py +71 -0
  181. sempy_labs/migration/_refresh_calc_tables.py +131 -0
  182. sempy_labs/mirrored_azure_databricks_catalog/__init__.py +15 -0
  183. sempy_labs/mirrored_azure_databricks_catalog/_discover.py +213 -0
  184. sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py +45 -0
  185. sempy_labs/ml_model/__init__.py +23 -0
  186. sempy_labs/ml_model/_functions.py +427 -0
  187. sempy_labs/report/_BPAReportTemplate.json +232 -0
  188. sempy_labs/report/__init__.py +55 -0
  189. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  190. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  191. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  192. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  193. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  194. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  195. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  196. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  197. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  198. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  199. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  200. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  201. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  202. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  203. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  204. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  205. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  206. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  207. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  208. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  209. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  210. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  211. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  212. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  213. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  214. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  215. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  216. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  217. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  218. sempy_labs/report/_download_report.py +76 -0
  219. sempy_labs/report/_export_report.py +257 -0
  220. sempy_labs/report/_generate_report.py +427 -0
  221. sempy_labs/report/_paginated.py +76 -0
  222. sempy_labs/report/_report_bpa.py +354 -0
  223. sempy_labs/report/_report_bpa_rules.py +115 -0
  224. sempy_labs/report/_report_functions.py +581 -0
  225. sempy_labs/report/_report_helper.py +227 -0
  226. sempy_labs/report/_report_list_functions.py +110 -0
  227. sempy_labs/report/_report_rebind.py +149 -0
  228. sempy_labs/report/_reportwrapper.py +3100 -0
  229. sempy_labs/report/_save_report.py +147 -0
  230. sempy_labs/snowflake_database/__init__.py +10 -0
  231. sempy_labs/snowflake_database/_items.py +105 -0
  232. sempy_labs/sql_database/__init__.py +21 -0
  233. sempy_labs/sql_database/_items.py +201 -0
  234. sempy_labs/sql_database/_mirroring.py +79 -0
  235. sempy_labs/theme/__init__.py +12 -0
  236. sempy_labs/theme/_org_themes.py +129 -0
  237. sempy_labs/tom/__init__.py +3 -0
  238. sempy_labs/tom/_model.py +5977 -0
  239. sempy_labs/variable_library/__init__.py +19 -0
  240. sempy_labs/variable_library/_functions.py +403 -0
  241. sempy_labs/warehouse/__init__.py +28 -0
  242. sempy_labs/warehouse/_items.py +234 -0
  243. sempy_labs/warehouse/_restore_points.py +309 -0
sempy_labs/lakehouse/_lakehouse.py
@@ -0,0 +1,351 @@
+ from tqdm.auto import tqdm
+ from typing import List, Optional, Union
+ from sempy._utils._log import log
+ from uuid import UUID
+ from sempy_labs._helper_functions import (
+     _base_api,
+     resolve_lakehouse_name_and_id,
+     resolve_workspace_name_and_id,
+     _create_spark_session,
+     _pure_python_notebook,
+     _create_dataframe,
+     _update_dataframe_datatypes,
+     resolve_workspace_id,
+ )
+ import sempy_labs._icons as icons
+ import re
+ import pandas as pd
+
+
+ @log
+ def list_lakehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
+     """
+     Shows the lakehouses within a workspace.
+
+     Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+     Parameters
+     ----------
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing the lakehouses within a workspace.
+     """
+
+     columns = {
+         "Lakehouse Name": "string",
+         "Lakehouse ID": "string",
+         "Description": "string",
+         "OneLake Tables Path": "string",
+         "OneLake Files Path": "string",
+         "SQL Endpoint Connection String": "string",
+         "SQL Endpoint ID": "string",
+         "SQL Endpoint Provisioning Status": "string",
+         "Schema Enabled": "bool",
+         "Default Schema": "string",
+         "Sensitivity Label Id": "string",
+     }
+     df = _create_dataframe(columns=columns)
+
+     workspace_id = resolve_workspace_id(workspace)
+
+     responses = _base_api(
+         request=f"/v1/workspaces/{workspace_id}/lakehouses",
+         uses_pagination=True,
+         client="fabric_sp",
+     )
+
+     rows = []
+     for r in responses:
+         for v in r.get("value", []):
+             prop = v.get("properties", {})
+             sqlEPProp = prop.get("sqlEndpointProperties", {})
+             default_schema = prop.get("defaultSchema", None)
+
+             rows.append(
+                 {
+                     "Lakehouse Name": v.get("displayName"),
+                     "Lakehouse ID": v.get("id"),
+                     "Description": v.get("description"),
+                     "OneLake Tables Path": prop.get("oneLakeTablesPath"),
+                     "OneLake Files Path": prop.get("oneLakeFilesPath"),
+                     "SQL Endpoint Connection String": sqlEPProp.get("connectionString"),
+                     "SQL Endpoint ID": sqlEPProp.get("id"),
+                     "SQL Endpoint Provisioning Status": sqlEPProp.get(
+                         "provisioningStatus"
+                     ),
+                     "Schema Enabled": True if default_schema else False,
+                     "Default Schema": default_schema,
+                     "Sensitivity Label Id": v.get("sensitivityLabel", {}).get(
+                         "sensitivityLabelId"
+                     ),
+                 }
+             )
+
+     if rows:
+         df = pd.DataFrame(rows, columns=list(columns.keys()))
+         _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+     return df
+
+
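For orientation, a minimal usage sketch for list_lakehouses (an editor's example, not part of the diff). It imports from the private module path added above; the package may also re-export the function from its public namespace, and the "Sales" workspace name is a hypothetical placeholder.

# Usage sketch (assumes a Microsoft Fabric notebook with semantic-link-labs installed).
from sempy_labs.lakehouse._lakehouse import list_lakehouses

df = list_lakehouses(workspace="Sales")  # "Sales" is a hypothetical workspace name
print(df[["Lakehouse Name", "Lakehouse ID", "Schema Enabled"]])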
+ @log
+ def lakehouse_attached() -> bool:
+     """
+     Identifies if a lakehouse is attached to the notebook.
+
+     Returns
+     -------
+     bool
+         Returns True if a lakehouse is attached to the notebook.
+     """
+
+     from sempy_labs._helper_functions import _get_fabric_context_setting
+
+     lake_id = _get_fabric_context_setting(name="trident.lakehouse.id")
+
+     if len(lake_id) > 0:
+         return True
+     else:
+         return False
+
+
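Because the maintenance helpers below resolve their defaults from the attached lakehouse, lakehouse_attached works as a guard; a sketch (editor's example, not part of the diff):

# Guard sketch: fail fast when no lakehouse is attached to resolve defaults against.
from sempy_labs.lakehouse._lakehouse import lakehouse_attached

if not lakehouse_attached():
    raise RuntimeError(
        "Attach a lakehouse to this notebook, or pass lakehouse/workspace explicitly."
    )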
+ @log
+ def _optimize_table(path):
+
+     if _pure_python_notebook():
+         from deltalake import DeltaTable
+
+         DeltaTable(path).optimize.compact()
+     else:
+         from delta import DeltaTable
+
+         spark = _create_spark_session()
+         DeltaTable.forPath(spark, path).optimize().executeCompaction()
+
+
+ @log
+ def _vacuum_table(path, retain_n_hours):
+
+     if _pure_python_notebook():
+         from deltalake import DeltaTable
+
+         DeltaTable(path).vacuum(retention_hours=retain_n_hours)
+     else:
+         from delta import DeltaTable
+
+         spark = _create_spark_session()
+         spark.conf.set("spark.databricks.delta.vacuum.parallelDelete.enabled", "true")
+         DeltaTable.forPath(spark, path).vacuum(retain_n_hours)
+
+
+ @log
+ def optimize_lakehouse_tables(
+     tables: Optional[Union[str, List[str]]] = None,
+     lakehouse: Optional[str | UUID] = None,
+     workspace: Optional[str | UUID] = None,
+ ):
+     """
+     Runs the `OPTIMIZE <https://docs.delta.io/latest/optimizations-oss.html>`_ function over the specified lakehouse tables.
+
+     Parameters
+     ----------
+     tables : str | List[str], default=None
+         The table(s) to optimize.
+         Defaults to None, which resolves to optimizing all tables within the lakehouse.
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+     """
+
+     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
+
+     df = get_lakehouse_tables(
+         lakehouse=lakehouse, workspace=workspace, exclude_shortcuts=True
+     )
+     df_delta = df[df["Format"] == "delta"]
+
+     if isinstance(tables, str):
+         tables = [tables]
+
+     df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta
+     df_tables.reset_index(drop=True, inplace=True)
+
+     total = len(df_tables)
+     for idx, r in (bar := tqdm(df_tables.iterrows(), total=total, bar_format="{desc}")):
+         table_name = r["Table Name"]
+         path = r["Location"]
+         bar.set_description(
+             f"Optimizing the '{table_name}' table ({idx + 1}/{total})..."
+         )
+         _optimize_table(path=path)
+
+
+ @log
+ def vacuum_lakehouse_tables(
+     tables: Optional[Union[str, List[str]]] = None,
+     lakehouse: Optional[str | UUID] = None,
+     workspace: Optional[str | UUID] = None,
+     retain_n_hours: Optional[int] = None,
+ ):
+     """
+     Runs the `VACUUM <https://docs.delta.io/latest/delta-utility.html#remove-files-no-longer-referenced-by-a-delta-table>`_ function over the specified lakehouse tables.
+
+     Parameters
+     ----------
+     tables : str | List[str], default=None
+         The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be vacuumed.
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+     retain_n_hours : int, default=None
+         The number of hours to retain historical versions of Delta table files.
+         Files older than this retention period will be deleted during the vacuum operation.
+         If not specified, the default retention period configured for the Delta table will be used.
+         The default retention period is 168 hours (7 days) unless manually configured via table properties.
+     """
+
+     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
+
+     df = get_lakehouse_tables(
+         lakehouse=lakehouse, workspace=workspace, exclude_shortcuts=True
+     )
+     df_delta = df[df["Format"] == "delta"]
+
+     if isinstance(tables, str):
+         tables = [tables]
+
+     df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta
+     df_tables.reset_index(drop=True, inplace=True)
+
+     total = len(df_tables)
+     for idx, r in (bar := tqdm(df_tables.iterrows(), total=total, bar_format="{desc}")):
+         table_name = r["Table Name"]
+         path = r["Location"]
+         bar.set_description(f"Vacuuming the '{table_name}' table ({idx + 1}/{total})...")
+         _vacuum_table(path=path, retain_n_hours=retain_n_hours)
+
+
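A usage sketch for the two public wrappers above (editor's example, not part of the diff); the table names are hypothetical placeholders:

# Usage sketch (assumes a Fabric notebook with a lakehouse attached).
from sempy_labs.lakehouse._lakehouse import (
    optimize_lakehouse_tables,
    vacuum_lakehouse_tables,
)

optimize_lakehouse_tables(tables=["FactSales", "DimDate"])  # compact small files
vacuum_lakehouse_tables(tables="FactSales", retain_n_hours=168)  # 7-day retention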
+ @log
+ def run_table_maintenance(
+     table_name: str,
+     optimize: bool = False,
+     v_order: bool = False,
+     vacuum: bool = False,
+     retention_period: Optional[str] = None,
+     schema: Optional[str] = None,
+     lakehouse: Optional[str | UUID] = None,
+     workspace: Optional[str | UUID] = None,
+ ) -> pd.DataFrame:
+     """
+     Runs table maintenance operations on the specified table within the lakehouse.
+
+     This is a wrapper function for the following API: `Background Jobs - Run On Demand Table Maintenance <https://learn.microsoft.com/rest/api/fabric/lakehouse/background-jobs/run-on-demand-table-maintenance>`_.
+
+     Parameters
+     ----------
+     table_name : str
+         Name of the delta table on which to run maintenance operations.
+     optimize : bool, default=False
+         If True, the `OPTIMIZE <https://docs.delta.io/latest/optimizations-oss.html>`_ function will be run on the table.
+     v_order : bool, default=False
+         If True, v-order will be enabled for the table.
+     vacuum : bool, default=False
+         If True, the `VACUUM <https://docs.delta.io/latest/delta-utility.html#remove-files-no-longer-referenced-by-a-delta-table>`_ function will be run on the table.
+     retention_period : str, default=None
+         If specified, the retention period for the vacuum operation. Must be in the 'd:hh:mm:ss' format.
+     schema : str, default=None
+         The schema of the tables within the lakehouse.
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A DataFrame containing the job instance details of the table maintenance operation.
+     """
+
+     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+         lakehouse=lakehouse, workspace=workspace_id
+     )
+
+     if not optimize and not vacuum:
+         raise ValueError(
+             f"{icons.warning} At least one of 'optimize' or 'vacuum' must be set to True."
+         )
+     if not vacuum and retention_period is not None:
+         raise ValueError(
+             f"{icons.warning} The 'retention_period' parameter can only be set if 'vacuum' is set to True."
+         )
+     if retention_period is not None:
+
+         def is_valid_format(time_string):
+             pattern = r"^\d+:[0-2][0-9]:[0-5][0-9]:[0-5][0-9]$"
+             return bool(re.match(pattern, time_string))
+
+         if not is_valid_format(retention_period):
+             raise ValueError(
+                 f"{icons.red_dot} The 'retention_period' parameter must be in the 'd:hh:mm:ss' format."
+             )
+
+     payload = {
+         "executionData": {
+             "tableName": table_name,
+         }
+     }
+     if schema is not None:
+         payload["executionData"]["schemaName"] = schema
+     if optimize:
+         payload["executionData"]["optimizeSettings"] = {}
+     if v_order:
+         payload["executionData"]["optimizeSettings"] = {"vOrder": True}
+     if vacuum:
+         payload["executionData"]["vacuumSettings"] = {}
+     if vacuum and retention_period is not None:
+         payload["executionData"]["vacuumSettings"]["retentionPeriod"] = retention_period
+
+     print(
+         f"{icons.in_progress} The table maintenance job for the '{table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been initiated."
+     )
+
+     df = _base_api(
+         request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/jobs/instances?jobType=TableMaintenance",
+         method="post",
+         payload=payload,
+         status_codes=[200, 202],
+         client="fabric_sp",
+         lro_return_df=True,
+     )
+
+     status = df["Status"].iloc[0]
+
+     if status == "Completed":
+         print(
+             f"{icons.green_dot} The table maintenance job for the '{table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has succeeded."
+         )
+     else:
+         print(status)
+         print(
+             f"{icons.red_dot} The table maintenance job for the '{table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has failed."
+         )
+
+     return df
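Note that retention_period takes the 'd:hh:mm:ss' string format enforced by the validation above, not an hour count like vacuum_lakehouse_tables. A usage sketch (editor's example, not part of the diff; the table name is a hypothetical placeholder):

# Usage sketch: service-side OPTIMIZE with V-Order plus VACUUM, via the Fabric REST API.
from sempy_labs.lakehouse._lakehouse import run_table_maintenance

job_df = run_table_maintenance(
    table_name="FactSales",  # hypothetical table name
    optimize=True,
    v_order=True,
    vacuum=True,
    retention_period="7:00:00:00",  # 7 days in the required 'd:hh:mm:ss' format
)
print(job_df["Status"].iloc[0])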
sempy_labs/lakehouse/_livy_sessions.py
@@ -0,0 +1,143 @@
+ from sempy_labs._helper_functions import (
+     resolve_workspace_id,
+     resolve_lakehouse_id,
+     _base_api,
+     _create_dataframe,
+     _update_dataframe_datatypes,
+ )
+ import pandas as pd
+ from typing import Optional
+ from uuid import UUID
+ from sempy._utils._log import log
+
+
+ @log
+ def list_livy_sessions(
+     lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
+ ) -> pd.DataFrame:
+     """
+     Shows a list of Livy sessions for the specified item identifier.
+
+     This is a wrapper function for the following API: `Livy Sessions - List Livy Sessions <https://learn.microsoft.com/rest/api/fabric/lakehouse/livy-sessions/list-livy-sessions>`_.
+
+     Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+     Parameters
+     ----------
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing a list of Livy sessions for the specified item identifier.
+     """
+
+     columns = {
+         "Spark Application Id": "string",
+         "State": "string",
+         "Livy Id": "string",
+         "Origin": "string",
+         "Attempt Number": "int",
+         "Max Number Of Attempts": "int",
+         "Livy Name": "string",
+         "Submitter Id": "string",
+         "Submitter Type": "string",
+         "Item Workspace Id": "string",
+         "Item Id": "string",
+         "Item Reference Type": "string",
+         "Item Name": "string",
+         "Item Type": "string",
+         "Job Type": "string",
+         "Submitted Date Time": "string",
+         "Start Date Time": "string",
+         "End Date Time": "string",
+         "Queued Duration Value": "int",
+         "Queued Duration Time Unit": "string",
+         "Running Duration Value": "int",
+         "Running Duration Time Unit": "string",
+         "Total Duration Value": "int",
+         "Total Duration Time Unit": "string",
+         "Job Instance Id": "string",
+         "Creator Item Workspace Id": "string",
+         "Creator Item Id": "string",
+         "Creator Item Reference Type": "string",
+         "Creator Item Name": "string",
+         "Creator Item Type": "string",
+         "Cancellation Reason": "string",
+         "Capacity Id": "string",
+         "Operation Name": "string",
+         "Runtime Version": "string",
+         "Livy Session Item Resource Uri": "string",
+     }
+     df = _create_dataframe(columns=columns)
+
+     workspace_id = resolve_workspace_id(workspace)
+     lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
+
+     responses = _base_api(
+         request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/livySessions",
+         uses_pagination=True,
+         client="fabric_sp",
+     )
+
+     rows = []
+     for r in responses:
+         for v in r.get("value", []):
+             queued_duration = v.get("queuedDuration", {})
+             running_duration = v.get("runningDuration", {})
+             total_duration = v.get("totalDuration", {})
+             rows.append(
+                 {
+                     "Spark Application Id": v.get("sparkApplicationId"),
+                     "State": v.get("state"),
+                     "Livy Id": v.get("livyId"),
+                     "Origin": v.get("origin"),
+                     "Attempt Number": v.get("attemptNumber"),
+                     "Max Number Of Attempts": v.get("maxNumberOfAttempts"),
+                     "Livy Name": v.get("livyName"),
+                     "Submitter Id": v["submitter"].get("id"),
+                     "Submitter Type": v["submitter"].get("type"),
+                     "Item Workspace Id": v["item"].get("workspaceId"),
+                     "Item Id": v["item"].get("itemId"),
+                     "Item Reference Type": v["item"].get("referenceType"),
+                     "Item Name": v.get("itemName"),
+                     "Item Type": v.get("itemType"),
+                     "Job Type": v.get("jobType"),
+                     "Submitted Date Time": v.get("submittedDateTime"),
+                     "Start Date Time": v.get("startDateTime"),
+                     "End Date Time": v.get("endDateTime"),
+                     "Queued Duration Value": queued_duration.get("value"),
+                     "Queued Duration Time Unit": queued_duration.get("timeUnit"),
+                     "Running Duration Value": running_duration.get("value"),
+                     "Running Duration Time Unit": running_duration.get("timeUnit"),
+                     "Total Duration Value": total_duration.get("value"),
+                     "Total Duration Time Unit": total_duration.get("timeUnit"),
+                     "Job Instance Id": v.get("jobInstanceId"),
+                     "Creator Item Workspace Id": v["creatorItem"].get("workspaceId"),
+                     "Creator Item Id": v["creatorItem"].get("itemId"),
+                     "Creator Item Reference Type": v["creatorItem"].get(
+                         "referenceType"
+                     ),
+                     "Creator Item Name": v.get("creatorItemName"),
+                     "Creator Item Type": v.get("creatorItemType"),
+                     "Cancellation Reason": v.get("cancellationReason"),
+                     "Capacity Id": v.get("capacityId"),
+                     "Operation Name": v.get("operationName"),
+                     "Runtime Version": v.get("runtimeVersion"),
+                     "Livy Session Item Resource Uri": v.get(
+                         "livySessionItemResourceUri"
+                     ),
+                 }
+             )
+
+     if rows:
+         df = pd.DataFrame(rows, columns=list(columns.keys()))
+         _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+     return df
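A minimal usage sketch for list_livy_sessions (editor's example, not part of the diff), assuming a lakehouse is attached so both identifiers resolve automatically:

# Usage sketch: list Livy sessions for the attached lakehouse and inspect their states.
from sempy_labs.lakehouse._livy_sessions import list_livy_sessions

sessions = list_livy_sessions()
print(sessions[["Livy Id", "State", "Job Type", "Runtime Version"]])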
sempy_labs/lakehouse/_materialized_lake_views.py
@@ -0,0 +1,157 @@
+ from typing import Optional
+ from sempy_labs._helper_functions import (
+     resolve_workspace_id,
+     resolve_workspace_name_and_id,
+     resolve_lakehouse_name_and_id,
+     _base_api,
+     _create_dataframe,
+ )
+ from uuid import UUID
+ from sempy._utils._log import log
+ import sempy_labs._icons as icons
+ import pandas as pd
+
+
+ @log
+ def refresh_materialized_lake_views(
+     lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
+ ) -> pd.DataFrame:
+     """
+     Runs an on-demand Refresh MaterializedLakeViews job instance.
+
+     This is a wrapper function for the following API: `Background Jobs - Run On Demand Refresh Materialized Lake Views <https://learn.microsoft.com/rest/api/fabric/lakehouse/background-jobs/run-on-demand-refresh-materialized-lake-views>`_.
+
+     Parameters
+     ----------
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A DataFrame containing the job instance details of the refresh materialized lake views operation.
+     """
+
+     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+         lakehouse=lakehouse, workspace=workspace_id
+     )
+
+     print(
+         f"{icons.in_progress} The refresh materialized lake views job for the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been initiated."
+     )
+
+     df = _base_api(
+         request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/jobs/instances?jobType=RefreshMaterializedLakeViews",
+         lro_return_df=True,
+         method="post",
+     )
+
+     status = df["Status"].iloc[0]
+
+     if status == "Completed":
+         print(
+             f"{icons.green_dot} The refresh materialized lake views job for the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has succeeded."
+         )
+     else:
+         print(status)
+         print(
+             f"{icons.red_dot} The refresh materialized lake views job for the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has failed."
+         )
+
+     return df
+
+
+ def _get_materialized_lake_views_schedule(
+     lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
+ ) -> pd.DataFrame:
+     """
+     Gets the schedule details for the MaterializedLakeViews job instance.
+
+     Parameters
+     ----------
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A DataFrame containing the schedule details of the materialized lake views job instance.
+     """
+
+     workspace_id = resolve_workspace_id(workspace)
+     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+         lakehouse=lakehouse, workspace=workspace_id
+     )
+
+     columns = {
+         "Job Schedule Id": "string",
+         "Enabled": "bool",
+         "Created DateTime": "datetime",
+         "Type": "string",
+         "Start DateTime": "datetime",
+         "End DateTime": "datetime",
+         "Local TimeZoneId": "string",
+         "Interval": "int",
+         "Owner Id": "string",
+         "Owner Type": "string",
+     }
+
+     df = _create_dataframe(columns=columns)
+
+     response = _base_api(
+         request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/jobs/RefreshMaterializedLakeViews/schedules",
+     )
+
+     df = pd.json_normalize(response.json().get("value", []))
+
+     return df
+
+
+ @log
+ def _delete_materialized_lake_view_schedule(
+     schedule_id: UUID,
+     lakehouse: Optional[str | UUID] = None,
+     workspace: Optional[str | UUID] = None,
+ ):
+     """
+     Deletes an existing Refresh MaterializedLakeViews schedule for a lakehouse.
+
+     This is a wrapper function for the following API: `Background Jobs - Delete Refresh Materialized Lake Views Schedule <https://learn.microsoft.com/rest/api/fabric/lakehouse/background-jobs/delete-refresh-materialized-lake-views-schedule>`_.
+
+     Parameters
+     ----------
+     schedule_id : uuid.UUID
+         The ID of the job schedule to delete.
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+     """
+
+     workspace_id = resolve_workspace_id(workspace)
+     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+         lakehouse=lakehouse, workspace=workspace_id
+     )
+
+     _base_api(
+         request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/jobs/RefreshMaterializedLakeViews/schedules/{schedule_id}",
+         method="delete",
+     )
+
+     print(
+         f"{icons.green_dot} The materialized lake view schedule with ID '{schedule_id}' has been deleted from the '{lakehouse_name}' lakehouse within the '{workspace_id}' workspace."
+     )
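A usage sketch for the public refresh wrapper (editor's example, not part of the diff); the schedule helpers above are private (underscore-prefixed) and may change without notice:

# Usage sketch: start an on-demand materialized lake view refresh and check the outcome.
from sempy_labs.lakehouse._materialized_lake_views import refresh_materialized_lake_views

job_df = refresh_materialized_lake_views()  # defaults resolve from the attached lakehouse
if job_df["Status"].iloc[0] != "Completed":
    print(job_df)  # inspect the job instance details on failure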