semantic-link-labs 0.12.8 (semantic_link_labs-0.12.8-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- semantic_link_labs-0.12.8.dist-info/METADATA +354 -0
- semantic_link_labs-0.12.8.dist-info/RECORD +243 -0
- semantic_link_labs-0.12.8.dist-info/WHEEL +5 -0
- semantic_link_labs-0.12.8.dist-info/licenses/LICENSE +21 -0
- semantic_link_labs-0.12.8.dist-info/top_level.txt +1 -0
- sempy_labs/__init__.py +606 -0
- sempy_labs/_a_lib_info.py +2 -0
- sempy_labs/_ai.py +437 -0
- sempy_labs/_authentication.py +264 -0
- sempy_labs/_bpa_translation/_model/_translations_am-ET.po +869 -0
- sempy_labs/_bpa_translation/_model/_translations_ar-AE.po +908 -0
- sempy_labs/_bpa_translation/_model/_translations_bg-BG.po +968 -0
- sempy_labs/_bpa_translation/_model/_translations_ca-ES.po +963 -0
- sempy_labs/_bpa_translation/_model/_translations_cs-CZ.po +943 -0
- sempy_labs/_bpa_translation/_model/_translations_da-DK.po +945 -0
- sempy_labs/_bpa_translation/_model/_translations_de-DE.po +988 -0
- sempy_labs/_bpa_translation/_model/_translations_el-GR.po +993 -0
- sempy_labs/_bpa_translation/_model/_translations_es-ES.po +971 -0
- sempy_labs/_bpa_translation/_model/_translations_fa-IR.po +933 -0
- sempy_labs/_bpa_translation/_model/_translations_fi-FI.po +942 -0
- sempy_labs/_bpa_translation/_model/_translations_fr-FR.po +994 -0
- sempy_labs/_bpa_translation/_model/_translations_ga-IE.po +967 -0
- sempy_labs/_bpa_translation/_model/_translations_he-IL.po +902 -0
- sempy_labs/_bpa_translation/_model/_translations_hi-IN.po +944 -0
- sempy_labs/_bpa_translation/_model/_translations_hu-HU.po +963 -0
- sempy_labs/_bpa_translation/_model/_translations_id-ID.po +946 -0
- sempy_labs/_bpa_translation/_model/_translations_is-IS.po +939 -0
- sempy_labs/_bpa_translation/_model/_translations_it-IT.po +986 -0
- sempy_labs/_bpa_translation/_model/_translations_ja-JP.po +846 -0
- sempy_labs/_bpa_translation/_model/_translations_ko-KR.po +839 -0
- sempy_labs/_bpa_translation/_model/_translations_mt-MT.po +967 -0
- sempy_labs/_bpa_translation/_model/_translations_nl-NL.po +978 -0
- sempy_labs/_bpa_translation/_model/_translations_pl-PL.po +962 -0
- sempy_labs/_bpa_translation/_model/_translations_pt-BR.po +962 -0
- sempy_labs/_bpa_translation/_model/_translations_pt-PT.po +957 -0
- sempy_labs/_bpa_translation/_model/_translations_ro-RO.po +968 -0
- sempy_labs/_bpa_translation/_model/_translations_ru-RU.po +964 -0
- sempy_labs/_bpa_translation/_model/_translations_sk-SK.po +952 -0
- sempy_labs/_bpa_translation/_model/_translations_sl-SL.po +950 -0
- sempy_labs/_bpa_translation/_model/_translations_sv-SE.po +942 -0
- sempy_labs/_bpa_translation/_model/_translations_ta-IN.po +976 -0
- sempy_labs/_bpa_translation/_model/_translations_te-IN.po +947 -0
- sempy_labs/_bpa_translation/_model/_translations_th-TH.po +924 -0
- sempy_labs/_bpa_translation/_model/_translations_tr-TR.po +953 -0
- sempy_labs/_bpa_translation/_model/_translations_uk-UA.po +961 -0
- sempy_labs/_bpa_translation/_model/_translations_zh-CN.po +804 -0
- sempy_labs/_bpa_translation/_model/_translations_zu-ZA.po +969 -0
- sempy_labs/_capacities.py +1198 -0
- sempy_labs/_capacity_migration.py +660 -0
- sempy_labs/_clear_cache.py +351 -0
- sempy_labs/_connections.py +610 -0
- sempy_labs/_dashboards.py +69 -0
- sempy_labs/_data_access_security.py +98 -0
- sempy_labs/_data_pipelines.py +162 -0
- sempy_labs/_dataflows.py +668 -0
- sempy_labs/_dax.py +501 -0
- sempy_labs/_daxformatter.py +80 -0
- sempy_labs/_delta_analyzer.py +467 -0
- sempy_labs/_delta_analyzer_history.py +301 -0
- sempy_labs/_dictionary_diffs.py +221 -0
- sempy_labs/_documentation.py +147 -0
- sempy_labs/_domains.py +51 -0
- sempy_labs/_eventhouses.py +182 -0
- sempy_labs/_external_data_shares.py +230 -0
- sempy_labs/_gateways.py +521 -0
- sempy_labs/_generate_semantic_model.py +521 -0
- sempy_labs/_get_connection_string.py +84 -0
- sempy_labs/_git.py +543 -0
- sempy_labs/_graphQL.py +90 -0
- sempy_labs/_helper_functions.py +2833 -0
- sempy_labs/_icons.py +149 -0
- sempy_labs/_job_scheduler.py +609 -0
- sempy_labs/_kql_databases.py +149 -0
- sempy_labs/_kql_querysets.py +124 -0
- sempy_labs/_kusto.py +137 -0
- sempy_labs/_labels.py +124 -0
- sempy_labs/_list_functions.py +1720 -0
- sempy_labs/_managed_private_endpoints.py +253 -0
- sempy_labs/_mirrored_databases.py +416 -0
- sempy_labs/_mirrored_warehouses.py +60 -0
- sempy_labs/_ml_experiments.py +113 -0
- sempy_labs/_model_auto_build.py +140 -0
- sempy_labs/_model_bpa.py +557 -0
- sempy_labs/_model_bpa_bulk.py +378 -0
- sempy_labs/_model_bpa_rules.py +859 -0
- sempy_labs/_model_dependencies.py +343 -0
- sempy_labs/_mounted_data_factories.py +123 -0
- sempy_labs/_notebooks.py +441 -0
- sempy_labs/_one_lake_integration.py +151 -0
- sempy_labs/_onelake.py +131 -0
- sempy_labs/_query_scale_out.py +433 -0
- sempy_labs/_refresh_semantic_model.py +435 -0
- sempy_labs/_semantic_models.py +468 -0
- sempy_labs/_spark.py +455 -0
- sempy_labs/_sql.py +241 -0
- sempy_labs/_sql_audit_settings.py +207 -0
- sempy_labs/_sql_endpoints.py +214 -0
- sempy_labs/_tags.py +201 -0
- sempy_labs/_translations.py +43 -0
- sempy_labs/_user_delegation_key.py +44 -0
- sempy_labs/_utils.py +79 -0
- sempy_labs/_vertipaq.py +1021 -0
- sempy_labs/_vpax.py +388 -0
- sempy_labs/_warehouses.py +234 -0
- sempy_labs/_workloads.py +140 -0
- sempy_labs/_workspace_identity.py +72 -0
- sempy_labs/_workspaces.py +595 -0
- sempy_labs/admin/__init__.py +170 -0
- sempy_labs/admin/_activities.py +167 -0
- sempy_labs/admin/_apps.py +145 -0
- sempy_labs/admin/_artifacts.py +65 -0
- sempy_labs/admin/_basic_functions.py +463 -0
- sempy_labs/admin/_capacities.py +508 -0
- sempy_labs/admin/_dataflows.py +45 -0
- sempy_labs/admin/_datasets.py +186 -0
- sempy_labs/admin/_domains.py +522 -0
- sempy_labs/admin/_external_data_share.py +100 -0
- sempy_labs/admin/_git.py +72 -0
- sempy_labs/admin/_items.py +265 -0
- sempy_labs/admin/_labels.py +211 -0
- sempy_labs/admin/_reports.py +241 -0
- sempy_labs/admin/_scanner.py +118 -0
- sempy_labs/admin/_shared.py +82 -0
- sempy_labs/admin/_sharing_links.py +110 -0
- sempy_labs/admin/_tags.py +131 -0
- sempy_labs/admin/_tenant.py +503 -0
- sempy_labs/admin/_tenant_keys.py +89 -0
- sempy_labs/admin/_users.py +140 -0
- sempy_labs/admin/_workspaces.py +236 -0
- sempy_labs/deployment_pipeline/__init__.py +23 -0
- sempy_labs/deployment_pipeline/_items.py +580 -0
- sempy_labs/directlake/__init__.py +57 -0
- sempy_labs/directlake/_autosync.py +58 -0
- sempy_labs/directlake/_directlake_schema_compare.py +120 -0
- sempy_labs/directlake/_directlake_schema_sync.py +161 -0
- sempy_labs/directlake/_dl_helper.py +274 -0
- sempy_labs/directlake/_generate_shared_expression.py +94 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +62 -0
- sempy_labs/directlake/_get_shared_expression.py +34 -0
- sempy_labs/directlake/_guardrails.py +96 -0
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +70 -0
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +90 -0
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +239 -0
- sempy_labs/directlake/_update_directlake_partition_entity.py +259 -0
- sempy_labs/directlake/_warm_cache.py +236 -0
- sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
- sempy_labs/environment/__init__.py +23 -0
- sempy_labs/environment/_items.py +212 -0
- sempy_labs/environment/_pubstage.py +223 -0
- sempy_labs/eventstream/__init__.py +37 -0
- sempy_labs/eventstream/_items.py +263 -0
- sempy_labs/eventstream/_topology.py +652 -0
- sempy_labs/graph/__init__.py +59 -0
- sempy_labs/graph/_groups.py +651 -0
- sempy_labs/graph/_sensitivity_labels.py +120 -0
- sempy_labs/graph/_teams.py +125 -0
- sempy_labs/graph/_user_licenses.py +96 -0
- sempy_labs/graph/_users.py +516 -0
- sempy_labs/graph_model/__init__.py +15 -0
- sempy_labs/graph_model/_background_jobs.py +63 -0
- sempy_labs/graph_model/_items.py +149 -0
- sempy_labs/lakehouse/__init__.py +67 -0
- sempy_labs/lakehouse/_blobs.py +247 -0
- sempy_labs/lakehouse/_get_lakehouse_columns.py +102 -0
- sempy_labs/lakehouse/_get_lakehouse_tables.py +274 -0
- sempy_labs/lakehouse/_helper.py +250 -0
- sempy_labs/lakehouse/_lakehouse.py +351 -0
- sempy_labs/lakehouse/_livy_sessions.py +143 -0
- sempy_labs/lakehouse/_materialized_lake_views.py +157 -0
- sempy_labs/lakehouse/_partitioning.py +165 -0
- sempy_labs/lakehouse/_schemas.py +217 -0
- sempy_labs/lakehouse/_shortcuts.py +440 -0
- sempy_labs/migration/__init__.py +35 -0
- sempy_labs/migration/_create_pqt_file.py +238 -0
- sempy_labs/migration/_direct_lake_to_import.py +105 -0
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +398 -0
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +148 -0
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +533 -0
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +172 -0
- sempy_labs/migration/_migration_validation.py +71 -0
- sempy_labs/migration/_refresh_calc_tables.py +131 -0
- sempy_labs/mirrored_azure_databricks_catalog/__init__.py +15 -0
- sempy_labs/mirrored_azure_databricks_catalog/_discover.py +213 -0
- sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py +45 -0
- sempy_labs/ml_model/__init__.py +23 -0
- sempy_labs/ml_model/_functions.py +427 -0
- sempy_labs/report/_BPAReportTemplate.json +232 -0
- sempy_labs/report/__init__.py +55 -0
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
- sempy_labs/report/_bpareporttemplate/.platform +11 -0
- sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
- sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
- sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
- sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
- sempy_labs/report/_download_report.py +76 -0
- sempy_labs/report/_export_report.py +257 -0
- sempy_labs/report/_generate_report.py +427 -0
- sempy_labs/report/_paginated.py +76 -0
- sempy_labs/report/_report_bpa.py +354 -0
- sempy_labs/report/_report_bpa_rules.py +115 -0
- sempy_labs/report/_report_functions.py +581 -0
- sempy_labs/report/_report_helper.py +227 -0
- sempy_labs/report/_report_list_functions.py +110 -0
- sempy_labs/report/_report_rebind.py +149 -0
- sempy_labs/report/_reportwrapper.py +3100 -0
- sempy_labs/report/_save_report.py +147 -0
- sempy_labs/snowflake_database/__init__.py +10 -0
- sempy_labs/snowflake_database/_items.py +105 -0
- sempy_labs/sql_database/__init__.py +21 -0
- sempy_labs/sql_database/_items.py +201 -0
- sempy_labs/sql_database/_mirroring.py +79 -0
- sempy_labs/theme/__init__.py +12 -0
- sempy_labs/theme/_org_themes.py +129 -0
- sempy_labs/tom/__init__.py +3 -0
- sempy_labs/tom/_model.py +5977 -0
- sempy_labs/variable_library/__init__.py +19 -0
- sempy_labs/variable_library/_functions.py +403 -0
- sempy_labs/warehouse/__init__.py +28 -0
- sempy_labs/warehouse/_items.py +234 -0
- sempy_labs/warehouse/_restore_points.py +309 -0
sempy_labs/_helper_functions.py
@@ -0,0 +1,2833 @@
import sempy.fabric as fabric
import re
import json
import base64
import time
import uuid
from sempy.fabric.exceptions import FabricHTTPException, WorkspaceNotFoundException
import pandas as pd
from functools import wraps
import datetime
from typing import Optional, Tuple, List, Dict
from uuid import UUID
import sempy_labs._icons as icons
from azure.core.credentials import TokenCredential, AccessToken
import urllib.parse
import numpy as np
from IPython.display import display, HTML
import requests
import sempy_labs._authentication as auth
from jsonpath_ng.ext import parse
from jsonpath_ng.jsonpath import Fields, Index
from sempy._utils._log import log
from os import PathLike
import sempy_labs._utils as utils


def _build_url(url: str, params: dict) -> str:
    """
    Build the url with a list of parameters.
    """
    url_parts = list(urllib.parse.urlparse(url))
    url_parts[4] = urllib.parse.urlencode(params)
    url = urllib.parse.urlunparse(url_parts)

    return url


def _encode_user(user: str) -> str:

    return urllib.parse.quote(user, safe="@")


@log
def create_abfss_path(
    lakehouse_id: UUID,
    lakehouse_workspace_id: UUID,
    delta_table_name: Optional[str] = None,
    schema: Optional[str] = None,
) -> str:
    """
    Creates an abfss path for a delta table in a Fabric lakehouse.

    Parameters
    ----------
    lakehouse_id : uuid.UUID
        ID of the Fabric lakehouse.
    lakehouse_workspace_id : uuid.UUID
        ID of the Fabric workspace.
    delta_table_name : str, default=None
        Name of the delta table.
    schema : str, default=None
        The schema of the delta table.

    Returns
    -------
    str
        An abfss path which can be used to save/reference a delta table in a Fabric lakehouse.
    """

    fp = _get_default_file_path()
    path = f"abfss://{lakehouse_workspace_id}@{fp}/{lakehouse_id}"

    if delta_table_name is not None:
        path += "/Tables"
        if schema is not None:
            path += f"/{schema}/{delta_table_name}"
        else:
            path += f"/{delta_table_name}"

    return path
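
A minimal usage sketch of create_abfss_path is shown below. It is not part of the packaged source: the GUIDs are placeholders, and the call assumes a Fabric notebook session with semantic-link-labs installed so the default file-storage endpoint can be resolved.

    # Illustrative example only -- not in the wheel; IDs are made-up placeholders.
    from sempy_labs._helper_functions import create_abfss_path

    path = create_abfss_path(
        lakehouse_id="11111111-1111-1111-1111-111111111111",
        lakehouse_workspace_id="22222222-2222-2222-2222-222222222222",
        delta_table_name="sales",
        schema="dbo",
    )
    # Expected shape: abfss://<workspace id>@<onelake host>/<lakehouse id>/Tables/dbo/sales
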
@log
def create_abfss_path_from_path(
    lakehouse_id: UUID, workspace_id: UUID, file_path: str
) -> str:

    fp = _get_default_file_path()

    return f"abfss://{workspace_id}@{fp}/{lakehouse_id}/{file_path}"


def _get_default_file_path() -> str:

    default_file_storage = _get_fabric_context_setting(name="fs.defaultFS")

    return default_file_storage.split("@")[-1][:-1]


def _split_abfss_path(path: str) -> Tuple[UUID, UUID, str]:

    parsed_url = urllib.parse.urlparse(path)

    workspace_id = parsed_url.netloc.split("@")[0]
    item_id = parsed_url.path.lstrip("/").split("/")[0]
    delta_table_name = parsed_url.path.split("/")[-1]

    return workspace_id, item_id, delta_table_name


@log
def format_dax_object_name(table: str, column: str) -> str:
    """
    Formats a table/column combination to the 'Table Name'[Column Name] format.

    Parameters
    ----------
    table : str
        The name of the table.
    column : str
        The name of the column.

    Returns
    -------
    str
        The fully qualified object name.
    """

    return "'" + table + "'[" + column + "]"


@log
def create_relationship_name(
    from_table: str, from_column: str, to_table: str, to_column: str
) -> str:
    """
    Formats a relationship's table/columns into a fully qualified name.

    Parameters
    ----------
    from_table : str
        The name of the table on the 'from' side of the relationship.
    from_column : str
        The name of the column on the 'from' side of the relationship.
    to_table : str
        The name of the table on the 'to' side of the relationship.
    to_column : str
        The name of the column on the 'to' side of the relationship.

    Returns
    -------
    str
        The fully qualified relationship name.
    """

    return (
        format_dax_object_name(from_table, from_column)
        + " -> "
        + format_dax_object_name(to_table, to_column)
    )
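
These two formatters only build strings, so their behavior is easy to show; the example below is illustrative and not part of the wheel.

    # Illustrative example only -- not in the wheel.
    from sempy_labs._helper_functions import (
        format_dax_object_name,
        create_relationship_name,
    )

    format_dax_object_name("Sales", "Order Date")
    # -> 'Sales'[Order Date]

    create_relationship_name("Sales", "Order Date", "Date", "Date")
    # -> 'Sales'[Order Date] -> 'Date'[Date]
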
@log
def resolve_report_id(
    report: str | UUID, workspace: Optional[str | UUID] = None
) -> UUID:
    """
    Obtains the ID of the Power BI report.

    Parameters
    ----------
    report : str | uuid.UUID
        The name or ID of the Power BI report.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.

    Returns
    -------
    uuid.UUID
        The ID of the Power BI report.
    """

    return resolve_item_id(item=report, type="Report", workspace=workspace)


@log
def resolve_report_name(report_id: UUID, workspace: Optional[str | UUID] = None) -> str:
    """
    Obtains the name of the Power BI report.

    Parameters
    ----------
    report_id : uuid.UUID
        The ID of the Power BI report.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.

    Returns
    -------
    str
        The name of the Power BI report.
    """

    return resolve_item_name(item_id=report_id, workspace=workspace)


@log
def delete_item(
    item: str | UUID, type: str, workspace: Optional[str | UUID] = None
) -> None:
    """
    Deletes an item from a Fabric workspace.

    Parameters
    ----------
    item : str | uuid.UUID
        The name or ID of the item to be deleted.
    type : str
        The type of the item to be deleted.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    """

    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
    (item_name, item_id) = resolve_item_name_and_id(item, type, workspace_id)

    fabric.delete_item(item_id=item_id, workspace=workspace_id)

    print(
        f"{icons.green_dot} The '{item_name}' {type} has been successfully deleted from the '{workspace_name}' workspace."
    )


@log
def create_folder_if_not_exists(
    folder: str | PathLike, workspace: Optional[str | UUID] = None
) -> UUID:
    try:
        x = fabric.resolve_folder_id(folder=folder, workspace=workspace)
    except Exception:
        x = fabric.create_folder(folder=folder, workspace=workspace)

    return x


@log
def create_item(
    name: str,
    type: str,
    description: Optional[str] = None,
    definition: Optional[dict] = None,
    workspace: Optional[str | UUID] = None,
    folder: Optional[str | PathLike] = None,
):
    """
    Creates an item in a Fabric workspace.

    Parameters
    ----------
    name : str
        The name of the item to be created.
    type : str
        The type of the item to be created.
    description : str, default=None
        A description of the item to be created.
    definition : dict, default=None
        The definition of the item to be created.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    folder : str | os.PathLike, default=None
        The folder within the workspace where the item will be created.
        Defaults to None which places the item in the root of the workspace.
    """

    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
    item_type_url = utils.items.get(type)

    payload = {
        "displayName": name,
    }
    if description:
        payload["description"] = description
    if definition:
        payload["definition"] = definition
    if folder:
        payload["folderId"] = create_folder_if_not_exists(
            folder=folder, workspace=workspace_id
        )

    _base_api(
        request=f"/v1/workspaces/{workspace_id}/{item_type_url}",
        method="post",
        payload=payload,
        status_codes=[201, 202],
        lro_return_status_code=True,
        client="fabric_sp",
    )
    print(
        f"{icons.green_dot} The '{name}' {type} has been successfully created within the '{workspace_name}' workspace."
    )
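
A sketch of how create_item might be called from a Fabric notebook. It is illustrative only: it assumes the caller may create items in the target workspace and that the type string is one of the keys of the internal utils.items mapping.

    # Illustrative example only -- not in the wheel; names are placeholders.
    from sempy_labs._helper_functions import create_item

    create_item(
        name="MyNewLakehouse",            # display name of the new item
        type="Lakehouse",                 # assumed to be a key of utils.items
        description="Created via semantic-link-labs",
        workspace=None,                   # None -> workspace of the attached lakehouse/notebook
    )
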
@log
def copy_item(
    item: str | UUID,
    type: str,
    target_name: Optional[str] = None,
    source_workspace: Optional[str | UUID] = None,
    target_workspace: Optional[str | UUID] = None,
    overwrite: bool = False,
    keep_existing_bindings: bool = False,
):
    """
    Copies an item (with its definition) from one location to another location.

    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).

    Parameters
    ----------
    item : str | uuid.UUID
        The name or ID of the item to be copied.
    type : str
        The type of the item.
    target_name: str, default=None
        The name of the item in the target workspace. Defaults to the same name as the source item.
    source_workspace : str | uuid.UUID, default=None
        The workspace name or ID in which the item exists.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    target_workspace : str | uuid.UUID, default=None
        The workspace name or ID to which the item will be copied.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    overwrite : bool, default=False
        If True, overwrites the item in the target workspace if it already exists.
    keep_existing_bindings : bool, default=False
        If True, ensures that reports are re-bound to the original semantic model.
        If False, reports are bound to the semantic model to which the item is bound.
    """

    from sempy_labs.report import report_rebind

    (item_name, item_id) = resolve_item_name_and_id(
        item=item, type=type, workspace=source_workspace
    )
    (source_workspace_name, source_workspace_id) = resolve_workspace_name_and_id(
        source_workspace
    )
    (target_workspace_name, target_workspace_id) = resolve_workspace_name_and_id(
        target_workspace
    )

    if target_name is None:
        target_name = item_name

    if source_workspace_id == target_workspace_id and target_name == item_name:
        raise ValueError(
            f"{icons.red_dot} The source and target workspaces are the same and the target name is the same as the source name. No action taken."
        )

    type_url = utils.items.get(type)
    result = _base_api(
        request=f"v1/workspaces/{source_workspace_id}/{type_url}/{item_id}",
        client="fabric_sp",
    )
    description = result.json().get("description")

    payload = get_item_definition(
        item=item_id,
        type=type,
        workspace=source_workspace_id,
        return_dataframe=False,
        decode=False,
    )
    payload["displayName"] = target_name
    if description:
        payload["description"] = description

    # Check if item exists in target workspace
    exists = False
    try:
        target_item_id = resolve_item_id(
            item=target_name, type=type, workspace=target_workspace_id
        )
        exists = True
    except Exception:
        exists = False

    if exists and not overwrite:
        raise ValueError(
            f"{icons.warning} The item '{target_name}' of type '{type}' already exists in the target workspace '{target_workspace_name}' and overwrite is set to False."
        )
    elif exists and overwrite:
        # Update item definition
        print(
            f"{icons.in_progress} Updating existing item '{target_name}' of type '{type}' in the target workspace '{target_workspace_name}'..."
        )
        # Get the existing source model
        if type == "Report" and keep_existing_bindings:
            result = _base_api(
                request=f"v1.0/myorg/groups/{target_workspace_id}/reports/{target_item_id}",
                client="fabric_sp",
            ).json()
            dataset_id = result.get("datasetId")
            dataset_workspace_id = result.get("datasetWorkspaceId")
        _base_api(
            request=f"/v1/workspaces/{target_workspace_id}/{type_url}/{target_item_id}/updateDefinition",
            method="post",
            client="fabric_sp",
            payload=payload,
            lro_return_status_code=True,
            status_codes=None,
        )
        print(
            f"{icons.green_dot} The item '{target_name}' of type '{type}' has been successfully updated in the target workspace '{target_workspace_name}'."
        )

        if keep_existing_bindings:
            report_rebind(
                report=target_item_id,
                dataset=dataset_id,
                report_workspace=target_workspace,
                dataset_workspace=dataset_workspace_id,
            )

    else:
        print(
            f"{icons.in_progress} Creating new item '{target_name}' of type '{type}' in the target workspace '{target_workspace_name}'..."
        )
        create_item(
            name=target_name,
            type=type,
            definition=payload["definition"],
            workspace=target_workspace_id,
        )


@log
def is_base64(s):
    try:
        # Add padding if needed
        s_padded = s + "=" * (-len(s) % 4)
        decoded = base64.b64decode(s_padded, validate=True)
        # Optional: check if re-encoding gives the original (excluding padding)
        return base64.b64encode(decoded).decode().rstrip("=") == s.rstrip("=")
    except Exception:
        return False


@log
def decode_payload(payload):

    if is_base64(payload):
        try:
            decoded_payload = json.loads(base64.b64decode(payload).decode("utf-8"))
        except Exception:
            decoded_payload = base64.b64decode(payload)
    elif isinstance(payload, dict):
        decoded_payload = payload
    else:
        raise ValueError("Payload must be a dictionary or a base64 encoded value.")

    return decoded_payload
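
A small round-trip sketch for the payload helpers above (illustrative only; the payload content is made up):

    # Illustrative example only -- not in the wheel.
    import base64
    import json

    from sempy_labs._helper_functions import is_base64, decode_payload

    part = base64.b64encode(json.dumps({"version": "1.0"}).encode("utf-8")).decode("utf-8")

    is_base64(part)           # True for a well-formed base64 string
    decode_payload(part)      # -> {'version': '1.0'}; JSON payloads come back as dicts
    decode_payload({"a": 1})  # dicts pass through unchanged
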
@log
def get_item_definition(
    item: str | UUID,
    type: str,
    workspace: Optional[str | UUID] = None,
    return_dataframe: bool = False,
    decode: bool = True,
    format: Optional[str] = None,
) -> dict | pd.DataFrame:
    """
    Gets a Fabric item's definition.

    This is a wrapper function for the following API: `<https://learn.microsoft.com/rest/api/fabric/core/items/get-item-definition>`_.

    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).

    Parameters
    ----------
    item : str | uuid.UUID
        The name or ID of the item.
    type : str
        The `type <https://learn.microsoft.com/rest/api/fabric/core/items/list-items?tabs=HTTP#itemtype>`_ of the item.
    workspace : str | uuid.UUID, default=None
        The workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    return_dataframe : bool, default=False
        If True, returns a pandas dataframe.
        If False, returns a dictionary.
    decode : bool, default=True
        If True, decodes the base64 payload.
    format : str, default=None
        The `format <https://learn.microsoft.com/rest/api/fabric/core/items/get-item-definition?tabs=HTTP#itemdefinition>`_ of the item definition.
    """

    workspace_id = resolve_workspace_id(workspace)
    item_id = resolve_item_id(item=item, type=type, workspace=workspace_id)

    item_type_url = utils.items.get(type)
    if not item_type_url:
        raise ValueError(f"{icons.red_dot} Invalid item type '{type}'.")

    url = f"/v1/workspaces/{workspace_id}/{item_type_url}/{item_id}/getDefinition"
    if format:
        url += f"?format={format}"

    result = _base_api(
        request=url,
        method="post",
        status_codes=None,
        lro_return_json=True,
        client="fabric_sp",
    )

    if return_dataframe:
        return pd.json_normalize(result["definition"]["parts"]).rename(
            columns={
                "path": "Path",
                "payload": "Payload",
                "payloadType": "Payload Type",
            }
        )

    definition = {"definition": {"parts": []}}
    if decode:
        for part in result.get("definition", {}).get("parts", []):
            path = part.get("path")
            payload = part.get("payload")
            decoded_payload = decode_payload(payload)

            # Keep structure similar to original but replace payload with decoded version
            definition["definition"]["parts"].append(
                {"path": path, "payload": decoded_payload}
            )
        return definition
    else:
        return result
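
A usage sketch for get_item_definition is shown below. It is illustrative only: the item and workspace names are placeholders, and "Notebook" is assumed to be one of the item types supported by the internal utils.items mapping.

    # Illustrative example only -- not in the wheel; names are placeholders.
    from sempy_labs._helper_functions import get_item_definition

    # Decoded definition as a dictionary (base64 payloads converted where possible)
    definition = get_item_definition(
        item="My Notebook", type="Notebook", workspace="My Workspace"
    )

    # Raw definition parts as a dataframe with Path / Payload / Payload Type columns
    df_parts = get_item_definition(
        item="My Notebook", type="Notebook", workspace="My Workspace", return_dataframe=True
    )
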
@log
def _get_item_definition(
    item: str | UUID,
    type: str,
    workspace: Optional[str | UUID] = None,
    format: Optional[str] = None,
    return_dataframe: bool = True,
    decode: bool = True,
):

    workspace_id = resolve_workspace_id(workspace)
    item_id = resolve_item_id(item, type, workspace_id)
    item_type_url = utils.item_types.get(type)[1]
    path = utils.item_types.get(type)[2]

    url = f"/v1/workspaces/{workspace_id}/{item_type_url}/{item_id}/getDefinition"
    if format:
        url += f"?format={format}"

    result = _base_api(
        request=url,
        method="post",
        status_codes=None,
        lro_return_json=True,
        client="fabric_sp",
    )

    if return_dataframe:
        return pd.json_normalize(result["definition"]["parts"])

    value = next(
        p.get("payload") for p in result["definition"]["parts"] if p.get("path") == path
    )
    if decode:
        return json.loads(_decode_b64(value))
    else:
        return value


@log
def resolve_lakehouse_name_and_id(
    lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
) -> Tuple[str, UUID]:

    workspace_id = resolve_workspace_id(workspace)
    type = "Lakehouse"

    if lakehouse is None:
        lakehouse_id = _get_fabric_context_setting(name="trident.lakehouse.id")
        if lakehouse_id == "":
            raise ValueError(
                f"{icons.red_dot} Cannot resolve a lakehouse. Please enter a valid lakehouse or make sure a lakehouse is attached to the notebook."
            )
        (lakehouse_name, lakehouse_id) = resolve_item_name_and_id(
            item=lakehouse_id, type=type, workspace=workspace_id
        )

    else:
        (lakehouse_name, lakehouse_id) = resolve_item_name_and_id(
            item=lakehouse, type=type, workspace=workspace_id
        )

    return lakehouse_name, lakehouse_id


@log
def resolve_dataset_name_and_id(
    dataset: str | UUID, workspace: Optional[str | UUID] = None
) -> Tuple[str, UUID]:

    (dataset_name, dataset_id) = resolve_item_name_and_id(
        item=dataset, type="SemanticModel", workspace=workspace
    )

    return dataset_name, dataset_id


@log
def resolve_dataset_id(
    dataset: str | UUID, workspace: Optional[str | UUID] = None
) -> UUID:
    """
    Obtains the ID of the semantic model.

    Parameters
    ----------
    dataset : str | uuid.UUID
        The name or ID of the semantic model.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.

    Returns
    -------
    uuid.UUID
        The ID of the semantic model.
    """

    return resolve_item_id(item=dataset, type="SemanticModel", workspace=workspace)


@log
def resolve_dataset_name(
    dataset_id: UUID, workspace: Optional[str | UUID] = None
) -> str:
    """
    Obtains the name of the semantic model.

    Parameters
    ----------
    dataset_id : uuid.UUID
        The ID of the semantic model.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.

    Returns
    -------
    str
        The name of the semantic model.
    """

    return resolve_item_name(item_id=dataset_id, workspace=workspace)


@log
def resolve_lakehouse_name(
    lakehouse_id: Optional[UUID] = None, workspace: Optional[str | UUID] = None
) -> str:
    """
    Obtains the name of the Fabric lakehouse.

    Parameters
    ----------
    lakehouse_id : uuid.UUID, default=None
        The ID of the Fabric lakehouse.
        Defaults to None which resolves to the lakehouse attached to the notebook.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.

    Returns
    -------
    str
        The name of the Fabric lakehouse.
    """

    if lakehouse_id is None:
        lakehouse_id = _get_fabric_context_setting(name="trident.lakehouse.id")
        if lakehouse_id == "":
            raise ValueError(
                f"{icons.red_dot} Cannot resolve a lakehouse. Please enter a valid lakehouse or make sure a lakehouse is attached to the notebook."
            )

    return resolve_item_name(item_id=lakehouse_id, workspace=workspace)


@log
def resolve_lakehouse_id(
    lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
) -> UUID:
    """
    Obtains the ID of the Fabric lakehouse.

    Parameters
    ----------
    lakehouse : str | uuid.UUID, default=None
        The name or ID of the Fabric lakehouse.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.

    Returns
    -------
    uuid.UUID
        The ID of the Fabric lakehouse.
    """

    if lakehouse is None:
        lakehouse_id = _get_fabric_context_setting(name="trident.lakehouse.id")
        if lakehouse_id == "":
            raise ValueError(
                f"{icons.red_dot} Cannot resolve a lakehouse. Please enter a valid lakehouse or make sure a lakehouse is attached to the notebook."
            )
    else:
        lakehouse_id = resolve_item_id(
            item=lakehouse, type="Lakehouse", workspace=workspace
        )

    return lakehouse_id


@log
def get_direct_lake_sql_endpoint(
    dataset: str | UUID, workspace: Optional[str | UUID] = None
) -> UUID:
    """
    Obtains the SQL Endpoint ID of the semantic model.

    Parameters
    ----------
    dataset : str | uuid.UUID
        The name or ID of the semantic model.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.

    Returns
    -------
    uuid.UUID
        The ID of the SQL Endpoint.
    """

    from sempy_labs.tom import connect_semantic_model

    # dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
    # dfP_filt = dfP[dfP["Mode"] == "DirectLake"]

    # if len(dfP_filt) == 0:
    #     raise ValueError(
    #         f"The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode."
    #     )

    with connect_semantic_model(
        dataset=dataset, readonly=True, workspace=workspace
    ) as tom:
        sqlEndpointId = None
        for e in tom.model.Expressions:
            if e.Name == "DatabaseQuery":
                expr = e.Expression
                matches = re.findall(r'"([^"]+)"', expr)
                sqlEndpointId = matches[1]

        if sqlEndpointId is None:
            raise ValueError(f"{icons.red_dot} SQL Endpoint not found.")

        return sqlEndpointId


@log
def generate_embedded_filter(filter: str) -> str:
    """
    Converts the filter expression to a filter expression which can be used by a Power BI embedded URL.

    Parameters
    ----------
    filter : str
        The filter expression for an embedded Power BI report.

    Returns
    -------
    str
        A filter expression usable by a Power BI embedded URL.
    """

    pattern = r"'[^']+'\[[^\[]+\]"
    matches = re.findall(pattern, filter)
    for match in matches:
        matchReplace = (
            match.replace("'", "")
            .replace("[", "/")
            .replace("]", "")
            .replace(" ", "_x0020_")
            .replace("@", "_00x40_")
            .replace("+", "_0x2B_")
            .replace("{", "_007B_")
            .replace("}", "_007D_")
        )
        filter = filter.replace(match, matchReplace)

    pattern = r"\[[^\[]+\]"
    matches = re.findall(pattern, filter)
    for match in matches:
        matchReplace = (
            match.replace("'", "")
            .replace("[", "/")
            .replace("]", "")
            .replace(" ", "_x0020_")
            .replace("@", "_00x40_")
            .replace("+", "_0x2B_")
            .replace("{", "_007B_")
            .replace("}", "_007D_")
        )
        filter = filter.replace(match, matchReplace)

    revised_filter = (
        filter.replace("<=", "le")
        .replace(">=", "ge")
        .replace("<>", "ne")
        .replace("!=", "ne")
        .replace("==", "eq")
        .replace("=", "eq")
        .replace("<", "lt")
        .replace(">", "gt")
        .replace(" && ", " and ")
        .replace(" & ", " and ")
        .replace(" || ", " or ")
        .replace(" | ", " or ")
        .replace("{", "(")
        .replace("}", ")")
    )

    return revised_filter
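
An example of the embedded-filter conversion (illustrative only; the table and column names are made up):

    # Illustrative example only -- not in the wheel.
    from sempy_labs._helper_functions import generate_embedded_filter

    generate_embedded_filter("'Product Category'[Category Name] = 'Bikes'")
    # -> "Product_x0020_Category/Category_x0020_Name eq 'Bikes'"
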
@log
def save_as_delta_table(
    dataframe,
    delta_table_name: str,
    write_mode: str,
    merge_schema: bool = False,
    schema: Optional[dict] = None,
    lakehouse: Optional[str | UUID] = None,
    workspace: Optional[str | UUID] = None,
):
    """
    Saves a pandas or spark dataframe as a delta table in a Fabric lakehouse.

    This function may be executed in either a PySpark or pure Python notebook. If executing in a pure Python notebook, the dataframe must be a pandas dataframe.

    Parameters
    ----------
    dataframe : pandas.DataFrame | spark.DataFrame
        The dataframe to be saved as a delta table.
    delta_table_name : str
        The name of the delta table.
    write_mode : str
        The write mode for the save operation. Options: 'append', 'overwrite'.
    merge_schema : bool, default=False
        If True, merges the schema of the dataframe with the schema of the delta table.
    schema : dict, default=None
        A dictionary showing the schema of the columns and their data types.
    lakehouse : str | uuid.UUID, default=None
        The Fabric lakehouse name or ID.
        Defaults to None which resolves to the lakehouse attached to the notebook.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    """

    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
        lakehouse=lakehouse, workspace=workspace_id
    )

    write_modes = ["append", "overwrite"]
    write_mode = write_mode.lower()

    if write_mode not in write_modes:
        raise ValueError(
            f"{icons.red_dot} Invalid 'write_mode' parameter. Choose from one of the following values: {write_modes}."
        )

    if " " in delta_table_name:
        raise ValueError(
            f"{icons.red_dot} Invalid 'delta_table_name'. Delta tables in the lakehouse cannot have spaces in their names."
        )

    import pyarrow as pa
    from pyspark.sql.types import (
        StringType,
        IntegerType,
        FloatType,
        DateType,
        StructType,
        StructField,
        BooleanType,
        LongType,
        DoubleType,
        TimestampType,
    )

    def get_type_mapping(pure_python):
        common_mapping = {
            "string": ("pa", pa.string(), StringType()),
            "str": ("pa", pa.string(), StringType()),
            "integer": ("pa", pa.int32(), IntegerType()),
            "int": ("pa", pa.int32(), IntegerType()),
            "float": ("pa", pa.float32(), FloatType()),
            "double": ("pa", pa.float64(), DoubleType()),
            "long": ("pa", pa.int64(), LongType()),
            "bool": ("pa", pa.bool_(), BooleanType()),
            "boolean": ("pa", pa.bool_(), BooleanType()),
            "date": ("pa", pa.date32(), DateType()),
            "timestamp": ("pa", pa.timestamp("us"), TimestampType()),
        }
        return {k: v[1] if pure_python else v[2] for k, v in common_mapping.items()}

    def build_schema(schema_dict, type_mapping, use_arrow=True):
        if use_arrow:
            fields = [
                pa.field(name, type_mapping.get(dtype.lower()))
                for name, dtype in schema_dict.items()
            ]
            return pa.schema(fields)
        else:
            return StructType(
                [
                    StructField(name, type_mapping.get(dtype.lower()), True)
                    for name, dtype in schema_dict.items()
                ]
            )

    # Main logic
    schema_map = None
    if schema is not None:
        use_arrow = _pure_python_notebook()
        type_mapping = get_type_mapping(use_arrow)
        schema_map = build_schema(schema, type_mapping, use_arrow)

    if isinstance(dataframe, pd.DataFrame):
        dataframe.columns = [col.replace(" ", "_") for col in dataframe.columns]
        if _pure_python_notebook():
            spark_df = dataframe
        else:
            spark = _create_spark_session()
            if schema is None:
                spark_df = spark.createDataFrame(dataframe)
            else:
                spark_df = spark.createDataFrame(dataframe, schema_map)
    else:
        for col_name in dataframe.columns:
            new_name = col_name.replace(" ", "_")
            dataframe = dataframe.withColumnRenamed(col_name, new_name)
        spark_df = dataframe

    file_path = create_abfss_path(
        lakehouse_id=lakehouse_id,
        lakehouse_workspace_id=workspace_id,
        delta_table_name=delta_table_name,
    )

    if _pure_python_notebook():
        from deltalake import write_deltalake

        write_args = {
            "table_or_uri": file_path,
            "data": spark_df,
            "mode": write_mode,
            "schema": schema_map,
        }

        if merge_schema:
            write_args["schema_mode"] = "merge"
            write_args["engine"] = "rust"

        write_deltalake(**write_args)
    else:
        writer = spark_df.write.mode(write_mode).format("delta")
        if merge_schema:
            writer = writer.option("mergeSchema", "true")

        writer.save(file_path)

    print(
        f"{icons.green_dot} The dataframe has been saved as the '{delta_table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace."
    )
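
A minimal usage sketch for save_as_delta_table from a Fabric notebook with a lakehouse attached (illustrative only; the data and table name are made up):

    # Illustrative example only -- not in the wheel; data and names are made up.
    import pandas as pd

    from sempy_labs._helper_functions import save_as_delta_table

    df = pd.DataFrame({"Product": ["A", "B"], "Units Sold": [10, 20]})  # spaces become underscores

    save_as_delta_table(
        dataframe=df,
        delta_table_name="product_sales",
        write_mode="overwrite",
    )
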
@log
|
|
1021
|
+
def language_validate(language: str):
|
|
1022
|
+
"""
|
|
1023
|
+
Validateds that the language specified exists within the supported langauges.
|
|
1024
|
+
|
|
1025
|
+
Parameters
|
|
1026
|
+
----------
|
|
1027
|
+
language : str
|
|
1028
|
+
The language code.
|
|
1029
|
+
|
|
1030
|
+
Returns
|
|
1031
|
+
-------
|
|
1032
|
+
bool
|
|
1033
|
+
A True/False indication as to whether the language code is supported.
|
|
1034
|
+
"""
|
|
1035
|
+
|
|
1036
|
+
url = "https://learn.microsoft.com/azure/ai-services/translator/language-support"
|
|
1037
|
+
|
|
1038
|
+
tables = pd.read_html(url)
|
|
1039
|
+
df = tables[0]
|
|
1040
|
+
|
|
1041
|
+
df_filt = df[df["Language code"] == language]
|
|
1042
|
+
|
|
1043
|
+
df_filt2 = df[df["Language"] == language.capitalize()]
|
|
1044
|
+
|
|
1045
|
+
if len(df_filt) == 1:
|
|
1046
|
+
lang = df_filt["Language"].iloc[0]
|
|
1047
|
+
elif len(df_filt2) == 1:
|
|
1048
|
+
lang = df_filt2["Language"].iloc[0]
|
|
1049
|
+
else:
|
|
1050
|
+
raise ValueError(
|
|
1051
|
+
f"{icons.red_dot} The '{language}' language is not a valid language code. Please refer to this link for a list of valid language codes: {url}."
|
|
1052
|
+
)
|
|
1053
|
+
|
|
1054
|
+
return lang
|
|
1055
|
+
|
|
1056
|
+
|
|
1057
|
+
@log
|
|
1058
|
+
def resolve_workspace_id(
|
|
1059
|
+
workspace: Optional[str | UUID] = None,
|
|
1060
|
+
) -> UUID:
|
|
1061
|
+
if workspace is None:
|
|
1062
|
+
workspace_id = _get_fabric_context_setting(name="trident.workspace.id")
|
|
1063
|
+
elif _is_valid_uuid(workspace):
|
|
1064
|
+
# Check (optional)
|
|
1065
|
+
workspace_id = workspace
|
|
1066
|
+
try:
|
|
1067
|
+
_base_api(request=f"/v1/workspaces/{workspace_id}", client="fabric_sp")
|
|
1068
|
+
except FabricHTTPException:
|
|
1069
|
+
raise ValueError(
|
|
1070
|
+
f"{icons.red_dot} The '{workspace_id}' workspace was not found."
|
|
1071
|
+
)
|
|
1072
|
+
else:
|
|
1073
|
+
responses = _base_api(
|
|
1074
|
+
request="/v1/workspaces", client="fabric_sp", uses_pagination=True
|
|
1075
|
+
)
|
|
1076
|
+
workspace_id = None
|
|
1077
|
+
for r in responses:
|
|
1078
|
+
for v in r.get("value", []):
|
|
1079
|
+
display_name = v.get("displayName")
|
|
1080
|
+
if display_name == workspace:
|
|
1081
|
+
workspace_id = v.get("id")
|
|
1082
|
+
break
|
|
1083
|
+
|
|
1084
|
+
if workspace_id is None:
|
|
1085
|
+
raise WorkspaceNotFoundException(workspace)
|
|
1086
|
+
|
|
1087
|
+
return workspace_id
|
|
1088
|
+
|
|
1089
|
+
|
|
1090
|
+
@log
|
|
1091
|
+
def resolve_workspace_name(
|
|
1092
|
+
workspace_id: Optional[UUID] = None, throw_error: bool = True
|
|
1093
|
+
) -> str:
|
|
1094
|
+
|
|
1095
|
+
if workspace_id is None:
|
|
1096
|
+
workspace_id = _get_fabric_context_setting(name="trident.workspace.id")
|
|
1097
|
+
|
|
1098
|
+
try:
|
|
1099
|
+
response = _base_api(
|
|
1100
|
+
request=f"/v1/workspaces/{workspace_id}", client="fabric_sp"
|
|
1101
|
+
).json()
|
|
1102
|
+
except FabricHTTPException:
|
|
1103
|
+
if throw_error:
|
|
1104
|
+
raise ValueError(
|
|
1105
|
+
f"{icons.red_dot} The '{workspace_id}' workspace was not found."
|
|
1106
|
+
)
|
|
1107
|
+
else:
|
|
1108
|
+
return workspace_id
|
|
1109
|
+
|
|
1110
|
+
return response.get("displayName")
|
|
1111
|
+
|
|
1112
|
+
|
|
+ @log
+ def resolve_workspace_name_and_id(
+     workspace: Optional[str | UUID] = None,
+ ) -> Tuple[str, str]:
+     """
+     Obtains the name and ID of the Fabric workspace.
+
+     Parameters
+     ----------
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     str, uuid.UUID
+         The name and ID of the Fabric workspace.
+     """
+
+     if workspace is None:
+         workspace_id = _get_fabric_context_setting(name="trident.workspace.id")
+         workspace_name = resolve_workspace_name(workspace_id)
+     elif _is_valid_uuid(workspace):
+         workspace_id = workspace
+         workspace_name = resolve_workspace_name(workspace_id)
+     else:
+         responses = _base_api(
+             request="/v1/workspaces", client="fabric_sp", uses_pagination=True
+         )
+         workspace_id = None
+         workspace_name = None
+         for r in responses:
+             for v in r.get("value", []):
+                 display_name = v.get("displayName")
+                 if display_name == workspace:
+                     workspace_name = workspace
+                     workspace_id = v.get("id")
+                     break
+
+         if workspace_name is None or workspace_id is None:
+             raise WorkspaceNotFoundException(workspace)
+
+     return workspace_name, workspace_id
+
+
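A minimal sketch (for illustration only, not part of the diff) of consuming the (name, id) tuple; the workspace name is hypothetical.

# Hypothetical example.
(workspace_name, workspace_id) = resolve_workspace_name_and_id("Sales Analytics")
print(f"Resolved '{workspace_name}' -> {workspace_id}")
# Per the code above, an unknown name raises WorkspaceNotFoundException.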
+ @log
+ def resolve_item_id(
+     item: str | UUID, type: Optional[str] = None, workspace: Optional[str | UUID] = None
+ ) -> UUID:
+
+     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+     item_id = None
+
+     if _is_valid_uuid(item):
+         # Check (optional)
+         item_id = item
+         try:
+             _base_api(
+                 request=f"/v1/workspaces/{workspace_id}/items/{item_id}",
+                 client="fabric_sp",
+             )
+         except FabricHTTPException:
+             raise ValueError(
+                 f"{icons.red_dot} The '{item_id}' item was not found in the '{workspace_name}' workspace."
+             )
+     else:
+         if type is None:
+             raise ValueError(
+                 f"{icons.red_dot} The 'type' parameter is required if specifying an item name."
+             )
+         responses = _base_api(
+             request=f"/v1/workspaces/{workspace_id}/items?type={type}",
+             client="fabric_sp",
+             uses_pagination=True,
+         )
+         for r in responses:
+             for v in r.get("value", []):
+                 display_name = v.get("displayName")
+                 if display_name == item:
+                     item_id = v.get("id")
+                     break
+
+         if item_id is None:
+             raise ValueError(
+                 f"{icons.red_dot} There's no item '{item}' of type '{type}' in the '{workspace_name}' workspace."
+             )
+
+     return item_id
+
+
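A hedged usage sketch: when an item is passed by name the 'type' argument is required, as the code above enforces; item, type and workspace names below are hypothetical.

# Hypothetical example.
report_id = resolve_item_id(item="Daily Sales", type="Report", workspace="Sales Analytics")

# Passing a UUID skips the name lookup and only validates that the item exists:
report_id = resolve_item_id(
    item="a7c3c2d8-5b3e-4f5a-9c1e-2d4b6f8a0c1e", workspace="Sales Analytics"
)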
+ @log
+ def resolve_item_name_and_id(
+     item: str | UUID, type: Optional[str] = None, workspace: Optional[str | UUID] = None
+ ) -> Tuple[str, UUID]:
+
+     workspace_id = resolve_workspace_id(workspace)
+     item_id = resolve_item_id(item=item, type=type, workspace=workspace_id)
+     item_name = (
+         _base_api(
+             request=f"/v1/workspaces/{workspace_id}/items/{item_id}", client="fabric_sp"
+         )
+         .json()
+         .get("displayName")
+     )
+
+     return item_name, item_id
+
+
+ @log
+ def resolve_item_name(item_id: UUID, workspace: Optional[str | UUID] = None) -> str:
+
+     workspace_id = resolve_workspace_id(workspace)
+     try:
+         item_name = (
+             _base_api(
+                 request=f"/v1/workspaces/{workspace_id}/items/{item_id}",
+                 client="fabric_sp",
+             )
+             .json()
+             .get("displayName")
+         )
+     except FabricHTTPException:
+         raise ValueError(
+             f"{icons.red_dot} The '{item_id}' item was not found in the '{workspace_id}' workspace."
+         )
+
+     return item_name
+
+
+ @log
+ def _extract_json(dataframe: pd.DataFrame) -> dict:
+
+     payload = dataframe["payload"].iloc[0]
+     json_file = _decode_b64(payload)
+
+     return json.loads(json_file)
+
+
+ @log
+ def _conv_b64(file, json_dumps: bool = True):
+
+     if json_dumps:
+         file = json.dumps(file)
+     return base64.b64encode(file.encode("utf-8")).decode("utf-8")
+
+
+ @log
+ def _decode_b64(file, format: Optional[str] = "utf-8"):
+
+     return base64.b64decode(file).decode(format)
+
+
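The two helpers above are thin wrappers over the standard library; a self-contained round-trip using only base64/json (no package imports) looks like this:

import base64
import json

payload = {"name": "example"}  # sample dict
encoded = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("utf-8")
decoded = json.loads(base64.b64decode(encoded).decode("utf-8"))
assert decoded == payload  # the round trip preserves the structure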
+ @log
+ def is_default_semantic_model(
+     dataset: str | UUID, workspace: Optional[str | UUID] = None
+ ) -> bool:
+     """
+     Identifies whether a semantic model is a default semantic model.
+
+     Parameters
+     ----------
+     dataset : str | uuid.UUID
+         The name or ID of the semantic model.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     bool
+         A True/False value indicating whether the semantic model is a default semantic model.
+     """
+
+     workspace_id = resolve_workspace_id(workspace)
+     (dataset_name, dataset_id) = resolve_item_name_and_id(
+         item=dataset, type="SemanticModel", workspace=workspace_id
+     )
+
+     dfI = fabric.list_items(workspace=workspace_id)
+     filtered_df = dfI.groupby("Display Name").filter(
+         lambda x: set(["Warehouse", "SemanticModel"]).issubset(set(x["Type"]))
+         or set(["Lakehouse", "SemanticModel"]).issubset(set(x["Type"]))
+     )
+     default_semantic_models = filtered_df["Display Name"].unique().tolist()
+
+     return dataset_name in default_semantic_models
+
+
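Illustrative sketch only (dataset and workspace names are hypothetical): a default semantic model shares its display name with a Lakehouse or Warehouse in the same workspace, which is what the grouping above detects.

# Hypothetical example.
if is_default_semantic_model(dataset="SalesLakehouse", workspace="Sales Analytics"):
    print("Skipping: default semantic models are managed by Fabric.")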
+ @log
+ def resolve_item_type(item_id: UUID, workspace: Optional[str | UUID] = None) -> str:
+     """
+     Obtains the item type for a given Fabric Item Id within a Fabric workspace.
+
+     Parameters
+     ----------
+     item_id : uuid.UUID
+         The item/artifact Id.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     str
+         The item type for the item Id.
+     """
+
+     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+     dfI = fabric.list_items(workspace=workspace_id)
+     dfI_filt = dfI[dfI["Id"] == item_id]
+
+     if dfI_filt.empty:
+         raise ValueError(
+             f"{icons.red_dot} Invalid 'item_id' parameter. The '{item_id}' item was not found in the '{workspace_name}' workspace."
+         )
+     return dfI_filt["Type"].iloc[0]
+
+
+ @log
+ def resolve_dataset_from_report(
+     report: str | UUID, workspace: Optional[str | UUID] = None
+ ) -> Tuple[UUID, str, UUID, str]:
+     """
+     Obtains the basic semantic model properties from which the report's data is sourced.
+
+     Parameters
+     ----------
+     report : str | uuid.UUID
+         The name or ID of the Power BI report.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     Tuple[uuid.UUID, str, uuid.UUID, str]
+         The semantic model UUID, semantic model name, semantic model workspace UUID, semantic model workspace name
+     """
+
+     from sempy_labs.report._generate_report import _get_report
+
+     dfR = _get_report(report=report, workspace=workspace)
+     dataset_id = dfR["Dataset Id"].iloc[0]
+     dataset_workspace_id = dfR["Dataset Workspace Id"].iloc[0]
+     dataset_workspace = resolve_workspace_name(workspace_id=dataset_workspace_id)
+     dataset_name = resolve_dataset_name(
+         dataset_id=dataset_id, workspace=dataset_workspace
+     )
+
+     return dataset_id, dataset_name, dataset_workspace_id, dataset_workspace
+
+
+ def _add_part(target_dict, path, payload):
+
+     part = {"path": path, "payload": payload, "payloadType": "InlineBase64"}
+
+     target_dict["definition"]["parts"].append(part)
+
+
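_add_part appends one entry to an item-definition payload; a minimal sketch of the dictionary shape it expects, with field names taken from the code above and values that are purely hypothetical:

# Hypothetical example.
definition = {"definition": {"parts": []}}
_add_part(definition, path="definition.pbir", payload="eyJ2ZXJzaW9uIjogIjEuMCJ9")
# definition["definition"]["parts"] now holds:
# [{"path": "definition.pbir", "payload": "...", "payloadType": "InlineBase64"}]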
+ @log
+ def resolve_workspace_capacity(
+     workspace: Optional[str | UUID] = None,
+ ) -> Tuple[UUID, str]:
+     """
+     Obtains the capacity Id and capacity name for a given workspace.
+
+     Parameters
+     ----------
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or UUID.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     Tuple[uuid.UUID, str]
+         capacity Id; capacity name.
+     """
+     from sempy_labs._capacities import list_capacities
+
+     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+     filter_condition = urllib.parse.quote(workspace_id)
+     dfW = fabric.list_workspaces(filter=f"id eq '{filter_condition}'")
+     capacity_id = dfW["Capacity Id"].iloc[0]
+     dfC = list_capacities()
+     dfC_filt = dfC[dfC["Id"] == capacity_id]
+     if len(dfC_filt) == 1:
+         capacity_name = dfC_filt["Display Name"].iloc[0]
+     else:
+         capacity_name = None
+
+     return capacity_id, capacity_name
+
+
+ @log
+ def get_capacity_id(workspace: Optional[str | UUID] = None) -> UUID:
+     """
+     Obtains the Capacity Id for a given workspace.
+
+     Parameters
+     ----------
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     uuid.UUID
+         The capacity Id.
+     """
+
+     if workspace is None:
+         capacity_id = _get_fabric_context_setting(name="trident.capacity.id")
+     else:
+         (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+         filter_condition = urllib.parse.quote(workspace_id)
+         dfW = fabric.list_workspaces(filter=f"id eq '{filter_condition}'")
+         if len(dfW) == 0:
+             raise ValueError(f"{icons.red_dot} The '{workspace_name}' workspace does not exist.")
+
+         capacity_id = dfW["Capacity Id"].iloc[0]
+
+     return capacity_id
+
+
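A short usage sketch of the two capacity helpers above; the workspace name is hypothetical and the call assumes a Fabric notebook session.

# Hypothetical example.
capacity_id, capacity_name = resolve_workspace_capacity("Sales Analytics")
print(capacity_id, capacity_name)  # capacity_name is None if the capacity is not visible to the caller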
+ @log
+ def get_capacity_name(workspace: Optional[str | UUID] = None) -> str:
+     """
+     Obtains the capacity name for a given workspace.
+
+     Parameters
+     ----------
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     str
+         The capacity name.
+     """
+
+     from sempy_labs._capacities import list_capacities
+
+     capacity_id = get_capacity_id(workspace)
+     dfC = list_capacities()
+     dfC_filt = dfC[dfC["Id"] == capacity_id]
+     if dfC_filt.empty:
+         raise ValueError(
+             f"{icons.red_dot} The '{capacity_id}' capacity Id does not exist."
+         )
+
+     return dfC_filt["Display Name"].iloc[0]
+
+
+ @log
+ def resolve_capacity_name(capacity_id: Optional[UUID] = None) -> str:
+     """
+     Obtains the capacity name for a given capacity Id.
+
+     Parameters
+     ----------
+     capacity_id : uuid.UUID, default=None
+         The capacity Id.
+         Defaults to None which resolves to the capacity name of the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the capacity name of the workspace of the notebook.
+
+     Returns
+     -------
+     str
+         The capacity name.
+     """
+     from sempy_labs._capacities import list_capacities
+
+     if capacity_id is None:
+         return get_capacity_name()
+
+     dfC = list_capacities()
+     dfC_filt = dfC[dfC["Id"] == capacity_id]
+
+     if dfC_filt.empty:
+         raise ValueError(
+             f"{icons.red_dot} The '{capacity_id}' capacity Id does not exist."
+         )
+
+     return dfC_filt["Display Name"].iloc[0]
+
+
+ @log
+ def resolve_capacity_id(capacity: Optional[str | UUID] = None, **kwargs) -> UUID:
+     """
+     Obtains the capacity Id for a given capacity name.
+
+     Parameters
+     ----------
+     capacity : str | uuid.UUID, default=None
+         The capacity name or ID.
+         Defaults to None which resolves to the capacity id of the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the capacity name of the workspace of the notebook.
+
+     Returns
+     -------
+     uuid.UUID
+         The capacity Id.
+     """
+     from sempy_labs._capacities import list_capacities
+
+     if "capacity_name" in kwargs:
+         capacity = kwargs["capacity_name"]
+         print(
+             f"{icons.warning} The 'capacity_name' parameter is deprecated. Please use 'capacity' instead."
+         )
+
+     if capacity is None:
+         return get_capacity_id()
+     if _is_valid_uuid(capacity):
+         return capacity
+
+     dfC = list_capacities()
+     dfC_filt = dfC[dfC["Display Name"] == capacity]
+
+     if dfC_filt.empty:
+         raise ValueError(f"{icons.red_dot} The '{capacity}' capacity does not exist.")
+
+     return dfC_filt["Id"].iloc[0]
+
+
+ def retry(sleep_time: int, timeout_error_message: str):
+     def decorator(func):
+         @wraps(func)
+         def wrapper(*args, **kwargs):
+             start_time = datetime.datetime.now()
+             timeout = datetime.timedelta(minutes=1)
+             while datetime.datetime.now() - start_time <= timeout:
+                 try:
+                     return func(*args, **kwargs)
+                 except Exception:
+                     time.sleep(sleep_time)
+             raise TimeoutError(timeout_error_message)
+
+         return wrapper
+
+     return decorator
+
+
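The decorator above retries the wrapped call on any exception every `sleep_time` seconds and, as written, gives up after a fixed one-minute window. A hedged usage sketch; the wrapped function and the endpoint it polls are hypothetical:

# Hypothetical example.
@retry(sleep_time=5, timeout_error_message="The endpoint did not become ready in time.")
def _poll_endpoint():
    # Any exception raised here triggers another attempt after 5 seconds.
    return _base_api(request="/v1/workspaces")  # illustrative call only

result = _poll_endpoint()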
+ def lro(
+     client,
+     response,
+     status_codes: Optional[List[str]] = [200, 202],
+     sleep_time: Optional[int] = 1,
+     return_status_code: bool = False,
+     job_scheduler: bool = False,
+ ):
+     from sempy_labs._job_scheduler import _get_item_job_instance
+
+     if response.status_code not in status_codes:
+         raise FabricHTTPException(response)
+     if response.status_code == status_codes[0]:
+         if return_status_code:
+             result = response.status_code
+         else:
+             result = response
+     if response.status_code == status_codes[1]:
+         if job_scheduler:
+             status_url = response.headers.get("Location").split("fabric.microsoft.com")[
+                 1
+             ]
+             status = None
+             while status not in ["Completed", "Failed"]:
+                 response = _base_api(request=status_url)
+                 status = response.json().get("status")
+                 time.sleep(3)
+
+             return _get_item_job_instance(url=status_url)
+         else:
+             operation_id = response.headers["x-ms-operation-id"]
+             response = client.get(f"/v1/operations/{operation_id}")
+             response_body = json.loads(response.content)
+             while response_body["status"] not in ["Succeeded", "Failed"]:
+                 time.sleep(sleep_time)
+                 response = client.get(f"/v1/operations/{operation_id}")
+                 response_body = json.loads(response.content)
+             if response_body["status"] != "Succeeded":
+                 raise FabricHTTPException(response)
+             if return_status_code:
+                 result = response.status_code
+             else:
+                 response = client.get(f"/v1/operations/{operation_id}/result")
+                 result = response
+
+     return result
+
+
+ def pagination(client, response):
+
+     responses = []
+     response_json = response.json()
+     responses.append(response_json)
+
+     # Check for pagination
+     continuation_token = response_json.get("continuationToken")
+     continuation_uri = response_json.get("continuationUri")
+
+     # Loop to handle pagination
+     while continuation_token is not None:
+         response = client.get(continuation_uri)
+         response_json = response.json()
+         responses.append(response_json)
+
+         # Update the continuation token and URI for the next iteration
+         continuation_token = response_json.get("continuationToken")
+         continuation_uri = response_json.get("continuationUri")
+
+     return responses
+
+
+ def graph_pagination(response, headers):
+
+     responses = []
+     response_json = response.json()
+     responses.append(response_json)
+
+     # Check for pagination
+     odata_next_link = response_json.get("@odata.nextLink")
+
+     # Loop to handle pagination
+     while odata_next_link is not None:
+         response = requests.get(odata_next_link, headers=headers)
+         response_json = response.json()
+         responses.append(response_json)
+
+         # Update the odata next link for the next iteration
+         odata_next_link = response_json.get("@odata.nextLink")
+
+     return responses
+
+
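Both helpers above implement the same pattern: collect the first page, then follow the continuation link until it is absent. A minimal standalone sketch of that loop; the `client` object and the endpoint are hypothetical stand-ins.

# Illustrative pattern only; `client` and the endpoint are hypothetical.
pages = []
page = client.get("/v1/workspaces").json()
pages.append(page)
while page.get("continuationToken") is not None:
    page = client.get(page["continuationUri"]).json()
    pages.append(page)
rows = [v for p in pages for v in p.get("value", [])]  # flatten all pages into one list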
+ def resolve_deployment_pipeline_id(deployment_pipeline: str | UUID) -> UUID:
+     """
+     Obtains the Id for a given deployment pipeline.
+
+     Parameters
+     ----------
+     deployment_pipeline : str | uuid.UUID
+         The deployment pipeline name or ID.
+
+     Returns
+     -------
+     uuid.UUID
+         The deployment pipeline Id.
+     """
+
+     from sempy_labs._deployment_pipelines import list_deployment_pipelines
+
+     if _is_valid_uuid(deployment_pipeline):
+         return deployment_pipeline
+     else:
+
+         dfP = list_deployment_pipelines()
+         dfP_filt = dfP[dfP["Deployment Pipeline Name"] == deployment_pipeline]
+         if len(dfP_filt) == 0:
+             raise ValueError(
+                 f"{icons.red_dot} The '{deployment_pipeline}' deployment pipeline is not valid."
+             )
+         return dfP_filt["Deployment Pipeline Id"].iloc[0]
+
+
+ class FabricTokenCredential(TokenCredential):
+
+     def get_token(
+         self,
+         scopes: str,
+         claims: Optional[str] = None,
+         tenant_id: Optional[str] = None,
+         enable_cae: bool = False,
+         **kwargs: any,
+     ) -> AccessToken:
+
+         import notebookutils
+
+         token = notebookutils.credentials.getToken("storage")
+         return AccessToken(token, 0)
+
+
+ def _get_adls_client(account_name):
+
+     from azure.storage.filedatalake import DataLakeServiceClient
+
+     account_url = f"https://{account_name}.dfs.core.windows.net"
+
+     return DataLakeServiceClient(account_url, credential=FabricTokenCredential())
+
+
+ def _get_blob_client(workspace_id: UUID, item_id: UUID):
+
+     from azure.storage.blob import BlobServiceClient
+
+     endpoint = _get_fabric_context_setting(name="trident.onelake.endpoint").replace(
+         ".dfs.", ".blob."
+     )
+     url = f"https://{endpoint}/{workspace_id}/{item_id}"
+
+     # account_url = f"https://{account_name}.blob.core.windows.net"
+
+     return BlobServiceClient(url, credential=FabricTokenCredential())
+
+
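A hedged sketch of how the ADLS client built above might be used once created; the storage account name is hypothetical, and list_file_systems is part of the azure-storage-file-datalake SDK rather than this package.

# Hypothetical example; runs inside a Fabric notebook where the token helper works.
client = _get_adls_client("mystorageaccount")  # hypothetical account name
for fs in client.list_file_systems():
    print(fs.name)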
+ @log
+ def resolve_warehouse_id(
+     warehouse: str | UUID, workspace: Optional[str | UUID]
+ ) -> UUID:
+     """
+     Obtains the Id for a given warehouse.
+
+     Parameters
+     ----------
+     warehouse : str | uuid.UUID
+         The warehouse name or ID.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID in which the warehouse resides.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     uuid.UUID
+         The warehouse Id.
+     """
+
+     return resolve_item_id(item=warehouse, type="Warehouse", workspace=workspace)
+
+
+ def get_language_codes(languages: str | List[str]):
+
+     if isinstance(languages, str):
+         languages = [languages]
+
+     for i, lang in enumerate(languages):
+         for k, v in icons.language_map.items():
+             if v == lang.capitalize():
+                 languages[i] = k
+                 break
+
+     return languages
+
+
+ def _get_azure_token_credentials(
+     key_vault_uri: str,
+     key_vault_tenant_id: str,
+     key_vault_client_id: str,
+     key_vault_client_secret: str,
+     audience: str = "https://management.azure.com/.default",
+ ) -> Tuple[str, str, dict]:
+
+     import notebookutils
+     from azure.identity import ClientSecretCredential
+
+     # "https://analysis.windows.net/powerbi/api/.default"
+
+     tenant_id = notebookutils.credentials.getSecret(key_vault_uri, key_vault_tenant_id)
+     client_id = notebookutils.credentials.getSecret(key_vault_uri, key_vault_client_id)
+     client_secret = notebookutils.credentials.getSecret(
+         key_vault_uri, key_vault_client_secret
+     )
+
+     credential = ClientSecretCredential(
+         tenant_id=tenant_id, client_id=client_id, client_secret=client_secret
+     )
+
+     token = credential.get_token(audience).token
+
+     headers = {
+         "Authorization": f"Bearer {token}",
+         "Content-Type": "application/json",
+     }
+
+     return token, credential, headers
+
+
+ def convert_to_alphanumeric_lowercase(input_string):
+
+     # Removes non-alphanumeric characters
+     cleaned_string = re.sub(r"[^a-zA-Z0-9]", "", input_string)
+     cleaned_string = cleaned_string.lower()
+
+     return cleaned_string
+
+
+ @log
+ def resolve_environment_id(
+     environment: str | UUID, workspace: Optional[str | UUID] = None
+ ) -> UUID:
+     """
+     Obtains the environment Id for a given environment.
+
+     Parameters
+     ----------
+     environment: str | uuid.UUID
+         Name of the environment.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID in which the environment resides.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     uuid.UUID
+         The environment Id.
+     """
+
+     return resolve_item_id(item=environment, type="Environment", workspace=workspace)
+
+
+ def _make_clickable(val):
+
+     return f'<a target="_blank" href="{val}">{val}</a>'
+
+
+ @log
+ def convert_to_friendly_case(text: str) -> str:
+     """
+     Converts a string of pascal/camel/snake case to business-friendly case.
+
+     Parameters
+     ----------
+     text : str
+         The text to convert.
+
+     Returns
+     -------
+     str
+         Text converted into a business-friendly text.
+     """
+     if text is not None:
+         text = text.replace("_", " ")
+         # Insert space before each capital letter, avoiding double spaces
+         text = re.sub(r"(?<!\s)(?=[A-Z])", " ", text)
+         # Strip leading/trailing whitespace and capitalize the first letter of each word
+         text = text.strip().title()
+
+     return text
+
+
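A worked example of the conversion above, using only the standard library so it can be run independently of the package:

import re

text = "customerLifetimeValue"
text = text.replace("_", " ")
text = re.sub(r"(?<!\s)(?=[A-Z])", " ", text)  # "customer Lifetime Value"
print(text.strip().title())                    # -> "Customer Lifetime Value"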
+ @log
+ def resolve_notebook_id(
+     notebook: str | UUID, workspace: Optional[str | UUID] = None
+ ) -> UUID:
+     """
+     Obtains the notebook Id for a given notebook.
+
+     Parameters
+     ----------
+     notebook: str | uuid.UUID
+         Name or ID of the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID in which the notebook resides.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     uuid.UUID
+         The notebook Id.
+     """
+
+     return resolve_item_id(item=notebook, type="Notebook", workspace=workspace)
+
+
+ def generate_guid():
+
+     return str(uuid.uuid4())
+
+
1892
|
+
@log
|
|
1893
|
+
def _get_column_aggregate(
|
|
1894
|
+
table_name: str,
|
|
1895
|
+
column_name: str | List[str] = "RunId",
|
|
1896
|
+
lakehouse: Optional[str | UUID] = None,
|
|
1897
|
+
workspace: Optional[str | UUID] = None,
|
|
1898
|
+
function: str = "max",
|
|
1899
|
+
default_value: int = 0,
|
|
1900
|
+
schema_name: Optional[str] = None,
|
|
1901
|
+
) -> int | Dict[str, int]:
|
|
1902
|
+
|
|
1903
|
+
workspace_id = resolve_workspace_id(workspace)
|
|
1904
|
+
lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
|
|
1905
|
+
path = create_abfss_path(lakehouse_id, workspace_id, table_name, schema_name)
|
|
1906
|
+
df = _read_delta_table(path)
|
|
1907
|
+
|
|
1908
|
+
function = function.lower()
|
|
1909
|
+
|
|
1910
|
+
if isinstance(column_name, str):
|
|
1911
|
+
column_name = [column_name]
|
|
1912
|
+
|
|
1913
|
+
if _pure_python_notebook():
|
|
1914
|
+
import polars as pl
|
|
1915
|
+
|
|
1916
|
+
if not isinstance(df, pd.DataFrame):
|
|
1917
|
+
df.to_pandas()
|
|
1918
|
+
|
|
1919
|
+
df = pl.from_pandas(df)
|
|
1920
|
+
|
|
1921
|
+
def get_expr(col):
|
|
1922
|
+
col_dtype = df.schema[col]
|
|
1923
|
+
|
|
1924
|
+
if "approx" in function:
|
|
1925
|
+
return pl.col(col).unique().count().alias(col)
|
|
1926
|
+
elif "distinct" in function:
|
|
1927
|
+
if col_dtype == pl.Decimal:
|
|
1928
|
+
return pl.col(col).cast(pl.Float64).n_unique().alias(col)
|
|
1929
|
+
else:
|
|
1930
|
+
return pl.col(col).n_unique().alias(col)
|
|
1931
|
+
elif function == "sum":
|
|
1932
|
+
return pl.col(col).sum().alias(col)
|
|
1933
|
+
elif function == "min":
|
|
1934
|
+
return pl.col(col).min().alias(col)
|
|
1935
|
+
elif function == "max":
|
|
1936
|
+
return pl.col(col).max().alias(col)
|
|
1937
|
+
elif function == "count":
|
|
1938
|
+
return pl.col(col).count().alias(col)
|
|
1939
|
+
elif function in {"avg", "mean"}:
|
|
1940
|
+
return pl.col(col).mean().alias(col)
|
|
1941
|
+
else:
|
|
1942
|
+
raise ValueError(f"Unsupported function: {function}")
|
|
1943
|
+
|
|
1944
|
+
exprs = [get_expr(col) for col in column_name]
|
|
1945
|
+
aggs = df.select(exprs).to_dict(as_series=False)
|
|
1946
|
+
|
|
1947
|
+
if len(column_name) == 1:
|
|
1948
|
+
result = aggs[column_name[0]][0] or default_value
|
|
1949
|
+
else:
|
|
1950
|
+
result = {col: aggs[col][0] for col in column_name}
|
|
1951
|
+
else:
|
|
1952
|
+
from pyspark.sql.functions import (
|
|
1953
|
+
count,
|
|
1954
|
+
sum,
|
|
1955
|
+
min,
|
|
1956
|
+
max,
|
|
1957
|
+
avg,
|
|
1958
|
+
approx_count_distinct,
|
|
1959
|
+
countDistinct,
|
|
1960
|
+
)
|
|
1961
|
+
|
|
1962
|
+
result = None
|
|
1963
|
+
if "approx" in function:
|
|
1964
|
+
spark_func = approx_count_distinct
|
|
1965
|
+
elif "distinct" in function:
|
|
1966
|
+
spark_func = countDistinct
|
|
1967
|
+
elif function == "count":
|
|
1968
|
+
spark_func = count
|
|
1969
|
+
elif function == "sum":
|
|
1970
|
+
spark_func = sum
|
|
1971
|
+
elif function == "min":
|
|
1972
|
+
spark_func = min
|
|
1973
|
+
elif function == "max":
|
|
1974
|
+
spark_func = max
|
|
1975
|
+
elif function == "avg":
|
|
1976
|
+
spark_func = avg
|
|
1977
|
+
else:
|
|
1978
|
+
raise ValueError(f"Unsupported function: {function}")
|
|
1979
|
+
|
|
1980
|
+
agg_exprs = []
|
|
1981
|
+
for col in column_name:
|
|
1982
|
+
agg_exprs.append(spark_func(col).alias(col))
|
|
1983
|
+
|
|
1984
|
+
aggs = df.agg(*agg_exprs).collect()[0]
|
|
1985
|
+
if len(column_name) == 1:
|
|
1986
|
+
result = aggs[0] or default_value
|
|
1987
|
+
else:
|
|
1988
|
+
result = {col: aggs[col] for col in column_name}
|
|
1989
|
+
|
|
1990
|
+
return result
|
|
1991
|
+
|
|
1992
|
+
|
|
1993
|
+
def _validate_weight(weight: float):
|
|
1994
|
+
|
|
1995
|
+
if weight is not None and (weight <= 0 or weight >= 1):
|
|
1996
|
+
raise ValueError(
|
|
1997
|
+
f"{icons.red_dot} Invalid weight parameter. Weight must be a value between 0 and 1."
|
|
1998
|
+
)
|
|
1999
|
+
|
|
2000
|
+
|
|
2001
|
+
def _create_spark_dataframe(df: pd.DataFrame):
|
|
2002
|
+
|
|
2003
|
+
spark = _create_spark_session()
|
|
2004
|
+
return spark.createDataFrame(df)
|
|
2005
|
+
|
|
2006
|
+
|
|
2007
|
+
def _make_list_unique(my_list):
|
|
2008
|
+
|
|
2009
|
+
return list(set(my_list))
|
|
2010
|
+
|
|
2011
|
+
|
|
2012
|
+
def _get_partition_map(
|
|
2013
|
+
dataset: str, workspace: Optional[str | UUID] = None
|
|
2014
|
+
) -> pd.DataFrame:
|
|
2015
|
+
|
|
2016
|
+
partitions = fabric.evaluate_dax(
|
|
2017
|
+
dataset=dataset,
|
|
2018
|
+
workspace=workspace,
|
|
2019
|
+
dax_string="""
|
|
2020
|
+
select [ID] AS [PartitionID], [TableID], [Name] AS [PartitionName] from $system.tmschema_partitions
|
|
2021
|
+
""",
|
|
2022
|
+
)
|
|
2023
|
+
|
|
2024
|
+
tables = fabric.evaluate_dax(
|
|
2025
|
+
dataset=dataset,
|
|
2026
|
+
workspace=workspace,
|
|
2027
|
+
dax_string="""
|
|
2028
|
+
select [ID] AS [TableID], [Name] AS [TableName] from $system.tmschema_tables
|
|
2029
|
+
""",
|
|
2030
|
+
)
|
|
2031
|
+
|
|
2032
|
+
partition_map = pd.merge(partitions, tables, on="TableID", how="left")
|
|
2033
|
+
partition_map["PartitionID"] = partition_map["PartitionID"].astype(str)
|
|
2034
|
+
partition_counts = partition_map.groupby("TableID")["PartitionID"].transform(
|
|
2035
|
+
"count"
|
|
2036
|
+
)
|
|
2037
|
+
partition_map["Object Name"] = partition_map.apply(
|
|
2038
|
+
lambda row: (
|
|
2039
|
+
f"'{row['TableName']}'[{row['PartitionName']}]"
|
|
2040
|
+
if partition_counts[row.name] > 1
|
|
2041
|
+
else row["TableName"]
|
|
2042
|
+
),
|
|
2043
|
+
axis=1,
|
|
2044
|
+
)
|
|
2045
|
+
return partition_map
|
|
2046
|
+
|
|
2047
|
+
|
|
2048
|
+
def _show_chart(spec, title):
|
|
2049
|
+
|
|
2050
|
+
h = f"""
|
|
2051
|
+
<!DOCTYPE html>
|
|
2052
|
+
<html>
|
|
2053
|
+
<head>
|
|
2054
|
+
<script src="https://cdn.jsdelivr.net/npm/vega@5"></script>
|
|
2055
|
+
<script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>
|
|
2056
|
+
<script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>
|
|
2057
|
+
<style>
|
|
2058
|
+
table, th, td {{
|
|
2059
|
+
border: 10px solid #e7e9eb;
|
|
2060
|
+
border-collapse: collapse;
|
|
2061
|
+
}}
|
|
2062
|
+
</style>
|
|
2063
|
+
</head>
|
|
2064
|
+
<body>
|
|
2065
|
+
<table>
|
|
2066
|
+
<tr>
|
|
2067
|
+
<td style="text-align: center;">
|
|
2068
|
+
<h1>{title}</h1>
|
|
2069
|
+
</td>
|
|
2070
|
+
</tr>
|
|
2071
|
+
<tr>
|
|
2072
|
+
<td>
|
|
2073
|
+
<div id="vis"></div>
|
|
2074
|
+
</td>
|
|
2075
|
+
</tr>
|
|
2076
|
+
</table>
|
|
2077
|
+
<script type="text/javascript">
|
|
2078
|
+
var spec = {spec};
|
|
2079
|
+
var opt = {{"renderer": "canvas", "actions": false}};
|
|
2080
|
+
vegaEmbed("#vis", spec, opt);
|
|
2081
|
+
</script>
|
|
2082
|
+
</body>
|
|
2083
|
+
</html>"""
|
|
2084
|
+
|
|
2085
|
+
display(HTML(h))
|
|
2086
|
+
|
|
2087
|
+
|
|
2088
|
+
def _process_and_display_chart(df, title, widget):
|
|
2089
|
+
|
|
2090
|
+
# Convert time columns to milliseconds
|
|
2091
|
+
df["Start"] = df["Start Time"].astype(np.int64) / int(1e6)
|
|
2092
|
+
df["End"] = df["End Time"].astype(np.int64) / int(1e6)
|
|
2093
|
+
|
|
2094
|
+
# Calculate the time offset for proper Gantt chart rendering
|
|
2095
|
+
Offset = min(df["Start"])
|
|
2096
|
+
df["Start"] = df["Start"] - Offset
|
|
2097
|
+
df["End"] = df["End"] - Offset
|
|
2098
|
+
|
|
2099
|
+
unique_objects = df["Object Name"].nunique()
|
|
2100
|
+
height = min(max(400, unique_objects * 30), 1000)
|
|
2101
|
+
|
|
2102
|
+
# Vega-Lite spec for Gantt chart
|
|
2103
|
+
spec = (
|
|
2104
|
+
"""{
|
|
2105
|
+
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
|
|
2106
|
+
"description": "A simple bar chart with ranged data (aka Gantt Chart).",
|
|
2107
|
+
"data": { "values": """
|
|
2108
|
+
+ df.to_json(orient="records")
|
|
2109
|
+
+ """ },
|
|
2110
|
+
"width": 700,
|
|
2111
|
+
"height": """
|
|
2112
|
+
+ str(height)
|
|
2113
|
+
+ """,
|
|
2114
|
+
"mark": "bar",
|
|
2115
|
+
"encoding": {
|
|
2116
|
+
"y": {
|
|
2117
|
+
"field": "Object Name",
|
|
2118
|
+
"type": "ordinal",
|
|
2119
|
+
"axis": {
|
|
2120
|
+
"labelFontSize": 15,
|
|
2121
|
+
"titleFontSize": 20,
|
|
2122
|
+
"title": "Object"
|
|
2123
|
+
}
|
|
2124
|
+
},
|
|
2125
|
+
"x": {
|
|
2126
|
+
"field": "Start",
|
|
2127
|
+
"type": "quantitative",
|
|
2128
|
+
"title": "milliseconds",
|
|
2129
|
+
"axis": {
|
|
2130
|
+
"titleFontSize": 20
|
|
2131
|
+
}
|
|
2132
|
+
},
|
|
2133
|
+
"x2": {"field": "End"},
|
|
2134
|
+
"color": {
|
|
2135
|
+
"field": "Event Subclass",
|
|
2136
|
+
"scale": {
|
|
2137
|
+
"domain": ["Process", "ExecuteSql"],
|
|
2138
|
+
"range": ["#FFC000","#0070C0"]
|
|
2139
|
+
},
|
|
2140
|
+
"legend": {
|
|
2141
|
+
"labelFontSize": 20,
|
|
2142
|
+
"titleFontSize": 20,
|
|
2143
|
+
"title": "Event Type"
|
|
2144
|
+
}
|
|
2145
|
+
},
|
|
2146
|
+
"tooltip": [
|
|
2147
|
+
{"field": "Duration", "type": "quantitative", "format": ","},
|
|
2148
|
+
{"field": "Cpu Time", "type": "quantitative", "format": ","},
|
|
2149
|
+
{"field": "Event Subclass", "type": "nominal"}
|
|
2150
|
+
]
|
|
2151
|
+
}
|
|
2152
|
+
}"""
|
|
2153
|
+
)
|
|
2154
|
+
|
|
2155
|
+
with widget:
|
|
2156
|
+
widget.clear_output(wait=True)
|
|
2157
|
+
_show_chart(spec, title=title)
|
|
2158
|
+
|
|
2159
|
+
|
|
2160
|
+
def _convert_data_type(input_data_type: str) -> str:
|
|
2161
|
+
|
|
2162
|
+
if not input_data_type:
|
|
2163
|
+
return None
|
|
2164
|
+
|
|
2165
|
+
input_data_type = input_data_type.lower()
|
|
2166
|
+
|
|
2167
|
+
data_type_mapping = {
|
|
2168
|
+
"string": "String",
|
|
2169
|
+
"int": "Int64",
|
|
2170
|
+
"tinyint": "Int64",
|
|
2171
|
+
"smallint": "Int64",
|
|
2172
|
+
"bigint": "Int64",
|
|
2173
|
+
"boolean": "Boolean",
|
|
2174
|
+
"timestamp": "DateTime",
|
|
2175
|
+
"date": "DateTime",
|
|
2176
|
+
"double": "Double",
|
|
2177
|
+
"float": "Double",
|
|
2178
|
+
"binary": "Boolean",
|
|
2179
|
+
"long": "Int64",
|
|
2180
|
+
}
|
|
2181
|
+
|
|
2182
|
+
if input_data_type.startswith("decimal"):
|
|
2183
|
+
return "Double"
|
|
2184
|
+
|
|
2185
|
+
return data_type_mapping.get(input_data_type)
|
|
2186
|
+
|
|
2187
|
+
|
|
2188
|
+
def _is_valid_uuid(
|
|
2189
|
+
guid: str,
|
|
2190
|
+
):
|
|
2191
|
+
"""
|
|
2192
|
+
Validates if a string is a valid GUID in version 4
|
|
2193
|
+
|
|
2194
|
+
Parameters
|
|
2195
|
+
----------
|
|
2196
|
+
guid : str
|
|
2197
|
+
GUID to be validated.
|
|
2198
|
+
|
|
2199
|
+
Returns
|
|
2200
|
+
-------
|
|
2201
|
+
bool
|
|
2202
|
+
Boolean that indicates if the string is a GUID or not.
|
|
2203
|
+
"""
|
|
2204
|
+
|
|
2205
|
+
try:
|
|
2206
|
+
UUID(str(guid), version=4)
|
|
2207
|
+
return True
|
|
2208
|
+
except ValueError:
|
|
2209
|
+
return False
|
|
2210
|
+
|
|
2211
|
+
|
|
2212
|
+
def _get_fabric_context_setting(name: str):
|
|
2213
|
+
|
|
2214
|
+
from synapse.ml.internal_utils.session_utils import get_fabric_context
|
|
2215
|
+
|
|
2216
|
+
return get_fabric_context().get(name)
|
|
2217
|
+
|
|
2218
|
+
|
|
2219
|
+
def get_tenant_id():
|
|
2220
|
+
|
|
2221
|
+
return _get_fabric_context_setting(name="trident.tenant.id")
|
|
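_is_valid_uuid above relies on uuid.UUID raising ValueError for malformed input; a standalone check using only the standard library (the helper name below is hypothetical):

from uuid import UUID

def looks_like_guid(value: str) -> bool:
    # Mirrors the validation pattern used above (hypothetical helper name).
    try:
        UUID(str(value), version=4)
        return True
    except ValueError:
        return False

print(looks_like_guid("12345678-1234-5678-1234-567812345678"))  # True
print(looks_like_guid("not-a-guid"))                            # False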
2222
|
+
|
|
2223
|
+
|
|
2224
|
+
def _base_api(
|
|
2225
|
+
request: str,
|
|
2226
|
+
client: str = "fabric",
|
|
2227
|
+
method: str = "get",
|
|
2228
|
+
payload: Optional[str] = None,
|
|
2229
|
+
status_codes: Optional[int] = 200,
|
|
2230
|
+
uses_pagination: bool = False,
|
|
2231
|
+
lro_return_json: bool = False,
|
|
2232
|
+
lro_return_status_code: bool = False,
|
|
2233
|
+
lro_return_df: bool = False,
|
|
2234
|
+
):
|
|
2235
|
+
import notebookutils
|
|
2236
|
+
from sempy_labs._authentication import _get_headers
|
|
2237
|
+
|
|
2238
|
+
if (lro_return_json or lro_return_status_code) and status_codes is None:
|
|
2239
|
+
status_codes = [200, 202]
|
|
2240
|
+
|
|
2241
|
+
class FabricDefaultCredential(TokenCredential):
|
|
2242
|
+
|
|
2243
|
+
def get_token(self, *scopes, **kwargs) -> AccessToken:
|
|
2244
|
+
from sempy.fabric._credentials import build_access_token
|
|
2245
|
+
|
|
2246
|
+
return build_access_token(notebookutils.credentials.getToken("pbi"))
|
|
2247
|
+
|
|
2248
|
+
if isinstance(status_codes, int):
|
|
2249
|
+
status_codes = [status_codes]
|
|
2250
|
+
|
|
2251
|
+
if client == "fabric":
|
|
2252
|
+
c = fabric.FabricRestClient(credential=FabricDefaultCredential())
|
|
2253
|
+
elif client == "fabric_sp":
|
|
2254
|
+
token = auth.token_provider.get() or FabricDefaultCredential()
|
|
2255
|
+
c = fabric.FabricRestClient(credential=token)
|
|
2256
|
+
elif client in ["azure", "graph", "onelake"]:
|
|
2257
|
+
pass
|
|
2258
|
+
else:
|
|
2259
|
+
raise ValueError(f"{icons.red_dot} The '{client}' client is not supported.")
|
|
2260
|
+
|
|
2261
|
+
if client not in ["azure", "graph", "onelake"]:
|
|
2262
|
+
if method == "get":
|
|
2263
|
+
response = c.get(request)
|
|
2264
|
+
elif method == "delete":
|
|
2265
|
+
response = c.delete(request)
|
|
2266
|
+
elif method == "post":
|
|
2267
|
+
response = c.post(request, json=payload)
|
|
2268
|
+
elif method == "patch":
|
|
2269
|
+
response = c.patch(request, json=payload)
|
|
2270
|
+
elif method == "put":
|
|
2271
|
+
response = c.put(request, json=payload)
|
|
2272
|
+
else:
|
|
2273
|
+
raise NotImplementedError
|
|
2274
|
+
else:
|
|
2275
|
+
if client == "onelake":
|
|
2276
|
+
import notebookutils
|
|
2277
|
+
|
|
2278
|
+
token = notebookutils.credentials.getToken("storage")
|
|
2279
|
+
headers = {"Authorization": f"Bearer {token}"}
|
|
2280
|
+
url = f"https://onelake.table.fabric.microsoft.com/delta/{request}"
|
|
2281
|
+
else:
|
|
2282
|
+
headers = _get_headers(auth.token_provider.get(), audience=client)
|
|
2283
|
+
if client == "graph":
|
|
2284
|
+
url = f"https://graph.microsoft.com/v1.0/{request}"
|
|
2285
|
+
elif client == "azure":
|
|
2286
|
+
url = request
|
|
2287
|
+
response = requests.request(
|
|
2288
|
+
method.upper(),
|
|
2289
|
+
url,
|
|
2290
|
+
headers=headers,
|
|
2291
|
+
json=payload,
|
|
2292
|
+
)
|
|
2293
|
+
|
|
2294
|
+
if lro_return_df:
|
|
2295
|
+
return lro(c, response, status_codes, job_scheduler=True)
|
|
2296
|
+
elif lro_return_json:
|
|
2297
|
+
return lro(c, response, status_codes).json()
|
|
2298
|
+
elif lro_return_status_code:
|
|
2299
|
+
return lro(c, response, status_codes, return_status_code=True)
|
|
2300
|
+
else:
|
|
2301
|
+
if response.status_code not in status_codes:
|
|
2302
|
+
raise FabricHTTPException(response)
|
|
2303
|
+
if uses_pagination:
|
|
2304
|
+
if client == "graph":
|
|
2305
|
+
responses = graph_pagination(response, headers)
|
|
2306
|
+
else:
|
|
2307
|
+
responses = pagination(c, response)
|
|
2308
|
+
return responses
|
|
2309
|
+
else:
|
|
2310
|
+
return response
|
|
2311
|
+
|
|
2312
|
+
|
|
2313
|
+
def _create_dataframe(columns: dict) -> pd.DataFrame:
|
|
2314
|
+
|
|
2315
|
+
return pd.DataFrame(columns=list(columns.keys()))
|
|
2316
|
+
|
|
2317
|
+
|
|
2318
|
+
def _update_dataframe_datatypes(dataframe: pd.DataFrame, column_map: dict):
|
|
2319
|
+
"""
|
|
2320
|
+
Updates the datatypes of columns in a pandas dataframe based on a column map.
|
|
2321
|
+
|
|
2322
|
+
Example:
|
|
2323
|
+
{
|
|
2324
|
+
"Order": "int",
|
|
2325
|
+
"Public": "bool",
|
|
2326
|
+
}
|
|
2327
|
+
"""
|
|
2328
|
+
|
|
2329
|
+
for column, data_type in column_map.items():
|
|
2330
|
+
if column in dataframe.columns:
|
|
2331
|
+
if data_type == "int":
|
|
2332
|
+
dataframe[column] = dataframe[column].astype(int)
|
|
2333
|
+
elif data_type == "bool":
|
|
2334
|
+
dataframe[column] = dataframe[column].astype(bool)
|
|
2335
|
+
elif data_type == "float":
|
|
2336
|
+
dataframe[column] = dataframe[column].astype(float)
|
|
2337
|
+
elif data_type == "datetime":
|
|
2338
|
+
dataframe[column] = pd.to_datetime(dataframe[column])
|
|
2339
|
+
# This is for a special case in admin.list_reports where datetime itself does not work. Coerce fixes the issue.
|
|
2340
|
+
elif data_type == "datetime_coerce":
|
|
2341
|
+
dataframe[column] = pd.to_datetime(dataframe[column], errors="coerce")
|
|
2342
|
+
# This is for list_synonyms since the weight column is float and can have NaN values.
|
|
2343
|
+
elif data_type == "float_fillna":
|
|
2344
|
+
dataframe[column] = dataframe[column].fillna(0).astype(float)
|
|
2345
|
+
# This is to avoid NaN values in integer columns (for delta analyzer)
|
|
2346
|
+
elif data_type == "int_fillna":
|
|
2347
|
+
dataframe[column] = (
|
|
2348
|
+
pd.to_numeric(dataframe[column], errors="coerce")
|
|
2349
|
+
.fillna(0)
|
|
2350
|
+
.astype(int)
|
|
2351
|
+
)
|
|
2352
|
+
elif data_type in ["str", "string"]:
|
|
2353
|
+
try:
|
|
2354
|
+
dataframe[column] = dataframe[column].astype(str)
|
|
2355
|
+
except Exception:
|
|
2356
|
+
pass
|
|
2357
|
+
# Avoid having empty lists or lists with a value of None.
|
|
2358
|
+
elif data_type in ["list"]:
|
|
2359
|
+
dataframe[column] = dataframe[column].apply(
|
|
2360
|
+
lambda x: (
|
|
2361
|
+
None
|
|
2362
|
+
if (type(x) == list and len(x) == 1 and x[0] == None)
|
|
2363
|
+
or (type(x) == list and len(x) == 0)
|
|
2364
|
+
else x
|
|
2365
|
+
)
|
|
2366
|
+
)
|
|
2367
|
+
elif data_type in ["dict"]:
|
|
2368
|
+
dataframe[column] = dataframe[column]
|
|
2369
|
+
else:
|
|
2370
|
+
raise NotImplementedError
|
|
2371
|
+
|
|
2372
|
+
|
|
2373
|
+
def _print_success(item_name, item_type, workspace_name, action="created"):
|
|
2374
|
+
if action == "created":
|
|
2375
|
+
print(
|
|
2376
|
+
f"{icons.green_dot} The '{item_name}' {item_type} has been successfully created in the '{workspace_name}' workspace."
|
|
2377
|
+
)
|
|
2378
|
+
elif action == "deleted":
|
|
2379
|
+
print(
|
|
2380
|
+
f"{icons.green_dot} The '{item_name}' {item_type} has been successfully deleted from the '{workspace_name}' workspace."
|
|
2381
|
+
)
|
|
2382
|
+
else:
|
|
2383
|
+
raise NotImplementedError
|
|
2384
|
+
|
|
2385
|
+
|
|
2386
|
+
def _pure_python_notebook() -> bool:
|
|
2387
|
+
|
|
2388
|
+
from sempy.fabric._environment import _on_jupyter
|
|
2389
|
+
|
|
2390
|
+
return _on_jupyter()
|
|
2391
|
+
|
|
2392
|
+
|
|
2393
|
+
def _create_spark_session():
|
|
2394
|
+
|
|
2395
|
+
if _pure_python_notebook():
|
|
2396
|
+
raise ValueError(
|
|
2397
|
+
f"{icons.red_dot} This function is only available in a PySpark notebook."
|
|
2398
|
+
)
|
|
2399
|
+
|
|
2400
|
+
from pyspark.sql import SparkSession
|
|
2401
|
+
|
|
2402
|
+
return SparkSession.builder.getOrCreate()
|
|
2403
|
+
|
|
2404
|
+
|
|
2405
|
+
def _get_delta_table(path: str) -> str:
|
|
2406
|
+
|
|
2407
|
+
from delta import DeltaTable
|
|
2408
|
+
|
|
2409
|
+
spark = _create_spark_session()
|
|
2410
|
+
|
|
2411
|
+
return DeltaTable.forPath(spark, path)
|
|
2412
|
+
|
|
2413
|
+
|
|
2414
|
+
def _read_delta_table(path: str, to_pandas: bool = True, to_df: bool = False):
|
|
2415
|
+
|
|
2416
|
+
if _pure_python_notebook():
|
|
2417
|
+
from deltalake import DeltaTable
|
|
2418
|
+
|
|
2419
|
+
df = DeltaTable(table_uri=path)
|
|
2420
|
+
if to_pandas:
|
|
2421
|
+
df = df.to_pandas()
|
|
2422
|
+
else:
|
|
2423
|
+
spark = _create_spark_session()
|
|
2424
|
+
df = spark.read.format("delta").load(path)
|
|
2425
|
+
if to_df:
|
|
2426
|
+
df = df.toDF()
|
|
2427
|
+
|
|
2428
|
+
return df
|
|
2429
|
+
|
|
2430
|
+
|
|
2431
|
+
def _read_delta_table_history(path) -> pd.DataFrame:
|
|
2432
|
+
|
|
2433
|
+
if _pure_python_notebook():
|
|
2434
|
+
from deltalake import DeltaTable
|
|
2435
|
+
|
|
2436
|
+
df = pd.DataFrame(DeltaTable(table_uri=path).history())
|
|
2437
|
+
else:
|
|
2438
|
+
from delta import DeltaTable
|
|
2439
|
+
|
|
2440
|
+
spark = _create_spark_session()
|
|
2441
|
+
delta_table = DeltaTable.forPath(spark, path)
|
|
2442
|
+
df = delta_table.history().toPandas()
|
|
2443
|
+
|
|
2444
|
+
return df
|
|
2445
|
+
|
|
2446
|
+
|
|
2447
|
+
def _delta_table_row_count(path: str) -> int:
|
|
2448
|
+
|
|
2449
|
+
if _pure_python_notebook():
|
|
2450
|
+
from deltalake import DeltaTable
|
|
2451
|
+
|
|
2452
|
+
dt = DeltaTable(path)
|
|
2453
|
+
arrow_table = dt.to_pyarrow_table()
|
|
2454
|
+
return arrow_table.num_rows
|
|
2455
|
+
else:
|
|
2456
|
+
return _read_delta_table(path).count()
|
|
2457
|
+
|
|
2458
|
+
|
|
2459
|
+
def _run_spark_sql_query(query):
|
|
2460
|
+
|
|
2461
|
+
spark = _create_spark_session()
|
|
2462
|
+
|
|
2463
|
+
return spark.sql(query)
|
|
2464
|
+
|
|
2465
|
+
|
|
2466
|
+
def _mount(
|
|
2467
|
+
lakehouse: Optional[str | UUID] = None,
|
|
2468
|
+
workspace: Optional[str | UUID] = None,
|
|
2469
|
+
verbose: bool = False,
|
|
2470
|
+
) -> str:
|
|
2471
|
+
"""
|
|
2472
|
+
Mounts a lakehouse to a notebook if it is not already mounted. Returns the local path to the lakehouse.
|
|
2473
|
+
"""
|
|
2474
|
+
|
|
2475
|
+
import notebookutils
|
|
2476
|
+
|
|
2477
|
+
(workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace=workspace)
|
|
2478
|
+
(lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
|
|
2479
|
+
lakehouse=lakehouse, workspace=workspace
|
|
2480
|
+
)
|
|
2481
|
+
|
|
2482
|
+
# Hide display mounts
|
|
2483
|
+
current_setting = ""
|
|
2484
|
+
try:
|
|
2485
|
+
current_setting = notebookutils.conf.get(
|
|
2486
|
+
"spark.notebookutils.displaymountpoint.enabled"
|
|
2487
|
+
)
|
|
2488
|
+
notebookutils.conf.set("spark.notebookutils.displaymountpoint.enabled", "false")
|
|
2489
|
+
except Exception:
|
|
2490
|
+
pass
|
|
2491
|
+
|
|
2492
|
+
lake_path = create_abfss_path(lakehouse_id, workspace_id)
|
|
2493
|
+
mounts = notebookutils.fs.mounts()
|
|
2494
|
+
mount_point = f"/{workspace_name.replace(' ', '')}{lakehouse_name.replace(' ', '')}"
|
|
2495
|
+
if not any(i.get("source") == lake_path for i in mounts):
|
|
2496
|
+
# Mount lakehouse if not mounted
|
|
2497
|
+
notebookutils.fs.mount(lake_path, mount_point)
|
|
2498
|
+
if verbose:
|
|
2499
|
+
print(
|
|
2500
|
+
f"{icons.green_dot} Mounted the '{lakehouse_name}' lakehouse within the '{workspace_name}' to the notebook."
|
|
2501
|
+
)
|
|
2502
|
+
|
|
2503
|
+
mounts = notebookutils.fs.mounts()
|
|
2504
|
+
|
|
2505
|
+
# Set display mounts to original setting
|
|
2506
|
+
try:
|
|
2507
|
+
if current_setting != "false":
|
|
2508
|
+
notebookutils.conf.set(
|
|
2509
|
+
"spark.notebookutils.displaymountpoint.enabled", "true"
|
|
2510
|
+
)
|
|
2511
|
+
except Exception:
|
|
2512
|
+
pass
|
|
2513
|
+
|
|
2514
|
+
local_path = next(
|
|
2515
|
+
i.get("localPath") for i in mounts if i.get("source") == lake_path
|
|
2516
|
+
)
|
|
2517
|
+
|
|
2518
|
+
return local_path
|
|
2519
|
+
|
|
2520
|
+
|
|
2521
|
+
def _get_or_create_workspace(
|
|
2522
|
+
workspace: str,
|
|
2523
|
+
capacity: Optional[str | UUID] = None,
|
|
2524
|
+
description: Optional[str] = None,
|
|
2525
|
+
) -> Tuple[str, UUID]:
|
|
2526
|
+
|
|
2527
|
+
capacity_id = resolve_capacity_id(capacity)
|
|
2528
|
+
dfW = fabric.list_workspaces()
|
|
2529
|
+
dfW_filt_name = dfW[dfW["Name"] == workspace]
|
|
2530
|
+
dfW_filt_id = dfW[dfW["Id"] == workspace]
|
|
2531
|
+
|
|
2532
|
+
# Workspace already exists
|
|
2533
|
+
if (not dfW_filt_name.empty) or (not dfW_filt_id.empty):
|
|
2534
|
+
print(f"{icons.green_dot} The '{workspace}' workspace already exists.")
|
|
2535
|
+
(workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
|
|
2536
|
+
return (workspace_name, workspace_id)
|
|
2537
|
+
|
|
2538
|
+
# Do not create workspace with name of an ID
|
|
2539
|
+
if _is_valid_uuid(workspace):
|
|
2540
|
+
raise ValueError(f"{icons.warning} Must enter a workspace name, not an ID.")
|
|
2541
|
+
|
|
2542
|
+
print(f"{icons.in_progress} Creating the '{workspace}' workspace...")
|
|
2543
|
+
workspace_id = fabric.create_workspace(
|
|
2544
|
+
display_name=workspace, capacity_id=capacity_id, description=description
|
|
2545
|
+
)
|
|
2546
|
+
print(
|
|
2547
|
+
f"{icons.green_dot} The '{workspace}' workspace has been successfully created."
|
|
2548
|
+
)
|
|
2549
|
+
|
|
2550
|
+
return (workspace, workspace_id)
|
|
2551
|
+
|
|
2552
|
+
|
|
2553
|
+
def _get_or_create_lakehouse(
|
|
2554
|
+
lakehouse: str,
|
|
2555
|
+
workspace: Optional[str | UUID] = None,
|
|
2556
|
+
description: Optional[str] = None,
|
|
2557
|
+
) -> Tuple[str, UUID]:
|
|
2558
|
+
|
|
2559
|
+
(workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
|
|
2560
|
+
|
|
2561
|
+
dfI = fabric.list_items(type="Lakehouse", workspace=workspace)
|
|
2562
|
+
dfI_filt_name = dfI[dfI["Display Name"] == lakehouse]
|
|
2563
|
+
dfI_filt_id = dfI[dfI["Id"] == lakehouse]
|
|
2564
|
+
|
|
2565
|
+
if (not dfI_filt_name.empty) or (not dfI_filt_id.empty):
|
|
2566
|
+
print(f"{icons.green_dot} The '{lakehouse}' lakehouse already exists.")
|
|
2567
|
+
(lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
|
|
2568
|
+
lakehouse=lakehouse, workspace=workspace
|
|
2569
|
+
)
|
|
2570
|
+
return (lakehouse_name, lakehouse_id)
|
|
2571
|
+
if _is_valid_uuid(lakehouse):
|
|
2572
|
+
raise ValueError(f"{icons.warning} Must enter a lakehouse name, not an ID.")
|
|
2573
|
+
|
|
2574
|
+
print(f"{icons.in_progress} Creating the '{lakehouse}' lakehouse...")
|
|
2575
|
+
lakehouse_id = fabric.create_lakehouse(
|
|
2576
|
+
display_name=lakehouse, workspace=workspace, description=description
|
|
2577
|
+
)
|
|
2578
|
+
print(
|
|
2579
|
+
f"{icons.green_dot} The '{lakehouse}' lakehouse has been successfully created within the '{workspace_name}' workspace."
|
|
2580
|
+
)
|
|
2581
|
+
|
|
2582
|
+
return (lakehouse, lakehouse_id)
|
|
2583
|
+
|
|
2584
|
+
|
|
2585
|
+
def _get_or_create_warehouse(
|
|
2586
|
+
warehouse: str,
|
|
2587
|
+
workspace: Optional[str | UUID] = None,
|
|
2588
|
+
description: Optional[str] = None,
|
|
2589
|
+
) -> Tuple[str, UUID]:
|
|
2590
|
+
|
|
2591
|
+
from sempy_labs._warehouses import create_warehouse
|
|
2592
|
+
|
|
2593
|
+
(workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
|
|
2594
|
+
|
|
2595
|
+
dfI = fabric.list_items(type="Warehouse", workspace=workspace)
|
|
2596
|
+
dfI_filt_name = dfI[dfI["Display Name"] == warehouse]
|
|
2597
|
+
dfI_filt_id = dfI[dfI["Id"] == warehouse]
|
|
2598
|
+
|
|
2599
|
+
if (not dfI_filt_name.empty) or (not dfI_filt_id.empty):
|
|
2600
|
+
print(f"{icons.green_dot} The '{warehouse}' warehouse already exists.")
|
|
2601
|
+
(warehouse_name, warehouse_id) = resolve_item_name_and_id(
|
|
2602
|
+
warehouse=warehouse, type="Warehouse", workspace=workspace
|
|
2603
|
+
)
|
|
2604
|
+
return (warehouse_name, warehouse_id)
|
|
2605
|
+
if _is_valid_uuid(warehouse):
|
|
2606
|
+
raise ValueError(f"{icons.warning} Must enter a warehouse name, not an ID.")
|
|
2607
|
+
|
|
2608
|
+
print(f"{icons.in_progress} Creating the '{warehouse}' warehouse...")
|
|
2609
|
+
warehouse_id = create_warehouse(
|
|
2610
|
+
display_name=warehouse, workspace=workspace, description=description
|
|
2611
|
+
)
|
|
2612
|
+
print(
|
|
2613
|
+
f"{icons.green_dot} The '{warehouse}' warehouse has been successfully created within the '{workspace_name}' workspace."
|
|
2614
|
+
)
|
|
2615
|
+
|
|
2616
|
+
return (warehouse, warehouse_id)
|
|
2617
|
+
|
|
2618
|
+
|
|
2619
|
+
def _xml_to_dict(element):
|
|
2620
|
+
data = {element.tag: {} if element.attrib else None}
|
|
2621
|
+
children = list(element)
|
|
2622
|
+
if children:
|
|
2623
|
+
temp_dict = {}
|
|
2624
|
+
for child in children:
|
|
2625
|
+
child_dict = _xml_to_dict(child)
|
|
2626
|
+
for key, value in child_dict.items():
|
|
2627
|
+
if key in temp_dict:
|
|
2628
|
+
if isinstance(temp_dict[key], list):
|
|
2629
|
+
temp_dict[key].append(value)
|
|
2630
|
+
else:
|
|
2631
|
+
temp_dict[key] = [temp_dict[key], value]
|
|
2632
|
+
else:
|
|
2633
|
+
temp_dict[key] = value
|
|
2634
|
+
data[element.tag] = temp_dict
|
|
2635
|
+
else:
|
|
2636
|
+
data[element.tag] = (
|
|
2637
|
+
element.text.strip() if element.text and element.text.strip() else None
|
|
2638
|
+
)
|
|
2639
|
+
return data
|
|
2640
|
+
|
|
2641
|
+
|
|
2642
|
+
def file_exists(file_path: str) -> bool:
    """
    Check if a file exists in the given path.

    Parameters
    ----------
    file_path : str
        The path to the file.

    Returns
    -------
    bool
        True if the file exists, False otherwise.
    """

    import notebookutils

    return len(notebookutils.fs.ls(file_path)) > 0

def generate_number_guid():

    guid = uuid.uuid4()
    return str(guid.int & ((1 << 64) - 1))


def get_url_content(url: str):

    if "github.com" in url and "/blob/" in url:
        url = url.replace("github.com", "raw.githubusercontent.com")
        url = url.replace("/blob/", "/")

    response = requests.get(url)
    if response.ok:
        try:
            data = response.json()  # Only works if the response is valid JSON
        except ValueError:
            data = response.text  # Fallback: get raw text content
        return data
    else:
        print(f"Failed to fetch raw content: {response.status_code}")

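A usage sketch for `get_url_content` (the URL below is illustrative): a github.com ".../blob/..." link is rewritten to its raw.githubusercontent.com form before fetching, JSON responses are parsed, anything else is returned as text, and a failed request prints the status code and implicitly returns None.

# Illustrative URL; any github.com "blob" link is rewritten before the request.
content = get_url_content(
    "https://github.com/microsoft/semantic-link-labs/blob/main/README.md"
)
if isinstance(content, str):
    print(content[:200])
else:
    print(content)
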
def generate_hex(length: int = 10) -> str:
    """
    Generate a random hex string of the specified length. Used for generating IDs for report objects (page, visual, bookmark etc.).
    """
    import secrets

    return secrets.token_hex(length)


def decode_payload(payload):

    if is_base64(payload):
        try:
            decoded_payload = json.loads(base64.b64decode(payload).decode("utf-8"))
        except Exception:
            decoded_payload = base64.b64decode(payload)
    elif isinstance(payload, dict):
        decoded_payload = payload
    else:
        raise ValueError("Payload must be a dictionary or a base64 encoded value.")

    return decoded_payload


def is_base64(s):
    try:
        # Add padding if needed
        s_padded = s + "=" * (-len(s) % 4)
        decoded = base64.b64decode(s_padded, validate=True)
        # Optional: check if re-encoding gives the original (excluding padding)
        return base64.b64encode(decoded).decode().rstrip("=") == s.rstrip("=")
    except Exception:
        return False

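A sketch of how `is_base64` and `decode_payload` behave together, assuming the definitions above are in scope; the payload content is made up:

import base64
import json

# Hypothetical payload round-tripped through base64.
raw = {"version": "1.0", "settings": {"locked": True}}
encoded = base64.b64encode(json.dumps(raw).encode("utf-8")).decode("utf-8")

print(is_base64(encoded))       # True
print(decode_payload(encoded))  # decoded back to the original dict
print(decode_payload(raw))      # dicts pass through unchanged
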
def get_jsonpath_value(
    data, path, default=None, remove_quotes=False, fix_true: bool = False
):
    matches = parse(path).find(data)
    result = matches[0].value if matches else default
    if result and remove_quotes and isinstance(result, str):
        if result.startswith("'") and result.endswith("'"):
            result = result[1:-1]
    if fix_true and isinstance(result, str):
        if result.lower() == "true":
            result = True
        elif result.lower() == "false":
            result = False
    return result

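A sketch of `get_jsonpath_value` on a made-up payload, showing the default value, the quote-stripping option, and the string-to-boolean fix:

# Hypothetical keys and values, for illustration only.
payload = {"config": {"theme": "'CY24SU10'", "locked": "true"}}

print(get_jsonpath_value(payload, "$.config.theme", remove_quotes=True))  # CY24SU10
print(get_jsonpath_value(payload, "$.config.locked", fix_true=True))      # True
print(get_jsonpath_value(payload, "$.config.missing", default="n/a"))     # n/a
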
def set_json_value(payload: dict, json_path: str, json_value: str | dict | List):

    jsonpath_expr = parse(json_path)
    matches = jsonpath_expr.find(payload)

    if matches:
        # Update all matches
        for match in matches:
            parent = match.context.value
            path = match.path
            if isinstance(path, Fields):
                parent[path.fields[0]] = json_value
            elif isinstance(path, Index):
                parent[path.index] = json_value
    else:
        # Handle creation
        parts = json_path.lstrip("$").strip(".").split(".")
        current = payload

        for i, part in enumerate(parts):
            is_last = i == len(parts) - 1

            # Detect list syntax like "lockAspect[*]"
            list_match = re.match(r"(\w+)\[\*\]", part)
            if list_match:
                list_key = list_match.group(1)
                if list_key not in current or not isinstance(current[list_key], list):
                    # Initialize with one dict element
                    current[list_key] = [{}]

                for item in current[list_key]:
                    if is_last:
                        # Last part, assign value
                        item = json_value
                    else:
                        # Proceed to next level
                        if not isinstance(item, dict):
                            raise ValueError(
                                f"Expected dict in list for key '{list_key}', got {type(item)}"
                            )
                        next_part = ".".join(parts[i + 1 :])
                        set_json_value(item, "$." + next_part, json_value)
                return payload
            else:
                if part not in current or not isinstance(current[part], dict):
                    current[part] = {} if not is_last else json_value
                elif is_last:
                    current[part] = json_value
                current = current[part]

    return payload

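A sketch of `set_json_value` on a made-up payload: an existing path is updated in place, and a path that does not yet exist is created key by key:

# Hypothetical report-style payload, for illustration only.
payload = {"page": {"displayOption": "FitToPage"}}

set_json_value(payload, "$.page.displayOption", "ActualSize")        # update existing key
set_json_value(payload, "$.page.filters.isHiddenInViewMode", True)   # create missing path
print(payload)
# {'page': {'displayOption': 'ActualSize', 'filters': {'isHiddenInViewMode': True}}}
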
def remove_json_value(path: str, payload: dict, json_path: str, verbose: bool = True):

    if not isinstance(payload, dict):
        raise ValueError(
            f"{icons.red_dot} Cannot apply json_path to non-dictionary payload in '{path}'."
        )

    jsonpath_expr = parse(json_path)
    matches = jsonpath_expr.find(payload)

    if not matches and verbose:
        print(
            f"{icons.red_dot} No match found for '{json_path}' in '{path}'. Skipping."
        )
        return payload

    for match in matches:
        parent = match.context.value
        path_expr = match.path

        if isinstance(path_expr, Fields):
            key = path_expr.fields[0]
            if key in parent:
                del parent[key]
                if verbose:
                    print(f"{icons.green_dot} Removed key '{key}' from '{path}'.")
        elif isinstance(path_expr, Index):
            index = path_expr.index
            if isinstance(parent, list) and 0 <= index < len(parent):
                parent.pop(index)
                if verbose:
                    print(f"{icons.green_dot} Removed index [{index}] from '{path}'.")

    return payload

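A sketch of `remove_json_value` on a made-up payload; the `path` argument is only echoed in the progress messages (the file name below is hypothetical):

# Hypothetical payload and file name, for illustration only.
payload = {"visual": {"title": {"text": "Sales"}, "height": 200}}

remove_json_value(path="report.json", payload=payload, json_path="$.visual.height")
print(payload)
# {'visual': {'title': {'text': 'Sales'}}}
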
def _get_url_prefix() -> str:

    client = fabric.PowerBIRestClient()
    response = client.get("/v1.0/myorg/capacities")
    if response.status_code != 200:
        raise FabricHTTPException("Failed to retrieve URL prefix.")
    context = response.json().get("@odata.context")

    return context.split("/v1.0")[0]