semantic-link-labs 0.8.9-py3-none-any.whl → 0.8.11-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release.
- {semantic_link_labs-0.8.9.dist-info → semantic_link_labs-0.8.11.dist-info}/METADATA +5 -2
- {semantic_link_labs-0.8.9.dist-info → semantic_link_labs-0.8.11.dist-info}/RECORD +76 -75
- sempy_labs/__init__.py +14 -2
- sempy_labs/_authentication.py +31 -2
- sempy_labs/_clear_cache.py +39 -37
- sempy_labs/_connections.py +13 -13
- sempy_labs/_data_pipelines.py +20 -20
- sempy_labs/_dataflows.py +27 -28
- sempy_labs/_dax.py +41 -47
- sempy_labs/_environments.py +26 -23
- sempy_labs/_eventhouses.py +16 -15
- sempy_labs/_eventstreams.py +16 -15
- sempy_labs/_external_data_shares.py +18 -20
- sempy_labs/_gateways.py +57 -11
- sempy_labs/_generate_semantic_model.py +100 -71
- sempy_labs/_git.py +134 -67
- sempy_labs/_helper_functions.py +199 -145
- sempy_labs/_job_scheduler.py +92 -0
- sempy_labs/_kql_databases.py +16 -15
- sempy_labs/_kql_querysets.py +16 -15
- sempy_labs/_list_functions.py +281 -120
- sempy_labs/_managed_private_endpoints.py +19 -17
- sempy_labs/_mirrored_databases.py +51 -48
- sempy_labs/_mirrored_warehouses.py +5 -4
- sempy_labs/_ml_experiments.py +16 -15
- sempy_labs/_ml_models.py +15 -14
- sempy_labs/_model_bpa.py +27 -25
- sempy_labs/_model_bpa_bulk.py +3 -3
- sempy_labs/_model_dependencies.py +60 -28
- sempy_labs/_notebooks.py +73 -39
- sempy_labs/_one_lake_integration.py +23 -26
- sempy_labs/_query_scale_out.py +67 -64
- sempy_labs/_refresh_semantic_model.py +47 -42
- sempy_labs/_spark.py +33 -32
- sempy_labs/_sql.py +12 -9
- sempy_labs/_translations.py +10 -7
- sempy_labs/_vertipaq.py +34 -31
- sempy_labs/_warehouses.py +22 -21
- sempy_labs/_workspace_identity.py +11 -10
- sempy_labs/_workspaces.py +40 -33
- sempy_labs/admin/__init__.py +4 -0
- sempy_labs/admin/_basic_functions.py +44 -12
- sempy_labs/admin/_external_data_share.py +3 -3
- sempy_labs/admin/_items.py +4 -4
- sempy_labs/admin/_scanner.py +7 -5
- sempy_labs/directlake/_directlake_schema_compare.py +18 -14
- sempy_labs/directlake/_directlake_schema_sync.py +18 -12
- sempy_labs/directlake/_dl_helper.py +36 -32
- sempy_labs/directlake/_generate_shared_expression.py +10 -9
- sempy_labs/directlake/_get_directlake_lakehouse.py +16 -13
- sempy_labs/directlake/_get_shared_expression.py +4 -3
- sempy_labs/directlake/_guardrails.py +12 -6
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +15 -9
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +16 -10
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +35 -31
- sempy_labs/directlake/_update_directlake_partition_entity.py +34 -31
- sempy_labs/directlake/_warm_cache.py +87 -65
- sempy_labs/lakehouse/_get_lakehouse_columns.py +10 -8
- sempy_labs/lakehouse/_get_lakehouse_tables.py +10 -9
- sempy_labs/lakehouse/_lakehouse.py +17 -13
- sempy_labs/lakehouse/_shortcuts.py +42 -23
- sempy_labs/migration/_create_pqt_file.py +16 -11
- sempy_labs/migration/_refresh_calc_tables.py +16 -10
- sempy_labs/report/_download_report.py +9 -8
- sempy_labs/report/_generate_report.py +40 -44
- sempy_labs/report/_paginated.py +9 -9
- sempy_labs/report/_report_bpa.py +13 -9
- sempy_labs/report/_report_functions.py +80 -91
- sempy_labs/report/_report_helper.py +8 -4
- sempy_labs/report/_report_list_functions.py +24 -13
- sempy_labs/report/_report_rebind.py +17 -16
- sempy_labs/report/_reportwrapper.py +41 -33
- sempy_labs/tom/_model.py +117 -38
- {semantic_link_labs-0.8.9.dist-info → semantic_link_labs-0.8.11.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.8.9.dist-info → semantic_link_labs-0.8.11.dist-info}/WHEEL +0 -0
- {semantic_link_labs-0.8.9.dist-info → semantic_link_labs-0.8.11.dist-info}/top_level.txt +0 -0
sempy_labs/directlake/_warm_cache.py

@@ -3,34 +3,39 @@ import pandas as pd
 from tqdm.auto import tqdm
 import numpy as np
 import time
-from sempy_labs._helper_functions import
+from sempy_labs._helper_functions import (
+    format_dax_object_name,
+    resolve_dataset_name_and_id,
+    resolve_workspace_name_and_id,
+)
 from sempy_labs._refresh_semantic_model import refresh_semantic_model
 from sempy_labs._model_dependencies import get_measure_dependencies
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
+from uuid import UUID


 @log
 def warm_direct_lake_cache_perspective(
-    dataset: str,
+    dataset: str | UUID,
     perspective: str,
     add_dependencies: bool = False,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ) -> pd.DataFrame:
     """
     Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective.

     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
     perspective : str
         Name of the perspective which contains objects to be used for warming the cache.
     add_dependencies : bool, default=False
         Includes object dependencies in the cache warming process.
-    workspace : str, default=None
-        The Fabric workspace name.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -40,15 +45,16 @@ def warm_direct_lake_cache_perspective(
     Returns a pandas dataframe showing the columns that have been put into memory.
     """

-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

-    dfP = fabric.list_partitions(dataset=
-    if not any(r["Mode"] == "DirectLake" for
+    dfP = fabric.list_partitions(dataset=dataset_id, workspace=workspace_id)
+    if not any(r["Mode"] == "DirectLake" for _, r in dfP.iterrows()):
         raise ValueError(
-            f"{icons.red_dot} The '{
+            f"{icons.red_dot} The '{dataset_name}' semantic model in the '{workspace_name}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
         )

-    dfPersp = fabric.list_perspectives(dataset=
+    dfPersp = fabric.list_perspectives(dataset=dataset_id, workspace=workspace_id)
     dfPersp["DAX Object Name"] = format_dax_object_name(
         dfPersp["Table Name"], dfPersp["Object Name"]
     )
@@ -65,7 +71,7 @@ def warm_direct_lake_cache_perspective(

     if add_dependencies:
         # Measure dependencies
-        md = get_measure_dependencies(
+        md = get_measure_dependencies(dataset_id, workspace_id)
         md["Referenced Full Object"] = format_dax_object_name(
             md["Referenced Table"], md["Referenced Object"]
         )
@@ -78,7 +84,7 @@ def warm_direct_lake_cache_perspective(

         # Hierarchy dependencies
         dfPersp_h = dfPersp_filt[(dfPersp_filt["Object Type"] == "Hierarchy")]
-        dfH = fabric.list_hierarchies(dataset=
+        dfH = fabric.list_hierarchies(dataset=dataset_id, workspace=workspace_id)
         dfH["Hierarchy Object"] = format_dax_object_name(
             dfH["Table Name"], dfH["Hierarchy Name"]
         )
@@ -92,7 +98,7 @@ def warm_direct_lake_cache_perspective(

         # Relationship dependencies
         unique_table_names = dfPersp_filt["Table Name"].unique()
-        dfR = fabric.list_relationships(dataset=
+        dfR = fabric.list_relationships(dataset=dataset_id, workspace=workspace_id)
         dfR["From Object"] = format_dax_object_name(
             dfR["From Table"], dfR["From Column"]
         )
@@ -120,41 +126,22 @@ def warm_direct_lake_cache_perspective(
     df["Table Name"] = df["Table Name"].str[1:-1]
     df["Column Name"] = df["Column Name"].str[0:-1]

-
-
-    for tableName in (bar := tqdm(tbls)):
-        filtered_list = [
-            value for value in merged_list_unique if value.startswith(f"{tableName}[")
-        ]
-        bar.set_description(f"Warming the '{tableName}' table...")
-        css = ",".join(map(str, filtered_list))
-        dax = """EVALUATE TOPN(1,SUMMARIZECOLUMNS(""" + css + """))"""
-        fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)
-
-    print(f"{icons.green_dot} The following columns have been put into memory:")
-
-    new_column_order = ["Table Name", "Column Name", "DAX Object Name"]
-    df = df.reindex(columns=new_column_order)
-    df = df[["Table Name", "Column Name"]].sort_values(
-        by=["Table Name", "Column Name"], ascending=True
-    )
-
-    return df
+    return _put_columns_into_memory(dataset=dataset, workspace=workspace, col_df=df)


 @log
 def warm_direct_lake_cache_isresident(
-    dataset: str, workspace: Optional[str] = None
+    dataset: str | UUID, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
     Performs a refresh on the semantic model and puts the columns which were in memory prior to the refresh back into memory.

     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
-    workspace : str, default=None
-        The Fabric workspace name.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -164,46 +151,81 @@ def warm_direct_lake_cache_isresident(
     Returns a pandas dataframe showing the columns that have been put into memory.
     """

-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

-    dfP = fabric.list_partitions(dataset=
-    if not any(r["Mode"] == "DirectLake" for
+    dfP = fabric.list_partitions(dataset=dataset_id, workspace=workspace_id)
+    if not any(r["Mode"] == "DirectLake" for _, r in dfP.iterrows()):
         raise ValueError(
-            f"{icons.red_dot} The '{
+            f"{icons.red_dot} The '{dataset_name}' semantic model in the '{workspace_name}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
         )

     # Identify columns which are currently in memory (Is Resident = True)
-    dfC = fabric.list_columns(dataset=
-    dfC["DAX Object Name"] = format_dax_object_name(
-        dfC["Table Name"], dfC["Column Name"]
-    )
+    dfC = fabric.list_columns(dataset=dataset_id, workspace=workspace_id, extended=True)
     dfC_filtered = dfC[dfC["Is Resident"] == True]

     if len(dfC_filtered) == 0:
         raise ValueError(
-            f"{icons.yellow_dot} At present, no columns are in memory in the '{
+            f"{icons.yellow_dot} At present, no columns are in memory in the '{dataset_name}' semantic model in the '{workspace_name}' workspace."
         )

     # Refresh/frame dataset
-    refresh_semantic_model(
+    refresh_semantic_model(
+        dataset=dataset_id, refresh_type="full", workspace=workspace_id
+    )
     time.sleep(2)

-
-
+    return _put_columns_into_memory(
+        dataset=dataset, workspace=workspace, col_df=dfC_filtered
+    )
+
+
+def _put_columns_into_memory(dataset, workspace, col_df, return_dataframe: bool = True):
+
+    row_limit = 1000000
+
+    dfT = fabric.list_tables(dataset=dataset, workspace=workspace, extended=True)
+    col_df = col_df.copy()
+
+    col_df["DAX Object"] = format_dax_object_name(
+        col_df["Table Name"], col_df["Column Name"]
+    )
+    tbls = col_df["Table Name"].unique()
+
     for table_name in (bar := tqdm(tbls)):
-
-
-
-.
-.
+        dfT_filt = dfT[dfT["Name"] == table_name]
+        col_df_filt = col_df[col_df["Table Name"] == table_name]
+        if not dfT_filt.empty:
+            row_count = dfT_filt["Row Count"].iloc[0]
+            bar.set_description(f"Warming the '{table_name}' table...")
+            if row_count < row_limit:
+                columns = col_df_filt["DAX Object"].tolist()
+                css = ", ".join(columns)
+                dax = f"EVALUATE TOPN(1, SELECTCOLUMNS('{table_name}', {css}))"
+                fabric.evaluate_dax(
+                    dataset=dataset, dax_string=dax, workspace=workspace
+                )
+            else:
+                for _, r in col_df_filt.iterrows():
+                    dax_object = r["DAX Object"]
+                    dax = f"""EVALUATE TOPN(1, SELECTCOLUMNS('{table_name}', {dax_object}))"""
+                    fabric.evaluate_dax(
+                        dataset=dataset, dax_string=dax, workspace=workspace
+                    )
+
+    if return_dataframe:
+        print(
+            f"{icons.green_dot} The following columns have been put into memory. Temperature indicates the current column temperature."
        )
-    dax = f"""EVALUATE TOPN(1,SUMMARIZECOLUMNS({css}))"""
-    fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)

-
-
-
+        dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True)
+        dfC["DAX Object"] = format_dax_object_name(
+            dfC["Table Name"], dfC["Column Name"]
+        )
+        dfC_filt = dfC[dfC["DAX Object"].isin(col_df["DAX Object"].values)]

-
-
-
+        return (
+            dfC_filt[["Table Name", "Column Name", "Is Resident", "Temperature"]]
+            .sort_values(by=["Table Name", "Column Name"], ascending=True)
+            .reset_index(drop=True)
+        )
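Taken together, these hunks route both warming functions through the new _put_columns_into_memory helper: tables under the one-million-row limit are warmed with a single SELECTCOLUMNS query, larger tables column by column. A minimal usage sketch, assuming a Fabric notebook with semantic-link-labs 0.8.11 installed and the functions re-exported from sempy_labs.directlake as in earlier releases; the model name, perspective name, and workspace UUID are placeholders:

from uuid import UUID

from sempy_labs.directlake import (
    warm_direct_lake_cache_isresident,
    warm_direct_lake_cache_perspective,
)

workspace = UUID("00000000-0000-0000-0000-000000000000")  # placeholder ID

# Warm every column exposed by a perspective, plus measure/hierarchy/
# relationship dependencies; dataset and workspace may be names or UUIDs.
df = warm_direct_lake_cache_perspective(
    dataset="Sales Model",
    perspective="Reporting",
    add_dependencies=True,
    workspace=workspace,
)

# Full refresh, then re-warm whatever was resident beforehand.
df = warm_direct_lake_cache_isresident(dataset="Sales Model", workspace=workspace)
print(df)  # Table Name, Column Name, Is Resident, Temperature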
sempy_labs/lakehouse/_get_lakehouse_columns.py

@@ -5,14 +5,16 @@ from sempy_labs._helper_functions import (
     resolve_lakehouse_name,
     format_dax_object_name,
     resolve_lakehouse_id,
+    resolve_workspace_name_and_id,
 )
 from typing import Optional
 from sempy._utils._log import log
+from uuid import UUID


 @log
 def get_lakehouse_columns(
-    lakehouse: Optional[str] = None, workspace: Optional[str] = None
+    lakehouse: Optional[str] = None, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
     Shows the tables and columns of a lakehouse and their respective properties.
@@ -22,8 +24,8 @@ def get_lakehouse_columns(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    lakehouse_workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    lakehouse_workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -46,18 +48,18 @@ def get_lakehouse_columns(
         ]
     )

-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)

     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id,
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)
     else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse,
+        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)

     spark = SparkSession.builder.getOrCreate()

     tables = get_lakehouse_tables(
-        lakehouse=lakehouse, workspace=
+        lakehouse=lakehouse, workspace=workspace_id, extended=False, count_rows=False
     )
     tables_filt = tables[tables["Format"] == "delta"]

@@ -70,7 +72,7 @@ def get_lakehouse_columns(
     for cName, data_type in sparkdf.dtypes:
         tc = format_dax_object_name(tName, cName)
         new_data = {
-            "Workspace Name":
+            "Workspace Name": workspace_name,
             "Lakehouse Name": lakehouse,
             "Table Name": tName,
             "Column Name": cName,
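The same resolve_workspace_name_and_id pattern now backs get_lakehouse_columns, so the workspace argument accepts either a name or a UUID. A short sketch, assuming a Fabric notebook; the lakehouse and workspace identifiers are placeholders:

from sempy_labs.lakehouse import get_lakehouse_columns

# Both calls are equivalent once the workspace resolves to the same ID.
df = get_lakehouse_columns(lakehouse="MyLakehouse", workspace="My Workspace")
df = get_lakehouse_columns(
    lakehouse="MyLakehouse",
    workspace="00000000-0000-0000-0000-000000000000",  # placeholder UUID
)
print(df.head())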
sempy_labs/lakehouse/_get_lakehouse_tables.py

@@ -18,12 +18,13 @@ from typing import Optional
 import sempy_labs._icons as icons
 from sempy._utils._log import log
 from sempy.fabric.exceptions import FabricHTTPException
+from uuid import UUID


 @log
 def get_lakehouse_tables(
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
     extended: bool = False,
     count_rows: bool = False,
     export: bool = False,
@@ -38,8 +39,8 @@ def get_lakehouse_tables(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     extended : bool, default=False
@@ -66,13 +67,13 @@ def get_lakehouse_tables(
         ]
     )

-    (
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)

     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id,
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)
     else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse,
+        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)

     if count_rows:  # Setting countrows defaults to extended=True
         extended = True
@@ -104,7 +105,7 @@ def get_lakehouse_tables(
     for r in responses:
         for i in r.get("data", []):
             new_data = {
-                "Workspace Name":
+                "Workspace Name": workspace_name,
                 "Lakehouse Name": lakehouse,
                 "Table Name": i.get("name"),
                 "Format": i.get("format"),
@@ -117,7 +118,7 @@ def get_lakehouse_tables(
     df = pd.concat(dfs, ignore_index=True)

     if extended:
-        sku_value = get_sku_size(
+        sku_value = get_sku_size(workspace_id)
         guardrail = get_directlake_guardrails_for_sku(sku_value)
         spark = SparkSession.builder.getOrCreate()
         df["Files"] = None
@@ -182,7 +183,7 @@ def get_lakehouse_tables(

         lakehouse_id = fabric.get_lakehouse_id()
         lakehouse = resolve_lakehouse_name(
-            lakehouse_id=lakehouse_id, workspace=
+            lakehouse_id=lakehouse_id, workspace=workspace_id
         )
         lakeTName = "lakehouse_table_details"
         lakeT_filt = df[df["Table Name"] == lakeTName]
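Note the context line above: count_rows=True silently promotes the call to extended=True, which is also the path where the Direct Lake guardrails are computed from the capacity SKU. A usage sketch, assuming a Fabric notebook; the lakehouse and workspace names are placeholders:

from sempy_labs.lakehouse import get_lakehouse_tables

tables = get_lakehouse_tables(
    lakehouse="MyLakehouse",
    workspace="My Workspace",  # name or UUID as of this release
    extended=True,
    count_rows=True,           # implies extended=True
)
delta_tables = tables[tables["Format"] == "delta"]
print(delta_tables[["Table Name", "Format"]])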
sempy_labs/lakehouse/_lakehouse.py

@@ -1,8 +1,12 @@
 import sempy.fabric as fabric
 from tqdm.auto import tqdm
-from sempy_labs._helper_functions import
+from sempy_labs._helper_functions import (
+    resolve_lakehouse_name,
+    resolve_workspace_name_and_id,
+)
 from typing import List, Optional, Union
 from sempy._utils._log import log
+from uuid import UUID


 def lakehouse_attached() -> bool:
@@ -29,7 +33,7 @@ def lakehouse_attached() -> bool:
 def optimize_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Runs the `OPTIMIZE <https://docs.delta.io/latest/optimizations-oss.html>`_ function over the specified lakehouse tables.
@@ -42,8 +46,8 @@ def optimize_lakehouse_tables(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
@@ -52,13 +56,13 @@ def optimize_lakehouse_tables(
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
     from delta import DeltaTable

-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)

     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id,
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)

-    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=
+    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace_id)
     lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]

     if isinstance(tables, str):
@@ -83,7 +87,7 @@ def optimize_lakehouse_tables(
 def vacuum_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
     retain_n_hours: Optional[int] = None,
 ):
     """
@@ -96,8 +100,8 @@ def vacuum_lakehouse_tables(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     retain_n_hours : int, default=None
@@ -111,13 +115,13 @@ def vacuum_lakehouse_tables(
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
     from delta import DeltaTable

-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)

     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id,
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)

-    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=
+    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace_id)
     lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]

     if isinstance(tables, str):
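optimize_lakehouse_tables and vacuum_lakehouse_tables pick up the same str | UUID workspace handling. A sketch of both maintenance calls, assuming a Fabric notebook; the table names are placeholders, and retain_n_hours is the vacuum retention window from the signature above:

from sempy_labs.lakehouse import (
    lakehouse_attached,
    optimize_lakehouse_tables,
    vacuum_lakehouse_tables,
)

if lakehouse_attached():
    # tables accepts a single name or a list of names; None presumably
    # targets all delta tables, per the "Format" == "delta" filter above.
    optimize_lakehouse_tables(tables=["FactSales", "DimDate"])
    vacuum_lakehouse_tables(tables="FactSales", retain_n_hours=168)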
sempy_labs/lakehouse/_shortcuts.py

@@ -7,14 +7,15 @@ from sempy_labs._helper_functions import (
 from typing import Optional
 import sempy_labs._icons as icons
 from sempy.fabric.exceptions import FabricHTTPException
+from uuid import UUID


 def create_shortcut_onelake(
     table_name: str,
     source_lakehouse: str,
-    source_workspace: str,
+    source_workspace: str | UUID,
     destination_lakehouse: str,
-    destination_workspace: Optional[str] = None,
+    destination_workspace: Optional[str | UUID] = None,
     shortcut_name: Optional[str] = None,
 ):
     """
@@ -28,28 +29,44 @@ def create_shortcut_onelake(
         The table name for which a shortcut will be created.
     source_lakehouse : str
         The Fabric lakehouse in which the table resides.
-    source_workspace : str
-        The name of the Fabric workspace in which the source lakehouse exists.
+    source_workspace : str | uuid.UUID
+        The name or ID of the Fabric workspace in which the source lakehouse exists.
     destination_lakehouse : str
         The Fabric lakehouse in which the shortcut will be created.
-    destination_workspace : str, default=None
-        The name of the Fabric workspace in which the shortcut will be created.
+    destination_workspace : str | uuid.UUID, default=None
+        The name or ID of the Fabric workspace in which the shortcut will be created.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     shortcut_name : str, default=None
         The name of the shortcut 'table' to be created. This defaults to the 'table_name' parameter value.
     """

-
-
+    (source_workspace_name, source_workspace_id) = resolve_workspace_name_and_id(
+        source_workspace
+    )
+    source_lakehouse_id = resolve_lakehouse_id(source_lakehouse, source_workspace_id)
+    source_lakehouse_name = fabric.resolve_item_name(
+        item_id=source_lakehouse_id, type="Lakehouse", workspace=source_workspace_id
+    )

     if destination_workspace is None:
-
+        destination_workspace_name = source_workspace_name
+        destination_workspace_id = source_workspace_id
+    else:
+        destination_workspace_name = destination_workspace
+        destination_workspace_id = fabric.resolve_workspace_id(
+            destination_workspace_name
+        )

-
-
+    destination_workspace_id = fabric.resolve_workspace_id(destination_workspace)
+    destination_lakehouse_id = resolve_lakehouse_id(
         destination_lakehouse, destination_workspace
     )
+    destination_lakehouse_name = fabric.resolve_item_name(
+        item_id=destination_lakehouse_id,
+        type="Lakehouse",
+        workspace=destination_workspace_id,
+    )

     if shortcut_name is None:
         shortcut_name = table_name
@@ -62,8 +79,8 @@ def create_shortcut_onelake(
         "name": shortcut_name.replace(" ", ""),
         "target": {
             "oneLake": {
-                "workspaceId":
-                "itemId":
+                "workspaceId": source_workspace_id,
+                "itemId": source_lakehouse_id,
                 "path": tablePath,
             }
         },
@@ -71,13 +88,13 @@ def create_shortcut_onelake(

     try:
         response = client.post(
-            f"/v1/workspaces/{
+            f"/v1/workspaces/{destination_workspace_id}/items/{destination_lakehouse_id}/shortcuts",
             json=request_body,
         )
         if response.status_code == 201:
             print(
-                f"{icons.green_dot} The shortcut '{shortcut_name}' was created in the '{
-                f" the '{
+                f"{icons.green_dot} The shortcut '{shortcut_name}' was created in the '{destination_lakehouse_name}' lakehouse within"
+                f" the '{destination_workspace_name} workspace. It is based on the '{table_name}' table in the '{source_lakehouse_name}' lakehouse within the '{source_workspace_name}' workspace."
             )
         else:
             print(response.status_code)
@@ -166,7 +183,9 @@ def create_shortcut(


 def delete_shortcut(
-    shortcut_name: str,
+    shortcut_name: str,
+    lakehouse: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Deletes a shortcut.
@@ -180,19 +199,19 @@ def delete_shortcut(
     lakehouse : str, default=None
         The Fabric lakehouse name in which the shortcut resides.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The name of the Fabric workspace in which lakehouse resides.
+    workspace : str | UUID, default=None
+        The name or ID of the Fabric workspace in which lakehouse resides.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """

-    (
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)

     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id,
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)
     else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse,
+        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)

     client = fabric.FabricRestClient()
     response = client.delete(
@@ -202,5 +221,5 @@ def delete_shortcut(
     if response.status_code != 200:
         raise FabricHTTPException(response)
     print(
-        f"{icons.green_dot} The '{shortcut_name}' shortcut in the '{lakehouse}' within the '{
+        f"{icons.green_dot} The '{shortcut_name}' shortcut in the '{lakehouse}' within the '{workspace_name}' workspace has been deleted."
     )