semantic-link-labs 0.8.10__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff covers publicly released versions of the package as they appear in their public registry and is provided for informational purposes only.
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/METADATA +6 -5
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/RECORD +81 -80
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +34 -3
- sempy_labs/_authentication.py +80 -4
- sempy_labs/_capacities.py +770 -200
- sempy_labs/_capacity_migration.py +7 -37
- sempy_labs/_clear_cache.py +37 -35
- sempy_labs/_connections.py +13 -13
- sempy_labs/_data_pipelines.py +20 -20
- sempy_labs/_dataflows.py +27 -28
- sempy_labs/_dax.py +41 -47
- sempy_labs/_deployment_pipelines.py +1 -1
- sempy_labs/_environments.py +26 -23
- sempy_labs/_eventhouses.py +16 -15
- sempy_labs/_eventstreams.py +16 -15
- sempy_labs/_external_data_shares.py +18 -20
- sempy_labs/_gateways.py +16 -14
- sempy_labs/_generate_semantic_model.py +107 -62
- sempy_labs/_git.py +105 -43
- sempy_labs/_helper_functions.py +251 -194
- sempy_labs/_job_scheduler.py +227 -0
- sempy_labs/_kql_databases.py +16 -15
- sempy_labs/_kql_querysets.py +16 -15
- sempy_labs/_list_functions.py +150 -126
- sempy_labs/_managed_private_endpoints.py +19 -17
- sempy_labs/_mirrored_databases.py +51 -48
- sempy_labs/_mirrored_warehouses.py +5 -4
- sempy_labs/_ml_experiments.py +16 -15
- sempy_labs/_ml_models.py +15 -14
- sempy_labs/_model_bpa.py +210 -207
- sempy_labs/_model_bpa_bulk.py +2 -2
- sempy_labs/_model_bpa_rules.py +3 -3
- sempy_labs/_model_dependencies.py +55 -29
- sempy_labs/_notebooks.py +29 -25
- sempy_labs/_one_lake_integration.py +23 -26
- sempy_labs/_query_scale_out.py +75 -64
- sempy_labs/_refresh_semantic_model.py +25 -26
- sempy_labs/_spark.py +33 -32
- sempy_labs/_sql.py +19 -12
- sempy_labs/_translations.py +10 -7
- sempy_labs/_vertipaq.py +38 -33
- sempy_labs/_warehouses.py +26 -25
- sempy_labs/_workspace_identity.py +11 -10
- sempy_labs/_workspaces.py +40 -33
- sempy_labs/admin/_basic_functions.py +166 -115
- sempy_labs/admin/_domains.py +7 -2
- sempy_labs/admin/_external_data_share.py +3 -3
- sempy_labs/admin/_git.py +4 -1
- sempy_labs/admin/_items.py +11 -6
- sempy_labs/admin/_scanner.py +10 -5
- sempy_labs/directlake/_directlake_schema_compare.py +25 -16
- sempy_labs/directlake/_directlake_schema_sync.py +24 -12
- sempy_labs/directlake/_dl_helper.py +74 -55
- sempy_labs/directlake/_generate_shared_expression.py +10 -9
- sempy_labs/directlake/_get_directlake_lakehouse.py +32 -36
- sempy_labs/directlake/_get_shared_expression.py +4 -3
- sempy_labs/directlake/_guardrails.py +12 -6
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +15 -9
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +16 -10
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +35 -31
- sempy_labs/directlake/_update_directlake_partition_entity.py +39 -31
- sempy_labs/directlake/_warm_cache.py +87 -65
- sempy_labs/lakehouse/_get_lakehouse_columns.py +23 -26
- sempy_labs/lakehouse/_get_lakehouse_tables.py +27 -38
- sempy_labs/lakehouse/_lakehouse.py +7 -20
- sempy_labs/lakehouse/_shortcuts.py +42 -23
- sempy_labs/migration/_create_pqt_file.py +16 -11
- sempy_labs/migration/_refresh_calc_tables.py +16 -10
- sempy_labs/report/_download_report.py +9 -8
- sempy_labs/report/_generate_report.py +85 -44
- sempy_labs/report/_paginated.py +9 -9
- sempy_labs/report/_report_bpa.py +15 -11
- sempy_labs/report/_report_functions.py +80 -91
- sempy_labs/report/_report_helper.py +8 -4
- sempy_labs/report/_report_list_functions.py +24 -13
- sempy_labs/report/_report_rebind.py +17 -16
- sempy_labs/report/_reportwrapper.py +41 -33
- sempy_labs/tom/_model.py +139 -21
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/top_level.txt +0 -0
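Before and after upgrading, the installed release can be confirmed from a notebook; a minimal sketch, assuming the distribution name `semantic-link-labs` as in the wheel filenames above:

```python
import importlib.metadata

# Prints the installed version, e.g. "0.8.10" or "0.9.0".
print(importlib.metadata.version("semantic-link-labs"))
```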
--- sempy_labs/directlake/_warm_cache.py (0.8.10)
+++ sempy_labs/directlake/_warm_cache.py (0.9.0)
@@ -3,34 +3,39 @@ import pandas as pd
 from tqdm.auto import tqdm
 import numpy as np
 import time
-from sempy_labs._helper_functions import
+from sempy_labs._helper_functions import (
+    format_dax_object_name,
+    resolve_dataset_name_and_id,
+    resolve_workspace_name_and_id,
+)
 from sempy_labs._refresh_semantic_model import refresh_semantic_model
 from sempy_labs._model_dependencies import get_measure_dependencies
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
+from uuid import UUID


 @log
 def warm_direct_lake_cache_perspective(
-    dataset: str,
+    dataset: str | UUID,
     perspective: str,
     add_dependencies: bool = False,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ) -> pd.DataFrame:
     """
     Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective.

     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
     perspective : str
         Name of the perspective which contains objects to be used for warming the cache.
     add_dependencies : bool, default=False
         Includes object dependencies in the cache warming process.
-    workspace : str, default=None
-        The Fabric workspace name.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -40,15 +45,16 @@ def warm_direct_lake_cache_perspective(
         Returns a pandas dataframe showing the columns that have been put into memory.
     """

-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

-    dfP = fabric.list_partitions(dataset=
-    if not any(r["Mode"] == "DirectLake" for
+    dfP = fabric.list_partitions(dataset=dataset_id, workspace=workspace_id)
+    if not any(r["Mode"] == "DirectLake" for _, r in dfP.iterrows()):
         raise ValueError(
-            f"{icons.red_dot} The '{
+            f"{icons.red_dot} The '{dataset_name}' semantic model in the '{workspace_name}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
         )

-    dfPersp = fabric.list_perspectives(dataset=
+    dfPersp = fabric.list_perspectives(dataset=dataset_id, workspace=workspace_id)
     dfPersp["DAX Object Name"] = format_dax_object_name(
         dfPersp["Table Name"], dfPersp["Object Name"]
     )
@@ -65,7 +71,7 @@ def warm_direct_lake_cache_perspective(

     if add_dependencies:
         # Measure dependencies
-        md = get_measure_dependencies(
+        md = get_measure_dependencies(dataset_id, workspace_id)
         md["Referenced Full Object"] = format_dax_object_name(
             md["Referenced Table"], md["Referenced Object"]
         )
@@ -78,7 +84,7 @@ def warm_direct_lake_cache_perspective(

         # Hierarchy dependencies
         dfPersp_h = dfPersp_filt[(dfPersp_filt["Object Type"] == "Hierarchy")]
-        dfH = fabric.list_hierarchies(dataset=
+        dfH = fabric.list_hierarchies(dataset=dataset_id, workspace=workspace_id)
         dfH["Hierarchy Object"] = format_dax_object_name(
             dfH["Table Name"], dfH["Hierarchy Name"]
         )
@@ -92,7 +98,7 @@ def warm_direct_lake_cache_perspective(

         # Relationship dependencies
         unique_table_names = dfPersp_filt["Table Name"].unique()
-        dfR = fabric.list_relationships(dataset=
+        dfR = fabric.list_relationships(dataset=dataset_id, workspace=workspace_id)
         dfR["From Object"] = format_dax_object_name(
             dfR["From Table"], dfR["From Column"]
         )
@@ -120,41 +126,22 @@ def warm_direct_lake_cache_perspective(
     df["Table Name"] = df["Table Name"].str[1:-1]
     df["Column Name"] = df["Column Name"].str[0:-1]

-
-
-    for tableName in (bar := tqdm(tbls)):
-        filtered_list = [
-            value for value in merged_list_unique if value.startswith(f"{tableName}[")
-        ]
-        bar.set_description(f"Warming the '{tableName}' table...")
-        css = ",".join(map(str, filtered_list))
-        dax = """EVALUATE TOPN(1,SUMMARIZECOLUMNS(""" + css + "))" ""
-        fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)
-
-    print(f"{icons.green_dot} The following columns have been put into memory:")
-
-    new_column_order = ["Table Name", "Column Name", "DAX Object Name"]
-    df = df.reindex(columns=new_column_order)
-    df = df[["Table Name", "Column Name"]].sort_values(
-        by=["Table Name", "Column Name"], ascending=True
-    )
-
-    return df
+    return _put_columns_into_memory(dataset=dataset, workspace=workspace, col_df=df)


 @log
 def warm_direct_lake_cache_isresident(
-    dataset: str, workspace: Optional[str] = None
+    dataset: str | UUID, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
     Performs a refresh on the semantic model and puts the columns which were in memory prior to the refresh back into memory.

     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
-    workspace : str, default=None
-        The Fabric workspace name.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -164,46 +151,81 @@ def warm_direct_lake_cache_isresident(
         Returns a pandas dataframe showing the columns that have been put into memory.
     """

-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

-    dfP = fabric.list_partitions(dataset=
-    if not any(r["Mode"] == "DirectLake" for
+    dfP = fabric.list_partitions(dataset=dataset_id, workspace=workspace_id)
+    if not any(r["Mode"] == "DirectLake" for _, r in dfP.iterrows()):
         raise ValueError(
-            f"{icons.red_dot} The '{
+            f"{icons.red_dot} The '{dataset_name}' semantic model in the '{workspace_name}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
         )

     # Identify columns which are currently in memory (Is Resident = True)
-    dfC = fabric.list_columns(dataset=
-    dfC["DAX Object Name"] = format_dax_object_name(
-        dfC["Table Name"], dfC["Column Name"]
-    )
+    dfC = fabric.list_columns(dataset=dataset_id, workspace=workspace_id, extended=True)
     dfC_filtered = dfC[dfC["Is Resident"] == True]

     if len(dfC_filtered) == 0:
         raise ValueError(
-            f"{icons.yellow_dot} At present, no columns are in memory in the '{
+            f"{icons.yellow_dot} At present, no columns are in memory in the '{dataset_name}' semantic model in the '{workspace_name}' workspace."
         )

     # Refresh/frame dataset
-    refresh_semantic_model(
+    refresh_semantic_model(
+        dataset=dataset_id, refresh_type="full", workspace=workspace_id
+    )
     time.sleep(2)

-
-
+    return _put_columns_into_memory(
+        dataset=dataset, workspace=workspace, col_df=dfC_filtered
+    )
+
+
+def _put_columns_into_memory(dataset, workspace, col_df, return_dataframe: bool = True):
+
+    row_limit = 1000000
+
+    dfT = fabric.list_tables(dataset=dataset, workspace=workspace, extended=True)
+    col_df = col_df.copy()
+
+    col_df["DAX Object"] = format_dax_object_name(
+        col_df["Table Name"], col_df["Column Name"]
+    )
+    tbls = col_df["Table Name"].unique()
+
     for table_name in (bar := tqdm(tbls)):
-
-
-
-.
-.
+        dfT_filt = dfT[dfT["Name"] == table_name]
+        col_df_filt = col_df[col_df["Table Name"] == table_name]
+        if not dfT_filt.empty:
+            row_count = dfT_filt["Row Count"].iloc[0]
+            bar.set_description(f"Warming the '{table_name}' table...")
+            if row_count < row_limit:
+                columns = col_df_filt["DAX Object"].tolist()
+                css = ", ".join(columns)
+                dax = f"EVALUATE TOPN(1, SELECTCOLUMNS('{table_name}', {css}))"
+                fabric.evaluate_dax(
+                    dataset=dataset, dax_string=dax, workspace=workspace
+                )
+            else:
+                for _, r in col_df_filt.iterrows():
+                    dax_object = r["DAX Object"]
+                    dax = f"""EVALUATE TOPN(1, SELECTCOLUMNS('{table_name}', {dax_object}))"""
+                    fabric.evaluate_dax(
+                        dataset=dataset, dax_string=dax, workspace=workspace
+                    )
+
+    if return_dataframe:
+        print(
+            f"{icons.green_dot} The following columns have been put into memory. Temperature indicates the current column temperature."
         )
-        dax = f"""EVALUATE TOPN(1,SUMMARIZECOLUMNS({css}))"""
-        fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)

-
-
-
+        dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True)
+        dfC["DAX Object"] = format_dax_object_name(
+            dfC["Table Name"], dfC["Column Name"]
+        )
+        dfC_filt = dfC[dfC["DAX Object"].isin(col_df["DAX Object"].values)]

-
-
-
+        return (
+            dfC_filt[["Table Name", "Column Name", "Is Resident", "Temperature"]]
+            .sort_values(by=["Table Name", "Column Name"], ascending=True)
+            .reset_index(drop=True)
+        )
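A minimal usage sketch of the widened signatures above, assuming these functions remain exposed through `sempy_labs.directlake` as in prior releases; the model name, perspective name, and workspace UUID below are placeholders:

```python
from uuid import UUID
from sempy_labs import directlake

# dataset and workspace now accept either a name or a UUID.
directlake.warm_direct_lake_cache_perspective(
    dataset="Sales Model",      # placeholder semantic model name
    perspective="Reporting",    # placeholder perspective name
    add_dependencies=True,
    workspace=UUID("00000000-0000-0000-0000-000000000000"),  # placeholder workspace ID
)

# Re-warms whichever columns were resident before a full refresh.
df = directlake.warm_direct_lake_cache_isresident(dataset="Sales Model")
```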
--- sempy_labs/lakehouse/_get_lakehouse_columns.py (0.8.10)
+++ sempy_labs/lakehouse/_get_lakehouse_columns.py (0.9.0)
@@ -1,29 +1,29 @@
-import sempy.fabric as fabric
 import pandas as pd
 from pyspark.sql import SparkSession
 from sempy_labs._helper_functions import (
-    resolve_lakehouse_name,
     format_dax_object_name,
-
+    resolve_workspace_name_and_id,
+    resolve_lakehouse_name_and_id,
 )
 from typing import Optional
 from sempy._utils._log import log
+from uuid import UUID


 @log
 def get_lakehouse_columns(
-    lakehouse: Optional[str] = None, workspace: Optional[str] = None
+    lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
     Shows the tables and columns of a lakehouse and their respective properties.

     Parameters
     ----------
-    lakehouse : str, default=None
-        The Fabric lakehouse.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    lakehouse_workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    lakehouse_workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -46,35 +46,32 @@ def get_lakehouse_columns(
         ]
     )

-
-
-
-
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
-    else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse=lakehouse, workspace=workspace_id
+    )

     spark = SparkSession.builder.getOrCreate()

     tables = get_lakehouse_tables(
-        lakehouse=
+        lakehouse=lakehouse_id, workspace=workspace_id, extended=False, count_rows=False
     )
     tables_filt = tables[tables["Format"] == "delta"]

-    for
-
-
-        delta_table = DeltaTable.forPath(spark,
+    for _, r in tables_filt.iterrows():
+        table_name = r["Table Name"]
+        path = r["Location"]
+        delta_table = DeltaTable.forPath(spark, path)
         sparkdf = delta_table.toDF()

-        for
-
+        for col_name, data_type in sparkdf.dtypes:
+            full_column_name = format_dax_object_name(table_name, col_name)
             new_data = {
-                "Workspace Name":
+                "Workspace Name": workspace_name,
                 "Lakehouse Name": lakehouse,
-                "Table Name":
-                "Column Name":
-                "Full Column Name":
+                "Table Name": table_name,
+                "Column Name": col_name,
+                "Full Column Name": full_column_name,
                 "Data Type": data_type,
             }
             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
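A short usage sketch of the updated signature, assuming `get_lakehouse_columns` is still exported from `sempy_labs.lakehouse`; the lakehouse name is a placeholder:

```python
from sempy_labs.lakehouse import get_lakehouse_columns

# Both arguments are optional and accept a name or a UUID;
# None falls back to the attached lakehouse / notebook workspace.
df_columns = get_lakehouse_columns(
    lakehouse="MyLakehouse",  # placeholder lakehouse name
    workspace=None,
)
print(df_columns.head())
```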
--- sempy_labs/lakehouse/_get_lakehouse_tables.py (0.8.10)
+++ sempy_labs/lakehouse/_get_lakehouse_tables.py (0.9.0)
@@ -4,10 +4,11 @@ from pyspark.sql import SparkSession
 import pyarrow.parquet as pq
 import datetime
 from sempy_labs._helper_functions import (
-
-    resolve_lakehouse_name,
+    _get_column_aggregate,
     resolve_workspace_name_and_id,
+    resolve_lakehouse_name_and_id,
     pagination,
+    save_as_delta_table,
 )
 from sempy_labs.directlake._guardrails import (
     get_sku_size,
@@ -18,12 +19,13 @@ from typing import Optional
 import sempy_labs._icons as icons
 from sempy._utils._log import log
 from sempy.fabric.exceptions import FabricHTTPException
+from uuid import UUID


 @log
 def get_lakehouse_tables(
-    lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
     extended: bool = False,
     count_rows: bool = False,
     export: bool = False,
@@ -35,11 +37,11 @@ def get_lakehouse_tables(

     Parameters
     ----------
-    lakehouse : str, default=None
-        The Fabric lakehouse.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     extended : bool, default=False
@@ -66,13 +68,10 @@ def get_lakehouse_tables(
         ]
     )

-    (
-
-
-
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
-    else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse=lakehouse, workspace=workspace_id
+    )

     if count_rows: # Setting countrows defaults to extended=True
         extended = True
@@ -104,8 +103,8 @@ def get_lakehouse_tables(
     for r in responses:
         for i in r.get("data", []):
             new_data = {
-                "Workspace Name":
-                "Lakehouse Name":
+                "Workspace Name": workspace_name,
+                "Lakehouse Name": lakehouse_name,
                 "Table Name": i.get("name"),
                 "Format": i.get("format"),
                 "Type": i.get("type"),
@@ -117,7 +116,7 @@ def get_lakehouse_tables(
     df = pd.concat(dfs, ignore_index=True)

     if extended:
-        sku_value = get_sku_size(
+        sku_value = get_sku_size(workspace_id)
         guardrail = get_directlake_guardrails_for_sku(sku_value)
         spark = SparkSession.builder.getOrCreate()
         df["Files"] = None
@@ -178,23 +177,17 @@ def get_lakehouse_tables(
                 f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
             )

-
-
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(
-            lakehouse_id=lakehouse_id, workspace=workspace
-        )
+        (current_lakehouse_name, current_lakehouse_id) = resolve_lakehouse_name_and_id()
        lakeTName = "lakehouse_table_details"
        lakeT_filt = df[df["Table Name"] == lakeTName]

-        query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}"
-
        if len(lakeT_filt) == 0:
-
+            run_id = 1
        else:
-
-
-
+            max_run_id = _get_column_aggregate(
+                lakehouse=current_lakehouse_name, table_name=lakeTName
+            )
+            run_id = max_run_id + 1

        export_df = df.copy()

@@ -239,15 +232,11 @@ def get_lakehouse_tables(
        print(
            f"{icons.in_progress} Saving Lakehouse table properties to the '{lakeTName}' table in the lakehouse...\n"
        )
-
-        export_df["
-        export_df["RunId"] = runId
+        export_df["Timestamp"] = datetime.datetime.now()
+        export_df["RunId"] = run_id

-
-
-        spark_df.write.mode("append").format("delta").saveAsTable(lakeTName)
-        print(
-            f"{icons.bullet} Lakehouse table properties have been saved to the '{lakeTName}' delta table."
+        save_as_delta_table(
+            dataframe=export_df, delta_table_name=lakeTName, write_mode="append"
        )

    return df
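A usage sketch for the updated function, assuming it is still exported from `sempy_labs.lakehouse`; per the diff above, `count_rows=True` implies `extended=True`, and `export=True` appends a snapshot to the `lakehouse_table_details` delta table in the attached lakehouse:

```python
from sempy_labs.lakehouse import get_lakehouse_tables

# Defaults resolve to the attached lakehouse and its workspace.
df_tables = get_lakehouse_tables(
    lakehouse=None,
    workspace=None,
    extended=True,    # adds Direct Lake guardrail information
    count_rows=False,
    export=False,     # True would append a snapshot to 'lakehouse_table_details'
)
```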
--- sempy_labs/lakehouse/_lakehouse.py (0.8.10)
+++ sempy_labs/lakehouse/_lakehouse.py (0.9.0)
@@ -1,8 +1,7 @@
-import sempy.fabric as fabric
 from tqdm.auto import tqdm
-from sempy_labs._helper_functions import resolve_lakehouse_name
 from typing import List, Optional, Union
 from sempy._utils._log import log
+from uuid import UUID


 def lakehouse_attached() -> bool:
@@ -29,7 +28,7 @@ def lakehouse_attached() -> bool:
 def optimize_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Runs the `OPTIMIZE <https://docs.delta.io/latest/optimizations-oss.html>`_ function over the specified lakehouse tables.
@@ -42,8 +41,8 @@ def optimize_lakehouse_tables(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
@@ -52,12 +51,6 @@ def optimize_lakehouse_tables(
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
     from delta import DeltaTable

-    workspace = fabric.resolve_workspace_name(workspace)
-
-    if lakehouse is None:
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
-
     lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
     lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]

@@ -83,7 +76,7 @@ def optimize_lakehouse_tables(
 def vacuum_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
     retain_n_hours: Optional[int] = None,
 ):
     """
@@ -96,8 +89,8 @@ def vacuum_lakehouse_tables(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     retain_n_hours : int, default=None
@@ -111,12 +104,6 @@ def vacuum_lakehouse_tables(
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
     from delta import DeltaTable

-    workspace = fabric.resolve_workspace_name(workspace)
-
-    if lakehouse is None:
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
-
     lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
     lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]

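A usage sketch for the two maintenance helpers, assuming they remain exported from `sempy_labs.lakehouse`; the table names and workspace ID below are placeholders:

```python
from sempy_labs.lakehouse import optimize_lakehouse_tables, vacuum_lakehouse_tables

# OPTIMIZE a couple of delta tables in the attached lakehouse.
optimize_lakehouse_tables(tables=["DimDate", "FactSales"])  # placeholder table names

# VACUUM all delta tables; workspace may now be a name or a UUID.
vacuum_lakehouse_tables(
    tables=None,
    lakehouse=None,
    workspace="00000000-0000-0000-0000-000000000000",  # placeholder workspace ID
    retain_n_hours=None,
)
```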