semantic-link-labs 0.8.10__py3-none-any.whl → 0.8.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of semantic-link-labs has been flagged as potentially problematic.
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.8.11.dist-info}/METADATA +3 -2
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.8.11.dist-info}/RECORD +73 -72
- sempy_labs/__init__.py +6 -2
- sempy_labs/_clear_cache.py +39 -37
- sempy_labs/_connections.py +13 -13
- sempy_labs/_data_pipelines.py +20 -20
- sempy_labs/_dataflows.py +27 -28
- sempy_labs/_dax.py +41 -47
- sempy_labs/_environments.py +26 -23
- sempy_labs/_eventhouses.py +16 -15
- sempy_labs/_eventstreams.py +16 -15
- sempy_labs/_external_data_shares.py +18 -20
- sempy_labs/_gateways.py +14 -14
- sempy_labs/_generate_semantic_model.py +99 -62
- sempy_labs/_git.py +105 -43
- sempy_labs/_helper_functions.py +148 -131
- sempy_labs/_job_scheduler.py +92 -0
- sempy_labs/_kql_databases.py +16 -15
- sempy_labs/_kql_querysets.py +16 -15
- sempy_labs/_list_functions.py +114 -99
- sempy_labs/_managed_private_endpoints.py +19 -17
- sempy_labs/_mirrored_databases.py +51 -48
- sempy_labs/_mirrored_warehouses.py +5 -4
- sempy_labs/_ml_experiments.py +16 -15
- sempy_labs/_ml_models.py +15 -14
- sempy_labs/_model_bpa.py +3 -3
- sempy_labs/_model_dependencies.py +55 -29
- sempy_labs/_notebooks.py +27 -25
- sempy_labs/_one_lake_integration.py +23 -26
- sempy_labs/_query_scale_out.py +67 -64
- sempy_labs/_refresh_semantic_model.py +25 -26
- sempy_labs/_spark.py +33 -32
- sempy_labs/_sql.py +12 -9
- sempy_labs/_translations.py +10 -7
- sempy_labs/_vertipaq.py +34 -31
- sempy_labs/_warehouses.py +22 -21
- sempy_labs/_workspace_identity.py +11 -10
- sempy_labs/_workspaces.py +40 -33
- sempy_labs/admin/_basic_functions.py +10 -12
- sempy_labs/admin/_external_data_share.py +3 -3
- sempy_labs/admin/_items.py +4 -4
- sempy_labs/admin/_scanner.py +3 -1
- sempy_labs/directlake/_directlake_schema_compare.py +18 -14
- sempy_labs/directlake/_directlake_schema_sync.py +18 -12
- sempy_labs/directlake/_dl_helper.py +25 -26
- sempy_labs/directlake/_generate_shared_expression.py +10 -9
- sempy_labs/directlake/_get_directlake_lakehouse.py +16 -13
- sempy_labs/directlake/_get_shared_expression.py +4 -3
- sempy_labs/directlake/_guardrails.py +12 -6
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +15 -9
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +16 -10
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +35 -31
- sempy_labs/directlake/_update_directlake_partition_entity.py +34 -31
- sempy_labs/directlake/_warm_cache.py +87 -65
- sempy_labs/lakehouse/_get_lakehouse_columns.py +10 -8
- sempy_labs/lakehouse/_get_lakehouse_tables.py +10 -9
- sempy_labs/lakehouse/_lakehouse.py +17 -13
- sempy_labs/lakehouse/_shortcuts.py +42 -23
- sempy_labs/migration/_create_pqt_file.py +16 -11
- sempy_labs/migration/_refresh_calc_tables.py +16 -10
- sempy_labs/report/_download_report.py +9 -8
- sempy_labs/report/_generate_report.py +40 -44
- sempy_labs/report/_paginated.py +9 -9
- sempy_labs/report/_report_bpa.py +13 -9
- sempy_labs/report/_report_functions.py +80 -91
- sempy_labs/report/_report_helper.py +8 -4
- sempy_labs/report/_report_list_functions.py +24 -13
- sempy_labs/report/_report_rebind.py +17 -16
- sempy_labs/report/_reportwrapper.py +41 -33
- sempy_labs/tom/_model.py +43 -6
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.8.11.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.8.11.dist-info}/WHEEL +0 -0
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.8.11.dist-info}/top_level.txt +0 -0
sempy_labs/directlake/_update_directlake_partition_entity.py

@@ -3,36 +3,41 @@ import sempy.fabric as fabric
 from sempy_labs.tom import connect_semantic_model
 from sempy_labs._refresh_semantic_model import refresh_semantic_model
 from sempy_labs.directlake._dl_helper import get_direct_lake_source
-from sempy_labs._helper_functions import
+from sempy_labs._helper_functions import (
+    _convert_data_type,
+    resolve_dataset_name_and_id,
+    resolve_workspace_name_and_id,
+)
 from typing import List, Optional, Union
 import sempy_labs._icons as icons
+from uuid import UUID
 
 
 def update_direct_lake_partition_entity(
-    dataset: str,
+    dataset: str | UUID,
     table_name: Union[str, List[str]],
     entity_name: Union[str, List[str]],
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Remaps a table (or tables) in a Direct Lake semantic model to a table in a lakehouse.
 
     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
     table_name : str, List[str]
         Name of the table(s) in the semantic model.
     entity_name : str, List[str]
         Name of the lakehouse table to be mapped to the semantic model table.
-    workspace : str, default=None
-        The Fabric workspace name in which the semantic model exists.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID in which the semantic model exists.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
 
-
-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
 
     # Support both str & list types
     if isinstance(table_name, str):

@@ -48,12 +53,12 @@ def update_direct_lake_partition_entity(
     icons.sll_tags.append("UpdateDLPartition")
 
     with connect_semantic_model(
-        dataset=
+        dataset=dataset_id, readonly=False, workspace=workspace_id
     ) as tom:
 
         if not tom.is_direct_lake():
             raise ValueError(
-                f"{icons.red_dot} The '{
+                f"{icons.red_dot} The '{dataset_name}' semantic model within the '{workspace_name}' workspace is not in Direct Lake mode."
             )
 
         for tName in table_name:

@@ -68,42 +73,39 @@ def update_direct_lake_partition_entity(
 
             if part_name is None:
                 raise ValueError(
-                    f"{icons.red_dot} The '{tName}' table in the '{
+                    f"{icons.red_dot} The '{tName}' table in the '{dataset_name}' semantic model has not been updated."
                 )
 
             tom.model.Tables[tName].Partitions[part_name].Source.EntityName = eName
             print(
-                f"{icons.green_dot} The '{tName}' table in the '{
+                f"{icons.green_dot} The '{tName}' table in the '{dataset_name}' semantic model within the '{workspace_name}' workspace has been updated to point to the '{eName}' table."
             )
 
 
 def add_table_to_direct_lake_semantic_model(
-    dataset: str,
+    dataset: str | UUID,
     table_name: str,
     lakehouse_table_name: str,
     refresh: bool = True,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Adds a table and all of its columns to a Direct Lake semantic model, based on a Fabric lakehouse table.
 
     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
     table_name : str, List[str]
         Name of the table in the semantic model.
     lakehouse_table_name : str
         The name of the Fabric lakehouse table.
     refresh : bool, default=True
         Refreshes the table after it is added to the semantic model.
-    workspace : str, default=None
-        The name of the Fabric workspace in which the semantic model resides.
+    workspace : str | uuid.UUID, default=None
+        The name or ID of the Fabric workspace in which the semantic model resides.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-
-    Returns
-    -------
     """
 
     sempy.fabric._client._utils._init_analysis_services()

@@ -111,10 +113,11 @@ def add_table_to_direct_lake_semantic_model(
     from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
 
-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
 
     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
-        get_direct_lake_source(dataset=
+        get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
     )
 
     if artifact_type == "Warehouse":

@@ -125,7 +128,7 @@ def add_table_to_direct_lake_semantic_model(
     lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
 
     with connect_semantic_model(
-        dataset=
+        dataset=dataset_id, readonly=False, workspace=workspace_id
     ) as tom:
 
         table_count = tom.model.Tables.Count

@@ -148,12 +151,12 @@ def add_table_to_direct_lake_semantic_model(
             == TOM.PartitionSourceType.Entity
         )
         raise ValueError(
-            f"The '{lakehouse_table_name}' table already exists in the '{
+            f"The '{lakehouse_table_name}' table already exists in the '{dataset_name}' semantic model within the '{workspace_name}' workspace as the '{t_name}' table."
         )
 
     if any(t.Name == table_name for t in tom.model.Tables):
         raise ValueError(
-            f"The '{table_name}' table already exists in the '{
+            f"The '{table_name}' table already exists in the '{dataset_name}' semantic model within the '{workspace_name}' workspace."
         )
 
     dfL = get_lakehouse_tables(

@@ -173,13 +176,13 @@ def add_table_to_direct_lake_semantic_model(
 
         tom.add_table(name=table_name)
         print(
-            f"{icons.green_dot} The '{table_name}' table has been added to the '{
+            f"{icons.green_dot} The '{table_name}' table has been added to the '{dataset_name}' semantic model within the '{workspace_name}' workspace."
         )
         tom.add_entity_partition(
            table_name=table_name, entity_name=lakehouse_table_name
         )
         print(
-            f"{icons.green_dot} The '{lakehouse_table_name}' partition has been added to the '{table_name}' table in the '{
+            f"{icons.green_dot} The '{lakehouse_table_name}' partition has been added to the '{table_name}' table in the '{dataset_name}' semantic model within the '{workspace_name}' workspace."
         )
 
         for i, r in dfLC_filt.iterrows():

@@ -193,10 +196,10 @@ def add_table_to_direct_lake_semantic_model(
                 data_type=dt,
             )
             print(
-                f"{icons.green_dot} The '{lakeCName}' column has been added to the '{table_name}' table as a '{dt}' data type in the '{
+                f"{icons.green_dot} The '{lakeCName}' column has been added to the '{table_name}' table as a '{dt}' data type in the '{dataset_name}' semantic model within the '{workspace_name}' workspace."
             )
 
     if refresh:
         refresh_semantic_model(
-            dataset=
+            dataset=dataset_id, tables=table_name, workspace=workspace_id
         )
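The upshot of the changes above is that update_direct_lake_partition_entity and add_table_to_direct_lake_semantic_model now accept either a name or a UUID for dataset and workspace, resolving both internally via resolve_workspace_name_and_id and resolve_dataset_name_and_id. A minimal usage sketch follows; the model, table, and workspace names and the UUIDs are placeholders rather than values taken from this release, and it assumes both functions remain importable from sempy_labs.directlake as in 0.8.10.

from uuid import UUID
from sempy_labs import directlake

# Remap a semantic model table to a different lakehouse table, addressing the model by name...
directlake.update_direct_lake_partition_entity(
    dataset="Sales Model",              # placeholder semantic model name
    table_name="DimDate",               # placeholder table in the model
    entity_name="dim_date",             # placeholder lakehouse table
    workspace="Analytics Workspace",    # placeholder workspace name
)

# ...or by ID, which the 0.8.11 signatures (str | UUID) also allow.
directlake.add_table_to_direct_lake_semantic_model(
    dataset=UUID("00000000-0000-0000-0000-000000000000"),    # placeholder dataset ID
    table_name="FactSales",
    lakehouse_table_name="fact_sales",
    refresh=True,
    workspace=UUID("00000000-0000-0000-0000-000000000000"),  # placeholder workspace ID
)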
sempy_labs/directlake/_warm_cache.py

@@ -3,34 +3,39 @@ import pandas as pd
 from tqdm.auto import tqdm
 import numpy as np
 import time
-from sempy_labs._helper_functions import
+from sempy_labs._helper_functions import (
+    format_dax_object_name,
+    resolve_dataset_name_and_id,
+    resolve_workspace_name_and_id,
+)
 from sempy_labs._refresh_semantic_model import refresh_semantic_model
 from sempy_labs._model_dependencies import get_measure_dependencies
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
+from uuid import UUID
 
 
 @log
 def warm_direct_lake_cache_perspective(
-    dataset: str,
+    dataset: str | UUID,
     perspective: str,
     add_dependencies: bool = False,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ) -> pd.DataFrame:
     """
     Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective.
 
     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
     perspective : str
         Name of the perspective which contains objects to be used for warming the cache.
     add_dependencies : bool, default=False
         Includes object dependencies in the cache warming process.
-    workspace : str, default=None
-        The Fabric workspace name.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
 

@@ -40,15 +45,16 @@ def warm_direct_lake_cache_perspective(
     Returns a pandas dataframe showing the columns that have been put into memory.
     """
 
-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
 
-    dfP = fabric.list_partitions(dataset=
-    if not any(r["Mode"] == "DirectLake" for
+    dfP = fabric.list_partitions(dataset=dataset_id, workspace=workspace_id)
+    if not any(r["Mode"] == "DirectLake" for _, r in dfP.iterrows()):
         raise ValueError(
-            f"{icons.red_dot} The '{
+            f"{icons.red_dot} The '{dataset_name}' semantic model in the '{workspace_name}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
         )
 
-    dfPersp = fabric.list_perspectives(dataset=
+    dfPersp = fabric.list_perspectives(dataset=dataset_id, workspace=workspace_id)
     dfPersp["DAX Object Name"] = format_dax_object_name(
         dfPersp["Table Name"], dfPersp["Object Name"]
     )

@@ -65,7 +71,7 @@ def warm_direct_lake_cache_perspective(
 
     if add_dependencies:
         # Measure dependencies
-        md = get_measure_dependencies(
+        md = get_measure_dependencies(dataset_id, workspace_id)
         md["Referenced Full Object"] = format_dax_object_name(
             md["Referenced Table"], md["Referenced Object"]
         )

@@ -78,7 +84,7 @@ def warm_direct_lake_cache_perspective(
 
         # Hierarchy dependencies
         dfPersp_h = dfPersp_filt[(dfPersp_filt["Object Type"] == "Hierarchy")]
-        dfH = fabric.list_hierarchies(dataset=
+        dfH = fabric.list_hierarchies(dataset=dataset_id, workspace=workspace_id)
         dfH["Hierarchy Object"] = format_dax_object_name(
             dfH["Table Name"], dfH["Hierarchy Name"]
         )

@@ -92,7 +98,7 @@ def warm_direct_lake_cache_perspective(
 
         # Relationship dependencies
         unique_table_names = dfPersp_filt["Table Name"].unique()
-        dfR = fabric.list_relationships(dataset=
+        dfR = fabric.list_relationships(dataset=dataset_id, workspace=workspace_id)
         dfR["From Object"] = format_dax_object_name(
             dfR["From Table"], dfR["From Column"]
         )

@@ -120,41 +126,22 @@ def warm_direct_lake_cache_perspective(
     df["Table Name"] = df["Table Name"].str[1:-1]
     df["Column Name"] = df["Column Name"].str[0:-1]
 
-
-
-    for tableName in (bar := tqdm(tbls)):
-        filtered_list = [
-            value for value in merged_list_unique if value.startswith(f"{tableName}[")
-        ]
-        bar.set_description(f"Warming the '{tableName}' table...")
-        css = ",".join(map(str, filtered_list))
-        dax = """EVALUATE TOPN(1,SUMMARIZECOLUMNS(""" + css + "))" ""
-        fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)
-
-    print(f"{icons.green_dot} The following columns have been put into memory:")
-
-    new_column_order = ["Table Name", "Column Name", "DAX Object Name"]
-    df = df.reindex(columns=new_column_order)
-    df = df[["Table Name", "Column Name"]].sort_values(
-        by=["Table Name", "Column Name"], ascending=True
-    )
-
-    return df
+    return _put_columns_into_memory(dataset=dataset, workspace=workspace, col_df=df)
 
 
 @log
 def warm_direct_lake_cache_isresident(
-    dataset: str, workspace: Optional[str] = None
+    dataset: str | UUID, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
     Performs a refresh on the semantic model and puts the columns which were in memory prior to the refresh back into memory.
 
     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
-    workspace : str, default=None
-        The Fabric workspace name.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
 

@@ -164,46 +151,81 @@ def warm_direct_lake_cache_isresident(
     Returns a pandas dataframe showing the columns that have been put into memory.
     """
 
-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
 
-    dfP = fabric.list_partitions(dataset=
-    if not any(r["Mode"] == "DirectLake" for
+    dfP = fabric.list_partitions(dataset=dataset_id, workspace=workspace_id)
+    if not any(r["Mode"] == "DirectLake" for _, r in dfP.iterrows()):
         raise ValueError(
-            f"{icons.red_dot} The '{
+            f"{icons.red_dot} The '{dataset_name}' semantic model in the '{workspace_name}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
         )
 
     # Identify columns which are currently in memory (Is Resident = True)
-    dfC = fabric.list_columns(dataset=
-    dfC["DAX Object Name"] = format_dax_object_name(
-        dfC["Table Name"], dfC["Column Name"]
-    )
+    dfC = fabric.list_columns(dataset=dataset_id, workspace=workspace_id, extended=True)
     dfC_filtered = dfC[dfC["Is Resident"] == True]
 
     if len(dfC_filtered) == 0:
         raise ValueError(
-            f"{icons.yellow_dot} At present, no columns are in memory in the '{
+            f"{icons.yellow_dot} At present, no columns are in memory in the '{dataset_name}' semantic model in the '{workspace_name}' workspace."
         )
 
     # Refresh/frame dataset
-    refresh_semantic_model(
+    refresh_semantic_model(
+        dataset=dataset_id, refresh_type="full", workspace=workspace_id
+    )
     time.sleep(2)
 
-
-
+    return _put_columns_into_memory(
+        dataset=dataset, workspace=workspace, col_df=dfC_filtered
+    )
+
+
+def _put_columns_into_memory(dataset, workspace, col_df, return_dataframe: bool = True):
+
+    row_limit = 1000000
+
+    dfT = fabric.list_tables(dataset=dataset, workspace=workspace, extended=True)
+    col_df = col_df.copy()
+
+    col_df["DAX Object"] = format_dax_object_name(
+        col_df["Table Name"], col_df["Column Name"]
+    )
+    tbls = col_df["Table Name"].unique()
+
     for table_name in (bar := tqdm(tbls)):
-
-
-
-        .
-        .
+        dfT_filt = dfT[dfT["Name"] == table_name]
+        col_df_filt = col_df[col_df["Table Name"] == table_name]
+        if not dfT_filt.empty:
+            row_count = dfT_filt["Row Count"].iloc[0]
+            bar.set_description(f"Warming the '{table_name}' table...")
+            if row_count < row_limit:
+                columns = col_df_filt["DAX Object"].tolist()
+                css = ", ".join(columns)
+                dax = f"EVALUATE TOPN(1, SELECTCOLUMNS('{table_name}', {css}))"
+                fabric.evaluate_dax(
+                    dataset=dataset, dax_string=dax, workspace=workspace
+                )
+            else:
+                for _, r in col_df_filt.iterrows():
+                    dax_object = r["DAX Object"]
+                    dax = f"""EVALUATE TOPN(1, SELECTCOLUMNS('{table_name}', {dax_object}))"""
+                    fabric.evaluate_dax(
+                        dataset=dataset, dax_string=dax, workspace=workspace
+                    )
+
+    if return_dataframe:
+        print(
+            f"{icons.green_dot} The following columns have been put into memory. Temperature indicates the current column temperature."
         )
-        dax = f"""EVALUATE TOPN(1,SUMMARIZECOLUMNS({css}))"""
-        fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)
 
-
-
-
+        dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True)
+        dfC["DAX Object"] = format_dax_object_name(
+            dfC["Table Name"], dfC["Column Name"]
+        )
+        dfC_filt = dfC[dfC["DAX Object"].isin(col_df["DAX Object"].values)]
 
-
-
-
+        return (
+            dfC_filt[["Table Name", "Column Name", "Is Resident", "Temperature"]]
+            .sort_values(by=["Table Name", "Column Name"], ascending=True)
+            .reset_index(drop=True)
+        )
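Functionally, both cache-warming functions now delegate to the new _put_columns_into_memory helper, which warms columns table by table: tables under the 1,000,000-row limit get a single EVALUATE TOPN(1, SELECTCOLUMNS(...)) query covering all requested columns, larger tables get one query per column, and the returned dataframe now includes Is Resident and Temperature. A hedged usage sketch (placeholder names throughout, assuming the functions are still exported from sempy_labs.directlake):

from uuid import UUID
from sempy_labs import directlake

# Warm every column referenced by a perspective, including measure/hierarchy/relationship dependencies.
df = directlake.warm_direct_lake_cache_perspective(
    dataset="Sales Model",             # placeholder; a UUID is also accepted in 0.8.11
    perspective="Reporting",           # placeholder perspective name
    add_dependencies=True,
    workspace="Analytics Workspace",   # placeholder workspace name
)

# Re-warm whatever was resident before a full refresh (frame) of the model.
df = directlake.warm_direct_lake_cache_isresident(
    dataset=UUID("00000000-0000-0000-0000-000000000000"),  # placeholder dataset ID
    workspace="Analytics Workspace",
)
print(df[["Table Name", "Column Name", "Is Resident", "Temperature"]])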
sempy_labs/lakehouse/_get_lakehouse_columns.py

@@ -5,14 +5,16 @@ from sempy_labs._helper_functions import (
     resolve_lakehouse_name,
     format_dax_object_name,
     resolve_lakehouse_id,
+    resolve_workspace_name_and_id,
 )
 from typing import Optional
 from sempy._utils._log import log
+from uuid import UUID
 
 
 @log
 def get_lakehouse_columns(
-    lakehouse: Optional[str] = None, workspace: Optional[str] = None
+    lakehouse: Optional[str] = None, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
     Shows the tables and columns of a lakehouse and their respective properties.

@@ -22,8 +24,8 @@ def get_lakehouse_columns(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    lakehouse_workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    lakehouse_workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
 

@@ -46,18 +48,18 @@ def get_lakehouse_columns(
         ]
     )
 
-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id,
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)
     else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse,
+        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
 
     spark = SparkSession.builder.getOrCreate()
 
     tables = get_lakehouse_tables(
-        lakehouse=lakehouse, workspace=
+        lakehouse=lakehouse, workspace=workspace_id, extended=False, count_rows=False
     )
     tables_filt = tables[tables["Format"] == "delta"]
 

@@ -70,7 +72,7 @@ def get_lakehouse_columns(
         for cName, data_type in sparkdf.dtypes:
             tc = format_dax_object_name(tName, cName)
             new_data = {
-                "Workspace Name":
+                "Workspace Name": workspace_name,
                 "Lakehouse Name": lakehouse,
                 "Table Name": tName,
                 "Column Name": cName,
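The only behavioral change here is that workspace may now be a name or a UUID, resolved once via resolve_workspace_name_and_id and reported in the Workspace Name column. A short sketch (placeholder names, assuming the function is exported from sempy_labs.lakehouse):

from sempy_labs import lakehouse

# Lakehouse and workspace values are placeholders; the workspace may also be a UUID as of 0.8.11.
df_columns = lakehouse.get_lakehouse_columns(
    lakehouse="MyLakehouse",
    workspace="Analytics Workspace",
)
print(df_columns[["Workspace Name", "Lakehouse Name", "Table Name", "Column Name"]].head())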
sempy_labs/lakehouse/_get_lakehouse_tables.py

@@ -18,12 +18,13 @@ from typing import Optional
 import sempy_labs._icons as icons
 from sempy._utils._log import log
 from sempy.fabric.exceptions import FabricHTTPException
+from uuid import UUID
 
 
 @log
 def get_lakehouse_tables(
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
     extended: bool = False,
     count_rows: bool = False,
     export: bool = False,

@@ -38,8 +39,8 @@ def get_lakehouse_tables(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     extended : bool, default=False

@@ -66,13 +67,13 @@ def get_lakehouse_tables(
         ]
     )
 
-    (
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id,
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)
     else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse,
+        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
 
     if count_rows:  # Setting countrows defaults to extended=True
         extended = True

@@ -104,7 +105,7 @@ def get_lakehouse_tables(
     for r in responses:
         for i in r.get("data", []):
             new_data = {
-                "Workspace Name":
+                "Workspace Name": workspace_name,
                 "Lakehouse Name": lakehouse,
                 "Table Name": i.get("name"),
                 "Format": i.get("format"),

@@ -117,7 +118,7 @@ def get_lakehouse_tables(
     df = pd.concat(dfs, ignore_index=True)
 
     if extended:
-        sku_value = get_sku_size(
+        sku_value = get_sku_size(workspace_id)
         guardrail = get_directlake_guardrails_for_sku(sku_value)
         spark = SparkSession.builder.getOrCreate()
         df["Files"] = None

@@ -182,7 +183,7 @@ def get_lakehouse_tables(
 
         lakehouse_id = fabric.get_lakehouse_id()
         lakehouse = resolve_lakehouse_name(
-            lakehouse_id=lakehouse_id, workspace=
+            lakehouse_id=lakehouse_id, workspace=workspace_id
         )
         lakeTName = "lakehouse_table_details"
         lakeT_filt = df[df["Table Name"] == lakeTName]
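As with the previous file, get_lakehouse_tables now takes the workspace as a name or UUID and resolves it once up front; the extended/guardrail path now calls get_sku_size(workspace_id) with the resolved ID. A hedged example (placeholder names, assuming the function is exported from sempy_labs.lakehouse):

from sempy_labs import lakehouse

# Placeholder lakehouse/workspace values; extended=True adds the Direct Lake guardrail columns.
df_tables = lakehouse.get_lakehouse_tables(
    lakehouse="MyLakehouse",
    workspace="Analytics Workspace",   # or a workspace UUID
    extended=True,
    count_rows=False,
)
print(df_tables[["Workspace Name", "Lakehouse Name", "Table Name", "Format"]].head())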
sempy_labs/lakehouse/_lakehouse.py

@@ -1,8 +1,12 @@
 import sempy.fabric as fabric
 from tqdm.auto import tqdm
-from sempy_labs._helper_functions import
+from sempy_labs._helper_functions import (
+    resolve_lakehouse_name,
+    resolve_workspace_name_and_id,
+)
 from typing import List, Optional, Union
 from sempy._utils._log import log
+from uuid import UUID
 
 
 def lakehouse_attached() -> bool:

@@ -29,7 +33,7 @@ def lakehouse_attached() -> bool:
 def optimize_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Runs the `OPTIMIZE <https://docs.delta.io/latest/optimizations-oss.html>`_ function over the specified lakehouse tables.

@@ -42,8 +46,8 @@ def optimize_lakehouse_tables(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """

@@ -52,13 +56,13 @@ def optimize_lakehouse_tables(
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
     from delta import DeltaTable
 
-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id,
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)
 
-    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=
+    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace_id)
     lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]
 
     if isinstance(tables, str):

@@ -83,7 +87,7 @@ def optimize_lakehouse_tables(
 def vacuum_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
     retain_n_hours: Optional[int] = None,
 ):
     """

@@ -96,8 +100,8 @@ def vacuum_lakehouse_tables(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     retain_n_hours : int, default=None

@@ -111,13 +115,13 @@ def vacuum_lakehouse_tables(
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
     from delta import DeltaTable
 
-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id,
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)
 
-    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=
+    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace_id)
     lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]
 
     if isinstance(tables, str):
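Both maintenance helpers pick up the same workspace: Optional[str | UUID] signature and resolve the workspace through resolve_workspace_name_and_id before listing the delta tables. A minimal sketch (placeholder table, lakehouse, and workspace values; assumes the functions are exported from sempy_labs.lakehouse):

from sempy_labs import lakehouse

# OPTIMIZE two delta tables in a lakehouse, addressing the workspace by name (a UUID also works in 0.8.11).
lakehouse.optimize_lakehouse_tables(
    tables=["dim_date", "fact_sales"],
    lakehouse="MyLakehouse",
    workspace="Analytics Workspace",
)

# VACUUM a single table, retaining one week of history.
lakehouse.vacuum_lakehouse_tables(
    tables="fact_sales",
    lakehouse="MyLakehouse",
    workspace="Analytics Workspace",
    retain_n_hours=168,
)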