semantic-link-labs 0.9.6__py3-none-any.whl → 0.9.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {semantic_link_labs-0.9.6.dist-info → semantic_link_labs-0.9.7.dist-info}/METADATA +7 -5
- {semantic_link_labs-0.9.6.dist-info → semantic_link_labs-0.9.7.dist-info}/RECORD +35 -32
- {semantic_link_labs-0.9.6.dist-info → semantic_link_labs-0.9.7.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +4 -0
- sempy_labs/_ai.py +3 -1
- sempy_labs/_capacities.py +0 -1
- sempy_labs/_dax_query_view.py +2 -0
- sempy_labs/_delta_analyzer_history.py +298 -0
- sempy_labs/_helper_functions.py +171 -15
- sempy_labs/_icons.py +6 -6
- sempy_labs/_list_functions.py +3 -1
- sempy_labs/_model_bpa_bulk.py +10 -11
- sempy_labs/_model_bpa_rules.py +1 -1
- sempy_labs/admin/_basic_functions.py +28 -2
- sempy_labs/admin/_reports.py +1 -1
- sempy_labs/admin/_scanner.py +0 -2
- sempy_labs/admin/_tenant.py +8 -3
- sempy_labs/directlake/_generate_shared_expression.py +9 -1
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +82 -36
- sempy_labs/directlake/_update_directlake_partition_entity.py +3 -0
- sempy_labs/graph/_groups.py +6 -0
- sempy_labs/graph/_teams.py +2 -0
- sempy_labs/graph/_users.py +4 -0
- sempy_labs/lakehouse/__init__.py +12 -3
- sempy_labs/lakehouse/_blobs.py +231 -0
- sempy_labs/lakehouse/_shortcuts.py +22 -3
- sempy_labs/migration/_direct_lake_to_import.py +47 -10
- sempy_labs/report/__init__.py +4 -0
- sempy_labs/report/_report_functions.py +3 -3
- sempy_labs/report/_report_helper.py +17 -5
- sempy_labs/report/_reportwrapper.py +17 -8
- sempy_labs/report/_save_report.py +147 -0
- sempy_labs/tom/_model.py +154 -23
- {semantic_link_labs-0.9.6.dist-info → semantic_link_labs-0.9.7.dist-info/licenses}/LICENSE +0 -0
- {semantic_link_labs-0.9.6.dist-info → semantic_link_labs-0.9.7.dist-info}/top_level.txt +0 -0

sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py
CHANGED
@@ -1,18 +1,64 @@
-import sempy.fabric as fabric
 from sempy_labs.directlake._generate_shared_expression import generate_shared_expression
 from sempy_labs._helper_functions import (
-    resolve_lakehouse_name,
     resolve_dataset_name_and_id,
     resolve_workspace_name_and_id,
     resolve_item_name_and_id,
     resolve_lakehouse_name_and_id,
 )
+from sempy._utils._log import log
 from sempy_labs.tom import connect_semantic_model
 from typing import Optional
 import sempy_labs._icons as icons
 from uuid import UUID
+import re
 
 
+def _extract_expression_list(expression):
+    """
+    Finds the pattern for DL/SQL & DL/OL expressions in the semantic model.
+    """
+
+    pattern_sql = r'Sql\.Database\s*\(\s*"([^"]+)"\s*,\s*"([^"]+)"\s*\)'
+    pattern_no_sql = r'AzureDataLakeStorage\s*\{\s*"server".*?:\s*onelake\.dfs\.fabric\.microsoft\.com"\s*,\s*"path"\s*:\s*"/([\da-fA-F-]+)\s*/\s*([\da-fA-F-]+)\s*/"\s*\}'
+
+    match_sql = re.search(pattern_sql, expression)
+    match_no_sql = re.search(pattern_no_sql, expression)
+
+    result = []
+    if match_sql:
+        value_1, value_2 = match_sql.groups()
+        result = [value_1, value_2, True]
+    elif match_no_sql:
+        value_1, value_2 = match_no_sql.groups()
+        result = [value_1, value_2, False]
+
+    return result
+
+
+def _get_direct_lake_expressions(
+    dataset: str | UUID, workspace: Optional[str | UUID] = None
+) -> dict:
+    """
+    Extracts a dictionary of all Direct Lake expressions from a semantic model.
+    """
+
+    from sempy_labs.tom import connect_semantic_model
+
+    result = {}
+
+    with connect_semantic_model(dataset=dataset, workspace=workspace) as tom:
+        for e in tom.model.Expressions:
+            expr_name = e.Name
+            expr = e.Expression
+
+            list_values = _extract_expression_list(expr)
+            if list_values:
+                result[expr_name] = list_values
+
+    return result
+
+
+@log
 def update_direct_lake_model_lakehouse_connection(
     dataset: str | UUID,
     workspace: Optional[str | UUID] = None,
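The new `_extract_expression_list` helper returns a three-element list: the two captured values plus a flag indicating whether the expression is the SQL-endpoint form. A minimal illustration (the server and database values below are placeholders, not taken from the package):

```python
# Illustrative only: a DL/SQL-style expression of the shape the Sql.Database regex targets.
expr = 'Sql.Database("xxxx.datawarehouse.fabric.microsoft.com", "11111111-2222-3333-4444-555555555555")'

print(_extract_expression_list(expr))
# ['xxxx.datawarehouse.fabric.microsoft.com', '11111111-2222-3333-4444-555555555555', True]
# For a Direct Lake over OneLake (AzureDataLakeStorage) expression, the captured values are
# the workspace and item IDs from the OneLake path and the final element is False.
```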
@@ -39,41 +85,23 @@ def update_direct_lake_model_lakehouse_connection(
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
 
-    (
-
-
-
-
-
-
-    icons.sll_tags.append("UpdateDLConnection")
-
-    shEx = generate_shared_expression(
-        item_name=lakehouse, item_type="Lakehouse", workspace=lakehouse_workspace
-    )
-
-    with connect_semantic_model(
-        dataset=dataset_id, readonly=False, workspace=workspace_id
-    ) as tom:
-
-        if not tom.is_direct_lake():
-            raise ValueError(
-                f"{icons.red_dot} The '{dataset_name}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models."
-            )
-
-        tom.model.Expressions["DatabaseQuery"].Expression = shEx
-
-        print(
-            f"{icons.green_dot} The expression in the '{dataset_name}' semantic model has been updated to point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace."
+    update_direct_lake_model_connection(
+        dataset=dataset,
+        workspace=workspace,
+        source=lakehouse,
+        source_type="Lakehouse",
+        source_workspace=lakehouse_workspace,
     )
 
 
+@log
 def update_direct_lake_model_connection(
     dataset: str | UUID,
     workspace: Optional[str | UUID] = None,
     source: Optional[str] = None,
     source_type: str = "Lakehouse",
     source_workspace: Optional[str | UUID] = None,
+    use_sql_endpoint: bool = True,
 ):
     """
     Remaps a Direct Lake semantic model's SQL Endpoint connection to a new lakehouse/warehouse.
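With this change `update_direct_lake_model_lakehouse_connection` becomes a thin wrapper. A sketch of the equivalence (dataset, lakehouse, and workspace names are placeholders; the import path assumes both functions are exported from `sempy_labs.directlake`):

```python
from sempy_labs.directlake import (
    update_direct_lake_model_lakehouse_connection,
    update_direct_lake_model_connection,
)

# The legacy wrapper...
update_direct_lake_model_lakehouse_connection(
    dataset="Sales Model",
    workspace="Analytics",
    lakehouse="SalesLakehouse",
    lakehouse_workspace="Analytics",
)

# ...now simply forwards to the generalized function with source_type="Lakehouse".
update_direct_lake_model_connection(
    dataset="Sales Model",
    workspace="Analytics",
    source="SalesLakehouse",
    source_type="Lakehouse",
    source_workspace="Analytics",
)
```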
@@ -95,7 +123,14 @@ def update_direct_lake_model_connection(
         The Fabric workspace name or ID used by the lakehouse/warehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
+    use_sql_endpoint : bool, default=True
+        If True, the SQL Endpoint will be used for the connection.
+        If False, Direct Lake over OneLake will be used.
     """
+    if use_sql_endpoint:
+        icons.sll_tags.append("UpdateDLConnection_SQL")
+    else:
+        icons.sll_tags.append("UpdateDLConnection_DLOL")
 
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
     (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
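A minimal usage sketch for the new `use_sql_endpoint` flag (names are placeholders): passing `use_sql_endpoint=False` requests the Direct Lake over OneLake connection form instead of the SQL Endpoint form.

```python
from sempy_labs.directlake import update_direct_lake_model_connection

# Repoint a Direct Lake model at a lakehouse using Direct Lake over OneLake
# rather than the SQL Endpoint (dataset/workspace/lakehouse names are placeholders).
update_direct_lake_model_connection(
    dataset="Sales Model",
    workspace="Analytics",
    source="SalesLakehouse",
    source_type="Lakehouse",
    source_workspace="Analytics",
    use_sql_endpoint=False,
)
```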
@@ -119,12 +154,16 @@ def update_direct_lake_model_connection(
         item=source, type=source_type, workspace=source_workspace
     )
 
-
-
-
-
+    shared_expression = generate_shared_expression(
+        item_name=source_name,
+        item_type=source_type,
+        workspace=source_workspace,
+        use_sql_endpoint=use_sql_endpoint,
     )
 
+    expression_dict = _get_direct_lake_expressions(dataset=dataset, workspace=workspace)
+    expressions = list(expression_dict.keys())
+
     with connect_semantic_model(
         dataset=dataset_id, readonly=False, workspace=workspace_id
     ) as tom:
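The shared expression itself comes from `generate_shared_expression`, which (per `sempy_labs/directlake/_generate_shared_expression.py +9 -1` in the file list above) now also accepts the `use_sql_endpoint` flag. An assumed standalone call, with placeholder names:

```python
from sempy_labs.directlake import generate_shared_expression

# Hedged sketch: with use_sql_endpoint=False the returned M expression should use the
# AzureDataLakeStorage (Direct Lake over OneLake) form; with the default True it uses
# the Sql.Database (SQL Endpoint) form.
expression = generate_shared_expression(
    item_name="SalesLakehouse",
    item_type="Lakehouse",
    workspace="Analytics",
    use_sql_endpoint=False,
)
print(expression)
```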
@@ -134,8 +173,15 @@ def update_direct_lake_model_connection(
                 f"{icons.red_dot} The '{dataset_name}' semantic model within the '{workspace_name}' workspace is not in Direct Lake. This function is only applicable to Direct Lake semantic models."
             )
 
-
+        # Update the single connection expression
+        if len(expressions) == 1:
+            expr = expressions[0]
+            tom.model.Expressions[expr].Expression = shared_expression
 
-
-
-
+            print(
+                f"{icons.green_dot} The expression in the '{dataset_name}' semantic model within the '{workspace_name}' workspace has been updated to point to the '{source}' {source_type.lower()} in the '{source_workspace}' workspace."
+            )
+        else:
+            print(
+                f"{icons.info} Multiple expressions found in the model. Please use the update_direct_lake_partition_entity function to update specific tables."
+            )
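When a model defines more than one Direct Lake expression, the connection update above is skipped and the message points to `update_direct_lake_partition_entity`. A hedged sketch of that fallback (table/entity names are placeholders; the `entity_name` and `workspace` parameters are assumed from the function's existing signature, since only `dataset` and `table_name` appear in this diff):

```python
from sempy_labs.directlake import update_direct_lake_partition_entity

# Repoint individual tables to their source entities instead of swapping the
# single shared connection expression.
update_direct_lake_partition_entity(
    dataset="Sales Model",
    table_name=["DimDate", "FactSales"],
    entity_name=["dim_date", "fact_sales"],
    workspace="Analytics",
)
```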
sempy_labs/directlake/_update_directlake_partition_entity.py
CHANGED
@@ -8,11 +8,13 @@ from sempy_labs._helper_functions import (
     resolve_workspace_name_and_id,
     resolve_workspace_name,
 )
+from sempy._utils._log import log
 from typing import List, Optional, Union
 import sempy_labs._icons as icons
 from uuid import UUID
 
 
+@log
 def update_direct_lake_partition_entity(
     dataset: str | UUID,
     table_name: Union[str, List[str]],
@@ -96,6 +98,7 @@ def update_direct_lake_partition_entity(
     )
 
 
+@log
 def add_table_to_direct_lake_semantic_model(
     dataset: str | UUID,
     table_name: str,

sempy_labs/graph/_groups.py
CHANGED
@@ -6,6 +6,7 @@ from sempy_labs._helper_functions import (
     _create_dataframe,
     _update_dataframe_datatypes,
 )
+from sempy._utils._log import log
 import sempy_labs._icons as icons
 from typing import List, Literal
 
@@ -38,6 +39,7 @@ def resolve_group_id(group: str | UUID) -> UUID:
     return group_id
 
 
+@log
 def list_groups() -> pd.DataFrame:
     """
     Shows a list of groups and their properties.
@@ -158,6 +160,7 @@ def _get_group(group_id: UUID) -> pd.DataFrame:
     return df
 
 
+@log
 def list_group_members(group: str | UUID) -> pd.DataFrame:
     """
     Shows a list of the members of a group.
@@ -217,6 +220,7 @@ def list_group_members(group: str | UUID) -> pd.DataFrame:
     return df
 
 
+@log
 def list_group_owners(group: str | UUID) -> pd.DataFrame:
     """
     Shows a list of the owners of a group.
@@ -332,6 +336,7 @@ def _base_add_to_group(
     )
 
 
+@log
 def add_group_members(
     group: str | UUID,
     user: str | UUID | List[str | UUID],
@@ -376,6 +381,7 @@ def add_group_owners(
     _base_add_to_group(group=group, object=user, object_type="owners")
 
 
+@log
 def renew_group(group: str | UUID):
     """
     Renews the group.

sempy_labs/graph/_teams.py
CHANGED
@@ -1,5 +1,6 @@
 import pandas as pd
 from uuid import UUID
+from sempy._utils._log import log
 from sempy_labs._helper_functions import (
     _base_api,
     _create_dataframe,
@@ -7,6 +8,7 @@ from sempy_labs._helper_functions import (
 )
 
 
+@log
 def list_teams() -> pd.DataFrame:
     """
     Shows a list of teams and their properties.

sempy_labs/graph/_users.py
CHANGED
@@ -7,6 +7,7 @@ from sempy_labs._helper_functions import (
     _base_api,
     _create_dataframe,
 )
+from sempy._utils._log import log
 
 
 def resolve_user_id(user: str | UUID) -> UUID:
@@ -33,6 +34,7 @@ def resolve_user_id(user: str | UUID) -> UUID:
     return result.get("id")
 
 
+@log
 def get_user(user: str | UUID) -> pd.DataFrame:
     """
     Shows properties of a given user.
@@ -70,6 +72,7 @@ def get_user(user: str | UUID) -> pd.DataFrame:
     return pd.DataFrame([new_data])
 
 
+@log
 def list_users() -> pd.DataFrame:
     """
     Shows a list of users and their properties.
@@ -120,6 +123,7 @@ def list_users() -> pd.DataFrame:
     return df
 
 
+@log
 def send_mail(
     user: UUID | str,
     subject: str,

sempy_labs/lakehouse/__init__.py
CHANGED
@@ -1,12 +1,15 @@
-from sempy_labs.lakehouse._get_lakehouse_columns import
-
+from sempy_labs.lakehouse._get_lakehouse_columns import (
+    get_lakehouse_columns,
+)
+from sempy_labs.lakehouse._get_lakehouse_tables import (
+    get_lakehouse_tables,
+)
 from sempy_labs.lakehouse._lakehouse import (
     lakehouse_attached,
     optimize_lakehouse_tables,
     vacuum_lakehouse_tables,
     run_table_maintenance,
 )
-
 from sempy_labs.lakehouse._shortcuts import (
     # create_shortcut,
     create_shortcut_onelake,
@@ -14,6 +17,10 @@ from sempy_labs.lakehouse._shortcuts import (
     reset_shortcut_cache,
     list_shortcuts,
 )
+from sempy_labs.lakehouse._blobs import (
+    recover_lakehouse_object,
+    list_blobs,
+)
 
 __all__ = [
     "get_lakehouse_columns",
@@ -27,4 +34,6 @@ __all__ = [
     "reset_shortcut_cache",
     "run_table_maintenance",
     "list_shortcuts",
+    "recover_lakehouse_object",
+    "list_blobs",
 ]
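After this change the new blob helpers are importable from the lakehouse subpackage alongside the existing utilities:

```python
from sempy_labs.lakehouse import list_blobs, recover_lakehouse_object
```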
sempy_labs/lakehouse/_blobs.py
ADDED
@@ -0,0 +1,231 @@
+from sempy_labs._helper_functions import (
+    resolve_workspace_id,
+    resolve_lakehouse_id,
+    _xml_to_dict,
+    _create_dataframe,
+    _update_dataframe_datatypes,
+)
+from sempy._utils._log import log
+from uuid import UUID
+from typing import Optional, List
+import sempy_labs._icons as icons
+import xml.etree.ElementTree as ET
+import pandas as pd
+
+
+def _request_blob_api(
+    request: str,
+    method: str = "get",
+    payload: Optional[dict] = None,
+    status_codes: int | List[int] = 200,
+):
+
+    import requests
+    import notebookutils
+    from sempy.fabric.exceptions import FabricHTTPException
+
+    if isinstance(status_codes, int):
+        status_codes = [status_codes]
+
+    token = notebookutils.credentials.getToken("storage")
+
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json",
+        "x-ms-version": "2025-05-05",
+    }
+
+    response = requests.request(
+        method.upper(),
+        f"https://onelake.blob.fabric.microsoft.com/{request}",
+        headers=headers,
+        json=payload,
+    )
+
+    if response.status_code not in status_codes:
+        raise FabricHTTPException(response)
+
+    return response
+
+
+@log
+def list_blobs(
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+    container: Optional[str] = None,
+) -> pd.DataFrame:
+    """
+    Returns a list of blobs for a given lakehouse.
+
+    This function leverages the following API: `List Blobs <https://learn.microsoft.com/rest/api/storageservices/list-blobs?tabs=microsoft-entra-id>`_.
+
+    Parameters
+    ----------
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    container : str, default=None
+        The container name to list blobs from. If None, lists all blobs in the lakehouse.
+        Valid values are "Tables" or "Files". If not specified, the function will list all blobs in the lakehouse.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of blobs in the lakehouse.
+    """
+
+    workspace_id = resolve_workspace_id(workspace)
+    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
+
+    if container is None:
+        path_prefix = f"{workspace_id}/{lakehouse_id}"
+    else:
+        if container not in ["Tables", "Files"]:
+            raise ValueError(
+                f"{icons.red_dot} Invalid container '{container}' within the file_path parameter. Expected 'Tables' or 'Files'."
+            )
+        path_prefix = f"{workspace_id}/{lakehouse_id}/{container}"
+
+    response = _request_blob_api(
+        request=f"{path_prefix}?restype=container&comp=list&include=deleted"
+    )
+    root = ET.fromstring(response.content)
+    response_json = _xml_to_dict(root)
+
+    columns = {
+        "Blob Name": "str",
+        "Is Deleted": "bool",
+        "Deletion Id": "str",
+        "Creation Time": "datetime",
+        "Expiry Time": "datetime",
+        "Etag": "str",
+        "Resource Type": "str",
+        "Content Length": "int",
+        "Content Type": "str",
+        "Content Encoding": "str",
+        "Content Language": "str",
+        "Content CRC64": "str",
+        "Content MD5": "str",
+        "Cache Control": "str",
+        "Content Disposition": "str",
+        "Blob Type": "str",
+        "Access Tier": "str",
+        "Access Tier Inferred": "str",
+        "Server Encrypted": "bool",
+        "Deleted Time": "str",
+        "Remaining Retention Days": "str",
+    }
+
+    df = _create_dataframe(columns=columns)
+
+    for blob in (
+        response_json.get("EnumerationResults", {}).get("Blobs", {}).get("Blob", {})
+    ):
+        p = blob.get("Properties", {})
+        new_data = {
+            "Blob Name": blob.get("Name"),
+            "Is Deleted": blob.get("Deleted", False),
+            "Deletion Id": blob.get("DeletionId"),
+            "Creation Time": p.get("Creation-Time"),
+            "Expiry Time": p.get("Expiry-Time"),
+            "Etag": p.get("Etag"),
+            "Resource Type": p.get("ResourceType"),
+            "Content Length": p.get("Content-Length"),
+            "Content Type": p.get("Content-Type"),
+            "Content Encoding": p.get("Content-Encoding"),
+            "Content Language": p.get("Content-Language"),
+            "Content CRC64": p.get("Content-CRC64"),
+            "Content MD5": p.get("Content-MD5"),
+            "Cache Control": p.get("Cache-Control"),
+            "Content Disposition": p.get("Content-Disposition"),
+            "Blob Type": p.get("BlobType"),
+            "Access Tier": p.get("AccessTier"),
+            "Access Tier Inferred": p.get("AccessTierInferred"),
+            "Server Encrypted": p.get("ServerEncrypted"),
+            "Deleted Time": p.get("DeletedTime"),
+            "Remaining Retention Days": p.get("RemainingRetentionDays"),
+        }
+
+        df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+
+    _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+    return df
+
+
+@log
+def recover_lakehouse_object(
+    file_path: str,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Recovers an object (i.e. table, file, folder) in a lakehouse from a deleted state. Only `soft-deleted objects <https://learn.microsoft.com/fabric/onelake/onelake-disaster-recovery#soft-delete-for-onelake-files>`_ can be recovered (deleted for less than 7 days).
+
+    Parameters
+    ----------
+    file_path : str
+        The file path of the object to restore. For example: "Tables/my_delta_table".
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    workspace_id = resolve_workspace_id(workspace)
+    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
+
+    blob_path_prefix = f"{lakehouse_id}/{file_path}"
+
+    container = file_path.split("/")[0]
+    if container not in ["Tables", "Files"]:
+        raise ValueError(
+            f"{icons.red_dot} Invalid container '{container}' within the file_path parameter. Expected 'Tables' or 'Files'."
+        )
+
+    df = list_blobs(lakehouse=lakehouse, workspace=workspace, container=container)
+
+    for _, r in df.iterrows():
+        blob_name = r.get("Blob Name")
+        is_deleted = r.get("Is Deleted")
+        if blob_name.startswith(blob_path_prefix) and is_deleted:
+            print(f"{icons.in_progress} Restoring the '{blob_name}' blob...")
+            _request_blob_api(
+                request=f"{workspace_id}/{lakehouse_id}/{file_path}?comp=undelete",
+                method="put",
+            )
+            print(f"{icons.green_dot} The '{blob_name}' blob has been restored.")
+
+
+def _get_user_delegation_key():
+
+    # https://learn.microsoft.com/rest/api/storageservices/get-user-delegation-key
+
+    from datetime import datetime, timedelta, timezone
+
+    utc_now = datetime.now(timezone.utc)
+    start_time = utc_now + timedelta(minutes=2)
+    expiry_time = start_time + timedelta(minutes=45)
+    start_str = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
+    expiry_str = expiry_time.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    payload = f"""<?xml version="1.0" encoding="utf-8"?>
+<KeyInfo>
+    <Start>{start_str}</Start>
+    <Expiry>{expiry_str}</Expiry>
+</KeyInfo>"""
+
+    response = _request_blob_api(
+        request="restype=service&comp=userdelegationkey",
+        method="post",
+        payload=payload,
+    )
+
+    return response.content
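A usage sketch for the two public functions added in `_blobs.py` (lakehouse and workspace names are placeholders; `_request_blob_api` authenticates through `notebookutils`, so this is expected to run inside a Fabric notebook session):

```python
from sempy_labs.lakehouse import list_blobs, recover_lakehouse_object

# List blobs under the Tables container, including soft-deleted ones.
blobs = list_blobs(lakehouse="SalesLakehouse", workspace="Analytics", container="Tables")
deleted = blobs[blobs["Is Deleted"]]
print(deleted[["Blob Name", "Deleted Time", "Remaining Retention Days"]])

# Restore a soft-deleted table folder (must still be within the soft-delete retention window).
recover_lakehouse_object(
    file_path="Tables/my_delta_table",
    lakehouse="SalesLakehouse",
    workspace="Analytics",
)
```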
sempy_labs/lakehouse/_shortcuts.py
CHANGED
@@ -24,12 +24,15 @@ def create_shortcut_onelake(
     shortcut_name: Optional[str] = None,
     source_path: str = "Tables",
     destination_path: str = "Tables",
+    shortcut_conflict_policy: Optional[str] = None,
 ):
     """
     Creates a `shortcut <https://learn.microsoft.com/fabric/onelake/onelake-shortcuts>`_ to a delta table in OneLake.
 
     This is a wrapper function for the following API: `OneLake Shortcuts - Create Shortcut <https://learn.microsoft.com/rest/api/fabric/core/onelake-shortcuts/create-shortcut>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     table_name : str
@@ -51,6 +54,8 @@ def create_shortcut_onelake(
         A string representing the full path to the table/file in the source lakehouse, including either "Files" or "Tables". Examples: Tables/FolderName/SubFolderName; Files/FolderName/SubFolderName.
     destination_path: str, default="Tables"
         A string representing the full path where the shortcut is created, including either "Files" or "Tables". Examples: Tables/FolderName/SubFolderName; Files/FolderName/SubFolderName.
+    shortcut_conflict_policy : str, default=None
+        When provided, it defines the action to take when a shortcut with the same name and path already exists. The default action is 'Abort'. Additional ShortcutConflictPolicy types may be added over time.
     """
 
     if not (source_path.startswith("Files") or source_path.startswith("Tables")):
@@ -103,7 +108,8 @@ def create_shortcut_onelake(
     # Check if the shortcut already exists
     try:
         response = _base_api(
-            request=f"/v1/workspaces/{destination_workspace_id}/items/{destination_lakehouse_id}/shortcuts/{destination_path}/{actual_shortcut_name}"
+            request=f"/v1/workspaces/{destination_workspace_id}/items/{destination_lakehouse_id}/shortcuts/{destination_path}/{actual_shortcut_name}",
+            client="fabric_sp",
         )
         response_json = response.json()
         del response_json["target"]["type"]
@@ -119,11 +125,21 @@ def create_shortcut_onelake(
     except FabricHTTPException:
         pass
 
+    url = f"/v1/workspaces/{destination_workspace_id}/items/{destination_lakehouse_id}/shortcuts"
+
+    if shortcut_conflict_policy:
+        if shortcut_conflict_policy not in ["Abort", "GenerateUniqueName"]:
+            raise ValueError(
+                f"{icons.red_dot} The 'shortcut_conflict_policy' parameter must be either 'Abort' or 'GenerateUniqueName'."
+            )
+        url += f"?shortcutConflictPolicy={shortcut_conflict_policy}"
+
     _base_api(
-        request=
+        request=url,
         payload=payload,
         status_codes=201,
         method="post",
+        client="fabric_sp",
     )
 
     print(
@@ -211,6 +227,8 @@ def delete_shortcut(
 
     This is a wrapper function for the following API: `OneLake Shortcuts - Delete Shortcut <https://learn.microsoft.com/rest/api/fabric/core/onelake-shortcuts/delete-shortcut>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     shortcut_name : str
@@ -234,6 +252,7 @@ def delete_shortcut(
     _base_api(
         request=f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts/{shortcut_path}/{shortcut_name}",
         method="delete",
+        client="fabric_sp",
     )
 
     print(
@@ -288,7 +307,7 @@ def list_shortcuts(
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     path: str, default=None
-        The path within lakehouse where to look for shortcuts. If
+        The path within lakehouse where to look for shortcuts. If provided, must start with either "Files" or "Tables". Examples: Tables/FolderName/SubFolderName; Files/FolderName/SubFolderName.
         Defaults to None which will retun all shortcuts on the given lakehouse
 
     Returns