semantic-link-labs 0.8.10__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of semantic-link-labs might be problematic.
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/METADATA +6 -5
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/RECORD +81 -80
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +34 -3
- sempy_labs/_authentication.py +80 -4
- sempy_labs/_capacities.py +770 -200
- sempy_labs/_capacity_migration.py +7 -37
- sempy_labs/_clear_cache.py +37 -35
- sempy_labs/_connections.py +13 -13
- sempy_labs/_data_pipelines.py +20 -20
- sempy_labs/_dataflows.py +27 -28
- sempy_labs/_dax.py +41 -47
- sempy_labs/_deployment_pipelines.py +1 -1
- sempy_labs/_environments.py +26 -23
- sempy_labs/_eventhouses.py +16 -15
- sempy_labs/_eventstreams.py +16 -15
- sempy_labs/_external_data_shares.py +18 -20
- sempy_labs/_gateways.py +16 -14
- sempy_labs/_generate_semantic_model.py +107 -62
- sempy_labs/_git.py +105 -43
- sempy_labs/_helper_functions.py +251 -194
- sempy_labs/_job_scheduler.py +227 -0
- sempy_labs/_kql_databases.py +16 -15
- sempy_labs/_kql_querysets.py +16 -15
- sempy_labs/_list_functions.py +150 -126
- sempy_labs/_managed_private_endpoints.py +19 -17
- sempy_labs/_mirrored_databases.py +51 -48
- sempy_labs/_mirrored_warehouses.py +5 -4
- sempy_labs/_ml_experiments.py +16 -15
- sempy_labs/_ml_models.py +15 -14
- sempy_labs/_model_bpa.py +210 -207
- sempy_labs/_model_bpa_bulk.py +2 -2
- sempy_labs/_model_bpa_rules.py +3 -3
- sempy_labs/_model_dependencies.py +55 -29
- sempy_labs/_notebooks.py +29 -25
- sempy_labs/_one_lake_integration.py +23 -26
- sempy_labs/_query_scale_out.py +75 -64
- sempy_labs/_refresh_semantic_model.py +25 -26
- sempy_labs/_spark.py +33 -32
- sempy_labs/_sql.py +19 -12
- sempy_labs/_translations.py +10 -7
- sempy_labs/_vertipaq.py +38 -33
- sempy_labs/_warehouses.py +26 -25
- sempy_labs/_workspace_identity.py +11 -10
- sempy_labs/_workspaces.py +40 -33
- sempy_labs/admin/_basic_functions.py +166 -115
- sempy_labs/admin/_domains.py +7 -2
- sempy_labs/admin/_external_data_share.py +3 -3
- sempy_labs/admin/_git.py +4 -1
- sempy_labs/admin/_items.py +11 -6
- sempy_labs/admin/_scanner.py +10 -5
- sempy_labs/directlake/_directlake_schema_compare.py +25 -16
- sempy_labs/directlake/_directlake_schema_sync.py +24 -12
- sempy_labs/directlake/_dl_helper.py +74 -55
- sempy_labs/directlake/_generate_shared_expression.py +10 -9
- sempy_labs/directlake/_get_directlake_lakehouse.py +32 -36
- sempy_labs/directlake/_get_shared_expression.py +4 -3
- sempy_labs/directlake/_guardrails.py +12 -6
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +15 -9
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +16 -10
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +35 -31
- sempy_labs/directlake/_update_directlake_partition_entity.py +39 -31
- sempy_labs/directlake/_warm_cache.py +87 -65
- sempy_labs/lakehouse/_get_lakehouse_columns.py +23 -26
- sempy_labs/lakehouse/_get_lakehouse_tables.py +27 -38
- sempy_labs/lakehouse/_lakehouse.py +7 -20
- sempy_labs/lakehouse/_shortcuts.py +42 -23
- sempy_labs/migration/_create_pqt_file.py +16 -11
- sempy_labs/migration/_refresh_calc_tables.py +16 -10
- sempy_labs/report/_download_report.py +9 -8
- sempy_labs/report/_generate_report.py +85 -44
- sempy_labs/report/_paginated.py +9 -9
- sempy_labs/report/_report_bpa.py +15 -11
- sempy_labs/report/_report_functions.py +80 -91
- sempy_labs/report/_report_helper.py +8 -4
- sempy_labs/report/_report_list_functions.py +24 -13
- sempy_labs/report/_report_rebind.py +17 -16
- sempy_labs/report/_reportwrapper.py +41 -33
- sempy_labs/tom/_model.py +139 -21
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/top_level.txt +0 -0
sempy_labs/_spark.py
CHANGED
@@ -6,9 +6,10 @@ from sempy_labs._helper_functions import (
     resolve_workspace_name_and_id,
 )
 from sempy.fabric.exceptions import FabricHTTPException
+from uuid import UUID
 
 
-def list_custom_pools(workspace: Optional[str] = None) -> pd.DataFrame:
+def list_custom_pools(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
     """
     Lists all `custom pools <https://learn.microsoft.com/fabric/data-engineering/create-custom-spark-pools>`_ within a workspace.
 
@@ -16,7 +17,7 @@ def list_custom_pools(workspace: Optional[str] = None) -> pd.DataFrame:
 
     Parameters
     ----------
-    workspace : str, default=None
+    workspace : str | uuid.UUID, default=None
         The name of the Fabric workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
@@ -27,7 +28,7 @@ def list_custom_pools(workspace: Optional[str] = None) -> pd.DataFrame:
         A pandas dataframe showing all the custom pools within the Fabric workspace.
     """
 
-    (
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     df = pd.DataFrame(
         columns=[
@@ -95,7 +96,7 @@ def create_custom_pool(
     node_family: str = "MemoryOptimized",
     auto_scale_enabled: bool = True,
     dynamic_executor_allocation_enabled: bool = True,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Creates a `custom pool <https://learn.microsoft.com/fabric/data-engineering/create-custom-spark-pools>`_ within a workspace.
@@ -122,13 +123,13 @@ def create_custom_pool(
         The status of `auto scale <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
     dynamic_executor_allocation_enabled : bool, default=True
         The status of the `dynamic executor allocation <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
-    workspace : str, default=None
-        The name of the Fabric workspace.
+    workspace : str | uuid.UUID, default=None
+        The name or ID of the Fabric workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
 
-    (
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     request_body = {
         "name": pool_name,
@@ -154,7 +155,7 @@ def create_custom_pool(
     if response.status_code != 201:
         raise FabricHTTPException(response)
     print(
-        f"{icons.green_dot} The '{pool_name}' spark pool has been created within the '{
+        f"{icons.green_dot} The '{pool_name}' spark pool has been created within the '{workspace_name}' workspace."
     )
 
 
@@ -168,7 +169,7 @@ def update_custom_pool(
     node_family: Optional[str] = None,
     auto_scale_enabled: Optional[bool] = None,
     dynamic_executor_allocation_enabled: Optional[bool] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Updates the properties of a `custom pool <https://learn.microsoft.com/fabric/data-engineering/create-custom-spark-pools>`_ within a workspace.
@@ -203,20 +204,20 @@ def update_custom_pool(
     dynamic_executor_allocation_enabled : bool, default=None
         The status of the `dynamic executor allocation <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
         Defaults to None which keeps the existing property setting.
-    workspace : str, default=None
-        The name of the Fabric workspace.
+    workspace : str | uuid.UUID, default=None
+        The name or ID of the Fabric workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
 
-    (
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     df = list_custom_pools(workspace=workspace)
     df_pool = df[df["Custom Pool Name"] == pool_name]
 
     if len(df_pool) == 0:
         raise ValueError(
-            f"{icons.red_dot} The '{pool_name}' custom pool does not exist within the '{
+            f"{icons.red_dot} The '{pool_name}' custom pool does not exist within the '{workspace_name}'. Please choose a valid custom pool."
        )
 
     if node_family is None:
@@ -262,11 +263,11 @@ def update_custom_pool(
     if response.status_code != 200:
         raise FabricHTTPException(response)
     print(
-        f"{icons.green_dot} The '{pool_name}' spark pool within the '{
+        f"{icons.green_dot} The '{pool_name}' spark pool within the '{workspace_name}' workspace has been updated."
     )
 
 
-def delete_custom_pool(pool_name: str, workspace: Optional[str] = None):
+def delete_custom_pool(pool_name: str, workspace: Optional[str | UUID] = None):
     """
     Deletes a `custom pool <https://learn.microsoft.com/fabric/data-engineering/create-custom-spark-pools>`_ within a workspace.
 
@@ -276,35 +277,35 @@ def delete_custom_pool(pool_name: str, workspace: Optional[str] = None):
     ----------
     pool_name : str
         The custom pool name.
-    workspace : str, default=None
-        The name of the Fabric workspace.
+    workspace : str | uuid.UUID, default=None
+        The name or ID of the Fabric workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
 
-    (
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
-    dfL = list_custom_pools(workspace=
+    dfL = list_custom_pools(workspace=workspace_id)
     dfL_filt = dfL[dfL["Custom Pool Name"] == pool_name]
 
     if len(dfL_filt) == 0:
         raise ValueError(
-            f"{icons.red_dot} The '{pool_name}' custom pool does not exist within the '{
+            f"{icons.red_dot} The '{pool_name}' custom pool does not exist within the '{workspace_name}' workspace."
        )
-
+    pool_id = dfL_filt["Custom Pool ID"].iloc[0]
 
     client = fabric.FabricRestClient()
-    response = client.delete(f"/v1/workspaces/{workspace_id}/spark/pools/{
+    response = client.delete(f"/v1/workspaces/{workspace_id}/spark/pools/{pool_id}")
 
     if response.status_code != 200:
         raise FabricHTTPException(response)
     print(
-        f"{icons.green_dot} The '{pool_name}' spark pool has been deleted from the '{
+        f"{icons.green_dot} The '{pool_name}' spark pool has been deleted from the '{workspace_name}' workspace."
     )
 
 
 def get_spark_settings(
-    workspace: Optional[str] = None, return_dataframe: bool = True
+    workspace: Optional[str | UUID] = None, return_dataframe: bool = True
 ) -> pd.DataFrame | dict:
     """
     Shows the spark settings for a workspace.
@@ -313,8 +314,8 @@ def get_spark_settings(
 
     Parameters
     ----------
-    workspace : str, default=None
-        The name of the Fabric workspace.
+    workspace : str | uuid.UUID, default=None
+        The name or ID of the Fabric workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     return_dataframe : bool, default=True
@@ -326,7 +327,7 @@ def get_spark_settings(
         A pandas dataframe showing the spark settings for a workspace.
     """
 
-    (
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     df = pd.DataFrame(
         columns=[
@@ -393,7 +394,7 @@ def update_spark_settings(
     max_executors: Optional[int] = None,
     environment_name: Optional[str] = None,
     runtime_version: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Updates the spark settings for a workspace.
@@ -426,13 +427,13 @@ def update_spark_settings(
     runtime_version : str, default=None
         The `runtime version <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#environmentproperties>`_.
         Defaults to None which keeps the existing property setting.
-    workspace : str, default=None
-        The name of the Fabric workspace.
+    workspace : str | uuid.UUID, default=None
+        The name or ID of the Fabric workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
 
-    (
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     request_body = get_spark_settings(workspace=workspace, return_dataframe=False)
 
@@ -463,5 +464,5 @@ def update_spark_settings(
     if response.status_code != 200:
         raise FabricHTTPException(response)
     print(
-        f"{icons.green_dot} The spark settings within the '{
+        f"{icons.green_dot} The spark settings within the '{workspace_name}' workspace have been updated accordingly."
     )
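With 0.9.0 the Spark-pool helpers accept the workspace as either a name or a UUID, resolved internally through resolve_workspace_name_and_id. A minimal usage sketch (the workspace/pool names and UUID below are placeholders, and a Fabric notebook session is assumed):

from uuid import UUID
from sempy_labs._spark import list_custom_pools, delete_custom_pool

# Reference the workspace by name...
pools = list_custom_pools(workspace="Sales Workspace")

# ...or by its ID; both forms resolve to the same workspace.
pools = list_custom_pools(workspace=UUID("00000000-0000-0000-0000-000000000000"))

delete_custom_pool(pool_name="MyCustomPool", workspace="Sales Workspace")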
sempy_labs/_sql.py
CHANGED
@@ -3,10 +3,14 @@ import pandas as pd
 from typing import Optional, Union, List
 from sempy._utils._log import log
 import struct
-import uuid
 from itertools import chain, repeat
 from sempy.fabric.exceptions import FabricHTTPException
-from sempy_labs._helper_functions import
+from sempy_labs._helper_functions import (
+    resolve_lakehouse_name_and_id,
+    resolve_item_name_and_id,
+    resolve_workspace_name_and_id,
+)
+from uuid import UUID
 
 
 def _bytes2mswin_bstr(value: bytes) -> bytes:
@@ -31,22 +35,25 @@ def _bytes2mswin_bstr(value: bytes) -> bytes:
 class ConnectBase:
     def __init__(
         self,
-
-        workspace: Optional[Union[str,
+        item: str,
+        workspace: Optional[Union[str, UUID]] = None,
         timeout: Optional[int] = None,
         endpoint_type: str = "warehouse",
     ):
         from sempy.fabric._token_provider import SynapseTokenProvider
         import pyodbc
 
-
-        workspace_id = fabric.resolve_workspace_id(workspace)
+        (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
-        # Resolve the appropriate ID (warehouse or lakehouse)
+        # Resolve the appropriate ID and name (warehouse or lakehouse)
         if endpoint_type == "warehouse":
-            resource_id =
+            (resource_id, resource_name) = resolve_item_name_and_id(
+                item=item, type=endpoint_type.capitalize(), workspace=workspace_id
+            )
         else:
-            resource_id =
+            (resource_id, resource_name) = resolve_lakehouse_name_and_id(
+                lakehouse=item, workspace=workspace_id
+            )
 
         # Get the TDS endpoint
         client = fabric.FabricRestClient()
@@ -69,7 +76,7 @@ class ConnectBase:
         # Set up the connection string
         access_token = SynapseTokenProvider()()
         tokenstruct = _bytes2mswin_bstr(access_token.encode())
-        conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={
+        conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={resource_name};Encrypt=Yes;"
 
         if timeout is not None:
             conn_str += f"Connect Timeout={timeout};"
@@ -139,7 +146,7 @@ class ConnectWarehouse(ConnectBase):
     def __init__(
         self,
         warehouse: str,
-        workspace: Optional[Union[str,
+        workspace: Optional[Union[str, UUID]] = None,
         timeout: Optional[int] = None,
     ):
         super().__init__(
@@ -154,7 +161,7 @@ class ConnectLakehouse(ConnectBase):
     def __init__(
         self,
         lakehouse: str,
-        workspace: Optional[Union[str,
+        workspace: Optional[Union[str, UUID]] = None,
         timeout: Optional[int] = None,
     ):
         super().__init__(
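The ConnectWarehouse and ConnectLakehouse wrappers now take the target item plus a workspace given as either a name or a UUID; the base class resolves both the workspace and the item (name and ID) before building the TDS connection string. A minimal construction sketch (names and UUID are placeholders; queries are then issued through the returned connection object):

from uuid import UUID
from sempy_labs._sql import ConnectWarehouse, ConnectLakehouse

# Workspace referenced by name.
wh_conn = ConnectWarehouse(warehouse="MyWarehouse", workspace="Sales Workspace", timeout=60)

# Workspace referenced by ID.
lh_conn = ConnectLakehouse(lakehouse="MyLakehouse", workspace=UUID("00000000-0000-0000-0000-000000000000"))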
sempy_labs/_translations.py
CHANGED
@@ -3,29 +3,32 @@ import pandas as pd
 from typing import List, Optional, Union
 from sempy._utils._log import log
 import sempy_labs._icons as icons
-from sempy_labs._helper_functions import
+from sempy_labs._helper_functions import (
+    get_language_codes,
+)
+from uuid import UUID
 
 
 @log
 def translate_semantic_model(
-    dataset: str,
+    dataset: str | UUID,
     languages: Union[str, List[str]],
     exclude_characters: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ) -> pd.DataFrame:
     """
     Translates names, descriptions, display folders for all objects in a semantic model.
 
     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
     languages : str, List[str]
         The language code(s) in which to translate the semantic model.
     exclude_characters : str
         A string specifying characters which will be replaced by a space in the translation text when sent to the translation service.
-    workspace : str, default=None
-        The Fabric workspace name.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
 
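translate_semantic_model now accepts both the semantic model and the workspace by name or by ID. A minimal sketch based on the signature above (model/workspace names and the UUID are placeholders):

from uuid import UUID
from sempy_labs._translations import translate_semantic_model

# By names...
df = translate_semantic_model(dataset="AdventureWorks", languages=["it-IT", "de-DE"], workspace="Sales Workspace")

# ...or by the model's ID, with the workspace defaulting to the attached lakehouse/notebook workspace.
df = translate_semantic_model(dataset=UUID("00000000-0000-0000-0000-000000000000"), languages="fr-FR")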
sempy_labs/_vertipaq.py
CHANGED
@@ -10,10 +10,11 @@ from pyspark.sql import SparkSession
 from sempy_labs._helper_functions import (
     format_dax_object_name,
     resolve_lakehouse_name,
-    resolve_dataset_id,
     save_as_delta_table,
     resolve_workspace_capacity,
-
+    _get_column_aggregate,
+    resolve_workspace_name_and_id,
+    resolve_dataset_name_and_id,
 )
 from sempy_labs._list_functions import list_relationships, list_tables
 from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
@@ -22,12 +23,13 @@ from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
 from pathlib import Path
+from uuid import UUID
 
 
 @log
 def vertipaq_analyzer(
-    dataset: str,
-    workspace: Optional[str] = None,
+    dataset: str | UUID,
+    workspace: Optional[str | UUID] = None,
     export: Optional[str] = None,
     read_stats_from_data: bool = False,
     **kwargs,
@@ -37,10 +39,10 @@ def vertipaq_analyzer(
 
     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
-    workspace : str, default=None
-        The Fabric workspace name in which the semantic model exists.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
+    workspace : str| uuid.UUID, default=None
+        The Fabric workspace name or ID in which the semantic model exists.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     export : str, default=None
@@ -49,10 +51,6 @@ def vertipaq_analyzer(
         Default value: None.
     read_stats_from_data : bool, default=False
         Setting this parameter to true has the function get Column Cardinality and Missing Rows using DAX (Direct Lake semantic models achieve this using a Spark query to the lakehouse).
-
-    Returns
-    -------
-
     """
 
     from sempy_labs.tom import connect_semantic_model
@@ -68,7 +66,8 @@ def vertipaq_analyzer(
         "ignore", message="createDataFrame attempted Arrow optimization*"
     )
 
-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
 
     vertipaq_map = {
         "Model": {
@@ -135,7 +134,7 @@ def vertipaq_analyzer(
     }
 
     with connect_semantic_model(
-        dataset=
+        dataset=dataset_id, workspace=workspace_id, readonly=True
     ) as tom:
         compat_level = tom.model.Model.Database.CompatibilityLevel
         is_direct_lake = tom.is_direct_lake()
@@ -144,24 +143,28 @@ def vertipaq_analyzer(
         column_count = len(list(tom.all_columns()))
         if table_count == 0:
             print(
-                f"{icons.warning} The '{
+                f"{icons.warning} The '{dataset_name}' semantic model within the '{workspace_name}' workspace has no tables. Vertipaq Analyzer can only be run if the semantic model has tables."
             )
             return
 
-    dfT = list_tables(dataset=
+    dfT = list_tables(dataset=dataset_id, extended=True, workspace=workspace_id)
 
     dfT.rename(columns={"Name": "Table Name"}, inplace=True)
     columns_to_keep = list(vertipaq_map["Tables"].keys())
     dfT = dfT[dfT.columns.intersection(columns_to_keep)]
 
-    dfC = fabric.list_columns(dataset=
+    dfC = fabric.list_columns(dataset=dataset_id, extended=True, workspace=workspace_id)
     dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
     dfC.rename(columns={"Column Cardinality": "Cardinality"}, inplace=True)
-    dfH = fabric.list_hierarchies(
-
-
+    dfH = fabric.list_hierarchies(
+        dataset=dataset_id, extended=True, workspace=workspace_id
+    )
+    dfR = list_relationships(dataset=dataset_id, extended=True, workspace=workspace_id)
+    dfP = fabric.list_partitions(
+        dataset=dataset_id, extended=True, workspace=workspace_id
+    )
     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
-        get_direct_lake_source(dataset=
+        get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
     )
 
     dfR["Missing Rows"] = 0
@@ -308,7 +311,7 @@ def vertipaq_analyzer(
             query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),userelationship({fromObject},{toObject}),isblank({toObject}))\n)"
 
             result = fabric.evaluate_dax(
-                dataset=
+                dataset=dataset_id, dax_string=query, workspace=workspace_id
             )
 
             try:
@@ -407,7 +410,7 @@ def vertipaq_analyzer(
 
     dfModel = pd.DataFrame(
         {
-            "Dataset Name":
+            "Dataset Name": dataset_name,
             "Total Size": y,
             "Table Count": table_count,
             "Column Count": column_count,
@@ -516,7 +519,9 @@ def vertipaq_analyzer(
         if len(lakeT_filt) == 0:
             runId = 1
         else:
-            max_run_id =
+            max_run_id = _get_column_aggregate(
+                lakehouse=lakehouse, table_name=lakeTName
+            )
             runId = max_run_id + 1
 
         dfMap = {
@@ -532,19 +537,19 @@ def vertipaq_analyzer(
             f"{icons.in_progress} Saving Vertipaq Analyzer to delta tables in the lakehouse...\n"
         )
         now = datetime.datetime.now()
-        dfD = fabric.list_datasets(workspace=
-        dfD_filt = dfD[dfD["Dataset
+        dfD = fabric.list_datasets(workspace=workspace_id, mode="rest")
+        dfD_filt = dfD[dfD["Dataset Id"] == dataset_id]
         configured_by = dfD_filt["Configured By"].iloc[0]
-        capacity_id, capacity_name = resolve_workspace_capacity(workspace=
+        capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace_id)
 
         for key_name, (obj, df) in dfMap.items():
             df["Capacity Name"] = capacity_name
             df["Capacity Id"] = capacity_id
             df["Configured By"] = configured_by
-            df["Workspace Name"] =
-            df["Workspace Id"] =
-            df["Dataset Name"] =
-            df["Dataset Id"] =
+            df["Workspace Name"] = workspace_name
+            df["Workspace Id"] = workspace_id
+            df["Dataset Name"] = dataset_name
+            df["Dataset Id"] = dataset_id
             df["RunId"] = runId
             df["Timestamp"] = now
 
@@ -605,7 +610,7 @@ def vertipaq_analyzer(
            "dfH_filt": dfH_filt,
        }
 
-        zipFileName = f"{
+        zipFileName = f"{workspace_name}.{dataset_name}.zip"
 
        folderPath = "/lakehouse/default/Files"
        subFolderPath = os.path.join(folderPath, "VertipaqAnalyzer")
@@ -631,7 +636,7 @@ def vertipaq_analyzer(
        if os.path.exists(filePath):
            os.remove(filePath)
        print(
-            f"{icons.green_dot} The Vertipaq Analyzer info for the '{
+            f"{icons.green_dot} The Vertipaq Analyzer info for the '{dataset_name}' semantic model in the '{workspace_name}' workspace has been saved "
            f"to the 'Vertipaq Analyzer/{zipFileName}' in the default lakehouse attached to this notebook."
        )
 
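vertipaq_analyzer can now be pointed at the model and workspace by name or by ID; both are resolved up front via resolve_workspace_name_and_id and resolve_dataset_name_and_id, and all downstream calls use the resolved IDs. A minimal sketch (placeholder names/UUIDs, run from a Fabric notebook):

from uuid import UUID
from sempy_labs._vertipaq import vertipaq_analyzer

# Analyze a model addressed by name within a named workspace.
vertipaq_analyzer(dataset="AdventureWorks", workspace="Sales Workspace")

# Analyze a model addressed by ID, pulling column stats from the data.
vertipaq_analyzer(dataset=UUID("00000000-0000-0000-0000-000000000000"), read_stats_from_data=True)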
sempy_labs/_warehouses.py
CHANGED
@@ -8,13 +8,14 @@ import pandas as pd
 from typing import Optional
 import sempy_labs._icons as icons
 from sempy.fabric.exceptions import FabricHTTPException
+from uuid import UUID
 
 
 def create_warehouse(
     warehouse: str,
     description: Optional[str] = None,
     case_insensitive_collation: bool = False,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Creates a Fabric warehouse.
@@ -29,13 +30,13 @@ def create_warehouse(
         A description of the warehouse.
     case_insensitive_collation: bool, default=False
         If True, creates the warehouse with case-insensitive collation.
-    workspace : str, default=None
-        The Fabric workspace name.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
 
-    (
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     request_body = {"displayName": warehouse}
 
@@ -55,11 +56,11 @@ def create_warehouse(
     lro(client, response, status_codes=[201, 202])
 
     print(
-        f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{
+        f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{workspace_name}' workspace."
     )
 
 
-def list_warehouses(workspace: Optional[str] = None) -> pd.DataFrame:
+def list_warehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
     """
     Shows the warehouses within a workspace.
 
@@ -67,8 +68,8 @@ def list_warehouses(workspace: Optional[str] = None) -> pd.DataFrame:
 
     Parameters
     ----------
-    workspace : str, default=None
-        The Fabric workspace name.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
 
@@ -89,7 +90,7 @@ def list_warehouses(workspace: Optional[str] = None) -> pd.DataFrame:
         ]
     )
 
-    (
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     client = fabric.FabricRestClient()
     response = client.get(f"/v1/workspaces/{workspace_id}/warehouses")
@@ -115,7 +116,7 @@ def list_warehouses(workspace: Optional[str] = None) -> pd.DataFrame:
     return df
 
 
-def delete_warehouse(name: str, workspace: Optional[str] = None):
+def delete_warehouse(name: str, workspace: Optional[str | UUID] = None):
     """
     Deletes a Fabric warehouse.
 
@@ -125,16 +126,16 @@ def delete_warehouse(name: str, workspace: Optional[str] = None):
     ----------
     name: str
         Name of the warehouse.
-    workspace : str, default=None
-        The Fabric workspace name.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
 
-    (
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     item_id = fabric.resolve_item_id(
-        item_name=name, type="Warehouse", workspace=
+        item_name=name, type="Warehouse", workspace=workspace_id
     )
 
     client = fabric.FabricRestClient()
@@ -144,22 +145,22 @@ def delete_warehouse(name: str, workspace: Optional[str] = None):
         raise FabricHTTPException(response)
 
     print(
-        f"{icons.green_dot} The '{name}' warehouse within the '{
+        f"{icons.green_dot} The '{name}' warehouse within the '{workspace_name}' workspace has been deleted."
     )
 
 
 def get_warehouse_tables(
-    warehouse: str, workspace: Optional[str] = None
+    warehouse: str | UUID, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
     Shows a list of the tables in the Fabric warehouse. This function is based on INFORMATION_SCHEMA.TABLES.
 
     Parameters
     ----------
-    warehouse : str
-        Name of the Fabric warehouse.
-    workspace : str, default=None
-        The Fabric workspace name.
+    warehouse : str | uuid.UUID
+        Name or ID of the Fabric warehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
 
@@ -184,17 +185,17 @@ def get_warehouse_tables(
 
 
 def get_warehouse_columns(
-    warehouse: str, workspace: Optional[str] = None
+    warehouse: str | UUID, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
     Shows a list of the columns in each table within the Fabric warehouse. This function is based on INFORMATION_SCHEMA.COLUMNS.
 
     Parameters
     ----------
-    warehouse : str
-        Name of the Fabric warehouse.
-    workspace : str, default=None
-        The Fabric workspace name.
+    warehouse : str | uuid.UUID
+        Name or ID of the Fabric warehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
 
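The warehouse helpers follow the same pattern: the workspace, and for the table/column functions the warehouse itself, can be referenced by name or by ID. A minimal sketch based on the signatures above (placeholder names and UUID, Fabric notebook assumed):

from uuid import UUID
from sempy_labs._warehouses import create_warehouse, get_warehouse_tables, delete_warehouse

create_warehouse(warehouse="MyWarehouse", description="Demo warehouse", workspace="Sales Workspace")

# The warehouse can now also be addressed by its ID.
tables = get_warehouse_tables(warehouse=UUID("00000000-0000-0000-0000-000000000000"), workspace="Sales Workspace")

delete_warehouse(name="MyWarehouse", workspace="Sales Workspace")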