semantic-link-labs 0.8.11__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of semantic-link-labs might be problematic.
- {semantic_link_labs-0.8.11.dist-info → semantic_link_labs-0.9.1.dist-info}/METADATA +9 -6
- {semantic_link_labs-0.8.11.dist-info → semantic_link_labs-0.9.1.dist-info}/RECORD +40 -40
- {semantic_link_labs-0.8.11.dist-info → semantic_link_labs-0.9.1.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +29 -2
- sempy_labs/_authentication.py +78 -4
- sempy_labs/_capacities.py +770 -200
- sempy_labs/_capacity_migration.py +7 -37
- sempy_labs/_clear_cache.py +8 -8
- sempy_labs/_deployment_pipelines.py +1 -1
- sempy_labs/_gateways.py +2 -0
- sempy_labs/_generate_semantic_model.py +8 -0
- sempy_labs/_helper_functions.py +119 -79
- sempy_labs/_job_scheduler.py +138 -3
- sempy_labs/_list_functions.py +40 -31
- sempy_labs/_model_bpa.py +207 -204
- sempy_labs/_model_bpa_bulk.py +2 -2
- sempy_labs/_model_bpa_rules.py +3 -3
- sempy_labs/_notebooks.py +2 -0
- sempy_labs/_query_scale_out.py +8 -0
- sempy_labs/_sql.py +11 -7
- sempy_labs/_vertipaq.py +4 -2
- sempy_labs/_warehouses.py +6 -6
- sempy_labs/admin/_basic_functions.py +156 -103
- sempy_labs/admin/_domains.py +7 -2
- sempy_labs/admin/_git.py +4 -1
- sempy_labs/admin/_items.py +7 -2
- sempy_labs/admin/_scanner.py +7 -4
- sempy_labs/directlake/_directlake_schema_compare.py +7 -2
- sempy_labs/directlake/_directlake_schema_sync.py +6 -0
- sempy_labs/directlake/_dl_helper.py +51 -31
- sempy_labs/directlake/_get_directlake_lakehouse.py +20 -27
- sempy_labs/directlake/_update_directlake_partition_entity.py +5 -0
- sempy_labs/lakehouse/_get_lakehouse_columns.py +17 -22
- sempy_labs/lakehouse/_get_lakehouse_tables.py +20 -32
- sempy_labs/lakehouse/_lakehouse.py +2 -19
- sempy_labs/report/_generate_report.py +45 -0
- sempy_labs/report/_report_bpa.py +2 -2
- sempy_labs/tom/_model.py +97 -16
- {semantic_link_labs-0.8.11.dist-info → semantic_link_labs-0.9.1.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.8.11.dist-info → semantic_link_labs-0.9.1.dist-info}/top_level.txt +0 -0
sempy_labs/_capacity_migration.py
CHANGED
@@ -138,14 +138,11 @@ def migrate_workspaces(
 @log
 def migrate_capacities(
     azure_subscription_id: str,
-    key_vault_uri: str,
-    key_vault_tenant_id: str,
-    key_vault_client_id: str,
-    key_vault_client_secret: str,
     resource_group: str | dict,
     capacities: Optional[str | List[str]] = None,
     use_existing_rg_for_A_sku: bool = True,
     p_sku_only: bool = True,
+    **kwargs,
 ):
     """
     This function creates new Fabric capacities for given A or P sku capacities and reassigns their workspaces to the newly created capacity.
@@ -154,14 +151,6 @@ def migrate_capacities(
     ----------
     azure_subscription_id : str
         The Azure subscription ID.
-    key_vault_uri : str
-        The name of the `Azure key vault <https://azure.microsoft.com/products/key-vault>`_ URI. Example: "https://<Key Vault Name>.vault.azure.net/"
-    key_vault_tenant_id : str
-        The name of the Azure key vault secret storing the Tenant ID.
-    key_vault_client_id : str
-        The name of the Azure key vault secret storing the Client ID.
-    key_vault_client_secret : str
-        The name of the Azure key vault secret storing the Client Secret.
     resource_group : str | dict
         The name of the Azure resource group.
         For A skus, this parameter will be ignored and the resource group used for the F sku will be the same as the A sku's resource group.
@@ -233,10 +222,6 @@ def migrate_capacities(
         create_fabric_capacity(
             capacity_name=tgt_capacity,
             azure_subscription_id=azure_subscription_id,
-            key_vault_uri=key_vault_uri,
-            key_vault_tenant_id=key_vault_tenant_id,
-            key_vault_client_id=key_vault_client_id,
-            key_vault_client_secret=key_vault_client_secret,
             resource_group=rg,
             region=region,
             sku=icons.sku_mapping.get(sku_size),
@@ -248,7 +233,7 @@ def migrate_capacities(
         )

         # Migrate settings to new capacity
-        migrate_settings(source_capacity=cap_name, target_capacity=tgt_capacity)
+        # migrate_settings(source_capacity=cap_name, target_capacity=tgt_capacity)


 @log
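Taken together, these hunks drop the four key_vault_* parameters from migrate_capacities, absorb stray keyword arguments via **kwargs so older call sites keep working, and comment out the per-capacity migrate_settings call. A minimal, hedged sketch of a call against the new signature; every value below is a placeholder and the top-level import path is an assumption:

import sempy_labs as labs  # assumed public import path

labs.migrate_capacities(
    azure_subscription_id="00000000-0000-0000-0000-000000000000",  # placeholder
    resource_group="my-resource-group",                            # placeholder
    capacities=["P1CapacityWest"],                                 # placeholder
    use_existing_rg_for_A_sku=True,
    p_sku_only=True,
)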
@@ -624,15 +609,12 @@ def migrate_spark_settings(source_capacity: str, target_capacity: str):
 @log
 def migrate_fabric_trial_capacity(
     azure_subscription_id: str,
-    key_vault_uri: str,
-    key_vault_tenant_id: str,
-    key_vault_client_id: str,
-    key_vault_client_secret: str,
     resource_group: str,
     source_capacity: str,
     target_capacity: str,
     target_capacity_sku: str = "F64",
     target_capacity_admin_members: Optional[str | List[str]] = None,
+    **kwargs,
 ):
     """
     This function migrates a Fabric trial capacity to a Fabric capacity. If the 'target_capacity' does not exist, it is created with the relevant target capacity parameters (sku, region, admin members).
@@ -641,14 +623,6 @@ def migrate_fabric_trial_capacity(
     ----------
     azure_subscription_id : str
         The Azure subscription ID.
-    key_vault_uri : str
-        The name of the `Azure key vault <https://azure.microsoft.com/products/key-vault>`_ URI. Example: "https://<Key Vault Name>.vault.azure.net/"
-    key_vault_tenant_id : str
-        The name of the Azure key vault secret storing the Tenant ID.
-    key_vault_client_id : str
-        The name of the Azure key vault secret storing the Client ID.
-    key_vault_client_secret : str
-        The name of the Azure key vault secret storing the Client Secret.
     resource_group : str
         The name of the Azure resource group.
     source_capacity : str
@@ -701,10 +675,6 @@ def migrate_fabric_trial_capacity(
         create_fabric_capacity(
             capacity_name=target_capacity,
             azure_subscription_id=azure_subscription_id,
-            key_vault_uri=key_vault_uri,
-            key_vault_tenant_id=key_vault_tenant_id,
-            key_vault_client_id=key_vault_client_id,
-            key_vault_client_secret=key_vault_client_secret,
             resource_group=resource_group,
             region=target_capacity_region,
             admin_members=target_capacity_admin_members,
@@ -718,7 +688,7 @@ def migrate_fabric_trial_capacity(
         )

         # This migrates all the capacity settings
-        migrate_settings(
-            source_capacity=source_capacity,
-            target_capacity=target_capacity,
-        )
+        # migrate_settings(
+        #     source_capacity=source_capacity,
+        #     target_capacity=target_capacity,
+        # )
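migrate_fabric_trial_capacity follows the same pattern, so a call now looks roughly like the sketch below; all values are placeholders, the import path is assumed, and authentication is expected to be configured outside the call:

import sempy_labs as labs  # assumed public import path

labs.migrate_fabric_trial_capacity(
    azure_subscription_id="00000000-0000-0000-0000-000000000000",  # placeholder
    resource_group="my-resource-group",                            # placeholder
    source_capacity="MyTrialCapacity",                             # placeholder
    target_capacity="myfabriccapacity",                            # placeholder
    target_capacity_sku="F64",
)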
sempy_labs/_clear_cache.py
CHANGED
@@ -13,6 +13,7 @@ from sempy.fabric.exceptions import FabricHTTPException
 from uuid import UUID


+@log
 def clear_cache(dataset: str | UUID, workspace: Optional[str | UUID] = None):
     """
     Clears the cache of a semantic model.
@@ -105,7 +106,7 @@ def backup_semantic_model(

 @log
 def restore_semantic_model(
-    dataset: str
+    dataset: str,
     file_path: str,
     allow_overwrite: bool = True,
     ignore_incompatibilities: bool = True,
@@ -118,8 +119,8 @@ def restore_semantic_model(

     Parameters
     ----------
-    dataset : str
-        Name
+    dataset : str
+        Name of the semantic model.
     file_path : str
         The location in which to backup the semantic model. Must end in '.abf'.
         Example 1: file_path = 'MyModel.abf'
@@ -142,11 +143,10 @@ def restore_semantic_model(
     )

     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

     tmsl = {
         "restore": {
-            "database": dataset_name,
+            "database": dataset,
             "file": file_path,
             "allowOverwrite": allow_overwrite,
             "security": "copyAll",
@@ -160,7 +160,7 @@ def restore_semantic_model(
     fabric.execute_tmsl(script=tmsl, workspace=workspace_id)

     print(
-        f"{icons.green_dot} The '{
+        f"{icons.green_dot} The '{dataset}' semantic model has been restored to the '{workspace_name}' workspace based on the '{file_path}' backup file."
     )


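With the missing comma restored and the TMSL "restore" command now pointing at the dataset argument directly, a call looks roughly like the sketch below; the model and file names are placeholders and the top-level import path is an assumption:

import sempy_labs as labs  # assumed public import path

labs.restore_semantic_model(
    dataset="MyModel",        # placeholder semantic model name
    file_path="MyModel.abf",  # placeholder backup file; must end in '.abf'
    allow_overwrite=True,
    ignore_incompatibilities=True,
)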
@@ -318,8 +318,8 @@ def list_storage_account_files(
         ]
     )

-
-    fs =
+    client = _get_adls_client(storage_account)
+    fs = client.get_file_system_client(container)

     for x in list(fs.get_paths()):
         if not x.is_directory:
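The rewritten block obtains the ADLS file-system client in two explicit steps. A hedged sketch of that pattern, assuming _get_adls_client returns an azure.storage.filedatalake.DataLakeServiceClient (account and container names are placeholders):

from sempy_labs._helper_functions import _get_adls_client  # private helper

client = _get_adls_client("mystorageaccount")   # placeholder storage account
fs = client.get_file_system_client("backups")   # placeholder container
for path in fs.get_paths():
    if not path.is_directory:
        print(path.name)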
sempy_labs/_deployment_pipelines.py
CHANGED
@@ -148,7 +148,7 @@ def list_deployment_pipeline_stage_items(
         raise ValueError(
             f"{icons.red_dot} The '{stage_name}' stage does not exist within the '{deployment_pipeline}' deployment pipeline."
         )
-    stage_id = dfPS_filt["Deployment Pipeline Stage
+    stage_id = dfPS_filt["Deployment Pipeline Stage Id"].iloc[0]

     client = fabric.FabricRestClient()
     response = client.get(
sempy_labs/_gateways.py
CHANGED
@@ -1,4 +1,5 @@
 import sempy.fabric as fabric
+from sempy._utils._log import log
 import pandas as pd
 from typing import Optional
 from sempy.fabric.exceptions import FabricHTTPException
@@ -13,6 +14,7 @@ from uuid import UUID
 import sempy_labs._icons as icons


+@log
 def list_gateways() -> pd.DataFrame:
     """
     Returns a list of all gateways the user has permission for, including on-premises, on-premises (personal mode), and virtual network gateways.
sempy_labs/_generate_semantic_model.py
CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
 import json
 import os
 from typing import Optional, List
+from sempy._utils._log import log
 from sempy_labs._helper_functions import (
     resolve_lakehouse_name,
     resolve_workspace_name_and_id,
@@ -17,6 +18,7 @@ from sempy_labs._refresh_semantic_model import refresh_semantic_model
 from uuid import UUID


+@log
 def create_blank_semantic_model(
     dataset: str,
     compatibility_level: int = 1605,
@@ -117,6 +119,7 @@ def create_blank_semantic_model(
     )


+@log
 def create_semantic_model_from_bim(
     dataset: str, bim_file: dict, workspace: Optional[str | UUID] = None
 ):
@@ -183,6 +186,7 @@ def create_semantic_model_from_bim(
     )


+@log
 def update_semantic_model_from_bim(
     dataset: str | UUID, bim_file: dict, workspace: Optional[str | UUID] = None
 ):
@@ -242,6 +246,7 @@ def update_semantic_model_from_bim(
     )


+@log
 def deploy_semantic_model(
     source_dataset: str,
     source_workspace: Optional[str | UUID] = None,
@@ -323,6 +328,7 @@ def deploy_semantic_model(
     refresh_semantic_model(dataset=target_dataset, workspace=target_workspace_id)


+@log
 def get_semantic_model_bim(
     dataset: str | UUID,
     workspace: Optional[str | UUID] = None,
@@ -386,6 +392,7 @@ def get_semantic_model_bim(
     return bimJson


+@log
 def get_semantic_model_definition(
     dataset: str | UUID,
     format: str = "TMSL",
@@ -454,6 +461,7 @@ def get_semantic_model_definition(
     return decoded_parts


+@log
 def get_semantic_model_size(
     dataset: str | UUID, workspace: Optional[str | UUID] = None
 ):
sempy_labs/_helper_functions.py
CHANGED
@@ -11,8 +11,8 @@ import datetime
 from typing import Optional, Tuple, List
 from uuid import UUID
 import sempy_labs._icons as icons
-import urllib.parse
 from azure.core.credentials import TokenCredential, AccessToken
+import urllib.parse
 import numpy as np
 from IPython.display import display, HTML

@@ -52,6 +52,24 @@ def create_abfss_path(
     return f"abfss://{lakehouse_workspace_id}@onelake.dfs.fabric.microsoft.com/{lakehouse_id}/Tables/{delta_table_name}"


+def _get_default_file_path() -> str:
+
+    default_file_storage = _get_fabric_context_setting(name="fs.defaultFS")
+
+    return default_file_storage.split("@")[-1][:-1]
+
+
+def _split_abfss_path(path: str) -> Tuple[UUID, UUID, str]:
+
+    parsed_url = urllib.parse.urlparse(path)
+
+    workspace_id = parsed_url.netloc.split("@")[0]
+    item_id = parsed_url.path.lstrip("/").split("/")[0]
+    delta_table_name = parsed_url.path.split("/")[-1]
+
+    return workspace_id, item_id, delta_table_name
+
+
 def format_dax_object_name(table: str, column: str) -> str:
     """
     Formats a table/column combination to the 'Table Name'[Column Name] format.
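The two new private helpers build and split OneLake abfss paths. A hedged illustration of the round trip, using the create_abfss_path shown in context (both GUIDs are placeholders):

from sempy_labs._helper_functions import create_abfss_path, _split_abfss_path

path = create_abfss_path(
    lakehouse_id="11111111-1111-1111-1111-111111111111",            # placeholder
    lakehouse_workspace_id="22222222-2222-2222-2222-222222222222",  # placeholder
    delta_table_name="sales",
)
# path looks like "abfss://2222...@onelake.dfs.fabric.microsoft.com/1111.../Tables/sales"
workspace_id, item_id, table_name = _split_abfss_path(path)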
@@ -172,23 +190,40 @@ def resolve_item_name_and_id(
     return item_name, item_id


-def
-
+def resolve_lakehouse_name_and_id(
+    lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
 ) -> Tuple[str, UUID]:

     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    type = "Lakehouse"

-    if
-
-
-        item_id=
+    if lakehouse is None:
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse_name = fabric.resolve_item_name(
+            item_id=lakehouse_id, type=type, workspace=workspace_id
+        )
+    elif _is_valid_uuid(lakehouse):
+        lakehouse_id = lakehouse
+        lakehouse_name = fabric.resolve_item_name(
+            item_id=lakehouse_id, type=type, workspace=workspace_id
         )
     else:
-
-
-        item_name=
+        lakehouse_name = lakehouse
+        lakehouse_id = fabric.resolve_item_id(
+            item_name=lakehouse, type=type, workspace=workspace_id
         )

+    return lakehouse_name, lakehouse_id
+
+
+def resolve_dataset_name_and_id(
+    dataset: str | UUID, workspace: Optional[str | UUID] = None
+) -> Tuple[str, UUID]:
+
+    (dataset_name, dataset_id) = resolve_item_name_and_id(
+        item=dataset, type="SemanticModel", workspace=workspace
+    )
+
     return dataset_name, dataset_id


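resolve_lakehouse_name_and_id now accepts a name, a UUID, or None (falling back to the lakehouse attached to the notebook), and resolve_dataset_name_and_id is a thin wrapper over resolve_item_name_and_id. A hedged sketch with placeholder names:

from sempy_labs._helper_functions import (
    resolve_lakehouse_name_and_id,
    resolve_dataset_name_and_id,
)

# Name or UUID both work; None falls back to the attached lakehouse.
lakehouse_name, lakehouse_id = resolve_lakehouse_name_and_id(lakehouse="Sales")
dataset_name, dataset_id = resolve_dataset_name_and_id(dataset="MyModel")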
@@ -280,15 +315,15 @@ def resolve_lakehouse_name(


 def resolve_lakehouse_id(
-    lakehouse: str, workspace: Optional[str | UUID] = None
+    lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
 ) -> UUID:
     """
     Obtains the ID of the Fabric lakehouse.

     Parameters
     ----------
-    lakehouse : str
-        The name of the Fabric lakehouse.
+    lakehouse : str | uuid.UUID, default=None
+        The name or ID of the Fabric lakehouse.
     workspace : str | uuid.UUID, default=None
         The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
@@ -300,9 +335,16 @@ def resolve_lakehouse_id(
         The ID of the Fabric lakehouse.
     """

-
-
-    )
+    if lakehouse is None:
+        lakehouse_id = fabric.get_lakehouse_id()
+    elif _is_valid_uuid(lakehouse):
+        lakehouse_id = lakehouse
+    else:
+        lakehouse_id = fabric.resolve_item_id(
+            item_name=lakehouse, type="Lakehouse", workspace=workspace
+        )
+
+    return lakehouse_id


 def get_direct_lake_sql_endpoint(
@@ -328,9 +370,6 @@ def get_direct_lake_sql_endpoint(

     from sempy_labs.tom import connect_semantic_model

-    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
-
     # dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
     # dfP_filt = dfP[dfP["Mode"] == "DirectLake"]

@@ -340,7 +379,7 @@ def get_direct_lake_sql_endpoint(
     # )

     with connect_semantic_model(
-        dataset=
+        dataset=dataset, readonly=True, workspace=workspace
     ) as tom:
         sqlEndpointId = None
         for e in tom.model.Expressions:
@@ -426,7 +465,7 @@ def save_as_delta_table(
     write_mode: str,
     merge_schema: bool = False,
     schema: Optional[dict] = None,
-    lakehouse: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
     workspace: Optional[str | UUID] = None,
 ):
     """
@@ -444,8 +483,8 @@ def save_as_delta_table(
         Merges the schemas of the dataframe to the delta table.
     schema : dict, default=None
         A dictionary showing the schema of the columns and their data types.
-    lakehouse : str, default=None
-        The Fabric lakehouse
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
         Defaults to None which resolves to the lakehouse attached to the notebook.
     workspace : str | uuid.UUID, default=None
         The Fabric workspace name or ID.
@@ -468,21 +507,16 @@ def save_as_delta_table(
     )

     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse=lakehouse, workspace=workspace_id
+    )

-
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(
-            lakehouse_id=lakehouse_id, workspace=workspace_id
-        )
-    else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
-
-    writeModes = ["append", "overwrite"]
+    write_modes = ["append", "overwrite"]
     write_mode = write_mode.lower()

-    if write_mode not in
+    if write_mode not in write_modes:
         raise ValueError(
-            f"{icons.red_dot} Invalid 'write_type' parameter. Choose from one of the following values: {
+            f"{icons.red_dot} Invalid 'write_type' parameter. Choose from one of the following values: {write_modes}."
         )

     if " " in delta_table_name:
@@ -507,16 +541,19 @@ def save_as_delta_table(
         "timestamp": TimestampType(),
     }

-    if
-
+    if isinstance(dataframe, pd.DataFrame):
+        if schema is None:
+            spark_df = spark.createDataFrame(dataframe)
+        else:
+            schema_map = StructType(
+                [
+                    StructField(column_name, type_mapping[data_type], True)
+                    for column_name, data_type in schema.items()
+                ]
+            )
+            spark_df = spark.createDataFrame(dataframe, schema_map)
     else:
-
-            [
-                StructField(column_name, type_mapping[data_type], True)
-                for column_name, data_type in schema.items()
-            ]
-        )
-        spark_df = spark.createDataFrame(dataframe, schema_map)
+        spark_df = dataframe

     filePath = create_abfss_path(
         lakehouse_id=lakehouse_id,
@@ -531,7 +568,7 @@ def save_as_delta_table(
     else:
         spark_df.write.mode(write_mode).format("delta").save(filePath)
     print(
-        f"{icons.green_dot} The dataframe has been saved as the '{delta_table_name}' table in the '{
+        f"{icons.green_dot} The dataframe has been saved as the '{delta_table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace."
     )


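save_as_delta_table now resolves the target lakehouse by name or UUID through resolve_lakehouse_name_and_id and accepts either a pandas or a Spark dataframe. A hedged usage sketch, to be run in a Fabric Spark session; the names are placeholders and the top-level import path is an assumption:

import pandas as pd
import sempy_labs as labs  # assumed public import path

df = pd.DataFrame({"id": [1, 2], "amount": [10.5, 20.0]})
labs.save_as_delta_table(
    dataframe=df,
    delta_table_name="sales_summary",  # placeholder; spaces are rejected
    write_mode="overwrite",
    lakehouse="Sales",                 # placeholder; a lakehouse UUID also works now
)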
@@ -621,9 +658,7 @@ def _conv_b64(file):

 def _decode_b64(file, format: Optional[str] = "utf-8"):

-
-
-    return result
+    return base64.b64decode(file).decode(format)


 def is_default_semantic_model(
@@ -690,15 +725,15 @@ def resolve_item_type(item_id: UUID, workspace: Optional[str | UUID] = None) ->


 def resolve_dataset_from_report(
-    report: str, workspace: Optional[str | UUID] = None
+    report: str | UUID, workspace: Optional[str | UUID] = None
 ) -> Tuple[UUID, str, UUID, str]:
     """
     Obtains the basic semantic model properties from which the report's data is sourced.

     Parameters
     ----------
-    report : str
-        The name of the Power BI report.
+    report : str | uuid.UUID
+        The name or ID of the Power BI report.
     workspace : str | uuid.UUID, default=None
         The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
@@ -706,20 +741,15 @@ def resolve_dataset_from_report(

     Returns
     -------
-    Tuple[UUID, str, UUID, str]
+    Tuple[uuid.UUID, str, uuid.UUID, str]
         The semantic model UUID, semantic model name, semantic model workspace UUID, semantic model workspace name
     """

-
+    from sempy_labs.report._generate_report import _get_report

-    dfR =
-
-
-        raise ValueError(
-            f"{icons.red_dot} The '{report}' report does not exist within the '{workspace_name}' workspace."
-        )
-    dataset_id = dfR_filt["Dataset Id"].iloc[0]
-    dataset_workspace_id = dfR_filt["Dataset Workspace Id"].iloc[0]
+    dfR = _get_report(report=report, workspace=workspace)
+    dataset_id = dfR["Dataset Id"].iloc[0]
+    dataset_workspace_id = dfR["Dataset Workspace Id"].iloc[0]
     dataset_workspace = fabric.resolve_workspace_name(dataset_workspace_id)
     dataset_name = resolve_dataset_name(
         dataset_id=dataset_id, workspace=dataset_workspace
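resolve_dataset_from_report now accepts a report name or UUID and delegates the lookup to _get_report in the report module. A hedged sketch of the four-value return (the report name is a placeholder):

from sempy_labs._helper_functions import resolve_dataset_from_report

dataset_id, dataset_name, ws_id, ws_name = resolve_dataset_from_report(
    report="Sales Report"  # placeholder; a report UUID also works
)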
@@ -975,7 +1005,7 @@ def resolve_deployment_pipeline_id(deployment_pipeline: str) -> UUID:

     Returns
     -------
-    UUID
+    uuid.UUID
         The deployment pipeline Id.
     """

@@ -1024,14 +1054,16 @@ def _get_adls_client(account_name):
     return service_client


-def resolve_warehouse_id(warehouse: str, workspace: Optional[str | UUID]) -> UUID:
+def resolve_warehouse_id(
+    warehouse: str | UUID, workspace: Optional[str | UUID]
+) -> UUID:
     """
     Obtains the Id for a given warehouse.

     Parameters
     ----------
-    warehouse : str
-        The warehouse name
+    warehouse : str | uuid.UUID
+        The warehouse name or ID.
     workspace : str | uuid.UUID, default=None
         The Fabric workspace name or ID in which the semantic model resides.
         Defaults to None which resolves to the workspace of the attached lakehouse
@@ -1039,13 +1071,16 @@ def resolve_warehouse_id(warehouse: str, workspace: Optional[str | UUID]) -> UUID:

     Returns
     -------
-    UUID
+    uuid.UUID
         The warehouse Id.
     """

-
-
-
+    if _is_valid_uuid(warehouse):
+        return warehouse
+    else:
+        return fabric.resolve_item_id(
+            item_name=warehouse, type="Warehouse", workspace=workspace
+        )


 def get_language_codes(languages: str | List[str]):
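resolve_warehouse_id short-circuits when it is already given a UUID and otherwise resolves the name via fabric.resolve_item_id. A hedged sketch with placeholder names:

from sempy_labs._helper_functions import resolve_warehouse_id

wh_id = resolve_warehouse_id(warehouse="SalesWarehouse", workspace="Analytics")  # placeholders
same_id = resolve_warehouse_id(warehouse=wh_id, workspace="Analytics")           # a UUID passes straight through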
@@ -1188,16 +1223,24 @@ def generate_guid():
     return str(uuid.uuid4())


-def
+def _get_column_aggregate(
+    lakehouse: str,
+    table_name: str,
+    column_name: str = "RunId",
+    function: str = "max",
+    default_value: int = 0,
+) -> int:

     from pyspark.sql import SparkSession

     spark = SparkSession.builder.getOrCreate()
-
+    function = function.upper()
+    query = f"SELECT {function}({column_name}) FROM {lakehouse}.{table_name}"
+    if "COUNT" in function and "DISTINCT" in function:
+        query = f"SELECT COUNT(DISTINCT({column_name})) FROM {lakehouse}.{table_name}"
     dfSpark = spark.sql(query)
-    max_run_id = dfSpark.collect()[0][0] or 0

-    return
+    return dfSpark.collect()[0][0] or default_value


 def _make_list_unique(my_list):
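_get_column_aggregate generalizes the previous hard-coded max-RunId lookup into a single-column aggregate with a configurable fallback. A hedged sketch; the lakehouse and table names are placeholders and an active Spark session is required:

from sempy_labs._helper_functions import _get_column_aggregate  # private helper

last_run_id = _get_column_aggregate(
    lakehouse="Admin",            # placeholder lakehouse name
    table_name="modelbparuns",    # placeholder table name
    column_name="RunId",
    function="max",
    default_value=0,
)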
@@ -1209,20 +1252,17 @@ def _get_partition_map(
     dataset: str, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:

-    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
-
     partitions = fabric.evaluate_dax(
-        dataset=
-        workspace=
+        dataset=dataset,
+        workspace=workspace,
         dax_string="""
         select [ID] AS [PartitionID], [TableID], [Name] AS [PartitionName] from $system.tmschema_partitions
         """,
     )

     tables = fabric.evaluate_dax(
-        dataset=
-        workspace=
+        dataset=dataset,
+        workspace=workspace,
         dax_string="""
         select [ID] AS [TableID], [Name] AS [TableName] from $system.tmschema_tables
         """,