semantic-link-labs 0.11.1__py3-none-any.whl → 0.11.3__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Potentially problematic release: this version of semantic-link-labs may be problematic.
- {semantic_link_labs-0.11.1.dist-info → semantic_link_labs-0.11.3.dist-info}/METADATA +7 -6
- semantic_link_labs-0.11.3.dist-info/RECORD +212 -0
- sempy_labs/__init__.py +65 -71
- sempy_labs/_a_lib_info.py +1 -1
- sempy_labs/_ai.py +1 -1
- sempy_labs/_capacities.py +2 -2
- sempy_labs/_capacity_migration.py +5 -5
- sempy_labs/_clear_cache.py +1 -1
- sempy_labs/_connections.py +2 -2
- sempy_labs/_dashboards.py +16 -16
- sempy_labs/_data_pipelines.py +1 -1
- sempy_labs/_dataflows.py +101 -26
- sempy_labs/_dax.py +3 -3
- sempy_labs/_delta_analyzer.py +4 -4
- sempy_labs/_delta_analyzer_history.py +1 -1
- sempy_labs/_deployment_pipelines.py +1 -1
- sempy_labs/_environments.py +22 -21
- sempy_labs/_eventhouses.py +12 -11
- sempy_labs/_eventstreams.py +12 -11
- sempy_labs/_external_data_shares.py +78 -23
- sempy_labs/_gateways.py +47 -45
- sempy_labs/_generate_semantic_model.py +3 -3
- sempy_labs/_git.py +1 -1
- sempy_labs/_graphQL.py +12 -11
- sempy_labs/_helper_functions.py +169 -5
- sempy_labs/_job_scheduler.py +56 -54
- sempy_labs/_kql_databases.py +16 -17
- sempy_labs/_kql_querysets.py +12 -11
- sempy_labs/_kusto.py +2 -2
- sempy_labs/_labels.py +126 -0
- sempy_labs/_list_functions.py +2 -2
- sempy_labs/_managed_private_endpoints.py +18 -15
- sempy_labs/_mirrored_databases.py +16 -15
- sempy_labs/_mirrored_warehouses.py +12 -11
- sempy_labs/_ml_experiments.py +11 -10
- sempy_labs/_model_auto_build.py +3 -3
- sempy_labs/_model_bpa.py +5 -5
- sempy_labs/_model_bpa_bulk.py +3 -3
- sempy_labs/_model_dependencies.py +1 -1
- sempy_labs/_mounted_data_factories.py +12 -12
- sempy_labs/_notebooks.py +151 -2
- sempy_labs/_one_lake_integration.py +1 -1
- sempy_labs/_query_scale_out.py +1 -1
- sempy_labs/_refresh_semantic_model.py +1 -1
- sempy_labs/_semantic_models.py +30 -28
- sempy_labs/_spark.py +1 -1
- sempy_labs/_sql.py +1 -1
- sempy_labs/_sql_endpoints.py +12 -11
- sempy_labs/_sqldatabase.py +15 -15
- sempy_labs/_tags.py +11 -10
- sempy_labs/_translations.py +1 -1
- sempy_labs/_user_delegation_key.py +2 -2
- sempy_labs/_vertipaq.py +3 -3
- sempy_labs/_vpax.py +1 -1
- sempy_labs/_warehouses.py +15 -14
- sempy_labs/_workloads.py +1 -1
- sempy_labs/_workspace_identity.py +1 -1
- sempy_labs/_workspaces.py +14 -13
- sempy_labs/admin/__init__.py +18 -18
- sempy_labs/admin/_activities.py +46 -46
- sempy_labs/admin/_apps.py +28 -26
- sempy_labs/admin/_artifacts.py +15 -15
- sempy_labs/admin/_basic_functions.py +1 -2
- sempy_labs/admin/_capacities.py +84 -82
- sempy_labs/admin/_dataflows.py +2 -2
- sempy_labs/admin/_datasets.py +50 -48
- sempy_labs/admin/_domains.py +25 -19
- sempy_labs/admin/_external_data_share.py +24 -22
- sempy_labs/admin/_git.py +17 -17
- sempy_labs/admin/_items.py +47 -45
- sempy_labs/admin/_reports.py +61 -58
- sempy_labs/admin/_scanner.py +2 -2
- sempy_labs/admin/_shared.py +18 -18
- sempy_labs/admin/_tags.py +2 -2
- sempy_labs/admin/_tenant.py +57 -51
- sempy_labs/admin/_users.py +16 -15
- sempy_labs/admin/_workspaces.py +2 -2
- sempy_labs/directlake/__init__.py +12 -12
- sempy_labs/directlake/_directlake_schema_compare.py +3 -3
- sempy_labs/directlake/_directlake_schema_sync.py +9 -7
- sempy_labs/directlake/_dl_helper.py +5 -2
- sempy_labs/directlake/_generate_shared_expression.py +1 -1
- sempy_labs/directlake/_get_directlake_lakehouse.py +1 -1
- sempy_labs/directlake/_guardrails.py +1 -1
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +3 -3
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +1 -1
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +3 -3
- sempy_labs/directlake/_update_directlake_partition_entity.py +4 -4
- sempy_labs/directlake/_warm_cache.py +3 -3
- sempy_labs/graph/__init__.py +3 -3
- sempy_labs/graph/_groups.py +81 -78
- sempy_labs/graph/_teams.py +21 -21
- sempy_labs/graph/_users.py +109 -10
- sempy_labs/lakehouse/__init__.py +7 -7
- sempy_labs/lakehouse/_blobs.py +30 -30
- sempy_labs/lakehouse/_get_lakehouse_columns.py +2 -2
- sempy_labs/lakehouse/_get_lakehouse_tables.py +29 -27
- sempy_labs/lakehouse/_helper.py +38 -1
- sempy_labs/lakehouse/_lakehouse.py +16 -7
- sempy_labs/lakehouse/_livy_sessions.py +47 -42
- sempy_labs/lakehouse/_shortcuts.py +22 -21
- sempy_labs/migration/__init__.py +8 -8
- sempy_labs/migration/_create_pqt_file.py +2 -2
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +35 -44
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +9 -20
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +5 -9
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +11 -20
- sempy_labs/migration/_migration_validation.py +1 -2
- sempy_labs/migration/_refresh_calc_tables.py +2 -2
- sempy_labs/mirrored_azure_databricks_catalog/__init__.py +2 -2
- sempy_labs/mirrored_azure_databricks_catalog/_discover.py +40 -40
- sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py +1 -1
- sempy_labs/ml_model/__init__.py +23 -0
- sempy_labs/ml_model/_functions.py +427 -0
- sempy_labs/report/__init__.py +10 -10
- sempy_labs/report/_download_report.py +2 -2
- sempy_labs/report/_export_report.py +2 -2
- sempy_labs/report/_generate_report.py +1 -1
- sempy_labs/report/_paginated.py +1 -1
- sempy_labs/report/_report_bpa.py +4 -3
- sempy_labs/report/_report_functions.py +3 -3
- sempy_labs/report/_report_list_functions.py +3 -3
- sempy_labs/report/_report_rebind.py +1 -1
- sempy_labs/report/_reportwrapper.py +248 -250
- sempy_labs/report/_save_report.py +3 -3
- sempy_labs/theme/_org_themes.py +19 -6
- sempy_labs/tom/__init__.py +1 -1
- sempy_labs/tom/_model.py +13 -8
- sempy_labs/variable_library/__init__.py +19 -0
- sempy_labs/variable_library/_functions.py +403 -0
- semantic_link_labs-0.11.1.dist-info/RECORD +0 -210
- sempy_labs/_dax_query_view.py +0 -57
- sempy_labs/_ml_models.py +0 -110
- sempy_labs/_variable_libraries.py +0 -91
- {semantic_link_labs-0.11.1.dist-info → semantic_link_labs-0.11.3.dist-info}/WHEEL +0 -0
- {semantic_link_labs-0.11.1.dist-info → semantic_link_labs-0.11.3.dist-info}/licenses/LICENSE +0 -0
- {semantic_link_labs-0.11.1.dist-info → semantic_link_labs-0.11.3.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@ import os
 import pandas as pd
 import pyarrow.parquet as pq
 from datetime import datetime
-from sempy_labs._helper_functions import (
+from .._helper_functions import (
     _get_column_aggregate,
     resolve_workspace_name_and_id,
     resolve_lakehouse_name_and_id,
@@ -15,11 +15,11 @@ from sempy_labs._helper_functions import (
     create_abfss_path,
     _pure_python_notebook,
 )
-from sempy_labs.directlake._guardrails import (
+from ..directlake._guardrails import (
     get_sku_size,
     get_directlake_guardrails_for_sku,
 )
-from sempy_labs.lakehouse._lakehouse import lakehouse_attached
+from ._lakehouse import lakehouse_attached
 from typing import Optional
 import sempy_labs._icons as icons
 from sempy._utils._log import log
@@ -97,7 +97,7 @@ def get_lakehouse_tables(
     except Exception as e:
         API_called = False

-    [removed line not shown in the source view]
+    rows = []
     local_path = None
     if API_called:
         if not responses[0].get("data"):
@@ -105,16 +105,17 @@ def get_lakehouse_tables(

         for r in responses:
             for i in r.get("data", []):
-                [10 removed lines not shown in the source view]
+                rows.append(
+                    {
+                        "Workspace Name": workspace_name,
+                        "Lakehouse Name": lakehouse_name,
+                        "Schema Name": "",
+                        "Table Name": i.get("name"),
+                        "Format": i.get("format"),
+                        "Type": i.get("type"),
+                        "Location": i.get("location"),
+                    }
+                )
     else:
         local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id)
         tables_path = os.path.join(local_path, "Tables")
@@ -127,19 +128,20 @@ def get_lakehouse_tables(
             location_path = create_abfss_path(
                 lakehouse_id, workspace_id, table_name, schema_name
             )
-            [13 removed lines not shown in the source view]
+            rows.append(
+                {
+                    "Workspace Name": workspace_name,
+                    "Lakehouse Name": lakehouse_name,
+                    "Schema Name": schema_name,
+                    "Table Name": table_name,
+                    "Format": "delta",
+                    "Type": "Managed",
+                    "Location": location_path,
+                }
+            )
+
+    if rows:
+        df = pd.DataFrame(rows, columns=list(columns.keys()))

     if extended:
         sku_value = get_sku_size(workspace_id)
sempy_labs/lakehouse/_helper.py  CHANGED
@@ -11,6 +11,7 @@ from sempy_labs._helper_functions import (
 from sempy._utils._log import log
 import sempy_labs._icons as icons
 import os
+import json


 @log
@@ -51,7 +52,43 @@ def is_v_ordered(
     )
     ds_schema = ds.dataset(table_path).schema.metadata

-    [removed line not shown in the source view]
+    if ds_schema:
+        return any(b"vorder" in key for key in ds_schema.keys())
+
+    delta_log_path = os.path.join(table_path, "_delta_log")
+
+    def read_vorder_tag(delta_log_path):
+        json_files = sorted(
+            [f for f in os.listdir(delta_log_path) if f.endswith(".json")], reverse=True
+        )
+
+        if not json_files:
+            return False
+
+        latest_file = os.path.join(delta_log_path, json_files[0])
+
+        with open(latest_file, "r") as f:
+            all_data = [
+                json.loads(line) for line in f if line.strip()
+            ]  # one dict per line
+        for data in all_data:
+            if "metaData" in data:
+                return (
+                    data.get("metaData", {})
+                    .get("configuration", {})
+                    .get("delta.parquet.vorder.enabled", "false")
+                    == "true"
+                )
+
+        # If no metaData, fall back to commitInfo
+        for data in all_data:
+            if "commitInfo" in data:
+                tags = data["commitInfo"].get("tags", {})
+                return tags.get("VORDER", "false").lower() == "true"
+
+        return False  # Default if not found
+
+    return read_vorder_tag(delta_log_path)


 @log
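`is_v_ordered` now falls back to the table's `_delta_log` when the Parquet schema metadata carries no `vorder` key: it reads the most recent commit JSON and checks `delta.parquet.vorder.enabled` under `metaData.configuration`, then the `VORDER` tag under `commitInfo`. A standalone sketch of that fallback, assuming a local `_delta_log` directory path (the directory and its contents are hypothetical):

```python
import json
import os


def delta_log_reports_vorder(delta_log_path: str) -> bool:
    """Inspect the newest _delta_log commit for a V-Order marker (sketch)."""
    json_files = sorted(
        (f for f in os.listdir(delta_log_path) if f.endswith(".json")), reverse=True
    )
    if not json_files:
        return False

    with open(os.path.join(delta_log_path, json_files[0]), "r") as f:
        # Each line of a Delta commit file is one JSON action.
        actions = [json.loads(line) for line in f if line.strip()]

    for action in actions:
        if "metaData" in action:
            config = action["metaData"].get("configuration", {})
            return config.get("delta.parquet.vorder.enabled", "false") == "true"

    # No metaData action: fall back to the commitInfo tags.
    for action in actions:
        if "commitInfo" in action:
            tags = action["commitInfo"].get("tags", {})
            return tags.get("VORDER", "false").lower() == "true"

    return False


# Hypothetical usage against a locally mounted table:
# print(delta_log_reports_vorder("/lakehouse/default/Tables/sales/_delta_log"))
```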
@@ -100,11 +100,15 @@ def optimize_lakehouse_tables(
         tables = [tables]

     df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta
+    df_tables.reset_index(drop=True, inplace=True)

-    [removed line not shown in the source view]
+    total = len(df_tables)
+    for idx, r in (bar := tqdm(df_tables.iterrows(), total=total, bar_format="{desc}")):
         table_name = r["Table Name"]
         path = r["Location"]
-        bar.set_description(…
+        bar.set_description(
+            f"Optimizing the '{table_name}' table ({idx + 1}/{total})..."
+        )
         _optimize_table(path=path)


@@ -145,11 +149,13 @@ def vacuum_lakehouse_tables(
         tables = [tables]

     df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta
+    df_tables.reset_index(drop=True, inplace=True)

-    [removed line not shown in the source view]
+    total = len(df_tables)
+    for idx, r in (bar := tqdm(df_tables.iterrows(), total=total, bar_format="{desc}")):
         table_name = r["Table Name"]
         path = r["Location"]
-        bar.set_description(f"Vacuuming the '{table_name}' table...")
+        bar.set_description(f"Vacuuming the '{table_name}' table ({idx}/{total})...")
         _vacuum_table(path=path, retain_n_hours=retain_n_hours)

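Both loops above now drive a `tqdm` progress bar whose `bar_format="{desc}"` hides the bar itself and shows only a description, updated with an explicit `(i/total)` counter. A small self-contained sketch of that reporting pattern (the table names are made up):

```python
from tqdm import tqdm

tables = ["sales", "dates", "customers"]
total = len(tables)

# bar_format="{desc}" suppresses the bar and percentage; only the description is shown.
for idx, name in (bar := tqdm(enumerate(tables, start=1), total=total, bar_format="{desc}")):
    bar.set_description(f"Optimizing the '{name}' table ({idx}/{total})...")
    # ... table maintenance would happen here ...
```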
@@ -231,7 +237,7 @@ def run_table_maintenance(
     if optimize:
         payload["executionData"]["optimizeSettings"] = {}
         if v_order:
-            payload["executionData"]["optimizeSettings"] = {"…
+            payload["executionData"]["optimizeSettings"] = {"vOrder": True}
     if vacuum:
         payload["executionData"]["vacuumSettings"] = {}
     if vacuum and retention_period is not None:
@@ -242,16 +248,19 @@ def run_table_maintenance(
         method="post",
         payload=payload,
         status_codes=202,
+        client="fabric_sp",
     )

-    [removed line not shown in the source view]
+    print(
+        f"{icons.in_progress} The table maintenance job for the '{table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been initiated."
+    )

     status_url = response.headers.get("Location").split("fabric.microsoft.com")[1]
     status = None
     while status not in ["Completed", "Failed"]:
         response = _base_api(request=status_url)
         status = response.json().get("status")
-        time.sleep(…
+        time.sleep(3)

     df = _get_item_job_instance(url=status_url)

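`run_table_maintenance` now announces the job and then polls the job-status URL every three seconds until the status becomes `Completed` or `Failed`. A generic sketch of that poll-until-terminal loop; `get_status` is a hypothetical stand-in for the Fabric status request made via `_base_api`:

```python
import time
from typing import Callable


def wait_for_job(get_status: Callable[[], str], poll_seconds: int = 3) -> str:
    """Poll a status callable until it reports a terminal state (sketch)."""
    terminal = {"Completed", "Failed"}
    status = None
    while status not in terminal:
        status = get_status()
        if status not in terminal:
            time.sleep(poll_seconds)
    return status


# Hypothetical usage: in the real function, get_status wraps a GET on the job's status URL.
print(wait_for_job(lambda: "Completed"))
```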
@@ -1,4 +1,4 @@
-from sempy_labs._helper_functions import (
+from .._helper_functions import (
     resolve_workspace_id,
     resolve_lakehouse_id,
     _base_api,
@@ -86,53 +86,58 @@ def list_livy_sessions(
         client="fabric_sp",
     )

-    [removed line not shown in the source view]
+    rows = []
     for r in responses:
         for v in r.get("value", []):
             queued_duration = v.get("queuedDuration", {})
             running_duration = v.get("runningDuration", {})
             total_duration = v.get("totalDuration", {})
-            [38 removed lines not shown in the source view]
+            rows.append(
+                {
+                    "Spark Application Id": v.get("sparkApplicationId"),
+                    "State:": v.get("state"),
+                    "Livy Id": v.get("livyId"),
+                    "Origin": v.get("origin"),
+                    "Attempt Number": v.get("attemptNumber"),
+                    "Max Number Of Attempts": v.get("maxNumberOfAttempts"),
+                    "Livy Name": v.get("livyName"),
+                    "Submitter Id": v["submitter"].get("id"),
+                    "Submitter Type": v["submitter"].get("type"),
+                    "Item Workspace Id": v["item"].get("workspaceId"),
+                    "Item Id": v["item"].get("itemId"),
+                    "Item Reference Type": v["item"].get("referenceType"),
+                    "Item Name": v.get("itemName"),
+                    "Item Type": v.get("itemType"),
+                    "Job Type": v.get("jobType"),
+                    "Submitted Date Time": v.get("submittedDateTime"),
+                    "Start Date Time": v.get("startDateTime"),
+                    "End Date Time": v.get("endDateTime"),
+                    "Queued Duration Value": queued_duration.get("value"),
+                    "Queued Duration Time Unit": queued_duration.get("timeUnit"),
+                    "Running Duration Value": running_duration.get("value"),
+                    "Running Duration Time Unit": running_duration.get("timeUnit"),
+                    "Total Duration Value": total_duration.get("value"),
+                    "Total Duration Time Unit": total_duration.get("timeUnit"),
+                    "Job Instance Id": v.get("jobInstanceId"),
+                    "Creator Item Workspace Id": v["creatorItem"].get("workspaceId"),
+                    "Creator Item Id": v["creatorItem"].get("itemId"),
+                    "Creator Item Reference Type": v["creatorItem"].get(
+                        "referenceType"
+                    ),
+                    "Creator Item Name": v.get("creatorItemName"),
+                    "Creator Item Type": v.get("creatorItemType"),
+                    "Cancellation Reason": v.get("cancellationReason"),
+                    "Capacity Id": v.get("capacityId"),
+                    "Operation Name": v.get("operationName"),
+                    "Runtime Version": v.get("runtimeVersion"),
+                    "Livy Session Item Resource Uri": v.get(
+                        "livySessionItemResourceUri"
+                    ),
+                }
+            )

-    if …
-    df = pd.…
+    if rows:
+        df = pd.DataFrame(rows, columns=list(columns.keys()))
         _update_dataframe_datatypes(dataframe=df, column_map=columns)

     return df
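The rebuilt `list_livy_sessions` flattens each nested session record into a flat row dict. Note that `submitter`, `item`, and `creatorItem` are indexed directly (`v["submitter"]`), so those keys are assumed to always be present in the response. A small sketch of the same flattening with fully defensive lookups; the sample record and field subset are made up for illustration:

```python
session = {
    "sparkApplicationId": "application_123",
    "state": "Running",
    "submitter": {"id": "user-1", "type": "User"},
    "queuedDuration": {"value": 12, "timeUnit": "Seconds"},
}

row = {
    "Spark Application Id": session.get("sparkApplicationId"),
    "State": session.get("state"),
    # Chaining .get(..., {}) keeps the lookup safe if the nested object is missing.
    "Submitter Id": session.get("submitter", {}).get("id"),
    "Queued Duration Value": session.get("queuedDuration", {}).get("value"),
}
print(row)
```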
@@ -1,6 +1,6 @@
 import sempy.fabric as fabric
 import pandas as pd
-from sempy_labs._helper_functions import (
+from .._helper_functions import (
     resolve_lakehouse_name_and_id,
     resolve_workspace_name_and_id,
     _base_api,
@@ -371,7 +371,7 @@ def list_shortcuts(
         "S3Compatible": "s3Compatible",
     }

-    [removed line not shown in the source view]
+    rows = []
     for r in responses:
         for i in r.get("value", []):
             tgt = i.get("target", {})
@@ -402,25 +402,26 @@ def list_shortcuts(
             source_item_type = dfI_filt["Type"].iloc[0]
             source_item_name = dfI_filt["Display Name"].iloc[0]

-            [17 removed lines not shown in the source view]
+            rows.append(
+                {
+                    "Shortcut Name": i.get("name"),
+                    "Shortcut Path": i.get("path"),
+                    "Source Type": tgt_type,
+                    "Source Workspace Id": source_workspace_id,
+                    "Source Workspace Name": source_workspace_name,
+                    "Source Item Id": source_item_id,
+                    "Source Item Name": source_item_name,
+                    "Source Item Type": source_item_type,
+                    "OneLake Path": tgt.get(sources.get("oneLake"), {}).get("path"),
+                    "Connection Id": connection_id,
+                    "Location": location,
+                    "Bucket": bucket,
+                    "SubPath": sub_path,
+                    "Source Properties Raw": str(tgt),
+                }
+            )

-    if …
-    df = pd.…
+    if rows:
+        df = pd.DataFrame(rows, columns=list(columns.keys()))

     return df
sempy_labs/migration/__init__.py  CHANGED
@@ -1,24 +1,24 @@
-from sempy_labs.migration._create_pqt_file import create_pqt_file
-from sempy_labs.migration._migrate_calctables_to_lakehouse import (
+from ._create_pqt_file import create_pqt_file
+from ._migrate_calctables_to_lakehouse import (
     migrate_calc_tables_to_lakehouse,
     migrate_field_parameters,
 )
-from sempy_labs.migration._migrate_calctables_to_semantic_model import (
+from ._migrate_calctables_to_semantic_model import (
     migrate_calc_tables_to_semantic_model,
 )
-from sempy_labs.migration._migrate_model_objects_to_semantic_model import (
+from ._migrate_model_objects_to_semantic_model import (
     migrate_model_objects_to_semantic_model,
 )
-from sempy_labs.migration._migrate_tables_columns_to_semantic_model import (
+from ._migrate_tables_columns_to_semantic_model import (
     migrate_tables_columns_to_semantic_model,
 )
-from sempy_labs.migration._migration_validation import (
+from ._migration_validation import (
     migration_validation,
 )
-from sempy_labs.migration._refresh_calc_tables import (
+from ._refresh_calc_tables import (
     refresh_calc_tables,
 )
-from sempy_labs.migration._direct_lake_to_import import (
+from ._direct_lake_to_import import (
     migrate_direct_lake_to_import,
 )

@@ -2,12 +2,12 @@ import sempy
 import json
 import os
 import shutil
-from sempy_labs.lakehouse._lakehouse import lakehouse_attached
+from ..lakehouse._lakehouse import lakehouse_attached
 from sempy._utils._log import log
 from typing import Optional
 import sempy_labs._icons as icons
 from uuid import UUID
-from sempy_labs._helper_functions import (
+from .._helper_functions import (
     resolve_dataset_name_and_id,
     resolve_workspace_name_and_id,
 )
@@ -4,26 +4,27 @@ import pandas as pd
 import re
 from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
 from sempy_labs._helper_functions import (
-    resolve_lakehouse_name,
-    resolve_lakehouse_id,
     retry,
     generate_guid,
     save_as_delta_table,
+    resolve_lakehouse_name_and_id,
+    resolve_workspace_name_and_id,
 )
 from sempy_labs.tom import connect_semantic_model
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
+from uuid import UUID


 @log
 def migrate_calc_tables_to_lakehouse(
     dataset: str,
     new_dataset: str,
-    workspace: Optional[str] = None,
-    new_dataset_workspace: Optional[str] = None,
-    lakehouse: Optional[str] = None,
-    lakehouse_workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
+    new_dataset_workspace: Optional[str | UUID] = None,
+    lakehouse: Optional[str | UUID] = None,
+    lakehouse_workspace: Optional[str | UUID] = None,
 ):
     """
     Creates delta tables in your lakehouse based on the DAX expression of a calculated table in an import/DirectQuery semantic model.
@@ -35,18 +36,18 @@ def migrate_calc_tables_to_lakehouse(
         Name of the import/DirectQuery semantic model.
     new_dataset : str
         Name of the Direct Lake semantic model.
-    workspace : str, default=None
+    workspace : str | uuid.UUID, default=None
         The Fabric workspace name in which the import/DirectQuery semantic model exists.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-    new_dataset_workspace : str
+    new_dataset_workspace : str | uuid.UUID
         The Fabric workspace name in which the Direct Lake semantic model will be created.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-    lakehouse : str, default=None
+    lakehouse : str | uuid.UUID, default=None
         The Fabric lakehouse used by the Direct Lake semantic model.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    lakehouse_workspace : str, default=None
+    lakehouse_workspace : str | uuid.UUID, default=None
         The Fabric workspace used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
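The workspace and lakehouse parameters are widened from `Optional[str]` to `Optional[str | UUID]`, so callers can pass either a display name or an ID; the function then resolves both forms through the `resolve_*_name_and_id` helpers shown in the next hunk. A minimal sketch of accepting either form (the resolver here is a hypothetical stand-in, not the library's helper):

```python
from typing import Optional
from uuid import UUID


def describe_workspace(workspace: Optional[str | UUID] = None) -> str:
    """Normalize a name-or-UUID parameter into a readable label (sketch)."""
    if workspace is None:
        return "default workspace"
    if isinstance(workspace, UUID):
        return f"workspace id {workspace}"
    return f"workspace named '{workspace}'"


print(describe_workspace())
print(describe_workspace("Sales Analytics"))
print(describe_workspace(UUID("12345678-1234-1234-1234-123456789abc")))
```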
@@ -57,22 +58,16 @@ def migrate_calc_tables_to_lakehouse(
             f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values."
         )

-    [11 removed lines not shown in the source view]
-    if lakehouse is None:
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
-    else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse, lakehouse_workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (new_dataset_workspace_name, new_dataset_workspace_id) = (
+        resolve_workspace_name_and_id(new_dataset_workspace)
+    )
+    (lakehouse_workspace_id, lakehouse_workspace_name) = resolve_workspace_name_and_id(
+        lakehouse_workspace
+    )
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse, lakehouse_workspace
+    )

     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
     dfP_filt = dfP[(dfP["Source Type"] == "Calculated")]
@@ -90,7 +85,7 @@ def migrate_calc_tables_to_lakehouse(

         if dtName in lakeTables["Table Name"].values:
             print(
-                f"{icons.red_dot} The '{tName}' table already exists as '{dtName}' in the '{…
+                f"{icons.red_dot} The '{tName}' table already exists as '{dtName}' in the '{lakehouse_name}' lakehouse in the '{lakehouse_workspace_name}' workspace."
             )
             killFunction = True

@@ -99,7 +94,7 @@ def migrate_calc_tables_to_lakehouse(

     if len(dfP_filt) == 0:
         print(
-            f"{icons.yellow_dot} The '{dataset}' semantic model in the '{…
+            f"{icons.yellow_dot} The '{dataset}' semantic model in the '{workspace_name}' workspace has no calculated tables."
         )
         return

@@ -175,7 +170,6 @@ def migrate_calc_tables_to_lakehouse(
                 if str(c.Type) == "Calculated"
                 and c.Name == new_column_name
             )
-
             if dataType == "Int64":
                 df[new_column_name] = df[
                     new_column_name
@@ -197,7 +191,7 @@ def migrate_calc_tables_to_lakehouse(

             save_as_delta_table(
                 dataframe=df,
-                [removed line not shown in the source view]
+                delta_table_name=delta_table_name,
                 lakehouse=lakehouse,
                 workspace=lakehouse_workspace,
                 write_mode="overwrite",
@@ -231,20 +225,21 @@ def migrate_calc_tables_to_lakehouse(

             print(
                 f"{icons.green_dot} Calculated table '{t.Name}' has been created as delta table '{delta_table_name.lower()}' "
-                f"in the '{…
+                f"in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace_name}' workspace."
             )
-        except Exception:
+        except Exception as e:
             print(
                 f"{icons.red_dot} Failed to create calculated table '{t.Name}' as a delta table in the lakehouse."
             )
+            print(e)


 @log
 def migrate_field_parameters(
     dataset: str,
     new_dataset: str,
-    workspace: Optional[str] = None,
-    new_dataset_workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
+    new_dataset_workspace: Optional[str | UUID] = None,
 ):
     """
     Migrates field parameters from one semantic model to another.
@@ -255,11 +250,11 @@ def migrate_field_parameters(
         Name of the import/DirectQuery semantic model.
     new_dataset : str
         Name of the Direct Lake semantic model.
-    workspace : str, default=None
+    workspace : str | uuid.UUID, default=None
         The Fabric workspace name in which the import/DirectQuery semantic model exists.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-    new_dataset_workspace : str
+    new_dataset_workspace : str | uuid.UUID, default=None
        The Fabric workspace name in which the Direct Lake semantic model will be created.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
@@ -270,15 +265,11 @@ def migrate_field_parameters(
     sempy.fabric._client._utils._init_analysis_services()
     import Microsoft.AnalysisServices.Tabular as TOM

-    if workspace is None:
-        workspace_id = fabric.get_workspace_id()
-        workspace = fabric.resolve_workspace_name(workspace_id)
-
-    if new_dataset_workspace is None:
-        new_dataset_workspace = workspace
-
     icons.sll_tags.append("DirectLakeMigration")
     fabric.refresh_tom_cache(workspace=workspace)
+    (new_dataset_workspace_name, new_dataset_workspace_id) = (
+        resolve_workspace_name_and_id(new_dataset_workspace)
+    )

     dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
     dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
@@ -399,7 +390,7 @@ def migrate_field_parameters(
             tom.model.Tables[tName].Columns["Value3"].Name = col3

             print(
-                f"{icons.green_dot} The '{tName}' table has been added as a field parameter to the '{new_dataset}' semantic model in the '{…
+                f"{icons.green_dot} The '{tName}' table has been added as a field parameter to the '{new_dataset}' semantic model in the '{new_dataset_workspace_name}' workspace."
             )
         except Exception:
             print(