semantic-link-labs 0.4.1 (semantic_link_labs-0.4.1-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of semantic-link-labs might be problematic (see the registry listing for details).
- semantic_link_labs-0.4.1.dist-info/LICENSE +21 -0
- semantic_link_labs-0.4.1.dist-info/METADATA +22 -0
- semantic_link_labs-0.4.1.dist-info/RECORD +52 -0
- semantic_link_labs-0.4.1.dist-info/WHEEL +5 -0
- semantic_link_labs-0.4.1.dist-info/top_level.txt +1 -0
- sempy_labs/__init__.py +154 -0
- sempy_labs/_ai.py +496 -0
- sempy_labs/_clear_cache.py +39 -0
- sempy_labs/_connections.py +234 -0
- sempy_labs/_dax.py +70 -0
- sempy_labs/_generate_semantic_model.py +280 -0
- sempy_labs/_helper_functions.py +506 -0
- sempy_labs/_icons.py +4 -0
- sempy_labs/_list_functions.py +1372 -0
- sempy_labs/_model_auto_build.py +143 -0
- sempy_labs/_model_bpa.py +1354 -0
- sempy_labs/_model_dependencies.py +341 -0
- sempy_labs/_one_lake_integration.py +155 -0
- sempy_labs/_query_scale_out.py +447 -0
- sempy_labs/_refresh_semantic_model.py +184 -0
- sempy_labs/_tom.py +3766 -0
- sempy_labs/_translations.py +378 -0
- sempy_labs/_vertipaq.py +893 -0
- sempy_labs/directlake/__init__.py +45 -0
- sempy_labs/directlake/_directlake_schema_compare.py +110 -0
- sempy_labs/directlake/_directlake_schema_sync.py +128 -0
- sempy_labs/directlake/_fallback.py +62 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +69 -0
- sempy_labs/directlake/_get_shared_expression.py +59 -0
- sempy_labs/directlake/_guardrails.py +84 -0
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +54 -0
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +89 -0
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +81 -0
- sempy_labs/directlake/_update_directlake_partition_entity.py +64 -0
- sempy_labs/directlake/_warm_cache.py +210 -0
- sempy_labs/lakehouse/__init__.py +24 -0
- sempy_labs/lakehouse/_get_lakehouse_columns.py +81 -0
- sempy_labs/lakehouse/_get_lakehouse_tables.py +250 -0
- sempy_labs/lakehouse/_lakehouse.py +85 -0
- sempy_labs/lakehouse/_shortcuts.py +296 -0
- sempy_labs/migration/__init__.py +29 -0
- sempy_labs/migration/_create_pqt_file.py +239 -0
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +429 -0
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +150 -0
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +524 -0
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +165 -0
- sempy_labs/migration/_migration_validation.py +227 -0
- sempy_labs/migration/_refresh_calc_tables.py +129 -0
- sempy_labs/report/__init__.py +35 -0
- sempy_labs/report/_generate_report.py +253 -0
- sempy_labs/report/_report_functions.py +855 -0
- sempy_labs/report/_report_rebind.py +131 -0
sempy_labs/lakehouse/_get_lakehouse_tables.py
@@ -0,0 +1,250 @@
+import sempy.fabric as fabric
+import pandas as pd
+from pyspark.sql import SparkSession
+import pyarrow.parquet as pq
+import datetime
+from sempy_labs._helper_functions import (
+    resolve_lakehouse_id,
+    resolve_lakehouse_name,
+    resolve_workspace_name_and_id,
+)
+from sempy_labs.directlake._guardrails import (
+    get_sku_size,
+    get_directlake_guardrails_for_sku,
+)
+from sempy_labs.lakehouse._lakehouse import lakehouse_attached
+from typing import Optional
+
+
+def get_lakehouse_tables(
+    lakehouse: Optional[str] = None,
+    workspace: Optional[str] = None,
+    extended: Optional[bool] = False,
+    count_rows: Optional[bool] = False,
+    export: Optional[bool] = False,
+):
+    """
+    Shows the tables of a lakehouse and their respective properties. Option to include additional properties relevant to Direct Lake guardrails.
+
+    Parameters
+    ----------
+    lakehouse : str, default=None
+        The Fabric lakehouse.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    lakehouse_workspace : str, default=None
+        The Fabric workspace used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    extended : bool, default=False
+        Obtains additional columns relevant to the size of each table.
+    count_rows : bool, default=False
+        Obtains a row count for each lakehouse table.
+    export : bool, default=False
+        Exports the resulting dataframe to a delta table in the lakehouse.
+
+    Returns
+    -------
+    pandas.DataFrame
+        Shows the tables/columns within a lakehouse and their properties.
+    """
+
+    df = pd.DataFrame(
+        columns=[
+            "Workspace Name",
+            "Lakehouse Name",
+            "Table Name",
+            "Format",
+            "Type",
+            "Location",
+        ]
+    )
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    if lakehouse == None:
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
+    else:
+        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+
+    if count_rows:  # Setting countrows defaults to extended=True
+        extended = True
+
+    client = fabric.FabricRestClient()
+    response = client.get(
+        f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables"
+    )
+
+    for i in response.json()["data"]:
+        tName = i["name"]
+        tType = i["type"]
+        tFormat = i["format"]
+        tLocation = i["location"]
+        if extended == False:
+            new_data = {
+                "Workspace Name": workspace,
+                "Lakehouse Name": lakehouse,
+                "Table Name": tName,
+                "Format": tFormat,
+                "Type": tType,
+                "Location": tLocation,
+            }
+            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+        else:
+            sku_value = get_sku_size(workspace)
+            guardrail = get_directlake_guardrails_for_sku(sku_value)
+
+            spark = SparkSession.builder.getOrCreate()
+
+            intColumns = ["Files", "Row Groups", "Table Size"]
+            if tType == "Managed" and tFormat == "delta":
+                detail_df = spark.sql(f"DESCRIBE DETAIL `{tName}`").collect()[0]
+                num_files = detail_df.numFiles
+                size_in_bytes = detail_df.sizeInBytes
+
+                delta_table_path = f"Tables/{tName}"
+                latest_files = (
+                    spark.read.format("delta").load(delta_table_path).inputFiles()
+                )
+                file_paths = [f.split("/")[-1] for f in latest_files]
+
+                # Handle FileNotFoundError
+                num_rowgroups = 0
+                for filename in file_paths:
+                    try:
+                        num_rowgroups += pq.ParquetFile(
+                            f"/lakehouse/default/{delta_table_path}/{filename}"
+                        ).num_row_groups
+                    except FileNotFoundError:
+                        continue
+
+                if count_rows:
+                    num_rows = spark.table(tName).count()
+                    intColumns.append("Row Count")
+                    new_data = {
+                        "Workspace Name": workspace,
+                        "Lakehouse Name": lakehouse,
+                        "Table Name": tName,
+                        "Format": tFormat,
+                        "Type": tType,
+                        "Location": tLocation,
+                        "Files": num_files,
+                        "Row Groups": num_rowgroups,
+                        "Row Count": num_rows,
+                        "Table Size": size_in_bytes,
+                    }
+                else:
+                    new_data = {
+                        "Workspace Name": workspace,
+                        "Lakehouse Name": lakehouse,
+                        "Table Name": tName,
+                        "Format": tFormat,
+                        "Type": tType,
+                        "Location": tLocation,
+                        "Files": num_files,
+                        "Row Groups": num_rowgroups,
+                        "Table Size": size_in_bytes,
+                    }
+
+                df = pd.concat(
+                    [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
+                )
+            df[intColumns] = df[intColumns].astype(int)
+
+            df["SKU"] = guardrail["Fabric SKUs"].iloc[0]
+            df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0]
+            df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0]
+            df["Row Count Guardrail"] = (
+                guardrail["Rows per table (millions)"].iloc[0] * 1000000
+            )
+
+            df["Parquet File Guardrail Hit"] = (
+                df["Files"] > df["Parquet File Guardrail"]
+            )
+            df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"]
+
+            if count_rows:
+                df["Row Count Guardrail Hit"] = (
+                    df["Row Count"] > df["Row Count Guardrail"]
+                )
+
+    if export:
+        lakeAttach = lakehouse_attached()
+        if lakeAttach == False:
+            print(
+                f"In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
+            )
+            return
+        spark = SparkSession.builder.getOrCreate()
+
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse = resolve_lakehouse_name(
+            lakehouse_id=lakehouse_id, workspace=workspace
+        )
+        lakeTName = "lakehouse_table_details"
+        lakeT_filt = df[df["Table Name"] == lakeTName]
+
+        query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}"
+
+        if len(lakeT_filt) == 0:
+            runId = 1
+        else:
+            dfSpark = spark.sql(query)
+            maxRunId = dfSpark.collect()[0][0]
+            runId = maxRunId + 1
+
+        export_df = df.copy()
+
+        cols = [
+            "Files",
+            "Row Groups",
+            "Row Count",
+            "Table Size",
+            "SKU",
+            "Parquet File Guardrail",
+            "Row Group Guardrail",
+            "Row Count Guardrail",
+            "Parquet File Guardrail Hit",
+            "Row Group Guardrail Hit",
+            "Row Count Guardrail Hit",
+        ]
+
+        for c in cols:
+            if c not in export_df:
+                if c in [
+                    "Files",
+                    "Row Groups",
+                    "Row Count",
+                    "Table Size",
+                    "Parquet File Guardrail",
+                    "Row Group Guardrail",
+                    "Row Count Guardrail",
+                ]:
+                    export_df[c] = 0
+                    export_df[c] = export_df[c].astype(int)
+                elif c in ["SKU"]:
+                    export_df[c] = None
+                    export_df[c] = export_df[c].astype(str)
+                elif c in [
+                    "Parquet File Guardrail Hit",
+                    "Row Group Guardrail Hit",
+                    "Row Count Guardrail Hit",
+                ]:
+                    export_df[c] = False
+                    export_df[c] = export_df[c].astype(bool)
+
+        print(
+            f"Saving Lakehouse table properties to the '{lakeTName}' table in the lakehouse...\n"
+        )
+        now = datetime.datetime.now()
+        export_df["Timestamp"] = now
+        export_df["RunId"] = runId
+
+        export_df.columns = export_df.columns.str.replace(" ", "_")
+        spark_df = spark.createDataFrame(export_df)
+        spark_df.write.mode("append").format("delta").saveAsTable(lakeTName)
+        print(
+            f"\u2022 Lakehouse table properties have been saved to the '{lakeTName}' delta table."
+        )
+
+    return df
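For context only (this is not part of the packaged diff): a minimal notebook sketch of how the get_lakehouse_tables function added above might be called from a Fabric notebook. The lakehouse and workspace names are hypothetical, and the import uses the private module path shown in this diff; the package's lakehouse __init__ may also re-export the function.

    from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables

    # Plain listing of the attached lakehouse: table name, format, type and location.
    tables_df = get_lakehouse_tables()

    # Extended listing adds file count, row-group count, table size and the Direct Lake
    # guardrail columns; count_rows=True implies extended=True and also counts rows.
    tables_df = get_lakehouse_tables(
        lakehouse="MyLakehouse",   # hypothetical lakehouse name
        workspace="MyWorkspace",   # hypothetical workspace name
        extended=True,
        count_rows=True,
        export=True,  # appends this run to the 'lakehouse_table_details' delta table
    )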
sempy_labs/lakehouse/_lakehouse.py
@@ -0,0 +1,85 @@
+import sempy
+import sempy.fabric as fabric
+from tqdm.auto import tqdm
+from pyspark.sql import SparkSession
+from sempy_labs._helper_functions import resolve_lakehouse_name
+from typing import List, Optional, Union
+
+
+def lakehouse_attached() -> bool:
+    """
+    Identifies if a lakehouse is attached to the notebook.
+
+    Returns
+    -------
+    bool
+        Returns True if a lakehouse is attached to the notebook.
+    """
+
+    spark = SparkSession.builder.getOrCreate()
+    lakeId = spark.conf.get("trident.lakehouse.id")
+
+    if len(lakeId) > 0:
+        return True
+    else:
+        return False
+
+
+def optimize_lakehouse_tables(
+    tables: Optional[Union[str, List[str]]] = None,
+    lakehouse: Optional[str] = None,
+    workspace: Optional[str] = None,
+):
+    """
+    Runs the `OPTIMIZE <https://docs.delta.io/latest/optimizations-oss.html>`_ function over the specified lakehouse tables.
+
+    Parameters
+    ----------
+    tables : str | List[str] | None
+        The table(s) to optimize. If no tables are specified, all tables in the lakehouse will be optimized.
+    lakehouse : str, default=None
+        The Fabric lakehouse.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str, default=None
+        The Fabric workspace used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
+    from delta import DeltaTable
+
+    if workspace == None:
+        workspace_id = fabric.get_workspace_id()
+        workspace = fabric.resolve_workspace_name(workspace_id)
+
+    if lakehouse == None:
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
+
+    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+    lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]
+
+    if isinstance(tables, str):
+        tables = [tables]
+
+    if tables is not None:
+        tables_filt = lakeTablesDelta[lakeTablesDelta["Table Name"].isin(tables)]
+    else:
+        tables_filt = lakeTablesDelta.copy()
+
+    tableCount = len(tables_filt)
+
+    spark = SparkSession.builder.getOrCreate()
+
+    i = 1
+    for _, r in (bar := tqdm(tables_filt.iterrows())):
+        tableName = r["Table Name"]
+        tablePath = r["Location"]
+        bar.set_description(f"Optimizing the '{tableName}' table...")
+        deltaTable = DeltaTable.forPath(spark, tablePath)
+        deltaTable.optimize().executeCompaction()
+        print(
+            f"The '{tableName}' table has been optimized. ({str(i)}/{str(tableCount)})"
+        )
+        i += 1
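Again for illustration only, assuming a Fabric notebook with an attached lakehouse: a sketch combining the two helpers above. The table names are hypothetical.

    from sempy_labs.lakehouse._lakehouse import lakehouse_attached, optimize_lakehouse_tables

    # Guard against running outside a notebook with an attached lakehouse.
    if lakehouse_attached():
        # Compact every delta table in the attached lakehouse...
        optimize_lakehouse_tables()
        # ...or only specific ones (a single string or a list of strings is accepted).
        optimize_lakehouse_tables(tables=["FactSales", "DimDate"])  # hypothetical table names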
sempy_labs/lakehouse/_shortcuts.py
@@ -0,0 +1,296 @@
+import sempy
+import sempy.fabric as fabric
+import pandas as pd
+from sempy_labs._helper_functions import (
+    resolve_lakehouse_name,
+    resolve_lakehouse_id,
+    resolve_workspace_name_and_id,
+)
+from typing import List, Optional, Union
+import sempy_labs._icons as icons
+
+
+def create_shortcut_onelake(
+    table_name: str,
+    source_lakehouse: str,
+    source_workspace: str,
+    destination_lakehouse: str,
+    destination_workspace: Optional[str] = None,
+    shortcut_name: Optional[str] = None,
+):
+    """
+    Creates a `shortcut <https://learn.microsoft.com/fabric/onelake/onelake-shortcuts>`_ to a delta table in OneLake.
+
+    Parameters
+    ----------
+    table_name : str
+        The table name for which a shortcut will be created.
+    source_lakehouse : str
+        The Fabric lakehouse in which the table resides.
+    source_workspace : str
+        The name of the Fabric workspace in which the source lakehouse exists.
+    destination_lakehouse : str
+        The Fabric lakehouse in which the shortcut will be created.
+    destination_workspace : str, default=None
+        The name of the Fabric workspace in which the shortcut will be created.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    shortcut_name : str, default=None
+        The name of the shortcut 'table' to be created. This defaults to the 'table_name' parameter value.
+    """
+
+    sourceWorkspaceId = fabric.resolve_workspace_id(source_workspace)
+    sourceLakehouseId = resolve_lakehouse_id(source_lakehouse, source_workspace)
+
+    if destination_workspace == None:
+        destination_workspace = source_workspace
+
+    destinationWorkspaceId = fabric.resolve_workspace_id(destination_workspace)
+    destinationLakehouseId = resolve_lakehouse_id(
+        destination_lakehouse, destination_workspace
+    )
+
+    if shortcut_name == None:
+        shortcut_name = table_name
+
+    client = fabric.FabricRestClient()
+    tablePath = "Tables/" + table_name
+
+    request_body = {
+        "path": "Tables",
+        "name": shortcut_name.replace(" ", ""),
+        "target": {
+            "oneLake": {
+                "workspaceId": sourceWorkspaceId,
+                "itemId": sourceLakehouseId,
+                "path": tablePath,
+            }
+        },
+    }
+
+    try:
+        response = client.post(
+            f"/v1/workspaces/{destinationWorkspaceId}/items/{destinationLakehouseId}/shortcuts",
+            json=request_body,
+        )
+        if response.status_code == 201:
+            print(
+                f"{icons.green_dot} The shortcut '{shortcut_name}' was created in the '{destination_lakehouse}' lakehouse within the '{destination_workspace} workspace. It is based on the '{table_name}' table in the '{source_lakehouse}' lakehouse within the '{source_workspace}' workspace."
+            )
+        else:
+            print(response.status_code)
+    except Exception as e:
+        print(
+            f"{icons.red_dot} Failed to create a shortcut for the '{table_name}' table: {e}"
+        )
+
+
+def create_shortcut(
+    shortcut_name: str,
+    location: str,
+    subpath: str,
+    source: str,
+    connection_id: str,
+    lakehouse: Optional[str] = None,
+    workspace: Optional[str] = None,
+):
+    """
+    Creates a `shortcut <https://learn.microsoft.com/fabric/onelake/onelake-shortcuts>`_ to an ADLS Gen2 or Amazon S3 source.
+
+    Parameters
+    ----------
+    shortcut_name : str
+    location : str
+    subpath : str
+    source : str
+    connection_id: str
+    lakehouse : str
+        The Fabric lakehouse in which the shortcut will be created.
+    workspace : str, default=None
+        The name of the Fabric workspace in which the shortcut will be created.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    source_titles = {"adlsGen2": "ADLS Gen2", "amazonS3": "Amazon S3"}
+
+    sourceValues = list(source_titles.keys())
+
+    if source not in sourceValues:
+        print(
+            f"{icons.red_dot} The 'source' parameter must be one of these values: {sourceValues}."
+        )
+        return
+
+    sourceTitle = source_titles[source]
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    if lakehouse == None:
+        lakehouse_id = fabric.get_lakehouse_id()
+    else:
+        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+
+    client = fabric.FabricRestClient()
+    shortcutActualName = shortcut_name.replace(" ", "")
+
+    request_body = {
+        "path": "Tables",
+        "name": shortcutActualName,
+        "target": {
+            source: {
+                "location": location,
+                "subpath": subpath,
+                "connectionId": connection_id,
+            }
+        },
+    }
+
+    try:
+        response = client.post(
+            f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts",
+            json=request_body,
+        )
+        if response.status_code == 201:
+            print(
+                f"{icons.green_dot} The shortcut '{shortcutActualName}' was created in the '{lakehouse}' lakehouse within the '{workspace} workspace. It is based on the '{subpath}' table in '{sourceTitle}'."
+            )
+        else:
+            print(response.status_code)
+    except:
+        print(
+            f"{icons.red_dot} Failed to create a shortcut for the '{shortcut_name}' table."
+        )
+
+
+def list_shortcuts(
+    lakehouse: Optional[str] = None, workspace: Optional[str] = None
+) -> pd.DataFrame:
+    """
+    Shows all shortcuts which exist in a Fabric lakehouse.
+
+    Parameters
+    ----------
+    lakehouse : str, default=None
+        The Fabric lakehouse name.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str, default=None
+        The name of the Fabric workspace in which lakehouse resides.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing all the shortcuts which exist in the specified lakehouse.
+    """
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    if lakehouse == None:
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
+    else:
+        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+
+    df = pd.DataFrame(
+        columns=[
+            "Shortcut Name",
+            "Shortcut Path",
+            "Source",
+            "Source Lakehouse Name",
+            "Source Workspace Name",
+            "Source Path",
+            "Source Connection ID",
+            "Source Location",
+            "Source SubPath",
+        ]
+    )
+
+    client = fabric.FabricRestClient()
+    response = client.get(
+        f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"
+    )
+    if response.status_code == 200:
+        for s in response.json()["value"]:
+            shortcutName = s["name"]
+            shortcutPath = s["path"]
+            source = list(s["target"].keys())[0]
+            (
+                sourceLakehouseName,
+                sourceWorkspaceName,
+                sourcePath,
+                connectionId,
+                location,
+                subpath,
+            ) = (None, None, None, None, None, None)
+            if source == "oneLake":
+                sourceLakehouseId = s["target"][source]["itemId"]
+                sourcePath = s["target"][source]["path"]
+                sourceWorkspaceId = s["target"][source]["workspaceId"]
+                sourceWorkspaceName = fabric.resolve_workspace_name(sourceWorkspaceId)
+                sourceLakehouseName = resolve_lakehouse_name(
+                    sourceLakehouseId, sourceWorkspaceName
+                )
+            else:
+                connectionId = s["target"][source]["connectionId"]
+                location = s["target"][source]["location"]
+                subpath = s["target"][source]["subpath"]
+
+            new_data = {
+                "Shortcut Name": shortcutName,
+                "Shortcut Path": shortcutPath,
+                "Source": source,
+                "Source Lakehouse Name": sourceLakehouseName,
+                "Source Workspace Name": sourceWorkspaceName,
+                "Source Path": sourcePath,
+                "Source Connection ID": connectionId,
+                "Source Location": location,
+                "Source SubPath": subpath,
+            }
+            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+
+    print(
+        f"This function relies on an API which is not yet official as of May 21, 2024. Once the API becomes official this function will work as expected."
+    )
+    return df
+
+
+def delete_shortcut(
+    shortcut_name: str, lakehouse: Optional[str] = None, workspace: Optional[str] = None
+):
+    """
+    Deletes a shortcut.
+
+    Parameters
+    ----------
+    shortcut_name : str
+        The name of the shortcut.
+    lakehouse : str, default=None
+        The Fabric lakehouse name in which the shortcut resides.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str, default=None
+        The name of the Fabric workspace in which lakehouse resides.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    if lakehouse == None:
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
+    else:
+        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+
+    client = fabric.FabricRestClient()
+    response = client.delete(
+        f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts/Tables/{shortcut_name}"
+    )
+
+    if response.status_code == 200:
+        print(
+            f"{icons.green_dot} The '{shortcut_name}' shortcut in the '{lakehouse}' within the '{workspace}' workspace has been deleted."
+        )
+    else:
+        print(f"{icons.red_dot} The '{shortcut_name}' has not been deleted.")
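A hedged usage sketch for the shortcut helpers above; all lakehouse, workspace and table names are hypothetical, and behaviour is as described by the code in this file (note the in-code warning that the shortcut list API was still unofficial as of May 21, 2024).

    from sempy_labs.lakehouse._shortcuts import (
        create_shortcut_onelake,
        list_shortcuts,
        delete_shortcut,
    )

    # Shortcut a delta table from a source lakehouse into a destination lakehouse.
    # destination_workspace defaults to the source workspace when omitted.
    create_shortcut_onelake(
        table_name="FactSales",                 # hypothetical table
        source_lakehouse="SourceLakehouse",     # hypothetical lakehouse
        source_workspace="Source Workspace",    # hypothetical workspace
        destination_lakehouse="DestLakehouse",  # hypothetical lakehouse
    )

    # Review the shortcuts in the destination lakehouse, then remove the new one.
    shortcuts_df = list_shortcuts(lakehouse="DestLakehouse")
    delete_shortcut("FactSales", lakehouse="DestLakehouse")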
sempy_labs/migration/__init__.py
@@ -0,0 +1,29 @@
+from sempy_labs.migration._create_pqt_file import create_pqt_file
+from sempy_labs.migration._migrate_calctables_to_lakehouse import (
+    migrate_calc_tables_to_lakehouse,
+    migrate_field_parameters,
+)
+from sempy_labs.migration._migrate_calctables_to_semantic_model import (
+    migrate_calc_tables_to_semantic_model,
+)
+from sempy_labs.migration._migrate_model_objects_to_semantic_model import (
+    migrate_model_objects_to_semantic_model,
+)
+from sempy_labs.migration._migrate_tables_columns_to_semantic_model import (
+    migrate_tables_columns_to_semantic_model,
+)
+from sempy_labs.migration._migration_validation import (
+    migration_validation,
+    # list_semantic_model_objects
+)
+
+__all__ = [
+    "create_pqt_file",
+    "migrate_calc_tables_to_lakehouse",
+    "migrate_field_parameters",
+    "migrate_calc_tables_to_semantic_model",
+    "migrate_model_objects_to_semantic_model",
+    "migrate_tables_columns_to_semantic_model",
+    "migration_validation",
+    # list_semantic_model_objects
+]
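For reference, the re-exports above mean callers can import the migration helpers from the public sempy_labs.migration namespace rather than the private modules; a minimal sketch (the helpers' parameters are defined in the private modules listed in the file summary at the top of this diff):

    from sempy_labs.migration import (
        create_pqt_file,
        migrate_calc_tables_to_lakehouse,
        migrate_model_objects_to_semantic_model,
        migration_validation,
    )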