semantic_link_labs-0.4.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of semantic-link-labs has been flagged as possibly problematic.
- semantic_link_labs-0.4.1.dist-info/LICENSE +21 -0
- semantic_link_labs-0.4.1.dist-info/METADATA +22 -0
- semantic_link_labs-0.4.1.dist-info/RECORD +52 -0
- semantic_link_labs-0.4.1.dist-info/WHEEL +5 -0
- semantic_link_labs-0.4.1.dist-info/top_level.txt +1 -0
- sempy_labs/__init__.py +154 -0
- sempy_labs/_ai.py +496 -0
- sempy_labs/_clear_cache.py +39 -0
- sempy_labs/_connections.py +234 -0
- sempy_labs/_dax.py +70 -0
- sempy_labs/_generate_semantic_model.py +280 -0
- sempy_labs/_helper_functions.py +506 -0
- sempy_labs/_icons.py +4 -0
- sempy_labs/_list_functions.py +1372 -0
- sempy_labs/_model_auto_build.py +143 -0
- sempy_labs/_model_bpa.py +1354 -0
- sempy_labs/_model_dependencies.py +341 -0
- sempy_labs/_one_lake_integration.py +155 -0
- sempy_labs/_query_scale_out.py +447 -0
- sempy_labs/_refresh_semantic_model.py +184 -0
- sempy_labs/_tom.py +3766 -0
- sempy_labs/_translations.py +378 -0
- sempy_labs/_vertipaq.py +893 -0
- sempy_labs/directlake/__init__.py +45 -0
- sempy_labs/directlake/_directlake_schema_compare.py +110 -0
- sempy_labs/directlake/_directlake_schema_sync.py +128 -0
- sempy_labs/directlake/_fallback.py +62 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +69 -0
- sempy_labs/directlake/_get_shared_expression.py +59 -0
- sempy_labs/directlake/_guardrails.py +84 -0
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +54 -0
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +89 -0
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +81 -0
- sempy_labs/directlake/_update_directlake_partition_entity.py +64 -0
- sempy_labs/directlake/_warm_cache.py +210 -0
- sempy_labs/lakehouse/__init__.py +24 -0
- sempy_labs/lakehouse/_get_lakehouse_columns.py +81 -0
- sempy_labs/lakehouse/_get_lakehouse_tables.py +250 -0
- sempy_labs/lakehouse/_lakehouse.py +85 -0
- sempy_labs/lakehouse/_shortcuts.py +296 -0
- sempy_labs/migration/__init__.py +29 -0
- sempy_labs/migration/_create_pqt_file.py +239 -0
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +429 -0
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +150 -0
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +524 -0
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +165 -0
- sempy_labs/migration/_migration_validation.py +227 -0
- sempy_labs/migration/_refresh_calc_tables.py +129 -0
- sempy_labs/report/__init__.py +35 -0
- sempy_labs/report/_generate_report.py +253 -0
- sempy_labs/report/_report_functions.py +855 -0
- sempy_labs/report/_report_rebind.py +131 -0
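
The listing above gives the package layout: a single top-level sempy_labs package with directlake, lakehouse, migration, and report subpackages. As a minimal sketch of how this release would typically be pulled into a Microsoft Fabric notebook (the module paths come from the listing; whether sempy_labs/__init__.py re-exports these functions is not shown in this diff, so the imports below target the private module paths directly):

# Hypothetical Fabric notebook cell; assumes an attached lakehouse.
%pip install semantic-link-labs==0.4.1

# Module paths taken from the file listing above.
from sempy_labs.migration._create_pqt_file import create_pqt_file
from sempy_labs.migration._migrate_calctables_to_lakehouse import (
    migrate_calc_tables_to_lakehouse,
    migrate_field_parameters,
)

The two hunks below correspond to the new _create_pqt_file.py (+239) and _migrate_calctables_to_lakehouse.py (+429) modules from that listing.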
sempy_labs/migration/_create_pqt_file.py

@@ -0,0 +1,239 @@
import sempy
import sempy.fabric as fabric
import json, os, shutil
import xml.etree.ElementTree as ET
from sempy_labs._list_functions import list_tables
from sempy_labs.lakehouse._lakehouse import lakehouse_attached
from sempy._utils._log import log
from typing import Optional
import sempy_labs._icons as icons


@log
def create_pqt_file(
    dataset: str, workspace: Optional[str] = None, file_name: Optional[str] = None
):
    """
    Dynamically generates a `Power Query Template <https://learn.microsoft.com/power-query/power-query-template>`_ file based on the semantic model. The .pqt file is saved within the Files section of your lakehouse.

    Parameters
    ----------
    dataset : str
        Name of the semantic model.
    workspace : str, default=None
        The Fabric workspace name.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    file_name : str, default=None
        The name of the Power Query Template file to be generated.
        Defaults to None which resolves to 'PowerQueryTemplate'.
    """

    if file_name is None:
        file_name = "PowerQueryTemplate"

    lakeAttach = lakehouse_attached()

    if lakeAttach == False:
        print(
            f"{icons.red_dot} In order to run the 'create_pqt_file' function, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
        )
        return

    if workspace == None:
        workspace_id = fabric.get_workspace_id()
        workspace = fabric.resolve_workspace_name(workspace_id)

    folderPath = "/lakehouse/default/Files"
    subFolderPath = os.path.join(folderPath, "pqtnewfolder")
    os.makedirs(subFolderPath, exist_ok=True)

    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
    dfT = list_tables(dataset, workspace)
    dfE = fabric.list_expressions(dataset=dataset, workspace=workspace)

    # Check if M-partitions are used
    if any(dfP["Source Type"] == "M"):

        class QueryMetadata:
            def __init__(
                self,
                QueryName,
                QueryGroupId=None,
                LastKnownIsParameter=None,
                LastKnownResultTypeName=None,
                LoadEnabled=True,
                IsHidden=False,
            ):
                self.QueryName = QueryName
                self.QueryGroupId = QueryGroupId
                self.LastKnownIsParameter = LastKnownIsParameter
                self.LastKnownResultTypeName = LastKnownResultTypeName
                self.LoadEnabled = LoadEnabled
                self.IsHidden = IsHidden

        class RootObject:
            def __init__(
                self, DocumentLocale, EngineVersion, QueriesMetadata, QueryGroups=None
            ):
                if QueryGroups is None:
                    QueryGroups = []
                self.DocumentLocale = DocumentLocale
                self.EngineVersion = EngineVersion
                self.QueriesMetadata = QueriesMetadata
                self.QueryGroups = QueryGroups

        # STEP 1: Create MashupDocument.pq
        mdfileName = "MashupDocument.pq"
        mdFilePath = os.path.join(subFolderPath, mdfileName)
        sb = "section Section1;"
        for table_name in dfP["Table Name"].unique():
            tName = '#"' + table_name + '"'
            sourceExpression = dfT.loc[
                (dfT["Name"] == table_name), "Source Expression"
            ].iloc[0]
            refreshPolicy = dfT.loc[(dfT["Name"] == table_name), "Refresh Policy"].iloc[
                0
            ]
            sourceType = dfP.loc[(dfP["Table Name"] == table_name), "Source Type"].iloc[
                0
            ]

            if sourceType == "M" or refreshPolicy:
                sb = sb + "\n" + "shared " + tName + " = "

            partitions_in_table = dfP.loc[
                dfP["Table Name"] == table_name, "Partition Name"
            ].unique()

            i = 1
            for partition_name in partitions_in_table:
                pSourceType = dfP.loc[
                    (dfP["Table Name"] == table_name)
                    & (dfP["Partition Name"] == partition_name),
                    "Source Type",
                ].iloc[0]
                pQuery = dfP.loc[
                    (dfP["Table Name"] == table_name)
                    & (dfP["Partition Name"] == partition_name),
                    "Query",
                ].iloc[0]

                if pQuery is not None:
                    pQueryNoSpaces = (
                        pQuery.replace(" ", "")
                        .replace("\n", "")
                        .replace("\t", "")
                        .replace("\r", "")
                    )
                    if pQueryNoSpaces.startswith('letSource=""'):
                        pQuery = 'let\n\tSource = ""\nin\n\tSource'

                if pSourceType == "M" and i == 1:
                    sb = sb + pQuery + ";"
                elif refreshPolicy and i == 1:
                    sb = sb + sourceExpression + ";"
                i += 1

        for index, row in dfE.iterrows():
            expr = row["Expression"]
            eName = row["Name"]
            eName = '#"' + eName + '"'
            sb = sb + "\n" + "shared " + eName + " = " + expr + ";"

        with open(mdFilePath, "w") as file:
            file.write(sb)

        # STEP 2: Create the MashupMetadata.json file
        mmfileName = "MashupMetadata.json"
        mmFilePath = os.path.join(subFolderPath, mmfileName)
        queryMetadata = []

        for tName in dfP["Table Name"].unique():
            sourceType = dfP.loc[(dfP["Table Name"] == tName), "Source Type"].iloc[0]
            refreshPolicy = dfT.loc[(dfT["Name"] == tName), "Refresh Policy"].iloc[0]
            if sourceType == "M" or refreshPolicy:
                queryMetadata.append(
                    QueryMetadata(tName, None, None, None, True, False)
                )

        for i, r in dfE.iterrows():
            eName = r["Name"]
            eKind = r["Kind"]
            if eKind == "M":
                queryMetadata.append(
                    QueryMetadata(eName, None, None, None, True, False)
                )
            else:
                queryMetadata.append(
                    QueryMetadata(eName, None, None, None, False, False)
                )

        rootObject = RootObject("en-US", "2.126.453.0", queryMetadata)

        def obj_to_dict(obj):
            if isinstance(obj, list):
                return [obj_to_dict(e) for e in obj]
            elif hasattr(obj, "__dict__"):
                return {k: obj_to_dict(v) for k, v in obj.__dict__.items()}
            else:
                return obj

        jsonContent = json.dumps(obj_to_dict(rootObject), indent=4)

        with open(mmFilePath, "w") as json_file:
            json_file.write(jsonContent)

        # STEP 3: Create Metadata.json file
        mFileName = "Metadata.json"
        mFilePath = os.path.join(subFolderPath, mFileName)
        metaData = {"Name": "fileName", "Description": "", "Version": "1.0.0.0"}
        jsonContent = json.dumps(metaData, indent=4)

        with open(mFilePath, "w") as json_file:
            json_file.write(jsonContent)

        # STEP 4: Create [Content_Types].xml file:
        ns = "http://schemas.openxmlformats.org/package/2006/content-types"
        ET.register_namespace("", ns)
        types = ET.Element("{%s}Types" % ns)
        default1 = ET.SubElement(
            types,
            "{%s}Default" % ns,
            {"Extension": "json", "ContentType": "application/json"},
        )
        default2 = ET.SubElement(
            types,
            "{%s}Default" % ns,
            {"Extension": "pq", "ContentType": "application/x-ms-m"},
        )
        xmlDocument = ET.ElementTree(types)
        xmlFileName = "[Content_Types].xml"
        xmlFilePath = os.path.join(subFolderPath, xmlFileName)
        xmlDocument.write(
            xmlFilePath, xml_declaration=True, encoding="utf-8", method="xml"
        )

        # STEP 5: Zip up the 4 files
        zipFileName = file_name + ".zip"
        zipFilePath = os.path.join(folderPath, zipFileName)
        shutil.make_archive(zipFilePath[:-4], "zip", subFolderPath)

        # STEP 6: Convert the zip file back into a .pqt file
        newExt = ".pqt"
        directory = os.path.dirname(zipFilePath)
        fileNameWithoutExtension = os.path.splitext(os.path.basename(zipFilePath))[0]
        newFilePath = os.path.join(directory, fileNameWithoutExtension + newExt)
        shutil.move(zipFilePath, newFilePath)

        # STEP 7: Delete subFolder directory which is no longer needed
        shutil.rmtree(subFolderPath, ignore_errors=True)

        print(
            f"{icons.green_dot} '{file_name}.pqt' has been created based on the '{dataset}' semantic model in the '{workspace}' workspace within the Files section of your lakehouse."
        )

    else:
        print(
            f"{icons.yellow_dot} The '{dataset}' semantic model in the '{workspace}' workspace does not use Power Query so a Power Query Template file cannot be generated."
        )
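
A minimal usage sketch for create_pqt_file as defined above, assuming a Fabric notebook with a lakehouse attached (the dataset, workspace, and file names below are placeholders; the function itself prints an error and returns if no lakehouse is attached):

# Placeholder names for illustration only.
from sempy_labs.migration._create_pqt_file import create_pqt_file

create_pqt_file(
    dataset="Sales Model",           # existing semantic model
    workspace="My Workspace",        # omit to resolve the workspace automatically
    file_name="SalesModelTemplate",  # saved as SalesModelTemplate.pqt under Files
)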
sempy_labs/migration/_migrate_calctables_to_lakehouse.py

@@ -0,0 +1,429 @@
import sempy
import sempy.fabric as fabric
import pandas as pd
import re, datetime, time
from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
from sempy_labs._helper_functions import (
    resolve_lakehouse_name,
    resolve_lakehouse_id,
    create_abfss_path,
)
from sempy_labs._tom import connect_semantic_model
from pyspark.sql import SparkSession
from typing import List, Optional, Union
from sempy._utils._log import log
import sempy_labs._icons as icons


@log
def migrate_calc_tables_to_lakehouse(
    dataset: str,
    new_dataset: str,
    workspace: Optional[str] = None,
    new_dataset_workspace: Optional[str] = None,
    lakehouse: Optional[str] = None,
    lakehouse_workspace: Optional[str] = None,
):
    """
    Creates delta tables in your lakehouse based on the DAX expression of a calculated table in an import/DirectQuery semantic model. The DAX expression encapsulating the calculated table logic is stored in the new Direct Lake semantic model as model annotations.

    Parameters
    ----------
    dataset : str
        Name of the import/DirectQuery semantic model.
    new_dataset : str
        Name of the Direct Lake semantic model.
    workspace : str, default=None
        The Fabric workspace name in which the import/DirectQuery semantic model exists.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    new_dataset_workspace : str
        The Fabric workspace name in which the Direct Lake semantic model will be created.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    lakehouse : str, default=None
        The Fabric lakehouse used by the Direct Lake semantic model.
        Defaults to None which resolves to the lakehouse attached to the notebook.
    lakehouse_workspace : str, default=None
        The Fabric workspace used by the lakehouse.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    """

    workspace = fabric.resolve_workspace_name(workspace)

    if new_dataset_workspace == None:
        new_dataset_workspace = workspace

    if lakehouse_workspace == None:
        lakehouse_workspace = new_dataset_workspace
        lakehouse_workspace_id = fabric.resolve_workspace_id(lakehouse_workspace)
    else:
        lakehouse_workspace_id = fabric.resolve_workspace_id(lakehouse_workspace)

    if lakehouse == None:
        lakehouse_id = fabric.get_lakehouse_id()
        lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
    else:
        lakehouse_id = resolve_lakehouse_id(lakehouse, lakehouse_workspace)

    dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
    # dfC['Column Object'] = "'" + dfC['Table Name'] + "'[" + dfC['Column Name'] + "]"
    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
    dfP_filt = dfP[(dfP["Source Type"] == "Calculated")]
    dfP_filt = dfP_filt[
        ~dfP_filt["Query"].str.contains("NAMEOF")
    ]  # Remove field parameters
    # dfC_CalcColumn = dfC[dfC['Type'] == 'Calculated']
    lakeTables = get_lakehouse_tables(lakehouse, lakehouse_workspace)

    # Do not execute the function if lakehouse tables already exist with the same name
    killFunction = False
    for i, r in dfP_filt.iterrows():
        tName = r["Table Name"]
        dtName = tName.replace(" ", "_")

        if dtName in lakeTables["Table Name"].values:
            print(
                f"{icons.red_dot} The '{tName}' table already exists as '{dtName}' in the '{lakehouse}' lakehouse in the '{workspace}' workspace."
            )
            killFunction = True

    if killFunction:
        return

    spark = SparkSession.builder.getOrCreate()

    if len(dfP_filt) == 0:
        print(
            f"{icons.yellow_dot} The '{dataset}' semantic model in the '{workspace}' workspace has no calculated tables."
        )
        return

    start_time = datetime.datetime.now()
    timeout = datetime.timedelta(minutes=1)
    success = False

    while not success:
        try:
            with connect_semantic_model(
                dataset=dataset, workspace=workspace, readonly=True
            ) as tom:
                success = True
                for t in tom.model.Tables:
                    if tom.is_auto_date_table(table_name=t.Name):
                        print(
                            f"{icons.yellow_dot} The '{t.Name}' table is an auto-datetime table and is not supported in the Direct Lake migration process. Please create a proper Date/Calendar table in your lakehoues and use it in your Direct Lake model."
                        )
                    else:
                        for p in t.Partitions:
                            if str(p.SourceType) == "Calculated":
                                query = p.Source.Expression
                                if "NAMEOF" not in query:  # exclude field parameters
                                    daxQuery = ""
                                    if query.lower().startswith("calendar") and any(
                                        str(c.Type) == "Calculated" for c in t.Columns
                                    ):
                                        daxQuery = f"ADDCOLUMNS(\n{query},"
                                        for c in t.Columns:
                                            if str(c.Type) == "Calculated":
                                                expr = c.Expression
                                                expr = expr.replace(
                                                    f"'{t.Name}'", ""
                                                ).replace(f"{t.Name}[Date]", "[Date]")
                                                expr = expr.replace(
                                                    "[MonthNo]", "MONTH([Date])"
                                                ).replace(
                                                    "[QuarterNo]",
                                                    "INT((MONTH([Date]) + 2) / 3)",
                                                )
                                                daxQuery = (
                                                    f'{daxQuery}\n"{c.Name}",{expr},'
                                                )
                                        daxQuery = (
                                            "EVALUATE\n" + daxQuery.rstrip(",") + "\n)"
                                        )
                                    else:
                                        daxQuery = f"EVALUATE\n{query}"
                                    daxQueryTopN = (
                                        daxQuery.replace(
                                            "EVALUATE\n", "EVALUATE\nTOPN(1,"
                                        )
                                        + ")"
                                    )

                                    try:
                                        df = fabric.evaluate_dax(
                                            dataset=dataset,
                                            dax_string=daxQueryTopN,
                                            workspace=workspace,
                                        )

                                        for col in df.columns:
                                            pattern = r"\[([^\]]+)\]"

                                            matches = re.findall(pattern, col)
                                            new_column_name = matches[0].replace(
                                                " ", ""
                                            )

                                            df.rename(
                                                columns={col: new_column_name},
                                                inplace=True,
                                            )

                                            try:
                                                dataType = next(
                                                    str(c.DataType)
                                                    for c in tom.model.Tables[
                                                        t.Name
                                                    ].Columns
                                                    if str(c.Type)
                                                    == "CalculatedTableColumn"
                                                    and c.SourceColumn == col
                                                )
                                            except:
                                                dataType = next(
                                                    str(c.DataType)
                                                    for c in tom.model.Tables[
                                                        t.Name
                                                    ].Columns
                                                    if str(c.Type) == "Calculated"
                                                    and c.Name == new_column_name
                                                )

                                            if dataType == "Int64":
                                                df[new_column_name] = df[
                                                    new_column_name
                                                ].astype(int)
                                            elif dataType in ["Decimal", "Double"]:
                                                df[new_column_name] = df[
                                                    new_column_name
                                                ].astype(float)
                                            elif dataType == "Boolean":
                                                df[new_column_name] = df[
                                                    new_column_name
                                                ].astype(bool)
                                            elif dataType == "DateTime":
                                                df[new_column_name] = pd.to_datetime(
                                                    df[new_column_name]
                                                )

                                        delta_table_name = t.Name.replace(
                                            " ", "_"
                                        ).lower()

                                        spark_df = spark.createDataFrame(df)
                                        filePath = create_abfss_path(
                                            lakehouse_id=lakehouse_id,
                                            lakehouse_workspace_id=lakehouse_workspace_id,
                                            delta_table_name=delta_table_name,
                                        )
                                        spark_df.write.mode("overwrite").format(
                                            "delta"
                                        ).save(filePath)

                                        start_time2 = datetime.datetime.now()
                                        timeout2 = datetime.timedelta(minutes=1)
                                        success2 = False

                                        while not success2:
                                            try:
                                                with connect_semantic_model(
                                                    dataset=new_dataset,
                                                    readonly=False,
                                                    workspace=new_dataset_workspace,
                                                ) as tom2:
                                                    success2 = True
                                                    tom2.set_annotation(
                                                        object=tom2.model,
                                                        name=t.Name,
                                                        value=daxQuery,
                                                    )
                                            except Exception as e:
                                                if (
                                                    datetime.datetime.now()
                                                    - start_time2
                                                    > timeout2
                                                ):
                                                    break
                                                time.sleep(1)

                                        print(
                                            f"{icons.green_dot} Calculated table '{t.Name}' has been created as delta table '{delta_table_name.lower()}' in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
                                        )
                                    except:
                                        print(
                                            f"{icons.red_dot} Failed to create calculated table '{t.Name}' as a delta table in the lakehouse."
                                        )
        except Exception as e:
            if datetime.datetime.now() - start_time > timeout:
                break
            time.sleep(1)


@log
def migrate_field_parameters(
    dataset: str,
    new_dataset: str,
    workspace: Optional[str] = None,
    new_dataset_workspace: Optional[str] = None,
):
    """
    Migrates field parameters from one semantic model to another.

    Parameters
    ----------
    dataset : str
        Name of the import/DirectQuery semantic model.
    new_dataset : str
        Name of the Direct Lake semantic model.
    workspace : str, default=None
        The Fabric workspace name in which the import/DirectQuery semantic model exists.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    new_dataset_workspace : str
        The Fabric workspace name in which the Direct Lake semantic model will be created.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    """

    from .HelperFunctions import format_dax_object_name

    sempy.fabric._client._utils._init_analysis_services()
    import Microsoft.AnalysisServices.Tabular as TOM

    if workspace == None:
        workspace_id = fabric.get_workspace_id()
        workspace = fabric.resolve_workspace_name(workspace_id)

    if new_dataset_workspace == None:
        new_dataset_workspace = workspace

    dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
    dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
    dfP_filt = dfP[(dfP["Source Type"] == "Calculated")]
    dfP_filt = dfP_filt[
        dfP_filt["Query"].str.contains("NAMEOF")
    ]  # Only field parameters
    dfC_CalcColumn = dfC[dfC["Type"] == "Calculated"]

    if len(dfP_filt) == 0:
        print(
            f"{icons.green_dot} The '{dataset}' semantic model in the '{workspace}' workspace has no field parameters."
        )
        return

    start_time = datetime.datetime.now()
    timeout = datetime.timedelta(minutes=1)
    success = False

    while not success:
        try:
            with connect_semantic_model(
                dataset=new_dataset, workspace=new_dataset_workspace, readonly=False
            ) as tom:
                success = True

                for i, r in dfP_filt.iterrows():
                    tName = r["Table Name"]
                    query = r["Query"]

                    # For field parameters, remove calc columns from the query
                    rows = query.strip().split("\n")
                    filtered_rows = [
                        row
                        for row in rows
                        if not any(
                            value in row
                            for value in dfC_CalcColumn["Column Object"].values
                        )
                    ]
                    updated_query_string = "\n".join(filtered_rows)

                    # Remove extra comma
                    lines = updated_query_string.strip().split("\n")
                    lines[-2] = lines[-2].rstrip(",")
                    expr = "\n".join(lines)

                    try:
                        par = TOM.Partition()
                        par.Name = tName

                        parSource = TOM.CalculatedPartitionSource()
                        par.Source = parSource
                        parSource.Expression = expr

                        tbl = TOM.Table()
                        tbl.Name = tName
                        tbl.Partitions.Add(par)

                        columns = ["Value1", "Value2", "Value3"]

                        for colName in columns:
                            col = TOM.CalculatedTableColumn()
                            col.Name = colName
                            col.SourceColumn = "[" + colName + "]"
                            col.DataType = TOM.DataType.String

                            tbl.Columns.Add(col)

                        tom.model.Tables.Add(tbl)

                        ep = TOM.JsonExtendedProperty()
                        ep.Name = "ParameterMetadata"
                        ep.Value = '{"version":3,"kind":2}'

                        rcd = TOM.RelatedColumnDetails()
                        gpc = TOM.GroupByColumn()
                        gpc.GroupingColumn = tom.model.Tables[tName].Columns["Value2"]
                        rcd.GroupByColumns.Add(gpc)

                        # Update column properties
                        tom.model.Tables[tName].Columns["Value2"].IsHidden = True
                        tom.model.Tables[tName].Columns["Value3"].IsHidden = True
                        tom.model.Tables[tName].Columns[
                            "Value3"
                        ].DataType = TOM.DataType.Int64
                        tom.model.Tables[tName].Columns["Value1"].SortByColumn = (
                            tom.model.Tables[tName].Columns["Value3"]
                        )
                        tom.model.Tables[tName].Columns["Value2"].SortByColumn = (
                            tom.model.Tables[tName].Columns["Value3"]
                        )
                        tom.model.Tables[tName].Columns[
                            "Value2"
                        ].ExtendedProperties.Add(ep)
                        tom.model.Tables[tName].Columns[
                            "Value1"
                        ].RelatedColumnDetails = rcd

                        dfC_filt1 = dfC[
                            (dfC["Table Name"] == tName) & (dfC["Source"] == "[Value1]")
                        ]
                        col1 = dfC_filt1["Column Name"].iloc[0]
                        dfC_filt2 = dfC[
                            (dfC["Table Name"] == tName) & (dfC["Source"] == "[Value2]")
                        ]
                        col2 = dfC_filt2["Column Name"].iloc[0]
                        dfC_filt3 = dfC[
                            (dfC["Table Name"] == tName) & (dfC["Source"] == "[Value3]")
                        ]
                        col3 = dfC_filt3["Column Name"].iloc[0]

                        tom.model.Tables[tName].Columns["Value1"].Name = col1
                        tom.model.Tables[tName].Columns["Value2"].Name = col2
                        tom.model.Tables[tName].Columns["Value3"].Name = col3

                        print(
                            f"{icons.green_dot} The '{tName}' table has been added as a field parameter to the '{new_dataset}' semantic model in the '{new_dataset_workspace}' workspace."
                        )
                    except:
                        print(
                            f"{icons.red_dot} The '{tName}' table has not been added as a field parameter."
                        )
        except Exception as e:
            if datetime.datetime.now() - start_time > timeout:
                break
            time.sleep(1)
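
A minimal usage sketch for the two migration functions above, again with placeholder names; per the docstrings, omitting the workspace and lakehouse arguments falls back to the attached lakehouse or, failing that, the notebook's workspace:

# Placeholder names for illustration only.
from sempy_labs.migration._migrate_calctables_to_lakehouse import (
    migrate_calc_tables_to_lakehouse,
    migrate_field_parameters,
)

# Write each calculated table of the import/DirectQuery model to the lakehouse
# as a delta table and record its DAX expression as an annotation on the new model.
migrate_calc_tables_to_lakehouse(
    dataset="Sales Model",
    new_dataset="Sales Model DL",
    lakehouse="SalesLakehouse",
)

# Recreate the field parameter tables in the new Direct Lake model.
migrate_field_parameters(dataset="Sales Model", new_dataset="Sales Model DL")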