semantic-link-labs 0.6.0__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- semantic_link_labs-0.7.1.dist-info/METADATA +148 -0
- semantic_link_labs-0.7.1.dist-info/RECORD +111 -0
- {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.1.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +26 -2
- sempy_labs/_ai.py +3 -65
- sempy_labs/_bpa_translation/_translations_am-ET.po +828 -0
- sempy_labs/_bpa_translation/_translations_ar-AE.po +860 -0
- sempy_labs/_bpa_translation/_translations_cs-CZ.po +894 -0
- sempy_labs/_bpa_translation/_translations_da-DK.po +894 -0
- sempy_labs/_bpa_translation/_translations_de-DE.po +933 -0
- sempy_labs/_bpa_translation/_translations_el-GR.po +936 -0
- sempy_labs/_bpa_translation/_translations_es-ES.po +915 -0
- sempy_labs/_bpa_translation/_translations_fa-IR.po +883 -0
- sempy_labs/_bpa_translation/_translations_fr-FR.po +938 -0
- sempy_labs/_bpa_translation/_translations_ga-IE.po +912 -0
- sempy_labs/_bpa_translation/_translations_he-IL.po +855 -0
- sempy_labs/_bpa_translation/_translations_hi-IN.po +892 -0
- sempy_labs/_bpa_translation/_translations_hu-HU.po +910 -0
- sempy_labs/_bpa_translation/_translations_is-IS.po +887 -0
- sempy_labs/_bpa_translation/_translations_it-IT.po +931 -0
- sempy_labs/_bpa_translation/_translations_ja-JP.po +805 -0
- sempy_labs/_bpa_translation/_translations_nl-NL.po +924 -0
- sempy_labs/_bpa_translation/_translations_pl-PL.po +913 -0
- sempy_labs/_bpa_translation/_translations_pt-BR.po +909 -0
- sempy_labs/_bpa_translation/_translations_pt-PT.po +904 -0
- sempy_labs/_bpa_translation/_translations_ru-RU.po +909 -0
- sempy_labs/_bpa_translation/_translations_ta-IN.po +922 -0
- sempy_labs/_bpa_translation/_translations_te-IN.po +896 -0
- sempy_labs/_bpa_translation/_translations_th-TH.po +873 -0
- sempy_labs/_bpa_translation/_translations_zh-CN.po +767 -0
- sempy_labs/_bpa_translation/_translations_zu-ZA.po +916 -0
- sempy_labs/_clear_cache.py +9 -4
- sempy_labs/_generate_semantic_model.py +30 -56
- sempy_labs/_helper_functions.py +361 -14
- sempy_labs/_icons.py +10 -1
- sempy_labs/_list_functions.py +539 -260
- sempy_labs/_model_bpa.py +194 -18
- sempy_labs/_model_bpa_bulk.py +367 -0
- sempy_labs/_model_bpa_rules.py +19 -8
- sempy_labs/_model_dependencies.py +12 -10
- sempy_labs/_one_lake_integration.py +7 -7
- sempy_labs/_query_scale_out.py +61 -96
- sempy_labs/_refresh_semantic_model.py +7 -0
- sempy_labs/_translations.py +154 -1
- sempy_labs/_vertipaq.py +103 -90
- sempy_labs/directlake/__init__.py +5 -1
- sempy_labs/directlake/_directlake_schema_compare.py +27 -31
- sempy_labs/directlake/_directlake_schema_sync.py +55 -66
- sempy_labs/directlake/_dl_helper.py +233 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +6 -7
- sempy_labs/directlake/_get_shared_expression.py +1 -1
- sempy_labs/directlake/_guardrails.py +17 -13
- sempy_labs/directlake/_update_directlake_partition_entity.py +54 -30
- sempy_labs/directlake/_warm_cache.py +1 -1
- sempy_labs/lakehouse/__init__.py +2 -0
- sempy_labs/lakehouse/_get_lakehouse_tables.py +61 -69
- sempy_labs/lakehouse/_lakehouse.py +66 -9
- sempy_labs/lakehouse/_shortcuts.py +1 -1
- sempy_labs/migration/_create_pqt_file.py +174 -182
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +236 -268
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +75 -73
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +442 -426
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +91 -97
- sempy_labs/migration/_refresh_calc_tables.py +92 -101
- sempy_labs/report/_BPAReportTemplate.json +232 -0
- sempy_labs/report/__init__.py +6 -2
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
- sempy_labs/report/_bpareporttemplate/.platform +11 -0
- sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
- sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
- sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
- sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
- sempy_labs/report/_generate_report.py +255 -139
- sempy_labs/report/_report_functions.py +26 -33
- sempy_labs/report/_report_rebind.py +31 -26
- sempy_labs/tom/_model.py +75 -58
- semantic_link_labs-0.6.0.dist-info/METADATA +0 -22
- semantic_link_labs-0.6.0.dist-info/RECORD +0 -54
- sempy_labs/directlake/_fallback.py +0 -60
- {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.1.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.1.dist-info}/top_level.txt +0 -0
sempy_labs/_vertipaq.py
CHANGED
@@ -9,12 +9,14 @@ import warnings
 from pyspark.sql import SparkSession
 from sempy_labs._helper_functions import (
     format_dax_object_name,
-    get_direct_lake_sql_endpoint,
     resolve_lakehouse_name,
+    resolve_dataset_id,
+    save_as_delta_table,
+    resolve_workspace_capacity,
 )
 from sempy_labs._list_functions import list_relationships
-from sempy_labs.lakehouse …
-from sempy_labs. …
+from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
+from sempy_labs.directlake import get_direct_lake_source
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
@@ -25,8 +27,8 @@ def vertipaq_analyzer(
     dataset: str,
     workspace: Optional[str] = None,
     export: Optional[str] = None,
-    lakehouse_workspace: Optional[str] = None,
     read_stats_from_data: Optional[bool] = False,
+    **kwargs,
 ):
     """
     Displays an HTML visualization of the Vertipaq Analyzer statistics from a semantic model.
@@ -43,10 +45,6 @@
         Specifying 'zip' will export the results to a zip file in your lakehouse (which can be imported using the import_vertipaq_analyzer function.
         Specifying 'table' will export the results to delta tables (appended) in your lakehouse.
         Default value: None.
-    lakehouse_workspace : str, default=None
-        The Fabric workspace used by the lakehouse (for Direct Lake semantic models).
-        Defaults to None which resolves to the workspace of the attached lakehouse
-        or if no lakehouse attached, resolves to the workspace of the notebook.
     read_stats_from_data : bool, default=False
         Setting this parameter to true has the function get Column Cardinality and Missing Rows using DAX (Direct Lake semantic models achieve this using a Spark query to the lakehouse).
 
@@ -57,6 +55,12 @@
 
     from sempy_labs.tom import connect_semantic_model
 
+    if "lakehouse_workspace" in kwargs:
+        print(
+            f"{icons.info} The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+        )
+        del kwargs["lakehouse_workspace"]
+
     pd.options.mode.copy_on_write = True
     warnings.filterwarnings(
         "ignore", message="createDataFrame attempted Arrow optimization*"
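The hunk above is the release's backward-compatibility shim: the removed `lakehouse_workspace` parameter is absorbed by `**kwargs`, announced as deprecated, and discarded. A minimal sketch of what callers see (model and workspace names are placeholders):

```python
from sempy_labs import vertipaq_analyzer

# New-style call: the Direct Lake source is now resolved automatically.
vertipaq_analyzer(dataset="MyModel", workspace="MyWorkspace")

# Old-style call still runs: the deprecated keyword lands in **kwargs,
# a deprecation notice is printed, and the value is ignored.
vertipaq_analyzer(
    dataset="MyModel",
    workspace="MyWorkspace",
    lakehouse_workspace="MyWorkspace",
)
```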
@@ -64,9 +68,6 @@
 
     workspace = fabric.resolve_workspace_name(workspace)
 
-    if lakehouse_workspace is None:
-        lakehouse_workspace = workspace
-
     dfT = fabric.list_tables(dataset=dataset, extended=True, workspace=workspace)
     dfT.rename(columns={"Name": "Table Name"}, inplace=True)
     dfC = fabric.list_columns(dataset=dataset, extended=True, workspace=workspace)
@@ -77,6 +78,9 @@
     dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"])
     dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
     dfP = fabric.list_partitions(dataset=dataset, extended=True, workspace=workspace)
+    artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+        get_direct_lake_source(dataset=dataset, workspace=workspace)
+    )
 
     with connect_semantic_model(
         dataset=dataset, readonly=True, workspace=workspace
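This is the pivot of the release for `_vertipaq.py`: instead of asking the caller for the lakehouse, the function resolves it from the model via the new `get_direct_lake_source` helper, replacing the removed `get_direct_lake_sql_endpoint`. Judging from the unpacking above and the checks later in the file, it returns a 4-tuple of artifact type, name, id, and workspace id, with the type taking values such as "Lakehouse" or "Warehouse". A minimal sketch of that contract (dataset and workspace names are placeholders):

```python
from sempy_labs.directlake import get_direct_lake_source

artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
    get_direct_lake_source(dataset="MyModel", workspace="MyWorkspace")
)
if artifact_type != "Lakehouse":
    # _vertipaq.py only reads stats from data for lakehouse-backed models.
    print("Not a lakehouse-backed Direct Lake model; skipping data stats.")
```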
@@ -91,7 +95,7 @@
 
     # Direct Lake
     if read_stats_from_data:
-        if is_direct_lake:
+        if is_direct_lake and artifact_type == "Lakehouse":
             dfC = pd.merge(
                 dfC,
                 dfP[["Table Name", "Query", "Source Type"]],
@@ -102,69 +106,54 @@
                 (dfC["Source Type"] == "Entity")
                 & (~dfC["Column Name"].str.startswith("RowNumber-"))
             ]
-            sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)
 
-            …
-            raise ValueError(
-                f"{icons.red_dot} The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace."
-                "Please update the lakehouse_workspace parameter."
+            object_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
+            current_workspace_id = fabric.get_workspace_id()
+            if current_workspace_id != lakehouse_workspace_id:
+                lakeTables = get_lakehouse_tables(
+                    lakehouse=lakehouse_name, workspace=object_workspace
             )
-            …
-            df = spark.sql(query)
-
-            for column in df.columns:
-                x = df.collect()[0][column]
-                for i, r in dfC.iterrows():
-                    if r["Query"] == tName and r["Source"] == column:
-                        dfC.at[i, "Cardinality"] = x
-
-            # Remove column added temporarily
-            dfC.drop(columns=["Query", "Source Type"], inplace=True)
+
+            sql_statements = []
+            spark = SparkSession.builder.getOrCreate()
+            # Loop through tables
+            for lakeTName in dfC_flt["Query"].unique():
+                query = "SELECT "
+                columns_in_table = dfC_flt.loc[
+                    dfC_flt["Query"] == lakeTName, "Source"
+                ].unique()
+
+                # Loop through columns within those tables
+                for scName in columns_in_table:
+                    query = query + f"COUNT(DISTINCT(`{scName}`)) AS `{scName}`, "
+
+                query = query[:-2]
+                if lakehouse_workspace_id == current_workspace_id:
+                    query = query + f" FROM {lakehouse_name}.{lakeTName}"
+                else:
+                    lakeTables_filt = lakeTables[lakeTables["Table Name"] == lakeTName]
+                    tPath = lakeTables_filt["Location"].iloc[0]
+
+                    df = spark.read.format("delta").load(tPath)
+                    tempTableName = "delta_table_" + lakeTName
+                    df.createOrReplaceTempView(tempTableName)
+                    query = query + f" FROM {tempTableName}"
+                sql_statements.append((lakeTName, query))
+
+            for o in sql_statements:
+                tName = o[0]
+                query = o[1]
+
+                df = spark.sql(query)
+
+                for column in df.columns:
+                    x = df.collect()[0][column]
+                    for i, r in dfC.iterrows():
+                        if r["Query"] == tName and r["Source"] == column:
+                            dfC.at[i, "Cardinality"] = x
+
+            # Remove column added temporarily
+            dfC.drop(columns=["Query", "Source Type"], inplace=True)
 
     # Direct Lake missing rows
     dfR = pd.merge(
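The rewritten cardinality pass no longer goes through the SQL endpoint; it composes one Spark SQL statement per lake table, counting distinct values of every mapped column in a single scan. A standalone rehearsal of the string-building loop above, with hypothetical table and column names:

```python
# Rehearsal of the query builder above (all names are hypothetical).
lakehouse_name, lakeTName = "MyLakehouse", "Sales"
columns_in_table = ["OrderID", "Region"]

query = "SELECT "
for scName in columns_in_table:
    query += f"COUNT(DISTINCT(`{scName}`)) AS `{scName}`, "
query = query[:-2]  # drop the trailing ", "
query += f" FROM {lakehouse_name}.{lakeTName}"
print(query)
# SELECT COUNT(DISTINCT(`OrderID`)) AS `OrderID`, COUNT(DISTINCT(`Region`)) AS `Region` FROM MyLakehouse.Sales
```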
@@ -211,11 +200,11 @@
                 toTable = r["To Lake Table"]
                 toColumn = r["To Lake Column"]
 
-                if …
+                if lakehouse_workspace_id == current_workspace_id:
                     query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"
                 else:
-                    tempTableFrom = "delta_table_…
-                    tempTableTo = "delta_table_…
+                    tempTableFrom = f"delta_table_{fromTable}"
+                    tempTableTo = f"delta_table_{toTable}"
 
                     query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {tempTableFrom} as f\nleft join {tempTableTo} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"
 
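Missing rows are computed with a left anti-join per relationship: count the foreign-key values on the "from" side that find no match on the "to" side, with the cross-workspace branch swapping in the registered temp views. A rehearsal of the generated statement (identifiers are hypothetical):

```python
# Rehearsal of the missing-rows query above (identifiers are hypothetical).
fromTable, fromColumn = "FactSales", "ProductKey"
toTable, toColumn = "DimProduct", "ProductKey"

query = (
    f"select count(f.{fromColumn}) as {fromColumn}\n"
    f"from {fromTable} as f\n"
    f"left join {toTable} as c on f.{fromColumn} = c.{toColumn}\n"
    f"where c.{toColumn} is null"
)
print(query)
```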
@@ -226,7 +215,7 @@
                 dfR.at[i, "Missing Rows"] = missingRows
 
         dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)
-    …
+    elif not is_direct_lake:
         # Calculate missing rows using DAX for non-direct lake
         for i, r in dfR.iterrows():
             fromTable = r["From Table"]
@@ -395,6 +384,8 @@
             by="Used Size", ascending=False
         )
         dfH_filt.reset_index(drop=True, inplace=True)
+        dfH_filt.fillna({"Used Size": 0}, inplace=True)
+        dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int)
         export_Hier = dfH_filt.copy()
         intList = ["Used Size"]
         dfH_filt[intList] = dfH_filt[intList].applymap("{:,}".format)
@@ -420,6 +411,7 @@
         index=[0],
     )
     dfModel.reset_index(drop=True, inplace=True)
+    dfModel["Default Mode"] = dfModel["Default Mode"].astype(str)
     export_Model = dfModel.copy()
     intList = ["Total Size", "Table Count", "Column Count"]
     dfModel[intList] = dfModel[intList].applymap("{:,}".format)
@@ -442,8 +434,7 @@
 
     # Export vertipaq to delta tables in lakehouse
     if export in ["table", "zip"]:
-
-        if lakeAttach is False:
+        if not lakehouse_attached():
             raise ValueError(
                 f"{icons.red_dot} In order to save the Vertipaq Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
             )
@@ -452,12 +443,13 @@
         spark = SparkSession.builder.getOrCreate()
 
         lakehouse_id = fabric.get_lakehouse_id()
+        lake_workspace = fabric.resolve_workspace_name()
         lakehouse = resolve_lakehouse_name(
-            lakehouse_id=lakehouse_id, workspace=…
+            lakehouse_id=lakehouse_id, workspace=lake_workspace
         )
         lakeTName = "vertipaq_analyzer_model"
 
-        lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=…
+        lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace)
         lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName]
 
         query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}"
@@ -482,24 +474,45 @@
             f"{icons.in_progress} Saving Vertipaq Analyzer to delta tables in the lakehouse...\n"
         )
         now = datetime.datetime.now()
+        dfD = fabric.list_datasets(workspace=workspace, mode="rest")
+        dfD_filt = dfD[dfD["Dataset Name"] == dataset]
+        configured_by = dfD_filt["Configured By"].iloc[0]
+        capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace)
+
         for key, (obj, df) in dfMap.items():
-            df["…
+            df["Capacity Name"] = capacity_name
+            df["Capacity Id"] = capacity_id
+            df["Configured By"] = configured_by
             df["Workspace Name"] = workspace
+            df["Workspace Id"] = fabric.resolve_workspace_id(workspace)
             df["Dataset Name"] = dataset
+            df["Dataset Id"] = resolve_dataset_id(dataset, workspace)
             df["RunId"] = runId
+            df["Timestamp"] = now
 
-            colName = "…
+            colName = "Capacity Name"
             df.insert(0, colName, df.pop(colName))
-            colName = "…
+            colName = "Capacity Id"
             df.insert(1, colName, df.pop(colName))
+            colName = "Workspace Name"
+            df.insert(2, colName, df.pop(colName))
+            colName = "Workspace Id"
+            df.insert(3, colName, df.pop(colName))
+            colName = "Dataset Name"
+            df.insert(4, colName, df.pop(colName))
+            colName = "Dataset Id"
+            df.insert(5, colName, df.pop(colName))
+            colName = "Configured By"
+            df.insert(6, colName, df.pop(colName))
 
             df.columns = df.columns.str.replace(" ", "_")
 
             delta_table_name = f"VertipaqAnalyzer_{obj}".lower()
-            …
+            save_as_delta_table(
+                dataframe=df,
+                delta_table_name=delta_table_name,
+                write_mode="append",
+                merge_schema=True,
             )
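The repeated `df.insert(i, colName, df.pop(colName))` pairs above are the pandas idiom for moving an existing column to a fixed position: `pop` removes and returns the column, `insert` re-adds it at the given index, so the run metadata ends up as the first seven columns of every exported table. A minimal demonstration:

```python
import pandas as pd

# Toy frame standing in for one of the export dataframes.
df = pd.DataFrame({"Rows": [10], "Capacity Name": ["Cap1"]})

colName = "Capacity Name"
df.insert(0, colName, df.pop(colName))  # move the column to position 0
print(df.columns.tolist())  # ['Capacity Name', 'Rows']
```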
@@ -525,13 +538,13 @@
 
     # Create CSV files based on dataframes
     for fileName, df in dataFrames.items():
-        filePath = os.path.join(subFolderPath, fileName…
+        filePath = os.path.join(subFolderPath, f"{fileName}{ext}")
         df.to_csv(filePath, index=False)
 
     # Create a zip file and add CSV files to it
     with zipfile.ZipFile(zipFilePath, "w") as zipf:
         for fileName in dataFrames:
-            filePath = os.path.join(subFolderPath, fileName…
+            filePath = os.path.join(subFolderPath, f"{fileName}{ext}")
             zipf.write(filePath, os.path.basename(filePath))
 
     # Clean up: remove the individual CSV files
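With the hunks above applied, both export paths hang off the same `export` argument. A usage sketch (names are placeholders; either mode requires a lakehouse attached to the notebook):

```python
from sempy_labs import vertipaq_analyzer

# Append this run to the VertipaqAnalyzer_* delta tables in the lakehouse.
vertipaq_analyzer(dataset="MyModel", workspace="MyWorkspace", export="table")

# Or write a zip of CSVs that import_vertipaq_analyzer can read back later.
vertipaq_analyzer(dataset="MyModel", workspace="MyWorkspace", export="zip")
```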
sempy_labs/directlake/__init__.py
CHANGED
@@ -1,7 +1,9 @@
 from sempy_labs.directlake._directlake_schema_compare import direct_lake_schema_compare
 from sempy_labs.directlake._directlake_schema_sync import direct_lake_schema_sync
-from sempy_labs.directlake.…
+from sempy_labs.directlake._dl_helper import (
     check_fallback_reason,
+    generate_direct_lake_semantic_model,
+    get_direct_lake_source,
 )
 from sempy_labs.directlake._get_directlake_lakehouse import get_direct_lake_lakehouse
 from sempy_labs.directlake._get_shared_expression import get_shared_expression
@@ -44,4 +46,6 @@ __all__ = [
     "warm_direct_lake_cache_isresident",
     "warm_direct_lake_cache_perspective",
     "add_table_to_direct_lake_semantic_model",
+    "generate_direct_lake_semantic_model",
+    "get_direct_lake_source",
 ]
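These `__init__.py` additions put the new helpers on the public surface of `sempy_labs.directlake`, alongside `check_fallback_reason`, which now comes from the new `_dl_helper` module (the file list above shows `_fallback.py` was deleted in this release):

```python
# Public imports after this release (module path per the diff above).
from sempy_labs.directlake import (
    check_fallback_reason,                # now sourced from _dl_helper
    generate_direct_lake_semantic_model,  # new export in 0.7.x
    get_direct_lake_source,               # new export in 0.7.x
)
```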
sempy_labs/directlake/_directlake_schema_compare.py
CHANGED
@@ -2,11 +2,10 @@ import sempy.fabric as fabric
 import pandas as pd
 from sempy_labs._helper_functions import (
     format_dax_object_name,
-    resolve_lakehouse_name,
-    get_direct_lake_sql_endpoint,
 )
 from IPython.display import display
-from sempy_labs.lakehouse …
+from sempy_labs.lakehouse import get_lakehouse_columns
+from sempy_labs.directlake._dl_helper import get_direct_lake_source
 from sempy_labs._list_functions import list_tables
 from typing import Optional
 import sempy_labs._icons as icons
@@ -17,8 +16,7 @@ from sempy._utils._log import log
 def direct_lake_schema_compare(
     dataset: str,
     workspace: Optional[str] = None,
-    …
-    lakehouse_workspace: Optional[str] = None,
+    **kwargs,
 ):
     """
     Checks that all the tables in a Direct Lake semantic model map to tables in their corresponding lakehouse and that the columns in each table exist.
@@ -31,35 +29,33 @@
         The Fabric workspace name.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-    lakehouse : str, default=None
-        The Fabric lakehouse used by the Direct Lake semantic model.
-        Defaults to None which resolves to the lakehouse attached to the notebook.
-    lakehouse_workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
-        Defaults to None which resolves to the workspace of the attached lakehouse
-        or if no lakehouse attached, resolves to the workspace of the notebook.
     """
 
-    …
+    if "lakehouse" in kwargs:
+        print(
+            "The 'lakehouse' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+        )
+        del kwargs["lakehouse"]
+    if "lakehouse_workspace" in kwargs:
+        print(
+            "The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+        )
+        del kwargs["lakehouse_workspace"]
 
-    …
-    lakehouse_id = fabric.get_lakehouse_id()
-    lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
+    workspace = fabric.resolve_workspace_name(workspace)
 
-    …
+    artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+        get_direct_lake_source(dataset=dataset, workspace=workspace)
+    )
+    lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
 
-    if …
+    if artifact_type == "Warehouse":
         raise ValueError(
-            f"{icons.red_dot} …
-            f"'{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified."
+            f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
         )
 
+    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+
     if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
         raise ValueError(
             f"{icons.red_dot} The '{dataset}' semantic model is not in Direct Lake mode."
@@ -67,7 +63,7 @@ def direct_lake_schema_compare
 
     dfT = list_tables(dataset, workspace)
     dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
-    lc = get_lakehouse_columns(…
+    lc = get_lakehouse_columns(lakehouse_name, lakehouse_workspace)
 
     dfT.rename(columns={"Type": "Table Type"}, inplace=True)
     dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
@@ -93,21 +89,21 @@
 
     if len(missingtbls) == 0:
         print(
-            f"{icons.green_dot} All tables exist in the '{…
+            f"{icons.green_dot} All tables exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
         )
     else:
         print(
             f"{icons.yellow_dot} The following tables exist in the '{dataset}' semantic model within the '{workspace}' workspace"
-            f" but do not exist in the '{…
+            f" but do not exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
         )
         display(missingtbls)
     if len(missingcols) == 0:
         print(
-            f"{icons.green_dot} All columns exist in the '{…
+            f"{icons.green_dot} All columns exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
         )
     else:
         print(
             f"{icons.yellow_dot} The following columns exist in the '{dataset}' semantic model within the '{workspace}' workspace "
-            f"but do not exist in the '{…
+            f"but do not exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
        )
         display(missingcols)
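After this rewrite `direct_lake_schema_compare` needs only the model; the backing lakehouse and its workspace are resolved via `get_direct_lake_source`. A usage sketch (names are placeholders):

```python
from sempy_labs.directlake import direct_lake_schema_compare

# The old lakehouse/lakehouse_workspace arguments are gone; passing them
# now only prints a deprecation notice before they are discarded.
direct_lake_schema_compare(dataset="MyModel", workspace="MyWorkspace")
```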
sempy_labs/directlake/_directlake_schema_sync.py
CHANGED
@@ -1,13 +1,8 @@
 import sempy
 import sempy.fabric as fabric
-…
-from sempy_labs.…
+from sempy_labs.lakehouse import get_lakehouse_columns
+from sempy_labs.directlake._dl_helper import get_direct_lake_source
 from sempy_labs.tom import connect_semantic_model
-from sempy_labs._helper_functions import (
-    format_dax_object_name,
-    resolve_lakehouse_name,
-    get_direct_lake_sql_endpoint,
-)
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
@@ -18,8 +13,7 @@ def direct_lake_schema_sync(
     dataset: str,
     workspace: Optional[str] = None,
     add_to_model: Optional[bool] = False,
-    …
-    lakehouse_workspace: Optional[str] = None,
+    **kwargs,
 ):
     """
     Shows/adds columns which exist in the lakehouse but do not exist in the semantic model (only for tables in the semantic model).
@@ -34,84 +28,79 @@
         or if no lakehouse attached, resolves to the workspace of the notebook.
     add_to_model : bool, default=False
         If set to True, columns which exist in the lakehouse but do not exist in the semantic model are added to the semantic model. No new tables are added.
-    lakehouse : str, default=None
-        The Fabric lakehouse used by the Direct Lake semantic model.
-        Defaults to None which resolves to the lakehouse attached to the notebook.
-    lakehouse_workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
-        Defaults to None which resolves to the workspace of the attached lakehouse
-        or if no lakehouse attached, resolves to the workspace of the notebook.
     """
 
     sempy.fabric._client._utils._init_analysis_services()
     import Microsoft.AnalysisServices.Tabular as TOM
     import System
 
-    …
+    if "lakehouse" in kwargs:
+        print(
+            "The 'lakehouse' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+        )
+        del kwargs["lakehouse"]
+    if "lakehouse_workspace" in kwargs:
+        print(
+            "The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+        )
+        del kwargs["lakehouse_workspace"]
 
-    …
+    workspace = fabric.resolve_workspace_name(workspace)
 
-    …
+    artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+        get_direct_lake_source(dataset=dataset, workspace=workspace)
+    )
 
-    if …
+    if artifact_type == "Warehouse":
         raise ValueError(
-            f"{icons.red_dot} …
-            f"'{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified."
+            f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
         )
+    lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
 
-    …
-    dfC_filt = pd.merge(
-        dfC_filt, dfP_filt[["Table Name", "Query"]], on="Table Name", how="left"
-    )
-    dfC_filt["Column Object"] = format_dax_object_name(
-        dfC_filt["Query"], dfC_filt["Source"]
-    )
+    if artifact_type == "Warehouse":
+        raise ValueError(
+            f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
+        )
 
-    lc = get_lakehouse_columns(…
-    lc_filt = lc[lc["Table Name"].isin(dfP_filt["Query"].values)]
+    lc = get_lakehouse_columns(lakehouse_name, lakehouse_workspace)
 
     with connect_semantic_model(
         dataset=dataset, readonly=False, workspace=workspace
     ) as tom:
 
-        for i, r in …
+        for i, r in lc.iterrows():
             lakeTName = r["Table Name"]
             lakeCName = r["Column Name"]
-            fullColName = r["Full Column Name"]
             dType = r["Data Type"]
 
-            if …
-            …
-            if …
-            …
-            tom.…
-            …
-                f"{icons.green_dot} The '{lakeCName}' column has been added to the '{tName}' table as a '{dt}' "
-                f"data type within the '{dataset}' semantic model within the '{workspace}' workspace."
-            )
-        else:
+            if any(
+                p.Source.EntityName == lakeTName
+                for p in tom.all_partitions()
+                if p.SourceType == TOM.PartitionSourceType.Entity
+            ):
+                table_name = next(
+                    t.Name
+                    for t in tom.model.Tables
+                    for p in t.Partitions
+                    if p.SourceType == TOM.PartitionSourceType.Entity
+                    and p.Source.EntityName == lakeTName
+                )
+
+                if not any(
+                    c.SourceColumn == lakeCName and c.Parent.Name == table_name
+                    for c in tom.all_columns()
+                ):
                     print(
-                        f"{icons.yellow_dot} The {…
+                        f"{icons.yellow_dot} The '{lakeCName}' column exists in the '{lakeTName}' lakehouse table but not in the '{dataset}' semantic model within the '{workspace}' workspace."
                     )
+                    if add_to_model:
+                        dt = icons.data_type_mapping.get(dType)
+                        tom.add_data_column(
+                            table_name=table_name,
+                            column_name=lakeCName,
+                            source_column=lakeCName,
+                            data_type=System.Enum.Parse(TOM.DataType, dt),
+                        )
+                        print(
+                            f"{icons.green_dot} The '{lakeCName}' column in the '{lakeTName}' lakehouse table was added to the '{dataset}' semantic model within the '{workspace}' workspace."
+                        )