semantic-link-labs 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of semantic-link-labs might be problematic.
- semantic_link_labs-0.7.0.dist-info/METADATA +148 -0
- semantic_link_labs-0.7.0.dist-info/RECORD +111 -0
- {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +26 -2
- sempy_labs/_ai.py +3 -65
- sempy_labs/_bpa_translation/_translations_am-ET.po +828 -0
- sempy_labs/_bpa_translation/_translations_ar-AE.po +860 -0
- sempy_labs/_bpa_translation/_translations_cs-CZ.po +894 -0
- sempy_labs/_bpa_translation/_translations_da-DK.po +894 -0
- sempy_labs/_bpa_translation/_translations_de-DE.po +933 -0
- sempy_labs/_bpa_translation/_translations_el-GR.po +936 -0
- sempy_labs/_bpa_translation/_translations_es-ES.po +915 -0
- sempy_labs/_bpa_translation/_translations_fa-IR.po +883 -0
- sempy_labs/_bpa_translation/_translations_fr-FR.po +938 -0
- sempy_labs/_bpa_translation/_translations_ga-IE.po +912 -0
- sempy_labs/_bpa_translation/_translations_he-IL.po +855 -0
- sempy_labs/_bpa_translation/_translations_hi-IN.po +892 -0
- sempy_labs/_bpa_translation/_translations_hu-HU.po +910 -0
- sempy_labs/_bpa_translation/_translations_is-IS.po +887 -0
- sempy_labs/_bpa_translation/_translations_it-IT.po +931 -0
- sempy_labs/_bpa_translation/_translations_ja-JP.po +805 -0
- sempy_labs/_bpa_translation/_translations_nl-NL.po +924 -0
- sempy_labs/_bpa_translation/_translations_pl-PL.po +913 -0
- sempy_labs/_bpa_translation/_translations_pt-BR.po +909 -0
- sempy_labs/_bpa_translation/_translations_pt-PT.po +904 -0
- sempy_labs/_bpa_translation/_translations_ru-RU.po +909 -0
- sempy_labs/_bpa_translation/_translations_ta-IN.po +922 -0
- sempy_labs/_bpa_translation/_translations_te-IN.po +896 -0
- sempy_labs/_bpa_translation/_translations_th-TH.po +873 -0
- sempy_labs/_bpa_translation/_translations_zh-CN.po +767 -0
- sempy_labs/_bpa_translation/_translations_zu-ZA.po +916 -0
- sempy_labs/_clear_cache.py +9 -4
- sempy_labs/_generate_semantic_model.py +30 -56
- sempy_labs/_helper_functions.py +358 -14
- sempy_labs/_icons.py +10 -1
- sempy_labs/_list_functions.py +478 -237
- sempy_labs/_model_bpa.py +194 -18
- sempy_labs/_model_bpa_bulk.py +363 -0
- sempy_labs/_model_bpa_rules.py +4 -4
- sempy_labs/_model_dependencies.py +12 -10
- sempy_labs/_one_lake_integration.py +7 -7
- sempy_labs/_query_scale_out.py +45 -66
- sempy_labs/_refresh_semantic_model.py +7 -0
- sempy_labs/_translations.py +154 -1
- sempy_labs/_vertipaq.py +103 -90
- sempy_labs/directlake/__init__.py +5 -1
- sempy_labs/directlake/_directlake_schema_compare.py +27 -31
- sempy_labs/directlake/_directlake_schema_sync.py +55 -66
- sempy_labs/directlake/_dl_helper.py +233 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +6 -7
- sempy_labs/directlake/_get_shared_expression.py +1 -1
- sempy_labs/directlake/_guardrails.py +17 -13
- sempy_labs/directlake/_update_directlake_partition_entity.py +54 -30
- sempy_labs/directlake/_warm_cache.py +1 -1
- sempy_labs/lakehouse/_get_lakehouse_tables.py +61 -69
- sempy_labs/lakehouse/_lakehouse.py +3 -2
- sempy_labs/lakehouse/_shortcuts.py +1 -1
- sempy_labs/migration/_create_pqt_file.py +174 -182
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +236 -268
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +75 -73
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +442 -426
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +91 -97
- sempy_labs/migration/_refresh_calc_tables.py +92 -101
- sempy_labs/report/_BPAReportTemplate.json +232 -0
- sempy_labs/report/__init__.py +6 -2
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
- sempy_labs/report/_bpareporttemplate/.platform +11 -0
- sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
- sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
- sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
- sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
- sempy_labs/report/_generate_report.py +255 -139
- sempy_labs/report/_report_functions.py +26 -33
- sempy_labs/report/_report_rebind.py +31 -26
- sempy_labs/tom/_model.py +75 -58
- semantic_link_labs-0.6.0.dist-info/METADATA +0 -22
- semantic_link_labs-0.6.0.dist-info/RECORD +0 -54
- sempy_labs/directlake/_fallback.py +0 -60
- {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/top_level.txt +0 -0
sempy_labs/_model_bpa.py
CHANGED
@@ -9,14 +9,20 @@ from sempy_labs._helper_functions import (
     format_dax_object_name,
     resolve_lakehouse_name,
     create_relationship_name,
+    save_as_delta_table,
+    resolve_workspace_capacity,
+    resolve_dataset_id,
 )
-from sempy_labs.lakehouse
-from sempy_labs.lakehouse._lakehouse import lakehouse_attached
+from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
 from sempy_labs.tom import connect_semantic_model
 from sempy_labs._model_bpa_rules import model_bpa_rules
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
+from pyspark.sql.functions import col, flatten
+from pyspark.sql.types import StructType, StructField, StringType
+import polib
+import os
 
 
 @log
@@ -27,6 +33,7 @@ def run_model_bpa(
     export: Optional[bool] = False,
     return_dataframe: Optional[bool] = False,
     extended: Optional[bool] = False,
+    language: Optional[str] = None,
     **kwargs,
 ):
     """
@@ -48,6 +55,9 @@ def run_model_bpa(
         If True, returns a pandas dataframe instead of the visualization.
     extended : bool, default=False
         If True, runs the set_vertipaq_annotations function to collect Vertipaq Analyzer statistics to be used in the analysis of the semantic model.
+    language : str, default=None
+        Specifying a language code (i.e. 'it-IT' for Italian) will auto-translate the Category, Rule Name and Description into the specified language.
+        Defaults to None which resolves to English.
 
     Returns
     -------
@@ -55,6 +65,8 @@ def run_model_bpa(
         A pandas dataframe in HTML format showing semantic model objects which violated the best practice analyzer rules.
     """
 
+    from synapse.ml.services import Translate
+
     if "extend" in kwargs:
         print(
             "The 'extend' parameter has been deprecated. Please remove this parameter from the function going forward."
@@ -65,9 +77,66 @@ def run_model_bpa(
         "ignore",
         message="This pattern is interpreted as a regular expression, and has match groups.",
     )
+    warnings.filterwarnings(
+        "ignore", category=UserWarning, message=".*Arrow optimization.*"
+    )
+
+    language_list = [
+        "it-IT",
+        "es-ES",
+        "he-IL",
+        "pt-PT",
+        "zh-CN",
+        "fr-FR",
+        "da-DK",
+        "cs-CZ",
+        "de-DE",
+        "el-GR",
+        "fa-IR",
+        "ga-IE",
+        "hi-IN",
+        "hu-HU",
+        "is-IS",
+        "ja-JP",
+        "nl-NL",
+        "pl-PL",
+        "pt-BR",
+        "ru-RU",
+        "te-IN",
+        "ta-IN",
+        "th-TH",
+        "zu-ZA",
+        "am-ET",
+        "ar-AE",
+    ]
+
+    # Map languages to the closest language (first 2 letters matching)
+    def map_language(language, language_list):
+
+        mapped = False
+
+        if language in language_list:
+            mapped is True
+            return language
+
+        language_prefix = language[:2]
+        for lang_code in language_list:
+            if lang_code.startswith(language_prefix):
+                mapped is True
+                return lang_code
+        if not mapped:
+            return language
+
+    if language is not None:
+        language = map_language(language, language_list)
 
     workspace = fabric.resolve_workspace_name(workspace)
 
+    if language is not None and language not in language_list:
+        print(
+            f"{icons.yellow_dot} The '{language}' language code is not in our predefined language list. Please file an issue and let us know which language code you are using: https://github.com/microsoft/semantic-link-labs/issues/new?assignees=&labels=&projects=&template=bug_report.md&title=."
+        )
+
     if extended:
         with connect_semantic_model(
             dataset=dataset, workspace=workspace, readonly=False
@@ -80,14 +149,104 @@ def run_model_bpa(
 
     dep = get_model_calc_dependencies(dataset=dataset, workspace=workspace)
 
+    def translate_using_po(rule_file):
+        current_dir = os.path.dirname(os.path.abspath(__file__))
+        translation_file = (
+            f"{current_dir}/_bpa_translation/_translations_{language}.po"
+        )
+        for c in ["Category", "Description", "Rule Name"]:
+            po = polib.pofile(translation_file)
+            for entry in po:
+                if entry.tcomment == c.lower().replace(" ", "_"):
+                    rule_file.loc[rule_file["Rule Name"] == entry.msgid, c] = (
+                        entry.msgstr
+                    )
+
+    def translate_using_spark(rule_file):
+        rules_temp = rule_file.copy()
+        rules_temp = rules_temp.drop(["Expression", "URL", "Severity"], axis=1)
+
+        schema = StructType(
+            [
+                StructField("Category", StringType(), True),
+                StructField("Scope", StringType(), True),
+                StructField("Rule Name", StringType(), True),
+                StructField("Description", StringType(), True),
+            ]
+        )
+
+        spark = SparkSession.builder.getOrCreate()
+        dfRules = spark.createDataFrame(rules_temp, schema)
+
+        columns = ["Category", "Rule Name", "Description"]
+        for clm in columns:
+            translate = (
+                Translate()
+                .setTextCol(clm)
+                .setToLanguage(language)
+                .setOutputCol("translation")
+                .setConcurrency(5)
+            )
+
+            if clm == "Rule Name":
+                transDF = (
+                    translate.transform(dfRules)
+                    .withColumn(
+                        "translation", flatten(col("translation.translations"))
+                    )
+                    .withColumn("translation", col("translation.text"))
+                    .select(clm, "translation")
+                )
+            else:
+                transDF = (
+                    translate.transform(dfRules)
+                    .withColumn(
+                        "translation", flatten(col("translation.translations"))
+                    )
+                    .withColumn("translation", col("translation.text"))
+                    .select("Rule Name", clm, "translation")
+                )
+
+            df_panda = transDF.toPandas()
+            rule_file = pd.merge(
+                rule_file,
+                df_panda[["Rule Name", "translation"]],
+                on="Rule Name",
+                how="left",
+            )
+
+            rule_file = rule_file.rename(
+                columns={"translation": f"{clm}Translated"}
+            )
+            rule_file[f"{clm}Translated"] = rule_file[f"{clm}Translated"].apply(
+                lambda x: x[0] if x is not None else None
+            )
+
+        for clm in columns:
+            rule_file = rule_file.drop([clm], axis=1)
+            rule_file = rule_file.rename(columns={f"{clm}Translated": clm})
+
+        return rule_file
+
+    translated = False
+
+    # Translations
+    if language is not None and rules is None and language in language_list:
+        rules = model_bpa_rules(
+            dataset=dataset, workspace=workspace, dependencies=dep
+        )
+        translate_using_po(rules)
+        translated = True
     if rules is None:
         rules = model_bpa_rules(
             dataset=dataset, workspace=workspace, dependencies=dep
         )
+    if language is not None and not translated:
+        rules = translate_using_spark(rules)
 
-    rules["Severity"].replace("Warning",
-    rules["Severity"].replace("Error",
-    rules["Severity"].replace("Info",
+    rules["Severity"].replace("Warning", icons.warning, inplace=True)
+    rules["Severity"].replace("Error", icons.error, inplace=True)
+    rules["Severity"].replace("Info", icons.info, inplace=True)
 
     pd.set_option("display.max_colwidth", 1000)
 
@@ -191,8 +350,7 @@ def run_model_bpa(
     ]
 
     if export:
-
-        if lakeAttach is False:
+        if not lakehouse_attached():
             raise ValueError(
                 f"{icons.red_dot} In order to save the Best Practice Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
             )
@@ -201,16 +359,15 @@ def run_model_bpa(
         delta_table_name = "modelbparesults"
 
        lakehouse_id = fabric.get_lakehouse_id()
+        lake_workspace = fabric.get_workspace_id()
         lakehouse = resolve_lakehouse_name(
-            lakehouse_id=lakehouse_id, workspace=
+            lakehouse_id=lakehouse_id, workspace=lake_workspace
         )
 
-        lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=
+        lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace)
         lakeT_filt = lakeT[lakeT["Table Name"] == delta_table_name]
 
-        dfExport["Severity"].replace(
-        dfExport["Severity"].replace("\u274C", "Error", inplace=True)
-        dfExport["Severity"].replace("ℹ️", "Info", inplace=True)
+        dfExport["Severity"].replace(icons.severity_mapping, inplace=True)
 
         spark = SparkSession.builder.getOrCreate()
         query = f"SELECT MAX(RunId) FROM {lakehouse}.{delta_table_name}"
@@ -223,23 +380,42 @@ def run_model_bpa(
             runId = maxRunId + 1
 
         now = datetime.datetime.now()
+        dfD = fabric.list_datasets(workspace=workspace, mode="rest")
+        dfD_filt = dfD[dfD["Dataset Name"] == dataset]
+        configured_by = dfD_filt["Configured By"].iloc[0]
+        capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace)
+        dfExport["Capacity Name"] = capacity_name
+        dfExport["Capacity Id"] = capacity_id
         dfExport["Workspace Name"] = workspace
+        dfExport["Workspace Id"] = fabric.resolve_workspace_id(workspace)
         dfExport["Dataset Name"] = dataset
+        dfExport["Dataset Id"] = resolve_dataset_id(dataset, workspace)
+        dfExport["Configured By"] = configured_by
         dfExport["Timestamp"] = now
         dfExport["RunId"] = runId
+        dfExport["Configured By"] = configured_by
 
         dfExport["RunId"] = dfExport["RunId"].astype("int")
 
-        colName = "
+        colName = "Capacity Name"
         dfExport.insert(0, colName, dfExport.pop(colName))
-        colName = "
+        colName = "Capacity Id"
         dfExport.insert(1, colName, dfExport.pop(colName))
+        colName = "Workspace Name"
+        dfExport.insert(2, colName, dfExport.pop(colName))
+        colName = "Workspace Id"
+        dfExport.insert(3, colName, dfExport.pop(colName))
+        colName = "Dataset Name"
+        dfExport.insert(4, colName, dfExport.pop(colName))
+        colName = "Configured By"
+        dfExport.insert(5, colName, dfExport.pop(colName))
 
         dfExport.columns = dfExport.columns.str.replace(" ", "_")
-
-
-
-
+        save_as_delta_table(
+            dataframe=dfExport,
+            delta_table_name=delta_table_name,
+            write_mode="append",
+            merge_schema=True,
         )
 
         if return_dataframe:
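The headline change in this file is the new language parameter and the two translation paths behind it: offline .po files for the bundled languages, and synapse.ml's Translate on Spark for everything else. A minimal usage sketch, to be run in a Fabric notebook; the dataset and workspace names are hypothetical placeholders, and it assumes run_model_bpa is exported from the package root:

import sempy_labs as labs

# 'it-IT' is in the predefined language_list, so the rules are translated
# offline from the bundled .po files; a code outside the list falls back
# to the Spark-based synapse.ml Translate path.
labs.run_model_bpa(
    dataset="AdventureWorks",      # hypothetical semantic model name
    workspace="Sales Analytics",   # hypothetical workspace name
    language="it-IT",
    return_dataframe=True,
)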
sempy_labs/_model_bpa_bulk.py
ADDED
@@ -0,0 +1,363 @@
+import sempy.fabric as fabric
+import pandas as pd
+import datetime
+from pyspark.sql import SparkSession
+from sempy_labs._helper_functions import (
+    resolve_lakehouse_name,
+    save_as_delta_table,
+    resolve_workspace_capacity,
+    retry,
+)
+from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
+from sempy_labs._model_bpa import run_model_bpa
+from typing import Optional, List
+from sempy._utils._log import log
+import sempy_labs._icons as icons
+
+
+@log
+def run_model_bpa_bulk(
+    rules: Optional[pd.DataFrame] = None,
+    extended: Optional[bool] = False,
+    language: Optional[str] = None,
+    workspace: Optional[str | List[str]] = None,
+):
+    """
+    Runs the semantic model Best Practice Analyzer across all semantic models in a workspace (or all accessible workspaces).
+    Saves (appends) the results to the 'modelbparesults' delta table in the lakehouse attached to the notebook.
+    Default semantic models are skipped in this analysis.
+
+    Parameters
+    ----------
+    dataset : str
+        Name of the semantic model.
+    rules : pandas.DataFrame, default=None
+        A pandas dataframe containing rules to be evaluated. Based on the format of the dataframe produced by the model_bpa_rules function.
+    extended : bool, default=False
+        If True, runs the set_vertipaq_annotations function to collect Vertipaq Analyzer statistics to be used in the analysis of the semantic model.
+    language : str, default=None
+        The language (code) in which the rules will appear. For example, specifying 'it-IT' will show the Rule Name, Category and Description in Italian.
+        Defaults to None which resolves to English.
+    workspace : str | List[str], default=None
+        The workspace or list of workspaces to scan.
+        Defaults to None which scans all accessible workspaces.
+
+    Returns
+    -------
+    """
+
+    import pyspark.sql.functions as F
+
+    if not lakehouse_attached():
+        raise ValueError(
+            "No lakehouse is attached to this notebook. Must attach a lakehouse to the notebook."
+        )
+
+    cols = [
+        "Capacity Name",
+        "Capacity Id",
+        "Workspace Name",
+        "Workspace Id",
+        "Dataset Name",
+        "Dataset Id",
+        "Configured By",
+        "Rule Name",
+        "Category",
+        "Severity",
+        "Object Type",
+        "Object Name",
+        "Description",
+        "URL",
+        "RunId",
+        "Timestamp",
+    ]
+    now = datetime.datetime.now()
+    output_table = "modelbparesults"
+    spark = SparkSession.builder.getOrCreate()
+    lakehouse_workspace = fabric.resolve_workspace_name()
+    lakehouse_id = fabric.get_lakehouse_id()
+    lakehouse = resolve_lakehouse_name(
+        lakehouse_id=lakehouse_id, workspace=lakehouse_workspace
+    )
+    lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lakehouse_workspace)
+    lakeT_filt = lakeT[lakeT["Table Name"] == output_table]
+    # query = f"SELECT MAX(RunId) FROM {lakehouse}.{output_table}"
+    if len(lakeT_filt) == 0:
+        runId = 1
+    else:
+        dfSpark = spark.table(f"`{lakehouse_id}`.{output_table}").select(F.max("RunId"))
+        maxRunId = dfSpark.collect()[0][0]
+        runId = maxRunId + 1
+
+    if isinstance(workspace, str):
+        workspace = [workspace]
+
+    dfW = fabric.list_workspaces()
+    if workspace is None:
+        dfW_filt = dfW.copy()
+    else:
+        dfW_filt = dfW[dfW["Name"].isin(workspace)]
+
+    for i, r in dfW_filt.iterrows():
+        wksp = r["Name"]
+        wksp_id = r["Id"]
+        capacity_id, capacity_name = resolve_workspace_capacity(workspace=wksp)
+        df = pd.DataFrame(columns=cols)
+        dfD = fabric.list_datasets(workspace=wksp, mode="rest")
+
+        # Exclude default semantic models
+        if len(dfD) > 0:
+            dfI = fabric.list_items(workspace=wksp)
+            filtered_df = dfI.groupby("Display Name").filter(
+                lambda x: set(["Warehouse", "SemanticModel"]).issubset(set(x["Type"]))
+                or set(["Lakehouse", "SemanticModel"]).issubset(set(x["Type"]))
+            )
+            default_semantic_models = filtered_df["Display Name"].unique().tolist()
+            # Skip ModelBPA :)
+            skip_models = default_semantic_models + [icons.model_bpa_name]
+            dfD_filt = dfD[~dfD["Dataset Name"].isin(skip_models)]
+
+            if len(dfD_filt) > 0:
+                for i2, r2 in dfD_filt.iterrows():
+                    dataset_name = r2["Dataset Name"]
+                    config_by = r2["Configured By"]
+                    dataset_id = r2["Dataset Id"]
+                    print(
+                        f"{icons.in_progress} Collecting Model BPA stats for the '{dataset_name}' semantic model within the '{wksp}' workspace."
+                    )
+                    try:
+                        bpa_df = run_model_bpa(
+                            dataset=dataset_name,
+                            workspace=wksp,
+                            language=language,
+                            return_dataframe=True,
+                            rules=rules,
+                            extended=extended,
+                        )
+                        bpa_df["Capacity Id"] = capacity_id
+                        bpa_df["Capacity Name"] = capacity_name
+                        bpa_df["Workspace Name"] = wksp
+                        bpa_df["Workspace Id"] = wksp_id
+                        bpa_df["Dataset Name"] = dataset_name
+                        bpa_df["Dataset Id"] = dataset_id
+                        bpa_df["Configured By"] = config_by
+                        bpa_df["Timestamp"] = now
+                        bpa_df["RunId"] = runId
+                        bpa_df = bpa_df[cols]
+
+                        bpa_df["RunId"] = bpa_df["RunId"].astype("int")
+
+                        df = pd.concat([df, bpa_df], ignore_index=True)
+                        print(
+                            f"{icons.green_dot} Collected Model BPA stats for the '{dataset_name}' semantic model within the '{wksp}' workspace."
+                        )
+                    except Exception as e:
+                        print(
+                            f"{icons.red_dot} Model BPA failed for the '{dataset_name}' semantic model within the '{wksp}' workspace."
+                        )
+                        print(e)
+
+            df["Severity"].replace(icons.severity_mapping, inplace=True)
+
+            # Append save results individually for each workspace (so as not to create a giant dataframe)
+            print(
+                f"{icons.in_progress} Saving the Model BPA results of the '{wksp}' workspace to the '{output_table}' within the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace..."
+            )
+            save_as_delta_table(
+                dataframe=df,
+                delta_table_name=output_table,
+                write_mode="append",
+                merge_schema=True,
+            )
+            print(
+                f"{icons.green_dot} Saved BPA results to the '{output_table}' delta table."
+            )
+
+    print(f"{icons.green_dot} Bulk BPA scan complete.")
+
+
+@log
+def create_model_bpa_semantic_model(
+    dataset: Optional[str] = icons.model_bpa_name,
+    lakehouse: Optional[str] = None,
+    lakehouse_workspace: Optional[str] = None,
+):
+    """
+    Dynamically generates a Direct Lake semantic model based on the 'modelbparesults' delta table which contains the Best Practice Analyzer results.
+    This semantic model used in combination with the corresponding Best Practice Analyzer report can be used to analyze multiple semantic models
+    on multiple workspaces at once (and over time).
+
+    The semantic model is always created within the same workspace as the lakehouse.
+
+    Parameters
+    ----------
+    dataset : str, default='ModelBPA'
+        Name of the semantic model to be created.
+    lakehouse : str, default=None
+        Name of the Fabric lakehouse which contains the 'modelbparesults' delta table.
+        Defaults to None which resolves to the default lakehouse attached to the notebook.
+    lakehouse_workspace : str, default=None
+        The workspace in which the lakehouse resides.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    """
+
+    from sempy_labs._helper_functions import resolve_lakehouse_name
+    from sempy_labs.directlake import (
+        get_shared_expression,
+        add_table_to_direct_lake_semantic_model,
+    )
+    from sempy_labs import create_blank_semantic_model, refresh_semantic_model
+    from sempy_labs.tom import connect_semantic_model
+
+    lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace)
+
+    if lakehouse is None:
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse = resolve_lakehouse_name(
+            lakehouse_id=lakehouse_id, workspace=lakehouse_workspace
+        )
+
+    # Generate the shared expression based on the lakehouse and lakehouse workspace
+    expr = get_shared_expression(lakehouse=lakehouse, workspace=lakehouse_workspace)
+
+    # Create blank model
+    create_blank_semantic_model(dataset=dataset, workspace=lakehouse_workspace)
+
+    @retry(
+        sleep_time=1,
+        timeout_error_message=f"{icons.red_dot} Function timed out after 1 minute",
+    )
+    def dyn_connect():
+        with connect_semantic_model(
+            dataset=dataset, readonly=True, workspace=lakehouse_workspace
+        ) as tom:
+
+            tom.model
+
+    dyn_connect()
+
+    table_exists = False
+    with connect_semantic_model(
+        dataset=dataset, readonly=False, workspace=lakehouse_workspace
+    ) as tom:
+        t_name = "BPAResults"
+        t_name_full = f"'{t_name}'"
+        # Create the shared expression
+        if not any(e.Name == "DatabaseQuery" for e in tom.model.Expressions):
+            tom.add_expression(name="DatabaseQuery", expression=expr)
+        # Add the table to the model
+        if any(t.Name == t_name for t in tom.model.Tables):
+            table_exists = True
+    if not table_exists:
+        add_table_to_direct_lake_semantic_model(
+            dataset=dataset,
+            table_name=t_name,
+            lakehouse_table_name="modelbparesults",
+            workspace=lakehouse_workspace,
+            refresh=False,
+        )
+    with connect_semantic_model(
+        dataset=dataset, readonly=False, workspace=lakehouse_workspace
+    ) as tom:
+        # Fix column names
+        for c in tom.all_columns():
+            if c.Name == "Dataset_Name":
+                c.Name = "Model"
+            elif c.Name == "Dataset_Id":
+                c.Name = "Model Id"
+            elif c.Name == "Workspace_Name":
+                c.Name = "Workspace"
+            elif c.Name == "Capacity_Name":
+                c.Name = "Capacity"
+            elif c.Name == "Configured_By":
+                c.Name = "Model Owner"
+            elif c.Name == "URL":
+                c.DataCategory = "WebURL"
+            elif c.Name == "RunId":
+                tom.set_summarize_by(
+                    table_name=c.Parent.Name, column_name=c.Name, value="None"
+                )
+            c.Name = c.Name.replace("_", " ")
+
+        # Implement pattern for base measures
+        def get_expr(table_name, calculation):
+            return f"IF(HASONEFILTER({table_name}[RunId]),{calculation},CALCULATE({calculation},FILTER(VALUES({table_name}[RunId]),{table_name}[RunId] = [Max Run Id])))"
+
+        # Add measures
+        int_format = "#,0"
+        m_name = "Max Run Id"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=f"CALCULATE(MAX({t_name_full}[RunId]),{t_name_full}[RunId])",
+                format_string=int_format,
+            )
+        m_name = "Capacities"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            calc = f"COUNTROWS(DISTINCT({t_name_full}[Capacity]))"
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=get_expr(t_name_full, calc),
+                format_string=int_format,
+            )
+        m_name = "Models"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            calc = f"COUNTROWS(DISTINCT({t_name_full}[Model]))"
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=get_expr(t_name_full, calc),
+                format_string=int_format,
+            )
+        m_name = "Workspaces"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            calc = f"COUNTROWS(DISTINCT({t_name_full}[Workspace]))"
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=get_expr(t_name_full, calc),
+                format_string=int_format,
+            )
+        m_name = "Violations"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            calc = f"COUNTROWS({t_name_full})"
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=get_expr(t_name_full, calc),
+                format_string=int_format,
+            )
+        m_name = "Error Violations"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=f'CALCULATE([Violations],{t_name_full}[Severity]="Error")',
+                format_string=int_format,
+            )
+        m_name = "Rules Violated"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            calc = f"COUNTROWS(DISTINCT({t_name_full}[Rule Name]))"
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=get_expr(t_name_full, calc),
+                format_string=int_format,
+            )
+        m_name = "Rule Severity"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=f"IF(ISFILTERED({t_name_full}[Rule Name]),IF( HASONEVALUE({t_name_full}[Rule Name]),MIN({t_name_full}[Severity])))",
+            )
+        # tom.add_measure(table_name=t_name, measure_name='Rules Followed', expression="[Rules] - [Rules Violated]")
+
+    # Refresh the model
+    refresh_semantic_model(dataset=dataset, workspace=lakehouse_workspace)
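Together with run_model_bpa above, this new module enables a tenant-wide scan-then-report workflow. A sketch of that flow, assuming both functions are exported from the package root (the updated sempy_labs/__init__.py adds 26 export lines) and that a lakehouse is attached to the notebook; the workspace names are placeholders:

import sempy_labs as labs

# Scan two workspaces and append the results to the 'modelbparesults'
# delta table in the attached lakehouse; omit workspace to scan all
# accessible workspaces.
labs.run_model_bpa_bulk(workspace=["Sales Analytics", "Finance"], language="fr-FR")

# Build the Direct Lake 'ModelBPA' semantic model over the accumulated
# results, ready to pair with the bundled BPA report template.
labs.create_model_bpa_semantic_model()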
sempy_labs/_model_bpa_rules.py
CHANGED
@@ -8,9 +8,9 @@ from typing import Optional
 
 def model_bpa_rules(
     dataset: str,
-    workspace: Optional[str
-    dependencies: Optional[pd.DataFrame
-):
+    workspace: Optional[str] = None,
+    dependencies: Optional[pd.DataFrame] = None,
+) -> pd.DataFrame:
     """
     Shows the default rules for the semantic model BPA used by the run_model_bpa function.
 
@@ -413,7 +413,7 @@ def model_bpa_rules(
                 re.search(
                     r"USERELATIONSHIP\s*\(\s*.+?(?=])\]\s*,\s*'*"
                     + obj.Name
-                    + "'*\[",
+                    + r"'*\[",
                     m.Expression,
                     flags=re.IGNORECASE,
                 )