semantic-link-labs 0.6.0-py3-none-any.whl → 0.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (103)
  1. semantic_link_labs-0.7.0.dist-info/METADATA +148 -0
  2. semantic_link_labs-0.7.0.dist-info/RECORD +111 -0
  3. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +26 -2
  5. sempy_labs/_ai.py +3 -65
  6. sempy_labs/_bpa_translation/_translations_am-ET.po +828 -0
  7. sempy_labs/_bpa_translation/_translations_ar-AE.po +860 -0
  8. sempy_labs/_bpa_translation/_translations_cs-CZ.po +894 -0
  9. sempy_labs/_bpa_translation/_translations_da-DK.po +894 -0
  10. sempy_labs/_bpa_translation/_translations_de-DE.po +933 -0
  11. sempy_labs/_bpa_translation/_translations_el-GR.po +936 -0
  12. sempy_labs/_bpa_translation/_translations_es-ES.po +915 -0
  13. sempy_labs/_bpa_translation/_translations_fa-IR.po +883 -0
  14. sempy_labs/_bpa_translation/_translations_fr-FR.po +938 -0
  15. sempy_labs/_bpa_translation/_translations_ga-IE.po +912 -0
  16. sempy_labs/_bpa_translation/_translations_he-IL.po +855 -0
  17. sempy_labs/_bpa_translation/_translations_hi-IN.po +892 -0
  18. sempy_labs/_bpa_translation/_translations_hu-HU.po +910 -0
  19. sempy_labs/_bpa_translation/_translations_is-IS.po +887 -0
  20. sempy_labs/_bpa_translation/_translations_it-IT.po +931 -0
  21. sempy_labs/_bpa_translation/_translations_ja-JP.po +805 -0
  22. sempy_labs/_bpa_translation/_translations_nl-NL.po +924 -0
  23. sempy_labs/_bpa_translation/_translations_pl-PL.po +913 -0
  24. sempy_labs/_bpa_translation/_translations_pt-BR.po +909 -0
  25. sempy_labs/_bpa_translation/_translations_pt-PT.po +904 -0
  26. sempy_labs/_bpa_translation/_translations_ru-RU.po +909 -0
  27. sempy_labs/_bpa_translation/_translations_ta-IN.po +922 -0
  28. sempy_labs/_bpa_translation/_translations_te-IN.po +896 -0
  29. sempy_labs/_bpa_translation/_translations_th-TH.po +873 -0
  30. sempy_labs/_bpa_translation/_translations_zh-CN.po +767 -0
  31. sempy_labs/_bpa_translation/_translations_zu-ZA.po +916 -0
  32. sempy_labs/_clear_cache.py +9 -4
  33. sempy_labs/_generate_semantic_model.py +30 -56
  34. sempy_labs/_helper_functions.py +358 -14
  35. sempy_labs/_icons.py +10 -1
  36. sempy_labs/_list_functions.py +478 -237
  37. sempy_labs/_model_bpa.py +194 -18
  38. sempy_labs/_model_bpa_bulk.py +363 -0
  39. sempy_labs/_model_bpa_rules.py +4 -4
  40. sempy_labs/_model_dependencies.py +12 -10
  41. sempy_labs/_one_lake_integration.py +7 -7
  42. sempy_labs/_query_scale_out.py +45 -66
  43. sempy_labs/_refresh_semantic_model.py +7 -0
  44. sempy_labs/_translations.py +154 -1
  45. sempy_labs/_vertipaq.py +103 -90
  46. sempy_labs/directlake/__init__.py +5 -1
  47. sempy_labs/directlake/_directlake_schema_compare.py +27 -31
  48. sempy_labs/directlake/_directlake_schema_sync.py +55 -66
  49. sempy_labs/directlake/_dl_helper.py +233 -0
  50. sempy_labs/directlake/_get_directlake_lakehouse.py +6 -7
  51. sempy_labs/directlake/_get_shared_expression.py +1 -1
  52. sempy_labs/directlake/_guardrails.py +17 -13
  53. sempy_labs/directlake/_update_directlake_partition_entity.py +54 -30
  54. sempy_labs/directlake/_warm_cache.py +1 -1
  55. sempy_labs/lakehouse/_get_lakehouse_tables.py +61 -69
  56. sempy_labs/lakehouse/_lakehouse.py +3 -2
  57. sempy_labs/lakehouse/_shortcuts.py +1 -1
  58. sempy_labs/migration/_create_pqt_file.py +174 -182
  59. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +236 -268
  60. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +75 -73
  61. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +442 -426
  62. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +91 -97
  63. sempy_labs/migration/_refresh_calc_tables.py +92 -101
  64. sempy_labs/report/_BPAReportTemplate.json +232 -0
  65. sempy_labs/report/__init__.py +6 -2
  66. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  67. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  68. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  69. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  70. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  71. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  72. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  73. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  74. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  75. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  76. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  77. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  78. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  79. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  80. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  81. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  82. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  83. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  84. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  85. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  86. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  87. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  88. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  89. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  90. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  91. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  92. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  93. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  94. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  95. sempy_labs/report/_generate_report.py +255 -139
  96. sempy_labs/report/_report_functions.py +26 -33
  97. sempy_labs/report/_report_rebind.py +31 -26
  98. sempy_labs/tom/_model.py +75 -58
  99. semantic_link_labs-0.6.0.dist-info/METADATA +0 -22
  100. semantic_link_labs-0.6.0.dist-info/RECORD +0 -54
  101. sempy_labs/directlake/_fallback.py +0 -60
  102. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/LICENSE +0 -0
  103. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/top_level.txt +0 -0
sempy_labs/_model_bpa.py CHANGED
@@ -9,14 +9,20 @@ from sempy_labs._helper_functions import (
     format_dax_object_name,
     resolve_lakehouse_name,
     create_relationship_name,
+    save_as_delta_table,
+    resolve_workspace_capacity,
+    resolve_dataset_id,
 )
-from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
-from sempy_labs.lakehouse._lakehouse import lakehouse_attached
+from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
 from sempy_labs.tom import connect_semantic_model
 from sempy_labs._model_bpa_rules import model_bpa_rules
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
+from pyspark.sql.functions import col, flatten
+from pyspark.sql.types import StructType, StructField, StringType
+import polib
+import os
 
 
 @log
@@ -27,6 +33,7 @@ def run_model_bpa(
     export: Optional[bool] = False,
     return_dataframe: Optional[bool] = False,
     extended: Optional[bool] = False,
+    language: Optional[str] = None,
     **kwargs,
 ):
     """
@@ -48,6 +55,9 @@
         If True, returns a pandas dataframe instead of the visualization.
     extended : bool, default=False
         If True, runs the set_vertipaq_annotations function to collect Vertipaq Analyzer statistics to be used in the analysis of the semantic model.
+    language : str, default=None
+        Specifying a language code (i.e. 'it-IT' for Italian) will auto-translate the Category, Rule Name and Description into the specified language.
+        Defaults to None which resolves to English.
 
     Returns
     -------
@@ -55,6 +65,8 @@
         A pandas dataframe in HTML format showing semantic model objects which violated the best practice analyzer rules.
     """
 
+    from synapse.ml.services import Translate
+
     if "extend" in kwargs:
         print(
             "The 'extend' parameter has been deprecated. Please remove this parameter from the function going forward."
@@ -65,9 +77,66 @@
         "ignore",
         message="This pattern is interpreted as a regular expression, and has match groups.",
     )
+    warnings.filterwarnings(
+        "ignore", category=UserWarning, message=".*Arrow optimization.*"
+    )
+
+    language_list = [
+        "it-IT",
+        "es-ES",
+        "he-IL",
+        "pt-PT",
+        "zh-CN",
+        "fr-FR",
+        "da-DK",
+        "cs-CZ",
+        "de-DE",
+        "el-GR",
+        "fa-IR",
+        "ga-IE",
+        "hi-IN",
+        "hu-HU",
+        "is-IS",
+        "ja-JP",
+        "nl-NL",
+        "pl-PL",
+        "pt-BR",
+        "ru-RU",
+        "te-IN",
+        "ta-IN",
+        "th-TH",
+        "zu-ZA",
+        "am-ET",
+        "ar-AE",
+    ]
+
+    # Map languages to the closest language (first 2 letters matching)
+    def map_language(language, language_list):
+
+        mapped = False
+
+        if language in language_list:
+            mapped is True
+            return language
+
+        language_prefix = language[:2]
+        for lang_code in language_list:
+            if lang_code.startswith(language_prefix):
+                mapped is True
+                return lang_code
+        if not mapped:
+            return language
+
+    if language is not None:
+        language = map_language(language, language_list)
 
     workspace = fabric.resolve_workspace_name(workspace)
 
+    if language is not None and language not in language_list:
+        print(
+            f"{icons.yellow_dot} The '{language}' language code is not in our predefined language list. Please file an issue and let us know which language code you are using: https://github.com/microsoft/semantic-link-labs/issues/new?assignees=&labels=&projects=&template=bug_report.md&title=."
+        )
+
     if extended:
         with connect_semantic_model(
             dataset=dataset, workspace=workspace, readonly=False
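
The map_language fallback is easier to see in isolation. Below is a standalone sketch of the same prefix-matching behavior; note that `mapped is True` in the shipped helper is a comparison rather than an assignment, so the flag never changes, and the sketch drops it (behavior is unaffected, since every match path returns immediately):

def map_language(language, language_list):
    # Exact match wins outright.
    if language in language_list:
        return language
    # Otherwise fall back to the first list entry sharing the two-letter prefix.
    prefix = language[:2]
    for lang_code in language_list:
        if lang_code.startswith(prefix):
            return lang_code
    # Unmatched codes pass through unchanged so the caller can warn about them.
    return language

codes = ["it-IT", "es-ES", "fr-FR", "pt-PT", "pt-BR"]
print(map_language("fr-CA", codes))  # fr-FR  (prefix fallback)
print(map_language("pt-BR", codes))  # pt-BR  (exact match)
print(map_language("xx-XX", codes))  # xx-XX  (passes through; the warning above fires)
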
@@ -80,14 +149,104 @@
 
     dep = get_model_calc_dependencies(dataset=dataset, workspace=workspace)
 
+    def translate_using_po(rule_file):
+        current_dir = os.path.dirname(os.path.abspath(__file__))
+        translation_file = (
+            f"{current_dir}/_bpa_translation/_translations_{language}.po"
+        )
+        for c in ["Category", "Description", "Rule Name"]:
+            po = polib.pofile(translation_file)
+            for entry in po:
+                if entry.tcomment == c.lower().replace(" ", "_"):
+                    rule_file.loc[rule_file["Rule Name"] == entry.msgid, c] = (
+                        entry.msgstr
+                    )
+
+    def translate_using_spark(rule_file):
+        rules_temp = rule_file.copy()
+        rules_temp = rules_temp.drop(["Expression", "URL", "Severity"], axis=1)
+
+        schema = StructType(
+            [
+                StructField("Category", StringType(), True),
+                StructField("Scope", StringType(), True),
+                StructField("Rule Name", StringType(), True),
+                StructField("Description", StringType(), True),
+            ]
+        )
+
+        spark = SparkSession.builder.getOrCreate()
+        dfRules = spark.createDataFrame(rules_temp, schema)
+
+        columns = ["Category", "Rule Name", "Description"]
+        for clm in columns:
+            translate = (
+                Translate()
+                .setTextCol(clm)
+                .setToLanguage(language)
+                .setOutputCol("translation")
+                .setConcurrency(5)
+            )
+
+            if clm == "Rule Name":
+                transDF = (
+                    translate.transform(dfRules)
+                    .withColumn(
+                        "translation", flatten(col("translation.translations"))
+                    )
+                    .withColumn("translation", col("translation.text"))
+                    .select(clm, "translation")
+                )
+            else:
+                transDF = (
+                    translate.transform(dfRules)
+                    .withColumn(
+                        "translation", flatten(col("translation.translations"))
+                    )
+                    .withColumn("translation", col("translation.text"))
+                    .select("Rule Name", clm, "translation")
+                )
+
+            df_panda = transDF.toPandas()
+            rule_file = pd.merge(
+                rule_file,
+                df_panda[["Rule Name", "translation"]],
+                on="Rule Name",
+                how="left",
+            )
+
+            rule_file = rule_file.rename(
+                columns={"translation": f"{clm}Translated"}
+            )
+            rule_file[f"{clm}Translated"] = rule_file[f"{clm}Translated"].apply(
+                lambda x: x[0] if x is not None else None
+            )
+
+        for clm in columns:
+            rule_file = rule_file.drop([clm], axis=1)
+            rule_file = rule_file.rename(columns={f"{clm}Translated": clm})
+
+        return rule_file
+
+    translated = False
+
+    # Translations
+    if language is not None and rules is None and language in language_list:
+        rules = model_bpa_rules(
+            dataset=dataset, workspace=workspace, dependencies=dep
+        )
+        translate_using_po(rules)
+        translated = True
     if rules is None:
         rules = model_bpa_rules(
             dataset=dataset, workspace=workspace, dependencies=dep
         )
+    if language is not None and not translated:
+        rules = translate_using_spark(rules)
 
-    rules["Severity"].replace("Warning", "⚠️", inplace=True)
-    rules["Severity"].replace("Error", "\u274C", inplace=True)
-    rules["Severity"].replace("Info", "ℹ️", inplace=True)
+    rules["Severity"].replace("Warning", icons.warning, inplace=True)
+    rules["Severity"].replace("Error", icons.error, inplace=True)
+    rules["Severity"].replace("Info", icons.info, inplace=True)
 
 
     pd.set_option("display.max_colwidth", 1000)
@@ -191,8 +350,7 @@
     ]
 
     if export:
-        lakeAttach = lakehouse_attached()
-        if lakeAttach is False:
+        if not lakehouse_attached():
             raise ValueError(
                 f"{icons.red_dot} In order to save the Best Practice Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
             )
@@ -201,16 +359,15 @@
         delta_table_name = "modelbparesults"
 
         lakehouse_id = fabric.get_lakehouse_id()
+        lake_workspace = fabric.get_workspace_id()
         lakehouse = resolve_lakehouse_name(
-            lakehouse_id=lakehouse_id, workspace=workspace
+            lakehouse_id=lakehouse_id, workspace=lake_workspace
         )
 
-        lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+        lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace)
         lakeT_filt = lakeT[lakeT["Table Name"] == delta_table_name]
 
-        dfExport["Severity"].replace("⚠️", "Warning", inplace=True)
-        dfExport["Severity"].replace("\u274C", "Error", inplace=True)
-        dfExport["Severity"].replace("ℹ️", "Info", inplace=True)
+        dfExport["Severity"].replace(icons.severity_mapping, inplace=True)
 
         spark = SparkSession.builder.getOrCreate()
         query = f"SELECT MAX(RunId) FROM {lakehouse}.{delta_table_name}"
@@ -223,23 +380,42 @@
             runId = maxRunId + 1
 
         now = datetime.datetime.now()
+        dfD = fabric.list_datasets(workspace=workspace, mode="rest")
+        dfD_filt = dfD[dfD["Dataset Name"] == dataset]
+        configured_by = dfD_filt["Configured By"].iloc[0]
+        capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace)
+        dfExport["Capacity Name"] = capacity_name
+        dfExport["Capacity Id"] = capacity_id
         dfExport["Workspace Name"] = workspace
+        dfExport["Workspace Id"] = fabric.resolve_workspace_id(workspace)
         dfExport["Dataset Name"] = dataset
+        dfExport["Dataset Id"] = resolve_dataset_id(dataset, workspace)
+        dfExport["Configured By"] = configured_by
         dfExport["Timestamp"] = now
         dfExport["RunId"] = runId
+        dfExport["Configured By"] = configured_by
 
         dfExport["RunId"] = dfExport["RunId"].astype("int")
 
-        colName = "Workspace Name"
+        colName = "Capacity Name"
         dfExport.insert(0, colName, dfExport.pop(colName))
-        colName = "Dataset Name"
+        colName = "Capacity Id"
         dfExport.insert(1, colName, dfExport.pop(colName))
+        colName = "Workspace Name"
+        dfExport.insert(2, colName, dfExport.pop(colName))
+        colName = "Workspace Id"
+        dfExport.insert(3, colName, dfExport.pop(colName))
+        colName = "Dataset Name"
+        dfExport.insert(4, colName, dfExport.pop(colName))
+        colName = "Configured By"
+        dfExport.insert(5, colName, dfExport.pop(colName))
 
         dfExport.columns = dfExport.columns.str.replace(" ", "_")
-        spark_df = spark.createDataFrame(dfExport)
-        spark_df.write.mode("append").format("delta").saveAsTable(delta_table_name)
-        print(
-            f"{icons.green_dot} Model Best Practice Analyzer results for the '{dataset}' semantic model have been appended to the '{delta_table_name}' delta table."
+        save_as_delta_table(
+            dataframe=dfExport,
+            delta_table_name=delta_table_name,
+            write_mode="append",
+            merge_schema=True,
         )
 
     if return_dataframe:
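
Taken together, the new parameters let a single call produce localized rule output and persist it. A usage sketch; the dataset and workspace names are placeholders, and export=True still requires a lakehouse attached to the notebook, as enforced above:

from sempy_labs import run_model_bpa

# Localized rule text plus an append into the 'modelbparesults' delta table.
df = run_model_bpa(
    dataset="Sales Model",         # placeholder semantic model name
    workspace="Sales Workspace",   # placeholder workspace name
    language="it-IT",              # Category, Rule Name and Description auto-translated
    return_dataframe=True,
    export=True,                   # requires an attached lakehouse
)
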
sempy_labs/_model_bpa_bulk.py ADDED
@@ -0,0 +1,363 @@
+import sempy.fabric as fabric
+import pandas as pd
+import datetime
+from pyspark.sql import SparkSession
+from sempy_labs._helper_functions import (
+    resolve_lakehouse_name,
+    save_as_delta_table,
+    resolve_workspace_capacity,
+    retry,
+)
+from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
+from sempy_labs._model_bpa import run_model_bpa
+from typing import Optional, List
+from sempy._utils._log import log
+import sempy_labs._icons as icons
+
+
+@log
+def run_model_bpa_bulk(
+    rules: Optional[pd.DataFrame] = None,
+    extended: Optional[bool] = False,
+    language: Optional[str] = None,
+    workspace: Optional[str | List[str]] = None,
+):
+    """
+    Runs the semantic model Best Practice Analyzer across all semantic models in a workspace (or all accessible workspaces).
+    Saves (appends) the results to the 'modelbparesults' delta table in the lakehouse attached to the notebook.
+    Default semantic models are skipped in this analysis.
+
+    Parameters
+    ----------
+    dataset : str
+        Name of the semantic model.
+    rules : pandas.DataFrame, default=None
+        A pandas dataframe containing rules to be evaluated. Based on the format of the dataframe produced by the model_bpa_rules function.
+    extended : bool, default=False
+        If True, runs the set_vertipaq_annotations function to collect Vertipaq Analyzer statistics to be used in the analysis of the semantic model.
+    language : str, default=None
+        The language (code) in which the rules will appear. For example, specifying 'it-IT' will show the Rule Name, Category and Description in Italian.
+        Defaults to None which resolves to English.
+    workspace : str | List[str], default=None
+        The workspace or list of workspaces to scan.
+        Defaults to None which scans all accessible workspaces.
+
+    Returns
+    -------
+    """
+
+    import pyspark.sql.functions as F
+
+    if not lakehouse_attached():
+        raise ValueError(
+            "No lakehouse is attached to this notebook. Must attach a lakehouse to the notebook."
+        )
+
+    cols = [
+        "Capacity Name",
+        "Capacity Id",
+        "Workspace Name",
+        "Workspace Id",
+        "Dataset Name",
+        "Dataset Id",
+        "Configured By",
+        "Rule Name",
+        "Category",
+        "Severity",
+        "Object Type",
+        "Object Name",
+        "Description",
+        "URL",
+        "RunId",
+        "Timestamp",
+    ]
+    now = datetime.datetime.now()
+    output_table = "modelbparesults"
+    spark = SparkSession.builder.getOrCreate()
+    lakehouse_workspace = fabric.resolve_workspace_name()
+    lakehouse_id = fabric.get_lakehouse_id()
+    lakehouse = resolve_lakehouse_name(
+        lakehouse_id=lakehouse_id, workspace=lakehouse_workspace
+    )
+    lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lakehouse_workspace)
+    lakeT_filt = lakeT[lakeT["Table Name"] == output_table]
+    # query = f"SELECT MAX(RunId) FROM {lakehouse}.{output_table}"
+    if len(lakeT_filt) == 0:
+        runId = 1
+    else:
+        dfSpark = spark.table(f"`{lakehouse_id}`.{output_table}").select(F.max("RunId"))
+        maxRunId = dfSpark.collect()[0][0]
+        runId = maxRunId + 1
+
+    if isinstance(workspace, str):
+        workspace = [workspace]
+
+    dfW = fabric.list_workspaces()
+    if workspace is None:
+        dfW_filt = dfW.copy()
+    else:
+        dfW_filt = dfW[dfW["Name"].isin(workspace)]
+
+    for i, r in dfW_filt.iterrows():
+        wksp = r["Name"]
+        wksp_id = r["Id"]
+        capacity_id, capacity_name = resolve_workspace_capacity(workspace=wksp)
+        df = pd.DataFrame(columns=cols)
+        dfD = fabric.list_datasets(workspace=wksp, mode="rest")
+
+        # Exclude default semantic models
+        if len(dfD) > 0:
+            dfI = fabric.list_items(workspace=wksp)
+            filtered_df = dfI.groupby("Display Name").filter(
+                lambda x: set(["Warehouse", "SemanticModel"]).issubset(set(x["Type"]))
+                or set(["Lakehouse", "SemanticModel"]).issubset(set(x["Type"]))
+            )
+            default_semantic_models = filtered_df["Display Name"].unique().tolist()
+            # Skip ModelBPA :)
+            skip_models = default_semantic_models + [icons.model_bpa_name]
+            dfD_filt = dfD[~dfD["Dataset Name"].isin(skip_models)]
+
+            if len(dfD_filt) > 0:
+                for i2, r2 in dfD_filt.iterrows():
+                    dataset_name = r2["Dataset Name"]
+                    config_by = r2["Configured By"]
+                    dataset_id = r2["Dataset Id"]
+                    print(
+                        f"{icons.in_progress} Collecting Model BPA stats for the '{dataset_name}' semantic model within the '{wksp}' workspace."
+                    )
+                    try:
+                        bpa_df = run_model_bpa(
+                            dataset=dataset_name,
+                            workspace=wksp,
+                            language=language,
+                            return_dataframe=True,
+                            rules=rules,
+                            extended=extended,
+                        )
+                        bpa_df["Capacity Id"] = capacity_id
+                        bpa_df["Capacity Name"] = capacity_name
+                        bpa_df["Workspace Name"] = wksp
+                        bpa_df["Workspace Id"] = wksp_id
+                        bpa_df["Dataset Name"] = dataset_name
+                        bpa_df["Dataset Id"] = dataset_id
+                        bpa_df["Configured By"] = config_by
+                        bpa_df["Timestamp"] = now
+                        bpa_df["RunId"] = runId
+                        bpa_df = bpa_df[cols]
+
+                        bpa_df["RunId"] = bpa_df["RunId"].astype("int")
+
+                        df = pd.concat([df, bpa_df], ignore_index=True)
+                        print(
+                            f"{icons.green_dot} Collected Model BPA stats for the '{dataset_name}' semantic model within the '{wksp}' workspace."
+                        )
+                    except Exception as e:
+                        print(
+                            f"{icons.red_dot} Model BPA failed for the '{dataset_name}' semantic model within the '{wksp}' workspace."
+                        )
+                        print(e)
+
+                df["Severity"].replace(icons.severity_mapping, inplace=True)
+
+                # Append save results individually for each workspace (so as not to create a giant dataframe)
+                print(
+                    f"{icons.in_progress} Saving the Model BPA results of the '{wksp}' workspace to the '{output_table}' within the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace..."
+                )
+                save_as_delta_table(
+                    dataframe=df,
+                    delta_table_name=output_table,
+                    write_mode="append",
+                    merge_schema=True,
+                )
+                print(
+                    f"{icons.green_dot} Saved BPA results to the '{output_table}' delta table."
+                )
+
+    print(f"{icons.green_dot} Bulk BPA scan complete.")
+
+
+@log
+def create_model_bpa_semantic_model(
+    dataset: Optional[str] = icons.model_bpa_name,
+    lakehouse: Optional[str] = None,
+    lakehouse_workspace: Optional[str] = None,
+):
+    """
+    Dynamically generates a Direct Lake semantic model based on the 'modelbparesults' delta table which contains the Best Practice Analyzer results.
+    This semantic model used in combination with the corresponding Best Practice Analyzer report can be used to analyze multiple semantic models
+    on multiple workspaces at once (and over time).
+
+    The semantic model is always created within the same workspace as the lakehouse.
+
+    Parameters
+    ----------
+    dataset : str, default='ModelBPA'
+        Name of the semantic model to be created.
+    lakehouse : str, default=None
+        Name of the Fabric lakehouse which contains the 'modelbparesults' delta table.
+        Defaults to None which resolves to the default lakehouse attached to the notebook.
+    lakehouse_workspace : str, default=None
+        The workspace in which the lakehouse resides.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    """
+
+    from sempy_labs._helper_functions import resolve_lakehouse_name
+    from sempy_labs.directlake import (
+        get_shared_expression,
+        add_table_to_direct_lake_semantic_model,
+    )
+    from sempy_labs import create_blank_semantic_model, refresh_semantic_model
+    from sempy_labs.tom import connect_semantic_model
+
+    lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace)
+
+    if lakehouse is None:
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse = resolve_lakehouse_name(
+            lakehouse_id=lakehouse_id, workspace=lakehouse_workspace
+        )
+
+    # Generate the shared expression based on the lakehouse and lakehouse workspace
+    expr = get_shared_expression(lakehouse=lakehouse, workspace=lakehouse_workspace)
+
+    # Create blank model
+    create_blank_semantic_model(dataset=dataset, workspace=lakehouse_workspace)
+
+    @retry(
+        sleep_time=1,
+        timeout_error_message=f"{icons.red_dot} Function timed out after 1 minute",
+    )
+    def dyn_connect():
+        with connect_semantic_model(
+            dataset=dataset, readonly=True, workspace=lakehouse_workspace
+        ) as tom:
+
+            tom.model
+
+    dyn_connect()
+
+    table_exists = False
+    with connect_semantic_model(
+        dataset=dataset, readonly=False, workspace=lakehouse_workspace
+    ) as tom:
+        t_name = "BPAResults"
+        t_name_full = f"'{t_name}'"
+        # Create the shared expression
+        if not any(e.Name == "DatabaseQuery" for e in tom.model.Expressions):
+            tom.add_expression(name="DatabaseQuery", expression=expr)
+        # Add the table to the model
+        if any(t.Name == t_name for t in tom.model.Tables):
+            table_exists = True
+    if not table_exists:
+        add_table_to_direct_lake_semantic_model(
+            dataset=dataset,
+            table_name=t_name,
+            lakehouse_table_name="modelbparesults",
+            workspace=lakehouse_workspace,
+            refresh=False,
+        )
+    with connect_semantic_model(
+        dataset=dataset, readonly=False, workspace=lakehouse_workspace
+    ) as tom:
+        # Fix column names
+        for c in tom.all_columns():
+            if c.Name == "Dataset_Name":
+                c.Name = "Model"
+            elif c.Name == "Dataset_Id":
+                c.Name = "Model Id"
+            elif c.Name == "Workspace_Name":
+                c.Name = "Workspace"
+            elif c.Name == "Capacity_Name":
+                c.Name = "Capacity"
+            elif c.Name == "Configured_By":
+                c.Name = "Model Owner"
+            elif c.Name == "URL":
+                c.DataCategory = "WebURL"
+            elif c.Name == "RunId":
+                tom.set_summarize_by(
+                    table_name=c.Parent.Name, column_name=c.Name, value="None"
+                )
+            c.Name = c.Name.replace("_", " ")
+
+        # Implement pattern for base measures
+        def get_expr(table_name, calculation):
+            return f"IF(HASONEFILTER({table_name}[RunId]),{calculation},CALCULATE({calculation},FILTER(VALUES({table_name}[RunId]),{table_name}[RunId] = [Max Run Id])))"
+
+        # Add measures
+        int_format = "#,0"
+        m_name = "Max Run Id"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=f"CALCULATE(MAX({t_name_full}[RunId]),{t_name_full}[RunId])",
+                format_string=int_format,
+            )
+        m_name = "Capacities"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            calc = f"COUNTROWS(DISTINCT({t_name_full}[Capacity]))"
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=get_expr(t_name_full, calc),
+                format_string=int_format,
+            )
+        m_name = "Models"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            calc = f"COUNTROWS(DISTINCT({t_name_full}[Model]))"
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=get_expr(t_name_full, calc),
+                format_string=int_format,
+            )
+        m_name = "Workspaces"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            calc = f"COUNTROWS(DISTINCT({t_name_full}[Workspace]))"
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=get_expr(t_name_full, calc),
+                format_string=int_format,
+            )
+        m_name = "Violations"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            calc = f"COUNTROWS({t_name_full})"
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=get_expr(t_name_full, calc),
+                format_string=int_format,
+            )
+        m_name = "Error Violations"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=f'CALCULATE([Violations],{t_name_full}[Severity]="Error")',
+                format_string=int_format,
+            )
+        m_name = "Rules Violated"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            calc = f"COUNTROWS(DISTINCT({t_name_full}[Rule Name]))"
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=get_expr(t_name_full, calc),
+                format_string=int_format,
+            )
+        m_name = "Rule Severity"
+        if not any(m.Name == m_name for m in tom.all_measures()):
+            tom.add_measure(
+                table_name=t_name,
+                measure_name=m_name,
+                expression=f"IF(ISFILTERED({t_name_full}[Rule Name]),IF( HASONEVALUE({t_name_full}[Rule Name]),MIN({t_name_full}[Severity])))",
+            )
+        # tom.add_measure(table_name=t_name, measure_name='Rules Followed', expression="[Rules] - [Rules Violated]")
+
+    # Refresh the model
+    refresh_semantic_model(dataset=dataset, workspace=lakehouse_workspace)
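
The two new functions are meant to be used together: the bulk scan populates 'modelbparesults', and the generated Direct Lake model (with the bundled BPA report template) sits on top of it. A workflow sketch, assuming both functions are re-exported from the top-level sempy_labs namespace like the other BPA helpers; the workspace names are placeholders:

import sempy_labs as labs

# 1. Scan selected workspaces; findings append to 'modelbparesults'
#    in the lakehouse attached to this notebook.
labs.run_model_bpa_bulk(workspace=["Sales", "Finance"], language="it-IT")

# 2. Build the Direct Lake semantic model over the results table.
#    The dataset name defaults to icons.model_bpa_name ('ModelBPA').
labs.create_model_bpa_semantic_model()
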
sempy_labs/_model_bpa_rules.py CHANGED
@@ -8,9 +8,9 @@ from typing import Optional
 
 def model_bpa_rules(
     dataset: str,
-    workspace: Optional[str | None] = None,
-    dependencies: Optional[pd.DataFrame | None] = None,
-):
+    workspace: Optional[str] = None,
+    dependencies: Optional[pd.DataFrame] = None,
+) -> pd.DataFrame:
     """
     Shows the default rules for the semantic model BPA used by the run_model_bpa function.
 
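
For reference, Optional[str] already means str | None, so the removed Optional[str | None] annotations were doubly wrapped; the new signature also declares the pd.DataFrame return type. A quick illustration:

from typing import Optional

# All three annotations describe the same type at type-checking time:
a: Optional[str] = None
b: str | None = None               # PEP 604 spelling (Python 3.10+)
c: Optional[str | None] = None     # the doubly wrapped form removed here
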
@@ -413,7 +413,7 @@
             re.search(
                 r"USERELATIONSHIP\s*\(\s*.+?(?=])\]\s*,\s*'*"
                 + obj.Name
-                + "'*\[",
+                + r"'*\[",
                 m.Expression,
                 flags=re.IGNORECASE,
             )
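
The remaining change is escape hygiene: in a normal string literal, \[ is an invalid escape sequence (deprecated since Python 3.6 and a SyntaxWarning from 3.12), and Python passes the backslash through unchanged, so the compiled regex happens to be identical. The raw string makes that explicit:

import re

re.search("'*\[", "'[x]")   # same pattern today, but warns at compile time
re.search(r"'*\[", "'[x]")  # raw string: identical regex, no warning
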