semantic-link-labs 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of semantic-link-labs might be problematic.

Files changed (113)
  1. semantic_link_labs-0.7.0.dist-info/METADATA +148 -0
  2. semantic_link_labs-0.7.0.dist-info/RECORD +111 -0
  3. {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.7.0.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +45 -15
  5. sempy_labs/_ai.py +42 -85
  6. sempy_labs/_bpa_translation/_translations_am-ET.po +828 -0
  7. sempy_labs/_bpa_translation/_translations_ar-AE.po +860 -0
  8. sempy_labs/_bpa_translation/_translations_cs-CZ.po +894 -0
  9. sempy_labs/_bpa_translation/_translations_da-DK.po +894 -0
  10. sempy_labs/_bpa_translation/_translations_de-DE.po +933 -0
  11. sempy_labs/_bpa_translation/_translations_el-GR.po +936 -0
  12. sempy_labs/_bpa_translation/_translations_es-ES.po +915 -0
  13. sempy_labs/_bpa_translation/_translations_fa-IR.po +883 -0
  14. sempy_labs/_bpa_translation/_translations_fr-FR.po +938 -0
  15. sempy_labs/_bpa_translation/_translations_ga-IE.po +912 -0
  16. sempy_labs/_bpa_translation/_translations_he-IL.po +855 -0
  17. sempy_labs/_bpa_translation/_translations_hi-IN.po +892 -0
  18. sempy_labs/_bpa_translation/_translations_hu-HU.po +910 -0
  19. sempy_labs/_bpa_translation/_translations_is-IS.po +887 -0
  20. sempy_labs/_bpa_translation/_translations_it-IT.po +931 -0
  21. sempy_labs/_bpa_translation/_translations_ja-JP.po +805 -0
  22. sempy_labs/_bpa_translation/_translations_nl-NL.po +924 -0
  23. sempy_labs/_bpa_translation/_translations_pl-PL.po +913 -0
  24. sempy_labs/_bpa_translation/_translations_pt-BR.po +909 -0
  25. sempy_labs/_bpa_translation/_translations_pt-PT.po +904 -0
  26. sempy_labs/_bpa_translation/_translations_ru-RU.po +909 -0
  27. sempy_labs/_bpa_translation/_translations_ta-IN.po +922 -0
  28. sempy_labs/_bpa_translation/_translations_te-IN.po +896 -0
  29. sempy_labs/_bpa_translation/_translations_th-TH.po +873 -0
  30. sempy_labs/_bpa_translation/_translations_zh-CN.po +767 -0
  31. sempy_labs/_bpa_translation/_translations_zu-ZA.po +916 -0
  32. sempy_labs/_clear_cache.py +12 -8
  33. sempy_labs/_connections.py +77 -70
  34. sempy_labs/_dax.py +7 -9
  35. sempy_labs/_generate_semantic_model.py +75 -90
  36. sempy_labs/_helper_functions.py +371 -20
  37. sempy_labs/_icons.py +23 -0
  38. sempy_labs/_list_functions.py +855 -427
  39. sempy_labs/_model_auto_build.py +4 -3
  40. sempy_labs/_model_bpa.py +307 -1118
  41. sempy_labs/_model_bpa_bulk.py +363 -0
  42. sempy_labs/_model_bpa_rules.py +831 -0
  43. sempy_labs/_model_dependencies.py +20 -16
  44. sempy_labs/_one_lake_integration.py +18 -12
  45. sempy_labs/_query_scale_out.py +116 -129
  46. sempy_labs/_refresh_semantic_model.py +23 -10
  47. sempy_labs/_translations.py +367 -288
  48. sempy_labs/_vertipaq.py +152 -123
  49. sempy_labs/directlake/__init__.py +7 -1
  50. sempy_labs/directlake/_directlake_schema_compare.py +33 -30
  51. sempy_labs/directlake/_directlake_schema_sync.py +60 -77
  52. sempy_labs/directlake/_dl_helper.py +233 -0
  53. sempy_labs/directlake/_get_directlake_lakehouse.py +7 -8
  54. sempy_labs/directlake/_get_shared_expression.py +5 -3
  55. sempy_labs/directlake/_guardrails.py +20 -16
  56. sempy_labs/directlake/_list_directlake_model_calc_tables.py +17 -10
  57. sempy_labs/directlake/_show_unsupported_directlake_objects.py +3 -2
  58. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +10 -5
  59. sempy_labs/directlake/_update_directlake_partition_entity.py +169 -22
  60. sempy_labs/directlake/_warm_cache.py +7 -4
  61. sempy_labs/lakehouse/_get_lakehouse_columns.py +1 -1
  62. sempy_labs/lakehouse/_get_lakehouse_tables.py +65 -71
  63. sempy_labs/lakehouse/_lakehouse.py +5 -3
  64. sempy_labs/lakehouse/_shortcuts.py +20 -13
  65. sempy_labs/migration/__init__.py +1 -1
  66. sempy_labs/migration/_create_pqt_file.py +184 -186
  67. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +240 -269
  68. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +78 -77
  69. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +444 -425
  70. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +96 -102
  71. sempy_labs/migration/_migration_validation.py +2 -2
  72. sempy_labs/migration/_refresh_calc_tables.py +94 -100
  73. sempy_labs/report/_BPAReportTemplate.json +232 -0
  74. sempy_labs/report/__init__.py +6 -2
  75. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  76. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  77. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  78. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  79. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  80. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  81. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  82. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  83. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  84. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  85. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  86. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  87. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  88. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  89. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  90. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  91. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  92. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  93. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  94. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  95. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  96. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  97. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  98. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  99. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  100. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  101. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  102. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  103. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  104. sempy_labs/report/_generate_report.py +260 -139
  105. sempy_labs/report/_report_functions.py +90 -59
  106. sempy_labs/report/_report_rebind.py +40 -34
  107. sempy_labs/tom/__init__.py +1 -4
  108. sempy_labs/tom/_model.py +601 -181
  109. semantic_link_labs-0.5.0.dist-info/METADATA +0 -22
  110. semantic_link_labs-0.5.0.dist-info/RECORD +0 -53
  111. sempy_labs/directlake/_fallback.py +0 -58
  112. {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.7.0.dist-info}/LICENSE +0 -0
  113. {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.7.0.dist-info}/top_level.txt +0 -0
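Before the raw diff: the headline change in this release is that the Best Practice Analyzer default rules moved out of sempy_labs/_model_bpa.py into the new sempy_labs/_model_bpa_rules.py, and run_model_bpa gained extended and language parameters (rules_dataframe was renamed to rules). A minimal, hypothetical sketch of calling the 0.7.0 entry point from a Fabric notebook, based only on the signature visible in the diff below — the dataset name is a placeholder, and run_model_bpa is assumed to be re-exported from the sempy_labs top-level package:

import sempy_labs as labs
from sempy_labs._model_bpa_rules import model_bpa_rules  # rules now live in their own module

rules = model_bpa_rules()  # default BPA rule set as a pandas DataFrame

labs.run_model_bpa(
    dataset="AdventureWorks",  # placeholder semantic model name
    rules=rules,               # renamed from 'rules_dataframe' in 0.5.0
    workspace=None,            # None resolves to the notebook's workspace
    extended=True,             # new in 0.7.0: collect Vertipaq Analyzer statistics first
    language="it-IT",          # new in 0.7.0: auto-translate Category, Rule Name and Description
)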
sempy_labs/_model_bpa.py CHANGED
@@ -1,712 +1,39 @@
- import sempy
  import sempy.fabric as fabric
  import pandas as pd
- import re, unicodedata, warnings, datetime
- import numpy as np
+ import warnings
+ import datetime
  from IPython.display import display, HTML
  from pyspark.sql import SparkSession
- from sempy_labs._model_dependencies import get_measure_dependencies
- from sempy_labs._helper_functions import format_dax_object_name, resolve_lakehouse_name
- from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
- from sempy_labs.lakehouse._lakehouse import lakehouse_attached
- from typing import List, Optional, Union
+ from sempy_labs._model_dependencies import get_model_calc_dependencies
+ from sempy_labs._helper_functions import (
+ format_dax_object_name,
+ resolve_lakehouse_name,
+ create_relationship_name,
+ save_as_delta_table,
+ resolve_workspace_capacity,
+ resolve_dataset_id,
+ )
+ from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
+ from sempy_labs.tom import connect_semantic_model
+ from sempy_labs._model_bpa_rules import model_bpa_rules
+ from typing import Optional
  from sempy._utils._log import log
  import sempy_labs._icons as icons
-
- def model_bpa_rules():
- """
- Shows the default rules for the semantic model BPA used by the run_model_bpa function.
-
- Parameters
- ----------
-
-
- Returns
- -------
- pandas.DataFrame
- A pandas dataframe containing the default rules for the run_model_bpa function.
- """
-
- df_rules = pd.DataFrame(
- [
- (
- "Performance",
- "Column",
- "Warning",
- "Do not use floating point data types",
- lambda df: df["Data Type"] == "Double",
- 'The "Double" floating point data type should be avoided, as it can result in unpredictable roundoff errors and decreased performance in certain scenarios. Use "Int64" or "Decimal" where appropriate (but note that "Decimal" is limited to 4 digits after the decimal sign).',
- ),
- (
- "Performance",
- "Column",
- "Warning",
- "Avoid using calculated columns",
- lambda df: df["Type"] == "Calculated",
- "Calculated columns do not compress as well as data columns so they take up more memory. They also slow down processing times for both the table as well as process recalc. Offload calculated column logic to your data warehouse and turn these calculated columns into data columns.",
- "https://www.elegantbi.com/post/top10bestpractices",
- ),
- (
- "Performance",
- "Relationship",
- "Warning",
- "Check if bi-directional and many-to-many relationships are valid",
- lambda df: (df["Multiplicity"] == "m:m")
- | (df["Cross Filtering Behavior"] == "BothDirections"),
- "Bi-directional and many-to-many relationships may cause performance degradation or even have unintended consequences. Make sure to check these specific relationships to ensure they are working as designed and are actually necessary.",
- "https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax",
- ),
- (
- "Performance",
- "Row Level Security",
- "Info",
- "Check if dynamic row level security (RLS) is necessary",
- lambda df: df["Is Dynamic"],
- "Usage of dynamic row level security (RLS) can add memory and performance overhead. Please research the pros/cons of using it.",
- "https://docs.microsoft.com/power-bi/admin/service-admin-rls",
- ),
- (
- "Performance",
- "Table",
- "Warning",
- "Avoid using many-to-many relationships on tables used for dynamic row level security",
- lambda df: (df["Used in M2M Relationship"] == True)
- & (df["Used in Dynamic RLS"] == True),
- "Using many-to-many relationships on tables which use dynamic row level security can cause serious query performance degradation. This pattern's performance problems compound when snowflaking multiple many-to-many relationships against a table which contains row level security. Instead, use one of the patterns shown in the article below where a single dimension table relates many-to-one to a security table.",
- "https://www.elegantbi.com/post/dynamicrlspatterns",
- ),
- (
- "Performance",
- "Relationship",
- "Warning",
- "Many-to-many relationships should be single-direction",
- lambda df: (df["Multiplicity"] == "m:m")
- & (df["Cross Filtering Behavior"] == "BothDirections"),
- ),
- (
- "Performance",
- "Column",
- "Warning",
- "Set IsAvailableInMdx to false on non-attribute columns",
- lambda df: (df["Is Direct Lake"] == False)
- & (df["Is Available in MDX"] == True)
- & ((df["Hidden"] == True) | (df["Parent Is Hidden"] == True))
- & (df["Used in Sort By"] == False)
- & (df["Used in Hierarchy"] == False)
- & (df["Sort By Column"] == None),
- "To speed up processing time and conserve memory after processing, attribute hierarchies should not be built for columns that are never used for slicing by MDX clients. In other words, all hidden columns that are not used as a Sort By Column or referenced in user hierarchies should have their IsAvailableInMdx property set to false. The IsAvailableInMdx property is not relevant for Direct Lake models.",
- "https://blog.crossjoin.co.uk/2018/07/02/isavailableinmdx-ssas-tabular",
- ),
- # ('Performance', 'Partition', 'Warning', "Set 'Data Coverage Definition' property on the DirectQuery partition of a hybrid table",
- # lambda df: (df['Data Coverage Definition Expression'].isnull()) & (df['Mode'] == 'DirectQuery') & (df['Import Partitions'] > 0) & (df['Has Date Table']),
- # "Setting the 'Data Coverage Definition' property may lead to better performance because the engine knows when it can only query the import-portion of the table and when it needs to query the DirectQuery portion of the table.",
- # "https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions",
- # ),
- (
- "Performance",
- "Table",
- "Warning",
- "Set dimensions tables to dual mode instead of import when using DirectQuery on fact tables",
- lambda df: (df["Import Partitions"] == 1)
- & (df["Model Has DQ"])
- & (df["Used in Relationship x:1"]),
- "https://learn.microsoft.com/power-bi/transform-model/desktop-storage-mode#propagation-of-the-dual-setting",
- ),
- (
- "Performance",
- "Partition",
- "Warning",
- "Minimize Power Query transformations",
- lambda df: (df["Source Type"] == "M")
- & (
- ('Table.Combine("' in df["Query"])
- | ('Table.Join("' in df["Query"])
- | ('Table.NestedJoin("' in df["Query"])
- | ('Table.AddColumn("' in df["Query"])
- | ('Table.Group("' in df["Query"])
- | ('Table.Sort("' in df["Query"])
- | ('Table.Sort("' in df["Query"])
- | ('Table.Pivot("' in df["Query"])
- | ('Table.Unpivot("' in df["Query"])
- | ('Table.UnpivotOtherColumns("' in df["Query"])
- | ('Table.Distinct("' in df["Query"])
- | ('[Query=(""SELECT' in df["Query"])
- | ("Value.NativeQuery" in df["Query"])
- | ("OleDb.Query" in df["Query"])
- | ("Odbc.Query" in df["Query"])
- ),
- "Minimize Power Query transformations in order to improve model processing performance. It is a best practice to offload these transformations to the data warehouse if possible. Also, please check whether query folding is occurring within your model. Please reference the article below for more information on query folding.",
- "https://docs.microsoft.com/power-query/power-query-folding",
- ),
- (
- "Performance",
- "Table",
- "Warning",
- "Consider a star-schema instead of a snowflake architecture",
- lambda df: (df["Type"] != "Calculation Group")
- & df["Used in Relationship Both Sides"],
- "Generally speaking, a star-schema is the optimal architecture for tabular models. That being the case, there are valid cases to use a snowflake approach. Please check your model and consider moving to a star-schema architecture.",
- "https://docs.microsoft.com/power-bi/guidance/star-schema",
- ),
- (
- "Performance",
- "Table",
- "Warning",
- "Reduce usage of calculated tables",
- lambda df: df["Type"] == "Calculated Table",
- "Migrate calculated table logic to your data warehouse. Reliance on calculated tables will lead to technical debt and potential misalignments if you have multiple models on your platform.",
- ),
- (
- "Performance",
- "Column",
- "Warning",
- "Reduce usage of calculated columns that use the RELATED function",
- lambda df: (df["Type"] == "Calculated")
- & (df["Source"].str.contains(r"related\s*\(", case=False)),
- "Calculated columns do not compress as well as data columns and may cause longer processing times. As such, calculated columns should be avoided if possible. One scenario where they may be easier to avoid is if they use the RELATED function.",
- "https://www.sqlbi.com/articles/storage-differences-between-calculated-columns-and-calculated-tables",
- ),
- (
- "Performance",
- "Model",
- "Warning",
- "Avoid excessive bi-directional or many-to-many relationships",
- lambda df: (
- df["M2M or BiDi Relationship Count"] / df["Relationship Count"]
- )
- > 0.3,
- "Limit use of b-di and many-to-many relationships. This rule flags the model if more than 30% of relationships are bi-di or many-to-many.",
- "https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax",
- ),
- (
- "Performance",
- "Column",
- "Warning",
- "Avoid bi-directional or many-to-many relationships against high-cardinality columns",
- lambda df: df["Used in M2M/BiDi Relationship"]
- & df["Column Cardinality"]
- > 100000,
- "For best performance, it is recommended to avoid using bi-directional relationships against high-cardinality columns",
- ),
- (
- "Performance",
- "Table",
- "Warning",
- "Remove auto-date table",
- lambda df: (df["Type"] == "Calculated Table")
- & (
- (df["Name"].str.startswith("DateTableTemplate_"))
- | (df["Name"].str.startswith("LocalDateTable_"))
- ),
- "Avoid using auto-date tables. Make sure to turn off auto-date table in the settings in Power BI Desktop. This will save memory resources.",
- "https://www.youtube.com/watch?v=xu3uDEHtCrg",
- ),
- (
- "Performance",
- "Table",
- "Warning",
- "Date/calendar tables should be marked as a date table",
- lambda df: (
- (df["Name"].str.contains(r"date", case=False))
- | (df["Name"].str.contains(r"calendar", case=False))
- )
- & (df["Data Category"] != "Time"),
- "This rule looks for tables that contain the words 'date' or 'calendar' as they should likely be marked as a date table.",
- "https://docs.microsoft.com/power-bi/transform-model/desktop-date-tables",
- ),
- (
- "Performance",
- "Table",
- "Warning",
- "Large tables should be partitioned",
- lambda df: (df["Is Direct Lake"] == False)
- & (df["Partition Count"] == 1)
- & (df["Row Count"] > 25000000),
- "Large tables should be partitioned in order to optimize processing. This is not relevant for semantic models in Direct Lake mode as they can only have one partition per table.",
- ),
- (
- "Performance",
- "Row Level Security",
- "Warning",
- "Limit row level security (RLS) logic",
- lambda df: df["Filter Expression"].str.contains(
- "|".join(["right", "left", "filter", "upper", "lower", "find"]),
- case=False,
- ),
- "Try to simplify the DAX used for row level security. Usage of the functions within this rule can likely be offloaded to the upstream systems (data warehouse).",
- ),
- (
- "Performance",
- "Model",
- "Warning",
- "Model should have a date table",
- lambda df: df["Has Date Table"],
- "Generally speaking, models should generally have a date table. Models that do not have a date table generally are not taking advantage of features such as time intelligence or may not have a properly structured architecture.",
- ),
- (
- "Performance",
- "Measure",
- "Warning",
- "Measures using time intelligence and model is using Direct Query",
- lambda df: df["DQ Date Function Used"],
- "At present, time intelligence functions are known to not perform as well when using Direct Query. If you are having performance issues, you may want to try alternative solutions such as adding columns in the fact table that show previous year or previous month data.",
- ),
- (
- "Error Prevention",
- "Calculation Item",
- "Error",
- "Calculation items must have an expression",
- lambda df: df["Expression"].str.len() == 0,
- "Calculation items must have an expression. Without an expression, they will not show any values.",
- ),
- (
- "Error Prevention",
- ["Table", "Column", "Measure", "Hierarchy", "Partition"],
- "Error",
- "Avoid invalid characters in names",
- lambda df: df["Name"].apply(
- lambda x: any(
- unicodedata.category(char) == "Cc" and not char.isspace()
- for char in x
- )
- ),
- "This rule identifies if a name for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.",
- ),
- (
- "Error Prevention",
- ["Table", "Column", "Measure", "Hierarchy"],
- "Error",
- "Avoid invalid characters in descriptions",
- lambda df: df["Description"].apply(
- lambda x: any(
- unicodedata.category(char) == "Cc" and not char.isspace()
- for char in x
- )
- ),
- "This rule identifies if a description for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.",
- ),
- (
- "Error Prevention",
- "Relationship",
- "Warning",
- "Relationship columns should be of the same data type",
- lambda df: df["From Column Data Type"] != df["To Column Data Type"],
- "Columns used in a relationship should be of the same data type. Ideally, they will be of integer data type (see the related rule '[Formatting] Relationship columns should be of integer data type'). Having columns within a relationship which are of different data types may lead to various issues.",
- ),
- (
- "Error Prevention",
- "Column",
- "Error",
- "Data columns must have a source column",
- lambda df: (df["Type"] == "Data") & (df["Source"].str.len() == 0),
- "Data columns must have a source column. A data column without a source column will cause an error when processing the model.",
- ),
- (
- "Error Prevention",
- "Column",
- "Warning",
- "Set IsAvailableInMdx to true on necessary columns",
- lambda df: (df["Is Direct Lake"] == False)
- & (df["Is Available in MDX"] == False)
- & (
- (df["Used in Sort By"] == True)
- | (df["Used in Hierarchy"] == True)
- | (df["Sort By Column"] != None)
- ),
- "In order to avoid errors, ensure that attribute hierarchies are enabled if a column is used for sorting another column, used in a hierarchy, used in variations, or is sorted by another column. The IsAvailableInMdx property is not relevant for Direct Lake models.",
- ),
- (
- "Error Prevention",
- "Table",
- "Error",
- "Avoid the USERELATIONSHIP function and RLS against the same table",
- lambda df: (df["USERELATIONSHIP Used"] == True)
- & (df["Used in RLS"] == True),
- "The USERELATIONSHIP function may not be used against a table which also leverages row-level security (RLS). This will generate an error when using the particular measure in a visual. This rule will highlight the table which is used in a measure's USERELATIONSHIP function as well as RLS.",
- "https://blog.crossjoin.co.uk/2013/05/10/userelationship-and-tabular-row-security",
- ),
- (
- "DAX Expressions",
- "Measure",
- "Warning",
- "Avoid using the IFERROR function",
- lambda df: df["Measure Expression"].str.contains(
- r"irerror\s*\(", case=False
- ),
- "Avoid using the IFERROR function as it may cause performance degradation. If you are concerned about a divide-by-zero error, use the DIVIDE function as it naturally resolves such errors as blank (or you can customize what should be shown in case of such an error).",
- "https://www.elegantbi.com/post/top10bestpractices",
- ),
- (
- "DAX Expressions",
- "Measure",
- "Warning",
- "Use the TREATAS function instead of INTERSECT for virtual relationships",
- lambda df: df["Measure Expression"].str.contains(
- r"intersect\s*\(", case=False
- ),
- "The TREATAS function is more efficient and provides better performance than the INTERSECT function when used in virutal relationships.",
- "https://www.sqlbi.com/articles/propagate-filters-using-treatas-in-dax",
- ),
- (
- "DAX Expressions",
- "Measure",
- "Warning",
- "The EVALUATEANDLOG function should not be used in production models",
- lambda df: df["Measure Expression"].str.contains(
- r"evaluateandlog\s*\(", case=False
- ),
- "The EVALUATEANDLOG function is meant to be used only in development/test environments and should not be used in production models.",
- "https://pbidax.wordpress.com/2022/08/16/introduce-the-dax-evaluateandlog-function",
- ),
- (
- "DAX Expressions",
- "Measure",
- "Warning",
- "Measures should not be direct references of other measures",
- lambda df: df["Measure Expression"]
- .str.strip()
- .isin(df["Measure Object"]),
- "This rule identifies measures which are simply a reference to another measure. As an example, consider a model with two measures: [MeasureA] and [MeasureB]. This rule would be triggered for MeasureB if MeasureB's DAX was MeasureB:=[MeasureA]. Such duplicative measures should be removed.",
- ),
- (
- "DAX Expressions",
- "Measure",
- "Warning",
- "No two measures should have the same definition",
- lambda df: df["Measure Expression"]
- .apply(lambda x: re.sub(r"\s+", "", x))
- .duplicated(keep=False),
- "Two measures with different names and defined by the same DAX expression should be avoided to reduce redundancy.",
- ),
- (
- "DAX Expressions",
- "Measure",
- "Warning",
- "Avoid addition or subtraction of constant values to results of divisions",
- lambda df: df["Measure Expression"].str.contains(
- "(?i)DIVIDE\\s*\\((\\s*.*?)\\)\\s*[+-]\\s*1"
- or "\\/\\s*.*(?=[-+]\\s*1)",
- regex=True,
- ),
- ),
- (
- "DAX Expressions",
- "Measure",
- "Warning",
- "Avoid using '1-(x/y)' syntax",
- lambda df: df["Measure Expression"].str.contains(
- "[0-9]+\\s*[-+]\\s*[\\(]*\\s*(?i)SUM\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*\\[[A-Za-z0-9 _]+\\]\\s*\\)\\s*\\/"
- or "[0-9]+\\s*[-+]\\s*(?i)DIVIDE\\s*\\(",
- regex=True,
- ),
- "Instead of using the '1-(x/y)' or '1+(x/y)' syntax to achieve a percentage calculation, use the basic DAX functions (as shown below). Using the improved syntax will generally improve the performance. The '1+/-...' syntax always returns a value whereas the solution without the '1+/-...' does not (as the value may be 'blank'). Therefore the '1+/-...' syntax may return more rows/columns which may result in a slower query speed. Let's clarify with an example: Avoid this: 1 - SUM ( 'Sales'[CostAmount] ) / SUM( 'Sales'[SalesAmount] ) Better: DIVIDE ( SUM ( 'Sales'[SalesAmount] ) - SUM ( 'Sales'[CostAmount] ), SUM ( 'Sales'[SalesAmount] ) ) Best: VAR x = SUM ( 'Sales'[SalesAmount] ) RETURN DIVIDE ( x - SUM ( 'Sales'[CostAmount] ), x )",
- ),
- (
- "DAX Expressions",
- "Measure",
- "Warning",
- "Filter measure values by columns, not tables",
- lambda df: df["Measure Expression"].str.contains(
- "(?i)CALCULATE\\s*\\(\\s*[^,]+,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*\\[[^\\]]+\\]"
- or "(?i)CALCULATETABLE\\s*\\([^,]*,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*\\[",
- regex=True,
- ),
- "Instead of using this pattern FILTER('Table',[Measure]>Value) for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below (if possible). Filtering on a specific column will produce a smaller table for the engine to process, thereby enabling faster performance. Using the VALUES function or the ALL function depends on the desired measure result.\nOption 1: FILTER(VALUES('Table'[Column]),[Measure] > Value)\nOption 2: FILTER(ALL('Table'[Column]),[Measure] > Value)",
- "https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument",
- ),
- (
- "DAX Expressions",
- "Measure",
- "Warning",
- "Filter column values with proper syntax",
- lambda df: df["Measure Expression"].str.contains(
- "(?i)CALCULATE\\s*\\(\\s*[^,]+,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*'*[A-Za-z0-9 _]+'*\\[[A-Za-z0-9 _]+\\]"
- or "(?i)CALCULATETABLE\\s*\\([^,]*,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*'*[A-Za-z0-9 _]+'*\\[[A-Za-z0-9 _]+\\]",
- regex=True,
- ),
- "Instead of using this pattern FILTER('Table','Table'[Column]=\"Value\") for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below. As far as whether to use the KEEPFILTERS function, see the second reference link below.\nOption 1: KEEPFILTERS('Table'[Column]=\"Value\")\nOption 2: 'Table'[Column]=\"Value\"",
- "https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument Reference: https://www.sqlbi.com/articles/using-keepfilters-in-dax",
- ),
- (
- "DAX Expressions",
- "Measure",
- "Warning",
- "Use the DIVIDE function for division",
- lambda df: df["Measure Expression"].str.contains(
- '\\]\\s*\\/(?!\\/)(?!\\*)" or "\\)\\s*\\/(?!\\/)(?!\\*)', regex=True
- ),
- 'Use the DIVIDE function instead of using "/". The DIVIDE function resolves divide-by-zero cases. As such, it is recommended to use to avoid errors.',
- "https://docs.microsoft.com/power-bi/guidance/dax-divide-function-operator",
- ),
- (
- "DAX Expressions",
- "Measure",
- "Error",
- "Column references should be fully qualified",
- lambda df: df["Has Unqualified Column Reference"],
- "Using fully qualified column references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a column in DAX, first specify the table name, then specify the column name in square brackets.",
- "https://www.elegantbi.com/post/top10bestpractices",
- ),
- (
- "DAX Expressions",
- "Measure",
- "Error",
- "Measure references should be unqualified",
- lambda df: df["Has Fully Qualified Measure Reference"],
- "Using unqualified measure references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a measure using DAX, do not specify the table name. Use only the measure name in square brackets.",
- "https://www.elegantbi.com/post/top10bestpractices",
- ),
- (
- "DAX Expressions",
- "Relationship",
- "Warning",
- "Inactive relationships that are never activated",
- lambda df: df["Inactive without USERELATIONSHIP"],
- "Inactive relationships are activated using the USERELATIONSHIP function. If an inactive relationship is not referenced in any measure via this function, the relationship will not be used. It should be determined whether the relationship is not necessary or to activate the relationship via this method.",
- "https://dax.guide/userelationship",
- ),
- (
- "Maintenance",
- "Column",
- "Warning",
- "Remove unnecessary columns",
- lambda df: (df["Hidden"] | df["Parent Is Hidden"])
- & ~df["Used in Relationship"]
- & ~df["Used in Sort By"]
- & ~df["Used in Hierarchy"]
- & (df["Referenced By"] == 0)
- & ~(df["Used in RLS"]), # usedInOLS
- "Hidden columns that are not referenced by any DAX expressions, relationships, hierarchy levels or Sort By-properties should be removed.",
- ),
- (
- "Maintenance",
- "Measure",
- "Warning",
- "Remove unnecessary measures",
- lambda df: df["Measure Hidden"] & (df["Referenced By"] == 0),
- "Hidden measures that are not referenced by any DAX expressions should be removed for maintainability.",
- ),
- # ('Maintenance', 'Role', 'Warning', 'Remove roles with no members',
- # lambda df: df['Member Count'] == 0,
- # ),
- (
- "Maintenance",
- "Table",
- "Warning",
- "Ensure tables have relationships",
- lambda df: (df["Used in Relationship"] == False)
- & (df["Type"] != "Calculation Group"),
- "This rule highlights tables which are not connected to any other table in the model with a relationship.",
- ),
- (
- "Maintenance",
- "Table",
- "Warning",
- "Calculation groups with no calculation items",
- lambda df: (df["Type"] == "Calculation Group")
- & (df["Has Calculation Items"]),
- ),
- (
- "Maintenance",
- "Column",
- "Info",
- "Visible objects with no description",
- lambda df: (df["Hidden"] == False) & (df["Description"].str.len() == 0),
- "Calculation groups have no function unless they have calculation items.",
- ),
- (
- "Formatting",
- "Column",
- "Warning",
- "Provide format string for 'Date' columns",
- lambda df: (df["Column Name"].str.contains(r"date", case=False))
- & (df["Data Type"] == "DateTime")
- & (df["Format String"] != "mm/dd/yyyy"),
- 'Columns of type "DateTime" that have "Month" in their names should be formatted as "mm/dd/yyyy".',
- ),
- (
- "Formatting",
- "Column",
- "Warning",
- "Do not summarize numeric columns",
- lambda df: (
- (df["Data Type"] == "Int64")
- | (df["Data Type"] == "Decimal")
- | (df["Data Type"] == "Double")
- )
- & (df["Summarize By"] != "None")
- & ~((df["Hidden"]) | (df["Parent Is Hidden"])),
- 'Numeric columns (integer, decimal, double) should have their SummarizeBy property set to "None" to avoid accidental summation in Power BI (create measures instead).',
- ),
- (
- "Formatting",
- "Measure",
- "Info",
- "Provide format string for measures",
- lambda df: ~((df["Measure Hidden"]) | (df["Parent Is Hidden"]))
- & (df["Format String"].str.len() == 0),
- "Visible measures should have their format string property assigned.",
- ),
- (
- "Formatting",
- "Column",
- "Info",
- "Add data category for columns",
- lambda df: (df["Data Category"] == "")
- & (
- (
- (
- (df["Column Name"].str.contains(r"country", case=False))
- | (df["Column Name"].str.contains(r"city", case=False))
- | (df["Column Name"].str.contains(r"continent", case=False))
- )
- & (df["Data Type"] == "String")
- )
- | (
- (
- (df["Column Name"].str.contains(r"latitude", case=False))
- | (df["Column Name"].str.contains(r"longitude", case=False))
- )
- & (df["Data Type"] == "String")
- )
- ),
- "Add Data Category property for appropriate columns.",
- "https://docs.microsoft.com/power-bi/transform-model/desktop-data-categorization",
- ),
- (
- "Formatting",
- "Measure",
- "Warning",
- "Percentages should be formatted with thousands separators and 1 decimal",
- lambda df: (df["Format String"].str.contains("%"))
- & (df["Format String"] != "#,0.0%;-#,0.0%;#,0.0%"),
- ),
- (
- "Formatting",
- "Measure",
- "Warning",
- "Whole numbers should be formatted with thousands separators and no decimals",
- lambda df: (~df["Format String"].str.contains("$"))
- & ~(df["Format String"].str.contains("%"))
- & ~((df["Format String"] == "#,0") | (df["Format String"] == "#,0.0")),
- ),
- (
- "Formatting",
- "Column",
- "Info",
- "Hide foreign keys",
- lambda df: (df["Foreign Key"]) & (df["Hidden"] == False),
- "Foreign keys should always be hidden.",
- ),
- (
- "Formatting",
- "Column",
- "Info",
- "Mark primary keys",
- lambda df: (df["Primary Key"]) & (df["Key"] == False),
- "Set the 'Key' property to 'True' for primary key columns within the column properties.",
- ),
- (
- "Formatting",
- "Column",
- "Info",
- "Month (as a string) must be sorted",
- lambda df: (df["Column Name"].str.contains(r"month", case=False))
- & ~(df["Column Name"].str.contains(r"months", case=False))
- & (df["Data Type"] == "String")
- & (df["Sort By Column"] == ""),
- "This rule highlights month columns which are strings and are not sorted. If left unsorted, they will sort alphabetically (i.e. April, August...). Make sure to sort such columns so that they sort properly (January, February, March...).",
- ),
- (
- "Formatting",
- "Relationship",
- "Warning",
- "Relationship columns should be of integer data type",
- lambda df: (df["From Column Data Type"] != "Int64")
- | (df["To Column Data Type"] != "Int64"),
- "It is a best practice for relationship columns to be of integer data type. This applies not only to data warehousing but data modeling as well.",
- ),
- (
- "Formatting",
- "Column",
- "Warning",
- 'Provide format string for "Month" columns',
- lambda df: (df["Column Name"].str.contains(r"month", case=False))
- & (df["Data Type"] == "DateTime")
- & (df["Format String"] != "MMMM yyyy"),
- 'Columns of type "DateTime" that have "Month" in their names should be formatted as "MMMM yyyy".',
- ),
- (
- "Formatting",
- "Column",
- "Info",
- "Format flag columns as Yes/No value strings",
- lambda df: (
- df["Column Name"].str.startswith("Is")
- & (df["Data Type"] == "Int64")
- & ~(df["Hidden"] | df["Parent Is Hidden"])
- )
- | (
- df["Column Name"].str.endswith(" Flag")
- & (df["Data Type"] != "String")
- & ~(df["Hidden"] | df["Parent Is Hidden"])
- ),
- "Flags must be properly formatted as Yes/No as this is easier to read than using 0/1 integer values.",
- ),
- # ('Formatting', ['Table', 'Column', 'Measure', 'Partition', 'Hierarchy'], 'Error', 'Objects should not start or end with a space',
- # lambda df: (df['Name'].str[0] == ' ') | (df['Name'].str[-1] == ' '),
- # 'Objects should not start or end with a space. This usually happens by accident and is difficult to find.',
- # ),
- (
- "Formatting",
- ["Table", "Column", "Measure", "Partition", "Hierarchy"],
- "Info",
- "First letter of objects must be capitalized",
- lambda df: df["Name"].str[0].str.upper() != df["Name"].str[0],
- "The first letter of object names should be capitalized to maintain professional quality.",
- ),
- (
- "Naming Conventions",
- ["Table", "Column", "Measure", "Partition", "Hierarchy"],
- "Warning",
- "Object names must not contain special characters",
- lambda df: df["Name"].str.contains(r"[\t\r\n]"),
- "Object names should not include tabs, line breaks, etc.",
- ), # ,
- # ('Error Prevention', ['Table'], 'Error', 'Avoid invalid characters in names',
- # lambda df: df['Name'].str.char.iscontrol() & ~ df['Name'].str.char.isspace(),
- # )#,
- ],
- columns=[
- "Category",
- "Scope",
- "Severity",
- "Rule Name",
- "Expression",
- "Description",
- "URL",
- ],
- )
-
- df_rules["Severity"] = (
- df_rules["Severity"]
- .replace("Warning", "⚠️")
- .replace("Error", "\u274C")
- .replace("Info", "ℹ️")
- )
-
- pd.set_option("display.max_colwidth", 1000)
-
- return df_rules
+ from pyspark.sql.functions import col, flatten
+ from pyspark.sql.types import StructType, StructField, StringType
+ import polib
+ import os


  @log
  def run_model_bpa(
  dataset: str,
- rules_dataframe: Optional[pd.DataFrame] = None,
+ rules: Optional[pd.DataFrame] = None,
  workspace: Optional[str] = None,
  export: Optional[bool] = False,
  return_dataframe: Optional[bool] = False,
+ extended: Optional[bool] = False,
+ language: Optional[str] = None,
  **kwargs,
  ):
  """
@@ -716,7 +43,7 @@ def run_model_bpa(
  ----------
  dataset : str
  Name of the semantic model.
- rules_dataframe : pandas.DataFrame, default=None
+ rules : pandas.DataFrame, default=None
  A pandas dataframe containing rules to be evaluated.
  workspace : str, default=None
  The Fabric workspace name.
@@ -726,6 +53,11 @@ def run_model_bpa(
  If True, exports the resulting dataframe to a delta table in the lakehouse attached to the notebook.
  return_dataframe : bool, default=False
  If True, returns a pandas dataframe instead of the visualization.
+ extended : bool, default=False
+ If True, runs the set_vertipaq_annotations function to collect Vertipaq Analyzer statistics to be used in the analysis of the semantic model.
+ language : str, default=None
+ Specifying a language code (i.e. 'it-IT' for Italian) will auto-translate the Category, Rule Name and Description into the specified language.
+ Defaults to None which resolves to English.

  Returns
  -------
@@ -733,6 +65,8 @@ def run_model_bpa(
  A pandas dataframe in HTML format showing semantic model objects which violated the best practice analyzer rules.
  """

+ from synapse.ml.services import Translate
+
  if "extend" in kwargs:
  print(
  "The 'extend' parameter has been deprecated. Please remove this parameter from the function going forward."
@@ -743,461 +77,297 @@ def run_model_bpa(
743
77
  "ignore",
744
78
  message="This pattern is interpreted as a regular expression, and has match groups.",
745
79
  )
746
-
747
- workspace = fabric.resolve_workspace_name(workspace)
748
-
749
- if rules_dataframe is None:
750
- rules_dataframe = model_bpa_rules()
751
-
752
- dfT = fabric.list_tables(dataset=dataset, workspace=workspace, extended=True)
753
- dfT = dfT.drop_duplicates()
754
- dfC = fabric.list_columns(
755
- dataset=dataset,
756
- workspace=workspace,
757
- extended=True,
758
- additional_xmla_properties=["Parent.DataCategory", "Parent.IsHidden"],
759
- )
760
- dfC = dfC[~dfC["Column Name"].str.startswith("RowNumber-")]
761
-
762
- dfM = fabric.list_measures(
763
- dataset=dataset,
764
- workspace=workspace,
765
- additional_xmla_properties=["Parent.IsHidden"],
766
- )
767
- dfR = fabric.list_relationships(
768
- dataset=dataset,
769
- workspace=workspace,
770
- additional_xmla_properties=["FromCardinality", "ToCardinality"],
771
- )
772
- dfP = fabric.list_partitions(
773
- dataset=dataset,
774
- workspace=workspace,
775
- additional_xmla_properties=["DataCoverageDefinition.Expression"],
776
- )
777
- dfH = fabric.list_hierarchies(dataset=dataset, workspace=workspace)
778
- dfRole = fabric.get_roles(dataset=dataset, workspace=workspace)
779
- dfRM = fabric.get_roles(dataset=dataset, workspace=workspace, include_members=True)
780
- dfRLS = fabric.get_row_level_security_permissions(
781
- dataset=dataset, workspace=workspace
782
- )
783
- # dfTr = fabric.list_translations(dataset = datasetName, workspace = workspaceName)
784
- # dfE = fabric.list_expressions(dataset = datasetName, workspace = workspaceName)
785
- dfCI = fabric.list_calculation_items(dataset=dataset, workspace=workspace)
786
- # dfDS = fabric.list_datasources(dataset = datasetName, workspace = workspaceName)
787
- # dfPersp = fabric.list_perspectives(dataset = datasetName, workspace = workspaceName)
788
- dfD = fabric.list_datasets(mode="rest", workspace=workspace)
789
- dfD = dfD[dfD["Dataset Name"] == dataset]
790
- # datasetOwner = dfD['Configured By'].iloc[0]
791
- md = get_measure_dependencies(dataset, workspace)
792
- isDirectLake = any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows())
793
- dfC["Is Direct Lake"] = isDirectLake
794
- dfT["Is Direct Lake"] = isDirectLake
795
-
796
- cols = ["From Cardinality", "To Cardinality"]
797
-
798
- for col in cols:
799
- if col not in dfR:
800
- dfR[col] = None
801
-
802
- cols = ["Parent Is Hidden"]
803
-
804
- for col in cols:
805
- if col not in dfM:
806
- dfM[col] = None
807
-
808
- # Data Coverage Definition rule
809
- dfP_imp = dfP[dfP["Mode"] == "Import"]
810
- dfTP = dfP_imp.groupby("Table Name")["Partition Name"].count().reset_index()
811
- dfTP.rename(columns={"Partition Name": "Import Partitions"}, inplace=True)
812
- dfP = pd.merge(
813
- dfP, dfTP[["Table Name", "Import Partitions"]], on="Table Name", how="left"
814
- )
815
- dfP["Import Partitions"].fillna(0, inplace=True)
816
- dfC_DateKey = dfC[
817
- (dfC["Parent Data Category"] == "Time")
818
- & (dfC["Data Type"] == "DateTime")
819
- & (dfC["Key"])
80
+ warnings.filterwarnings(
81
+ "ignore", category=UserWarning, message=".*Arrow optimization.*"
82
+ )
83
+
84
+ language_list = [
85
+ "it-IT",
86
+ "es-ES",
87
+ "he-IL",
88
+ "pt-PT",
89
+ "zh-CN",
90
+ "fr-FR",
91
+ "da-DK",
92
+ "cs-CZ",
93
+ "de-DE",
94
+ "el-GR",
95
+ "fa-IR",
96
+ "ga-IE",
97
+ "hi-IN",
98
+ "hu-HU",
99
+ "is-IS",
100
+ "ja-JP",
101
+ "nl-NL",
102
+ "pl-PL",
103
+ "pt-BR",
104
+ "ru-RU",
105
+ "te-IN",
106
+ "ta-IN",
107
+ "th-TH",
108
+ "zu-ZA",
109
+ "am-ET",
110
+ "ar-AE",
820
111
  ]
821
- hasDateTable = False
822
-
823
- if len(dfC_DateKey) > 0:
824
- hasDateTable = True
825
-
826
- dfP["Has Date Table"] = hasDateTable
827
-
828
- # Set dims to dual mode
829
- dfR_one = dfR[dfR["To Cardinality"] == "One"]
830
- dfTP = dfP_imp.groupby("Table Name")["Partition Name"].count().reset_index()
831
- dfTP.rename(columns={"Partition Name": "Import Partitions"}, inplace=True)
832
- dfT = pd.merge(dfT, dfTP, left_on="Name", right_on="Table Name", how="left")
833
- dfT.drop(columns=["Table Name"], inplace=True)
834
- dfT["Import Partitions"].fillna(0, inplace=True)
835
- hasDQ = any(r["Mode"] == "DirectQuery" for i, r in dfP.iterrows())
836
- dfT["Model Has DQ"] = hasDQ
837
- dfT["Used in Relationship x:1"] = dfT["Name"].isin(dfR_one["To Table"])
838
-
839
- dfF = fabric.evaluate_dax(
840
- dataset=dataset,
841
- workspace=workspace,
842
- dax_string="""
843
- SELECT [FUNCTION_NAME]
844
- FROM $SYSTEM.MDSCHEMA_FUNCTIONS
845
- WHERE [INTERFACE_NAME] = 'DATETIME'
846
- """,
847
- )
848
112
 
849
- dfC["Name"] = dfC["Column Name"]
850
- dfH["Name"] = dfH["Hierarchy Name"]
851
- dfM["Name"] = dfM["Measure Name"]
852
- dfP["Name"] = dfP["Partition Name"]
853
- dfRole["Name"] = dfRole["Role"]
854
- dfD["Name"] = dfD["Dataset Name"]
855
- dfH["Description"] = dfH["Hierarchy Description"]
856
- dfM["Description"] = dfM["Measure Description"]
857
- dfH["Hierarchy Object"] = format_dax_object_name(
858
- dfH["Table Name"], dfH["Hierarchy Name"]
859
- )
113
+ # Map languages to the closest language (first 2 letters matching)
114
+ def map_language(language, language_list):
860
115
 
861
- dfCI["Calculation Object"] = format_dax_object_name(
862
- dfCI["Calculation Group Name"], dfCI["Calculation Item Name"]
863
- )
116
+ mapped = False
864
117
 
865
- dfRole["Member Count"] = dfRM["Role"].isin(dfRole["Role"]).sum()
866
- dfRLS["Is Dynamic"] = dfRLS["Filter Expression"].str.contains(
867
- r"userprincipalname\s*\(", case=False
868
- ) | dfRLS["Filter Expression"].str.contains(r"username\s*\(", case=False)
118
+ if language in language_list:
119
+ mapped is True
120
+ return language
869
121
 
870
- # Partition Count
871
- partition_count = (
872
- dfP.groupby("Table Name").size().reset_index(name="Partition Count")
873
- )
874
- dfT = pd.merge(
875
- dfT, partition_count, left_on="Name", right_on="Table Name", how="left"
876
- ).drop("Table Name", axis=1)
877
- dfT["Partition Count"] = dfT["Partition Count"].fillna(0).astype(int)
878
-
879
- dfT = dfT.merge(
880
- dfP[["Table Name", "Partition Name"]],
881
- how="left",
882
- left_on="Name",
883
- right_on="Table Name",
884
- )
885
- dfT["First Partition Name"] = dfT.groupby("Name")["Partition Name"].transform(
886
- "first"
887
- )
888
- dfT.drop("Table Name", axis=1, inplace=True)
122
+ language_prefix = language[:2]
123
+ for lang_code in language_list:
124
+ if lang_code.startswith(language_prefix):
125
+ mapped is True
126
+ return lang_code
127
+ if not mapped:
128
+ return language
889
129
 
890
- dfC["Sort By Column Object"] = format_dax_object_name(
891
- dfC["Table Name"], dfC["Sort By Column"]
892
- )
893
- dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
894
- dfM["Measure Object"] = "[" + dfM["Measure Name"] + "]"
895
- dfM["Measure Fully Qualified"] = format_dax_object_name(
896
- dfM["Table Name"], dfM["Measure Name"]
897
- )
898
- dfM["Measure Fully Qualified No Spaces"] = (
899
- dfM["Table Name"] + "[" + dfM["Measure Name"] + "]"
900
- )
901
- # dfM['Measure Fully Qualified No Spaces'] = dfM.apply(lambda row: row['Table Name'] + '[' + row['Measure Name'] + ']' if ' ' not in row['Table Name'] else '', axis=1)
902
- dfC["Column Unqualified"] = "[" + dfC["Column Name"] + "]"
903
- dfC["Column Object No Spaces"] = dfC.apply(
904
- lambda row: (
905
- row["Table Name"] + "[" + row["Column Name"] + "]"
906
- if " " not in row["Table Name"]
907
- else ""
908
- ),
909
- axis=1,
910
- )
911
- dfC["Used in Sort By"] = dfC["Column Object"].isin(dfC["Sort By Column Object"])
912
- dfH["Column Object"] = format_dax_object_name(dfH["Table Name"], dfH["Column Name"])
913
- dfC["Used in Hierarchy"] = dfC["Column Object"].isin(dfH["Column Object"])
914
- dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"])
915
- dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
916
- dfT["Used in Relationship"] = dfT["Name"].isin(dfR["From Table"]) | dfT[
917
- "Name"
918
- ].isin(dfR["To Table"])
919
- dfT["Used in Relationship Both Sides"] = dfT["Name"].isin(dfR["From Table"]) & dfT[
920
- "Name"
921
- ].isin(dfR["To Table"])
922
- dfC["Used in Relationship"] = dfC["Column Object"].isin(dfR["From Object"]) | dfC[
923
- "Column Object"
924
- ].isin(dfR["To Object"])
925
-
926
- dfR_filt = dfR[
927
- (dfR["Cross Filtering Behavior"] == "BothDirections")
928
- | (dfR["Multiplicity"] == "m:m")
929
- ]
930
- dfC["Used in M2M/BiDi Relationship"] = dfC["Column Object"].isin(
931
- dfR_filt["From Object"]
932
- ) | dfC["Column Object"].isin(dfR_filt["To Object"])
933
- dfC["Foreign Key"] = dfC["Column Object"].isin(
934
- dfR[dfR["From Cardinality"] == "Many"]["From Object"]
935
- )
936
- dfC["Primary Key"] = dfC["Column Object"].isin(
937
- dfR[dfR["To Cardinality"] == "One"]["To Object"]
938
- )
939
- dfT["Used in M2M Relationship"] = dfT["Name"].isin(
940
- dfR[dfR["Multiplicity"] == "m:m"][["From Table"]]
941
- ) | dfT["Name"].isin(dfR[dfR["Multiplicity"] == "m:m"][["To Table"]])
942
- dfT["Used in Dynamic RLS"] = dfT["Name"].isin(dfRLS[dfRLS["Is Dynamic"]]["Table"])
943
- dfT["Used in RLS"] = dfT["Name"].isin(
944
- dfRLS.loc[dfRLS["Filter Expression"].str.len() > 0, "Table"]
945
- )
946
- dfC["Primary Key"] = dfC["Column Object"].isin(
947
- dfR.loc[dfR["To Cardinality"] == "One", "To Object"]
948
- )
949
- dfD["Has Date Table"] = any(
950
- (r["Parent Data Category"] == "Time")
951
- & (r["Data Type"] == "DateTime")
952
- & (r["Key"] == True)
953
- for i, r in dfC.iterrows()
954
- )
955
- # dfC['In Date Table'] = dfC['Table Name'].isin(dfT.loc[dfT['Data Category'] == "Time", 'Name'])
956
- dfD["Relationship Count"] = len(dfR)
957
- dfD["M2M or BiDi Relationship Count"] = len(
958
- dfR[
959
- (dfR["Multiplicity"] == "m:m")
960
- | (dfR["Cross Filtering Behavior"] == "BothDirections")
961
- ]
962
- )
963
- dfD["Calculation Group Count"] = len(dfT[dfT["Type"] == "Calculation Group"])
964
- dfT["Has Calculation Items"] = np.where(
965
- (dfT["Type"] == "Calculation Group")
966
- & dfT["Name"].isin(dfCI["Calculation Group Name"]),
967
- True,
968
- False,
969
- )
970
- dfP["Partition Object"] = format_dax_object_name(
971
- dfP["Table Name"], dfP["Partition Name"]
972
- )
973
- dfRLS["RLS Object"] = format_dax_object_name(dfRLS["Role"], dfRLS["Table"])
974
-
975
- function_pattern = "|".join(dfF["FUNCTION_NAME"].map(re.escape))
976
-
977
- dfM["DQ Date Function Used"] = any(dfP["Mode"] == "DirectQuery") & dfM[
978
- "Measure Expression"
979
- ].str.contains(f"({function_pattern})\\s*\\(", case=False, regex=True)
980
-
981
- md["Reference"] = (
982
- "'" + md["Referenced Table"] + "'[" + md["Referenced Object"] + "]"
983
- )
984
-
985
- dfC["Referenced By"] = (
986
- md[
987
- (md["Referenced Object Type"] == "Column")
988
- & (md["Reference"].isin(dfC["Column Object"]))
989
- ]
990
- .groupby("Reference")
991
- .size()
992
- .reset_index(name="Count")["Count"]
993
- )
994
- dfC["Referenced By"].fillna(0, inplace=True)
995
- dfC["Referenced By"] = dfC["Referenced By"].fillna(0).astype(int)
130
+ if language is not None:
131
+ language = map_language(language, language_list)
996
132
 
997
- dfM["Referenced By"] = (
998
- md[
999
- (md["Referenced Object Type"] == "Measure")
1000
- & (md["Referenced Object"].isin(dfM["Measure Name"]))
1001
- ]
1002
- .groupby("Referenced Object")
1003
- .size()
1004
- .reset_index(name="Count")["Count"]
1005
- )
1006
- dfM["Referenced By"].fillna(0, inplace=True)
1007
- dfM["Referenced By"] = dfM["Referenced By"].fillna(0).astype(int)
133
+ workspace = fabric.resolve_workspace_name(workspace)
1008
134
 
1009
- pattern = r"[^\( ][a-zA-Z0-9_()-]+\[[^\[]+\]|'[^']+'\[[^\[]+\]|\[[^\[]+\]"
135
+ if language is not None and language not in language_list:
136
+ print(
137
+ f"{icons.yellow_dot} The '{language}' language code is not in our predefined language list. Please file an issue and let us know which language code you are using: https://github.com/microsoft/semantic-link-labs/issues/new?assignees=&labels=&projects=&template=bug_report.md&title=."
138
+ )
1010
139
 
-    dfM["Has Fully Qualified Measure Reference"] = False
-    dfM["Has Unqualified Column Reference"] = False
+    if extended:
+        with connect_semantic_model(
+            dataset=dataset, workspace=workspace, readonly=False
+        ) as tom:
+            tom.set_vertipaq_annotations()
 
-    for i, r in dfM.iterrows():
-        tName = r["Table Name"]
-        mName = r["Measure Name"]
-        expr = r["Measure Expression"]
+    with connect_semantic_model(
+        dataset=dataset, workspace=workspace, readonly=True
+    ) as tom:
 
-        matches = re.findall(pattern, expr)
+        dep = get_model_calc_dependencies(dataset=dataset, workspace=workspace)
 
-        for m in matches:
-            if m[0] == "[":
-                if (m in dfC["Column Unqualified"].values) and (
-                    dfC[dfC["Table Name"] == tName]["Column Unqualified"] == m
-                ).any():
-                    dfM.at[i, "Has Unqualified Column Reference"] = True
-            else:
-                if (m in dfM["Measure Fully Qualified"].values) | (
-                    m in dfM["Measure Fully Qualified No Spaces"].values
-                ):
-                    dfM.at[i, "Has Fully Qualified Measure Reference"] = True
-
-    dfR["Inactive without USERELATIONSHIP"] = False
-    for i, r in dfR[dfR["Active"] == False].iterrows():
-        fromTable = r["From Table"]
-        fromColumn = r["From Column"]
-        toTable = r["To Table"]
-        toColumn = r["To Column"]
-
-        dfM_filt = dfM[
-            dfM["Measure Expression"].str.contains(
-                r"(?i)USERELATIONSHIP\s*\(\s*'*"
-                + re.escape(fromTable)
-                + r"'*\["
-                + re.escape(fromColumn)
-                + r"\]\s*,\s*'*"
-                + re.escape(toTable)
-                + r"'*\["
-                + re.escape(toColumn)
-                + r"\]",
-                regex=True,
+        def translate_using_po(rule_file):
+            current_dir = os.path.dirname(os.path.abspath(__file__))
+            translation_file = (
+                f"{current_dir}/_bpa_translation/_translations_{language}.po"
             )
-        ]
-        if len(dfM_filt) == 0:
-            dfR.at[i, "Inactive without USERELATIONSHIP"] = True
-
-    dfC["Used in RLS"] = (
-        dfC["Column Object No Spaces"].isin(dfRLS["Filter Expression"])
-        | dfC["Column Object"].isin(dfRLS["Filter Expression"])
-        | dfC.apply(
-            lambda row: any(
-                row["Column Name"] in expr
-                for expr in dfRLS.loc[
-                    dfRLS["Table"] == row["Table Name"], "Filter Expression"
-                ]
-            ),
-            axis=1,
-        )
-    )
-
-    # Merge dfR and dfC based on 'From Object' and 'Column Object'
-    merged_from = pd.merge(
-        dfR, dfC, left_on="From Object", right_on="Column Object", how="left"
-    )
-    merged_to = pd.merge(
-        dfR, dfC, left_on="To Object", right_on="Column Object", how="left"
-    )
+            for c in ["Category", "Description", "Rule Name"]:
+                po = polib.pofile(translation_file)
+                for entry in po:
+                    if entry.tcomment == c.lower().replace(" ", "_"):
+                        rule_file.loc[rule_file["Rule Name"] == entry.msgid, c] = (
+                            entry.msgstr
+                        )
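The new `translate_using_po` helper swaps rule text in place using the `.po` catalogs bundled under `_bpa_translation/` (added in this release). Each catalog entry keys on the untranslated rule name (`msgid`), and its translator comment (`tcomment`) names the column the translation applies to ("category", "description", or "rule_name", per the `c.lower().replace(" ", "_")` convention above). A minimal sketch of that lookup with polib; the catalog path is an assumption and the output is illustrative:

    import polib

    # One of the bundled catalogs (path assumed relative to the package root).
    po = polib.pofile("sempy_labs/_bpa_translation/_translations_fr-FR.po")
    for entry in po:
        # tcomment identifies the target column of the translation.
        if entry.tcomment == "rule_name":
            print(f"{entry.msgid} -> {entry.msgstr}")

Note that the helper re-parses the catalog once per column; hoisting `polib.pofile` above the loop would avoid two redundant parses.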
 
-    dfR["From Column Data Type"] = merged_from["Data Type"]
-    dfR["To Column Data Type"] = merged_to["Data Type"]
+        def translate_using_spark(rule_file):
+            rules_temp = rule_file.copy()
+            rules_temp = rules_temp.drop(["Expression", "URL", "Severity"], axis=1)
 
-    # Check if USERELATIONSHIP objects are used in a given column, table
-    userelationship_pattern = re.compile(
-        r"USERELATIONSHIP\s*\(\s*(.*?)\s*,\s*(.*?)\s*\)", re.DOTALL | re.IGNORECASE
-    )
-
-    # Function to extract objects within USERELATIONSHIP function
-    def extract_objects(measure_expression):
-        matches = userelationship_pattern.findall(measure_expression)
-        if matches:
-            return [obj.strip() for match in matches for obj in match]
-        else:
-            return []
+            schema = StructType(
+                [
+                    StructField("Category", StringType(), True),
+                    StructField("Scope", StringType(), True),
+                    StructField("Rule Name", StringType(), True),
+                    StructField("Description", StringType(), True),
+                ]
+            )
 
-    dfM["USERELATIONSHIP Objects"] = dfM["Measure Expression"].apply(extract_objects)
-    flat_object_list = [
-        item for sublist in dfM["USERELATIONSHIP Objects"] for item in sublist
-    ]
-    dfC["USERELATIONSHIP Used"] = dfC["Column Object"].isin(flat_object_list) | dfC[
-        "Column Object No Spaces"
-    ].isin(flat_object_list)
-    dfT["USERELATIONSHIP Used"] = dfT["Name"].isin(
-        dfC[dfC["USERELATIONSHIP Used"]]["Table Name"]
-    )
-    dfR["Relationship Name"] = (
-        format_dax_object_name(dfR["From Table"], dfR["From Column"])
-        + " -> "
-        + format_dax_object_name(dfR["To Table"], dfR["To Column"])
-    )
-    dfH = dfH[
-        [
-            "Name",
-            "Description",
-            "Table Name",
-            "Hierarchy Name",
-            "Hierarchy Description",
-            "Hierarchy Object",
-        ]
-    ].drop_duplicates()
-
-    scope_to_dataframe = {
-        "Table": (dfT, ["Name"]),
-        "Partition": (dfP, ["Partition Object"]),
-        "Column": (dfC, ["Column Object"]),
-        "Hierarchy": (dfH, ["Hierarchy Object"]),
-        "Measure": (dfM, ["Measure Name"]),
-        "Calculation Item": (dfCI, ["Calculation Object"]),
-        "Relationship": (dfR, ["Relationship Name"]),
-        "Row Level Security": (dfRLS, ["RLS Object"]),
-        "Role": (dfRole, ["Role"]),
-        "Model": (dfD, ["Dataset Name"]),
-    }
+            spark = SparkSession.builder.getOrCreate()
+            dfRules = spark.createDataFrame(rules_temp, schema)
+
+            columns = ["Category", "Rule Name", "Description"]
+            for clm in columns:
+                translate = (
+                    Translate()
+                    .setTextCol(clm)
+                    .setToLanguage(language)
+                    .setOutputCol("translation")
+                    .setConcurrency(5)
+                )
 
-    def execute_rule(row):
-        scopes = row["Scope"]
+                if clm == "Rule Name":
+                    transDF = (
+                        translate.transform(dfRules)
+                        .withColumn(
+                            "translation", flatten(col("translation.translations"))
+                        )
+                        .withColumn("translation", col("translation.text"))
+                        .select(clm, "translation")
+                    )
+                else:
+                    transDF = (
+                        translate.transform(dfRules)
+                        .withColumn(
+                            "translation", flatten(col("translation.translations"))
+                        )
+                        .withColumn("translation", col("translation.text"))
+                        .select("Rule Name", clm, "translation")
+                    )
 
-        # support both str and list as scope type
-        if isinstance(scopes, str):
-            scopes = [scopes]
+                df_panda = transDF.toPandas()
+                rule_file = pd.merge(
+                    rule_file,
+                    df_panda[["Rule Name", "translation"]],
+                    on="Rule Name",
+                    how="left",
+                )
 
-        # collect output dataframes
-        df_outputs = []
+                rule_file = rule_file.rename(
+                    columns={"translation": f"{clm}Translated"}
+                )
+                rule_file[f"{clm}Translated"] = rule_file[f"{clm}Translated"].apply(
+                    lambda x: x[0] if x is not None else None
+                )
 
-        for scope in scopes:
-            # common fields for each scope
-            (df, violation_cols_or_func) = scope_to_dataframe[scope]
+            for clm in columns:
+                rule_file = rule_file.drop([clm], axis=1)
+                rule_file = rule_file.rename(columns={f"{clm}Translated": clm})
 
-            if scope in ["Hierarchy", "Measure"] and len(df) == 0:
-                continue
-            # execute rule and subset df
-            df_violations = df[row["Expression"](df)]
+            return rule_file
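`translate_using_spark` is the fallback for languages without a bundled catalog: it machine-translates the rule text with SynapseML's `Translate` transformer. The service returns a nested array of translations per row, hence the `flatten`/`col("translation.text")` unwrapping before `.toPandas()`. A hedged, self-contained sketch of the same pattern; the import path differs across SynapseML versions, and a Fabric/Synapse runtime with the translator service configured is assumed:

    from pyspark.sql import SparkSession
    from pyspark.sql.functions import col, flatten
    from synapse.ml.services.translate import Translate  # older runtimes: synapse.ml.cognitive

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([("Avoid floating point data types",)], ["Rule Name"])

    translated = (
        Translate()
        .setTextCol("Rule Name")
        .setToLanguage("de")            # target language code (illustrative)
        .setOutputCol("translation")
        .setConcurrency(5)
        .transform(df)
        # The service emits an array of translation structs per input row;
        # flatten it, then project the text field.
        .withColumn("translation", flatten(col("translation.translations")))
        .withColumn("translation", col("translation.text"))
    )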
 
-            # subset the right output columns (e.g. Table Name & Column Name)
-            if isinstance(violation_cols_or_func, list):
-                violation_func = lambda violations: violations[violation_cols_or_func]
-            else:
-                violation_func = violation_cols_or_func
+        translated = False
 
-            # build output data frame
-            df_output = violation_func(df_violations).copy()
+        # Translations
+        if language is not None and rules is None and language in language_list:
+            rules = model_bpa_rules(
+                dataset=dataset, workspace=workspace, dependencies=dep
+            )
+            translate_using_po(rules)
+            translated = True
+        if rules is None:
+            rules = model_bpa_rules(
+                dataset=dataset, workspace=workspace, dependencies=dep
+            )
+        if language is not None and not translated:
+            rules = translate_using_spark(rules)
 
-            df_output.columns = ["Object Name"]
-            df_output["Rule Name"] = row["Rule Name"]
-            df_output["Category"] = row["Category"]
+        rules["Severity"].replace("Warning", icons.warning, inplace=True)
+        rules["Severity"].replace("Error", icons.error, inplace=True)
+        rules["Severity"].replace("Info", icons.info, inplace=True)
 
-            df_output["Object Type"] = scope
-            df_output["Severity"] = row["Severity"]
-            df_output["Description"] = row["Description"]
-            df_output["URL"] = row["URL"]
+        pd.set_option("display.max_colwidth", 1000)
 
-            df_outputs.append(df_output)
+        violations = pd.DataFrame(columns=["Object Name", "Scope", "Rule Name"])
 
-        return df_outputs
+        scope_to_dataframe = {
+            "Relationship": (
+                tom.model.Relationships,
+                lambda obj: create_relationship_name(
+                    obj.FromTable.Name,
+                    obj.FromColumn.Name,
+                    obj.ToTable.Name,
+                    obj.ToColumn.Name,
+                ),
+            ),
+            "Column": (
+                tom.all_columns(),
+                lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name),
+            ),
+            "Measure": (tom.all_measures(), lambda obj: obj.Name),
+            "Hierarchy": (
+                tom.all_hierarchies(),
+                lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name),
+            ),
+            "Table": (tom.model.Tables, lambda obj: obj.Name),
+            "Role": (tom.model.Roles, lambda obj: obj.Name),
+            "Model": (tom.model, lambda obj: obj.Model.Name),
+            "Calculation Item": (
+                tom.all_calculation_items(),
+                lambda obj: format_dax_object_name(obj.Parent.Table.Name, obj.Name),
+            ),
+            "Row Level Security": (
+                tom.all_rls(),
+                lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name),
+            ),
+            "Partition": (
+                tom.all_partitions(),
+                lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name),
+            ),
+        }
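The pandas rule engine is gone: `scope_to_dataframe` now maps each scope to a live TOM collection plus a display-name formatter, and each rule's `Expression` (the rules themselves moved to the new `_model_bpa_rules.py` module in this release) is applied as a predicate to one TOM object at a time. An illustration of the shape a rule row is assumed to take under this contract; this is not a shipped rule:

    # Hypothetical rule row: "Expression" receives a single TOM object for the
    # given scope and returns True when the object violates the rule.
    sample_rule = {
        "Category": "Performance",
        "Scope": "Column",
        "Rule Name": "Avoid floating point data types",
        "Expression": lambda obj: str(obj.DataType) == "Double",
        "Severity": "Warning",
        "Description": "Floating point columns can accumulate rounding errors.",
        "URL": "",
    }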
 
-    # flatten list of lists
-    flatten_dfs = [
-        df for dfs in rules_dataframe.apply(execute_rule, axis=1).tolist() for df in dfs
-    ]
+        for i, r in rules.iterrows():
+            ruleName = r["Rule Name"]
+            expr = r["Expression"]
+            scopes = r["Scope"]
+
+            if isinstance(scopes, str):
+                scopes = [scopes]
+
+            for scope in scopes:
+                func = scope_to_dataframe[scope][0]
+                nm = scope_to_dataframe[scope][1]
+
+                if scope == "Model":
+                    x = []
+                    if expr(func):
+                        x = ["Model"]
+                elif scope == "Measure":
+                    x = [nm(obj) for obj in tom.all_measures() if expr(obj)]
+                elif scope == "Column":
+                    x = [nm(obj) for obj in tom.all_columns() if expr(obj)]
+                elif scope == "Partition":
+                    x = [nm(obj) for obj in tom.all_partitions() if expr(obj)]
+                elif scope == "Hierarchy":
+                    x = [nm(obj) for obj in tom.all_hierarchies() if expr(obj)]
+                elif scope == "Table":
+                    x = [nm(obj) for obj in tom.model.Tables if expr(obj)]
+                elif scope == "Relationship":
+                    x = [nm(obj) for obj in tom.model.Relationships if expr(obj)]
+                elif scope == "Role":
+                    x = [nm(obj) for obj in tom.model.Roles if expr(obj)]
+                elif scope == "Row Level Security":
+                    x = [nm(obj) for obj in tom.all_rls() if expr(obj)]
+                elif scope == "Calculation Item":
+                    x = [nm(obj) for obj in tom.all_calculation_items() if expr(obj)]
+
+                if len(x) > 0:
+                    new_data = {"Object Name": x, "Scope": scope, "Rule Name": ruleName}
+                    violations = pd.concat(
+                        [violations, pd.DataFrame(new_data)], ignore_index=True
+                    )
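One observation on the loop above: `pd.concat` inside the scope loop re-copies the accumulated frame on every hit, which is quadratic in the number of violations. Collecting the per-hit frames and concatenating once is equivalent and cheaper; a sketch of that refactor (hypothetical, not what ships):

    import pandas as pd

    # Hypothetical pre-computed hits: (object names, scope, rule name) per rule.
    results = [
        (["'Sales'[Amount]"], "Column", "Avoid floating point data types"),
        (["Sales"], "Table", "Reduce usage of calculated tables"),
    ]

    frames = [
        pd.DataFrame({"Object Name": x, "Scope": scope, "Rule Name": rule})
        for x, scope, rule in results
    ]
    # Concatenate once at the end instead of on every hit.
    violations = (
        pd.concat(frames, ignore_index=True)
        if frames
        else pd.DataFrame(columns=["Object Name", "Scope", "Rule Name"])
    )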
 
-    finalDF = pd.concat(flatten_dfs, ignore_index=True)
+        prepDF = pd.merge(
+            violations,
+            rules[["Rule Name", "Category", "Severity", "Description", "URL"]],
+            left_on="Rule Name",
+            right_on="Rule Name",
+            how="left",
+        )
+        prepDF.rename(columns={"Scope": "Object Type"}, inplace=True)
+        finalDF = prepDF[
+            [
+                "Category",
+                "Rule Name",
+                "Severity",
+                "Object Type",
+                "Object Name",
+                "Description",
+                "URL",
+            ]
+        ]
 
     if export:
-        lakeAttach = lakehouse_attached()
-        if lakeAttach is False:
-            raise ValueError(f"{icons.red_dot} In order to save the Best Practice Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook.")
+        if not lakehouse_attached():
+            raise ValueError(
+                f"{icons.red_dot} In order to save the Best Practice Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
+            )
 
         dfExport = finalDF.copy()
         delta_table_name = "modelbparesults"
 
         lakehouse_id = fabric.get_lakehouse_id()
+        lake_workspace = fabric.get_workspace_id()
         lakehouse = resolve_lakehouse_name(
-            lakehouse_id=lakehouse_id, workspace=workspace
+            lakehouse_id=lakehouse_id, workspace=lake_workspace
         )
 
-        lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+        lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace)
         lakeT_filt = lakeT[lakeT["Table Name"] == delta_table_name]
 
-        dfExport["Severity"].replace("⚠️", "Warning", inplace=True)
-        dfExport["Severity"].replace("\u274C", "Error", inplace=True)
-        dfExport["Severity"].replace("ℹ️", "Info", inplace=True)
+        dfExport["Severity"].replace(icons.severity_mapping, inplace=True)
 
         spark = SparkSession.builder.getOrCreate()
         query = f"SELECT MAX(RunId) FROM {lakehouse}.{delta_table_name}"
@@ -1210,23 +380,42 @@ def run_model_bpa(
             runId = maxRunId + 1
 
         now = datetime.datetime.now()
+        dfD = fabric.list_datasets(workspace=workspace, mode="rest")
+        dfD_filt = dfD[dfD["Dataset Name"] == dataset]
+        configured_by = dfD_filt["Configured By"].iloc[0]
+        capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace)
+        dfExport["Capacity Name"] = capacity_name
+        dfExport["Capacity Id"] = capacity_id
         dfExport["Workspace Name"] = workspace
+        dfExport["Workspace Id"] = fabric.resolve_workspace_id(workspace)
         dfExport["Dataset Name"] = dataset
+        dfExport["Dataset Id"] = resolve_dataset_id(dataset, workspace)
+        dfExport["Configured By"] = configured_by
         dfExport["Timestamp"] = now
         dfExport["RunId"] = runId
+        dfExport["Configured By"] = configured_by
 
         dfExport["RunId"] = dfExport["RunId"].astype("int")
 
-        colName = "Workspace Name"
+        colName = "Capacity Name"
         dfExport.insert(0, colName, dfExport.pop(colName))
-        colName = "Dataset Name"
+        colName = "Capacity Id"
         dfExport.insert(1, colName, dfExport.pop(colName))
+        colName = "Workspace Name"
+        dfExport.insert(2, colName, dfExport.pop(colName))
+        colName = "Workspace Id"
+        dfExport.insert(3, colName, dfExport.pop(colName))
+        colName = "Dataset Name"
+        dfExport.insert(4, colName, dfExport.pop(colName))
+        colName = "Configured By"
+        dfExport.insert(5, colName, dfExport.pop(colName))
 
         dfExport.columns = dfExport.columns.str.replace(" ", "_")
-        spark_df = spark.createDataFrame(dfExport)
-        spark_df.write.mode("append").format("delta").saveAsTable(delta_table_name)
-        print(
-            f"{icons.green_dot} Model Best Practice Analyzer results for the '{dataset}' semantic model have been appended to the '{delta_table_name}' delta table."
+        save_as_delta_table(
+            dataframe=dfExport,
+            delta_table_name=delta_table_name,
+            write_mode="append",
+            merge_schema=True,
         )
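Export now routes through the new `save_as_delta_table` helper (added to `_helper_functions.py` in this release) with `merge_schema=True`, so the extra Capacity/Workspace/Dataset identity columns can still be appended to a `modelbparesults` table created by an older version. A hedged end-to-end usage sketch; the dataset and workspace names are illustrative, and a lakehouse must be attached to the notebook for the export step:

    import sempy_labs as labs

    labs.run_model_bpa(
        dataset="AdventureWorks",   # illustrative semantic model name
        workspace="Sales",          # illustrative workspace name
        language="fr-FR",           # translate rules via the bundled .po catalog
        export=True,                # append results to the 'modelbparesults' delta table
    )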
 
     if return_dataframe:
@@ -1330,7 +519,7 @@ def run_model_bpa(
         content_html += '<table border="1">'
         content_html += "<tr><th>Rule Name</th><th>Object Type</th><th>Object Name</th><th>Severity</th></tr>"
         for _, row in df.iterrows():
-            content_html += f"<tr>"
+            content_html += "<tr>"
             if pd.notnull(row["URL"]):
                 content_html += f'<td class="tooltip" onmouseover="adjustTooltipPosition(event)"><a href="{row["URL"]}">{row["Rule Name"]}</a><span class="tooltiptext">{row["Description"]}</span></td>'
             elif pd.notnull(row["Description"]):
@@ -1340,7 +529,7 @@ def run_model_bpa(
             content_html += f'<td>{row["Object Type"]}</td>'
             content_html += f'<td>{row["Object Name"]}</td>'
             content_html += f'<td>{row["Severity"]}</td>'
-            content_html += f"</tr>"
+            content_html += "</tr>"
         content_html += "</table>"
 
         content_html += "</div>"