semantic-link-labs 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.6.0.dist-info}/METADATA +2 -2
- semantic_link_labs-0.6.0.dist-info/RECORD +54 -0
- {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.6.0.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +19 -13
- sempy_labs/_ai.py +43 -24
- sempy_labs/_clear_cache.py +4 -5
- sempy_labs/_connections.py +77 -70
- sempy_labs/_dax.py +7 -9
- sempy_labs/_generate_semantic_model.py +55 -44
- sempy_labs/_helper_functions.py +13 -6
- sempy_labs/_icons.py +14 -0
- sempy_labs/_list_functions.py +491 -304
- sempy_labs/_model_auto_build.py +4 -3
- sempy_labs/_model_bpa.py +131 -1118
- sempy_labs/_model_bpa_rules.py +831 -0
- sempy_labs/_model_dependencies.py +14 -12
- sempy_labs/_one_lake_integration.py +11 -5
- sempy_labs/_query_scale_out.py +89 -81
- sempy_labs/_refresh_semantic_model.py +16 -10
- sempy_labs/_translations.py +213 -287
- sempy_labs/_vertipaq.py +53 -37
- sempy_labs/directlake/__init__.py +2 -0
- sempy_labs/directlake/_directlake_schema_compare.py +12 -5
- sempy_labs/directlake/_directlake_schema_sync.py +13 -19
- sempy_labs/directlake/_fallback.py +5 -3
- sempy_labs/directlake/_get_directlake_lakehouse.py +1 -1
- sempy_labs/directlake/_get_shared_expression.py +4 -2
- sempy_labs/directlake/_guardrails.py +3 -3
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +17 -10
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +3 -2
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +10 -5
- sempy_labs/directlake/_update_directlake_partition_entity.py +132 -9
- sempy_labs/directlake/_warm_cache.py +6 -3
- sempy_labs/lakehouse/_get_lakehouse_columns.py +1 -1
- sempy_labs/lakehouse/_get_lakehouse_tables.py +5 -3
- sempy_labs/lakehouse/_lakehouse.py +2 -1
- sempy_labs/lakehouse/_shortcuts.py +19 -12
- sempy_labs/migration/__init__.py +1 -1
- sempy_labs/migration/_create_pqt_file.py +21 -15
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +16 -13
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +17 -18
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +43 -40
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +14 -14
- sempy_labs/migration/_migration_validation.py +2 -2
- sempy_labs/migration/_refresh_calc_tables.py +8 -5
- sempy_labs/report/__init__.py +2 -2
- sempy_labs/report/_generate_report.py +10 -5
- sempy_labs/report/_report_functions.py +67 -29
- sempy_labs/report/_report_rebind.py +9 -8
- sempy_labs/tom/__init__.py +1 -4
- sempy_labs/tom/_model.py +555 -152
- semantic_link_labs-0.5.0.dist-info/RECORD +0 -53
- {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.6.0.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.6.0.dist-info}/top_level.txt +0 -0
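The headline change in this release is visible in the file list above: the Best Practice Analyzer (BPA) rule definitions move out of sempy_labs/_model_bpa.py into the new sempy_labs/_model_bpa_rules.py module (+831 lines), and run_model_bpa gains `rules` and `extended` parameters, as the _model_bpa.py diff below shows. A minimal usage sketch, assuming only the signature visible in that diff and that run_model_bpa is exported at the package root (the dataset and workspace names are placeholders):

    import sempy_labs as labs

    # extended=True first stores Vertipaq Analyzer statistics in the model
    # (via set_vertipaq_annotations, per the new docstring); when `rules` is
    # omitted, the defaults come from sempy_labs._model_bpa_rules.
    labs.run_model_bpa(
        dataset="AdventureWorks",   # placeholder semantic model name
        workspace="My Workspace",   # placeholder workspace name
        extended=True,
        return_dataframe=True,      # return a pandas dataframe instead of the visualization
    )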
sempy_labs/_model_bpa.py
CHANGED
|
@@ -1,712 +1,32 @@
|
|
|
1
|
-
import sempy
|
|
2
1
|
import sempy.fabric as fabric
|
|
3
2
|
import pandas as pd
|
|
4
|
-
import
|
|
5
|
-
import
|
|
3
|
+
import warnings
|
|
4
|
+
import datetime
|
|
6
5
|
from IPython.display import display, HTML
|
|
7
6
|
from pyspark.sql import SparkSession
|
|
8
|
-
from sempy_labs._model_dependencies import
|
|
9
|
-
from sempy_labs._helper_functions import
|
|
7
|
+
from sempy_labs._model_dependencies import get_model_calc_dependencies
|
|
8
|
+
from sempy_labs._helper_functions import (
|
|
9
|
+
format_dax_object_name,
|
|
10
|
+
resolve_lakehouse_name,
|
|
11
|
+
create_relationship_name,
|
|
12
|
+
)
|
|
10
13
|
from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
|
|
11
14
|
from sempy_labs.lakehouse._lakehouse import lakehouse_attached
|
|
12
|
-
from
|
|
15
|
+
from sempy_labs.tom import connect_semantic_model
|
|
16
|
+
from sempy_labs._model_bpa_rules import model_bpa_rules
|
|
17
|
+
from typing import Optional
|
|
13
18
|
from sempy._utils._log import log
|
|
14
19
|
import sempy_labs._icons as icons
|
|
15
20
|
|
|
16
|
-
def model_bpa_rules():
|
|
17
|
-
"""
|
|
18
|
-
Shows the default rules for the semantic model BPA used by the run_model_bpa function.
|
|
19
|
-
|
|
20
|
-
Parameters
|
|
21
|
-
----------
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
Returns
|
|
25
|
-
-------
|
|
26
|
-
pandas.DataFrame
|
|
27
|
-
A pandas dataframe containing the default rules for the run_model_bpa function.
|
|
28
|
-
"""
|
|
29
|
-
|
|
30
|
-
df_rules = pd.DataFrame(
|
|
31
|
-
[
|
|
32
|
-
(
|
|
33
|
-
"Performance",
|
|
34
|
-
"Column",
|
|
35
|
-
"Warning",
|
|
36
|
-
"Do not use floating point data types",
|
|
37
|
-
lambda df: df["Data Type"] == "Double",
|
|
38
|
-
'The "Double" floating point data type should be avoided, as it can result in unpredictable roundoff errors and decreased performance in certain scenarios. Use "Int64" or "Decimal" where appropriate (but note that "Decimal" is limited to 4 digits after the decimal sign).',
|
|
39
|
-
),
|
|
40
|
-
(
|
|
41
|
-
"Performance",
|
|
42
|
-
"Column",
|
|
43
|
-
"Warning",
|
|
44
|
-
"Avoid using calculated columns",
|
|
45
|
-
lambda df: df["Type"] == "Calculated",
|
|
46
|
-
"Calculated columns do not compress as well as data columns so they take up more memory. They also slow down processing times for both the table as well as process recalc. Offload calculated column logic to your data warehouse and turn these calculated columns into data columns.",
|
|
47
|
-
"https://www.elegantbi.com/post/top10bestpractices",
|
|
48
|
-
),
|
|
49
|
-
(
|
|
50
|
-
"Performance",
|
|
51
|
-
"Relationship",
|
|
52
|
-
"Warning",
|
|
53
|
-
"Check if bi-directional and many-to-many relationships are valid",
|
|
54
|
-
lambda df: (df["Multiplicity"] == "m:m")
|
|
55
|
-
| (df["Cross Filtering Behavior"] == "BothDirections"),
|
|
56
|
-
"Bi-directional and many-to-many relationships may cause performance degradation or even have unintended consequences. Make sure to check these specific relationships to ensure they are working as designed and are actually necessary.",
|
|
57
|
-
"https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax",
|
|
58
|
-
),
|
|
59
|
-
(
|
|
60
|
-
"Performance",
|
|
61
|
-
"Row Level Security",
|
|
62
|
-
"Info",
|
|
63
|
-
"Check if dynamic row level security (RLS) is necessary",
|
|
64
|
-
lambda df: df["Is Dynamic"],
|
|
65
|
-
"Usage of dynamic row level security (RLS) can add memory and performance overhead. Please research the pros/cons of using it.",
|
|
66
|
-
"https://docs.microsoft.com/power-bi/admin/service-admin-rls",
|
|
67
|
-
),
|
|
68
|
-
(
|
|
69
|
-
"Performance",
|
|
70
|
-
"Table",
|
|
71
|
-
"Warning",
|
|
72
|
-
"Avoid using many-to-many relationships on tables used for dynamic row level security",
|
|
73
|
-
lambda df: (df["Used in M2M Relationship"] == True)
|
|
74
|
-
& (df["Used in Dynamic RLS"] == True),
|
|
75
|
-
"Using many-to-many relationships on tables which use dynamic row level security can cause serious query performance degradation. This pattern's performance problems compound when snowflaking multiple many-to-many relationships against a table which contains row level security. Instead, use one of the patterns shown in the article below where a single dimension table relates many-to-one to a security table.",
|
|
76
|
-
"https://www.elegantbi.com/post/dynamicrlspatterns",
|
|
77
|
-
),
|
|
78
|
-
(
|
|
79
|
-
"Performance",
|
|
80
|
-
"Relationship",
|
|
81
|
-
"Warning",
|
|
82
|
-
"Many-to-many relationships should be single-direction",
|
|
83
|
-
lambda df: (df["Multiplicity"] == "m:m")
|
|
84
|
-
& (df["Cross Filtering Behavior"] == "BothDirections"),
|
|
85
|
-
),
|
|
86
|
-
(
|
|
87
|
-
"Performance",
|
|
88
|
-
"Column",
|
|
89
|
-
"Warning",
|
|
90
|
-
"Set IsAvailableInMdx to false on non-attribute columns",
|
|
91
|
-
lambda df: (df["Is Direct Lake"] == False)
|
|
92
|
-
& (df["Is Available in MDX"] == True)
|
|
93
|
-
& ((df["Hidden"] == True) | (df["Parent Is Hidden"] == True))
|
|
94
|
-
& (df["Used in Sort By"] == False)
|
|
95
|
-
& (df["Used in Hierarchy"] == False)
|
|
96
|
-
& (df["Sort By Column"] == None),
|
|
97
|
-
"To speed up processing time and conserve memory after processing, attribute hierarchies should not be built for columns that are never used for slicing by MDX clients. In other words, all hidden columns that are not used as a Sort By Column or referenced in user hierarchies should have their IsAvailableInMdx property set to false. The IsAvailableInMdx property is not relevant for Direct Lake models.",
|
|
98
|
-
"https://blog.crossjoin.co.uk/2018/07/02/isavailableinmdx-ssas-tabular",
|
|
99
|
-
),
|
|
100
|
-
# ('Performance', 'Partition', 'Warning', "Set 'Data Coverage Definition' property on the DirectQuery partition of a hybrid table",
|
|
101
|
-
# lambda df: (df['Data Coverage Definition Expression'].isnull()) & (df['Mode'] == 'DirectQuery') & (df['Import Partitions'] > 0) & (df['Has Date Table']),
|
|
102
|
-
# "Setting the 'Data Coverage Definition' property may lead to better performance because the engine knows when it can only query the import-portion of the table and when it needs to query the DirectQuery portion of the table.",
|
|
103
|
-
# "https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions",
|
|
104
|
-
# ),
|
|
105
|
-
(
|
|
106
|
-
"Performance",
|
|
107
|
-
"Table",
|
|
108
|
-
"Warning",
|
|
109
|
-
"Set dimensions tables to dual mode instead of import when using DirectQuery on fact tables",
|
|
110
|
-
lambda df: (df["Import Partitions"] == 1)
|
|
111
|
-
& (df["Model Has DQ"])
|
|
112
|
-
& (df["Used in Relationship x:1"]),
|
|
113
|
-
"https://learn.microsoft.com/power-bi/transform-model/desktop-storage-mode#propagation-of-the-dual-setting",
|
|
114
|
-
),
|
|
115
|
-
(
|
|
116
|
-
"Performance",
|
|
117
|
-
"Partition",
|
|
118
|
-
"Warning",
|
|
119
|
-
"Minimize Power Query transformations",
|
|
120
|
-
lambda df: (df["Source Type"] == "M")
|
|
121
|
-
& (
|
|
122
|
-
('Table.Combine("' in df["Query"])
|
|
123
|
-
| ('Table.Join("' in df["Query"])
|
|
124
|
-
| ('Table.NestedJoin("' in df["Query"])
|
|
125
|
-
| ('Table.AddColumn("' in df["Query"])
|
|
126
|
-
| ('Table.Group("' in df["Query"])
|
|
127
|
-
| ('Table.Sort("' in df["Query"])
|
|
128
|
-
| ('Table.Sort("' in df["Query"])
|
|
129
|
-
| ('Table.Pivot("' in df["Query"])
|
|
130
|
-
| ('Table.Unpivot("' in df["Query"])
|
|
131
|
-
| ('Table.UnpivotOtherColumns("' in df["Query"])
|
|
132
|
-
| ('Table.Distinct("' in df["Query"])
|
|
133
|
-
| ('[Query=(""SELECT' in df["Query"])
|
|
134
|
-
| ("Value.NativeQuery" in df["Query"])
|
|
135
|
-
| ("OleDb.Query" in df["Query"])
|
|
136
|
-
| ("Odbc.Query" in df["Query"])
|
|
137
|
-
),
|
|
138
|
-
"Minimize Power Query transformations in order to improve model processing performance. It is a best practice to offload these transformations to the data warehouse if possible. Also, please check whether query folding is occurring within your model. Please reference the article below for more information on query folding.",
|
|
139
|
-
"https://docs.microsoft.com/power-query/power-query-folding",
|
|
140
|
-
),
|
|
141
|
-
(
|
|
142
|
-
"Performance",
|
|
143
|
-
"Table",
|
|
144
|
-
"Warning",
|
|
145
|
-
"Consider a star-schema instead of a snowflake architecture",
|
|
146
|
-
lambda df: (df["Type"] != "Calculation Group")
|
|
147
|
-
& df["Used in Relationship Both Sides"],
|
|
148
|
-
"Generally speaking, a star-schema is the optimal architecture for tabular models. That being the case, there are valid cases to use a snowflake approach. Please check your model and consider moving to a star-schema architecture.",
|
|
149
|
-
"https://docs.microsoft.com/power-bi/guidance/star-schema",
|
|
150
|
-
),
|
|
151
|
-
(
|
|
152
|
-
"Performance",
|
|
153
|
-
"Table",
|
|
154
|
-
"Warning",
|
|
155
|
-
"Reduce usage of calculated tables",
|
|
156
|
-
lambda df: df["Type"] == "Calculated Table",
|
|
157
|
-
"Migrate calculated table logic to your data warehouse. Reliance on calculated tables will lead to technical debt and potential misalignments if you have multiple models on your platform.",
|
|
158
|
-
),
|
|
159
|
-
(
|
|
160
|
-
"Performance",
|
|
161
|
-
"Column",
|
|
162
|
-
"Warning",
|
|
163
|
-
"Reduce usage of calculated columns that use the RELATED function",
|
|
164
|
-
lambda df: (df["Type"] == "Calculated")
|
|
165
|
-
& (df["Source"].str.contains(r"related\s*\(", case=False)),
|
|
166
|
-
"Calculated columns do not compress as well as data columns and may cause longer processing times. As such, calculated columns should be avoided if possible. One scenario where they may be easier to avoid is if they use the RELATED function.",
|
|
167
|
-
"https://www.sqlbi.com/articles/storage-differences-between-calculated-columns-and-calculated-tables",
|
|
168
|
-
),
|
|
169
|
-
(
|
|
170
|
-
"Performance",
|
|
171
|
-
"Model",
|
|
172
|
-
"Warning",
|
|
173
|
-
"Avoid excessive bi-directional or many-to-many relationships",
|
|
174
|
-
lambda df: (
|
|
175
|
-
df["M2M or BiDi Relationship Count"] / df["Relationship Count"]
|
|
176
|
-
)
|
|
177
|
-
> 0.3,
|
|
178
|
-
"Limit use of b-di and many-to-many relationships. This rule flags the model if more than 30% of relationships are bi-di or many-to-many.",
|
|
179
|
-
"https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax",
|
|
180
|
-
),
|
|
181
|
-
(
|
|
182
|
-
"Performance",
|
|
183
|
-
"Column",
|
|
184
|
-
"Warning",
|
|
185
|
-
"Avoid bi-directional or many-to-many relationships against high-cardinality columns",
|
|
186
|
-
lambda df: df["Used in M2M/BiDi Relationship"]
|
|
187
|
-
& df["Column Cardinality"]
|
|
188
|
-
> 100000,
|
|
189
|
-
"For best performance, it is recommended to avoid using bi-directional relationships against high-cardinality columns",
|
|
190
|
-
),
|
|
191
|
-
(
|
|
192
|
-
"Performance",
|
|
193
|
-
"Table",
|
|
194
|
-
"Warning",
|
|
195
|
-
"Remove auto-date table",
|
|
196
|
-
lambda df: (df["Type"] == "Calculated Table")
|
|
197
|
-
& (
|
|
198
|
-
(df["Name"].str.startswith("DateTableTemplate_"))
|
|
199
|
-
| (df["Name"].str.startswith("LocalDateTable_"))
|
|
200
|
-
),
|
|
201
|
-
"Avoid using auto-date tables. Make sure to turn off auto-date table in the settings in Power BI Desktop. This will save memory resources.",
|
|
202
|
-
"https://www.youtube.com/watch?v=xu3uDEHtCrg",
|
|
203
|
-
),
|
|
204
|
-
(
|
|
205
|
-
"Performance",
|
|
206
|
-
"Table",
|
|
207
|
-
"Warning",
|
|
208
|
-
"Date/calendar tables should be marked as a date table",
|
|
209
|
-
lambda df: (
|
|
210
|
-
(df["Name"].str.contains(r"date", case=False))
|
|
211
|
-
| (df["Name"].str.contains(r"calendar", case=False))
|
|
212
|
-
)
|
|
213
|
-
& (df["Data Category"] != "Time"),
|
|
214
|
-
"This rule looks for tables that contain the words 'date' or 'calendar' as they should likely be marked as a date table.",
|
|
215
|
-
"https://docs.microsoft.com/power-bi/transform-model/desktop-date-tables",
|
|
216
|
-
),
|
|
217
|
-
(
|
|
218
|
-
"Performance",
|
|
219
|
-
"Table",
|
|
220
|
-
"Warning",
|
|
221
|
-
"Large tables should be partitioned",
|
|
222
|
-
lambda df: (df["Is Direct Lake"] == False)
|
|
223
|
-
& (df["Partition Count"] == 1)
|
|
224
|
-
& (df["Row Count"] > 25000000),
|
|
225
|
-
"Large tables should be partitioned in order to optimize processing. This is not relevant for semantic models in Direct Lake mode as they can only have one partition per table.",
|
|
226
|
-
),
|
|
227
|
-
(
|
|
228
|
-
"Performance",
|
|
229
|
-
"Row Level Security",
|
|
230
|
-
"Warning",
|
|
231
|
-
"Limit row level security (RLS) logic",
|
|
232
|
-
lambda df: df["Filter Expression"].str.contains(
|
|
233
|
-
"|".join(["right", "left", "filter", "upper", "lower", "find"]),
|
|
234
|
-
case=False,
|
|
235
|
-
),
|
|
236
|
-
"Try to simplify the DAX used for row level security. Usage of the functions within this rule can likely be offloaded to the upstream systems (data warehouse).",
|
|
237
|
-
),
|
|
238
|
-
(
|
|
239
|
-
"Performance",
|
|
240
|
-
"Model",
|
|
241
|
-
"Warning",
|
|
242
|
-
"Model should have a date table",
|
|
243
|
-
lambda df: df["Has Date Table"],
|
|
244
|
-
"Generally speaking, models should generally have a date table. Models that do not have a date table generally are not taking advantage of features such as time intelligence or may not have a properly structured architecture.",
|
|
245
|
-
),
|
|
246
|
-
(
|
|
247
|
-
"Performance",
|
|
248
|
-
"Measure",
|
|
249
|
-
"Warning",
|
|
250
|
-
"Measures using time intelligence and model is using Direct Query",
|
|
251
|
-
lambda df: df["DQ Date Function Used"],
|
|
252
|
-
"At present, time intelligence functions are known to not perform as well when using Direct Query. If you are having performance issues, you may want to try alternative solutions such as adding columns in the fact table that show previous year or previous month data.",
|
|
253
|
-
),
|
|
254
|
-
(
|
|
255
|
-
"Error Prevention",
|
|
256
|
-
"Calculation Item",
|
|
257
|
-
"Error",
|
|
258
|
-
"Calculation items must have an expression",
|
|
259
|
-
lambda df: df["Expression"].str.len() == 0,
|
|
260
|
-
"Calculation items must have an expression. Without an expression, they will not show any values.",
|
|
261
|
-
),
|
|
262
|
-
(
|
|
263
|
-
"Error Prevention",
|
|
264
|
-
["Table", "Column", "Measure", "Hierarchy", "Partition"],
|
|
265
|
-
"Error",
|
|
266
|
-
"Avoid invalid characters in names",
|
|
267
|
-
lambda df: df["Name"].apply(
|
|
268
|
-
lambda x: any(
|
|
269
|
-
unicodedata.category(char) == "Cc" and not char.isspace()
|
|
270
|
-
for char in x
|
|
271
|
-
)
|
|
272
|
-
),
|
|
273
|
-
"This rule identifies if a name for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.",
|
|
274
|
-
),
|
|
275
|
-
(
|
|
276
|
-
"Error Prevention",
|
|
277
|
-
["Table", "Column", "Measure", "Hierarchy"],
|
|
278
|
-
"Error",
|
|
279
|
-
"Avoid invalid characters in descriptions",
|
|
280
|
-
lambda df: df["Description"].apply(
|
|
281
|
-
lambda x: any(
|
|
282
|
-
unicodedata.category(char) == "Cc" and not char.isspace()
|
|
283
|
-
for char in x
|
|
284
|
-
)
|
|
285
|
-
),
|
|
286
|
-
"This rule identifies if a description for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.",
|
|
287
|
-
),
|
|
288
|
-
(
|
|
289
|
-
"Error Prevention",
|
|
290
|
-
"Relationship",
|
|
291
|
-
"Warning",
|
|
292
|
-
"Relationship columns should be of the same data type",
|
|
293
|
-
lambda df: df["From Column Data Type"] != df["To Column Data Type"],
|
|
294
|
-
"Columns used in a relationship should be of the same data type. Ideally, they will be of integer data type (see the related rule '[Formatting] Relationship columns should be of integer data type'). Having columns within a relationship which are of different data types may lead to various issues.",
|
|
295
|
-
),
|
|
296
|
-
(
|
|
297
|
-
"Error Prevention",
|
|
298
|
-
"Column",
|
|
299
|
-
"Error",
|
|
300
|
-
"Data columns must have a source column",
|
|
301
|
-
lambda df: (df["Type"] == "Data") & (df["Source"].str.len() == 0),
|
|
302
|
-
"Data columns must have a source column. A data column without a source column will cause an error when processing the model.",
|
|
303
|
-
),
|
|
304
|
-
(
|
|
305
|
-
"Error Prevention",
|
|
306
|
-
"Column",
|
|
307
|
-
"Warning",
|
|
308
|
-
"Set IsAvailableInMdx to true on necessary columns",
|
|
309
|
-
lambda df: (df["Is Direct Lake"] == False)
|
|
310
|
-
& (df["Is Available in MDX"] == False)
|
|
311
|
-
& (
|
|
312
|
-
(df["Used in Sort By"] == True)
|
|
313
|
-
| (df["Used in Hierarchy"] == True)
|
|
314
|
-
| (df["Sort By Column"] != None)
|
|
315
|
-
),
|
|
316
|
-
"In order to avoid errors, ensure that attribute hierarchies are enabled if a column is used for sorting another column, used in a hierarchy, used in variations, or is sorted by another column. The IsAvailableInMdx property is not relevant for Direct Lake models.",
|
|
317
|
-
),
|
|
318
|
-
(
|
|
319
|
-
"Error Prevention",
|
|
320
|
-
"Table",
|
|
321
|
-
"Error",
|
|
322
|
-
"Avoid the USERELATIONSHIP function and RLS against the same table",
|
|
323
|
-
lambda df: (df["USERELATIONSHIP Used"] == True)
|
|
324
|
-
& (df["Used in RLS"] == True),
|
|
325
|
-
"The USERELATIONSHIP function may not be used against a table which also leverages row-level security (RLS). This will generate an error when using the particular measure in a visual. This rule will highlight the table which is used in a measure's USERELATIONSHIP function as well as RLS.",
|
|
326
|
-
"https://blog.crossjoin.co.uk/2013/05/10/userelationship-and-tabular-row-security",
|
|
327
|
-
),
|
|
328
|
-
(
|
|
329
|
-
"DAX Expressions",
|
|
330
|
-
"Measure",
|
|
331
|
-
"Warning",
|
|
332
|
-
"Avoid using the IFERROR function",
|
|
333
|
-
lambda df: df["Measure Expression"].str.contains(
|
|
334
|
-
r"irerror\s*\(", case=False
|
|
335
|
-
),
|
|
336
|
-
"Avoid using the IFERROR function as it may cause performance degradation. If you are concerned about a divide-by-zero error, use the DIVIDE function as it naturally resolves such errors as blank (or you can customize what should be shown in case of such an error).",
|
|
337
|
-
"https://www.elegantbi.com/post/top10bestpractices",
|
|
338
|
-
),
|
|
339
|
-
(
|
|
340
|
-
"DAX Expressions",
|
|
341
|
-
"Measure",
|
|
342
|
-
"Warning",
|
|
343
|
-
"Use the TREATAS function instead of INTERSECT for virtual relationships",
|
|
344
|
-
lambda df: df["Measure Expression"].str.contains(
|
|
345
|
-
r"intersect\s*\(", case=False
|
|
346
|
-
),
|
|
347
|
-
"The TREATAS function is more efficient and provides better performance than the INTERSECT function when used in virutal relationships.",
|
|
348
|
-
"https://www.sqlbi.com/articles/propagate-filters-using-treatas-in-dax",
|
|
349
|
-
),
|
|
350
|
-
(
|
|
351
|
-
"DAX Expressions",
|
|
352
|
-
"Measure",
|
|
353
|
-
"Warning",
|
|
354
|
-
"The EVALUATEANDLOG function should not be used in production models",
|
|
355
|
-
lambda df: df["Measure Expression"].str.contains(
|
|
356
|
-
r"evaluateandlog\s*\(", case=False
|
|
357
|
-
),
|
|
358
|
-
"The EVALUATEANDLOG function is meant to be used only in development/test environments and should not be used in production models.",
|
|
359
|
-
"https://pbidax.wordpress.com/2022/08/16/introduce-the-dax-evaluateandlog-function",
|
|
360
|
-
),
|
|
361
|
-
(
|
|
362
|
-
"DAX Expressions",
|
|
363
|
-
"Measure",
|
|
364
|
-
"Warning",
|
|
365
|
-
"Measures should not be direct references of other measures",
|
|
366
|
-
lambda df: df["Measure Expression"]
|
|
367
|
-
.str.strip()
|
|
368
|
-
.isin(df["Measure Object"]),
|
|
369
|
-
"This rule identifies measures which are simply a reference to another measure. As an example, consider a model with two measures: [MeasureA] and [MeasureB]. This rule would be triggered for MeasureB if MeasureB's DAX was MeasureB:=[MeasureA]. Such duplicative measures should be removed.",
|
|
370
|
-
),
|
|
371
|
-
(
|
|
372
|
-
"DAX Expressions",
|
|
373
|
-
"Measure",
|
|
374
|
-
"Warning",
|
|
375
|
-
"No two measures should have the same definition",
|
|
376
|
-
lambda df: df["Measure Expression"]
|
|
377
|
-
.apply(lambda x: re.sub(r"\s+", "", x))
|
|
378
|
-
.duplicated(keep=False),
|
|
379
|
-
"Two measures with different names and defined by the same DAX expression should be avoided to reduce redundancy.",
|
|
380
|
-
),
|
|
381
|
-
(
|
|
382
|
-
"DAX Expressions",
|
|
383
|
-
"Measure",
|
|
384
|
-
"Warning",
|
|
385
|
-
"Avoid addition or subtraction of constant values to results of divisions",
|
|
386
|
-
lambda df: df["Measure Expression"].str.contains(
|
|
387
|
-
"(?i)DIVIDE\\s*\\((\\s*.*?)\\)\\s*[+-]\\s*1"
|
|
388
|
-
or "\\/\\s*.*(?=[-+]\\s*1)",
|
|
389
|
-
regex=True,
|
|
390
|
-
),
|
|
391
|
-
),
|
|
392
|
-
(
|
|
393
|
-
"DAX Expressions",
|
|
394
|
-
"Measure",
|
|
395
|
-
"Warning",
|
|
396
|
-
"Avoid using '1-(x/y)' syntax",
|
|
397
|
-
lambda df: df["Measure Expression"].str.contains(
|
|
398
|
-
"[0-9]+\\s*[-+]\\s*[\\(]*\\s*(?i)SUM\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*\\[[A-Za-z0-9 _]+\\]\\s*\\)\\s*\\/"
|
|
399
|
-
or "[0-9]+\\s*[-+]\\s*(?i)DIVIDE\\s*\\(",
|
|
400
|
-
regex=True,
|
|
401
|
-
),
|
|
402
|
-
"Instead of using the '1-(x/y)' or '1+(x/y)' syntax to achieve a percentage calculation, use the basic DAX functions (as shown below). Using the improved syntax will generally improve the performance. The '1+/-...' syntax always returns a value whereas the solution without the '1+/-...' does not (as the value may be 'blank'). Therefore the '1+/-...' syntax may return more rows/columns which may result in a slower query speed. Let's clarify with an example: Avoid this: 1 - SUM ( 'Sales'[CostAmount] ) / SUM( 'Sales'[SalesAmount] ) Better: DIVIDE ( SUM ( 'Sales'[SalesAmount] ) - SUM ( 'Sales'[CostAmount] ), SUM ( 'Sales'[SalesAmount] ) ) Best: VAR x = SUM ( 'Sales'[SalesAmount] ) RETURN DIVIDE ( x - SUM ( 'Sales'[CostAmount] ), x )",
|
|
403
|
-
),
|
|
404
|
-
(
|
|
405
|
-
"DAX Expressions",
|
|
406
|
-
"Measure",
|
|
407
|
-
"Warning",
|
|
408
|
-
"Filter measure values by columns, not tables",
|
|
409
|
-
lambda df: df["Measure Expression"].str.contains(
|
|
410
|
-
"(?i)CALCULATE\\s*\\(\\s*[^,]+,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*\\[[^\\]]+\\]"
|
|
411
|
-
or "(?i)CALCULATETABLE\\s*\\([^,]*,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*\\[",
|
|
412
|
-
regex=True,
|
|
413
|
-
),
|
|
414
|
-
"Instead of using this pattern FILTER('Table',[Measure]>Value) for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below (if possible). Filtering on a specific column will produce a smaller table for the engine to process, thereby enabling faster performance. Using the VALUES function or the ALL function depends on the desired measure result.\nOption 1: FILTER(VALUES('Table'[Column]),[Measure] > Value)\nOption 2: FILTER(ALL('Table'[Column]),[Measure] > Value)",
|
|
415
|
-
"https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument",
|
|
416
|
-
),
|
|
417
|
-
(
|
|
418
|
-
"DAX Expressions",
|
|
419
|
-
"Measure",
|
|
420
|
-
"Warning",
|
|
421
|
-
"Filter column values with proper syntax",
|
|
422
|
-
lambda df: df["Measure Expression"].str.contains(
|
|
423
|
-
"(?i)CALCULATE\\s*\\(\\s*[^,]+,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*'*[A-Za-z0-9 _]+'*\\[[A-Za-z0-9 _]+\\]"
|
|
424
|
-
or "(?i)CALCULATETABLE\\s*\\([^,]*,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*'*[A-Za-z0-9 _]+'*\\[[A-Za-z0-9 _]+\\]",
|
|
425
|
-
regex=True,
|
|
426
|
-
),
|
|
427
|
-
"Instead of using this pattern FILTER('Table','Table'[Column]=\"Value\") for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below. As far as whether to use the KEEPFILTERS function, see the second reference link below.\nOption 1: KEEPFILTERS('Table'[Column]=\"Value\")\nOption 2: 'Table'[Column]=\"Value\"",
|
|
428
|
-
"https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument Reference: https://www.sqlbi.com/articles/using-keepfilters-in-dax",
|
|
429
|
-
),
|
|
430
|
-
(
|
|
431
|
-
"DAX Expressions",
|
|
432
|
-
"Measure",
|
|
433
|
-
"Warning",
|
|
434
|
-
"Use the DIVIDE function for division",
|
|
435
|
-
lambda df: df["Measure Expression"].str.contains(
|
|
436
|
-
'\\]\\s*\\/(?!\\/)(?!\\*)" or "\\)\\s*\\/(?!\\/)(?!\\*)', regex=True
|
|
437
|
-
),
|
|
438
|
-
'Use the DIVIDE function instead of using "/". The DIVIDE function resolves divide-by-zero cases. As such, it is recommended to use to avoid errors.',
|
|
439
|
-
"https://docs.microsoft.com/power-bi/guidance/dax-divide-function-operator",
|
|
440
|
-
),
|
|
441
|
-
(
|
|
442
|
-
"DAX Expressions",
|
|
443
|
-
"Measure",
|
|
444
|
-
"Error",
|
|
445
|
-
"Column references should be fully qualified",
|
|
446
|
-
lambda df: df["Has Unqualified Column Reference"],
|
|
447
|
-
"Using fully qualified column references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a column in DAX, first specify the table name, then specify the column name in square brackets.",
|
|
448
|
-
"https://www.elegantbi.com/post/top10bestpractices",
|
|
449
|
-
),
|
|
450
|
-
(
|
|
451
|
-
"DAX Expressions",
|
|
452
|
-
"Measure",
|
|
453
|
-
"Error",
|
|
454
|
-
"Measure references should be unqualified",
|
|
455
|
-
lambda df: df["Has Fully Qualified Measure Reference"],
|
|
456
|
-
"Using unqualified measure references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a measure using DAX, do not specify the table name. Use only the measure name in square brackets.",
|
|
457
|
-
"https://www.elegantbi.com/post/top10bestpractices",
|
|
458
|
-
),
|
|
459
|
-
(
|
|
460
|
-
"DAX Expressions",
|
|
461
|
-
"Relationship",
|
|
462
|
-
"Warning",
|
|
463
|
-
"Inactive relationships that are never activated",
|
|
464
|
-
lambda df: df["Inactive without USERELATIONSHIP"],
|
|
465
|
-
"Inactive relationships are activated using the USERELATIONSHIP function. If an inactive relationship is not referenced in any measure via this function, the relationship will not be used. It should be determined whether the relationship is not necessary or to activate the relationship via this method.",
|
|
466
|
-
"https://dax.guide/userelationship",
|
|
467
|
-
),
|
|
468
|
-
(
|
|
469
|
-
"Maintenance",
|
|
470
|
-
"Column",
|
|
471
|
-
"Warning",
|
|
472
|
-
"Remove unnecessary columns",
|
|
473
|
-
lambda df: (df["Hidden"] | df["Parent Is Hidden"])
|
|
474
|
-
& ~df["Used in Relationship"]
|
|
475
|
-
& ~df["Used in Sort By"]
|
|
476
|
-
& ~df["Used in Hierarchy"]
|
|
477
|
-
& (df["Referenced By"] == 0)
|
|
478
|
-
& ~(df["Used in RLS"]), # usedInOLS
|
|
479
|
-
"Hidden columns that are not referenced by any DAX expressions, relationships, hierarchy levels or Sort By-properties should be removed.",
|
|
480
|
-
),
|
|
481
|
-
(
|
|
482
|
-
"Maintenance",
|
|
483
|
-
"Measure",
|
|
484
|
-
"Warning",
|
|
485
|
-
"Remove unnecessary measures",
|
|
486
|
-
lambda df: df["Measure Hidden"] & (df["Referenced By"] == 0),
|
|
487
|
-
"Hidden measures that are not referenced by any DAX expressions should be removed for maintainability.",
|
|
488
|
-
),
|
|
489
|
-
# ('Maintenance', 'Role', 'Warning', 'Remove roles with no members',
|
|
490
|
-
# lambda df: df['Member Count'] == 0,
|
|
491
|
-
# ),
|
|
492
|
-
(
|
|
493
|
-
"Maintenance",
|
|
494
|
-
"Table",
|
|
495
|
-
"Warning",
|
|
496
|
-
"Ensure tables have relationships",
|
|
497
|
-
lambda df: (df["Used in Relationship"] == False)
|
|
498
|
-
& (df["Type"] != "Calculation Group"),
|
|
499
|
-
"This rule highlights tables which are not connected to any other table in the model with a relationship.",
|
|
500
|
-
),
|
|
501
|
-
(
|
|
502
|
-
"Maintenance",
|
|
503
|
-
"Table",
|
|
504
|
-
"Warning",
|
|
505
|
-
"Calculation groups with no calculation items",
|
|
506
|
-
lambda df: (df["Type"] == "Calculation Group")
|
|
507
|
-
& (df["Has Calculation Items"]),
|
|
508
|
-
),
|
|
509
|
-
(
|
|
510
|
-
"Maintenance",
|
|
511
|
-
"Column",
|
|
512
|
-
"Info",
|
|
513
|
-
"Visible objects with no description",
|
|
514
|
-
lambda df: (df["Hidden"] == False) & (df["Description"].str.len() == 0),
|
|
515
|
-
"Calculation groups have no function unless they have calculation items.",
|
|
516
|
-
),
|
|
517
|
-
(
|
|
518
|
-
"Formatting",
|
|
519
|
-
"Column",
|
|
520
|
-
"Warning",
|
|
521
|
-
"Provide format string for 'Date' columns",
|
|
522
|
-
lambda df: (df["Column Name"].str.contains(r"date", case=False))
|
|
523
|
-
& (df["Data Type"] == "DateTime")
|
|
524
|
-
& (df["Format String"] != "mm/dd/yyyy"),
|
|
525
|
-
'Columns of type "DateTime" that have "Month" in their names should be formatted as "mm/dd/yyyy".',
|
|
526
|
-
),
|
|
527
|
-
(
|
|
528
|
-
"Formatting",
|
|
529
|
-
"Column",
|
|
530
|
-
"Warning",
|
|
531
|
-
"Do not summarize numeric columns",
|
|
532
|
-
lambda df: (
|
|
533
|
-
(df["Data Type"] == "Int64")
|
|
534
|
-
| (df["Data Type"] == "Decimal")
|
|
535
|
-
| (df["Data Type"] == "Double")
|
|
536
|
-
)
|
|
537
|
-
& (df["Summarize By"] != "None")
|
|
538
|
-
& ~((df["Hidden"]) | (df["Parent Is Hidden"])),
|
|
539
|
-
'Numeric columns (integer, decimal, double) should have their SummarizeBy property set to "None" to avoid accidental summation in Power BI (create measures instead).',
|
|
540
|
-
),
|
|
541
|
-
(
|
|
542
|
-
"Formatting",
|
|
543
|
-
"Measure",
|
|
544
|
-
"Info",
|
|
545
|
-
"Provide format string for measures",
|
|
546
|
-
lambda df: ~((df["Measure Hidden"]) | (df["Parent Is Hidden"]))
|
|
547
|
-
& (df["Format String"].str.len() == 0),
|
|
548
|
-
"Visible measures should have their format string property assigned.",
|
|
549
|
-
),
|
|
550
|
-
(
|
|
551
|
-
"Formatting",
|
|
552
|
-
"Column",
|
|
553
|
-
"Info",
|
|
554
|
-
"Add data category for columns",
|
|
555
|
-
lambda df: (df["Data Category"] == "")
|
|
556
|
-
& (
|
|
557
|
-
(
|
|
558
|
-
(
|
|
559
|
-
(df["Column Name"].str.contains(r"country", case=False))
|
|
560
|
-
| (df["Column Name"].str.contains(r"city", case=False))
|
|
561
|
-
| (df["Column Name"].str.contains(r"continent", case=False))
|
|
562
|
-
)
|
|
563
|
-
& (df["Data Type"] == "String")
|
|
564
|
-
)
|
|
565
|
-
| (
|
|
566
|
-
(
|
|
567
|
-
(df["Column Name"].str.contains(r"latitude", case=False))
|
|
568
|
-
| (df["Column Name"].str.contains(r"longitude", case=False))
|
|
569
|
-
)
|
|
570
|
-
& (df["Data Type"] == "String")
|
|
571
|
-
)
|
|
572
|
-
),
|
|
573
|
-
"Add Data Category property for appropriate columns.",
|
|
574
|
-
"https://docs.microsoft.com/power-bi/transform-model/desktop-data-categorization",
|
|
575
|
-
),
|
|
576
|
-
(
|
|
577
|
-
"Formatting",
|
|
578
|
-
"Measure",
|
|
579
|
-
"Warning",
|
|
580
|
-
"Percentages should be formatted with thousands separators and 1 decimal",
|
|
581
|
-
lambda df: (df["Format String"].str.contains("%"))
|
|
582
|
-
& (df["Format String"] != "#,0.0%;-#,0.0%;#,0.0%"),
|
|
583
|
-
),
|
|
584
|
-
(
|
|
585
|
-
"Formatting",
|
|
586
|
-
"Measure",
|
|
587
|
-
"Warning",
|
|
588
|
-
"Whole numbers should be formatted with thousands separators and no decimals",
|
|
589
|
-
lambda df: (~df["Format String"].str.contains("$"))
|
|
590
|
-
& ~(df["Format String"].str.contains("%"))
|
|
591
|
-
& ~((df["Format String"] == "#,0") | (df["Format String"] == "#,0.0")),
|
|
592
|
-
),
|
|
593
|
-
(
|
|
594
|
-
"Formatting",
|
|
595
|
-
"Column",
|
|
596
|
-
"Info",
|
|
597
|
-
"Hide foreign keys",
|
|
598
|
-
lambda df: (df["Foreign Key"]) & (df["Hidden"] == False),
|
|
599
|
-
"Foreign keys should always be hidden.",
|
|
600
|
-
),
|
|
601
|
-
(
|
|
602
|
-
"Formatting",
|
|
603
|
-
"Column",
|
|
604
|
-
"Info",
|
|
605
|
-
"Mark primary keys",
|
|
606
|
-
lambda df: (df["Primary Key"]) & (df["Key"] == False),
|
|
607
|
-
"Set the 'Key' property to 'True' for primary key columns within the column properties.",
|
|
608
|
-
),
|
|
609
|
-
(
|
|
610
|
-
"Formatting",
|
|
611
|
-
"Column",
|
|
612
|
-
"Info",
|
|
613
|
-
"Month (as a string) must be sorted",
|
|
614
|
-
lambda df: (df["Column Name"].str.contains(r"month", case=False))
|
|
615
|
-
& ~(df["Column Name"].str.contains(r"months", case=False))
|
|
616
|
-
& (df["Data Type"] == "String")
|
|
617
|
-
& (df["Sort By Column"] == ""),
|
|
618
|
-
"This rule highlights month columns which are strings and are not sorted. If left unsorted, they will sort alphabetically (i.e. April, August...). Make sure to sort such columns so that they sort properly (January, February, March...).",
|
|
619
|
-
),
|
|
620
|
-
(
|
|
621
|
-
"Formatting",
|
|
622
|
-
"Relationship",
|
|
623
|
-
"Warning",
|
|
624
|
-
"Relationship columns should be of integer data type",
|
|
625
|
-
lambda df: (df["From Column Data Type"] != "Int64")
|
|
626
|
-
| (df["To Column Data Type"] != "Int64"),
|
|
627
|
-
"It is a best practice for relationship columns to be of integer data type. This applies not only to data warehousing but data modeling as well.",
|
|
628
|
-
),
|
|
629
|
-
(
|
|
630
|
-
"Formatting",
|
|
631
|
-
"Column",
|
|
632
|
-
"Warning",
|
|
633
|
-
'Provide format string for "Month" columns',
|
|
634
|
-
lambda df: (df["Column Name"].str.contains(r"month", case=False))
|
|
635
|
-
& (df["Data Type"] == "DateTime")
|
|
636
|
-
& (df["Format String"] != "MMMM yyyy"),
|
|
637
|
-
'Columns of type "DateTime" that have "Month" in their names should be formatted as "MMMM yyyy".',
|
|
638
|
-
),
|
|
639
|
-
(
|
|
640
|
-
"Formatting",
|
|
641
|
-
"Column",
|
|
642
|
-
"Info",
|
|
643
|
-
"Format flag columns as Yes/No value strings",
|
|
644
|
-
lambda df: (
|
|
645
|
-
df["Column Name"].str.startswith("Is")
|
|
646
|
-
& (df["Data Type"] == "Int64")
|
|
647
|
-
& ~(df["Hidden"] | df["Parent Is Hidden"])
|
|
648
|
-
)
|
|
649
|
-
| (
|
|
650
|
-
df["Column Name"].str.endswith(" Flag")
|
|
651
|
-
& (df["Data Type"] != "String")
|
|
652
|
-
& ~(df["Hidden"] | df["Parent Is Hidden"])
|
|
653
|
-
),
|
|
654
|
-
"Flags must be properly formatted as Yes/No as this is easier to read than using 0/1 integer values.",
|
|
655
|
-
),
|
|
656
|
-
# ('Formatting', ['Table', 'Column', 'Measure', 'Partition', 'Hierarchy'], 'Error', 'Objects should not start or end with a space',
|
|
657
|
-
# lambda df: (df['Name'].str[0] == ' ') | (df['Name'].str[-1] == ' '),
|
|
658
|
-
# 'Objects should not start or end with a space. This usually happens by accident and is difficult to find.',
|
|
659
|
-
# ),
|
|
660
|
-
(
|
|
661
|
-
"Formatting",
|
|
662
|
-
["Table", "Column", "Measure", "Partition", "Hierarchy"],
|
|
663
|
-
"Info",
|
|
664
|
-
"First letter of objects must be capitalized",
|
|
665
|
-
lambda df: df["Name"].str[0].str.upper() != df["Name"].str[0],
|
|
666
|
-
"The first letter of object names should be capitalized to maintain professional quality.",
|
|
667
|
-
),
|
|
668
|
-
(
|
|
669
|
-
"Naming Conventions",
|
|
670
|
-
["Table", "Column", "Measure", "Partition", "Hierarchy"],
|
|
671
|
-
"Warning",
|
|
672
|
-
"Object names must not contain special characters",
|
|
673
|
-
lambda df: df["Name"].str.contains(r"[\t\r\n]"),
|
|
674
|
-
"Object names should not include tabs, line breaks, etc.",
|
|
675
|
-
), # ,
|
|
676
|
-
# ('Error Prevention', ['Table'], 'Error', 'Avoid invalid characters in names',
|
|
677
|
-
# lambda df: df['Name'].str.char.iscontrol() & ~ df['Name'].str.char.isspace(),
|
|
678
|
-
# )#,
|
|
679
|
-
],
|
|
680
|
-
columns=[
|
|
681
|
-
"Category",
|
|
682
|
-
"Scope",
|
|
683
|
-
"Severity",
|
|
684
|
-
"Rule Name",
|
|
685
|
-
"Expression",
|
|
686
|
-
"Description",
|
|
687
|
-
"URL",
|
|
688
|
-
],
|
|
689
|
-
)
|
|
690
|
-
|
|
691
|
-
df_rules["Severity"] = (
|
|
692
|
-
df_rules["Severity"]
|
|
693
|
-
.replace("Warning", "⚠️")
|
|
694
|
-
.replace("Error", "\u274C")
|
|
695
|
-
.replace("Info", "ℹ️")
|
|
696
|
-
)
|
|
697
|
-
|
|
698
|
-
pd.set_option("display.max_colwidth", 1000)
|
|
699
|
-
|
|
700
|
-
return df_rules
|
|
701
|
-
|
|
702
21
|
|
|
703
22
|
@log
|
|
704
23
|
def run_model_bpa(
|
|
705
24
|
dataset: str,
|
|
706
|
-
|
|
25
|
+
rules: Optional[pd.DataFrame] = None,
|
|
707
26
|
workspace: Optional[str] = None,
|
|
708
27
|
export: Optional[bool] = False,
|
|
709
28
|
return_dataframe: Optional[bool] = False,
|
|
29
|
+
extended: Optional[bool] = False,
|
|
710
30
|
**kwargs,
|
|
711
31
|
):
|
|
712
32
|
"""
|
|
@@ -716,7 +36,7 @@ def run_model_bpa(
|
|
|
716
36
|
----------
|
|
717
37
|
dataset : str
|
|
718
38
|
Name of the semantic model.
|
|
719
|
-
|
|
39
|
+
rules : pandas.DataFrame, default=None
|
|
720
40
|
A pandas dataframe containing rules to be evaluated.
|
|
721
41
|
workspace : str, default=None
|
|
722
42
|
The Fabric workspace name.
|
|
@@ -726,6 +46,8 @@ def run_model_bpa(
|
|
|
726
46
|
If True, exports the resulting dataframe to a delta table in the lakehouse attached to the notebook.
|
|
727
47
|
return_dataframe : bool, default=False
|
|
728
48
|
If True, returns a pandas dataframe instead of the visualization.
|
|
49
|
+
extended : bool, default=False
|
|
50
|
+
If True, runs the set_vertipaq_annotations function to collect Vertipaq Analyzer statistics to be used in the analysis of the semantic model.
|
|
729
51
|
|
|
730
52
|
Returns
|
|
731
53
|
-------
|
|
@@ -746,443 +68,134 @@ def run_model_bpa(
|
|
|
746
68
|
|
|
747
69
|
workspace = fabric.resolve_workspace_name(workspace)
|
|
748
70
|
|
|
749
|
-
if
|
|
750
|
-
|
|
71
|
+
if extended:
|
|
72
|
+
with connect_semantic_model(
|
|
73
|
+
dataset=dataset, workspace=workspace, readonly=False
|
|
74
|
+
) as tom:
|
|
75
|
+
tom.set_vertipaq_annotations()
|
|
751
76
|
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
dataset=dataset,
|
|
756
|
-
workspace=workspace,
|
|
757
|
-
extended=True,
|
|
758
|
-
additional_xmla_properties=["Parent.DataCategory", "Parent.IsHidden"],
|
|
759
|
-
)
|
|
760
|
-
dfC = dfC[~dfC["Column Name"].str.startswith("RowNumber-")]
|
|
77
|
+
with connect_semantic_model(
|
|
78
|
+
dataset=dataset, workspace=workspace, readonly=True
|
|
79
|
+
) as tom:
|
|
761
80
|
|
|
762
|
-
|
|
763
|
-
dataset=dataset,
|
|
764
|
-
workspace=workspace,
|
|
765
|
-
additional_xmla_properties=["Parent.IsHidden"],
|
|
766
|
-
)
|
|
767
|
-
dfR = fabric.list_relationships(
|
|
768
|
-
dataset=dataset,
|
|
769
|
-
workspace=workspace,
|
|
770
|
-
additional_xmla_properties=["FromCardinality", "ToCardinality"],
|
|
771
|
-
)
|
|
772
|
-
dfP = fabric.list_partitions(
|
|
773
|
-
dataset=dataset,
|
|
774
|
-
workspace=workspace,
|
|
775
|
-
additional_xmla_properties=["DataCoverageDefinition.Expression"],
|
|
776
|
-
)
|
|
777
|
-
dfH = fabric.list_hierarchies(dataset=dataset, workspace=workspace)
|
|
778
|
-
dfRole = fabric.get_roles(dataset=dataset, workspace=workspace)
|
|
779
|
-
dfRM = fabric.get_roles(dataset=dataset, workspace=workspace, include_members=True)
|
|
780
|
-
dfRLS = fabric.get_row_level_security_permissions(
|
|
781
|
-
dataset=dataset, workspace=workspace
|
|
782
|
-
)
|
|
783
|
-
# dfTr = fabric.list_translations(dataset = datasetName, workspace = workspaceName)
|
|
784
|
-
# dfE = fabric.list_expressions(dataset = datasetName, workspace = workspaceName)
|
|
785
|
-
dfCI = fabric.list_calculation_items(dataset=dataset, workspace=workspace)
|
|
786
|
-
# dfDS = fabric.list_datasources(dataset = datasetName, workspace = workspaceName)
|
|
787
|
-
# dfPersp = fabric.list_perspectives(dataset = datasetName, workspace = workspaceName)
|
|
788
|
-
dfD = fabric.list_datasets(mode="rest", workspace=workspace)
|
|
789
|
-
dfD = dfD[dfD["Dataset Name"] == dataset]
|
|
790
|
-
# datasetOwner = dfD['Configured By'].iloc[0]
|
|
791
|
-
md = get_measure_dependencies(dataset, workspace)
|
|
792
|
-
isDirectLake = any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows())
|
|
793
|
-
dfC["Is Direct Lake"] = isDirectLake
|
|
794
|
-
dfT["Is Direct Lake"] = isDirectLake
|
|
81
|
+
dep = get_model_calc_dependencies(dataset=dataset, workspace=workspace)
|
|
795
82
|
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
if col not in dfR:
|
|
800
|
-
dfR[col] = None
|
|
801
|
-
|
|
802
|
-
cols = ["Parent Is Hidden"]
|
|
803
|
-
|
|
804
|
-
for col in cols:
|
|
805
|
-
if col not in dfM:
|
|
806
|
-
dfM[col] = None
|
|
807
|
-
|
|
808
|
-
# Data Coverage Definition rule
|
|
809
|
-
dfP_imp = dfP[dfP["Mode"] == "Import"]
|
|
810
|
-
dfTP = dfP_imp.groupby("Table Name")["Partition Name"].count().reset_index()
|
|
811
|
-
dfTP.rename(columns={"Partition Name": "Import Partitions"}, inplace=True)
|
|
812
|
-
dfP = pd.merge(
|
|
813
|
-
dfP, dfTP[["Table Name", "Import Partitions"]], on="Table Name", how="left"
|
|
814
|
-
)
|
|
815
|
-
dfP["Import Partitions"].fillna(0, inplace=True)
|
|
816
|
-
dfC_DateKey = dfC[
|
|
817
|
-
(dfC["Parent Data Category"] == "Time")
|
|
818
|
-
& (dfC["Data Type"] == "DateTime")
|
|
819
|
-
& (dfC["Key"])
|
|
820
|
-
]
|
|
821
|
-
hasDateTable = False
|
|
822
|
-
|
|
823
|
-
if len(dfC_DateKey) > 0:
|
|
824
|
-
hasDateTable = True
|
|
825
|
-
|
|
826
|
-
dfP["Has Date Table"] = hasDateTable
|
|
827
|
-
|
|
828
|
-
# Set dims to dual mode
|
|
829
|
-
dfR_one = dfR[dfR["To Cardinality"] == "One"]
|
|
830
|
-
dfTP = dfP_imp.groupby("Table Name")["Partition Name"].count().reset_index()
|
|
831
|
-
dfTP.rename(columns={"Partition Name": "Import Partitions"}, inplace=True)
|
|
832
|
-
dfT = pd.merge(dfT, dfTP, left_on="Name", right_on="Table Name", how="left")
|
|
833
|
-
dfT.drop(columns=["Table Name"], inplace=True)
|
|
834
|
-
dfT["Import Partitions"].fillna(0, inplace=True)
|
|
835
|
-
hasDQ = any(r["Mode"] == "DirectQuery" for i, r in dfP.iterrows())
|
|
836
|
-
dfT["Model Has DQ"] = hasDQ
|
|
837
|
-
dfT["Used in Relationship x:1"] = dfT["Name"].isin(dfR_one["To Table"])
|
|
838
|
-
|
|
839
|
-
dfF = fabric.evaluate_dax(
|
|
840
|
-
dataset=dataset,
|
|
841
|
-
workspace=workspace,
|
|
842
|
-
dax_string="""
|
|
843
|
-
SELECT [FUNCTION_NAME]
|
|
844
|
-
FROM $SYSTEM.MDSCHEMA_FUNCTIONS
|
|
845
|
-
WHERE [INTERFACE_NAME] = 'DATETIME'
|
|
846
|
-
""",
|
|
847
|
-
)
|
|
848
|
-
|
|
849
|
-
dfC["Name"] = dfC["Column Name"]
|
|
850
|
-
dfH["Name"] = dfH["Hierarchy Name"]
|
|
851
|
-
dfM["Name"] = dfM["Measure Name"]
|
|
852
|
-
dfP["Name"] = dfP["Partition Name"]
|
|
853
|
-
dfRole["Name"] = dfRole["Role"]
|
|
854
|
-
dfD["Name"] = dfD["Dataset Name"]
|
|
855
|
-
dfH["Description"] = dfH["Hierarchy Description"]
|
|
856
|
-
dfM["Description"] = dfM["Measure Description"]
|
|
857
|
-
dfH["Hierarchy Object"] = format_dax_object_name(
|
|
858
|
-
dfH["Table Name"], dfH["Hierarchy Name"]
|
|
859
|
-
)
|
|
860
|
-
|
|
861
|
-
dfCI["Calculation Object"] = format_dax_object_name(
|
|
862
|
-
dfCI["Calculation Group Name"], dfCI["Calculation Item Name"]
|
|
863
|
-
)
|
|
864
|
-
|
|
865
|
-
dfRole["Member Count"] = dfRM["Role"].isin(dfRole["Role"]).sum()
|
|
866
|
-
dfRLS["Is Dynamic"] = dfRLS["Filter Expression"].str.contains(
|
|
867
|
-
r"userprincipalname\s*\(", case=False
|
|
868
|
-
) | dfRLS["Filter Expression"].str.contains(r"username\s*\(", case=False)
|
|
869
|
-
|
|
870
|
-
# Partition Count
|
|
871
|
-
partition_count = (
|
|
872
|
-
dfP.groupby("Table Name").size().reset_index(name="Partition Count")
|
|
873
|
-
)
|
|
874
|
-
dfT = pd.merge(
|
|
875
|
-
dfT, partition_count, left_on="Name", right_on="Table Name", how="left"
|
|
876
|
-
).drop("Table Name", axis=1)
|
|
877
|
-
dfT["Partition Count"] = dfT["Partition Count"].fillna(0).astype(int)
|
|
878
|
-
|
|
879
|
-
dfT = dfT.merge(
|
|
880
|
-
dfP[["Table Name", "Partition Name"]],
|
|
881
|
-
how="left",
|
|
882
|
-
left_on="Name",
|
|
883
|
-
right_on="Table Name",
|
|
884
|
-
)
|
|
885
|
-
dfT["First Partition Name"] = dfT.groupby("Name")["Partition Name"].transform(
|
|
886
|
-
"first"
|
|
887
|
-
)
|
|
888
|
-
dfT.drop("Table Name", axis=1, inplace=True)
|
|
889
|
-
|
|
890
|
-
dfC["Sort By Column Object"] = format_dax_object_name(
|
|
891
|
-
dfC["Table Name"], dfC["Sort By Column"]
|
|
892
|
-
)
|
|
893
|
-
dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
|
|
894
|
-
dfM["Measure Object"] = "[" + dfM["Measure Name"] + "]"
|
|
895
|
-
dfM["Measure Fully Qualified"] = format_dax_object_name(
|
|
896
|
-
dfM["Table Name"], dfM["Measure Name"]
|
|
897
|
-
)
|
|
898
|
-
dfM["Measure Fully Qualified No Spaces"] = (
|
|
899
|
-
dfM["Table Name"] + "[" + dfM["Measure Name"] + "]"
|
|
900
|
-
)
|
|
901
|
-
# dfM['Measure Fully Qualified No Spaces'] = dfM.apply(lambda row: row['Table Name'] + '[' + row['Measure Name'] + ']' if ' ' not in row['Table Name'] else '', axis=1)
|
|
902
|
-
dfC["Column Unqualified"] = "[" + dfC["Column Name"] + "]"
|
|
903
|
-
dfC["Column Object No Spaces"] = dfC.apply(
|
|
904
|
-
lambda row: (
|
|
905
|
-
row["Table Name"] + "[" + row["Column Name"] + "]"
|
|
906
|
-
if " " not in row["Table Name"]
|
|
907
|
-
else ""
|
|
908
|
-
),
|
|
909
|
-
axis=1,
|
|
910
|
-
)
|
|
911
|
-
dfC["Used in Sort By"] = dfC["Column Object"].isin(dfC["Sort By Column Object"])
|
|
912
|
-
dfH["Column Object"] = format_dax_object_name(dfH["Table Name"], dfH["Column Name"])
|
|
913
|
-
dfC["Used in Hierarchy"] = dfC["Column Object"].isin(dfH["Column Object"])
|
|
914
|
-
dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"])
|
|
915
|
-
dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
|
|
916
|
-
dfT["Used in Relationship"] = dfT["Name"].isin(dfR["From Table"]) | dfT[
|
|
917
|
-
"Name"
|
|
918
|
-
].isin(dfR["To Table"])
|
|
919
|
-
dfT["Used in Relationship Both Sides"] = dfT["Name"].isin(dfR["From Table"]) & dfT[
|
|
920
|
-
"Name"
|
|
921
|
-
].isin(dfR["To Table"])
|
|
922
|
-
dfC["Used in Relationship"] = dfC["Column Object"].isin(dfR["From Object"]) | dfC[
|
|
923
|
-
"Column Object"
|
|
924
|
-
].isin(dfR["To Object"])
|
|
925
|
-
|
|
926
|
-
dfR_filt = dfR[
|
|
927
|
-
(dfR["Cross Filtering Behavior"] == "BothDirections")
|
|
928
|
-
| (dfR["Multiplicity"] == "m:m")
|
|
929
|
-
]
|
|
930
|
-
dfC["Used in M2M/BiDi Relationship"] = dfC["Column Object"].isin(
|
|
931
|
-
dfR_filt["From Object"]
|
|
932
|
-
) | dfC["Column Object"].isin(dfR_filt["To Object"])
|
|
933
|
-
dfC["Foreign Key"] = dfC["Column Object"].isin(
|
|
934
|
-
dfR[dfR["From Cardinality"] == "Many"]["From Object"]
|
|
935
|
-
)
|
|
936
|
-
dfC["Primary Key"] = dfC["Column Object"].isin(
|
|
937
|
-
dfR[dfR["To Cardinality"] == "One"]["To Object"]
|
|
938
|
-
)
|
|
939
|
-
dfT["Used in M2M Relationship"] = dfT["Name"].isin(
|
|
940
|
-
dfR[dfR["Multiplicity"] == "m:m"][["From Table"]]
|
|
941
|
-
) | dfT["Name"].isin(dfR[dfR["Multiplicity"] == "m:m"][["To Table"]])
|
|
942
|
-
dfT["Used in Dynamic RLS"] = dfT["Name"].isin(dfRLS[dfRLS["Is Dynamic"]]["Table"])
|
|
943
|
-
dfT["Used in RLS"] = dfT["Name"].isin(
|
|
944
|
-
dfRLS.loc[dfRLS["Filter Expression"].str.len() > 0, "Table"]
|
|
945
|
-
)
|
|
946
|
-
dfC["Primary Key"] = dfC["Column Object"].isin(
|
|
947
|
-
dfR.loc[dfR["To Cardinality"] == "One", "To Object"]
|
|
948
|
-
)
|
|
949
|
-
dfD["Has Date Table"] = any(
|
|
950
|
-
(r["Parent Data Category"] == "Time")
|
|
951
|
-
& (r["Data Type"] == "DateTime")
|
|
952
|
-
& (r["Key"] == True)
|
|
953
|
-
for i, r in dfC.iterrows()
|
|
954
|
-
)
|
|
955
|
-
# dfC['In Date Table'] = dfC['Table Name'].isin(dfT.loc[dfT['Data Category'] == "Time", 'Name'])
|
|
956
|
-
dfD["Relationship Count"] = len(dfR)
|
|
957
|
-
dfD["M2M or BiDi Relationship Count"] = len(
|
|
958
|
-
dfR[
|
|
959
|
-
(dfR["Multiplicity"] == "m:m")
|
|
960
|
-
| (dfR["Cross Filtering Behavior"] == "BothDirections")
|
|
961
|
-
]
|
|
962
|
-
)
|
|
963
|
-
dfD["Calculation Group Count"] = len(dfT[dfT["Type"] == "Calculation Group"])
|
|
964
|
-
dfT["Has Calculation Items"] = np.where(
|
|
965
|
-
(dfT["Type"] == "Calculation Group")
|
|
966
|
-
& dfT["Name"].isin(dfCI["Calculation Group Name"]),
|
|
967
|
-
True,
|
|
968
|
-
False,
|
|
969
|
-
)
|
|
970
|
-
dfP["Partition Object"] = format_dax_object_name(
|
|
971
|
-
dfP["Table Name"], dfP["Partition Name"]
|
|
972
|
-
)
|
|
973
|
-
dfRLS["RLS Object"] = format_dax_object_name(dfRLS["Role"], dfRLS["Table"])
|
|
974
|
-
|
|
975
|
-
function_pattern = "|".join(dfF["FUNCTION_NAME"].map(re.escape))
|
|
976
|
-
|
|
977
|
-
dfM["DQ Date Function Used"] = any(dfP["Mode"] == "DirectQuery") & dfM[
|
|
978
|
-
"Measure Expression"
|
|
979
|
-
].str.contains(f"({function_pattern})\\s*\\(", case=False, regex=True)
|
|
980
|
-
|
|
981
|
-
md["Reference"] = (
|
|
982
|
-
"'" + md["Referenced Table"] + "'[" + md["Referenced Object"] + "]"
|
|
983
|
-
)
|
|
984
|
-
|
|
985
|
-
dfC["Referenced By"] = (
|
|
986
|
-
md[
|
|
987
|
-
(md["Referenced Object Type"] == "Column")
|
|
988
|
-
& (md["Reference"].isin(dfC["Column Object"]))
|
|
989
|
-
]
|
|
990
|
-
.groupby("Reference")
|
|
991
|
-
.size()
|
|
992
|
-
.reset_index(name="Count")["Count"]
|
|
993
|
-
)
|
|
994
|
-
dfC["Referenced By"].fillna(0, inplace=True)
|
|
995
|
-
dfC["Referenced By"] = dfC["Referenced By"].fillna(0).astype(int)
|
|
996
|
-
|
|
997
|
-
dfM["Referenced By"] = (
|
|
998
|
-
md[
|
|
999
|
-
(md["Referenced Object Type"] == "Measure")
|
|
1000
|
-
& (md["Referenced Object"].isin(dfM["Measure Name"]))
|
|
1001
|
-
]
|
|
1002
|
-
.groupby("Referenced Object")
|
|
1003
|
-
.size()
|
|
1004
|
-
.reset_index(name="Count")["Count"]
|
|
1005
|
-
)
|
|
1006
|
-
dfM["Referenced By"].fillna(0, inplace=True)
|
|
1007
|
-
dfM["Referenced By"] = dfM["Referenced By"].fillna(0).astype(int)
|
|
1008
|
-
|
|
1009
|
-
pattern = r"[^\( ][a-zA-Z0-9_()-]+\[[^\[]+\]|'[^']+'\[[^\[]+\]|\[[^\[]+\]"
|
|
1010
|
-
|
|
1011
|
-
dfM["Has Fully Qualified Measure Reference"] = False
|
|
1012
|
-
dfM["Has Unqualified Column Reference"] = False
|
|
1013
|
-
|
|
1014
|
-
for i, r in dfM.iterrows():
|
|
1015
|
-
tName = r["Table Name"]
|
|
1016
|
-
mName = r["Measure Name"]
|
|
1017
|
-
expr = r["Measure Expression"]
|
|
1018
|
-
|
|
1019
|
-
matches = re.findall(pattern, expr)
|
|
1020
|
-
|
|
1021
|
-
for m in matches:
|
|
1022
|
-
if m[0] == "[":
|
|
1023
|
-
if (m in dfC["Column Unqualified"].values) and (
|
|
1024
|
-
dfC[dfC["Table Name"] == tName]["Column Unqualified"] == m
|
|
1025
|
-
).any():
|
|
1026
|
-
dfM.at[i, "Has Unqualified Column Reference"] = True
|
|
1027
|
-
else:
|
|
1028
|
-
if (m in dfM["Measure Fully Qualified"].values) | (
|
|
1029
|
-
m in dfM["Measure Fully Qualified No Spaces"].values
|
|
1030
|
-
):
|
|
1031
|
-
dfM.at[i, "Has Fully Qualified Measure Reference"] = True
|
|
1032
|
-
|
|
1033
|
-
dfR["Inactive without USERELATIONSHIP"] = False
|
|
1034
|
-
for i, r in dfR[dfR["Active"] == False].iterrows():
|
|
1035
|
-
fromTable = r["From Table"]
|
|
1036
|
-
fromColumn = r["From Column"]
|
|
1037
|
-
toTable = r["To Table"]
|
|
1038
|
-
toColumn = r["To Column"]
|
|
1039
|
-
|
|
1040
|
-
dfM_filt = dfM[
|
|
1041
|
-
dfM["Measure Expression"].str.contains(
|
|
1042
|
-
r"(?i)USERELATIONSHIP\s*\(\s*'*"
|
|
1043
|
-
+ re.escape(fromTable)
|
|
1044
|
-
+ r"'*\["
|
|
1045
|
-
+ re.escape(fromColumn)
|
|
1046
|
-
+ r"\]\s*,\s*'*"
|
|
1047
|
-
+ re.escape(toTable)
|
|
1048
|
-
+ r"'*\["
|
|
1049
|
-
+ re.escape(toColumn)
|
|
1050
|
-
+ r"\]",
|
|
1051
|
-
regex=True,
|
|
83
|
+
if rules is None:
|
|
84
|
+
rules = model_bpa_rules(
|
|
85
|
+
dataset=dataset, workspace=workspace, dependencies=dep
|
|
1052
86
|
)
|
|
1053
|
-
]
|
|
1054
|
-
if len(dfM_filt) == 0:
|
|
1055
|
-
dfR.at[i, "Inactive without USERELATIONSHIP"] = True
|
|
1056
87
|
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
| dfC.apply(
|
|
1061
|
-
lambda row: any(
|
|
1062
|
-
row["Column Name"] in expr
|
|
1063
|
-
for expr in dfRLS.loc[
|
|
1064
|
-
dfRLS["Table"] == row["Table Name"], "Filter Expression"
|
|
1065
|
-
]
|
|
1066
|
-
),
|
|
1067
|
-
axis=1,
|
|
1068
|
-
)
|
|
1069
|
-
)
|
|
88
|
+
rules["Severity"].replace("Warning", "⚠️", inplace=True)
|
|
89
|
+
rules["Severity"].replace("Error", "\u274C", inplace=True)
|
|
90
|
+
rules["Severity"].replace("Info", "ℹ️", inplace=True)
|
|
1070
91
|
|
|
1071
|
-
|
|
1072
|
-
merged_from = pd.merge(
|
|
1073
|
-
dfR, dfC, left_on="From Object", right_on="Column Object", how="left"
|
|
1074
|
-
)
|
|
1075
|
-
merged_to = pd.merge(
|
|
1076
|
-
dfR, dfC, left_on="To Object", right_on="Column Object", how="left"
|
|
1077
|
-
)
|
|
92
|
+
pd.set_option("display.max_colwidth", 1000)
|
|
1078
93
|
|
|
1079
|
-
|
|
1080
|
-
dfR["To Column Data Type"] = merged_to["Data Type"]
|
|
94
|
+
violations = pd.DataFrame(columns=["Object Name", "Scope", "Rule Name"])
|
|
1081
95
|
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
       96 +     scope_to_dataframe = {
       97 +         "Relationship": (
       98 +             tom.model.Relationships,
       99 +             lambda obj: create_relationship_name(
      100 +                 obj.FromTable.Name,
      101 +                 obj.FromColumn.Name,
      102 +                 obj.ToTable.Name,
      103 +                 obj.ToColumn.Name,
      104 +             ),
      105 +         ),
      106 +         "Column": (
      107 +             tom.all_columns(),
      108 +             lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name),
      109 +         ),
      110 +         "Measure": (tom.all_measures(), lambda obj: obj.Name),
      111 +         "Hierarchy": (
      112 +             tom.all_hierarchies(),
      113 +             lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name),
      114 +         ),
      115 +         "Table": (tom.model.Tables, lambda obj: obj.Name),
      116 +         "Role": (tom.model.Roles, lambda obj: obj.Name),
      117 +         "Model": (tom.model, lambda obj: obj.Model.Name),
      118 +         "Calculation Item": (
      119 +             tom.all_calculation_items(),
      120 +             lambda obj: format_dax_object_name(obj.Parent.Table.Name, obj.Name),
      121 +         ),
      122 +         "Row Level Security": (
      123 +             tom.all_rls(),
      124 +             lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name),
      125 +         ),
      126 +         "Partition": (
      127 +             tom.all_partitions(),
      128 +             lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name),
      129 +         ),
      130 +     }
1086  131
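Each scope now maps to a pair: the TOM collection to scan and a lambda that renders an object's display name. A self-contained sketch of that dispatch with stand-in objects; format_dax_object_name is redefined locally and assumed to produce the 'Table'[Object] shape used elsewhere in sempy_labs:

    # Stand-ins for TOM objects; only the attributes the formatter needs.
    class Parent:
        def __init__(self, name):
            self.Name = name

    class Column:
        def __init__(self, table, name):
            self.Parent, self.Name = Parent(table), name

    def format_dax_object_name(table, obj):
        # assumed output shape: 'Table'[Object]
        return f"'{table}'[{obj}]"

    columns = [Column("Sales", "Amount"), Column("Date", "Year")]
    scope_to_collection = {
        "Column": (columns, lambda o: format_dax_object_name(o.Parent.Name, o.Name)),
        "Table": ([Parent("Sales")], lambda o: o.Name),
    }

    collection, nm = scope_to_collection["Column"]
    print([nm(o) for o in collection])  # ["'Sales'[Amount]", "'Date'[Year]"]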
1087      -
1088      -
1089      -
1090      -
1091      -
1092      -
1093      -
      132 +     for i, r in rules.iterrows():
      133 +         ruleName = r["Rule Name"]
      134 +         expr = r["Expression"]
      135 +         scopes = r["Scope"]
      136 +
      137 +         if isinstance(scopes, str):
      138 +             scopes = [scopes]
      139 +
      140 +         for scope in scopes:
      141 +             func = scope_to_dataframe[scope][0]
      142 +             nm = scope_to_dataframe[scope][1]
      143 +
      144 +             if scope == "Model":
      145 +                 x = []
      146 +                 if expr(func):
      147 +                     x = ["Model"]
      148 +             elif scope == "Measure":
      149 +                 x = [nm(obj) for obj in tom.all_measures() if expr(obj)]
      150 +             elif scope == "Column":
      151 +                 x = [nm(obj) for obj in tom.all_columns() if expr(obj)]
      152 +             elif scope == "Partition":
      153 +                 x = [nm(obj) for obj in tom.all_partitions() if expr(obj)]
      154 +             elif scope == "Hierarchy":
      155 +                 x = [nm(obj) for obj in tom.all_hierarchies() if expr(obj)]
      156 +             elif scope == "Table":
      157 +                 x = [nm(obj) for obj in tom.model.Tables if expr(obj)]
      158 +             elif scope == "Relationship":
      159 +                 x = [nm(obj) for obj in tom.model.Relationships if expr(obj)]
      160 +             elif scope == "Role":
      161 +                 x = [nm(obj) for obj in tom.model.Roles if expr(obj)]
      162 +             elif scope == "Row Level Security":
      163 +                 x = [nm(obj) for obj in tom.all_rls() if expr(obj)]
      164 +             elif scope == "Calculation Item":
      165 +                 x = [nm(obj) for obj in tom.all_calculation_items() if expr(obj)]
      166 +
      167 +             if len(x) > 0:
      168 +                 new_data = {"Object Name": x, "Scope": scope, "Rule Name": ruleName}
      169 +                 violations = pd.concat(
      170 +                     [violations, pd.DataFrame(new_data)], ignore_index=True
      171 +                 )
1094  172
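A rule's Expression is now a predicate evaluated per TOM object rather than over a DataFrame; matching object names are collected and appended to the violations frame. A runnable miniature with one illustrative rule and a stand-in column object (real rules come from model_bpa_rules()):

    import pandas as pd

    class Column:  # stand-in for a TOM column
        def __init__(self, name, data_type):
            self.Name, self.DataType = name, data_type

    model_columns = [Column("Amount", "Double"), Column("Qty", "Int64")]

    rules = pd.DataFrame(
        [
            {
                "Rule Name": "Do not use floating point data types",
                "Scope": "Column",
                "Expression": lambda obj: obj.DataType == "Double",
            }
        ]
    )

    violations = pd.DataFrame(columns=["Object Name", "Scope", "Rule Name"])
    for _, r in rules.iterrows():
        x = [obj.Name for obj in model_columns if r["Expression"](obj)]
        if len(x) > 0:
            new_data = {"Object Name": x, "Scope": r["Scope"], "Rule Name": r["Rule Name"]}
            violations = pd.concat([violations, pd.DataFrame(new_data)], ignore_index=True)

    print(violations.to_dict("records"))
    # [{'Object Name': 'Amount', 'Scope': 'Column',
    #   'Rule Name': 'Do not use floating point data types'}]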
1095      -
1096      -
1097      -
1098      -
1099      -
1100      -
1101      -
1102      -
1103      -
1104      -
1105      -
1106      -
1107      -
1108      -
1109      -
1110      -
1111      -
1112      -
1113      -             "Description",
1114      -             "Table Name",
1115      -             "Hierarchy Name",
1116      -             "Hierarchy Description",
1117      -             "Hierarchy Object",
      173 +     prepDF = pd.merge(
      174 +         violations,
      175 +         rules[["Rule Name", "Category", "Severity", "Description", "URL"]],
      176 +         left_on="Rule Name",
      177 +         right_on="Rule Name",
      178 +         how="left",
      179 +     )
      180 +     prepDF.rename(columns={"Scope": "Object Type"}, inplace=True)
      181 +     finalDF = prepDF[
      182 +         [
      183 +             "Category",
      184 +             "Rule Name",
      185 +             "Severity",
      186 +             "Object Type",
      187 +             "Object Name",
      188 +             "Description",
      189 +             "URL",
      190 +         ]
1118  191     ]
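The violations frame carries only object, scope, and rule name; category, severity, description, and URL are joined in from the rules frame. The same merge on toy frames:

    import pandas as pd

    violations = pd.DataFrame(
        {"Object Name": ["'Sales'[Amount]"], "Scope": ["Column"], "Rule Name": ["Rule A"]}
    )
    rules = pd.DataFrame(
        {
            "Rule Name": ["Rule A"],
            "Category": ["Performance"],
            "Severity": ["⚠️"],
            "Description": ["Illustrative description"],
            "URL": [None],
        }
    )

    prepDF = pd.merge(
        violations,
        rules[["Rule Name", "Category", "Severity", "Description", "URL"]],
        left_on="Rule Name",
        right_on="Rule Name",
        how="left",
    )
    prepDF.rename(columns={"Scope": "Object Type"}, inplace=True)
    print(prepDF.columns.tolist())
    # ['Object Name', 'Object Type', 'Rule Name', 'Category', 'Severity', 'Description', 'URL']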
1119      -     ].drop_duplicates()
1120      -
1121      -     scope_to_dataframe = {
1122      -         "Table": (dfT, ["Name"]),
1123      -         "Partition": (dfP, ["Partition Object"]),
1124      -         "Column": (dfC, ["Column Object"]),
1125      -         "Hierarchy": (dfH, ["Hierarchy Object"]),
1126      -         "Measure": (dfM, ["Measure Name"]),
1127      -         "Calculation Item": (dfCI, ["Calculation Object"]),
1128      -         "Relationship": (dfR, ["Relationship Name"]),
1129      -         "Row Level Security": (dfRLS, ["RLS Object"]),
1130      -         "Role": (dfRole, ["Role"]),
1131      -         "Model": (dfD, ["Dataset Name"]),
1132      -     }
1133      -
1134      -     def execute_rule(row):
1135      -         scopes = row["Scope"]
1136      -
1137      -         # support both str and list as scope type
1138      -         if isinstance(scopes, str):
1139      -             scopes = [scopes]
1140      -
1141      -         # collect output dataframes
1142      -         df_outputs = []
1143      -
1144      -         for scope in scopes:
1145      -             # common fields for each scope
1146      -             (df, violation_cols_or_func) = scope_to_dataframe[scope]
1147      -
1148      -             if scope in ["Hierarchy", "Measure"] and len(df) == 0:
1149      -                 continue
1150      -             # execute rule and subset df
1151      -             df_violations = df[row["Expression"](df)]
1152      -
1153      -             # subset the right output columns (e.g. Table Name & Column Name)
1154      -             if isinstance(violation_cols_or_func, list):
1155      -                 violation_func = lambda violations: violations[violation_cols_or_func]
1156      -             else:
1157      -                 violation_func = violation_cols_or_func
1158      -
1159      -             # build output data frame
1160      -             df_output = violation_func(df_violations).copy()
1161      -
1162      -             df_output.columns = ["Object Name"]
1163      -             df_output["Rule Name"] = row["Rule Name"]
1164      -             df_output["Category"] = row["Category"]
1165      -
1166      -             df_output["Object Type"] = scope
1167      -             df_output["Severity"] = row["Severity"]
1168      -             df_output["Description"] = row["Description"]
1169      -             df_output["URL"] = row["URL"]
1170      -
1171      -             df_outputs.append(df_output)
1172      -
1173      -         return df_outputs
1174      -
1175      -     # flatten list of lists
1176      -     flatten_dfs = [
1177      -         df for dfs in rules_dataframe.apply(execute_rule, axis=1).tolist() for df in dfs
1178      -     ]
1179      -
1180      -     finalDF = pd.concat(flatten_dfs, ignore_index=True)
1181  192
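Everything removed above is the 0.5.0 engine: rule expressions were vectorized masks over pre-built scope DataFrames (dfC, dfM, ...), executed by execute_rule; the replacement walks live TOM objects instead. The two expression shapes side by side, on toy data:

    import pandas as pd

    # 0.5.0: Expression received a whole scope DataFrame, returned a boolean mask.
    dfC = pd.DataFrame(
        {"Column Object": ["'Sales'[Amount]", "'Sales'[Qty]"], "Data Type": ["Double", "Int64"]}
    )
    old_expression = lambda df: df["Data Type"] == "Double"
    print(dfC[old_expression(dfC)]["Column Object"].tolist())  # ["'Sales'[Amount]"]

    # 0.6.0: Expression receives one TOM object at a time, returns a bool.
    class Column:
        def __init__(self, name, data_type):
            self.Name, self.DataType = name, data_type

    new_expression = lambda obj: obj.DataType == "Double"
    cols = [Column("Amount", "Double"), Column("Qty", "Int64")]
    print([c.Name for c in cols if new_expression(c)])  # ['Amount']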
1182  193     if export:
1183  194         lakeAttach = lakehouse_attached()
1184  195         if lakeAttach is False:
1185      -             raise ValueError(
      196 +             raise ValueError(
      197 +                 f"{icons.red_dot} In order to save the Best Practice Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
      198 +             )
1186  199
1187  200         dfExport = finalDF.copy()
1188  201         delta_table_name = "modelbparesults"
@@ -1330,7 +343,7 @@ def run_model_bpa(
1330  343     content_html += '<table border="1">'
1331  344     content_html += "<tr><th>Rule Name</th><th>Object Type</th><th>Object Name</th><th>Severity</th></tr>"
1332  345     for _, row in df.iterrows():
1333      -         content_html += f"<tr>"
      346 +         content_html += "<tr>"
1334  347         if pd.notnull(row["URL"]):
1335  348             content_html += f'<td class="tooltip" onmouseover="adjustTooltipPosition(event)"><a href="{row["URL"]}">{row["Rule Name"]}</a><span class="tooltiptext">{row["Description"]}</span></td>'
1336  349         elif pd.notnull(row["Description"]):
@@ -1340,7 +353,7 @@ def run_model_bpa(
1340  353         content_html += f'<td>{row["Object Type"]}</td>'
1341  354         content_html += f'<td>{row["Object Name"]}</td>'
1342  355         content_html += f'<td>{row["Severity"]}</td>'
1343      -         content_html += f"</tr>"
      356 +         content_html += "</tr>"
1344  357     content_html += "</table>"
1345  358
1346  359     content_html += "</div>"
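The report body is assembled as literal HTML: one table row per violation, with the rule cell linking to the documentation URL and exposing the description as a tooltip. A compact, self-contained sketch of the same loop (simplified cells; the tooltip CSS lives in the surrounding template):

    import pandas as pd

    df = pd.DataFrame(
        [
            {
                "Rule Name": "Rule A",
                "Object Type": "Column",
                "Object Name": "'Sales'[Amount]",
                "Severity": "⚠️",
                "Description": "Illustrative description",
                "URL": None,
            }
        ]
    )

    content_html = '<table border="1">'
    content_html += "<tr><th>Rule Name</th><th>Object Type</th><th>Object Name</th><th>Severity</th></tr>"
    for _, row in df.iterrows():
        content_html += "<tr>"
        if pd.notnull(row["URL"]):
            content_html += f'<td><a href="{row["URL"]}">{row["Rule Name"]}</a></td>'
        elif pd.notnull(row["Description"]):
            content_html += (
                f'<td class="tooltip">{row["Rule Name"]}'
                f'<span class="tooltiptext">{row["Description"]}</span></td>'
            )
        else:
            content_html += f'<td>{row["Rule Name"]}</td>'
        content_html += f'<td>{row["Object Type"]}</td>'
        content_html += f'<td>{row["Object Name"]}</td>'
        content_html += f'<td>{row["Severity"]}</td>'
        content_html += "</tr>"
    content_html += "</table>"

    print(content_html)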