semantic-link-labs 0.7.2__py3-none-any.whl → 0.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of semantic-link-labs might be problematic. Click here for more details.

Files changed (34)
  1. {semantic_link_labs-0.7.2.dist-info → semantic_link_labs-0.7.3.dist-info}/METADATA +3 -2
  2. {semantic_link_labs-0.7.2.dist-info → semantic_link_labs-0.7.3.dist-info}/RECORD +34 -27
  3. {semantic_link_labs-0.7.2.dist-info → semantic_link_labs-0.7.3.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +60 -3
  5. sempy_labs/_bpa_translation/_translations_sv-SE.po +914 -0
  6. sempy_labs/_clear_cache.py +298 -3
  7. sempy_labs/_dataflows.py +130 -0
  8. sempy_labs/_deployment_pipelines.py +171 -0
  9. sempy_labs/_generate_semantic_model.py +148 -27
  10. sempy_labs/_git.py +380 -0
  11. sempy_labs/_helper_functions.py +57 -0
  12. sempy_labs/_list_functions.py +144 -121
  13. sempy_labs/_model_bpa.py +85 -83
  14. sempy_labs/_model_bpa_bulk.py +3 -1
  15. sempy_labs/_model_bpa_rules.py +788 -800
  16. sempy_labs/_sql.py +96 -0
  17. sempy_labs/_translations.py +0 -1
  18. sempy_labs/_workspace_identity.py +66 -0
  19. sempy_labs/directlake/__init__.py +2 -0
  20. sempy_labs/directlake/_directlake_schema_compare.py +1 -2
  21. sempy_labs/directlake/_dl_helper.py +4 -7
  22. sempy_labs/directlake/_generate_shared_expression.py +85 -0
  23. sempy_labs/directlake/_show_unsupported_directlake_objects.py +1 -2
  24. sempy_labs/lakehouse/_get_lakehouse_tables.py +7 -3
  25. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +5 -0
  26. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +5 -0
  27. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +6 -2
  28. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +6 -5
  29. sempy_labs/migration/_migration_validation.py +6 -0
  30. sempy_labs/report/_report_functions.py +21 -42
  31. sempy_labs/report/_report_rebind.py +5 -0
  32. sempy_labs/tom/_model.py +91 -52
  33. {semantic_link_labs-0.7.2.dist-info → semantic_link_labs-0.7.3.dist-info}/LICENSE +0 -0
  34. {semantic_link_labs-0.7.2.dist-info → semantic_link_labs-0.7.3.dist-info}/top_level.txt +0 -0
@@ -1,27 +1,18 @@
1
1
  import sempy
2
- import sempy.fabric as fabric
3
2
  import pandas as pd
4
3
  import re
5
- from sempy_labs.tom import connect_semantic_model
6
4
  from typing import Optional
7
5
 
8
6
 
9
7
  def model_bpa_rules(
10
- dataset: str,
11
- workspace: Optional[str] = None,
12
8
  dependencies: Optional[pd.DataFrame] = None,
9
+ **kwargs,
13
10
  ) -> pd.DataFrame:
14
11
  """
15
12
  Shows the default rules for the semantic model BPA used by the run_model_bpa function.
16
13
 
17
14
  Parameters
18
15
  ----------
19
- dataset : str
20
- Name of the semantic model.
21
- workspace : str, default=None
22
- The Fabric workspace name.
23
- Defaults to None which resolves to the workspace of the attached lakehouse
24
- or if no lakehouse attached, resolves to the workspace of the notebook.
25
16
  dependencies : pd.DataFrame, default=None
26
17
  A pandas dataframe with the output of the 'get_model_calc_dependencies' function.
27
18
 
@@ -34,809 +25,806 @@ def model_bpa_rules(
34
25
  sempy.fabric._client._utils._init_analysis_services()
35
26
  import Microsoft.AnalysisServices.Tabular as TOM
36
27
 
37
- workspace = fabric.resolve_workspace_name(workspace)
38
-
39
- with connect_semantic_model(
40
- dataset=dataset, workspace=workspace, readonly=True
41
- ) as tom:
28
+ if "dataset" in kwargs:
29
+ print(
30
+ "The 'dataset' parameter has been deprecated. Please remove this parameter from the function going forward."
31
+ )
32
+ del kwargs["dataset"]
33
+ if "workspace" in kwargs:
34
+ print(
35
+ "The 'workspace' parameter has been deprecated. Please remove this parameter from the function going forward."
36
+ )
37
+ del kwargs["workspace"]
42
38
 
43
- rules = pd.DataFrame(
44
- [
45
- (
46
- "Performance",
47
- "Column",
48
- "Warning",
49
- "Do not use floating point data types",
50
- lambda obj: obj.DataType == TOM.DataType.Double,
51
- 'The "Double" floating point data type should be avoided, as it can result in unpredictable roundoff errors and decreased performance in certain scenarios. Use "Int64" or "Decimal" where appropriate (but note that "Decimal" is limited to 4 digits after the decimal sign).',
52
- ),
53
- (
54
- "Performance",
55
- "Column",
56
- "Warning",
57
- "Avoid using calculated columns",
58
- lambda obj: obj.Type == TOM.ColumnType.Calculated,
59
- "Calculated columns do not compress as well as data columns so they take up more memory. They also slow down processing times for both the table as well as process recalc. Offload calculated column logic to your data warehouse and turn these calculated columns into data columns.",
60
- "https://www.elegantbi.com/post/top10bestpractices",
61
- ),
62
- (
63
- "Performance",
64
- "Relationship",
65
- "Warning",
66
- "Check if bi-directional and many-to-many relationships are valid",
67
- lambda obj: (
68
- obj.FromCardinality == TOM.RelationshipEndCardinality.Many
69
- and obj.ToCardinality == TOM.RelationshipEndCardinality.Many
70
- )
71
- or str(obj.CrossFilteringBehavior) == "BothDirections"
72
- "Bi-directional and many-to-many relationships may cause performance degradation or even have unintended consequences. Make sure to check these specific relationships to ensure they are working as designed and are actually necessary.",
73
- "https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax",
74
- ),
75
- (
76
- "Performance",
77
- "Row Level Security",
78
- "Info",
79
- "Check if dynamic row level security (RLS) is necessary",
80
- lambda obj: any(
81
- re.search(pattern, obj.FilterExpression, flags=re.IGNORECASE)
82
- for pattern in ["USERPRINCIPALNAME()", "USERNAME()"]
83
- ),
84
- "Usage of dynamic row level security (RLS) can add memory and performance overhead. Please research the pros/cons of using it.",
85
- "https://docs.microsoft.com/power-bi/admin/service-admin-rls",
86
- ),
87
- (
88
- "Performance",
89
- "Table",
90
- "Warning",
91
- "Avoid using many-to-many relationships on tables used for dynamic row level security",
92
- lambda obj: any(
93
- r.FromCardinality == TOM.RelationshipEndCardinality.Many
94
- and r.ToCardinality == TOM.RelationshipEndCardinality.Many
39
+ rules = pd.DataFrame(
40
+ [
41
+ (
42
+ "Performance",
43
+ "Column",
44
+ "Warning",
45
+ "Do not use floating point data types",
46
+ lambda obj, tom: obj.DataType == TOM.DataType.Double,
47
+ 'The "Double" floating point data type should be avoided, as it can result in unpredictable roundoff errors and decreased performance in certain scenarios. Use "Int64" or "Decimal" where appropriate (but note that "Decimal" is limited to 4 digits after the decimal sign).',
48
+ ),
49
+ (
50
+ "Performance",
51
+ "Column",
52
+ "Warning",
53
+ "Avoid using calculated columns",
54
+ lambda obj, tom: obj.Type == TOM.ColumnType.Calculated,
55
+ "Calculated columns do not compress as well as data columns so they take up more memory. They also slow down processing times for both the table as well as process recalc. Offload calculated column logic to your data warehouse and turn these calculated columns into data columns.",
56
+ "https://www.elegantbi.com/post/top10bestpractices",
57
+ ),
58
+ (
59
+ "Performance",
60
+ "Relationship",
61
+ "Warning",
62
+ "Check if bi-directional and many-to-many relationships are valid",
63
+ lambda obj, tom: (
64
+ obj.FromCardinality == TOM.RelationshipEndCardinality.Many
65
+ and obj.ToCardinality == TOM.RelationshipEndCardinality.Many
66
+ )
67
+ or str(obj.CrossFilteringBehavior) == "BothDirections"
68
+ "Bi-directional and many-to-many relationships may cause performance degradation or even have unintended consequences. Make sure to check these specific relationships to ensure they are working as designed and are actually necessary.",
69
+ "https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax",
70
+ ),
71
+ (
72
+ "Performance",
73
+ "Row Level Security",
74
+ "Info",
75
+ "Check if dynamic row level security (RLS) is necessary",
76
+ lambda obj, tom: any(
77
+ re.search(pattern, obj.FilterExpression, flags=re.IGNORECASE)
78
+ for pattern in ["USERPRINCIPALNAME()", "USERNAME()"]
79
+ ),
80
+ "Usage of dynamic row level security (RLS) can add memory and performance overhead. Please research the pros/cons of using it.",
81
+ "https://docs.microsoft.com/power-bi/admin/service-admin-rls",
82
+ ),
83
+ (
84
+ "Performance",
85
+ "Table",
86
+ "Warning",
87
+ "Avoid using many-to-many relationships on tables used for dynamic row level security",
88
+ lambda obj, tom: any(
89
+ r.FromCardinality == TOM.RelationshipEndCardinality.Many
90
+ and r.ToCardinality == TOM.RelationshipEndCardinality.Many
91
+ for r in tom.used_in_relationships(object=obj)
92
+ )
93
+ and any(t.Name == obj.Name for t in tom.all_rls()),
94
+ "Using many-to-many relationships on tables which use dynamic row level security can cause serious query performance degradation. This pattern's performance problems compound when snowflaking multiple many-to-many relationships against a table which contains row level security. Instead, use one of the patterns shown in the article below where a single dimension table relates many-to-one to a security table.",
95
+ "https://www.elegantbi.com/post/dynamicrlspatterns",
96
+ ),
97
+ (
98
+ "Performance",
99
+ "Relationship",
100
+ "Warning",
101
+ "Many-to-many relationships should be single-direction",
102
+ lambda obj, tom: (
103
+ obj.FromCardinality == TOM.RelationshipEndCardinality.Many
104
+ and obj.ToCardinality == TOM.RelationshipEndCardinality.Many
105
+ )
106
+ and obj.CrossFilteringBehavior
107
+ == TOM.CrossFilteringBehavior.BothDirections,
108
+ ),
109
+ (
110
+ "Performance",
111
+ "Column",
112
+ "Warning",
113
+ "Set IsAvailableInMdx to false on non-attribute columns",
114
+ lambda obj, tom: tom.is_direct_lake() is False
115
+ and obj.IsAvailableInMDX
116
+ and (obj.IsHidden or obj.Parent.IsHidden)
117
+ and obj.SortByColumn is None
118
+ and not any(tom.used_in_sort_by(column=obj))
119
+ and not any(tom.used_in_hierarchies(column=obj)),
120
+ "To speed up processing time and conserve memory after processing, attribute hierarchies should not be built for columns that are never used for slicing by MDX clients. In other words, all hidden columns that are not used as a Sort By Column or referenced in user hierarchies should have their IsAvailableInMdx property set to false. The IsAvailableInMdx property is not relevant for Direct Lake models.",
121
+ "https://blog.crossjoin.co.uk/2018/07/02/isavailableinmdx-ssas-tabular",
122
+ ),
123
+ (
124
+ "Performance",
125
+ "Partition",
126
+ "Warning",
127
+ "Set 'Data Coverage Definition' property on the DirectQuery partition of a hybrid table",
128
+ lambda obj, tom: tom.is_hybrid_table(table_name=obj.Parent.Name)
129
+ and obj.Mode == TOM.ModeType.DirectQuery
130
+ and obj.DataCoverageDefinition is None,
131
+ "Setting the 'Data Coverage Definition' property may lead to better performance because the engine knows when it can only query the import-portion of the table and when it needs to query the DirectQuery portion of the table.",
132
+ "https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions",
133
+ ),
134
+ (
135
+ "Performance",
136
+ "Model",
137
+ "Warning",
138
+ "Dual mode is only relevant for dimension tables if DirectQuery is used for the corresponding fact table",
139
+ lambda obj, tom: not any(
140
+ p.Mode == TOM.ModeType.DirectQuery for p in tom.all_partitions()
141
+ )
142
+ and any(p.Mode == TOM.ModeType.Dual for p in tom.all_partitions()),
143
+ "Only use Dual mode for dimension tables/partitions where a corresponding fact table is in DirectQuery. Using Dual mode in other circumstances (i.e. rest of the model is in Import mode) may lead to performance issues especially if the number of measures in the model is high.",
144
+ ),
145
+ (
146
+ "Performance",
147
+ "Table",
148
+ "Warning",
149
+ "Set dimensions tables to dual mode instead of import when using DirectQuery on fact tables",
150
+ lambda obj, tom: sum(
151
+ 1 for p in obj.Partitions if p.Mode == TOM.ModeType.Import
152
+ )
153
+ == 1
154
+ and obj.Partitions.Count == 1
155
+ and tom.has_hybrid_table()
156
+ and any(
157
+ r.ToCardinality == TOM.RelationshipEndCardinality.One
158
+ and r.ToTable.Name == obj.Name
159
+ for r in tom.used_in_relationships(object=obj)
160
+ ),
161
+ "https://learn.microsoft.com/power-bi/transform-model/desktop-storage-mode#propagation-of-the-dual-setting",
162
+ ),
163
+ (
164
+ "Performance",
165
+ "Partition",
166
+ "Warning",
167
+ "Minimize Power Query transformations",
168
+ lambda obj, tom: obj.SourceType == TOM.PartitionSourceType.M
169
+ and any(
170
+ item in obj.Source.Expression
171
+ for item in [
172
+ 'Table.Combine("',
173
+ 'Table.Join("',
174
+ 'Table.NestedJoin("',
175
+ 'Table.AddColumn("',
176
+ 'Table.Group("',
177
+ 'Table.Sort("',
178
+ 'Table.Pivot("',
179
+ 'Table.Unpivot("',
180
+ 'Table.UnpivotOtherColumns("',
181
+ 'Table.Distinct("',
182
+ '[Query=(""SELECT',
183
+ "Value.NativeQuery",
184
+ "OleDb.Query",
185
+ "Odbc.Query",
186
+ ]
187
+ ),
188
+ "Minimize Power Query transformations in order to improve model processing performance. It is a best practice to offload these transformations to the data warehouse if possible. Also, please check whether query folding is occurring within your model. Please reference the article below for more information on query folding.",
189
+ "https://docs.microsoft.com/power-query/power-query-folding",
190
+ ),
191
+ (
192
+ "Performance",
193
+ "Table",
194
+ "Warning",
195
+ "Consider a star-schema instead of a snowflake architecture",
196
+ lambda obj, tom: obj.CalculationGroup is None
197
+ and (
198
+ any(
199
+ r.FromTable.Name == obj.Name
95
200
  for r in tom.used_in_relationships(object=obj)
96
201
  )
97
- and any(t.Name == obj.Name for t in tom.all_rls()),
98
- "Using many-to-many relationships on tables which use dynamic row level security can cause serious query performance degradation. This pattern's performance problems compound when snowflaking multiple many-to-many relationships against a table which contains row level security. Instead, use one of the patterns shown in the article below where a single dimension table relates many-to-one to a security table.",
99
- "https://www.elegantbi.com/post/dynamicrlspatterns",
100
- ),
101
- (
102
- "Performance",
103
- "Relationship",
104
- "Warning",
105
- "Many-to-many relationships should be single-direction",
106
- lambda obj: (
107
- obj.FromCardinality == TOM.RelationshipEndCardinality.Many
108
- and obj.ToCardinality == TOM.RelationshipEndCardinality.Many
109
- )
110
- and obj.CrossFilteringBehavior
111
- == TOM.CrossFilteringBehavior.BothDirections,
112
- ),
113
- (
114
- "Performance",
115
- "Column",
116
- "Warning",
117
- "Set IsAvailableInMdx to false on non-attribute columns",
118
- lambda obj: tom.is_direct_lake() is False
119
- and obj.IsAvailableInMDX
120
- and (obj.IsHidden or obj.Parent.IsHidden)
121
- and obj.SortByColumn is None
122
- and not any(tom.used_in_sort_by(column=obj))
123
- and not any(tom.used_in_hierarchies(column=obj)),
124
- "To speed up processing time and conserve memory after processing, attribute hierarchies should not be built for columns that are never used for slicing by MDX clients. In other words, all hidden columns that are not used as a Sort By Column or referenced in user hierarchies should have their IsAvailableInMdx property set to false. The IsAvailableInMdx property is not relevant for Direct Lake models.",
125
- "https://blog.crossjoin.co.uk/2018/07/02/isavailableinmdx-ssas-tabular",
126
- ),
127
- (
128
- "Performance",
129
- "Partition",
130
- "Warning",
131
- "Set 'Data Coverage Definition' property on the DirectQuery partition of a hybrid table",
132
- lambda obj: tom.is_hybrid_table(table_name=obj.Parent.Name)
133
- and obj.Mode == TOM.ModeType.DirectQuery
134
- and obj.DataCoverageDefinition is None,
135
- "Setting the 'Data Coverage Definition' property may lead to better performance because the engine knows when it can only query the import-portion of the table and when it needs to query the DirectQuery portion of the table.",
136
- "https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions",
137
- ),
138
- (
139
- "Performance",
140
- "Model",
141
- "Warning",
142
- "Dual mode is only relevant for dimension tables if DirectQuery is used for the corresponding fact table",
143
- lambda obj: not any(
144
- p.Mode == TOM.ModeType.DirectQuery for p in tom.all_partitions()
145
- )
146
- and any(p.Mode == TOM.ModeType.Dual for p in tom.all_partitions()),
147
- "Only use Dual mode for dimension tables/partitions where a corresponding fact table is in DirectQuery. Using Dual mode in other circumstances (i.e. rest of the model is in Import mode) may lead to performance issues especially if the number of measures in the model is high.",
148
- ),
149
- (
150
- "Performance",
151
- "Table",
152
- "Warning",
153
- "Set dimensions tables to dual mode instead of import when using DirectQuery on fact tables",
154
- lambda obj: sum(
155
- 1 for p in obj.Partitions if p.Mode == TOM.ModeType.Import
156
- )
157
- == 1
158
- and obj.Partitions.Count == 1
159
- and tom.has_hybrid_table()
160
202
  and any(
161
- r.ToCardinality == TOM.RelationshipEndCardinality.One
162
- and r.ToTable.Name == obj.Name
203
+ r.ToTable.Name == obj.Name
163
204
  for r in tom.used_in_relationships(object=obj)
164
- ),
165
- "https://learn.microsoft.com/power-bi/transform-model/desktop-storage-mode#propagation-of-the-dual-setting",
166
- ),
167
- (
168
- "Performance",
169
- "Partition",
170
- "Warning",
171
- "Minimize Power Query transformations",
172
- lambda obj: obj.SourceType == TOM.PartitionSourceType.M
173
- and any(
174
- item in obj.Source.Expression
175
- for item in [
176
- 'Table.Combine("',
177
- 'Table.Join("',
178
- 'Table.NestedJoin("',
179
- 'Table.AddColumn("',
180
- 'Table.Group("',
181
- 'Table.Sort("',
182
- 'Table.Pivot("',
183
- 'Table.Unpivot("',
184
- 'Table.UnpivotOtherColumns("',
185
- 'Table.Distinct("',
186
- '[Query=(""SELECT',
187
- "Value.NativeQuery",
188
- "OleDb.Query",
189
- "Odbc.Query",
190
- ]
191
- ),
192
- "Minimize Power Query transformations in order to improve model processing performance. It is a best practice to offload these transformations to the data warehouse if possible. Also, please check whether query folding is occurring within your model. Please reference the article below for more information on query folding.",
193
- "https://docs.microsoft.com/power-query/power-query-folding",
194
- ),
195
- (
196
- "Performance",
197
- "Table",
198
- "Warning",
199
- "Consider a star-schema instead of a snowflake architecture",
200
- lambda obj: obj.CalculationGroup is None
201
- and (
202
- any(
203
- r.FromTable.Name == obj.Name
204
- for r in tom.used_in_relationships(object=obj)
205
- )
206
- and any(
207
- r.ToTable.Name == obj.Name
208
- for r in tom.used_in_relationships(object=obj)
209
- )
210
- ),
211
- "Generally speaking, a star-schema is the optimal architecture for tabular models. That being the case, there are valid cases to use a snowflake approach. Please check your model and consider moving to a star-schema architecture.",
212
- "https://docs.microsoft.com/power-bi/guidance/star-schema",
213
- ),
214
- (
215
- "Performance",
216
- "Model",
217
- "Warning",
218
- "Avoid using views when using Direct Lake mode",
219
- lambda obj: tom.is_direct_lake_using_view(),
220
- "In Direct Lake mode, views will always fall back to DirectQuery. Thus, in order to obtain the best performance use lakehouse tables instead of views.",
221
- "https://learn.microsoft.com/fabric/get-started/direct-lake-overview#fallback",
222
- ),
223
- (
224
- "Performance",
225
- "Measure",
226
- "Warning",
227
- "Avoid adding 0 to a measure",
228
- lambda obj: obj.Expression.replace(" ", "").startswith("0+")
229
- or obj.Expression.replace(" ", "").endswith("+0")
230
- or re.search(
231
- r"DIVIDE\s*\(\s*[^,]+,\s*[^,]+,\s*0\s*\)",
232
- obj.Expression,
233
- flags=re.IGNORECASE,
234
205
  )
235
- or re.search(
236
- r"IFERROR\s*\(\s*[^,]+,\s*0\s*\)",
237
- obj.Expression,
238
- flags=re.IGNORECASE,
239
- ),
240
- "Adding 0 to a measure in order for it not to show a blank value may negatively impact performance.",
241
- ),
242
- (
243
- "Performance",
244
- "Table",
245
- "Warning",
246
- "Reduce usage of calculated tables",
247
- lambda obj: tom.is_field_parameter(table_name=obj.Name) is False
248
- and tom.is_calculated_table(table_name=obj.Name),
249
- "Migrate calculated table logic to your data warehouse. Reliance on calculated tables will lead to technical debt and potential misalignments if you have multiple models on your platform.",
250
206
  ),
251
- (
252
- "Performance",
253
- "Column",
254
- "Warning",
255
- "Reduce usage of calculated columns that use the RELATED function",
256
- lambda obj: obj.Type == TOM.ColumnType.Calculated
257
- and re.search(r"related\s*\(", obj.Expression, flags=re.IGNORECASE),
258
- "Calculated columns do not compress as well as data columns and may cause longer processing times. As such, calculated columns should be avoided if possible. One scenario where they may be easier to avoid is if they use the RELATED function.",
259
- "https://www.sqlbi.com/articles/storage-differences-between-calculated-columns-and-calculated-tables",
260
- ),
261
- (
262
- "Performance",
263
- "Model",
264
- "Warning",
265
- "Avoid excessive bi-directional or many-to-many relationships",
266
- lambda obj: (
267
- (
268
- sum(
269
- 1
270
- for r in obj.Relationships
271
- if r.CrossFilteringBehavior
272
- == TOM.CrossFilteringBehavior.BothDirections
273
- )
274
- + sum(
275
- 1
276
- for r in obj.Relationships
277
- if (
278
- r.FromCardinality
279
- == TOM.RelationshipEndCardinality.Many
280
- )
281
- and (
282
- r.ToCardinality
283
- == TOM.RelationshipEndCardinality.Many
284
- )
285
- )
207
+ "Generally speaking, a star-schema is the optimal architecture for tabular models. That being the case, there are valid cases to use a snowflake approach. Please check your model and consider moving to a star-schema architecture.",
208
+ "https://docs.microsoft.com/power-bi/guidance/star-schema",
209
+ ),
210
+ (
211
+ "Performance",
212
+ "Model",
213
+ "Warning",
214
+ "Avoid using views when using Direct Lake mode",
215
+ lambda obj, tom: tom.is_direct_lake_using_view(),
216
+ "In Direct Lake mode, views will always fall back to DirectQuery. Thus, in order to obtain the best performance use lakehouse tables instead of views.",
217
+ "https://learn.microsoft.com/fabric/get-started/direct-lake-overview#fallback",
218
+ ),
219
+ (
220
+ "Performance",
221
+ "Measure",
222
+ "Warning",
223
+ "Avoid adding 0 to a measure",
224
+ lambda obj, tom: obj.Expression.replace(" ", "").startswith("0+")
225
+ or obj.Expression.replace(" ", "").endswith("+0")
226
+ or re.search(
227
+ r"DIVIDE\s*\(\s*[^,]+,\s*[^,]+,\s*0\s*\)",
228
+ obj.Expression,
229
+ flags=re.IGNORECASE,
230
+ )
231
+ or re.search(
232
+ r"IFERROR\s*\(\s*[^,]+,\s*0\s*\)",
233
+ obj.Expression,
234
+ flags=re.IGNORECASE,
235
+ ),
236
+ "Adding 0 to a measure in order for it not to show a blank value may negatively impact performance.",
237
+ ),
238
+ (
239
+ "Performance",
240
+ "Table",
241
+ "Warning",
242
+ "Reduce usage of calculated tables",
243
+ lambda obj, tom: tom.is_field_parameter(table_name=obj.Name) is False
244
+ and tom.is_calculated_table(table_name=obj.Name),
245
+ "Migrate calculated table logic to your data warehouse. Reliance on calculated tables will lead to technical debt and potential misalignments if you have multiple models on your platform.",
246
+ ),
247
+ (
248
+ "Performance",
249
+ "Column",
250
+ "Warning",
251
+ "Reduce usage of calculated columns that use the RELATED function",
252
+ lambda obj, tom: obj.Type == TOM.ColumnType.Calculated
253
+ and re.search(r"related\s*\(", obj.Expression, flags=re.IGNORECASE),
254
+ "Calculated columns do not compress as well as data columns and may cause longer processing times. As such, calculated columns should be avoided if possible. One scenario where they may be easier to avoid is if they use the RELATED function.",
255
+ "https://www.sqlbi.com/articles/storage-differences-between-calculated-columns-and-calculated-tables",
256
+ ),
257
+ (
258
+ "Performance",
259
+ "Model",
260
+ "Warning",
261
+ "Avoid excessive bi-directional or many-to-many relationships",
262
+ lambda obj, tom: (
263
+ (
264
+ sum(
265
+ 1
266
+ for r in obj.Relationships
267
+ if r.CrossFilteringBehavior
268
+ == TOM.CrossFilteringBehavior.BothDirections
286
269
  )
287
- / max(int(obj.Relationships.Count), 1)
288
- )
289
- > 0.3,
290
- "Limit use of b-di and many-to-many relationships. This rule flags the model if more than 30% of relationships are bi-di or many-to-many.",
291
- "https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax",
292
- ),
293
- # ('Performance', 'Column', 'Warning', 'Avoid bi-directional or many-to-many relationships against high-cardinality columns',
294
- # lambda obj: ((str(r.FromCardinality) == 'Many' and str(r.ToCardinality == 'Many')) or (str(r.CrossFilteringBehavior) == 'BothDirections') for r in tom.used_in_relationships(object = obj)) and tom.cardinality(column = obj) > 100000,
295
- # 'For best performance, it is recommended to avoid using bi-directional relationships against high-cardinality columns',
296
- # ),
297
- (
298
- "Performance",
299
- "Table",
300
- "Warning",
301
- "Remove auto-date table",
302
- lambda obj: tom.is_calculated_table(table_name=obj.Name)
303
- and (
304
- obj.Name.startswith("DateTableTemplate_")
305
- or obj.Name.startswith("LocalDateTable_")
306
- ),
307
- "Avoid using auto-date tables. Make sure to turn off auto-date table in the settings in Power BI Desktop. This will save memory resources.",
308
- "https://www.youtube.com/watch?v=xu3uDEHtCrg",
309
- ),
310
- (
311
- "Performance",
312
- "Table",
313
- "Warning",
314
- "Date/calendar tables should be marked as a date table",
315
- lambda obj: (
316
- re.search(r"date", obj.Name, flags=re.IGNORECASE)
317
- or re.search(r"calendar", obj.Name, flags=re.IGNORECASE)
318
- )
319
- and str(obj.DataCategory) != "Time",
320
- "This rule looks for tables that contain the words 'date' or 'calendar' as they should likely be marked as a date table.",
321
- "https://docs.microsoft.com/power-bi/transform-model/desktop-date-tables",
322
- ),
323
- (
324
- "Performance",
325
- "Table",
326
- "Warning",
327
- "Large tables should be partitioned",
328
- lambda obj: tom.is_direct_lake() is False
329
- and int(obj.Partitions.Count) == 1
330
- and tom.row_count(object=obj) > 25000000,
331
- "Large tables should be partitioned in order to optimize processing. This is not relevant for semantic models in Direct Lake mode as they can only have one partition per table.",
332
- ),
333
- (
334
- "Performance",
335
- "Row Level Security",
336
- "Warning",
337
- "Limit row level security (RLS) logic",
338
- lambda obj: any(
339
- item in obj.FilterExpression.lower()
340
- for item in [
341
- "right(",
342
- "left(",
343
- "filter(",
344
- "upper(",
345
- "lower(",
346
- "find(",
347
- ]
348
- ),
349
- "Try to simplify the DAX used for row level security. Usage of the functions within this rule can likely be offloaded to the upstream systems (data warehouse).",
350
- ),
351
- (
352
- "Performance",
353
- "Model",
354
- "Warning",
355
- "Model should have a date table",
356
- lambda obj: not any(
357
- (c.IsKey and c.DataType == TOM.DataType.DateTime)
358
- and str(t.DataCategory) == "Time"
359
- for t in obj.Tables
360
- for c in t.Columns
361
- ),
362
- "Generally speaking, models should generally have a date table. Models that do not have a date table generally are not taking advantage of features such as time intelligence or may not have a properly structured architecture.",
363
- ),
364
- # ('Performance', 'Measure', 'Warning', 'Measures using time intelligence and model is using Direct Query',
365
- # lambda obj: any(str(p.Mode) == 'DirectQuery' for p in tom.all_partitions()) and any(re.search(pattern + '\s*\(', obj.Expression, flags=re.IGNORECASE) for pattern in ['CLOSINGBALANCEMONTH', 'CLOSINGBALANCEQUARTER', 'CLOSINGBALANCEYEAR', \
366
- # 'DATEADD', 'DATESBETWEEN', 'DATESINPERIOD', 'DATESMTD', 'DATESQTD', 'DATESYTD', 'ENDOFMONTH', 'ENDOFQUARTER', 'ENDOFYEAR', 'FIRSTDATE', 'FIRSTNONBLANK', 'FIRSTNONBLANKVALUE', 'LASTDATE', 'LASTNONBLANK', 'LASTNONBLANKVALUE', \
367
- # 'NEXTDAY', 'NEXTMONTH', 'NEXTQUARTER', 'NEXTYEAR', 'OPENINGBALANCEMONTH', 'OPENINGBALANCEQUARTER', 'OPENINGBALANCEYEAR', 'PARALLELPERIOD', 'PREVIOUSDAY', 'PREVIOUSMONTH', 'PREVIOUSQUARTER', 'PREVIOUSYEAR', 'SAMEPERIODLASTYEAR', \
368
- # 'STARTOFMONTH', 'STARTOFQUARTER', 'STARTOFYEAR', 'TOTALMTD', 'TOTALQTD', 'TOTALYTD']),
369
- # 'At present, time intelligence functions are known to not perform as well when using Direct Query. If you are having performance issues, you may want to try alternative solutions such as adding columns in the fact table that show previous year or previous month data.',
370
- # ),
371
- (
372
- "Error Prevention",
373
- "Calculation Item",
374
- "Error",
375
- "Calculation items must have an expression",
376
- lambda obj: len(obj.Expression) == 0,
377
- "Calculation items must have an expression. Without an expression, they will not show any values.",
378
- ),
379
- # ('Error Prevention', ['Table', 'Column', 'Measure', 'Hierarchy', 'Partition'], 'Error', 'Avoid invalid characters in names',
380
- # lambda obj: obj.Name
381
- # 'This rule identifies if a name for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.',
382
- # ),
383
- # ('Error Prevention', ['Table', 'Column', 'Measure', 'Hierarchy'], 'Error', 'Avoid invalid characters in descriptions',
384
- # lambda obj: obj.Description
385
- # 'This rule identifies if a description for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.',
386
- # ),
387
- (
388
- "Error Prevention",
389
- "Relationship",
390
- "Warning",
391
- "Relationship columns should be of the same data type",
392
- lambda obj: obj.FromColumn.DataType != obj.ToColumn.DataType,
393
- "Columns used in a relationship should be of the same data type. Ideally, they will be of integer data type (see the related rule '[Formatting] Relationship columns should be of integer data type'). Having columns within a relationship which are of different data types may lead to various issues.",
394
- ),
395
- (
396
- "Error Prevention",
397
- "Column",
398
- "Error",
399
- "Data columns must have a source column",
400
- lambda obj: obj.Type == TOM.ColumnType.Data
401
- and len(obj.SourceColumn) == 0,
402
- "Data columns must have a source column. A data column without a source column will cause an error when processing the model.",
403
- ),
404
- (
405
- "Error Prevention",
406
- "Column",
407
- "Warning",
408
- "Set IsAvailableInMdx to true on necessary columns",
409
- lambda obj: tom.is_direct_lake() is False
410
- and obj.IsAvailableInMDX is False
411
- and (
412
- tom.used_in_sort_by(column=obj)
413
- or tom.used_in_hierarchies(column=obj)
414
- or obj.SortByColumn is not None
415
- ),
416
- "In order to avoid errors, ensure that attribute hierarchies are enabled if a column is used for sorting another column, used in a hierarchy, used in variations, or is sorted by another column. The IsAvailableInMdx property is not relevant for Direct Lake models.",
417
- ),
418
- (
419
- "Error Prevention",
420
- "Table",
421
- "Error",
422
- "Avoid the USERELATIONSHIP function and RLS against the same table",
423
- lambda obj: any(
424
- re.search(
425
- r"USERELATIONSHIP\s*\(\s*.+?(?=])\]\s*,\s*'*"
426
- + obj.Name
427
- + r"'*\[",
428
- m.Expression,
429
- flags=re.IGNORECASE,
270
+ + sum(
271
+ 1
272
+ for r in obj.Relationships
273
+ if (
274
+ r.FromCardinality == TOM.RelationshipEndCardinality.Many
275
+ )
276
+ and (r.ToCardinality == TOM.RelationshipEndCardinality.Many)
430
277
  )
431
- for m in tom.all_measures()
432
- )
433
- and any(r.Table.Name == obj.Name for r in tom.all_rls()),
434
- "The USERELATIONSHIP function may not be used against a table which also leverages row-level security (RLS). This will generate an error when using the particular measure in a visual. This rule will highlight the table which is used in a measure's USERELATIONSHIP function as well as RLS.",
435
- "https://blog.crossjoin.co.uk/2013/05/10/userelationship-and-tabular-row-security",
436
- ),
437
- (
438
- "DAX Expressions",
439
- "Measure",
440
- "Warning",
441
- "Avoid using the IFERROR function",
442
- lambda obj: re.search(
443
- r"iferror\s*\(", obj.Expression, flags=re.IGNORECASE
444
- ),
445
- "Avoid using the IFERROR function as it may cause performance degradation. If you are concerned about a divide-by-zero error, use the DIVIDE function as it naturally resolves such errors as blank (or you can customize what should be shown in case of such an error).",
446
- "https://www.elegantbi.com/post/top10bestpractices",
447
- ),
448
- (
449
- "DAX Expressions",
450
- "Measure",
451
- "Warning",
452
- "Use the TREATAS function instead of INTERSECT for virtual relationships",
453
- lambda obj: re.search(
454
- r"intersect\s*\(", obj.Expression, flags=re.IGNORECASE
455
- ),
456
- "The TREATAS function is more efficient and provides better performance than the INTERSECT function when used in virutal relationships.",
457
- "https://www.sqlbi.com/articles/propagate-filters-using-treatas-in-dax",
458
- ),
459
- (
460
- "DAX Expressions",
461
- "Measure",
462
- "Warning",
463
- "The EVALUATEANDLOG function should not be used in production models",
464
- lambda obj: re.search(
465
- r"evaluateandlog\s*\(", obj.Expression, flags=re.IGNORECASE
466
- ),
467
- "The EVALUATEANDLOG function is meant to be used only in development/test environments and should not be used in production models.",
468
- "https://pbidax.wordpress.com/2022/08/16/introduce-the-dax-evaluateandlog-function",
469
- ),
470
- (
471
- "DAX Expressions",
472
- "Measure",
473
- "Warning",
474
- "Measures should not be direct references of other measures",
475
- lambda obj: any(
476
- obj.Expression == f"[{m.Name}]" for m in tom.all_measures()
477
- ),
478
- "This rule identifies measures which are simply a reference to another measure. As an example, consider a model with two measures: [MeasureA] and [MeasureB]. This rule would be triggered for MeasureB if MeasureB's DAX was MeasureB:=[MeasureA]. Such duplicative measures should be removed.",
479
- ),
480
- (
481
- "DAX Expressions",
482
- "Measure",
483
- "Warning",
484
- "No two measures should have the same definition",
485
- lambda obj: any(
486
- re.sub(r"\s+", "", obj.Expression)
487
- == re.sub(r"\s+", "", m.Expression)
488
- and obj.Name != m.Name
489
- for m in tom.all_measures()
490
- ),
491
- "Two measures with different names and defined by the same DAX expression should be avoided to reduce redundancy.",
492
- ),
493
- (
494
- "DAX Expressions",
495
- "Measure",
496
- "Warning",
497
- "Avoid addition or subtraction of constant values to results of divisions",
498
- lambda obj: re.search(
499
- r"DIVIDE\s*\((\s*.*?)\)\s*[+-]\s*1|\/\s*.*(?=[-+]\s*1)",
500
- obj.Expression,
501
- flags=re.IGNORECASE,
502
- ),
503
- ),
504
- (
505
- "DAX Expressions",
506
- "Measure",
507
- "Warning",
508
- "Avoid using '1-(x/y)' syntax",
509
- lambda obj: re.search(
510
- r"[0-9]+\s*[-+]\s*[\(]*\s*SUM\s*\(\s*\'*[A-Za-z0-9 _]+\'*\s*\[[A-Za-z0-9 _]+\]\s*\)\s*/",
511
- obj.Expression,
512
- flags=re.IGNORECASE,
513
278
  )
514
- or re.search(
515
- r"[0-9]+\s*[-+]\s*DIVIDE\s*\(",
516
- obj.Expression,
517
- flags=re.IGNORECASE,
518
- ),
519
- "Instead of using the '1-(x/y)' or '1+(x/y)' syntax to achieve a percentage calculation, use the basic DAX functions (as shown below). Using the improved syntax will generally improve the performance. The '1+/-...' syntax always returns a value whereas the solution without the '1+/-...' does not (as the value may be 'blank'). Therefore the '1+/-...' syntax may return more rows/columns which may result in a slower query speed. Let's clarify with an example: Avoid this: 1 - SUM ( 'Sales'[CostAmount] ) / SUM( 'Sales'[SalesAmount] ) Better: DIVIDE ( SUM ( 'Sales'[SalesAmount] ) - SUM ( 'Sales'[CostAmount] ), SUM ( 'Sales'[SalesAmount] ) ) Best: VAR x = SUM ( 'Sales'[SalesAmount] ) RETURN DIVIDE ( x - SUM ( 'Sales'[CostAmount] ), x )",
520
- ),
521
- (
522
- "DAX Expressions",
523
- "Measure",
524
- "Warning",
525
- "Filter measure values by columns, not tables",
526
- lambda obj: re.search(
527
- r"CALCULATE\s*\(\s*[^,]+,\s*FILTER\s*\(\s*\'*[A-Za-z0-9 _]+\'*\s*,\s*\[[^\]]+\]",
528
- obj.Expression,
529
- flags=re.IGNORECASE,
530
- )
531
- or re.search(
532
- r"CALCULATETABLE\s*\(\s*[^,]*,\s*FILTER\s*\(\s*\'*[A-Za-z0-9 _]+\'*\s*,\s*\[",
533
- obj.Expression,
534
- flags=re.IGNORECASE,
535
- ),
536
- "Instead of using this pattern FILTER('Table',[Measure]>Value) for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below (if possible). Filtering on a specific column will produce a smaller table for the engine to process, thereby enabling faster performance. Using the VALUES function or the ALL function depends on the desired measure result.\nOption 1: FILTER(VALUES('Table'[Column]),[Measure] > Value)\nOption 2: FILTER(ALL('Table'[Column]),[Measure] > Value)",
537
- "https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument",
538
- ),
539
- (
540
- "DAX Expressions",
541
- "Measure",
542
- "Warning",
543
- "Filter column values with proper syntax",
544
- lambda obj: re.search(
545
- r"CALCULATE\s*\(\s*[^,]+,\s*FILTER\s*\(\s*'*[A-Za-z0-9 _]+'*\s*,\s*'*[A-Za-z0-9 _]+'*\[[A-Za-z0-9 _]+\]",
546
- obj.Expression,
279
+ / max(int(obj.Relationships.Count), 1)
280
+ )
281
+ > 0.3,
282
+ "Limit use of b-di and many-to-many relationships. This rule flags the model if more than 30% of relationships are bi-di or many-to-many.",
283
+ "https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax",
284
+ ),
285
+ # ('Performance', 'Column', 'Warning', 'Avoid bi-directional or many-to-many relationships against high-cardinality columns',
286
+ # lambda obj, tom: ((str(r.FromCardinality) == 'Many' and str(r.ToCardinality == 'Many')) or (str(r.CrossFilteringBehavior) == 'BothDirections') for r in tom.used_in_relationships(object = obj)) and tom.cardinality(column = obj) > 100000,
287
+ # 'For best performance, it is recommended to avoid using bi-directional relationships against high-cardinality columns',
288
+ # ),
289
+ (
290
+ "Performance",
291
+ "Table",
292
+ "Warning",
293
+ "Remove auto-date table",
294
+ lambda obj, tom: tom.is_calculated_table(table_name=obj.Name)
295
+ and (
296
+ obj.Name.startswith("DateTableTemplate_")
297
+ or obj.Name.startswith("LocalDateTable_")
298
+ ),
299
+ "Avoid using auto-date tables. Make sure to turn off auto-date table in the settings in Power BI Desktop. This will save memory resources.",
300
+ "https://www.youtube.com/watch?v=xu3uDEHtCrg",
301
+ ),
302
+ (
303
+ "Performance",
304
+ "Table",
305
+ "Warning",
306
+ "Date/calendar tables should be marked as a date table",
307
+ lambda obj, tom: (
308
+ re.search(r"date", obj.Name, flags=re.IGNORECASE)
309
+ or re.search(r"calendar", obj.Name, flags=re.IGNORECASE)
310
+ )
311
+ and str(obj.DataCategory) != "Time",
312
+ "This rule looks for tables that contain the words 'date' or 'calendar' as they should likely be marked as a date table.",
313
+ "https://docs.microsoft.com/power-bi/transform-model/desktop-date-tables",
314
+ ),
315
+ (
316
+ "Performance",
317
+ "Table",
318
+ "Warning",
319
+ "Large tables should be partitioned",
320
+ lambda obj, tom: tom.is_direct_lake() is False
321
+ and int(obj.Partitions.Count) == 1
322
+ and tom.row_count(object=obj) > 25000000,
323
+ "Large tables should be partitioned in order to optimize processing. This is not relevant for semantic models in Direct Lake mode as they can only have one partition per table.",
324
+ ),
325
+ (
326
+ "Performance",
327
+ "Row Level Security",
328
+ "Warning",
329
+ "Limit row level security (RLS) logic",
330
+ lambda obj, tom: any(
331
+ item in obj.FilterExpression.lower()
332
+ for item in [
333
+ "right(",
334
+ "left(",
335
+ "filter(",
336
+ "upper(",
337
+ "lower(",
338
+ "find(",
339
+ ]
340
+ ),
341
+ "Try to simplify the DAX used for row level security. Usage of the functions within this rule can likely be offloaded to the upstream systems (data warehouse).",
342
+ ),
343
+ (
344
+ "Performance",
345
+ "Model",
346
+ "Warning",
347
+ "Model should have a date table",
348
+ lambda obj, tom: not any(
349
+ (c.IsKey and c.DataType == TOM.DataType.DateTime)
350
+ and str(t.DataCategory) == "Time"
351
+ for t in obj.Tables
352
+ for c in t.Columns
353
+ ),
354
+ "Generally speaking, models should generally have a date table. Models that do not have a date table generally are not taking advantage of features such as time intelligence or may not have a properly structured architecture.",
355
+ ),
356
+ # ('Performance', 'Measure', 'Warning', 'Measures using time intelligence and model is using Direct Query',
357
+ # lambda obj, tom: any(str(p.Mode) == 'DirectQuery' for p in tom.all_partitions()) and any(re.search(pattern + '\s*\(', obj.Expression, flags=re.IGNORECASE) for pattern in ['CLOSINGBALANCEMONTH', 'CLOSINGBALANCEQUARTER', 'CLOSINGBALANCEYEAR', \
358
+ # 'DATEADD', 'DATESBETWEEN', 'DATESINPERIOD', 'DATESMTD', 'DATESQTD', 'DATESYTD', 'ENDOFMONTH', 'ENDOFQUARTER', 'ENDOFYEAR', 'FIRSTDATE', 'FIRSTNONBLANK', 'FIRSTNONBLANKVALUE', 'LASTDATE', 'LASTNONBLANK', 'LASTNONBLANKVALUE', \
359
+ # 'NEXTDAY', 'NEXTMONTH', 'NEXTQUARTER', 'NEXTYEAR', 'OPENINGBALANCEMONTH', 'OPENINGBALANCEQUARTER', 'OPENINGBALANCEYEAR', 'PARALLELPERIOD', 'PREVIOUSDAY', 'PREVIOUSMONTH', 'PREVIOUSQUARTER', 'PREVIOUSYEAR', 'SAMEPERIODLASTYEAR', \
360
+ # 'STARTOFMONTH', 'STARTOFQUARTER', 'STARTOFYEAR', 'TOTALMTD', 'TOTALQTD', 'TOTALYTD']),
361
+ # 'At present, time intelligence functions are known to not perform as well when using Direct Query. If you are having performance issues, you may want to try alternative solutions such as adding columns in the fact table that show previous year or previous month data.',
362
+ # ),
363
+ (
364
+ "Error Prevention",
365
+ "Calculation Item",
366
+ "Error",
367
+ "Calculation items must have an expression",
368
+ lambda obj, tom: len(obj.Expression) == 0,
369
+ "Calculation items must have an expression. Without an expression, they will not show any values.",
370
+ ),
371
+ # ('Error Prevention', ['Table', 'Column', 'Measure', 'Hierarchy', 'Partition'], 'Error', 'Avoid invalid characters in names',
372
+ # lambda obj, tom: obj.Name
373
+ # 'This rule identifies if a name for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.',
374
+ # ),
375
+ # ('Error Prevention', ['Table', 'Column', 'Measure', 'Hierarchy'], 'Error', 'Avoid invalid characters in descriptions',
376
+ # lambda obj, tom: obj.Description
377
+ # 'This rule identifies if a description for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.',
378
+ # ),
379
+ (
380
+ "Error Prevention",
381
+ "Relationship",
382
+ "Warning",
383
+ "Relationship columns should be of the same data type",
384
+ lambda obj, tom: obj.FromColumn.DataType != obj.ToColumn.DataType,
385
+ "Columns used in a relationship should be of the same data type. Ideally, they will be of integer data type (see the related rule '[Formatting] Relationship columns should be of integer data type'). Having columns within a relationship which are of different data types may lead to various issues.",
386
+ ),
387
+ (
388
+ "Error Prevention",
389
+ "Column",
390
+ "Error",
391
+ "Data columns must have a source column",
392
+ lambda obj, tom: obj.Type == TOM.ColumnType.Data
393
+ and len(obj.SourceColumn) == 0,
394
+ "Data columns must have a source column. A data column without a source column will cause an error when processing the model.",
395
+ ),
396
+ (
397
+ "Error Prevention",
398
+ "Column",
399
+ "Warning",
400
+ "Set IsAvailableInMdx to true on necessary columns",
401
+ lambda obj, tom: tom.is_direct_lake() is False
402
+ and obj.IsAvailableInMDX is False
403
+ and (
404
+ tom.used_in_sort_by(column=obj)
405
+ or tom.used_in_hierarchies(column=obj)
406
+ or obj.SortByColumn is not None
407
+ ),
408
+ "In order to avoid errors, ensure that attribute hierarchies are enabled if a column is used for sorting another column, used in a hierarchy, used in variations, or is sorted by another column. The IsAvailableInMdx property is not relevant for Direct Lake models.",
409
+ ),
410
+ (
411
+ "Error Prevention",
412
+ "Table",
413
+ "Error",
414
+ "Avoid the USERELATIONSHIP function and RLS against the same table",
415
+ lambda obj, tom: any(
416
+ re.search(
417
+ r"USERELATIONSHIP\s*\(\s*.+?(?=])\]\s*,\s*'*"
418
+ + obj.Name
419
+ + r"'*\[",
420
+ m.Expression,
547
421
  flags=re.IGNORECASE,
548
422
  )
549
- or re.search(
550
- r"CALCULATETABLE\s*\([^,]*,\s*FILTER\s*\(\s*'*[A-Za-z0-9 _]+'*\s*,\s*'*[A-Za-z0-9 _]+'*\[[A-Za-z0-9 _]+\]",
551
- obj.Expression,
423
+ for m in tom.all_measures()
424
+ )
425
+ and any(r.Table.Name == obj.Name for r in tom.all_rls()),
426
+ "The USERELATIONSHIP function may not be used against a table which also leverages row-level security (RLS). This will generate an error when using the particular measure in a visual. This rule will highlight the table which is used in a measure's USERELATIONSHIP function as well as RLS.",
427
+ "https://blog.crossjoin.co.uk/2013/05/10/userelationship-and-tabular-row-security",
428
+ ),
429
+ (
430
+ "DAX Expressions",
431
+ "Measure",
432
+ "Warning",
433
+ "Avoid using the IFERROR function",
434
+ lambda obj, tom: re.search(
435
+ r"iferror\s*\(", obj.Expression, flags=re.IGNORECASE
436
+ ),
437
+ "Avoid using the IFERROR function as it may cause performance degradation. If you are concerned about a divide-by-zero error, use the DIVIDE function as it naturally resolves such errors as blank (or you can customize what should be shown in case of such an error).",
438
+ "https://www.elegantbi.com/post/top10bestpractices",
439
+ ),
440
+ (
441
+ "DAX Expressions",
442
+ "Measure",
443
+ "Warning",
444
+ "Use the TREATAS function instead of INTERSECT for virtual relationships",
445
+ lambda obj, tom: re.search(
446
+ r"intersect\s*\(", obj.Expression, flags=re.IGNORECASE
447
+ ),
448
+ "The TREATAS function is more efficient and provides better performance than the INTERSECT function when used in virutal relationships.",
449
+ "https://www.sqlbi.com/articles/propagate-filters-using-treatas-in-dax",
450
+ ),
451
+ (
452
+ "DAX Expressions",
453
+ "Measure",
454
+ "Warning",
455
+ "The EVALUATEANDLOG function should not be used in production models",
456
+ lambda obj, tom: re.search(
457
+ r"evaluateandlog\s*\(", obj.Expression, flags=re.IGNORECASE
458
+ ),
459
+ "The EVALUATEANDLOG function is meant to be used only in development/test environments and should not be used in production models.",
460
+ "https://pbidax.wordpress.com/2022/08/16/introduce-the-dax-evaluateandlog-function",
461
+ ),
462
+ (
463
+ "DAX Expressions",
464
+ "Measure",
465
+ "Warning",
466
+ "Measures should not be direct references of other measures",
467
+ lambda obj, tom: any(
468
+ obj.Expression == f"[{m.Name}]" for m in tom.all_measures()
469
+ ),
470
+ "This rule identifies measures which are simply a reference to another measure. As an example, consider a model with two measures: [MeasureA] and [MeasureB]. This rule would be triggered for MeasureB if MeasureB's DAX was MeasureB:=[MeasureA]. Such duplicative measures should be removed.",
471
+ ),
472
+ (
473
+ "DAX Expressions",
474
+ "Measure",
475
+ "Warning",
476
+ "No two measures should have the same definition",
477
+ lambda obj, tom: any(
478
+ re.sub(r"\s+", "", obj.Expression)
479
+ == re.sub(r"\s+", "", m.Expression)
480
+ and obj.Name != m.Name
481
+ for m in tom.all_measures()
482
+ ),
483
+ "Two measures with different names and defined by the same DAX expression should be avoided to reduce redundancy.",
484
+ ),
485
+ (
486
+ "DAX Expressions",
487
+ "Measure",
488
+ "Warning",
489
+ "Avoid addition or subtraction of constant values to results of divisions",
490
+ lambda obj, tom: re.search(
491
+ r"DIVIDE\s*\((\s*.*?)\)\s*[+-]\s*1|\/\s*.*(?=[-+]\s*1)",
492
+ obj.Expression,
493
+ flags=re.IGNORECASE,
494
+ ),
495
+ ),
496
+ (
497
+ "DAX Expressions",
498
+ "Measure",
499
+ "Warning",
500
+ "Avoid using '1-(x/y)' syntax",
501
+ lambda obj, tom: re.search(
502
+ r"[0-9]+\s*[-+]\s*[\(]*\s*SUM\s*\(\s*\'*[A-Za-z0-9 _]+\'*\s*\[[A-Za-z0-9 _]+\]\s*\)\s*/",
503
+ obj.Expression,
504
+ flags=re.IGNORECASE,
505
+ )
506
+ or re.search(
507
+ r"[0-9]+\s*[-+]\s*DIVIDE\s*\(",
508
+ obj.Expression,
509
+ flags=re.IGNORECASE,
510
+ ),
511
+ "Instead of using the '1-(x/y)' or '1+(x/y)' syntax to achieve a percentage calculation, use the basic DAX functions (as shown below). Using the improved syntax will generally improve the performance. The '1+/-...' syntax always returns a value whereas the solution without the '1+/-...' does not (as the value may be 'blank'). Therefore the '1+/-...' syntax may return more rows/columns which may result in a slower query speed. Let's clarify with an example: Avoid this: 1 - SUM ( 'Sales'[CostAmount] ) / SUM( 'Sales'[SalesAmount] ) Better: DIVIDE ( SUM ( 'Sales'[SalesAmount] ) - SUM ( 'Sales'[CostAmount] ), SUM ( 'Sales'[SalesAmount] ) ) Best: VAR x = SUM ( 'Sales'[SalesAmount] ) RETURN DIVIDE ( x - SUM ( 'Sales'[CostAmount] ), x )",
512
+ ),
513
+ (
514
+ "DAX Expressions",
515
+ "Measure",
516
+ "Warning",
517
+ "Filter measure values by columns, not tables",
518
+ lambda obj, tom: re.search(
519
+ r"CALCULATE\s*\(\s*[^,]+,\s*FILTER\s*\(\s*\'*[A-Za-z0-9 _]+\'*\s*,\s*\[[^\]]+\]",
520
+ obj.Expression,
521
+ flags=re.IGNORECASE,
522
+ )
523
+ or re.search(
524
+ r"CALCULATETABLE\s*\(\s*[^,]*,\s*FILTER\s*\(\s*\'*[A-Za-z0-9 _]+\'*\s*,\s*\[",
525
+ obj.Expression,
526
+ flags=re.IGNORECASE,
527
+ ),
528
+ "Instead of using this pattern FILTER('Table',[Measure]>Value) for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below (if possible). Filtering on a specific column will produce a smaller table for the engine to process, thereby enabling faster performance. Using the VALUES function or the ALL function depends on the desired measure result.\nOption 1: FILTER(VALUES('Table'[Column]),[Measure] > Value)\nOption 2: FILTER(ALL('Table'[Column]),[Measure] > Value)",
529
+ "https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument",
530
+ ),
531
+ (
532
+ "DAX Expressions",
533
+ "Measure",
534
+ "Warning",
535
+ "Filter column values with proper syntax",
536
+ lambda obj, tom: re.search(
537
+ r"CALCULATE\s*\(\s*[^,]+,\s*FILTER\s*\(\s*'*[A-Za-z0-9 _]+'*\s*,\s*'*[A-Za-z0-9 _]+'*\[[A-Za-z0-9 _]+\]",
538
+ obj.Expression,
539
+ flags=re.IGNORECASE,
540
+ )
541
+ or re.search(
542
+ r"CALCULATETABLE\s*\([^,]*,\s*FILTER\s*\(\s*'*[A-Za-z0-9 _]+'*\s*,\s*'*[A-Za-z0-9 _]+'*\[[A-Za-z0-9 _]+\]",
543
+ obj.Expression,
544
+ flags=re.IGNORECASE,
545
+ ),
546
+ "Instead of using this pattern FILTER('Table','Table'[Column]=\"Value\") for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below. As far as whether to use the KEEPFILTERS function, see the second reference link below.\nOption 1: KEEPFILTERS('Table'[Column]=\"Value\")\nOption 2: 'Table'[Column]=\"Value\"",
547
+ "https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument Reference: https://www.sqlbi.com/articles/using-keepfilters-in-dax",
548
+ ),
549
+ (
550
+ "DAX Expressions",
551
+ "Measure",
552
+ "Warning",
553
+ "Use the DIVIDE function for division",
554
+ lambda obj, tom: re.search(
555
+ r"\]\s*\/(?!\/)(?!\*)\" or \"\)\s*\/(?!\/)(?!\*)",
556
+ obj.Expression,
557
+ flags=re.IGNORECASE,
558
+ ),
559
+ 'Use the DIVIDE function instead of using "/". The DIVIDE function resolves divide-by-zero cases. As such, it is recommended to use to avoid errors.',
560
+ "https://docs.microsoft.com/power-bi/guidance/dax-divide-function-operator",
561
+ ),
562
+ (
563
+ "DAX Expressions",
564
+ "Measure",
565
+ "Error",
566
+ "Column references should be fully qualified",
567
+ lambda obj, tom: any(
568
+ tom.unqualified_columns(object=obj, dependencies=dependencies)
569
+ ),
570
+ "Using fully qualified column references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a column in DAX, first specify the table name, then specify the column name in square brackets.",
571
+ "https://www.elegantbi.com/post/top10bestpractices",
572
+ ),
573
+ (
574
+ "DAX Expressions",
575
+ "Measure",
576
+ "Error",
577
+ "Measure references should be unqualified",
578
+ lambda obj, tom: any(
579
+ tom.fully_qualified_measures(object=obj, dependencies=dependencies)
580
+ ),
581
+ "Using unqualified measure references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a measure using DAX, do not specify the table name. Use only the measure name in square brackets.",
582
+ "https://www.elegantbi.com/post/top10bestpractices",
583
+ ),
584
+ (
585
+ "DAX Expressions",
586
+ "Relationship",
587
+ "Warning",
588
+ "Inactive relationships that are never activated",
589
+ lambda obj, tom: obj.IsActive is False
590
+ and not any(
591
+ re.search(
592
+ r"USERELATIONSHIP\s*\(\s*\'*"
593
+ + obj.FromTable.Name
594
+ + r"'*\["
595
+ + obj.FromColumn.Name
596
+ + r"\]\s*,\s*'*"
597
+ + obj.ToTable.Name
598
+ + r"'*\["
599
+ + obj.ToColumn.Name
600
+ + r"\]",
601
+ m.Expression,
552
602
  flags=re.IGNORECASE,
553
- ),
554
- "Instead of using this pattern FILTER('Table','Table'[Column]=\"Value\") for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below. As far as whether to use the KEEPFILTERS function, see the second reference link below.\nOption 1: KEEPFILTERS('Table'[Column]=\"Value\")\nOption 2: 'Table'[Column]=\"Value\"",
555
- "https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument Reference: https://www.sqlbi.com/articles/using-keepfilters-in-dax",
556
- ),
557
- (
558
- "DAX Expressions",
559
- "Measure",
560
- "Warning",
561
- "Use the DIVIDE function for division",
562
- lambda obj: re.search(
563
- r"\]\s*\/(?!\/)(?!\*)\" or \"\)\s*\/(?!\/)(?!\*)",
564
- obj.Expression,
565
- flags=re.IGNORECASE,
566
- ),
567
- 'Use the DIVIDE function instead of using "/". The DIVIDE function resolves divide-by-zero cases. As such, it is recommended to use to avoid errors.',
568
- "https://docs.microsoft.com/power-bi/guidance/dax-divide-function-operator",
569
- ),
570
- (
571
- "DAX Expressions",
572
- "Measure",
573
- "Error",
574
- "Column references should be fully qualified",
575
- lambda obj: any(
576
- tom.unqualified_columns(object=obj, dependencies=dependencies)
577
- ),
578
- "Using fully qualified column references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a column in DAX, first specify the table name, then specify the column name in square brackets.",
579
- "https://www.elegantbi.com/post/top10bestpractices",
580
- ),
581
- (
582
- "DAX Expressions",
583
- "Measure",
584
- "Error",
585
- "Measure references should be unqualified",
586
- lambda obj: any(
587
- tom.fully_qualified_measures(
588
- object=obj, dependencies=dependencies
589
- )
590
- ),
591
- "Using unqualified measure references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a measure using DAX, do not specify the table name. Use only the measure name in square brackets.",
592
- "https://www.elegantbi.com/post/top10bestpractices",
593
- ),
594
- (
595
- "DAX Expressions",
596
- "Relationship",
597
- "Warning",
598
- "Inactive relationships that are never activated",
599
- lambda obj: obj.IsActive is False
600
- and not any(
601
- re.search(
602
- r"USERELATIONSHIP\s*\(\s*\'*"
603
- + obj.FromTable.Name
604
- + r"'*\["
605
- + obj.FromColumn.Name
606
- + r"\]\s*,\s*'*"
607
- + obj.ToTable.Name
608
- + r"'*\["
609
- + obj.ToColumn.Name
610
- + r"\]",
611
- m.Expression,
612
- flags=re.IGNORECASE,
613
- )
614
- for m in tom.all_measures()
615
- ),
616
- "Inactive relationships are activated using the USERELATIONSHIP function. If an inactive relationship is not referenced in any measure via this function, the relationship will not be used. It should be determined whether the relationship is not necessary or to activate the relationship via this method.",
617
- "https://dax.guide/userelationship",
618
- ),
619
- (
620
- "Maintenance",
621
- "Column",
622
- "Warning",
623
- "Remove unnecessary columns",
624
- lambda obj: (obj.IsHidden or obj.Parent.IsHidden)
625
- and not any(tom.used_in_relationships(object=obj))
626
- and not any(tom.used_in_hierarchies(column=obj))
627
- and not any(tom.used_in_sort_by(column=obj))
628
- and any(tom.depends_on(object=obj, dependencies=dependencies)),
629
- "Hidden columns that are not referenced by any DAX expressions, relationships, hierarchy levels or Sort By-properties should be removed.",
630
- ),
631
- (
632
- "Maintenance",
633
- "Measure",
634
- "Warning",
635
- "Remove unnecessary measures",
636
- lambda obj: obj.IsHidden
637
- and not any(
638
- tom.referenced_by(object=obj, dependencies=dependencies)
639
- ),
640
- "Hidden measures that are not referenced by any DAX expressions should be removed for maintainability.",
641
- ),
642
- (
643
- "Maintenance",
644
- "Table",
645
- "Warning",
646
- "Ensure tables have relationships",
647
- lambda obj: any(tom.used_in_relationships(object=obj)) is False
648
- and obj.CalculationGroup is None,
649
- "This rule highlights tables which are not connected to any other table in the model with a relationship.",
650
- ),
651
- (
652
- "Maintenance",
653
- "Table",
654
- "Warning",
655
- "Calculation groups with no calculation items",
656
- lambda obj: obj.CalculationGroup is not None
657
- and not any(obj.CalculationGroup.CalculationItems),
658
- ),
659
- (
660
- "Maintenance",
661
- "Column",
662
- "Info",
663
- "Visible objects with no description",
664
- lambda obj: obj.IsHidden is False and len(obj.Description) == 0,
665
- "Calculation groups have no function unless they have calculation items.",
666
- ),
667
- (
668
- "Formatting",
669
- "Column",
670
- "Warning",
671
- "Provide format string for 'Date' columns",
672
- lambda obj: (re.search(r"date", obj.Name, flags=re.IGNORECASE))
673
- and (obj.DataType == TOM.DataType.DateTime)
674
- and (obj.FormatString != "mm/dd/yyyy"),
675
- 'Columns of type "DateTime" that have "Month" in their names should be formatted as "mm/dd/yyyy".',
676
- ),
677
- (
678
- "Formatting",
679
- "Column",
680
- "Warning",
681
- "Do not summarize numeric columns",
682
- lambda obj: (
683
- (obj.DataType == TOM.DataType.Int64)
684
- or (obj.DataType == TOM.DataType.Decimal)
685
- or (obj.DataType == TOM.DataType.Double)
686
603
  )
687
- and (str(obj.SummarizeBy) != "None")
688
- and not ((obj.IsHidden) or (obj.Parent.IsHidden)),
689
- 'Numeric columns (integer, decimal, double) should have their SummarizeBy property set to "None" to avoid accidental summation in Power BI (create measures instead).',
690
- ),
691
- (
692
- "Formatting",
693
- "Measure",
694
- "Info",
695
- "Provide format string for measures",
696
- lambda obj: obj.IsHidden is False and len(obj.FormatString) == 0,
697
- "Visible measures should have their format string property assigned.",
698
- ),
699
- (
700
- "Formatting",
701
- "Column",
702
- "Info",
703
- "Add data category for columns",
704
- lambda obj: len(obj.DataCategory) == 0
705
- and any(
706
- obj.Name.lower().startswith(item.lower())
707
- for item in [
708
- "country",
709
- "city",
710
- "continent",
711
- "latitude",
712
- "longitude",
713
- ]
714
- ),
715
- "Add Data Category property for appropriate columns.",
716
- "https://docs.microsoft.com/power-bi/transform-model/desktop-data-categorization",
717
- ),
718
- (
719
- "Formatting",
720
- "Measure",
721
- "Warning",
722
- "Percentages should be formatted with thousands separators and 1 decimal",
723
- lambda obj: "%" in obj.FormatString
724
- and obj.FormatString != "#,0.0%;-#,0.0%;#,0.0%",
725
- ),
726
- (
727
- "Formatting",
728
- "Measure",
729
- "Warning",
730
- "Whole numbers should be formatted with thousands separators and no decimals",
731
- lambda obj: "$" not in obj.FormatString
732
- and "%" not in obj.FormatString
733
- and obj.FormatString not in ["#,0", "#,0.0"],
734
- ),
735
- (
736
- "Formatting",
737
- "Column",
738
- "Info",
739
- "Hide foreign keys",
740
- lambda obj: obj.IsHidden is False
741
- and any(
742
- r.FromColumn.Name == obj.Name
743
- and r.FromCardinality == TOM.RelationshipEndCardinality.Many
744
- for r in tom.used_in_relationships(object=obj)
745
- ),
746
- "Foreign keys should always be hidden.",
747
- ),
748
- (
749
- "Formatting",
750
- "Column",
751
- "Info",
752
- "Mark primary keys",
753
- lambda obj: any(
754
- r.ToTable.Name == obj.Table.Name
755
- and r.ToColumn.Name == obj.Name
756
- and r.ToCardinality == TOM.RelationshipEndCardinality.One
757
- for r in tom.used_in_relationships(object=obj)
758
- )
759
- and obj.IsKey is False
760
- and obj.Table.DataCategory != "Time",
761
- "Set the 'Key' property to 'True' for primary key columns within the column properties.",
762
- ),
763
- (
764
- "Formatting",
765
- "Column",
766
- "Info",
767
- "Month (as a string) must be sorted",
768
- lambda obj: (re.search(r"month", obj.Name, flags=re.IGNORECASE))
769
- and not (re.search(r"months", obj.Name, flags=re.IGNORECASE))
770
- and (obj.DataType == TOM.DataType.String)
771
- and len(str(obj.SortByColumn)) == 0,
772
- "This rule highlights month columns which are strings and are not sorted. If left unsorted, they will sort alphabetically (i.e. April, August...). Make sure to sort such columns so that they sort properly (January, February, March...).",
773
- ),
774
- (
775
- "Formatting",
776
- "Relationship",
777
- "Warning",
778
- "Relationship columns should be of integer data type",
779
- lambda obj: obj.FromColumn.DataType != TOM.DataType.Int64
780
- or obj.ToColumn.DataType != TOM.DataType.Int64,
781
- "It is a best practice for relationship columns to be of integer data type. This applies not only to data warehousing but data modeling as well.",
782
- ),
783
- (
784
- "Formatting",
785
- "Column",
786
- "Warning",
787
- 'Provide format string for "Month" columns',
788
- lambda obj: re.search(r"month", obj.Name, flags=re.IGNORECASE)
789
- and obj.DataType == TOM.DataType.DateTime
790
- and obj.FormatString != "MMMM yyyy",
791
- 'Columns of type "DateTime" that have "Month" in their names should be formatted as "MMMM yyyy".',
792
- ),
793
- (
794
- "Formatting",
795
- "Column",
796
- "Info",
797
- "Format flag columns as Yes/No value strings",
798
- lambda obj: obj.Name.lower().startswith("is")
799
- and obj.DataType == TOM.DataType.Int64
800
- and not (obj.IsHidden or obj.Parent.IsHidden)
801
- or obj.Name.lower().endswith(" flag")
802
- and obj.DataType != TOM.DataType.String
803
- and not (obj.IsHidden or obj.Parent.IsHidden),
804
- "Flags must be properly formatted as Yes/No as this is easier to read than using 0/1 integer values.",
805
- ),
806
- (
807
- "Formatting",
808
- ["Table", "Column", "Measure", "Partition", "Hierarchy"],
809
- "Error",
810
- "Objects should not start or end with a space",
811
- lambda obj: obj.Name[0] == " " or obj.Name[-1] == " ",
812
- "Objects should not start or end with a space. This usually happens by accident and is difficult to find.",
813
- ),
814
- (
815
- "Formatting",
816
- ["Table", "Column", "Measure", "Partition", "Hierarchy"],
817
- "Info",
818
- "First letter of objects must be capitalized",
819
- lambda obj: obj.Name[0] != obj.Name[0].upper(),
820
- "The first letter of object names should be capitalized to maintain professional quality.",
821
- ),
822
- (
823
- "Naming Conventions",
824
- ["Table", "Column", "Measure", "Partition", "Hierarchy"],
825
- "Warning",
826
- "Object names must not contain special characters",
827
- lambda obj: re.search(r"[\t\r\n]", obj.Name),
828
- "Object names should not include tabs, line breaks, etc.",
829
- ),
830
- ],
831
- columns=[
832
- "Category",
833
- "Scope",
834
- "Severity",
835
- "Rule Name",
836
- "Expression",
837
- "Description",
838
- "URL",
839
- ],
840
- )
604
+ for m in tom.all_measures()
605
+ ),
606
+ "Inactive relationships are activated using the USERELATIONSHIP function. If an inactive relationship is not referenced in any measure via this function, the relationship will not be used. It should be determined whether the relationship is not necessary or to activate the relationship via this method.",
607
+ "https://dax.guide/userelationship",
608
+ ),
609
+ (
610
+ "Maintenance",
611
+ "Column",
612
+ "Warning",
613
+ "Remove unnecessary columns",
614
+ lambda obj, tom: (obj.IsHidden or obj.Parent.IsHidden)
615
+ and not any(tom.used_in_relationships(object=obj))
616
+ and not any(tom.used_in_hierarchies(column=obj))
617
+ and not any(tom.used_in_sort_by(column=obj))
618
+ and any(tom.depends_on(object=obj, dependencies=dependencies)),
619
+ "Hidden columns that are not referenced by any DAX expressions, relationships, hierarchy levels or Sort By-properties should be removed.",
620
+ ),
621
+ (
622
+ "Maintenance",
623
+ "Measure",
624
+ "Warning",
625
+ "Remove unnecessary measures",
626
+ lambda obj, tom: obj.IsHidden
627
+ and not any(tom.referenced_by(object=obj, dependencies=dependencies)),
628
+ "Hidden measures that are not referenced by any DAX expressions should be removed for maintainability.",
629
+ ),
630
+ (
631
+ "Maintenance",
632
+ "Table",
633
+ "Warning",
634
+ "Ensure tables have relationships",
635
+ lambda obj, tom: any(tom.used_in_relationships(object=obj)) is False
636
+ and obj.CalculationGroup is None,
637
+ "This rule highlights tables which are not connected to any other table in the model with a relationship.",
638
+ ),
639
+ (
640
+ "Maintenance",
641
+ "Table",
642
+ "Warning",
643
+ "Calculation groups with no calculation items",
644
+ lambda obj, tom: obj.CalculationGroup is not None
645
+ and not any(obj.CalculationGroup.CalculationItems),
646
+ ),
647
+ (
648
+ "Maintenance",
649
+ "Column",
650
+ "Info",
651
+ "Visible objects with no description",
652
+ lambda obj, tom: obj.IsHidden is False and len(obj.Description) == 0,
653
+ "Visible objects should have a description so that users of the model can understand what each object represents.",
654
+ ),
655
+ (
656
+ "Formatting",
657
+ "Column",
658
+ "Warning",
659
+ "Provide format string for 'Date' columns",
660
+ lambda obj, tom: (re.search(r"date", obj.Name, flags=re.IGNORECASE))
661
+ and (obj.DataType == TOM.DataType.DateTime)
662
+ and (obj.FormatString != "mm/dd/yyyy"),
663
+ 'Columns of type "DateTime" that have "Date" in their names should be formatted as "mm/dd/yyyy".',
664
+ ),
665
+ (
666
+ "Formatting",
667
+ "Column",
668
+ "Warning",
669
+ "Do not summarize numeric columns",
670
+ lambda obj, tom: (
671
+ (obj.DataType == TOM.DataType.Int64)
672
+ or (obj.DataType == TOM.DataType.Decimal)
673
+ or (obj.DataType == TOM.DataType.Double)
674
+ )
675
+ and (str(obj.SummarizeBy) != "None")
676
+ and not ((obj.IsHidden) or (obj.Parent.IsHidden)),
677
+ 'Numeric columns (integer, decimal, double) should have their SummarizeBy property set to "None" to avoid accidental summation in Power BI (create measures instead).',
678
+ ),
679
+ (
680
+ "Formatting",
681
+ "Measure",
682
+ "Info",
683
+ "Provide format string for measures",
684
+ lambda obj, tom: obj.IsHidden is False and len(obj.FormatString) == 0,
685
+ "Visible measures should have their format string property assigned.",
686
+ ),
687
+ (
688
+ "Formatting",
689
+ "Column",
690
+ "Info",
691
+ "Add data category for columns",
692
+ lambda obj, tom: len(obj.DataCategory) == 0
693
+ and any(
694
+ obj.Name.lower().startswith(item.lower())
695
+ for item in [
696
+ "country",
697
+ "city",
698
+ "continent",
699
+ "latitude",
700
+ "longitude",
701
+ ]
702
+ ),
703
+ "Add Data Category property for appropriate columns.",
704
+ "https://docs.microsoft.com/power-bi/transform-model/desktop-data-categorization",
705
+ ),
706
+ (
707
+ "Formatting",
708
+ "Measure",
709
+ "Warning",
710
+ "Percentages should be formatted with thousands separators and 1 decimal",
711
+ lambda obj, tom: "%" in obj.FormatString
712
+ and obj.FormatString != "#,0.0%;-#,0.0%;#,0.0%",
713
+ ),
714
+ (
715
+ "Formatting",
716
+ "Measure",
717
+ "Warning",
718
+ "Whole numbers should be formatted with thousands separators and no decimals",
719
+ lambda obj, tom: "$" not in obj.FormatString
720
+ and "%" not in obj.FormatString
721
+ and obj.FormatString not in ["#,0", "#,0.0"],
722
+ ),
723
+ (
724
+ "Formatting",
725
+ "Column",
726
+ "Info",
727
+ "Hide foreign keys",
728
+ lambda obj, tom: obj.IsHidden is False
729
+ and any(
730
+ r.FromColumn.Name == obj.Name
731
+ and r.FromCardinality == TOM.RelationshipEndCardinality.Many
732
+ for r in tom.used_in_relationships(object=obj)
733
+ ),
734
+ "Foreign keys should always be hidden.",
735
+ ),
736
+ (
737
+ "Formatting",
738
+ "Column",
739
+ "Info",
740
+ "Mark primary keys",
741
+ lambda obj, tom: any(
742
+ r.ToTable.Name == obj.Table.Name
743
+ and r.ToColumn.Name == obj.Name
744
+ and r.ToCardinality == TOM.RelationshipEndCardinality.One
745
+ for r in tom.used_in_relationships(object=obj)
746
+ )
747
+ and obj.IsKey is False
748
+ and obj.Table.DataCategory != "Time",
749
+ "Set the 'Key' property to 'True' for primary key columns within the column properties.",
750
+ ),
751
+ (
752
+ "Formatting",
753
+ "Column",
754
+ "Info",
755
+ "Month (as a string) must be sorted",
756
+ lambda obj, tom: (re.search(r"month", obj.Name, flags=re.IGNORECASE))
757
+ and not (re.search(r"months", obj.Name, flags=re.IGNORECASE))
758
+ and (obj.DataType == TOM.DataType.String)
759
+ and len(str(obj.SortByColumn)) == 0,
760
+ "This rule highlights month columns which are strings and are not sorted. If left unsorted, they will sort alphabetically (i.e. April, August...). Make sure to sort such columns so that they sort properly (January, February, March...).",
761
+ ),
762
+ (
763
+ "Formatting",
764
+ "Relationship",
765
+ "Warning",
766
+ "Relationship columns should be of integer data type",
767
+ lambda obj, tom: obj.FromColumn.DataType != TOM.DataType.Int64
768
+ or obj.ToColumn.DataType != TOM.DataType.Int64,
769
+ "It is a best practice for relationship columns to be of integer data type. This applies not only to data warehousing but data modeling as well.",
770
+ ),
771
+ (
772
+ "Formatting",
773
+ "Column",
774
+ "Warning",
775
+ 'Provide format string for "Month" columns',
776
+ lambda obj, tom: re.search(r"month", obj.Name, flags=re.IGNORECASE)
777
+ and obj.DataType == TOM.DataType.DateTime
778
+ and obj.FormatString != "MMMM yyyy",
779
+ 'Columns of type "DateTime" that have "Month" in their names should be formatted as "MMMM yyyy".',
780
+ ),
781
+ (
782
+ "Formatting",
783
+ "Column",
784
+ "Info",
785
+ "Format flag columns as Yes/No value strings",
786
+ lambda obj, tom: obj.Name.lower().startswith("is")
787
+ and obj.DataType == TOM.DataType.Int64
788
+ and not (obj.IsHidden or obj.Parent.IsHidden)
789
+ or obj.Name.lower().endswith(" flag")
790
+ and obj.DataType != TOM.DataType.String
791
+ and not (obj.IsHidden or obj.Parent.IsHidden),
792
+ "Flags must be properly formatted as Yes/No as this is easier to read than using 0/1 integer values.",
793
+ ),
794
+ (
795
+ "Formatting",
796
+ ["Table", "Column", "Measure", "Partition", "Hierarchy"],
797
+ "Error",
798
+ "Objects should not start or end with a space",
799
+ lambda obj, tom: obj.Name[0] == " " or obj.Name[-1] == " ",
800
+ "Objects should not start or end with a space. This usually happens by accident and is difficult to find.",
801
+ ),
802
+ (
803
+ "Formatting",
804
+ ["Table", "Column", "Measure", "Partition", "Hierarchy"],
805
+ "Info",
806
+ "First letter of objects must be capitalized",
807
+ lambda obj, tom: obj.Name[0] != obj.Name[0].upper(),
808
+ "The first letter of object names should be capitalized to maintain professional quality.",
809
+ ),
810
+ (
811
+ "Naming Conventions",
812
+ ["Table", "Column", "Measure", "Partition", "Hierarchy"],
813
+ "Warning",
814
+ "Object names must not contain special characters",
815
+ lambda obj, tom: re.search(r"[\t\r\n]", obj.Name),
816
+ "Object names should not include tabs, line breaks, etc.",
817
+ ),
818
+ ],
819
+ columns=[
820
+ "Category",
821
+ "Scope",
822
+ "Severity",
823
+ "Rule Name",
824
+ "Expression",
825
+ "Description",
826
+ "URL",
827
+ ],
828
+ )
841
829
 
842
830
  return rules