powerbi-ontology-extractor 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +1 -0
- cli/pbi_ontology_cli.py +286 -0
- powerbi_ontology/__init__.py +38 -0
- powerbi_ontology/analyzer.py +420 -0
- powerbi_ontology/chat.py +303 -0
- powerbi_ontology/cli.py +530 -0
- powerbi_ontology/contract_builder.py +269 -0
- powerbi_ontology/dax_parser.py +305 -0
- powerbi_ontology/export/__init__.py +17 -0
- powerbi_ontology/export/contract_to_owl.py +408 -0
- powerbi_ontology/export/fabric_iq.py +243 -0
- powerbi_ontology/export/fabric_iq_to_owl.py +463 -0
- powerbi_ontology/export/json_schema.py +110 -0
- powerbi_ontology/export/ontoguard.py +177 -0
- powerbi_ontology/export/owl.py +522 -0
- powerbi_ontology/extractor.py +368 -0
- powerbi_ontology/mcp_config.py +237 -0
- powerbi_ontology/mcp_models.py +166 -0
- powerbi_ontology/mcp_server.py +1106 -0
- powerbi_ontology/ontology_diff.py +776 -0
- powerbi_ontology/ontology_generator.py +406 -0
- powerbi_ontology/review.py +556 -0
- powerbi_ontology/schema_mapper.py +369 -0
- powerbi_ontology/semantic_debt.py +584 -0
- powerbi_ontology/utils/__init__.py +13 -0
- powerbi_ontology/utils/pbix_reader.py +558 -0
- powerbi_ontology/utils/visualizer.py +332 -0
- powerbi_ontology_extractor-0.1.0.dist-info/METADATA +507 -0
- powerbi_ontology_extractor-0.1.0.dist-info/RECORD +33 -0
- powerbi_ontology_extractor-0.1.0.dist-info/WHEEL +5 -0
- powerbi_ontology_extractor-0.1.0.dist-info/entry_points.txt +4 -0
- powerbi_ontology_extractor-0.1.0.dist-info/licenses/LICENSE +21 -0
- powerbi_ontology_extractor-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,584 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Semantic Debt Analysis for Multi-Dashboard Environments.
|
|
3
|
+
|
|
4
|
+
Detects conflicting definitions across multiple Power BI dashboards:
|
|
5
|
+
- Measures with same name but different DAX formulas
|
|
6
|
+
- Properties with same name but different data types
|
|
7
|
+
- Entities with same name but different structures
|
|
8
|
+
- Conflicting business rules
|
|
9
|
+
- Incompatible relationships
|
|
10
|
+
|
|
11
|
+
Use case: "Revenue" defined differently in Sales.pbix vs Finance.pbix
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from typing import Any, Dict, List, Tuple
|
|
18
|
+
from difflib import SequenceMatcher
|
|
19
|
+
|
|
20
|
+
from powerbi_ontology.ontology_generator import (
|
|
21
|
+
Ontology,
|
|
22
|
+
OntologyEntity,
|
|
23
|
+
OntologyProperty,
|
|
24
|
+
OntologyRelationship,
|
|
25
|
+
BusinessRule,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ConflictSeverity(Enum):
    """How serious a detected semantic conflict is."""

    # Completely different definitions — will cause errors downstream.
    CRITICAL = "critical"
    # Partial differences that need human attention.
    WARNING = "warning"
    # Minor differences that can usually be ignored.
    INFO = "info"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ConflictType(Enum):
    """Category of a semantic conflict between dashboards."""

    # Same measure name, different DAX formula.
    MEASURE_CONFLICT = "measure_conflict"
    # Same property name, different data type.
    TYPE_CONFLICT = "type_conflict"
    # Same entity name, different structure.
    ENTITY_CONFLICT = "entity_conflict"
    # Different relationship between the same pair of entities.
    RELATIONSHIP_CONFLICT = "relationship_conflict"
    # Conflicting business rules.
    RULE_CONFLICT = "rule_conflict"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class SemanticConflict:
    """Represents a semantic conflict between dashboards."""

    conflict_type: ConflictType  # category of the conflict
    severity: ConflictSeverity   # how serious it is
    name: str                    # name of the conflicting element
    sources: List[str]           # source files/ontologies involved
    details: Dict[str, str]      # per-source description of the definition
    description: str = ""        # human-readable explanation
    recommendation: str = ""     # suggested remediation

    def to_dict(self) -> dict:
        """Serialize this conflict to a plain dictionary (enums as values)."""
        return dict(
            conflict_type=self.conflict_type.value,
            severity=self.severity.value,
            name=self.name,
            sources=self.sources,
            details=self.details,
            description=self.description,
            recommendation=self.recommendation,
        )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class SemanticDebtReport:
    """Report of semantic debt analysis.

    Collects the conflicts found across a set of ontologies plus derived
    summary statistics and high-level recommendations. Call
    :meth:`generate_summary` (done automatically by :meth:`to_dict` and
    :meth:`to_markdown`) to refresh ``summary`` after adding conflicts.
    """

    ontologies_analyzed: List[str]
    conflicts: List[SemanticConflict] = field(default_factory=list)
    summary: Dict[str, Any] = field(default_factory=dict)
    recommendations: List[str] = field(default_factory=list)

    def add_conflict(self, conflict: SemanticConflict):
        """Add a conflict to the report."""
        self.conflicts.append(conflict)

    def generate_summary(self):
        """(Re)compute summary statistics from the current conflict list."""
        self.summary = {
            "total_conflicts": len(self.conflicts),
            "critical": sum(1 for c in self.conflicts if c.severity == ConflictSeverity.CRITICAL),
            "warning": sum(1 for c in self.conflicts if c.severity == ConflictSeverity.WARNING),
            "info": sum(1 for c in self.conflicts if c.severity == ConflictSeverity.INFO),
            "by_type": {},
        }

        for conflict_type in ConflictType:
            count = sum(1 for c in self.conflicts if c.conflict_type == conflict_type)
            if count > 0:
                self.summary["by_type"][conflict_type.value] = count

    def to_dict(self) -> dict:
        """Convert to dictionary (refreshes the summary first)."""
        self.generate_summary()
        return {
            "ontologies_analyzed": self.ontologies_analyzed,
            "summary": self.summary,
            "conflicts": [c.to_dict() for c in self.conflicts],
            "recommendations": self.recommendations,
        }

    def to_markdown(self) -> str:
        """Generate a markdown report of the analysis."""
        self.generate_summary()

        lines = [
            "# Semantic Debt Analysis Report",
            "",
            "## Summary",
            "",
            f"- **Ontologies analyzed:** {len(self.ontologies_analyzed)}",
            f"- **Total conflicts:** {self.summary['total_conflicts']}",
            f"  - 🔴 Critical: {self.summary['critical']}",
            f"  - 🟡 Warning: {self.summary['warning']}",
            f"  - 🔵 Info: {self.summary['info']}",
            "",
        ]

        if self.summary.get("by_type"):
            lines.append("### Conflicts by Type")
            lines.append("")
            for ctype, count in self.summary["by_type"].items():
                lines.append(f"- {ctype}: {count}")
            lines.append("")

        # One section per severity, most severe first. This replaces three
        # copy-pasted blocks (critical/warning/info) with a single loop that
        # produces identical output, including the original headings.
        sections = (
            (ConflictSeverity.CRITICAL, "## 🔴 Critical Conflicts"),
            (ConflictSeverity.WARNING, "## 🟡 Warnings"),
            (ConflictSeverity.INFO, "## 🔵 Info"),
        )
        for severity, heading in sections:
            matching = [c for c in self.conflicts if c.severity == severity]
            if matching:
                lines.append(heading)
                lines.append("")
                for c in matching:
                    lines.extend(self._format_conflict(c))

        if self.recommendations:
            lines.append("## Recommendations")
            lines.append("")
            for i, rec in enumerate(self.recommendations, 1):
                lines.append(f"{i}. {rec}")
            lines.append("")

        return "\n".join(lines)

    def _format_conflict(self, conflict: SemanticConflict) -> List[str]:
        """Format a single conflict as a list of markdown lines."""
        lines = [
            f"### {conflict.name}",
            "",
            f"**Type:** {conflict.conflict_type.value}",
            "",
            f"**Description:** {conflict.description}",
            "",
            "**Sources:**",
            "",
        ]

        for source, detail in conflict.details.items():
            lines.append(f"- `{source}`: {detail}")

        lines.append("")

        if conflict.recommendation:
            lines.append(f"**Recommendation:** {conflict.recommendation}")
            lines.append("")

        return lines
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
class SemanticDebtAnalyzer:
    """
    Analyzes semantic debt across multiple ontologies.

    Detects conflicting definitions that could cause inconsistencies
    when AI agents work across multiple Power BI dashboards.

    NOTE(review): ``ConflictType.MEASURE_CONFLICT`` is declared but no
    method here emits it — measure-level DAX comparison is not implemented
    in this class. Confirm whether it is produced elsewhere.
    """

    def __init__(self, similarity_threshold: float = 0.8):
        """
        Initialize analyzer.

        Args:
            similarity_threshold: Threshold for name similarity matching (0-1).
                Rule conditions whose pairwise similarity falls below this are
                treated as CRITICAL conflicts.
        """
        self.similarity_threshold = similarity_threshold
        self.ontologies: Dict[str, Ontology] = {}

    def add_ontology(self, name: str, ontology: Ontology):
        """
        Add an ontology for analysis.

        Args:
            name: Identifier for this ontology (e.g., filename)
            ontology: Ontology object
        """
        self.ontologies[name] = ontology
        logger.info(f"Added ontology '{name}' with {len(ontology.entities)} entities")

    def load_ontologies_from_directory(self, directory: str, pattern: str = "*.json"):
        """
        Load multiple ontologies from a directory.

        Files that fail to parse are skipped with a warning rather than
        aborting the whole load.

        Args:
            directory: Directory path
            pattern: Glob pattern for files
        """
        import json
        from pathlib import Path

        dir_path = Path(directory)
        for file_path in dir_path.glob(pattern):
            try:
                with open(file_path) as f:
                    data = json.load(f)

                # Simple conversion - assumes same format as ontology_editor.py
                ontology = self._json_to_ontology(data)
                self.add_ontology(file_path.name, ontology)
            except Exception as e:
                logger.warning(f"Failed to load {file_path}: {e}")

    def _json_to_ontology(self, data: dict) -> Ontology:
        """Convert JSON data to an Ontology object.

        Missing optional fields fall back to the same defaults used across
        this package (empty strings, "String" data type, etc.).
        """
        from powerbi_ontology.ontology_generator import Constraint

        entities = []
        for e_data in data.get("entities", []):
            props = []
            for p_data in e_data.get("properties", []):
                constraints = [
                    Constraint(type=c["type"], value=c["value"], message=c.get("message", ""))
                    for c in p_data.get("constraints", [])
                ]
                props.append(OntologyProperty(
                    name=p_data["name"],
                    data_type=p_data.get("data_type", "String"),
                    required=p_data.get("required", False),
                    unique=p_data.get("unique", False),
                    description=p_data.get("description", ""),
                    constraints=constraints,
                ))

            entities.append(OntologyEntity(
                name=e_data["name"],
                description=e_data.get("description", ""),
                entity_type=e_data.get("entity_type", "standard"),
                properties=props,
                constraints=[],
            ))

        relationships = []
        for r_data in data.get("relationships", []):
            relationships.append(OntologyRelationship(
                from_entity=r_data["from_entity"],
                to_entity=r_data["to_entity"],
                from_property=r_data.get("from_property", ""),
                to_property=r_data.get("to_property", ""),
                relationship_type=r_data.get("relationship_type", "related_to"),
                cardinality=r_data.get("cardinality", "one-to-many"),
                description=r_data.get("description", ""),
            ))

        rules = []
        for b_data in data.get("business_rules", []):
            rules.append(BusinessRule(
                name=b_data["name"],
                entity=b_data.get("entity", ""),
                condition=b_data.get("condition", ""),
                action=b_data.get("action", ""),
                classification=b_data.get("classification", ""),
                description=b_data.get("description", ""),
                priority=b_data.get("priority", 1),
            ))

        return Ontology(
            name=data.get("name", "Unnamed"),
            version=data.get("version", "1.0"),
            source=data.get("source", ""),
            entities=entities,
            relationships=relationships,
            business_rules=rules,
            metadata=data.get("metadata", {}),
        )

    def analyze(self) -> SemanticDebtReport:
        """
        Perform semantic debt analysis.

        Returns:
            SemanticDebtReport with all detected conflicts
        """
        if len(self.ontologies) < 2:
            logger.warning("Need at least 2 ontologies for comparison")
            report = SemanticDebtReport(
                ontologies_analyzed=list(self.ontologies.keys()),
                conflicts=[],
            )
            # Fix: populate the summary on the early-return path too, so
            # callers reading report.summary directly always see consistent
            # keys (the normal path below calls generate_summary()).
            report.generate_summary()
            return report

        report = SemanticDebtReport(ontologies_analyzed=list(self.ontologies.keys()))

        # Analyze different conflict types
        self._analyze_entity_conflicts(report)
        self._analyze_property_type_conflicts(report)
        self._analyze_relationship_conflicts(report)
        self._analyze_business_rule_conflicts(report)

        # Generate recommendations
        self._generate_recommendations(report)

        report.generate_summary()
        return report

    def _analyze_entity_conflicts(self, report: SemanticDebtReport):
        """Detect entities with same name but different structures."""
        entity_map: Dict[str, Dict[str, OntologyEntity]] = {}

        # Group entities by name across all ontologies.
        for ont_name, ont in self.ontologies.items():
            for entity in ont.entities:
                if entity.name not in entity_map:
                    entity_map[entity.name] = {}
                entity_map[entity.name][ont_name] = entity

        # Compare every pair of sources that define the same entity name.
        for entity_name, sources in entity_map.items():
            if len(sources) < 2:
                continue

            source_names = list(sources.keys())
            for i in range(len(source_names)):
                for j in range(i + 1, len(source_names)):
                    src1, src2 = source_names[i], source_names[j]
                    entity1, entity2 = sources[src1], sources[src2]

                    props1 = set(p.name for p in entity1.properties)
                    props2 = set(p.name for p in entity2.properties)

                    # Check for structural differences
                    only_in_1 = props1 - props2
                    only_in_2 = props2 - props1

                    if only_in_1 or only_in_2:
                        severity = self._determine_entity_severity(entity1, entity2)

                        details = {
                            src1: f"Properties: {', '.join(sorted(props1))}",
                            src2: f"Properties: {', '.join(sorted(props2))}",
                        }

                        missing_desc = []
                        if only_in_1:
                            missing_desc.append(f"only in {src1}: {', '.join(sorted(only_in_1))}")
                        if only_in_2:
                            missing_desc.append(f"only in {src2}: {', '.join(sorted(only_in_2))}")

                        report.add_conflict(SemanticConflict(
                            conflict_type=ConflictType.ENTITY_CONFLICT,
                            severity=severity,
                            name=entity_name,
                            sources=[src1, src2],
                            details=details,
                            description=f"Entity '{entity_name}' has different structures: {'; '.join(missing_desc)}",
                            recommendation=f"Unify entity '{entity_name}' structure across dashboards or rename to avoid confusion.",
                        ))

    def _analyze_property_type_conflicts(self, report: SemanticDebtReport):
        """Detect properties with same name but different types."""
        # Group properties by (entity_name, property_name)
        prop_map: Dict[Tuple[str, str], Dict[str, OntologyProperty]] = {}

        for ont_name, ont in self.ontologies.items():
            for entity in ont.entities:
                for prop in entity.properties:
                    key = (entity.name, prop.name)
                    if key not in prop_map:
                        prop_map[key] = {}
                    prop_map[key][ont_name] = prop

        # Any disagreement in declared data types is always CRITICAL: a type
        # mismatch breaks cross-dashboard joins and aggregations.
        for (entity_name, prop_name), sources in prop_map.items():
            if len(sources) < 2:
                continue

            types = {src: prop.data_type for src, prop in sources.items()}
            unique_types = set(types.values())

            if len(unique_types) > 1:
                severity = ConflictSeverity.CRITICAL

                details = {src: f"Type: {t}" for src, t in types.items()}

                report.add_conflict(SemanticConflict(
                    conflict_type=ConflictType.TYPE_CONFLICT,
                    severity=severity,
                    name=f"{entity_name}.{prop_name}",
                    sources=list(sources.keys()),
                    details=details,
                    description=f"Property '{entity_name}.{prop_name}' has different types: {', '.join(unique_types)}",
                    recommendation=f"Standardize the data type for '{prop_name}' across all dashboards.",
                ))

    def _analyze_relationship_conflicts(self, report: SemanticDebtReport):
        """Detect conflicting relationships between same entities."""
        # Group relationships by (from_entity, to_entity)
        rel_map: Dict[Tuple[str, str], Dict[str, OntologyRelationship]] = {}

        for ont_name, ont in self.ontologies.items():
            for rel in ont.relationships:
                key = (rel.from_entity, rel.to_entity)
                if key not in rel_map:
                    rel_map[key] = {}
                rel_map[key][ont_name] = rel

        # Cardinality disagreements are WARNING (not CRITICAL): the
        # relationship itself exists in both, only its shape differs.
        for (from_ent, to_ent), sources in rel_map.items():
            if len(sources) < 2:
                continue

            cardinalities = {src: rel.cardinality for src, rel in sources.items()}
            unique_cards = set(cardinalities.values())

            if len(unique_cards) > 1:
                severity = ConflictSeverity.WARNING

                details = {
                    src: f"Type: {rel.relationship_type}, Cardinality: {rel.cardinality}"
                    for src, rel in sources.items()
                }

                report.add_conflict(SemanticConflict(
                    conflict_type=ConflictType.RELATIONSHIP_CONFLICT,
                    severity=severity,
                    name=f"{from_ent} → {to_ent}",
                    sources=list(sources.keys()),
                    details=details,
                    description=f"Relationship '{from_ent} → {to_ent}' has different cardinalities: {', '.join(unique_cards)}",
                    recommendation="Verify the correct cardinality and update dashboards accordingly.",
                ))

    def _analyze_business_rule_conflicts(self, report: SemanticDebtReport):
        """Detect conflicting business rules."""
        # Group rules by name
        rule_map: Dict[str, Dict[str, BusinessRule]] = {}

        for ont_name, ont in self.ontologies.items():
            for rule in ont.business_rules:
                if rule.name not in rule_map:
                    rule_map[rule.name] = {}
                rule_map[rule.name][ont_name] = rule

        # Check for conflicts
        for rule_name, sources in rule_map.items():
            if len(sources) < 2:
                continue

            conditions = {src: rule.condition for src, rule in sources.items()}
            unique_conditions = set(conditions.values())

            if len(unique_conditions) > 1:
                # Fix: compare EVERY pair of distinct conditions, not just the
                # first two — with 3+ dashboards, severity must be driven by
                # the least-similar pair.
                conds_list = list(unique_conditions)
                min_similarity = 1.0
                for i in range(len(conds_list)):
                    for j in range(i + 1, len(conds_list)):
                        pair_sim = self._text_similarity(conds_list[i], conds_list[j])
                        if pair_sim < min_similarity:
                            min_similarity = pair_sim

                if min_similarity < self.similarity_threshold:
                    severity = ConflictSeverity.CRITICAL
                else:
                    severity = ConflictSeverity.WARNING

                details = {
                    src: f"Condition: {rule.condition}, Action: {rule.action}"
                    for src, rule in sources.items()
                }

                report.add_conflict(SemanticConflict(
                    conflict_type=ConflictType.RULE_CONFLICT,
                    severity=severity,
                    name=rule_name,
                    sources=list(sources.keys()),
                    details=details,
                    description=f"Business rule '{rule_name}' has different conditions across dashboards.",
                    recommendation=f"Consolidate rule '{rule_name}' into a single source of truth.",
                ))

    def _determine_entity_severity(
        self, entity1: OntologyEntity, entity2: OntologyEntity
    ) -> ConflictSeverity:
        """Determine severity based on structural differences.

        Severity is graded by the Jaccard overlap of the two property-name
        sets: <50% overlap → CRITICAL, <80% → WARNING, otherwise INFO.
        """
        props1 = set(p.name for p in entity1.properties)
        props2 = set(p.name for p in entity2.properties)

        common = props1 & props2
        total = props1 | props2

        if not total:
            return ConflictSeverity.INFO

        overlap_ratio = len(common) / len(total)

        if overlap_ratio < 0.5:
            return ConflictSeverity.CRITICAL
        elif overlap_ratio < 0.8:
            return ConflictSeverity.WARNING
        else:
            return ConflictSeverity.INFO

    def _text_similarity(self, text1: str, text2: str) -> float:
        """Return a 0-1 case-insensitive similarity ratio between two texts."""
        return SequenceMatcher(None, text1.lower(), text2.lower()).ratio()

    def _generate_recommendations(self, report: SemanticDebtReport):
        """Generate overall recommendations based on conflicts."""
        if not report.conflicts:
            report.recommendations.append("No semantic conflicts detected. Good job!")
            return

        critical_count = sum(1 for c in report.conflicts if c.severity == ConflictSeverity.CRITICAL)
        warning_count = sum(1 for c in report.conflicts if c.severity == ConflictSeverity.WARNING)

        if critical_count > 0:
            report.recommendations.append(
                f"Address {critical_count} critical conflict(s) immediately - they may cause data inconsistencies."
            )

        # Check for specific patterns
        type_conflicts = [c for c in report.conflicts if c.conflict_type == ConflictType.TYPE_CONFLICT]
        if type_conflicts:
            report.recommendations.append(
                "Create a shared data dictionary to standardize property types across dashboards."
            )

        entity_conflicts = [c for c in report.conflicts if c.conflict_type == ConflictType.ENTITY_CONFLICT]
        if entity_conflicts:
            report.recommendations.append(
                "Consider creating a master ontology schema that all dashboards inherit from."
            )

        rule_conflicts = [c for c in report.conflicts if c.conflict_type == ConflictType.RULE_CONFLICT]
        if rule_conflicts:
            report.recommendations.append(
                "Centralize business rules in a single repository to ensure consistency."
            )

        if warning_count > 3:
            report.recommendations.append(
                "Schedule a semantic alignment review with stakeholders from different dashboard teams."
            )
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
def analyze_ontologies(ontologies: Dict[str, Ontology]) -> SemanticDebtReport:
    """
    Convenience function to analyze multiple ontologies.

    Registers each named ontology with a fresh analyzer (default
    similarity threshold) and runs the full analysis.

    Args:
        ontologies: Dictionary mapping names to Ontology objects

    Returns:
        SemanticDebtReport
    """
    analyzer = SemanticDebtAnalyzer()
    for label, ontology in ontologies.items():
        analyzer.add_ontology(label, ontology)
    return analyzer.analyze()
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Utility modules for PowerBI Ontology Extractor."""
|
|
2
|
+
|
|
3
|
+
from powerbi_ontology.utils.pbix_reader import PBIXReader
|
|
4
|
+
|
|
5
|
+
__all__ = ["PBIXReader"]
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def __getattr__(name):
    """Lazy import to avoid circular dependency with ontology_generator.

    Resolves only ``OntologyVisualizer``; any other attribute raises the
    standard module AttributeError (PEP 562 module-level __getattr__).
    """
    if name != "OntologyVisualizer":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    from powerbi_ontology.utils.visualizer import OntologyVisualizer
    return OntologyVisualizer
|