powerbi-ontology-extractor 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,584 @@
1
+ """
2
+ Semantic Debt Analysis for Multi-Dashboard Environments.
3
+
4
+ Detects conflicting definitions across multiple Power BI dashboards:
5
+ - Measures with same name but different DAX formulas
6
+ - Properties with same name but different data types
7
+ - Entities with same name but different structures
8
+ - Conflicting business rules
9
+ - Incompatible relationships
10
+
11
+ Use case: "Revenue" defined differently in Sales.pbix vs Finance.pbix
12
+ """
13
+
14
+ import logging
15
+ from dataclasses import dataclass, field
16
+ from enum import Enum
17
+ from typing import Any, Dict, List, Tuple
18
+ from difflib import SequenceMatcher
19
+
20
+ from powerbi_ontology.ontology_generator import (
21
+ Ontology,
22
+ OntologyEntity,
23
+ OntologyProperty,
24
+ OntologyRelationship,
25
+ BusinessRule,
26
+ )
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class ConflictSeverity(Enum):
    """How serious a detected semantic conflict is.

    CRITICAL: completely different definitions that will cause errors.
    WARNING:  partial differences that need attention.
    INFO:     minor differences that can usually be ignored.
    """

    CRITICAL = "critical"
    WARNING = "warning"
    INFO = "info"
36
+
37
+
38
class ConflictType(Enum):
    """Categories of semantic conflict that can be detected between ontologies."""

    MEASURE_CONFLICT = "measure_conflict"            # same measure name, different DAX
    TYPE_CONFLICT = "type_conflict"                  # same property name, different type
    ENTITY_CONFLICT = "entity_conflict"              # same entity name, different structure
    RELATIONSHIP_CONFLICT = "relationship_conflict"  # different relationship between same entities
    RULE_CONFLICT = "rule_conflict"                  # conflicting business rules
45
+
46
+
47
@dataclass
class SemanticConflict:
    """A single conflicting definition found across two or more dashboards."""

    conflict_type: ConflictType   # category of the conflict
    severity: ConflictSeverity    # how serious the conflict is
    name: str                     # name of the conflicting element
    sources: List[str]            # source files / ontology names involved
    details: Dict[str, str]       # per-source description of the definition
    description: str = ""
    recommendation: str = ""

    def to_dict(self) -> dict:
        """Serialize this conflict to a plain dictionary (enums as their values)."""
        return dict(
            conflict_type=self.conflict_type.value,
            severity=self.severity.value,
            name=self.name,
            sources=self.sources,
            details=self.details,
            description=self.description,
            recommendation=self.recommendation,
        )
69
+
70
+
71
@dataclass
class SemanticDebtReport:
    """Aggregated result of a semantic debt analysis run."""

    ontologies_analyzed: List[str]                          # names of the ontologies compared
    conflicts: List[SemanticConflict] = field(default_factory=list)
    summary: Dict[str, Any] = field(default_factory=dict)   # filled by generate_summary()
    recommendations: List[str] = field(default_factory=list)

    def add_conflict(self, conflict: SemanticConflict):
        """Record one more detected conflict."""
        self.conflicts.append(conflict)

    def generate_summary(self):
        """Recompute self.summary (counts by severity and by type) from the conflict list."""
        def count_where(pred) -> int:
            return sum(1 for c in self.conflicts if pred(c))

        self.summary = {
            "total_conflicts": len(self.conflicts),
            "critical": count_where(lambda c: c.severity == ConflictSeverity.CRITICAL),
            "warning": count_where(lambda c: c.severity == ConflictSeverity.WARNING),
            "info": count_where(lambda c: c.severity == ConflictSeverity.INFO),
            "by_type": {},
        }

        # Only types that actually occur appear in by_type, in declaration order.
        for ctype in ConflictType:
            n = count_where(lambda c: c.conflict_type == ctype)
            if n > 0:
                self.summary["by_type"][ctype.value] = n

    def to_dict(self) -> dict:
        """Serialize the full report (summary is refreshed first)."""
        self.generate_summary()
        return {
            "ontologies_analyzed": self.ontologies_analyzed,
            "summary": self.summary,
            "conflicts": [c.to_dict() for c in self.conflicts],
            "recommendations": self.recommendations,
        }

    def to_markdown(self) -> str:
        """Generate a human-readable markdown report."""
        self.generate_summary()

        out = [
            "# Semantic Debt Analysis Report",
            "",
            "## Summary",
            "",
            f"- **Ontologies analyzed:** {len(self.ontologies_analyzed)}",
            f"- **Total conflicts:** {self.summary['total_conflicts']}",
            f" - 🔴 Critical: {self.summary['critical']}",
            f" - 🟡 Warning: {self.summary['warning']}",
            f" - 🔵 Info: {self.summary['info']}",
            "",
        ]

        if self.summary.get("by_type"):
            out += ["### Conflicts by Type", ""]
            out += [f"- {ctype}: {n}" for ctype, n in self.summary["by_type"].items()]
            out.append("")

        # Render conflicts grouped by severity, most serious first.
        sections = (
            (ConflictSeverity.CRITICAL, "## 🔴 Critical Conflicts"),
            (ConflictSeverity.WARNING, "## 🟡 Warnings"),
            (ConflictSeverity.INFO, "## 🔵 Info"),
        )
        for sev, heading in sections:
            group = [c for c in self.conflicts if c.severity == sev]
            if group:
                out += [heading, ""]
                for conflict in group:
                    out += self._format_conflict(conflict)

        if self.recommendations:
            out += ["## Recommendations", ""]
            for idx, rec in enumerate(self.recommendations, 1):
                out.append(f"{idx}. {rec}")
            out.append("")

        return "\n".join(out)

    def _format_conflict(self, conflict: SemanticConflict) -> List[str]:
        """Render one conflict as a list of markdown lines."""
        block = [
            f"### {conflict.name}",
            "",
            f"**Type:** {conflict.conflict_type.value}",
            "",
            f"**Description:** {conflict.description}",
            "",
            "**Sources:**",
            "",
        ]
        block += [f"- `{src}`: {detail}" for src, detail in conflict.details.items()]
        block.append("")
        if conflict.recommendation:
            block += [f"**Recommendation:** {conflict.recommendation}", ""]
        return block
189
+
190
+
191
class SemanticDebtAnalyzer:
    """
    Analyzes semantic debt across multiple ontologies.

    Detects conflicting definitions that could cause inconsistencies
    when AI agents work across multiple Power BI dashboards:

    - Entities with the same name but different property sets
    - Properties with the same (entity, name) but different data types
    - Relationships between the same entities with different cardinalities
    - Business rules with the same name but different conditions

    NOTE(review): ConflictType.MEASURE_CONFLICT is declared in the enum, but
    no measure/DAX comparison is implemented in this analyzer yet.
    """

    def __init__(self, similarity_threshold: float = 0.8):
        """
        Initialize analyzer.

        Args:
            similarity_threshold: Threshold for text similarity matching (0-1).
                Business rules whose condition texts are less similar than this
                are reported as CRITICAL instead of WARNING.
        """
        self.similarity_threshold = similarity_threshold
        # Ontologies registered for analysis, keyed by caller-supplied name.
        self.ontologies: Dict[str, Ontology] = {}

    def add_ontology(self, name: str, ontology: Ontology):
        """
        Add an ontology for analysis.

        Args:
            name: Identifier for this ontology (e.g., filename)
            ontology: Ontology object
        """
        self.ontologies[name] = ontology
        # Lazy %-args: no string formatting when INFO logging is disabled.
        logger.info("Added ontology '%s' with %d entities", name, len(ontology.entities))

    def load_ontologies_from_directory(self, directory: str, pattern: str = "*.json"):
        """
        Load multiple ontologies from a directory.

        Files that fail to parse are skipped with a warning instead of
        aborting the whole load (best-effort, as before).

        Args:
            directory: Directory path
            pattern: Glob pattern for files
        """
        import json
        from pathlib import Path

        dir_path = Path(directory)
        # sorted(): Path.glob() order is filesystem-dependent; sorting makes
        # load order — and therefore report ordering — deterministic.
        for file_path in sorted(dir_path.glob(pattern)):
            try:
                # Explicit encoding avoids locale-dependent decoding of JSON.
                with open(file_path, encoding="utf-8") as f:
                    data = json.load(f)

                # Simple conversion - assumes same format as ontology_editor.py
                ontology = self._json_to_ontology(data)
                self.add_ontology(file_path.name, ontology)
            except Exception as e:
                # Broad catch is deliberate: one bad file must not abort the batch.
                logger.warning("Failed to load %s: %s", file_path, e)

    def _json_to_ontology(self, data: dict) -> Ontology:
        """Convert JSON data (ontology_editor.py format) to an Ontology object.

        Missing optional keys fall back to the same defaults the editor uses.
        """
        from powerbi_ontology.ontology_generator import Constraint

        entities = []
        for e_data in data.get("entities", []):
            props = []
            for p_data in e_data.get("properties", []):
                constraints = [
                    Constraint(type=c["type"], value=c["value"], message=c.get("message", ""))
                    for c in p_data.get("constraints", [])
                ]
                props.append(OntologyProperty(
                    name=p_data["name"],
                    data_type=p_data.get("data_type", "String"),
                    required=p_data.get("required", False),
                    unique=p_data.get("unique", False),
                    description=p_data.get("description", ""),
                    constraints=constraints,
                ))

            entities.append(OntologyEntity(
                name=e_data["name"],
                description=e_data.get("description", ""),
                entity_type=e_data.get("entity_type", "standard"),
                properties=props,
                constraints=[],  # entity-level constraints are not serialized here
            ))

        relationships = [
            OntologyRelationship(
                from_entity=r_data["from_entity"],
                to_entity=r_data["to_entity"],
                from_property=r_data.get("from_property", ""),
                to_property=r_data.get("to_property", ""),
                relationship_type=r_data.get("relationship_type", "related_to"),
                cardinality=r_data.get("cardinality", "one-to-many"),
                description=r_data.get("description", ""),
            )
            for r_data in data.get("relationships", [])
        ]

        rules = [
            BusinessRule(
                name=b_data["name"],
                entity=b_data.get("entity", ""),
                condition=b_data.get("condition", ""),
                action=b_data.get("action", ""),
                classification=b_data.get("classification", ""),
                description=b_data.get("description", ""),
                priority=b_data.get("priority", 1),
            )
            for b_data in data.get("business_rules", [])
        ]

        return Ontology(
            name=data.get("name", "Unnamed"),
            version=data.get("version", "1.0"),
            source=data.get("source", ""),
            entities=entities,
            relationships=relationships,
            business_rules=rules,
            metadata=data.get("metadata", {}),
        )

    def analyze(self) -> SemanticDebtReport:
        """
        Perform semantic debt analysis.

        Returns:
            SemanticDebtReport with all detected conflicts. With fewer than
            two ontologies registered, an empty report is returned.
        """
        if len(self.ontologies) < 2:
            logger.warning("Need at least 2 ontologies for comparison")
            return SemanticDebtReport(
                ontologies_analyzed=list(self.ontologies.keys()),
                conflicts=[],
            )

        report = SemanticDebtReport(ontologies_analyzed=list(self.ontologies.keys()))

        # Run each conflict detector; each appends to the shared report.
        self._analyze_entity_conflicts(report)
        self._analyze_property_type_conflicts(report)
        self._analyze_relationship_conflicts(report)
        self._analyze_business_rule_conflicts(report)

        self._generate_recommendations(report)

        report.generate_summary()
        return report

    def _analyze_entity_conflicts(self, report: SemanticDebtReport):
        """Detect entities with the same name but different property sets."""
        # entity name -> {ontology name -> entity definition}
        entity_map: Dict[str, Dict[str, OntologyEntity]] = {}

        for ont_name, ont in self.ontologies.items():
            for entity in ont.entities:
                entity_map.setdefault(entity.name, {})[ont_name] = entity

        for entity_name, sources in entity_map.items():
            if len(sources) < 2:
                continue

            # Compare every pair of sources that define this entity.
            source_names = list(sources.keys())
            for i in range(len(source_names)):
                for j in range(i + 1, len(source_names)):
                    src1, src2 = source_names[i], source_names[j]
                    entity1, entity2 = sources[src1], sources[src2]

                    props1 = {p.name for p in entity1.properties}
                    props2 = {p.name for p in entity2.properties}

                    only_in_1 = props1 - props2
                    only_in_2 = props2 - props1
                    if not (only_in_1 or only_in_2):
                        continue  # same property names -> no structural conflict

                    severity = self._determine_entity_severity(entity1, entity2)

                    details = {
                        src1: f"Properties: {', '.join(sorted(props1))}",
                        src2: f"Properties: {', '.join(sorted(props2))}",
                    }

                    missing_desc = []
                    if only_in_1:
                        missing_desc.append(f"only in {src1}: {', '.join(sorted(only_in_1))}")
                    if only_in_2:
                        missing_desc.append(f"only in {src2}: {', '.join(sorted(only_in_2))}")

                    report.add_conflict(SemanticConflict(
                        conflict_type=ConflictType.ENTITY_CONFLICT,
                        severity=severity,
                        name=entity_name,
                        sources=[src1, src2],
                        details=details,
                        description=f"Entity '{entity_name}' has different structures: {'; '.join(missing_desc)}",
                        recommendation=f"Unify entity '{entity_name}' structure across dashboards or rename to avoid confusion.",
                    ))

    def _analyze_property_type_conflicts(self, report: SemanticDebtReport):
        """Detect properties with the same (entity, name) but different data types."""
        # (entity name, property name) -> {ontology name -> property}
        prop_map: Dict[Tuple[str, str], Dict[str, OntologyProperty]] = {}

        for ont_name, ont in self.ontologies.items():
            for entity in ont.entities:
                for prop in entity.properties:
                    prop_map.setdefault((entity.name, prop.name), {})[ont_name] = prop

        for (entity_name, prop_name), sources in prop_map.items():
            if len(sources) < 2:
                continue

            types = {src: prop.data_type for src, prop in sources.items()}
            unique_types = set(types.values())

            if len(unique_types) > 1:
                # Type divergence is always CRITICAL: it silently corrupts
                # any computation joining data across dashboards.
                details = {src: f"Type: {t}" for src, t in types.items()}

                report.add_conflict(SemanticConflict(
                    conflict_type=ConflictType.TYPE_CONFLICT,
                    severity=ConflictSeverity.CRITICAL,
                    name=f"{entity_name}.{prop_name}",
                    sources=list(sources.keys()),
                    details=details,
                    # FIX: sorted() — joining a raw set made the report text
                    # nondeterministic across runs (hash randomization).
                    description=f"Property '{entity_name}.{prop_name}' has different types: {', '.join(sorted(unique_types))}",
                    recommendation=f"Standardize the data type for '{prop_name}' across all dashboards.",
                ))

    def _analyze_relationship_conflicts(self, report: SemanticDebtReport):
        """Detect relationships between the same entity pair with different cardinalities."""
        # (from entity, to entity) -> {ontology name -> relationship}
        rel_map: Dict[Tuple[str, str], Dict[str, OntologyRelationship]] = {}

        for ont_name, ont in self.ontologies.items():
            for rel in ont.relationships:
                rel_map.setdefault((rel.from_entity, rel.to_entity), {})[ont_name] = rel

        for (from_ent, to_ent), sources in rel_map.items():
            if len(sources) < 2:
                continue

            cardinalities = {src: rel.cardinality for src, rel in sources.items()}
            unique_cards = set(cardinalities.values())

            if len(unique_cards) > 1:
                details = {
                    src: f"Type: {rel.relationship_type}, Cardinality: {rel.cardinality}"
                    for src, rel in sources.items()
                }

                report.add_conflict(SemanticConflict(
                    conflict_type=ConflictType.RELATIONSHIP_CONFLICT,
                    severity=ConflictSeverity.WARNING,
                    name=f"{from_ent} → {to_ent}",
                    sources=list(sources.keys()),
                    details=details,
                    # FIX: sorted() for deterministic report text (see type conflicts).
                    description=f"Relationship '{from_ent} → {to_ent}' has different cardinalities: {', '.join(sorted(unique_cards))}",
                    recommendation="Verify the correct cardinality and update dashboards accordingly.",
                ))

    def _analyze_business_rule_conflicts(self, report: SemanticDebtReport):
        """Detect business rules with the same name but different conditions."""
        # rule name -> {ontology name -> rule}
        rule_map: Dict[str, Dict[str, BusinessRule]] = {}

        for ont_name, ont in self.ontologies.items():
            for rule in ont.business_rules:
                rule_map.setdefault(rule.name, {})[ont_name] = rule

        for rule_name, sources in rule_map.items():
            if len(sources) < 2:
                continue

            conditions = {src: rule.condition for src, rule in sources.items()}
            unique_conditions = set(conditions.values())

            if len(unique_conditions) > 1:
                # BUG FIX: the previous code compared only the first two items
                # of an *unordered set*, so the chosen pair (and therefore the
                # severity) was nondeterministic under hash randomization, and
                # any third variant was ignored entirely. Use the minimum
                # similarity over all pairs of distinct conditions instead.
                conds_list = sorted(unique_conditions)
                similarity = min(
                    self._text_similarity(a, b)
                    for idx, a in enumerate(conds_list)
                    for b in conds_list[idx + 1:]
                )

                if similarity < self.similarity_threshold:
                    severity = ConflictSeverity.CRITICAL
                else:
                    severity = ConflictSeverity.WARNING

                details = {
                    src: f"Condition: {rule.condition}, Action: {rule.action}"
                    for src, rule in sources.items()
                }

                report.add_conflict(SemanticConflict(
                    conflict_type=ConflictType.RULE_CONFLICT,
                    severity=severity,
                    name=rule_name,
                    sources=list(sources.keys()),
                    details=details,
                    description=f"Business rule '{rule_name}' has different conditions across dashboards.",
                    recommendation=f"Consolidate rule '{rule_name}' into a single source of truth.",
                ))

    def _determine_entity_severity(
        self, entity1: OntologyEntity, entity2: OntologyEntity
    ) -> ConflictSeverity:
        """Grade a structural conflict by the Jaccard overlap of property names.

        <50% overlap -> CRITICAL, <80% -> WARNING, otherwise INFO.
        """
        props1 = {p.name for p in entity1.properties}
        props2 = {p.name for p in entity2.properties}

        common = props1 & props2
        total = props1 | props2

        if not total:
            # Both entities have no properties: nothing to conflict over.
            return ConflictSeverity.INFO

        overlap_ratio = len(common) / len(total)

        if overlap_ratio < 0.5:
            return ConflictSeverity.CRITICAL
        elif overlap_ratio < 0.8:
            return ConflictSeverity.WARNING
        else:
            return ConflictSeverity.INFO

    def _text_similarity(self, text1: str, text2: str) -> float:
        """Return a case-insensitive similarity ratio in [0, 1] between two texts."""
        return SequenceMatcher(None, text1.lower(), text2.lower()).ratio()

    def _generate_recommendations(self, report: SemanticDebtReport):
        """Append overall, pattern-based recommendations to the report."""
        if not report.conflicts:
            report.recommendations.append("No semantic conflicts detected. Good job!")
            return

        critical_count = sum(1 for c in report.conflicts if c.severity == ConflictSeverity.CRITICAL)
        warning_count = sum(1 for c in report.conflicts if c.severity == ConflictSeverity.WARNING)

        if critical_count > 0:
            report.recommendations.append(
                f"Address {critical_count} critical conflict(s) immediately - they may cause data inconsistencies."
            )

        # Pattern-specific advice, one recommendation per conflict family.
        type_conflicts = [c for c in report.conflicts if c.conflict_type == ConflictType.TYPE_CONFLICT]
        if type_conflicts:
            report.recommendations.append(
                "Create a shared data dictionary to standardize property types across dashboards."
            )

        entity_conflicts = [c for c in report.conflicts if c.conflict_type == ConflictType.ENTITY_CONFLICT]
        if entity_conflicts:
            report.recommendations.append(
                "Consider creating a master ontology schema that all dashboards inherit from."
            )

        rule_conflicts = [c for c in report.conflicts if c.conflict_type == ConflictType.RULE_CONFLICT]
        if rule_conflicts:
            report.recommendations.append(
                "Centralize business rules in a single repository to ensure consistency."
            )

        if warning_count > 3:
            report.recommendations.append(
                "Schedule a semantic alignment review with stakeholders from different dashboard teams."
            )
569
+
570
+
571
def analyze_ontologies(ontologies: Dict[str, Ontology]) -> SemanticDebtReport:
    """
    Convenience wrapper: run a full semantic debt analysis in one call.

    Args:
        ontologies: Dictionary mapping names (e.g. filenames) to Ontology objects

    Returns:
        SemanticDebtReport with all detected conflicts
    """
    analyzer = SemanticDebtAnalyzer()
    for ontology_name, ontology in ontologies.items():
        analyzer.add_ontology(ontology_name, ontology)
    return analyzer.analyze()
@@ -0,0 +1,13 @@
1
+ """Utility modules for PowerBI Ontology Extractor."""
2
+
3
+ from powerbi_ontology.utils.pbix_reader import PBIXReader
4
+
5
+ __all__ = ["PBIXReader"]
6
+
7
+
8
+ def __getattr__(name):
9
+ """Lazy import to avoid circular dependency with ontology_generator."""
10
+ if name == "OntologyVisualizer":
11
+ from powerbi_ontology.utils.visualizer import OntologyVisualizer
12
+ return OntologyVisualizer
13
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")