powerbi-ontology-extractor 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,406 @@
1
+ """
2
+ Ontology Generator
3
+
4
+ Converts Power BI semantic models to formal ontologies.
5
+ Implements the "70% auto-generated" concept from the article.
6
+ """
7
+
8
import logging
import re
from collections import Counter
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional

from powerbi_ontology.dax_parser import DAXParser
from powerbi_ontology.extractor import SemanticModel, Entity, Relationship, Measure
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
@dataclass
class Constraint:
    """Represents a constraint on a property.

    Attributes:
        type: Kind of constraint: "range", "regex", "enum" or "reference".
        value: Constraint payload. Its shape is not validated by this class.
        message: Free-form text attached to the constraint (empty by default).
    """

    type: str  # "range", "regex", "enum", "reference"
    # Annotated with typing.Any; the builtin function ``any`` is not a type.
    value: Any
    message: str = ""
24
+
25
+
26
@dataclass
class OntologyProperty:
    """A single property (column) of an ontology entity.

    Attributes:
        name: Property name.
        data_type: Data type label of the property.
        required: Flag copied from the source property; defaults to False.
        unique: Flag copied from the source property; defaults to False.
        constraints: Constraints attached to this property; a fresh empty
            list is created for every instance.
        description: Human-readable description.
        source_column: Name of the column this property was derived from.
    """

    # Mandatory fields.
    name: str
    data_type: str

    # Optional fields.
    required: bool = False
    unique: bool = False
    constraints: List[Constraint] = field(default_factory=list)
    description: str = ""
    source_column: str = ""
36
+
37
+
38
@dataclass
class OntologyEntity:
    """An entity (table-level concept) of the ontology.

    Attributes:
        name: Entity name.
        description: Human-readable description.
        properties: Properties belonging to the entity; a fresh empty list
            is created for every instance.
        constraints: Entity-level constraints; a fresh empty list is created
            for every instance.
        source_table: Name of the table this entity was derived from.
        entity_type: Classification label; "standard" unless set otherwise.
    """

    name: str
    description: str = ""
    properties: List[OntologyProperty] = field(default_factory=list)
    constraints: List[Constraint] = field(default_factory=list)
    source_table: str = ""
    # Other labels used alongside the default: "dimension", "fact", "bridge", "date".
    entity_type: str = "standard"
47
+
48
+
49
@dataclass
class OntologyRelationship:
    """A directed relationship between two ontology entities.

    Attributes:
        from_entity: Name of the entity the relationship starts at.
        from_property: Property on ``from_entity`` taking part in the link.
        to_entity: Name of the entity the relationship points to.
        to_property: Property on ``to_entity`` taking part in the link.
        relationship_type: Semantic label such as "has", "belongs_to" or
            "contains".
        cardinality: Cardinality label of the relationship.
        description: Human-readable description.
        source_relationship: Name of the relationship this one was derived from.
    """

    # Endpoints of the relationship.
    from_entity: str
    from_property: str
    to_entity: str
    to_property: str

    # Semantics of the link.
    relationship_type: str  # "has", "belongs_to", "contains", etc.
    cardinality: str

    # Optional provenance and documentation.
    description: str = ""
    source_relationship: str = ""
60
+
61
+
62
@dataclass
class BusinessRule:
    """A business rule recorded in the ontology.

    Attributes:
        name: Rule name.
        entity: Name of the entity the rule applies to.
        condition: Condition expression of the rule.
        action: Action associated with the rule (empty by default).
        classification: Classification label (empty by default).
        description: Human-readable description.
        priority: Integer priority; defaults to 1.
        source_measure: Name of the measure the rule was derived from.
    """

    # Mandatory fields.
    name: str
    entity: str
    condition: str

    # Optional fields.
    action: str = ""
    classification: str = ""
    description: str = ""
    priority: int = 1
    source_measure: str = ""
73
+
74
+
75
@dataclass
class Pattern:
    """A structural pattern detected in the semantic model.

    Attributes:
        pattern_type: Pattern label: "date_table", "dimension", "fact" or
            "bridge".
        entity_name: Name of the entity the pattern was detected on.
        confidence: Confidence score assigned to the detection.
        description: Human-readable description.
    """

    pattern_type: str  # "date_table", "dimension", "fact", "bridge"
    entity_name: str
    confidence: float
    description: str = ""
82
+
83
+
84
@dataclass
class Enhancement:
    """Represents a suggested enhancement to the ontology.

    Attributes:
        type: Kind of suggestion: "missing_rule", "validation_constraint" or
            "semantic_relationship".
        description: Human-readable description of the suggestion.
        entity: Name of the entity the suggestion targets (empty by default).
        property: Name of the property the suggestion targets (empty by
            default).
        suggested_value: Payload of the suggestion; ``None`` when absent.
    """

    # NOTE: ``type`` and ``property`` shadow builtins inside the class body;
    # the names are kept because they are part of the constructor interface.
    type: str  # "missing_rule", "validation_constraint", "semantic_relationship"
    description: str
    entity: str = ""
    property: str = ""
    # Annotated with typing.Any; the builtin function ``any`` is not a type.
    suggested_value: Any = None
92
+
93
+
94
@dataclass
class Ontology:
    """Formal ontology generated from Power BI semantic model.

    Attributes:
        name: Ontology name.
        version: Version string; defaults to "1.0.0".
        source: Description of where the ontology came from.
        entities: Entities of the ontology.
        relationships: Relationships between the entities.
        business_rules: Business rules attached to the ontology.
        metadata: Free-form metadata dictionary.
    """

    name: str
    version: str = "1.0.0"
    source: str = ""
    entities: List[OntologyEntity] = field(default_factory=list)
    relationships: List[OntologyRelationship] = field(default_factory=list)
    business_rules: List[BusinessRule] = field(default_factory=list)
    metadata: Dict = field(default_factory=dict)

    def add_business_rule(self, rule: BusinessRule):
        """Add a business rule to the ontology.

        Args:
            rule: Rule appended to ``business_rules``.
        """
        self.business_rules.append(rule)

    def export_fabric_iq(self, filepath: str):
        """Export to Fabric IQ format.

        The export is built by ``FabricIQExporter`` and written to
        ``filepath`` as JSON with a two-space indent.

        Args:
            filepath: Path of the file to write; an existing file is
                overwritten.
        """
        import json

        # Function-scope import kept from the original code.
        # NOTE(review): presumably this avoids an import cycle with the
        # export package - confirm before moving it to module level.
        from powerbi_ontology.export.fabric_iq import FabricIQExporter

        # Build the export before opening the file, so a failing export
        # does not truncate an existing file.
        fabric_json = FabricIQExporter(self).export()

        # Explicit encoding: the output must not depend on the platform's
        # default locale encoding.
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(fabric_json, f, indent=2)
117
+
118
+
119
class OntologyGenerator:
    """
    Generates formal ontologies from Power BI semantic models.

    This implements the "70% auto-generated" strategy:
    - Automatically extracts entities, relationships, and business rules
    - Business analyst reviews and adds the missing 30%
    """

    # Entity-name words that mark a table as a date/calendar table.
    _DATE_ENTITY_KEYWORDS = ("date", "calendar", "time")
    # Column-name fragments expected in a date/calendar table.
    _DATE_COLUMN_KEYWORDS = ("year", "month", "day", "quarter", "week")
    # Splits identifiers such as "CustomerAge", "customer_age" or
    # "Website URL" into their words (acronym, capitalised word, digits).
    _WORD_RE = re.compile(r"[A-Z]+(?![a-z])|[A-Z]?[a-z]+|[0-9]+")

    def __init__(self, semantic_model: SemanticModel):
        """
        Initialize ontology generator.

        Args:
            semantic_model: Extracted semantic model from Power BI
        """
        self.semantic_model = semantic_model
        self.dax_parser = DAXParser()

    def generate(self) -> Ontology:
        """
        Generate complete ontology from semantic model.

        Entities and relationships are mapped one-to-one, every business
        rule the DAX parser extracts from a measure becomes a
        ``BusinessRule``, and detected patterns finally refine the entity
        types.

        Returns:
            Ontology object
        """
        model = self.semantic_model
        logger.info("Generating ontology from semantic model: %s", model.name)

        ontology = Ontology(
            name=f"{model.name}_Ontology",
            version="1.0.0",
            source=f"Power BI: {model.source_file}",
            metadata={
                # Local (naive) timestamp in ISO 8601 format.
                "generation_date": datetime.now().isoformat(),
                "source_model": model.name,
            },
        )

        # Map entities
        ontology.entities = [self.map_entity(entity) for entity in model.entities]

        # Map relationships
        ontology.relationships = [
            self.map_relationship(rel) for rel in model.relationships
        ]

        # Map measures to business rules
        for measure in model.measures:
            parsed = self.dax_parser.parse_measure(measure.name, measure.dax_formula)
            ontology.business_rules.extend(
                self.map_measure_to_rule(measure, rule)
                for rule in parsed.business_rules
            )

        # Detect patterns
        patterns = self.detect_patterns()
        logger.info("Detected %d patterns", len(patterns))

        # Apply pattern-based enhancements
        self._apply_patterns(ontology, patterns)

        return ontology

    def map_entity(self, entity: Entity) -> OntologyEntity:
        """
        Map Power BI entity to ontology entity.

        Args:
            entity: Power BI entity

        Returns:
            OntologyEntity whose properties mirror the source properties and
            whose ``entity_type`` comes from the name-based classification.
        """
        properties = [
            OntologyProperty(
                name=prop.name,
                data_type=prop.data_type,
                required=prop.required,
                unique=prop.unique,
                description=prop.description,
                source_column=prop.source_column,
            )
            for prop in entity.properties
        ]

        return OntologyEntity(
            name=entity.name,
            description=entity.description,
            properties=properties,
            source_table=entity.source_table,
            entity_type=self._classify_entity_type(entity),
        )

    def map_relationship(self, rel: Relationship) -> OntologyRelationship:
        """
        Map Power BI relationship to ontology relationship.

        Args:
            rel: Power BI relationship

        Returns:
            OntologyRelationship
        """
        return OntologyRelationship(
            from_entity=rel.from_entity,
            from_property=rel.from_property,
            to_entity=rel.to_entity,
            to_property=rel.to_property,
            relationship_type=self._determine_relationship_type(rel),
            cardinality=rel.cardinality,
            description=f"Relationship from {rel.from_entity} to {rel.to_entity}",
            source_relationship=rel.name,
        )

    def map_measure_to_rule(self, measure: Measure, parsed_rule) -> BusinessRule:
        """
        Map DAX measure to business rule.

        Args:
            measure: Power BI measure
            parsed_rule: Parsed business rule from DAX

        Returns:
            BusinessRule. The measure's table and description are used as
            fallbacks when the parsed rule leaves ``entity`` or
            ``description`` empty.
        """
        return BusinessRule(
            name=parsed_rule.name,
            entity=parsed_rule.entity or measure.table,
            condition=parsed_rule.condition,
            action=parsed_rule.action,
            classification=parsed_rule.classification,
            description=parsed_rule.description or measure.description,
            priority=parsed_rule.priority,
            source_measure=measure.name,
        )

    def detect_patterns(self) -> List[Pattern]:
        """
        Detect common patterns in the semantic model.

        An entity may match several patterns; all matches are returned, in
        the order date table, dimension, fact.

        Returns:
            List of detected patterns
        """
        model = self.semantic_model

        # Count relationships per entity once instead of rescanning the
        # relationship list for every entity. The set makes a
        # self-referencing relationship count once.
        relationship_counts = Counter()
        for rel in model.relationships:
            relationship_counts.update({rel.from_entity, rel.to_entity})
        tables_with_measures = {measure.table for measure in model.measures}

        patterns = []
        for entity in model.entities:
            relationship_count = relationship_counts[entity.name]

            # Date table pattern: a date-like entity name plus at least one
            # date-like column. The entity name is matched on whole words so
            # that "Updates" or "Sentiment" do not qualify. Columns keep the
            # looser substring test ("Weekday", "YearMonth") because the
            # name check already gates this branch.
            if self._is_date_entity_name(entity.name) and any(
                fragment in prop.name.lower()
                for prop in entity.properties
                for fragment in self._DATE_COLUMN_KEYWORDS
            ):
                patterns.append(Pattern(
                    pattern_type="date_table",
                    entity_name=entity.name,
                    confidence=0.9,
                    description="Date/Calendar table detected",
                ))

            # Dimension table pattern (small, many relationships)
            if relationship_count >= 3 and len(entity.properties) < 20:
                patterns.append(Pattern(
                    pattern_type="dimension",
                    entity_name=entity.name,
                    confidence=0.7,
                    description="Dimension table pattern detected",
                ))

            # Fact table pattern (few relationships, has measures)
            if entity.name in tables_with_measures and relationship_count <= 3:
                patterns.append(Pattern(
                    pattern_type="fact",
                    entity_name=entity.name,
                    confidence=0.8,
                    description="Fact table pattern detected",
                ))

        return patterns

    def suggest_enhancements(self) -> List[Enhancement]:
        """
        Suggest enhancements to the ontology.

        Property names are matched on whole words (split on case changes
        and non-alphanumeric characters), so "CustomerAge" or "customer_age"
        match the keyword "age" while "Percentage" or "Average" do not.

        Returns:
            List of Enhancement suggestions
        """
        enhancements = []

        def suggest(entity, prop, summary, suggested_value):
            # All suggestions produced here are validation constraints.
            enhancements.append(Enhancement(
                type="validation_constraint",
                description=f"Add {summary} to {entity.name}.{prop.name}",
                entity=entity.name,
                property=prop.name,
                suggested_value=suggested_value,
            ))

        for entity in self.semantic_model.entities:
            for prop in entity.properties:
                words = self._name_words(prop.name)

                # Email validation
                if "email" in words and prop.data_type == "String":
                    suggest(
                        entity, prop, "email format validation",
                        {"type": "regex", "pattern": r"^[^\s@]+@[^\s@]+\.[^\s@]+$"},
                    )

                # URL validation
                if "url" in words or "website" in words:
                    suggest(
                        entity, prop, "URL format validation",
                        {"type": "regex", "pattern": r"^https?://"},
                    )

                # Range constraints for numeric fields
                if prop.data_type in ("Integer", "Decimal"):
                    if "age" in words:
                        suggest(
                            entity, prop, "age range constraint (0-150)",
                            {"type": "range", "min": 0, "max": 150},
                        )
                    elif "score" in words or "rating" in words:
                        suggest(
                            entity, prop, "score range constraint (0-100)",
                            {"type": "range", "min": 0, "max": 100},
                        )

        return enhancements

    @classmethod
    def _name_words(cls, name: str) -> set:
        """Return the lower-cased words of an identifier.

        Words ending in "s" are also added without the trailing "s", so
        simple plurals ("Dates", "Scores") match their singular keyword.
        """
        words = {word.lower() for word in cls._WORD_RE.findall(name)}
        words.update([word[:-1] for word in words if word.endswith("s")])
        return words

    @classmethod
    def _is_date_entity_name(cls, name: str) -> bool:
        """Return True when ``name`` contains a date keyword as a whole word."""
        return not cls._name_words(name).isdisjoint(cls._DATE_ENTITY_KEYWORDS)

    def _classify_entity_type(self, entity: Entity) -> str:
        """Classify entity type based on characteristics.

        This is a simplified, name-only classification; ``generate`` later
        refines it with the detected patterns.

        Returns:
            "date" when the entity name contains a date keyword as a whole
            word, otherwise "standard".
        """
        return "date" if self._is_date_entity_name(entity.name) else "standard"

    def _determine_relationship_type(self, rel: Relationship) -> str:
        """Determine semantic relationship type from Power BI relationship.

        Name-based heuristics (substring checks on the lower-cased entity
        names) take precedence; otherwise the cardinality decides.

        Returns:
            One of "has", "belongs_to", "contained_in" or "related_to".
        """
        source = rel.from_entity.lower()
        target = rel.to_entity.lower()

        # Common patterns
        if "customer" in source and "order" in target:
            return "has"
        if "order" in source and "customer" in target:
            return "belongs_to"
        if "product" in source and "order" in target:
            return "contained_in"
        if "shipment" in source and "customer" in target:
            return "belongs_to"

        # Default based on cardinality
        if rel.cardinality == "one-to-many":
            return "has"
        if rel.cardinality == "many-to-one":
            return "belongs_to"
        return "related_to"

    def _apply_patterns(self, ontology: Ontology, patterns: List[Pattern]):
        """Apply detected patterns to enhance ontology.

        When several patterns target the same entity, the one with the
        highest confidence sets ``entity_type`` (the later pattern wins a
        tie). Patterns naming an entity that is not in the ontology are
        ignored.

        NOTE(review): a "date_table" pattern writes "date_table" into
        ``entity_type``, whereas the name-based classification uses "date".
        Confirm which label downstream consumers expect.
        """
        best_by_entity = {}
        for pattern in patterns:
            current = best_by_entity.get(pattern.entity_name)
            if current is None or pattern.confidence >= current.confidence:
                best_by_entity[pattern.entity_name] = pattern

        # Only the first entity with a given name is updated.
        first_entity_by_name = {}
        for entity in ontology.entities:
            first_entity_by_name.setdefault(entity.name, entity)

        for entity_name, pattern in best_by_entity.items():
            entity = first_entity_by_name.get(entity_name)
            if entity is not None:
                entity.entity_type = pattern.pattern_type