powerbi-ontology-extractor 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +1 -0
- cli/pbi_ontology_cli.py +286 -0
- powerbi_ontology/__init__.py +38 -0
- powerbi_ontology/analyzer.py +420 -0
- powerbi_ontology/chat.py +303 -0
- powerbi_ontology/cli.py +530 -0
- powerbi_ontology/contract_builder.py +269 -0
- powerbi_ontology/dax_parser.py +305 -0
- powerbi_ontology/export/__init__.py +17 -0
- powerbi_ontology/export/contract_to_owl.py +408 -0
- powerbi_ontology/export/fabric_iq.py +243 -0
- powerbi_ontology/export/fabric_iq_to_owl.py +463 -0
- powerbi_ontology/export/json_schema.py +110 -0
- powerbi_ontology/export/ontoguard.py +177 -0
- powerbi_ontology/export/owl.py +522 -0
- powerbi_ontology/extractor.py +368 -0
- powerbi_ontology/mcp_config.py +237 -0
- powerbi_ontology/mcp_models.py +166 -0
- powerbi_ontology/mcp_server.py +1106 -0
- powerbi_ontology/ontology_diff.py +776 -0
- powerbi_ontology/ontology_generator.py +406 -0
- powerbi_ontology/review.py +556 -0
- powerbi_ontology/schema_mapper.py +369 -0
- powerbi_ontology/semantic_debt.py +584 -0
- powerbi_ontology/utils/__init__.py +13 -0
- powerbi_ontology/utils/pbix_reader.py +558 -0
- powerbi_ontology/utils/visualizer.py +332 -0
- powerbi_ontology_extractor-0.1.0.dist-info/METADATA +507 -0
- powerbi_ontology_extractor-0.1.0.dist-info/RECORD +33 -0
- powerbi_ontology_extractor-0.1.0.dist-info/WHEEL +5 -0
- powerbi_ontology_extractor-0.1.0.dist-info/entry_points.txt +4 -0
- powerbi_ontology_extractor-0.1.0.dist-info/licenses/LICENSE +21 -0
- powerbi_ontology_extractor-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Ontology Generator
|
|
3
|
+
|
|
4
|
+
Converts Power BI semantic models to formal ontologies.
|
|
5
|
+
Implements the "70% auto-generated" concept from the article.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
import re
from collections import Counter
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional

from powerbi_ontology.dax_parser import DAXParser
from powerbi_ontology.extractor import SemanticModel, Entity, Relationship, Measure
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class Constraint:
    """A single constraint attached to a property or an entity.

    Attributes:
        type: Kind of constraint: "range", "regex", "enum" or "reference".
        value: Constraint payload. This class does not fix its shape, so it
            is annotated as ``Any``.
        message: Optional free-text message; empty by default.
    """

    type: str
    # ``Any`` (the typing construct), not ``any`` (the builtin function),
    # is the correct spelling for an unconstrained annotation.
    value: Any
    message: str = ""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class OntologyProperty:
    """One property (attribute) of an ontology entity.

    Attributes:
        name: Property name.
        data_type: Name of the property's data type.
        required: Whether the property is flagged as required; False by default.
        unique: Whether the property is flagged as unique; False by default.
        constraints: Constraints attached to the property. Every instance
            gets its own empty list unless one is supplied.
        description: Free-text description; empty by default.
        source_column: Name of the source column; empty by default.
    """

    # Required fields (positional order is part of the constructor contract).
    name: str
    data_type: str

    # Optional flags.
    required: bool = False
    unique: bool = False

    # Optional details.
    constraints: List[Constraint] = field(default_factory=list)
    description: str = ""
    source_column: str = ""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class OntologyEntity:
    """An entity of the ontology together with its properties.

    Attributes:
        name: Entity name.
        description: Free-text description; empty by default.
        properties: The entity's properties. Every instance gets its own
            empty list unless one is supplied.
        constraints: Entity-level constraints. Every instance gets its own
            empty list unless one is supplied.
        source_table: Name of the source table; empty by default.
        entity_type: Classification label. "standard" by default; the other
            documented labels are "dimension", "fact", "bridge" and "date".
    """

    # Required field.
    name: str

    # Optional details.
    description: str = ""
    properties: List[OntologyProperty] = field(default_factory=list)
    constraints: List[Constraint] = field(default_factory=list)
    source_table: str = ""
    entity_type: str = "standard"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class OntologyRelationship:
    """A directed relationship between two ontology entities.

    Attributes:
        from_entity: Name of the entity the relationship starts at.
        from_property: Property on ``from_entity`` taking part in the link.
        to_entity: Name of the entity the relationship points to.
        to_property: Property on ``to_entity`` taking part in the link.
        relationship_type: Semantic label such as "has", "belongs_to" or
            "contains".
        cardinality: Cardinality label of the relationship.
        description: Free-text description; empty by default.
        source_relationship: Name of the source relationship; empty by default.
    """

    # Endpoints.
    from_entity: str
    from_property: str
    to_entity: str
    to_property: str

    # Semantics.
    relationship_type: str
    cardinality: str

    # Optional details.
    description: str = ""
    source_relationship: str = ""
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class BusinessRule:
    """A business rule recorded in the ontology.

    Attributes:
        name: Rule name.
        entity: Name of the entity the rule applies to.
        condition: Condition text of the rule.
        action: Action text; empty by default.
        classification: Classification text; empty by default.
        description: Free-text description; empty by default.
        priority: Integer priority; 1 by default.
        source_measure: Name of the source measure; empty by default.
    """

    # Required fields.
    name: str
    entity: str
    condition: str

    # Optional details.
    action: str = ""
    classification: str = ""
    description: str = ""
    priority: int = 1
    source_measure: str = ""
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass
class Pattern:
    """A modelling pattern detected for one entity of the semantic model.

    Attributes:
        pattern_type: Pattern label: "date_table", "dimension", "fact" or
            "bridge".
        entity_name: Name of the entity the pattern was detected on.
        confidence: Confidence score attached to the detection.
        description: Free-text description; empty by default.
    """

    # Required fields.
    pattern_type: str
    entity_name: str
    confidence: float

    # Optional detail.
    description: str = ""
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass
class Enhancement:
    """A suggested enhancement to the ontology.

    Attributes:
        type: Kind of suggestion: "missing_rule", "validation_constraint" or
            "semantic_relationship".
        description: Human-readable description of the suggestion.
        entity: Name of the entity concerned; empty by default.
        property: Name of the property concerned; empty by default.
        suggested_value: Payload of the suggestion; ``None`` by default. Its
            shape is not fixed by this class, so it is annotated as ``Any``.
    """

    type: str
    description: str
    entity: str = ""
    property: str = ""
    # ``Any`` (the typing construct), not ``any`` (the builtin function),
    # is the correct spelling for an unconstrained annotation.
    suggested_value: Any = None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dataclass
class Ontology:
    """Formal ontology generated from Power BI semantic model.

    Attributes:
        name: Ontology name.
        version: Version string; "1.0.0" by default.
        source: Free-text description of where the ontology came from;
            empty by default.
        entities: Entities of the ontology (fresh empty list per instance).
        relationships: Relationships between entities (fresh empty list per
            instance).
        business_rules: Business rules (fresh empty list per instance).
        metadata: Arbitrary metadata (fresh empty dict per instance).
    """

    name: str
    version: str = "1.0.0"
    source: str = ""
    entities: List[OntologyEntity] = field(default_factory=list)
    relationships: List[OntologyRelationship] = field(default_factory=list)
    business_rules: List[BusinessRule] = field(default_factory=list)
    metadata: Dict = field(default_factory=dict)

    def add_business_rule(self, rule: BusinessRule) -> None:
        """Add a business rule to the ontology.

        Args:
            rule: Rule to append to ``business_rules``.
        """
        self.business_rules.append(rule)

    def export_fabric_iq(self, filepath: str) -> None:
        """Export to Fabric IQ format.

        The ontology is converted with ``FabricIQExporter`` and the result is
        written to ``filepath`` as JSON indented by two spaces.

        Args:
            filepath: Path of the JSON file to create or overwrite.
        """
        import json

        # Imported inside the method, as before; presumably this avoids a
        # circular import with the export package (not visible from here).
        from powerbi_ontology.export.fabric_iq import FabricIQExporter

        # Build the payload before opening the file, so a failing export
        # does not leave a truncated file behind.
        fabric_json = FabricIQExporter(self).export()

        # Explicit encoding: do not depend on the platform's locale default.
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(fabric_json, f, indent=2)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class OntologyGenerator:
    """
    Generates formal ontologies from Power BI semantic models.

    This implements the "70% auto-generated" strategy:
    - Automatically extracts entities, relationships, and business rules
    - Business analyst reviews and adds the missing 30%
    """

    # Substrings of a lower-cased table name that hint at a date/calendar table.
    _DATE_NAME_KEYWORDS = ("date", "calendar", "time")

    # Substrings of a lower-cased column name that look like calendar parts.
    _DATE_COLUMN_KEYWORDS = ("year", "month", "day", "quarter", "week")

    # Splits names such as "CustomerAge", "customer_age" or "Age (years)"
    # into their words ("Customer", "Age", ...).
    _NAME_WORD_RE = re.compile(r"[A-Z]+(?![a-z])|[A-Z]?[a-z]+|[0-9]+")

    # (keyword in from-entity, keyword in to-entity, relationship type).
    # Checked in order; the first matching row wins.
    _NAMED_RELATIONSHIP_TYPES = (
        ("customer", "order", "has"),
        ("order", "customer", "belongs_to"),
        ("product", "order", "contained_in"),
        ("shipment", "customer", "belongs_to"),
    )

    def __init__(self, semantic_model: SemanticModel):
        """
        Initialize ontology generator.

        Args:
            semantic_model: Extracted semantic model from Power BI
        """
        self.semantic_model = semantic_model
        self.dax_parser = DAXParser()

    def generate(self) -> Ontology:
        """
        Generate complete ontology from semantic model.

        Entities and relationships are mapped one-to-one, every business rule
        the DAX parser finds in a measure becomes a ``BusinessRule``, and the
        detected patterns are finally applied to the entity types.

        Returns:
            Ontology object
        """
        model = self.semantic_model
        logger.info("Generating ontology from semantic model: %s", model.name)

        ontology = Ontology(
            name=f"{model.name}_Ontology",
            version="1.0.0",
            source=f"Power BI: {model.source_file}",
            metadata={
                # Local (naive) timestamp in ISO 8601 format.
                "generation_date": datetime.now().isoformat(),
                "source_model": model.name,
            },
        )

        # Map entities
        ontology.entities = [self.map_entity(entity) for entity in model.entities]

        # Map relationships
        ontology.relationships = [
            self.map_relationship(rel) for rel in model.relationships
        ]

        # Map measures to business rules
        for measure in model.measures:
            parsed = self.dax_parser.parse_measure(measure.name, measure.dax_formula)
            for rule in parsed.business_rules:
                ontology.business_rules.append(
                    self.map_measure_to_rule(measure, rule)
                )

        # Detect patterns
        patterns = self.detect_patterns()
        logger.info("Detected %d patterns", len(patterns))

        # Apply pattern-based enhancements
        self._apply_patterns(ontology, patterns)

        return ontology

    def map_entity(self, entity: Entity) -> OntologyEntity:
        """
        Map Power BI entity to ontology entity.

        Args:
            entity: Power BI entity

        Returns:
            OntologyEntity whose properties mirror ``entity.properties`` and
            whose ``entity_type`` comes from the name-based classification.
        """
        properties = [
            OntologyProperty(
                name=prop.name,
                data_type=prop.data_type,
                required=prop.required,
                unique=prop.unique,
                description=prop.description,
                source_column=prop.source_column,
            )
            for prop in entity.properties
        ]

        return OntologyEntity(
            name=entity.name,
            description=entity.description,
            properties=properties,
            source_table=entity.source_table,
            entity_type=self._classify_entity_type(entity),
        )

    def map_relationship(self, rel: Relationship) -> OntologyRelationship:
        """
        Map Power BI relationship to ontology relationship.

        Args:
            rel: Power BI relationship

        Returns:
            OntologyRelationship
        """
        return OntologyRelationship(
            from_entity=rel.from_entity,
            from_property=rel.from_property,
            to_entity=rel.to_entity,
            to_property=rel.to_property,
            # Semantic label derived from entity names / cardinality.
            relationship_type=self._determine_relationship_type(rel),
            cardinality=rel.cardinality,
            description=f"Relationship from {rel.from_entity} to {rel.to_entity}",
            source_relationship=rel.name,
        )

    def map_measure_to_rule(self, measure: Measure, parsed_rule) -> BusinessRule:
        """
        Map DAX measure to business rule.

        Args:
            measure: Power BI measure
            parsed_rule: Parsed business rule from DAX

        Returns:
            BusinessRule; the entity and description fall back to the
            measure's table and description when the parsed rule leaves
            them empty.
        """
        return BusinessRule(
            name=parsed_rule.name,
            entity=parsed_rule.entity or measure.table,
            condition=parsed_rule.condition,
            action=parsed_rule.action,
            classification=parsed_rule.classification,
            description=parsed_rule.description or measure.description,
            priority=parsed_rule.priority,
            source_measure=measure.name,
        )

    def detect_patterns(self) -> List[Pattern]:
        """
        Detect common patterns in the semantic model.

        For every entity, in this order:
        - "date_table" (confidence 0.9): date-like name and at least one
          calendar-like column
        - "dimension" (0.7): at least 3 relationships and fewer than 20
          properties
        - "fact" (0.8): at least one measure and at most 3 relationships

        An entity may match several patterns.

        Returns:
            List of detected patterns
        """
        model = self.semantic_model

        # Count relationships and measures per entity once, instead of
        # rescanning both lists for every entity. The set makes sure a
        # self-referencing relationship is counted once for its entity.
        relationship_counts = Counter()
        for rel in model.relationships:
            relationship_counts.update({rel.from_entity, rel.to_entity})
        measure_counts = Counter(measure.table for measure in model.measures)

        patterns = []
        for entity in model.entities:
            relationship_count = relationship_counts[entity.name]
            measure_count = measure_counts[entity.name]

            # Date table pattern
            if self._has_date_like_name(entity.name) and any(
                keyword in prop.name.lower()
                for prop in entity.properties
                for keyword in self._DATE_COLUMN_KEYWORDS
            ):
                patterns.append(Pattern(
                    pattern_type="date_table",
                    entity_name=entity.name,
                    confidence=0.9,
                    description="Date/Calendar table detected",
                ))

            # Dimension table pattern (small, many relationships)
            if relationship_count >= 3 and len(entity.properties) < 20:
                patterns.append(Pattern(
                    pattern_type="dimension",
                    entity_name=entity.name,
                    confidence=0.7,
                    description="Dimension table pattern detected",
                ))

            # Fact table pattern (few relationships, has measures)
            if measure_count > 0 and relationship_count <= 3:
                patterns.append(Pattern(
                    pattern_type="fact",
                    entity_name=entity.name,
                    confidence=0.8,
                    description="Fact table pattern detected",
                ))

        return patterns

    def suggest_enhancements(self) -> List[Enhancement]:
        """
        Suggest enhancements to the ontology.

        Returns:
            List of Enhancement suggestions, one "validation_constraint" per
            rule a property triggers.
        """
        return [
            Enhancement(
                type="validation_constraint",
                description=f"Add {label} to {entity.name}.{prop.name}",
                entity=entity.name,
                property=prop.name,
                suggested_value=suggested_value,
            )
            for entity in self.semantic_model.entities
            for prop in entity.properties
            for label, suggested_value in self._validation_suggestions(prop)
        ]

    @classmethod
    def _validation_suggestions(cls, prop) -> list:
        """Return ``(label, suggested_value)`` pairs for the rules ``prop`` triggers.

        Args:
            prop: Object exposing ``name`` and ``data_type``.

        Returns:
            Pairs in rule order: email, URL, then at most one numeric range.
        """
        name_lower = prop.name.lower()
        suggestions = []

        # Email validation
        if "email" in name_lower and prop.data_type == "String":
            suggestions.append((
                "email format validation",
                {"type": "regex", "pattern": r"^[^\s@]+@[^\s@]+\.[^\s@]+$"},
            ))

        # URL validation
        if "url" in name_lower or "website" in name_lower:
            suggestions.append((
                "URL format validation",
                {"type": "regex", "pattern": r"^https?://"},
            ))

        # Range constraints for numeric fields
        if prop.data_type in ("Integer", "Decimal"):
            # "age" must be a whole word: a plain substring test would also
            # hit "Average", "Percentage", "Usage", "Mileage", ...
            if cls._mentions_word(prop.name, "age"):
                suggestions.append((
                    "age range constraint (0-150)",
                    {"type": "range", "min": 0, "max": 150},
                ))
            elif "score" in name_lower or "rating" in name_lower:
                suggestions.append((
                    "score range constraint (0-100)",
                    {"type": "range", "min": 0, "max": 100},
                ))

        return suggestions

    @classmethod
    def _mentions_word(cls, name: str, word: str) -> bool:
        """Return True if ``word`` (lower-case) is one of the words in ``name``.

        Words are separated by case changes or by any character that is not
        an ASCII letter or digit, e.g. "CustomerAge" and "customer_age" both
        contain the word "age" while "Percentage" does not.
        """
        return any(
            token.lower() == word for token in cls._NAME_WORD_RE.findall(name)
        )

    @classmethod
    def _has_date_like_name(cls, name: str) -> bool:
        """Return True if ``name`` contains a date/calendar keyword (case-insensitive)."""
        name_lower = name.lower()
        return any(keyword in name_lower for keyword in cls._DATE_NAME_KEYWORDS)

    def _classify_entity_type(self, entity: Entity) -> str:
        """Classify entity type based on characteristics.

        This is a simplified, name-only classification; the pattern detection
        run by ``generate`` may overwrite the result afterwards.

        Returns:
            "date" for a date-like entity name, otherwise "standard".
        """
        return "date" if self._has_date_like_name(entity.name) else "standard"

    def _determine_relationship_type(self, rel: Relationship) -> str:
        """Determine semantic relationship type from Power BI relationship.

        Well-known entity-name pairs are checked first; otherwise the label
        is derived from the cardinality.

        Returns:
            "has", "belongs_to", "contained_in" or "related_to".
        """
        from_lower = rel.from_entity.lower()
        to_lower = rel.to_entity.lower()

        # Common patterns, by entity name
        for from_keyword, to_keyword, label in self._NAMED_RELATIONSHIP_TYPES:
            if from_keyword in from_lower and to_keyword in to_lower:
                return label

        # Default based on cardinality
        if rel.cardinality == "one-to-many":
            return "has"
        if rel.cardinality == "many-to-one":
            return "belongs_to"
        return "related_to"

    def _apply_patterns(self, ontology: Ontology, patterns: List[Pattern]):
        """Apply detected patterns to enhance ontology.

        When several patterns were detected for the same entity, the one
        with the highest confidence decides its ``entity_type`` (the later
        pattern wins a tie). Patterns naming an unknown entity are ignored.

        Args:
            ontology: Ontology whose entities are updated in place.
            patterns: Patterns as returned by ``detect_patterns``.
        """
        # Keep the first entity for a duplicated name.
        entities_by_name = {}
        for entity in ontology.entities:
            entities_by_name.setdefault(entity.name, entity)

        # Without this selection the last detected pattern would win, so a
        # 0.7 "dimension" guess would overwrite a 0.9 "date_table" match.
        best_by_entity = {}
        for pattern in patterns:
            current = best_by_entity.get(pattern.entity_name)
            if current is None or pattern.confidence >= current.confidence:
                best_by_entity[pattern.entity_name] = pattern

        for entity_name, pattern in best_by_entity.items():
            entity = entities_by_name.get(entity_name)
            if entity is not None:
                # NOTE(review): a winning "date_table" pattern is stored as
                # "date_table", whereas _classify_entity_type uses "date";
                # confirm which spelling downstream consumers expect.
                entity.entity_type = pattern.pattern_type
|