powerbi-ontology-extractor 0.1.0__py3-none-any.whl

@@ -0,0 +1,1106 @@
+ """
+ MCP Server for PowerBI Ontology Extractor.
+
+ This module provides an MCP server that exposes PowerBI ontology extraction
+ and analysis capabilities as MCP tools, allowing AI agents to work with
+ Power BI semantic models through the Model Context Protocol.
+
+ Usage:
+     python -m powerbi_ontology.mcp_server
+
+ Or configure in MCP client with:
+     {
+         "mcpServers": {
+             "powerbi-ontology": {
+                 "command": "python",
+                 "args": ["-m", "powerbi_ontology.mcp_server"],
+                 "cwd": "/path/to/powerbi-ontology-extractor",
+                 "env": {
+                     "POWERBI_MCP_CONFIG": "config/mcp_config.yaml",
+                     "OPENAI_API_KEY": "${OPENAI_API_KEY}"
+                 }
+             }
+         }
+     }
+ """
+
+ import json
+ import logging
+ import os
+ import tempfile
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional
+
+ try:
+     from fastmcp import FastMCP
+ except ImportError:
+     raise ImportError(
+         "fastmcp is required for MCP server. Install with: pip install fastmcp"
+     )
+
+ from powerbi_ontology.mcp_config import get_config, reload_config
+ from powerbi_ontology.mcp_models import (
+     ExtractResult,
+     GenerateResult,
+     ExportOWLResult,
+     ExportJSONResult,
+     AnalyzeDebtResult,
+     DiffResult,
+     MergeResult,
+     ChatResult,
+     ExportFormat,
+     MergeStrategy,
+ )
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+ )
+ logger = logging.getLogger(__name__)
+
+ # Initialize configuration
+ config = get_config()
+
+ # Set log level from config
+ logging.getLogger().setLevel(getattr(logging, config.log_level, logging.INFO))
+
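+ # The config object is expected to expose the attributes used in this module:
+ # log_level (above), plus max_file_size_mb, similarity_threshold,
+ # include_constraints, default_roles, chat_model, server_name, and
+ # server_version (below). Assuming the YAML keys mirror those attribute
+ # names, the mcp_config.yaml referenced in the module docstring might look
+ # like this minimal sketch (values are illustrative, not a documented schema):
+ #
+ #     log_level: INFO
+ #     max_file_size_mb: 500
+ #     similarity_threshold: 0.8
+ #     include_constraints: true
+ #     default_roles: [Analyst, Admin]
+ #     chat_model: gpt-4
+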
+ # Initialize FastMCP server
+ mcp = FastMCP("PowerBI-Ontology")
+
+
+ # ============================================================================
+ # Helper functions
+ # ============================================================================
+
+ def _semantic_model_to_dict(model) -> Dict[str, Any]:
+     """Convert SemanticModel to dictionary."""
+     return {
+         "name": model.name,
+         "source_file": model.source_file,
+         "entities": [
+             {
+                 "name": e.name,
+                 "description": e.description,
+                 "source_table": e.source_table,
+                 "primary_key": e.primary_key,
+                 "properties": [
+                     {
+                         "name": p.name,
+                         "data_type": p.data_type,
+                         "required": p.required,
+                         "unique": p.unique,
+                         "description": p.description,
+                         "source_column": p.source_column,
+                     }
+                     for p in e.properties
+                 ],
+             }
+             for e in model.entities
+         ],
+         "relationships": [
+             {
+                 "from_entity": r.from_entity,
+                 "from_property": r.from_property,
+                 "to_entity": r.to_entity,
+                 "to_property": r.to_property,
+                 "cardinality": r.cardinality,
+                 "cross_filter_direction": r.cross_filter_direction,
+                 "is_active": r.is_active,
+                 "name": r.name,
+             }
+             for r in model.relationships
+         ],
+         "measures": [
+             {
+                 "name": m.name,
+                 "dax_formula": m.dax_formula,
+                 "description": m.description,
+                 "folder": m.folder,
+                 "table": m.table,
+                 "dependencies": m.dependencies,
+             }
+             for m in model.measures
+         ],
+         "hierarchies": [
+             {
+                 "name": h.name,
+                 "table": h.table,
+                 "levels": h.levels,
+                 "hierarchy_type": h.hierarchy_type,
+             }
+             for h in model.hierarchies
+         ],
+         "security_rules": [
+             {
+                 "role": s.role,
+                 "table": s.table,
+                 "dax_filter": s.dax_filter,
+                 "description": s.description,
+             }
+             for s in model.security_rules
+         ],
+         "metadata": model.metadata,
+     }
+
+
+ def _ontology_to_dict(ontology) -> Dict[str, Any]:
+     """Convert Ontology to dictionary."""
+     return {
+         "name": ontology.name,
+         "version": ontology.version,
+         "source": ontology.source,
+         "entities": [
+             {
+                 "name": e.name,
+                 "description": e.description,
+                 "entity_type": e.entity_type,
+                 "source_table": e.source_table,
+                 "properties": [
+                     {
+                         "name": p.name,
+                         "data_type": p.data_type,
+                         "required": p.required,
+                         "unique": p.unique,
+                         "description": p.description,
+                         "source_column": p.source_column,
+                         "constraints": [
+                             {"type": c.type, "value": c.value, "message": c.message}
+                             for c in (p.constraints or [])
+                         ],
+                     }
+                     for p in e.properties
+                 ],
+                 "constraints": [
+                     {"type": c.type, "value": c.value, "message": c.message}
+                     for c in (e.constraints or [])
+                 ],
+             }
+             for e in ontology.entities
+         ],
+         "relationships": [
+             {
+                 "from_entity": r.from_entity,
+                 "from_property": r.from_property,
+                 "to_entity": r.to_entity,
+                 "to_property": r.to_property,
+                 "relationship_type": r.relationship_type,
+                 "cardinality": r.cardinality,
+                 "description": r.description,
+                 "source_relationship": r.source_relationship,
+             }
+             for r in ontology.relationships
+         ],
+         "business_rules": [
+             {
+                 "name": r.name,
+                 "entity": r.entity,
+                 "condition": r.condition,
+                 "action": r.action,
+                 "classification": r.classification,
+                 "description": r.description,
+                 "priority": r.priority,
+                 "source_measure": r.source_measure,
+             }
+             for r in ontology.business_rules
+         ],
+         "metadata": ontology.metadata or {},
+     }
+
+
+ def _dict_to_ontology(data: Dict[str, Any]):
+     """Convert dictionary to Ontology object."""
+     from powerbi_ontology.ontology_generator import (
+         Ontology,
+         OntologyEntity,
+         OntologyProperty,
+         OntologyRelationship,
+         BusinessRule,
+         Constraint,
+     )
+
+     entities = []
+     for e_data in data.get("entities", []):
+         props = []
+         for p_data in e_data.get("properties", []):
+             constraints = [
+                 Constraint(type=c["type"], value=c["value"], message=c.get("message", ""))
+                 for c in p_data.get("constraints", [])
+             ]
+             props.append(OntologyProperty(
+                 name=p_data["name"],
+                 data_type=p_data.get("data_type", "String"),
+                 required=p_data.get("required", False),
+                 unique=p_data.get("unique", False),
+                 description=p_data.get("description", ""),
+                 source_column=p_data.get("source_column", ""),
+                 constraints=constraints,
+             ))
+
+         entity_constraints = [
+             Constraint(type=c["type"], value=c["value"], message=c.get("message", ""))
+             for c in e_data.get("constraints", [])
+         ]
+
+         entities.append(OntologyEntity(
+             name=e_data["name"],
+             description=e_data.get("description", ""),
+             entity_type=e_data.get("entity_type", "standard"),
+             source_table=e_data.get("source_table", ""),
+             properties=props,
+             constraints=entity_constraints,
+         ))
+
+     relationships = []
+     for r_data in data.get("relationships", []):
+         relationships.append(OntologyRelationship(
+             from_entity=r_data["from_entity"],
+             to_entity=r_data["to_entity"],
+             from_property=r_data.get("from_property", ""),
+             to_property=r_data.get("to_property", ""),
+             relationship_type=r_data.get("relationship_type", "related_to"),
+             cardinality=r_data.get("cardinality", "one-to-many"),
+             description=r_data.get("description", ""),
+             source_relationship=r_data.get("source_relationship", ""),
+         ))
+
+     rules = []
+     for b_data in data.get("business_rules", []):
+         rules.append(BusinessRule(
+             name=b_data["name"],
+             entity=b_data.get("entity", ""),
+             condition=b_data.get("condition", ""),
+             action=b_data.get("action", ""),
+             classification=b_data.get("classification", ""),
+             description=b_data.get("description", ""),
+             priority=b_data.get("priority", 1),
+             source_measure=b_data.get("source_measure", ""),
+         ))
+
+     return Ontology(
+         name=data.get("name", "Unnamed"),
+         version=data.get("version", "1.0"),
+         source=data.get("source", ""),
+         entities=entities,
+         relationships=relationships,
+         business_rules=rules,
+         metadata=data.get("metadata", {}),
+     )
+
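+ # For reference, a minimal payload accepted by _dict_to_ontology. Only the
+ # entity/property names, relationship endpoints, and rule names are required;
+ # every other key falls back to a default. The values are illustrative:
+ #
+ #     {
+ #         "name": "SalesModel",
+ #         "entities": [{"name": "Customer", "properties": [{"name": "CustomerID"}]}],
+ #         "relationships": [{"from_entity": "Order", "to_entity": "Customer"}],
+ #         "business_rules": [{"name": "HighValueOrder"}]
+ #     }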
+
+ # ============================================================================
+ # MCP Tool Implementations
+ # ============================================================================
+
+ def _pbix_extract_impl(
+     pbix_path: str,
+     include_measures: bool = True,
+     include_security: bool = True,
+ ) -> Dict[str, Any]:
+     """
+     Extract semantic model from a Power BI .pbix file.
+
+     This tool extracts the complete semantic model including:
+     - Tables/Entities with columns and data types
+     - Relationships between tables
+     - DAX measures and calculated columns
+     - Row-Level Security (RLS) rules
+     - Hierarchies
+
+     Args:
+         pbix_path: Path to the .pbix file
+         include_measures: Whether to extract DAX measures (default: true)
+         include_security: Whether to extract RLS rules (default: true)
+
+     Returns:
+         Dictionary containing:
+         - success (bool): Whether extraction succeeded
+         - entities_count (int): Number of entities extracted
+         - relationships_count (int): Number of relationships
+         - measures_count (int): Number of DAX measures
+         - security_rules_count (int): Number of RLS rules
+         - model_data (dict): Complete semantic model data
+
+     Example:
+         {
+             "pbix_path": "/path/to/Sales.pbix",
+             "include_measures": true,
+             "include_security": true
+         }
+     """
+     logger.info(f"Extracting semantic model from: {pbix_path}")
+
+     try:
+         from powerbi_ontology.extractor import PowerBIExtractor
+
+         # Validate file exists
+         path = Path(pbix_path)
+         if not path.exists():
+             return ExtractResult(
+                 success=False,
+                 error=f"File not found: {pbix_path}"
+             ).to_dict()
+
+         if path.suffix.lower() != ".pbix":
+             return ExtractResult(
+                 success=False,
+                 error=f"Invalid file type: {path.suffix}. Expected .pbix"
+             ).to_dict()
+
+         # Check file size
+         file_size_mb = path.stat().st_size / (1024 * 1024)
+         if file_size_mb > config.max_file_size_mb:
+             return ExtractResult(
+                 success=False,
+                 error=f"File too large: {file_size_mb:.1f}MB. Max: {config.max_file_size_mb}MB"
+             ).to_dict()
+
+         # Extract
+         extractor = PowerBIExtractor(str(path))
+         semantic_model = extractor.extract()
+
+         # Convert to dict
+         model_data = _semantic_model_to_dict(semantic_model)
+
+         # Optionally exclude measures or security
+         if not include_measures:
+             model_data["measures"] = []
+
+         if not include_security:
+             model_data["security_rules"] = []
+
+         result = ExtractResult(
+             success=True,
+             entities_count=len(semantic_model.entities),
+             relationships_count=len(semantic_model.relationships),
+             measures_count=len(semantic_model.measures) if include_measures else 0,
+             security_rules_count=len(semantic_model.security_rules) if include_security else 0,
+             model_data=model_data,
+             source_file=str(path.absolute()),
+         )
+
+         logger.info(
+             f"Extracted: {result.entities_count} entities, "
+             f"{result.relationships_count} relationships, "
+             f"{result.measures_count} measures"
+         )
+
+         return result.to_dict()
+
+     except Exception as e:
+         logger.error(f"Extraction failed: {e}", exc_info=True)
+         return ExtractResult(
+             success=False,
+             error=f"Extraction failed: {str(e)}"
+         ).to_dict()
+
+
+ def _ontology_generate_impl(
+     model_data: Dict[str, Any],
+     detect_patterns: bool = True,
+ ) -> Dict[str, Any]:
+     """
+     Generate an ontology from a semantic model.
+
+     This tool converts a Power BI semantic model (from pbix_extract) into
+     a formal ontology with entities, relationships, and business rules.
+
+     The generator:
+     - Maps tables to ontology entities
+     - Converts relationships to semantic relationships
+     - Extracts business rules from DAX measures
+     - Detects patterns (date tables, dimensions, facts)
+     - Suggests enhancements
+
+     Args:
+         model_data: Semantic model data from pbix_extract
+         detect_patterns: Whether to detect common patterns (default: true)
+
+     Returns:
+         Dictionary containing:
+         - success (bool): Whether generation succeeded
+         - ontology_data (dict): Generated ontology
+         - patterns_detected (list): List of detected patterns
+         - enhancements_suggested (int): Number of suggested enhancements
+
+     Example:
+         {
+             "model_data": {...},  // Output from pbix_extract
+             "detect_patterns": true
+         }
+     """
+     logger.info("Generating ontology from semantic model")
+
+     try:
+         from powerbi_ontology.extractor import (
+             SemanticModel,
+             Entity,
+             Property,
+             Relationship,
+             Measure,
+             Hierarchy,
+             SecurityRule,
+         )
+         from powerbi_ontology.ontology_generator import OntologyGenerator
+
+         # Reconstruct SemanticModel from dict
+         entities = []
+         for e_data in model_data.get("entities", []):
+             props = [
+                 Property(
+                     name=p["name"],
+                     data_type=p.get("data_type", "String"),
+                     required=p.get("required", False),
+                     unique=p.get("unique", False),
+                     description=p.get("description", ""),
+                     source_column=p.get("source_column", ""),
+                 )
+                 for p in e_data.get("properties", [])
+             ]
+             entities.append(Entity(
+                 name=e_data["name"],
+                 description=e_data.get("description", ""),
+                 properties=props,
+                 source_table=e_data.get("source_table", ""),
+                 primary_key=e_data.get("primary_key"),
+             ))
+
+         relationships = [
+             Relationship(
+                 from_entity=r["from_entity"],
+                 from_property=r.get("from_property", ""),
+                 to_entity=r["to_entity"],
+                 to_property=r.get("to_property", ""),
+                 cardinality=r.get("cardinality", "many-to-one"),
+                 cross_filter_direction=r.get("cross_filter_direction", "single"),
+                 is_active=r.get("is_active", True),
+                 name=r.get("name", ""),
+             )
+             for r in model_data.get("relationships", [])
+         ]
+
+         measures = [
+             Measure(
+                 name=m["name"],
+                 dax_formula=m.get("dax_formula", ""),
+                 description=m.get("description", ""),
+                 folder=m.get("folder", ""),
+                 table=m.get("table", ""),
+                 dependencies=m.get("dependencies", []),
+             )
+             for m in model_data.get("measures", [])
+         ]
+
+         hierarchies = [
+             Hierarchy(
+                 name=h["name"],
+                 table=h.get("table", ""),
+                 levels=h.get("levels", []),
+                 hierarchy_type=h.get("hierarchy_type", "custom"),
+             )
+             for h in model_data.get("hierarchies", [])
+         ]
+
+         security_rules = [
+             SecurityRule(
+                 role=s["role"],
+                 table=s.get("table", ""),
+                 dax_filter=s.get("dax_filter", ""),
+                 description=s.get("description", ""),
+             )
+             for s in model_data.get("security_rules", [])
+         ]
+
+         semantic_model = SemanticModel(
+             name=model_data.get("name", "Unnamed"),
+             entities=entities,
+             relationships=relationships,
+             measures=measures,
+             hierarchies=hierarchies,
+             security_rules=security_rules,
+             metadata=model_data.get("metadata", {}),
+             source_file=model_data.get("source_file", ""),
+         )
+
+         # Generate ontology
+         generator = OntologyGenerator(semantic_model)
+         ontology = generator.generate()
+
+         # Detect patterns
+         patterns_detected = []
+         if detect_patterns:
+             patterns = generator.detect_patterns()
+             patterns_detected = [
+                 f"{p.pattern_type}: {p.entity_name} ({p.confidence:.0%})"
+                 for p in patterns
+             ]
+
+         # Suggest enhancements
+         enhancements = generator.suggest_enhancements()
+
+         result = GenerateResult(
+             success=True,
+             ontology_data=_ontology_to_dict(ontology),
+             patterns_detected=patterns_detected,
+             enhancements_suggested=len(enhancements),
+         )
+
+         logger.info(
+             f"Generated ontology with {len(ontology.entities)} entities, "
+             f"{len(ontology.relationships)} relationships, "
+             f"{len(ontology.business_rules)} rules"
+         )
+
+         return result.to_dict()
+
+     except Exception as e:
+         logger.error(f"Generation failed: {e}", exc_info=True)
+         return GenerateResult(
+             success=False,
+             error=f"Generation failed: {str(e)}"
+         ).to_dict()
+
+
+ def _export_owl_impl(
+     ontology_data: Dict[str, Any],
+     format: str = "xml",
+     include_action_rules: bool = True,
+ ) -> Dict[str, Any]:
+     """
+     Export ontology to OWL format.
+
+     This tool exports an ontology to OWL/RDF format for use with:
+     - Triple stores (Blazegraph, Virtuoso)
+     - OntoGuard semantic validation
+     - Other semantic web tools
+
+     The export includes:
+     - OWL classes for entities
+     - Datatype properties for columns
+     - Object properties for relationships
+     - Action rules for OntoGuard (if enabled)
+     - Constraints as OWL restrictions
+
+     Args:
+         ontology_data: Ontology data from ontology_generate
+         format: Output format - "xml", "turtle", "json-ld", "n3" (default: xml)
+         include_action_rules: Generate OntoGuard-compatible action rules (default: true)
+
+     Returns:
+         Dictionary containing:
+         - success (bool): Whether export succeeded
+         - owl_content (str): OWL content in requested format
+         - summary (dict): Export statistics
+
+     Example:
+         {
+             "ontology_data": {...},  // Output from ontology_generate
+             "format": "turtle",
+             "include_action_rules": true
+         }
+     """
+     logger.info(f"Exporting ontology to OWL format: {format}")
+
+     try:
+         from powerbi_ontology.export.owl import OWLExporter
+
+         # Convert dict to Ontology
+         ontology = _dict_to_ontology(ontology_data)
+
+         # Create exporter
+         exporter = OWLExporter(
+             ontology,
+             include_action_rules=include_action_rules,
+             include_constraints=config.include_constraints,
+             default_roles=config.default_roles,
+         )
+
+         # Export
+         owl_content = exporter.export(format=format)
+         summary = exporter.get_export_summary()
+
+         result = ExportOWLResult(
+             success=True,
+             owl_content=owl_content,
+             summary=summary,
+         )
+
+         logger.info(
+             f"Exported OWL with {summary.get('total_triples', 0)} triples, "
+             f"{summary.get('classes', 0)} classes"
+         )
+
+         return result.to_dict()
+
+     except Exception as e:
+         logger.error(f"Export failed: {e}", exc_info=True)
+         return ExportOWLResult(
+             success=False,
+             error=f"Export failed: {str(e)}"
+         ).to_dict()
+
+
+ def _export_json_impl(
+     ontology_data: Dict[str, Any],
+     output_path: Optional[str] = None,
+ ) -> Dict[str, Any]:
+     """
+     Export ontology to JSON format.
+
+     This tool exports an ontology to JSON format for:
+     - Storage and versioning
+     - Loading in the Streamlit UI
+     - Integration with other tools
+
+     Args:
+         ontology_data: Ontology data from ontology_generate
+         output_path: Optional file path to save to (if None, returns content only)
+
+     Returns:
+         Dictionary containing:
+         - success (bool): Whether export succeeded
+         - json_content (str): JSON content
+         - output_path (str): Path where the file was saved (if requested)
+
+     Example:
+         {
+             "ontology_data": {...},
+             "output_path": "/path/to/output.json"
+         }
+     """
+     logger.info("Exporting ontology to JSON format")
+
+     try:
+         json_content = json.dumps(ontology_data, indent=2, ensure_ascii=False)
+
+         if output_path:
+             path = Path(output_path)
+             path.parent.mkdir(parents=True, exist_ok=True)
+             path.write_text(json_content, encoding="utf-8")
+             logger.info(f"Saved JSON to: {output_path}")
+
+         result = ExportJSONResult(
+             success=True,
+             json_content=json_content,
+             output_path=output_path,
+         )
+
+         return result.to_dict()
+
+     except Exception as e:
+         logger.error(f"Export failed: {e}", exc_info=True)
+         return ExportJSONResult(
+             success=False,
+             error=f"Export failed: {str(e)}"
+         ).to_dict()
+
+
+ def _analyze_debt_impl(
+     ontologies: Dict[str, Dict[str, Any]],
+ ) -> Dict[str, Any]:
+     """
+     Analyze semantic debt across multiple ontologies.
+
+     This tool detects conflicting definitions between Power BI dashboards:
+     - Measures with the same name but different DAX formulas
+     - Properties with the same name but different data types
+     - Entities with the same name but different structures
+     - Conflicting business rules
+
+     Use case: Detect when "Revenue" is defined differently in Sales.pbix vs Finance.pbix
+
+     Args:
+         ontologies: Dictionary mapping names to ontology data
+             (must contain at least 2 ontologies)
+
+     Returns:
+         Dictionary containing:
+         - success (bool): Whether analysis succeeded
+         - total_conflicts (int): Total number of conflicts
+         - critical_count (int): Number of critical conflicts
+         - warning_count (int): Number of warnings
+         - info_count (int): Number of info-level issues
+         - conflicts (list): List of conflict details
+         - report_markdown (str): Markdown report
+
+     Example:
+         {
+             "ontologies": {
+                 "Sales.pbix": {...},
+                 "Finance.pbix": {...}
+             }
+         }
+     """
+     logger.info(f"Analyzing semantic debt across {len(ontologies)} ontologies")
+
+     try:
+         from powerbi_ontology.semantic_debt import SemanticDebtAnalyzer
+
+         if len(ontologies) < 2:
+             return AnalyzeDebtResult(
+                 success=False,
+                 error="Need at least 2 ontologies for comparison"
+             ).to_dict()
+
+         analyzer = SemanticDebtAnalyzer(
+             similarity_threshold=config.similarity_threshold
+         )
+
+         # Add ontologies
+         for name, ont_data in ontologies.items():
+             ontology = _dict_to_ontology(ont_data)
+             analyzer.add_ontology(name, ontology)
+
+         # Analyze
+         report = analyzer.analyze()
+
+         result = AnalyzeDebtResult(
+             success=True,
+             total_conflicts=len(report.conflicts),
+             critical_count=report.summary.get("critical", 0),
+             warning_count=report.summary.get("warning", 0),
+             info_count=report.summary.get("info", 0),
+             conflicts=[c.to_dict() for c in report.conflicts],
+             report_markdown=report.to_markdown(),
+         )
+
+         logger.info(
+             f"Analysis complete: {result.total_conflicts} conflicts "
+             f"({result.critical_count} critical)"
+         )
+
+         return result.to_dict()
+
+     except Exception as e:
+         logger.error(f"Analysis failed: {e}", exc_info=True)
+         return AnalyzeDebtResult(
+             success=False,
+             error=f"Analysis failed: {str(e)}"
+         ).to_dict()
+
+
+ def _ontology_diff_impl(
+     source_ontology: Dict[str, Any],
+     target_ontology: Dict[str, Any],
+ ) -> Dict[str, Any]:
+     """
+     Compare two ontology versions.
+
+     This tool provides Git-like diff functionality for ontologies:
+     - Detect added elements
+     - Detect removed elements
+     - Detect modified elements
+     - Generate changelog
+
+     Use case: Track changes between ontology versions
+
+     Args:
+         source_ontology: Original/old ontology version
+         target_ontology: New/updated ontology version
+
+     Returns:
+         Dictionary containing:
+         - success (bool): Whether diff succeeded
+         - has_changes (bool): Whether any changes were detected
+         - total_changes (int): Total number of changes
+         - added (int): Number of added elements
+         - removed (int): Number of removed elements
+         - modified (int): Number of modified elements
+         - changes (list): List of change details
+         - changelog (str): Markdown changelog
+
+     Example:
+         {
+             "source_ontology": {...},  // v1
+             "target_ontology": {...}   // v2
+         }
+     """
+     logger.info("Comparing ontology versions")
+
+     try:
+         from powerbi_ontology.ontology_diff import OntologyDiff
+
+         source = _dict_to_ontology(source_ontology)
+         target = _dict_to_ontology(target_ontology)
+
+         differ = OntologyDiff(source, target)
+         report = differ.diff()
+
+         result = DiffResult(
+             success=True,
+             has_changes=report.has_changes(),
+             total_changes=len(report.changes),
+             added=report.summary.get("added", 0),
+             removed=report.summary.get("removed", 0),
+             modified=report.summary.get("modified", 0),
+             changes=[c.to_dict() for c in report.changes],
+             changelog=report.to_changelog(),
+         )
+
+         logger.info(
+             f"Diff complete: {result.total_changes} changes "
+             f"(+{result.added} -{result.removed} ~{result.modified})"
+         )
+
+         return result.to_dict()
+
+     except Exception as e:
+         logger.error(f"Diff failed: {e}", exc_info=True)
+         return DiffResult(
+             success=False,
+             error=f"Diff failed: {str(e)}"
+         ).to_dict()
+
+
+ def _ontology_merge_impl(
+     base_ontology: Dict[str, Any],
+     ours_ontology: Dict[str, Any],
+     theirs_ontology: Dict[str, Any],
+     strategy: str = "ours",
+ ) -> Dict[str, Any]:
+     """
+     Merge two ontology versions with a common base.
+
+     This tool performs a three-way merge of ontologies:
+     - Combines changes from both versions
+     - Detects conflicts
+     - Applies the resolution strategy
+
+     Strategies:
+     - "ours": Prefer our changes on conflicts
+     - "theirs": Prefer their changes on conflicts
+     - "union": Include both (may cause duplicates)
+
+     Args:
+         base_ontology: Common ancestor version
+         ours_ontology: Our modified version
+         theirs_ontology: Their modified version
+         strategy: Conflict resolution - "ours", "theirs", "union" (default: ours)
+
+     Returns:
+         Dictionary containing:
+         - success (bool): Whether merge succeeded
+         - merged_ontology (dict): Merged ontology data
+         - conflicts_count (int): Number of conflicts encountered
+         - conflicts (list): List of conflict details
+         - new_version (str): Version number of the merged ontology
+
+     Example:
+         {
+             "base_ontology": {...},    // common ancestor
+             "ours_ontology": {...},    // our changes
+             "theirs_ontology": {...},  // their changes
+             "strategy": "ours"
+         }
+     """
+     logger.info(f"Merging ontologies with strategy: {strategy}")
+
+     try:
+         from powerbi_ontology.ontology_diff import OntologyMerge
+
+         base = _dict_to_ontology(base_ontology)
+         ours = _dict_to_ontology(ours_ontology)
+         theirs = _dict_to_ontology(theirs_ontology)
+
+         merger = OntologyMerge(base, ours, theirs)
+         merged, conflicts = merger.merge(strategy=strategy)
+
+         result = MergeResult(
+             success=True,
+             merged_ontology=_ontology_to_dict(merged),
+             conflicts_count=len(conflicts),
+             conflicts=conflicts,
+             new_version=merged.version,
+         )
+
+         logger.info(
+             f"Merge complete: version {result.new_version}, "
+             f"{result.conflicts_count} conflicts"
+         )
+
+         return result.to_dict()
+
+     except Exception as e:
+         logger.error(f"Merge failed: {e}", exc_info=True)
+         return MergeResult(
+             success=False,
+             error=f"Merge failed: {str(e)}"
+         ).to_dict()
+
+
+ def _ontology_chat_ask_impl(
+     question: str,
+     ontology_data: Dict[str, Any],
+     user_role: str = "Analyst",
+ ) -> Dict[str, Any]:
+     """
+     Ask a question about an ontology using AI.
+
+     This tool uses the OpenAI API to answer questions about ontology content:
+     - Entity structure and relationships
+     - DAX measures and calculations
+     - Business rules and permissions
+     - Data model analysis
+
+     Requires the OPENAI_API_KEY environment variable.
+
+     Args:
+         question: Question in natural language (Russian or English)
+         ontology_data: Ontology data to query
+         user_role: User's role for permission context (default: Analyst)
+
+     Returns:
+         Dictionary containing:
+         - success (bool): Whether the query succeeded
+         - answer (str): AI-generated answer
+         - suggested_questions (list): Follow-up question suggestions
+
+     Example:
+         {
+             "question": "Which entities are related to Customer?",
+             "ontology_data": {...},
+             "user_role": "Analyst"
+         }
+     """
+     logger.info(f"Processing chat question: {question[:50]}...")
+
+     try:
+         from powerbi_ontology.chat import OntologyChat
+
+         # Check for API key
+         if not os.getenv("OPENAI_API_KEY"):
+             return ChatResult(
+                 success=False,
+                 error="OPENAI_API_KEY not set. Chat requires OpenAI API access."
+             ).to_dict()
+
+         # Convert to Ontology
+         ontology = _dict_to_ontology(ontology_data)
+
+         # Create chat instance
+         chat = OntologyChat(
+             model=config.chat_model,
+         )
+
+         # Ask question
+         answer = chat.ask(
+             question=question,
+             ontology=ontology,
+             user_role=user_role,
+             include_history=False,  # Stateless for MCP
+         )
+
+         # Get suggestions
+         suggestions = chat.get_suggestions(ontology)
+
+         result = ChatResult(
+             success=True,
+             answer=answer,
+             suggested_questions=suggestions,
+         )
+
+         logger.info("Chat response generated successfully")
+
+         return result.to_dict()
+
+     except Exception as e:
+         logger.error(f"Chat failed: {e}", exc_info=True)
+         return ChatResult(
+             success=False,
+             error=f"Chat failed: {str(e)}"
+         ).to_dict()
+
+
+ # ============================================================================
+ # MCP Tool Registration
+ # ============================================================================
+
+ @mcp.tool()
+ def pbix_extract(
+     pbix_path: str,
+     include_measures: bool = True,
+     include_security: bool = True,
+ ) -> Dict[str, Any]:
+     """Extract semantic model from a Power BI .pbix file."""
+     return _pbix_extract_impl(pbix_path, include_measures, include_security)
+
+
+ @mcp.tool()
+ def ontology_generate(
+     model_data: Dict[str, Any],
+     detect_patterns: bool = True,
+ ) -> Dict[str, Any]:
+     """Generate an ontology from a semantic model."""
+     return _ontology_generate_impl(model_data, detect_patterns)
+
+
+ @mcp.tool()
+ def export_owl(
+     ontology_data: Dict[str, Any],
+     format: str = "xml",
+     include_action_rules: bool = True,
+ ) -> Dict[str, Any]:
+     """Export ontology to OWL format."""
+     return _export_owl_impl(ontology_data, format, include_action_rules)
+
+
+ @mcp.tool()
+ def export_json(
+     ontology_data: Dict[str, Any],
+     output_path: Optional[str] = None,
+ ) -> Dict[str, Any]:
+     """Export ontology to JSON format."""
+     return _export_json_impl(ontology_data, output_path)
+
+
+ @mcp.tool()
+ def analyze_debt(
+     ontologies: Dict[str, Dict[str, Any]],
+ ) -> Dict[str, Any]:
+     """Analyze semantic debt across multiple ontologies."""
+     return _analyze_debt_impl(ontologies)
+
+
+ @mcp.tool()
+ def ontology_diff(
+     source_ontology: Dict[str, Any],
+     target_ontology: Dict[str, Any],
+ ) -> Dict[str, Any]:
+     """Compare two ontology versions."""
+     return _ontology_diff_impl(source_ontology, target_ontology)
+
+
+ @mcp.tool()
+ def ontology_merge(
+     base_ontology: Dict[str, Any],
+     ours_ontology: Dict[str, Any],
+     theirs_ontology: Dict[str, Any],
+     strategy: str = "ours",
+ ) -> Dict[str, Any]:
+     """Merge two ontology versions with a common base."""
+     return _ontology_merge_impl(base_ontology, ours_ontology, theirs_ontology, strategy)
+
+
+ @mcp.tool()
+ def ontology_chat_ask(
+     question: str,
+     ontology_data: Dict[str, Any],
+     user_role: str = "Analyst",
+ ) -> Dict[str, Any]:
+     """Ask a question about an ontology using AI."""
+     return _ontology_chat_ask_impl(question, ontology_data, user_role)
+
+
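+ # A typical session chains the tools above: extract a model, generate an
+ # ontology from it, then export. A minimal sketch calling the implementations
+ # directly (the .pbix path is illustrative):
+ #
+ #     extracted = _pbix_extract_impl("/path/to/Sales.pbix")
+ #     if extracted["success"]:
+ #         generated = _ontology_generate_impl(extracted["model_data"])
+ #         exported = _export_owl_impl(generated["ontology_data"], format="turtle")
+
+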
+ # ============================================================================
+ # Entry Point
+ # ============================================================================
+
+ def main():
+     """Main entry point for the MCP server."""
+     logger.info(f"Starting {config.server_name} v{config.server_version}")
+     logger.info(f"Log level: {config.log_level}")
+     mcp.run()
+
+
+ if __name__ == "__main__":
+     main()