powerbi-ontology-extractor 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,368 @@
1
+ """
2
+ Power BI Semantic Model Extractor
3
+
4
+ Extracts semantic intelligence from Power BI .pbix files.
5
+ """
6
+
7
+ import logging
8
+ from dataclasses import dataclass, field
9
+ from typing import Dict, List, Optional
10
+
11
+ from powerbi_ontology.utils.pbix_reader import PBIXReader
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
@dataclass
class Property:
    """Represents a property/column in an entity.

    Built by PowerBIExtractor.extract_entities() from a single column
    of a Power BI table.
    """
    name: str  # Column name as exposed to the ontology
    data_type: str  # String, Integer, Decimal, Date, Boolean, etc.
    required: bool = False  # True when the source column is declared non-nullable
    unique: bool = False  # True when the column is flagged isUnique or isKey
    description: str = ""  # Free-text description carried over from the model
    source_column: str = ""  # Original Power BI column name
25
+
26
+
27
@dataclass
class Entity:
    """Represents an entity (table) in the semantic model."""
    name: str  # Entity name (taken from the Power BI table name)
    description: str = ""  # Table description from the model, if any
    properties: List[Property] = field(default_factory=list)  # Columns mapped to Property objects
    source_table: str = ""  # Name of the originating Power BI table
    primary_key: Optional[str] = None  # First column flagged isKey/isUnique, if any
35
+
36
+
37
@dataclass
class Relationship:
    """Represents a relationship between entities."""
    from_entity: str  # Source table name
    from_property: str  # Source column name
    to_entity: str  # Target table name
    to_property: str  # Target column name
    cardinality: str  # "one-to-many", "many-to-one", "one-to-one", "many-to-many"
    cross_filter_direction: str = "single"  # "single", "both"
    is_active: bool = True  # Mirrors the model's isActive flag
    name: str = ""  # Relationship name; extractor falls back to "<from>_<to>"
48
+
49
+
50
@dataclass
class Measure:
    """Represents a DAX measure."""
    name: str  # Measure name
    dax_formula: str  # Raw DAX expression ("expression" in the model JSON)
    description: str = ""  # Free-text description from the model
    dependencies: List[str] = field(default_factory=list)  # "Table.Column" refs parsed from the formula
    folder: str = ""  # Display folder from the model
    table: str = ""  # Home table of the measure
59
+
60
+
61
@dataclass
class Hierarchy:
    """Represents a hierarchy (date or custom)."""
    name: str  # Hierarchy name
    table: str  # Owning table
    levels: List[str] = field(default_factory=list)  # Level names in model order
    hierarchy_type: str = "custom"  # "date" or "custom"
68
+
69
+
70
@dataclass
class SecurityRule:
    """Represents a row-level security (RLS) rule."""
    role: str  # RLS role name
    table: str  # Table the DAX filter applies to
    dax_filter: str  # DAX filter expression from the role's table permission
    description: str = ""  # Human-readable summary generated by the extractor
77
+
78
+
79
@dataclass
class SemanticModel:
    """Complete semantic model extracted from Power BI."""
    name: str  # Model name (from the .pbix metadata)
    entities: List[Entity] = field(default_factory=list)  # Tables mapped to entities
    relationships: List[Relationship] = field(default_factory=list)  # Inter-entity links
    measures: List[Measure] = field(default_factory=list)  # DAX measures
    hierarchies: List[Hierarchy] = field(default_factory=list)  # Date/custom hierarchies
    security_rules: List[SecurityRule] = field(default_factory=list)  # RLS rules
    metadata: Dict = field(default_factory=dict)  # e.g. "extraction_date" set by PowerBIExtractor.extract()
    source_file: str = ""  # Path to the originating .pbix file

    def to_ontology(self):
        """Convert to ontology format (delegates to OntologyGenerator)."""
        # Imported lazily here — presumably to avoid a circular import with
        # ontology_generator, which consumes this class; confirm before moving.
        from powerbi_ontology.ontology_generator import OntologyGenerator
        generator = OntologyGenerator(self)
        return generator.generate()
96
+
97
+
98
class PowerBIExtractor:
    """
    Core class for extracting semantic intelligence from Power BI .pbix files.

    ``extract()`` opens the file and pulls every component into a
    SemanticModel. The individual ``extract_*`` methods can also be called
    on their own after ``extract()`` has opened the file; calling them
    earlier raises RuntimeError. Supports use as a context manager so the
    reader's temp files are always cleaned up.
    """

    # Maps (fromCardinality, toCardinality) from the model JSON to the
    # ontology cardinality label; anything unrecognized falls back to
    # "many-to-one" (same default as before).
    _CARDINALITY_MAP = {
        ("one", "many"): "one-to-many",
        ("one", "one"): "one-to-one",
        ("many", "many"): "many-to-many",
    }

    def __init__(self, pbix_path: str):
        """
        Initialize extractor.

        Args:
            pbix_path: Path to the .pbix file
        """
        self.pbix_path = pbix_path
        self.reader: Optional[PBIXReader] = None

    def _require_reader(self) -> PBIXReader:
        """Return the active reader, failing fast if extract() has not run.

        Previously a premature extract_* call crashed with an opaque
        AttributeError on None; this raises an explicit RuntimeError instead.
        """
        if self.reader is None:
            raise RuntimeError(
                "No .pbix file has been opened yet; call extract() first."
            )
        return self.reader

    def extract(self) -> SemanticModel:
        """
        Extract complete semantic model from .pbix file.

        Opens the archive, reads the model JSON, and populates every
        section of the returned model.

        Returns:
            SemanticModel with all extracted information
        """
        logger.info("Extracting semantic model from %s", self.pbix_path)

        self.reader = PBIXReader(self.pbix_path)
        self.reader.extract_to_temp()

        model_data = self.reader.read_model()

        # Model name may live at the top level or nested under "model",
        # depending on the schema version.
        model_name = model_data.get("name", "Unknown")
        if isinstance(model_data, dict) and "model" in model_data:
            model_name = model_data["model"].get("name", model_name)

        # Local import keeps this module's top-level dependencies unchanged
        # (replaces the previous __import__("datetime") hack).
        from datetime import datetime

        semantic_model = SemanticModel(
            name=model_name,
            source_file=self.pbix_path,
            # isoformat() already returns a str; no extra conversion needed.
            metadata={"extraction_date": datetime.now().isoformat()},
        )

        # Extract all components
        semantic_model.entities = self.extract_entities()
        semantic_model.relationships = self.extract_relationships()
        semantic_model.measures = self.extract_measures()
        semantic_model.hierarchies = self.extract_hierarchies()
        semantic_model.security_rules = self.extract_security_rules()

        logger.info(
            "Extracted: %d entities, %d relationships, %d measures",
            len(semantic_model.entities),
            len(semantic_model.relationships),
            len(semantic_model.measures),
        )

        return semantic_model

    def extract_entities(self) -> List[Entity]:
        """
        Extract entities (tables) from Power BI model.

        Returns:
            List of Entity objects

        Raises:
            RuntimeError: if extract() has not opened a file yet.
        """
        reader = self._require_reader()
        entities: List[Entity] = []

        for table in reader.get_tables():
            table_name = table.get("name", "Unknown")
            columns = table.get("columns", [])

            # Map each column to an ontology Property.
            properties = [
                Property(
                    name=col.get("name", ""),
                    data_type=self._map_data_type(col.get("dataType", "string")),
                    # required == column explicitly declared non-nullable
                    required=col.get("isNullable", True) is False,
                    unique=col.get("isUnique", False) or col.get("isKey", False),
                    description=col.get("description", ""),
                    source_column=col.get("name", ""),
                )
                for col in columns
            ]

            # Primary key: the first column flagged as a key or unique.
            primary_key = next(
                (
                    col.get("name")
                    for col in columns
                    if col.get("isKey", False) or col.get("isUnique", False)
                ),
                None,
            )

            entities.append(
                Entity(
                    name=table_name,
                    description=table.get("description", ""),
                    properties=properties,
                    source_table=table_name,
                    primary_key=primary_key,
                )
            )

        return entities

    def extract_relationships(self) -> List[Relationship]:
        """
        Extract relationships between entities.

        Returns:
            List of Relationship objects

        Raises:
            RuntimeError: if extract() has not opened a file yet.
        """
        reader = self._require_reader()
        relationships: List[Relationship] = []

        for rel in reader.get_relationships():
            from_table = rel.get("fromTable", "")
            from_column = rel.get("fromColumn", "")
            to_table = rel.get("toTable", "")
            to_column = rel.get("toColumn", "")

            cardinality = self._CARDINALITY_MAP.get(
                (rel.get("fromCardinality"), rel.get("toCardinality")),
                "many-to-one",
            )

            # Power BI's "bothDirections" maps to bidirectional cross-filter.
            cross_filter = rel.get("crossFilteringBehavior", "singleDirection")
            cross_filter_direction = "both" if cross_filter == "bothDirections" else "single"

            relationships.append(
                Relationship(
                    from_entity=from_table,
                    from_property=from_column,
                    to_entity=to_table,
                    to_property=to_column,
                    cardinality=cardinality,
                    cross_filter_direction=cross_filter_direction,
                    is_active=rel.get("isActive", True),
                    name=rel.get("name", f"{from_table}_{to_table}"),
                )
            )

        return relationships

    def extract_measures(self) -> List[Measure]:
        """
        Extract DAX measures from all tables.

        Returns:
            List of Measure objects

        Raises:
            RuntimeError: if extract() has not opened a file yet.
        """
        reader = self._require_reader()
        measures: List[Measure] = []

        for measure_data in reader.get_measures():
            measure = Measure(
                name=measure_data.get("name", ""),
                dax_formula=measure_data.get("expression", ""),
                description=measure_data.get("description", ""),
                folder=measure_data.get("displayFolder", ""),
                table=measure_data.get("table", ""),
            )

            # Basic dependency scan; full parsing lives in dax_parser.py.
            measure.dependencies = self._extract_measure_dependencies(measure.dax_formula)

            measures.append(measure)

        return measures

    def extract_hierarchies(self) -> List[Hierarchy]:
        """
        Extract hierarchies (date and custom).

        Returns:
            List of Hierarchy objects

        Raises:
            RuntimeError: if extract() has not opened a file yet.
        """
        reader = self._require_reader()
        hierarchies: List[Hierarchy] = []

        for table in reader.get_tables():
            table_name = table.get("name", "")

            for hier in table.get("hierarchies", []):
                hierarchies.append(
                    Hierarchy(
                        name=hier.get("name", ""),
                        table=table_name,
                        levels=[level.get("name", "") for level in hier.get("levels", [])],
                        # Heuristic: tables with "date" in the name hold date hierarchies.
                        hierarchy_type="date" if "date" in table_name.lower() else "custom",
                    )
                )

        return hierarchies

    def extract_security_rules(self) -> List[SecurityRule]:
        """
        Extract row-level security (RLS) rules.

        Returns:
            List of SecurityRule objects

        Raises:
            RuntimeError: if extract() has not opened a file yet.
        """
        model_data = self._require_reader().read_model()
        security_rules: List[SecurityRule] = []

        # Handle different schema versions: roles may be nested under "model".
        roles = []
        if isinstance(model_data, dict):
            if "model" in model_data:
                model_data = model_data["model"]
            roles = model_data.get("roles", [])

        for role in roles:
            role_name = role.get("name", "")

            for perm in role.get("tablePermissions", []):
                table_name = perm.get("name", "")
                filter_expression = perm.get("filterExpression", "")

                # Permissions without a filter expression carry no RLS rule.
                if filter_expression:
                    security_rules.append(
                        SecurityRule(
                            role=role_name,
                            table=table_name,
                            dax_filter=filter_expression,
                            description=f"RLS rule for {table_name} in role {role_name}",
                        )
                    )

        return security_rules

    def _map_data_type(self, pbix_type: str) -> str:
        """Map a Power BI data type to an ontology data type.

        Unknown types default to "String".
        """
        type_mapping = {
            "string": "String",
            "int64": "Integer",
            "double": "Decimal",
            "datetime": "Date",
            "boolean": "Boolean",
            "decimal": "Decimal",
        }
        return type_mapping.get(pbix_type.lower(), "String")

    def _extract_measure_dependencies(self, dax_formula: str) -> List[str]:
        """
        Extract table/column dependencies from a DAX formula (basic implementation).

        Handles bare references (Table[Column]) and quoted table names
        ('Table Name'[Column]). This is a simplified version — full parsing
        is done in dax_parser.py.

        Returns:
            Sorted, de-duplicated list of "Table.Column" strings. Sorting
            makes the output deterministic (set() order is arbitrary).
        """
        import re

        # Quoted-table alternative comes first so 'Fact Sales'[Amt] is not
        # split by the bare \w+ pattern.
        pattern = re.compile(r"'([^']+)'\[([^\]]+)\]|(\w+)\[(\w+)\]")
        dependencies = set()
        for quoted_table, quoted_col, table, column in pattern.findall(dax_formula):
            dependencies.add(f"{quoted_table or table}.{quoted_col or column}")
        return sorted(dependencies)

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit - cleanup the reader's temp files."""
        if self.reader:
            self.reader.cleanup()
@@ -0,0 +1,237 @@
1
+ """
2
+ Configuration loader for MCP Server.
3
+
4
+ Loads and validates configuration from YAML file.
5
+ """
6
+
7
import copy
import logging
import os
from pathlib import Path
from typing import Any, Dict, List, Optional
11
+
12
logger = logging.getLogger(__name__)


# Default configuration — the baseline every loaded config is merged onto.
# Treat as immutable: MCPConfig deep-copies it before any merge so instances
# can never mutate these module-wide defaults.
DEFAULT_CONFIG = {
    "server": {
        "name": "PowerBI Ontology Extractor MCP",
        "version": "0.1.0",
        "description": "Extract semantic intelligence from Power BI files via MCP",
    },
    "log_level": "INFO",
    "extraction": {
        "include_measures": True,
        "include_security": True,
        "cleanup_temp": True,
        "max_file_size_mb": 100,
    },
    "export": {
        "default_format": "xml",
        "include_action_rules": True,
        "include_constraints": True,
        "default_roles": ["Admin", "Analyst", "Viewer"],
    },
    "analysis": {
        "similarity_threshold": 0.8,
    },
    "chat": {
        "model": "gpt-4o-mini",
        "temperature": 0.3,
        "max_tokens": 1000,
    },
    "cache": {
        "enabled": True,
        "ttl_seconds": 3600,
    },
}


class MCPConfig:
    """Configuration manager for MCP Server.

    Loads a YAML config file (when available) and deep-merges it onto
    DEFAULT_CONFIG, exposing typed accessors for each setting.
    """

    def __init__(self, config_path: Optional[str] = None):
        """
        Initialize configuration.

        Args:
            config_path: Path to config.yaml file. If None, checks:
                1. POWERBI_MCP_CONFIG environment variable
                2. config/mcp_config.yaml relative to package
                3. Uses default configuration
        """
        # Deep copy: a shallow .copy() would share the nested dicts with
        # DEFAULT_CONFIG, so mutating this instance's config would corrupt
        # the module-wide defaults for every other instance.
        self._config: Dict[str, Any] = copy.deepcopy(DEFAULT_CONFIG)
        self._config_path: Optional[str] = None

        # Load configuration
        self._load_config(config_path)

    def _load_config(self, config_path: Optional[str] = None):
        """Load configuration from YAML file, falling back to defaults."""
        try:
            import yaml
        except ImportError:
            logger.warning("PyYAML not installed, using default configuration")
            return

        # Determine config path: explicit arg > env var > well-known locations
        if config_path is None:
            config_path = os.getenv("POWERBI_MCP_CONFIG")

        if config_path is None or not Path(config_path).exists():
            # Try default locations
            possible_paths = [
                Path("config/mcp_config.yaml"),
                Path(__file__).parent.parent / "config" / "mcp_config.yaml",
                Path.home() / ".powerbi-ontology" / "mcp_config.yaml",
            ]

            for path in possible_paths:
                if path.exists():
                    config_path = str(path)
                    break

        if config_path is None or not Path(config_path).exists():
            logger.info("No config file found, using default configuration")
            return

        self._config_path = config_path
        logger.info("Loading configuration from: %s", config_path)

        try:
            with open(config_path, "r", encoding="utf-8") as f:
                loaded_config = yaml.safe_load(f) or {}

            # Merge onto a fresh deep copy of the defaults so neither
            # DEFAULT_CONFIG nor its nested dicts are aliased by _config.
            self._config = self._deep_merge(copy.deepcopy(DEFAULT_CONFIG), loaded_config)
            logger.info("Configuration loaded successfully")

        except Exception as e:
            # Best-effort: a broken config file must not prevent startup.
            logger.warning("Error loading config file: %s, using defaults", e)

    def _deep_merge(self, base: Dict, override: Dict) -> Dict:
        """Deep merge two dictionaries (override wins on conflicts).

        Neither input is mutated; nested dicts present in both sides are
        merged recursively, all other override values replace base values.
        """
        result = base.copy()

        for key, value in override.items():
            if key in result and isinstance(result[key], dict) and isinstance(value, dict):
                result[key] = self._deep_merge(result[key], value)
            else:
                result[key] = value

        return result

    # Server settings
    @property
    def server_name(self) -> str:
        return self._config["server"]["name"]

    @property
    def server_version(self) -> str:
        return self._config["server"]["version"]

    @property
    def server_description(self) -> str:
        return self._config["server"]["description"]

    # Logging
    @property
    def log_level(self) -> str:
        return self._config.get("log_level", "INFO").upper()

    # Extraction settings
    @property
    def include_measures(self) -> bool:
        return self._config["extraction"]["include_measures"]

    @property
    def include_security(self) -> bool:
        return self._config["extraction"]["include_security"]

    @property
    def cleanup_temp(self) -> bool:
        return self._config["extraction"]["cleanup_temp"]

    @property
    def max_file_size_mb(self) -> int:
        return self._config["extraction"]["max_file_size_mb"]

    # Export settings
    @property
    def default_format(self) -> str:
        return self._config["export"]["default_format"]

    @property
    def include_action_rules(self) -> bool:
        return self._config["export"]["include_action_rules"]

    @property
    def include_constraints(self) -> bool:
        return self._config["export"]["include_constraints"]

    @property
    def default_roles(self) -> List[str]:
        return self._config["export"]["default_roles"]

    # Analysis settings
    @property
    def similarity_threshold(self) -> float:
        return self._config["analysis"]["similarity_threshold"]

    # Chat settings
    @property
    def chat_model(self) -> str:
        return self._config["chat"]["model"]

    @property
    def chat_temperature(self) -> float:
        return self._config["chat"]["temperature"]

    @property
    def chat_max_tokens(self) -> int:
        return self._config["chat"]["max_tokens"]

    # Cache settings
    @property
    def cache_enabled(self) -> bool:
        return self._config["cache"]["enabled"]

    @property
    def cache_ttl(self) -> int:
        return self._config["cache"]["ttl_seconds"]

    def get(self, key: str, default: Any = None) -> Any:
        """Get configuration value by dot-notation key (e.g. "chat.model")."""
        keys = key.split(".")
        value = self._config

        for k in keys:
            if isinstance(value, dict) and k in value:
                value = value[k]
            else:
                return default

        return value

    def to_dict(self) -> Dict[str, Any]:
        """Return a deep copy of the configuration as a dictionary.

        A deep copy ensures callers cannot mutate this instance's config
        (the previous shallow copy still shared every nested dict).
        """
        return copy.deepcopy(self._config)
219
+
220
+
221
# Global configuration instance
# Lazily created by get_config(); reload_config() replaces it in place.
_config: Optional[MCPConfig] = None


def get_config() -> MCPConfig:
    """Get or create global configuration instance.

    Returns:
        The process-wide MCPConfig, constructed with default path
        resolution on first call.
    """
    global _config
    if _config is None:
        _config = MCPConfig()
    return _config


def reload_config(config_path: Optional[str] = None) -> MCPConfig:
    """Reload configuration from file.

    Replaces the global instance, so subsequent get_config() calls see
    the newly loaded settings.

    Args:
        config_path: Optional explicit path to the YAML config file.

    Returns:
        The freshly constructed MCPConfig.
    """
    global _config
    _config = MCPConfig(config_path)
    return _config