powerbi-ontology-extractor 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +1 -0
- cli/pbi_ontology_cli.py +286 -0
- powerbi_ontology/__init__.py +38 -0
- powerbi_ontology/analyzer.py +420 -0
- powerbi_ontology/chat.py +303 -0
- powerbi_ontology/cli.py +530 -0
- powerbi_ontology/contract_builder.py +269 -0
- powerbi_ontology/dax_parser.py +305 -0
- powerbi_ontology/export/__init__.py +17 -0
- powerbi_ontology/export/contract_to_owl.py +408 -0
- powerbi_ontology/export/fabric_iq.py +243 -0
- powerbi_ontology/export/fabric_iq_to_owl.py +463 -0
- powerbi_ontology/export/json_schema.py +110 -0
- powerbi_ontology/export/ontoguard.py +177 -0
- powerbi_ontology/export/owl.py +522 -0
- powerbi_ontology/extractor.py +368 -0
- powerbi_ontology/mcp_config.py +237 -0
- powerbi_ontology/mcp_models.py +166 -0
- powerbi_ontology/mcp_server.py +1106 -0
- powerbi_ontology/ontology_diff.py +776 -0
- powerbi_ontology/ontology_generator.py +406 -0
- powerbi_ontology/review.py +556 -0
- powerbi_ontology/schema_mapper.py +369 -0
- powerbi_ontology/semantic_debt.py +584 -0
- powerbi_ontology/utils/__init__.py +13 -0
- powerbi_ontology/utils/pbix_reader.py +558 -0
- powerbi_ontology/utils/visualizer.py +332 -0
- powerbi_ontology_extractor-0.1.0.dist-info/METADATA +507 -0
- powerbi_ontology_extractor-0.1.0.dist-info/RECORD +33 -0
- powerbi_ontology_extractor-0.1.0.dist-info/WHEEL +5 -0
- powerbi_ontology_extractor-0.1.0.dist-info/entry_points.txt +4 -0
- powerbi_ontology_extractor-0.1.0.dist-info/licenses/LICENSE +21 -0
- powerbi_ontology_extractor-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,558 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PBIX Reader Utility
|
|
3
|
+
|
|
4
|
+
Reads Power BI .pbix files using PBIXRay library to parse binary DataModel.
|
|
5
|
+
Supports both modern .pbix files (binary DataModel) and legacy files (model.bim JSON).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
import tempfile
|
|
11
|
+
import zipfile
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
# Try to import pbixray for modern .pbix parsing
|
|
18
|
+
try:
|
|
19
|
+
from pbixray import PBIXRay
|
|
20
|
+
PBIXRAY_AVAILABLE = True
|
|
21
|
+
except ImportError:
|
|
22
|
+
PBIXRAY_AVAILABLE = False
|
|
23
|
+
logger.warning("pbixray not installed. Install with: pip install pbixray")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class PBIXReader:
    """
    Reads Power BI .pbix files and extracts semantic model information.

    Uses the PBIXRay library to parse the binary DataModel (XPress9
    compressed) when the library is installed; falls back to the JSON
    model.bim for legacy/exported files.

    .pbix files are ZIP archives containing:
    - DataModel (binary, XPress9 compressed) - modern files
    - DataModel/model.bim (JSON) - legacy/exported files
    - Report/Layout (JSON, UTF-16) - report visualizations
    - [DataMashup] - Power Query M code

    The reader is usable as a context manager; the temporary extraction
    directory is removed on exit.
    """

    # Class-level logger so methods do not depend on module globals.
    _logger = logging.getLogger(__name__)

    # Cardinality string -> (fromCardinality, toCardinality).
    # Hoisted to a constant: previously rebuilt for every relationship row.
    _CARDINALITY_MAP = {
        "M:1": ("many", "one"),
        "1:M": ("one", "many"),
        "1:1": ("one", "one"),
        "M:M": ("many", "many"),
    }

    # pandas dtype -> Power BI data type (unknown dtypes map to "string").
    _PANDAS_TYPE_MAP = {
        "string": "string",
        "object": "string",
        "int64": "int64",
        "Int64": "int64",
        "float64": "double",
        "Float64": "double",
        "bool": "boolean",
        "datetime64[ns]": "datetime",
        "datetime64": "datetime",
    }

    def __init__(self, pbix_path: str):
        """
        Initialize PBIX reader.

        Args:
            pbix_path: Path to the .pbix file

        Raises:
            FileNotFoundError: If the file does not exist.
        """
        self.pbix_path = Path(pbix_path)
        if not self.pbix_path.exists():
            raise FileNotFoundError(f"Power BI file not found: {pbix_path}")

        # Lazy state; populated by extract_to_temp() / the get_* accessors.
        self.temp_dir: Optional[Path] = None
        self._pbixray: Optional[Any] = None
        self._model_data: Optional[Dict] = None
        self._use_pbixray: bool = False
        self._tables_cache: Optional[List[Dict]] = None
        self._relationships_cache: Optional[List[Dict]] = None
        self._measures_cache: Optional[List[Dict]] = None

    def __enter__(self):
        """Context manager entry - extract the archive up front."""
        self.extract_to_temp()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit - cleanup temp files."""
        self.cleanup()

    def extract_to_temp(self) -> Path:
        """
        Extract .pbix file to temporary directory (for legacy support).
        Also initializes PBIXRay if the library is available.

        Returns:
            Path to temporary extraction directory

        Raises:
            ValueError: If the file is not a valid ZIP archive.
            RuntimeError: If extraction fails for any other reason.
        """
        if self.temp_dir:
            return self.temp_dir

        # Import lazily so the class works without the optional dependency.
        try:
            from pbixray import PBIXRay
        except ImportError:
            PBIXRay = None

        # Try PBIXRay first for modern .pbix files (binary DataModel).
        if PBIXRay is not None:
            try:
                self._pbixray = PBIXRay(str(self.pbix_path))
                # Probe: verify the table list is actually readable.
                _ = self._pbixray.tables
                self._use_pbixray = True
                self._logger.info(f"Using PBIXRay for {self.pbix_path}")
            except Exception as e:
                self._logger.warning(f"PBIXRay failed, falling back to JSON: {e}")
                self._use_pbixray = False

        # Always extract the ZIP too: the JSON fallback and extra artifacts
        # (model.bim, Report/Layout) live there.
        try:
            self.temp_dir = Path(tempfile.mkdtemp(prefix="pbix_extract_"))

            with zipfile.ZipFile(self.pbix_path, 'r') as zip_ref:
                zip_ref.extractall(self.temp_dir)

            self._logger.info(f"Extracted .pbix file to {self.temp_dir}")
            return self.temp_dir
        except zipfile.BadZipFile:
            raise ValueError(f"Invalid .pbix file format: {self.pbix_path}")
        except Exception as e:
            raise RuntimeError(f"Failed to extract .pbix file: {e}")

    def read_model(self) -> Dict:
        """
        Read and parse the semantic model data.
        Uses PBIXRay for binary DataModel, falls back to JSON.

        Returns:
            Parsed model data as dict

        Raises:
            FileNotFoundError: If no model.bim exists and PBIXRay is unusable.
            ValueError: If the model JSON is malformed.
        """
        # 'is not None' so an empty (but valid) model is not re-parsed.
        if self._model_data is not None:
            return self._model_data

        if not self.temp_dir:
            self.extract_to_temp()

        if self._use_pbixray:
            # Build a model.bim-shaped dict from PBIXRay data.
            self._model_data = self._build_model_from_pbixray()
            return self._model_data

        # Fallback to JSON model.bim.
        return self._read_model_json()

    def _build_model_from_pbixray(self) -> Dict:
        """Assemble a model.bim-shaped dict from PBIXRay data."""
        return {
            "model": {
                "name": self.pbix_path.stem,
                "tables": self.get_tables(),
                "relationships": self.get_relationships(),
            }
        }

    def _read_model_json(self) -> Dict:
        """Read the model from a JSON file (legacy support)."""
        # Try the well-known locations first.
        candidates = [
            self.temp_dir / "DataModel" / "model.bim",
            self.temp_dir / "model.bim",
            self.temp_dir / "DataModelSchema",
        ]
        model_path = next((p for p in candidates if p.exists()), None)

        if model_path is None:
            # Last resort: any .bim file anywhere in the archive.
            bim_files = list(self.temp_dir.rglob("*.bim"))
            if not bim_files:
                raise FileNotFoundError(
                    f"model.bim not found and PBIXRay not available for: {self.pbix_path}"
                )
            model_path = bim_files[0]

        try:
            raw = model_path.read_bytes()
            # Exported model.bim is usually UTF-8 (possibly with a BOM),
            # while DataModelSchema payloads are commonly UTF-16.
            try:
                text = raw.decode('utf-8-sig')
            except UnicodeDecodeError:
                text = raw.decode('utf-16')
            self._model_data = json.loads(text)
            self._logger.info(f"Successfully read model.bim from {model_path}")
            return self._model_data
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in model.bim: {e}")
        except Exception as e:
            raise RuntimeError(f"Failed to read model.bim: {e}")

    def get_tables(self) -> List[Dict]:
        """
        Extract table definitions from model.

        Returns:
            List of table definitions with columns
        """
        if self._tables_cache is not None:
            return self._tables_cache

        if not self.temp_dir:
            self.extract_to_temp()

        if self._use_pbixray:
            self._tables_cache = self._get_tables_pbixray()
            return self._tables_cache

        # Fallback to JSON (not cached, mirroring get_relationships).
        return self._get_tables_json()

    @staticmethod
    def _measure_from_row(row) -> Dict:
        """Convert a PBIXRay dax_measures row into a measure dict.

        NOTE(review): the `x if x else ""` guard does not catch pandas NaN
        (NaN is truthy) — assumed PBIXRay returns empty strings, not NaN;
        verify against the library's output.
        """
        return {
            "name": row['Name'],
            "expression": row['Expression'] if row['Expression'] else "",
            "displayFolder": row['DisplayFolder'] if row['DisplayFolder'] else "",
            "description": row['Description'] if row['Description'] else "",
        }

    def _get_tables_pbixray(self) -> List[Dict]:
        """Build table definitions from PBIXRay schema/measure frames."""
        tables: List[Dict] = []

        schema_df = self._pbixray.schema
        dax_measures = self._pbixray.dax_measures
        has_measures = dax_measures is not None and not dax_measures.empty

        for table_name in self._pbixray.tables:
            table_schema = schema_df[schema_df['TableName'] == table_name]

            columns = [
                {
                    "name": row['ColumnName'],
                    "dataType": self._map_pandas_type(row['PandasDataType']),
                    # PBIXRay does not expose nullability/key metadata;
                    # conservative defaults are used.
                    "isNullable": True,
                    "isKey": False,
                    "isUnique": False,
                }
                for _, row in table_schema.iterrows()
            ]

            measures: List[Dict] = []
            if has_measures:
                table_measures = dax_measures[dax_measures['TableName'] == table_name]
                measures = [
                    self._measure_from_row(row)
                    for _, row in table_measures.iterrows()
                ]

            tables.append({
                "name": table_name,
                "description": "",
                "columns": columns,
                "measures": measures,
                # Hierarchies are not exposed by PBIXRay.
                "hierarchies": [],
            })

        return tables

    def _model_section(self, key: str) -> List[Dict]:
        """
        Fetch a top-level list (e.g. "tables", "relationships") from the
        JSON model, tolerating one or two levels of "model" nesting —
        different Power BI schema versions nest differently.
        """
        model = self.read_model()

        if isinstance(model, dict):
            if "model" in model:
                model = model["model"]
            if key in model:
                return model[key]
            if "model" in model and key in model["model"]:
                return model["model"][key]

        return []

    def _get_tables_json(self) -> List[Dict]:
        """Get tables from JSON model."""
        return self._model_section("tables")

    def get_relationships(self) -> List[Dict]:
        """
        Extract relationship definitions from model.

        Returns:
            List of relationship definitions
        """
        if self._relationships_cache is not None:
            return self._relationships_cache

        if not self.temp_dir:
            self.extract_to_temp()

        if self._use_pbixray:
            self._relationships_cache = self._get_relationships_pbixray()
            return self._relationships_cache

        # Fallback to JSON.
        return self._get_relationships_json()

    def _get_relationships_pbixray(self) -> List[Dict]:
        """Get relationships from PBIXRay."""
        rels_df = self._pbixray.relationships
        if rels_df is None or rels_df.empty:
            return []

        relationships: List[Dict] = []
        for _, row in rels_df.iterrows():
            from_card, to_card = self._CARDINALITY_MAP.get(
                row.get('Cardinality', 'M:1'), ("many", "one")
            )

            cross_filter = row.get('CrossFilteringBehavior', 'Single')
            cross_filter_behavior = (
                "bothDirections" if cross_filter == "Both" else "singleDirection"
            )

            relationships.append({
                "fromTable": row['FromTableName'],
                "fromColumn": row['FromColumnName'],
                "toTable": row['ToTableName'] if row['ToTableName'] else "",
                "toColumn": row['ToColumnName'] if row['ToColumnName'] else "",
                "fromCardinality": from_card,
                "toCardinality": to_card,
                "crossFilteringBehavior": cross_filter_behavior,
                "isActive": bool(row.get('IsActive', True)),
                "name": f"{row['FromTableName']}_{row['ToTableName'] or 'Unknown'}",
            })

        return relationships

    def _get_relationships_json(self) -> List[Dict]:
        """Get relationships from JSON model."""
        return self._model_section("relationships")

    def get_measures(self) -> List[Dict]:
        """
        Extract DAX measures from all tables.

        Returns:
            List of measure definitions
        """
        if self._measures_cache is not None:
            return self._measures_cache

        if not self.temp_dir:
            self.extract_to_temp()

        if self._use_pbixray:
            self._measures_cache = self._get_measures_pbixray()
            return self._measures_cache

        # Fallback to JSON.
        return self._get_measures_json()

    def _get_measures_pbixray(self) -> List[Dict]:
        """Get measures from PBIXRay."""
        dax_measures = self._pbixray.dax_measures
        if dax_measures is None or dax_measures.empty:
            return []

        measures: List[Dict] = []
        for _, row in dax_measures.iterrows():
            measure = self._measure_from_row(row)
            measure["table"] = row['TableName']
            measures.append(measure)

        return measures

    def _get_measures_json(self) -> List[Dict]:
        """Get measures from JSON model (flattened across tables)."""
        measures: List[Dict] = []

        for table in self._get_tables_json():
            for measure in table.get("measures", []):
                # Tag each measure with its owning table for flat consumers.
                measure["table"] = table.get("name", "Unknown")
                measures.append(measure)

        return measures

    def get_power_query(self) -> List[Dict]:
        """
        Extract Power Query (M) expressions.

        Returns:
            List of Power Query expressions per table (PBIXRay only;
            empty list when only the JSON fallback is available).
        """
        # Fixed: previously returned [] when called before extraction,
        # because _use_pbixray is only set by extract_to_temp().
        if not self.temp_dir:
            self.extract_to_temp()

        if not self._use_pbixray:
            return []

        pq_df = self._pbixray.power_query
        if pq_df is None or pq_df.empty:
            return []

        return [
            {"table": row['TableName'], "expression": row['Expression']}
            for _, row in pq_df.iterrows()
        ]

    def get_dax_columns(self) -> List[Dict]:
        """
        Extract calculated columns (DAX expressions).

        Returns:
            List of DAX column definitions (PBIXRay only; empty list
            when only the JSON fallback is available).
        """
        # Fixed: ensure PBIXRay is initialized before checking for it.
        if not self.temp_dir:
            self.extract_to_temp()

        if not self._use_pbixray:
            return []

        dax_df = self._pbixray.dax_columns
        if dax_df is None or dax_df.empty:
            return []

        return [
            {
                "table": row['TableName'],
                "name": row['ColumnName'],
                "expression": row['Expression'],
            }
            for _, row in dax_df.iterrows()
        ]

    def get_rls_rules(self) -> List[Dict]:
        """
        Extract Row-Level Security (RLS) rules.

        Returns:
            List of RLS rule definitions
        """
        # Fixed: previously, calling this before extraction always took the
        # JSON path even when PBIXRay could have parsed the binary model.
        if not self.temp_dir:
            self.extract_to_temp()

        if not self._use_pbixray:
            # JSON fallback: roles/tablePermissions in model.bim.
            return self._get_rls_json()

        rls_df = self._pbixray.rls
        if rls_df is None or rls_df.empty:
            return []

        return [
            {
                "role": row.get('RoleName', ''),
                "table": row.get('TableName', ''),
                "filter_expression": row.get('FilterExpression', ''),
            }
            for _, row in rls_df.iterrows()
        ]

    def _get_rls_json(self) -> List[Dict]:
        """Get RLS from JSON model."""
        model = self.read_model()
        rules: List[Dict] = []

        if isinstance(model, dict):
            if "model" in model:
                model = model["model"]

            for role in model.get("roles", []):
                role_name = role.get("name", "")
                for perm in role.get("tablePermissions", []):
                    # Only permissions that actually filter rows are rules.
                    if perm.get("filterExpression"):
                        rules.append({
                            "role": role_name,
                            "table": perm.get("name", ""),
                            "filter_expression": perm.get("filterExpression", ""),
                        })

        return rules

    def get_table_data(self, table_name: str) -> Optional[Any]:
        """
        Get actual data from a table (PBIXRay only).

        Args:
            table_name: Name of the table

        Returns:
            DataFrame with table data or None
        """
        # Fixed: initialize PBIXRay if the caller has not extracted yet.
        if not self.temp_dir:
            self.extract_to_temp()

        if not self._use_pbixray:
            self._logger.warning("Table data extraction requires PBIXRay")
            return None

        try:
            return self._pbixray.get_table(table_name)
        except Exception as e:
            self._logger.error(f"Failed to get table data for {table_name}: {e}")
            return None

    def _map_pandas_type(self, pandas_type: str) -> str:
        """Map pandas dtype to Power BI data type ("string" for unknowns)."""
        return self._PANDAS_TYPE_MAP.get(pandas_type, "string")

    def cleanup(self):
        """Remove temporary extraction directory and release PBIXRay."""
        if self.temp_dir and self.temp_dir.exists():
            import shutil
            try:
                shutil.rmtree(self.temp_dir)
                self._logger.info(f"Cleaned up temporary directory: {self.temp_dir}")
            except Exception as e:
                # Best-effort cleanup: a leftover temp dir is not fatal.
                self._logger.warning(f"Failed to cleanup temp directory: {e}")
            finally:
                self.temp_dir = None

        # Clear PBIXRay reference (caches are kept intentionally, so
        # previously-read metadata remains usable after cleanup).
        self._pbixray = None

    @property
    def is_pbixray_available(self) -> bool:
        """True when PBIXRay successfully parsed this file."""
        return self._use_pbixray

    def read_report(self) -> Optional[Dict]:
        """
        Read and parse the report.json file (optional, for context).

        Returns:
            Parsed JSON report data or None if not found
        """
        if not self.temp_dir:
            self.extract_to_temp()

        report_path = self.temp_dir / "Report" / "report.json"
        if not report_path.exists():
            # Modern files ship a UTF-16-LE "Layout" blob instead.
            layout_path = self.temp_dir / "Report" / "Layout"
            if layout_path.exists():
                try:
                    with open(layout_path, 'rb') as f:
                        content = f.read()
                    text = content.decode('utf-16-le')
                    return json.loads(text)
                except Exception as e:
                    self._logger.warning(f"Failed to read Layout: {e}")

            self._logger.warning("report.json not found in .pbix file")
            return None

        try:
            with open(report_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except Exception as e:
            self._logger.warning(f"Failed to read report.json: {e}")
            return None