powerbi-ontology-extractor 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,530 @@
+ """
+ CLI for PowerBI Ontology Extractor.
+
+ Provides a command-line interface for:
+ - Extracting ontologies from .pbix files
+ - Batch processing directories
+ - Exporting to various formats (OWL, JSON)
+ - Analyzing semantic debt across ontologies
+ - Diffing ontology versions
+
+ Usage:
+     pbix2owl extract --input file.pbix --output ontology.owl
+     pbix2owl batch --input ./dashboards/ --output ./ontologies/
+     pbix2owl analyze --input ./ontologies/ --output report.md
+     pbix2owl diff --source v1.json --target v2.json
+ """
+
+ import json
+ import logging
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from pathlib import Path
+ from typing import Optional
+
+ import click
+ from rich.console import Console
+ from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
+ from rich.table import Table
+ from rich.panel import Panel
+
+ from powerbi_ontology.extractor import PowerBIExtractor
+ from powerbi_ontology.ontology_generator import OntologyGenerator, Ontology
+ from powerbi_ontology.export.owl import OWLExporter
+ from powerbi_ontology.semantic_debt import SemanticDebtAnalyzer
+ from powerbi_ontology.ontology_diff import OntologyDiff
+
+ console = Console()
+ logger = logging.getLogger(__name__)
+
+
+ def setup_logging(verbose: bool):
+     """Configure logging based on verbosity."""
+     level = logging.DEBUG if verbose else logging.INFO
+     logging.basicConfig(
+         level=level,
+         format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+     )
+
+
+ @click.group()
+ @click.version_option(version="0.1.0", prog_name="pbix2owl")
+ @click.option("-v", "--verbose", is_flag=True, help="Enable verbose output")
+ def cli(verbose: bool):
+     """PowerBI Ontology Extractor - Extract semantic intelligence from Power BI files."""
+     setup_logging(verbose)
+
+
+ @cli.command()
+ @click.option("-i", "--input", "input_path", required=True, type=click.Path(exists=True), help="Input .pbix file")
+ @click.option("-o", "--output", "output_path", required=True, type=click.Path(), help="Output file (OWL or JSON)")
+ @click.option("-f", "--format", "output_format", type=click.Choice(["owl", "json"]), default="owl", help="Output format")
+ @click.option("--include-rules/--no-rules", default=True, help="Include action rules in OWL")
+ @click.option("--include-constraints/--no-constraints", default=True, help="Include constraints in OWL")
+ def extract(input_path: str, output_path: str, output_format: str, include_rules: bool, include_constraints: bool):
+     """Extract ontology from a single .pbix file."""
+     input_file = Path(input_path)
+     output_file = Path(output_path)
+
+     with console.status(f"[bold green]Processing {input_file.name}..."):
+         try:
+             # Extract semantic model
+             extractor = PowerBIExtractor(str(input_file))
+             semantic_model = extractor.extract()
+
+             # Generate ontology
+             generator = OntologyGenerator(semantic_model)
+             ontology = generator.generate()
+
+             # Export
+             if output_format == "owl":
+                 owl_exporter = OWLExporter(
+                     ontology,
+                     include_action_rules=include_rules,
+                     include_constraints=include_constraints,
+                 )
+                 owl_exporter.save(str(output_file))
+             else:
+                 # JSON export
+                 with open(output_file, "w") as f:
+                     json.dump(_ontology_to_dict(ontology), f, indent=2)
+
+             console.print(f"[green]✓[/green] Exported to {output_file}")
+
+             # Show summary
+             _show_extraction_summary(ontology)
+
+         except Exception as e:
+             console.print(f"[red]✗ Error:[/red] {e}")
+             raise click.Abort()
+
+
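The extract command is a thin wrapper around three calls, so the same pipeline can be driven straight from Python; a minimal sketch, assuming the package's public API matches the imports above (file names are hypothetical):

    from powerbi_ontology.extractor import PowerBIExtractor
    from powerbi_ontology.ontology_generator import OntologyGenerator
    from powerbi_ontology.export.owl import OWLExporter

    # Same pipeline the command runs: parse the .pbix, derive the ontology, write OWL.
    extractor = PowerBIExtractor("sales_dashboard.pbix")  # hypothetical input file
    ontology = OntologyGenerator(extractor.extract()).generate()
    OWLExporter(ontology, include_action_rules=True, include_constraints=True).save("sales_dashboard.owl")
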
+ @cli.command()
+ @click.option("-i", "--input", "input_dir", required=True, type=click.Path(exists=True), help="Input directory with .pbix files")
+ @click.option("-o", "--output", "output_dir", required=True, type=click.Path(), help="Output directory for ontologies")
+ @click.option("-f", "--format", "output_format", type=click.Choice(["owl", "json"]), default="owl", help="Output format")
+ @click.option("-w", "--workers", default=4, help="Number of parallel workers")
+ @click.option("--pattern", default="*.pbix", help="File pattern to match")
+ @click.option("--recursive/--no-recursive", default=False, help="Search recursively")
+ def batch(input_dir: str, output_dir: str, output_format: str, workers: int, pattern: str, recursive: bool):
+     """Batch process multiple .pbix files."""
+     input_path = Path(input_dir)
+     output_path = Path(output_dir)
+     output_path.mkdir(parents=True, exist_ok=True)
+
+     # Find files
+     if recursive:
+         files = list(input_path.rglob(pattern))
+     else:
+         files = list(input_path.glob(pattern))
+
+     if not files:
+         console.print(f"[yellow]No files matching '{pattern}' found in {input_dir}[/yellow]")
+         return
+
+     console.print(f"[bold]Found {len(files)} files to process[/bold]\n")
+
+     results = {"success": [], "failed": []}
+
+     with Progress(
+         SpinnerColumn(),
+         TextColumn("[progress.description]{task.description}"),
+         BarColumn(),
+         TaskProgressColumn(),
+         console=console,
+     ) as progress:
+         task = progress.add_task("[cyan]Processing files...", total=len(files))
+
+         with ThreadPoolExecutor(max_workers=workers) as executor:
+             futures = {
+                 executor.submit(_process_single_file, f, output_path, output_format): f
+                 for f in files
+             }
+
+             for future in as_completed(futures):
+                 file = futures[future]
+                 try:
+                     result = future.result()
+                     if result["success"]:
+                         results["success"].append(result)
+                     else:
+                         results["failed"].append(result)
+                 except Exception as e:
+                     results["failed"].append({"file": str(file), "error": str(e)})
+
+                 progress.advance(task)
+
+     # Show results
+     _show_batch_results(results)
+
+
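Each worker runs `_process_single_file` (defined further down), which presumably spends most of its time on file I/O, so a thread pool rather than a process pool is a reasonable choice here. The helper is also callable on its own, for example (paths hypothetical):

    from pathlib import Path

    # One worker's unit of work, reusable outside the CLI.
    result = _process_single_file(Path("dashboards/finance.pbix"), Path("ontologies"), "json")
    if result["success"]:
        print(f"{result['entities']} entities -> {result['output']}")
    else:
        print(f"failed: {result['error']}")
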
+ @cli.command()
+ @click.option("-i", "--input", "input_dir", required=True, type=click.Path(exists=True), help="Directory with ontology files")
+ @click.option("-o", "--output", "output_path", type=click.Path(), help="Output report file")
+ @click.option("-f", "--format", "output_format", type=click.Choice(["markdown", "json"]), default="markdown", help="Report format")
+ @click.option("--pattern", default="*.json", help="File pattern to match")
+ def analyze(input_dir: str, output_path: Optional[str], output_format: str, pattern: str):
+     """Analyze semantic debt across multiple ontologies."""
+     input_path = Path(input_dir)
+     files = list(input_path.glob(pattern))
+
+     if len(files) < 2:
+         console.print("[yellow]Need at least 2 ontology files for analysis[/yellow]")
+         return
+
+     console.print(f"[bold]Analyzing {len(files)} ontologies...[/bold]\n")
+
+     analyzer = SemanticDebtAnalyzer()
+
+     with console.status("[bold green]Loading ontologies..."):
+         for file in files:
+             try:
+                 with open(file) as f:
+                     data = json.load(f)
+                 ontology = _dict_to_ontology(data)
+                 analyzer.add_ontology(file.name, ontology)
+             except Exception as e:
+                 console.print(f"[yellow]Warning: Could not load {file.name}: {e}[/yellow]")
+
+     report = analyzer.analyze()
+
+     # Output report
+     if output_format == "markdown":
+         content = report.to_markdown()
+     else:
+         content = json.dumps(report.to_dict(), indent=2)
+
+     if output_path:
+         Path(output_path).write_text(content)
+         console.print(f"[green]✓[/green] Report saved to {output_path}")
+     else:
+         console.print(content)
+
+     # Show summary panel
+     _show_analysis_summary(report)
+
+
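Because the command only glues `SemanticDebtAnalyzer` to the filesystem, the same check can be scripted, for instance as a CI gate; a sketch reusing `_dict_to_ontology` from below (the directory name and the critical-only policy are assumptions):

    import json
    from pathlib import Path

    analyzer = SemanticDebtAnalyzer()
    for path in Path("./ontologies").glob("*.json"):  # hypothetical directory
        analyzer.add_ontology(path.name, _dict_to_ontology(json.loads(path.read_text())))

    # Fail the build only on critical conflicts; "critical" is the summary key
    # this module itself reads in _show_analysis_summary.
    report = analyzer.analyze()
    raise SystemExit(1 if report.summary.get("critical", 0) else 0)
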
+ @cli.command()
+ @click.option("-s", "--source", required=True, type=click.Path(exists=True), help="Source ontology file")
+ @click.option("-t", "--target", required=True, type=click.Path(exists=True), help="Target ontology file")
+ @click.option("-o", "--output", "output_path", type=click.Path(), help="Output diff file")
+ @click.option("-f", "--format", "output_format", type=click.Choice(["changelog", "unified", "json"]), default="changelog", help="Output format")
+ def diff(source: str, target: str, output_path: Optional[str], output_format: str):
+     """Compare two ontology versions."""
+     source_path = Path(source)
+     target_path = Path(target)
+
+     with console.status("[bold green]Comparing ontologies..."):
+         # Load ontologies
+         with open(source_path) as f:
+             source_data = json.load(f)
+         with open(target_path) as f:
+             target_data = json.load(f)
+
+         source_ont = _dict_to_ontology(source_data)
+         target_ont = _dict_to_ontology(target_data)
+
+         # Perform diff
+         differ = OntologyDiff(source_ont, target_ont)
+         report = differ.diff()
+
+     # Output
+     if output_format == "changelog":
+         content = report.to_changelog()
+     elif output_format == "unified":
+         content = report.to_unified_diff()
+     else:
+         content = json.dumps(report.to_dict(), indent=2)
+
+     if output_path:
+         Path(output_path).write_text(content)
+         console.print(f"[green]✓[/green] Diff saved to {output_path}")
+     else:
+         console.print(content)
+
+     # Show summary
+     _show_diff_summary(report)
+
+
+ def _process_single_file(file: Path, output_dir: Path, output_format: str) -> dict:
+     """Process a single .pbix file."""
+     try:
+         extractor = PowerBIExtractor(str(file))
+         semantic_model = extractor.extract()
+
+         generator = OntologyGenerator(semantic_model)
+         ontology = generator.generate()
+
+         # Determine output filename
+         output_name = file.stem + (".owl" if output_format == "owl" else ".json")
+         output_file = output_dir / output_name
+
+         if output_format == "owl":
+             owl_exporter = OWLExporter(ontology)
+             owl_exporter.save(str(output_file))
+         else:
+             with open(output_file, "w") as f:
+                 json.dump(_ontology_to_dict(ontology), f, indent=2)
+
+         return {
+             "success": True,
+             "file": str(file),
+             "output": str(output_file),
+             "entities": len(ontology.entities),
+             "relationships": len(ontology.relationships),
+         }
+
+     except Exception as e:
+         return {
+             "success": False,
+             "file": str(file),
+             "error": str(e),
+         }
+
+
+ def _ontology_to_dict(ontology: Ontology) -> dict:
+     """Convert Ontology to dictionary."""
+     return {
+         "name": ontology.name,
+         "version": ontology.version,
+         "source": ontology.source,
+         "entities": [
+             {
+                 "name": e.name,
+                 "description": e.description,
+                 "entity_type": e.entity_type,
+                 "properties": [
+                     {
+                         "name": p.name,
+                         "data_type": p.data_type,
+                         "required": p.required,
+                         "unique": p.unique,
+                         "description": p.description,
+                         "constraints": [
+                             {"type": c.type, "value": c.value, "message": c.message}
+                             for c in (p.constraints or [])
+                         ],
+                     }
+                     for p in e.properties
+                 ],
+                 "constraints": [],
+             }
+             for e in ontology.entities
+         ],
+         "relationships": [
+             {
+                 "from_entity": r.from_entity,
+                 "to_entity": r.to_entity,
+                 "from_property": r.from_property,
+                 "to_property": r.to_property,
+                 "relationship_type": r.relationship_type,
+                 "cardinality": r.cardinality,
+                 "description": r.description,
+             }
+             for r in ontology.relationships
+         ],
+         "business_rules": [
+             {
+                 "name": r.name,
+                 "entity": r.entity,
+                 "condition": r.condition,
+                 "action": r.action,
+                 "classification": r.classification,
+                 "description": r.description,
+                 "priority": r.priority,
+             }
+             for r in ontology.business_rules
+         ],
+         "metadata": ontology.metadata or {},
+     }
+
+
+ def _dict_to_ontology(data: dict) -> Ontology:
+     """Convert dictionary to Ontology."""
+     from powerbi_ontology.ontology_generator import (
+         OntologyEntity,
+         OntologyProperty,
+         OntologyRelationship,
+         BusinessRule,
+         Constraint,
+     )
+
+     entities = []
+     for e_data in data.get("entities", []):
+         props = []
+         for p_data in e_data.get("properties", []):
+             constraints = [
+                 Constraint(type=c["type"], value=c["value"], message=c.get("message", ""))
+                 for c in p_data.get("constraints", [])
+             ]
+             props.append(OntologyProperty(
+                 name=p_data["name"],
+                 data_type=p_data.get("data_type", "String"),
+                 required=p_data.get("required", False),
+                 unique=p_data.get("unique", False),
+                 description=p_data.get("description", ""),
+                 constraints=constraints,
+             ))
+
+         entities.append(OntologyEntity(
+             name=e_data["name"],
+             description=e_data.get("description", ""),
+             entity_type=e_data.get("entity_type", "standard"),
+             properties=props,
+             constraints=[],
+         ))
+
+     relationships = []
+     for r_data in data.get("relationships", []):
+         relationships.append(OntologyRelationship(
+             from_entity=r_data["from_entity"],
+             to_entity=r_data["to_entity"],
+             from_property=r_data.get("from_property", ""),
+             to_property=r_data.get("to_property", ""),
+             relationship_type=r_data.get("relationship_type", "related_to"),
+             cardinality=r_data.get("cardinality", "one-to-many"),
+             description=r_data.get("description", ""),
+         ))
+
+     rules = []
+     for b_data in data.get("business_rules", []):
+         rules.append(BusinessRule(
+             name=b_data["name"],
+             entity=b_data.get("entity", ""),
+             condition=b_data.get("condition", ""),
+             action=b_data.get("action", ""),
+             classification=b_data.get("classification", ""),
+             description=b_data.get("description", ""),
+             priority=b_data.get("priority", 1),
+         ))
+
+     return Ontology(
+         name=data.get("name", "Unnamed"),
+         version=data.get("version", "1.0"),
+         source=data.get("source", ""),
+         entities=entities,
+         relationships=relationships,
+         business_rules=rules,
+         metadata=data.get("metadata", {}),
+     )
+
+
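`_ontology_to_dict` and `_dict_to_ontology` are written as inverses, which is what lets `analyze` and `diff` consume the JSON that `extract --format json` emits. A quick round-trip check (filename hypothetical; the equality holds for JSON this tool wrote itself, since the serializer always emits every key the parser reads, while hand-edited files with missing keys get defaults instead):

    import json

    with open("ontology.json") as f:  # hypothetical output of `pbix2owl extract -f json`
        data = json.load(f)

    ontology = _dict_to_ontology(data)
    assert _ontology_to_dict(ontology) == data
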
+ def _show_extraction_summary(ontology: Ontology):
+     """Display extraction summary."""
+     table = Table(title="Extraction Summary", show_header=False)
+     table.add_column("Metric", style="cyan")
+     table.add_column("Value", style="green")
+
+     table.add_row("Ontology Name", ontology.name)
+     table.add_row("Version", ontology.version)
+     table.add_row("Entities", str(len(ontology.entities)))
+     table.add_row("Relationships", str(len(ontology.relationships)))
+     table.add_row("Business Rules", str(len(ontology.business_rules)))
+
+     total_props = sum(len(e.properties) for e in ontology.entities)
+     table.add_row("Total Properties", str(total_props))
+
+     console.print(table)
+
+
+ def _show_batch_results(results: dict):
+     """Display batch processing results."""
+     console.print()
+
+     # Success table
+     if results["success"]:
+         table = Table(title=f"[green]✓ Successfully Processed ({len(results['success'])} files)[/green]")
+         table.add_column("File", style="cyan")
+         table.add_column("Entities", justify="right")
+         table.add_column("Relationships", justify="right")
+         table.add_column("Output")
+
+         for r in results["success"]:
+             table.add_row(
+                 Path(r["file"]).name,
+                 str(r.get("entities", 0)),
+                 str(r.get("relationships", 0)),
+                 Path(r["output"]).name,
+             )
+
+         console.print(table)
+
+     # Failure table
+     if results["failed"]:
+         console.print()
+         table = Table(title=f"[red]✗ Failed ({len(results['failed'])} files)[/red]")
+         table.add_column("File", style="cyan")
+         table.add_column("Error", style="red")
+
+         for r in results["failed"]:
+             table.add_row(
+                 Path(r["file"]).name,
+                 r.get("error", "Unknown error")[:60],
+             )
+
+         console.print(table)
+
+     # Summary panel
+     total = len(results["success"]) + len(results["failed"])
+     success_rate = len(results["success"]) / total * 100 if total > 0 else 0
+
+     console.print()
+     console.print(Panel(
+         f"[bold]Total:[/bold] {total} files\n"
+         f"[green]Success:[/green] {len(results['success'])}\n"
+         f"[red]Failed:[/red] {len(results['failed'])}\n"
+         f"[cyan]Success Rate:[/cyan] {success_rate:.1f}%",
+         title="Batch Summary",
+     ))
+
+
+ def _show_analysis_summary(report):
+     """Display semantic debt analysis summary."""
+     console.print()
+
+     if not report.conflicts:
+         console.print(Panel(
+             "[green]No semantic conflicts detected![/green]\n"
+             "All ontologies are semantically consistent.",
+             title="Analysis Result",
+         ))
+         return
+
+     summary = report.summary
+     console.print(Panel(
+         f"[bold]Total Conflicts:[/bold] {summary.get('total_conflicts', 0)}\n"
+         f"[red]🔴 Critical:[/red] {summary.get('critical', 0)}\n"
+         f"[yellow]🟡 Warning:[/yellow] {summary.get('warning', 0)}\n"
+         f"[blue]🔵 Info:[/blue] {summary.get('info', 0)}",
+         title="Semantic Debt Summary",
+     ))
+
+
+ def _show_diff_summary(report):
+     """Display diff summary."""
+     console.print()
+
+     if not report.has_changes():
+         console.print(Panel(
+             "[green]No changes detected![/green]\n"
+             "Ontologies are identical.",
+             title="Diff Result",
+         ))
+         return
+
+     summary = report.summary
+     console.print(Panel(
+         f"[bold]Total Changes:[/bold] {summary.get('total_changes', 0)}\n"
+         f"[green]➕ Added:[/green] {summary.get('added', 0)}\n"
+         f"[red]➖ Removed:[/red] {summary.get('removed', 0)}\n"
+         f"[yellow]📝 Modified:[/yellow] {summary.get('modified', 0)}",
+         title="Diff Summary",
+     ))
+
+
+ def main():
+     """Entry point for CLI."""
+     cli()
+
+
+ if __name__ == "__main__":
+     main()
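
Given the `main()` wrapper, the `pbix2owl` commands shown in the module docstring can be exercised in-process with click's standard test harness, without installing the console script; a minimal sketch (the import path is assumed from the package name, and the file paths are hypothetical):

    from click.testing import CliRunner

    from powerbi_ontology.cli import cli  # module path assumed from the package name

    runner = CliRunner()
    result = runner.invoke(cli, ["extract", "-i", "report.pbix", "-o", "report.owl"])  # hypothetical paths
    print(result.exit_code)
    print(result.output)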