powerbi-ontology-extractor 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- powerbi_ontology_extractor-0.1.0/LICENSE +21 -0
- powerbi_ontology_extractor-0.1.0/MANIFEST.in +18 -0
- powerbi_ontology_extractor-0.1.0/PKG-INFO +507 -0
- powerbi_ontology_extractor-0.1.0/README.md +451 -0
- powerbi_ontology_extractor-0.1.0/cli/__init__.py +1 -0
- powerbi_ontology_extractor-0.1.0/cli/pbi_ontology_cli.py +286 -0
- powerbi_ontology_extractor-0.1.0/config/mcp_config.yaml +44 -0
- powerbi_ontology_extractor-0.1.0/examples/__init__.py +1 -0
- powerbi_ontology_extractor-0.1.0/examples/detect_semantic_conflicts.py +133 -0
- powerbi_ontology_extractor-0.1.0/examples/extract_supply_chain_dashboard.py +189 -0
- powerbi_ontology_extractor-0.1.0/examples/generate_customer_ontology.py +47 -0
- powerbi_ontology_extractor-0.1.0/examples/sample_ontology.json +200 -0
- powerbi_ontology_extractor-0.1.0/examples/sample_pbix/Adventure_Works_DW_2020.pbix +0 -0
- powerbi_ontology_extractor-0.1.0/examples/sample_pbix/Sales_Returns_Sample.pbix +0 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/__init__.py +38 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/analyzer.py +420 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/chat.py +303 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/cli.py +530 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/contract_builder.py +269 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/dax_parser.py +305 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/export/__init__.py +17 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/export/contract_to_owl.py +408 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/export/fabric_iq.py +243 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/export/fabric_iq_to_owl.py +463 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/export/json_schema.py +110 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/export/ontoguard.py +177 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/export/owl.py +522 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/extractor.py +368 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/mcp_config.py +237 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/mcp_models.py +166 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/mcp_server.py +1106 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/ontology_diff.py +776 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/ontology_generator.py +406 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/review.py +556 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/schema_mapper.py +369 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/semantic_debt.py +584 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/utils/__init__.py +13 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/utils/pbix_reader.py +558 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology/utils/visualizer.py +332 -0
- powerbi_ontology_extractor-0.1.0/powerbi_ontology_extractor.egg-info/SOURCES.txt +41 -0
- powerbi_ontology_extractor-0.1.0/pyproject.toml +159 -0
- powerbi_ontology_extractor-0.1.0/requirements.txt +32 -0
- powerbi_ontology_extractor-0.1.0/setup.cfg +4 -0
- powerbi_ontology_extractor-0.1.0/setup.py +44 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 PowerBI Ontology Extractor Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
include LICENSE
|
|
2
|
+
include README.md
|
|
3
|
+
include requirements.txt
|
|
4
|
+
include pyproject.toml
|
|
5
|
+
|
|
6
|
+
recursive-include powerbi_ontology *.py *.yaml *.json
|
|
7
|
+
recursive-include config *.yaml *.json
|
|
8
|
+
recursive-include examples *.py *.json *.pbix
|
|
9
|
+
|
|
10
|
+
exclude .gitignore
|
|
11
|
+
exclude .env
|
|
12
|
+
exclude *.pyc
|
|
13
|
+
|
|
14
|
+
prune tests
|
|
15
|
+
prune docs
|
|
16
|
+
prune .git
|
|
17
|
+
prune __pycache__
|
|
18
|
+
prune *.egg-info
|
|
@@ -0,0 +1,507 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: powerbi-ontology-extractor
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Extract semantic intelligence from Power BI .pbix files and convert to formal ontologies
|
|
5
|
+
Home-page: https://github.com/vpakspace/powerbi-ontology-extractor
|
|
6
|
+
Author: PowerBI Ontology Extractor Contributors
|
|
7
|
+
Author-email:
|
|
8
|
+
License: MIT
|
|
9
|
+
Project-URL: Homepage, https://github.com/vpakspace/powerbi-ontology-extractor
|
|
10
|
+
Project-URL: Documentation, https://github.com/vpakspace/powerbi-ontology-extractor#readme
|
|
11
|
+
Project-URL: Repository, https://github.com/vpakspace/powerbi-ontology-extractor
|
|
12
|
+
Project-URL: Issues, https://github.com/vpakspace/powerbi-ontology-extractor/issues
|
|
13
|
+
Project-URL: Changelog, https://github.com/vpakspace/powerbi-ontology-extractor/blob/main/CHANGELOG.md
|
|
14
|
+
Keywords: powerbi,ontology,semantic-model,dax,fabric-iq,ontoguard,ai-agents,business-intelligence
|
|
15
|
+
Classifier: Development Status :: 3 - Alpha
|
|
16
|
+
Classifier: Intended Audience :: Developers
|
|
17
|
+
Classifier: Intended Audience :: Science/Research
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
21
|
+
Classifier: Programming Language :: Python :: 3
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
26
|
+
Requires-Python: >=3.9
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Requires-Dist: pydantic>=2.0.0
|
|
30
|
+
Requires-Dist: networkx>=3.0
|
|
31
|
+
Requires-Dist: pyparsing>=3.0.0
|
|
32
|
+
Requires-Dist: pandas>=2.0.0
|
|
33
|
+
Requires-Dist: click>=8.0.0
|
|
34
|
+
Requires-Dist: rich>=13.0.0
|
|
35
|
+
Requires-Dist: matplotlib>=3.7.0
|
|
36
|
+
Requires-Dist: plotly>=5.14.0
|
|
37
|
+
Requires-Dist: rdflib>=6.3.0
|
|
38
|
+
Requires-Dist: jsonschema>=4.17.0
|
|
39
|
+
Requires-Dist: pyyaml>=6.0
|
|
40
|
+
Requires-Dist: pbixray>=0.5.0
|
|
41
|
+
Requires-Dist: fastmcp>=0.1.0
|
|
42
|
+
Requires-Dist: openai>=1.0.0
|
|
43
|
+
Provides-Extra: dev
|
|
44
|
+
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
|
45
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
46
|
+
Requires-Dist: pytest-mock>=3.11.0; extra == "dev"
|
|
47
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
48
|
+
Requires-Dist: flake8>=6.0.0; extra == "dev"
|
|
49
|
+
Requires-Dist: mypy>=1.5.0; extra == "dev"
|
|
50
|
+
Requires-Dist: isort>=5.12.0; extra == "dev"
|
|
51
|
+
Requires-Dist: sphinx>=7.0.0; extra == "dev"
|
|
52
|
+
Requires-Dist: sphinx-rtd-theme>=1.3.0; extra == "dev"
|
|
53
|
+
Dynamic: home-page
|
|
54
|
+
Dynamic: license-file
|
|
55
|
+
Dynamic: requires-python
|
|
56
|
+
|
|
57
|
+
# PowerBI Ontology Extractor
|
|
58
|
+
|
|
59
|
+
<div align="center">
|
|
60
|
+
|
|
61
|
+

|
|
62
|
+
|
|
63
|
+
**Transform 20 million Power BI dashboards into AI-ready ontologies**
|
|
64
|
+
|
|
65
|
+
[GitHub repository](https://github.com/vpakspace/powerbi-ontology-extractor)
|
|
66
|
+
[340 tests · 82% coverage](https://github.com/vpakspace/powerbi-ontology-extractor)
|
|
67
|
+
[Python 3.9+](https://www.python.org/downloads/)
|
|
68
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
69
|
+
|
|
70
|
+
[Installation](#installation) • [Quick Start](#-quick-start) • [Features](#-key-features) • [Documentation](https://github.com/vpakspace/powerbi-ontology-extractor#readme) • [Contributing](#-contributing)
|
|
71
|
+
|
|
72
|
+
</div>
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## 🎯 The Problem
|
|
77
|
+
|
|
78
|
+
Enterprises have **20+ million Power BI semantic models** that are actually **informal ontologies** trapped in proprietary .pbix files.
|
|
79
|
+
|
|
80
|
+
- **The Challenge**: Each Power BI model contains entities, relationships, and business logic—but AI agents can't access this semantic intelligence
|
|
81
|
+
- **The Cost**: Enterprises spend $50K-$200K per semantic definition to reconcile conflicts across dashboards
|
|
82
|
+
- **The $4.6M Mistake**: A logistics company lost $4.6M when an AI agent relied on a column that had since been renamed (`Warehouse_Location` → `FacilityID`), because no semantic binding validation was in place to catch the change
|
|
83
|
+
|
|
84
|
+
## 💡 The Solution
|
|
85
|
+
|
|
86
|
+
PowerBI Ontology Extractor **unlocks the hidden ontologies** in your Power BI dashboards and transforms them into formal, AI-ready ontologies.
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
┌─────────────────────┐ ┌──────────────────────┐ ┌─────────────────────────────┐
|
|
90
|
+
│ Power BI .pbix │────▶│ Ontology Extractor │────▶│ OntoGuard │
|
|
91
|
+
│ (20M+ dashboards) │ │ (this project) │ │ Semantic Firewall │
|
|
92
|
+
└─────────────────────┘ └──────────────────────┘ └─────────────────────────────┘
|
|
93
|
+
│ │
|
|
94
|
+
│ OWL/Fabric IQ │ Semantic Validation
|
|
95
|
+
▼ ▼
|
|
96
|
+
┌──────────────────────┐ ┌─────────────────────────────┐
|
|
97
|
+
│ Semantic Contract │────▶│ Universal Agent Connector │
|
|
98
|
+
│ (permissions) │ │ AI Agent Infrastructure │
|
|
99
|
+
└──────────────────────┘ └─────────────────────────────┘
|
|
100
|
+
│
|
|
101
|
+
▼
|
|
102
|
+
┌─────────────────────────────┐
|
|
103
|
+
│ AI Agents │
|
|
104
|
+
│ (Claude, GPT, etc.) │
|
|
105
|
+
└─────────────────────────────┘
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
**30-minute workflow**:
|
|
109
|
+
```
|
|
110
|
+
Power BI (.pbix) → Ontology Extractor → OntoGuard → Universal Agent Connector → AI Agent
|
|
111
|
+
10 min 10 min 5 min 3 min 2 min
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## 🚀 Quick Start
|
|
117
|
+
|
|
118
|
+
### Installation
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
# Clone repository
|
|
122
|
+
git clone https://github.com/vpakspace/powerbi-ontology-extractor.git
|
|
123
|
+
cd powerbi-ontology-extractor
|
|
124
|
+
|
|
125
|
+
# Install dependencies
|
|
126
|
+
pip install -r requirements.txt
|
|
127
|
+
pip install -e .
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Basic Usage
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
from powerbi_ontology import PowerBIExtractor, OntologyGenerator
|
|
134
|
+
|
|
135
|
+
# Step 1: Extract semantic model from Power BI
|
|
136
|
+
extractor = PowerBIExtractor("path/to/dashboard.pbix")
|
|
137
|
+
semantic_model = extractor.extract()
|
|
138
|
+
|
|
139
|
+
# Step 2: Generate formal ontology
|
|
140
|
+
generator = OntologyGenerator(semantic_model)
|
|
141
|
+
ontology = generator.generate()
|
|
142
|
+
|
|
143
|
+
print(f"✅ Extracted {len(ontology.entities)} entities")
|
|
144
|
+
print(f"✅ Found {len(ontology.relationships)} relationships")
|
|
145
|
+
print(f"✅ Generated {len(ontology.business_rules)} business rules")
|
|
146
|
+
|
|
147
|
+
# Step 3: Export to OWL for OntoGuard
|
|
148
|
+
from powerbi_ontology.export import OWLExporter
|
|
149
|
+
|
|
150
|
+
exporter = OWLExporter(ontology)
|
|
151
|
+
exporter.save("ontology.owl")
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Visual Ontology Editor (No-Code UI)
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
# Start Streamlit UI
|
|
158
|
+
streamlit run ontology_editor.py --server.port 8503
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
**Features**:
|
|
162
|
+
- 📂 Load from .pbix files or JSON
|
|
163
|
+
- 📦 Edit entities with properties and constraints
|
|
164
|
+
- 🔗 Manage relationships between entities
|
|
165
|
+
- 🔐 Configure permission matrix (RBAC)
|
|
166
|
+
- 📜 Add business rules with classification
|
|
167
|
+
- 🦉 Preview and export OWL
|
|
168
|
+
- 🔀 Diff & Merge ontology versions
|
|
169
|
+
- 💬 **AI Chat** - Ask questions about your ontology!
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## 🔥 Key Features
|
|
174
|
+
|
|
175
|
+
### 1. Automatic Extraction (PBIXRay)
|
|
176
|
+
- ✅ Reads Power BI .pbix files (binary DataModel via PBIXRay)
|
|
177
|
+
- ✅ Extracts tables, columns, relationships, hierarchies
|
|
178
|
+
- ✅ Parses DAX measures and calculated columns
|
|
179
|
+
- ✅ Captures Row-Level Security (RLS) rules
|
|
180
|
+
- ✅ Fallback to JSON model.bim for legacy files
|
|
181
|
+
|
|
182
|
+
### 2. DAX to Business Rules
|
|
183
|
+
- ✅ Parses DAX formulas automatically
|
|
184
|
+
- ✅ Extracts conditional logic (IF, SWITCH, CALCULATE)
|
|
185
|
+
- ✅ Converts filters to business rules
|
|
186
|
+
- ✅ Classifies measure types (aggregation, conditional, time intelligence)
|
|
187
|
+
|
|
188
|
+
### 3. Ontology Generation (70% Automated)
|
|
189
|
+
- ✅ Entities from tables
|
|
190
|
+
- ✅ Properties from columns (with data types)
|
|
191
|
+
- ✅ Relationships from foreign keys (with cardinality)
|
|
192
|
+
- ✅ Business rules from DAX measures
|
|
193
|
+
- ✅ Constraints (required, unique, range, regex, enum)
|
|
194
|
+
- ✅ Pattern detection (date tables, dimensions, facts)
|
|
195
|
+
|
|
196
|
+
### 4. Multi-Format Export
|
|
197
|
+
| Format | Use Case |
|
|
198
|
+
|--------|----------|
|
|
199
|
+
| **OWL/RDF** | OntoGuard semantic validation |
|
|
200
|
+
| **Fabric IQ** | Microsoft Fabric deployment |
|
|
201
|
+
| **JSON** | Universal agent connector |
|
|
202
|
+
| **Semantic Contract** | Role-based AI agent permissions |
|
|
203
|
+
|
|
204
|
+
### 5. Schema Drift Detection (Prevents $4.6M Mistakes!)
|
|
205
|
+
- ✅ Validates schema bindings
|
|
206
|
+
- ✅ Detects column renames/deletions
|
|
207
|
+
- ✅ Type normalization (varchar→text, int→integer)
|
|
208
|
+
- ✅ Severity levels: CRITICAL, WARNING, INFO
|
|
209
|
+
- ✅ Auto-fix suggestions
|
|
210
|
+
|
|
211
|
+
### 6. Multi-Dashboard Semantic Debt Analysis
|
|
212
|
+
- ✅ Analyzes multiple Power BI dashboards
|
|
213
|
+
- ✅ Detects conflicting definitions ("Revenue" defined differently)
|
|
214
|
+
- ✅ 5 conflict types: MEASURE, TYPE, ENTITY, RELATIONSHIP, RULE
|
|
215
|
+
- ✅ Generates consolidation reports
|
|
216
|
+
|
|
217
|
+
### 7. Ontology Diff & Merge
|
|
218
|
+
- ✅ Git-like diff between ontology versions
|
|
219
|
+
- ✅ Detect added/removed/modified elements
|
|
220
|
+
- ✅ Three-way merge (base, ours, theirs)
|
|
221
|
+
- ✅ Conflict detection and resolution strategies
|
|
222
|
+
|
|
223
|
+
### 8. Collaborative Review Workflow
|
|
224
|
+
- ✅ Comments on entities/properties/rules
|
|
225
|
+
- ✅ Reply and resolve threads
|
|
226
|
+
- ✅ Approval workflow: draft → review → approved → published
|
|
227
|
+
- ✅ Audit trail of all actions
|
|
228
|
+
|
|
229
|
+
### 9. CLI Tool for Automation
|
|
230
|
+
```bash
|
|
231
|
+
# Extract single .pbix file
|
|
232
|
+
pbix2owl extract -i dashboard.pbix -o ontology.owl
|
|
233
|
+
|
|
234
|
+
# Batch process directory (8 parallel workers)
|
|
235
|
+
pbix2owl batch -i ./dashboards/ -o ./ontologies/ -w 8 --recursive
|
|
236
|
+
|
|
237
|
+
# Analyze semantic debt
|
|
238
|
+
pbix2owl analyze -i ./ontologies/ -o report.md
|
|
239
|
+
|
|
240
|
+
# Compare versions (diff)
|
|
241
|
+
pbix2owl diff -s v1.json -t v2.json -o changelog.md
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### 10. AI-Powered Ontology Chat 🆕
|
|
245
|
+
- ✅ Ask questions about loaded ontology in natural language
|
|
246
|
+
- ✅ OpenAI API integration (gpt-4o-mini)
|
|
247
|
+
- ✅ Role-based context (Admin/Analyst/Viewer)
|
|
248
|
+
- ✅ Bilingual support (Russian/English)
|
|
249
|
+
- ✅ Suggested questions based on ontology content
|
|
250
|
+
|
|
251
|
+
**Example questions**:
|
|
252
|
+
- "What entities exist in the ontology?"
|
|
253
|
+
- "How are Customer and Sales related?"
|
|
254
|
+
- "Show all DAX measures"
|
|
255
|
+
- "What permissions does Analyst role have?"
|
|
256
|
+
|
|
257
|
+
---
|
|
258
|
+
|
|
259
|
+
## 📊 Real-World Example
|
|
260
|
+
|
|
261
|
+
**Tested with Microsoft official samples**:
|
|
262
|
+
|
|
263
|
+
| File | Size | Entities | Relationships | DAX Measures | OWL Triples |
|
|
264
|
+
|------|------|----------|---------------|--------------|-------------|
|
|
265
|
+
| Sales_Returns_Sample.pbix | 6.3 MB | 15 | 9 | 58 | 1,734 |
|
|
266
|
+
| Adventure_Works_DW_2020.pbix | 7.8 MB | 11 | 13 | 0 | 1,083 |
|
|
267
|
+
|
|
268
|
+
```python
|
|
269
|
+
from powerbi_ontology import PowerBIExtractor, OntologyGenerator
|
|
270
|
+
from powerbi_ontology.export import OWLExporter
|
|
271
|
+
|
|
272
|
+
# Extract from Power BI
|
|
273
|
+
extractor = PowerBIExtractor("Sales_Returns_Sample.pbix")
|
|
274
|
+
model = extractor.extract()
|
|
275
|
+
|
|
276
|
+
# Generate ontology
|
|
277
|
+
ontology = OntologyGenerator(model).generate()
|
|
278
|
+
|
|
279
|
+
# Export to OWL (for OntoGuard)
|
|
280
|
+
exporter = OWLExporter(ontology, default_roles=["Admin", "Analyst", "Viewer"])
|
|
281
|
+
exporter.save("sales_ontology.owl")
|
|
282
|
+
|
|
283
|
+
# Summary
|
|
284
|
+
summary = exporter.get_export_summary()
|
|
285
|
+
print(f"Classes: {summary['classes']}")
|
|
286
|
+
print(f"Properties: {summary['datatype_properties']}")
|
|
287
|
+
print(f"Action Rules: {summary['action_rules']}") # CRUD per entity × role
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
---
|
|
291
|
+
|
|
292
|
+
## 🔗 Integration Ecosystem
|
|
293
|
+
|
|
294
|
+
### OntoGuard (Semantic Firewall)
|
|
295
|
+
|
|
296
|
+
```python
|
|
297
|
+
from powerbi_ontology.export import OWLExporter
|
|
298
|
+
|
|
299
|
+
exporter = OWLExporter(ontology)
|
|
300
|
+
exporter.save("ontology.owl")
|
|
301
|
+
|
|
302
|
+
# Use with OntoGuard for AI agent validation
|
|
303
|
+
# github.com/vpakspace/ontoguard-ai
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
### Universal Agent Connector (MCP)
|
|
307
|
+
|
|
308
|
+
```python
|
|
309
|
+
from powerbi_ontology import ContractBuilder
|
|
310
|
+
from powerbi_ontology.export import ContractToOWLConverter
|
|
311
|
+
|
|
312
|
+
# Create semantic contract for AI agent
|
|
313
|
+
builder = ContractBuilder(ontology)
|
|
314
|
+
contract = builder.build_contract(
|
|
315
|
+
agent_name="SalesAnalyst",
|
|
316
|
+
permissions={
|
|
317
|
+
"read": ["Customer", "Sales", "Product"],
|
|
318
|
+
"write": {"Sales": ["Status"]},
|
|
319
|
+
"execute": ["GenerateReport"]
|
|
320
|
+
}
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
# Export for MCP
|
|
324
|
+
converter = ContractToOWLConverter(contract)
|
|
325
|
+
converter.save("sales_agent_contract.owl")
|
|
326
|
+
|
|
327
|
+
# Use with Universal Agent Connector
|
|
328
|
+
# github.com/vpakspace/universal-agent-connector
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
### Microsoft Fabric IQ
|
|
332
|
+
|
|
333
|
+
```python
|
|
334
|
+
from powerbi_ontology.export import FabricIQExporter
|
|
335
|
+
|
|
336
|
+
exporter = FabricIQExporter(ontology)
|
|
337
|
+
fabric_json = exporter.export()
|
|
338
|
+
|
|
339
|
+
# Deploy as Ontology Item to OneLake
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
---
|
|
343
|
+
|
|
344
|
+
## 🧪 Testing
|
|
345
|
+
|
|
346
|
+
```bash
|
|
347
|
+
# Run all tests (340 tests, 82% coverage)
|
|
348
|
+
pytest
|
|
349
|
+
|
|
350
|
+
# Run with coverage report
|
|
351
|
+
pytest --cov=powerbi_ontology --cov-report=html
|
|
352
|
+
|
|
353
|
+
# Run specific test module
|
|
354
|
+
pytest tests/test_owl_exporter.py -v
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
**Test Statistics**:
|
|
358
|
+
- 340 tests passing
|
|
359
|
+
- 82% code coverage
|
|
360
|
+
- E2E tests with real .pbix files
|
|
361
|
+
- OntoGuard integration tests
|
|
362
|
+
|
|
363
|
+
---
|
|
364
|
+
|
|
365
|
+
## 📁 Project Structure
|
|
366
|
+
|
|
367
|
+
```
|
|
368
|
+
powerbi-ontology-extractor/
|
|
369
|
+
├── powerbi_ontology/
|
|
370
|
+
│ ├── __init__.py
|
|
371
|
+
│ ├── extractor.py # PowerBIExtractor
|
|
372
|
+
│ ├── ontology_generator.py # OntologyGenerator
|
|
373
|
+
│ ├── pbix_reader.py # PBIXRay integration
|
|
374
|
+
│ ├── dax_parser.py # DAX formula parsing
|
|
375
|
+
│ ├── semantic_debt.py # Multi-dashboard analysis
|
|
376
|
+
│ ├── ontology_diff.py # Diff & Merge
|
|
377
|
+
│ ├── review.py # Collaborative review
|
|
378
|
+
│ ├── chat.py # AI Chat (OpenAI)
|
|
379
|
+
│ ├── cli.py # CLI commands
|
|
380
|
+
│ ├── export/
|
|
381
|
+
│ │ ├── owl.py # OWL/RDF export
|
|
382
|
+
│ │ ├── fabric_iq.py # Fabric IQ export
|
|
383
|
+
│ │ ├── fabric_iq_to_owl.py
|
|
384
|
+
│ │ └── contract_to_owl.py
|
|
385
|
+
│ └── utils/
|
|
386
|
+
│ ├── visualizer.py
|
|
387
|
+
│ └── validators.py
|
|
388
|
+
├── ontology_editor.py # Streamlit UI (1300+ lines)
|
|
389
|
+
├── examples/
|
|
390
|
+
│ ├── sample_pbix/ # Microsoft official samples
|
|
391
|
+
│ └── sample_ontology.json
|
|
392
|
+
├── tests/ # 340 tests
|
|
393
|
+
├── requirements.txt
|
|
394
|
+
└── README.md
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
---
|
|
398
|
+
|
|
399
|
+
## 📊 Project Status
|
|
400
|
+
|
|
401
|
+
| Feature | Status | Coverage |
|
|
402
|
+
|---------|--------|----------|
|
|
403
|
+
| PBIX Extraction (PBIXRay) | ✅ Complete | 51% |
|
|
404
|
+
| DAX Parser | ✅ Complete | 73% |
|
|
405
|
+
| Ontology Generator | ✅ Complete | 83% |
|
|
406
|
+
| OWL Exporter | ✅ Complete | 95% |
|
|
407
|
+
| Fabric IQ Exporter | ✅ Complete | 97% |
|
|
408
|
+
| Contract Builder | ✅ Complete | 98% |
|
|
409
|
+
| Schema Drift Detection | ✅ Complete | 84% |
|
|
410
|
+
| Semantic Debt Analysis | ✅ Complete | 84% |
|
|
411
|
+
| Ontology Diff & Merge | ✅ Complete | 84% |
|
|
412
|
+
| Review Workflow | ✅ Complete | 93% |
|
|
413
|
+
| CLI Tool | ✅ Complete | 60% |
|
|
414
|
+
| Visual Editor (Streamlit) | ✅ Complete | - |
|
|
415
|
+
| AI Chat (OpenAI) | ✅ Complete | - |
|
|
416
|
+
|
|
417
|
+
**Overall**: 340 tests, 82% coverage
|
|
418
|
+
|
|
419
|
+
---
|
|
420
|
+
|
|
421
|
+
## 🛠️ Development Setup
|
|
422
|
+
|
|
423
|
+
```bash
|
|
424
|
+
# Clone repository
|
|
425
|
+
git clone https://github.com/vpakspace/powerbi-ontology-extractor.git
|
|
426
|
+
cd powerbi-ontology-extractor
|
|
427
|
+
|
|
428
|
+
# Create virtual environment
|
|
429
|
+
python -m venv venv
|
|
430
|
+
source venv/bin/activate # Windows: venv\Scripts\activate
|
|
431
|
+
|
|
432
|
+
# Install dependencies
|
|
433
|
+
pip install -r requirements.txt
|
|
434
|
+
pip install -e .
|
|
435
|
+
|
|
436
|
+
# Run tests
|
|
437
|
+
pytest
|
|
438
|
+
|
|
439
|
+
# Start Streamlit UI
|
|
440
|
+
streamlit run ontology_editor.py --server.port 8503
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
### Environment Variables
|
|
444
|
+
|
|
445
|
+
Create `.env` file for AI Chat:
|
|
446
|
+
```bash
|
|
447
|
+
# Required for Ontology Chat
|
|
448
|
+
OPENAI_API_KEY=your-openai-api-key
|
|
449
|
+
|
|
450
|
+
# Optional: Model selection (default: gpt-4o-mini)
|
|
451
|
+
# OPENAI_MODEL=gpt-4o-mini
|
|
452
|
+
|
|
453
|
+
# Optional: Local models via Ollama
|
|
454
|
+
# OLLAMA_BASE_URL=http://localhost:11434/v1
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
---
|
|
458
|
+
|
|
459
|
+
## 🤝 Contributing
|
|
460
|
+
|
|
461
|
+
Contributions welcome! See [CONTRIBUTING.md](https://github.com/vpakspace/powerbi-ontology-extractor/blob/main/CONTRIBUTING.md) for guidelines.
|
|
462
|
+
|
|
463
|
+
**Ways to contribute**:
|
|
464
|
+
- 🐛 Report bugs via [GitHub Issues](https://github.com/vpakspace/powerbi-ontology-extractor/issues)
|
|
465
|
+
- 💡 Suggest features
|
|
466
|
+
- 📝 Improve documentation
|
|
467
|
+
- 🔧 Submit pull requests
|
|
468
|
+
- ⭐ Star the repository
|
|
469
|
+
|
|
470
|
+
---
|
|
471
|
+
|
|
472
|
+
## 📄 License
|
|
473
|
+
|
|
474
|
+
This project is licensed under the MIT License - see the [LICENSE](https://github.com/vpakspace/powerbi-ontology-extractor/blob/main/LICENSE) file for details.
|
|
475
|
+
|
|
476
|
+
---
|
|
477
|
+
|
|
478
|
+
## 🔗 Related Projects
|
|
479
|
+
|
|
480
|
+
| Project | Description |
|
|
481
|
+
|---------|-------------|
|
|
482
|
+
| [OntoGuard AI](https://github.com/vpakspace/ontoguard-ai) | Semantic Firewall for AI Agents |
|
|
483
|
+
| [Universal Agent Connector](https://github.com/vpakspace/universal-agent-connector) | MCP Infrastructure + Streamlit UI |
|
|
484
|
+
|
|
485
|
+
---
|
|
486
|
+
|
|
487
|
+
## 📞 Contact
|
|
488
|
+
|
|
489
|
+
- 🐛 **Issues**: [GitHub Issues](https://github.com/vpakspace/powerbi-ontology-extractor/issues)
|
|
490
|
+
- 💬 **Discussions**: [GitHub Discussions](https://github.com/vpakspace/powerbi-ontology-extractor/discussions)
|
|
491
|
+
|
|
492
|
+
---
|
|
493
|
+
|
|
494
|
+
<div align="center">
|
|
495
|
+
|
|
496
|
+
**Ready to unlock the semantic intelligence in your Power BI dashboards?** 🚀
|
|
497
|
+
|
|
498
|
+
```bash
|
|
499
|
+
git clone https://github.com/vpakspace/powerbi-ontology-extractor.git
|
|
500
|
+
cd powerbi-ontology-extractor
|
|
501
|
+
pip install -r requirements.txt
|
|
502
|
+
streamlit run ontology_editor.py
|
|
503
|
+
```
|
|
504
|
+
|
|
505
|
+
**Star ⭐ this repo if you find it useful!**
|
|
506
|
+
|
|
507
|
+
</div>
|