powerbi-ontology-extractor 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,269 @@
1
+ """
2
+ Contract Builder
3
+
4
+ Builds semantic contracts for AI agents from ontologies.
5
+ """
6
+
7
import logging
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

from powerbi_ontology.ontology_generator import Ontology, BusinessRule, Constraint
from powerbi_ontology.dax_parser import DAXParser
from powerbi_ontology.extractor import SemanticModel, Measure
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
@dataclass
class ContractPermissions:
    """
    Access rights recorded in an AI agent contract.

    Every field has a default, so ``ContractPermissions()`` yields empty
    collections and an empty role; each instance gets its own containers.
    """

    # Names of the entities the agent may read.
    read_entities: List[str] = field(default_factory=list)
    # Entity name -> names of the properties the agent may write on it.
    write_properties: Dict[str, List[str]] = field(default_factory=dict)
    # Names of the actions the agent may execute.
    executable_actions: List[str] = field(default_factory=list)
    # Role associated with the contract; empty string when none is given.
    required_role: str = ""
    # Entity name -> filter condition attached to that entity.
    context_filters: Dict[str, str] = field(default_factory=dict)
26
+
27
+
28
@dataclass
class AuditConfig:
    """
    Audit switches carried by a contract.

    All four flags default to ``True``.
    """

    # Whether read operations are logged.
    log_reads: bool = True
    # Whether write operations are logged.
    log_writes: bool = True
    # Whether executed actions are logged.
    log_actions: bool = True
    # Whether a contract violation raises an alert.
    alert_on_violation: bool = True
35
+
36
+
37
@dataclass
class SemanticContract:
    """
    Semantic contract for a single AI agent.

    ``agent_name``, ``ontology_version`` and ``permissions`` are required;
    the remaining fields default to empty collections and a default
    ``AuditConfig``.
    """

    # Name of the agent the contract is issued for.
    agent_name: str
    # Version string of the ontology the contract was built from.
    ontology_version: str
    # What the agent may read, write and execute.
    permissions: ContractPermissions
    # Business rules attached to the contract.
    business_rules: List[BusinessRule] = field(default_factory=list)
    # Validation constraints attached to the contract.
    validation_constraints: List[Constraint] = field(default_factory=list)
    # Audit switches; a fresh AuditConfig per contract.
    audit_settings: AuditConfig = field(default_factory=AuditConfig)
    # Free-form metadata dictionary.
    metadata: Dict = field(default_factory=dict)
47
+
48
+
49
class ContractBuilder:
    """
    Builds semantic contracts for AI agents from ontologies.

    AI agents need semantic contracts that define:
    - What entities they can read
    - What properties they can write
    - What actions they can execute
    - What business rules govern their behavior
    """

    def __init__(self, ontology: Ontology):
        """
        Initialize contract builder.

        Args:
            ontology: The ontology to build contracts from
        """
        self.ontology = ontology
        self.dax_parser = DAXParser()

    def build_contract(
        self,
        agent_name: str,
        permissions: Dict[str, Any]
    ) -> SemanticContract:
        """
        Build a semantic contract for an AI agent.

        Args:
            agent_name: Name of the AI agent
            permissions: Dictionary with optional "read", "write", "execute",
                "role" and "filters" keys; missing keys fall back to empty
                values

        Returns:
            SemanticContract populated with the ontology's business rules and
            validation constraints for the permitted entities
        """
        from datetime import datetime

        logger.info("Building contract for agent: %s", agent_name)

        # Copy the containers so later edits to the contract never leak back
        # into the caller's permissions dictionary (or into another contract
        # built from the same dictionary).
        contract_permissions = ContractPermissions(
            read_entities=list(permissions.get("read", [])),
            write_properties=dict(permissions.get("write", {})),
            executable_actions=list(permissions.get("execute", [])),
            required_role=permissions.get("role", ""),
            context_filters=dict(permissions.get("filters", {})),
        )

        contract = SemanticContract(
            agent_name=agent_name,
            ontology_version=self.ontology.version,
            permissions=contract_permissions,
            metadata={
                "created_date": datetime.now().isoformat(),
                "ontology_source": self.ontology.source,
            },
        )

        # Add relevant business rules
        self._add_relevant_business_rules(contract)

        # Add validation constraints
        self.add_validation_constraints(contract)

        return contract

    def generate_permissions_from_dashboard(
        self,
        semantic_model: SemanticModel
    ) -> Dict[str, Any]:
        """
        Generate suggested permissions from a Power BI dashboard.

        Collects entity names from the model's entities, from both ends of its
        relationships, and from the table part of every "Table.Column"
        dependency the DAX parser reports for its measures.

        Args:
            semantic_model: Semantic model from Power BI dashboard

        Returns:
            Dictionary with "read", "write", "execute" and "role" keys. "read"
            lists each entity once, in first-seen order, so the result is
            reproducible between runs.
        """
        candidates = [entity.name for entity in semantic_model.entities]

        for rel in semantic_model.relationships:
            candidates.extend((rel.from_entity, rel.to_entity))

        for measure in semantic_model.measures:
            parsed = self.dax_parser.parse_measure(measure.name, measure.dax_formula)
            candidates.extend(
                dep.split('.')[0] for dep in parsed.dependencies if '.' in dep
            )

        return {
            # dict.fromkeys de-duplicates while keeping encounter order.
            "read": list(dict.fromkeys(candidates)),
            "write": {},  # Dashboard typically doesn't write
            "execute": [],  # Dashboard typically doesn't execute actions
            "role": "Viewer"  # Default role
        }

    def add_business_rules(
        self,
        contract: SemanticContract,
        rules: List[BusinessRule]
    ):
        """
        Add business rules to contract.

        Args:
            contract: SemanticContract to add rules to
            rules: List of BusinessRule objects appended to the contract's
                existing rules
        """
        contract.business_rules.extend(rules)
        logger.info("Added %d business rules to contract", len(rules))

    def add_validation_constraints(self, contract: SemanticContract):
        """
        Add validation constraints from ontology to contract.

        Replaces ``contract.validation_constraints`` with the property-level
        and entity-level constraints of every ontology entity the agent can
        read or write. Names that do not exist in the ontology are skipped.

        Args:
            contract: SemanticContract to add constraints to
        """
        # Index the ontology once; keep the first entity when names collide.
        entities_by_name = {}
        for entity in self.ontology.entities:
            entities_by_name.setdefault(entity.name, entity)

        constraints = []
        for entity_name in self._relevant_entity_names(contract):
            entity = entities_by_name.get(entity_name)
            if entity is None:
                continue
            # Add constraints from entity properties
            for prop in entity.properties:
                constraints.extend(prop.constraints)
            # Add constraints from entity
            constraints.extend(entity.constraints)

        contract.validation_constraints = constraints
        logger.info("Added %d validation constraints to contract", len(constraints))

    def export_contract(self, contract: SemanticContract, format: str = "json") -> str:
        """
        Export contract to different formats.

        Args:
            contract: SemanticContract to export
            format: Export format ("json", "ontoguard", "fabric_iq")

        Returns:
            Exported contract as string

        Raises:
            ValueError: If ``format`` is not one of the supported names
        """
        if format == "json":
            import json
            return json.dumps(self._contract_to_dict(contract), indent=2)
        if format == "ontoguard":
            # Imported lazily so the exporter is only loaded when requested.
            from powerbi_ontology.export.ontoguard import OntoGuardExporter
            return OntoGuardExporter(self.ontology).export_contract(contract)
        if format == "fabric_iq":
            from powerbi_ontology.export.fabric_iq import FabricIQExporter
            return FabricIQExporter(self.ontology).export_contract(contract)
        raise ValueError(f"Unknown export format: {format}")

    @staticmethod
    def _relevant_entity_names(contract: SemanticContract) -> List[str]:
        """Return the entities the agent can read or write, once each, in permission order."""
        permissions = contract.permissions
        return list(dict.fromkeys(
            [*permissions.read_entities, *permissions.write_properties]
        ))

    def _add_relevant_business_rules(self, contract: SemanticContract):
        """Replace the contract's rules with the ontology rules for its readable/writable entities."""
        relevant_entities = set(self._relevant_entity_names(contract))

        relevant_rules = [
            rule for rule in self.ontology.business_rules
            if rule.entity in relevant_entities
        ]

        contract.business_rules = relevant_rules
        logger.info("Added %d relevant business rules", len(relevant_rules))

    def _contract_to_dict(self, contract: SemanticContract) -> Dict:
        """Convert contract to a plain dictionary for JSON export."""
        permissions = contract.permissions
        audit = contract.audit_settings
        return {
            "agent_name": contract.agent_name,
            "ontology_version": contract.ontology_version,
            "permissions": {
                "read_entities": permissions.read_entities,
                "write_properties": permissions.write_properties,
                "executable_actions": permissions.executable_actions,
                "required_role": permissions.required_role,
                "context_filters": permissions.context_filters
            },
            "business_rules": [
                {
                    "name": rule.name,
                    "entity": rule.entity,
                    "condition": rule.condition,
                    "action": rule.action,
                    "description": rule.description
                }
                for rule in contract.business_rules
            ],
            "validation_constraints": [
                {
                    "type": constraint.type,
                    # Stringified so any constraint value can be serialized.
                    "value": str(constraint.value),
                    "message": constraint.message
                }
                for constraint in contract.validation_constraints
            ],
            "audit_settings": {
                "log_reads": audit.log_reads,
                "log_writes": audit.log_writes,
                "log_actions": audit.log_actions,
                "alert_on_violation": audit.alert_on_violation
            },
            "metadata": contract.metadata
        }
@@ -0,0 +1,305 @@
1
+ """
2
+ DAX Parser
3
+
4
+ Parses DAX formulas to extract business rules and semantic meaning.
5
+ """
6
+
7
+ import logging
8
+ import re
9
+ from dataclasses import dataclass
10
+ from typing import List, Optional, Set
11
+
12
+ from pyparsing import (
13
+ CaselessKeyword, Word, alphanums, nums, oneOf, opAssoc, infixNotation,
14
+ ParseException, Suppress, Optional as Opt, Group
15
+ )
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@dataclass
class BusinessRule:
    """
    A business rule extracted from a DAX formula.

    Only ``name`` and ``condition`` are required; the other fields default to
    an empty string, except ``priority`` which defaults to 1.
    """

    # Identifier of the rule.
    name: str
    # Condition text of the rule, e.g. "RiskScore > 80".
    condition: str
    # Action label associated with the condition.
    action: str = ""
    # Numeric priority of the rule.
    priority: int = 1
    # Human-readable description.
    description: str = ""
    # Name of the entity the rule relates to; empty when unknown.
    entity: str = ""
    # Classification label produced by the rule; empty when not applicable.
    classification: str = ""
30
+
31
+
32
@dataclass
class ParsedRule:
    """
    Result of parsing one DAX measure.

    All fields are required.
    """

    # Name of the parsed measure.
    measure_name: str
    # The DAX formula text that was parsed.
    dax_formula: str
    # Business rules extracted from the formula.
    business_rules: List[BusinessRule]
    # Dependencies found in the formula.
    dependencies: List[str]
    # One of AGGREGATION, CALCULATION, CONDITIONAL, FILTER, TIME_INTELLIGENCE.
    measure_type: str
40
+
41
+
42
class DAXParser:
    """
    Parses DAX formulas to extract business logic and semantic meaning.

    Background: DAX measures contain business logic that should be extracted
    as formal business rules. For example:
    - HighRiskCustomers = CALCULATE(COUNT(...), RiskScore > 80)
    - This becomes: BusinessRule(condition="RiskScore > 80", ...)

    Extraction is regex based and therefore heuristic: the patterns do not
    understand nested parentheses or commas inside arguments.
    """

    # Function-name prefixes used by classify_measure_type. Prefix matching
    # lets iterator/variant functions (SUMX, COUNTROWS, IFERROR, ...) count too.
    _TIME_INTELLIGENCE_FUNCTIONS = (
        'DATEADD', 'TOTALYTD', 'TOTALQTD', 'TOTALMTD', 'SAMEPERIODLASTYEAR'
    )
    _CONDITIONAL_FUNCTIONS = ('IF', 'SWITCH')
    _AGGREGATION_FUNCTIONS = (
        'SUM', 'COUNT', 'AVG', 'AVERAGE', 'MAX', 'MIN', 'DISTINCTCOUNT'
    )
    # Names never reported as "Table.*" by identify_dependencies.
    _NON_TABLE_NAMES = ('IF', 'CALCULATE', 'SUM', 'COUNT', 'AVG', 'MAX', 'MIN')

    # Compiled once at class creation instead of on every call.
    _CALCULATE_RE = re.compile(r'CALCULATE\s*\([^,]+,\s*([^)]+)\)', re.IGNORECASE)
    _IF_RE = re.compile(r'IF\s*\(\s*([^,]+),\s*([^,]+),\s*([^)]+)\)', re.IGNORECASE)
    _SWITCH_RE = re.compile(r'SWITCH\s*\([^,]+,\s*([^)]+)\)', re.IGNORECASE)
    _THRESHOLD_RE = re.compile(r'(\w+)\s*(>|<|>=|<=|=)\s*(\d+\.?\d*)')
    _COLUMN_REF_RE = re.compile(r'(\w+)\[(\w+)\]')
    _TABLE_REF_RE = re.compile(r'\b([A-Z][a-zA-Z0-9_]*)\[')
    _ENTITY_RE = re.compile(r'(\w+)\[')
    # An identifier immediately followed by "(" is treated as a function call.
    _FUNCTION_CALL_RE = re.compile(r'\b([A-Za-z_][A-Za-z0-9_.]*)\s*\(')

    def __init__(self):
        """Initialize DAX parser."""
        self._setup_parser()

    def _setup_parser(self):
        """Create the pyparsing tokens stored on the instance."""
        # Only these two tokens are kept; rule extraction itself is regex based.
        self.identifier = Word(alphanums + "_")
        self.number = Word(nums + ".-")

    def parse_measure(self, measure_name: str, dax_formula: str) -> "ParsedRule":
        """
        Parse a DAX measure to extract business rules.

        Args:
            measure_name: Name of the measure
            dax_formula: DAX formula string

        Returns:
            ParsedRule with the extracted rules, dependencies and measure type
        """
        logger.debug("Parsing measure: %s", measure_name)

        dependencies = self.identify_dependencies(dax_formula)
        measure_type = self.classify_measure_type(dax_formula)
        business_rules = list(self.extract_business_logic(measure_name, dax_formula))

        return ParsedRule(
            measure_name=measure_name,
            dax_formula=dax_formula,
            business_rules=business_rules,
            dependencies=dependencies,
            measure_type=measure_type
        )

    def extract_business_logic(self, measure_name: str, dax_formula: str) -> List["BusinessRule"]:
        """
        Extract business logic from DAX formula.

        Four patterns are tried in order: CALCULATE filter arguments, IF
        conditions, SWITCH cases, and bare ``name <op> number`` thresholds.
        A threshold is only added when its field name does not already occur
        in the condition of a rule collected so far.

        Args:
            measure_name: Name of the measure
            dax_formula: DAX formula string

        Returns:
            List of BusinessRule objects
        """
        rules = []

        # Pattern 1: CALCULATE with filter conditions
        # Example: CALCULATE(COUNT(...), RiskScore > 80)
        for match in self._CALCULATE_RE.finditer(dax_formula):
            condition = self._parse_condition(match.group(1).strip())
            if condition:
                rules.append(BusinessRule(
                    name=f"{measure_name}_Filter",
                    condition=condition,
                    action="filter",
                    description=f"Filter condition from {measure_name}: {condition}",
                    entity=self._extract_entity_from_condition(condition)
                ))

        # Pattern 2: IF conditions
        # Example: IF(RiskScore > 80, "High", "Low")
        for match in self._IF_RE.finditer(dax_formula):
            condition = match.group(1).strip()
            true_value = match.group(2).strip()
            false_value = match.group(3).strip()

            parsed_condition = self._parse_condition(condition)
            if parsed_condition:
                # The label is computed outside the f-string: a backslash
                # inside an f-string replacement field is a SyntaxError
                # before Python 3.12.
                label = self._label(true_value)
                rules.append(BusinessRule(
                    name=f"{measure_name}_Condition",
                    condition=parsed_condition,
                    action=f"classify_as_{label.lower()}",
                    classification=true_value.replace('"', '').strip(),
                    description=f"IF condition: {parsed_condition} then {true_value} else {false_value}",
                    entity=self._extract_entity_from_condition(condition)
                ))

        # Pattern 3: SWITCH statements
        # Example: SWITCH(TRUE(), RiskScore > 80, "High", RiskScore > 50, "Medium", "Low")
        for match in self._SWITCH_RE.finditer(dax_formula):
            for case_condition, case_value in self._parse_switch_cases(match.group(1)):
                parsed_condition = self._parse_condition(case_condition)
                if parsed_condition:
                    label = self._label(case_value)
                    rules.append(BusinessRule(
                        name=f"{measure_name}_Switch_{label}",
                        condition=parsed_condition,
                        action=f"classify_as_{label.lower()}",
                        classification=case_value.replace('"', '').strip(),
                        description=f"SWITCH case: {parsed_condition} -> {case_value}",
                        entity=self._extract_entity_from_condition(case_condition)
                    ))

        # Pattern 4: Simple threshold conditions
        # Example: RiskScore > 80
        for match in self._THRESHOLD_RE.finditer(dax_formula):
            field_name, operator, value = match.groups()

            # Only add if not already captured by other patterns
            if not any(field_name in rule.condition for rule in rules):
                rules.append(BusinessRule(
                    name=f"{measure_name}_Threshold",
                    condition=f"{field_name} {operator} {value}",
                    action="threshold_check",
                    description=f"Threshold condition: {field_name} {operator} {value}",
                    entity=self._extract_entity_from_field(field_name)
                ))

        return rules

    @staticmethod
    def _label(value: str) -> str:
        """Remove double quotes from ``value`` and replace spaces with underscores."""
        return value.replace('"', '').replace(' ', '_')

    def _parse_condition(self, condition: str) -> Optional[str]:
        """Trim a condition and collapse whitespace runs; return None if nothing is left."""
        condition = re.sub(r'\s+', ' ', condition.strip())
        return condition if condition else None

    def _parse_switch_cases(self, switch_body: str) -> List[tuple]:
        """
        Parse SWITCH cases from switch body.

        The body is split on commas and consecutive parts are paired as
        (condition, value); a trailing unpaired part (the default value) is
        ignored.
        """
        parts = [part.strip() for part in switch_body.split(',')]
        # SWITCH format: condition1, value1, condition2, value2, ..., default_value
        return [
            (parts[i], parts[i + 1])
            for i in range(0, len(parts) - 1, 2)
        ]

    def _extract_entity_from_condition(self, condition: str) -> str:
        """Extract entity name from condition (e.g., 'Customer[RiskScore]' -> 'Customer')."""
        # Match table[column] pattern; only the first occurrence is used.
        match = self._ENTITY_RE.search(condition)
        return match.group(1) if match else ""

    def _extract_entity_from_field(self, field: str) -> str:
        """Extract entity from field name (heuristic): 'customer_score' -> 'Customer'."""
        # If field contains underscore, might be entity_field
        if '_' in field:
            return field.split('_')[0].capitalize()
        return ""

    def identify_dependencies(self, dax_formula: str) -> List[str]:
        """
        Identify table/column dependencies from DAX formula.

        Args:
            dax_formula: DAX formula string

        Returns:
            Sorted list of unique dependencies: "Table.Column" for every
            ``Table[Column]`` reference, plus "Table.*" for every name that
            starts with an upper-case letter and is followed by "[".
        """
        dependencies = set()

        # Match table[column] patterns
        for table, column in self._COLUMN_REF_RE.findall(dax_formula):
            dependencies.add(f"{table}.{column}")

        # Also match table references (without column)
        for table in self._TABLE_REF_RE.findall(dax_formula):
            if table.upper() not in self._NON_TABLE_NAMES:
                dependencies.add(f"{table}.*")

        return sorted(dependencies)

    def classify_measure_type(self, dax_formula: str) -> str:
        """
        Classify the type of DAX measure.

        Classification looks only at the functions the formula calls (an
        identifier followed by "("), not at raw substrings, so column or
        measure names such as ``Shifts`` or ``Discount`` are not mistaken for
        IF or COUNT. Checks run in this order and the first hit wins:
        time intelligence, conditional, filter, aggregation.

        Args:
            dax_formula: DAX formula string

        Returns:
            MeasureType: AGGREGATION, CALCULATION, CONDITIONAL, FILTER, TIME_INTELLIGENCE
        """
        functions = {
            name.upper() for name in self._FUNCTION_CALL_RE.findall(dax_formula)
        }

        def calls(prefixes: tuple) -> bool:
            # str.startswith accepts a tuple of alternatives.
            return any(name.startswith(prefixes) for name in functions)

        # Time intelligence functions
        if calls(self._TIME_INTELLIGENCE_FUNCTIONS):
            return "TIME_INTELLIGENCE"

        # Conditional logic
        if calls(self._CONDITIONAL_FUNCTIONS):
            return "CONDITIONAL"

        # Filter logic: CALCULATE combined with a *FILTER* function or a comparison
        has_filter = (
            any('FILTER' in name for name in functions)
            or '>' in dax_formula
            or '<' in dax_formula
        )
        if calls(('CALCULATE',)) and has_filter:
            return "FILTER"

        # Aggregation functions
        if calls(self._AGGREGATION_FUNCTIONS):
            return "AGGREGATION"

        # Default to calculation
        return "CALCULATION"
@@ -0,0 +1,17 @@
1
+ """Export modules for different ontology formats."""
2
+
3
+ from powerbi_ontology.export.fabric_iq import FabricIQExporter
4
+ from powerbi_ontology.export.ontoguard import OntoGuardExporter
5
+ from powerbi_ontology.export.json_schema import JSONSchemaExporter
6
+ from powerbi_ontology.export.owl import OWLExporter
7
+ from powerbi_ontology.export.fabric_iq_to_owl import FabricIQToOWLConverter
8
+ from powerbi_ontology.export.contract_to_owl import ContractToOWLConverter
9
+
10
+ __all__ = [
11
+ "FabricIQExporter",
12
+ "OntoGuardExporter",
13
+ "JSONSchemaExporter",
14
+ "OWLExporter",
15
+ "FabricIQToOWLConverter",
16
+ "ContractToOWLConverter",
17
+ ]