PyPI - powerbi-ontology-extractor - Versions diffs - 0.1.0__py3-none-any.whl - Mend

powerbi-ontology-extractor 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

cli/__init__.py +1 -0
cli/pbi_ontology_cli.py +286 -0
powerbi_ontology/__init__.py +38 -0
powerbi_ontology/analyzer.py +420 -0
powerbi_ontology/chat.py +303 -0
powerbi_ontology/cli.py +530 -0
powerbi_ontology/contract_builder.py +269 -0
powerbi_ontology/dax_parser.py +305 -0
powerbi_ontology/export/__init__.py +17 -0
powerbi_ontology/export/contract_to_owl.py +408 -0
powerbi_ontology/export/fabric_iq.py +243 -0
powerbi_ontology/export/fabric_iq_to_owl.py +463 -0
powerbi_ontology/export/json_schema.py +110 -0
powerbi_ontology/export/ontoguard.py +177 -0
powerbi_ontology/export/owl.py +522 -0
powerbi_ontology/extractor.py +368 -0
powerbi_ontology/mcp_config.py +237 -0
powerbi_ontology/mcp_models.py +166 -0
powerbi_ontology/mcp_server.py +1106 -0
powerbi_ontology/ontology_diff.py +776 -0
powerbi_ontology/ontology_generator.py +406 -0
powerbi_ontology/review.py +556 -0
powerbi_ontology/schema_mapper.py +369 -0
powerbi_ontology/semantic_debt.py +584 -0
powerbi_ontology/utils/__init__.py +13 -0
powerbi_ontology/utils/pbix_reader.py +558 -0
powerbi_ontology/utils/visualizer.py +332 -0
powerbi_ontology_extractor-0.1.0.dist-info/METADATA +507 -0
powerbi_ontology_extractor-0.1.0.dist-info/RECORD +33 -0
powerbi_ontology_extractor-0.1.0.dist-info/WHEEL +5 -0
powerbi_ontology_extractor-0.1.0.dist-info/entry_points.txt +4 -0
powerbi_ontology_extractor-0.1.0.dist-info/licenses/LICENSE +21 -0
powerbi_ontology_extractor-0.1.0.dist-info/top_level.txt +2 -0

powerbi_ontology/contract_builder.py ADDED Viewed

@@ -0,0 +1,269 @@
+"""
+Contract Builder
+Builds semantic contracts for AI agents from ontologies.
+"""
+import logging
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+from powerbi_ontology.ontology_generator import Ontology, BusinessRule, Constraint
+from powerbi_ontology.dax_parser import DAXParser
+from powerbi_ontology.extractor import SemanticModel, Measure
+# Module-level logger, named after this module via __name__.
+logger = logging.getLogger(__name__)
+@dataclass
+class ContractPermissions:
+    """
+    Permissions for an AI agent contract.
+    Every field has an empty default, so ContractPermissions() grants nothing.
+    Container defaults use default_factory so instances never share a list or dict.
+    """
+    # Names of the entities the agent may read.
+    read_entities: List[str] = field(default_factory=list)
+    # Maps an entity name to the property names the agent may write on it.
+    write_properties: Dict[str, List[str]] = field(default_factory=dict)  # entity -> properties
+    # Names of the actions the agent may execute.
+    executable_actions: List[str] = field(default_factory=list)
+    # Role associated with the contract; defaults to the empty string.
+    required_role: str = ""
+    # Maps an entity name to a filter condition string.
+    context_filters: Dict[str, str] = field(default_factory=dict)  # entity -> filter condition
+@dataclass
+class AuditConfig:
+    """
+    Audit configuration for contract.
+    All four switches default to True.
+    """
+    # Audit switch for reads.
+    log_reads: bool = True
+    # Audit switch for writes.
+    log_writes: bool = True
+    # Audit switch for actions.
+    log_actions: bool = True
+    # Alert switch for violations.
+    alert_on_violation: bool = True
+@dataclass
+class SemanticContract:
+    """
+    Semantic contract for an AI agent.
+    agent_name, ontology_version and permissions are required; the remaining
+    fields default to empty containers or a default AuditConfig.
+    """
+    # Name of the AI agent the contract is issued to.
+    agent_name: str
+    # Version string of the ontology the contract was built from.
+    ontology_version: str
+    # What the agent may read, write and execute.
+    permissions: ContractPermissions
+    # Business rules attached to the contract.
+    business_rules: List[BusinessRule] = field(default_factory=list)
+    # Validation constraints attached to the contract.
+    validation_constraints: List[Constraint] = field(default_factory=list)
+    # Audit switches; a fresh AuditConfig() per contract.
+    audit_settings: AuditConfig = field(default_factory=AuditConfig)
+    # Free-form metadata dictionary.
+    metadata: Dict = field(default_factory=dict)
+class ContractBuilder:
+    """
+    Builds semantic contracts for AI agents from ontologies.
+    AI agents need semantic contracts that define:
+    - What entities they can read
+    - What properties they can write
+    - What actions they can execute
+    - What business rules govern their behavior
+    """
+    def __init__(self, ontology: Ontology):
+        """
+        Initialize contract builder.
+        Args:
+            ontology: The ontology to build contracts from
+        """
+        self.ontology = ontology
+        self.dax_parser = DAXParser()
+    def build_contract(
+        self,
+        agent_name: str,
+        permissions: Dict[str, any]
+    ) -> SemanticContract:
+        """
+        Build a semantic contract for an AI agent.
+        Args:
+            agent_name: Name of the AI agent
+            permissions: Dictionary with read, write, execute, role keys
+        Returns:
+            SemanticContract
+        """
+        logger.info(f"Building contract for agent: {agent_name}")
+        contract_permissions = ContractPermissions(
+            read_entities=permissions.get("read", []),
+            write_properties=permissions.get("write", {}),
+            executable_actions=permissions.get("execute", []),
+            required_role=permissions.get("role", ""),
+            context_filters=permissions.get("filters", {})
+        )
+        contract = SemanticContract(
+            agent_name=agent_name,
+            ontology_version=self.ontology.version,
+            permissions=contract_permissions,
+            metadata={
+                "created_date": str(__import__("datetime").datetime.now().isoformat()),
+                "ontology_source": self.ontology.source
+            }
+        )
+        # Add relevant business rules
+        self._add_relevant_business_rules(contract)
+        # Add validation constraints
+        self.add_validation_constraints(contract)
+        return contract
+    def generate_permissions_from_dashboard(
+        self,
+        semantic_model: SemanticModel
+    ) -> Dict[str, any]:
+        """
+        Generate suggested permissions from a Power BI dashboard.
+        Analyzes what entities the dashboard uses and suggests appropriate permissions.
+        Args:
+            semantic_model: Semantic model from Power BI dashboard
+        Returns:
+            Dictionary with suggested permissions
+        """
+        # Get all entities used in the dashboard
+        entities_used = set()
+        for entity in semantic_model.entities:
+            entities_used.add(entity.name)
+        # Get entities from relationships
+        for rel in semantic_model.relationships:
+            entities_used.add(rel.from_entity)
+            entities_used.add(rel.to_entity)
+        # Get entities from measures
+        for measure in semantic_model.measures:
+            parsed = self.dax_parser.parse_measure(measure.name, measure.dax_formula)
+            for dep in parsed.dependencies:
+                if '.' in dep:
+                    entity = dep.split('.')[0]
+                    entities_used.add(entity)
+        return {
+            "read": list(entities_used),
+            "write": {},  # Dashboard typically doesn't write
+            "execute": [],  # Dashboard typically doesn't execute actions
+            "role": "Viewer"  # Default role
+        }
+    def add_business_rules(
+        self,
+        contract: SemanticContract,
+        rules: List[BusinessRule]
+    ):
+        """
+        Add business rules to contract.
+        Args:
+            contract: SemanticContract to add rules to
+            rules: List of BusinessRule objects
+        """
+        contract.business_rules.extend(rules)
+        logger.info(f"Added {len(rules)} business rules to contract")
+    def add_validation_constraints(self, contract: SemanticContract):
+        """
+        Add validation constraints from ontology to contract.
+        Args:
+            contract: SemanticContract to add constraints to
+        """
+        # Get entities that agent can read/write
+        relevant_entities = set(contract.permissions.read_entities)
+        relevant_entities.update(contract.permissions.write_properties.keys())
+        constraints = []
+        for entity_name in relevant_entities:
+            entity = next(
+                (e for e in self.ontology.entities if e.name == entity_name),
+                None
+            )
+            if entity:
+                # Add constraints from entity properties
+                for prop in entity.properties:
+                    constraints.extend(prop.constraints)
+                # Add constraints from entity
+                constraints.extend(entity.constraints)
+        contract.validation_constraints = constraints
+        logger.info(f"Added {len(constraints)} validation constraints to contract")
+    def export_contract(self, contract: SemanticContract, format: str = "json") -> str:
+        """
+        Export contract to different formats.
+        Args:
+            contract: SemanticContract to export
+            format: Export format ("json", "ontoguard", "fabric_iq")
+        Returns:
+            Exported contract as string
+        """
+        if format == "json":
+            import json
+            return json.dumps(self._contract_to_dict(contract), indent=2)
+        elif format == "ontoguard":
+            from powerbi_ontology.export.ontoguard import OntoGuardExporter
+            # Convert contract to ontology-like structure for export
+            return OntoGuardExporter(self.ontology).export_contract(contract)
+        elif format == "fabric_iq":
+            from powerbi_ontology.export.fabric_iq import FabricIQExporter
+            return FabricIQExporter(self.ontology).export_contract(contract)
+        else:
+            raise ValueError(f"Unknown export format: {format}")
+    def _add_relevant_business_rules(self, contract: SemanticContract):
+        """Add business rules relevant to the agent's permissions."""
+        relevant_entities = set(contract.permissions.read_entities)
+        relevant_entities.update(contract.permissions.write_properties.keys())
+        relevant_rules = [
+            rule for rule in self.ontology.business_rules
+            if rule.entity in relevant_entities
+        ]
+        contract.business_rules = relevant_rules
+        logger.info(f"Added {len(relevant_rules)} relevant business rules")
+    def _contract_to_dict(self, contract: SemanticContract) -> Dict:
+        """Convert contract to dictionary for JSON export."""
+        return {
+            "agent_name": contract.agent_name,
+            "ontology_version": contract.ontology_version,
+            "permissions": {
+                "read_entities": contract.permissions.read_entities,
+                "write_properties": contract.permissions.write_properties,
+                "executable_actions": contract.permissions.executable_actions,
+                "required_role": contract.permissions.required_role,
+                "context_filters": contract.permissions.context_filters
+            },
+            "business_rules": [
+                {
+                    "name": rule.name,
+                    "entity": rule.entity,
+                    "condition": rule.condition,
+                    "action": rule.action,
+                    "description": rule.description
+                }
+                for rule in contract.business_rules
+            ],
+            "validation_constraints": [
+                {
+                    "type": constraint.type,
+                    "value": str(constraint.value),
+                    "message": constraint.message
+                }
+                for constraint in contract.validation_constraints
+            ],
+            "audit_settings": {
+                "log_reads": contract.audit_settings.log_reads,
+                "log_writes": contract.audit_settings.log_writes,
+                "log_actions": contract.audit_settings.log_actions,
+                "alert_on_violation": contract.audit_settings.alert_on_violation
+            },
+            "metadata": contract.metadata
+        }

powerbi_ontology/dax_parser.py ADDED Viewed

@@ -0,0 +1,305 @@
+"""
+DAX Parser
+Parses DAX formulas to extract business rules and semantic meaning.
+"""
+import logging
+import re
+from dataclasses import dataclass
+from typing import List, Optional, Set
+from pyparsing import (
+    CaselessKeyword, Word, alphanums, nums, oneOf, opAssoc, infixNotation,
+    ParseException, Suppress, Optional as Opt, Group
+)
+# Module-level logger, named after this module via __name__.
+logger = logging.getLogger(__name__)
+@dataclass
+class BusinessRule:
+    """
+    Represents a business rule extracted from DAX.
+    Only name and condition are required; the other fields default to an
+    empty string, except priority which defaults to 1.
+    """
+    # Rule name.
+    name: str
+    # Condition text of the rule, e.g. "RiskScore > 80".
+    condition: str
+    # Action label for the rule; empty by default.
+    action: str = ""
+    # Rule priority; defaults to 1.
+    priority: int = 1
+    # Human-readable description of the rule.
+    description: str = ""
+    # Name of the entity the rule applies to; empty when not known.
+    entity: str = ""
+    # Classification label attached to the rule, e.g. "HighRisk".
+    classification: str = ""
+@dataclass
+class ParsedRule:
+    """
+    Parsed DAX measure with extracted information.
+    All fields are required.
+    """
+    # Name of the measure that was parsed.
+    measure_name: str
+    # The DAX formula text of the measure.
+    dax_formula: str
+    # Business rules extracted from the formula.
+    business_rules: List[BusinessRule]
+    # Dependencies found in the formula, as strings.
+    dependencies: List[str]
+    # One of the category names listed in the trailing comment.
+    measure_type: str  # AGGREGATION, CALCULATION, CONDITIONAL, FILTER, TIME_INTELLIGENCE
+class DAXParser:
+    """
+    Parses DAX formulas to extract business logic and semantic meaning.
+    Background: DAX measures contain business logic that should be extracted
+    as formal business rules. For example:
+    - HighRiskCustomers = CALCULATE(COUNT(...), RiskScore > 80)
+    - This becomes: BusinessRule(condition="RiskScore > 80", classification="HighRisk")
+    """
+    def __init__(self):
+        """Initialize DAX parser."""
+        self._setup_parser()
+    def _setup_parser(self):
+        """Setup pyparsing grammar for DAX."""
+        # Basic tokens
+        identifier = Word(alphanums + "_")
+        number = Word(nums + ".-")
+        # DAX functions
+        calculate = CaselessKeyword("CALCULATE")
+        sum_func = CaselessKeyword("SUM")
+        count_func = CaselessKeyword("COUNT")
+        distinctcount = CaselessKeyword("DISTINCTCOUNT")
+        if_func = CaselessKeyword("IF")
+        switch_func = CaselessKeyword("SWITCH")
+        # Operators
+        gt = ">"
+        lt = "<"
+        eq = "="
+        ge = ">="
+        le = "<="
+        and_op = CaselessKeyword("AND")
+        or_op = CaselessKeyword("OR")
+        # Store for later use
+        self.identifier = identifier
+        self.number = number
+    def parse_measure(self, measure_name: str, dax_formula: str) -> ParsedRule:
+        """
+        Parse a DAX measure to extract business rules.
+        Args:
+            measure_name: Name of the measure
+            dax_formula: DAX formula string
+        Returns:
+            ParsedRule with extracted information
+        """
+        logger.debug(f"Parsing measure: {measure_name}")
+        business_rules = []
+        dependencies = self.identify_dependencies(dax_formula)
+        measure_type = self.classify_measure_type(dax_formula)
+        # Extract business logic
+        extracted_rules = self.extract_business_logic(measure_name, dax_formula)
+        business_rules.extend(extracted_rules)
+        return ParsedRule(
+            measure_name=measure_name,
+            dax_formula=dax_formula,
+            business_rules=business_rules,
+            dependencies=dependencies,
+            measure_type=measure_type
+        )
+    def extract_business_logic(self, measure_name: str, dax_formula: str) -> List[BusinessRule]:
+        """
+        Extract business logic from DAX formula.
+        Args:
+            measure_name: Name of the measure
+            dax_formula: DAX formula string
+        Returns:
+            List of BusinessRule objects
+        """
+        rules = []
+        dax_upper = dax_formula.upper()
+        # Pattern 1: CALCULATE with filter conditions
+        # Example: CALCULATE(COUNT(...), RiskScore > 80)
+        calculate_pattern = r'CALCULATE\s*\([^,]+,\s*([^)]+)\)'
+        calculate_matches = re.finditer(calculate_pattern, dax_formula, re.IGNORECASE)
+        for match in calculate_matches:
+            filter_condition = match.group(1).strip()
+            # Extract condition parts
+            condition = self._parse_condition(filter_condition)
+            if condition:
+                rule = BusinessRule(
+                    name=f"{measure_name}_Filter",
+                    condition=condition,
+                    action="filter",
+                    description=f"Filter condition from {measure_name}: {condition}",
+                    entity=self._extract_entity_from_condition(condition)
+                )
+                rules.append(rule)
+        # Pattern 2: IF conditions
+        # Example: IF(RiskScore > 80, "High", "Low")
+        if_pattern = r'IF\s*\(\s*([^,]+),\s*([^,]+),\s*([^)]+)\)'
+        if_matches = re.finditer(if_pattern, dax_formula, re.IGNORECASE)
+        for match in if_matches:
+            condition = match.group(1).strip()
+            true_value = match.group(2).strip()
+            false_value = match.group(3).strip()
+            parsed_condition = self._parse_condition(condition)
+            if parsed_condition:
+                rule = BusinessRule(
+                    name=f"{measure_name}_Condition",
+                    condition=parsed_condition,
+                    action=f"classify_as_{true_value.replace('\"', '').replace(' ', '_').lower()}",
+                    classification=true_value.replace('"', '').strip(),
+                    description=f"IF condition: {parsed_condition} then {true_value} else {false_value}",
+                    entity=self._extract_entity_from_condition(condition)
+                )
+                rules.append(rule)
+        # Pattern 3: SWITCH statements
+        # Example: SWITCH(TRUE(), RiskScore > 80, "High", RiskScore > 50, "Medium", "Low")
+        switch_pattern = r'SWITCH\s*\([^,]+,\s*([^)]+)\)'
+        switch_matches = re.finditer(switch_pattern, dax_formula, re.IGNORECASE)
+        for match in switch_matches:
+            switch_body = match.group(1)
+            # Parse switch cases
+            cases = self._parse_switch_cases(switch_body)
+            for case_condition, case_value in cases:
+                parsed_condition = self._parse_condition(case_condition)
+                if parsed_condition:
+                    rule = BusinessRule(
+                        name=f"{measure_name}_Switch_{case_value.replace('\"', '').replace(' ', '_')}",
+                        condition=parsed_condition,
+                        action=f"classify_as_{case_value.replace('\"', '').replace(' ', '_').lower()}",
+                        classification=case_value.replace('"', '').strip(),
+                        description=f"SWITCH case: {parsed_condition} -> {case_value}",
+                        entity=self._extract_entity_from_condition(case_condition)
+                    )
+                    rules.append(rule)
+        # Pattern 4: Simple threshold conditions
+        # Example: RiskScore > 80
+        threshold_pattern = r'(\w+)\s*(>|<|>=|<=|=)\s*(\d+\.?\d*)'
+        threshold_matches = re.finditer(threshold_pattern, dax_formula)
+        for match in threshold_matches:
+            field = match.group(1)
+            operator = match.group(2)
+            value = match.group(3)
+            # Only add if not already captured by other patterns
+            if not any(field in r.condition for r in rules):
+                rule = BusinessRule(
+                    name=f"{measure_name}_Threshold",
+                    condition=f"{field} {operator} {value}",
+                    action="threshold_check",
+                    description=f"Threshold condition: {field} {operator} {value}",
+                    entity=self._extract_entity_from_field(field)
+                )
+                rules.append(rule)
+        return rules
+    def _parse_condition(self, condition: str) -> Optional[str]:
+        """Parse a condition string and normalize it."""
+        # Clean up the condition
+        condition = condition.strip()
+        # Remove extra whitespace
+        condition = re.sub(r'\s+', ' ', condition)
+        return condition if condition else None
+    def _parse_switch_cases(self, switch_body: str) -> List[tuple]:
+        """Parse SWITCH cases from switch body."""
+        cases = []
+        # Simple parsing - split by comma and pair up
+        parts = [p.strip() for p in switch_body.split(',')]
+        # SWITCH format: condition1, value1, condition2, value2, ..., default_value
+        i = 0
+        while i < len(parts) - 1:
+            condition = parts[i]
+            value = parts[i + 1]
+            cases.append((condition, value))
+            i += 2
+        return cases
+    def _extract_entity_from_condition(self, condition: str) -> str:
+        """Extract entity name from condition (e.g., 'Customer[RiskScore]' -> 'Customer')."""
+        # Match table[column] pattern
+        match = re.search(r'(\w+)\[', condition)
+        if match:
+            return match.group(1)
+        return ""
+    def _extract_entity_from_field(self, field: str) -> str:
+        """Extract entity from field name (heuristic)."""
+        # If field contains underscore, might be entity_field
+        if '_' in field:
+            parts = field.split('_')
+            return parts[0].capitalize()
+        return ""
+    def identify_dependencies(self, dax_formula: str) -> List[str]:
+        """
+        Identify table/column dependencies from DAX formula.
+        Args:
+            dax_formula: DAX formula string
+        Returns:
+            List of dependencies in format "Table.Column"
+        """
+        dependencies = set()
+        # Match table[column] patterns
+        pattern = r'(\w+)\[(\w+)\]'
+        matches = re.findall(pattern, dax_formula)
+        for table, column in matches:
+            dependencies.add(f"{table}.{column}")
+        # Also match table references (without column)
+        table_pattern = r'\b([A-Z][a-zA-Z0-9_]*)\['
+        table_matches = re.findall(table_pattern, dax_formula)
+        for table in table_matches:
+            if table.upper() not in ['IF', 'CALCULATE', 'SUM', 'COUNT', 'AVG', 'MAX', 'MIN']:
+                dependencies.add(f"{table}.*")
+        return sorted(list(dependencies))
+    def classify_measure_type(self, dax_formula: str) -> str:
+        """
+        Classify the type of DAX measure.
+        Returns:
+            MeasureType: AGGREGATION, CALCULATION, CONDITIONAL, FILTER, TIME_INTELLIGENCE
+        """
+        dax_upper = dax_formula.upper()
+        # Time intelligence functions
+        time_intel_keywords = ['DATEADD', 'TOTALYTD', 'TOTALQTD', 'TOTALMTD', 'SAMEPERIODLASTYEAR']
+        if any(keyword in dax_upper for keyword in time_intel_keywords):
+            return "TIME_INTELLIGENCE"
+        # Conditional logic
+        if 'IF' in dax_upper or 'SWITCH' in dax_upper:
+            return "CONDITIONAL"
+        # Filter logic
+        if 'CALCULATE' in dax_upper and ('FILTER' in dax_upper or '>' in dax_formula or '<' in dax_formula):
+            return "FILTER"
+        # Aggregation functions
+        agg_keywords = ['SUM', 'COUNT', 'AVG', 'AVERAGE', 'MAX', 'MIN', 'DISTINCTCOUNT']
+        if any(keyword in dax_upper for keyword in agg_keywords):
+            return "AGGREGATION"
+        # Default to calculation
+        return "CALCULATION"

powerbi_ontology/export/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""Export modules for different ontology formats."""
+from powerbi_ontology.export.fabric_iq import FabricIQExporter
+from powerbi_ontology.export.ontoguard import OntoGuardExporter
+from powerbi_ontology.export.json_schema import JSONSchemaExporter
+from powerbi_ontology.export.owl import OWLExporter
+from powerbi_ontology.export.fabric_iq_to_owl import FabricIQToOWLConverter
+from powerbi_ontology.export.contract_to_owl import ContractToOWLConverter
+# Public API of the export package: the exporter and converter classes
+# imported above.
+__all__ = [
+    "FabricIQExporter",
+    "OntoGuardExporter",
+    "JSONSchemaExporter",
+    "OWLExporter",
+    "FabricIQToOWLConverter",
+    "ContractToOWLConverter",
+]