powerbi-ontology-extractor 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,368 @@
1
+ """
2
+ Power BI Semantic Model Extractor
3
+
4
+ Extracts semantic intelligence from Power BI .pbix files.
5
+ """
6
+
7
+ import logging
8
+ from dataclasses import dataclass, field
9
+ from typing import Dict, List, Optional
10
+
11
+ from powerbi_ontology.utils.pbix_reader import PBIXReader
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
@dataclass
class Property:
    """Represents a property/column in an entity.

    Built by PowerBIExtractor.extract_entities() from a single column
    of a Power BI table.
    """
    name: str  # Column name as exposed to the ontology
    data_type: str  # String, Integer, Decimal, Date, Boolean, etc.
    required: bool = False  # True when the source column is declared non-nullable
    unique: bool = False  # True when the column is flagged isUnique or isKey
    description: str = ""  # Free-text description carried over from the model
    source_column: str = ""  # Original Power BI column name
25
+
26
+
27
@dataclass
class Entity:
    """Represents an entity (table) in the semantic model."""
    name: str  # Entity name (taken from the Power BI table name)
    description: str = ""  # Table description from the model, if any
    properties: List[Property] = field(default_factory=list)  # Columns mapped to Property objects
    source_table: str = ""  # Name of the originating Power BI table
    primary_key: Optional[str] = None  # First column flagged isKey/isUnique, if any
35
+
36
+
37
@dataclass
class Relationship:
    """Represents a relationship between entities."""
    from_entity: str  # Source table name
    from_property: str  # Source column name
    to_entity: str  # Target table name
    to_property: str  # Target column name
    cardinality: str  # "one-to-many", "many-to-one", "one-to-one", "many-to-many"
    cross_filter_direction: str = "single"  # "single", "both"
    is_active: bool = True  # Mirrors the model's isActive flag
    name: str = ""  # Relationship name; extractor falls back to "<from>_<to>"
48
+
49
+
50
@dataclass
class Measure:
    """Represents a DAX measure."""
    name: str  # Measure name
    dax_formula: str  # Raw DAX expression ("expression" in the model JSON)
    description: str = ""  # Free-text description from the model
    dependencies: List[str] = field(default_factory=list)  # "Table.Column" refs parsed from the formula
    folder: str = ""  # Display folder from the model
    table: str = ""  # Home table of the measure
59
+
60
+
61
@dataclass
class Hierarchy:
    """Represents a hierarchy (date or custom)."""
    name: str  # Hierarchy name
    table: str  # Owning table
    levels: List[str] = field(default_factory=list)  # Level names in model order
    hierarchy_type: str = "custom"  # "date" or "custom"
68
+
69
+
70
@dataclass
class SecurityRule:
    """Represents a row-level security (RLS) rule."""
    role: str  # RLS role name
    table: str  # Table the DAX filter applies to
    dax_filter: str  # DAX filter expression from the role's table permission
    description: str = ""  # Human-readable summary generated by the extractor
77
+
78
+
79
@dataclass
class SemanticModel:
    """Complete semantic model extracted from Power BI."""
    name: str  # Model name (from the .pbix metadata)
    entities: List[Entity] = field(default_factory=list)  # Tables mapped to entities
    relationships: List[Relationship] = field(default_factory=list)  # Inter-entity links
    measures: List[Measure] = field(default_factory=list)  # DAX measures
    hierarchies: List[Hierarchy] = field(default_factory=list)  # Date/custom hierarchies
    security_rules: List[SecurityRule] = field(default_factory=list)  # RLS rules
    metadata: Dict = field(default_factory=dict)  # e.g. "extraction_date" set by PowerBIExtractor.extract()
    source_file: str = ""  # Path to the originating .pbix file

    def to_ontology(self):
        """Convert to ontology format (delegates to OntologyGenerator)."""
        # Imported lazily here — presumably to avoid a circular import with
        # ontology_generator, which consumes this class; confirm before moving.
        from powerbi_ontology.ontology_generator import OntologyGenerator
        generator = OntologyGenerator(self)
        return generator.generate()
96
+
97
+
98
class PowerBIExtractor:
    """
    Core class for extracting semantic intelligence from Power BI .pbix files.

    ``extract()`` opens the file and pulls every component into a
    SemanticModel. The individual ``extract_*`` methods can also be called
    on their own after ``extract()`` has opened the file; calling them
    earlier raises RuntimeError. Supports use as a context manager so the
    reader's temp files are always cleaned up.
    """

    # Maps (fromCardinality, toCardinality) from the model JSON to the
    # ontology cardinality label; anything unrecognized falls back to
    # "many-to-one" (same default as before).
    _CARDINALITY_MAP = {
        ("one", "many"): "one-to-many",
        ("one", "one"): "one-to-one",
        ("many", "many"): "many-to-many",
    }

    def __init__(self, pbix_path: str):
        """
        Initialize extractor.

        Args:
            pbix_path: Path to the .pbix file
        """
        self.pbix_path = pbix_path
        self.reader: Optional[PBIXReader] = None

    def _require_reader(self) -> PBIXReader:
        """Return the active reader, failing fast if extract() has not run.

        Previously a premature extract_* call crashed with an opaque
        AttributeError on None; this raises an explicit RuntimeError instead.
        """
        if self.reader is None:
            raise RuntimeError(
                "No .pbix file has been opened yet; call extract() first."
            )
        return self.reader

    def extract(self) -> SemanticModel:
        """
        Extract complete semantic model from .pbix file.

        Opens the archive, reads the model JSON, and populates every
        section of the returned model.

        Returns:
            SemanticModel with all extracted information
        """
        logger.info("Extracting semantic model from %s", self.pbix_path)

        self.reader = PBIXReader(self.pbix_path)
        self.reader.extract_to_temp()

        model_data = self.reader.read_model()

        # Model name may live at the top level or nested under "model",
        # depending on the schema version.
        model_name = model_data.get("name", "Unknown")
        if isinstance(model_data, dict) and "model" in model_data:
            model_name = model_data["model"].get("name", model_name)

        # Local import keeps this module's top-level dependencies unchanged
        # (replaces the previous __import__("datetime") hack).
        from datetime import datetime

        semantic_model = SemanticModel(
            name=model_name,
            source_file=self.pbix_path,
            # isoformat() already returns a str; no extra conversion needed.
            metadata={"extraction_date": datetime.now().isoformat()},
        )

        # Extract all components
        semantic_model.entities = self.extract_entities()
        semantic_model.relationships = self.extract_relationships()
        semantic_model.measures = self.extract_measures()
        semantic_model.hierarchies = self.extract_hierarchies()
        semantic_model.security_rules = self.extract_security_rules()

        logger.info(
            "Extracted: %d entities, %d relationships, %d measures",
            len(semantic_model.entities),
            len(semantic_model.relationships),
            len(semantic_model.measures),
        )

        return semantic_model

    def extract_entities(self) -> List[Entity]:
        """
        Extract entities (tables) from Power BI model.

        Returns:
            List of Entity objects

        Raises:
            RuntimeError: if extract() has not opened a file yet.
        """
        reader = self._require_reader()
        entities: List[Entity] = []

        for table in reader.get_tables():
            table_name = table.get("name", "Unknown")
            columns = table.get("columns", [])

            # Map each column to an ontology Property.
            properties = [
                Property(
                    name=col.get("name", ""),
                    data_type=self._map_data_type(col.get("dataType", "string")),
                    # required == column explicitly declared non-nullable
                    required=col.get("isNullable", True) is False,
                    unique=col.get("isUnique", False) or col.get("isKey", False),
                    description=col.get("description", ""),
                    source_column=col.get("name", ""),
                )
                for col in columns
            ]

            # Primary key: the first column flagged as a key or unique.
            primary_key = next(
                (
                    col.get("name")
                    for col in columns
                    if col.get("isKey", False) or col.get("isUnique", False)
                ),
                None,
            )

            entities.append(
                Entity(
                    name=table_name,
                    description=table.get("description", ""),
                    properties=properties,
                    source_table=table_name,
                    primary_key=primary_key,
                )
            )

        return entities

    def extract_relationships(self) -> List[Relationship]:
        """
        Extract relationships between entities.

        Returns:
            List of Relationship objects

        Raises:
            RuntimeError: if extract() has not opened a file yet.
        """
        reader = self._require_reader()
        relationships: List[Relationship] = []

        for rel in reader.get_relationships():
            from_table = rel.get("fromTable", "")
            from_column = rel.get("fromColumn", "")
            to_table = rel.get("toTable", "")
            to_column = rel.get("toColumn", "")

            cardinality = self._CARDINALITY_MAP.get(
                (rel.get("fromCardinality"), rel.get("toCardinality")),
                "many-to-one",
            )

            # Power BI's "bothDirections" maps to bidirectional cross-filter.
            cross_filter = rel.get("crossFilteringBehavior", "singleDirection")
            cross_filter_direction = "both" if cross_filter == "bothDirections" else "single"

            relationships.append(
                Relationship(
                    from_entity=from_table,
                    from_property=from_column,
                    to_entity=to_table,
                    to_property=to_column,
                    cardinality=cardinality,
                    cross_filter_direction=cross_filter_direction,
                    is_active=rel.get("isActive", True),
                    name=rel.get("name", f"{from_table}_{to_table}"),
                )
            )

        return relationships

    def extract_measures(self) -> List[Measure]:
        """
        Extract DAX measures from all tables.

        Returns:
            List of Measure objects

        Raises:
            RuntimeError: if extract() has not opened a file yet.
        """
        reader = self._require_reader()
        measures: List[Measure] = []

        for measure_data in reader.get_measures():
            measure = Measure(
                name=measure_data.get("name", ""),
                dax_formula=measure_data.get("expression", ""),
                description=measure_data.get("description", ""),
                folder=measure_data.get("displayFolder", ""),
                table=measure_data.get("table", ""),
            )

            # Basic dependency scan; full parsing lives in dax_parser.py.
            measure.dependencies = self._extract_measure_dependencies(measure.dax_formula)

            measures.append(measure)

        return measures

    def extract_hierarchies(self) -> List[Hierarchy]:
        """
        Extract hierarchies (date and custom).

        Returns:
            List of Hierarchy objects

        Raises:
            RuntimeError: if extract() has not opened a file yet.
        """
        reader = self._require_reader()
        hierarchies: List[Hierarchy] = []

        for table in reader.get_tables():
            table_name = table.get("name", "")

            for hier in table.get("hierarchies", []):
                hierarchies.append(
                    Hierarchy(
                        name=hier.get("name", ""),
                        table=table_name,
                        levels=[level.get("name", "") for level in hier.get("levels", [])],
                        # Heuristic: tables with "date" in the name hold date hierarchies.
                        hierarchy_type="date" if "date" in table_name.lower() else "custom",
                    )
                )

        return hierarchies

    def extract_security_rules(self) -> List[SecurityRule]:
        """
        Extract row-level security (RLS) rules.

        Returns:
            List of SecurityRule objects

        Raises:
            RuntimeError: if extract() has not opened a file yet.
        """
        model_data = self._require_reader().read_model()
        security_rules: List[SecurityRule] = []

        # Handle different schema versions: roles may be nested under "model".
        roles = []
        if isinstance(model_data, dict):
            if "model" in model_data:
                model_data = model_data["model"]
            roles = model_data.get("roles", [])

        for role in roles:
            role_name = role.get("name", "")

            for perm in role.get("tablePermissions", []):
                table_name = perm.get("name", "")
                filter_expression = perm.get("filterExpression", "")

                # Permissions without a filter expression carry no RLS rule.
                if filter_expression:
                    security_rules.append(
                        SecurityRule(
                            role=role_name,
                            table=table_name,
                            dax_filter=filter_expression,
                            description=f"RLS rule for {table_name} in role {role_name}",
                        )
                    )

        return security_rules

    def _map_data_type(self, pbix_type: str) -> str:
        """Map a Power BI data type to an ontology data type.

        Unknown types default to "String".
        """
        type_mapping = {
            "string": "String",
            "int64": "Integer",
            "double": "Decimal",
            "datetime": "Date",
            "boolean": "Boolean",
            "decimal": "Decimal",
        }
        return type_mapping.get(pbix_type.lower(), "String")

    def _extract_measure_dependencies(self, dax_formula: str) -> List[str]:
        """
        Extract table/column dependencies from a DAX formula (basic implementation).

        Handles bare references (Table[Column]) and quoted table names
        ('Table Name'[Column]). This is a simplified version — full parsing
        is done in dax_parser.py.

        Returns:
            Sorted, de-duplicated list of "Table.Column" strings. Sorting
            makes the output deterministic (set() order is arbitrary).
        """
        import re

        # Quoted-table alternative comes first so 'Fact Sales'[Amt] is not
        # split by the bare \w+ pattern.
        pattern = re.compile(r"'([^']+)'\[([^\]]+)\]|(\w+)\[(\w+)\]")
        dependencies = set()
        for quoted_table, quoted_col, table, column in pattern.findall(dax_formula):
            dependencies.add(f"{quoted_table or table}.{quoted_col or column}")
        return sorted(dependencies)

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit - cleanup the reader's temp files."""
        if self.reader:
            self.reader.cleanup()
@@ -0,0 +1,237 @@
1
+ """
2
+ Configuration loader for MCP Server.
3
+
4
+ Loads and validates configuration from YAML file.
5
+ """
6
+
7
import copy
import logging
import os
from pathlib import Path
from typing import Any, Dict, List, Optional
11
+
12
logger = logging.getLogger(__name__)


# Default configuration — the baseline every loaded config is merged onto.
# Treat as immutable: MCPConfig deep-copies it before any merge so instances
# can never mutate these module-wide defaults.
DEFAULT_CONFIG = {
    "server": {
        "name": "PowerBI Ontology Extractor MCP",
        "version": "0.1.0",
        "description": "Extract semantic intelligence from Power BI files via MCP",
    },
    "log_level": "INFO",
    "extraction": {
        "include_measures": True,
        "include_security": True,
        "cleanup_temp": True,
        "max_file_size_mb": 100,
    },
    "export": {
        "default_format": "xml",
        "include_action_rules": True,
        "include_constraints": True,
        "default_roles": ["Admin", "Analyst", "Viewer"],
    },
    "analysis": {
        "similarity_threshold": 0.8,
    },
    "chat": {
        "model": "gpt-4o-mini",
        "temperature": 0.3,
        "max_tokens": 1000,
    },
    "cache": {
        "enabled": True,
        "ttl_seconds": 3600,
    },
}


class MCPConfig:
    """Configuration manager for MCP Server.

    Loads a YAML config file (when available) and deep-merges it onto
    DEFAULT_CONFIG, exposing typed accessors for each setting.
    """

    def __init__(self, config_path: Optional[str] = None):
        """
        Initialize configuration.

        Args:
            config_path: Path to config.yaml file. If None, checks:
                1. POWERBI_MCP_CONFIG environment variable
                2. config/mcp_config.yaml relative to package
                3. Uses default configuration
        """
        # Deep copy: a shallow .copy() would share the nested dicts with
        # DEFAULT_CONFIG, so mutating this instance's config would corrupt
        # the module-wide defaults for every other instance.
        self._config: Dict[str, Any] = copy.deepcopy(DEFAULT_CONFIG)
        self._config_path: Optional[str] = None

        # Load configuration
        self._load_config(config_path)

    def _load_config(self, config_path: Optional[str] = None):
        """Load configuration from YAML file, falling back to defaults."""
        try:
            import yaml
        except ImportError:
            logger.warning("PyYAML not installed, using default configuration")
            return

        # Determine config path: explicit arg > env var > well-known locations
        if config_path is None:
            config_path = os.getenv("POWERBI_MCP_CONFIG")

        if config_path is None or not Path(config_path).exists():
            # Try default locations
            possible_paths = [
                Path("config/mcp_config.yaml"),
                Path(__file__).parent.parent / "config" / "mcp_config.yaml",
                Path.home() / ".powerbi-ontology" / "mcp_config.yaml",
            ]

            for path in possible_paths:
                if path.exists():
                    config_path = str(path)
                    break

        if config_path is None or not Path(config_path).exists():
            logger.info("No config file found, using default configuration")
            return

        self._config_path = config_path
        logger.info("Loading configuration from: %s", config_path)

        try:
            with open(config_path, "r", encoding="utf-8") as f:
                loaded_config = yaml.safe_load(f) or {}

            # Merge onto a fresh deep copy of the defaults so neither
            # DEFAULT_CONFIG nor its nested dicts are aliased by _config.
            self._config = self._deep_merge(copy.deepcopy(DEFAULT_CONFIG), loaded_config)
            logger.info("Configuration loaded successfully")

        except Exception as e:
            # Best-effort: a broken config file must not prevent startup.
            logger.warning("Error loading config file: %s, using defaults", e)

    def _deep_merge(self, base: Dict, override: Dict) -> Dict:
        """Deep merge two dictionaries (override wins on conflicts).

        Neither input is mutated; nested dicts present in both sides are
        merged recursively, all other override values replace base values.
        """
        result = base.copy()

        for key, value in override.items():
            if key in result and isinstance(result[key], dict) and isinstance(value, dict):
                result[key] = self._deep_merge(result[key], value)
            else:
                result[key] = value

        return result

    # Server settings
    @property
    def server_name(self) -> str:
        return self._config["server"]["name"]

    @property
    def server_version(self) -> str:
        return self._config["server"]["version"]

    @property
    def server_description(self) -> str:
        return self._config["server"]["description"]

    # Logging
    @property
    def log_level(self) -> str:
        return self._config.get("log_level", "INFO").upper()

    # Extraction settings
    @property
    def include_measures(self) -> bool:
        return self._config["extraction"]["include_measures"]

    @property
    def include_security(self) -> bool:
        return self._config["extraction"]["include_security"]

    @property
    def cleanup_temp(self) -> bool:
        return self._config["extraction"]["cleanup_temp"]

    @property
    def max_file_size_mb(self) -> int:
        return self._config["extraction"]["max_file_size_mb"]

    # Export settings
    @property
    def default_format(self) -> str:
        return self._config["export"]["default_format"]

    @property
    def include_action_rules(self) -> bool:
        return self._config["export"]["include_action_rules"]

    @property
    def include_constraints(self) -> bool:
        return self._config["export"]["include_constraints"]

    @property
    def default_roles(self) -> List[str]:
        return self._config["export"]["default_roles"]

    # Analysis settings
    @property
    def similarity_threshold(self) -> float:
        return self._config["analysis"]["similarity_threshold"]

    # Chat settings
    @property
    def chat_model(self) -> str:
        return self._config["chat"]["model"]

    @property
    def chat_temperature(self) -> float:
        return self._config["chat"]["temperature"]

    @property
    def chat_max_tokens(self) -> int:
        return self._config["chat"]["max_tokens"]

    # Cache settings
    @property
    def cache_enabled(self) -> bool:
        return self._config["cache"]["enabled"]

    @property
    def cache_ttl(self) -> int:
        return self._config["cache"]["ttl_seconds"]

    def get(self, key: str, default: Any = None) -> Any:
        """Get configuration value by dot-notation key (e.g. "chat.model")."""
        keys = key.split(".")
        value = self._config

        for k in keys:
            if isinstance(value, dict) and k in value:
                value = value[k]
            else:
                return default

        return value

    def to_dict(self) -> Dict[str, Any]:
        """Return a deep copy of the configuration as a dictionary.

        A deep copy ensures callers cannot mutate this instance's config
        (the previous shallow copy still shared every nested dict).
        """
        return copy.deepcopy(self._config)
219
+
220
+
221
# Global configuration instance
# Lazily created by get_config(); reload_config() replaces it in place.
_config: Optional[MCPConfig] = None


def get_config() -> MCPConfig:
    """Get or create global configuration instance.

    Returns:
        The process-wide MCPConfig, constructed with default path
        resolution on first call.
    """
    global _config
    if _config is None:
        _config = MCPConfig()
    return _config


def reload_config(config_path: Optional[str] = None) -> MCPConfig:
    """Reload configuration from file.

    Replaces the global instance, so subsequent get_config() calls see
    the newly loaded settings.

    Args:
        config_path: Optional explicit path to the YAML config file.

    Returns:
        The freshly constructed MCPConfig.
    """
    global _config
    _config = MCPConfig(config_path)
    return _config