kodit 0.5.4__py3-none-any.whl → 0.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of kodit has been flagged as possibly problematic. See the registry's advisory page for details.
- kodit/_version.py +2 -2
- kodit/app.py +2 -0
- kodit/application/factories/server_factory.py +58 -32
- kodit/application/services/code_search_application_service.py +89 -12
- kodit/application/services/commit_indexing_application_service.py +527 -195
- kodit/application/services/enrichment_query_service.py +311 -43
- kodit/application/services/indexing_worker_service.py +1 -1
- kodit/application/services/queue_service.py +15 -10
- kodit/application/services/sync_scheduler.py +2 -1
- kodit/domain/enrichments/architecture/architecture.py +1 -1
- kodit/domain/enrichments/architecture/database_schema/__init__.py +1 -0
- kodit/domain/enrichments/architecture/database_schema/database_schema.py +17 -0
- kodit/domain/enrichments/architecture/physical/physical.py +1 -1
- kodit/domain/enrichments/development/development.py +1 -1
- kodit/domain/enrichments/development/snippet/snippet.py +12 -5
- kodit/domain/enrichments/enrichment.py +31 -4
- kodit/domain/enrichments/history/__init__.py +1 -0
- kodit/domain/enrichments/history/commit_description/__init__.py +1 -0
- kodit/domain/enrichments/history/commit_description/commit_description.py +17 -0
- kodit/domain/enrichments/history/history.py +18 -0
- kodit/domain/enrichments/usage/api_docs.py +1 -1
- kodit/domain/enrichments/usage/usage.py +1 -1
- kodit/domain/entities/git.py +30 -25
- kodit/domain/factories/git_repo_factory.py +20 -5
- kodit/domain/protocols.py +60 -125
- kodit/domain/services/embedding_service.py +14 -16
- kodit/domain/services/git_repository_service.py +60 -38
- kodit/domain/services/git_service.py +18 -11
- kodit/domain/tracking/resolution_service.py +6 -16
- kodit/domain/value_objects.py +6 -9
- kodit/infrastructure/api/v1/dependencies.py +12 -3
- kodit/infrastructure/api/v1/query_params.py +27 -0
- kodit/infrastructure/api/v1/routers/commits.py +91 -85
- kodit/infrastructure/api/v1/routers/repositories.py +53 -37
- kodit/infrastructure/api/v1/routers/search.py +1 -1
- kodit/infrastructure/api/v1/schemas/enrichment.py +14 -0
- kodit/infrastructure/api/v1/schemas/repository.py +1 -1
- kodit/infrastructure/cloning/git/git_python_adaptor.py +41 -0
- kodit/infrastructure/database_schema/__init__.py +1 -0
- kodit/infrastructure/database_schema/database_schema_detector.py +268 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +0 -2
- kodit/infrastructure/sqlalchemy/embedding_repository.py +44 -34
- kodit/infrastructure/sqlalchemy/enrichment_association_repository.py +73 -0
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +145 -97
- kodit/infrastructure/sqlalchemy/entities.py +12 -116
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +52 -244
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +35 -324
- kodit/infrastructure/sqlalchemy/git_file_repository.py +70 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +60 -230
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +53 -240
- kodit/infrastructure/sqlalchemy/query.py +331 -0
- kodit/infrastructure/sqlalchemy/repository.py +203 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +79 -58
- kodit/infrastructure/sqlalchemy/task_status_repository.py +45 -52
- kodit/migrations/versions/4b1a3b2c8fa5_refactor_git_tracking.py +190 -0
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/METADATA +1 -1
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/RECORD +60 -50
- kodit/infrastructure/mappers/enrichment_mapper.py +0 -83
- kodit/infrastructure/mappers/git_mapper.py +0 -193
- kodit/infrastructure/mappers/snippet_mapper.py +0 -104
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +0 -479
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/WHEEL +0 -0
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"""Database schema detector for discovering database schemas in a repository."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import ClassVar
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DatabaseSchemaDetector:
    """Detects database schemas from various sources in a repository.

    Scans a checked-out repository for three kinds of evidence:
    migration files, raw SQL schema files, and ORM model definitions,
    then summarizes everything found as a markdown report string.
    """

    # File patterns to look for
    MIGRATION_PATTERNS: ClassVar[list[str]] = [
        "**/migrations/**/*.sql",
        "**/migrations/**/*.py",
        "**/migrate/**/*.sql",
        "**/migrate/**/*.go",
        "**/db/migrate/**/*.rb",
        "**/alembic/versions/**/*.py",
        "**/liquibase/**/*.xml",
        "**/flyway/**/*.sql",
    ]

    SQL_FILE_PATTERNS: ClassVar[list[str]] = [
        "**/*.sql",
        "**/schema/**/*.sql",
        "**/schemas/**/*.sql",
        "**/database/**/*.sql",
        "**/db/**/*.sql",
    ]

    ORM_MODEL_PATTERNS: ClassVar[list[str]] = [
        "**/models/**/*.py",  # SQLAlchemy, Django
        "**/models/**/*.go",  # GORM
        "**/entities/**/*.py",  # SQLAlchemy
        "**/entities/**/*.ts",  # TypeORM
        "**/entities/**/*.js",  # TypeORM/Sequelize
    ]

    # Regex patterns for schema detection
    CREATE_TABLE_PATTERN = re.compile(
        r"CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?[`\"]?(\w+)[`\"]?",
        re.IGNORECASE,
    )

    SQLALCHEMY_MODEL_PATTERN = re.compile(
        r"class\s+(\w+)\s*\([^)]*(?:Base|Model|db\.Model)[^)]*\):",
        re.MULTILINE,
    )

    GORM_MODEL_PATTERN = re.compile(
        r"type\s+(\w+)\s+struct\s*{[^}]*gorm\.Model",
        re.MULTILINE | re.DOTALL,
    )

    TYPEORM_ENTITY_PATTERN = re.compile(
        r"@Entity\([^)]*\)\s*(?:export\s+)?class\s+(\w+)",
        re.MULTILINE,
    )

    async def discover_schemas(self, repo_path: Path) -> str:
        """Discover database schemas and generate a structured report.

        Args:
            repo_path: Root of the repository checkout to scan.

        Returns:
            A markdown report; a short "nothing found" report when the
            repository contains no detectable schema artifacts.

        """
        findings: dict[str, set[str] | list[str] | list[dict] | None] = {
            "tables": set(),
            "migration_files": [],
            "sql_files": [],
            "orm_models": [],
            "orm_type": None,
        }

        # Detect migration files
        await self._detect_migrations(repo_path, findings)

        # Detect SQL schema files
        await self._detect_sql_files(repo_path, findings)

        # Detect ORM models
        await self._detect_orm_models(repo_path, findings)

        # Generate report
        return self._generate_report(findings)

    async def _detect_migrations(self, repo_path: Path, findings: dict) -> None:
        """Detect migration files.

        The glob patterns can overlap (e.g. a path containing both
        "migrate" and "migrations" segments), so results are de-duplicated
        by relative path while preserving discovery order.
        """
        seen: set[str] = set(findings["migration_files"])
        for pattern in self.MIGRATION_PATTERNS:
            for file_path in repo_path.glob(pattern):
                if not file_path.is_file():
                    continue
                rel_path = str(file_path.relative_to(repo_path))
                if rel_path in seen:
                    continue
                seen.add(rel_path)
                findings["migration_files"].append(rel_path)
                # Try to extract table names from migrations
                await self._extract_tables_from_file(file_path, findings)

    async def _detect_sql_files(self, repo_path: Path, findings: dict) -> None:
        """Detect SQL schema files.

        Skips files already counted as migrations, and de-duplicates paths
        matched by more than one pattern ("**/*.sql" subsumes the
        directory-specific patterns).
        """
        # Seed with migration paths so migrations are not double-counted.
        seen: set[str] = set(findings["migration_files"])

        for pattern in self.SQL_FILE_PATTERNS:
            for file_path in repo_path.glob(pattern):
                if not file_path.is_file():
                    continue
                rel_path = str(file_path.relative_to(repo_path))
                if rel_path in seen:
                    continue
                seen.add(rel_path)
                findings["sql_files"].append(rel_path)
                await self._extract_tables_from_file(file_path, findings)

    async def _detect_orm_models(self, repo_path: Path, findings: dict) -> None:
        """Detect ORM model files.

        De-duplicates files matched by multiple patterns (e.g. a path
        containing both a "models" and an "entities" segment).
        """
        seen: set[str] = set()
        for pattern in self.ORM_MODEL_PATTERNS:
            for file_path in repo_path.glob(pattern):
                if not file_path.is_file():
                    continue
                rel_path = str(file_path.relative_to(repo_path))
                if rel_path in seen:
                    continue
                seen.add(rel_path)
                models = await self._extract_orm_models(file_path)
                if models:
                    findings["orm_models"].append({
                        "file": rel_path,
                        "models": models,
                    })
                    findings["tables"].update(models)

    async def _extract_tables_from_file(self, file_path: Path, findings: dict) -> None:
        """Extract table names from SQL or migration files.

        Unreadable or undecodable files are silently skipped — detection is
        best-effort and must not fail the whole scan.
        """
        try:
            content = file_path.read_text(encoding="utf-8", errors="ignore")

            # Look for CREATE TABLE statements
            for match in self.CREATE_TABLE_PATTERN.finditer(content):
                table_name = match.group(1)
                findings["tables"].add(table_name)

        except (OSError, UnicodeDecodeError):
            pass

    async def _extract_orm_models(self, file_path: Path) -> list[str]:
        """Extract ORM model names from model files.

        Dispatches on file suffix: Python → SQLAlchemy/Django class pattern,
        Go → GORM struct pattern, TS/JS → TypeORM @Entity pattern. Returns
        an empty list for unreadable files (best-effort, same as above).
        """
        models: list[str] = []

        try:
            content = file_path.read_text(encoding="utf-8", errors="ignore")
            suffix = file_path.suffix

            if suffix == ".py":
                # SQLAlchemy or Django models
                models.extend(
                    match.group(1)
                    for match in self.SQLALCHEMY_MODEL_PATTERN.finditer(content)
                )

            elif suffix == ".go":
                # GORM models
                models.extend(
                    match.group(1)
                    for match in self.GORM_MODEL_PATTERN.finditer(content)
                )

            elif suffix in [".ts", ".js"]:
                # TypeORM entities
                models.extend(
                    match.group(1)
                    for match in self.TYPEORM_ENTITY_PATTERN.finditer(content)
                )

        except (OSError, UnicodeDecodeError):
            pass

        return models

    def _generate_report(self, findings: dict) -> str:  # noqa: PLR0915, C901, PLR0912
        """Generate a structured report of database schema findings."""
        lines = []

        # Summary
        lines.append("# Database Schema Discovery Report")
        lines.append("")

        has_findings = (
            findings["tables"]
            or findings["migration_files"]
            or findings["sql_files"]
            or findings["orm_models"]
        )
        if not has_findings:
            lines.append("No database schemas detected in this repository.")
            return "\n".join(lines)

        # Tables/Entities found
        if findings["tables"]:
            lines.append(f"## Detected Tables/Entities ({len(findings['tables'])})")
            lines.append("")
            lines.extend(f"- {table}" for table in sorted(findings["tables"]))
            lines.append("")

        # Migration files (capped at 10 entries to keep the report short)
        if findings["migration_files"]:
            lines.append(f"## Migration Files ({len(findings['migration_files'])})")
            lines.append("")
            lines.append(
                "Database migrations detected, suggesting schema evolution over time:"
            )
            lines.extend(
                f"- {mig_file}" for mig_file in findings["migration_files"][:10]
            )
            if len(findings["migration_files"]) > 10:
                lines.append(f"- ... and {len(findings['migration_files']) - 10} more")
            lines.append("")

        # SQL files (same 10-entry cap)
        if findings["sql_files"]:
            lines.append(f"## SQL Schema Files ({len(findings['sql_files'])})")
            lines.append("")
            lines.extend(f"- {sql_file}" for sql_file in findings["sql_files"][:10])
            if len(findings["sql_files"]) > 10:
                lines.append(f"- ... and {len(findings['sql_files']) - 10} more")
            lines.append("")

        # ORM models
        if findings["orm_models"]:
            lines.append(f"## ORM Models ({len(findings['orm_models'])} files)")
            lines.append("")
            lines.append(
                "ORM models detected, suggesting object-relational mapping:"
            )
            for orm_info in findings["orm_models"][:10]:  # Limit to first 10
                model_names = ", ".join(orm_info["models"][:5])
                lines.append(f"- {orm_info['file']}: {model_names}")
                if len(orm_info["models"]) > 5:
                    lines.append(f"  (and {len(orm_info['models']) - 5} more models)")
            if len(findings["orm_models"]) > 10:
                lines.append(f"- ... and {len(findings['orm_models']) - 10} more files")
            lines.append("")

        # Inferred database type
        lines.append("## Inferred Information")
        lines.append("")

        # Crude substring checks over the stringified path list; good enough
        # for a heuristic hint and cheap to compute.
        mig_files_str = str(findings.get("migration_files", []))
        mig_files = findings.get("migration_files", [])

        if "alembic" in mig_files_str:
            lines.append("- Migration framework: Alembic (Python/SQLAlchemy)")
        elif "django" in mig_files_str or any(
            "migrations" in f and f.endswith(".py") for f in mig_files
        ):
            lines.append("- Migration framework: Django Migrations")
        elif any(".go" in f for f in mig_files):
            lines.append(
                "- Migration framework: Go-based migrations (golang-migrate)"
            )
        elif "flyway" in mig_files_str:
            lines.append("- Migration framework: Flyway")
        elif "liquibase" in mig_files_str:
            lines.append("- Migration framework: Liquibase")

        if findings["orm_models"]:
            orm_models = findings["orm_models"]
            py_models = sum(1 for m in orm_models if m["file"].endswith(".py"))
            go_models = sum(1 for m in orm_models if m["file"].endswith(".go"))
            ts_models = sum(
                1 for m in orm_models if m["file"].endswith((".ts", ".js"))
            )

            if py_models > 0:
                lines.append("- ORM: Python (likely SQLAlchemy or Django ORM)")
            if go_models > 0:
                lines.append("- ORM: Go (likely GORM)")
            if ts_models > 0:
                lines.append(
                    "- ORM: TypeScript/JavaScript (likely TypeORM or Sequelize)"
                )

        return "\n".join(lines)
|
|
@@ -39,7 +39,6 @@ class APIDocExtractor:
|
|
|
39
39
|
self,
|
|
40
40
|
files: list[GitFile],
|
|
41
41
|
language: str,
|
|
42
|
-
commit_sha: str,
|
|
43
42
|
include_private: bool = False, # noqa: FBT001, FBT002
|
|
44
43
|
) -> list[APIDocEnrichment]:
|
|
45
44
|
"""Extract API documentation enrichments from files.
|
|
@@ -93,7 +92,6 @@ class APIDocExtractor:
|
|
|
93
92
|
)
|
|
94
93
|
|
|
95
94
|
enrichment = APIDocEnrichment(
|
|
96
|
-
entity_id=commit_sha,
|
|
97
95
|
language=language,
|
|
98
96
|
content=markdown_content,
|
|
99
97
|
)
|
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
"""SQLAlchemy implementation of embedding repository."""
|
|
2
2
|
|
|
3
3
|
from collections.abc import Callable
|
|
4
|
+
from typing import Any
|
|
4
5
|
|
|
5
6
|
import numpy as np
|
|
6
7
|
from sqlalchemy import select
|
|
7
8
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
9
|
|
|
9
10
|
from kodit.infrastructure.sqlalchemy.entities import Embedding, EmbeddingType
|
|
11
|
+
from kodit.infrastructure.sqlalchemy.query import FilterOperator, QueryBuilder
|
|
12
|
+
from kodit.infrastructure.sqlalchemy.repository import SqlAlchemyRepository
|
|
10
13
|
from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
|
|
11
14
|
|
|
12
15
|
|
|
@@ -17,68 +20,75 @@ def create_embedding_repository(
|
|
|
17
20
|
return SqlAlchemyEmbeddingRepository(session_factory=session_factory)
|
|
18
21
|
|
|
19
22
|
|
|
20
|
-
class SqlAlchemyEmbeddingRepository:
|
|
23
|
+
class SqlAlchemyEmbeddingRepository(SqlAlchemyRepository[Embedding, Embedding]):
|
|
21
24
|
"""SQLAlchemy implementation of embedding repository."""
|
|
22
25
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
+
@property
|
|
27
|
+
def db_entity_type(self) -> type[Embedding]:
|
|
28
|
+
"""The SQLAlchemy model type."""
|
|
29
|
+
return Embedding
|
|
30
|
+
|
|
31
|
+
@staticmethod
|
|
32
|
+
def to_domain(db_entity: Embedding) -> Embedding:
|
|
33
|
+
"""Map database entity to domain entity."""
|
|
34
|
+
return db_entity
|
|
35
|
+
|
|
36
|
+
@staticmethod
|
|
37
|
+
def to_db(domain_entity: Embedding) -> Embedding:
|
|
38
|
+
"""Map domain entity to database entity."""
|
|
39
|
+
return domain_entity
|
|
40
|
+
|
|
41
|
+
def _get_id(self, entity: Embedding) -> Any:
|
|
42
|
+
"""Extract ID from domain entity."""
|
|
43
|
+
return entity.id
|
|
26
44
|
|
|
27
45
|
async def create_embedding(self, embedding: Embedding) -> None:
|
|
28
46
|
"""Create a new embedding record in the database."""
|
|
29
|
-
|
|
30
|
-
session.add(embedding)
|
|
47
|
+
await self.save(embedding)
|
|
31
48
|
|
|
32
49
|
async def get_embedding_by_snippet_id_and_type(
|
|
33
50
|
self, snippet_id: int, embedding_type: EmbeddingType
|
|
34
51
|
) -> Embedding | None:
|
|
35
52
|
"""Get an embedding by its snippet ID and type."""
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
53
|
+
query = (
|
|
54
|
+
QueryBuilder()
|
|
55
|
+
.filter("snippet_id", FilterOperator.EQ, snippet_id)
|
|
56
|
+
.filter("type", FilterOperator.EQ, embedding_type)
|
|
57
|
+
)
|
|
58
|
+
results = await self.find(query)
|
|
59
|
+
return results[0] if results else None
|
|
43
60
|
|
|
44
61
|
async def list_embeddings_by_type(
|
|
45
62
|
self, embedding_type: EmbeddingType
|
|
46
63
|
) -> list[Embedding]:
|
|
47
64
|
"""List all embeddings of a given type."""
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
result = await session.execute(query)
|
|
51
|
-
return list(result.scalars())
|
|
65
|
+
query = QueryBuilder().filter("type", FilterOperator.EQ, embedding_type)
|
|
66
|
+
return await self.find(query)
|
|
52
67
|
|
|
53
68
|
async def delete_embeddings_by_snippet_id(self, snippet_id: str) -> None:
|
|
54
69
|
"""Delete all embeddings for a snippet."""
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
for embedding in embeddings:
|
|
60
|
-
await session.delete(embedding)
|
|
70
|
+
query = QueryBuilder().filter("snippet_id", FilterOperator.EQ, snippet_id)
|
|
71
|
+
embeddings = await self.find(query)
|
|
72
|
+
for embedding in embeddings:
|
|
73
|
+
await self.delete(embedding)
|
|
61
74
|
|
|
62
75
|
async def list_embeddings_by_snippet_ids_and_type(
|
|
63
76
|
self, snippet_ids: list[str], embedding_type: EmbeddingType
|
|
64
77
|
) -> list[Embedding]:
|
|
65
78
|
"""Get all embeddings for the given snippet IDs."""
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
return list(result.scalars())
|
|
79
|
+
query = (
|
|
80
|
+
QueryBuilder()
|
|
81
|
+
.filter("snippet_id", FilterOperator.IN, snippet_ids)
|
|
82
|
+
.filter("type", FilterOperator.EQ, embedding_type)
|
|
83
|
+
)
|
|
84
|
+
return await self.find(query)
|
|
73
85
|
|
|
74
86
|
async def get_embeddings_by_snippet_ids(
|
|
75
87
|
self, snippet_ids: list[str]
|
|
76
88
|
) -> list[Embedding]:
|
|
77
89
|
"""Get all embeddings for the given snippet IDs."""
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
result = await session.execute(query)
|
|
81
|
-
return list(result.scalars())
|
|
90
|
+
query = QueryBuilder().filter("snippet_id", FilterOperator.IN, snippet_ids)
|
|
91
|
+
return await self.find(query)
|
|
82
92
|
|
|
83
93
|
async def list_semantic_results(
|
|
84
94
|
self,
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Enrichment association repository."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
|
|
5
|
+
import structlog
|
|
6
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
|
+
|
|
8
|
+
from kodit.domain.enrichments.enrichment import (
|
|
9
|
+
EnrichmentAssociation,
|
|
10
|
+
)
|
|
11
|
+
from kodit.domain.protocols import EnrichmentAssociationRepository
|
|
12
|
+
from kodit.infrastructure.sqlalchemy import entities as db_entities
|
|
13
|
+
from kodit.infrastructure.sqlalchemy.repository import SqlAlchemyRepository
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def create_enrichment_association_repository(
    session_factory: Callable[[], AsyncSession],
) -> EnrichmentAssociationRepository:
    """Create an enrichment association repository.

    Args:
        session_factory: Zero-argument callable producing a new
            ``AsyncSession`` per unit of work.

    Returns:
        A repository satisfying the ``EnrichmentAssociationRepository``
        protocol, backed by SQLAlchemy.

    """
    return SQLAlchemyEnrichmentAssociationRepository(session_factory=session_factory)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class SQLAlchemyEnrichmentAssociationRepository(
    SqlAlchemyRepository[EnrichmentAssociation, db_entities.EnrichmentAssociation],
    EnrichmentAssociationRepository,
):
    """Repository for managing enrichment associations.

    Bridges the ``EnrichmentAssociation`` domain object and its SQLAlchemy
    row representation via the generic ``SqlAlchemyRepository`` base.
    """

    def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
        """Initialize the repository with a session factory and a logger."""
        super().__init__(session_factory=session_factory)
        self._log = structlog.get_logger(__name__)

    @property
    def db_entity_type(self) -> type[db_entities.EnrichmentAssociation]:
        """The SQLAlchemy model type."""
        return db_entities.EnrichmentAssociation

    def _get_id(self, entity: EnrichmentAssociation) -> int | None:
        """Return the primary key of a domain association (None if unsaved)."""
        return entity.id

    @staticmethod
    def to_domain(
        db_entity: db_entities.EnrichmentAssociation,
    ) -> EnrichmentAssociation:
        """Convert a database row into its domain representation."""
        return EnrichmentAssociation(
            id=db_entity.id,
            enrichment_id=db_entity.enrichment_id,
            entity_type=db_entity.entity_type,
            entity_id=db_entity.entity_id,
        )

    @staticmethod
    def to_db(
        domain_entity: EnrichmentAssociation,
    ) -> db_entities.EnrichmentAssociation:
        """Convert a domain association into a database row."""
        from datetime import UTC, datetime

        record = db_entities.EnrichmentAssociation(
            enrichment_id=domain_entity.enrichment_id,
            entity_type=domain_entity.entity_type,
            entity_id=domain_entity.entity_id,
        )
        if domain_entity.id is not None:
            record.id = domain_entity.id
        # The domain model carries no timestamps, so stamp both on every write.
        timestamp = datetime.now(UTC)
        record.created_at = timestamp
        record.updated_at = timestamp
        return record
|