kodit 0.5.5__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kodit/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.5.5'
32
- __version_tuple__ = version_tuple = (0, 5, 5)
31
+ __version__ = version = '0.5.7'
32
+ __version_tuple__ = version_tuple = (0, 5, 7)
33
33
 
34
34
  __commit_id__ = commit_id = None
kodit/app.py CHANGED
@@ -63,6 +63,8 @@ async def app_lifespan(_: FastAPI) -> AsyncIterator[AppLifespanState]:
63
63
  )
64
64
  )
65
65
  )
66
+ except StopAsyncIteration:
67
+ pass
66
68
  except Exception as e:
67
69
  raise ValueError("Embedding service is not accessible") from e
68
70
  try:
@@ -50,6 +50,9 @@ from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
50
50
  VectorChordBM25Repository,
51
51
  )
52
52
  from kodit.infrastructure.cloning.git.git_python_adaptor import GitPythonAdapter
53
+ from kodit.infrastructure.database_schema.database_schema_detector import (
54
+ DatabaseSchemaDetector,
55
+ )
53
56
  from kodit.infrastructure.embedding.embedding_factory import (
54
57
  embedding_domain_service_factory,
55
58
  )
@@ -255,6 +258,7 @@ class ServerFactory:
255
258
  text_search_service=self.text_search_service(),
256
259
  embedding_repository=self.embedding_repository(),
257
260
  architecture_service=self.architecture_service(),
261
+ database_schema_detector=DatabaseSchemaDetector(),
258
262
  enrichment_v2_repository=self.enrichment_v2_repository(),
259
263
  enricher_service=self.enricher(),
260
264
  enrichment_association_repository=self.enrichment_association_repository(),
@@ -14,6 +14,9 @@ if TYPE_CHECKING:
14
14
  from kodit.application.services.enrichment_query_service import (
15
15
  EnrichmentQueryService,
16
16
  )
17
+ from kodit.domain.enrichments.architecture.database_schema.database_schema import (
18
+ DatabaseSchemaEnrichment,
19
+ )
17
20
  from kodit.domain.enrichments.architecture.physical.physical import (
18
21
  PhysicalArchitectureEnrichment,
19
22
  )
@@ -27,11 +30,20 @@ from kodit.domain.enrichments.enrichment import (
27
30
  EnrichmentAssociation,
28
31
  EnrichmentV2,
29
32
  )
33
+ from kodit.domain.enrichments.history.commit_description.commit_description import (
34
+ CommitDescriptionEnrichment,
35
+ )
30
36
  from kodit.domain.enrichments.request import (
31
37
  EnrichmentRequest as GenericEnrichmentRequest,
32
38
  )
33
39
  from kodit.domain.entities import Task
34
- from kodit.domain.entities.git import GitFile, GitRepo, SnippetV2, TrackingType
40
+ from kodit.domain.entities.git import (
41
+ GitCommit,
42
+ GitFile,
43
+ GitRepo,
44
+ SnippetV2,
45
+ TrackingType,
46
+ )
35
47
  from kodit.domain.factories.git_repo_factory import GitRepoFactory
36
48
  from kodit.domain.protocols import (
37
49
  EnrichmentAssociationRepository,
@@ -63,6 +75,9 @@ from kodit.domain.value_objects import (
63
75
  TaskOperation,
64
76
  TrackableType,
65
77
  )
78
+ from kodit.infrastructure.database_schema.database_schema_detector import (
79
+ DatabaseSchemaDetector,
80
+ )
66
81
  from kodit.infrastructure.slicing.api_doc_extractor import APIDocExtractor
67
82
  from kodit.infrastructure.slicing.slicer import Slicer
68
83
  from kodit.infrastructure.sqlalchemy import entities as db_entities
@@ -82,6 +97,73 @@ You are a professional software developer. You will be given a snippet of code.
82
97
  Please provide a concise explanation of the code.
83
98
  """
84
99
 
100
+ COMMIT_DESCRIPTION_SYSTEM_PROMPT = """
101
+ You are a professional software developer. You will be given a git commit diff.
102
+ Please provide a concise description of what changes were made and why.
103
+ """
104
+
105
+ DATABASE_SCHEMA_SYSTEM_PROMPT = """
106
+ You are an expert database architect and documentation specialist.
107
+ Your task is to create clear, visual documentation of database schemas.
108
+ """
109
+
110
+ DATABASE_SCHEMA_TASK_PROMPT = """
111
+ You will be provided with a database schema discovery report.
112
+ Please create comprehensive database schema documentation.
113
+
114
+ <schema_report>
115
+ {schema_report}
116
+ </schema_report>
117
+
118
+ **Return the following:**
119
+
120
+ ## Entity List
121
+
122
+ For each table/entity, write one line:
123
+ - **[Table Name]**: [brief description of what it stores]
124
+
125
+ ## Mermaid ERD
126
+
127
+ Create a Mermaid Entity Relationship Diagram showing:
128
+ - All entities (tables)
129
+ - Key relationships between entities (if apparent from names or common patterns)
130
+ - Use standard ERD notation
131
+
132
+ Example format:
133
+ ```mermaid
134
+ erDiagram
135
+ User ||--o{{ Order : places
136
+ User {{
137
+ int id PK
138
+ string email
139
+ string name
140
+ }}
141
+ Order {{
142
+ int id PK
143
+ int user_id FK
144
+ datetime created_at
145
+ }}
146
+ ```
147
+
148
+ If specific field details aren't available, show just the entity boxes and
149
+ relationships.
150
+
151
+ ## Key Observations
152
+
153
+ Answer these questions in 1-2 sentences each:
154
+ 1. What is the primary data model pattern (e.g., user-centric,
155
+ event-sourced, multi-tenant)?
156
+ 2. What migration strategy is being used?
157
+ 3. Are there any notable database design patterns or concerns?
158
+
159
+ ## Rules:
160
+ - Be concise and focus on the high-level structure
161
+ - Infer reasonable relationships from table names when explicit information
162
+ isn't available
163
+ - If no database schema is found, state that clearly
164
+ - Keep entity descriptions to 10 words or less
165
+ """
166
+
85
167
 
86
168
  class CommitIndexingApplicationService:
87
169
  """Application service for commit indexing operations."""
@@ -103,6 +185,7 @@ class CommitIndexingApplicationService:
103
185
  text_search_service: EmbeddingDomainService,
104
186
  embedding_repository: SqlAlchemyEmbeddingRepository,
105
187
  architecture_service: PhysicalArchitectureService,
188
+ database_schema_detector: DatabaseSchemaDetector,
106
189
  enricher_service: Enricher,
107
190
  enrichment_v2_repository: EnrichmentV2Repository,
108
191
  enrichment_association_repository: EnrichmentAssociationRepository,
@@ -124,6 +207,7 @@ class CommitIndexingApplicationService:
124
207
  self.text_search_service = text_search_service
125
208
  self.embedding_repository = embedding_repository
126
209
  self.architecture_service = architecture_service
210
+ self.database_schema_detector = database_schema_detector
127
211
  self.enrichment_v2_repository = enrichment_v2_repository
128
212
  self.enrichment_association_repository = enrichment_association_repository
129
213
  self.enricher_service = enricher_service
@@ -191,11 +275,66 @@ class CommitIndexingApplicationService:
191
275
  await self.process_architecture_discovery(repository_id, commit_sha)
192
276
  elif task.type == TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT:
193
277
  await self.process_api_docs(repository_id, commit_sha)
278
+ elif task.type == TaskOperation.CREATE_COMMIT_DESCRIPTION_FOR_COMMIT:
279
+ await self.process_commit_description(repository_id, commit_sha)
280
+ elif task.type == TaskOperation.CREATE_DATABASE_SCHEMA_FOR_COMMIT:
281
+ await self.process_database_schema(repository_id, commit_sha)
194
282
  else:
195
283
  raise ValueError(f"Unknown task type: {task.type}")
196
284
  else:
197
285
  raise ValueError(f"Unknown task type: {task.type}")
198
286
 
287
+ async def _process_files_in_batches(
288
+ self, cloned_path: Path, all_commits: list[GitCommit], batch_size: int = 100
289
+ ) -> int:
290
+ """Process file metadata for all commits in batches to avoid memory exhaustion.
291
+
292
+ This loads file metadata (paths, sizes, blob SHAs) in batches and saves them
293
+ incrementally to avoid holding millions of file objects in memory.
294
+
295
+ Args:
296
+ cloned_path: Path to the cloned repository
297
+ all_commits: List of all commits from scan
298
+ batch_size: Number of commits to process at once (default 100)
299
+
300
+ Returns:
301
+ Total number of files processed
302
+
303
+ """
304
+ total_files = 0
305
+ commit_shas = [commit.commit_sha for commit in all_commits]
306
+ total_batches = (len(commit_shas) + batch_size - 1) // batch_size
307
+
308
+ self._log.info(
309
+ f"Processing files for {len(commit_shas)} commits "
310
+ f"in {total_batches} batches"
311
+ )
312
+
313
+ # Process commits in batches
314
+ for i in range(0, len(commit_shas), batch_size):
315
+ batch = commit_shas[i : i + batch_size]
316
+ batch_num = i // batch_size + 1
317
+
318
+ self._log.debug(
319
+ f"Processing batch {batch_num}/{total_batches} ({len(batch)} commits)"
320
+ )
321
+
322
+ # Get file metadata for this batch of commits
323
+ files = await self.scanner.process_files_for_commits_batch(
324
+ cloned_path, batch
325
+ )
326
+
327
+ # Save file metadata to database immediately
328
+ if files:
329
+ await self.git_file_repository.save_bulk(files)
330
+ total_files += len(files)
331
+ self._log.debug(
332
+ f"Batch {batch_num}: Saved {len(files)} files "
333
+ f"(total so far: {total_files})"
334
+ )
335
+
336
+ return total_files
337
+
199
338
  async def process_clone_repo(self, repository_id: int) -> None:
200
339
  """Clone a repository."""
201
340
  async with self.operation.create_child(
@@ -233,8 +372,11 @@ class CommitIndexingApplicationService:
233
372
  await step.set_current(2, "Saving commits")
234
373
  await self.git_commit_repository.save_bulk(scan_result.all_commits)
235
374
 
236
- await step.set_current(3, "Saving files")
237
- await self.git_file_repository.save_bulk(scan_result.all_files)
375
+ await step.set_current(3, "Processing and saving files in batches")
376
+ total_files = await self._process_files_in_batches(
377
+ repo.cloned_path, scan_result.all_commits
378
+ )
379
+ self._log.info(f"Processed and saved {total_files} total files")
238
380
 
239
381
  await step.set_current(4, "Saving branches")
240
382
  if scan_result.branches:
@@ -798,6 +940,137 @@ class CommitIndexingApplicationService:
798
940
  ]
799
941
  )
800
942
 
943
+ async def process_commit_description(
944
+ self, repository_id: int, commit_sha: str
945
+ ) -> None:
946
+ """Handle COMMIT_DESCRIPTION task - generate commit descriptions."""
947
+ async with self.operation.create_child(
948
+ TaskOperation.CREATE_COMMIT_DESCRIPTION_FOR_COMMIT,
949
+ trackable_type=TrackableType.KODIT_REPOSITORY,
950
+ trackable_id=repository_id,
951
+ ) as step:
952
+ # Check if commit description already exists for this commit
953
+ if await self.enrichment_query_service.has_commit_description_for_commit(
954
+ commit_sha
955
+ ):
956
+ await step.skip("Commit description already exists for commit")
957
+ return
958
+
959
+ # Get repository path
960
+ repo = await self.repo_repository.get(repository_id)
961
+ if not repo.cloned_path:
962
+ raise ValueError(f"Repository {repository_id} has never been cloned")
963
+
964
+ await step.set_total(3)
965
+ await step.set_current(1, "Getting commit diff")
966
+
967
+ # Get the diff for this commit
968
+ diff = await self.scanner.git_adapter.get_commit_diff(
969
+ repo.cloned_path, commit_sha
970
+ )
971
+
972
+ if not diff or len(diff.strip()) == 0:
973
+ await step.skip("No diff found for commit")
974
+ return
975
+
976
+ await step.set_current(2, "Enriching commit description with LLM")
977
+
978
+ # Enrich the diff through the enricher
979
+ enrichment_request = GenericEnrichmentRequest(
980
+ id=commit_sha,
981
+ text=diff,
982
+ system_prompt=COMMIT_DESCRIPTION_SYSTEM_PROMPT,
983
+ )
984
+
985
+ enriched_content = ""
986
+ async for response in self.enricher_service.enrich([enrichment_request]):
987
+ enriched_content = response.text
988
+
989
+ # Create and save commit description enrichment
990
+ enrichment = await self.enrichment_v2_repository.save(
991
+ CommitDescriptionEnrichment(
992
+ content=enriched_content,
993
+ )
994
+ )
995
+ if not enrichment or not enrichment.id:
996
+ raise ValueError(
997
+ f"Failed to save commit description enrichment for commit "
998
+ f"{commit_sha}"
999
+ )
1000
+ await self.enrichment_association_repository.save(
1001
+ CommitEnrichmentAssociation(
1002
+ enrichment_id=enrichment.id,
1003
+ entity_id=commit_sha,
1004
+ )
1005
+ )
1006
+
1007
+ await step.set_current(3, "Commit description enrichment completed")
1008
+
1009
+ async def process_database_schema(
1010
+ self, repository_id: int, commit_sha: str
1011
+ ) -> None:
1012
+ """Handle DATABASE_SCHEMA task - discover and document database schemas."""
1013
+ async with self.operation.create_child(
1014
+ TaskOperation.CREATE_DATABASE_SCHEMA_FOR_COMMIT,
1015
+ trackable_type=TrackableType.KODIT_REPOSITORY,
1016
+ trackable_id=repository_id,
1017
+ ) as step:
1018
+ # Check if database schema already exists for this commit
1019
+ if await self.enrichment_query_service.has_database_schema_for_commit(
1020
+ commit_sha
1021
+ ):
1022
+ await step.skip("Database schema already exists for commit")
1023
+ return
1024
+
1025
+ # Get repository path
1026
+ repo = await self.repo_repository.get(repository_id)
1027
+ if not repo.cloned_path:
1028
+ raise ValueError(f"Repository {repository_id} has never been cloned")
1029
+
1030
+ await step.set_total(3)
1031
+ await step.set_current(1, "Discovering database schemas")
1032
+
1033
+ # Discover database schemas
1034
+ schema_report = await self.database_schema_detector.discover_schemas(
1035
+ repo.cloned_path
1036
+ )
1037
+
1038
+ if "No database schemas detected" in schema_report:
1039
+ await step.skip("No database schemas found in repository")
1040
+ return
1041
+
1042
+ await step.set_current(2, "Enriching schema documentation with LLM")
1043
+
1044
+ # Enrich the schema report through the enricher
1045
+ enrichment_request = GenericEnrichmentRequest(
1046
+ id=commit_sha,
1047
+ text=DATABASE_SCHEMA_TASK_PROMPT.format(schema_report=schema_report),
1048
+ system_prompt=DATABASE_SCHEMA_SYSTEM_PROMPT,
1049
+ )
1050
+
1051
+ enriched_content = ""
1052
+ async for response in self.enricher_service.enrich([enrichment_request]):
1053
+ enriched_content = response.text
1054
+
1055
+ # Create and save database schema enrichment
1056
+ enrichment = await self.enrichment_v2_repository.save(
1057
+ DatabaseSchemaEnrichment(
1058
+ content=enriched_content,
1059
+ )
1060
+ )
1061
+ if not enrichment or not enrichment.id:
1062
+ raise ValueError(
1063
+ f"Failed to save database schema enrichment for commit {commit_sha}"
1064
+ )
1065
+ await self.enrichment_association_repository.save(
1066
+ CommitEnrichmentAssociation(
1067
+ enrichment_id=enrichment.id,
1068
+ entity_id=commit_sha,
1069
+ )
1070
+ )
1071
+
1072
+ await step.set_current(3, "Database schema enrichment completed")
1073
+
801
1074
  async def _new_snippets_for_type(
802
1075
  self, all_snippets: list[EnrichmentV2], embedding_type: EmbeddingType
803
1076
  ) -> list[EnrichmentV2]:
@@ -5,6 +5,9 @@ import structlog
5
5
  from kodit.domain.enrichments.architecture.architecture import (
6
6
  ENRICHMENT_TYPE_ARCHITECTURE,
7
7
  )
8
+ from kodit.domain.enrichments.architecture.database_schema.database_schema import (
9
+ ENRICHMENT_SUBTYPE_DATABASE_SCHEMA,
10
+ )
8
11
  from kodit.domain.enrichments.architecture.physical.physical import (
9
12
  ENRICHMENT_SUBTYPE_PHYSICAL,
10
13
  )
@@ -14,6 +17,10 @@ from kodit.domain.enrichments.development.snippet.snippet import (
14
17
  ENRICHMENT_SUBTYPE_SNIPPET_SUMMARY,
15
18
  )
16
19
  from kodit.domain.enrichments.enrichment import EnrichmentAssociation, EnrichmentV2
20
+ from kodit.domain.enrichments.history.commit_description.commit_description import (
21
+ ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION,
22
+ )
23
+ from kodit.domain.enrichments.history.history import ENRICHMENT_TYPE_HISTORY
17
24
  from kodit.domain.enrichments.usage.api_docs import ENRICHMENT_SUBTYPE_API_DOCS
18
25
  from kodit.domain.enrichments.usage.usage import ENRICHMENT_TYPE_USAGE
19
26
  from kodit.domain.protocols import (
@@ -215,6 +222,36 @@ class EnrichmentQueryService:
215
222
  api_docs = await self.get_api_docs_for_commit(commit_sha)
216
223
  return len(api_docs) > 0
217
224
 
225
+ async def get_commit_description_for_commit(
226
+ self, commit_sha: str
227
+ ) -> list[EnrichmentV2]:
228
+ """Get commit description enrichments for a commit."""
229
+ return await self.get_enrichments_for_commit(
230
+ commit_sha,
231
+ enrichment_type=ENRICHMENT_TYPE_HISTORY,
232
+ enrichment_subtype=ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION,
233
+ )
234
+
235
+ async def has_commit_description_for_commit(self, commit_sha: str) -> bool:
236
+ """Check if a commit has commit description enrichments."""
237
+ commit_descriptions = await self.get_commit_description_for_commit(commit_sha)
238
+ return len(commit_descriptions) > 0
239
+
240
+ async def get_database_schema_for_commit(
241
+ self, commit_sha: str
242
+ ) -> list[EnrichmentV2]:
243
+ """Get database schema enrichments for a commit."""
244
+ return await self.get_enrichments_for_commit(
245
+ commit_sha,
246
+ enrichment_type=ENRICHMENT_TYPE_ARCHITECTURE,
247
+ enrichment_subtype=ENRICHMENT_SUBTYPE_DATABASE_SCHEMA,
248
+ )
249
+
250
+ async def has_database_schema_for_commit(self, commit_sha: str) -> bool:
251
+ """Check if a commit has database schema enrichments."""
252
+ database_schemas = await self.get_database_schema_for_commit(commit_sha)
253
+ return len(database_schemas) > 0
254
+
218
255
  async def associations_for_enrichments(
219
256
  self, enrichments: list[EnrichmentV2]
220
257
  ) -> list[EnrichmentAssociation]:
@@ -0,0 +1 @@
1
+ """Database schema enrichments."""
@@ -0,0 +1,17 @@
1
+ """Database schema enrichment domain entity."""
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from kodit.domain.enrichments.architecture.architecture import ArchitectureEnrichment
6
+
7
+ ENRICHMENT_SUBTYPE_DATABASE_SCHEMA = "database_schema"
8
+
9
+
10
+ @dataclass(frozen=True)
11
+ class DatabaseSchemaEnrichment(ArchitectureEnrichment):
12
+ """Enrichment containing database schema information for a commit."""
13
+
14
+ @property
15
+ def subtype(self) -> str | None:
16
+ """Return the enrichment subtype."""
17
+ return ENRICHMENT_SUBTYPE_DATABASE_SCHEMA
@@ -0,0 +1 @@
1
+ """History enrichments."""
@@ -0,0 +1 @@
1
+ """Commit description enrichments."""
@@ -0,0 +1,17 @@
1
+ """Commit description enrichment domain entity."""
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from kodit.domain.enrichments.history.history import HistoryEnrichment
6
+
7
+ ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION = "commit_description"
8
+
9
+
10
+ @dataclass(frozen=True)
11
+ class CommitDescriptionEnrichment(HistoryEnrichment):
12
+ """Enrichment containing a description of what a commit did."""
13
+
14
+ @property
15
+ def subtype(self) -> str | None:
16
+ """Return the enrichment subtype."""
17
+ return ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION
@@ -0,0 +1,18 @@
1
+ """History enrichment domain entity."""
2
+
3
+ from abc import ABC
4
+ from dataclasses import dataclass
5
+
6
+ from kodit.domain.enrichments.enrichment import CommitEnrichment
7
+
8
+ ENRICHMENT_TYPE_HISTORY = "history"
9
+
10
+
11
+ @dataclass(frozen=True)
12
+ class HistoryEnrichment(CommitEnrichment, ABC):
13
+ """Enrichment containing historical information for a commit."""
14
+
15
+ @property
16
+ def type(self) -> str:
17
+ """Return the enrichment type."""
18
+ return ENRICHMENT_TYPE_HISTORY
kodit/domain/protocols.py CHANGED
@@ -4,6 +4,8 @@ from abc import ABC, abstractmethod
4
4
  from pathlib import Path
5
5
  from typing import Any, Protocol, TypeVar
6
6
 
7
+ from git import Repo
8
+
7
9
  from kodit.domain.enrichments.enrichment import EnrichmentAssociation, EnrichmentV2
8
10
  from kodit.domain.entities import (
9
11
  Task,
@@ -163,9 +165,16 @@ class GitAdapter(ABC):
163
165
 
164
166
  @abstractmethod
165
167
  async def get_commit_files(
166
- self, local_path: Path, commit_sha: str
168
+ self, local_path: Path, commit_sha: str, repo: Repo
167
169
  ) -> list[dict[str, Any]]:
168
- """Get all files in a specific commit from the git tree."""
170
+ """Get all files in a specific commit from the git tree.
171
+
172
+ Args:
173
+ local_path: Path to the repository
174
+ commit_sha: SHA of the commit to get files for
175
+ repo: Repo object to reuse (avoids creating new Repo per commit)
176
+
177
+ """
169
178
 
170
179
  @abstractmethod
171
180
  async def get_commit_file_data(
@@ -213,6 +222,10 @@ class GitAdapter(ABC):
213
222
  ) -> list[str]:
214
223
  """Get only commit SHAs for a branch (much faster than full commit data)."""
215
224
 
225
+ @abstractmethod
226
+ async def get_commit_diff(self, local_path: Path, commit_sha: str) -> str:
227
+ """Get the diff for a specific commit."""
228
+
216
229
 
217
230
  class SnippetRepositoryV2(ABC):
218
231
  """Repository for snippet operations."""
@@ -1,6 +1,5 @@
1
1
  """Domain services for Git repository scanning and cloning operations."""
2
2
 
3
- import asyncio
4
3
  import shutil
5
4
  from dataclasses import dataclass
6
5
  from datetime import UTC, datetime
@@ -66,51 +65,11 @@ class GitRepositoryScanner:
66
65
  tags = await self._process_tags(cloned_path, commit_cache, repo_id)
67
66
  self._log.info(f"Found {len(tags)} tags")
68
67
 
69
- all_files = await self._process_files(cloned_path, commit_cache)
70
- self._log.info(f"Found {len(all_files)} files")
68
+ # Don't load all files into memory - return empty list
69
+ # Files will be processed in batches by the application service
70
+ self._log.info("Deferring file processing to avoid memory exhaustion")
71
71
 
72
- return self._create_scan_result(branches, commit_cache, tags, all_files)
73
-
74
- async def _process_commits_concurrently(
75
- self,
76
- cloned_path: Path,
77
- commits_batch: list[tuple[str, dict[str, Any]]],
78
- ) -> dict[str, GitCommit]:
79
- """Process a batch of commits concurrently."""
80
- batch_cache = {}
81
-
82
- async def process_single_commit(
83
- commit_sha: str, commit_data: dict[str, Any]
84
- ) -> tuple[str, GitCommit | None]:
85
- git_commit = await self._create_git_commit_from_data(
86
- cloned_path, commit_data
87
- )
88
- return commit_sha, git_commit
89
-
90
- # Process commits concurrently in smaller batches
91
- semaphore = asyncio.Semaphore(50) # Limit concurrent operations
92
-
93
- async def bounded_process(
94
- item: tuple[str, dict[str, Any]],
95
- ) -> tuple[str, GitCommit | None]:
96
- async with semaphore:
97
- return await process_single_commit(item[0], item[1])
98
-
99
- # Process all commits concurrently
100
- results = await asyncio.gather(
101
- *[bounded_process(item) for item in commits_batch],
102
- return_exceptions=True,
103
- )
104
-
105
- # Collect successful results
106
- for result in results:
107
- if isinstance(result, tuple):
108
- # Type narrowing: result is now tuple[str, GitCommit | None]
109
- commit_sha, git_commit = result
110
- if git_commit is not None:
111
- batch_cache[commit_sha] = git_commit
112
-
113
- return batch_cache
72
+ return self._create_scan_result(branches, commit_cache, tags, [], cloned_path)
114
73
 
115
74
  async def _process_branches_bulk(
116
75
  self,
@@ -167,30 +126,6 @@ class GitRepositoryScanner:
167
126
 
168
127
  return branches, commit_cache
169
128
 
170
- async def _create_git_commit_from_data(
171
- self, cloned_path: Path, commit_data: dict[str, Any], repo_id: int | None = None
172
- ) -> GitCommit | None:
173
- """Create GitCommit from pre-fetched commit data."""
174
- commit_sha = commit_data["sha"]
175
-
176
- # Get files for this commit
177
- files_data = await self.git_adapter.get_commit_files(cloned_path, commit_sha)
178
- self._create_git_files(cloned_path, files_data, commit_sha)
179
- author = self._format_author_from_data(commit_data)
180
-
181
- # Cache datetime creation
182
- created_at = datetime.now(UTC)
183
-
184
- return GitCommit(
185
- created_at=created_at,
186
- commit_sha=commit_sha,
187
- repo_id=repo_id or 0, # Use 0 as default if not provided
188
- date=commit_data["date"],
189
- message=commit_data["message"],
190
- parent_commit_sha=commit_data["parent_sha"],
191
- author=author,
192
- )
193
-
194
129
  def _format_author_from_data(self, commit_data: dict[str, Any]) -> str:
195
130
  """Format author string from commit data."""
196
131
  author_name = commit_data.get("author_name", "")
@@ -376,17 +311,18 @@ class GitRepositoryScanner:
376
311
  branches: list[GitBranch],
377
312
  commit_cache: dict[str, GitCommit],
378
313
  tags: list[GitTag],
379
- all_files: list[GitFile],
314
+ all_files: list[GitFile], # noqa: ARG002
315
+ cloned_path: Path | None = None, # noqa: ARG002
380
316
  ) -> RepositoryScanResult:
381
317
  """Create final scan result."""
382
- # Files are loaded on-demand for performance, so total_files is 0 during scan
318
+ # Files list is empty to avoid memory issues - will be processed in batches
383
319
  scan_result = RepositoryScanResult(
384
320
  branches=branches,
385
321
  all_commits=list(commit_cache.values()),
386
322
  scan_timestamp=datetime.now(UTC),
387
- total_files_across_commits=len(all_files),
323
+ total_files_across_commits=0, # Will be updated after batch processing
388
324
  all_tags=tags,
389
- all_files=all_files,
325
+ all_files=[], # Empty - processed in batches to avoid memory exhaustion
390
326
  )
391
327
 
392
328
  self._log.info(
@@ -395,16 +331,35 @@ class GitRepositoryScanner:
395
331
  )
396
332
  return scan_result
397
333
 
398
- async def _process_files(
399
- self, cloned_path: Path, commit_cache: dict[str, GitCommit]
334
+ async def process_files_for_commits_batch(
335
+ self, cloned_path: Path, commit_shas: list[str]
400
336
  ) -> list[GitFile]:
401
- """Process files for a commit."""
337
+ """Process files for a batch of commits.
338
+
339
+ This allows the application service to process files in batches
340
+ to avoid loading millions of files into memory at once.
341
+
342
+ CRITICAL: Reuses a single Repo object to avoid creating 32K+ Repo instances
343
+ which would consume massive memory (1-2 MB each).
344
+ """
345
+ from git import Repo
346
+
347
+ # Open repo once and reuse for all commits in this batch
348
+ repo = Repo(cloned_path)
402
349
  files = []
403
- for commit_sha in commit_cache:
404
- files_data = await self.git_adapter.get_commit_files(
405
- cloned_path, commit_sha
406
- )
407
- files.extend(self._create_git_files(cloned_path, files_data, commit_sha))
350
+
351
+ try:
352
+ for commit_sha in commit_shas:
353
+ files_data = await self.git_adapter.get_commit_files(
354
+ cloned_path, commit_sha, repo=repo
355
+ )
356
+ files.extend(
357
+ self._create_git_files(cloned_path, files_data, commit_sha)
358
+ )
359
+ finally:
360
+ # Explicitly close the repo to free resources
361
+ repo.close()
362
+
408
363
  return files
409
364
 
410
365
 
@@ -614,6 +614,8 @@ class TaskOperation(StrEnum):
614
614
  "kodit.commit.create_architecture_enrichment"
615
615
  )
616
616
  CREATE_PUBLIC_API_DOCS_FOR_COMMIT = "kodit.commit.create_public_api_docs"
617
+ CREATE_COMMIT_DESCRIPTION_FOR_COMMIT = "kodit.commit.create_commit_description"
618
+ CREATE_DATABASE_SCHEMA_FOR_COMMIT = "kodit.commit.create_database_schema"
617
619
 
618
620
  def is_repository_operation(self) -> bool:
619
621
  """Check if the task operation is a repository operation."""
@@ -639,6 +641,8 @@ class PrescribedOperations:
639
641
  TaskOperation.CREATE_SUMMARY_EMBEDDINGS_FOR_COMMIT,
640
642
  TaskOperation.CREATE_ARCHITECTURE_ENRICHMENT_FOR_COMMIT,
641
643
  TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT,
644
+ TaskOperation.CREATE_COMMIT_DESCRIPTION_FOR_COMMIT,
645
+ TaskOperation.CREATE_DATABASE_SCHEMA_FOR_COMMIT,
642
646
  ]
643
647
  SYNC_REPOSITORY: ClassVar[list[TaskOperation]] = [
644
648
  TaskOperation.SCAN_REPOSITORY,
@@ -346,14 +346,22 @@ class GitPythonAdapter(GitAdapter):
346
346
  )
347
347
 
348
348
  async def get_commit_files(
349
- self, local_path: Path, commit_sha: str
349
+ self, local_path: Path, commit_sha: str, repo: Repo
350
350
  ) -> list[dict[str, Any]]:
351
- """Get all files in a specific commit from the git tree."""
351
+ """Get all files in a specific commit from the git tree.
352
+
353
+ Args:
354
+ local_path: Path to the repository
355
+ commit_sha: SHA of the commit to get files for
356
+ repo: Repo object to reuse (avoids creating new Repo per commit)
357
+
358
+ """
352
359
 
353
360
  def _get_files() -> list[dict[str, Any]]:
354
361
  try:
355
- repo = Repo(local_path)
356
- commit = repo.commit(commit_sha)
362
+ # Use the provided repo object
363
+ _repo = repo
364
+ commit = _repo.commit(commit_sha)
357
365
 
358
366
  files = []
359
367
 
@@ -395,7 +403,11 @@ class GitPythonAdapter(GitAdapter):
395
403
  """Get file metadata for a commit, with files checked out to disk."""
396
404
  await self._checkout_commit(local_path, commit_sha)
397
405
  try:
398
- return await self.get_commit_files(local_path, commit_sha)
406
+ repo = Repo(local_path)
407
+ try:
408
+ return await self.get_commit_files(local_path, commit_sha, repo)
409
+ finally:
410
+ repo.close()
399
411
  finally:
400
412
  await self.restore_to_branch(local_path, "main")
401
413
 
@@ -532,3 +544,42 @@ class GitPythonAdapter(GitAdapter):
532
544
  raise
533
545
 
534
546
  return await asyncio.get_event_loop().run_in_executor(self.executor, _get_tags)
547
+
548
+ async def get_commit_diff(self, local_path: Path, commit_sha: str) -> str:
549
+ """Get the diff for a specific commit."""
550
+
551
+ def _get_diff() -> str:
552
+ try:
553
+ repo = Repo(local_path)
554
+ commit = repo.commit(commit_sha)
555
+
556
+ # If this is the first commit (no parents), show diff against empty tree
557
+ if not commit.parents:
558
+ diffs = commit.diff(None, create_patch=True)
559
+ if not diffs:
560
+ return ""
561
+ first_diff = diffs[0]
562
+ diff_bytes = first_diff.diff
563
+ if isinstance(diff_bytes, bytes):
564
+ return diff_bytes.decode("utf-8")
565
+ return str(diff_bytes) if diff_bytes is not None else ""
566
+
567
+ # For commits with parents, show diff against first parent
568
+ parent = commit.parents[0]
569
+ diffs = parent.diff(commit, create_patch=True)
570
+
571
+ # Combine all diffs into a single string
572
+ diff_text = ""
573
+ for diff in diffs:
574
+ diff_bytes = diff.diff
575
+ if diff_bytes and isinstance(diff_bytes, bytes):
576
+ diff_text += diff_bytes.decode("utf-8")
577
+ except Exception as e:
578
+ self._log.error(
579
+ f"Failed to get diff for commit {commit_sha} in {local_path}: {e}"
580
+ )
581
+ raise
582
+ else:
583
+ return diff_text
584
+
585
+ return await asyncio.get_event_loop().run_in_executor(self.executor, _get_diff)
@@ -0,0 +1 @@
1
+ """Database schema detection infrastructure."""
@@ -0,0 +1,268 @@
1
+ """Database schema detector for discovering database schemas in a repository."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from typing import ClassVar
6
+
7
+
8
class DatabaseSchemaDetector:
    """Detects database schemas from various sources in a repository.

    Scans a repository for migration files, raw SQL schema files and ORM
    model definitions, then renders the combined findings as a markdown
    report via :meth:`discover_schemas`.
    """

    # Glob patterns for common migration layouts (Alembic, Django, Rails,
    # golang-migrate, Flyway, Liquibase). Patterns may overlap; duplicate
    # matches are filtered during detection.
    MIGRATION_PATTERNS: ClassVar[list[str]] = [
        "**/migrations/**/*.sql",
        "**/migrations/**/*.py",
        "**/migrate/**/*.sql",
        "**/migrate/**/*.go",
        "**/db/migrate/**/*.rb",
        "**/alembic/versions/**/*.py",
        "**/liquibase/**/*.xml",
        "**/flyway/**/*.sql",
    ]

    # Glob patterns for raw SQL schema files. Note that "**/*.sql" already
    # subsumes the narrower patterns; detection deduplicates matches.
    SQL_FILE_PATTERNS: ClassVar[list[str]] = [
        "**/*.sql",
        "**/schema/**/*.sql",
        "**/schemas/**/*.sql",
        "**/database/**/*.sql",
        "**/db/**/*.sql",
    ]

    ORM_MODEL_PATTERNS: ClassVar[list[str]] = [
        "**/models/**/*.py",  # SQLAlchemy, Django
        "**/models/**/*.go",  # GORM
        "**/entities/**/*.py",  # SQLAlchemy
        "**/entities/**/*.ts",  # TypeORM
        "**/entities/**/*.js",  # TypeORM/Sequelize
    ]

    # CREATE TABLE [IF NOT EXISTS] `name` / "name" / name
    CREATE_TABLE_PATTERN = re.compile(
        r"CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?[`\"]?(\w+)[`\"]?",
        re.IGNORECASE,
    )

    # class Foo(Base) / class Foo(db.Model) / class Foo(Model)
    SQLALCHEMY_MODEL_PATTERN = re.compile(
        r"class\s+(\w+)\s*\([^)]*(?:Base|Model|db\.Model)[^)]*\):",
        re.MULTILINE,
    )

    # type Foo struct { ... gorm.Model ... }
    GORM_MODEL_PATTERN = re.compile(
        r"type\s+(\w+)\s+struct\s*{[^}]*gorm\.Model",
        re.MULTILINE | re.DOTALL,
    )

    # @Entity(...) [export] class Foo
    TYPEORM_ENTITY_PATTERN = re.compile(
        r"@Entity\([^)]*\)\s*(?:export\s+)?class\s+(\w+)",
        re.MULTILINE,
    )

    async def discover_schemas(self, repo_path: Path) -> str:
        """Discover database schemas under *repo_path* and return a report.

        Args:
            repo_path: Root of the repository checkout to scan.

        Returns:
            A markdown report of discovered tables, migrations, SQL files
            and ORM models (or a "nothing found" message).
        """
        findings: dict[str, set[str] | list[str] | list[dict]] = {
            "tables": set(),
            "migration_files": [],
            "sql_files": [],
            "orm_models": [],
        }

        # Detect migration files first so SQL detection can exclude them.
        await self._detect_migrations(repo_path, findings)

        # Detect SQL schema files
        await self._detect_sql_files(repo_path, findings)

        # Detect ORM models
        await self._detect_orm_models(repo_path, findings)

        return self._generate_report(findings)

    async def _detect_migrations(self, repo_path: Path, findings: dict) -> None:
        """Collect migration files and harvest table names from them."""
        seen: set[str] = set()
        for pattern in self.MIGRATION_PATTERNS:
            for file_path in repo_path.glob(pattern):
                if not file_path.is_file():
                    continue
                rel_path = str(file_path.relative_to(repo_path))
                # Overlapping patterns may match the same file twice.
                if rel_path in seen:
                    continue
                seen.add(rel_path)
                findings["migration_files"].append(rel_path)
                # Try to extract table names from migrations
                await self._extract_tables_from_file(file_path, findings)

    async def _detect_sql_files(self, repo_path: Path, findings: dict) -> None:
        """Collect non-migration SQL files and harvest table names."""
        migration_paths = set(findings["migration_files"])
        seen: set[str] = set()

        for pattern in self.SQL_FILE_PATTERNS:
            for file_path in repo_path.glob(pattern):
                if not file_path.is_file():
                    continue
                rel_path = str(file_path.relative_to(repo_path))
                # Skip migrations and files already matched by a broader
                # pattern (e.g. "**/*.sql" subsumes "**/db/**/*.sql").
                if rel_path in migration_paths or rel_path in seen:
                    continue
                seen.add(rel_path)
                findings["sql_files"].append(rel_path)
                await self._extract_tables_from_file(file_path, findings)

    async def _detect_orm_models(self, repo_path: Path, findings: dict) -> None:
        """Collect ORM model files and record their model/entity names."""
        seen: set[str] = set()
        for pattern in self.ORM_MODEL_PATTERNS:
            for file_path in repo_path.glob(pattern):
                if not file_path.is_file():
                    continue
                rel_path = str(file_path.relative_to(repo_path))
                if rel_path in seen:
                    continue
                seen.add(rel_path)
                models = await self._extract_orm_models(file_path)
                if models:
                    findings["orm_models"].append({
                        "file": rel_path,
                        "models": models,
                    })
                    # Model class names double as table/entity names.
                    findings["tables"].update(models)

    async def _extract_tables_from_file(self, file_path: Path, findings: dict) -> None:
        """Extract CREATE TABLE names from an SQL or migration file.

        Unreadable files are silently skipped: this is best-effort
        discovery, not validation.
        """
        try:
            content = file_path.read_text(encoding="utf-8", errors="ignore")

            # Look for CREATE TABLE statements
            for match in self.CREATE_TABLE_PATTERN.finditer(content):
                findings["tables"].add(match.group(1))

        except (OSError, UnicodeDecodeError):
            pass

    async def _extract_orm_models(self, file_path: Path) -> list[str]:
        """Extract ORM model class names from a model file.

        The file's suffix selects the regex: .py → SQLAlchemy/Django,
        .go → GORM, .ts/.js → TypeORM. Unreadable files yield [].
        """
        models: list[str] = []

        try:
            content = file_path.read_text(encoding="utf-8", errors="ignore")
            suffix = file_path.suffix

            if suffix == ".py":
                # SQLAlchemy or Django models
                models.extend(
                    match.group(1)
                    for match in self.SQLALCHEMY_MODEL_PATTERN.finditer(content)
                )

            elif suffix == ".go":
                # GORM models
                models.extend(
                    match.group(1)
                    for match in self.GORM_MODEL_PATTERN.finditer(content)
                )

            elif suffix in [".ts", ".js"]:
                # TypeORM entities
                models.extend(
                    match.group(1)
                    for match in self.TYPEORM_ENTITY_PATTERN.finditer(content)
                )

        except (OSError, UnicodeDecodeError):
            pass

        return models

    def _generate_report(self, findings: dict) -> str:  # noqa: PLR0915, C901, PLR0912
        """Render *findings* as a markdown report string."""
        lines: list[str] = []

        # Summary
        lines.append("# Database Schema Discovery Report")
        lines.append("")

        has_findings = (
            findings["tables"]
            or findings["migration_files"]
            or findings["sql_files"]
            or findings["orm_models"]
        )
        if not has_findings:
            lines.append("No database schemas detected in this repository.")
            return "\n".join(lines)

        # Tables/Entities found
        if findings["tables"]:
            lines.append(f"## Detected Tables/Entities ({len(findings['tables'])})")
            lines.append("")
            lines.extend(f"- {table}" for table in sorted(findings["tables"]))
            lines.append("")

        # Migration files (capped at 10 entries to keep the report short)
        if findings["migration_files"]:
            lines.append(f"## Migration Files ({len(findings['migration_files'])})")
            lines.append("")
            lines.append(
                "Database migrations detected, suggesting schema evolution over time:"
            )
            lines.extend(
                f"- {mig_file}" for mig_file in findings["migration_files"][:10]
            )
            if len(findings["migration_files"]) > 10:
                lines.append(f"- ... and {len(findings['migration_files']) - 10} more")
            lines.append("")

        # SQL files (capped at 10)
        if findings["sql_files"]:
            lines.append(f"## SQL Schema Files ({len(findings['sql_files'])})")
            lines.append("")
            lines.extend(f"- {sql_file}" for sql_file in findings["sql_files"][:10])
            if len(findings["sql_files"]) > 10:
                lines.append(f"- ... and {len(findings['sql_files']) - 10} more")
            lines.append("")

        # ORM models (capped at 10 files, 5 model names per file)
        if findings["orm_models"]:
            lines.append(f"## ORM Models ({len(findings['orm_models'])} files)")
            lines.append("")
            lines.append(
                "ORM models detected, suggesting object-relational mapping:"
            )
            for orm_info in findings["orm_models"][:10]:  # Limit to first 10
                model_names = ", ".join(orm_info["models"][:5])
                lines.append(f"- {orm_info['file']}: {model_names}")
                if len(orm_info["models"]) > 5:
                    lines.append(f"  (and {len(orm_info['models']) - 5} more models)")
            if len(findings["orm_models"]) > 10:
                lines.append(f"- ... and {len(findings['orm_models']) - 10} more files")
            lines.append("")

        # Inferred database type
        lines.append("## Inferred Information")
        lines.append("")

        # Framework inference is heuristic: substring checks on the joined
        # path list, first match wins.
        mig_files_str = str(findings.get("migration_files", []))
        mig_files = findings.get("migration_files", [])

        if "alembic" in mig_files_str:
            lines.append("- Migration framework: Alembic (Python/SQLAlchemy)")
        elif "django" in mig_files_str or any(
            "migrations" in f and f.endswith(".py") for f in mig_files
        ):
            lines.append("- Migration framework: Django Migrations")
        elif any(".go" in f for f in mig_files):
            lines.append(
                "- Migration framework: Go-based migrations (golang-migrate)"
            )
        elif "flyway" in mig_files_str:
            lines.append("- Migration framework: Flyway")
        elif "liquibase" in mig_files_str:
            lines.append("- Migration framework: Liquibase")

        if findings["orm_models"]:
            orm_models = findings["orm_models"]
            py_models = sum(1 for m in orm_models if m["file"].endswith(".py"))
            go_models = sum(1 for m in orm_models if m["file"].endswith(".go"))
            ts_models = sum(
                1 for m in orm_models if m["file"].endswith((".ts", ".js"))
            )

            if py_models > 0:
                lines.append("- ORM: Python (likely SQLAlchemy or Django ORM)")
            if go_models > 0:
                lines.append("- ORM: Go (likely GORM)")
            if ts_models > 0:
                lines.append(
                    "- ORM: TypeScript/JavaScript (likely TypeORM or Sequelize)"
                )

        return "\n".join(lines)
@@ -7,6 +7,10 @@ from sqlalchemy.ext.asyncio import AsyncSession
7
7
  from kodit.domain.enrichments.architecture.architecture import (
8
8
  ENRICHMENT_TYPE_ARCHITECTURE,
9
9
  )
10
+ from kodit.domain.enrichments.architecture.database_schema.database_schema import (
11
+ ENRICHMENT_SUBTYPE_DATABASE_SCHEMA,
12
+ DatabaseSchemaEnrichment,
13
+ )
10
14
  from kodit.domain.enrichments.architecture.physical.physical import (
11
15
  ENRICHMENT_SUBTYPE_PHYSICAL,
12
16
  PhysicalArchitectureEnrichment,
@@ -19,6 +23,11 @@ from kodit.domain.enrichments.development.snippet.snippet import (
19
23
  SnippetEnrichmentSummary,
20
24
  )
21
25
  from kodit.domain.enrichments.enrichment import EnrichmentV2
26
+ from kodit.domain.enrichments.history.commit_description.commit_description import (
27
+ ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION,
28
+ CommitDescriptionEnrichment,
29
+ )
30
+ from kodit.domain.enrichments.history.history import ENRICHMENT_TYPE_HISTORY
22
31
  from kodit.domain.enrichments.usage.api_docs import (
23
32
  ENRICHMENT_SUBTYPE_API_DOCS,
24
33
  APIDocEnrichment,
@@ -131,6 +140,26 @@ class SQLAlchemyEnrichmentV2Repository(
131
140
  created_at=db_entity.created_at,
132
141
  updated_at=db_entity.updated_at,
133
142
  )
143
+ if (
144
+ db_entity.type == ENRICHMENT_TYPE_HISTORY
145
+ and db_entity.subtype == ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION
146
+ ):
147
+ return CommitDescriptionEnrichment(
148
+ id=db_entity.id,
149
+ content=db_entity.content,
150
+ created_at=db_entity.created_at,
151
+ updated_at=db_entity.updated_at,
152
+ )
153
+ if (
154
+ db_entity.type == ENRICHMENT_TYPE_ARCHITECTURE
155
+ and db_entity.subtype == ENRICHMENT_SUBTYPE_DATABASE_SCHEMA
156
+ ):
157
+ return DatabaseSchemaEnrichment(
158
+ id=db_entity.id,
159
+ content=db_entity.content,
160
+ created_at=db_entity.created_at,
161
+ updated_at=db_entity.updated_at,
162
+ )
134
163
 
135
164
  raise ValueError(
136
165
  f"Unknown enrichment type: {db_entity.type}/{db_entity.subtype}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kodit
3
- Version: 0.5.5
3
+ Version: 0.5.7
4
4
  Summary: Code indexing for better AI code generation
5
5
  Project-URL: Homepage, https://docs.helixml.tech/kodit/
6
6
  Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -1,7 +1,7 @@
1
1
  kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
2
2
  kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
3
- kodit/_version.py,sha256=0gHg6pqkExJvz1iV3rjNnM6ZmxmZPlhVrGzZVWp6WuA,704
4
- kodit/app.py,sha256=niIfZiuuDp7mLzrBwQhx_FU7RvKfUALNV5y0o43miss,5802
3
+ kodit/_version.py,sha256=NvV7p6eu_Rli4DWHJnEcpyTUiImNPPDyoDonzzIsNwA,704
4
+ kodit/app.py,sha256=7WxSQcktnpYBmjO1skIjMeBu55rVVRf4lotBEq55pAM,5846
5
5
  kodit/cli.py,sha256=QSTXIUDxZo3anIONY-grZi9_VSehWoS8QoVJZyOmWPQ,3086
6
6
  kodit/cli_utils.py,sha256=umkvt4kWNapk6db6RGz6bmn7oxgDpsW2Vo09MZ37OGg,2430
7
7
  kodit/config.py,sha256=x_67lawaejOenJvl8yMxzXgdIkeWx8Yyc2ISO37GCvc,8031
@@ -13,19 +13,19 @@ kodit/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  kodit/application/__init__.py,sha256=mH50wTpgP9dhbKztFsL8Dda9Hi18TSnMVxXtpp4aGOA,35
14
14
  kodit/application/factories/__init__.py,sha256=bU5CvEnaBePZ7JbkCOp1MGTNP752bnU2uEqmfy5FdRk,37
15
15
  kodit/application/factories/reporting_factory.py,sha256=3IpRiAw_olM69db-jbDAtjyGtd6Nh5o8jUJX3-rXCA8,1421
16
- kodit/application/factories/server_factory.py,sha256=Y99haqn_cv9Gci4cC4YRzkfoLUTWtERS9Ghgo5NjGFI,17236
16
+ kodit/application/factories/server_factory.py,sha256=dr0X_zQRUlEybtGZ3NS-kkwTU-K96u2D1Qw5xhWkd88,17409
17
17
  kodit/application/services/__init__.py,sha256=p5UQNw-H5sxQvs5Etfte93B3cJ1kKW6DNxK34uFvU1E,38
18
18
  kodit/application/services/code_search_application_service.py,sha256=ceyv5TTN-jvlOFOupGa9XwfTTraLNN2GU55kFeulVXY,7763
19
- kodit/application/services/commit_indexing_application_service.py,sha256=SZEWjgFR1dd1yFsWnVxtOUQ_Dh_AA37cXhIsbrmAvd0,34746
20
- kodit/application/services/enrichment_query_service.py,sha256=ICAuMY8iw1LlioXXGDPLCcxeL9kPQYMbNg9YNvOpbXk,13362
19
+ kodit/application/services/commit_indexing_application_service.py,sha256=uRYPkVbiqu1V9bORjQu2yoylskLgCz55vYJ1pODjm94,44690
20
+ kodit/application/services/enrichment_query_service.py,sha256=RMVze-DzS5zAki1iC96Kid7tbg-nHSv0z8eqPsiURqc,15002
21
21
  kodit/application/services/indexing_worker_service.py,sha256=59cZthlzViOVrAWEoZqUTCfLzxx2OO_FOGdM3pYf9Mc,4065
22
22
  kodit/application/services/queue_service.py,sha256=pIHTS8M65FzAhZH5kn54BTiZ43sCbsALYdCFTz9wdqE,2692
23
23
  kodit/application/services/reporting.py,sha256=cwe-S-UpSOE6xSAEhoD1hi4hSWk1bW3YRLJ7463fIvM,3518
24
24
  kodit/application/services/sync_scheduler.py,sha256=hVT3dlmvfbqXKOV_KU5ZQ5gEKBGPJTlvJcF9gP2ZHQM,2853
25
25
  kodit/domain/__init__.py,sha256=TCpg4Xx-oF4mKV91lo4iXqMEfBT1OoRSYnbG-zVWolA,66
26
26
  kodit/domain/errors.py,sha256=yIsgCjM_yOFIg8l7l-t7jM8pgeAX4cfPq0owf7iz3DA,106
27
- kodit/domain/protocols.py,sha256=YQqL1XO2OWqYhPC8W7nyPrqGW3NKafMEJn2HEGXwHIk,7664
28
- kodit/domain/value_objects.py,sha256=svHQixeLa8fzaJ5NuKl3rBLBVrPfV7VvFd2-U-Vh_Sk,17818
27
+ kodit/domain/protocols.py,sha256=Q6blYD79Tn5LQyNEAioTuPPIdZYXDf46kVpAW2EG2jY,8056
28
+ kodit/domain/value_objects.py,sha256=FW0sTMtcl0Q1qej7vzEg7-Gsv86Z01IbPrDdudsgU3g,18097
29
29
  kodit/domain/enrichments/__init__.py,sha256=UpQMnMEHqaK3u3K-eJZOC28kfBPHALLAjFMdyYBXSPE,33
30
30
  kodit/domain/enrichments/enricher.py,sha256=jnZ5X9RmZA8Acy-RBS2TbEoBg9QSm8AgleqwS9h5WlY,512
31
31
  kodit/domain/enrichments/enrichment.py,sha256=_4lAOFibvSRN-01HB7it61k38IGQsub0gVERqRrhWPs,1776
@@ -33,6 +33,8 @@ kodit/domain/enrichments/request.py,sha256=6zBQhliDcdw8vS4sYPG2mqZSDSbQ5VzY1YQ-4
33
33
  kodit/domain/enrichments/response.py,sha256=NzoMAKgs7c2yo9vvgWjQDo1yO0koKHbbY_SrsqsalAk,205
34
34
  kodit/domain/enrichments/architecture/__init__.py,sha256=hBSliXMuixUZKtF-_zvcgQjnqrdyc4_SjYG2PTRFYpg,39
35
35
  kodit/domain/enrichments/architecture/architecture.py,sha256=_3nF9qdBdcA8rTXPkb1KO2F7OXTcH9SajqNYB-ICaZA,507
36
+ kodit/domain/enrichments/architecture/database_schema/__init__.py,sha256=xS5UGMfHqOXjpr4ZZQup3IUtevJxBDXMH4fO7wuH5vM,35
37
+ kodit/domain/enrichments/architecture/database_schema/database_schema.py,sha256=OOiG5SrN4Jyw_L6_-UVg4DaGWNo5JLiii7e21ZLtDvY,529
36
38
  kodit/domain/enrichments/architecture/physical/__init__.py,sha256=4jc89cGxALWo8d3Xzfb5t-YjcCyDb1dDVGwTqVYBFmc,48
37
39
  kodit/domain/enrichments/architecture/physical/discovery_notes.py,sha256=Wdv41rkUcMgRqXWB5Q9roaGMGFznH4V_I7mELUvDShw,636
38
40
  kodit/domain/enrichments/architecture/physical/formatter.py,sha256=V_JvHsGDPPJ-TqGS-G61P3OS3xe0QpS2NLBEk5jX6Yc,351
@@ -41,6 +43,10 @@ kodit/domain/enrichments/development/__init__.py,sha256=ls7zlKUpSpyLZRl-WTuaow9C
41
43
  kodit/domain/enrichments/development/development.py,sha256=amzcheLEtXbOyhhmjlay_yt1Z2FRyW2CrR8wZWkpC0g,483
42
44
  kodit/domain/enrichments/development/snippet/__init__.py,sha256=M5XVnlDgfqSE5UiAqkQwE1Mbr5Rg8zQpcspHKC3k_xU,34
43
45
  kodit/domain/enrichments/development/snippet/snippet.py,sha256=A1f385Bu3_ZBaDKQrGHZMb6GIiQoo-hORFSw2ca56yQ,791
46
+ kodit/domain/enrichments/history/__init__.py,sha256=OXS0MOFEjD76rBOmLl8yA2L3Q8NYebBkoGhAmgbO2O0,27
47
+ kodit/domain/enrichments/history/history.py,sha256=pdmkU2ZZGFBsZDQ7kKo1hj-GaVKUd0v4Q2Fu15WE2A8,464
48
+ kodit/domain/enrichments/history/commit_description/__init__.py,sha256=j0fVMIkao9RzkLa6JakBPP40KrELl1eb-dfOLvfADMQ,38
49
+ kodit/domain/enrichments/history/commit_description/commit_description.py,sha256=96yKz-YsyWPfUu7zFtnT9AhRe7DjLmky9z0jy7oreFo,518
44
50
  kodit/domain/enrichments/usage/__init__.py,sha256=7W36rvCF6DH-VqW2RiqU6GMlkYYHZy9Wm0DL_3_fbRc,40
45
51
  kodit/domain/enrichments/usage/api_docs.py,sha256=5cvkNXUfAWDb0HJGIViAzIEZDGEnBnWYhkacs4lHCYA,470
46
52
  kodit/domain/enrichments/usage/usage.py,sha256=U_JrxwXWlFtOzCP7fbfMd-NH75W44MwVFliONMzYB4U,453
@@ -51,7 +57,7 @@ kodit/domain/factories/git_repo_factory.py,sha256=EdeQo4HsBi2hVeVvnSnYtFdR3yGVZQ
51
57
  kodit/domain/services/__init__.py,sha256=Q1GhCK_PqKHYwYE4tkwDz5BIyXkJngLBBOHhzvX8nzo,42
52
58
  kodit/domain/services/bm25_service.py,sha256=-E5k0td2Ucs25qygWkJlY0fl7ZckOUe5xZnKYff3hF8,3631
53
59
  kodit/domain/services/embedding_service.py,sha256=CEcQ2E9XvOcjKNCJEw5soYUNMHJ5LCJGyXzPCl75CPc,4812
54
- kodit/domain/services/git_repository_service.py,sha256=suIBmiBG9OcXUFrw1uiYRidS9yvFEekZU8H-tsY0zs0,16545
60
+ kodit/domain/services/git_repository_service.py,sha256=KtwYF3XKBeNbAHbi-sEdMJ-1jGRy7rmWMZkPpCrh9fw,14980
55
61
  kodit/domain/services/git_service.py,sha256=Lr7kPnnBEa_fWfGA9jpffMK7wcfxQ0wfXgynsbSKSzg,11661
56
62
  kodit/domain/services/physical_architecture_service.py,sha256=0YgoAvbUxT_VwgIh_prftSYnil_XIqNPSoP0g37eIt4,7209
57
63
  kodit/domain/services/task_status_query_service.py,sha256=rI93pTMHeycigQryCWkimXSDzRqx_nJOr07UzPAacPE,736
@@ -89,8 +95,10 @@ kodit/infrastructure/bm25/__init__.py,sha256=DmGbrEO34FOJy4e685BbyxLA7gPW1eqs2gA
89
95
  kodit/infrastructure/bm25/local_bm25_repository.py,sha256=YE3pUkPS5n1JNu6oSM_HRBOXM8U04HiY8dMMZCf9CMQ,5197
90
96
  kodit/infrastructure/bm25/vectorchord_bm25_repository.py,sha256=LjbUPj4nPMb9pdEudThUbZTmQjhxvpN314EzKGpXfi0,8621
91
97
  kodit/infrastructure/cloning/git/__init__.py,sha256=20ePcp0qE6BuLsjsv4KYB1DzKhMIMsPXwEqIEZtjTJs,34
92
- kodit/infrastructure/cloning/git/git_python_adaptor.py,sha256=X0cyoBz3AWeY4lEmAyAqD4i3bXhSBh0ggKng1ERoswI,19944
98
+ kodit/infrastructure/cloning/git/git_python_adaptor.py,sha256=kiiXrjSqdSYT_c_migWff1WEVlJT8JRlgo5m_9T4rrM,21942
93
99
  kodit/infrastructure/cloning/git/working_copy.py,sha256=sPKQN-A1gDVV_QJISNNP4PqxRWxyj5owv5tvWfXMl44,3909
100
+ kodit/infrastructure/database_schema/__init__.py,sha256=jgejYX70fjV69zCuOBiNw3oCQlCKYzxTkjnUUUU7DY0,48
101
+ kodit/infrastructure/database_schema/database_schema_detector.py,sha256=zXU7HqrZU4_EYckloKDbH0gZvZ3_TJG5-Bd5PAkEkXc,10167
94
102
  kodit/infrastructure/embedding/__init__.py,sha256=F-8nLlWAerYJ0MOIA4tbXHLan8bW5rRR84vzxx6tRKI,39
95
103
  kodit/infrastructure/embedding/embedding_factory.py,sha256=6nP8HKKlNWmDE8ATT5tNQHgPqeTDUMpRuWwn2rsfrOQ,3446
96
104
  kodit/infrastructure/embedding/local_vector_search_repository.py,sha256=urccvadIF-uizmYuzK7ii7hl2HaV7swHCiS8P6n7U18,3507
@@ -134,7 +142,7 @@ kodit/infrastructure/slicing/slicer.py,sha256=EDYkoLf6RsTVloudZUq6LS5X10JJAHWcKW
134
142
  kodit/infrastructure/sqlalchemy/__init__.py,sha256=UXPMSF_hgWaqr86cawRVqM8XdVNumQyyK5B8B97GnlA,33
135
143
  kodit/infrastructure/sqlalchemy/embedding_repository.py,sha256=OhSIuNEQ725WoxaIpK3jcZvUVPW-b95HKRXr1HjurmI,8824
136
144
  kodit/infrastructure/sqlalchemy/enrichment_association_repository.py,sha256=mjlGH4vkIv1cPfhkZ4SUyGWpMbgeS7QljsK54yQvV4g,2615
137
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py,sha256=9Yiv8I86qOD3YiNcucs6686JtY_8DOQFpEFJmx1_8HM,5177
145
+ kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py,sha256=9C7y6aRzHHkKJYTTAFxU5p0kwJVBjhqHvs-wUZDWsmk,6350
138
146
  kodit/infrastructure/sqlalchemy/entities.py,sha256=kvZqUPCN2TNgovdNAT_0h4Y8zrgFWwkk-OecvcHIz-A,14852
139
147
  kodit/infrastructure/sqlalchemy/git_branch_repository.py,sha256=dW9kBr8aDBXXVmw1zEux2mueiKhTcpG0JxnLuz5yZ3w,3106
140
148
  kodit/infrastructure/sqlalchemy/git_commit_repository.py,sha256=jzYpFV1gjI-Wfgai-hxesglYn6XD384mqIorV1AtNCA,1991
@@ -169,8 +177,8 @@ kodit/utils/dump_config.py,sha256=dd5uPgqh6ATk02Zt59t2JFKR9X17YWjHudV0nE8VktE,11
169
177
  kodit/utils/dump_openapi.py,sha256=EasYOnnpeabwb_sTKQUBrrOLHjPcOFQ7Zx0YKpx9fmM,1239
170
178
  kodit/utils/generate_api_paths.py,sha256=TMtx9v55podDfUmiWaHgJHLtEWLV2sLL-5ejGFMPzAo,3569
171
179
  kodit/utils/path_utils.py,sha256=UB_81rx7Y1G1jalVv2PX8miwaprBbcqEdtoQ3hPT3kU,2451
172
- kodit-0.5.5.dist-info/METADATA,sha256=KprUWaNcvHEDmwR0zjo59ln_jUNOC_d6-AokObNZbk4,7703
173
- kodit-0.5.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
174
- kodit-0.5.5.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
175
- kodit-0.5.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
176
- kodit-0.5.5.dist-info/RECORD,,
180
+ kodit-0.5.7.dist-info/METADATA,sha256=Yi8IGWrrk1FLgnC5GiqmBc8V3bJcWz8Fl29-nM8CkcE,7703
181
+ kodit-0.5.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
182
+ kodit-0.5.7.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
183
+ kodit-0.5.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
184
+ kodit-0.5.7.dist-info/RECORD,,
File without changes