kodit 0.4.2 (kodit-0.4.2-py3-none-any.whl) → 0.5.0 (kodit-0.5.0-py3-none-any.whl)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (100)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +59 -24
  3. kodit/application/factories/reporting_factory.py +16 -7
  4. kodit/application/factories/server_factory.py +311 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +543 -0
  7. kodit/application/services/indexing_worker_service.py +13 -46
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +70 -54
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -763
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +3 -96
  14. kodit/database.py +38 -1
  15. kodit/domain/entities/__init__.py +276 -0
  16. kodit/domain/entities/git.py +190 -0
  17. kodit/domain/factories/__init__.py +1 -0
  18. kodit/domain/factories/git_repo_factory.py +76 -0
  19. kodit/domain/protocols.py +270 -46
  20. kodit/domain/services/bm25_service.py +5 -1
  21. kodit/domain/services/embedding_service.py +3 -0
  22. kodit/domain/services/git_repository_service.py +429 -0
  23. kodit/domain/services/git_service.py +300 -0
  24. kodit/domain/services/task_status_query_service.py +19 -0
  25. kodit/domain/value_objects.py +113 -147
  26. kodit/infrastructure/api/client/__init__.py +0 -2
  27. kodit/infrastructure/api/v1/__init__.py +0 -4
  28. kodit/infrastructure/api/v1/dependencies.py +105 -44
  29. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  30. kodit/infrastructure/api/v1/routers/commits.py +271 -0
  31. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  32. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  33. kodit/infrastructure/api/v1/routers/search.py +31 -14
  34. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  35. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  36. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  37. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  38. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  39. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  40. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  41. kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
  42. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  43. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  44. kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
  45. kodit/infrastructure/cloning/git/working_copy.py +10 -3
  46. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  47. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  48. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  49. kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
  50. kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
  51. kodit/infrastructure/indexing/fusion_service.py +1 -1
  52. kodit/infrastructure/mappers/git_mapper.py +193 -0
  53. kodit/infrastructure/mappers/snippet_mapper.py +106 -0
  54. kodit/infrastructure/mappers/task_mapper.py +5 -44
  55. kodit/infrastructure/mappers/task_status_mapper.py +85 -0
  56. kodit/infrastructure/reporting/db_progress.py +23 -0
  57. kodit/infrastructure/reporting/log_progress.py +13 -38
  58. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  59. kodit/infrastructure/slicing/slicer.py +32 -31
  60. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  61. kodit/infrastructure/sqlalchemy/entities.py +428 -131
  62. kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
  63. kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
  64. kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
  65. kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
  66. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
  67. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  68. kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
  69. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  70. kodit/mcp.py +12 -26
  71. kodit/migrations/env.py +1 -1
  72. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  73. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  74. kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
  75. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  76. kodit/py.typed +0 -0
  77. kodit/utils/dump_openapi.py +7 -4
  78. kodit/utils/path_utils.py +29 -0
  79. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
  80. kodit-0.5.0.dist-info/RECORD +137 -0
  81. kodit/application/factories/code_indexing_factory.py +0 -193
  82. kodit/application/services/auto_indexing_service.py +0 -103
  83. kodit/application/services/code_indexing_application_service.py +0 -393
  84. kodit/domain/entities.py +0 -323
  85. kodit/domain/services/index_query_service.py +0 -70
  86. kodit/domain/services/index_service.py +0 -267
  87. kodit/infrastructure/api/client/index_client.py +0 -57
  88. kodit/infrastructure/api/v1/routers/indexes.py +0 -119
  89. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  90. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  91. kodit/infrastructure/cloning/__init__.py +0 -1
  92. kodit/infrastructure/cloning/metadata.py +0 -98
  93. kodit/infrastructure/mappers/index_mapper.py +0 -345
  94. kodit/infrastructure/reporting/tdqm_progress.py +0 -73
  95. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  96. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  97. kodit-0.4.2.dist-info/RECORD +0 -119
  98. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
  99. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
  100. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,57 +0,0 @@
1
- """Index operations API client for Kodit server."""
2
-
3
- from kodit.infrastructure.api.v1.schemas.index import (
4
- IndexCreateAttributes,
5
- IndexCreateData,
6
- IndexCreateRequest,
7
- IndexData,
8
- IndexListResponse,
9
- IndexResponse,
10
- )
11
-
12
- from .base import BaseAPIClient
13
- from .exceptions import KoditAPIError
14
- from .generated_endpoints import APIEndpoints
15
-
16
-
17
- class IndexClient(BaseAPIClient):
18
- """API client for index operations."""
19
-
20
- async def list_indexes(self) -> list[IndexData]:
21
- """List all indexes."""
22
- response = await self._request("GET", APIEndpoints.API_V1_INDEXES)
23
- data = IndexListResponse.model_validate_json(response.text)
24
- return data.data
25
-
26
- async def create_index(self, uri: str) -> IndexData:
27
- """Create a new index."""
28
- request = IndexCreateRequest(
29
- data=IndexCreateData(
30
- type="index", attributes=IndexCreateAttributes(uri=uri)
31
- )
32
- )
33
- response = await self._request(
34
- "POST", APIEndpoints.API_V1_INDEXES, json=request.model_dump()
35
- )
36
- result = IndexResponse.model_validate_json(response.text)
37
- return result.data
38
-
39
- async def get_index(self, index_id: str) -> IndexData | None:
40
- """Get index by ID."""
41
- try:
42
- response = await self._request(
43
- "GET", APIEndpoints.API_V1_INDEXES_INDEX_ID.format(index_id=index_id)
44
- )
45
- result = IndexResponse.model_validate_json(response.text)
46
- except KoditAPIError as e:
47
- if "404" in str(e):
48
- return None
49
- raise
50
- else:
51
- return result.data
52
-
53
- async def delete_index(self, index_id: str) -> None:
54
- """Delete an index."""
55
- await self._request(
56
- "DELETE", APIEndpoints.API_V1_INDEXES_INDEX_ID.format(index_id=index_id)
57
- )
@@ -1,119 +0,0 @@
1
- """Index management router for the REST API."""
2
-
3
- from fastapi import APIRouter, Depends, HTTPException
4
-
5
- from kodit.domain.entities import Task
6
- from kodit.domain.value_objects import QueuePriority
7
- from kodit.infrastructure.api.middleware.auth import api_key_auth
8
- from kodit.infrastructure.api.v1.dependencies import (
9
- IndexingAppServiceDep,
10
- IndexQueryServiceDep,
11
- QueueServiceDep,
12
- )
13
- from kodit.infrastructure.api.v1.schemas.index import (
14
- IndexAttributes,
15
- IndexCreateRequest,
16
- IndexData,
17
- IndexDetailResponse,
18
- IndexListResponse,
19
- IndexResponse,
20
- )
21
-
22
- router = APIRouter(
23
- prefix="/api/v1/indexes",
24
- tags=["indexes"],
25
- dependencies=[Depends(api_key_auth)],
26
- responses={
27
- 401: {"description": "Unauthorized"},
28
- 422: {"description": "Invalid request"},
29
- },
30
- )
31
-
32
-
33
- @router.get("")
34
- async def list_indexes(
35
- query_service: IndexQueryServiceDep,
36
- ) -> IndexListResponse:
37
- """List all indexes."""
38
- indexes = await query_service.list_indexes()
39
- return IndexListResponse(
40
- data=[
41
- IndexData(
42
- type="index",
43
- id=str(idx.id),
44
- attributes=IndexAttributes(
45
- created_at=idx.created_at,
46
- updated_at=idx.updated_at,
47
- uri=str(idx.source.working_copy.remote_uri),
48
- ),
49
- )
50
- for idx in indexes
51
- ]
52
- )
53
-
54
-
55
- @router.post("", status_code=202)
56
- async def create_index(
57
- request: IndexCreateRequest,
58
- app_service: IndexingAppServiceDep,
59
- queue_service: QueueServiceDep,
60
- ) -> IndexResponse:
61
- """Create a new index and start async indexing."""
62
- # Create index using the application service
63
- index = await app_service.create_index_from_uri(request.data.attributes.uri)
64
-
65
- # Add the indexing task to the queue
66
- await queue_service.enqueue_task(
67
- Task.create_index_update_task(index.id, QueuePriority.USER_INITIATED)
68
- )
69
-
70
- return IndexResponse(
71
- data=IndexData(
72
- type="index",
73
- id=str(index.id),
74
- attributes=IndexAttributes(
75
- created_at=index.created_at,
76
- updated_at=index.updated_at,
77
- uri=str(index.source.working_copy.remote_uri),
78
- ),
79
- )
80
- )
81
-
82
-
83
- @router.get("/{index_id}", responses={404: {"description": "Index not found"}})
84
- async def get_index(
85
- index_id: int,
86
- query_service: IndexQueryServiceDep,
87
- ) -> IndexDetailResponse:
88
- """Get index details."""
89
- index = await query_service.get_index_by_id(index_id)
90
- if not index:
91
- raise HTTPException(status_code=404, detail="Index not found")
92
-
93
- return IndexDetailResponse(
94
- data=IndexData(
95
- type="index",
96
- id=str(index.id),
97
- attributes=IndexAttributes(
98
- created_at=index.created_at,
99
- updated_at=index.updated_at,
100
- uri=str(index.source.working_copy.remote_uri),
101
- ),
102
- ),
103
- )
104
-
105
-
106
- @router.delete(
107
- "/{index_id}", status_code=204, responses={404: {"description": "Index not found"}}
108
- )
109
- async def delete_index(
110
- index_id: int,
111
- query_service: IndexQueryServiceDep,
112
- app_service: IndexingAppServiceDep,
113
- ) -> None:
114
- """Delete an index."""
115
- index = await query_service.get_index_by_id(index_id)
116
- if not index:
117
- raise HTTPException(status_code=404, detail="Index not found")
118
-
119
- await app_service.delete_index(index)
@@ -1,101 +0,0 @@
1
- """JSON:API schemas for index operations."""
2
-
3
- from datetime import datetime
4
-
5
- from pydantic import BaseModel, Field
6
-
7
-
8
- class IndexAttributes(BaseModel):
9
- """Index attributes for JSON:API responses."""
10
-
11
- created_at: datetime
12
- updated_at: datetime
13
- uri: str
14
-
15
-
16
- class SnippetData(BaseModel):
17
- """Snippet data for JSON:API relationships."""
18
-
19
- type: str = "snippet"
20
- id: str
21
-
22
-
23
- class IndexData(BaseModel):
24
- """Index data for JSON:API responses."""
25
-
26
- type: str = "index"
27
- id: str
28
- attributes: IndexAttributes
29
-
30
-
31
- class IndexResponse(BaseModel):
32
- """JSON:API response for single index."""
33
-
34
- data: IndexData
35
-
36
-
37
- class IndexListResponse(BaseModel):
38
- """JSON:API response for index list."""
39
-
40
- data: list[IndexData]
41
-
42
-
43
- class IndexCreateAttributes(BaseModel):
44
- """Attributes for creating an index."""
45
-
46
- uri: str = Field(..., description="URI of the source to index")
47
-
48
-
49
- class IndexCreateData(BaseModel):
50
- """Data for creating an index."""
51
-
52
- type: str = "index"
53
- attributes: IndexCreateAttributes
54
-
55
-
56
- class IndexCreateRequest(BaseModel):
57
- """JSON:API request for creating an index."""
58
-
59
- data: IndexCreateData
60
-
61
-
62
- class AuthorData(BaseModel):
63
- """Author data for JSON:API relationships."""
64
-
65
- type: str = "author"
66
- id: str
67
-
68
-
69
- class AuthorsRelationship(BaseModel):
70
- """Authors relationship for JSON:API."""
71
-
72
- data: list[AuthorData]
73
-
74
-
75
- class FileRelationships(BaseModel):
76
- """File relationships for JSON:API."""
77
-
78
- authors: AuthorsRelationship
79
-
80
-
81
- class FileAttributes(BaseModel):
82
- """File attributes for JSON:API included resources."""
83
-
84
- uri: str
85
- sha256: str
86
- mime_type: str
87
- created_at: datetime
88
- updated_at: datetime
89
-
90
-
91
- class AuthorAttributes(BaseModel):
92
- """Author attributes for JSON:API included resources."""
93
-
94
- name: str
95
- email: str
96
-
97
-
98
- class IndexDetailResponse(BaseModel):
99
- """JSON:API response for index details with included resources."""
100
-
101
- data: IndexData
@@ -1,28 +0,0 @@
1
- """Factory for creating BM25 repositories."""
2
-
3
- from sqlalchemy.ext.asyncio import AsyncSession
4
-
5
- from kodit.config import AppContext
6
- from kodit.domain.services.bm25_service import BM25Repository
7
- from kodit.infrastructure.bm25.local_bm25_repository import LocalBM25Repository
8
- from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
9
- VectorChordBM25Repository,
10
- )
11
-
12
-
13
- def bm25_repository_factory(
14
- app_context: AppContext, session: AsyncSession
15
- ) -> BM25Repository:
16
- """Create a BM25 repository based on configuration.
17
-
18
- Args:
19
- app_context: Application configuration context
20
- session: SQLAlchemy async session
21
-
22
- Returns:
23
- BM25Repository instance
24
-
25
- """
26
- if app_context.default_search.provider == "vectorchord":
27
- return VectorChordBM25Repository(session=session)
28
- return LocalBM25Repository(data_dir=app_context.get_data_dir())
@@ -1 +0,0 @@
1
- """Cloning infrastructure."""
@@ -1,98 +0,0 @@
1
- """Metadata extraction for cloned sources."""
2
-
3
- import mimetypes
4
- from datetime import UTC, datetime
5
- from hashlib import sha256
6
- from pathlib import Path
7
-
8
- import aiofiles
9
- import git
10
- from pydantic import AnyUrl
11
-
12
- from kodit.domain.entities import Author, File
13
- from kodit.domain.value_objects import FileProcessingStatus, SourceType
14
-
15
-
16
- class FileMetadataExtractor:
17
- """File metadata extractor."""
18
-
19
- def __init__(self, source_type: SourceType) -> None:
20
- """Initialize the extractor."""
21
- self.source_type = source_type
22
-
23
- async def extract(self, file_path: Path) -> File:
24
- """Extract metadata from a file."""
25
- if self.source_type == SourceType.GIT:
26
- created_at, updated_at = await self._get_git_timestamps(file_path)
27
- else:
28
- created_at, updated_at = await self._get_file_system_timestamps(file_path)
29
-
30
- # Read file content and calculate metadata
31
- async with aiofiles.open(file_path, "rb") as f:
32
- content = await f.read()
33
- mime_type = mimetypes.guess_type(file_path)
34
- sha = sha256(content).hexdigest()
35
- if self.source_type == SourceType.GIT:
36
- authors = await self._extract_git_authors(file_path)
37
- else:
38
- authors = []
39
-
40
- return File(
41
- created_at=created_at,
42
- updated_at=updated_at,
43
- uri=AnyUrl(file_path.resolve().absolute().as_uri()),
44
- mime_type=mime_type[0]
45
- if mime_type and mime_type[0]
46
- else "application/octet-stream",
47
- sha256=sha,
48
- authors=authors,
49
- file_processing_status=FileProcessingStatus.ADDED,
50
- )
51
-
52
- async def _get_git_timestamps(self, file_path: Path) -> tuple[datetime, datetime]:
53
- """Get timestamps from Git history."""
54
- git_repo = git.Repo(file_path.parent, search_parent_directories=True)
55
- commits = list(git_repo.iter_commits(paths=str(file_path), all=True))
56
-
57
- if commits:
58
- last_modified_at = commits[0].committed_datetime
59
- first_modified_at = commits[-1].committed_datetime
60
- return first_modified_at, last_modified_at
61
- # Fallback to current time if no commits found
62
- now = datetime.now(UTC)
63
- return now, now
64
-
65
- async def _get_file_system_timestamps(
66
- self,
67
- file_path: Path,
68
- ) -> tuple[datetime, datetime]:
69
- """Get timestamps from file system."""
70
- stat = file_path.stat()
71
- file_created_at = datetime.fromtimestamp(stat.st_ctime, UTC)
72
- file_modified_at = datetime.fromtimestamp(stat.st_mtime, UTC)
73
- return file_created_at, file_modified_at
74
-
75
- async def _extract_git_authors(self, file_path: Path) -> list[Author]:
76
- """Extract authors from a Git file."""
77
- git_repo = git.Repo(file_path.parent, search_parent_directories=True)
78
-
79
- try:
80
- # Get the file's blame
81
- blames = git_repo.blame("HEAD", str(file_path))
82
-
83
- # Extract the blame's authors
84
- actors = [
85
- commit.author
86
- for blame in blames or []
87
- for commit in blame
88
- if isinstance(commit, git.Commit)
89
- ]
90
-
91
- # Get or create the authors in the database
92
- return [
93
- Author(name=actor.name or "", email=actor.email or "")
94
- for actor in actors
95
- ]
96
- except git.GitCommandError:
97
- # Handle cases where file might not be tracked
98
- return []