kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (135) hide show
  1. kodit/_version.py +2 -2
  2. kodit/app.py +51 -23
  3. kodit/application/factories/reporting_factory.py +6 -2
  4. kodit/application/factories/server_factory.py +353 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +700 -0
  7. kodit/application/services/indexing_worker_service.py +13 -44
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +0 -2
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -753
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +4 -97
  14. kodit/database.py +38 -1
  15. kodit/domain/enrichments/__init__.py +1 -0
  16. kodit/domain/enrichments/architecture/__init__.py +1 -0
  17. kodit/domain/enrichments/architecture/architecture.py +20 -0
  18. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  19. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  20. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  21. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  22. kodit/domain/enrichments/development/__init__.py +1 -0
  23. kodit/domain/enrichments/development/development.py +18 -0
  24. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  25. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  26. kodit/domain/enrichments/enricher.py +17 -0
  27. kodit/domain/enrichments/enrichment.py +39 -0
  28. kodit/domain/enrichments/request.py +12 -0
  29. kodit/domain/enrichments/response.py +11 -0
  30. kodit/domain/enrichments/usage/__init__.py +1 -0
  31. kodit/domain/enrichments/usage/api_docs.py +19 -0
  32. kodit/domain/enrichments/usage/usage.py +18 -0
  33. kodit/domain/{entities.py → entities/__init__.py} +50 -195
  34. kodit/domain/entities/git.py +190 -0
  35. kodit/domain/factories/__init__.py +1 -0
  36. kodit/domain/factories/git_repo_factory.py +76 -0
  37. kodit/domain/protocols.py +264 -64
  38. kodit/domain/services/bm25_service.py +5 -1
  39. kodit/domain/services/embedding_service.py +3 -0
  40. kodit/domain/services/enrichment_service.py +9 -30
  41. kodit/domain/services/git_repository_service.py +429 -0
  42. kodit/domain/services/git_service.py +300 -0
  43. kodit/domain/services/physical_architecture_service.py +182 -0
  44. kodit/domain/services/task_status_query_service.py +2 -2
  45. kodit/domain/value_objects.py +87 -135
  46. kodit/infrastructure/api/client/__init__.py +0 -2
  47. kodit/infrastructure/api/v1/__init__.py +0 -4
  48. kodit/infrastructure/api/v1/dependencies.py +92 -46
  49. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  50. kodit/infrastructure/api/v1/routers/commits.py +352 -0
  51. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  52. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  53. kodit/infrastructure/api/v1/routers/search.py +31 -14
  54. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  55. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  56. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  57. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  58. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  59. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  60. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  61. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  62. kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
  63. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  64. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  65. kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
  66. kodit/infrastructure/cloning/git/working_copy.py +1 -1
  67. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  68. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  69. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  70. kodit/infrastructure/enricher/__init__.py +1 -0
  71. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  72. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
  73. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  74. kodit/infrastructure/enricher/null_enricher.py +36 -0
  75. kodit/infrastructure/indexing/fusion_service.py +1 -1
  76. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  77. kodit/infrastructure/mappers/git_mapper.py +193 -0
  78. kodit/infrastructure/mappers/snippet_mapper.py +104 -0
  79. kodit/infrastructure/mappers/task_mapper.py +5 -44
  80. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  81. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  82. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  83. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  84. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  85. kodit/infrastructure/reporting/log_progress.py +8 -5
  86. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  87. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  88. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  89. kodit/infrastructure/slicing/slicer.py +87 -421
  90. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  91. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  92. kodit/infrastructure/sqlalchemy/entities.py +402 -158
  93. kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
  94. kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
  95. kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
  96. kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
  97. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
  98. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  99. kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
  100. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  101. kodit/mcp.py +12 -30
  102. kodit/migrations/env.py +1 -0
  103. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  104. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  105. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  106. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  107. kodit/py.typed +0 -0
  108. kodit/utils/dump_config.py +361 -0
  109. kodit/utils/dump_openapi.py +6 -4
  110. kodit/utils/path_utils.py +29 -0
  111. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
  112. kodit-0.5.1.dist-info/RECORD +168 -0
  113. kodit/application/factories/code_indexing_factory.py +0 -195
  114. kodit/application/services/auto_indexing_service.py +0 -99
  115. kodit/application/services/code_indexing_application_service.py +0 -410
  116. kodit/domain/services/index_query_service.py +0 -70
  117. kodit/domain/services/index_service.py +0 -269
  118. kodit/infrastructure/api/client/index_client.py +0 -57
  119. kodit/infrastructure/api/v1/routers/indexes.py +0 -164
  120. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  121. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  122. kodit/infrastructure/cloning/__init__.py +0 -1
  123. kodit/infrastructure/cloning/metadata.py +0 -98
  124. kodit/infrastructure/enrichment/__init__.py +0 -1
  125. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  126. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  127. kodit/infrastructure/mappers/index_mapper.py +0 -345
  128. kodit/infrastructure/reporting/tdqm_progress.py +0 -38
  129. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  130. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  131. kodit-0.4.3.dist-info/RECORD +0 -125
  132. /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  133. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
  134. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
  135. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -3,15 +3,18 @@
3
3
  from fastapi import APIRouter
4
4
 
5
5
  from kodit.domain.value_objects import MultiSearchRequest, SnippetSearchFilters
6
- from kodit.infrastructure.api.v1.dependencies import (
7
- IndexingAppServiceDep,
8
- )
6
+ from kodit.infrastructure.api.v1.dependencies import CodeSearchAppServiceDep
9
7
  from kodit.infrastructure.api.v1.schemas.search import (
10
8
  SearchRequest,
11
9
  SearchResponse,
12
10
  SnippetAttributes,
13
11
  SnippetData,
14
12
  )
13
+ from kodit.infrastructure.api.v1.schemas.snippet import (
14
+ EnrichmentSchema,
15
+ GitFileSchema,
16
+ SnippetContentSchema,
17
+ )
15
18
 
16
19
  router = APIRouter(tags=["search"])
17
20
 
@@ -19,7 +22,7 @@ router = APIRouter(tags=["search"])
19
22
  @router.post("/api/v1/search")
20
23
  async def search_snippets(
21
24
  request: SearchRequest,
22
- app_service: IndexingAppServiceDep,
25
+ search_application_service: CodeSearchAppServiceDep,
23
26
  ) -> SearchResponse:
24
27
  """Search code snippets with filters matching MCP tool."""
25
28
  # Convert API request to domain request
@@ -50,23 +53,37 @@ async def search_snippets(
50
53
  )
51
54
 
52
55
  # Execute search using application service
53
- results = await app_service.search(domain_request)
56
+ results = await search_application_service.search(domain_request)
54
57
 
55
58
  return SearchResponse(
56
59
  data=[
57
60
  SnippetData(
58
61
  type="snippet",
59
- id=result.id,
62
+ id=result.snippet.id,
60
63
  attributes=SnippetAttributes(
61
- content=result.content,
62
- created_at=result.created_at,
63
- updated_at=result.created_at, # Use created_at as fallback
64
+ created_at=result.snippet.created_at,
65
+ updated_at=result.snippet.updated_at,
66
+ derives_from=[
67
+ GitFileSchema(
68
+ blob_sha=file.blob_sha,
69
+ path=file.path,
70
+ mime_type=file.mime_type,
71
+ size=file.size,
72
+ )
73
+ for file in result.snippet.derives_from
74
+ ],
75
+ content=SnippetContentSchema(
76
+ value=result.snippet.content,
77
+ language=result.snippet.extension,
78
+ ),
79
+ enrichments=[
80
+ EnrichmentSchema(
81
+ type=enrichment.type.value,
82
+ content=enrichment.content,
83
+ )
84
+ for enrichment in result.snippet.enrichments
85
+ ],
64
86
  original_scores=result.original_scores,
65
- source_uri=result.source_uri,
66
- relative_path=result.relative_path,
67
- language=result.language,
68
- authors=result.authors,
69
- summary=result.summary,
70
87
  ),
71
88
  )
72
89
  for result in results
@@ -1,25 +1 @@
1
1
  """JSON:API schemas for the REST API."""
2
-
3
- from .index import (
4
- IndexCreateRequest,
5
- IndexDetailResponse,
6
- IndexListResponse,
7
- IndexResponse,
8
- )
9
- from .search import (
10
- SearchRequest,
11
- SearchResponse,
12
- SearchResponseWithIncluded,
13
- SnippetDetailResponse,
14
- )
15
-
16
- __all__ = [
17
- "IndexCreateRequest",
18
- "IndexDetailResponse",
19
- "IndexListResponse",
20
- "IndexResponse",
21
- "SearchRequest",
22
- "SearchResponse",
23
- "SearchResponseWithIncluded",
24
- "SnippetDetailResponse",
25
- ]
@@ -0,0 +1,96 @@
1
+ """Commit JSON-API schemas."""
2
+
3
+ from datetime import datetime
4
+
5
+ from pydantic import BaseModel
6
+
7
+
8
+ class GitFileData(BaseModel):
9
+ """Git file data."""
10
+
11
+ blob_sha: str
12
+ path: str
13
+ mime_type: str
14
+ size: int
15
+
16
+
17
+ class CommitAttributes(BaseModel):
18
+ """Commit attributes following JSON-API spec."""
19
+
20
+ commit_sha: str
21
+ date: datetime
22
+ message: str
23
+ parent_commit_sha: str
24
+ author: str
25
+
26
+
27
+ class CommitData(BaseModel):
28
+ """Commit data following JSON-API spec."""
29
+
30
+ type: str = "commit"
31
+ id: str
32
+ attributes: CommitAttributes
33
+
34
+
35
+ class CommitResponse(BaseModel):
36
+ """Single commit response following JSON-API spec."""
37
+
38
+ data: CommitData
39
+
40
+
41
+ class CommitListResponse(BaseModel):
42
+ """Commit list response following JSON-API spec."""
43
+
44
+ data: list[CommitData]
45
+
46
+
47
+ class FileAttributes(BaseModel):
48
+ """File attributes following JSON-API spec."""
49
+
50
+ blob_sha: str
51
+ path: str
52
+ mime_type: str
53
+ size: int
54
+ extension: str
55
+
56
+
57
+ class FileData(BaseModel):
58
+ """File data following JSON-API spec."""
59
+
60
+ type: str = "file"
61
+ id: str
62
+ attributes: FileAttributes
63
+
64
+
65
+ class FileResponse(BaseModel):
66
+ """Single file response following JSON-API spec."""
67
+
68
+ data: FileData
69
+
70
+
71
+ class FileListResponse(BaseModel):
72
+ """File list response following JSON-API spec."""
73
+
74
+ data: list[FileData]
75
+
76
+
77
+ class EmbeddingAttributes(BaseModel):
78
+ """Embedding attributes following JSON-API spec."""
79
+
80
+ snippet_sha: str
81
+ embedding_type: str
82
+ embedding: list[float]
83
+
84
+
85
+ class EmbeddingData(BaseModel):
86
+ """Embedding data following JSON-API spec."""
87
+
88
+ type: str = "embedding"
89
+ id: str
90
+ attributes: EmbeddingAttributes
91
+
92
+
93
+ class EmbeddingListResponse(BaseModel):
94
+ """Embedding list response following JSON-API spec."""
95
+
96
+ data: list[EmbeddingData]
@@ -2,6 +2,7 @@
2
2
 
3
3
  from typing import TypedDict
4
4
 
5
+ from kodit.application.factories.server_factory import ServerFactory
5
6
  from kodit.config import AppContext
6
7
 
7
8
 
@@ -9,3 +10,4 @@ class AppLifespanState(TypedDict):
9
10
  """Application lifespan state."""
10
11
 
11
12
  app_context: AppContext
13
+ server_factory: ServerFactory
@@ -0,0 +1,29 @@
1
+ """Enrichment JSON-API schemas."""
2
+
3
+ from datetime import datetime
4
+
5
+ from pydantic import BaseModel
6
+
7
+
8
+ class EnrichmentAttributes(BaseModel):
9
+ """Enrichment attributes following JSON-API spec."""
10
+
11
+ type: str
12
+ subtype: str | None
13
+ content: str
14
+ created_at: datetime | None
15
+ updated_at: datetime | None
16
+
17
+
18
+ class EnrichmentData(BaseModel):
19
+ """Enrichment data following JSON-API spec."""
20
+
21
+ type: str = "enrichment"
22
+ id: str
23
+ attributes: EnrichmentAttributes
24
+
25
+
26
+ class EnrichmentListResponse(BaseModel):
27
+ """Enrichment list response following JSON-API spec."""
28
+
29
+ data: list[EnrichmentData]
@@ -0,0 +1,128 @@
1
+ """Repository JSON-API schemas."""
2
+
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+
6
+ from pydantic import AnyUrl, BaseModel
7
+
8
+ from kodit.domain.entities.git import GitRepo
9
+
10
+
11
+ class RepositoryAttributes(BaseModel):
12
+ """Repository attributes following JSON-API spec."""
13
+
14
+ remote_uri: AnyUrl
15
+ created_at: datetime | None = None
16
+ updated_at: datetime | None = None
17
+ last_scanned_at: datetime | None = None
18
+ cloned_path: Path | None = None
19
+ tracking_branch: str | None = None
20
+ num_commits: int = 0
21
+ num_branches: int = 0
22
+ num_tags: int = 0
23
+
24
+ @staticmethod
25
+ def from_git_repo(repo: GitRepo) -> "RepositoryAttributes":
26
+ """Create a repository attributes from a Git repository."""
27
+ return RepositoryAttributes(
28
+ remote_uri=repo.sanitized_remote_uri,
29
+ cloned_path=repo.cloned_path,
30
+ created_at=repo.created_at,
31
+ updated_at=repo.updated_at,
32
+ last_scanned_at=repo.last_scanned_at,
33
+ tracking_branch=repo.tracking_branch.name if repo.tracking_branch else None,
34
+ num_commits=repo.num_commits,
35
+ num_branches=repo.num_branches,
36
+ num_tags=repo.num_tags,
37
+ )
38
+
39
+
40
+ class RepositoryData(BaseModel):
41
+ """Repository data following JSON-API spec."""
42
+
43
+ type: str = "repository"
44
+ id: str
45
+ attributes: RepositoryAttributes
46
+
47
+ @staticmethod
48
+ def from_git_repo(repo: GitRepo) -> "RepositoryData":
49
+ """Create a repository data from a Git repository."""
50
+ return RepositoryData(
51
+ id=str(repo.id) or "",
52
+ attributes=RepositoryAttributes.from_git_repo(repo),
53
+ )
54
+
55
+
56
+ class RepositoryResponse(BaseModel):
57
+ """Single repository response following JSON-API spec."""
58
+
59
+ data: RepositoryData
60
+
61
+
62
+ class RepositoryListResponse(BaseModel):
63
+ """Repository list response following JSON-API spec."""
64
+
65
+ data: list[RepositoryData]
66
+
67
+
68
+ class RepositoryCreateAttributes(BaseModel):
69
+ """Repository creation attributes."""
70
+
71
+ remote_uri: AnyUrl
72
+
73
+
74
+ class RepositoryCreateData(BaseModel):
75
+ """Repository creation data."""
76
+
77
+ type: str = "repository"
78
+ attributes: RepositoryCreateAttributes
79
+
80
+
81
+ class RepositoryCreateRequest(BaseModel):
82
+ """Repository creation request."""
83
+
84
+ data: RepositoryCreateData
85
+
86
+
87
+ class RepositoryUpdateAttributes(BaseModel):
88
+ """Repository update attributes."""
89
+
90
+ pull_latest: bool = False
91
+
92
+
93
+ class RepositoryUpdateData(BaseModel):
94
+ """Repository update data."""
95
+
96
+ type: str = "repository"
97
+ attributes: RepositoryUpdateAttributes
98
+
99
+
100
+ class RepositoryUpdateRequest(BaseModel):
101
+ """Repository update request."""
102
+
103
+ data: RepositoryUpdateData
104
+
105
+
106
+ class RepositoryBranchData(BaseModel):
107
+ """Repository branch data."""
108
+
109
+ name: str
110
+ is_default: bool
111
+ commit_count: int
112
+
113
+
114
+ class RepositoryCommitData(BaseModel):
115
+ """Repository commit data for repository details."""
116
+
117
+ sha: str
118
+ message: str
119
+ author: str
120
+ timestamp: datetime
121
+
122
+
123
+ class RepositoryDetailsResponse(BaseModel):
124
+ """Repository details response with branches and commits."""
125
+
126
+ data: RepositoryData
127
+ branches: list[RepositoryBranchData]
128
+ recent_commits: list[RepositoryCommitData]
@@ -4,6 +4,12 @@ from datetime import datetime
4
4
 
5
5
  from pydantic import BaseModel, Field
6
6
 
7
+ from kodit.infrastructure.api.v1.schemas.snippet import (
8
+ EnrichmentSchema,
9
+ GitFileSchema,
10
+ SnippetContentSchema,
11
+ )
12
+
7
13
 
8
14
  class SearchFilters(BaseModel):
9
15
  """Search filters for JSON:API requests."""
@@ -111,22 +117,19 @@ class SearchRequest(BaseModel):
111
117
  class SnippetAttributes(BaseModel):
112
118
  """Snippet attributes for JSON:API responses."""
113
119
 
114
- content: str
115
- created_at: datetime
116
- updated_at: datetime
120
+ created_at: datetime | None = None
121
+ updated_at: datetime | None = None
122
+ derives_from: list[GitFileSchema]
123
+ content: SnippetContentSchema
124
+ enrichments: list[EnrichmentSchema]
117
125
  original_scores: list[float]
118
- source_uri: str
119
- relative_path: str
120
- language: str
121
- authors: list[str]
122
- summary: str
123
126
 
124
127
 
125
128
  class SnippetData(BaseModel):
126
129
  """Snippet data for JSON:API responses."""
127
130
 
128
131
  type: str = "snippet"
129
- id: int
132
+ id: str
130
133
  attributes: SnippetAttributes
131
134
 
132
135
 
@@ -0,0 +1,58 @@
1
+ """Snippet JSON-API schemas."""
2
+
3
+ from datetime import datetime
4
+
5
+ from pydantic import BaseModel
6
+
7
+
8
+ class SnippetContentSchema(BaseModel):
9
+ """Snippet content schema following JSON-API spec."""
10
+
11
+ value: str
12
+ language: str
13
+
14
+
15
+ class GitFileSchema(BaseModel):
16
+ """Git file schema following JSON-API spec."""
17
+
18
+ blob_sha: str
19
+ path: str
20
+ mime_type: str
21
+ size: int
22
+
23
+
24
+ class EnrichmentSchema(BaseModel):
25
+ """Enrichment schema following JSON-API spec."""
26
+
27
+ type: str
28
+ content: str
29
+
30
+
31
+ class SnippetAttributes(BaseModel):
32
+ """Snippet attributes following JSON-API spec."""
33
+
34
+ created_at: datetime | None = None
35
+ updated_at: datetime | None = None
36
+ derives_from: list[GitFileSchema]
37
+ content: SnippetContentSchema
38
+ enrichments: list[EnrichmentSchema]
39
+
40
+
41
+ class SnippetData(BaseModel):
42
+ """Snippet data following JSON-API spec."""
43
+
44
+ type: str = "snippet"
45
+ id: str
46
+ attributes: SnippetAttributes
47
+
48
+
49
+ class SnippetResponse(BaseModel):
50
+ """Single snippet response following JSON-API spec."""
51
+
52
+ data: SnippetData
53
+
54
+
55
+ class SnippetListResponse(BaseModel):
56
+ """Snippet list response following JSON-API spec."""
57
+
58
+ data: list[SnippetData]
@@ -0,0 +1,31 @@
1
+ """Tag JSON-API schemas."""
2
+
3
+ from pydantic import BaseModel
4
+
5
+
6
+ class TagAttributes(BaseModel):
7
+ """Tag attributes following JSON-API spec."""
8
+
9
+ name: str
10
+ target_commit_sha: str
11
+ is_version_tag: bool
12
+
13
+
14
+ class TagData(BaseModel):
15
+ """Tag data following JSON-API spec."""
16
+
17
+ type: str = "tag"
18
+ id: str # The tag name
19
+ attributes: TagAttributes
20
+
21
+
22
+ class TagResponse(BaseModel):
23
+ """Single tag response following JSON-API spec."""
24
+
25
+ data: TagData
26
+
27
+
28
+ class TagListResponse(BaseModel):
29
+ """Tag list response following JSON-API spec."""
30
+
31
+ data: list[TagData]
@@ -17,6 +17,8 @@ class TaskStatusAttributes(BaseModel):
17
17
  current: int = Field(default=0, description="Current number of items processed")
18
18
  created_at: datetime | None = Field(default=None, description="Task start time")
19
19
  updated_at: datetime | None = Field(default=None, description="Last update time")
20
+ error: str = Field(default="", description="Error message")
21
+ message: str = Field(default="", description="Message")
20
22
 
21
23
 
22
24
  class TaskStatusData(BaseModel):
@@ -37,7 +37,7 @@ class LocalBM25Repository(BM25Repository):
37
37
  """
38
38
  self.log = structlog.get_logger(__name__)
39
39
  self.index_path = data_dir / "bm25s_index"
40
- self.snippet_ids: list[int] = []
40
+ self.snippet_ids: list[str] = []
41
41
  self.stemmer = Stemmer.Stemmer("english")
42
42
  self.__retriever: bm25s.BM25 | None = None
43
43
 
@@ -76,11 +76,23 @@ class LocalBM25Repository(BM25Repository):
76
76
  self.log.warning("Corpus is empty, skipping bm25 index")
77
77
  return
78
78
 
79
- vocab = self._tokenize([doc.text for doc in request.documents])
79
+ if not self.snippet_ids and (self.index_path / SNIPPET_IDS_FILE).exists():
80
+ async with aiofiles.open(self.index_path / SNIPPET_IDS_FILE) as f:
81
+ self.snippet_ids = json.loads(await f.read())
82
+
83
+ # Filter out documents that have already been indexed
84
+ new_documents = [
85
+ doc for doc in request.documents if doc.snippet_id not in self.snippet_ids
86
+ ]
87
+ if not new_documents:
88
+ self.log.info("No new documents to index")
89
+ return
90
+
91
+ vocab = self._tokenize([doc.text for doc in new_documents])
80
92
  self._retriever().index(vocab, show_progress=False)
81
93
  self._retriever().save(self.index_path)
82
94
  # Replace snippet_ids instead of appending, since the BM25 index is rebuilt
83
- self.snippet_ids = [doc.snippet_id for doc in request.documents]
95
+ self.snippet_ids = [doc.snippet_id for doc in new_documents]
84
96
  async with aiofiles.open(self.index_path / SNIPPET_IDS_FILE, "w") as f:
85
97
  await f.write(json.dumps(self.snippet_ids))
86
98
 
@@ -121,7 +133,7 @@ class LocalBM25Repository(BM25Repository):
121
133
  # Filter results by snippet_ids if provided
122
134
  filtered_results = []
123
135
  for result, score in zip(results[0], scores[0], strict=True):
124
- snippet_id = int(result)
136
+ snippet_id = result
125
137
  if score > 0.0 and (
126
138
  request.snippet_ids is None or snippet_id in request.snippet_ids
127
139
  ):