kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +51 -23
- kodit/application/factories/reporting_factory.py +6 -2
- kodit/application/factories/server_factory.py +353 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +700 -0
- kodit/application/services/indexing_worker_service.py +13 -44
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +0 -2
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -753
- kodit/cli_utils.py +2 -9
- kodit/config.py +4 -97
- kodit/database.py +38 -1
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/{entities.py → entities/__init__.py} +50 -195
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +264 -64
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/services/task_status_query_service.py +2 -2
- kodit/domain/value_objects.py +87 -135
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +92 -46
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +352 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
- kodit/infrastructure/cloning/git/working_copy.py +1 -1
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +104 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/reporting/log_progress.py +8 -5
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +87 -421
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +402 -158
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -30
- kodit/migrations/env.py +1 -0
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +6 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
- kodit-0.5.1.dist-info/RECORD +168 -0
- kodit/application/factories/code_indexing_factory.py +0 -195
- kodit/application/services/auto_indexing_service.py +0 -99
- kodit/application/services/code_indexing_application_service.py +0 -410
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -269
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -164
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -38
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.3.dist-info/RECORD +0 -125
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -3,15 +3,18 @@
|
|
|
3
3
|
from fastapi import APIRouter
|
|
4
4
|
|
|
5
5
|
from kodit.domain.value_objects import MultiSearchRequest, SnippetSearchFilters
|
|
6
|
-
from kodit.infrastructure.api.v1.dependencies import (
|
|
7
|
-
IndexingAppServiceDep,
|
|
8
|
-
)
|
|
6
|
+
from kodit.infrastructure.api.v1.dependencies import CodeSearchAppServiceDep
|
|
9
7
|
from kodit.infrastructure.api.v1.schemas.search import (
|
|
10
8
|
SearchRequest,
|
|
11
9
|
SearchResponse,
|
|
12
10
|
SnippetAttributes,
|
|
13
11
|
SnippetData,
|
|
14
12
|
)
|
|
13
|
+
from kodit.infrastructure.api.v1.schemas.snippet import (
|
|
14
|
+
EnrichmentSchema,
|
|
15
|
+
GitFileSchema,
|
|
16
|
+
SnippetContentSchema,
|
|
17
|
+
)
|
|
15
18
|
|
|
16
19
|
router = APIRouter(tags=["search"])
|
|
17
20
|
|
|
@@ -19,7 +22,7 @@ router = APIRouter(tags=["search"])
|
|
|
19
22
|
@router.post("/api/v1/search")
|
|
20
23
|
async def search_snippets(
|
|
21
24
|
request: SearchRequest,
|
|
22
|
-
|
|
25
|
+
search_application_service: CodeSearchAppServiceDep,
|
|
23
26
|
) -> SearchResponse:
|
|
24
27
|
"""Search code snippets with filters matching MCP tool."""
|
|
25
28
|
# Convert API request to domain request
|
|
@@ -50,23 +53,37 @@ async def search_snippets(
|
|
|
50
53
|
)
|
|
51
54
|
|
|
52
55
|
# Execute search using application service
|
|
53
|
-
results = await
|
|
56
|
+
results = await search_application_service.search(domain_request)
|
|
54
57
|
|
|
55
58
|
return SearchResponse(
|
|
56
59
|
data=[
|
|
57
60
|
SnippetData(
|
|
58
61
|
type="snippet",
|
|
59
|
-
id=result.id,
|
|
62
|
+
id=result.snippet.id,
|
|
60
63
|
attributes=SnippetAttributes(
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
+
created_at=result.snippet.created_at,
|
|
65
|
+
updated_at=result.snippet.updated_at,
|
|
66
|
+
derives_from=[
|
|
67
|
+
GitFileSchema(
|
|
68
|
+
blob_sha=file.blob_sha,
|
|
69
|
+
path=file.path,
|
|
70
|
+
mime_type=file.mime_type,
|
|
71
|
+
size=file.size,
|
|
72
|
+
)
|
|
73
|
+
for file in result.snippet.derives_from
|
|
74
|
+
],
|
|
75
|
+
content=SnippetContentSchema(
|
|
76
|
+
value=result.snippet.content,
|
|
77
|
+
language=result.snippet.extension,
|
|
78
|
+
),
|
|
79
|
+
enrichments=[
|
|
80
|
+
EnrichmentSchema(
|
|
81
|
+
type=enrichment.type.value,
|
|
82
|
+
content=enrichment.content,
|
|
83
|
+
)
|
|
84
|
+
for enrichment in result.snippet.enrichments
|
|
85
|
+
],
|
|
64
86
|
original_scores=result.original_scores,
|
|
65
|
-
source_uri=result.source_uri,
|
|
66
|
-
relative_path=result.relative_path,
|
|
67
|
-
language=result.language,
|
|
68
|
-
authors=result.authors,
|
|
69
|
-
summary=result.summary,
|
|
70
87
|
),
|
|
71
88
|
)
|
|
72
89
|
for result in results
|
|
@@ -1,25 +1 @@
|
|
|
1
1
|
"""JSON:API schemas for the REST API."""
|
|
2
|
-
|
|
3
|
-
from .index import (
|
|
4
|
-
IndexCreateRequest,
|
|
5
|
-
IndexDetailResponse,
|
|
6
|
-
IndexListResponse,
|
|
7
|
-
IndexResponse,
|
|
8
|
-
)
|
|
9
|
-
from .search import (
|
|
10
|
-
SearchRequest,
|
|
11
|
-
SearchResponse,
|
|
12
|
-
SearchResponseWithIncluded,
|
|
13
|
-
SnippetDetailResponse,
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
__all__ = [
|
|
17
|
-
"IndexCreateRequest",
|
|
18
|
-
"IndexDetailResponse",
|
|
19
|
-
"IndexListResponse",
|
|
20
|
-
"IndexResponse",
|
|
21
|
-
"SearchRequest",
|
|
22
|
-
"SearchResponse",
|
|
23
|
-
"SearchResponseWithIncluded",
|
|
24
|
-
"SnippetDetailResponse",
|
|
25
|
-
]
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Commit JSON-API schemas."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class GitFileData(BaseModel):
|
|
9
|
+
"""Git file data."""
|
|
10
|
+
|
|
11
|
+
blob_sha: str
|
|
12
|
+
path: str
|
|
13
|
+
mime_type: str
|
|
14
|
+
size: int
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CommitAttributes(BaseModel):
|
|
18
|
+
"""Commit attributes following JSON-API spec."""
|
|
19
|
+
|
|
20
|
+
commit_sha: str
|
|
21
|
+
date: datetime
|
|
22
|
+
message: str
|
|
23
|
+
parent_commit_sha: str
|
|
24
|
+
author: str
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class CommitData(BaseModel):
|
|
28
|
+
"""Commit data following JSON-API spec."""
|
|
29
|
+
|
|
30
|
+
type: str = "commit"
|
|
31
|
+
id: str
|
|
32
|
+
attributes: CommitAttributes
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class CommitResponse(BaseModel):
|
|
36
|
+
"""Single commit response following JSON-API spec."""
|
|
37
|
+
|
|
38
|
+
data: CommitData
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class CommitListResponse(BaseModel):
|
|
42
|
+
"""Commit list response following JSON-API spec."""
|
|
43
|
+
|
|
44
|
+
data: list[CommitData]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class FileAttributes(BaseModel):
|
|
48
|
+
"""File attributes following JSON-API spec."""
|
|
49
|
+
|
|
50
|
+
blob_sha: str
|
|
51
|
+
path: str
|
|
52
|
+
mime_type: str
|
|
53
|
+
size: int
|
|
54
|
+
extension: str
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class FileData(BaseModel):
|
|
58
|
+
"""File data following JSON-API spec."""
|
|
59
|
+
|
|
60
|
+
type: str = "file"
|
|
61
|
+
id: str
|
|
62
|
+
attributes: FileAttributes
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class FileResponse(BaseModel):
|
|
66
|
+
"""Single file response following JSON-API spec."""
|
|
67
|
+
|
|
68
|
+
data: FileData
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class FileListResponse(BaseModel):
|
|
72
|
+
"""File list response following JSON-API spec."""
|
|
73
|
+
|
|
74
|
+
data: list[FileData]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class EmbeddingAttributes(BaseModel):
|
|
78
|
+
"""Embedding attributes following JSON-API spec."""
|
|
79
|
+
|
|
80
|
+
snippet_sha: str
|
|
81
|
+
embedding_type: str
|
|
82
|
+
embedding: list[float]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class EmbeddingData(BaseModel):
|
|
86
|
+
"""Embedding data following JSON-API spec."""
|
|
87
|
+
|
|
88
|
+
type: str = "embedding"
|
|
89
|
+
id: str
|
|
90
|
+
attributes: EmbeddingAttributes
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class EmbeddingListResponse(BaseModel):
|
|
94
|
+
"""Embedding list response following JSON-API spec."""
|
|
95
|
+
|
|
96
|
+
data: list[EmbeddingData]
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from typing import TypedDict
|
|
4
4
|
|
|
5
|
+
from kodit.application.factories.server_factory import ServerFactory
|
|
5
6
|
from kodit.config import AppContext
|
|
6
7
|
|
|
7
8
|
|
|
@@ -9,3 +10,4 @@ class AppLifespanState(TypedDict):
|
|
|
9
10
|
"""Application lifespan state."""
|
|
10
11
|
|
|
11
12
|
app_context: AppContext
|
|
13
|
+
server_factory: ServerFactory
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Enrichment JSON-API schemas."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class EnrichmentAttributes(BaseModel):
|
|
9
|
+
"""Enrichment attributes following JSON-API spec."""
|
|
10
|
+
|
|
11
|
+
type: str
|
|
12
|
+
subtype: str | None
|
|
13
|
+
content: str
|
|
14
|
+
created_at: datetime | None
|
|
15
|
+
updated_at: datetime | None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class EnrichmentData(BaseModel):
|
|
19
|
+
"""Enrichment data following JSON-API spec."""
|
|
20
|
+
|
|
21
|
+
type: str = "enrichment"
|
|
22
|
+
id: str
|
|
23
|
+
attributes: EnrichmentAttributes
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class EnrichmentListResponse(BaseModel):
|
|
27
|
+
"""Enrichment list response following JSON-API spec."""
|
|
28
|
+
|
|
29
|
+
data: list[EnrichmentData]
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Repository JSON-API schemas."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from pydantic import AnyUrl, BaseModel
|
|
7
|
+
|
|
8
|
+
from kodit.domain.entities.git import GitRepo
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class RepositoryAttributes(BaseModel):
|
|
12
|
+
"""Repository attributes following JSON-API spec."""
|
|
13
|
+
|
|
14
|
+
remote_uri: AnyUrl
|
|
15
|
+
created_at: datetime | None = None
|
|
16
|
+
updated_at: datetime | None = None
|
|
17
|
+
last_scanned_at: datetime | None = None
|
|
18
|
+
cloned_path: Path | None = None
|
|
19
|
+
tracking_branch: str | None = None
|
|
20
|
+
num_commits: int = 0
|
|
21
|
+
num_branches: int = 0
|
|
22
|
+
num_tags: int = 0
|
|
23
|
+
|
|
24
|
+
@staticmethod
|
|
25
|
+
def from_git_repo(repo: GitRepo) -> "RepositoryAttributes":
|
|
26
|
+
"""Create a repository attributes from a Git repository."""
|
|
27
|
+
return RepositoryAttributes(
|
|
28
|
+
remote_uri=repo.sanitized_remote_uri,
|
|
29
|
+
cloned_path=repo.cloned_path,
|
|
30
|
+
created_at=repo.created_at,
|
|
31
|
+
updated_at=repo.updated_at,
|
|
32
|
+
last_scanned_at=repo.last_scanned_at,
|
|
33
|
+
tracking_branch=repo.tracking_branch.name if repo.tracking_branch else None,
|
|
34
|
+
num_commits=repo.num_commits,
|
|
35
|
+
num_branches=repo.num_branches,
|
|
36
|
+
num_tags=repo.num_tags,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class RepositoryData(BaseModel):
|
|
41
|
+
"""Repository data following JSON-API spec."""
|
|
42
|
+
|
|
43
|
+
type: str = "repository"
|
|
44
|
+
id: str
|
|
45
|
+
attributes: RepositoryAttributes
|
|
46
|
+
|
|
47
|
+
@staticmethod
|
|
48
|
+
def from_git_repo(repo: GitRepo) -> "RepositoryData":
|
|
49
|
+
"""Create a repository data from a Git repository."""
|
|
50
|
+
return RepositoryData(
|
|
51
|
+
id=str(repo.id) or "",
|
|
52
|
+
attributes=RepositoryAttributes.from_git_repo(repo),
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class RepositoryResponse(BaseModel):
|
|
57
|
+
"""Single repository response following JSON-API spec."""
|
|
58
|
+
|
|
59
|
+
data: RepositoryData
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class RepositoryListResponse(BaseModel):
|
|
63
|
+
"""Repository list response following JSON-API spec."""
|
|
64
|
+
|
|
65
|
+
data: list[RepositoryData]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class RepositoryCreateAttributes(BaseModel):
|
|
69
|
+
"""Repository creation attributes."""
|
|
70
|
+
|
|
71
|
+
remote_uri: AnyUrl
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class RepositoryCreateData(BaseModel):
|
|
75
|
+
"""Repository creation data."""
|
|
76
|
+
|
|
77
|
+
type: str = "repository"
|
|
78
|
+
attributes: RepositoryCreateAttributes
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class RepositoryCreateRequest(BaseModel):
|
|
82
|
+
"""Repository creation request."""
|
|
83
|
+
|
|
84
|
+
data: RepositoryCreateData
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class RepositoryUpdateAttributes(BaseModel):
|
|
88
|
+
"""Repository update attributes."""
|
|
89
|
+
|
|
90
|
+
pull_latest: bool = False
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class RepositoryUpdateData(BaseModel):
|
|
94
|
+
"""Repository update data."""
|
|
95
|
+
|
|
96
|
+
type: str = "repository"
|
|
97
|
+
attributes: RepositoryUpdateAttributes
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class RepositoryUpdateRequest(BaseModel):
|
|
101
|
+
"""Repository update request."""
|
|
102
|
+
|
|
103
|
+
data: RepositoryUpdateData
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class RepositoryBranchData(BaseModel):
|
|
107
|
+
"""Repository branch data."""
|
|
108
|
+
|
|
109
|
+
name: str
|
|
110
|
+
is_default: bool
|
|
111
|
+
commit_count: int
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class RepositoryCommitData(BaseModel):
|
|
115
|
+
"""Repository commit data for repository details."""
|
|
116
|
+
|
|
117
|
+
sha: str
|
|
118
|
+
message: str
|
|
119
|
+
author: str
|
|
120
|
+
timestamp: datetime
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class RepositoryDetailsResponse(BaseModel):
|
|
124
|
+
"""Repository details response with branches and commits."""
|
|
125
|
+
|
|
126
|
+
data: RepositoryData
|
|
127
|
+
branches: list[RepositoryBranchData]
|
|
128
|
+
recent_commits: list[RepositoryCommitData]
|
|
@@ -4,6 +4,12 @@ from datetime import datetime
|
|
|
4
4
|
|
|
5
5
|
from pydantic import BaseModel, Field
|
|
6
6
|
|
|
7
|
+
from kodit.infrastructure.api.v1.schemas.snippet import (
|
|
8
|
+
EnrichmentSchema,
|
|
9
|
+
GitFileSchema,
|
|
10
|
+
SnippetContentSchema,
|
|
11
|
+
)
|
|
12
|
+
|
|
7
13
|
|
|
8
14
|
class SearchFilters(BaseModel):
|
|
9
15
|
"""Search filters for JSON:API requests."""
|
|
@@ -111,22 +117,19 @@ class SearchRequest(BaseModel):
|
|
|
111
117
|
class SnippetAttributes(BaseModel):
|
|
112
118
|
"""Snippet attributes for JSON:API responses."""
|
|
113
119
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
120
|
+
created_at: datetime | None = None
|
|
121
|
+
updated_at: datetime | None = None
|
|
122
|
+
derives_from: list[GitFileSchema]
|
|
123
|
+
content: SnippetContentSchema
|
|
124
|
+
enrichments: list[EnrichmentSchema]
|
|
117
125
|
original_scores: list[float]
|
|
118
|
-
source_uri: str
|
|
119
|
-
relative_path: str
|
|
120
|
-
language: str
|
|
121
|
-
authors: list[str]
|
|
122
|
-
summary: str
|
|
123
126
|
|
|
124
127
|
|
|
125
128
|
class SnippetData(BaseModel):
|
|
126
129
|
"""Snippet data for JSON:API responses."""
|
|
127
130
|
|
|
128
131
|
type: str = "snippet"
|
|
129
|
-
id:
|
|
132
|
+
id: str
|
|
130
133
|
attributes: SnippetAttributes
|
|
131
134
|
|
|
132
135
|
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Snippet JSON-API schemas."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SnippetContentSchema(BaseModel):
|
|
9
|
+
"""Snippet content schema following JSON-API spec."""
|
|
10
|
+
|
|
11
|
+
value: str
|
|
12
|
+
language: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class GitFileSchema(BaseModel):
|
|
16
|
+
"""Git file schema following JSON-API spec."""
|
|
17
|
+
|
|
18
|
+
blob_sha: str
|
|
19
|
+
path: str
|
|
20
|
+
mime_type: str
|
|
21
|
+
size: int
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class EnrichmentSchema(BaseModel):
|
|
25
|
+
"""Enrichment schema following JSON-API spec."""
|
|
26
|
+
|
|
27
|
+
type: str
|
|
28
|
+
content: str
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SnippetAttributes(BaseModel):
|
|
32
|
+
"""Snippet attributes following JSON-API spec."""
|
|
33
|
+
|
|
34
|
+
created_at: datetime | None = None
|
|
35
|
+
updated_at: datetime | None = None
|
|
36
|
+
derives_from: list[GitFileSchema]
|
|
37
|
+
content: SnippetContentSchema
|
|
38
|
+
enrichments: list[EnrichmentSchema]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class SnippetData(BaseModel):
|
|
42
|
+
"""Snippet data following JSON-API spec."""
|
|
43
|
+
|
|
44
|
+
type: str = "snippet"
|
|
45
|
+
id: str
|
|
46
|
+
attributes: SnippetAttributes
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class SnippetResponse(BaseModel):
|
|
50
|
+
"""Single snippet response following JSON-API spec."""
|
|
51
|
+
|
|
52
|
+
data: SnippetData
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class SnippetListResponse(BaseModel):
|
|
56
|
+
"""Snippet list response following JSON-API spec."""
|
|
57
|
+
|
|
58
|
+
data: list[SnippetData]
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Tag JSON-API schemas."""
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TagAttributes(BaseModel):
|
|
7
|
+
"""Tag attributes following JSON-API spec."""
|
|
8
|
+
|
|
9
|
+
name: str
|
|
10
|
+
target_commit_sha: str
|
|
11
|
+
is_version_tag: bool
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TagData(BaseModel):
|
|
15
|
+
"""Tag data following JSON-API spec."""
|
|
16
|
+
|
|
17
|
+
type: str = "tag"
|
|
18
|
+
id: str # The tag name
|
|
19
|
+
attributes: TagAttributes
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TagResponse(BaseModel):
|
|
23
|
+
"""Single tag response following JSON-API spec."""
|
|
24
|
+
|
|
25
|
+
data: TagData
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TagListResponse(BaseModel):
|
|
29
|
+
"""Tag list response following JSON-API spec."""
|
|
30
|
+
|
|
31
|
+
data: list[TagData]
|
|
@@ -17,6 +17,8 @@ class TaskStatusAttributes(BaseModel):
|
|
|
17
17
|
current: int = Field(default=0, description="Current number of items processed")
|
|
18
18
|
created_at: datetime | None = Field(default=None, description="Task start time")
|
|
19
19
|
updated_at: datetime | None = Field(default=None, description="Last update time")
|
|
20
|
+
error: str = Field(default="", description="Error message")
|
|
21
|
+
message: str = Field(default="", description="Message")
|
|
20
22
|
|
|
21
23
|
|
|
22
24
|
class TaskStatusData(BaseModel):
|
|
@@ -37,7 +37,7 @@ class LocalBM25Repository(BM25Repository):
|
|
|
37
37
|
"""
|
|
38
38
|
self.log = structlog.get_logger(__name__)
|
|
39
39
|
self.index_path = data_dir / "bm25s_index"
|
|
40
|
-
self.snippet_ids: list[
|
|
40
|
+
self.snippet_ids: list[str] = []
|
|
41
41
|
self.stemmer = Stemmer.Stemmer("english")
|
|
42
42
|
self.__retriever: bm25s.BM25 | None = None
|
|
43
43
|
|
|
@@ -76,11 +76,23 @@ class LocalBM25Repository(BM25Repository):
|
|
|
76
76
|
self.log.warning("Corpus is empty, skipping bm25 index")
|
|
77
77
|
return
|
|
78
78
|
|
|
79
|
-
|
|
79
|
+
if not self.snippet_ids and (self.index_path / SNIPPET_IDS_FILE).exists():
|
|
80
|
+
async with aiofiles.open(self.index_path / SNIPPET_IDS_FILE) as f:
|
|
81
|
+
self.snippet_ids = json.loads(await f.read())
|
|
82
|
+
|
|
83
|
+
# Filter out documents that have already been indexed
|
|
84
|
+
new_documents = [
|
|
85
|
+
doc for doc in request.documents if doc.snippet_id not in self.snippet_ids
|
|
86
|
+
]
|
|
87
|
+
if not new_documents:
|
|
88
|
+
self.log.info("No new documents to index")
|
|
89
|
+
return
|
|
90
|
+
|
|
91
|
+
vocab = self._tokenize([doc.text for doc in new_documents])
|
|
80
92
|
self._retriever().index(vocab, show_progress=False)
|
|
81
93
|
self._retriever().save(self.index_path)
|
|
82
94
|
# Replace snippet_ids instead of appending, since the BM25 index is rebuilt
|
|
83
|
-
self.snippet_ids = [doc.snippet_id for doc in
|
|
95
|
+
self.snippet_ids = [doc.snippet_id for doc in new_documents]
|
|
84
96
|
async with aiofiles.open(self.index_path / SNIPPET_IDS_FILE, "w") as f:
|
|
85
97
|
await f.write(json.dumps(self.snippet_ids))
|
|
86
98
|
|
|
@@ -121,7 +133,7 @@ class LocalBM25Repository(BM25Repository):
|
|
|
121
133
|
# Filter results by snippet_ids if provided
|
|
122
134
|
filtered_results = []
|
|
123
135
|
for result, score in zip(results[0], scores[0], strict=True):
|
|
124
|
-
snippet_id =
|
|
136
|
+
snippet_id = result
|
|
125
137
|
if score > 0.0 and (
|
|
126
138
|
request.snippet_ids is None or snippet_id in request.snippet_ids
|
|
127
139
|
):
|