kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +51 -23
- kodit/application/factories/reporting_factory.py +6 -2
- kodit/application/factories/server_factory.py +353 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +700 -0
- kodit/application/services/indexing_worker_service.py +13 -44
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +0 -2
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -753
- kodit/cli_utils.py +2 -9
- kodit/config.py +4 -97
- kodit/database.py +38 -1
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/{entities.py → entities/__init__.py} +50 -195
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +264 -64
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/services/task_status_query_service.py +2 -2
- kodit/domain/value_objects.py +87 -135
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +92 -46
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +352 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
- kodit/infrastructure/cloning/git/working_copy.py +1 -1
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +104 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/reporting/log_progress.py +8 -5
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +87 -421
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +402 -158
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -30
- kodit/migrations/env.py +1 -0
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +6 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
- kodit-0.5.1.dist-info/RECORD +168 -0
- kodit/application/factories/code_indexing_factory.py +0 -195
- kodit/application/services/auto_indexing_service.py +0 -99
- kodit/application/services/code_indexing_application_service.py +0 -410
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -269
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -164
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -38
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.3.dist-info/RECORD +0 -125
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.4.3'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 4, 3)
|
|
31
|
+
__version__ = version = '0.5.1'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 5, 1)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
kodit/app.py
CHANGED
|
@@ -3,75 +3,103 @@
|
|
|
3
3
|
from collections.abc import AsyncIterator
|
|
4
4
|
from contextlib import asynccontextmanager
|
|
5
5
|
|
|
6
|
+
import structlog
|
|
6
7
|
from asgi_correlation_id import CorrelationIdMiddleware
|
|
7
8
|
from fastapi import FastAPI, Response
|
|
8
9
|
from fastapi.responses import RedirectResponse
|
|
9
10
|
|
|
10
11
|
from kodit._version import version
|
|
11
12
|
from kodit.application.factories.reporting_factory import create_server_operation
|
|
12
|
-
from kodit.application.services.auto_indexing_service import AutoIndexingService
|
|
13
|
+
from kodit.application.factories.server_factory import ServerFactory
|
|
13
14
|
from kodit.application.services.indexing_worker_service import IndexingWorkerService
|
|
14
15
|
from kodit.application.services.sync_scheduler import SyncSchedulerService
|
|
15
16
|
from kodit.config import AppContext
|
|
16
|
-
from kodit.
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
17
|
+
from kodit.domain.enrichments.request import EnrichmentRequest
|
|
18
|
+
from kodit.domain.value_objects import Document, IndexRequest
|
|
19
|
+
from kodit.infrastructure.api.v1.routers.commits import router as commits_router
|
|
20
|
+
from kodit.infrastructure.api.v1.routers.queue import router as queue_router
|
|
21
|
+
from kodit.infrastructure.api.v1.routers.repositories import (
|
|
22
|
+
router as repositories_router,
|
|
20
23
|
)
|
|
24
|
+
from kodit.infrastructure.api.v1.routers.search import router as search_router
|
|
21
25
|
from kodit.infrastructure.api.v1.schemas.context import AppLifespanState
|
|
22
26
|
from kodit.infrastructure.sqlalchemy.task_status_repository import (
|
|
23
27
|
create_task_status_repository,
|
|
24
28
|
)
|
|
25
29
|
from kodit.mcp import mcp
|
|
26
|
-
from kodit.middleware import
|
|
30
|
+
from kodit.middleware import (
|
|
31
|
+
ASGICancelledErrorMiddleware,
|
|
32
|
+
logging_middleware,
|
|
33
|
+
)
|
|
27
34
|
|
|
28
35
|
# Global services
|
|
29
|
-
_auto_indexing_service: AutoIndexingService | None = None
|
|
30
36
|
_sync_scheduler_service: SyncSchedulerService | None = None
|
|
37
|
+
_server_factory: ServerFactory | None = None
|
|
31
38
|
|
|
32
39
|
|
|
33
40
|
@asynccontextmanager
|
|
34
41
|
async def app_lifespan(_: FastAPI) -> AsyncIterator[AppLifespanState]:
|
|
35
42
|
"""Manage application lifespan for auto-indexing and sync."""
|
|
36
|
-
global _auto_indexing_service, _sync_scheduler_service
|
|
43
|
+
global _sync_scheduler_service # noqa: PLW0603
|
|
44
|
+
global _server_factory # noqa: PLW0603
|
|
37
45
|
|
|
38
46
|
# App context has already been configured by the CLI.
|
|
39
47
|
app_context = AppContext()
|
|
40
48
|
db = await app_context.get_db()
|
|
49
|
+
log = structlog.get_logger(__name__)
|
|
41
50
|
operation = create_server_operation(
|
|
42
51
|
create_task_status_repository(db.session_factory)
|
|
43
52
|
)
|
|
44
53
|
|
|
54
|
+
_server_factory = ServerFactory(app_context, db.session_factory)
|
|
55
|
+
|
|
56
|
+
# Quickly check if the providers are accessible and raise an error if not
|
|
57
|
+
log.info("Checking providers are accessible")
|
|
58
|
+
try:
|
|
59
|
+
await anext(
|
|
60
|
+
_server_factory.code_search_service().index_documents(
|
|
61
|
+
IndexRequest(
|
|
62
|
+
documents=[Document(snippet_id="1", text="def hello(): pass")]
|
|
63
|
+
)
|
|
64
|
+
)
|
|
65
|
+
)
|
|
66
|
+
except Exception as e:
|
|
67
|
+
raise ValueError("Embedding service is not accessible") from e
|
|
68
|
+
try:
|
|
69
|
+
await anext(
|
|
70
|
+
_server_factory.enricher().enrich(
|
|
71
|
+
[
|
|
72
|
+
EnrichmentRequest(
|
|
73
|
+
id="1",
|
|
74
|
+
text="def hello(): pass",
|
|
75
|
+
system_prompt="Explain this code",
|
|
76
|
+
)
|
|
77
|
+
]
|
|
78
|
+
)
|
|
79
|
+
)
|
|
80
|
+
except Exception as e:
|
|
81
|
+
raise ValueError("Enrichment service is not accessible") from e
|
|
82
|
+
|
|
45
83
|
# Start the queue worker service
|
|
46
84
|
_indexing_worker_service = IndexingWorkerService(
|
|
47
85
|
app_context=app_context,
|
|
48
86
|
session_factory=db.session_factory,
|
|
87
|
+
server_factory=_server_factory,
|
|
49
88
|
)
|
|
50
89
|
await _indexing_worker_service.start(operation)
|
|
51
90
|
|
|
52
|
-
# Start auto-indexing service
|
|
53
|
-
_auto_indexing_service = AutoIndexingService(
|
|
54
|
-
app_context=app_context,
|
|
55
|
-
session_factory=db.session_factory,
|
|
56
|
-
)
|
|
57
|
-
await _auto_indexing_service.start_background_indexing(operation)
|
|
58
|
-
|
|
59
91
|
# Start sync scheduler service
|
|
60
92
|
if app_context.periodic_sync.enabled:
|
|
61
|
-
_sync_scheduler_service = SyncSchedulerService(
|
|
62
|
-
session_factory=db.session_factory,
|
|
63
|
-
)
|
|
93
|
+
_sync_scheduler_service = _server_factory.sync_scheduler_service()
|
|
64
94
|
_sync_scheduler_service.start_periodic_sync(
|
|
65
95
|
interval_seconds=app_context.periodic_sync.interval_seconds
|
|
66
96
|
)
|
|
67
97
|
|
|
68
|
-
yield AppLifespanState(app_context=app_context)
|
|
98
|
+
yield AppLifespanState(app_context=app_context, server_factory=_server_factory)
|
|
69
99
|
|
|
70
100
|
# Stop services
|
|
71
101
|
if _sync_scheduler_service:
|
|
72
102
|
await _sync_scheduler_service.stop_periodic_sync()
|
|
73
|
-
if _auto_indexing_service:
|
|
74
|
-
await _auto_indexing_service.stop()
|
|
75
103
|
if _indexing_worker_service:
|
|
76
104
|
await _indexing_worker_service.stop()
|
|
77
105
|
|
|
@@ -123,10 +151,10 @@ async def healthz() -> Response:
|
|
|
123
151
|
|
|
124
152
|
|
|
125
153
|
# Include API routers
|
|
126
|
-
app.include_router(indexes_router)
|
|
127
154
|
app.include_router(queue_router)
|
|
128
155
|
app.include_router(search_router)
|
|
129
|
-
|
|
156
|
+
app.include_router(commits_router)
|
|
157
|
+
app.include_router(repositories_router)
|
|
130
158
|
|
|
131
159
|
# Add mcp routes last, otherwise previous routes aren't added
|
|
132
160
|
# Mount both apps at root - they have different internal paths
|
|
@@ -5,7 +5,9 @@ from kodit.config import ReportingConfig
|
|
|
5
5
|
from kodit.domain.protocols import TaskStatusRepository
|
|
6
6
|
from kodit.infrastructure.reporting.db_progress import DBProgressReportingModule
|
|
7
7
|
from kodit.infrastructure.reporting.log_progress import LoggingReportingModule
|
|
8
|
-
from kodit.infrastructure.reporting.
|
|
8
|
+
from kodit.infrastructure.reporting.telemetry_progress import (
|
|
9
|
+
TelemetryProgressReportingModule,
|
|
10
|
+
)
|
|
9
11
|
|
|
10
12
|
|
|
11
13
|
def create_noop_operation() -> ProgressTracker:
|
|
@@ -17,7 +19,8 @@ def create_cli_operation(config: ReportingConfig | None = None) -> ProgressTrack
|
|
|
17
19
|
"""Create a CLI reporter."""
|
|
18
20
|
shared_config = config or ReportingConfig()
|
|
19
21
|
s = ProgressTracker.create(TaskOperation.ROOT)
|
|
20
|
-
s.subscribe(
|
|
22
|
+
s.subscribe(TelemetryProgressReportingModule())
|
|
23
|
+
s.subscribe(LoggingReportingModule(shared_config))
|
|
21
24
|
return s
|
|
22
25
|
|
|
23
26
|
|
|
@@ -27,6 +30,7 @@ def create_server_operation(
|
|
|
27
30
|
"""Create a server reporter."""
|
|
28
31
|
shared_config = config or ReportingConfig()
|
|
29
32
|
s = ProgressTracker.create(TaskOperation.ROOT)
|
|
33
|
+
s.subscribe(TelemetryProgressReportingModule())
|
|
30
34
|
s.subscribe(LoggingReportingModule(shared_config))
|
|
31
35
|
s.subscribe(DBProgressReportingModule(task_status_repository, shared_config))
|
|
32
36
|
return s
|
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
"""Create a big object that contains all the application services."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
|
+
|
|
8
|
+
from kodit.application.factories.reporting_factory import create_server_operation
|
|
9
|
+
from kodit.application.services.code_search_application_service import (
|
|
10
|
+
CodeSearchApplicationService,
|
|
11
|
+
)
|
|
12
|
+
from kodit.application.services.commit_indexing_application_service import (
|
|
13
|
+
CommitIndexingApplicationService,
|
|
14
|
+
)
|
|
15
|
+
from kodit.application.services.queue_service import QueueService
|
|
16
|
+
from kodit.application.services.reporting import ProgressTracker
|
|
17
|
+
from kodit.application.services.sync_scheduler import SyncSchedulerService
|
|
18
|
+
from kodit.config import AppContext
|
|
19
|
+
from kodit.domain.enrichments.architecture.physical.formatter import (
|
|
20
|
+
PhysicalArchitectureFormatter,
|
|
21
|
+
)
|
|
22
|
+
from kodit.domain.enrichments.enricher import Enricher
|
|
23
|
+
from kodit.domain.protocols import (
|
|
24
|
+
FusionService,
|
|
25
|
+
GitAdapter,
|
|
26
|
+
GitBranchRepository,
|
|
27
|
+
GitCommitRepository,
|
|
28
|
+
GitRepoRepository,
|
|
29
|
+
GitTagRepository,
|
|
30
|
+
SnippetRepositoryV2,
|
|
31
|
+
TaskStatusRepository,
|
|
32
|
+
)
|
|
33
|
+
from kodit.domain.services.bm25_service import BM25DomainService, BM25Repository
|
|
34
|
+
from kodit.domain.services.embedding_service import EmbeddingDomainService
|
|
35
|
+
from kodit.domain.services.git_repository_service import (
|
|
36
|
+
GitRepositoryScanner,
|
|
37
|
+
RepositoryCloner,
|
|
38
|
+
)
|
|
39
|
+
from kodit.domain.services.physical_architecture_service import (
|
|
40
|
+
PhysicalArchitectureService,
|
|
41
|
+
)
|
|
42
|
+
from kodit.infrastructure.bm25.local_bm25_repository import LocalBM25Repository
|
|
43
|
+
from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
|
|
44
|
+
VectorChordBM25Repository,
|
|
45
|
+
)
|
|
46
|
+
from kodit.infrastructure.cloning.git.git_python_adaptor import GitPythonAdapter
|
|
47
|
+
from kodit.infrastructure.embedding.embedding_factory import (
|
|
48
|
+
embedding_domain_service_factory,
|
|
49
|
+
)
|
|
50
|
+
from kodit.infrastructure.enricher.enricher_factory import (
|
|
51
|
+
enricher_domain_service_factory,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
# InMemoryGitTagRepository removed - now handled by InMemoryGitRepoRepository
|
|
55
|
+
from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
|
|
56
|
+
from kodit.infrastructure.physical_architecture.formatters.narrative_formatter import (
|
|
57
|
+
NarrativeFormatter,
|
|
58
|
+
)
|
|
59
|
+
from kodit.infrastructure.slicing.slicer import Slicer
|
|
60
|
+
from kodit.infrastructure.sqlalchemy.embedding_repository import (
|
|
61
|
+
SqlAlchemyEmbeddingRepository,
|
|
62
|
+
create_embedding_repository,
|
|
63
|
+
)
|
|
64
|
+
from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
|
|
65
|
+
EnrichmentV2Repository,
|
|
66
|
+
)
|
|
67
|
+
from kodit.infrastructure.sqlalchemy.git_branch_repository import (
|
|
68
|
+
create_git_branch_repository,
|
|
69
|
+
)
|
|
70
|
+
from kodit.infrastructure.sqlalchemy.git_commit_repository import (
|
|
71
|
+
create_git_commit_repository,
|
|
72
|
+
)
|
|
73
|
+
from kodit.infrastructure.sqlalchemy.git_repository import create_git_repo_repository
|
|
74
|
+
from kodit.infrastructure.sqlalchemy.git_tag_repository import (
|
|
75
|
+
create_git_tag_repository,
|
|
76
|
+
)
|
|
77
|
+
from kodit.infrastructure.sqlalchemy.snippet_v2_repository import (
|
|
78
|
+
create_snippet_v2_repository,
|
|
79
|
+
)
|
|
80
|
+
from kodit.infrastructure.sqlalchemy.task_status_repository import (
|
|
81
|
+
create_task_status_repository,
|
|
82
|
+
)
|
|
83
|
+
from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
|
|
84
|
+
|
|
85
|
+
if TYPE_CHECKING:
|
|
86
|
+
from kodit.domain.services.enrichment_service import EnrichmentDomainService
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class ServerFactory:
|
|
90
|
+
"""Factory for creating server application services."""
|
|
91
|
+
|
|
92
|
+
def __init__(
|
|
93
|
+
self,
|
|
94
|
+
app_context: AppContext,
|
|
95
|
+
session_factory: Callable[[], AsyncSession],
|
|
96
|
+
) -> None:
|
|
97
|
+
"""Initialize the ServerFactory."""
|
|
98
|
+
self.app_context = app_context
|
|
99
|
+
self.session_factory = session_factory
|
|
100
|
+
self._repo_repository: GitRepoRepository | None = None
|
|
101
|
+
self._snippet_v2_repository: SnippetRepositoryV2 | None = None
|
|
102
|
+
self._git_adapter: GitAdapter | None = None
|
|
103
|
+
self._scanner: GitRepositoryScanner | None = None
|
|
104
|
+
self._cloner: RepositoryCloner | None = None
|
|
105
|
+
self._commit_indexing_application_service: (
|
|
106
|
+
CommitIndexingApplicationService | None
|
|
107
|
+
) = None
|
|
108
|
+
self._enrichment_service: EnrichmentDomainService | None = None
|
|
109
|
+
self._enricher_service: Enricher | None = None
|
|
110
|
+
self._task_status_repository: TaskStatusRepository | None = None
|
|
111
|
+
self._operation: ProgressTracker | None = None
|
|
112
|
+
self._queue_service: QueueService | None = None
|
|
113
|
+
self._slicer: Slicer | None = None
|
|
114
|
+
self._bm25_service: BM25DomainService | None = None
|
|
115
|
+
self._bm25_repository: BM25Repository | None = None
|
|
116
|
+
self._code_search_service: EmbeddingDomainService | None = None
|
|
117
|
+
self._text_search_service: EmbeddingDomainService | None = None
|
|
118
|
+
self._sync_scheduler_service: SyncSchedulerService | None = None
|
|
119
|
+
self._embedding_repository: SqlAlchemyEmbeddingRepository | None = None
|
|
120
|
+
self._fusion_service: FusionService | None = None
|
|
121
|
+
self._code_search_application_service: CodeSearchApplicationService | None = (
|
|
122
|
+
None
|
|
123
|
+
)
|
|
124
|
+
self._git_commit_repository: GitCommitRepository | None = None
|
|
125
|
+
self._git_branch_repository: GitBranchRepository | None = None
|
|
126
|
+
self._git_tag_repository: GitTagRepository | None = None
|
|
127
|
+
self._architecture_service: PhysicalArchitectureService | None = None
|
|
128
|
+
self._enrichment_v2_repository: EnrichmentV2Repository | None = None
|
|
129
|
+
self._architecture_formatter: PhysicalArchitectureFormatter | None = None
|
|
130
|
+
|
|
131
|
+
def architecture_formatter(self) -> PhysicalArchitectureFormatter:
|
|
132
|
+
"""Create a PhysicalArchitectureFormatter instance."""
|
|
133
|
+
if not self._architecture_formatter:
|
|
134
|
+
self._architecture_formatter = NarrativeFormatter()
|
|
135
|
+
return self._architecture_formatter
|
|
136
|
+
|
|
137
|
+
def architecture_service(self) -> PhysicalArchitectureService:
|
|
138
|
+
"""Create a PhysicalArchitectureService instance."""
|
|
139
|
+
if not self._architecture_service:
|
|
140
|
+
self._architecture_service = PhysicalArchitectureService(
|
|
141
|
+
formatter=self.architecture_formatter()
|
|
142
|
+
)
|
|
143
|
+
return self._architecture_service
|
|
144
|
+
|
|
145
|
+
def enrichment_v2_repository(self) -> EnrichmentV2Repository:
|
|
146
|
+
"""Create a EnrichmentV2Repository instance."""
|
|
147
|
+
if not self._enrichment_v2_repository:
|
|
148
|
+
self._enrichment_v2_repository = EnrichmentV2Repository(
|
|
149
|
+
session_factory=self.session_factory
|
|
150
|
+
)
|
|
151
|
+
return self._enrichment_v2_repository
|
|
152
|
+
|
|
153
|
+
def queue_service(self) -> QueueService:
|
|
154
|
+
"""Create a QueueService instance."""
|
|
155
|
+
if not self._queue_service:
|
|
156
|
+
self._queue_service = QueueService(session_factory=self.session_factory)
|
|
157
|
+
return self._queue_service
|
|
158
|
+
|
|
159
|
+
def task_status_repository(self) -> TaskStatusRepository:
|
|
160
|
+
"""Create a TaskStatusRepository instance."""
|
|
161
|
+
if not self._task_status_repository:
|
|
162
|
+
self._task_status_repository = create_task_status_repository(
|
|
163
|
+
session_factory=self.session_factory
|
|
164
|
+
)
|
|
165
|
+
return self._task_status_repository
|
|
166
|
+
|
|
167
|
+
def operation(self) -> ProgressTracker:
|
|
168
|
+
"""Create a ProgressTracker instance."""
|
|
169
|
+
if not self._operation:
|
|
170
|
+
self._operation = create_server_operation(
|
|
171
|
+
task_status_repository=self.task_status_repository()
|
|
172
|
+
)
|
|
173
|
+
return self._operation
|
|
174
|
+
|
|
175
|
+
def slicer(self) -> Slicer:
|
|
176
|
+
"""Create a Slicer instance."""
|
|
177
|
+
if not self._slicer:
|
|
178
|
+
self._slicer = Slicer()
|
|
179
|
+
return self._slicer
|
|
180
|
+
|
|
181
|
+
def bm25_repository(self) -> BM25Repository:
|
|
182
|
+
"""Create a BM25Repository instance."""
|
|
183
|
+
if not self._bm25_repository:
|
|
184
|
+
if self.app_context.default_search.provider == "vectorchord":
|
|
185
|
+
self._bm25_repository = VectorChordBM25Repository(
|
|
186
|
+
session_factory=self.session_factory
|
|
187
|
+
)
|
|
188
|
+
else:
|
|
189
|
+
self._bm25_repository = LocalBM25Repository(
|
|
190
|
+
data_dir=self.app_context.get_data_dir()
|
|
191
|
+
)
|
|
192
|
+
return self._bm25_repository
|
|
193
|
+
|
|
194
|
+
def bm25_service(self) -> BM25DomainService:
|
|
195
|
+
"""Create a BM25DomainService instance."""
|
|
196
|
+
if not self._bm25_service:
|
|
197
|
+
self._bm25_service = BM25DomainService(repository=self.bm25_repository())
|
|
198
|
+
return self._bm25_service
|
|
199
|
+
|
|
200
|
+
def code_search_service(self) -> EmbeddingDomainService:
|
|
201
|
+
"""Create a EmbeddingDomainService instance."""
|
|
202
|
+
if not self._code_search_service:
|
|
203
|
+
self._code_search_service = embedding_domain_service_factory(
|
|
204
|
+
"code", self.app_context, self.session_factory
|
|
205
|
+
)
|
|
206
|
+
return self._code_search_service
|
|
207
|
+
|
|
208
|
+
def text_search_service(self) -> EmbeddingDomainService:
|
|
209
|
+
"""Create a EmbeddingDomainService instance."""
|
|
210
|
+
if not self._text_search_service:
|
|
211
|
+
self._text_search_service = embedding_domain_service_factory(
|
|
212
|
+
"text", self.app_context, self.session_factory
|
|
213
|
+
)
|
|
214
|
+
return self._text_search_service
|
|
215
|
+
|
|
216
|
+
def commit_indexing_application_service(self) -> CommitIndexingApplicationService:
|
|
217
|
+
"""Create a CommitIndexingApplicationService instance."""
|
|
218
|
+
if not self._commit_indexing_application_service:
|
|
219
|
+
self._commit_indexing_application_service = (
|
|
220
|
+
CommitIndexingApplicationService(
|
|
221
|
+
snippet_v2_repository=self.snippet_v2_repository(),
|
|
222
|
+
repo_repository=self.repo_repository(),
|
|
223
|
+
git_commit_repository=self.git_commit_repository(),
|
|
224
|
+
git_branch_repository=self.git_branch_repository(),
|
|
225
|
+
git_tag_repository=self.git_tag_repository(),
|
|
226
|
+
operation=self.operation(),
|
|
227
|
+
scanner=self.scanner(),
|
|
228
|
+
cloner=self.cloner(),
|
|
229
|
+
snippet_repository=self.snippet_v2_repository(),
|
|
230
|
+
slicer=self.slicer(),
|
|
231
|
+
queue=self.queue_service(),
|
|
232
|
+
bm25_service=self.bm25_service(),
|
|
233
|
+
code_search_service=self.code_search_service(),
|
|
234
|
+
text_search_service=self.text_search_service(),
|
|
235
|
+
embedding_repository=self.embedding_repository(),
|
|
236
|
+
architecture_service=self.architecture_service(),
|
|
237
|
+
enrichment_v2_repository=self.enrichment_v2_repository(),
|
|
238
|
+
enricher_service=self.enricher(),
|
|
239
|
+
)
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
return self._commit_indexing_application_service
|
|
243
|
+
|
|
244
|
+
def unit_of_work(self) -> SqlAlchemyUnitOfWork:
|
|
245
|
+
"""Create a SqlAlchemyUnitOfWork instance."""
|
|
246
|
+
return SqlAlchemyUnitOfWork(session_factory=self.session_factory)
|
|
247
|
+
|
|
248
|
+
def repo_repository(self) -> GitRepoRepository:
|
|
249
|
+
"""Create a GitRepoRepository instance."""
|
|
250
|
+
if not self._repo_repository:
|
|
251
|
+
self._repo_repository = create_git_repo_repository(
|
|
252
|
+
session_factory=self.session_factory
|
|
253
|
+
)
|
|
254
|
+
return self._repo_repository
|
|
255
|
+
|
|
256
|
+
# branch_repository and commit_repository removed - now handled by repo_repository
|
|
257
|
+
# as GitRepo is the aggregate root
|
|
258
|
+
|
|
259
|
+
def git_adapter(self) -> GitAdapter:
|
|
260
|
+
"""Create a GitAdapter instance."""
|
|
261
|
+
if not self._git_adapter:
|
|
262
|
+
self._git_adapter = GitPythonAdapter()
|
|
263
|
+
return self._git_adapter
|
|
264
|
+
|
|
265
|
+
# tag_repository removed - now handled by repo_repository
|
|
266
|
+
|
|
267
|
+
def scanner(self) -> GitRepositoryScanner:
|
|
268
|
+
"""Create a GitRepositoryScanner instance."""
|
|
269
|
+
if not self._scanner:
|
|
270
|
+
self._scanner = GitRepositoryScanner(self.git_adapter())
|
|
271
|
+
return self._scanner
|
|
272
|
+
|
|
273
|
+
def cloner(self) -> RepositoryCloner:
|
|
274
|
+
"""Create a RepositoryCloner instance."""
|
|
275
|
+
if not self._cloner:
|
|
276
|
+
self._cloner = RepositoryCloner(
|
|
277
|
+
self.git_adapter(), self.app_context.get_clone_dir()
|
|
278
|
+
)
|
|
279
|
+
return self._cloner
|
|
280
|
+
|
|
281
|
+
def snippet_v2_repository(self) -> SnippetRepositoryV2:
|
|
282
|
+
"""Create a SnippetRepositoryV2 instance."""
|
|
283
|
+
if not self._snippet_v2_repository:
|
|
284
|
+
self._snippet_v2_repository = create_snippet_v2_repository(
|
|
285
|
+
session_factory=self.session_factory
|
|
286
|
+
)
|
|
287
|
+
return self._snippet_v2_repository
|
|
288
|
+
|
|
289
|
+
def enricher(self) -> Enricher:
|
|
290
|
+
"""Create a EnricherDomainService instance."""
|
|
291
|
+
if not self._enricher_service:
|
|
292
|
+
self._enricher_service = enricher_domain_service_factory(self.app_context)
|
|
293
|
+
return self._enricher_service
|
|
294
|
+
|
|
295
|
+
def sync_scheduler_service(self) -> SyncSchedulerService:
|
|
296
|
+
"""Create a SyncSchedulerService instance."""
|
|
297
|
+
if not self._sync_scheduler_service:
|
|
298
|
+
self._sync_scheduler_service = SyncSchedulerService(
|
|
299
|
+
queue_service=self.queue_service(),
|
|
300
|
+
repo_repository=self.repo_repository(),
|
|
301
|
+
)
|
|
302
|
+
return self._sync_scheduler_service
|
|
303
|
+
|
|
304
|
+
def embedding_repository(self) -> SqlAlchemyEmbeddingRepository:
|
|
305
|
+
"""Create a SqlAlchemyEmbeddingRepository instance."""
|
|
306
|
+
if not self._embedding_repository:
|
|
307
|
+
self._embedding_repository = create_embedding_repository(
|
|
308
|
+
session_factory=self.session_factory
|
|
309
|
+
)
|
|
310
|
+
return self._embedding_repository
|
|
311
|
+
|
|
312
|
+
def fusion_service(self) -> FusionService:
|
|
313
|
+
"""Create a FusionService instance."""
|
|
314
|
+
if not self._fusion_service:
|
|
315
|
+
self._fusion_service = ReciprocalRankFusionService()
|
|
316
|
+
return self._fusion_service
|
|
317
|
+
|
|
318
|
+
def code_search_application_service(self) -> CodeSearchApplicationService:
|
|
319
|
+
"""Create a CodeSearchApplicationService instance."""
|
|
320
|
+
if not self._code_search_application_service:
|
|
321
|
+
self._code_search_application_service = CodeSearchApplicationService(
|
|
322
|
+
bm25_service=self.bm25_service(),
|
|
323
|
+
code_search_service=self.code_search_service(),
|
|
324
|
+
text_search_service=self.text_search_service(),
|
|
325
|
+
progress_tracker=self.operation(),
|
|
326
|
+
snippet_repository=self.snippet_v2_repository(),
|
|
327
|
+
fusion_service=self.fusion_service(),
|
|
328
|
+
)
|
|
329
|
+
return self._code_search_application_service
|
|
330
|
+
|
|
331
|
+
def git_commit_repository(self) -> GitCommitRepository:
|
|
332
|
+
"""Create a GitCommitRepository instance."""
|
|
333
|
+
if not self._git_commit_repository:
|
|
334
|
+
self._git_commit_repository = create_git_commit_repository(
|
|
335
|
+
session_factory=self.session_factory
|
|
336
|
+
)
|
|
337
|
+
return self._git_commit_repository
|
|
338
|
+
|
|
339
|
+
def git_branch_repository(self) -> GitBranchRepository:
    """Return the cached git branch repository, creating it if needed."""
    # Reuse the instance built by an earlier call, if there is one.
    if self._git_branch_repository:
        return self._git_branch_repository

    # First call: build the repository on this factory's session factory.
    self._git_branch_repository = create_git_branch_repository(
        session_factory=self.session_factory,
    )
    return self._git_branch_repository
|
|
346
|
+
|
|
347
|
+
def git_tag_repository(self) -> GitTagRepository:
    """Return the cached git tag repository, creating it if needed."""
    # Reuse the instance built by an earlier call, if there is one.
    if self._git_tag_repository:
        return self._git_tag_repository

    # First call: build the repository on this factory's session factory.
    self._git_tag_repository = create_git_tag_repository(
        session_factory=self.session_factory,
    )
    return self._git_tag_repository
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Service for searching the indexes."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
import structlog
|
|
6
|
+
|
|
7
|
+
from kodit.application.services.reporting import ProgressTracker
|
|
8
|
+
from kodit.domain.entities.git import SnippetV2
|
|
9
|
+
from kodit.domain.protocols import FusionService, SnippetRepositoryV2
|
|
10
|
+
from kodit.domain.services.bm25_service import BM25DomainService
|
|
11
|
+
from kodit.domain.services.embedding_service import EmbeddingDomainService
|
|
12
|
+
from kodit.domain.value_objects import (
|
|
13
|
+
FusionRequest,
|
|
14
|
+
MultiSearchRequest,
|
|
15
|
+
SearchRequest,
|
|
16
|
+
SearchResult,
|
|
17
|
+
)
|
|
18
|
+
from kodit.log import log_event
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class MultiSearchResult:
    """A snippet returned by a search, together with the score(s) attached to it."""

    # The matched snippet; serialization is delegated to its model_dump_json().
    snippet: SnippetV2
    # Score values recorded for this snippet by the search that produced it.
    original_scores: list[float]

    def to_json(self) -> str:
        """Return the snippet serialized as a JSON string.

        Only the snippet is serialized; ``original_scores`` is not part of
        the output.
        """
        serialized = self.snippet.model_dump_json()
        return serialized

    @classmethod
    def to_jsonlines(cls, results: list["MultiSearchResult"]) -> str:
        """Serialize several results as JSON Lines.

        Args:
            results: The results to serialize, in output order.

        Returns:
            One JSON document per result, joined with newlines and without a
            trailing newline. An empty list yields an empty string.

        """
        lines = [entry.to_json() for entry in results]
        return "\n".join(lines)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class CodeSearchApplicationService:
    """Run keyword and semantic searches and fuse them into one ranked list."""

    def __init__(  # noqa: PLR0913
        self,
        bm25_service: BM25DomainService,
        code_search_service: EmbeddingDomainService,
        text_search_service: EmbeddingDomainService,
        progress_tracker: ProgressTracker,
        snippet_repository: SnippetRepositoryV2,
        fusion_service: FusionService,
    ) -> None:
        """Store the collaborators used by :meth:`search`.

        Args:
            bm25_service: Searched once per entry in ``request.keywords``.
            code_search_service: Searched with ``request.code_query``.
            text_search_service: Searched with ``request.text_query``.
            progress_tracker: Stored on the instance; not used by ``search``.
            snippet_repository: Loads the snippets for the fused ids.
            fusion_service: Merges the per-mode rankings into one ranking.

        """
        self.bm25_service = bm25_service
        self.code_search_service = code_search_service
        self.text_search_service = text_search_service
        self.progress_tracker = progress_tracker
        self.snippet_repository = snippet_repository
        self.fusion_service = fusion_service
        self.log = structlog.get_logger(__name__)

    async def search(self, request: MultiSearchRequest) -> list[MultiSearchResult]:
        """Search for relevant snippets across all indexes.

        Each populated part of the request (``keywords``, ``code_query``,
        ``text_query``) contributes one ranking; the rankings are merged by
        the fusion service and truncated to ``request.top_k``.

        Args:
            request: The query; ``keywords``, ``code_query``, ``text_query``
                and ``top_k`` are read.

        Returns:
            At most ``request.top_k`` results in the order produced by the
            fusion service, or an empty list when the request populates no
            search mode. ``original_scores`` holds the fused score(s) reported
            for the snippet's id.

        """
        log_event("kodit.index.search")

        # Apply filters if provided
        filtered_snippet_ids: list[str] | None = None
        # TODO(Phil): Re-implement filtering on search results

        # One ranking per search mode that the request actually uses.
        fusion_list: list[list[FusionRequest]] = []

        # Keyword search: hits for all keywords are concatenated, in keyword
        # order, into a single ranking.
        if request.keywords:
            result_ids: list[SearchResult] = []
            for keyword in request.keywords:
                results = await self.bm25_service.search(
                    SearchRequest(
                        query=keyword,
                        top_k=request.top_k,
                        snippet_ids=filtered_snippet_ids,
                    )
                )
                result_ids.extend(results)

            fusion_list.append(
                [FusionRequest(id=x.snippet_id, score=x.score) for x in result_ids]
            )

        # Semantic code search
        if request.code_query:
            query_results = await self.code_search_service.search(
                SearchRequest(
                    query=request.code_query,
                    top_k=request.top_k,
                    snippet_ids=filtered_snippet_ids,
                )
            )
            fusion_list.append(
                [FusionRequest(id=x.snippet_id, score=x.score) for x in query_results]
            )

        # Semantic text search
        if request.text_query:
            query_results = await self.text_search_service.search(
                SearchRequest(
                    query=request.text_query,
                    top_k=request.top_k,
                    snippet_ids=filtered_snippet_ids,
                )
            )
            fusion_list.append(
                [FusionRequest(id=x.snippet_id, score=x.score) for x in query_results]
            )

        # Nothing was searched, so there is nothing to fuse.
        if not fusion_list:
            return []

        # Fusion ranking
        final_results = self.fusion_service.reciprocal_rank_fusion(
            rankings=fusion_list,
            k=60,  # This is a parameter in the RRF algorithm, not top_k
        )

        # Keep only top_k results
        final_results = final_results[: request.top_k]

        # Index the fused ranking once so ordering and score lookup below are
        # dictionary hits instead of a list scan per snippet.
        ids = []
        position_by_id: dict = {}
        scores_by_id: dict = {}
        for position, fused in enumerate(final_results):
            ids.append(fused.id)
            # setdefault keeps the first position if an id were ever repeated.
            position_by_id.setdefault(fused.id, position)
            scores_by_id.setdefault(fused.id, []).append(fused.score)

        # Get snippet details
        search_results = await self.snippet_repository.get_by_ids(ids)

        # Restore the fused order. A snippet that was not requested sorts last
        # with no scores rather than aborting the whole search.
        unranked = len(ids)
        search_results.sort(key=lambda x: position_by_id.get(x.id, unranked))
        return [
            MultiSearchResult(
                snippet=snippet,
                # Copy so each result owns its own list.
                original_scores=list(scores_by_id.get(snippet.id, ())),
            )
            for snippet in search_results
        ]
|