kodit 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +9 -2
- kodit/application/factories/code_indexing_factory.py +62 -13
- kodit/application/factories/reporting_factory.py +32 -0
- kodit/application/services/auto_indexing_service.py +41 -33
- kodit/application/services/code_indexing_application_service.py +137 -138
- kodit/application/services/indexing_worker_service.py +26 -30
- kodit/application/services/queue_service.py +12 -14
- kodit/application/services/reporting.py +104 -0
- kodit/application/services/sync_scheduler.py +21 -20
- kodit/cli.py +71 -85
- kodit/config.py +26 -3
- kodit/database.py +2 -1
- kodit/domain/entities.py +99 -1
- kodit/domain/protocols.py +34 -1
- kodit/domain/services/bm25_service.py +1 -6
- kodit/domain/services/index_service.py +23 -57
- kodit/domain/services/task_status_query_service.py +19 -0
- kodit/domain/value_objects.py +53 -8
- kodit/infrastructure/api/v1/dependencies.py +40 -12
- kodit/infrastructure/api/v1/routers/indexes.py +45 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +39 -0
- kodit/infrastructure/cloning/git/working_copy.py +43 -7
- kodit/infrastructure/embedding/embedding_factory.py +8 -3
- kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py +48 -55
- kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
- kodit/infrastructure/git/git_utils.py +3 -2
- kodit/infrastructure/mappers/index_mapper.py +1 -0
- kodit/infrastructure/mappers/task_status_mapper.py +85 -0
- kodit/infrastructure/reporting/__init__.py +1 -0
- kodit/infrastructure/reporting/db_progress.py +23 -0
- kodit/infrastructure/reporting/log_progress.py +37 -0
- kodit/infrastructure/reporting/tdqm_progress.py +38 -0
- kodit/infrastructure/sqlalchemy/embedding_repository.py +47 -68
- kodit/infrastructure/sqlalchemy/entities.py +89 -2
- kodit/infrastructure/sqlalchemy/index_repository.py +274 -236
- kodit/infrastructure/sqlalchemy/task_repository.py +55 -39
- kodit/infrastructure/sqlalchemy/task_status_repository.py +79 -0
- kodit/infrastructure/sqlalchemy/unit_of_work.py +59 -0
- kodit/mcp.py +15 -3
- kodit/migrations/env.py +0 -1
- kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
- {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/METADATA +1 -1
- {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/RECORD +47 -40
- kodit/domain/interfaces.py +0 -27
- kodit/infrastructure/ui/__init__.py +0 -1
- kodit/infrastructure/ui/progress.py +0 -170
- kodit/infrastructure/ui/spinner.py +0 -74
- kodit/reporting.py +0 -78
- {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/WHEEL +0 -0
- {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -4,10 +4,12 @@ from dataclasses import replace
|
|
|
4
4
|
from datetime import UTC, datetime
|
|
5
5
|
|
|
6
6
|
import structlog
|
|
7
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
7
|
|
|
8
|
+
from kodit.application.services.reporting import (
|
|
9
|
+
ProgressTracker,
|
|
10
|
+
TaskOperation,
|
|
11
|
+
)
|
|
9
12
|
from kodit.domain.entities import Index, Snippet
|
|
10
|
-
from kodit.domain.interfaces import ProgressCallback
|
|
11
13
|
from kodit.domain.protocols import IndexRepository
|
|
12
14
|
from kodit.domain.services.bm25_service import BM25DomainService
|
|
13
15
|
from kodit.domain.services.embedding_service import EmbeddingDomainService
|
|
@@ -23,9 +25,9 @@ from kodit.domain.value_objects import (
|
|
|
23
25
|
SearchRequest,
|
|
24
26
|
SearchResult,
|
|
25
27
|
SnippetSearchFilters,
|
|
28
|
+
TrackableType,
|
|
26
29
|
)
|
|
27
30
|
from kodit.log import log_event
|
|
28
|
-
from kodit.reporting import Reporter
|
|
29
31
|
|
|
30
32
|
|
|
31
33
|
class CodeIndexingApplicationService:
|
|
@@ -40,7 +42,7 @@ class CodeIndexingApplicationService:
|
|
|
40
42
|
code_search_service: EmbeddingDomainService,
|
|
41
43
|
text_search_service: EmbeddingDomainService,
|
|
42
44
|
enrichment_service: EnrichmentDomainService,
|
|
43
|
-
|
|
45
|
+
operation: ProgressTracker,
|
|
44
46
|
) -> None:
|
|
45
47
|
"""Initialize the code indexing application service."""
|
|
46
48
|
self.index_domain_service = indexing_domain_service
|
|
@@ -50,7 +52,7 @@ class CodeIndexingApplicationService:
|
|
|
50
52
|
self.code_search_service = code_search_service
|
|
51
53
|
self.text_search_service = text_search_service
|
|
52
54
|
self.enrichment_service = enrichment_service
|
|
53
|
-
self.
|
|
55
|
+
self.operation = operation
|
|
54
56
|
self.log = structlog.get_logger(__name__)
|
|
55
57
|
|
|
56
58
|
async def does_index_exist(self, uri: str) -> bool:
|
|
@@ -60,107 +62,131 @@ class CodeIndexingApplicationService:
|
|
|
60
62
|
existing_index = await self.index_repository.get_by_uri(sanitized_uri)
|
|
61
63
|
return existing_index is not None
|
|
62
64
|
|
|
63
|
-
async def create_index_from_uri(
|
|
64
|
-
self, uri: str, progress_callback: ProgressCallback | None = None
|
|
65
|
-
) -> Index:
|
|
65
|
+
async def create_index_from_uri(self, uri: str) -> Index:
|
|
66
66
|
"""Create a new index for a source."""
|
|
67
67
|
log_event("kodit.index.create")
|
|
68
|
+
async with self.operation.create_child(TaskOperation.CREATE_INDEX) as operation:
|
|
69
|
+
# Check if index already exists
|
|
70
|
+
sanitized_uri, _ = self.index_domain_service.sanitize_uri(uri)
|
|
71
|
+
self.log.info("Creating index from URI", uri=str(sanitized_uri))
|
|
72
|
+
existing_index = await self.index_repository.get_by_uri(sanitized_uri)
|
|
73
|
+
if existing_index:
|
|
74
|
+
self.log.debug(
|
|
75
|
+
"Index already exists",
|
|
76
|
+
uri=str(sanitized_uri),
|
|
77
|
+
index_id=existing_index.id,
|
|
78
|
+
)
|
|
79
|
+
return existing_index
|
|
68
80
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
if existing_index:
|
|
73
|
-
self.log.debug(
|
|
74
|
-
"Index already exists",
|
|
75
|
-
uri=str(sanitized_uri),
|
|
76
|
-
index_id=existing_index.id,
|
|
77
|
-
)
|
|
78
|
-
return existing_index
|
|
79
|
-
|
|
80
|
-
# Only prepare working copy if we need to create a new index
|
|
81
|
-
working_copy = await self.index_domain_service.prepare_index(
|
|
82
|
-
uri, progress_callback
|
|
83
|
-
)
|
|
81
|
+
# Only prepare working copy if we need to create a new index
|
|
82
|
+
self.log.info("Preparing working copy", uri=str(sanitized_uri))
|
|
83
|
+
working_copy = await self.index_domain_service.prepare_index(uri, operation)
|
|
84
84
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
return index
|
|
85
|
+
# Create new index
|
|
86
|
+
self.log.info("Creating index", uri=str(sanitized_uri))
|
|
87
|
+
return await self.index_repository.create(sanitized_uri, working_copy)
|
|
89
88
|
|
|
90
|
-
async def run_index(
|
|
91
|
-
self, index: Index, progress_callback: ProgressCallback | None = None
|
|
92
|
-
) -> None:
|
|
89
|
+
async def run_index(self, index: Index) -> None:
|
|
93
90
|
"""Run the complete indexing process for a specific index."""
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
return
|
|
133
|
-
|
|
134
|
-
# Create BM25 index
|
|
135
|
-
self.log.info("Creating keyword index")
|
|
136
|
-
await self._create_bm25_index(index.snippets, progress_callback)
|
|
137
|
-
|
|
138
|
-
# Create code embeddings
|
|
139
|
-
self.log.info("Creating semantic code index")
|
|
140
|
-
await self._create_code_embeddings(index.snippets, progress_callback)
|
|
141
|
-
|
|
142
|
-
# Enrich snippets
|
|
143
|
-
self.log.info("Enriching snippets", num_snippets=len(index.snippets))
|
|
144
|
-
enriched_snippets = await self.index_domain_service.enrich_snippets_in_index(
|
|
145
|
-
snippets=index.snippets, progress_callback=progress_callback
|
|
146
|
-
)
|
|
147
|
-
# Update snippets in repository
|
|
148
|
-
await self.index_repository.update_snippets(index.id, enriched_snippets)
|
|
149
|
-
|
|
150
|
-
# Create text embeddings (on enriched content)
|
|
151
|
-
self.log.info("Creating semantic text index")
|
|
152
|
-
await self._create_text_embeddings(enriched_snippets, progress_callback)
|
|
153
|
-
|
|
154
|
-
# Update index timestamp
|
|
155
|
-
await self.index_repository.update_index_timestamp(index.id)
|
|
156
|
-
|
|
157
|
-
# Now that all file dependencies have been captured, enact the file processing
|
|
158
|
-
# statuses
|
|
159
|
-
index.source.working_copy.clear_file_processing_statuses()
|
|
160
|
-
await self.index_repository.update(index)
|
|
91
|
+
# Create a new operation
|
|
92
|
+
async with self.operation.create_child(
|
|
93
|
+
TaskOperation.RUN_INDEX,
|
|
94
|
+
trackable_type=TrackableType.INDEX,
|
|
95
|
+
trackable_id=index.id,
|
|
96
|
+
) as operation:
|
|
97
|
+
# TODO(philwinder): Move this into a reporter # noqa: TD003, FIX002
|
|
98
|
+
log_event("kodit.index.run")
|
|
99
|
+
|
|
100
|
+
if not index or not index.id:
|
|
101
|
+
msg = f"Index has no ID: {index}"
|
|
102
|
+
raise ValueError(msg)
|
|
103
|
+
|
|
104
|
+
# Refresh working copy
|
|
105
|
+
async with operation.create_child(
|
|
106
|
+
TaskOperation.REFRESH_WORKING_COPY
|
|
107
|
+
) as step:
|
|
108
|
+
index.source.working_copy = (
|
|
109
|
+
await self.index_domain_service.refresh_working_copy(
|
|
110
|
+
index.source.working_copy, step
|
|
111
|
+
)
|
|
112
|
+
)
|
|
113
|
+
if len(index.source.working_copy.changed_files()) == 0:
|
|
114
|
+
self.log.info("No new changes to index", index_id=index.id)
|
|
115
|
+
await step.skip("No new changes to index")
|
|
116
|
+
return
|
|
117
|
+
|
|
118
|
+
# Delete the old snippets from the files that have changed
|
|
119
|
+
async with operation.create_child(
|
|
120
|
+
TaskOperation.DELETE_OLD_SNIPPETS
|
|
121
|
+
) as step:
|
|
122
|
+
await self.index_repository.delete_snippets_by_file_ids(
|
|
123
|
+
[
|
|
124
|
+
file.id
|
|
125
|
+
for file in index.source.working_copy.changed_files()
|
|
126
|
+
if file.id
|
|
127
|
+
]
|
|
128
|
+
)
|
|
161
129
|
|
|
162
|
-
|
|
163
|
-
|
|
130
|
+
# Extract and create snippets (domain service handles progress)
|
|
131
|
+
async with operation.create_child(TaskOperation.EXTRACT_SNIPPETS) as step:
|
|
132
|
+
index = await self.index_domain_service.extract_snippets_from_index(
|
|
133
|
+
index=index, step=step
|
|
134
|
+
)
|
|
135
|
+
await self.index_repository.update(index)
|
|
136
|
+
|
|
137
|
+
# Refresh index to get snippets with IDs, required for subsequent steps
|
|
138
|
+
flushed_index = await self.index_repository.get(index.id)
|
|
139
|
+
if not flushed_index:
|
|
140
|
+
msg = f"Index {index.id} not found after snippet extraction"
|
|
141
|
+
raise ValueError(msg)
|
|
142
|
+
index = flushed_index
|
|
143
|
+
if len(index.snippets) == 0:
|
|
144
|
+
self.log.info(
|
|
145
|
+
"No snippets to index after extraction", index_id=index.id
|
|
146
|
+
)
|
|
147
|
+
await step.skip("No snippets to index after extraction")
|
|
148
|
+
return
|
|
149
|
+
|
|
150
|
+
# Create BM25 index
|
|
151
|
+
self.log.info("Creating keyword index")
|
|
152
|
+
async with operation.create_child(TaskOperation.CREATE_BM25_INDEX) as step:
|
|
153
|
+
await self._create_bm25_index(index.snippets)
|
|
154
|
+
|
|
155
|
+
# Create code embeddings
|
|
156
|
+
async with operation.create_child(
|
|
157
|
+
TaskOperation.CREATE_CODE_EMBEDDINGS
|
|
158
|
+
) as step:
|
|
159
|
+
await self._create_code_embeddings(index.snippets, step)
|
|
160
|
+
|
|
161
|
+
# Enrich snippets
|
|
162
|
+
async with operation.create_child(TaskOperation.ENRICH_SNIPPETS) as step:
|
|
163
|
+
enriched_snippets = (
|
|
164
|
+
await self.index_domain_service.enrich_snippets_in_index(
|
|
165
|
+
snippets=index.snippets,
|
|
166
|
+
reporting_step=step,
|
|
167
|
+
)
|
|
168
|
+
)
|
|
169
|
+
# Update snippets in repository
|
|
170
|
+
await self.index_repository.update_snippets(index.id, enriched_snippets)
|
|
171
|
+
|
|
172
|
+
# Create text embeddings (on enriched content)
|
|
173
|
+
async with operation.create_child(
|
|
174
|
+
TaskOperation.CREATE_TEXT_EMBEDDINGS
|
|
175
|
+
) as step:
|
|
176
|
+
await self._create_text_embeddings(enriched_snippets, step)
|
|
177
|
+
|
|
178
|
+
# Update index timestamp
|
|
179
|
+
async with operation.create_child(
|
|
180
|
+
TaskOperation.UPDATE_INDEX_TIMESTAMP
|
|
181
|
+
) as step:
|
|
182
|
+
await self.index_repository.update_index_timestamp(index.id)
|
|
183
|
+
|
|
184
|
+
# After indexing, clear the file processing statuses
|
|
185
|
+
async with operation.create_child(
|
|
186
|
+
TaskOperation.CLEAR_FILE_PROCESSING_STATUSES
|
|
187
|
+
) as step:
|
|
188
|
+
index.source.working_copy.clear_file_processing_statuses()
|
|
189
|
+
await self.index_repository.update(index)
|
|
164
190
|
|
|
165
191
|
async def search(self, request: MultiSearchRequest) -> list[MultiSearchResult]:
|
|
166
192
|
"""Search for relevant snippets across all indexes."""
|
|
@@ -312,15 +338,7 @@ class CodeIndexingApplicationService:
|
|
|
312
338
|
]
|
|
313
339
|
|
|
314
340
|
# FUTURE: BM25 index enriched content too
|
|
315
|
-
async def _create_bm25_index(
|
|
316
|
-
self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
|
|
317
|
-
) -> None:
|
|
318
|
-
reporter = Reporter(self.log, progress_callback)
|
|
319
|
-
await reporter.start("bm25_index", len(snippets), "Creating keyword index...")
|
|
320
|
-
|
|
321
|
-
for _snippet in snippets:
|
|
322
|
-
pass
|
|
323
|
-
|
|
341
|
+
async def _create_bm25_index(self, snippets: list[Snippet]) -> None:
|
|
324
342
|
await self.bm25_service.index_documents(
|
|
325
343
|
IndexRequest(
|
|
326
344
|
documents=[
|
|
@@ -331,16 +349,10 @@ class CodeIndexingApplicationService:
|
|
|
331
349
|
)
|
|
332
350
|
)
|
|
333
351
|
|
|
334
|
-
await reporter.done("bm25_index", "Keyword index created")
|
|
335
|
-
|
|
336
352
|
async def _create_code_embeddings(
|
|
337
|
-
self, snippets: list[Snippet],
|
|
353
|
+
self, snippets: list[Snippet], reporting_step: ProgressTracker
|
|
338
354
|
) -> None:
|
|
339
|
-
|
|
340
|
-
await reporter.start(
|
|
341
|
-
"code_embeddings", len(snippets), "Creating code embeddings..."
|
|
342
|
-
)
|
|
343
|
-
|
|
355
|
+
await reporting_step.set_total(len(snippets))
|
|
344
356
|
processed = 0
|
|
345
357
|
async for result in self.code_search_service.index_documents(
|
|
346
358
|
IndexRequest(
|
|
@@ -352,23 +364,13 @@ class CodeIndexingApplicationService:
|
|
|
352
364
|
)
|
|
353
365
|
):
|
|
354
366
|
processed += len(result)
|
|
355
|
-
await
|
|
356
|
-
"
|
|
357
|
-
processed,
|
|
358
|
-
len(snippets),
|
|
359
|
-
"Creating code embeddings...",
|
|
367
|
+
await reporting_step.set_current(
|
|
368
|
+
processed, f"Creating code embeddings for {processed} snippets"
|
|
360
369
|
)
|
|
361
370
|
|
|
362
|
-
await reporter.done("code_embeddings")
|
|
363
|
-
|
|
364
371
|
async def _create_text_embeddings(
|
|
365
|
-
self, snippets: list[Snippet],
|
|
372
|
+
self, snippets: list[Snippet], reporting_step: ProgressTracker
|
|
366
373
|
) -> None:
|
|
367
|
-
reporter = Reporter(self.log, progress_callback)
|
|
368
|
-
await reporter.start(
|
|
369
|
-
"text_embeddings", len(snippets), "Creating text embeddings..."
|
|
370
|
-
)
|
|
371
|
-
|
|
372
374
|
# Only create text embeddings for snippets that have summary content
|
|
373
375
|
documents_with_summaries = []
|
|
374
376
|
for snippet in snippets:
|
|
@@ -384,23 +386,21 @@ class CodeIndexingApplicationService:
|
|
|
384
386
|
continue
|
|
385
387
|
|
|
386
388
|
if not documents_with_summaries:
|
|
387
|
-
await
|
|
389
|
+
await reporting_step.skip(
|
|
390
|
+
"No snippets with summaries to create text embeddings"
|
|
391
|
+
)
|
|
388
392
|
return
|
|
389
393
|
|
|
394
|
+
await reporting_step.set_total(len(documents_with_summaries))
|
|
390
395
|
processed = 0
|
|
391
396
|
async for result in self.text_search_service.index_documents(
|
|
392
397
|
IndexRequest(documents=documents_with_summaries)
|
|
393
398
|
):
|
|
394
399
|
processed += len(result)
|
|
395
|
-
await
|
|
396
|
-
"
|
|
397
|
-
processed,
|
|
398
|
-
len(snippets),
|
|
399
|
-
"Creating text embeddings...",
|
|
400
|
+
await reporting_step.set_current(
|
|
401
|
+
processed, f"Creating text embeddings for {processed} snippets"
|
|
400
402
|
)
|
|
401
403
|
|
|
402
|
-
await reporter.done("text_embeddings")
|
|
403
|
-
|
|
404
404
|
async def delete_index(self, index: Index) -> None:
|
|
405
405
|
"""Delete an index."""
|
|
406
406
|
# Delete the index from the domain
|
|
@@ -408,4 +408,3 @@ class CodeIndexingApplicationService:
|
|
|
408
408
|
|
|
409
409
|
# Delete index from the database
|
|
410
410
|
await self.index_repository.delete(index)
|
|
411
|
-
await self.session.commit()
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
from collections.abc import Callable
|
|
5
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
6
5
|
from contextlib import suppress
|
|
7
6
|
from datetime import UTC, datetime
|
|
8
7
|
|
|
@@ -12,10 +11,12 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
12
11
|
from kodit.application.factories.code_indexing_factory import (
|
|
13
12
|
create_code_indexing_application_service,
|
|
14
13
|
)
|
|
14
|
+
from kodit.application.factories.reporting_factory import create_noop_operation
|
|
15
|
+
from kodit.application.services.reporting import ProgressTracker
|
|
15
16
|
from kodit.config import AppContext
|
|
16
17
|
from kodit.domain.entities import Task
|
|
17
18
|
from kodit.domain.value_objects import TaskType
|
|
18
|
-
from kodit.infrastructure.sqlalchemy.task_repository import
|
|
19
|
+
from kodit.infrastructure.sqlalchemy.task_repository import create_task_repository
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class IndexingWorkerService:
|
|
@@ -35,17 +36,16 @@ class IndexingWorkerService:
|
|
|
35
36
|
self.session_factory = session_factory
|
|
36
37
|
self._worker_task: asyncio.Task | None = None
|
|
37
38
|
self._shutdown_event = asyncio.Event()
|
|
38
|
-
self.
|
|
39
|
-
max_workers=1, thread_name_prefix="indexing-worker"
|
|
40
|
-
)
|
|
39
|
+
self.task_repository = create_task_repository(session_factory)
|
|
41
40
|
self.log = structlog.get_logger(__name__)
|
|
42
41
|
|
|
43
|
-
async def start(self) -> None:
|
|
42
|
+
async def start(self, operation: ProgressTracker | None = None) -> None:
|
|
44
43
|
"""Start the worker to process the queue."""
|
|
44
|
+
operation = operation or create_noop_operation()
|
|
45
45
|
self._running = True
|
|
46
46
|
|
|
47
47
|
# Start single worker task
|
|
48
|
-
self._worker_task = asyncio.create_task(self._worker_loop())
|
|
48
|
+
self._worker_task = asyncio.create_task(self._worker_loop(operation))
|
|
49
49
|
|
|
50
50
|
self.log.info(
|
|
51
51
|
"Indexing worker started",
|
|
@@ -57,30 +57,24 @@ class IndexingWorkerService:
|
|
|
57
57
|
self._shutdown_event.set()
|
|
58
58
|
|
|
59
59
|
if self._worker_task and not self._worker_task.done():
|
|
60
|
-
self._worker_task.cancel()
|
|
61
60
|
with suppress(asyncio.CancelledError):
|
|
61
|
+
self._worker_task.cancel()
|
|
62
62
|
await self._worker_task
|
|
63
63
|
|
|
64
|
-
# Shutdown the thread pool executor
|
|
65
|
-
self._executor.shutdown(wait=True)
|
|
66
|
-
|
|
67
64
|
self.log.info("Indexing worker stopped")
|
|
68
65
|
|
|
69
|
-
async def _worker_loop(self) -> None:
|
|
66
|
+
async def _worker_loop(self, operation: ProgressTracker) -> None:
|
|
70
67
|
self.log.debug("Worker loop started")
|
|
71
68
|
|
|
72
69
|
while not self._shutdown_event.is_set():
|
|
73
70
|
try:
|
|
74
71
|
async with self.session_factory() as session:
|
|
75
|
-
|
|
76
|
-
task = await repo.take()
|
|
72
|
+
task = await self.task_repository.take()
|
|
77
73
|
await session.commit()
|
|
78
74
|
|
|
79
75
|
# If there's a task, process it before polling again
|
|
80
76
|
if task:
|
|
81
|
-
await
|
|
82
|
-
self._executor, self._process_task, task
|
|
83
|
-
)
|
|
77
|
+
await self._process_task(task, operation)
|
|
84
78
|
continue
|
|
85
79
|
|
|
86
80
|
# If no task, sleep for a bit
|
|
@@ -96,7 +90,7 @@ class IndexingWorkerService:
|
|
|
96
90
|
|
|
97
91
|
self.log.info("Worker loop stopped")
|
|
98
92
|
|
|
99
|
-
def _process_task(self, task: Task) -> None:
|
|
93
|
+
async def _process_task(self, task: Task, operation: ProgressTracker) -> None:
|
|
100
94
|
"""Process a single task."""
|
|
101
95
|
self.log.info(
|
|
102
96
|
"Processing task",
|
|
@@ -113,7 +107,7 @@ class IndexingWorkerService:
|
|
|
113
107
|
try:
|
|
114
108
|
# Process based on task type (currently only INDEX_UPDATE is supported)
|
|
115
109
|
if task.type is TaskType.INDEX_UPDATE:
|
|
116
|
-
|
|
110
|
+
await self._process_index_update(task, operation)
|
|
117
111
|
else:
|
|
118
112
|
self.log.warning(
|
|
119
113
|
"Unknown task type",
|
|
@@ -131,7 +125,9 @@ class IndexingWorkerService:
|
|
|
131
125
|
duration_seconds=duration,
|
|
132
126
|
)
|
|
133
127
|
|
|
134
|
-
async def _process_index_update(
|
|
128
|
+
async def _process_index_update(
|
|
129
|
+
self, task: Task, operation: ProgressTracker
|
|
130
|
+
) -> None:
|
|
135
131
|
"""Process index update/sync task."""
|
|
136
132
|
index_id = task.payload.get("index_id")
|
|
137
133
|
if not index_id:
|
|
@@ -140,15 +136,15 @@ class IndexingWorkerService:
|
|
|
140
136
|
# Create a fresh database connection for this task
|
|
141
137
|
db = await self.app_context.new_db(run_migrations=True)
|
|
142
138
|
try:
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
139
|
+
service = create_code_indexing_application_service(
|
|
140
|
+
app_context=self.app_context,
|
|
141
|
+
session_factory=self.session_factory,
|
|
142
|
+
operation=operation,
|
|
143
|
+
)
|
|
144
|
+
index = await service.index_repository.get(index_id)
|
|
145
|
+
if not index:
|
|
146
|
+
raise ValueError(f"Index not found: {index_id}")
|
|
147
|
+
|
|
148
|
+
await service.run_index(index)
|
|
153
149
|
finally:
|
|
154
150
|
await db.close()
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
"""Queue service for managing tasks."""
|
|
2
2
|
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
|
|
3
5
|
import structlog
|
|
4
6
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
5
7
|
|
|
6
8
|
from kodit.domain.entities import Task
|
|
7
9
|
from kodit.domain.value_objects import TaskType
|
|
8
|
-
from kodit.infrastructure.sqlalchemy.task_repository import
|
|
10
|
+
from kodit.infrastructure.sqlalchemy.task_repository import (
|
|
11
|
+
create_task_repository,
|
|
12
|
+
)
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
class QueueService:
|
|
@@ -17,26 +21,24 @@ class QueueService:
|
|
|
17
21
|
|
|
18
22
|
def __init__(
|
|
19
23
|
self,
|
|
20
|
-
|
|
24
|
+
session_factory: Callable[[], AsyncSession],
|
|
21
25
|
) -> None:
|
|
22
26
|
"""Initialize the queue service."""
|
|
23
|
-
self.
|
|
27
|
+
self.task_repository = create_task_repository(session_factory=session_factory)
|
|
24
28
|
self.log = structlog.get_logger(__name__)
|
|
25
29
|
|
|
26
30
|
async def enqueue_task(self, task: Task) -> None:
|
|
27
31
|
"""Queue a task in the database."""
|
|
28
|
-
repo = SqlAlchemyTaskRepository(self.session)
|
|
29
|
-
|
|
30
32
|
# See if task already exists
|
|
31
|
-
db_task = await
|
|
33
|
+
db_task = await self.task_repository.get(task.id)
|
|
32
34
|
if db_task:
|
|
33
35
|
# Task already exists, update priority
|
|
34
36
|
db_task.priority = task.priority
|
|
35
|
-
await
|
|
37
|
+
await self.task_repository.update(db_task)
|
|
36
38
|
self.log.info("Task updated", task_id=task.id, task_type=task.type)
|
|
37
39
|
else:
|
|
38
40
|
# Otherwise, add task
|
|
39
|
-
await
|
|
41
|
+
await self.task_repository.add(task)
|
|
40
42
|
self.log.info(
|
|
41
43
|
"Task queued",
|
|
42
44
|
task_id=task.id,
|
|
@@ -44,14 +46,10 @@ class QueueService:
|
|
|
44
46
|
payload=task.payload,
|
|
45
47
|
)
|
|
46
48
|
|
|
47
|
-
await self.session.commit()
|
|
48
|
-
|
|
49
49
|
async def list_tasks(self, task_type: TaskType | None = None) -> list[Task]:
|
|
50
50
|
"""List all tasks in the queue."""
|
|
51
|
-
|
|
52
|
-
return await repo.list(task_type)
|
|
51
|
+
return await self.task_repository.list(task_type)
|
|
53
52
|
|
|
54
53
|
async def get_task(self, task_id: str) -> Task | None:
|
|
55
54
|
"""Get a specific task by ID."""
|
|
56
|
-
|
|
57
|
-
return await repo.get(task_id)
|
|
55
|
+
return await self.task_repository.get(task_id)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Reporting."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import AsyncGenerator
|
|
4
|
+
from contextlib import asynccontextmanager
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
import structlog
|
|
8
|
+
|
|
9
|
+
from kodit.domain.entities import TaskStatus
|
|
10
|
+
from kodit.domain.value_objects import TaskOperation, TrackableType
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from kodit.domain.protocols import ReportingModule
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ProgressTracker:
    """Progress tracker.

    Provides a reactive wrapper around TaskStatus domain entities that automatically
    propagates state changes to the database and reporting modules. This pattern was
    chosen over a traditional service-repository approach because:
    - State changes must trigger immediate side effects (database writes, notifications)
    - Multiple consumers need real-time updates without polling
    - The wrapper pattern allows transparent interception of all state mutations

    Every mutating method on this class updates the wrapped ``TaskStatus`` and then
    awaits ``on_change`` on each subscribed reporting module.
    """

    def __init__(
        self,
        task_status: TaskStatus,
    ) -> None:
        """Initialize the progress tracker.

        Args:
            task_status: The status entity this tracker mutates and reports on.

        """
        self.task_status = task_status
        self._log = structlog.get_logger(__name__)
        # Reporting modules that are awaited on every state change.
        self._subscribers: list[ReportingModule] = []

    @staticmethod
    def create(
        operation: TaskOperation,
        parent: "TaskStatus | None" = None,
        trackable_type: TrackableType | None = None,
        trackable_id: int | None = None,
    ) -> "ProgressTracker":
        """Create a progress tracker around a newly created TaskStatus.

        Args:
            operation: The operation the new status describes.
            parent: Status of the enclosing operation, if any.
            trackable_type: Kind of entity the operation is attached to, if any.
            trackable_id: Identifier of that entity, if any.

        Returns:
            A tracker with no subscribers.

        """
        return ProgressTracker(
            TaskStatus.create(
                operation=operation,
                trackable_type=trackable_type,
                trackable_id=trackable_id,
                parent=parent,
            )
        )

    @asynccontextmanager
    async def create_child(
        self,
        operation: TaskOperation,
        trackable_type: TrackableType | None = None,
        trackable_id: int | None = None,
    ) -> AsyncGenerator["ProgressTracker", None]:
        """Create a child step and track it for the duration of the ``async with``.

        The child's status has this tracker's status as parent and the child is
        subscribed to every reporting module of this tracker. Subscribers are
        notified once when the step starts and once when it ends.

        Args:
            operation: The operation the child step describes.
            trackable_type: Overrides the parent's trackable type when given.
            trackable_id: Overrides the parent's trackable id when given.

        Yields:
            The tracker for the child step.

        Raises:
            Exception: Whatever the body of the ``async with`` raises. The child is
                marked as failed with the error message before the error propagates.

        """
        # Compare against None so that an explicit falsy value (for example an id
        # of 0) is honoured instead of being replaced by the parent's value.
        if trackable_type is None:
            trackable_type = self.task_status.trackable_type
        if trackable_id is None:
            trackable_id = self.task_status.trackable_id

        child = ProgressTracker.create(
            operation=operation,
            parent=self.task_status,
            trackable_type=trackable_type,
            trackable_id=trackable_id,
        )
        failed = False
        try:
            for subscriber in self._subscribers:
                child.subscribe(subscriber)

            await child.notify_subscribers()
            yield child
        except Exception as e:
            failed = True
            child.task_status.fail(str(e))
            # Swallowing the error here would make the ``async with`` suppress it,
            # so the caller would carry on as if the step had succeeded.
            raise
        finally:
            # A failed step must not also be reported as completed.
            if not failed:
                child.task_status.complete()
            await child.notify_subscribers()

    async def skip(self, reason: str) -> None:
        """Mark the step as skipped for ``reason`` and notify subscribers."""
        self.task_status.skip(reason)
        await self.notify_subscribers()

    def subscribe(self, subscriber: "ReportingModule") -> None:
        """Register a reporting module to be notified of changes to this step."""
        self._subscribers.append(subscriber)

    async def set_total(self, total: int) -> None:
        """Set the total amount of work for the step and notify subscribers."""
        self.task_status.set_total(total)
        await self.notify_subscribers()

    async def set_current(self, current: int, message: str | None = None) -> None:
        """Record the current progress (and optional message) and notify subscribers."""
        self.task_status.set_current(current, message)
        await self.notify_subscribers()

    async def notify_subscribers(self) -> None:
        """Send the current task status to every subscriber, in subscription order.

        The notification is unconditional: no check is made here for whether the
        status changed since the previous notification.
        """
        for subscriber in self._subscribers:
            await subscriber.on_change(self.task_status)
|