kodit 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +23 -4
- kodit/application/factories/code_indexing_factory.py +2 -24
- kodit/application/services/code_indexing_application_service.py +10 -2
- kodit/application/services/sync_scheduler.py +128 -0
- kodit/cli.py +103 -28
- kodit/config.py +15 -0
- kodit/domain/services/index_service.py +25 -66
- kodit/domain/value_objects.py +10 -22
- kodit/infrastructure/slicing/__init__.py +1 -0
- kodit/infrastructure/slicing/language_detection_service.py +18 -0
- kodit/infrastructure/slicing/slicer.py +894 -0
- kodit/infrastructure/sqlalchemy/index_repository.py +29 -0
- kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +6 -4
- kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
- kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +24 -16
- kodit/migrations/versions/85155663351e_initial.py +64 -48
- kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +20 -14
- {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/METADATA +10 -4
- {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/RECORD +23 -32
- kodit/infrastructure/snippet_extraction/__init__.py +0 -1
- kodit/infrastructure/snippet_extraction/factories.py +0 -13
- kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
- kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
- kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
- kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
- kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
- kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
- kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
- kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
- kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -44
- kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
- kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
- {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/WHEEL +0 -0
- {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/entry_points.txt +0 -0
- {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py
CHANGED
kodit/app.py
CHANGED
|
@@ -6,28 +6,47 @@ from contextlib import asynccontextmanager
|
|
|
6
6
|
from asgi_correlation_id import CorrelationIdMiddleware
|
|
7
7
|
from fastapi import FastAPI
|
|
8
8
|
|
|
9
|
+
from kodit.application.services.sync_scheduler import SyncSchedulerService
|
|
9
10
|
from kodit.config import AppContext
|
|
10
11
|
from kodit.infrastructure.indexing.auto_indexing_service import AutoIndexingService
|
|
11
12
|
from kodit.mcp import mcp
|
|
12
13
|
from kodit.middleware import ASGICancelledErrorMiddleware, logging_middleware
|
|
13
14
|
|
|
14
|
-
# Global
|
|
15
|
+
# Global services
|
|
15
16
|
_auto_indexing_service: AutoIndexingService | None = None
|
|
17
|
+
_sync_scheduler_service: SyncSchedulerService | None = None
|
|
16
18
|
|
|
17
19
|
|
|
18
20
|
@asynccontextmanager
|
|
19
21
|
async def app_lifespan(_: FastAPI) -> AsyncIterator[None]:
|
|
20
|
-
"""Manage application lifespan for auto-indexing."""
|
|
21
|
-
global _auto_indexing_service # noqa: PLW0603
|
|
22
|
-
|
|
22
|
+
"""Manage application lifespan for auto-indexing and sync."""
|
|
23
|
+
global _auto_indexing_service, _sync_scheduler_service # noqa: PLW0603
|
|
24
|
+
|
|
23
25
|
app_context = AppContext()
|
|
24
26
|
db = await app_context.get_db()
|
|
27
|
+
|
|
28
|
+
# Start auto-indexing service
|
|
25
29
|
_auto_indexing_service = AutoIndexingService(
|
|
26
30
|
app_context=app_context,
|
|
27
31
|
session_factory=db.session_factory,
|
|
28
32
|
)
|
|
29
33
|
await _auto_indexing_service.start_background_indexing()
|
|
34
|
+
|
|
35
|
+
# Start sync scheduler service
|
|
36
|
+
if app_context.sync.enabled:
|
|
37
|
+
_sync_scheduler_service = SyncSchedulerService(
|
|
38
|
+
app_context=app_context,
|
|
39
|
+
session_factory=db.session_factory,
|
|
40
|
+
)
|
|
41
|
+
_sync_scheduler_service.start_periodic_sync(
|
|
42
|
+
interval_seconds=app_context.sync.interval_seconds
|
|
43
|
+
)
|
|
44
|
+
|
|
30
45
|
yield
|
|
46
|
+
|
|
47
|
+
# Stop services
|
|
48
|
+
if _sync_scheduler_service:
|
|
49
|
+
await _sync_scheduler_service.stop_periodic_sync()
|
|
31
50
|
if _auto_indexing_service:
|
|
32
51
|
await _auto_indexing_service.stop()
|
|
33
52
|
|
kodit/application/factories/code_indexing_factory.py
CHANGED
|
@@ -13,7 +13,7 @@ from kodit.domain.services.index_query_service import IndexQueryService
|
|
|
13
13
|
from kodit.domain.services.index_service import (
|
|
14
14
|
IndexDomainService,
|
|
15
15
|
)
|
|
16
|
-
from kodit.domain.value_objects import LanguageMapping, SnippetExtractionStrategy
|
|
16
|
+
from kodit.domain.value_objects import LanguageMapping
|
|
17
17
|
from kodit.infrastructure.bm25.bm25_factory import bm25_repository_factory
|
|
18
18
|
from kodit.infrastructure.embedding.embedding_factory import (
|
|
19
19
|
embedding_domain_service_factory,
|
|
@@ -31,15 +31,9 @@ from kodit.infrastructure.enrichment.null_enrichment_provider import (
|
|
|
31
31
|
NullEnrichmentProvider,
|
|
32
32
|
)
|
|
33
33
|
from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
|
|
34
|
-
from kodit.infrastructure.
|
|
35
|
-
create_snippet_query_provider,
|
|
36
|
-
)
|
|
37
|
-
from kodit.infrastructure.snippet_extraction.language_detection_service import (
|
|
34
|
+
from kodit.infrastructure.slicing.language_detection_service import (
|
|
38
35
|
FileSystemLanguageDetectionService,
|
|
39
36
|
)
|
|
40
|
-
from kodit.infrastructure.snippet_extraction.tree_sitter_snippet_extractor import (
|
|
41
|
-
TreeSitterSnippetExtractor,
|
|
42
|
-
)
|
|
43
37
|
from kodit.infrastructure.sqlalchemy.embedding_repository import (
|
|
44
38
|
SqlAlchemyEmbeddingRepository,
|
|
45
39
|
)
|
|
@@ -63,17 +57,9 @@ def create_code_indexing_application_service(
|
|
|
63
57
|
|
|
64
58
|
# Create infrastructure services
|
|
65
59
|
language_detector = FileSystemLanguageDetectionService(language_map)
|
|
66
|
-
query_provider = create_snippet_query_provider()
|
|
67
60
|
|
|
68
|
-
# Create snippet extractors
|
|
69
|
-
method_extractor = TreeSitterSnippetExtractor(query_provider)
|
|
70
|
-
|
|
71
|
-
snippet_extractors = {
|
|
72
|
-
SnippetExtractionStrategy.METHOD_BASED: method_extractor,
|
|
73
|
-
}
|
|
74
61
|
index_domain_service = IndexDomainService(
|
|
75
62
|
language_detector=language_detector,
|
|
76
|
-
snippet_extractors=snippet_extractors,
|
|
77
63
|
enrichment_service=enrichment_service,
|
|
78
64
|
clone_dir=app_context.get_clone_dir(),
|
|
79
65
|
)
|
|
@@ -136,17 +122,9 @@ def create_fast_test_code_indexing_application_service(
|
|
|
136
122
|
|
|
137
123
|
# Create infrastructure services
|
|
138
124
|
language_detector = FileSystemLanguageDetectionService(language_map)
|
|
139
|
-
query_provider = create_snippet_query_provider()
|
|
140
|
-
|
|
141
|
-
# Create snippet extractors
|
|
142
|
-
method_extractor = TreeSitterSnippetExtractor(query_provider)
|
|
143
125
|
|
|
144
|
-
snippet_extractors = {
|
|
145
|
-
SnippetExtractionStrategy.METHOD_BASED: method_extractor,
|
|
146
|
-
}
|
|
147
126
|
index_domain_service = IndexDomainService(
|
|
148
127
|
language_detector=language_detector,
|
|
149
|
-
snippet_extractors=snippet_extractors,
|
|
150
128
|
enrichment_service=enrichment_service,
|
|
151
129
|
clone_dir=app_context.get_clone_dir(),
|
|
152
130
|
)
|
kodit/application/services/code_indexing_application_service.py
CHANGED
|
@@ -100,6 +100,11 @@ class CodeIndexingApplicationService:
|
|
|
100
100
|
self.log.info("No new changes to index", index_id=index.id)
|
|
101
101
|
return
|
|
102
102
|
|
|
103
|
+
# Delete the old snippets from the files that have changed
|
|
104
|
+
await self.index_repository.delete_snippets_by_file_ids(
|
|
105
|
+
[file.id for file in index.source.working_copy.changed_files() if file.id]
|
|
106
|
+
)
|
|
107
|
+
|
|
103
108
|
# Extract and create snippets (domain service handles progress)
|
|
104
109
|
self.log.info("Creating snippets for files", index_id=index.id)
|
|
105
110
|
index = await self.index_domain_service.extract_snippets_from_index(
|
|
@@ -115,6 +120,9 @@ class CodeIndexingApplicationService:
|
|
|
115
120
|
msg = f"Index {index.id} not found after snippet extraction"
|
|
116
121
|
raise ValueError(msg)
|
|
117
122
|
index = flushed_index
|
|
123
|
+
if len(index.snippets) == 0:
|
|
124
|
+
self.log.info("No snippets to index after extraction", index_id=index.id)
|
|
125
|
+
return
|
|
118
126
|
|
|
119
127
|
# Create BM25 index
|
|
120
128
|
self.log.info("Creating keyword index")
|
|
@@ -154,8 +162,8 @@ class CodeIndexingApplicationService:
|
|
|
154
162
|
# Apply filters if provided
|
|
155
163
|
filtered_snippet_ids: list[int] | None = None
|
|
156
164
|
if request.filters:
|
|
157
|
-
# Use domain service for filtering
|
|
158
|
-
prefilter_request = replace(request, top_k=
|
|
165
|
+
# Use domain service for filtering (use large top_k for pre-filtering)
|
|
166
|
+
prefilter_request = replace(request, top_k=10000)
|
|
159
167
|
snippet_results = await self.index_query_service.search_snippets(
|
|
160
168
|
prefilter_request
|
|
161
169
|
)
|
kodit/application/services/sync_scheduler.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Service for scheduling periodic sync operations."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from contextlib import suppress
|
|
6
|
+
|
|
7
|
+
import structlog
|
|
8
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
9
|
+
|
|
10
|
+
from kodit.application.factories.code_indexing_factory import (
|
|
11
|
+
create_code_indexing_application_service,
|
|
12
|
+
)
|
|
13
|
+
from kodit.config import AppContext
|
|
14
|
+
from kodit.domain.services.index_query_service import IndexQueryService
|
|
15
|
+
from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
|
|
16
|
+
from kodit.infrastructure.sqlalchemy.index_repository import SqlAlchemyIndexRepository
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SyncSchedulerService:
|
|
20
|
+
"""Service for scheduling periodic sync operations."""
|
|
21
|
+
|
|
22
|
+
def __init__(
|
|
23
|
+
self,
|
|
24
|
+
app_context: AppContext,
|
|
25
|
+
session_factory: Callable[[], AsyncSession],
|
|
26
|
+
) -> None:
|
|
27
|
+
"""Initialize the sync scheduler service."""
|
|
28
|
+
self.app_context = app_context
|
|
29
|
+
self.session_factory = session_factory
|
|
30
|
+
self.log = structlog.get_logger(__name__)
|
|
31
|
+
self._sync_task: asyncio.Task | None = None
|
|
32
|
+
self._shutdown_event = asyncio.Event()
|
|
33
|
+
|
|
34
|
+
def start_periodic_sync(self, interval_seconds: float = 1800) -> None:
|
|
35
|
+
"""Start periodic sync of all indexes."""
|
|
36
|
+
self.log.info("Starting periodic sync", interval_seconds=interval_seconds)
|
|
37
|
+
|
|
38
|
+
self._sync_task = asyncio.create_task(self._sync_loop(interval_seconds))
|
|
39
|
+
|
|
40
|
+
async def stop_periodic_sync(self) -> None:
|
|
41
|
+
"""Stop the periodic sync task."""
|
|
42
|
+
self.log.info("Stopping periodic sync")
|
|
43
|
+
self._shutdown_event.set()
|
|
44
|
+
|
|
45
|
+
if self._sync_task and not self._sync_task.done():
|
|
46
|
+
self._sync_task.cancel()
|
|
47
|
+
with suppress(asyncio.CancelledError):
|
|
48
|
+
await self._sync_task
|
|
49
|
+
|
|
50
|
+
async def _sync_loop(self, interval_seconds: float) -> None:
|
|
51
|
+
"""Run the sync loop at the specified interval."""
|
|
52
|
+
while not self._shutdown_event.is_set():
|
|
53
|
+
try:
|
|
54
|
+
await self._perform_sync()
|
|
55
|
+
except Exception as e:
|
|
56
|
+
self.log.exception("Sync operation failed", error=e)
|
|
57
|
+
|
|
58
|
+
# Wait for the interval or until shutdown
|
|
59
|
+
try:
|
|
60
|
+
await asyncio.wait_for(
|
|
61
|
+
self._shutdown_event.wait(), timeout=interval_seconds
|
|
62
|
+
)
|
|
63
|
+
# If we reach here, shutdown was requested
|
|
64
|
+
break
|
|
65
|
+
except TimeoutError:
|
|
66
|
+
# Continue to next sync cycle
|
|
67
|
+
continue
|
|
68
|
+
|
|
69
|
+
async def _perform_sync(self) -> None:
|
|
70
|
+
"""Perform a sync operation on all indexes."""
|
|
71
|
+
self.log.info("Starting sync operation")
|
|
72
|
+
|
|
73
|
+
async with self.session_factory() as session:
|
|
74
|
+
# Create services
|
|
75
|
+
service = create_code_indexing_application_service(
|
|
76
|
+
app_context=self.app_context,
|
|
77
|
+
session=session,
|
|
78
|
+
)
|
|
79
|
+
index_query_service = IndexQueryService(
|
|
80
|
+
index_repository=SqlAlchemyIndexRepository(session=session),
|
|
81
|
+
fusion_service=ReciprocalRankFusionService(),
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
# Get all existing indexes
|
|
85
|
+
all_indexes = await index_query_service.list_indexes()
|
|
86
|
+
|
|
87
|
+
if not all_indexes:
|
|
88
|
+
self.log.info("No indexes found to sync")
|
|
89
|
+
return
|
|
90
|
+
|
|
91
|
+
self.log.info("Syncing indexes", count=len(all_indexes))
|
|
92
|
+
|
|
93
|
+
success_count = 0
|
|
94
|
+
failure_count = 0
|
|
95
|
+
|
|
96
|
+
# Sync each index
|
|
97
|
+
for index in all_indexes:
|
|
98
|
+
try:
|
|
99
|
+
self.log.info(
|
|
100
|
+
"Syncing index",
|
|
101
|
+
index_id=index.id,
|
|
102
|
+
source=str(index.source.working_copy.remote_uri),
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
await service.run_index(index, progress_callback=None)
|
|
106
|
+
success_count += 1
|
|
107
|
+
|
|
108
|
+
self.log.info(
|
|
109
|
+
"Index sync completed",
|
|
110
|
+
index_id=index.id,
|
|
111
|
+
source=str(index.source.working_copy.remote_uri),
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
except Exception as e:
|
|
115
|
+
failure_count += 1
|
|
116
|
+
self.log.exception(
|
|
117
|
+
"Index sync failed",
|
|
118
|
+
index_id=index.id,
|
|
119
|
+
source=str(index.source.working_copy.remote_uri),
|
|
120
|
+
error=e,
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
self.log.info(
|
|
124
|
+
"Sync operation completed",
|
|
125
|
+
total=len(all_indexes),
|
|
126
|
+
success=success_count,
|
|
127
|
+
failures=failure_count,
|
|
128
|
+
)
|
kodit/cli.py
CHANGED
|
@@ -63,11 +63,105 @@ def cli(
|
|
|
63
63
|
ctx.obj = config
|
|
64
64
|
|
|
65
65
|
|
|
66
|
+
async def _handle_auto_index(
|
|
67
|
+
app_context: AppContext,
|
|
68
|
+
sources: list[str], # noqa: ARG001
|
|
69
|
+
) -> list[str]:
|
|
70
|
+
"""Handle auto-index option and return sources to process."""
|
|
71
|
+
log = structlog.get_logger(__name__)
|
|
72
|
+
log.info("Auto-indexing configuration", config=app_context.auto_indexing)
|
|
73
|
+
if not app_context.auto_indexing or not app_context.auto_indexing.sources:
|
|
74
|
+
click.echo("No auto-index sources configured.")
|
|
75
|
+
return []
|
|
76
|
+
auto_sources = app_context.auto_indexing.sources
|
|
77
|
+
click.echo(f"Auto-indexing {len(auto_sources)} configured sources...")
|
|
78
|
+
return [source.uri for source in auto_sources]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
async def _handle_sync(
|
|
82
|
+
service: Any,
|
|
83
|
+
index_query_service: IndexQueryService,
|
|
84
|
+
sources: list[str],
|
|
85
|
+
) -> None:
|
|
86
|
+
"""Handle sync operation."""
|
|
87
|
+
log = structlog.get_logger(__name__)
|
|
88
|
+
log_event("kodit.cli.index.sync")
|
|
89
|
+
|
|
90
|
+
# Get all existing indexes
|
|
91
|
+
all_indexes = await index_query_service.list_indexes()
|
|
92
|
+
|
|
93
|
+
if not all_indexes:
|
|
94
|
+
click.echo("No existing indexes found to sync.")
|
|
95
|
+
return
|
|
96
|
+
|
|
97
|
+
# Filter indexes if specific sources are provided
|
|
98
|
+
indexes_to_sync = all_indexes
|
|
99
|
+
if sources:
|
|
100
|
+
# Filter indexes that match the provided sources
|
|
101
|
+
source_uris = set(sources)
|
|
102
|
+
indexes_to_sync = [
|
|
103
|
+
index for index in all_indexes
|
|
104
|
+
if str(index.source.working_copy.remote_uri) in source_uris
|
|
105
|
+
]
|
|
106
|
+
|
|
107
|
+
if not indexes_to_sync:
|
|
108
|
+
click.echo(
|
|
109
|
+
f"No indexes found for the specified sources: {', '.join(sources)}"
|
|
110
|
+
)
|
|
111
|
+
return
|
|
112
|
+
|
|
113
|
+
click.echo(f"Syncing {len(indexes_to_sync)} indexes...")
|
|
114
|
+
|
|
115
|
+
# Sync each index
|
|
116
|
+
for index in indexes_to_sync:
|
|
117
|
+
click.echo(f"Syncing: {index.source.working_copy.remote_uri}")
|
|
118
|
+
|
|
119
|
+
# Create progress callback for this sync operation
|
|
120
|
+
progress_callback = create_multi_stage_progress_callback()
|
|
121
|
+
|
|
122
|
+
try:
|
|
123
|
+
await service.run_index(index, progress_callback)
|
|
124
|
+
click.echo(f"✓ Sync completed: {index.source.working_copy.remote_uri}")
|
|
125
|
+
except Exception as e:
|
|
126
|
+
log.exception("Sync failed", index_id=index.id, error=e)
|
|
127
|
+
click.echo(
|
|
128
|
+
f"✗ Sync failed: {index.source.working_copy.remote_uri} - {e}"
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
async def _handle_list_indexes(index_query_service: IndexQueryService) -> None:
|
|
133
|
+
"""Handle listing all indexes."""
|
|
134
|
+
log_event("kodit.cli.index.list")
|
|
135
|
+
# No source specified, list all indexes
|
|
136
|
+
indexes = await index_query_service.list_indexes()
|
|
137
|
+
headers: list[str | Cell] = [
|
|
138
|
+
"ID",
|
|
139
|
+
"Created At",
|
|
140
|
+
"Updated At",
|
|
141
|
+
"Source",
|
|
142
|
+
"Num Snippets",
|
|
143
|
+
]
|
|
144
|
+
data = [
|
|
145
|
+
[
|
|
146
|
+
index.id,
|
|
147
|
+
index.created_at,
|
|
148
|
+
index.updated_at,
|
|
149
|
+
index.source.working_copy.remote_uri,
|
|
150
|
+
len(index.source.working_copy.files),
|
|
151
|
+
]
|
|
152
|
+
for index in indexes
|
|
153
|
+
]
|
|
154
|
+
click.echo(Table(headers=headers, data=data))
|
|
155
|
+
|
|
156
|
+
|
|
66
157
|
@cli.command()
|
|
67
158
|
@click.argument("sources", nargs=-1)
|
|
68
159
|
@click.option(
|
|
69
160
|
"--auto-index", is_flag=True, help="Index all configured auto-index sources"
|
|
70
161
|
)
|
|
162
|
+
@click.option(
|
|
163
|
+
"--sync", is_flag=True, help="Sync existing indexes with their remotes"
|
|
164
|
+
)
|
|
71
165
|
@with_app_context
|
|
72
166
|
@with_session
|
|
73
167
|
async def index(
|
|
@@ -76,8 +170,9 @@ async def index(
|
|
|
76
170
|
sources: list[str],
|
|
77
171
|
*, # Force keyword-only arguments
|
|
78
172
|
auto_index: bool,
|
|
173
|
+
sync: bool,
|
|
79
174
|
) -> None:
|
|
80
|
-
"""List indexes,
|
|
175
|
+
"""List indexes, index data sources, or sync existing indexes."""
|
|
81
176
|
log = structlog.get_logger(__name__)
|
|
82
177
|
service = create_code_indexing_application_service(
|
|
83
178
|
app_context=app_context,
|
|
@@ -89,36 +184,16 @@ async def index(
|
|
|
89
184
|
)
|
|
90
185
|
|
|
91
186
|
if auto_index:
|
|
92
|
-
|
|
93
|
-
if not
|
|
94
|
-
click.echo("No auto-index sources configured.")
|
|
187
|
+
sources = await _handle_auto_index(app_context, sources)
|
|
188
|
+
if not sources:
|
|
95
189
|
return
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
190
|
+
|
|
191
|
+
if sync:
|
|
192
|
+
await _handle_sync(service, index_query_service, sources)
|
|
193
|
+
return
|
|
99
194
|
|
|
100
195
|
if not sources:
|
|
101
|
-
|
|
102
|
-
# No source specified, list all indexes
|
|
103
|
-
indexes = await index_query_service.list_indexes()
|
|
104
|
-
headers: list[str | Cell] = [
|
|
105
|
-
"ID",
|
|
106
|
-
"Created At",
|
|
107
|
-
"Updated At",
|
|
108
|
-
"Source",
|
|
109
|
-
"Num Snippets",
|
|
110
|
-
]
|
|
111
|
-
data = [
|
|
112
|
-
[
|
|
113
|
-
index.id,
|
|
114
|
-
index.created_at,
|
|
115
|
-
index.updated_at,
|
|
116
|
-
index.source.working_copy.remote_uri,
|
|
117
|
-
len(index.source.working_copy.files),
|
|
118
|
-
]
|
|
119
|
-
for index in indexes
|
|
120
|
-
]
|
|
121
|
-
click.echo(Table(headers=headers, data=data))
|
|
196
|
+
await _handle_list_indexes(index_query_service)
|
|
122
197
|
return
|
|
123
198
|
# Handle source indexing
|
|
124
199
|
for source in sources:
|
kodit/config.py
CHANGED
|
@@ -81,6 +81,18 @@ class AutoIndexingConfig(BaseModel):
|
|
|
81
81
|
return v
|
|
82
82
|
|
|
83
83
|
|
|
84
|
+
class PeriodicSyncConfig(BaseModel):
|
|
85
|
+
"""Configuration for periodic/scheduled syncing."""
|
|
86
|
+
|
|
87
|
+
enabled: bool = Field(default=True, description="Enable periodic sync")
|
|
88
|
+
interval_seconds: float = Field(
|
|
89
|
+
default=1800, description="Interval between automatic syncs in seconds"
|
|
90
|
+
)
|
|
91
|
+
retry_attempts: int = Field(
|
|
92
|
+
default=3, description="Number of retry attempts for failed syncs"
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
|
|
84
96
|
class CustomAutoIndexingEnvSource(EnvSettingsSource):
|
|
85
97
|
"""Custom environment source for parsing AutoIndexingConfig."""
|
|
86
98
|
|
|
@@ -173,6 +185,9 @@ class AppContext(BaseSettings):
|
|
|
173
185
|
auto_indexing: AutoIndexingConfig | None = Field(
|
|
174
186
|
default=AutoIndexingConfig(), description="Auto-indexing configuration"
|
|
175
187
|
)
|
|
188
|
+
periodic_sync: PeriodicSyncConfig = Field(
|
|
189
|
+
default=PeriodicSyncConfig(), description="Periodic sync configuration"
|
|
190
|
+
)
|
|
176
191
|
_db: Database | None = None
|
|
177
192
|
|
|
178
193
|
def model_post_init(self, _: Any) -> None:
|
kodit/domain/services/index_service.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"""Pure domain service for Index aggregate operations."""
|
|
2
2
|
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
|
-
from collections.abc import Mapping
|
|
5
4
|
from pathlib import Path
|
|
6
5
|
|
|
7
6
|
import structlog
|
|
@@ -13,14 +12,13 @@ from kodit.domain.services.enrichment_service import EnrichmentDomainService
|
|
|
13
12
|
from kodit.domain.value_objects import (
|
|
14
13
|
EnrichmentIndexRequest,
|
|
15
14
|
EnrichmentRequest,
|
|
16
|
-
|
|
17
|
-
SnippetExtractionResult,
|
|
18
|
-
SnippetExtractionStrategy,
|
|
15
|
+
LanguageMapping,
|
|
19
16
|
)
|
|
20
17
|
from kodit.infrastructure.cloning.git.working_copy import GitWorkingCopyProvider
|
|
21
18
|
from kodit.infrastructure.cloning.metadata import FileMetadataExtractor
|
|
22
19
|
from kodit.infrastructure.git.git_utils import is_valid_clone_target
|
|
23
20
|
from kodit.infrastructure.ignore.ignore_pattern_provider import GitIgnorePatternProvider
|
|
21
|
+
from kodit.infrastructure.slicing.slicer import Slicer
|
|
24
22
|
from kodit.reporting import Reporter
|
|
25
23
|
from kodit.utils.path_utils import path_from_uri
|
|
26
24
|
|
|
@@ -33,14 +31,6 @@ class LanguageDetectionService(ABC):
|
|
|
33
31
|
"""Detect the programming language of a file."""
|
|
34
32
|
|
|
35
33
|
|
|
36
|
-
class SnippetExtractor(ABC):
|
|
37
|
-
"""Abstract interface for snippet extraction."""
|
|
38
|
-
|
|
39
|
-
@abstractmethod
|
|
40
|
-
async def extract(self, file_path: Path, language: str) -> list[str]:
|
|
41
|
-
"""Extract snippets from a file."""
|
|
42
|
-
|
|
43
|
-
|
|
44
34
|
class IndexDomainService:
|
|
45
35
|
"""Pure domain service for Index aggregate operations.
|
|
46
36
|
|
|
@@ -54,14 +44,12 @@ class IndexDomainService:
|
|
|
54
44
|
def __init__(
|
|
55
45
|
self,
|
|
56
46
|
language_detector: LanguageDetectionService,
|
|
57
|
-
snippet_extractors: Mapping[SnippetExtractionStrategy, SnippetExtractor],
|
|
58
47
|
enrichment_service: EnrichmentDomainService,
|
|
59
48
|
clone_dir: Path,
|
|
60
49
|
) -> None:
|
|
61
50
|
"""Initialize the index domain service."""
|
|
62
51
|
self._clone_dir = clone_dir
|
|
63
52
|
self._language_detector = language_detector
|
|
64
|
-
self._snippet_extractors = snippet_extractors
|
|
65
53
|
self._enrichment_service = enrichment_service
|
|
66
54
|
self.log = structlog.get_logger(__name__)
|
|
67
55
|
|
|
@@ -99,7 +87,6 @@ class IndexDomainService:
|
|
|
99
87
|
async def extract_snippets_from_index(
|
|
100
88
|
self,
|
|
101
89
|
index: domain_entities.Index,
|
|
102
|
-
strategy: SnippetExtractionStrategy = SnippetExtractionStrategy.METHOD_BASED,
|
|
103
90
|
progress_callback: ProgressCallback | None = None,
|
|
104
91
|
) -> domain_entities.Index:
|
|
105
92
|
"""Extract code snippets from files in the index."""
|
|
@@ -109,46 +96,40 @@ class IndexDomainService:
|
|
|
109
96
|
"Extracting snippets",
|
|
110
97
|
index_id=index.id,
|
|
111
98
|
file_count=file_count,
|
|
112
|
-
strategy=strategy.value,
|
|
113
99
|
)
|
|
114
100
|
|
|
115
101
|
# Only create snippets for files that have been added or modified
|
|
116
102
|
files = index.source.working_copy.changed_files()
|
|
117
103
|
index.delete_snippets_for_files(files)
|
|
118
104
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
new_snippets = []
|
|
125
|
-
for i, domain_file in enumerate(files, 1):
|
|
105
|
+
# Create a set of languages to extract snippets for
|
|
106
|
+
extensions = {file.extension() for file in files}
|
|
107
|
+
languages = []
|
|
108
|
+
for ext in extensions:
|
|
126
109
|
try:
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
)
|
|
131
|
-
result = await self._extract_snippets(request)
|
|
132
|
-
for snippet_text in result.snippets:
|
|
133
|
-
snippet = domain_entities.Snippet(
|
|
134
|
-
derives_from=[domain_file],
|
|
135
|
-
)
|
|
136
|
-
snippet.add_original_content(snippet_text, result.language)
|
|
137
|
-
new_snippets.append(snippet)
|
|
138
|
-
|
|
139
|
-
except (OSError, ValueError) as e:
|
|
140
|
-
self.log.debug(
|
|
141
|
-
"Skipping file for snippet extraction",
|
|
142
|
-
file_uri=str(domain_file.uri),
|
|
143
|
-
error=str(e),
|
|
144
|
-
)
|
|
110
|
+
languages.append(LanguageMapping.get_language_for_extension(ext))
|
|
111
|
+
except ValueError as e:
|
|
112
|
+
self.log.info("Skipping", error=str(e))
|
|
145
113
|
continue
|
|
146
114
|
|
|
115
|
+
reporter = Reporter(self.log, progress_callback)
|
|
116
|
+
await reporter.start(
|
|
117
|
+
"extract_snippets",
|
|
118
|
+
len(files) * len(languages),
|
|
119
|
+
"Extracting code snippets...",
|
|
120
|
+
)
|
|
121
|
+
# Calculate snippets for each language
|
|
122
|
+
slicer = Slicer()
|
|
123
|
+
for i, language in enumerate(languages):
|
|
147
124
|
await reporter.step(
|
|
148
|
-
"extract_snippets",
|
|
125
|
+
"extract_snippets",
|
|
126
|
+
len(files) * (i + 1),
|
|
127
|
+
len(files) * len(languages),
|
|
128
|
+
"Extracting code snippets...",
|
|
149
129
|
)
|
|
130
|
+
s = slicer.extract_snippets(files, language=language)
|
|
131
|
+
index.snippets.extend(s)
|
|
150
132
|
|
|
151
|
-
index.snippets.extend(new_snippets)
|
|
152
133
|
await reporter.done("extract_snippets")
|
|
153
134
|
return index
|
|
154
135
|
|
|
@@ -187,28 +168,6 @@ class IndexDomainService:
|
|
|
187
168
|
await reporter.done("enrichment")
|
|
188
169
|
return list(snippet_map.values())
|
|
189
170
|
|
|
190
|
-
async def _extract_snippets(
|
|
191
|
-
self, request: SnippetExtractionRequest
|
|
192
|
-
) -> SnippetExtractionResult:
|
|
193
|
-
# Domain logic: validate file exists
|
|
194
|
-
if not request.file_path.exists():
|
|
195
|
-
raise ValueError(f"File does not exist: {request.file_path}")
|
|
196
|
-
|
|
197
|
-
# Domain logic: detect language
|
|
198
|
-
language = await self._language_detector.detect_language(request.file_path)
|
|
199
|
-
|
|
200
|
-
# Domain logic: choose strategy and extractor
|
|
201
|
-
if request.strategy not in self._snippet_extractors:
|
|
202
|
-
raise ValueError(f"Unsupported extraction strategy: {request.strategy}")
|
|
203
|
-
|
|
204
|
-
extractor = self._snippet_extractors[request.strategy]
|
|
205
|
-
snippets = await extractor.extract(request.file_path, language)
|
|
206
|
-
|
|
207
|
-
# Domain logic: filter out empty snippets
|
|
208
|
-
filtered_snippets = [snippet for snippet in snippets if snippet.strip()]
|
|
209
|
-
|
|
210
|
-
return SnippetExtractionResult(snippets=filtered_snippets, language=language)
|
|
211
|
-
|
|
212
171
|
def sanitize_uri(
|
|
213
172
|
self, uri_or_path_like: str
|
|
214
173
|
) -> tuple[AnyUrl, domain_entities.SourceType]:
|
|
@@ -297,7 +256,7 @@ class IndexDomainService:
|
|
|
297
256
|
await metadata_extractor.extract(file_path=file_path)
|
|
298
257
|
)
|
|
299
258
|
except (OSError, ValueError) as e:
|
|
300
|
-
self.log.
|
|
259
|
+
self.log.debug("Skipping file", file=str(file_path), error=str(e))
|
|
301
260
|
continue
|
|
302
261
|
|
|
303
262
|
# Finally check if there are any modified files
|
kodit/domain/value_objects.py
CHANGED
|
@@ -134,14 +134,6 @@ class SearchType(Enum):
|
|
|
134
134
|
HYBRID = "hybrid"
|
|
135
135
|
|
|
136
136
|
|
|
137
|
-
@dataclass
|
|
138
|
-
class SnippetExtractionResult:
|
|
139
|
-
"""Domain model for snippet extraction result."""
|
|
140
|
-
|
|
141
|
-
snippets: list[str]
|
|
142
|
-
language: str
|
|
143
|
-
|
|
144
|
-
|
|
145
137
|
@dataclass
|
|
146
138
|
class Document:
|
|
147
139
|
"""Generic document model for indexing."""
|
|
@@ -640,20 +632,6 @@ class SnippetQuery(BaseModel):
|
|
|
640
632
|
top_k: int = 10
|
|
641
633
|
|
|
642
634
|
|
|
643
|
-
class SnippetExtractionStrategy(str, Enum):
|
|
644
|
-
"""Different strategies for extracting snippets from files."""
|
|
645
|
-
|
|
646
|
-
METHOD_BASED = "method_based"
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
@dataclass
|
|
650
|
-
class SnippetExtractionRequest:
|
|
651
|
-
"""Domain model for snippet extraction request."""
|
|
652
|
-
|
|
653
|
-
file_path: Path
|
|
654
|
-
strategy: SnippetExtractionStrategy = SnippetExtractionStrategy.METHOD_BASED
|
|
655
|
-
|
|
656
|
-
|
|
657
635
|
class FileProcessingStatus(IntEnum):
|
|
658
636
|
"""File processing status."""
|
|
659
637
|
|
|
@@ -661,3 +639,13 @@ class FileProcessingStatus(IntEnum):
|
|
|
661
639
|
ADDED = 1
|
|
662
640
|
MODIFIED = 2
|
|
663
641
|
DELETED = 3
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
@dataclass
|
|
645
|
+
class FunctionDefinition:
|
|
646
|
+
"""Cached function definition."""
|
|
647
|
+
|
|
648
|
+
name: str
|
|
649
|
+
qualified_name: str
|
|
650
|
+
start_byte: int
|
|
651
|
+
end_byte: int
|