kodit 0.2.9__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +36 -1
- kodit/application/services/code_indexing_application_service.py +5 -6
- kodit/cli.py +20 -27
- kodit/config.py +101 -6
- kodit/domain/services/indexing_service.py +4 -3
- kodit/domain/services/snippet_service.py +16 -3
- kodit/domain/value_objects.py +35 -0
- kodit/infrastructure/indexing/auto_indexing_service.py +84 -0
- kodit/infrastructure/indexing/index_repository.py +6 -24
- kodit/mcp.py +1 -1
- {kodit-0.2.9.dist-info → kodit-0.3.1.dist-info}/METADATA +14 -4
- {kodit-0.2.9.dist-info → kodit-0.3.1.dist-info}/RECORD +16 -15
- {kodit-0.2.9.dist-info → kodit-0.3.1.dist-info}/WHEEL +0 -0
- {kodit-0.2.9.dist-info → kodit-0.3.1.dist-info}/entry_points.txt +0 -0
- {kodit-0.2.9.dist-info → kodit-0.3.1.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py
CHANGED
kodit/app.py
CHANGED
|
@@ -1,14 +1,49 @@
|
|
|
1
1
|
"""FastAPI application for kodit API."""
|
|
2
2
|
|
|
3
|
+
from collections.abc import AsyncIterator
|
|
4
|
+
from contextlib import asynccontextmanager
|
|
5
|
+
|
|
3
6
|
from asgi_correlation_id import CorrelationIdMiddleware
|
|
4
7
|
from fastapi import FastAPI
|
|
5
8
|
|
|
9
|
+
from kodit.config import AppContext
|
|
10
|
+
from kodit.infrastructure.indexing.auto_indexing_service import AutoIndexingService
|
|
6
11
|
from kodit.mcp import mcp
|
|
7
12
|
from kodit.middleware import ASGICancelledErrorMiddleware, logging_middleware
|
|
8
13
|
|
|
14
|
+
# Global auto-indexing service
|
|
15
|
+
_auto_indexing_service: AutoIndexingService | None = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@asynccontextmanager
|
|
19
|
+
async def app_lifespan(_: FastAPI) -> AsyncIterator[None]:
|
|
20
|
+
"""Manage application lifespan for auto-indexing."""
|
|
21
|
+
global _auto_indexing_service # noqa: PLW0603
|
|
22
|
+
# Start auto-indexing service
|
|
23
|
+
app_context = AppContext()
|
|
24
|
+
db = await app_context.get_db()
|
|
25
|
+
_auto_indexing_service = AutoIndexingService(
|
|
26
|
+
app_context=app_context,
|
|
27
|
+
session_factory=db.session_factory,
|
|
28
|
+
)
|
|
29
|
+
await _auto_indexing_service.start_background_indexing()
|
|
30
|
+
yield
|
|
31
|
+
if _auto_indexing_service:
|
|
32
|
+
await _auto_indexing_service.stop()
|
|
33
|
+
|
|
34
|
+
|
|
9
35
|
# See https://gofastmcp.com/deployment/asgi#fastapi-integration
|
|
10
36
|
mcp_app = mcp.sse_app()
|
|
11
|
-
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@asynccontextmanager
|
|
40
|
+
async def combined_lifespan(app: FastAPI) -> AsyncIterator[None]:
|
|
41
|
+
"""Combine app and MCP lifespans."""
|
|
42
|
+
async with app_lifespan(app), mcp_app.router.lifespan_context(app):
|
|
43
|
+
yield
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
app = FastAPI(title="kodit API", lifespan=combined_lifespan)
|
|
12
47
|
|
|
13
48
|
# Add middleware
|
|
14
49
|
app.middleware("http")(logging_middleware)
|
|
@@ -27,7 +27,6 @@ from kodit.domain.value_objects import (
|
|
|
27
27
|
MultiSearchResult,
|
|
28
28
|
SearchRequest,
|
|
29
29
|
SearchResult,
|
|
30
|
-
SnippetListItem,
|
|
31
30
|
)
|
|
32
31
|
from kodit.log import log_event
|
|
33
32
|
from kodit.reporting import Reporter
|
|
@@ -225,17 +224,17 @@ class CodeIndexingApplicationService:
|
|
|
225
224
|
|
|
226
225
|
return [
|
|
227
226
|
MultiSearchResult(
|
|
228
|
-
id=snippet
|
|
229
|
-
uri=file
|
|
230
|
-
content=snippet
|
|
227
|
+
id=result.snippet.id,
|
|
228
|
+
uri=result.file.uri,
|
|
229
|
+
content=result.snippet.content,
|
|
231
230
|
original_scores=fr.original_scores,
|
|
232
231
|
)
|
|
233
|
-
for
|
|
232
|
+
for result, fr in zip(search_results, final_results, strict=True)
|
|
234
233
|
]
|
|
235
234
|
|
|
236
235
|
async def list_snippets(
|
|
237
236
|
self, file_path: str | None = None, source_uri: str | None = None
|
|
238
|
-
) -> list[
|
|
237
|
+
) -> list[MultiSearchResult]:
|
|
239
238
|
"""List snippets with optional filtering."""
|
|
240
239
|
log_event("kodit.index.list_snippets")
|
|
241
240
|
return await self.snippet_domain_service.list_snippets(file_path, source_uri)
|
kodit/cli.py
CHANGED
|
@@ -59,12 +59,17 @@ def cli(
|
|
|
59
59
|
|
|
60
60
|
@cli.command()
|
|
61
61
|
@click.argument("sources", nargs=-1)
|
|
62
|
+
@click.option(
|
|
63
|
+
"--auto-index", is_flag=True, help="Index all configured auto-index sources"
|
|
64
|
+
)
|
|
62
65
|
@with_app_context
|
|
63
66
|
@with_session
|
|
64
67
|
async def index(
|
|
65
68
|
session: AsyncSession,
|
|
66
69
|
app_context: AppContext,
|
|
67
70
|
sources: list[str],
|
|
71
|
+
*, # Force keyword-only arguments
|
|
72
|
+
auto_index: bool,
|
|
68
73
|
) -> None:
|
|
69
74
|
"""List indexes, or index data sources."""
|
|
70
75
|
log = structlog.get_logger(__name__)
|
|
@@ -78,6 +83,16 @@ async def index(
|
|
|
78
83
|
source_service=source_service,
|
|
79
84
|
)
|
|
80
85
|
|
|
86
|
+
if auto_index:
|
|
87
|
+
log.info("Auto-indexing configuration", config=app_context.auto_indexing)
|
|
88
|
+
auto_sources = app_context.auto_indexing.sources
|
|
89
|
+
if not auto_sources:
|
|
90
|
+
click.echo("No auto-index sources configured.")
|
|
91
|
+
return
|
|
92
|
+
|
|
93
|
+
click.echo(f"Auto-indexing {len(auto_sources)} configured sources...")
|
|
94
|
+
sources = [source.uri for source in auto_sources]
|
|
95
|
+
|
|
81
96
|
if not sources:
|
|
82
97
|
log_event("kodit.cli.index.list")
|
|
83
98
|
# No source specified, list all indexes
|
|
@@ -245,12 +260,7 @@ async def code( # noqa: PLR0913
|
|
|
245
260
|
return
|
|
246
261
|
|
|
247
262
|
for snippet in snippets:
|
|
248
|
-
click.echo(
|
|
249
|
-
click.echo(f"{snippet.uri}")
|
|
250
|
-
click.echo(f"Original scores: {snippet.original_scores}")
|
|
251
|
-
click.echo(snippet.content)
|
|
252
|
-
click.echo("-" * 80)
|
|
253
|
-
click.echo()
|
|
263
|
+
click.echo(str(snippet))
|
|
254
264
|
|
|
255
265
|
|
|
256
266
|
@search.command()
|
|
@@ -307,12 +317,7 @@ async def keyword( # noqa: PLR0913
|
|
|
307
317
|
return
|
|
308
318
|
|
|
309
319
|
for snippet in snippets:
|
|
310
|
-
click.echo(
|
|
311
|
-
click.echo(f"{snippet.uri}")
|
|
312
|
-
click.echo(f"Original scores: {snippet.original_scores}")
|
|
313
|
-
click.echo(snippet.content)
|
|
314
|
-
click.echo("-" * 80)
|
|
315
|
-
click.echo()
|
|
320
|
+
click.echo(str(snippet))
|
|
316
321
|
|
|
317
322
|
|
|
318
323
|
@search.command()
|
|
@@ -372,12 +377,7 @@ async def text( # noqa: PLR0913
|
|
|
372
377
|
return
|
|
373
378
|
|
|
374
379
|
for snippet in snippets:
|
|
375
|
-
click.echo(
|
|
376
|
-
click.echo(f"{snippet.uri}")
|
|
377
|
-
click.echo(f"Original scores: {snippet.original_scores}")
|
|
378
|
-
click.echo(snippet.content)
|
|
379
|
-
click.echo("-" * 80)
|
|
380
|
-
click.echo()
|
|
380
|
+
click.echo(str(snippet))
|
|
381
381
|
|
|
382
382
|
|
|
383
383
|
@search.command()
|
|
@@ -447,12 +447,7 @@ async def hybrid( # noqa: PLR0913
|
|
|
447
447
|
return
|
|
448
448
|
|
|
449
449
|
for snippet in snippets:
|
|
450
|
-
click.echo(
|
|
451
|
-
click.echo(f"{snippet.uri}")
|
|
452
|
-
click.echo(f"Original scores: {snippet.original_scores}")
|
|
453
|
-
click.echo(snippet.content)
|
|
454
|
-
click.echo("-" * 80)
|
|
455
|
-
click.echo()
|
|
450
|
+
click.echo(str(snippet))
|
|
456
451
|
|
|
457
452
|
|
|
458
453
|
@cli.group()
|
|
@@ -484,9 +479,7 @@ async def snippets(
|
|
|
484
479
|
)
|
|
485
480
|
snippets = await service.list_snippets(file_path=by_path, source_uri=by_source)
|
|
486
481
|
for snippet in snippets:
|
|
487
|
-
click.echo(
|
|
488
|
-
click.echo(f" {snippet.content}")
|
|
489
|
-
click.echo()
|
|
482
|
+
click.echo(str(snippet))
|
|
490
483
|
|
|
491
484
|
|
|
492
485
|
@cli.command()
|
kodit/config.py
CHANGED
|
@@ -8,8 +8,13 @@ from pathlib import Path
|
|
|
8
8
|
from typing import TYPE_CHECKING, Any, Literal, TypeVar
|
|
9
9
|
|
|
10
10
|
import click
|
|
11
|
-
from pydantic import BaseModel, Field
|
|
12
|
-
from pydantic_settings import
|
|
11
|
+
from pydantic import BaseModel, Field, field_validator
|
|
12
|
+
from pydantic_settings import (
|
|
13
|
+
BaseSettings,
|
|
14
|
+
EnvSettingsSource,
|
|
15
|
+
PydanticBaseSettingsSource,
|
|
16
|
+
SettingsConfigDict,
|
|
17
|
+
)
|
|
13
18
|
|
|
14
19
|
if TYPE_CHECKING:
|
|
15
20
|
from collections.abc import Callable, Coroutine
|
|
@@ -37,11 +42,75 @@ class Endpoint(BaseModel):
|
|
|
37
42
|
|
|
38
43
|
|
|
39
44
|
class Search(BaseModel):
|
|
40
|
-
"""Search
|
|
45
|
+
"""Search configuration."""
|
|
41
46
|
|
|
42
47
|
provider: Literal["sqlite", "vectorchord"] = Field(default="sqlite")
|
|
43
48
|
|
|
44
49
|
|
|
50
|
+
class AutoIndexingSource(BaseModel):
|
|
51
|
+
"""Configuration for a single auto-indexing source."""
|
|
52
|
+
|
|
53
|
+
uri: str = Field(description="URI of the source to index (git URL or local path)")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class AutoIndexingConfig(BaseModel):
|
|
57
|
+
"""Configuration for auto-indexing."""
|
|
58
|
+
|
|
59
|
+
sources: list[AutoIndexingSource] = Field(
|
|
60
|
+
default_factory=list, description="List of sources to auto-index"
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
@field_validator("sources", mode="before")
|
|
64
|
+
@classmethod
|
|
65
|
+
def parse_sources(cls, v: Any) -> Any:
|
|
66
|
+
"""Parse sources from environment variables or other formats."""
|
|
67
|
+
if v is None:
|
|
68
|
+
return []
|
|
69
|
+
if isinstance(v, list):
|
|
70
|
+
return v
|
|
71
|
+
if isinstance(v, dict):
|
|
72
|
+
# Handle case where env vars are numbered keys like {'0': {'uri': '...'}}
|
|
73
|
+
sources = []
|
|
74
|
+
i = 0
|
|
75
|
+
while str(i) in v:
|
|
76
|
+
source_data = v[str(i)]
|
|
77
|
+
if isinstance(source_data, dict) and "uri" in source_data:
|
|
78
|
+
sources.append(AutoIndexingSource(uri=source_data["uri"]))
|
|
79
|
+
i += 1
|
|
80
|
+
return sources
|
|
81
|
+
return v
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class CustomAutoIndexingEnvSource(EnvSettingsSource):
|
|
85
|
+
"""Custom environment source for parsing AutoIndexingConfig."""
|
|
86
|
+
|
|
87
|
+
def __call__(self) -> dict[str, Any]:
|
|
88
|
+
"""Load settings from env vars with custom auto-indexing parsing."""
|
|
89
|
+
d: dict[str, Any] = {}
|
|
90
|
+
|
|
91
|
+
# First get the standard env vars
|
|
92
|
+
env_vars = super().__call__()
|
|
93
|
+
d.update(env_vars)
|
|
94
|
+
|
|
95
|
+
# Custom parsing for auto-indexing sources
|
|
96
|
+
auto_indexing_sources = []
|
|
97
|
+
i = 0
|
|
98
|
+
while True:
|
|
99
|
+
# Note: env_vars keys are lowercase due to Pydantic Settings normalization
|
|
100
|
+
uri_key = f"auto_indexing_sources_{i}_uri"
|
|
101
|
+
if uri_key in self.env_vars:
|
|
102
|
+
uri_value = self.env_vars[uri_key]
|
|
103
|
+
auto_indexing_sources.append({"uri": uri_value})
|
|
104
|
+
i += 1
|
|
105
|
+
else:
|
|
106
|
+
break
|
|
107
|
+
|
|
108
|
+
if auto_indexing_sources:
|
|
109
|
+
d["auto_indexing"] = {"sources": auto_indexing_sources}
|
|
110
|
+
|
|
111
|
+
return d
|
|
112
|
+
|
|
113
|
+
|
|
45
114
|
class AppContext(BaseSettings):
|
|
46
115
|
"""Global context for the kodit project. Provides a shared state for the app."""
|
|
47
116
|
|
|
@@ -49,10 +118,35 @@ class AppContext(BaseSettings):
|
|
|
49
118
|
env_file=".env",
|
|
50
119
|
env_file_encoding="utf-8",
|
|
51
120
|
env_nested_delimiter="_",
|
|
52
|
-
nested_model_default_partial_update=True,
|
|
53
121
|
env_nested_max_split=1,
|
|
122
|
+
nested_model_default_partial_update=True,
|
|
123
|
+
extra="ignore",
|
|
54
124
|
)
|
|
55
125
|
|
|
126
|
+
@classmethod
|
|
127
|
+
def settings_customise_sources(
|
|
128
|
+
cls,
|
|
129
|
+
settings_cls: type[BaseSettings],
|
|
130
|
+
init_settings: PydanticBaseSettingsSource,
|
|
131
|
+
env_settings: PydanticBaseSettingsSource, # noqa: ARG003
|
|
132
|
+
dotenv_settings: PydanticBaseSettingsSource,
|
|
133
|
+
file_secret_settings: PydanticBaseSettingsSource,
|
|
134
|
+
) -> tuple[PydanticBaseSettingsSource, ...]:
|
|
135
|
+
"""Customize settings sources to use custom auto-indexing parsing."""
|
|
136
|
+
custom_env_settings = CustomAutoIndexingEnvSource(
|
|
137
|
+
settings_cls,
|
|
138
|
+
env_nested_delimiter=settings_cls.model_config.get("env_nested_delimiter"),
|
|
139
|
+
env_ignore_empty=settings_cls.model_config.get("env_ignore_empty", False),
|
|
140
|
+
env_parse_none_str=settings_cls.model_config.get("env_parse_none_str", ""),
|
|
141
|
+
env_parse_enums=settings_cls.model_config.get("env_parse_enums", None),
|
|
142
|
+
)
|
|
143
|
+
return (
|
|
144
|
+
init_settings,
|
|
145
|
+
custom_env_settings,
|
|
146
|
+
dotenv_settings,
|
|
147
|
+
file_secret_settings,
|
|
148
|
+
)
|
|
149
|
+
|
|
56
150
|
data_dir: Path = Field(default=DEFAULT_BASE_DIR)
|
|
57
151
|
db_url: str = Field(default=DEFAULT_DB_URL)
|
|
58
152
|
log_level: str = Field(default=DEFAULT_LOG_LEVEL)
|
|
@@ -76,6 +170,9 @@ class AppContext(BaseSettings):
|
|
|
76
170
|
default_search: Search = Field(
|
|
77
171
|
default=Search(),
|
|
78
172
|
)
|
|
173
|
+
auto_indexing: AutoIndexingConfig | None = Field(
|
|
174
|
+
default=AutoIndexingConfig(), description="Auto-indexing configuration"
|
|
175
|
+
)
|
|
79
176
|
_db: Database | None = None
|
|
80
177
|
|
|
81
178
|
def model_post_init(self, _: Any) -> None:
|
|
@@ -105,8 +202,6 @@ class AppContext(BaseSettings):
|
|
|
105
202
|
|
|
106
203
|
with_app_context = click.make_pass_decorator(AppContext)
|
|
107
204
|
|
|
108
|
-
T = TypeVar("T")
|
|
109
|
-
|
|
110
205
|
|
|
111
206
|
def wrap_async(f: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
|
|
112
207
|
"""Decorate async Click commands.
|
|
@@ -8,6 +8,7 @@ from kodit.domain.value_objects import (
|
|
|
8
8
|
FusionResult,
|
|
9
9
|
IndexCreateRequest,
|
|
10
10
|
IndexView,
|
|
11
|
+
SnippetWithFile,
|
|
11
12
|
)
|
|
12
13
|
|
|
13
14
|
|
|
@@ -51,7 +52,7 @@ class IndexRepository(ABC):
|
|
|
51
52
|
"""Update the content of an existing snippet."""
|
|
52
53
|
|
|
53
54
|
@abstractmethod
|
|
54
|
-
async def list_snippets_by_ids(self, ids: list[int]) -> list[
|
|
55
|
+
async def list_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithFile]:
|
|
55
56
|
"""List snippets by IDs."""
|
|
56
57
|
|
|
57
58
|
|
|
@@ -190,14 +191,14 @@ class IndexingDomainService:
|
|
|
190
191
|
"""
|
|
191
192
|
return self.fusion_service.reciprocal_rank_fusion(rankings, k)
|
|
192
193
|
|
|
193
|
-
async def get_snippets_by_ids(self, ids: list[int]) -> list[
|
|
194
|
+
async def get_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithFile]:
|
|
194
195
|
"""Get snippets by IDs.
|
|
195
196
|
|
|
196
197
|
Args:
|
|
197
198
|
ids: List of snippet IDs to retrieve.
|
|
198
199
|
|
|
199
200
|
Returns:
|
|
200
|
-
List of
|
|
201
|
+
List of SnippetWithFile objects containing file and snippet information.
|
|
201
202
|
|
|
202
203
|
"""
|
|
203
204
|
return await self.index_repository.list_snippets_by_ids(ids)
|
|
@@ -14,6 +14,7 @@ from kodit.domain.services.snippet_extraction_service import (
|
|
|
14
14
|
)
|
|
15
15
|
from kodit.domain.value_objects import (
|
|
16
16
|
MultiSearchRequest,
|
|
17
|
+
MultiSearchResult,
|
|
17
18
|
SnippetExtractionRequest,
|
|
18
19
|
SnippetListItem,
|
|
19
20
|
)
|
|
@@ -170,7 +171,7 @@ class SnippetDomainService:
|
|
|
170
171
|
|
|
171
172
|
async def list_snippets(
|
|
172
173
|
self, file_path: str | None = None, source_uri: str | None = None
|
|
173
|
-
) -> list[
|
|
174
|
+
) -> list[MultiSearchResult]:
|
|
174
175
|
"""List snippets with optional filtering.
|
|
175
176
|
|
|
176
177
|
Args:
|
|
@@ -178,10 +179,22 @@ class SnippetDomainService:
|
|
|
178
179
|
source_uri: Optional source URI to filter by
|
|
179
180
|
|
|
180
181
|
Returns:
|
|
181
|
-
List of
|
|
182
|
+
List of search results matching the criteria
|
|
182
183
|
|
|
183
184
|
"""
|
|
184
|
-
|
|
185
|
+
snippet_items = await self.snippet_repository.list_snippets(
|
|
186
|
+
file_path, source_uri
|
|
187
|
+
)
|
|
188
|
+
# Convert SnippetListItem to MultiSearchResult for unified display format
|
|
189
|
+
return [
|
|
190
|
+
MultiSearchResult(
|
|
191
|
+
id=item.id,
|
|
192
|
+
uri=item.source_uri,
|
|
193
|
+
content=item.content,
|
|
194
|
+
original_scores=[],
|
|
195
|
+
)
|
|
196
|
+
for item in snippet_items
|
|
197
|
+
]
|
|
185
198
|
|
|
186
199
|
def _should_process_file(self, file: Any) -> bool:
|
|
187
200
|
"""Check if a file should be processed for snippet extraction.
|
kodit/domain/value_objects.py
CHANGED
|
@@ -182,6 +182,18 @@ class MultiSearchResult:
|
|
|
182
182
|
content: str
|
|
183
183
|
original_scores: list[float]
|
|
184
184
|
|
|
185
|
+
def __str__(self) -> str:
|
|
186
|
+
"""Return formatted string representation for all snippet display."""
|
|
187
|
+
lines = [
|
|
188
|
+
"-" * 80,
|
|
189
|
+
f"ID: {self.id} | {self.uri}",
|
|
190
|
+
f"Original scores: {self.original_scores}",
|
|
191
|
+
self.content,
|
|
192
|
+
"-" * 80,
|
|
193
|
+
"",
|
|
194
|
+
]
|
|
195
|
+
return "\n".join(lines)
|
|
196
|
+
|
|
185
197
|
|
|
186
198
|
@dataclass
|
|
187
199
|
class FusionRequest:
|
|
@@ -289,6 +301,29 @@ class SnippetListItem:
|
|
|
289
301
|
source_uri: str
|
|
290
302
|
|
|
291
303
|
|
|
304
|
+
@dataclass
|
|
305
|
+
class FileInfo:
|
|
306
|
+
"""Domain model for file information."""
|
|
307
|
+
|
|
308
|
+
uri: str
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
@dataclass
|
|
312
|
+
class SnippetInfo:
|
|
313
|
+
"""Domain model for snippet information."""
|
|
314
|
+
|
|
315
|
+
id: int
|
|
316
|
+
content: str
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
@dataclass
|
|
320
|
+
class SnippetWithFile:
|
|
321
|
+
"""Domain model for snippet with associated file information."""
|
|
322
|
+
|
|
323
|
+
file: FileInfo
|
|
324
|
+
snippet: SnippetInfo
|
|
325
|
+
|
|
326
|
+
|
|
292
327
|
class LanguageMapping:
|
|
293
328
|
"""Value object for language-to-extension mappings.
|
|
294
329
|
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Service for automatically indexing configured sources."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from contextlib import suppress
|
|
6
|
+
|
|
7
|
+
import structlog
|
|
8
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
9
|
+
|
|
10
|
+
from kodit.application.factories.code_indexing_factory import (
|
|
11
|
+
create_code_indexing_application_service,
|
|
12
|
+
)
|
|
13
|
+
from kodit.config import AppContext
|
|
14
|
+
from kodit.domain.services.source_service import SourceService
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AutoIndexingService:
|
|
18
|
+
"""Service for automatically indexing configured sources."""
|
|
19
|
+
|
|
20
|
+
def __init__(
|
|
21
|
+
self,
|
|
22
|
+
app_context: AppContext,
|
|
23
|
+
session_factory: Callable[[], AsyncSession],
|
|
24
|
+
) -> None:
|
|
25
|
+
"""Initialize the auto-indexing service."""
|
|
26
|
+
self.app_context = app_context
|
|
27
|
+
self.session_factory = session_factory
|
|
28
|
+
self.log = structlog.get_logger(__name__)
|
|
29
|
+
self._indexing_task: asyncio.Task | None = None
|
|
30
|
+
|
|
31
|
+
async def start_background_indexing(self) -> None:
|
|
32
|
+
"""Start background indexing of configured sources."""
|
|
33
|
+
if (
|
|
34
|
+
not self.app_context.auto_indexing
|
|
35
|
+
or len(self.app_context.auto_indexing.sources) == 0
|
|
36
|
+
):
|
|
37
|
+
self.log.info("Auto-indexing is disabled (no sources configured)")
|
|
38
|
+
return
|
|
39
|
+
|
|
40
|
+
auto_sources = [source.uri for source in self.app_context.auto_indexing.sources]
|
|
41
|
+
self.log.info("Starting background indexing", num_sources=len(auto_sources))
|
|
42
|
+
self._indexing_task = asyncio.create_task(self._index_sources(auto_sources))
|
|
43
|
+
|
|
44
|
+
async def _index_sources(self, sources: list[str]) -> None:
|
|
45
|
+
"""Index all configured sources in the background."""
|
|
46
|
+
async with self.session_factory() as session:
|
|
47
|
+
source_service = SourceService(
|
|
48
|
+
clone_dir=self.app_context.get_clone_dir(),
|
|
49
|
+
session_factory=lambda: session,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
service = create_code_indexing_application_service(
|
|
53
|
+
app_context=self.app_context,
|
|
54
|
+
session=session,
|
|
55
|
+
source_service=source_service,
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
for source in sources:
|
|
59
|
+
try:
|
|
60
|
+
self.log.info("Auto-indexing source", source=source)
|
|
61
|
+
|
|
62
|
+
# Create source
|
|
63
|
+
s = await source_service.create(source)
|
|
64
|
+
|
|
65
|
+
# Create index
|
|
66
|
+
index = await service.create_index(s.id)
|
|
67
|
+
|
|
68
|
+
# Run indexing (without progress callback for background mode)
|
|
69
|
+
await service.run_index(index.id, progress_callback=None)
|
|
70
|
+
|
|
71
|
+
self.log.info("Successfully auto-indexed source", source=source)
|
|
72
|
+
|
|
73
|
+
except Exception as exc:
|
|
74
|
+
self.log.exception(
|
|
75
|
+
"Failed to auto-index source", source=source, error=str(exc)
|
|
76
|
+
)
|
|
77
|
+
# Continue with other sources even if one fails
|
|
78
|
+
|
|
79
|
+
async def stop(self) -> None:
|
|
80
|
+
"""Stop background indexing."""
|
|
81
|
+
if self._indexing_task:
|
|
82
|
+
self._indexing_task.cancel()
|
|
83
|
+
with suppress(asyncio.CancelledError):
|
|
84
|
+
await self._indexing_task
|
|
@@ -8,7 +8,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
8
8
|
|
|
9
9
|
from kodit.domain.entities import Embedding, File, Index, Snippet, Source
|
|
10
10
|
from kodit.domain.services.indexing_service import IndexRepository
|
|
11
|
-
from kodit.domain.value_objects import IndexView
|
|
11
|
+
from kodit.domain.value_objects import FileInfo, IndexView, SnippetInfo, SnippetWithFile
|
|
12
12
|
|
|
13
13
|
T = TypeVar("T")
|
|
14
14
|
|
|
@@ -221,14 +221,14 @@ class SQLAlchemyIndexRepository(IndexRepository):
|
|
|
221
221
|
snippet.content = content
|
|
222
222
|
# SQLAlchemy will automatically track this change
|
|
223
223
|
|
|
224
|
-
async def list_snippets_by_ids(self, ids: list[int]) -> list[
|
|
224
|
+
async def list_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithFile]:
|
|
225
225
|
"""List snippets by IDs.
|
|
226
226
|
|
|
227
227
|
Args:
|
|
228
228
|
ids: List of snippet IDs to retrieve.
|
|
229
229
|
|
|
230
230
|
Returns:
|
|
231
|
-
List of
|
|
231
|
+
List of SnippetWithFile objects containing file and snippet information.
|
|
232
232
|
|
|
233
233
|
"""
|
|
234
234
|
query = (
|
|
@@ -241,27 +241,9 @@ class SQLAlchemyIndexRepository(IndexRepository):
|
|
|
241
241
|
# Create a dictionary for O(1) lookup of results by ID
|
|
242
242
|
id_to_result = {}
|
|
243
243
|
for snippet, file in rows.all():
|
|
244
|
-
id_to_result[snippet.id] = (
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
"source_id": file.source_id,
|
|
248
|
-
"mime_type": file.mime_type,
|
|
249
|
-
"uri": file.uri,
|
|
250
|
-
"cloned_path": file.cloned_path,
|
|
251
|
-
"sha256": file.sha256,
|
|
252
|
-
"size_bytes": file.size_bytes,
|
|
253
|
-
"extension": file.extension,
|
|
254
|
-
"created_at": file.created_at,
|
|
255
|
-
"updated_at": file.updated_at,
|
|
256
|
-
},
|
|
257
|
-
{
|
|
258
|
-
"id": snippet.id,
|
|
259
|
-
"file_id": snippet.file_id,
|
|
260
|
-
"index_id": snippet.index_id,
|
|
261
|
-
"content": snippet.content,
|
|
262
|
-
"created_at": snippet.created_at,
|
|
263
|
-
"updated_at": snippet.updated_at,
|
|
264
|
-
},
|
|
244
|
+
id_to_result[snippet.id] = SnippetWithFile(
|
|
245
|
+
file=FileInfo(uri=file.uri),
|
|
246
|
+
snippet=SnippetInfo(id=snippet.id, content=snippet.content)
|
|
265
247
|
)
|
|
266
248
|
|
|
267
249
|
# Check that all IDs are present
|
kodit/mcp.py
CHANGED
|
@@ -203,7 +203,7 @@ async def search( # noqa: PLR0913
|
|
|
203
203
|
|
|
204
204
|
def output_fusion(snippets: list[MultiSearchResult]) -> str:
|
|
205
205
|
"""Fuse the snippets into a single output."""
|
|
206
|
-
return "\n\n".join(
|
|
206
|
+
return "\n\n".join(str(snippet) for snippet in snippets)
|
|
207
207
|
|
|
208
208
|
|
|
209
209
|
@mcp.tool()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kodit
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Code indexing for better AI code generation
|
|
5
5
|
Project-URL: Homepage, https://docs.helixml.tech/kodit/
|
|
6
6
|
Project-URL: Documentation, https://docs.helixml.tech/kodit/
|
|
@@ -83,7 +83,7 @@ If you're an engineer working with AI-powered coding assistants, Kodit helps by
|
|
|
83
83
|
providing relevant and up-to-date examples of your task so that LLMs make less mistakes
|
|
84
84
|
and produce fewer hallucinations.
|
|
85
85
|
|
|
86
|
-
##
|
|
86
|
+
## Features
|
|
87
87
|
|
|
88
88
|
### Codebase Indexing
|
|
89
89
|
|
|
@@ -94,7 +94,11 @@ code. This index is used to build a snippet library, ready for ingestion into an
|
|
|
94
94
|
- Build comprehensive snippet libraries for LLM ingestion
|
|
95
95
|
- Support for multiple codebase types and languages
|
|
96
96
|
- Efficient indexing and search capabilities
|
|
97
|
-
- Privacy first: respects .gitignore and .noindex files
|
|
97
|
+
- Privacy first: respects .gitignore and .noindex files
|
|
98
|
+
- **NEW in 0.3**: Auto-indexing configuration for shared server deployments
|
|
99
|
+
- **NEW in 0.3**: Enhanced Git provider support including Azure DevOps
|
|
100
|
+
- **NEW in 0.3**: Index private repositories via a PAT
|
|
101
|
+
- **NEW in 0.3**: Improved progress monitoring and reporting during indexing
|
|
98
102
|
|
|
99
103
|
### MCP Server
|
|
100
104
|
|
|
@@ -107,6 +111,7 @@ intent. Kodit has been tested to work well with:
|
|
|
107
111
|
- [Cursor](https://docs.helix.ml/kodit/getting-started/integration/#integration-with-cursor)
|
|
108
112
|
- [Cline](https://docs.helix.ml/kodit/getting-started/integration/#integration-with-cline)
|
|
109
113
|
- Please contribute more instructions! ... any other assistant is likely to work ...
|
|
114
|
+
- **New in 0.3**: Filter snippets by source, language, author or timestamp.
|
|
110
115
|
|
|
111
116
|
### Enterprise Ready
|
|
112
117
|
|
|
@@ -127,7 +132,12 @@ Supported providers:
|
|
|
127
132
|
- Secure, private LLM enclave with [Helix](https://helix.ml).
|
|
128
133
|
- Any other OpenAI compatible API
|
|
129
134
|
|
|
130
|
-
|
|
135
|
+
**NEW in 0.3**: Enhanced deployment options:
|
|
136
|
+
|
|
137
|
+
- Docker Compose configurations with VectorChord
|
|
138
|
+
- Kubernetes manifests for production deployments
|
|
139
|
+
|
|
140
|
+
## Quick Start
|
|
131
141
|
|
|
132
142
|
1. [Install Kodit](https://docs.helix.ml/kodit/getting-started/installation/)
|
|
133
143
|
2. [Index codebases](https://docs.helix.ml/kodit/getting-started/quick-start/)
|
|
@@ -1,34 +1,34 @@
|
|
|
1
1
|
kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
|
|
2
2
|
kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
|
|
3
|
-
kodit/_version.py,sha256=
|
|
4
|
-
kodit/app.py,sha256=
|
|
5
|
-
kodit/cli.py,sha256=
|
|
6
|
-
kodit/config.py,sha256=
|
|
3
|
+
kodit/_version.py,sha256=lOWWIGJeBi0KkFopWU_n3GH71C1PsaZ-ZYDfxFkne6c,511
|
|
4
|
+
kodit/app.py,sha256=uv67TE83fZE7wrA7cz-sKosFrAXlKRr1B7fT-X_gMZQ,2103
|
|
5
|
+
kodit/cli.py,sha256=fGk2VMJDrgaj0T9w-97Xh6LVqus6vVehWJUTkjgWtyk,16013
|
|
6
|
+
kodit/config.py,sha256=VUoUi2t2yGhqOtm5MSZuaasNSklH50hfWn6GOrz3jnU,7518
|
|
7
7
|
kodit/database.py,sha256=kI9yBm4uunsgV4-QeVoCBL0wLzU4kYmYv5qZilGnbPE,1740
|
|
8
8
|
kodit/log.py,sha256=sHPHYetlMcKTor2VaFLMyao1_fZ_xhuzqXCAt5F5UMU,8575
|
|
9
|
-
kodit/mcp.py,sha256=
|
|
9
|
+
kodit/mcp.py,sha256=4rvj76SUbO-FfoJgOY31CR5r0y0VxMAKkVKNuKLgwB0,6431
|
|
10
10
|
kodit/middleware.py,sha256=I6FOkqG9-8RH5kR1-0ZoQWfE4qLCB8lZYv8H_OCH29o,2714
|
|
11
11
|
kodit/reporting.py,sha256=icce1ZyiADsA_Qz-mSjgn2H4SSqKuGfLKnw-yrl9nsg,2722
|
|
12
12
|
kodit/application/__init__.py,sha256=mH50wTpgP9dhbKztFsL8Dda9Hi18TSnMVxXtpp4aGOA,35
|
|
13
13
|
kodit/application/factories/__init__.py,sha256=bU5CvEnaBePZ7JbkCOp1MGTNP752bnU2uEqmfy5FdRk,37
|
|
14
14
|
kodit/application/factories/code_indexing_factory.py,sha256=pyGcTmqhBRjw0tDvp5UpG0roBf3ROqYvBcHyvaLZ-qQ,4927
|
|
15
15
|
kodit/application/services/__init__.py,sha256=p5UQNw-H5sxQvs5Etfte93B3cJ1kKW6DNxK34uFvU1E,38
|
|
16
|
-
kodit/application/services/code_indexing_application_service.py,sha256=
|
|
16
|
+
kodit/application/services/code_indexing_application_service.py,sha256=HGaZDZG0GPLxxQ1-GUgeEx_AsZhG0FAtdUugoD68uI0,12521
|
|
17
17
|
kodit/domain/__init__.py,sha256=TCpg4Xx-oF4mKV91lo4iXqMEfBT1OoRSYnbG-zVWolA,66
|
|
18
18
|
kodit/domain/entities.py,sha256=fErA9ZTAqlofkqhBte8FOnV0PHf1MUORb37bW0-Dgc4,5624
|
|
19
19
|
kodit/domain/enums.py,sha256=Ik_h3D3eZ0FsSlPsU0ikm-Yv3Rmvzicffi9yBn19UIE,191
|
|
20
20
|
kodit/domain/errors.py,sha256=yIsgCjM_yOFIg8l7l-t7jM8pgeAX4cfPq0owf7iz3DA,106
|
|
21
21
|
kodit/domain/interfaces.py,sha256=Jkd0Ob4qSvhZHI9jRPFQ1n5Cv0SvU-y3Z-HCw2ikc4I,742
|
|
22
22
|
kodit/domain/repositories.py,sha256=KAIx_-qZD68pAByc1JNVxSCRLjseayHKn5ykqsE6uWw,3781
|
|
23
|
-
kodit/domain/value_objects.py,sha256=
|
|
23
|
+
kodit/domain/value_objects.py,sha256=7OtjtgdVB0rhWwezJB_-A6cCqk_ijmkefMRv2ql5AQ0,13479
|
|
24
24
|
kodit/domain/services/__init__.py,sha256=Q1GhCK_PqKHYwYE4tkwDz5BIyXkJngLBBOHhzvX8nzo,42
|
|
25
25
|
kodit/domain/services/bm25_service.py,sha256=nsfTan3XtDwXuuAu1LUv-6Jukm6qFKVqqCVymjyepZQ,3625
|
|
26
26
|
kodit/domain/services/embedding_service.py,sha256=Wh6Y2NR_GRnud8dq1Q7S6F40aNe-S2UyD5Nqz9LChTM,4507
|
|
27
27
|
kodit/domain/services/enrichment_service.py,sha256=XsXg3nV-KN4rqtC7Zro_ZiZ6RSq-1eA1MG6IDzFGyBA,1316
|
|
28
28
|
kodit/domain/services/ignore_service.py,sha256=boEN-IRLmUtwO9ZnuACaVFZbIKrtUG8YwnsXKEDIG28,1136
|
|
29
|
-
kodit/domain/services/indexing_service.py,sha256=
|
|
29
|
+
kodit/domain/services/indexing_service.py,sha256=vBjg9G75XoNfwH7m43l16zEmKdemHkzrgwunguiWix8,5911
|
|
30
30
|
kodit/domain/services/snippet_extraction_service.py,sha256=QW_99bXWpr8g6ZI-hp4Aj57VCSrUf71dLwQca5T6pyg,3065
|
|
31
|
-
kodit/domain/services/snippet_service.py,sha256=
|
|
31
|
+
kodit/domain/services/snippet_service.py,sha256=OyiDPJx5O2I1EyYnH_W-rvggti0DUefp4JaKIblHhR8,6867
|
|
32
32
|
kodit/domain/services/source_service.py,sha256=9XGS3imJn65v855cztsJSaaFod6LhkF2xfUVMaytx-A,3068
|
|
33
33
|
kodit/infrastructure/__init__.py,sha256=HzEYIjoXnkz_i_MHO2e0sIVYweUcRnl2RpyBiTbMObU,28
|
|
34
34
|
kodit/infrastructure/bm25/__init__.py,sha256=DmGbrEO34FOJy4e685BbyxLA7gPW1eqs2gAxsp6JOuM,34
|
|
@@ -62,8 +62,9 @@ kodit/infrastructure/git/git_utils.py,sha256=2DH6cyTjDRwFfL5Bzt1y2w0DwHZNypbC6R0
|
|
|
62
62
|
kodit/infrastructure/ignore/__init__.py,sha256=VzFv8XOzHmsu0MEGnWVSF6KsgqLBmvHlRqAkT1Xb1MY,36
|
|
63
63
|
kodit/infrastructure/ignore/ignore_pattern_provider.py,sha256=9m2XCsgW87UBTfzHr6Z0Ns6WpzwkLir3zyBY3PwsgXk,2225
|
|
64
64
|
kodit/infrastructure/indexing/__init__.py,sha256=7UPRa2jwCAsa0Orsp6PqXSF8iIXJVzXHMFmrKkI9yH8,38
|
|
65
|
+
kodit/infrastructure/indexing/auto_indexing_service.py,sha256=uXggladN3PTU5Jzhz0Kq-0aObvq3Dq9YbjYKCSkaQA8,3131
|
|
65
66
|
kodit/infrastructure/indexing/fusion_service.py,sha256=mXUUcx3-8e75mWkxXMfl30HIoFXrTNHzB1w90MmEbak,1806
|
|
66
|
-
kodit/infrastructure/indexing/index_repository.py,sha256=
|
|
67
|
+
kodit/infrastructure/indexing/index_repository.py,sha256=qs1RiFuf29kbKX4unP98_4-f9vQCmpCo-q-zvCCFPCE,8287
|
|
67
68
|
kodit/infrastructure/indexing/indexing_factory.py,sha256=LPjPCps_wJ9M_fZGRP02bfc2pvYc50ZSTYI99XwRRPg,918
|
|
68
69
|
kodit/infrastructure/indexing/snippet_domain_service_factory.py,sha256=OMp9qRJSAT3oWqsMyF1fgI2Mb_G-SA22crbbaCb7c-Q,1253
|
|
69
70
|
kodit/infrastructure/snippet_extraction/__init__.py,sha256=v6KqrRDjSj0nt87m7UwRGx2GN_fz_14VWq9Q0uABR_s,54
|
|
@@ -94,8 +95,8 @@ kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7h
|
|
|
94
95
|
kodit/migrations/versions/9e53ea8bb3b0_add_authors.py,sha256=a32Zm8KUQyiiLkjKNPYdaJDgjW6VsV-GhaLnPnK_fpI,3884
|
|
95
96
|
kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
|
|
96
97
|
kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py,sha256=rI8LmjF-I2OMxZ2nOIF_NRmqOLXe45hL_iz_nx97DTQ,1680
|
|
97
|
-
kodit-0.
|
|
98
|
-
kodit-0.
|
|
99
|
-
kodit-0.
|
|
100
|
-
kodit-0.
|
|
101
|
-
kodit-0.
|
|
98
|
+
kodit-0.3.1.dist-info/METADATA,sha256=iI5UCxq-ih4cF-GVJLyNQ8wdRxwC907Jj6pddwiIsJo,6358
|
|
99
|
+
kodit-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
100
|
+
kodit-0.3.1.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
|
|
101
|
+
kodit-0.3.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
102
|
+
kodit-0.3.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|