kodit 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/services/code_indexing_application_service.py +19 -17
- kodit/cli.py +35 -33
- kodit/config.py +62 -4
- kodit/domain/entities.py +9 -1
- kodit/domain/repositories.py +5 -5
- kodit/domain/services/indexing_service.py +4 -3
- kodit/domain/services/snippet_service.py +33 -16
- kodit/domain/value_objects.py +87 -9
- kodit/infrastructure/indexing/index_repository.py +33 -38
- kodit/infrastructure/sqlalchemy/snippet_repository.py +86 -78
- kodit/log.py +4 -1
- kodit/mcp.py +1 -6
- kodit/migrations/versions/4552eb3f23ce_add_summary.py +34 -0
- {kodit-0.3.0.dist-info → kodit-0.3.2.dist-info}/METADATA +14 -4
- {kodit-0.3.0.dist-info → kodit-0.3.2.dist-info}/RECORD +19 -18
- {kodit-0.3.0.dist-info → kodit-0.3.2.dist-info}/WHEEL +0 -0
- {kodit-0.3.0.dist-info → kodit-0.3.2.dist-info}/entry_points.txt +0 -0
- {kodit-0.3.0.dist-info → kodit-0.3.2.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py
CHANGED
|
@@ -27,7 +27,6 @@ from kodit.domain.value_objects import (
|
|
|
27
27
|
MultiSearchResult,
|
|
28
28
|
SearchRequest,
|
|
29
29
|
SearchResult,
|
|
30
|
-
SnippetListItem,
|
|
31
30
|
)
|
|
32
31
|
from kodit.log import log_event
|
|
33
32
|
from kodit.reporting import Reporter
|
|
@@ -158,7 +157,7 @@ class CodeIndexingApplicationService:
|
|
|
158
157
|
snippet_results = await self.snippet_domain_service.search_snippets(
|
|
159
158
|
prefilter_request
|
|
160
159
|
)
|
|
161
|
-
filtered_snippet_ids = [snippet.id for snippet in snippet_results]
|
|
160
|
+
filtered_snippet_ids = [snippet.snippet.id for snippet in snippet_results]
|
|
162
161
|
|
|
163
162
|
# Gather results from different search modes
|
|
164
163
|
fusion_list: list[list[FusionRequest]] = []
|
|
@@ -225,17 +224,28 @@ class CodeIndexingApplicationService:
|
|
|
225
224
|
|
|
226
225
|
return [
|
|
227
226
|
MultiSearchResult(
|
|
228
|
-
id=snippet
|
|
229
|
-
|
|
230
|
-
content=snippet["content"],
|
|
227
|
+
id=result.snippet.id,
|
|
228
|
+
content=result.snippet.content,
|
|
231
229
|
original_scores=fr.original_scores,
|
|
230
|
+
# Enhanced fields
|
|
231
|
+
source_uri=result.source.uri,
|
|
232
|
+
relative_path=MultiSearchResult.calculate_relative_path(
|
|
233
|
+
result.file.cloned_path, result.source.cloned_path
|
|
234
|
+
),
|
|
235
|
+
language=MultiSearchResult.detect_language_from_extension(
|
|
236
|
+
result.file.extension
|
|
237
|
+
),
|
|
238
|
+
authors=[author.name for author in result.authors],
|
|
239
|
+
created_at=result.snippet.created_at,
|
|
240
|
+
# Summary from snippet entity
|
|
241
|
+
summary=result.snippet.summary,
|
|
232
242
|
)
|
|
233
|
-
for
|
|
243
|
+
for result, fr in zip(search_results, final_results, strict=True)
|
|
234
244
|
]
|
|
235
245
|
|
|
236
246
|
async def list_snippets(
|
|
237
247
|
self, file_path: str | None = None, source_uri: str | None = None
|
|
238
|
-
) -> list[
|
|
248
|
+
) -> list[MultiSearchResult]:
|
|
239
249
|
"""List snippets with optional filtering."""
|
|
240
250
|
log_event("kodit.index.list_snippets")
|
|
241
251
|
return await self.snippet_domain_service.list_snippets(file_path, source_uri)
|
|
@@ -301,16 +311,8 @@ class CodeIndexingApplicationService:
|
|
|
301
311
|
async for result in self.enrichment_service.enrich_documents(
|
|
302
312
|
enrichment_request
|
|
303
313
|
):
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
result.text
|
|
307
|
-
+ "\n\n```\n"
|
|
308
|
-
+ next(s.content for s in snippets if s.id == result.snippet_id)
|
|
309
|
-
+ "\n```"
|
|
310
|
-
)
|
|
311
|
-
|
|
312
|
-
await self.snippet_domain_service.update_snippet_content(
|
|
313
|
-
result.snippet_id, enriched_content
|
|
314
|
+
await self.snippet_domain_service.update_snippet_summary(
|
|
315
|
+
result.snippet_id, result.text
|
|
314
316
|
)
|
|
315
317
|
|
|
316
318
|
processed += 1
|
kodit/cli.py
CHANGED
|
@@ -20,7 +20,11 @@ from kodit.config import (
|
|
|
20
20
|
)
|
|
21
21
|
from kodit.domain.errors import EmptySourceError
|
|
22
22
|
from kodit.domain.services.source_service import SourceService
|
|
23
|
-
from kodit.domain.value_objects import
|
|
23
|
+
from kodit.domain.value_objects import (
|
|
24
|
+
MultiSearchRequest,
|
|
25
|
+
MultiSearchResult,
|
|
26
|
+
SnippetSearchFilters,
|
|
27
|
+
)
|
|
24
28
|
from kodit.infrastructure.ui.progress import (
|
|
25
29
|
create_lazy_progress_callback,
|
|
26
30
|
create_multi_stage_progress_callback,
|
|
@@ -219,6 +223,7 @@ def _parse_filters(
|
|
|
219
223
|
@click.option(
|
|
220
224
|
"--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
|
|
221
225
|
)
|
|
226
|
+
@click.option("--output-format", default="text", help="Format to display snippets in")
|
|
222
227
|
@with_app_context
|
|
223
228
|
@with_session
|
|
224
229
|
async def code( # noqa: PLR0913
|
|
@@ -231,6 +236,7 @@ async def code( # noqa: PLR0913
|
|
|
231
236
|
created_after: str | None,
|
|
232
237
|
created_before: str | None,
|
|
233
238
|
source_repo: str | None,
|
|
239
|
+
output_format: str,
|
|
234
240
|
) -> None:
|
|
235
241
|
"""Search for snippets using semantic code search.
|
|
236
242
|
|
|
@@ -259,13 +265,10 @@ async def code( # noqa: PLR0913
|
|
|
259
265
|
click.echo("No snippets found")
|
|
260
266
|
return
|
|
261
267
|
|
|
262
|
-
|
|
263
|
-
click.echo(
|
|
264
|
-
|
|
265
|
-
click.echo(
|
|
266
|
-
click.echo(snippet.content)
|
|
267
|
-
click.echo("-" * 80)
|
|
268
|
-
click.echo()
|
|
268
|
+
if output_format == "text":
|
|
269
|
+
click.echo(MultiSearchResult.to_string(snippets))
|
|
270
|
+
elif output_format == "json":
|
|
271
|
+
click.echo(MultiSearchResult.to_jsonlines(snippets))
|
|
269
272
|
|
|
270
273
|
|
|
271
274
|
@search.command()
|
|
@@ -284,6 +287,7 @@ async def code( # noqa: PLR0913
|
|
|
284
287
|
@click.option(
|
|
285
288
|
"--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
|
|
286
289
|
)
|
|
290
|
+
@click.option("--output-format", default="text", help="Format to display snippets in")
|
|
287
291
|
@with_app_context
|
|
288
292
|
@with_session
|
|
289
293
|
async def keyword( # noqa: PLR0913
|
|
@@ -296,6 +300,7 @@ async def keyword( # noqa: PLR0913
|
|
|
296
300
|
created_after: str | None,
|
|
297
301
|
created_before: str | None,
|
|
298
302
|
source_repo: str | None,
|
|
303
|
+
output_format: str,
|
|
299
304
|
) -> None:
|
|
300
305
|
"""Search for snippets using keyword search."""
|
|
301
306
|
log_event("kodit.cli.search.keyword")
|
|
@@ -321,13 +326,10 @@ async def keyword( # noqa: PLR0913
|
|
|
321
326
|
click.echo("No snippets found")
|
|
322
327
|
return
|
|
323
328
|
|
|
324
|
-
|
|
325
|
-
click.echo(
|
|
326
|
-
|
|
327
|
-
click.echo(
|
|
328
|
-
click.echo(snippet.content)
|
|
329
|
-
click.echo("-" * 80)
|
|
330
|
-
click.echo()
|
|
329
|
+
if output_format == "text":
|
|
330
|
+
click.echo(MultiSearchResult.to_string(snippets))
|
|
331
|
+
elif output_format == "json":
|
|
332
|
+
click.echo(MultiSearchResult.to_jsonlines(snippets))
|
|
331
333
|
|
|
332
334
|
|
|
333
335
|
@search.command()
|
|
@@ -346,6 +348,7 @@ async def keyword( # noqa: PLR0913
|
|
|
346
348
|
@click.option(
|
|
347
349
|
"--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
|
|
348
350
|
)
|
|
351
|
+
@click.option("--output-format", default="text", help="Format to display snippets in")
|
|
349
352
|
@with_app_context
|
|
350
353
|
@with_session
|
|
351
354
|
async def text( # noqa: PLR0913
|
|
@@ -358,6 +361,7 @@ async def text( # noqa: PLR0913
|
|
|
358
361
|
created_after: str | None,
|
|
359
362
|
created_before: str | None,
|
|
360
363
|
source_repo: str | None,
|
|
364
|
+
output_format: str,
|
|
361
365
|
) -> None:
|
|
362
366
|
"""Search for snippets using semantic text search.
|
|
363
367
|
|
|
@@ -386,13 +390,10 @@ async def text( # noqa: PLR0913
|
|
|
386
390
|
click.echo("No snippets found")
|
|
387
391
|
return
|
|
388
392
|
|
|
389
|
-
|
|
390
|
-
click.echo(
|
|
391
|
-
|
|
392
|
-
click.echo(
|
|
393
|
-
click.echo(snippet.content)
|
|
394
|
-
click.echo("-" * 80)
|
|
395
|
-
click.echo()
|
|
393
|
+
if output_format == "text":
|
|
394
|
+
click.echo(MultiSearchResult.to_string(snippets))
|
|
395
|
+
elif output_format == "json":
|
|
396
|
+
click.echo(MultiSearchResult.to_jsonlines(snippets))
|
|
396
397
|
|
|
397
398
|
|
|
398
399
|
@search.command()
|
|
@@ -413,6 +414,7 @@ async def text( # noqa: PLR0913
|
|
|
413
414
|
@click.option(
|
|
414
415
|
"--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
|
|
415
416
|
)
|
|
417
|
+
@click.option("--output-format", default="text", help="Format to display snippets in")
|
|
416
418
|
@with_app_context
|
|
417
419
|
@with_session
|
|
418
420
|
async def hybrid( # noqa: PLR0913
|
|
@@ -427,6 +429,7 @@ async def hybrid( # noqa: PLR0913
|
|
|
427
429
|
created_after: str | None,
|
|
428
430
|
created_before: str | None,
|
|
429
431
|
source_repo: str | None,
|
|
432
|
+
output_format: str,
|
|
430
433
|
) -> None:
|
|
431
434
|
"""Search for snippets using hybrid search."""
|
|
432
435
|
log_event("kodit.cli.search.hybrid")
|
|
@@ -461,13 +464,10 @@ async def hybrid( # noqa: PLR0913
|
|
|
461
464
|
click.echo("No snippets found")
|
|
462
465
|
return
|
|
463
466
|
|
|
464
|
-
|
|
465
|
-
click.echo(
|
|
466
|
-
|
|
467
|
-
click.echo(
|
|
468
|
-
click.echo(snippet.content)
|
|
469
|
-
click.echo("-" * 80)
|
|
470
|
-
click.echo()
|
|
467
|
+
if output_format == "text":
|
|
468
|
+
click.echo(MultiSearchResult.to_string(snippets))
|
|
469
|
+
elif output_format == "json":
|
|
470
|
+
click.echo(MultiSearchResult.to_jsonlines(snippets))
|
|
471
471
|
|
|
472
472
|
|
|
473
473
|
@cli.group()
|
|
@@ -478,6 +478,7 @@ def show() -> None:
|
|
|
478
478
|
@show.command()
|
|
479
479
|
@click.option("--by-path", help="File or directory path to search for snippets")
|
|
480
480
|
@click.option("--by-source", help="Source URI to filter snippets by")
|
|
481
|
+
@click.option("--output-format", default="text", help="Format to display snippets in")
|
|
481
482
|
@with_app_context
|
|
482
483
|
@with_session
|
|
483
484
|
async def snippets(
|
|
@@ -485,6 +486,7 @@ async def snippets(
|
|
|
485
486
|
app_context: AppContext,
|
|
486
487
|
by_path: str | None,
|
|
487
488
|
by_source: str | None,
|
|
489
|
+
output_format: str,
|
|
488
490
|
) -> None:
|
|
489
491
|
"""Show snippets with optional filtering by path or source."""
|
|
490
492
|
log_event("kodit.cli.show.snippets")
|
|
@@ -498,10 +500,10 @@ async def snippets(
|
|
|
498
500
|
source_service=source_service,
|
|
499
501
|
)
|
|
500
502
|
snippets = await service.list_snippets(file_path=by_path, source_uri=by_source)
|
|
501
|
-
|
|
502
|
-
click.echo(
|
|
503
|
-
|
|
504
|
-
click.echo()
|
|
503
|
+
if output_format == "text":
|
|
504
|
+
click.echo(MultiSearchResult.to_string(snippets))
|
|
505
|
+
elif output_format == "json":
|
|
506
|
+
click.echo(MultiSearchResult.to_jsonlines(snippets))
|
|
505
507
|
|
|
506
508
|
|
|
507
509
|
@cli.command()
|
kodit/config.py
CHANGED
|
@@ -9,7 +9,12 @@ from typing import TYPE_CHECKING, Any, Literal, TypeVar
|
|
|
9
9
|
|
|
10
10
|
import click
|
|
11
11
|
from pydantic import BaseModel, Field, field_validator
|
|
12
|
-
from pydantic_settings import
|
|
12
|
+
from pydantic_settings import (
|
|
13
|
+
BaseSettings,
|
|
14
|
+
EnvSettingsSource,
|
|
15
|
+
PydanticBaseSettingsSource,
|
|
16
|
+
SettingsConfigDict,
|
|
17
|
+
)
|
|
13
18
|
|
|
14
19
|
if TYPE_CHECKING:
|
|
15
20
|
from collections.abc import Callable, Coroutine
|
|
@@ -57,7 +62,7 @@ class AutoIndexingConfig(BaseModel):
|
|
|
57
62
|
|
|
58
63
|
@field_validator("sources", mode="before")
|
|
59
64
|
@classmethod
|
|
60
|
-
def parse_sources(cls, v: Any) ->
|
|
65
|
+
def parse_sources(cls, v: Any) -> Any:
|
|
61
66
|
"""Parse sources from environment variables or other formats."""
|
|
62
67
|
if v is None:
|
|
63
68
|
return []
|
|
@@ -76,6 +81,36 @@ class AutoIndexingConfig(BaseModel):
|
|
|
76
81
|
return v
|
|
77
82
|
|
|
78
83
|
|
|
84
|
+
class CustomAutoIndexingEnvSource(EnvSettingsSource):
|
|
85
|
+
"""Custom environment source for parsing AutoIndexingConfig."""
|
|
86
|
+
|
|
87
|
+
def __call__(self) -> dict[str, Any]:
|
|
88
|
+
"""Load settings from env vars with custom auto-indexing parsing."""
|
|
89
|
+
d: dict[str, Any] = {}
|
|
90
|
+
|
|
91
|
+
# First get the standard env vars
|
|
92
|
+
env_vars = super().__call__()
|
|
93
|
+
d.update(env_vars)
|
|
94
|
+
|
|
95
|
+
# Custom parsing for auto-indexing sources
|
|
96
|
+
auto_indexing_sources = []
|
|
97
|
+
i = 0
|
|
98
|
+
while True:
|
|
99
|
+
# Note: env_vars keys are lowercase due to Pydantic Settings normalization
|
|
100
|
+
uri_key = f"auto_indexing_sources_{i}_uri"
|
|
101
|
+
if uri_key in self.env_vars:
|
|
102
|
+
uri_value = self.env_vars[uri_key]
|
|
103
|
+
auto_indexing_sources.append({"uri": uri_value})
|
|
104
|
+
i += 1
|
|
105
|
+
else:
|
|
106
|
+
break
|
|
107
|
+
|
|
108
|
+
if auto_indexing_sources:
|
|
109
|
+
d["auto_indexing"] = {"sources": auto_indexing_sources}
|
|
110
|
+
|
|
111
|
+
return d
|
|
112
|
+
|
|
113
|
+
|
|
79
114
|
class AppContext(BaseSettings):
|
|
80
115
|
"""Global context for the kodit project. Provides a shared state for the app."""
|
|
81
116
|
|
|
@@ -83,10 +118,35 @@ class AppContext(BaseSettings):
|
|
|
83
118
|
env_file=".env",
|
|
84
119
|
env_file_encoding="utf-8",
|
|
85
120
|
env_nested_delimiter="_",
|
|
121
|
+
env_nested_max_split=1,
|
|
86
122
|
nested_model_default_partial_update=True,
|
|
87
123
|
extra="ignore",
|
|
88
124
|
)
|
|
89
125
|
|
|
126
|
+
@classmethod
|
|
127
|
+
def settings_customise_sources(
|
|
128
|
+
cls,
|
|
129
|
+
settings_cls: type[BaseSettings],
|
|
130
|
+
init_settings: PydanticBaseSettingsSource,
|
|
131
|
+
env_settings: PydanticBaseSettingsSource, # noqa: ARG003
|
|
132
|
+
dotenv_settings: PydanticBaseSettingsSource,
|
|
133
|
+
file_secret_settings: PydanticBaseSettingsSource,
|
|
134
|
+
) -> tuple[PydanticBaseSettingsSource, ...]:
|
|
135
|
+
"""Customize settings sources to use custom auto-indexing parsing."""
|
|
136
|
+
custom_env_settings = CustomAutoIndexingEnvSource(
|
|
137
|
+
settings_cls,
|
|
138
|
+
env_nested_delimiter=settings_cls.model_config.get("env_nested_delimiter"),
|
|
139
|
+
env_ignore_empty=settings_cls.model_config.get("env_ignore_empty", False),
|
|
140
|
+
env_parse_none_str=settings_cls.model_config.get("env_parse_none_str", ""),
|
|
141
|
+
env_parse_enums=settings_cls.model_config.get("env_parse_enums", None),
|
|
142
|
+
)
|
|
143
|
+
return (
|
|
144
|
+
init_settings,
|
|
145
|
+
custom_env_settings,
|
|
146
|
+
dotenv_settings,
|
|
147
|
+
file_secret_settings,
|
|
148
|
+
)
|
|
149
|
+
|
|
90
150
|
data_dir: Path = Field(default=DEFAULT_BASE_DIR)
|
|
91
151
|
db_url: str = Field(default=DEFAULT_DB_URL)
|
|
92
152
|
log_level: str = Field(default=DEFAULT_LOG_LEVEL)
|
|
@@ -142,8 +202,6 @@ class AppContext(BaseSettings):
|
|
|
142
202
|
|
|
143
203
|
with_app_context = click.make_pass_decorator(AppContext)
|
|
144
204
|
|
|
145
|
-
T = TypeVar("T")
|
|
146
|
-
|
|
147
205
|
|
|
148
206
|
def wrap_async(f: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
|
|
149
207
|
"""Decorate async Click commands.
|
kodit/domain/entities.py
CHANGED
|
@@ -183,10 +183,18 @@ class Snippet(Base, CommonMixin):
|
|
|
183
183
|
file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
|
|
184
184
|
index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"), index=True)
|
|
185
185
|
content: Mapped[str] = mapped_column(UnicodeText, default="")
|
|
186
|
+
summary: Mapped[str] = mapped_column(UnicodeText, default="")
|
|
186
187
|
|
|
187
|
-
def __init__(
|
|
188
|
+
def __init__(
|
|
189
|
+
self,
|
|
190
|
+
file_id: int,
|
|
191
|
+
index_id: int,
|
|
192
|
+
content: str,
|
|
193
|
+
summary: str = "",
|
|
194
|
+
) -> None:
|
|
188
195
|
"""Initialize the snippet."""
|
|
189
196
|
super().__init__()
|
|
190
197
|
self.file_id = file_id
|
|
191
198
|
self.index_id = index_id
|
|
192
199
|
self.content = content
|
|
200
|
+
self.summary = summary
|
kodit/domain/repositories.py
CHANGED
|
@@ -13,7 +13,7 @@ from kodit.domain.entities import (
|
|
|
13
13
|
)
|
|
14
14
|
from kodit.domain.value_objects import (
|
|
15
15
|
MultiSearchRequest,
|
|
16
|
-
|
|
16
|
+
SnippetWithContext,
|
|
17
17
|
)
|
|
18
18
|
|
|
19
19
|
T = TypeVar("T")
|
|
@@ -92,7 +92,7 @@ class SnippetRepository(GenericRepository[Snippet]):
|
|
|
92
92
|
|
|
93
93
|
async def list_snippets(
|
|
94
94
|
self, file_path: str | None = None, source_uri: str | None = None
|
|
95
|
-
) -> Sequence[
|
|
95
|
+
) -> Sequence[SnippetWithContext]:
|
|
96
96
|
"""List snippets with optional filtering by file path and source URI.
|
|
97
97
|
|
|
98
98
|
Args:
|
|
@@ -102,19 +102,19 @@ class SnippetRepository(GenericRepository[Snippet]):
|
|
|
102
102
|
all sources.
|
|
103
103
|
|
|
104
104
|
Returns:
|
|
105
|
-
A sequence of
|
|
105
|
+
A sequence of SnippetWithContext instances matching the criteria
|
|
106
106
|
|
|
107
107
|
"""
|
|
108
108
|
raise NotImplementedError
|
|
109
109
|
|
|
110
|
-
async def search(self, request: MultiSearchRequest) -> Sequence[
|
|
110
|
+
async def search(self, request: MultiSearchRequest) -> Sequence[SnippetWithContext]:
|
|
111
111
|
"""Search snippets with filters.
|
|
112
112
|
|
|
113
113
|
Args:
|
|
114
114
|
request: The search request containing queries and optional filters.
|
|
115
115
|
|
|
116
116
|
Returns:
|
|
117
|
-
A sequence of
|
|
117
|
+
A sequence of SnippetWithContext instances matching the search criteria.
|
|
118
118
|
|
|
119
119
|
"""
|
|
120
120
|
raise NotImplementedError
|
|
@@ -8,6 +8,7 @@ from kodit.domain.value_objects import (
|
|
|
8
8
|
FusionResult,
|
|
9
9
|
IndexCreateRequest,
|
|
10
10
|
IndexView,
|
|
11
|
+
SnippetWithContext,
|
|
11
12
|
)
|
|
12
13
|
|
|
13
14
|
|
|
@@ -51,7 +52,7 @@ class IndexRepository(ABC):
|
|
|
51
52
|
"""Update the content of an existing snippet."""
|
|
52
53
|
|
|
53
54
|
@abstractmethod
|
|
54
|
-
async def list_snippets_by_ids(self, ids: list[int]) -> list[
|
|
55
|
+
async def list_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
|
|
55
56
|
"""List snippets by IDs."""
|
|
56
57
|
|
|
57
58
|
|
|
@@ -190,14 +191,14 @@ class IndexingDomainService:
|
|
|
190
191
|
"""
|
|
191
192
|
return self.fusion_service.reciprocal_rank_fusion(rankings, k)
|
|
192
193
|
|
|
193
|
-
async def get_snippets_by_ids(self, ids: list[int]) -> list[
|
|
194
|
+
async def get_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
|
|
194
195
|
"""Get snippets by IDs.
|
|
195
196
|
|
|
196
197
|
Args:
|
|
197
198
|
ids: List of snippet IDs to retrieve.
|
|
198
199
|
|
|
199
200
|
Returns:
|
|
200
|
-
List of
|
|
201
|
+
List of SnippetWithFile objects containing file and snippet information.
|
|
201
202
|
|
|
202
203
|
"""
|
|
203
204
|
return await self.index_repository.list_snippets_by_ids(ids)
|
|
@@ -14,8 +14,9 @@ from kodit.domain.services.snippet_extraction_service import (
|
|
|
14
14
|
)
|
|
15
15
|
from kodit.domain.value_objects import (
|
|
16
16
|
MultiSearchRequest,
|
|
17
|
+
MultiSearchResult,
|
|
17
18
|
SnippetExtractionRequest,
|
|
18
|
-
|
|
19
|
+
SnippetWithContext,
|
|
19
20
|
)
|
|
20
21
|
from kodit.reporting import Reporter
|
|
21
22
|
|
|
@@ -92,6 +93,7 @@ class SnippetDomainService:
|
|
|
92
93
|
file_id=file.id,
|
|
93
94
|
index_id=index_id,
|
|
94
95
|
content=snippet_content,
|
|
96
|
+
summary="", # Initially empty, will be populated by enrichment
|
|
95
97
|
)
|
|
96
98
|
saved_snippet = await self.snippet_repository.save(snippet)
|
|
97
99
|
created_snippets.append(saved_snippet)
|
|
@@ -127,22 +129,16 @@ class SnippetDomainService:
|
|
|
127
129
|
# This delegates to the repository but provides a domain-level interface
|
|
128
130
|
return list(await self.snippet_repository.get_by_index(index_id))
|
|
129
131
|
|
|
130
|
-
async def
|
|
131
|
-
"""Update the
|
|
132
|
-
|
|
133
|
-
Args:
|
|
134
|
-
snippet_id: The ID of the snippet to update
|
|
135
|
-
content: The new content for the snippet
|
|
136
|
-
|
|
137
|
-
"""
|
|
132
|
+
async def update_snippet_summary(self, snippet_id: int, summary: str) -> None:
|
|
133
|
+
"""Update the summary of an existing snippet."""
|
|
138
134
|
# Get the snippet first to ensure it exists
|
|
139
135
|
snippet = await self.snippet_repository.get(snippet_id)
|
|
140
136
|
if not snippet:
|
|
141
137
|
msg = f"Snippet not found: {snippet_id}"
|
|
142
138
|
raise ValueError(msg)
|
|
143
139
|
|
|
144
|
-
# Update the
|
|
145
|
-
snippet.
|
|
140
|
+
# Update the summary
|
|
141
|
+
snippet.summary = summary
|
|
146
142
|
await self.snippet_repository.save(snippet)
|
|
147
143
|
|
|
148
144
|
async def delete_snippets_for_index(self, index_id: int) -> None:
|
|
@@ -156,21 +152,21 @@ class SnippetDomainService:
|
|
|
156
152
|
|
|
157
153
|
async def search_snippets(
|
|
158
154
|
self, request: MultiSearchRequest
|
|
159
|
-
) -> list[
|
|
155
|
+
) -> list[SnippetWithContext]:
|
|
160
156
|
"""Search snippets with filters.
|
|
161
157
|
|
|
162
158
|
Args:
|
|
163
159
|
request: The search request containing filters
|
|
164
160
|
|
|
165
161
|
Returns:
|
|
166
|
-
List of matching snippet items
|
|
162
|
+
List of matching snippet items with context
|
|
167
163
|
|
|
168
164
|
"""
|
|
169
165
|
return list(await self.snippet_repository.search(request))
|
|
170
166
|
|
|
171
167
|
async def list_snippets(
|
|
172
168
|
self, file_path: str | None = None, source_uri: str | None = None
|
|
173
|
-
) -> list[
|
|
169
|
+
) -> list[MultiSearchResult]:
|
|
174
170
|
"""List snippets with optional filtering.
|
|
175
171
|
|
|
176
172
|
Args:
|
|
@@ -178,10 +174,31 @@ class SnippetDomainService:
|
|
|
178
174
|
source_uri: Optional source URI to filter by
|
|
179
175
|
|
|
180
176
|
Returns:
|
|
181
|
-
List of
|
|
177
|
+
List of search results matching the criteria
|
|
182
178
|
|
|
183
179
|
"""
|
|
184
|
-
|
|
180
|
+
snippet_items = await self.snippet_repository.list_snippets(
|
|
181
|
+
file_path, source_uri
|
|
182
|
+
)
|
|
183
|
+
# Convert SnippetWithContext to MultiSearchResult for unified display format
|
|
184
|
+
return [
|
|
185
|
+
MultiSearchResult(
|
|
186
|
+
id=item.snippet.id,
|
|
187
|
+
content=item.snippet.content,
|
|
188
|
+
original_scores=[], # No scores for list operation
|
|
189
|
+
source_uri=item.source.uri,
|
|
190
|
+
relative_path=MultiSearchResult.calculate_relative_path(
|
|
191
|
+
item.file.cloned_path, item.source.cloned_path
|
|
192
|
+
),
|
|
193
|
+
language=MultiSearchResult.detect_language_from_extension(
|
|
194
|
+
item.file.extension
|
|
195
|
+
),
|
|
196
|
+
authors=[author.name for author in item.authors],
|
|
197
|
+
created_at=item.snippet.created_at,
|
|
198
|
+
summary=item.snippet.summary,
|
|
199
|
+
)
|
|
200
|
+
for item in snippet_items
|
|
201
|
+
]
|
|
185
202
|
|
|
186
203
|
def _should_process_file(self, file: Any) -> bool:
|
|
187
204
|
"""Check if a file should be processed for snippet extraction.
|
kodit/domain/value_objects.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Domain value objects and DTOs."""
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
from dataclasses import dataclass
|
|
4
5
|
from datetime import datetime
|
|
5
6
|
from enum import Enum
|
|
@@ -9,7 +10,7 @@ from typing import Any, ClassVar
|
|
|
9
10
|
from sqlalchemy import JSON, DateTime, Integer, Text
|
|
10
11
|
from sqlalchemy.orm import Mapped, mapped_column
|
|
11
12
|
|
|
12
|
-
from kodit.domain.entities import Base
|
|
13
|
+
from kodit.domain.entities import Author, Base, File, Snippet, Source
|
|
13
14
|
from kodit.domain.enums import SnippetExtractionStrategy
|
|
14
15
|
|
|
15
16
|
|
|
@@ -175,12 +176,89 @@ class MultiSearchRequest:
|
|
|
175
176
|
|
|
176
177
|
@dataclass
|
|
177
178
|
class MultiSearchResult:
|
|
178
|
-
"""
|
|
179
|
+
"""Enhanced search result with comprehensive snippet metadata."""
|
|
179
180
|
|
|
180
181
|
id: int
|
|
181
|
-
uri: str
|
|
182
182
|
content: str
|
|
183
183
|
original_scores: list[float]
|
|
184
|
+
source_uri: str
|
|
185
|
+
relative_path: str
|
|
186
|
+
language: str
|
|
187
|
+
authors: list[str]
|
|
188
|
+
created_at: datetime
|
|
189
|
+
summary: str
|
|
190
|
+
|
|
191
|
+
def __str__(self) -> str:
|
|
192
|
+
"""Return enhanced formatted string representation."""
|
|
193
|
+
lines = [
|
|
194
|
+
"---",
|
|
195
|
+
f"id: {self.id}",
|
|
196
|
+
f"source: {self.source_uri}",
|
|
197
|
+
f"path: {self.relative_path}",
|
|
198
|
+
f"lang: {self.language}",
|
|
199
|
+
f"created: {self.created_at.isoformat()}",
|
|
200
|
+
f"authors: {', '.join(self.authors)}",
|
|
201
|
+
f"scores: {self.original_scores}",
|
|
202
|
+
"---",
|
|
203
|
+
f"{self.summary}\n",
|
|
204
|
+
f"```{self.language}",
|
|
205
|
+
f"{self.content}",
|
|
206
|
+
"```\n",
|
|
207
|
+
]
|
|
208
|
+
return "\n".join(lines)
|
|
209
|
+
|
|
210
|
+
def to_json(self) -> str:
|
|
211
|
+
"""Return LLM-optimized JSON representation following the compact schema."""
|
|
212
|
+
json_obj = {
|
|
213
|
+
"id": self.id,
|
|
214
|
+
"source": self.source_uri,
|
|
215
|
+
"path": self.relative_path,
|
|
216
|
+
"lang": self.language.lower(),
|
|
217
|
+
"created": self.created_at.isoformat() if self.created_at else "",
|
|
218
|
+
"author": ", ".join(self.authors),
|
|
219
|
+
"score": self.original_scores,
|
|
220
|
+
"code": self.content,
|
|
221
|
+
"summary": self.summary,
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
return json.dumps(json_obj, separators=(",", ":"))
|
|
225
|
+
|
|
226
|
+
@classmethod
|
|
227
|
+
def to_jsonlines(cls, results: list["MultiSearchResult"]) -> str:
|
|
228
|
+
"""Convert multiple MultiSearchResult objects to JSON Lines format.
|
|
229
|
+
|
|
230
|
+
Args:
|
|
231
|
+
results: List of MultiSearchResult objects
|
|
232
|
+
include_summary: Whether to include summary fields
|
|
233
|
+
|
|
234
|
+
Returns:
|
|
235
|
+
JSON Lines string (one JSON object per line)
|
|
236
|
+
|
|
237
|
+
"""
|
|
238
|
+
return "\n".join(result.to_json() for result in results)
|
|
239
|
+
|
|
240
|
+
@classmethod
|
|
241
|
+
def to_string(cls, results: list["MultiSearchResult"]) -> str:
|
|
242
|
+
"""Convert multiple MultiSearchResult objects to a string."""
|
|
243
|
+
return "\n\n".join(str(result) for result in results)
|
|
244
|
+
|
|
245
|
+
@staticmethod
|
|
246
|
+
def calculate_relative_path(file_path: str, source_path: str) -> str:
|
|
247
|
+
"""Calculate relative path from source root."""
|
|
248
|
+
try:
|
|
249
|
+
return str(Path(file_path).relative_to(Path(source_path)))
|
|
250
|
+
except ValueError:
|
|
251
|
+
# If file_path is not relative to source_path, return the file name
|
|
252
|
+
return Path(file_path).name
|
|
253
|
+
|
|
254
|
+
@staticmethod
|
|
255
|
+
def detect_language_from_extension(extension: str) -> str:
|
|
256
|
+
"""Detect programming language from file extension."""
|
|
257
|
+
try:
|
|
258
|
+
return LanguageMapping.get_language_for_extension(extension).title()
|
|
259
|
+
except ValueError:
|
|
260
|
+
# Unknown extension, return a default
|
|
261
|
+
return "Unknown"
|
|
184
262
|
|
|
185
263
|
|
|
186
264
|
@dataclass
|
|
@@ -280,13 +358,13 @@ class IndexView:
|
|
|
280
358
|
|
|
281
359
|
|
|
282
360
|
@dataclass
|
|
283
|
-
class
|
|
284
|
-
"""Domain model for snippet
|
|
361
|
+
class SnippetWithContext:
|
|
362
|
+
"""Domain model for snippet with associated context information."""
|
|
285
363
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
364
|
+
source: Source
|
|
365
|
+
file: File
|
|
366
|
+
authors: list[Author]
|
|
367
|
+
snippet: Snippet
|
|
290
368
|
|
|
291
369
|
|
|
292
370
|
class LanguageMapping:
|
|
@@ -6,9 +6,20 @@ from typing import TypeVar
|
|
|
6
6
|
from sqlalchemy import delete, func, select
|
|
7
7
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
8
|
|
|
9
|
-
from kodit.domain.entities import
|
|
9
|
+
from kodit.domain.entities import (
|
|
10
|
+
Author,
|
|
11
|
+
AuthorFileMapping,
|
|
12
|
+
Embedding,
|
|
13
|
+
File,
|
|
14
|
+
Index,
|
|
15
|
+
Snippet,
|
|
16
|
+
Source,
|
|
17
|
+
)
|
|
10
18
|
from kodit.domain.services.indexing_service import IndexRepository
|
|
11
|
-
from kodit.domain.value_objects import
|
|
19
|
+
from kodit.domain.value_objects import (
|
|
20
|
+
IndexView,
|
|
21
|
+
SnippetWithContext,
|
|
22
|
+
)
|
|
12
23
|
|
|
13
24
|
T = TypeVar("T")
|
|
14
25
|
|
|
@@ -202,6 +213,7 @@ class SQLAlchemyIndexRepository(IndexRepository):
|
|
|
202
213
|
file_id=snippet["file_id"],
|
|
203
214
|
index_id=snippet["index_id"],
|
|
204
215
|
content=snippet["content"],
|
|
216
|
+
summary=snippet.get("summary", ""),
|
|
205
217
|
)
|
|
206
218
|
self.session.add(db_snippet)
|
|
207
219
|
|
|
@@ -221,48 +233,31 @@ class SQLAlchemyIndexRepository(IndexRepository):
|
|
|
221
233
|
snippet.content = content
|
|
222
234
|
# SQLAlchemy will automatically track this change
|
|
223
235
|
|
|
224
|
-
async def list_snippets_by_ids(self, ids: list[int]) -> list[
|
|
225
|
-
"""List snippets by IDs.
|
|
226
|
-
|
|
227
|
-
Args:
|
|
228
|
-
ids: List of snippet IDs to retrieve.
|
|
229
|
-
|
|
230
|
-
Returns:
|
|
231
|
-
List of (file, snippet) tuples.
|
|
232
|
-
|
|
233
|
-
"""
|
|
236
|
+
async def list_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
|
|
237
|
+
"""List snippets by IDs."""
|
|
234
238
|
query = (
|
|
235
|
-
select(Snippet, File)
|
|
239
|
+
select(Snippet, File, Source, Author)
|
|
236
240
|
.where(Snippet.id.in_(ids))
|
|
237
241
|
.join(File, Snippet.file_id == File.id)
|
|
242
|
+
.join(Source, File.source_id == Source.id)
|
|
243
|
+
.outerjoin(AuthorFileMapping, AuthorFileMapping.file_id == File.id)
|
|
244
|
+
.outerjoin(Author, AuthorFileMapping.author_id == Author.id)
|
|
238
245
|
)
|
|
239
246
|
rows = await self.session.execute(query)
|
|
240
247
|
|
|
241
|
-
#
|
|
242
|
-
id_to_result = {}
|
|
243
|
-
for snippet, file in rows.all():
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
"created_at": file.created_at,
|
|
255
|
-
"updated_at": file.updated_at,
|
|
256
|
-
},
|
|
257
|
-
{
|
|
258
|
-
"id": snippet.id,
|
|
259
|
-
"file_id": snippet.file_id,
|
|
260
|
-
"index_id": snippet.index_id,
|
|
261
|
-
"content": snippet.content,
|
|
262
|
-
"created_at": snippet.created_at,
|
|
263
|
-
"updated_at": snippet.updated_at,
|
|
264
|
-
},
|
|
265
|
-
)
|
|
248
|
+
# Group results by snippet ID and collect authors
|
|
249
|
+
id_to_result: dict[int, SnippetWithContext] = {}
|
|
250
|
+
for snippet, file, source, author in rows.all():
|
|
251
|
+
if snippet.id not in id_to_result:
|
|
252
|
+
id_to_result[snippet.id] = SnippetWithContext(
|
|
253
|
+
snippet=snippet,
|
|
254
|
+
file=file,
|
|
255
|
+
source=source,
|
|
256
|
+
authors=[],
|
|
257
|
+
)
|
|
258
|
+
# Add author if it exists (outer join might return None)
|
|
259
|
+
if author is not None:
|
|
260
|
+
id_to_result[snippet.id].authors.append(author)
|
|
266
261
|
|
|
267
262
|
# Check that all IDs are present
|
|
268
263
|
if len(id_to_result) != len(ids):
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
"""SQLAlchemy implementation of snippet repository."""
|
|
2
2
|
|
|
3
|
+
import builtins
|
|
3
4
|
from collections.abc import Sequence
|
|
4
5
|
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
5
7
|
|
|
6
8
|
from sqlalchemy import delete, or_, select
|
|
7
9
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
@@ -18,7 +20,7 @@ from kodit.domain.repositories import SnippetRepository
|
|
|
18
20
|
from kodit.domain.value_objects import (
|
|
19
21
|
LanguageMapping,
|
|
20
22
|
MultiSearchRequest,
|
|
21
|
-
|
|
23
|
+
SnippetWithContext,
|
|
22
24
|
)
|
|
23
25
|
|
|
24
26
|
|
|
@@ -102,7 +104,7 @@ class SqlAlchemySnippetRepository(SnippetRepository):
|
|
|
102
104
|
|
|
103
105
|
async def list_snippets(
|
|
104
106
|
self, file_path: str | None = None, source_uri: str | None = None
|
|
105
|
-
) -> Sequence[
|
|
107
|
+
) -> Sequence[SnippetWithContext]:
|
|
106
108
|
"""List snippets with optional filtering by file path and source URI.
|
|
107
109
|
|
|
108
110
|
Args:
|
|
@@ -112,20 +114,11 @@ class SqlAlchemySnippetRepository(SnippetRepository):
|
|
|
112
114
|
all sources.
|
|
113
115
|
|
|
114
116
|
Returns:
|
|
115
|
-
A sequence of
|
|
117
|
+
A sequence of SnippetWithContext instances matching the criteria
|
|
116
118
|
|
|
117
119
|
"""
|
|
118
|
-
# Build the base query
|
|
119
|
-
query = (
|
|
120
|
-
select(
|
|
121
|
-
Snippet,
|
|
122
|
-
File.cloned_path,
|
|
123
|
-
Source.cloned_path.label("source_cloned_path"),
|
|
124
|
-
Source.uri.label("source_uri"),
|
|
125
|
-
)
|
|
126
|
-
.join(File, Snippet.file_id == File.id)
|
|
127
|
-
.join(Source, File.source_id == Source.id)
|
|
128
|
-
)
|
|
120
|
+
# Build the base query with joins for all required entities
|
|
121
|
+
query = self._build_base_query()
|
|
129
122
|
|
|
130
123
|
# Apply filters
|
|
131
124
|
if file_path is not None:
|
|
@@ -140,20 +133,7 @@ class SqlAlchemySnippetRepository(SnippetRepository):
|
|
|
140
133
|
query = query.where(Source.uri == source_uri)
|
|
141
134
|
|
|
142
135
|
result = await self.session.execute(query)
|
|
143
|
-
return
|
|
144
|
-
SnippetListItem(
|
|
145
|
-
id=snippet.id,
|
|
146
|
-
file_path=self._get_relative_path(file_cloned_path, source_cloned_path),
|
|
147
|
-
content=snippet.content,
|
|
148
|
-
source_uri=source_uri_val,
|
|
149
|
-
)
|
|
150
|
-
for (
|
|
151
|
-
snippet,
|
|
152
|
-
file_cloned_path,
|
|
153
|
-
source_cloned_path,
|
|
154
|
-
source_uri_val,
|
|
155
|
-
) in result.all()
|
|
156
|
-
]
|
|
136
|
+
return self._process_results(result)
|
|
157
137
|
|
|
158
138
|
def _get_relative_path(self, file_path: str, source_path: str) -> str:
|
|
159
139
|
"""Calculate the relative path of a file from the source root.
|
|
@@ -174,57 +154,98 @@ class SqlAlchemySnippetRepository(SnippetRepository):
|
|
|
174
154
|
# If the file is not relative to the source, return the filename
|
|
175
155
|
return Path(file_path).name
|
|
176
156
|
|
|
177
|
-
|
|
178
|
-
"""
|
|
157
|
+
def _apply_filters(self, query: Any, filters: Any) -> Any:
    """Narrow a snippet query using the optional search filters.

    Args:
        query: The select statement to restrict.
        filters: The filter object; a falsy value leaves the query untouched.

    Returns:
        The query with one WHERE clause added for every filter field that
        is set.

    """
    if not filters:
        return query

    criteria = []

    # Language is matched through the file extension.
    if filters.language:
        extensions = LanguageMapping.get_extensions_with_fallback(filters.language)
        criteria.append(File.extension.in_(extensions))

    # Author: case-insensitive substring match on the author name.
    if filters.author:
        criteria.append(Author.name.ilike(f"%{filters.author}%"))

    # Creation-time window (both bounds inclusive).
    if filters.created_after:
        criteria.append(Snippet.created_at >= filters.created_after)
    if filters.created_before:
        criteria.append(Snippet.created_at <= filters.created_before)

    # Source repository: substring match on the source URI.
    if filters.source_repo:
        criteria.append(Source.uri.like(f"%{filters.source_repo}%"))

    # Chain the clauses one at a time, in the order they were collected.
    for criterion in criteria:
        query = query.where(criterion)
    return query
|
|
192
|
+
|
|
193
|
+
def _build_base_query(self) -> Any:
    """Create the select statement shared by the snippet queries.

    Returns:
        A select over (Snippet, File, Source, Author) with the joins
        already in place.

    """
    statement = select(Snippet, File, Source, Author)

    # Snippet -> File -> Source are inner joins.
    statement = statement.join(File, Snippet.file_id == File.id)
    statement = statement.join(Source, File.source_id == Source.id)

    # Authors are reached through the mapping table with outer joins, so the
    # Author element of a row may be None.
    statement = statement.outerjoin(
        AuthorFileMapping, AuthorFileMapping.file_id == File.id
    )
    statement = statement.outerjoin(
        Author, AuthorFileMapping.author_id == Author.id
    )
    return statement
|
|
198
207
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
filters = request.filters
|
|
208
|
+
def _process_results(self, result: Any) -> builtins.list[SnippetWithContext]:
    """Collapse joined rows into one SnippetWithContext per snippet.

    Args:
        result: The executed query result; each row unpacks as
            (snippet, file, source, author).

    Returns:
        One SnippetWithContext per distinct snippet ID, in first-seen order,
        each carrying the authors found across its rows.

    """
    grouped: dict[int, SnippetWithContext] = {}
    for snippet, file, source, author in result.all():
        context = grouped.get(snippet.id)
        if context is None:
            # First row for this snippet: start with an empty author list.
            context = SnippetWithContext(
                snippet=snippet,
                file=file,
                source=source,
                authors=[],
            )
            grouped[snippet.id] = context
        # The outer join yields None when no author is mapped.
        if author is not None:
            context.authors.append(author)

    return list(grouped.values())
|
|
217
233
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
query = query.where(Snippet.created_at >= filters.created_after)
|
|
234
|
+
async def search(self, request: MultiSearchRequest) -> Sequence[SnippetWithContext]:
|
|
235
|
+
"""Search snippets with filters.
|
|
221
236
|
|
|
222
|
-
|
|
223
|
-
|
|
237
|
+
Args:
|
|
238
|
+
request: The search request containing queries and optional filters.
|
|
224
239
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
240
|
+
Returns:
|
|
241
|
+
A sequence of SnippetWithContext instances matching the search criteria.
|
|
242
|
+
|
|
243
|
+
"""
|
|
244
|
+
# Build the base query with joins for all required entities
|
|
245
|
+
query = self._build_base_query()
|
|
246
|
+
|
|
247
|
+
# Apply filters if provided
|
|
248
|
+
query = self._apply_filters(query, request.filters)
|
|
228
249
|
|
|
229
250
|
# Only apply top_k limit if there are no search queries
|
|
230
251
|
# This ensures that when used for pre-filtering (with search queries),
|
|
@@ -235,17 +256,4 @@ class SqlAlchemySnippetRepository(SnippetRepository):
|
|
|
235
256
|
query = query.limit(request.top_k)
|
|
236
257
|
|
|
237
258
|
result = await self.session.execute(query)
|
|
238
|
-
return
|
|
239
|
-
SnippetListItem(
|
|
240
|
-
id=snippet.id,
|
|
241
|
-
file_path=self._get_relative_path(file_cloned_path, source_cloned_path),
|
|
242
|
-
content=snippet.content,
|
|
243
|
-
source_uri=source_uri_val,
|
|
244
|
-
)
|
|
245
|
-
for (
|
|
246
|
-
snippet,
|
|
247
|
-
file_cloned_path,
|
|
248
|
-
source_cloned_path,
|
|
249
|
-
source_uri_val,
|
|
250
|
-
) in result.all()
|
|
251
|
-
]
|
|
259
|
+
return self._process_results(result)
|
kodit/log.py
CHANGED
|
@@ -190,11 +190,14 @@ def _from_sysfs() -> list[int]:
|
|
|
190
190
|
macs: list[int] = []
|
|
191
191
|
for iface in base.iterdir():
|
|
192
192
|
try:
|
|
193
|
+
# Skip if iface is not a directory (e.g., bonding_masters is a file)
|
|
194
|
+
if not iface.is_dir():
|
|
195
|
+
continue
|
|
193
196
|
with (base / iface / "address").open() as f:
|
|
194
197
|
content = f.read().strip()
|
|
195
198
|
if _MAC_RE.fullmatch(content):
|
|
196
199
|
macs.append(_mac_int(content))
|
|
197
|
-
except (FileNotFoundError, PermissionError):
|
|
200
|
+
except (FileNotFoundError, PermissionError, NotADirectoryError):
|
|
198
201
|
pass
|
|
199
202
|
return macs
|
|
200
203
|
|
kodit/mcp.py
CHANGED
|
@@ -195,17 +195,12 @@ async def search( # noqa: PLR0913
|
|
|
195
195
|
snippets = await service.search(request=search_request)
|
|
196
196
|
|
|
197
197
|
log.debug("Fusing output")
|
|
198
|
-
output =
|
|
198
|
+
output = MultiSearchResult.to_jsonlines(results=snippets)
|
|
199
199
|
|
|
200
200
|
log.debug("Output", output=output)
|
|
201
201
|
return output
|
|
202
202
|
|
|
203
203
|
|
|
204
|
-
def output_fusion(snippets: list[MultiSearchResult]) -> str:
|
|
205
|
-
"""Fuse the snippets into a single output."""
|
|
206
|
-
return "\n\n".join(f"{snippet.uri}\n{snippet.content}" for snippet in snippets)
|
|
207
|
-
|
|
208
|
-
|
|
209
204
|
@mcp.tool()
|
|
210
205
|
async def get_version() -> str:
|
|
211
206
|
"""Get the version of the kodit project."""
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# ruff: noqa
|
|
2
|
+
"""add summary
|
|
3
|
+
|
|
4
|
+
Revision ID: 4552eb3f23ce
|
|
5
|
+
Revises: 9e53ea8bb3b0
|
|
6
|
+
Create Date: 2025-06-30 16:32:49.293087
|
|
7
|
+
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import Sequence, Union
|
|
11
|
+
|
|
12
|
+
from alembic import op
|
|
13
|
+
import sqlalchemy as sa
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# revision identifiers, used by Alembic.
|
|
17
|
+
revision: str = '4552eb3f23ce'
|
|
18
|
+
down_revision: Union[str, None] = '9e53ea8bb3b0'
|
|
19
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
20
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def upgrade() -> None:
    """Upgrade schema: add the non-nullable ``summary`` column to ``snippets``."""
    # A NOT NULL column cannot be added to a table that already contains rows
    # unless the database has a value to backfill them with. The empty-string
    # server default lets the migration succeed on populated databases
    # (SQLite rejects the ALTER outright without one).
    op.add_column(
        'snippets',
        sa.Column(
            'summary',
            sa.UnicodeText(),
            nullable=False,
            server_default='',
        ),
    )
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def downgrade() -> None:
    """Downgrade schema: remove the ``summary`` column from ``snippets``."""
    # Reverses upgrade(); any stored summaries are discarded with the column.
    op.drop_column(table_name='snippets', column_name='summary')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kodit
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: Code indexing for better AI code generation
|
|
5
5
|
Project-URL: Homepage, https://docs.helixml.tech/kodit/
|
|
6
6
|
Project-URL: Documentation, https://docs.helixml.tech/kodit/
|
|
@@ -83,7 +83,7 @@ If you're an engineer working with AI-powered coding assistants, Kodit helps by
|
|
|
83
83
|
providing relevant and up-to-date examples of your task so that LLMs make fewer mistakes
|
|
84
84
|
and produce fewer hallucinations.
|
|
85
85
|
|
|
86
|
-
##
|
|
86
|
+
## Features
|
|
87
87
|
|
|
88
88
|
### Codebase Indexing
|
|
89
89
|
|
|
@@ -94,7 +94,11 @@ code. This index is used to build a snippet library, ready for ingestion into an
|
|
|
94
94
|
- Build comprehensive snippet libraries for LLM ingestion
|
|
95
95
|
- Support for multiple codebase types and languages
|
|
96
96
|
- Efficient indexing and search capabilities
|
|
97
|
-
- Privacy first: respects .gitignore and .noindex files
|
|
97
|
+
- Privacy first: respects .gitignore and .noindex files
|
|
98
|
+
- **NEW in 0.3**: Auto-indexing configuration for shared server deployments
|
|
99
|
+
- **NEW in 0.3**: Enhanced Git provider support including Azure DevOps
|
|
100
|
+
- **NEW in 0.3**: Index private repositories via a PAT
|
|
101
|
+
- **NEW in 0.3**: Improved progress monitoring and reporting during indexing
|
|
98
102
|
|
|
99
103
|
### MCP Server
|
|
100
104
|
|
|
@@ -107,6 +111,7 @@ intent. Kodit has been tested to work well with:
|
|
|
107
111
|
- [Cursor](https://docs.helix.ml/kodit/getting-started/integration/#integration-with-cursor)
|
|
108
112
|
- [Cline](https://docs.helix.ml/kodit/getting-started/integration/#integration-with-cline)
|
|
109
113
|
- Please contribute more instructions! ... any other assistant is likely to work ...
|
|
114
|
+
- **New in 0.3**: Filter snippets by source, language, author or timestamp.
|
|
110
115
|
|
|
111
116
|
### Enterprise Ready
|
|
112
117
|
|
|
@@ -127,7 +132,12 @@ Supported providers:
|
|
|
127
132
|
- Secure, private LLM enclave with [Helix](https://helix.ml).
|
|
128
133
|
- Any other OpenAI compatible API
|
|
129
134
|
|
|
130
|
-
|
|
135
|
+
**NEW in 0.3**: Enhanced deployment options:
|
|
136
|
+
|
|
137
|
+
- Docker Compose configurations with VectorChord
|
|
138
|
+
- Kubernetes manifests for production deployments
|
|
139
|
+
|
|
140
|
+
## Quick Start
|
|
131
141
|
|
|
132
142
|
1. [Install Kodit](https://docs.helix.ml/kodit/getting-started/installation/)
|
|
133
143
|
2. [Index codebases](https://docs.helix.ml/kodit/getting-started/quick-start/)
|
|
@@ -1,34 +1,34 @@
|
|
|
1
1
|
kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
|
|
2
2
|
kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
|
|
3
|
-
kodit/_version.py,sha256=
|
|
3
|
+
kodit/_version.py,sha256=5NopxuphNnyGZECYEkvIUFi0KZxwtDHmTpW5R266eSo,511
|
|
4
4
|
kodit/app.py,sha256=uv67TE83fZE7wrA7cz-sKosFrAXlKRr1B7fT-X_gMZQ,2103
|
|
5
|
-
kodit/cli.py,sha256=
|
|
6
|
-
kodit/config.py,sha256=
|
|
5
|
+
kodit/cli.py,sha256=a-bJQ_Jyz201TEbgJPNvPDa0Qyt3kfSKqBuypeVqG_k,17219
|
|
6
|
+
kodit/config.py,sha256=VUoUi2t2yGhqOtm5MSZuaasNSklH50hfWn6GOrz3jnU,7518
|
|
7
7
|
kodit/database.py,sha256=kI9yBm4uunsgV4-QeVoCBL0wLzU4kYmYv5qZilGnbPE,1740
|
|
8
|
-
kodit/log.py,sha256=
|
|
9
|
-
kodit/mcp.py,sha256=
|
|
8
|
+
kodit/log.py,sha256=WOsLRitpCBtJa5IcsyZpKr146kXXHK2nU5VA90gcJdQ,8736
|
|
9
|
+
kodit/mcp.py,sha256=6gCJvjTqWGWUicuidbpMPtM1Vtqvlc0fKUua3l-EVPQ,6273
|
|
10
10
|
kodit/middleware.py,sha256=I6FOkqG9-8RH5kR1-0ZoQWfE4qLCB8lZYv8H_OCH29o,2714
|
|
11
11
|
kodit/reporting.py,sha256=icce1ZyiADsA_Qz-mSjgn2H4SSqKuGfLKnw-yrl9nsg,2722
|
|
12
12
|
kodit/application/__init__.py,sha256=mH50wTpgP9dhbKztFsL8Dda9Hi18TSnMVxXtpp4aGOA,35
|
|
13
13
|
kodit/application/factories/__init__.py,sha256=bU5CvEnaBePZ7JbkCOp1MGTNP752bnU2uEqmfy5FdRk,37
|
|
14
14
|
kodit/application/factories/code_indexing_factory.py,sha256=pyGcTmqhBRjw0tDvp5UpG0roBf3ROqYvBcHyvaLZ-qQ,4927
|
|
15
15
|
kodit/application/services/__init__.py,sha256=p5UQNw-H5sxQvs5Etfte93B3cJ1kKW6DNxK34uFvU1E,38
|
|
16
|
-
kodit/application/services/code_indexing_application_service.py,sha256=
|
|
16
|
+
kodit/application/services/code_indexing_application_service.py,sha256=PXnBbDnaYqU6xnKGTcOjmYZbjcQZed-_ehf6Uzhx5v4,12809
|
|
17
17
|
kodit/domain/__init__.py,sha256=TCpg4Xx-oF4mKV91lo4iXqMEfBT1OoRSYnbG-zVWolA,66
|
|
18
|
-
kodit/domain/entities.py,sha256=
|
|
18
|
+
kodit/domain/entities.py,sha256=6UBPi7zH9bCIgeXg0Poq6LQu01O5JvoHaWqNusNJ3iA,5787
|
|
19
19
|
kodit/domain/enums.py,sha256=Ik_h3D3eZ0FsSlPsU0ikm-Yv3Rmvzicffi9yBn19UIE,191
|
|
20
20
|
kodit/domain/errors.py,sha256=yIsgCjM_yOFIg8l7l-t7jM8pgeAX4cfPq0owf7iz3DA,106
|
|
21
21
|
kodit/domain/interfaces.py,sha256=Jkd0Ob4qSvhZHI9jRPFQ1n5Cv0SvU-y3Z-HCw2ikc4I,742
|
|
22
|
-
kodit/domain/repositories.py,sha256=
|
|
23
|
-
kodit/domain/value_objects.py,sha256=
|
|
22
|
+
kodit/domain/repositories.py,sha256=VgNV4NXywh6LtxN1GU2fg8bn8mNZ2wgVXZEugqOOb1M,3796
|
|
23
|
+
kodit/domain/value_objects.py,sha256=h9KMAB0neX3gQT2mTC8JxyxxuDXuJ2lyG0czUhkZc0E,15575
|
|
24
24
|
kodit/domain/services/__init__.py,sha256=Q1GhCK_PqKHYwYE4tkwDz5BIyXkJngLBBOHhzvX8nzo,42
|
|
25
25
|
kodit/domain/services/bm25_service.py,sha256=nsfTan3XtDwXuuAu1LUv-6Jukm6qFKVqqCVymjyepZQ,3625
|
|
26
26
|
kodit/domain/services/embedding_service.py,sha256=Wh6Y2NR_GRnud8dq1Q7S6F40aNe-S2UyD5Nqz9LChTM,4507
|
|
27
27
|
kodit/domain/services/enrichment_service.py,sha256=XsXg3nV-KN4rqtC7Zro_ZiZ6RSq-1eA1MG6IDzFGyBA,1316
|
|
28
28
|
kodit/domain/services/ignore_service.py,sha256=boEN-IRLmUtwO9ZnuACaVFZbIKrtUG8YwnsXKEDIG28,1136
|
|
29
|
-
kodit/domain/services/indexing_service.py,sha256=
|
|
29
|
+
kodit/domain/services/indexing_service.py,sha256=7Yb6lyyd_VpZldK_CVMeOXpzXq-08Et-WRhulCWDQdM,5920
|
|
30
30
|
kodit/domain/services/snippet_extraction_service.py,sha256=QW_99bXWpr8g6ZI-hp4Aj57VCSrUf71dLwQca5T6pyg,3065
|
|
31
|
-
kodit/domain/services/snippet_service.py,sha256=
|
|
31
|
+
kodit/domain/services/snippet_service.py,sha256=EyJQoT9UkJdMM2yfC1cFlj0yZVxK5a7NzleeM8lqWR0,7355
|
|
32
32
|
kodit/domain/services/source_service.py,sha256=9XGS3imJn65v855cztsJSaaFod6LhkF2xfUVMaytx-A,3068
|
|
33
33
|
kodit/infrastructure/__init__.py,sha256=HzEYIjoXnkz_i_MHO2e0sIVYweUcRnl2RpyBiTbMObU,28
|
|
34
34
|
kodit/infrastructure/bm25/__init__.py,sha256=DmGbrEO34FOJy4e685BbyxLA7gPW1eqs2gAxsp6JOuM,34
|
|
@@ -64,7 +64,7 @@ kodit/infrastructure/ignore/ignore_pattern_provider.py,sha256=9m2XCsgW87UBTfzHr6
|
|
|
64
64
|
kodit/infrastructure/indexing/__init__.py,sha256=7UPRa2jwCAsa0Orsp6PqXSF8iIXJVzXHMFmrKkI9yH8,38
|
|
65
65
|
kodit/infrastructure/indexing/auto_indexing_service.py,sha256=uXggladN3PTU5Jzhz0Kq-0aObvq3Dq9YbjYKCSkaQA8,3131
|
|
66
66
|
kodit/infrastructure/indexing/fusion_service.py,sha256=mXUUcx3-8e75mWkxXMfl30HIoFXrTNHzB1w90MmEbak,1806
|
|
67
|
-
kodit/infrastructure/indexing/index_repository.py,sha256=
|
|
67
|
+
kodit/infrastructure/indexing/index_repository.py,sha256=4m_kFHQ3OSQdf2pgR1RM72g-k4UZHyHbtYKUwJ8huRs,8719
|
|
68
68
|
kodit/infrastructure/indexing/indexing_factory.py,sha256=LPjPCps_wJ9M_fZGRP02bfc2pvYc50ZSTYI99XwRRPg,918
|
|
69
69
|
kodit/infrastructure/indexing/snippet_domain_service_factory.py,sha256=OMp9qRJSAT3oWqsMyF1fgI2Mb_G-SA22crbbaCb7c-Q,1253
|
|
70
70
|
kodit/infrastructure/snippet_extraction/__init__.py,sha256=v6KqrRDjSj0nt87m7UwRGx2GN_fz_14VWq9Q0uABR_s,54
|
|
@@ -82,7 +82,7 @@ kodit/infrastructure/sqlalchemy/__init__.py,sha256=UXPMSF_hgWaqr86cawRVqM8XdVNum
|
|
|
82
82
|
kodit/infrastructure/sqlalchemy/embedding_repository.py,sha256=u29RVt4W0WqHj6TkrydMHw2iF5_jERHtlidDjWRQvqc,7886
|
|
83
83
|
kodit/infrastructure/sqlalchemy/file_repository.py,sha256=9_kXHJ1YiWA1ingpvBNq8cuxkMu59PHwl_m9_Ttnq2o,2353
|
|
84
84
|
kodit/infrastructure/sqlalchemy/repository.py,sha256=EpZnOjR3wfPEqIauWw_KczpkSqBQPTq5sIyCpJCuW2w,4565
|
|
85
|
-
kodit/infrastructure/sqlalchemy/snippet_repository.py,sha256=
|
|
85
|
+
kodit/infrastructure/sqlalchemy/snippet_repository.py,sha256=aBsr2U6RUQftWnkOHka809WH9YxS4Tpg34knZ--WNms,8473
|
|
86
86
|
kodit/infrastructure/ui/__init__.py,sha256=CzbLOBwIZ6B6iAHEd1L8cIBydCj-n_kobxJAhz2I9_Y,32
|
|
87
87
|
kodit/infrastructure/ui/progress.py,sha256=BaAeMEgXlSSb0c_t_NPxnThIktkzzCS9kegb5ExULJs,4791
|
|
88
88
|
kodit/infrastructure/ui/spinner.py,sha256=GcP115qtR0VEnGfMEtsGoAUpRzVGUSfiUXfoJJERngA,2357
|
|
@@ -90,13 +90,14 @@ kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
|
|
|
90
90
|
kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
|
|
91
91
|
kodit/migrations/env.py,sha256=j89vEWdSgfnreTAz5ZvFAPlsMGI8SfKti0MlWhm7Jbc,2364
|
|
92
92
|
kodit/migrations/script.py.mako,sha256=zWziKtiwYKEWuwPV_HBNHwa9LCT45_bi01-uSNFaOOE,703
|
|
93
|
+
kodit/migrations/versions/4552eb3f23ce_add_summary.py,sha256=_saoHs5HGzc_z2OzBkFKrifTLQfoNox3BpSBeiKg_f8,870
|
|
93
94
|
kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py,sha256=-61qol9PfQKILCDQRA5jEaats9aGZs9Wdtp-j-38SF4,1644
|
|
94
95
|
kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7hRV7fI59veDY9cjtTrs-8,3306
|
|
95
96
|
kodit/migrations/versions/9e53ea8bb3b0_add_authors.py,sha256=a32Zm8KUQyiiLkjKNPYdaJDgjW6VsV-GhaLnPnK_fpI,3884
|
|
96
97
|
kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
|
|
97
98
|
kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py,sha256=rI8LmjF-I2OMxZ2nOIF_NRmqOLXe45hL_iz_nx97DTQ,1680
|
|
98
|
-
kodit-0.3.
|
|
99
|
-
kodit-0.3.
|
|
100
|
-
kodit-0.3.
|
|
101
|
-
kodit-0.3.
|
|
102
|
-
kodit-0.3.
|
|
99
|
+
kodit-0.3.2.dist-info/METADATA,sha256=JDWIO27pGDjCMUm5gRWUWjdQhRgEGC8J0O3gMFki6p8,6358
|
|
100
|
+
kodit-0.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
101
|
+
kodit-0.3.2.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
|
|
102
|
+
kodit-0.3.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
103
|
+
kodit-0.3.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|