kodit-0.2.7-py3-none-any.whl → kodit-0.2.9-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (35)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/__init__.py +1 -0
  3. kodit/application/factories/code_indexing_factory.py +119 -0
  4. kodit/application/services/{indexing_application_service.py → code_indexing_application_service.py} +159 -198
  5. kodit/cli.py +199 -62
  6. kodit/domain/entities.py +7 -5
  7. kodit/domain/repositories.py +33 -0
  8. kodit/domain/services/bm25_service.py +14 -17
  9. kodit/domain/services/embedding_service.py +10 -14
  10. kodit/domain/services/snippet_service.py +198 -0
  11. kodit/domain/value_objects.py +301 -21
  12. kodit/infrastructure/bm25/local_bm25_repository.py +20 -12
  13. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +31 -11
  14. kodit/infrastructure/cloning/git/working_copy.py +5 -2
  15. kodit/infrastructure/cloning/metadata.py +1 -0
  16. kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +14 -25
  17. kodit/infrastructure/embedding/local_vector_search_repository.py +26 -38
  18. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +50 -35
  19. kodit/infrastructure/enrichment/enrichment_factory.py +1 -1
  20. kodit/infrastructure/indexing/indexing_factory.py +8 -91
  21. kodit/infrastructure/indexing/snippet_domain_service_factory.py +37 -0
  22. kodit/infrastructure/snippet_extraction/languages/java.scm +12 -0
  23. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +3 -31
  24. kodit/infrastructure/sqlalchemy/embedding_repository.py +14 -3
  25. kodit/infrastructure/sqlalchemy/snippet_repository.py +174 -2
  26. kodit/mcp.py +61 -49
  27. {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/METADATA +1 -1
  28. {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/RECORD +31 -30
  29. kodit/application/commands/__init__.py +0 -1
  30. kodit/application/commands/snippet_commands.py +0 -22
  31. kodit/application/services/snippet_application_service.py +0 -149
  32. kodit/infrastructure/enrichment/legacy_enrichment_models.py +0 -42
  33. {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/WHEEL +0 -0
  34. {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/entry_points.txt +0 -0
  35. {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/licenses/LICENSE +0 -0
kodit/cli.py CHANGED
@@ -1,6 +1,5 @@
1
1
  """Command line interface for kodit."""
2
2
 
3
- import asyncio
4
3
  import signal
5
4
  from pathlib import Path
6
5
  from typing import Any
@@ -11,8 +10,8 @@ import uvicorn
11
10
  from pytable_formatter import Cell, Table
12
11
  from sqlalchemy.ext.asyncio import AsyncSession
13
12
 
14
- from kodit.application.services.snippet_application_service import (
15
- SnippetApplicationService,
13
+ from kodit.application.factories.code_indexing_factory import (
14
+ create_code_indexing_application_service,
16
15
  )
17
16
  from kodit.config import (
18
17
  AppContext,
@@ -21,14 +20,7 @@ from kodit.config import (
21
20
  )
22
21
  from kodit.domain.errors import EmptySourceError
23
22
  from kodit.domain.services.source_service import SourceService
24
- from kodit.domain.value_objects import MultiSearchRequest
25
- from kodit.infrastructure.indexing.indexing_factory import (
26
- create_indexing_application_service,
27
- )
28
- from kodit.infrastructure.snippet_extraction.snippet_extraction_factory import (
29
- create_snippet_extraction_domain_service,
30
- create_snippet_repositories,
31
- )
23
+ from kodit.domain.value_objects import MultiSearchRequest, SnippetSearchFilters
32
24
  from kodit.infrastructure.ui.progress import (
33
25
  create_lazy_progress_callback,
34
26
  create_multi_stage_progress_callback,
@@ -36,33 +28,6 @@ from kodit.infrastructure.ui.progress import (
36
28
  from kodit.log import configure_logging, configure_telemetry, log_event
37
29
 
38
30
 
39
- def create_snippet_application_service(
40
- session: AsyncSession,
41
- ) -> SnippetApplicationService:
42
- """Create a snippet application service with all dependencies.
43
-
44
- Args:
45
- session: SQLAlchemy session
46
-
47
- Returns:
48
- Configured snippet application service
49
-
50
- """
51
- # Create domain service
52
- snippet_extraction_service = create_snippet_extraction_domain_service()
53
-
54
- # Create repositories
55
- snippet_repository, file_repository = create_snippet_repositories(session)
56
-
57
- # Create application service
58
- return SnippetApplicationService(
59
- snippet_extraction_service=snippet_extraction_service,
60
- snippet_repository=snippet_repository,
61
- file_repository=file_repository,
62
- session=session,
63
- )
64
-
65
-
66
31
  @click.group(context_settings={"max_content_width": 100})
67
32
  @click.option(
68
33
  "--env-file",
@@ -107,12 +72,10 @@ async def index(
107
72
  clone_dir=app_context.get_clone_dir(),
108
73
  session_factory=lambda: session,
109
74
  )
110
- snippet_service = create_snippet_application_service(session)
111
- service = create_indexing_application_service(
75
+ service = create_code_indexing_application_service(
112
76
  app_context=app_context,
113
77
  session=session,
114
78
  source_service=source_service,
115
- snippet_application_service=snippet_service,
116
79
  )
117
80
 
118
81
  if not sources:
@@ -173,16 +136,86 @@ def search() -> None:
173
136
  """Search for snippets in the database."""
174
137
 
175
138
 
139
+ # Utility for robust filter parsing
140
+ def _parse_filters(
141
+ language: str | None,
142
+ author: str | None,
143
+ created_after: str | None,
144
+ created_before: str | None,
145
+ source_repo: str | None,
146
+ ) -> SnippetSearchFilters | None:
147
+ from datetime import datetime
148
+
149
+ # Normalize language to lowercase if provided
150
+ norm_language = language.lower() if language else None
151
+ # Try to parse dates, raise error if invalid
152
+ parsed_created_after = None
153
+ if created_after:
154
+ try:
155
+ parsed_created_after = datetime.fromisoformat(created_after)
156
+ except ValueError as err:
157
+ raise ValueError(
158
+ f"Invalid date format for --created-after: {created_after}. "
159
+ "Expected ISO 8601 format (YYYY-MM-DD)"
160
+ ) from err
161
+ parsed_created_before = None
162
+ if created_before:
163
+ try:
164
+ parsed_created_before = datetime.fromisoformat(created_before)
165
+ except ValueError as err:
166
+ raise ValueError(
167
+ f"Invalid date format for --created-before: {created_before}. "
168
+ "Expected ISO 8601 format (YYYY-MM-DD)"
169
+ ) from err
170
+ # Return None if no filters provided, otherwise return SnippetSearchFilters
171
+ # Check if any original parameters were provided (not just the parsed values)
172
+ if any(
173
+ [
174
+ language,
175
+ author,
176
+ created_after,
177
+ created_before,
178
+ source_repo,
179
+ ]
180
+ ):
181
+ return SnippetSearchFilters(
182
+ language=norm_language,
183
+ author=author,
184
+ created_after=parsed_created_after,
185
+ created_before=parsed_created_before,
186
+ source_repo=source_repo,
187
+ )
188
+ return None
189
+
190
+
176
191
  @search.command()
177
192
  @click.argument("query")
178
193
  @click.option("--top-k", default=10, help="Number of snippets to retrieve")
194
+ @click.option(
195
+ "--language", help="Filter by programming language (e.g., python, go, javascript)"
196
+ )
197
+ @click.option("--author", help="Filter by author name")
198
+ @click.option(
199
+ "--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
200
+ )
201
+ @click.option(
202
+ "--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
203
+ )
204
+ @click.option(
205
+ "--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
206
+ )
179
207
  @with_app_context
180
208
  @with_session
181
- async def code(
209
+ async def code( # noqa: PLR0913
182
210
  session: AsyncSession,
183
211
  app_context: AppContext,
184
212
  query: str,
185
213
  top_k: int,
214
+ language: str | None,
215
+ author: str | None,
216
+ created_after: str | None,
217
+ created_before: str | None,
218
+ source_repo: str | None,
186
219
  ) -> None:
187
220
  """Search for snippets using semantic code search.
188
221
 
@@ -193,15 +226,19 @@ async def code(
193
226
  clone_dir=app_context.get_clone_dir(),
194
227
  session_factory=lambda: session,
195
228
  )
196
- snippet_service = create_snippet_application_service(session)
197
- service = create_indexing_application_service(
229
+ service = create_code_indexing_application_service(
198
230
  app_context=app_context,
199
231
  session=session,
200
232
  source_service=source_service,
201
- snippet_application_service=snippet_service,
202
233
  )
203
234
 
204
- snippets = await service.search(MultiSearchRequest(code_query=query, top_k=top_k))
235
+ filters = _parse_filters(
236
+ language, author, created_after, created_before, source_repo
237
+ )
238
+
239
+ snippets = await service.search(
240
+ MultiSearchRequest(code_query=query, top_k=top_k, filters=filters)
241
+ )
205
242
 
206
243
  if len(snippets) == 0:
207
244
  click.echo("No snippets found")
@@ -219,13 +256,31 @@ async def code(
219
256
  @search.command()
220
257
  @click.argument("keywords", nargs=-1)
221
258
  @click.option("--top-k", default=10, help="Number of snippets to retrieve")
259
+ @click.option(
260
+ "--language", help="Filter by programming language (e.g., python, go, javascript)"
261
+ )
262
+ @click.option("--author", help="Filter by author name")
263
+ @click.option(
264
+ "--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
265
+ )
266
+ @click.option(
267
+ "--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
268
+ )
269
+ @click.option(
270
+ "--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
271
+ )
222
272
  @with_app_context
223
273
  @with_session
224
- async def keyword(
274
+ async def keyword( # noqa: PLR0913
225
275
  session: AsyncSession,
226
276
  app_context: AppContext,
227
277
  keywords: list[str],
228
278
  top_k: int,
279
+ language: str | None,
280
+ author: str | None,
281
+ created_after: str | None,
282
+ created_before: str | None,
283
+ source_repo: str | None,
229
284
  ) -> None:
230
285
  """Search for snippets using keyword search."""
231
286
  log_event("kodit.cli.search.keyword")
@@ -233,15 +288,19 @@ async def keyword(
233
288
  clone_dir=app_context.get_clone_dir(),
234
289
  session_factory=lambda: session,
235
290
  )
236
- snippet_service = create_snippet_application_service(session)
237
- service = create_indexing_application_service(
291
+ service = create_code_indexing_application_service(
238
292
  app_context=app_context,
239
293
  session=session,
240
294
  source_service=source_service,
241
- snippet_application_service=snippet_service,
242
295
  )
243
296
 
244
- snippets = await service.search(MultiSearchRequest(keywords=keywords, top_k=top_k))
297
+ filters = _parse_filters(
298
+ language, author, created_after, created_before, source_repo
299
+ )
300
+
301
+ snippets = await service.search(
302
+ MultiSearchRequest(keywords=keywords, top_k=top_k, filters=filters)
303
+ )
245
304
 
246
305
  if len(snippets) == 0:
247
306
  click.echo("No snippets found")
@@ -259,13 +318,31 @@ async def keyword(
259
318
  @search.command()
260
319
  @click.argument("query")
261
320
  @click.option("--top-k", default=10, help="Number of snippets to retrieve")
321
+ @click.option(
322
+ "--language", help="Filter by programming language (e.g., python, go, javascript)"
323
+ )
324
+ @click.option("--author", help="Filter by author name")
325
+ @click.option(
326
+ "--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
327
+ )
328
+ @click.option(
329
+ "--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
330
+ )
331
+ @click.option(
332
+ "--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
333
+ )
262
334
  @with_app_context
263
335
  @with_session
264
- async def text(
336
+ async def text( # noqa: PLR0913
265
337
  session: AsyncSession,
266
338
  app_context: AppContext,
267
339
  query: str,
268
340
  top_k: int,
341
+ language: str | None,
342
+ author: str | None,
343
+ created_after: str | None,
344
+ created_before: str | None,
345
+ source_repo: str | None,
269
346
  ) -> None:
270
347
  """Search for snippets using semantic text search.
271
348
 
@@ -276,15 +353,19 @@ async def text(
276
353
  clone_dir=app_context.get_clone_dir(),
277
354
  session_factory=lambda: session,
278
355
  )
279
- snippet_service = create_snippet_application_service(session)
280
- service = create_indexing_application_service(
356
+ service = create_code_indexing_application_service(
281
357
  app_context=app_context,
282
358
  session=session,
283
359
  source_service=source_service,
284
- snippet_application_service=snippet_service,
285
360
  )
286
361
 
287
- snippets = await service.search(MultiSearchRequest(text_query=query, top_k=top_k))
362
+ filters = _parse_filters(
363
+ language, author, created_after, created_before, source_repo
364
+ )
365
+
366
+ snippets = await service.search(
367
+ MultiSearchRequest(text_query=query, top_k=top_k, filters=filters)
368
+ )
288
369
 
289
370
  if len(snippets) == 0:
290
371
  click.echo("No snippets found")
@@ -304,6 +385,19 @@ async def text(
304
385
  @click.option("--keywords", required=True, help="Comma separated list of keywords")
305
386
  @click.option("--code", required=True, help="Semantic code search query")
306
387
  @click.option("--text", required=True, help="Semantic text search query")
388
+ @click.option(
389
+ "--language", help="Filter by programming language (e.g., python, go, javascript)"
390
+ )
391
+ @click.option("--author", help="Filter by author name")
392
+ @click.option(
393
+ "--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
394
+ )
395
+ @click.option(
396
+ "--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
397
+ )
398
+ @click.option(
399
+ "--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
400
+ )
307
401
  @with_app_context
308
402
  @with_session
309
403
  async def hybrid( # noqa: PLR0913
@@ -313,6 +407,11 @@ async def hybrid( # noqa: PLR0913
313
407
  keywords: str,
314
408
  code: str,
315
409
  text: str,
410
+ language: str | None,
411
+ author: str | None,
412
+ created_after: str | None,
413
+ created_before: str | None,
414
+ source_repo: str | None,
316
415
  ) -> None:
317
416
  """Search for snippets using hybrid search."""
318
417
  log_event("kodit.cli.search.hybrid")
@@ -320,23 +419,26 @@ async def hybrid( # noqa: PLR0913
320
419
  clone_dir=app_context.get_clone_dir(),
321
420
  session_factory=lambda: session,
322
421
  )
323
- snippet_service = create_snippet_application_service(session)
324
- service = create_indexing_application_service(
422
+ service = create_code_indexing_application_service(
325
423
  app_context=app_context,
326
424
  session=session,
327
425
  source_service=source_service,
328
- snippet_application_service=snippet_service,
329
426
  )
330
427
 
331
428
  # Parse keywords into a list of strings
332
429
  keywords_list = [k.strip().lower() for k in keywords.split(",")]
333
430
 
431
+ filters = _parse_filters(
432
+ language, author, created_after, created_before, source_repo
433
+ )
434
+
334
435
  snippets = await service.search(
335
436
  MultiSearchRequest(
336
437
  keywords=keywords_list,
337
438
  code_query=code,
338
439
  text_query=text,
339
440
  top_k=top_k,
441
+ filters=filters,
340
442
  )
341
443
  )
342
444
 
@@ -353,6 +455,40 @@ async def hybrid( # noqa: PLR0913
353
455
  click.echo()
354
456
 
355
457
 
458
+ @cli.group()
459
+ def show() -> None:
460
+ """Show information about elements in the database."""
461
+
462
+
463
+ @show.command()
464
+ @click.option("--by-path", help="File or directory path to search for snippets")
465
+ @click.option("--by-source", help="Source URI to filter snippets by")
466
+ @with_app_context
467
+ @with_session
468
+ async def snippets(
469
+ session: AsyncSession,
470
+ app_context: AppContext,
471
+ by_path: str | None,
472
+ by_source: str | None,
473
+ ) -> None:
474
+ """Show snippets with optional filtering by path or source."""
475
+ log_event("kodit.cli.show.snippets")
476
+ source_service = SourceService(
477
+ clone_dir=app_context.get_clone_dir(),
478
+ session_factory=lambda: session,
479
+ )
480
+ service = create_code_indexing_application_service(
481
+ app_context=app_context,
482
+ session=session,
483
+ source_service=source_service,
484
+ )
485
+ snippets = await service.list_snippets(file_path=by_path, source_uri=by_source)
486
+ for snippet in snippets:
487
+ click.echo(f"{snippet.id}: [{snippet.source_uri}] {snippet.file_path}")
488
+ click.echo(f" {snippet.content}")
489
+ click.echo()
490
+
491
+
356
492
  @cli.command()
357
493
  @click.option("--host", default="127.0.0.1", help="Host to bind the server to")
358
494
  @click.option("--port", default=8080, help="Port to bind the server to")
@@ -393,9 +529,10 @@ def version() -> None:
393
529
  from kodit import _version
394
530
  except ImportError:
395
531
  print("unknown, try running `uv build`, which is what happens in ci") # noqa: T201
396
- else:
397
- print(_version.version) # noqa: T201
532
+ return
533
+
534
+ print(f"kodit {_version.__version__}") # noqa: T201
398
535
 
399
536
 
400
537
  if __name__ == "__main__":
401
- asyncio.run(cli())
538
+ cli()
kodit/domain/entities.py CHANGED
@@ -121,22 +121,24 @@ class File(Base, CommonMixin):
121
121
  created_at: datetime,
122
122
  updated_at: datetime,
123
123
  source_id: int,
124
+ mime_type: str,
125
+ uri: str,
124
126
  cloned_path: str,
125
- mime_type: str = "",
126
- uri: str = "",
127
- sha256: str = "",
128
- size_bytes: int = 0,
127
+ sha256: str,
128
+ size_bytes: int,
129
+ extension: str,
129
130
  ) -> None:
130
131
  """Initialize a new File instance for typing purposes."""
131
132
  super().__init__()
132
133
  self.created_at = created_at
133
134
  self.updated_at = updated_at
134
135
  self.source_id = source_id
135
- self.cloned_path = cloned_path
136
136
  self.mime_type = mime_type
137
137
  self.uri = uri
138
+ self.cloned_path = cloned_path
138
139
  self.sha256 = sha256
139
140
  self.size_bytes = size_bytes
141
+ self.extension = extension
140
142
 
141
143
 
142
144
  class EmbeddingType(Enum):
kodit/domain/repositories.py CHANGED
@@ -11,6 +11,10 @@ from kodit.domain.entities import (
11
11
  Source,
12
12
  SourceType,
13
13
  )
14
+ from kodit.domain.value_objects import (
15
+ MultiSearchRequest,
16
+ SnippetListItem,
17
+ )
14
18
 
15
19
  T = TypeVar("T")
16
20
 
@@ -86,6 +90,35 @@ class SnippetRepository(GenericRepository[Snippet]):
86
90
  """Delete all snippets for an index."""
87
91
  raise NotImplementedError
88
92
 
93
+ async def list_snippets(
94
+ self, file_path: str | None = None, source_uri: str | None = None
95
+ ) -> Sequence[SnippetListItem]:
96
+ """List snippets with optional filtering by file path and source URI.
97
+
98
+ Args:
99
+ file_path: Optional file or directory path to filter by. Can be relative
100
+ (uri) or absolute (cloned_path).
101
+ source_uri: Optional source URI to filter by. If None, returns snippets from
102
+ all sources.
103
+
104
+ Returns:
105
+ A sequence of SnippetListItem instances matching the criteria
106
+
107
+ """
108
+ raise NotImplementedError
109
+
110
+ async def search(self, request: MultiSearchRequest) -> Sequence[SnippetListItem]:
111
+ """Search snippets with filters.
112
+
113
+ Args:
114
+ request: The search request containing queries and optional filters.
115
+
116
+ Returns:
117
+ A sequence of SnippetListItem instances matching the search criteria.
118
+
119
+ """
120
+ raise NotImplementedError
121
+
89
122
 
90
123
  class FileRepository(GenericRepository[File]):
91
124
  """File repository with specific methods."""
kodit/domain/services/bm25_service.py CHANGED
@@ -4,10 +4,10 @@ from abc import ABC, abstractmethod
4
4
  from collections.abc import Sequence
5
5
 
6
6
  from kodit.domain.value_objects import (
7
- BM25DeleteRequest,
8
- BM25IndexRequest,
9
- BM25SearchRequest,
10
- BM25SearchResult,
7
+ DeleteRequest,
8
+ IndexRequest,
9
+ SearchRequest,
10
+ SearchResult,
11
11
  )
12
12
 
13
13
 
@@ -15,15 +15,15 @@ class BM25Repository(ABC):
15
15
  """Abstract interface for BM25 repository."""
16
16
 
17
17
  @abstractmethod
18
- async def index_documents(self, request: BM25IndexRequest) -> None:
18
+ async def index_documents(self, request: IndexRequest) -> None:
19
19
  """Index documents for BM25 search."""
20
20
 
21
21
  @abstractmethod
22
- async def search(self, request: BM25SearchRequest) -> Sequence[BM25SearchResult]:
22
+ async def search(self, request: SearchRequest) -> Sequence[SearchResult]:
23
23
  """Search documents using BM25."""
24
24
 
25
25
  @abstractmethod
26
- async def delete_documents(self, request: BM25DeleteRequest) -> None:
26
+ async def delete_documents(self, request: DeleteRequest) -> None:
27
27
  """Delete documents from the BM25 index."""
28
28
 
29
29
 
@@ -39,7 +39,7 @@ class BM25DomainService:
39
39
  """
40
40
  self.repository = repository
41
41
 
42
- async def index_documents(self, request: BM25IndexRequest) -> None:
42
+ async def index_documents(self, request: IndexRequest) -> None:
43
43
  """Index documents using domain business rules.
44
44
 
45
45
  Args:
@@ -64,10 +64,10 @@ class BM25DomainService:
64
64
  raise ValueError("No valid documents to index")
65
65
 
66
66
  # Domain logic: create new request with validated documents
67
- validated_request = BM25IndexRequest(documents=valid_documents)
67
+ validated_request = IndexRequest(documents=valid_documents)
68
68
  await self.repository.index_documents(validated_request)
69
69
 
70
- async def search(self, request: BM25SearchRequest) -> Sequence[BM25SearchResult]:
70
+ async def search(self, request: SearchRequest) -> Sequence[SearchResult]:
71
71
  """Search documents using domain business rules.
72
72
 
73
73
  Args:
@@ -88,14 +88,11 @@ class BM25DomainService:
88
88
  raise ValueError("Top-k must be positive")
89
89
 
90
90
  # Domain logic: normalize query
91
- normalized_query = request.query.strip()
92
- normalized_request = BM25SearchRequest(
93
- query=normalized_query, top_k=request.top_k
94
- )
91
+ request.query = request.query.strip()
95
92
 
96
- return await self.repository.search(normalized_request)
93
+ return await self.repository.search(request)
97
94
 
98
- async def delete_documents(self, request: BM25DeleteRequest) -> None:
95
+ async def delete_documents(self, request: DeleteRequest) -> None:
99
96
  """Delete documents using domain business rules.
100
97
 
101
98
  Args:
@@ -120,5 +117,5 @@ class BM25DomainService:
120
117
  raise ValueError("No valid snippet IDs to delete")
121
118
 
122
119
  # Domain logic: create new request with validated IDs
123
- validated_request = BM25DeleteRequest(snippet_ids=valid_ids)
120
+ validated_request = DeleteRequest(snippet_ids=valid_ids)
124
121
  await self.repository.delete_documents(validated_request)
kodit/domain/services/embedding_service.py CHANGED
@@ -7,10 +7,10 @@ from kodit.domain.entities import EmbeddingType
7
7
  from kodit.domain.value_objects import (
8
8
  EmbeddingRequest,
9
9
  EmbeddingResponse,
10
+ IndexRequest,
10
11
  IndexResult,
11
- VectorIndexRequest,
12
- VectorSearchQueryRequest,
13
- VectorSearchResult,
12
+ SearchRequest,
13
+ SearchResult,
14
14
  )
15
15
 
16
16
 
@@ -29,14 +29,12 @@ class VectorSearchRepository(ABC):
29
29
 
30
30
  @abstractmethod
31
31
  def index_documents(
32
- self, request: VectorIndexRequest
32
+ self, request: IndexRequest
33
33
  ) -> AsyncGenerator[list[IndexResult], None]:
34
34
  """Index documents for vector search."""
35
35
 
36
36
  @abstractmethod
37
- async def search(
38
- self, request: VectorSearchQueryRequest
39
- ) -> Sequence[VectorSearchResult]:
37
+ async def search(self, request: SearchRequest) -> Sequence[SearchResult]:
40
38
  """Search documents using vector similarity."""
41
39
 
42
40
  @abstractmethod
@@ -65,7 +63,7 @@ class EmbeddingDomainService:
65
63
  self.vector_search_repository = vector_search_repository
66
64
 
67
65
  async def index_documents(
68
- self, request: VectorIndexRequest
66
+ self, request: IndexRequest
69
67
  ) -> AsyncGenerator[list[IndexResult], None]:
70
68
  """Index documents using domain business rules.
71
69
 
@@ -94,15 +92,13 @@ class EmbeddingDomainService:
94
92
  return
95
93
 
96
94
  # Domain logic: create new request with validated documents
97
- validated_request = VectorIndexRequest(documents=valid_documents)
95
+ validated_request = IndexRequest(documents=valid_documents)
98
96
  async for result in self.vector_search_repository.index_documents(
99
97
  validated_request
100
98
  ):
101
99
  yield result
102
100
 
103
- async def search(
104
- self, request: VectorSearchQueryRequest
105
- ) -> Sequence[VectorSearchResult]:
101
+ async def search(self, request: SearchRequest) -> Sequence[SearchResult]:
106
102
  """Search documents using domain business rules.
107
103
 
108
104
  Args:
@@ -124,8 +120,8 @@ class EmbeddingDomainService:
124
120
 
125
121
  # Domain logic: normalize query
126
122
  normalized_query = request.query.strip()
127
- normalized_request = VectorSearchQueryRequest(
128
- query=normalized_query, top_k=request.top_k
123
+ normalized_request = SearchRequest(
124
+ query=normalized_query, top_k=request.top_k, snippet_ids=request.snippet_ids
129
125
  )
130
126
 
131
127
  return await self.vector_search_repository.search(normalized_request)