kodit 0.2.8__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (37)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +36 -1
  3. kodit/application/factories/__init__.py +1 -0
  4. kodit/application/factories/code_indexing_factory.py +119 -0
  5. kodit/application/services/{indexing_application_service.py → code_indexing_application_service.py} +159 -198
  6. kodit/cli.py +214 -62
  7. kodit/config.py +40 -3
  8. kodit/domain/entities.py +7 -5
  9. kodit/domain/repositories.py +33 -0
  10. kodit/domain/services/bm25_service.py +14 -17
  11. kodit/domain/services/embedding_service.py +10 -14
  12. kodit/domain/services/snippet_service.py +198 -0
  13. kodit/domain/value_objects.py +301 -21
  14. kodit/infrastructure/bm25/local_bm25_repository.py +20 -12
  15. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +31 -11
  16. kodit/infrastructure/cloning/metadata.py +1 -0
  17. kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +14 -25
  18. kodit/infrastructure/embedding/local_vector_search_repository.py +26 -38
  19. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +50 -35
  20. kodit/infrastructure/enrichment/enrichment_factory.py +1 -1
  21. kodit/infrastructure/indexing/auto_indexing_service.py +84 -0
  22. kodit/infrastructure/indexing/indexing_factory.py +8 -91
  23. kodit/infrastructure/indexing/snippet_domain_service_factory.py +37 -0
  24. kodit/infrastructure/snippet_extraction/languages/java.scm +12 -0
  25. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +3 -31
  26. kodit/infrastructure/sqlalchemy/embedding_repository.py +14 -3
  27. kodit/infrastructure/sqlalchemy/snippet_repository.py +174 -2
  28. kodit/mcp.py +61 -49
  29. {kodit-0.2.8.dist-info → kodit-0.3.0.dist-info}/METADATA +1 -1
  30. {kodit-0.2.8.dist-info → kodit-0.3.0.dist-info}/RECORD +33 -31
  31. kodit/application/commands/__init__.py +0 -1
  32. kodit/application/commands/snippet_commands.py +0 -22
  33. kodit/application/services/snippet_application_service.py +0 -149
  34. kodit/infrastructure/enrichment/legacy_enrichment_models.py +0 -42
  35. {kodit-0.2.8.dist-info → kodit-0.3.0.dist-info}/WHEEL +0 -0
  36. {kodit-0.2.8.dist-info → kodit-0.3.0.dist-info}/entry_points.txt +0 -0
  37. {kodit-0.2.8.dist-info → kodit-0.3.0.dist-info}/licenses/LICENSE +0 -0
kodit/cli.py CHANGED
@@ -1,6 +1,5 @@
1
1
  """Command line interface for kodit."""
2
2
 
3
- import asyncio
4
3
  import signal
5
4
  from pathlib import Path
6
5
  from typing import Any
@@ -11,8 +10,8 @@ import uvicorn
11
10
  from pytable_formatter import Cell, Table
12
11
  from sqlalchemy.ext.asyncio import AsyncSession
13
12
 
14
- from kodit.application.services.snippet_application_service import (
15
- SnippetApplicationService,
13
+ from kodit.application.factories.code_indexing_factory import (
14
+ create_code_indexing_application_service,
16
15
  )
17
16
  from kodit.config import (
18
17
  AppContext,
@@ -21,14 +20,7 @@ from kodit.config import (
21
20
  )
22
21
  from kodit.domain.errors import EmptySourceError
23
22
  from kodit.domain.services.source_service import SourceService
24
- from kodit.domain.value_objects import MultiSearchRequest
25
- from kodit.infrastructure.indexing.indexing_factory import (
26
- create_indexing_application_service,
27
- )
28
- from kodit.infrastructure.snippet_extraction.snippet_extraction_factory import (
29
- create_snippet_extraction_domain_service,
30
- create_snippet_repositories,
31
- )
23
+ from kodit.domain.value_objects import MultiSearchRequest, SnippetSearchFilters
32
24
  from kodit.infrastructure.ui.progress import (
33
25
  create_lazy_progress_callback,
34
26
  create_multi_stage_progress_callback,
@@ -36,33 +28,6 @@ from kodit.infrastructure.ui.progress import (
36
28
  from kodit.log import configure_logging, configure_telemetry, log_event
37
29
 
38
30
 
39
- def create_snippet_application_service(
40
- session: AsyncSession,
41
- ) -> SnippetApplicationService:
42
- """Create a snippet application service with all dependencies.
43
-
44
- Args:
45
- session: SQLAlchemy session
46
-
47
- Returns:
48
- Configured snippet application service
49
-
50
- """
51
- # Create domain service
52
- snippet_extraction_service = create_snippet_extraction_domain_service()
53
-
54
- # Create repositories
55
- snippet_repository, file_repository = create_snippet_repositories(session)
56
-
57
- # Create application service
58
- return SnippetApplicationService(
59
- snippet_extraction_service=snippet_extraction_service,
60
- snippet_repository=snippet_repository,
61
- file_repository=file_repository,
62
- session=session,
63
- )
64
-
65
-
66
31
  @click.group(context_settings={"max_content_width": 100})
67
32
  @click.option(
68
33
  "--env-file",
@@ -94,12 +59,17 @@ def cli(
94
59
 
95
60
  @cli.command()
96
61
  @click.argument("sources", nargs=-1)
62
+ @click.option(
63
+ "--auto-index", is_flag=True, help="Index all configured auto-index sources"
64
+ )
97
65
  @with_app_context
98
66
  @with_session
99
67
  async def index(
100
68
  session: AsyncSession,
101
69
  app_context: AppContext,
102
70
  sources: list[str],
71
+ *, # Force keyword-only arguments
72
+ auto_index: bool,
103
73
  ) -> None:
104
74
  """List indexes, or index data sources."""
105
75
  log = structlog.get_logger(__name__)
@@ -107,14 +77,22 @@ async def index(
107
77
  clone_dir=app_context.get_clone_dir(),
108
78
  session_factory=lambda: session,
109
79
  )
110
- snippet_service = create_snippet_application_service(session)
111
- service = create_indexing_application_service(
80
+ service = create_code_indexing_application_service(
112
81
  app_context=app_context,
113
82
  session=session,
114
83
  source_service=source_service,
115
- snippet_application_service=snippet_service,
116
84
  )
117
85
 
86
+ if auto_index:
87
+ log.info("Auto-indexing configuration", config=app_context.auto_indexing)
88
+ auto_sources = app_context.auto_indexing.sources
89
+ if not auto_sources:
90
+ click.echo("No auto-index sources configured.")
91
+ return
92
+
93
+ click.echo(f"Auto-indexing {len(auto_sources)} configured sources...")
94
+ sources = [source.uri for source in auto_sources]
95
+
118
96
  if not sources:
119
97
  log_event("kodit.cli.index.list")
120
98
  # No source specified, list all indexes
@@ -173,16 +151,86 @@ def search() -> None:
173
151
  """Search for snippets in the database."""
174
152
 
175
153
 
154
+ # Utility for robust filter parsing
155
+ def _parse_filters(
156
+ language: str | None,
157
+ author: str | None,
158
+ created_after: str | None,
159
+ created_before: str | None,
160
+ source_repo: str | None,
161
+ ) -> SnippetSearchFilters | None:
162
+ from datetime import datetime
163
+
164
+ # Normalize language to lowercase if provided
165
+ norm_language = language.lower() if language else None
166
+ # Try to parse dates, raise error if invalid
167
+ parsed_created_after = None
168
+ if created_after:
169
+ try:
170
+ parsed_created_after = datetime.fromisoformat(created_after)
171
+ except ValueError as err:
172
+ raise ValueError(
173
+ f"Invalid date format for --created-after: {created_after}. "
174
+ "Expected ISO 8601 format (YYYY-MM-DD)"
175
+ ) from err
176
+ parsed_created_before = None
177
+ if created_before:
178
+ try:
179
+ parsed_created_before = datetime.fromisoformat(created_before)
180
+ except ValueError as err:
181
+ raise ValueError(
182
+ f"Invalid date format for --created-before: {created_before}. "
183
+ "Expected ISO 8601 format (YYYY-MM-DD)"
184
+ ) from err
185
+ # Return None if no filters provided, otherwise return SnippetSearchFilters
186
+ # Check if any original parameters were provided (not just the parsed values)
187
+ if any(
188
+ [
189
+ language,
190
+ author,
191
+ created_after,
192
+ created_before,
193
+ source_repo,
194
+ ]
195
+ ):
196
+ return SnippetSearchFilters(
197
+ language=norm_language,
198
+ author=author,
199
+ created_after=parsed_created_after,
200
+ created_before=parsed_created_before,
201
+ source_repo=source_repo,
202
+ )
203
+ return None
204
+
205
+
176
206
  @search.command()
177
207
  @click.argument("query")
178
208
  @click.option("--top-k", default=10, help="Number of snippets to retrieve")
209
+ @click.option(
210
+ "--language", help="Filter by programming language (e.g., python, go, javascript)"
211
+ )
212
+ @click.option("--author", help="Filter by author name")
213
+ @click.option(
214
+ "--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
215
+ )
216
+ @click.option(
217
+ "--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
218
+ )
219
+ @click.option(
220
+ "--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
221
+ )
179
222
  @with_app_context
180
223
  @with_session
181
- async def code(
224
+ async def code( # noqa: PLR0913
182
225
  session: AsyncSession,
183
226
  app_context: AppContext,
184
227
  query: str,
185
228
  top_k: int,
229
+ language: str | None,
230
+ author: str | None,
231
+ created_after: str | None,
232
+ created_before: str | None,
233
+ source_repo: str | None,
186
234
  ) -> None:
187
235
  """Search for snippets using semantic code search.
188
236
 
@@ -193,15 +241,19 @@ async def code(
193
241
  clone_dir=app_context.get_clone_dir(),
194
242
  session_factory=lambda: session,
195
243
  )
196
- snippet_service = create_snippet_application_service(session)
197
- service = create_indexing_application_service(
244
+ service = create_code_indexing_application_service(
198
245
  app_context=app_context,
199
246
  session=session,
200
247
  source_service=source_service,
201
- snippet_application_service=snippet_service,
202
248
  )
203
249
 
204
- snippets = await service.search(MultiSearchRequest(code_query=query, top_k=top_k))
250
+ filters = _parse_filters(
251
+ language, author, created_after, created_before, source_repo
252
+ )
253
+
254
+ snippets = await service.search(
255
+ MultiSearchRequest(code_query=query, top_k=top_k, filters=filters)
256
+ )
205
257
 
206
258
  if len(snippets) == 0:
207
259
  click.echo("No snippets found")
@@ -219,13 +271,31 @@ async def code(
219
271
  @search.command()
220
272
  @click.argument("keywords", nargs=-1)
221
273
  @click.option("--top-k", default=10, help="Number of snippets to retrieve")
274
+ @click.option(
275
+ "--language", help="Filter by programming language (e.g., python, go, javascript)"
276
+ )
277
+ @click.option("--author", help="Filter by author name")
278
+ @click.option(
279
+ "--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
280
+ )
281
+ @click.option(
282
+ "--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
283
+ )
284
+ @click.option(
285
+ "--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
286
+ )
222
287
  @with_app_context
223
288
  @with_session
224
- async def keyword(
289
+ async def keyword( # noqa: PLR0913
225
290
  session: AsyncSession,
226
291
  app_context: AppContext,
227
292
  keywords: list[str],
228
293
  top_k: int,
294
+ language: str | None,
295
+ author: str | None,
296
+ created_after: str | None,
297
+ created_before: str | None,
298
+ source_repo: str | None,
229
299
  ) -> None:
230
300
  """Search for snippets using keyword search."""
231
301
  log_event("kodit.cli.search.keyword")
@@ -233,15 +303,19 @@ async def keyword(
233
303
  clone_dir=app_context.get_clone_dir(),
234
304
  session_factory=lambda: session,
235
305
  )
236
- snippet_service = create_snippet_application_service(session)
237
- service = create_indexing_application_service(
306
+ service = create_code_indexing_application_service(
238
307
  app_context=app_context,
239
308
  session=session,
240
309
  source_service=source_service,
241
- snippet_application_service=snippet_service,
242
310
  )
243
311
 
244
- snippets = await service.search(MultiSearchRequest(keywords=keywords, top_k=top_k))
312
+ filters = _parse_filters(
313
+ language, author, created_after, created_before, source_repo
314
+ )
315
+
316
+ snippets = await service.search(
317
+ MultiSearchRequest(keywords=keywords, top_k=top_k, filters=filters)
318
+ )
245
319
 
246
320
  if len(snippets) == 0:
247
321
  click.echo("No snippets found")
@@ -259,13 +333,31 @@ async def keyword(
259
333
  @search.command()
260
334
  @click.argument("query")
261
335
  @click.option("--top-k", default=10, help="Number of snippets to retrieve")
336
+ @click.option(
337
+ "--language", help="Filter by programming language (e.g., python, go, javascript)"
338
+ )
339
+ @click.option("--author", help="Filter by author name")
340
+ @click.option(
341
+ "--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
342
+ )
343
+ @click.option(
344
+ "--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
345
+ )
346
+ @click.option(
347
+ "--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
348
+ )
262
349
  @with_app_context
263
350
  @with_session
264
- async def text(
351
+ async def text( # noqa: PLR0913
265
352
  session: AsyncSession,
266
353
  app_context: AppContext,
267
354
  query: str,
268
355
  top_k: int,
356
+ language: str | None,
357
+ author: str | None,
358
+ created_after: str | None,
359
+ created_before: str | None,
360
+ source_repo: str | None,
269
361
  ) -> None:
270
362
  """Search for snippets using semantic text search.
271
363
 
@@ -276,15 +368,19 @@ async def text(
276
368
  clone_dir=app_context.get_clone_dir(),
277
369
  session_factory=lambda: session,
278
370
  )
279
- snippet_service = create_snippet_application_service(session)
280
- service = create_indexing_application_service(
371
+ service = create_code_indexing_application_service(
281
372
  app_context=app_context,
282
373
  session=session,
283
374
  source_service=source_service,
284
- snippet_application_service=snippet_service,
285
375
  )
286
376
 
287
- snippets = await service.search(MultiSearchRequest(text_query=query, top_k=top_k))
377
+ filters = _parse_filters(
378
+ language, author, created_after, created_before, source_repo
379
+ )
380
+
381
+ snippets = await service.search(
382
+ MultiSearchRequest(text_query=query, top_k=top_k, filters=filters)
383
+ )
288
384
 
289
385
  if len(snippets) == 0:
290
386
  click.echo("No snippets found")
@@ -304,6 +400,19 @@ async def text(
304
400
  @click.option("--keywords", required=True, help="Comma separated list of keywords")
305
401
  @click.option("--code", required=True, help="Semantic code search query")
306
402
  @click.option("--text", required=True, help="Semantic text search query")
403
+ @click.option(
404
+ "--language", help="Filter by programming language (e.g., python, go, javascript)"
405
+ )
406
+ @click.option("--author", help="Filter by author name")
407
+ @click.option(
408
+ "--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
409
+ )
410
+ @click.option(
411
+ "--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
412
+ )
413
+ @click.option(
414
+ "--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
415
+ )
307
416
  @with_app_context
308
417
  @with_session
309
418
  async def hybrid( # noqa: PLR0913
@@ -313,6 +422,11 @@ async def hybrid( # noqa: PLR0913
313
422
  keywords: str,
314
423
  code: str,
315
424
  text: str,
425
+ language: str | None,
426
+ author: str | None,
427
+ created_after: str | None,
428
+ created_before: str | None,
429
+ source_repo: str | None,
316
430
  ) -> None:
317
431
  """Search for snippets using hybrid search."""
318
432
  log_event("kodit.cli.search.hybrid")
@@ -320,23 +434,26 @@ async def hybrid( # noqa: PLR0913
320
434
  clone_dir=app_context.get_clone_dir(),
321
435
  session_factory=lambda: session,
322
436
  )
323
- snippet_service = create_snippet_application_service(session)
324
- service = create_indexing_application_service(
437
+ service = create_code_indexing_application_service(
325
438
  app_context=app_context,
326
439
  session=session,
327
440
  source_service=source_service,
328
- snippet_application_service=snippet_service,
329
441
  )
330
442
 
331
443
  # Parse keywords into a list of strings
332
444
  keywords_list = [k.strip().lower() for k in keywords.split(",")]
333
445
 
446
+ filters = _parse_filters(
447
+ language, author, created_after, created_before, source_repo
448
+ )
449
+
334
450
  snippets = await service.search(
335
451
  MultiSearchRequest(
336
452
  keywords=keywords_list,
337
453
  code_query=code,
338
454
  text_query=text,
339
455
  top_k=top_k,
456
+ filters=filters,
340
457
  )
341
458
  )
342
459
 
@@ -353,6 +470,40 @@ async def hybrid( # noqa: PLR0913
353
470
  click.echo()
354
471
 
355
472
 
473
+ @cli.group()
474
+ def show() -> None:
475
+ """Show information about elements in the database."""
476
+
477
+
478
+ @show.command()
479
+ @click.option("--by-path", help="File or directory path to search for snippets")
480
+ @click.option("--by-source", help="Source URI to filter snippets by")
481
+ @with_app_context
482
+ @with_session
483
+ async def snippets(
484
+ session: AsyncSession,
485
+ app_context: AppContext,
486
+ by_path: str | None,
487
+ by_source: str | None,
488
+ ) -> None:
489
+ """Show snippets with optional filtering by path or source."""
490
+ log_event("kodit.cli.show.snippets")
491
+ source_service = SourceService(
492
+ clone_dir=app_context.get_clone_dir(),
493
+ session_factory=lambda: session,
494
+ )
495
+ service = create_code_indexing_application_service(
496
+ app_context=app_context,
497
+ session=session,
498
+ source_service=source_service,
499
+ )
500
+ snippets = await service.list_snippets(file_path=by_path, source_uri=by_source)
501
+ for snippet in snippets:
502
+ click.echo(f"{snippet.id}: [{snippet.source_uri}] {snippet.file_path}")
503
+ click.echo(f" {snippet.content}")
504
+ click.echo()
505
+
506
+
356
507
  @cli.command()
357
508
  @click.option("--host", default="127.0.0.1", help="Host to bind the server to")
358
509
  @click.option("--port", default=8080, help="Port to bind the server to")
@@ -393,9 +544,10 @@ def version() -> None:
393
544
  from kodit import _version
394
545
  except ImportError:
395
546
  print("unknown, try running `uv build`, which is what happens in ci") # noqa: T201
396
- else:
397
- print(_version.version) # noqa: T201
547
+ return
548
+
549
+ print(f"kodit {_version.__version__}") # noqa: T201
398
550
 
399
551
 
400
552
  if __name__ == "__main__":
401
- asyncio.run(cli())
553
+ cli()
kodit/config.py CHANGED
@@ -8,7 +8,7 @@ from pathlib import Path
8
8
  from typing import TYPE_CHECKING, Any, Literal, TypeVar
9
9
 
10
10
  import click
11
- from pydantic import BaseModel, Field
11
+ from pydantic import BaseModel, Field, field_validator
12
12
  from pydantic_settings import BaseSettings, SettingsConfigDict
13
13
 
14
14
  if TYPE_CHECKING:
@@ -37,11 +37,45 @@ class Endpoint(BaseModel):
37
37
 
38
38
 
39
39
  class Search(BaseModel):
40
- """Search provides configuration for a search engine."""
40
+ """Search configuration."""
41
41
 
42
42
  provider: Literal["sqlite", "vectorchord"] = Field(default="sqlite")
43
43
 
44
44
 
45
+ class AutoIndexingSource(BaseModel):
46
+ """Configuration for a single auto-indexing source."""
47
+
48
+ uri: str = Field(description="URI of the source to index (git URL or local path)")
49
+
50
+
51
+ class AutoIndexingConfig(BaseModel):
52
+ """Configuration for auto-indexing."""
53
+
54
+ sources: list[AutoIndexingSource] = Field(
55
+ default_factory=list, description="List of sources to auto-index"
56
+ )
57
+
58
+ @field_validator("sources", mode="before")
59
+ @classmethod
60
+ def parse_sources(cls, v: Any) -> list[AutoIndexingSource]:
61
+ """Parse sources from environment variables or other formats."""
62
+ if v is None:
63
+ return []
64
+ if isinstance(v, list):
65
+ return v
66
+ if isinstance(v, dict):
67
+ # Handle case where env vars are numbered keys like {'0': {'uri': '...'}}
68
+ sources = []
69
+ i = 0
70
+ while str(i) in v:
71
+ source_data = v[str(i)]
72
+ if isinstance(source_data, dict) and "uri" in source_data:
73
+ sources.append(AutoIndexingSource(uri=source_data["uri"]))
74
+ i += 1
75
+ return sources
76
+ return v
77
+
78
+
45
79
  class AppContext(BaseSettings):
46
80
  """Global context for the kodit project. Provides a shared state for the app."""
47
81
 
@@ -50,7 +84,7 @@ class AppContext(BaseSettings):
50
84
  env_file_encoding="utf-8",
51
85
  env_nested_delimiter="_",
52
86
  nested_model_default_partial_update=True,
53
- env_nested_max_split=1,
87
+ extra="ignore",
54
88
  )
55
89
 
56
90
  data_dir: Path = Field(default=DEFAULT_BASE_DIR)
@@ -76,6 +110,9 @@ class AppContext(BaseSettings):
76
110
  default_search: Search = Field(
77
111
  default=Search(),
78
112
  )
113
+ auto_indexing: AutoIndexingConfig | None = Field(
114
+ default=AutoIndexingConfig(), description="Auto-indexing configuration"
115
+ )
79
116
  _db: Database | None = None
80
117
 
81
118
  def model_post_init(self, _: Any) -> None:
kodit/domain/entities.py CHANGED
@@ -121,22 +121,24 @@ class File(Base, CommonMixin):
121
121
  created_at: datetime,
122
122
  updated_at: datetime,
123
123
  source_id: int,
124
+ mime_type: str,
125
+ uri: str,
124
126
  cloned_path: str,
125
- mime_type: str = "",
126
- uri: str = "",
127
- sha256: str = "",
128
- size_bytes: int = 0,
127
+ sha256: str,
128
+ size_bytes: int,
129
+ extension: str,
129
130
  ) -> None:
130
131
  """Initialize a new File instance for typing purposes."""
131
132
  super().__init__()
132
133
  self.created_at = created_at
133
134
  self.updated_at = updated_at
134
135
  self.source_id = source_id
135
- self.cloned_path = cloned_path
136
136
  self.mime_type = mime_type
137
137
  self.uri = uri
138
+ self.cloned_path = cloned_path
138
139
  self.sha256 = sha256
139
140
  self.size_bytes = size_bytes
141
+ self.extension = extension
140
142
 
141
143
 
142
144
  class EmbeddingType(Enum):
kodit/domain/repositories.py CHANGED
@@ -11,6 +11,10 @@ from kodit.domain.entities import (
11
11
  Source,
12
12
  SourceType,
13
13
  )
14
+ from kodit.domain.value_objects import (
15
+ MultiSearchRequest,
16
+ SnippetListItem,
17
+ )
14
18
 
15
19
  T = TypeVar("T")
16
20
 
@@ -86,6 +90,35 @@ class SnippetRepository(GenericRepository[Snippet]):
86
90
  """Delete all snippets for an index."""
87
91
  raise NotImplementedError
88
92
 
93
+ async def list_snippets(
94
+ self, file_path: str | None = None, source_uri: str | None = None
95
+ ) -> Sequence[SnippetListItem]:
96
+ """List snippets with optional filtering by file path and source URI.
97
+
98
+ Args:
99
+ file_path: Optional file or directory path to filter by. Can be relative
100
+ (uri) or absolute (cloned_path).
101
+ source_uri: Optional source URI to filter by. If None, returns snippets from
102
+ all sources.
103
+
104
+ Returns:
105
+ A sequence of SnippetListItem instances matching the criteria
106
+
107
+ """
108
+ raise NotImplementedError
109
+
110
+ async def search(self, request: MultiSearchRequest) -> Sequence[SnippetListItem]:
111
+ """Search snippets with filters.
112
+
113
+ Args:
114
+ request: The search request containing queries and optional filters.
115
+
116
+ Returns:
117
+ A sequence of SnippetListItem instances matching the search criteria.
118
+
119
+ """
120
+ raise NotImplementedError
121
+
89
122
 
90
123
  class FileRepository(GenericRepository[File]):
91
124
  """File repository with specific methods."""
kodit/domain/services/bm25_service.py CHANGED
@@ -4,10 +4,10 @@ from abc import ABC, abstractmethod
4
4
  from collections.abc import Sequence
5
5
 
6
6
  from kodit.domain.value_objects import (
7
- BM25DeleteRequest,
8
- BM25IndexRequest,
9
- BM25SearchRequest,
10
- BM25SearchResult,
7
+ DeleteRequest,
8
+ IndexRequest,
9
+ SearchRequest,
10
+ SearchResult,
11
11
  )
12
12
 
13
13
 
@@ -15,15 +15,15 @@ class BM25Repository(ABC):
15
15
  """Abstract interface for BM25 repository."""
16
16
 
17
17
  @abstractmethod
18
- async def index_documents(self, request: BM25IndexRequest) -> None:
18
+ async def index_documents(self, request: IndexRequest) -> None:
19
19
  """Index documents for BM25 search."""
20
20
 
21
21
  @abstractmethod
22
- async def search(self, request: BM25SearchRequest) -> Sequence[BM25SearchResult]:
22
+ async def search(self, request: SearchRequest) -> Sequence[SearchResult]:
23
23
  """Search documents using BM25."""
24
24
 
25
25
  @abstractmethod
26
- async def delete_documents(self, request: BM25DeleteRequest) -> None:
26
+ async def delete_documents(self, request: DeleteRequest) -> None:
27
27
  """Delete documents from the BM25 index."""
28
28
 
29
29
 
@@ -39,7 +39,7 @@ class BM25DomainService:
39
39
  """
40
40
  self.repository = repository
41
41
 
42
- async def index_documents(self, request: BM25IndexRequest) -> None:
42
+ async def index_documents(self, request: IndexRequest) -> None:
43
43
  """Index documents using domain business rules.
44
44
 
45
45
  Args:
@@ -64,10 +64,10 @@ class BM25DomainService:
64
64
  raise ValueError("No valid documents to index")
65
65
 
66
66
  # Domain logic: create new request with validated documents
67
- validated_request = BM25IndexRequest(documents=valid_documents)
67
+ validated_request = IndexRequest(documents=valid_documents)
68
68
  await self.repository.index_documents(validated_request)
69
69
 
70
- async def search(self, request: BM25SearchRequest) -> Sequence[BM25SearchResult]:
70
+ async def search(self, request: SearchRequest) -> Sequence[SearchResult]:
71
71
  """Search documents using domain business rules.
72
72
 
73
73
  Args:
@@ -88,14 +88,11 @@ class BM25DomainService:
88
88
  raise ValueError("Top-k must be positive")
89
89
 
90
90
  # Domain logic: normalize query
91
- normalized_query = request.query.strip()
92
- normalized_request = BM25SearchRequest(
93
- query=normalized_query, top_k=request.top_k
94
- )
91
+ request.query = request.query.strip()
95
92
 
96
- return await self.repository.search(normalized_request)
93
+ return await self.repository.search(request)
97
94
 
98
- async def delete_documents(self, request: BM25DeleteRequest) -> None:
95
+ async def delete_documents(self, request: DeleteRequest) -> None:
99
96
  """Delete documents using domain business rules.
100
97
 
101
98
  Args:
@@ -120,5 +117,5 @@ class BM25DomainService:
120
117
  raise ValueError("No valid snippet IDs to delete")
121
118
 
122
119
  # Domain logic: create new request with validated IDs
123
- validated_request = BM25DeleteRequest(snippet_ids=valid_ids)
120
+ validated_request = DeleteRequest(snippet_ids=valid_ids)
124
121
  await self.repository.delete_documents(validated_request)