kodit 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (70)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/code_indexing_factory.py +56 -29
  3. kodit/application/services/code_indexing_application_service.py +152 -118
  4. kodit/cli.py +14 -41
  5. kodit/domain/entities.py +268 -197
  6. kodit/domain/protocols.py +61 -0
  7. kodit/domain/services/embedding_service.py +1 -1
  8. kodit/domain/services/index_query_service.py +66 -0
  9. kodit/domain/services/index_service.py +282 -0
  10. kodit/domain/value_objects.py +143 -65
  11. kodit/infrastructure/cloning/git/working_copy.py +17 -8
  12. kodit/infrastructure/cloning/metadata.py +37 -67
  13. kodit/infrastructure/embedding/embedding_factory.py +1 -1
  14. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  15. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
  16. kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
  17. kodit/infrastructure/git/git_utils.py +1 -63
  18. kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
  19. kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
  20. kodit/infrastructure/indexing/fusion_service.py +1 -1
  21. kodit/infrastructure/mappers/__init__.py +1 -0
  22. kodit/infrastructure/mappers/index_mapper.py +344 -0
  23. kodit/infrastructure/slicing/__init__.py +1 -0
  24. kodit/infrastructure/slicing/language_detection_service.py +18 -0
  25. kodit/infrastructure/slicing/slicer.py +894 -0
  26. kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
  27. kodit/infrastructure/sqlalchemy/entities.py +203 -0
  28. kodit/infrastructure/sqlalchemy/index_repository.py +579 -0
  29. kodit/mcp.py +0 -7
  30. kodit/migrations/env.py +1 -1
  31. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +36 -0
  32. kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
  33. kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +24 -16
  34. kodit/migrations/versions/85155663351e_initial.py +64 -48
  35. kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +20 -14
  36. kodit/utils/__init__.py +1 -0
  37. kodit/utils/path_utils.py +54 -0
  38. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/METADATA +9 -4
  39. kodit-0.3.4.dist-info/RECORD +89 -0
  40. kodit/domain/enums.py +0 -9
  41. kodit/domain/repositories.py +0 -128
  42. kodit/domain/services/ignore_service.py +0 -45
  43. kodit/domain/services/indexing_service.py +0 -204
  44. kodit/domain/services/snippet_extraction_service.py +0 -89
  45. kodit/domain/services/snippet_service.py +0 -215
  46. kodit/domain/services/source_service.py +0 -85
  47. kodit/infrastructure/cloning/folder/__init__.py +0 -1
  48. kodit/infrastructure/cloning/folder/factory.py +0 -128
  49. kodit/infrastructure/cloning/folder/working_copy.py +0 -38
  50. kodit/infrastructure/cloning/git/factory.py +0 -153
  51. kodit/infrastructure/indexing/index_repository.py +0 -286
  52. kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
  53. kodit/infrastructure/snippet_extraction/__init__.py +0 -1
  54. kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
  55. kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
  56. kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
  57. kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
  58. kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
  59. kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
  60. kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
  61. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
  62. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -45
  63. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
  64. kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
  65. kodit/infrastructure/sqlalchemy/repository.py +0 -133
  66. kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -259
  67. kodit-0.3.2.dist-info/RECORD +0 -103
  68. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/WHEEL +0 -0
  69. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/entry_points.txt +0 -0
  70. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/licenses/LICENSE +0 -0
kodit/cli.py CHANGED
@@ -19,12 +19,14 @@ from kodit.config import (
19
19
  with_session,
20
20
  )
21
21
  from kodit.domain.errors import EmptySourceError
22
- from kodit.domain.services.source_service import SourceService
22
+ from kodit.domain.services.index_query_service import IndexQueryService
23
23
  from kodit.domain.value_objects import (
24
24
  MultiSearchRequest,
25
25
  MultiSearchResult,
26
26
  SnippetSearchFilters,
27
27
  )
28
+ from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
29
+ from kodit.infrastructure.sqlalchemy.index_repository import SqlAlchemyIndexRepository
28
30
  from kodit.infrastructure.ui.progress import (
29
31
  create_lazy_progress_callback,
30
32
  create_multi_stage_progress_callback,
@@ -77,30 +79,28 @@ async def index(
77
79
  ) -> None:
78
80
  """List indexes, or index data sources."""
79
81
  log = structlog.get_logger(__name__)
80
- source_service = SourceService(
81
- clone_dir=app_context.get_clone_dir(),
82
- session_factory=lambda: session,
83
- )
84
82
  service = create_code_indexing_application_service(
85
83
  app_context=app_context,
86
84
  session=session,
87
- source_service=source_service,
85
+ )
86
+ index_query_service = IndexQueryService(
87
+ index_repository=SqlAlchemyIndexRepository(session=session),
88
+ fusion_service=ReciprocalRankFusionService(),
88
89
  )
89
90
 
90
91
  if auto_index:
91
92
  log.info("Auto-indexing configuration", config=app_context.auto_indexing)
92
- auto_sources = app_context.auto_indexing.sources
93
- if not auto_sources:
93
+ if not app_context.auto_indexing or not app_context.auto_indexing.sources:
94
94
  click.echo("No auto-index sources configured.")
95
95
  return
96
-
96
+ auto_sources = app_context.auto_indexing.sources
97
97
  click.echo(f"Auto-indexing {len(auto_sources)} configured sources...")
98
98
  sources = [source.uri for source in auto_sources]
99
99
 
100
100
  if not sources:
101
101
  log_event("kodit.cli.index.list")
102
102
  # No source specified, list all indexes
103
- indexes = await service.list_indexes()
103
+ indexes = await index_query_service.list_indexes()
104
104
  headers: list[str | Cell] = [
105
105
  "ID",
106
106
  "Created At",
@@ -113,8 +113,8 @@ async def index(
113
113
  index.id,
114
114
  index.created_at,
115
115
  index.updated_at,
116
- index.source,
117
- index.num_snippets,
116
+ index.source.working_copy.remote_uri,
117
+ len(index.source.working_copy.files),
118
118
  ]
119
119
  for index in indexes
120
120
  ]
@@ -131,14 +131,12 @@ async def index(
131
131
 
132
132
  # Create a lazy progress callback that only shows progress when needed
133
133
  progress_callback = create_lazy_progress_callback()
134
- s = await source_service.create(source, progress_callback)
135
-
136
- index = await service.create_index(s.id)
134
+ index = await service.create_index_from_uri(source, progress_callback)
137
135
 
138
136
  # Create a new progress callback for the indexing operations
139
137
  indexing_progress_callback = create_multi_stage_progress_callback()
140
138
  try:
141
- await service.run_index(index.id, indexing_progress_callback)
139
+ await service.run_index(index, indexing_progress_callback)
142
140
  except EmptySourceError as e:
143
141
  log.exception("Empty source error", error=e)
144
142
  msg = f"""{e}. This could mean:
@@ -243,14 +241,9 @@ async def code( # noqa: PLR0913
243
241
  This works best if your query is code.
244
242
  """
245
243
  log_event("kodit.cli.search.code")
246
- source_service = SourceService(
247
- clone_dir=app_context.get_clone_dir(),
248
- session_factory=lambda: session,
249
- )
250
244
  service = create_code_indexing_application_service(
251
245
  app_context=app_context,
252
246
  session=session,
253
- source_service=source_service,
254
247
  )
255
248
 
256
249
  filters = _parse_filters(
@@ -304,14 +297,9 @@ async def keyword( # noqa: PLR0913
304
297
  ) -> None:
305
298
  """Search for snippets using keyword search."""
306
299
  log_event("kodit.cli.search.keyword")
307
- source_service = SourceService(
308
- clone_dir=app_context.get_clone_dir(),
309
- session_factory=lambda: session,
310
- )
311
300
  service = create_code_indexing_application_service(
312
301
  app_context=app_context,
313
302
  session=session,
314
- source_service=source_service,
315
303
  )
316
304
 
317
305
  filters = _parse_filters(
@@ -368,14 +356,9 @@ async def text( # noqa: PLR0913
368
356
  This works best if your query is text.
369
357
  """
370
358
  log_event("kodit.cli.search.text")
371
- source_service = SourceService(
372
- clone_dir=app_context.get_clone_dir(),
373
- session_factory=lambda: session,
374
- )
375
359
  service = create_code_indexing_application_service(
376
360
  app_context=app_context,
377
361
  session=session,
378
- source_service=source_service,
379
362
  )
380
363
 
381
364
  filters = _parse_filters(
@@ -433,14 +416,9 @@ async def hybrid( # noqa: PLR0913
433
416
  ) -> None:
434
417
  """Search for snippets using hybrid search."""
435
418
  log_event("kodit.cli.search.hybrid")
436
- source_service = SourceService(
437
- clone_dir=app_context.get_clone_dir(),
438
- session_factory=lambda: session,
439
- )
440
419
  service = create_code_indexing_application_service(
441
420
  app_context=app_context,
442
421
  session=session,
443
- source_service=source_service,
444
422
  )
445
423
 
446
424
  # Parse keywords into a list of strings
@@ -490,14 +468,9 @@ async def snippets(
490
468
  ) -> None:
491
469
  """Show snippets with optional filtering by path or source."""
492
470
  log_event("kodit.cli.show.snippets")
493
- source_service = SourceService(
494
- clone_dir=app_context.get_clone_dir(),
495
- session_factory=lambda: session,
496
- )
497
471
  service = create_code_indexing_application_service(
498
472
  app_context=app_context,
499
473
  session=session,
500
- source_service=source_service,
501
474
  )
502
475
  snippets = await service.list_snippets(file_path=by_path, source_uri=by_source)
503
476
  if output_format == "text":
kodit/domain/entities.py CHANGED
@@ -1,200 +1,271 @@
1
- """SQLAlchemy entities."""
2
-
3
- from datetime import UTC, datetime
4
- from enum import Enum
5
-
6
- from git import Actor
7
- from sqlalchemy import (
8
- DateTime,
9
- ForeignKey,
10
- Integer,
11
- String,
12
- UnicodeText,
13
- UniqueConstraint,
14
- )
15
- from sqlalchemy import Enum as SQLAlchemyEnum
16
- from sqlalchemy.ext.asyncio import AsyncAttrs
17
- from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
18
- from sqlalchemy.types import JSON
19
-
20
-
21
- class Base(AsyncAttrs, DeclarativeBase):
22
- """Base class for all models."""
23
-
24
-
25
- class CommonMixin:
26
- """Common mixin for all models."""
27
-
28
- id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
29
- created_at: Mapped[datetime] = mapped_column(
30
- DateTime(timezone=True), default=lambda: datetime.now(UTC)
31
- )
32
- updated_at: Mapped[datetime] = mapped_column(
33
- DateTime(timezone=True),
34
- default=lambda: datetime.now(UTC),
35
- onupdate=lambda: datetime.now(UTC),
36
- )
37
-
38
-
39
- class SourceType(Enum):
40
- """The type of source."""
41
-
42
- UNKNOWN = 0
43
- FOLDER = 1
44
- GIT = 2
45
-
46
-
47
- class Source(Base, CommonMixin):
48
- """Base model for tracking code sources.
49
-
50
- This model serves as the parent table for different types of sources.
51
- It provides common fields and relationships for all source types.
52
-
53
- Attributes:
54
- id: The unique identifier for the source.
55
- created_at: Timestamp when the source was created.
56
- updated_at: Timestamp when the source was last updated.
57
- cloned_uri: A URI to a copy of the source on the local filesystem.
58
- uri: The URI of the source.
59
-
60
- """
61
-
62
- __tablename__ = "sources"
63
- uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
64
- cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
65
- type: Mapped[SourceType] = mapped_column(
66
- SQLAlchemyEnum(SourceType), default=SourceType.UNKNOWN, index=True
67
- )
68
-
69
- def __init__(self, uri: str, cloned_path: str, source_type: SourceType) -> None:
70
- """Initialize a new Source instance for typing purposes."""
71
- super().__init__()
72
- self.uri = uri
73
- self.cloned_path = cloned_path
74
- self.type = source_type
75
-
76
-
77
- class Author(Base, CommonMixin):
78
- """Author model."""
79
-
80
- __tablename__ = "authors"
81
-
82
- __table_args__ = (UniqueConstraint("name", "email", name="uix_author"),)
83
-
84
- name: Mapped[str] = mapped_column(String(255), index=True)
85
- email: Mapped[str] = mapped_column(String(255), index=True)
86
-
87
- @staticmethod
88
- def from_actor(actor: Actor) -> "Author":
89
- """Create an Author from an Actor."""
90
- return Author(name=actor.name, email=actor.email)
91
-
1
+ """Pure domain entities using Pydantic."""
92
2
 
93
- class AuthorFileMapping(Base, CommonMixin):
94
- """Author file mapping model."""
3
+ from dataclasses import dataclass
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Protocol
7
+ from urllib.parse import urlparse, urlunparse
95
8
 
96
- __tablename__ = "author_file_mappings"
9
+ from pydantic import AnyUrl, BaseModel
97
10
 
98
- __table_args__ = (
99
- UniqueConstraint("author_id", "file_id", name="uix_author_file_mapping"),
100
- )
101
-
102
- author_id: Mapped[int] = mapped_column(ForeignKey("authors.id"), index=True)
103
- file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
104
-
105
-
106
- class File(Base, CommonMixin):
107
- """File model."""
108
-
109
- __tablename__ = "files"
110
-
111
- source_id: Mapped[int] = mapped_column(ForeignKey("sources.id"))
112
- mime_type: Mapped[str] = mapped_column(String(255), default="", index=True)
113
- uri: Mapped[str] = mapped_column(String(1024), default="", index=True)
114
- cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
115
- sha256: Mapped[str] = mapped_column(String(64), default="", index=True)
116
- size_bytes: Mapped[int] = mapped_column(Integer, default=0)
117
- extension: Mapped[str] = mapped_column(String(255), default="", index=True)
118
-
119
- def __init__( # noqa: PLR0913
120
- self,
121
- created_at: datetime,
122
- updated_at: datetime,
123
- source_id: int,
124
- mime_type: str,
125
- uri: str,
126
- cloned_path: str,
127
- sha256: str,
128
- size_bytes: int,
129
- extension: str,
130
- ) -> None:
131
- """Initialize a new File instance for typing purposes."""
132
- super().__init__()
133
- self.created_at = created_at
134
- self.updated_at = updated_at
135
- self.source_id = source_id
136
- self.mime_type = mime_type
137
- self.uri = uri
138
- self.cloned_path = cloned_path
139
- self.sha256 = sha256
140
- self.size_bytes = size_bytes
141
- self.extension = extension
142
-
143
-
144
- class EmbeddingType(Enum):
145
- """Embedding type."""
146
-
147
- CODE = 1
148
- TEXT = 2
149
-
150
-
151
- class Embedding(Base, CommonMixin):
152
- """Embedding model."""
153
-
154
- __tablename__ = "embeddings"
155
-
156
- snippet_id: Mapped[int] = mapped_column(ForeignKey("snippets.id"), index=True)
157
- type: Mapped[EmbeddingType] = mapped_column(
158
- SQLAlchemyEnum(EmbeddingType), index=True
159
- )
160
- embedding: Mapped[list[float]] = mapped_column(JSON)
161
-
162
-
163
- class Index(Base, CommonMixin):
164
- """Index model."""
165
-
166
- __tablename__ = "indexes"
167
-
168
- source_id: Mapped[int] = mapped_column(
169
- ForeignKey("sources.id"), unique=True, index=True
170
- )
171
-
172
- def __init__(self, source_id: int) -> None:
173
- """Initialize the index."""
174
- super().__init__()
175
- self.source_id = source_id
176
-
177
-
178
- class Snippet(Base, CommonMixin):
179
- """Snippet model."""
180
-
181
- __tablename__ = "snippets"
182
-
183
- file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
184
- index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"), index=True)
185
- content: Mapped[str] = mapped_column(UnicodeText, default="")
186
- summary: Mapped[str] = mapped_column(UnicodeText, default="")
187
-
188
- def __init__(
189
- self,
190
- file_id: int,
191
- index_id: int,
192
- content: str,
193
- summary: str = "",
194
- ) -> None:
195
- """Initialize the snippet."""
196
- super().__init__()
197
- self.file_id = file_id
198
- self.index_id = index_id
199
- self.content = content
200
- self.summary = summary
11
+ from kodit.domain.value_objects import (
12
+ FileProcessingStatus,
13
+ SnippetContent,
14
+ SnippetContentType,
15
+ SourceType,
16
+ )
17
+ from kodit.utils.path_utils import path_from_uri
18
+
19
+
20
+ class IgnorePatternProvider(Protocol):
21
+ """Protocol for ignore pattern providers."""
22
+
23
+ def should_ignore(self, path: Path) -> bool:
24
+ """Check if a path should be ignored."""
25
+ ...
26
+
27
+
28
+ class Author(BaseModel):
29
+ """Author domain entity."""
30
+
31
+ id: int | None = None
32
+ name: str
33
+ email: str
34
+
35
+
36
+ class File(BaseModel):
37
+ """File domain entity."""
38
+
39
+ id: int | None = None # Is populated by repository
40
+ created_at: datetime | None = None # Is populated by repository
41
+ updated_at: datetime | None = None # Is populated by repository
42
+ uri: AnyUrl
43
+ sha256: str
44
+ authors: list[Author]
45
+ mime_type: str
46
+ file_processing_status: FileProcessingStatus
47
+
48
+ def as_path(self) -> Path:
49
+ """Return the file as a path."""
50
+ return path_from_uri(str(self.uri))
51
+
52
+ def extension(self) -> str:
53
+ """Return the file extension."""
54
+ return Path(self.as_path()).suffix.lstrip(".")
55
+
56
+
57
+ class WorkingCopy(BaseModel):
58
+ """Working copy value object representing cloned source location."""
59
+
60
+ created_at: datetime | None = None # Is populated by repository
61
+ updated_at: datetime | None = None # Is populated by repository
62
+ remote_uri: AnyUrl
63
+ cloned_path: Path
64
+ source_type: SourceType
65
+ files: list[File]
66
+
67
+ @classmethod
68
+ def sanitize_local_path(cls, path: str) -> AnyUrl:
69
+ """Sanitize a local path."""
70
+ return AnyUrl(Path(path).resolve().absolute().as_uri())
71
+
72
+ @classmethod
73
+ def sanitize_git_url(cls, url: str) -> AnyUrl:
74
+ """Remove credentials from a git URL while preserving the rest of the URL.
75
+
76
+ This function handles various git URL formats:
77
+ - HTTPS URLs with username:password@host
78
+ - HTTPS URLs with username@host (no password)
79
+ - SSH URLs (left unchanged)
80
+ - File URLs (left unchanged)
81
+
82
+ Args:
83
+ url: The git URL that may contain credentials.
84
+
85
+ Returns:
86
+ The sanitized URL with credentials removed.
87
+
88
+ Examples:
89
+ >>> sanitize_git_url("https://phil:token@dev.azure.com/org/project/_git/repo")
90
+ "https://dev.azure.com/org/project/_git/repo"
91
+ >>> sanitize_git_url("https://username@github.com/user/repo.git")
92
+ "https://github.com/user/repo.git"
93
+ >>> sanitize_git_url("git@github.com:user/repo.git")
94
+ "ssh://git@github.com/user/repo.git"
95
+
96
+ """
97
+ # Handle SSH URLs (they don't have credentials in the URL format)
98
+ if url.startswith("git@"):
99
+ # Convert git@host:path to ssh://git@host/path format for AnyUrl
100
+ # This maintains the same semantic meaning while making it a valid URL
101
+ if ":" in url and not url.startswith("ssh://"):
102
+ host_path = url[4:] # Remove "git@"
103
+ if ":" in host_path:
104
+ host, path = host_path.split(":", 1)
105
+ ssh_url = f"ssh://git@{host}/{path}"
106
+ return AnyUrl(ssh_url)
107
+ return AnyUrl(url)
108
+ if url.startswith("ssh://"):
109
+ return AnyUrl(url)
110
+
111
+ # Handle file URLs
112
+ if url.startswith("file://"):
113
+ return AnyUrl(url)
114
+
115
+ try:
116
+ # Parse the URL
117
+ parsed = urlparse(url)
118
+
119
+ # If there are no credentials, return the URL as-is
120
+ if not parsed.username:
121
+ return AnyUrl(url)
122
+
123
+ # Reconstruct the URL without credentials
124
+ # scheme, netloc (without username/password), path, params, query, fragment
125
+ sanitized_netloc = parsed.hostname
126
+ if parsed.port:
127
+ sanitized_netloc = f"{parsed.hostname}:{parsed.port}"
128
+
129
+ return AnyUrl(
130
+ urlunparse(
131
+ (
132
+ parsed.scheme,
133
+ sanitized_netloc,
134
+ parsed.path,
135
+ parsed.params,
136
+ parsed.query,
137
+ parsed.fragment,
138
+ )
139
+ )
140
+ )
141
+
142
+ except Exception as e:
143
+ raise ValueError(f"Invalid URL: {url}") from e
144
+
145
+ def modified_or_deleted_files(self) -> list[File]:
146
+ """Return the modified or deleted files."""
147
+ return [
148
+ file
149
+ for file in self.files
150
+ if file.file_processing_status
151
+ in (FileProcessingStatus.MODIFIED, FileProcessingStatus.DELETED)
152
+ ]
153
+
154
+ def list_filesystem_paths(
155
+ self, ignore_provider: IgnorePatternProvider
156
+ ) -> list[Path]:
157
+ """List the filesystem paths of the files in the working copy."""
158
+ if not self.cloned_path.exists():
159
+ raise ValueError(f"Cloned path does not exist: {self.cloned_path}")
160
+
161
+ return [
162
+ f
163
+ for f in self.cloned_path.rglob("*")
164
+ if f.is_file() and not ignore_provider.should_ignore(f)
165
+ ]
166
+
167
+ def dirty_files(self) -> list[File]:
168
+ """Return the dirty files."""
169
+ return [
170
+ file
171
+ for file in self.files
172
+ if file.file_processing_status
173
+ in (FileProcessingStatus.MODIFIED, FileProcessingStatus.ADDED)
174
+ ]
175
+
176
+ def changed_files(self) -> list[File]:
177
+ """Return the changed files."""
178
+ return [
179
+ file
180
+ for file in self.files
181
+ if file.file_processing_status != FileProcessingStatus.CLEAN
182
+ ]
183
+
184
+ def clear_file_processing_statuses(self) -> None:
185
+ """Clear the file processing statuses."""
186
+ # First remove any files that are marked for deletion
187
+ self.files = [
188
+ file
189
+ for file in self.files
190
+ if file.file_processing_status != FileProcessingStatus.DELETED
191
+ ]
192
+ # Then clear the statuses for the remaining files
193
+ for file in self.files:
194
+ file.file_processing_status = FileProcessingStatus.CLEAN
195
+
196
+
197
+ class Source(BaseModel):
198
+ """Source domain entity."""
199
+
200
+ id: int | None = None # Is populated by repository
201
+ created_at: datetime | None = None # Is populated by repository
202
+ updated_at: datetime | None = None # Is populated by repository
203
+ working_copy: WorkingCopy
204
+
205
+
206
+ class Snippet(BaseModel):
207
+ """Snippet domain entity."""
208
+
209
+ id: int | None = None # Is populated by repository
210
+ created_at: datetime | None = None # Is populated by repository
211
+ updated_at: datetime | None = None # Is populated by repository
212
+ derives_from: list[File]
213
+ original_content: SnippetContent | None = None
214
+ summary_content: SnippetContent | None = None
215
+
216
+ def original_text(self) -> str:
217
+ """Return the original content of the snippet."""
218
+ if self.original_content is None:
219
+ return ""
220
+ return self.original_content.value
221
+
222
+ def summary_text(self) -> str:
223
+ """Return the summary content of the snippet."""
224
+ if self.summary_content is None:
225
+ return ""
226
+ return self.summary_content.value
227
+
228
+ def add_original_content(self, content: str, language: str) -> None:
229
+ """Add an original content to the snippet."""
230
+ self.original_content = SnippetContent(
231
+ type=SnippetContentType.ORIGINAL,
232
+ value=content,
233
+ language=language,
234
+ )
235
+
236
+ def add_summary(self, summary: str) -> None:
237
+ """Add a summary to the snippet."""
238
+ self.summary_content = SnippetContent(
239
+ type=SnippetContentType.SUMMARY,
240
+ value=summary,
241
+ language="markdown",
242
+ )
243
+
244
+
245
+ class Index(BaseModel):
246
+ """Index domain entity."""
247
+
248
+ id: int
249
+ created_at: datetime
250
+ updated_at: datetime
251
+ source: Source
252
+ snippets: list[Snippet]
253
+
254
+ def delete_snippets_for_files(self, files: list[File]) -> None:
255
+ """Delete the snippets that derive from a list of files."""
256
+ self.snippets = [
257
+ snippet
258
+ for snippet in self.snippets
259
+ if not any(file in snippet.derives_from for file in files)
260
+ ]
261
+
262
+
263
+ # FUTURE: Remove this type, use the domain to get the required information.
264
+ @dataclass(frozen=True)
265
+ class SnippetWithContext:
266
+ """Domain model for snippet with associated context information."""
267
+
268
+ source: Source
269
+ file: File
270
+ authors: list[Author]
271
+ snippet: Snippet
kodit/domain/protocols.py ADDED
@@ -0,0 +1,61 @@
1
+ """Repository protocol interfaces for the domain layer."""
2
+
3
+ from collections.abc import Sequence
4
+ from typing import Protocol
5
+
6
+ from pydantic import AnyUrl
7
+
8
+ from kodit.domain.entities import Index, Snippet, SnippetWithContext, WorkingCopy
9
+ from kodit.domain.value_objects import MultiSearchRequest
10
+
11
+
12
+ class IndexRepository(Protocol):
13
+ """Repository interface for Index entities."""
14
+
15
+ async def create(self, uri: AnyUrl, working_copy: WorkingCopy) -> Index:
16
+ """Create an index for a source."""
17
+ ...
18
+
19
+ async def update(self, index: Index) -> None:
20
+ """Update an index."""
21
+ ...
22
+
23
+ async def get(self, index_id: int) -> Index | None:
24
+ """Get an index by ID."""
25
+ ...
26
+
27
+ async def all(self) -> list[Index]:
28
+ """List all indexes."""
29
+ ...
30
+
31
+ async def get_by_uri(self, uri: AnyUrl) -> Index | None:
32
+ """Get an index by source URI."""
33
+ ...
34
+
35
+ async def update_index_timestamp(self, index_id: int) -> None:
36
+ """Update the timestamp of an index."""
37
+ ...
38
+
39
+ async def add_snippets(self, index_id: int, snippets: list[Snippet]) -> None:
40
+ """Add snippets to an index."""
41
+ ...
42
+
43
+ async def update_snippets(self, index_id: int, snippets: list[Snippet]) -> None:
44
+ """Update snippets for an index."""
45
+ ...
46
+
47
+ async def delete_snippets(self, index_id: int) -> None:
48
+ """Delete all snippets from an index."""
49
+ ...
50
+
51
+ async def delete_snippets_by_file_ids(self, file_ids: list[int]) -> None:
52
+ """Delete snippets by file IDs."""
53
+ ...
54
+
55
+ async def search(self, request: MultiSearchRequest) -> Sequence[SnippetWithContext]:
56
+ """Search snippets with filters."""
57
+ ...
58
+
59
+ async def get_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
60
+ """Get snippets by their IDs."""
61
+ ...