kodit-0.2.3-py3-none-any.whl → kodit-0.2.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (118)
  1. kodit/_version.py +2 -2
  2. kodit/application/__init__.py +1 -0
  3. kodit/application/commands/__init__.py +1 -0
  4. kodit/application/commands/snippet_commands.py +22 -0
  5. kodit/application/services/__init__.py +1 -0
  6. kodit/application/services/indexing_application_service.py +363 -0
  7. kodit/application/services/snippet_application_service.py +143 -0
  8. kodit/cli.py +105 -82
  9. kodit/database.py +0 -22
  10. kodit/domain/__init__.py +1 -0
  11. kodit/{source/source_models.py → domain/entities.py} +88 -19
  12. kodit/domain/enums.py +9 -0
  13. kodit/domain/interfaces.py +27 -0
  14. kodit/domain/repositories.py +95 -0
  15. kodit/domain/services/__init__.py +1 -0
  16. kodit/domain/services/bm25_service.py +124 -0
  17. kodit/domain/services/embedding_service.py +155 -0
  18. kodit/domain/services/enrichment_service.py +48 -0
  19. kodit/domain/services/ignore_service.py +45 -0
  20. kodit/domain/services/indexing_service.py +203 -0
  21. kodit/domain/services/snippet_extraction_service.py +89 -0
  22. kodit/domain/services/source_service.py +83 -0
  23. kodit/domain/value_objects.py +215 -0
  24. kodit/infrastructure/__init__.py +1 -0
  25. kodit/infrastructure/bm25/__init__.py +1 -0
  26. kodit/infrastructure/bm25/bm25_factory.py +28 -0
  27. kodit/{bm25/local_bm25.py → infrastructure/bm25/local_bm25_repository.py} +33 -22
  28. kodit/{bm25/vectorchord_bm25.py → infrastructure/bm25/vectorchord_bm25_repository.py} +40 -35
  29. kodit/infrastructure/cloning/__init__.py +1 -0
  30. kodit/infrastructure/cloning/folder/__init__.py +1 -0
  31. kodit/infrastructure/cloning/folder/factory.py +119 -0
  32. kodit/infrastructure/cloning/folder/working_copy.py +38 -0
  33. kodit/infrastructure/cloning/git/__init__.py +1 -0
  34. kodit/infrastructure/cloning/git/factory.py +133 -0
  35. kodit/infrastructure/cloning/git/working_copy.py +32 -0
  36. kodit/infrastructure/cloning/metadata.py +127 -0
  37. kodit/infrastructure/embedding/__init__.py +1 -0
  38. kodit/infrastructure/embedding/embedding_factory.py +87 -0
  39. kodit/infrastructure/embedding/embedding_providers/__init__.py +1 -0
  40. kodit/infrastructure/embedding/embedding_providers/batching.py +93 -0
  41. kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +79 -0
  42. kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +129 -0
  43. kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +113 -0
  44. kodit/infrastructure/embedding/local_vector_search_repository.py +114 -0
  45. kodit/{embedding/vectorchord_vector_search_service.py → infrastructure/embedding/vectorchord_vector_search_repository.py} +98 -32
  46. kodit/infrastructure/enrichment/__init__.py +1 -0
  47. kodit/{enrichment → infrastructure/enrichment}/enrichment_factory.py +28 -12
  48. kodit/infrastructure/enrichment/legacy_enrichment_models.py +42 -0
  49. kodit/infrastructure/enrichment/local_enrichment_provider.py +115 -0
  50. kodit/infrastructure/enrichment/null_enrichment_provider.py +25 -0
  51. kodit/infrastructure/enrichment/openai_enrichment_provider.py +89 -0
  52. kodit/infrastructure/git/__init__.py +1 -0
  53. kodit/{source/git.py → infrastructure/git/git_utils.py} +10 -2
  54. kodit/infrastructure/ignore/__init__.py +1 -0
  55. kodit/{source/ignore.py → infrastructure/ignore/ignore_pattern_provider.py} +23 -6
  56. kodit/infrastructure/indexing/__init__.py +1 -0
  57. kodit/infrastructure/indexing/fusion_service.py +55 -0
  58. kodit/infrastructure/indexing/index_repository.py +296 -0
  59. kodit/infrastructure/indexing/indexing_factory.py +111 -0
  60. kodit/infrastructure/snippet_extraction/__init__.py +1 -0
  61. kodit/infrastructure/snippet_extraction/language_detection_service.py +39 -0
  62. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +95 -0
  63. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +45 -0
  64. kodit/{snippets/method_snippets.py → infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py} +123 -61
  65. kodit/infrastructure/sqlalchemy/__init__.py +1 -0
  66. kodit/{embedding → infrastructure/sqlalchemy}/embedding_repository.py +40 -24
  67. kodit/infrastructure/sqlalchemy/file_repository.py +73 -0
  68. kodit/infrastructure/sqlalchemy/repository.py +121 -0
  69. kodit/infrastructure/sqlalchemy/snippet_repository.py +75 -0
  70. kodit/infrastructure/ui/__init__.py +1 -0
  71. kodit/infrastructure/ui/progress.py +127 -0
  72. kodit/{util → infrastructure/ui}/spinner.py +19 -4
  73. kodit/mcp.py +50 -28
  74. kodit/migrations/env.py +1 -4
  75. kodit/reporting.py +78 -0
  76. {kodit-0.2.3.dist-info → kodit-0.2.5.dist-info}/METADATA +1 -1
  77. kodit-0.2.5.dist-info/RECORD +99 -0
  78. kodit/bm25/__init__.py +0 -1
  79. kodit/bm25/keyword_search_factory.py +0 -17
  80. kodit/bm25/keyword_search_service.py +0 -34
  81. kodit/embedding/__init__.py +0 -1
  82. kodit/embedding/embedding_factory.py +0 -63
  83. kodit/embedding/embedding_models.py +0 -28
  84. kodit/embedding/embedding_provider/__init__.py +0 -1
  85. kodit/embedding/embedding_provider/embedding_provider.py +0 -64
  86. kodit/embedding/embedding_provider/hash_embedding_provider.py +0 -77
  87. kodit/embedding/embedding_provider/local_embedding_provider.py +0 -64
  88. kodit/embedding/embedding_provider/openai_embedding_provider.py +0 -77
  89. kodit/embedding/local_vector_search_service.py +0 -54
  90. kodit/embedding/vector_search_service.py +0 -38
  91. kodit/enrichment/__init__.py +0 -1
  92. kodit/enrichment/enrichment_provider/__init__.py +0 -1
  93. kodit/enrichment/enrichment_provider/enrichment_provider.py +0 -16
  94. kodit/enrichment/enrichment_provider/local_enrichment_provider.py +0 -92
  95. kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +0 -81
  96. kodit/enrichment/enrichment_service.py +0 -33
  97. kodit/indexing/__init__.py +0 -1
  98. kodit/indexing/fusion.py +0 -67
  99. kodit/indexing/indexing_models.py +0 -43
  100. kodit/indexing/indexing_repository.py +0 -216
  101. kodit/indexing/indexing_service.py +0 -338
  102. kodit/snippets/__init__.py +0 -1
  103. kodit/snippets/languages/__init__.py +0 -53
  104. kodit/snippets/snippets.py +0 -50
  105. kodit/source/__init__.py +0 -1
  106. kodit/source/source_factories.py +0 -356
  107. kodit/source/source_repository.py +0 -169
  108. kodit/source/source_service.py +0 -150
  109. kodit/util/__init__.py +0 -1
  110. kodit-0.2.3.dist-info/RECORD +0 -71
  111. /kodit/{snippets → infrastructure/snippet_extraction}/languages/csharp.scm +0 -0
  112. /kodit/{snippets → infrastructure/snippet_extraction}/languages/go.scm +0 -0
  113. /kodit/{snippets → infrastructure/snippet_extraction}/languages/javascript.scm +0 -0
  114. /kodit/{snippets → infrastructure/snippet_extraction}/languages/python.scm +0 -0
  115. /kodit/{snippets → infrastructure/snippet_extraction}/languages/typescript.scm +0 -0
  116. {kodit-0.2.3.dist-info → kodit-0.2.5.dist-info}/WHEEL +0 -0
  117. {kodit-0.2.3.dist-info → kodit-0.2.5.dist-info}/entry_points.txt +0 -0
  118. {kodit-0.2.3.dist-info → kodit-0.2.5.dist-info}/licenses/LICENSE +0 -0
kodit/cli.py CHANGED
@@ -1,5 +1,6 @@
1
1
  """Command line interface for kodit."""
2
2
 
3
+ import asyncio
3
4
  import signal
4
5
  from pathlib import Path
5
6
  from typing import Any
@@ -10,19 +11,54 @@ import uvicorn
10
11
  from pytable_formatter import Cell, Table
11
12
  from sqlalchemy.ext.asyncio import AsyncSession
12
13
 
13
- from kodit.bm25.keyword_search_factory import keyword_search_factory
14
+ from kodit.application.services.snippet_application_service import (
15
+ SnippetApplicationService,
16
+ )
14
17
  from kodit.config import (
15
18
  AppContext,
16
19
  with_app_context,
17
20
  with_session,
18
21
  )
19
- from kodit.embedding.embedding_factory import embedding_factory
20
- from kodit.enrichment.enrichment_factory import enrichment_factory
21
- from kodit.indexing.indexing_repository import IndexRepository
22
- from kodit.indexing.indexing_service import IndexService, SearchRequest
22
+ from kodit.domain.services.source_service import SourceService
23
+ from kodit.domain.value_objects import MultiSearchRequest
24
+ from kodit.infrastructure.indexing.indexing_factory import (
25
+ create_indexing_application_service,
26
+ )
27
+ from kodit.infrastructure.snippet_extraction.snippet_extraction_factory import (
28
+ create_snippet_extraction_domain_service,
29
+ create_snippet_repositories,
30
+ )
31
+ from kodit.infrastructure.ui.progress import (
32
+ create_lazy_progress_callback,
33
+ create_multi_stage_progress_callback,
34
+ )
23
35
  from kodit.log import configure_logging, configure_telemetry, log_event
24
- from kodit.source.source_repository import SourceRepository
25
- from kodit.source.source_service import SourceService
36
+
37
+
38
+ def create_snippet_application_service(
39
+ session: AsyncSession,
40
+ ) -> SnippetApplicationService:
41
+ """Create a snippet application service with all dependencies.
42
+
43
+ Args:
44
+ session: SQLAlchemy session
45
+
46
+ Returns:
47
+ Configured snippet application service
48
+
49
+ """
50
+ # Create domain service
51
+ snippet_extraction_service = create_snippet_extraction_domain_service()
52
+
53
+ # Create repositories
54
+ snippet_repository, file_repository = create_snippet_repositories(session)
55
+
56
+ # Create application service
57
+ return SnippetApplicationService(
58
+ snippet_extraction_service=snippet_extraction_service,
59
+ snippet_repository=snippet_repository,
60
+ file_repository=file_repository,
61
+ )
26
62
 
27
63
 
28
64
  @click.group(context_settings={"max_content_width": 100})
@@ -64,20 +100,16 @@ async def index(
64
100
  sources: list[str],
65
101
  ) -> None:
66
102
  """List indexes, or index data sources."""
67
- source_repository = SourceRepository(session)
68
- source_service = SourceService(app_context.get_clone_dir(), source_repository)
69
- repository = IndexRepository(session)
70
- service = IndexService(
71
- repository=repository,
103
+ source_service = SourceService(
104
+ clone_dir=app_context.get_clone_dir(),
105
+ session_factory=lambda: session,
106
+ )
107
+ snippet_service = create_snippet_application_service(session)
108
+ service = create_indexing_application_service(
109
+ app_context=app_context,
110
+ session=session,
72
111
  source_service=source_service,
73
- keyword_search_provider=keyword_search_factory(app_context, session),
74
- code_search_service=embedding_factory(
75
- task_name="code", app_context=app_context, session=session
76
- ),
77
- text_search_service=embedding_factory(
78
- task_name="text", app_context=app_context, session=session
79
- ),
80
- enrichment_service=enrichment_factory(app_context),
112
+ snippet_application_service=snippet_service,
81
113
  )
82
114
 
83
115
  if not sources:
@@ -109,11 +141,18 @@ async def index(
109
141
  msg = "File indexing is not implemented yet"
110
142
  raise click.UsageError(msg)
111
143
 
112
- # Index source
144
+ # Index source with progress
113
145
  log_event("kodit.cli.index.create")
114
- s = await source_service.create(source)
115
- index = await service.create(s.id)
116
- await service.run(index.id)
146
+
147
+ # Create a lazy progress callback that only shows progress when needed
148
+ progress_callback = create_lazy_progress_callback()
149
+ s = await source_service.create(source, progress_callback)
150
+
151
+ index = await service.create_index(s.id)
152
+
153
+ # Create a new progress callback for the indexing operations
154
+ indexing_progress_callback = create_multi_stage_progress_callback()
155
+ await service.run_index(index.id, indexing_progress_callback)
117
156
 
118
157
 
119
158
  @cli.group()
@@ -137,23 +176,19 @@ async def code(
137
176
  This works best if your query is code.
138
177
  """
139
178
  log_event("kodit.cli.search.code")
140
- source_repository = SourceRepository(session)
141
- source_service = SourceService(app_context.get_clone_dir(), source_repository)
142
- repository = IndexRepository(session)
143
- service = IndexService(
144
- repository=repository,
179
+ source_service = SourceService(
180
+ clone_dir=app_context.get_clone_dir(),
181
+ session_factory=lambda: session,
182
+ )
183
+ snippet_service = create_snippet_application_service(session)
184
+ service = create_indexing_application_service(
185
+ app_context=app_context,
186
+ session=session,
145
187
  source_service=source_service,
146
- keyword_search_provider=keyword_search_factory(app_context, session),
147
- code_search_service=embedding_factory(
148
- task_name="code", app_context=app_context, session=session
149
- ),
150
- text_search_service=embedding_factory(
151
- task_name="text", app_context=app_context, session=session
152
- ),
153
- enrichment_service=enrichment_factory(app_context),
188
+ snippet_application_service=snippet_service,
154
189
  )
155
190
 
156
- snippets = await service.search(SearchRequest(code_query=query, top_k=top_k))
191
+ snippets = await service.search(MultiSearchRequest(code_query=query, top_k=top_k))
157
192
 
158
193
  if len(snippets) == 0:
159
194
  click.echo("No snippets found")
@@ -181,23 +216,19 @@ async def keyword(
181
216
  ) -> None:
182
217
  """Search for snippets using keyword search."""
183
218
  log_event("kodit.cli.search.keyword")
184
- source_repository = SourceRepository(session)
185
- source_service = SourceService(app_context.get_clone_dir(), source_repository)
186
- repository = IndexRepository(session)
187
- service = IndexService(
188
- repository=repository,
219
+ source_service = SourceService(
220
+ clone_dir=app_context.get_clone_dir(),
221
+ session_factory=lambda: session,
222
+ )
223
+ snippet_service = create_snippet_application_service(session)
224
+ service = create_indexing_application_service(
225
+ app_context=app_context,
226
+ session=session,
189
227
  source_service=source_service,
190
- keyword_search_provider=keyword_search_factory(app_context, session),
191
- code_search_service=embedding_factory(
192
- task_name="code", app_context=app_context, session=session
193
- ),
194
- text_search_service=embedding_factory(
195
- task_name="text", app_context=app_context, session=session
196
- ),
197
- enrichment_service=enrichment_factory(app_context),
228
+ snippet_application_service=snippet_service,
198
229
  )
199
230
 
200
- snippets = await service.search(SearchRequest(keywords=keywords, top_k=top_k))
231
+ snippets = await service.search(MultiSearchRequest(keywords=keywords, top_k=top_k))
201
232
 
202
233
  if len(snippets) == 0:
203
234
  click.echo("No snippets found")
@@ -228,23 +259,19 @@ async def text(
228
259
  This works best if your query is text.
229
260
  """
230
261
  log_event("kodit.cli.search.text")
231
- source_repository = SourceRepository(session)
232
- source_service = SourceService(app_context.get_clone_dir(), source_repository)
233
- repository = IndexRepository(session)
234
- service = IndexService(
235
- repository=repository,
262
+ source_service = SourceService(
263
+ clone_dir=app_context.get_clone_dir(),
264
+ session_factory=lambda: session,
265
+ )
266
+ snippet_service = create_snippet_application_service(session)
267
+ service = create_indexing_application_service(
268
+ app_context=app_context,
269
+ session=session,
236
270
  source_service=source_service,
237
- keyword_search_provider=keyword_search_factory(app_context, session),
238
- code_search_service=embedding_factory(
239
- task_name="code", app_context=app_context, session=session
240
- ),
241
- text_search_service=embedding_factory(
242
- task_name="text", app_context=app_context, session=session
243
- ),
244
- enrichment_service=enrichment_factory(app_context),
271
+ snippet_application_service=snippet_service,
245
272
  )
246
273
 
247
- snippets = await service.search(SearchRequest(text_query=query, top_k=top_k))
274
+ snippets = await service.search(MultiSearchRequest(text_query=query, top_k=top_k))
248
275
 
249
276
  if len(snippets) == 0:
250
277
  click.echo("No snippets found")
@@ -276,30 +303,26 @@ async def hybrid( # noqa: PLR0913
276
303
  ) -> None:
277
304
  """Search for snippets using hybrid search."""
278
305
  log_event("kodit.cli.search.hybrid")
279
- source_repository = SourceRepository(session)
280
- source_service = SourceService(app_context.get_clone_dir(), source_repository)
281
- repository = IndexRepository(session)
282
- service = IndexService(
283
- repository=repository,
306
+ source_service = SourceService(
307
+ clone_dir=app_context.get_clone_dir(),
308
+ session_factory=lambda: session,
309
+ )
310
+ snippet_service = create_snippet_application_service(session)
311
+ service = create_indexing_application_service(
312
+ app_context=app_context,
313
+ session=session,
284
314
  source_service=source_service,
285
- keyword_search_provider=keyword_search_factory(app_context, session),
286
- code_search_service=embedding_factory(
287
- task_name="code", app_context=app_context, session=session
288
- ),
289
- text_search_service=embedding_factory(
290
- task_name="text", app_context=app_context, session=session
291
- ),
292
- enrichment_service=enrichment_factory(app_context),
315
+ snippet_application_service=snippet_service,
293
316
  )
294
317
 
295
318
  # Parse keywords into a list of strings
296
319
  keywords_list = [k.strip().lower() for k in keywords.split(",")]
297
320
 
298
321
  snippets = await service.search(
299
- SearchRequest(
300
- text_query=text,
322
+ MultiSearchRequest(
301
323
  keywords=keywords_list,
302
324
  code_query=code,
325
+ text_query=text,
303
326
  top_k=top_k,
304
327
  )
305
328
  )
@@ -362,4 +385,4 @@ def version() -> None:
362
385
 
363
386
 
364
387
  if __name__ == "__main__":
365
- cli()
388
+ asyncio.run(cli())
kodit/database.py CHANGED
@@ -1,41 +1,19 @@
1
1
  """Database configuration for kodit."""
2
2
 
3
- from datetime import UTC, datetime
4
3
  from pathlib import Path
5
4
 
6
5
  import structlog
7
6
  from alembic import command
8
7
  from alembic.config import Config as AlembicConfig
9
- from sqlalchemy import DateTime
10
8
  from sqlalchemy.ext.asyncio import (
11
- AsyncAttrs,
12
9
  AsyncSession,
13
10
  async_sessionmaker,
14
11
  create_async_engine,
15
12
  )
16
- from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
17
13
 
18
14
  from kodit import migrations
19
15
 
20
16
 
21
- class Base(AsyncAttrs, DeclarativeBase):
22
- """Base class for all models."""
23
-
24
-
25
- class CommonMixin:
26
- """Common mixin for all models."""
27
-
28
- id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
29
- created_at: Mapped[datetime] = mapped_column(
30
- DateTime(timezone=True), default=lambda: datetime.now(UTC)
31
- )
32
- updated_at: Mapped[datetime] = mapped_column(
33
- DateTime(timezone=True),
34
- default=lambda: datetime.now(UTC),
35
- onupdate=lambda: datetime.now(UTC),
36
- )
37
-
38
-
39
17
  class Database:
40
18
  """Database class for kodit."""
41
19
 
kodit/domain/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Domain layer containing models, services, and repositories."""
kodit/{source/source_models.py → domain/entities.py} RENAMED
@@ -1,24 +1,42 @@
1
- """Source models for managing code sources.
1
+ """SQLAlchemy entities."""
2
2
 
3
- This module defines the SQLAlchemy models used for storing and managing code sources.
4
- It includes models for tracking different types of sources (git repositories and local
5
- folders) and their relationships.
6
- """
7
-
8
- import datetime
9
- from enum import Enum as EnumType
3
+ from datetime import UTC, datetime
4
+ from enum import Enum
10
5
 
11
6
  from git import Actor
12
- from sqlalchemy import Enum, ForeignKey, Integer, String, UniqueConstraint
13
- from sqlalchemy.orm import Mapped, mapped_column
14
-
15
- from kodit.database import Base, CommonMixin
16
-
17
- # Enable proper type hints for SQLAlchemy models
18
- __all__ = ["File", "Source"]
7
+ from sqlalchemy import (
8
+ DateTime,
9
+ ForeignKey,
10
+ Integer,
11
+ String,
12
+ UnicodeText,
13
+ UniqueConstraint,
14
+ )
15
+ from sqlalchemy import Enum as SQLAlchemyEnum
16
+ from sqlalchemy.ext.asyncio import AsyncAttrs
17
+ from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
18
+ from sqlalchemy.types import JSON
19
+
20
+
21
+ class Base(AsyncAttrs, DeclarativeBase):
22
+ """Base class for all models."""
23
+
24
+
25
+ class CommonMixin:
26
+ """Common mixin for all models."""
27
+
28
+ id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
29
+ created_at: Mapped[datetime] = mapped_column(
30
+ DateTime(timezone=True), default=lambda: datetime.now(UTC)
31
+ )
32
+ updated_at: Mapped[datetime] = mapped_column(
33
+ DateTime(timezone=True),
34
+ default=lambda: datetime.now(UTC),
35
+ onupdate=lambda: datetime.now(UTC),
36
+ )
19
37
 
20
38
 
21
- class SourceType(EnumType):
39
+ class SourceType(Enum):
22
40
  """The type of source."""
23
41
 
24
42
  UNKNOWN = 0
@@ -45,7 +63,7 @@ class Source(Base, CommonMixin):
45
63
  uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
46
64
  cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
47
65
  type: Mapped[SourceType] = mapped_column(
48
- Enum(SourceType), default=SourceType.UNKNOWN, index=True
66
+ SQLAlchemyEnum(SourceType), default=SourceType.UNKNOWN, index=True
49
67
  )
50
68
 
51
69
  def __init__(self, uri: str, cloned_path: str, source_type: SourceType) -> None:
@@ -100,8 +118,8 @@ class File(Base, CommonMixin):
100
118
 
101
119
  def __init__( # noqa: PLR0913
102
120
  self,
103
- created_at: datetime.datetime,
104
- updated_at: datetime.datetime,
121
+ created_at: datetime,
122
+ updated_at: datetime,
105
123
  source_id: int,
106
124
  cloned_path: str,
107
125
  mime_type: str = "",
@@ -119,3 +137,54 @@ class File(Base, CommonMixin):
119
137
  self.uri = uri
120
138
  self.sha256 = sha256
121
139
  self.size_bytes = size_bytes
140
+
141
+
142
+ class EmbeddingType(Enum):
143
+ """Embedding type."""
144
+
145
+ CODE = 1
146
+ TEXT = 2
147
+
148
+
149
+ class Embedding(Base, CommonMixin):
150
+ """Embedding model."""
151
+
152
+ __tablename__ = "embeddings"
153
+
154
+ snippet_id: Mapped[int] = mapped_column(ForeignKey("snippets.id"), index=True)
155
+ type: Mapped[EmbeddingType] = mapped_column(
156
+ SQLAlchemyEnum(EmbeddingType), index=True
157
+ )
158
+ embedding: Mapped[list[float]] = mapped_column(JSON)
159
+
160
+
161
+ class Index(Base, CommonMixin):
162
+ """Index model."""
163
+
164
+ __tablename__ = "indexes"
165
+
166
+ source_id: Mapped[int] = mapped_column(
167
+ ForeignKey("sources.id"), unique=True, index=True
168
+ )
169
+
170
+ def __init__(self, source_id: int) -> None:
171
+ """Initialize the index."""
172
+ super().__init__()
173
+ self.source_id = source_id
174
+
175
+
176
+ class Snippet(Base, CommonMixin):
177
+ """Snippet model."""
178
+
179
+ __tablename__ = "snippets"
180
+
181
+ file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
182
+ index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"), index=True)
183
+ content: Mapped[str] = mapped_column(UnicodeText, default="")
184
+
185
+ def __init__(self, file_id: int, index_id: int, content: str) -> None:
186
+ """Initialize the snippet."""
187
+ super().__init__()
188
+ self.file_id = file_id
189
+ self.index_id = index_id
190
+ self.content = content
kodit/domain/enums.py ADDED
@@ -0,0 +1,9 @@
1
+ """Domain enums."""
2
+
3
+ from enum import Enum
4
+
5
+
6
+ class SnippetExtractionStrategy(str, Enum):
7
+ """Different strategies for extracting snippets from files."""
8
+
9
+ METHOD_BASED = "method_based"
kodit/domain/interfaces.py ADDED
@@ -0,0 +1,27 @@
1
+ """Domain interfaces."""
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+ from kodit.domain.value_objects import ProgressEvent
6
+
7
+
8
+ class ProgressCallback(ABC):
9
+ """Abstract interface for progress callbacks."""
10
+
11
+ @abstractmethod
12
+ async def on_progress(self, event: ProgressEvent) -> None:
13
+ """On progress hook."""
14
+
15
+ @abstractmethod
16
+ async def on_complete(self, operation: str) -> None:
17
+ """On complete hook."""
18
+
19
+
20
+ class NullProgressCallback(ProgressCallback):
21
+ """Null implementation of progress callback that does nothing."""
22
+
23
+ async def on_progress(self, event: ProgressEvent) -> None:
24
+ """Do nothing on progress."""
25
+
26
+ async def on_complete(self, operation: str) -> None:
27
+ """Do nothing on complete."""
kodit/domain/repositories.py ADDED
@@ -0,0 +1,95 @@
1
+ """Domain repositories with generic patterns."""
2
+
3
+ from collections.abc import Sequence
4
+ from typing import Protocol, TypeVar
5
+
6
+ from kodit.domain.entities import (
7
+ Author,
8
+ AuthorFileMapping,
9
+ File,
10
+ Snippet,
11
+ Source,
12
+ SourceType,
13
+ )
14
+
15
+ T = TypeVar("T")
16
+
17
+
18
+ class GenericRepository(Protocol[T]):
19
+ """Generic repository interface."""
20
+
21
+ async def get(self, id: int) -> T | None: # noqa: A002
22
+ """Get entity by ID."""
23
+ ...
24
+
25
+ async def save(self, entity: T) -> T:
26
+ """Save entity."""
27
+ ...
28
+
29
+ async def delete(self, id: int) -> None: # noqa: A002
30
+ """Delete entity by ID."""
31
+ ...
32
+
33
+ async def list(self) -> Sequence[T]:
34
+ """List all entities."""
35
+ ...
36
+
37
+
38
+ class SourceRepository(GenericRepository[Source]):
39
+ """Source repository with specific methods."""
40
+
41
+ async def get_by_uri(self, uri: str) -> Source | None:
42
+ """Get a source by URI."""
43
+ raise NotImplementedError
44
+
45
+ async def list_by_type(
46
+ self, source_type: SourceType | None = None
47
+ ) -> Sequence[Source]:
48
+ """List sources by type."""
49
+ raise NotImplementedError
50
+
51
+ async def create_file(self, file: File) -> File:
52
+ """Create a new file record."""
53
+ raise NotImplementedError
54
+
55
+ async def upsert_author(self, author: Author) -> Author:
56
+ """Create a new author or return existing one if email already exists."""
57
+ raise NotImplementedError
58
+
59
+ async def upsert_author_file_mapping(
60
+ self, mapping: "AuthorFileMapping"
61
+ ) -> "AuthorFileMapping":
62
+ """Create a new author file mapping or return existing one if already exists."""
63
+ raise NotImplementedError
64
+
65
+
66
+ class AuthorRepository(GenericRepository[Author]):
67
+ """Author repository with specific methods."""
68
+
69
+ async def get_by_name(self, name: str) -> Author | None:
70
+ """Get an author by name."""
71
+ raise NotImplementedError
72
+
73
+ async def get_by_email(self, email: str) -> Author | None:
74
+ """Get an author by email."""
75
+ raise NotImplementedError
76
+
77
+
78
+ class SnippetRepository(GenericRepository[Snippet]):
79
+ """Snippet repository with specific methods."""
80
+
81
+ async def get_by_index(self, index_id: int) -> Sequence[Snippet]:
82
+ """Get all snippets for an index."""
83
+ raise NotImplementedError
84
+
85
+ async def delete_by_index(self, index_id: int) -> None:
86
+ """Delete all snippets for an index."""
87
+ raise NotImplementedError
88
+
89
+
90
+ class FileRepository(GenericRepository[File]):
91
+ """File repository with specific methods."""
92
+
93
+ async def get_files_for_index(self, index_id: int) -> Sequence[File]:
94
+ """Get all files for an index."""
95
+ raise NotImplementedError
kodit/domain/services/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Domain services for business logic."""