kodit 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (118)
  1. kodit/_version.py +2 -2
  2. kodit/application/__init__.py +1 -0
  3. kodit/application/commands/__init__.py +1 -0
  4. kodit/application/commands/snippet_commands.py +22 -0
  5. kodit/application/services/__init__.py +1 -0
  6. kodit/application/services/indexing_application_service.py +387 -0
  7. kodit/application/services/snippet_application_service.py +149 -0
  8. kodit/cli.py +118 -82
  9. kodit/database.py +0 -22
  10. kodit/domain/__init__.py +1 -0
  11. kodit/{source/source_models.py → domain/entities.py} +88 -19
  12. kodit/domain/enums.py +9 -0
  13. kodit/domain/errors.py +5 -0
  14. kodit/domain/interfaces.py +27 -0
  15. kodit/domain/repositories.py +95 -0
  16. kodit/domain/services/__init__.py +1 -0
  17. kodit/domain/services/bm25_service.py +124 -0
  18. kodit/domain/services/embedding_service.py +155 -0
  19. kodit/domain/services/enrichment_service.py +48 -0
  20. kodit/domain/services/ignore_service.py +45 -0
  21. kodit/domain/services/indexing_service.py +203 -0
  22. kodit/domain/services/snippet_extraction_service.py +89 -0
  23. kodit/domain/services/source_service.py +85 -0
  24. kodit/domain/value_objects.py +215 -0
  25. kodit/infrastructure/__init__.py +1 -0
  26. kodit/infrastructure/bm25/__init__.py +1 -0
  27. kodit/infrastructure/bm25/bm25_factory.py +28 -0
  28. kodit/{bm25/local_bm25.py → infrastructure/bm25/local_bm25_repository.py} +33 -22
  29. kodit/{bm25/vectorchord_bm25.py → infrastructure/bm25/vectorchord_bm25_repository.py} +40 -35
  30. kodit/infrastructure/cloning/__init__.py +1 -0
  31. kodit/infrastructure/cloning/folder/__init__.py +1 -0
  32. kodit/infrastructure/cloning/folder/factory.py +128 -0
  33. kodit/infrastructure/cloning/folder/working_copy.py +38 -0
  34. kodit/infrastructure/cloning/git/__init__.py +1 -0
  35. kodit/infrastructure/cloning/git/factory.py +147 -0
  36. kodit/infrastructure/cloning/git/working_copy.py +32 -0
  37. kodit/infrastructure/cloning/metadata.py +127 -0
  38. kodit/infrastructure/embedding/__init__.py +1 -0
  39. kodit/infrastructure/embedding/embedding_factory.py +87 -0
  40. kodit/infrastructure/embedding/embedding_providers/__init__.py +1 -0
  41. kodit/infrastructure/embedding/embedding_providers/batching.py +93 -0
  42. kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +79 -0
  43. kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +129 -0
  44. kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +113 -0
  45. kodit/infrastructure/embedding/local_vector_search_repository.py +114 -0
  46. kodit/{embedding/vectorchord_vector_search_service.py → infrastructure/embedding/vectorchord_vector_search_repository.py} +65 -46
  47. kodit/infrastructure/enrichment/__init__.py +1 -0
  48. kodit/{enrichment → infrastructure/enrichment}/enrichment_factory.py +28 -12
  49. kodit/infrastructure/enrichment/legacy_enrichment_models.py +42 -0
  50. kodit/{enrichment/enrichment_provider → infrastructure/enrichment}/local_enrichment_provider.py +38 -26
  51. kodit/infrastructure/enrichment/null_enrichment_provider.py +25 -0
  52. kodit/infrastructure/enrichment/openai_enrichment_provider.py +89 -0
  53. kodit/infrastructure/git/__init__.py +1 -0
  54. kodit/{source/git.py → infrastructure/git/git_utils.py} +10 -2
  55. kodit/infrastructure/ignore/__init__.py +1 -0
  56. kodit/{source/ignore.py → infrastructure/ignore/ignore_pattern_provider.py} +23 -6
  57. kodit/infrastructure/indexing/__init__.py +1 -0
  58. kodit/infrastructure/indexing/fusion_service.py +55 -0
  59. kodit/infrastructure/indexing/index_repository.py +291 -0
  60. kodit/infrastructure/indexing/indexing_factory.py +113 -0
  61. kodit/infrastructure/snippet_extraction/__init__.py +1 -0
  62. kodit/infrastructure/snippet_extraction/language_detection_service.py +39 -0
  63. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +95 -0
  64. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +45 -0
  65. kodit/{snippets/method_snippets.py → infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py} +123 -61
  66. kodit/infrastructure/sqlalchemy/__init__.py +1 -0
  67. kodit/{embedding → infrastructure/sqlalchemy}/embedding_repository.py +40 -26
  68. kodit/infrastructure/sqlalchemy/file_repository.py +78 -0
  69. kodit/infrastructure/sqlalchemy/repository.py +133 -0
  70. kodit/infrastructure/sqlalchemy/snippet_repository.py +79 -0
  71. kodit/infrastructure/ui/__init__.py +1 -0
  72. kodit/infrastructure/ui/progress.py +127 -0
  73. kodit/{util → infrastructure/ui}/spinner.py +19 -4
  74. kodit/mcp.py +51 -28
  75. kodit/migrations/env.py +1 -4
  76. kodit/reporting.py +78 -0
  77. {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/METADATA +1 -1
  78. kodit-0.2.6.dist-info/RECORD +100 -0
  79. kodit/bm25/__init__.py +0 -1
  80. kodit/bm25/keyword_search_factory.py +0 -17
  81. kodit/bm25/keyword_search_service.py +0 -34
  82. kodit/embedding/__init__.py +0 -1
  83. kodit/embedding/embedding_factory.py +0 -69
  84. kodit/embedding/embedding_models.py +0 -28
  85. kodit/embedding/embedding_provider/__init__.py +0 -1
  86. kodit/embedding/embedding_provider/embedding_provider.py +0 -92
  87. kodit/embedding/embedding_provider/hash_embedding_provider.py +0 -86
  88. kodit/embedding/embedding_provider/local_embedding_provider.py +0 -96
  89. kodit/embedding/embedding_provider/openai_embedding_provider.py +0 -73
  90. kodit/embedding/local_vector_search_service.py +0 -87
  91. kodit/embedding/vector_search_service.py +0 -55
  92. kodit/enrichment/__init__.py +0 -1
  93. kodit/enrichment/enrichment_provider/__init__.py +0 -1
  94. kodit/enrichment/enrichment_provider/enrichment_provider.py +0 -36
  95. kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +0 -79
  96. kodit/enrichment/enrichment_service.py +0 -45
  97. kodit/indexing/__init__.py +0 -1
  98. kodit/indexing/fusion.py +0 -67
  99. kodit/indexing/indexing_models.py +0 -43
  100. kodit/indexing/indexing_repository.py +0 -216
  101. kodit/indexing/indexing_service.py +0 -344
  102. kodit/snippets/__init__.py +0 -1
  103. kodit/snippets/languages/__init__.py +0 -53
  104. kodit/snippets/snippets.py +0 -50
  105. kodit/source/__init__.py +0 -1
  106. kodit/source/source_factories.py +0 -356
  107. kodit/source/source_repository.py +0 -169
  108. kodit/source/source_service.py +0 -150
  109. kodit/util/__init__.py +0 -1
  110. kodit-0.2.4.dist-info/RECORD +0 -71
  111. /kodit/{snippets → infrastructure/snippet_extraction}/languages/csharp.scm +0 -0
  112. /kodit/{snippets → infrastructure/snippet_extraction}/languages/go.scm +0 -0
  113. /kodit/{snippets → infrastructure/snippet_extraction}/languages/javascript.scm +0 -0
  114. /kodit/{snippets → infrastructure/snippet_extraction}/languages/python.scm +0 -0
  115. /kodit/{snippets → infrastructure/snippet_extraction}/languages/typescript.scm +0 -0
  116. {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/WHEEL +0 -0
  117. {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/entry_points.txt +0 -0
  118. {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/licenses/LICENSE +0 -0
kodit/cli.py CHANGED
@@ -1,5 +1,6 @@
1
1
  """Command line interface for kodit."""
2
2
 
3
+ import asyncio
3
4
  import signal
4
5
  from pathlib import Path
5
6
  from typing import Any
@@ -10,19 +11,56 @@ import uvicorn
10
11
  from pytable_formatter import Cell, Table
11
12
  from sqlalchemy.ext.asyncio import AsyncSession
12
13
 
13
- from kodit.bm25.keyword_search_factory import keyword_search_factory
14
+ from kodit.application.services.snippet_application_service import (
15
+ SnippetApplicationService,
16
+ )
14
17
  from kodit.config import (
15
18
  AppContext,
16
19
  with_app_context,
17
20
  with_session,
18
21
  )
19
- from kodit.embedding.embedding_factory import embedding_factory
20
- from kodit.enrichment.enrichment_factory import enrichment_factory
21
- from kodit.indexing.indexing_repository import IndexRepository
22
- from kodit.indexing.indexing_service import IndexService, SearchRequest
22
+ from kodit.domain.errors import EmptySourceError
23
+ from kodit.domain.services.source_service import SourceService
24
+ from kodit.domain.value_objects import MultiSearchRequest
25
+ from kodit.infrastructure.indexing.indexing_factory import (
26
+ create_indexing_application_service,
27
+ )
28
+ from kodit.infrastructure.snippet_extraction.snippet_extraction_factory import (
29
+ create_snippet_extraction_domain_service,
30
+ create_snippet_repositories,
31
+ )
32
+ from kodit.infrastructure.ui.progress import (
33
+ create_lazy_progress_callback,
34
+ create_multi_stage_progress_callback,
35
+ )
23
36
  from kodit.log import configure_logging, configure_telemetry, log_event
24
- from kodit.source.source_repository import SourceRepository
25
- from kodit.source.source_service import SourceService
37
+
38
+
39
+ def create_snippet_application_service(
40
+ session: AsyncSession,
41
+ ) -> SnippetApplicationService:
42
+ """Create a snippet application service with all dependencies.
43
+
44
+ Args:
45
+ session: SQLAlchemy session
46
+
47
+ Returns:
48
+ Configured snippet application service
49
+
50
+ """
51
+ # Create domain service
52
+ snippet_extraction_service = create_snippet_extraction_domain_service()
53
+
54
+ # Create repositories
55
+ snippet_repository, file_repository = create_snippet_repositories(session)
56
+
57
+ # Create application service
58
+ return SnippetApplicationService(
59
+ snippet_extraction_service=snippet_extraction_service,
60
+ snippet_repository=snippet_repository,
61
+ file_repository=file_repository,
62
+ session=session,
63
+ )
26
64
 
27
65
 
28
66
  @click.group(context_settings={"max_content_width": 100})
@@ -64,20 +102,17 @@ async def index(
64
102
  sources: list[str],
65
103
  ) -> None:
66
104
  """List indexes, or index data sources."""
67
- source_repository = SourceRepository(session)
68
- source_service = SourceService(app_context.get_clone_dir(), source_repository)
69
- repository = IndexRepository(session)
70
- service = IndexService(
71
- repository=repository,
105
+ log = structlog.get_logger(__name__)
106
+ source_service = SourceService(
107
+ clone_dir=app_context.get_clone_dir(),
108
+ session_factory=lambda: session,
109
+ )
110
+ snippet_service = create_snippet_application_service(session)
111
+ service = create_indexing_application_service(
112
+ app_context=app_context,
113
+ session=session,
72
114
  source_service=source_service,
73
- keyword_search_provider=keyword_search_factory(app_context, session),
74
- code_search_service=embedding_factory(
75
- task_name="code", app_context=app_context, session=session
76
- ),
77
- text_search_service=embedding_factory(
78
- task_name="text", app_context=app_context, session=session
79
- ),
80
- enrichment_service=enrichment_factory(app_context),
115
+ snippet_application_service=snippet_service,
81
116
  )
82
117
 
83
118
  if not sources:
@@ -109,11 +144,28 @@ async def index(
109
144
  msg = "File indexing is not implemented yet"
110
145
  raise click.UsageError(msg)
111
146
 
112
- # Index source
147
+ # Index source with progress
113
148
  log_event("kodit.cli.index.create")
114
- s = await source_service.create(source)
115
- index = await service.create(s.id)
116
- await service.run(index.id)
149
+
150
+ # Create a lazy progress callback that only shows progress when needed
151
+ progress_callback = create_lazy_progress_callback()
152
+ s = await source_service.create(source, progress_callback)
153
+
154
+ index = await service.create_index(s.id)
155
+
156
+ # Create a new progress callback for the indexing operations
157
+ indexing_progress_callback = create_multi_stage_progress_callback()
158
+ try:
159
+ await service.run_index(index.id, indexing_progress_callback)
160
+ except EmptySourceError as e:
161
+ log.exception("Empty source error", error=e)
162
+ msg = f"""{e}. This could mean:
163
+ • The repository contains no supported file types
164
+ • All files are excluded by ignore patterns
165
+ • The files contain no extractable code snippets
166
+ Please check the repository contents and try again.
167
+ """
168
+ click.echo(msg)
117
169
 
118
170
 
119
171
  @cli.group()
@@ -137,23 +189,19 @@ async def code(
137
189
  This works best if your query is code.
138
190
  """
139
191
  log_event("kodit.cli.search.code")
140
- source_repository = SourceRepository(session)
141
- source_service = SourceService(app_context.get_clone_dir(), source_repository)
142
- repository = IndexRepository(session)
143
- service = IndexService(
144
- repository=repository,
192
+ source_service = SourceService(
193
+ clone_dir=app_context.get_clone_dir(),
194
+ session_factory=lambda: session,
195
+ )
196
+ snippet_service = create_snippet_application_service(session)
197
+ service = create_indexing_application_service(
198
+ app_context=app_context,
199
+ session=session,
145
200
  source_service=source_service,
146
- keyword_search_provider=keyword_search_factory(app_context, session),
147
- code_search_service=embedding_factory(
148
- task_name="code", app_context=app_context, session=session
149
- ),
150
- text_search_service=embedding_factory(
151
- task_name="text", app_context=app_context, session=session
152
- ),
153
- enrichment_service=enrichment_factory(app_context),
201
+ snippet_application_service=snippet_service,
154
202
  )
155
203
 
156
- snippets = await service.search(SearchRequest(code_query=query, top_k=top_k))
204
+ snippets = await service.search(MultiSearchRequest(code_query=query, top_k=top_k))
157
205
 
158
206
  if len(snippets) == 0:
159
207
  click.echo("No snippets found")
@@ -181,23 +229,19 @@ async def keyword(
181
229
  ) -> None:
182
230
  """Search for snippets using keyword search."""
183
231
  log_event("kodit.cli.search.keyword")
184
- source_repository = SourceRepository(session)
185
- source_service = SourceService(app_context.get_clone_dir(), source_repository)
186
- repository = IndexRepository(session)
187
- service = IndexService(
188
- repository=repository,
232
+ source_service = SourceService(
233
+ clone_dir=app_context.get_clone_dir(),
234
+ session_factory=lambda: session,
235
+ )
236
+ snippet_service = create_snippet_application_service(session)
237
+ service = create_indexing_application_service(
238
+ app_context=app_context,
239
+ session=session,
189
240
  source_service=source_service,
190
- keyword_search_provider=keyword_search_factory(app_context, session),
191
- code_search_service=embedding_factory(
192
- task_name="code", app_context=app_context, session=session
193
- ),
194
- text_search_service=embedding_factory(
195
- task_name="text", app_context=app_context, session=session
196
- ),
197
- enrichment_service=enrichment_factory(app_context),
241
+ snippet_application_service=snippet_service,
198
242
  )
199
243
 
200
- snippets = await service.search(SearchRequest(keywords=keywords, top_k=top_k))
244
+ snippets = await service.search(MultiSearchRequest(keywords=keywords, top_k=top_k))
201
245
 
202
246
  if len(snippets) == 0:
203
247
  click.echo("No snippets found")
@@ -228,23 +272,19 @@ async def text(
228
272
  This works best if your query is text.
229
273
  """
230
274
  log_event("kodit.cli.search.text")
231
- source_repository = SourceRepository(session)
232
- source_service = SourceService(app_context.get_clone_dir(), source_repository)
233
- repository = IndexRepository(session)
234
- service = IndexService(
235
- repository=repository,
275
+ source_service = SourceService(
276
+ clone_dir=app_context.get_clone_dir(),
277
+ session_factory=lambda: session,
278
+ )
279
+ snippet_service = create_snippet_application_service(session)
280
+ service = create_indexing_application_service(
281
+ app_context=app_context,
282
+ session=session,
236
283
  source_service=source_service,
237
- keyword_search_provider=keyword_search_factory(app_context, session),
238
- code_search_service=embedding_factory(
239
- task_name="code", app_context=app_context, session=session
240
- ),
241
- text_search_service=embedding_factory(
242
- task_name="text", app_context=app_context, session=session
243
- ),
244
- enrichment_service=enrichment_factory(app_context),
284
+ snippet_application_service=snippet_service,
245
285
  )
246
286
 
247
- snippets = await service.search(SearchRequest(text_query=query, top_k=top_k))
287
+ snippets = await service.search(MultiSearchRequest(text_query=query, top_k=top_k))
248
288
 
249
289
  if len(snippets) == 0:
250
290
  click.echo("No snippets found")
@@ -276,30 +316,26 @@ async def hybrid( # noqa: PLR0913
276
316
  ) -> None:
277
317
  """Search for snippets using hybrid search."""
278
318
  log_event("kodit.cli.search.hybrid")
279
- source_repository = SourceRepository(session)
280
- source_service = SourceService(app_context.get_clone_dir(), source_repository)
281
- repository = IndexRepository(session)
282
- service = IndexService(
283
- repository=repository,
319
+ source_service = SourceService(
320
+ clone_dir=app_context.get_clone_dir(),
321
+ session_factory=lambda: session,
322
+ )
323
+ snippet_service = create_snippet_application_service(session)
324
+ service = create_indexing_application_service(
325
+ app_context=app_context,
326
+ session=session,
284
327
  source_service=source_service,
285
- keyword_search_provider=keyword_search_factory(app_context, session),
286
- code_search_service=embedding_factory(
287
- task_name="code", app_context=app_context, session=session
288
- ),
289
- text_search_service=embedding_factory(
290
- task_name="text", app_context=app_context, session=session
291
- ),
292
- enrichment_service=enrichment_factory(app_context),
328
+ snippet_application_service=snippet_service,
293
329
  )
294
330
 
295
331
  # Parse keywords into a list of strings
296
332
  keywords_list = [k.strip().lower() for k in keywords.split(",")]
297
333
 
298
334
  snippets = await service.search(
299
- SearchRequest(
300
- text_query=text,
335
+ MultiSearchRequest(
301
336
  keywords=keywords_list,
302
337
  code_query=code,
338
+ text_query=text,
303
339
  top_k=top_k,
304
340
  )
305
341
  )
@@ -362,4 +398,4 @@ def version() -> None:
362
398
 
363
399
 
364
400
  if __name__ == "__main__":
365
- cli()
401
+ asyncio.run(cli())
kodit/database.py CHANGED
@@ -1,41 +1,19 @@
1
1
  """Database configuration for kodit."""
2
2
 
3
- from datetime import UTC, datetime
4
3
  from pathlib import Path
5
4
 
6
5
  import structlog
7
6
  from alembic import command
8
7
  from alembic.config import Config as AlembicConfig
9
- from sqlalchemy import DateTime
10
8
  from sqlalchemy.ext.asyncio import (
11
- AsyncAttrs,
12
9
  AsyncSession,
13
10
  async_sessionmaker,
14
11
  create_async_engine,
15
12
  )
16
- from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
17
13
 
18
14
  from kodit import migrations
19
15
 
20
16
 
21
- class Base(AsyncAttrs, DeclarativeBase):
22
- """Base class for all models."""
23
-
24
-
25
- class CommonMixin:
26
- """Common mixin for all models."""
27
-
28
- id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
29
- created_at: Mapped[datetime] = mapped_column(
30
- DateTime(timezone=True), default=lambda: datetime.now(UTC)
31
- )
32
- updated_at: Mapped[datetime] = mapped_column(
33
- DateTime(timezone=True),
34
- default=lambda: datetime.now(UTC),
35
- onupdate=lambda: datetime.now(UTC),
36
- )
37
-
38
-
39
17
  class Database:
40
18
  """Database class for kodit."""
41
19
 
kodit/domain/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Domain layer containing models, services, and repositories."""
kodit/{source/source_models.py → domain/entities.py} CHANGED
@@ -1,24 +1,42 @@
1
- """Source models for managing code sources.
1
+ """SQLAlchemy entities."""
2
2
 
3
- This module defines the SQLAlchemy models used for storing and managing code sources.
4
- It includes models for tracking different types of sources (git repositories and local
5
- folders) and their relationships.
6
- """
7
-
8
- import datetime
9
- from enum import Enum as EnumType
3
+ from datetime import UTC, datetime
4
+ from enum import Enum
10
5
 
11
6
  from git import Actor
12
- from sqlalchemy import Enum, ForeignKey, Integer, String, UniqueConstraint
13
- from sqlalchemy.orm import Mapped, mapped_column
14
-
15
- from kodit.database import Base, CommonMixin
16
-
17
- # Enable proper type hints for SQLAlchemy models
18
- __all__ = ["File", "Source"]
7
+ from sqlalchemy import (
8
+ DateTime,
9
+ ForeignKey,
10
+ Integer,
11
+ String,
12
+ UnicodeText,
13
+ UniqueConstraint,
14
+ )
15
+ from sqlalchemy import Enum as SQLAlchemyEnum
16
+ from sqlalchemy.ext.asyncio import AsyncAttrs
17
+ from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
18
+ from sqlalchemy.types import JSON
19
+
20
+
21
+ class Base(AsyncAttrs, DeclarativeBase):
22
+ """Base class for all models."""
23
+
24
+
25
+ class CommonMixin:
26
+ """Common mixin for all models."""
27
+
28
+ id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
29
+ created_at: Mapped[datetime] = mapped_column(
30
+ DateTime(timezone=True), default=lambda: datetime.now(UTC)
31
+ )
32
+ updated_at: Mapped[datetime] = mapped_column(
33
+ DateTime(timezone=True),
34
+ default=lambda: datetime.now(UTC),
35
+ onupdate=lambda: datetime.now(UTC),
36
+ )
19
37
 
20
38
 
21
- class SourceType(EnumType):
39
+ class SourceType(Enum):
22
40
  """The type of source."""
23
41
 
24
42
  UNKNOWN = 0
@@ -45,7 +63,7 @@ class Source(Base, CommonMixin):
45
63
  uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
46
64
  cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
47
65
  type: Mapped[SourceType] = mapped_column(
48
- Enum(SourceType), default=SourceType.UNKNOWN, index=True
66
+ SQLAlchemyEnum(SourceType), default=SourceType.UNKNOWN, index=True
49
67
  )
50
68
 
51
69
  def __init__(self, uri: str, cloned_path: str, source_type: SourceType) -> None:
@@ -100,8 +118,8 @@ class File(Base, CommonMixin):
100
118
 
101
119
  def __init__( # noqa: PLR0913
102
120
  self,
103
- created_at: datetime.datetime,
104
- updated_at: datetime.datetime,
121
+ created_at: datetime,
122
+ updated_at: datetime,
105
123
  source_id: int,
106
124
  cloned_path: str,
107
125
  mime_type: str = "",
@@ -119,3 +137,54 @@ class File(Base, CommonMixin):
119
137
  self.uri = uri
120
138
  self.sha256 = sha256
121
139
  self.size_bytes = size_bytes
140
+
141
+
142
+ class EmbeddingType(Enum):
143
+ """Embedding type."""
144
+
145
+ CODE = 1
146
+ TEXT = 2
147
+
148
+
149
+ class Embedding(Base, CommonMixin):
150
+ """Embedding model."""
151
+
152
+ __tablename__ = "embeddings"
153
+
154
+ snippet_id: Mapped[int] = mapped_column(ForeignKey("snippets.id"), index=True)
155
+ type: Mapped[EmbeddingType] = mapped_column(
156
+ SQLAlchemyEnum(EmbeddingType), index=True
157
+ )
158
+ embedding: Mapped[list[float]] = mapped_column(JSON)
159
+
160
+
161
+ class Index(Base, CommonMixin):
162
+ """Index model."""
163
+
164
+ __tablename__ = "indexes"
165
+
166
+ source_id: Mapped[int] = mapped_column(
167
+ ForeignKey("sources.id"), unique=True, index=True
168
+ )
169
+
170
+ def __init__(self, source_id: int) -> None:
171
+ """Initialize the index."""
172
+ super().__init__()
173
+ self.source_id = source_id
174
+
175
+
176
+ class Snippet(Base, CommonMixin):
177
+ """Snippet model."""
178
+
179
+ __tablename__ = "snippets"
180
+
181
+ file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
182
+ index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"), index=True)
183
+ content: Mapped[str] = mapped_column(UnicodeText, default="")
184
+
185
+ def __init__(self, file_id: int, index_id: int, content: str) -> None:
186
+ """Initialize the snippet."""
187
+ super().__init__()
188
+ self.file_id = file_id
189
+ self.index_id = index_id
190
+ self.content = content
kodit/domain/enums.py ADDED
@@ -0,0 +1,9 @@
1
+ """Domain enums."""
2
+
3
+ from enum import Enum
4
+
5
+
6
+ class SnippetExtractionStrategy(str, Enum):
7
+ """Different strategies for extracting snippets from files."""
8
+
9
+ METHOD_BASED = "method_based"
kodit/domain/errors.py ADDED
@@ -0,0 +1,5 @@
1
+ """Domain errors."""
2
+
3
+
4
+ class EmptySourceError(ValueError):
5
+ """Error raised when a source is empty."""
kodit/domain/interfaces.py ADDED
@@ -0,0 +1,27 @@
1
+ """Domain interfaces."""
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+ from kodit.domain.value_objects import ProgressEvent
6
+
7
+
8
+ class ProgressCallback(ABC):
9
+ """Abstract interface for progress callbacks."""
10
+
11
+ @abstractmethod
12
+ async def on_progress(self, event: ProgressEvent) -> None:
13
+ """On progress hook."""
14
+
15
+ @abstractmethod
16
+ async def on_complete(self, operation: str) -> None:
17
+ """On complete hook."""
18
+
19
+
20
+ class NullProgressCallback(ProgressCallback):
21
+ """Null implementation of progress callback that does nothing."""
22
+
23
+ async def on_progress(self, event: ProgressEvent) -> None:
24
+ """Do nothing on progress."""
25
+
26
+ async def on_complete(self, operation: str) -> None:
27
+ """Do nothing on complete."""
kodit/domain/repositories.py ADDED
@@ -0,0 +1,95 @@
1
+ """Domain repositories with generic patterns."""
2
+
3
+ from collections.abc import Sequence
4
+ from typing import Protocol, TypeVar
5
+
6
+ from kodit.domain.entities import (
7
+ Author,
8
+ AuthorFileMapping,
9
+ File,
10
+ Snippet,
11
+ Source,
12
+ SourceType,
13
+ )
14
+
15
+ T = TypeVar("T")
16
+
17
+
18
+ class GenericRepository(Protocol[T]):
19
+ """Generic repository interface."""
20
+
21
+ async def get(self, id: int) -> T | None: # noqa: A002
22
+ """Get entity by ID."""
23
+ ...
24
+
25
+ async def save(self, entity: T) -> T:
26
+ """Save entity."""
27
+ ...
28
+
29
+ async def delete(self, id: int) -> None: # noqa: A002
30
+ """Delete entity by ID."""
31
+ ...
32
+
33
+ async def list(self) -> Sequence[T]:
34
+ """List all entities."""
35
+ ...
36
+
37
+
38
+ class SourceRepository(GenericRepository[Source]):
39
+ """Source repository with specific methods."""
40
+
41
+ async def get_by_uri(self, uri: str) -> Source | None:
42
+ """Get a source by URI."""
43
+ raise NotImplementedError
44
+
45
+ async def list_by_type(
46
+ self, source_type: SourceType | None = None
47
+ ) -> Sequence[Source]:
48
+ """List sources by type."""
49
+ raise NotImplementedError
50
+
51
+ async def create_file(self, file: File) -> File:
52
+ """Create a new file record."""
53
+ raise NotImplementedError
54
+
55
+ async def upsert_author(self, author: Author) -> Author:
56
+ """Create a new author or return existing one if email already exists."""
57
+ raise NotImplementedError
58
+
59
+ async def upsert_author_file_mapping(
60
+ self, mapping: "AuthorFileMapping"
61
+ ) -> "AuthorFileMapping":
62
+ """Create a new author file mapping or return existing one if already exists."""
63
+ raise NotImplementedError
64
+
65
+
66
+ class AuthorRepository(GenericRepository[Author]):
67
+ """Author repository with specific methods."""
68
+
69
+ async def get_by_name(self, name: str) -> Author | None:
70
+ """Get an author by name."""
71
+ raise NotImplementedError
72
+
73
+ async def get_by_email(self, email: str) -> Author | None:
74
+ """Get an author by email."""
75
+ raise NotImplementedError
76
+
77
+
78
+ class SnippetRepository(GenericRepository[Snippet]):
79
+ """Snippet repository with specific methods."""
80
+
81
+ async def get_by_index(self, index_id: int) -> Sequence[Snippet]:
82
+ """Get all snippets for an index."""
83
+ raise NotImplementedError
84
+
85
+ async def delete_by_index(self, index_id: int) -> None:
86
+ """Delete all snippets for an index."""
87
+ raise NotImplementedError
88
+
89
+
90
+ class FileRepository(GenericRepository[File]):
91
+ """File repository with specific methods."""
92
+
93
+ async def get_files_for_index(self, index_id: int) -> Sequence[File]:
94
+ """Get all files for an index."""
95
+ raise NotImplementedError
kodit/domain/services/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Domain services for business logic."""