kodit 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (50)
  1. kodit/_version.py +16 -3
  2. kodit/app.py +10 -3
  3. kodit/application/factories/code_indexing_factory.py +54 -7
  4. kodit/application/factories/reporting_factory.py +27 -0
  5. kodit/application/services/auto_indexing_service.py +16 -4
  6. kodit/application/services/code_indexing_application_service.py +115 -133
  7. kodit/application/services/indexing_worker_service.py +18 -20
  8. kodit/application/services/queue_service.py +15 -12
  9. kodit/application/services/reporting.py +86 -0
  10. kodit/application/services/sync_scheduler.py +21 -20
  11. kodit/cli.py +14 -18
  12. kodit/config.py +35 -17
  13. kodit/database.py +2 -1
  14. kodit/domain/protocols.py +9 -1
  15. kodit/domain/services/bm25_service.py +1 -6
  16. kodit/domain/services/index_service.py +22 -58
  17. kodit/domain/value_objects.py +57 -9
  18. kodit/infrastructure/api/v1/__init__.py +2 -2
  19. kodit/infrastructure/api/v1/dependencies.py +23 -10
  20. kodit/infrastructure/api/v1/routers/__init__.py +2 -1
  21. kodit/infrastructure/api/v1/routers/queue.py +76 -0
  22. kodit/infrastructure/api/v1/schemas/queue.py +35 -0
  23. kodit/infrastructure/cloning/git/working_copy.py +36 -7
  24. kodit/infrastructure/embedding/embedding_factory.py +18 -19
  25. kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py +156 -0
  26. kodit/infrastructure/enrichment/enrichment_factory.py +7 -16
  27. kodit/infrastructure/enrichment/{openai_enrichment_provider.py → litellm_enrichment_provider.py} +70 -60
  28. kodit/infrastructure/git/git_utils.py +9 -2
  29. kodit/infrastructure/mappers/index_mapper.py +1 -0
  30. kodit/infrastructure/reporting/__init__.py +1 -0
  31. kodit/infrastructure/reporting/log_progress.py +65 -0
  32. kodit/infrastructure/reporting/tdqm_progress.py +73 -0
  33. kodit/infrastructure/sqlalchemy/embedding_repository.py +47 -68
  34. kodit/infrastructure/sqlalchemy/entities.py +28 -2
  35. kodit/infrastructure/sqlalchemy/index_repository.py +274 -236
  36. kodit/infrastructure/sqlalchemy/task_repository.py +55 -39
  37. kodit/infrastructure/sqlalchemy/unit_of_work.py +59 -0
  38. kodit/log.py +6 -0
  39. kodit/mcp.py +10 -2
  40. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/METADATA +3 -2
  41. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/RECORD +44 -41
  42. kodit/domain/interfaces.py +0 -27
  43. kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +0 -183
  44. kodit/infrastructure/ui/__init__.py +0 -1
  45. kodit/infrastructure/ui/progress.py +0 -170
  46. kodit/infrastructure/ui/spinner.py +0 -74
  47. kodit/reporting.py +0 -78
  48. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/WHEEL +0 -0
  49. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/entry_points.txt +0 -0
  50. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,85 +1,64 @@
1
1
  """SQLAlchemy implementation of embedding repository."""
2
2
 
3
+ from collections.abc import Callable
4
+
3
5
  import numpy as np
4
6
  from sqlalchemy import select
5
7
  from sqlalchemy.ext.asyncio import AsyncSession
6
8
 
7
9
  from kodit.infrastructure.sqlalchemy.entities import Embedding, EmbeddingType
10
+ from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
8
11
 
9
12
 
10
- class SqlAlchemyEmbeddingRepository:
11
- """SQLAlchemy implementation of embedding repository."""
12
-
13
- def __init__(self, session: AsyncSession) -> None:
14
- """Initialize the SQLAlchemy embedding repository.
15
-
16
- Args:
17
- session: The SQLAlchemy async session to use for database operations
18
-
19
- """
20
- self.session = session
13
+ def create_embedding_repository(
14
+ session_factory: Callable[[], AsyncSession],
15
+ ) -> "SqlAlchemyEmbeddingRepository":
16
+ """Create an embedding repository."""
17
+ uow = SqlAlchemyUnitOfWork(session_factory=session_factory)
18
+ return SqlAlchemyEmbeddingRepository(uow)
21
19
 
22
- async def create_embedding(self, embedding: Embedding) -> Embedding:
23
- """Create a new embedding record in the database.
24
20
 
25
- Args:
26
- embedding: The Embedding instance to create
21
+ class SqlAlchemyEmbeddingRepository:
22
+ """SQLAlchemy implementation of embedding repository."""
27
23
 
28
- Returns:
29
- The created Embedding instance
24
+ def __init__(self, uow: SqlAlchemyUnitOfWork) -> None:
25
+ """Initialize the SQLAlchemy embedding repository."""
26
+ self.uow = uow
30
27
 
31
- """
32
- self.session.add(embedding)
33
- return embedding
28
+ async def create_embedding(self, embedding: Embedding) -> None:
29
+ """Create a new embedding record in the database."""
30
+ async with self.uow:
31
+ self.uow.session.add(embedding)
34
32
 
35
33
  async def get_embedding_by_snippet_id_and_type(
36
34
  self, snippet_id: int, embedding_type: EmbeddingType
37
35
  ) -> Embedding | None:
38
- """Get an embedding by its snippet ID and type.
39
-
40
- Args:
41
- snippet_id: The ID of the snippet to get the embedding for
42
- embedding_type: The type of embedding to get
43
-
44
- Returns:
45
- The Embedding instance if found, None otherwise
46
-
47
- """
48
- query = select(Embedding).where(
49
- Embedding.snippet_id == snippet_id,
50
- Embedding.type == embedding_type,
51
- )
52
- result = await self.session.execute(query)
53
- return result.scalar_one_or_none()
36
+ """Get an embedding by its snippet ID and type."""
37
+ async with self.uow:
38
+ query = select(Embedding).where(
39
+ Embedding.snippet_id == snippet_id,
40
+ Embedding.type == embedding_type,
41
+ )
42
+ result = await self.uow.session.execute(query)
43
+ return result.scalar_one_or_none()
54
44
 
55
45
  async def list_embeddings_by_type(
56
46
  self, embedding_type: EmbeddingType
57
47
  ) -> list[Embedding]:
58
- """List all embeddings of a given type.
59
-
60
- Args:
61
- embedding_type: The type of embeddings to list
62
-
63
- Returns:
64
- A list of Embedding instances
65
-
66
- """
67
- query = select(Embedding).where(Embedding.type == embedding_type)
68
- result = await self.session.execute(query)
69
- return list(result.scalars())
48
+ """List all embeddings of a given type."""
49
+ async with self.uow:
50
+ query = select(Embedding).where(Embedding.type == embedding_type)
51
+ result = await self.uow.session.execute(query)
52
+ return list(result.scalars())
70
53
 
71
54
  async def delete_embeddings_by_snippet_id(self, snippet_id: int) -> None:
72
- """Delete all embeddings for a snippet.
73
-
74
- Args:
75
- snippet_id: The ID of the snippet to delete embeddings for
76
-
77
- """
78
- query = select(Embedding).where(Embedding.snippet_id == snippet_id)
79
- result = await self.session.execute(query)
80
- embeddings = result.scalars().all()
81
- for embedding in embeddings:
82
- await self.session.delete(embedding)
55
+ """Delete all embeddings for a snippet."""
56
+ async with self.uow:
57
+ query = select(Embedding).where(Embedding.snippet_id == snippet_id)
58
+ result = await self.uow.session.execute(query)
59
+ embeddings = result.scalars().all()
60
+ for embedding in embeddings:
61
+ await self.uow.session.delete(embedding)
83
62
 
84
63
  async def list_semantic_results(
85
64
  self,
@@ -130,17 +109,17 @@ class SqlAlchemyEmbeddingRepository:
130
109
  List of (snippet_id, embedding) tuples
131
110
 
132
111
  """
133
- # Only select the fields we need and use a more efficient query
134
- query = select(Embedding.snippet_id, Embedding.embedding).where(
135
- Embedding.type == embedding_type
136
- )
112
+ async with self.uow:
113
+ query = select(Embedding.snippet_id, Embedding.embedding).where(
114
+ Embedding.type == embedding_type
115
+ )
137
116
 
138
- # Add snippet_ids filter if provided
139
- if snippet_ids is not None:
140
- query = query.where(Embedding.snippet_id.in_(snippet_ids))
117
+ # Add snippet_ids filter if provided
118
+ if snippet_ids is not None:
119
+ query = query.where(Embedding.snippet_id.in_(snippet_ids))
141
120
 
142
- rows = await self.session.execute(query)
143
- return [tuple(row) for row in rows.all()] # Convert Row objects to tuples
121
+ rows = await self.uow.session.execute(query)
122
+ return [tuple(row) for row in rows.all()] # Convert Row objects to tuples
144
123
 
145
124
  def _prepare_vectors(
146
125
  self, embeddings: list[tuple[int, list[float]]], query_embedding: list[float]
@@ -2,6 +2,7 @@
2
2
 
3
3
  from datetime import UTC, datetime
4
4
  from enum import Enum
5
+ from typing import Any
5
6
 
6
7
  from git import Actor
7
8
  from sqlalchemy import (
@@ -9,6 +10,7 @@ from sqlalchemy import (
9
10
  ForeignKey,
10
11
  Integer,
11
12
  String,
13
+ TypeDecorator,
12
14
  UnicodeText,
13
15
  UniqueConstraint,
14
16
  )
@@ -18,6 +20,29 @@ from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
18
20
  from sqlalchemy.types import JSON
19
21
 
20
22
 
23
+ # See <https://docs.sqlalchemy.org/en/20/core/custom_types.html#store-timezone-aware-timestamps-as-timezone-naive-utc>
24
+ # And [this issue](https://github.com/sqlalchemy/sqlalchemy/issues/1985)
25
+ class TZDateTime(TypeDecorator):
26
+ """Timezone-aware datetime type."""
27
+
28
+ impl = DateTime
29
+ cache_ok = True
30
+
31
+ def process_bind_param(self, value: Any, dialect: Any) -> Any: # noqa: ARG002
32
+ """Process bind param."""
33
+ if value is not None:
34
+ if not value.tzinfo or value.tzinfo.utcoffset(value) is None:
35
+ raise TypeError("tzinfo is required")
36
+ value = value.astimezone(UTC).replace(tzinfo=None)
37
+ return value
38
+
39
+ def process_result_value(self, value: Any, dialect: Any) -> Any: # noqa: ARG002
40
+ """Process result value."""
41
+ if value is not None:
42
+ value = value.replace(tzinfo=UTC)
43
+ return value
44
+
45
+
21
46
  class Base(AsyncAttrs, DeclarativeBase):
22
47
  """Base class for all models."""
23
48
 
@@ -27,10 +52,11 @@ class CommonMixin:
27
52
 
28
53
  id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
29
54
  created_at: Mapped[datetime] = mapped_column(
30
- DateTime(timezone=True), default=lambda: datetime.now(UTC)
55
+ TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
31
56
  )
32
57
  updated_at: Mapped[datetime] = mapped_column(
33
- DateTime(timezone=True),
58
+ TZDateTime,
59
+ nullable=False,
34
60
  default=lambda: datetime.now(UTC),
35
61
  onupdate=lambda: datetime.now(UTC),
36
62
  )