kodit 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (50)
  1. kodit/_version.py +16 -3
  2. kodit/app.py +10 -3
  3. kodit/application/factories/code_indexing_factory.py +54 -7
  4. kodit/application/factories/reporting_factory.py +27 -0
  5. kodit/application/services/auto_indexing_service.py +16 -4
  6. kodit/application/services/code_indexing_application_service.py +115 -133
  7. kodit/application/services/indexing_worker_service.py +18 -20
  8. kodit/application/services/queue_service.py +15 -12
  9. kodit/application/services/reporting.py +86 -0
  10. kodit/application/services/sync_scheduler.py +21 -20
  11. kodit/cli.py +14 -18
  12. kodit/config.py +35 -17
  13. kodit/database.py +2 -1
  14. kodit/domain/protocols.py +9 -1
  15. kodit/domain/services/bm25_service.py +1 -6
  16. kodit/domain/services/index_service.py +22 -58
  17. kodit/domain/value_objects.py +57 -9
  18. kodit/infrastructure/api/v1/__init__.py +2 -2
  19. kodit/infrastructure/api/v1/dependencies.py +23 -10
  20. kodit/infrastructure/api/v1/routers/__init__.py +2 -1
  21. kodit/infrastructure/api/v1/routers/queue.py +76 -0
  22. kodit/infrastructure/api/v1/schemas/queue.py +35 -0
  23. kodit/infrastructure/cloning/git/working_copy.py +36 -7
  24. kodit/infrastructure/embedding/embedding_factory.py +18 -19
  25. kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py +156 -0
  26. kodit/infrastructure/enrichment/enrichment_factory.py +7 -16
  27. kodit/infrastructure/enrichment/{openai_enrichment_provider.py → litellm_enrichment_provider.py} +70 -60
  28. kodit/infrastructure/git/git_utils.py +9 -2
  29. kodit/infrastructure/mappers/index_mapper.py +1 -0
  30. kodit/infrastructure/reporting/__init__.py +1 -0
  31. kodit/infrastructure/reporting/log_progress.py +65 -0
  32. kodit/infrastructure/reporting/tdqm_progress.py +73 -0
  33. kodit/infrastructure/sqlalchemy/embedding_repository.py +47 -68
  34. kodit/infrastructure/sqlalchemy/entities.py +28 -2
  35. kodit/infrastructure/sqlalchemy/index_repository.py +274 -236
  36. kodit/infrastructure/sqlalchemy/task_repository.py +55 -39
  37. kodit/infrastructure/sqlalchemy/unit_of_work.py +59 -0
  38. kodit/log.py +6 -0
  39. kodit/mcp.py +10 -2
  40. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/METADATA +3 -2
  41. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/RECORD +44 -41
  42. kodit/domain/interfaces.py +0 -27
  43. kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +0 -183
  44. kodit/infrastructure/ui/__init__.py +0 -1
  45. kodit/infrastructure/ui/progress.py +0 -170
  46. kodit/infrastructure/ui/spinner.py +0 -74
  47. kodit/reporting.py +0 -78
  48. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/WHEEL +0 -0
  49. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/entry_points.txt +0 -0
  50. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,5 +1,7 @@
1
1
  """Task repository for the task queue."""
2
2
 
3
+ from collections.abc import Callable
4
+
3
5
  import structlog
4
6
  from sqlalchemy import select
5
7
  from sqlalchemy.ext.asyncio import AsyncSession
@@ -9,14 +11,23 @@ from kodit.domain.protocols import TaskRepository
9
11
  from kodit.domain.value_objects import TaskType
10
12
  from kodit.infrastructure.mappers.task_mapper import TaskMapper, TaskTypeMapper
11
13
  from kodit.infrastructure.sqlalchemy import entities as db_entities
14
+ from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
15
+
16
+
17
+ def create_task_repository(
18
+ session_factory: Callable[[], AsyncSession],
19
+ ) -> TaskRepository:
20
+ """Create a task repository."""
21
+ uow = SqlAlchemyUnitOfWork(session_factory=session_factory)
22
+ return SqlAlchemyTaskRepository(uow)
12
23
 
13
24
 
14
25
  class SqlAlchemyTaskRepository(TaskRepository):
15
26
  """Repository for task persistence using the existing Task entity."""
16
27
 
17
- def __init__(self, session: AsyncSession) -> None:
28
+ def __init__(self, uow: SqlAlchemyUnitOfWork) -> None:
18
29
  """Initialize the repository."""
19
- self.session = session
30
+ self.uow = uow
20
31
  self.log = structlog.get_logger(__name__)
21
32
 
22
33
  async def add(
@@ -24,58 +35,63 @@ class SqlAlchemyTaskRepository(TaskRepository):
24
35
  task: Task,
25
36
  ) -> None:
26
37
  """Create a new task in the database."""
27
- self.session.add(TaskMapper.from_domain_task(task))
38
+ async with self.uow:
39
+ self.uow.session.add(TaskMapper.from_domain_task(task))
28
40
 
29
41
  async def get(self, task_id: str) -> Task | None:
30
42
  """Get a task by ID."""
31
- stmt = select(db_entities.Task).where(db_entities.Task.dedup_key == task_id)
32
- result = await self.session.execute(stmt)
33
- db_task = result.scalar_one_or_none()
34
- if not db_task:
35
- return None
36
- return TaskMapper.to_domain_task(db_task)
43
+ async with self.uow:
44
+ stmt = select(db_entities.Task).where(db_entities.Task.dedup_key == task_id)
45
+ result = await self.uow.session.execute(stmt)
46
+ db_task = result.scalar_one_or_none()
47
+ if not db_task:
48
+ return None
49
+ return TaskMapper.to_domain_task(db_task)
37
50
 
38
51
  async def take(self) -> Task | None:
39
52
  """Take a task for processing and remove it from the database."""
40
- stmt = (
41
- select(db_entities.Task)
42
- .order_by(db_entities.Task.priority.desc(), db_entities.Task.created_at)
43
- .limit(1)
44
- )
45
- result = await self.session.execute(stmt)
46
- db_task = result.scalar_one_or_none()
47
- if not db_task:
48
- return None
49
- await self.session.delete(db_task)
50
- return TaskMapper.to_domain_task(db_task)
53
+ async with self.uow:
54
+ stmt = (
55
+ select(db_entities.Task)
56
+ .order_by(db_entities.Task.priority.desc(), db_entities.Task.created_at)
57
+ .limit(1)
58
+ )
59
+ result = await self.uow.session.execute(stmt)
60
+ db_task = result.scalar_one_or_none()
61
+ if not db_task:
62
+ return None
63
+ await self.uow.session.delete(db_task)
64
+ return TaskMapper.to_domain_task(db_task)
51
65
 
52
66
  async def update(self, task: Task) -> None:
53
67
  """Update a task in the database."""
54
- stmt = select(db_entities.Task).where(db_entities.Task.dedup_key == task.id)
55
- result = await self.session.execute(stmt)
56
- db_task = result.scalar_one_or_none()
68
+ async with self.uow:
69
+ stmt = select(db_entities.Task).where(db_entities.Task.dedup_key == task.id)
70
+ result = await self.uow.session.execute(stmt)
71
+ db_task = result.scalar_one_or_none()
57
72
 
58
- if not db_task:
59
- raise ValueError(f"Task not found: {task.id}")
73
+ if not db_task:
74
+ raise ValueError(f"Task not found: {task.id}")
60
75
 
61
- db_task.priority = task.priority
62
- db_task.payload = task.payload
76
+ db_task.priority = task.priority
77
+ db_task.payload = task.payload
63
78
 
64
79
  async def list(self, task_type: TaskType | None = None) -> list[Task]:
65
80
  """List tasks with an optional task type filter."""
66
- stmt = select(db_entities.Task)
81
+ async with self.uow:
82
+ stmt = select(db_entities.Task)
67
83
 
68
- if task_type:
69
- stmt = stmt.where(
70
- db_entities.Task.type == TaskTypeMapper.from_domain_type(task_type)
71
- )
84
+ if task_type:
85
+ stmt = stmt.where(
86
+ db_entities.Task.type == TaskTypeMapper.from_domain_type(task_type)
87
+ )
72
88
 
73
- stmt = stmt.order_by(
74
- db_entities.Task.priority.desc(), db_entities.Task.created_at
75
- )
89
+ stmt = stmt.order_by(
90
+ db_entities.Task.priority.desc(), db_entities.Task.created_at
91
+ )
76
92
 
77
- result = await self.session.execute(stmt)
78
- records = result.scalars().all()
93
+ result = await self.uow.session.execute(stmt)
94
+ records = result.scalars().all()
79
95
 
80
- # Convert to domain entities
81
- return [TaskMapper.to_domain_task(record) for record in records]
96
+ # Convert to domain entities
97
+ return [TaskMapper.to_domain_task(record) for record in records]
@@ -0,0 +1,59 @@
1
+ """SQLAlchemy implementation of Unit of Work pattern."""
2
+
3
+ from collections.abc import Callable
4
+ from types import TracebackType
5
+
6
+ from sqlalchemy.ext.asyncio import AsyncSession
7
+
8
+
9
+ class SqlAlchemyUnitOfWork:
10
+ """SQLAlchemy implementation of Unit of Work pattern."""
11
+
12
+ def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
13
+ """Initialize the unit of work with a session factory."""
14
+ self._session_factory = session_factory
15
+ self._session: AsyncSession | None = None
16
+
17
+ @property
18
+ def session(self) -> AsyncSession:
19
+ """Get the current session."""
20
+ if self._session is None:
21
+ raise RuntimeError("UnitOfWork must be used within async context")
22
+ return self._session
23
+
24
+ async def __aenter__(self) -> "SqlAlchemyUnitOfWork":
25
+ """Enter the unit of work context."""
26
+ self._session = self._session_factory()
27
+ return self
28
+
29
+ async def __aexit__(
30
+ self,
31
+ exc_type: type[BaseException] | None,
32
+ exc_val: BaseException | None,
33
+ exc_tb: TracebackType | None,
34
+ ) -> None:
35
+ """Exit the unit of work context."""
36
+ if self._session:
37
+ if exc_type is not None:
38
+ await self._session.rollback()
39
+ await self._session.commit()
40
+ await self._session.close()
41
+ self._session = None
42
+
43
+ async def commit(self) -> None:
44
+ """Commit the current transaction."""
45
+ if self._session is None:
46
+ raise RuntimeError("UnitOfWork must be used within async context")
47
+ await self._session.commit()
48
+
49
+ async def rollback(self) -> None:
50
+ """Rollback the current transaction."""
51
+ if self._session is None:
52
+ raise RuntimeError("UnitOfWork must be used within async context")
53
+ await self._session.rollback()
54
+
55
+ async def flush(self) -> None:
56
+ """Flush pending changes to the database without committing."""
57
+ if self._session is None:
58
+ raise RuntimeError("UnitOfWork must be used within async context")
59
+ await self._session.flush()
kodit/log.py CHANGED
@@ -11,6 +11,7 @@ from functools import lru_cache
11
11
  from pathlib import Path
12
12
  from typing import Any
13
13
 
14
+ import litellm
14
15
  import rudderstack.analytics as rudder_analytics # type: ignore[import-untyped]
15
16
  import structlog
16
17
  from structlog.types import EventDict
@@ -99,6 +100,7 @@ def configure_logging(app_context: AppContext) -> None:
99
100
  "bm25s",
100
101
  "sentence_transformers.SentenceTransformer",
101
102
  "httpx",
103
+ "LiteLLM",
102
104
  ]:
103
105
  if root_logger.getEffectiveLevel() == logging.DEBUG:
104
106
  logging.getLogger(_log).handlers.clear()
@@ -106,6 +108,9 @@ def configure_logging(app_context: AppContext) -> None:
106
108
  else:
107
109
  logging.getLogger(_log).disabled = True
108
110
 
111
+ # More litellm logging cruft
112
+ litellm.suppress_debug_info = True
113
+
109
114
  # Configure SQLAlchemy loggers to use our structlog setup
110
115
  for _log in ["sqlalchemy.engine", "alembic"]:
111
116
  engine_logger = logging.getLogger(_log)
@@ -138,6 +143,7 @@ def configure_logging(app_context: AppContext) -> None:
138
143
 
139
144
  def configure_telemetry(app_context: AppContext) -> None:
140
145
  """Configure telemetry for the application."""
146
+ litellm.telemetry = False # Disable litellm telemetry by default
141
147
  if app_context.disable_telemetry:
142
148
  structlog.stdlib.get_logger(__name__).info("Telemetry has been disabled")
143
149
  rudder_analytics.send = False
kodit/mcp.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """MCP server for kodit."""
2
2
 
3
- from collections.abc import AsyncIterator
3
+ from collections.abc import AsyncIterator, Callable
4
4
  from contextlib import asynccontextmanager
5
5
  from dataclasses import dataclass
6
6
  from pathlib import Path
@@ -15,6 +15,7 @@ from kodit._version import version
15
15
  from kodit.application.factories.code_indexing_factory import (
16
16
  create_code_indexing_application_service,
17
17
  )
18
+ from kodit.application.factories.reporting_factory import create_server_operation
18
19
  from kodit.config import AppContext
19
20
  from kodit.database import Database
20
21
  from kodit.domain.value_objects import (
@@ -32,6 +33,7 @@ class MCPContext:
32
33
  """Context for the MCP server."""
33
34
 
34
35
  session: AsyncSession
36
+ session_factory: Callable[[], AsyncSession]
35
37
  app_context: AppContext
36
38
 
37
39
 
@@ -55,7 +57,11 @@ async def mcp_lifespan(_: FastMCP) -> AsyncIterator[MCPContext]:
55
57
  if _mcp_db is None:
56
58
  _mcp_db = await app_context.get_db()
57
59
  async with _mcp_db.session_factory() as session:
58
- yield MCPContext(session=session, app_context=app_context)
60
+ yield MCPContext(
61
+ session=session,
62
+ app_context=app_context,
63
+ session_factory=_mcp_db.session_factory,
64
+ )
59
65
 
60
66
 
61
67
  def create_mcp_server(name: str, instructions: str | None = None) -> FastMCP:
@@ -174,6 +180,8 @@ def register_mcp_tools(mcp_server: FastMCP) -> None:
174
180
  service = create_code_indexing_application_service(
175
181
  app_context=mcp_context.app_context,
176
182
  session=mcp_context.session,
183
+ session_factory=mcp_context.session_factory,
184
+ operation=create_server_operation(),
177
185
  )
178
186
 
179
187
  log.debug("Searching for snippets")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kodit
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Summary: Code indexing for better AI code generation
5
5
  Project-URL: Homepage, https://docs.helixml.tech/kodit/
6
6
  Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -35,7 +35,8 @@ Requires-Dist: gitpython>=3.1.44
35
35
  Requires-Dist: hf-xet>=1.1.2
36
36
  Requires-Dist: httpx-retries>=0.3.2
37
37
  Requires-Dist: httpx>=0.28.1
38
- Requires-Dist: openai>=1.82.0
38
+ Requires-Dist: litellm>=1.75.8
39
+ Requires-Dist: openai==1.99.9
39
40
  Requires-Dist: pathspec>=0.12.1
40
41
  Requires-Dist: pydantic-settings>=2.9.1
41
42
  Requires-Dist: pystemmer>=3.0.0
@@ -1,36 +1,36 @@
1
1
  kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
2
2
  kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
3
- kodit/_version.py,sha256=l5eo51MdCumDFCp44TFT1JH8yCDo1krag-GJubLxnVo,511
4
- kodit/app.py,sha256=r0w0JJsOacrQ5aAQ1yf-BK1CrYZPrvtUoH1LACd9FaA,4262
5
- kodit/cli.py,sha256=VUZD4cPRgAnrKEWUl2PbS-nOA0FkDVqmJ2SR0g1yJsk,28202
3
+ kodit/_version.py,sha256=A45grTqzrHuDn1CT9K5GVUbY4_Q3OSTcXAl3zdHzcEI,704
4
+ kodit/app.py,sha256=xLy0cM3fduXSQSws3wq9fWg5eJB1xD6vrMpkVFYpnhA,4468
5
+ kodit/cli.py,sha256=ugy0L9m5lVgudLebD5FpmZfJEAVAtvxbCvUTfJvU46Y,27948
6
6
  kodit/cli_utils.py,sha256=bW4rIm-elrsyM_pSGHh30zV0_oX7V-64pL3YSaBcOt0,2810
7
- kodit/config.py,sha256=YYo36lBi3auCssyCVrpw3Z3ZXSzXKTDo45nIsUjkzfs,10305
8
- kodit/database.py,sha256=kI9yBm4uunsgV4-QeVoCBL0wLzU4kYmYv5qZilGnbPE,1740
9
- kodit/log.py,sha256=XyuseZk90gUBj1B7np2UO2EW9eE_ApayIpPRvI19KCE,8651
10
- kodit/mcp.py,sha256=aEcPc8dQiZaR0AswCZZNxcm_rhhUZNsEBimYti0ibSI,7221
7
+ kodit/config.py,sha256=wKXUb06j7VbpD7ydCARd6_DNeAY5tLeJqHvhWozFhyI,11052
8
+ kodit/database.py,sha256=k93byjVUX1VjAb0hLZxUo4liEKKxAWUBJNw2e7rzaiI,1771
9
+ kodit/log.py,sha256=ZpM0eMo_DVGQqrHxg0VV6dMrN2AAmu_3C0I3G7p2nMw,8828
10
+ kodit/mcp.py,sha256=GWh9krkcP37wh8ZmvfXaGJPknhaautBxzvbMMr5FRdg,7555
11
11
  kodit/middleware.py,sha256=TiwebNpaEmiP7QRuZrfZcCL51IUefQyNLSPuzVyk8UM,2813
12
- kodit/reporting.py,sha256=icce1ZyiADsA_Qz-mSjgn2H4SSqKuGfLKnw-yrl9nsg,2722
13
12
  kodit/application/__init__.py,sha256=mH50wTpgP9dhbKztFsL8Dda9Hi18TSnMVxXtpp4aGOA,35
14
13
  kodit/application/factories/__init__.py,sha256=bU5CvEnaBePZ7JbkCOp1MGTNP752bnU2uEqmfy5FdRk,37
15
- kodit/application/factories/code_indexing_factory.py,sha256=R9f0wsj4-3NJFS5SEt_-OIGR_s_01gJXaL3PkZd8MlU,5911
14
+ kodit/application/factories/code_indexing_factory.py,sha256=4c5LS2t7FOHiNS_Xb5sPRngf3we-VbTWKa-NcjZmf0Q,7300
15
+ kodit/application/factories/reporting_factory.py,sha256=Plf3c1KIx36eM5YefU5svPr9QeaNcKFH5UlmDuET8R0,1013
16
16
  kodit/application/services/__init__.py,sha256=p5UQNw-H5sxQvs5Etfte93B3cJ1kKW6DNxK34uFvU1E,38
17
- kodit/application/services/auto_indexing_service.py,sha256=O5BNR5HypgghzUFG4ykIWMl9mxHCUExnBmJuITIhECk,3457
18
- kodit/application/services/code_indexing_application_service.py,sha256=nrnd_Md-D0AfNKku7Aqt3YHDbXsBV9f44Z6XsjhiF3E,15877
19
- kodit/application/services/indexing_worker_service.py,sha256=Un4PytnWJU4uwROcxOMUFkt4cD7nmPezaBLsEHrMN6U,5185
20
- kodit/application/services/queue_service.py,sha256=GaixRoCUaDhLYfwZLVED8C3w_NPiy_QbuVp_jhwP4GI,1727
21
- kodit/application/services/sync_scheduler.py,sha256=aLpEczZdTM8ubfAEY0Ajdh3MLfDcB9s-0ILZJrtIuZs,3504
17
+ kodit/application/services/auto_indexing_service.py,sha256=rJPWiV755eskFNKjYliPr1WMFylXlG8BWPpFcwwOhm0,3973
18
+ kodit/application/services/code_indexing_application_service.py,sha256=tLbbo-fyAc3iZoCOJU9lIfhNI_6Lz9SqQdfjeN5m8yA,16213
19
+ kodit/application/services/indexing_worker_service.py,sha256=B8MdXrzjaYVS7zVTTz8cXUQItkGb8Fk1aXeim2dfCJw,5311
20
+ kodit/application/services/queue_service.py,sha256=G42lR31maFRZ9cSvnWZrzeyb4P1R6yFqrcHWVKAqc9U,1924
21
+ kodit/application/services/reporting.py,sha256=hDisTU_XUBTfiOtnJ5-6x0jj8rHSlq9zZgeNnBT7W5Y,2834
22
+ kodit/application/services/sync_scheduler.py,sha256=FUUpDtxUh7Eg-lnzOrUHzmSWGpzdpYJQYgPhnQYwTcg,3446
22
23
  kodit/domain/__init__.py,sha256=TCpg4Xx-oF4mKV91lo4iXqMEfBT1OoRSYnbG-zVWolA,66
23
24
  kodit/domain/entities.py,sha256=QsCzKXT7gF9jTPAjJo5lqjFGRsIklAFC2qRy_Gt3RbA,10377
24
25
  kodit/domain/errors.py,sha256=yIsgCjM_yOFIg8l7l-t7jM8pgeAX4cfPq0owf7iz3DA,106
25
- kodit/domain/interfaces.py,sha256=Jkd0Ob4qSvhZHI9jRPFQ1n5Cv0SvU-y3Z-HCw2ikc4I,742
26
- kodit/domain/protocols.py,sha256=GA0CCvmhvQ3F4MseeQUVw3NeIgUoaV7V_7TdAaU70Is,2587
27
- kodit/domain/value_objects.py,sha256=dkfbg99PSCrfj6nJ7tZ2UzDG3QUgNa_Cpj2gLakDM5k,17512
26
+ kodit/domain/protocols.py,sha256=RGNOlHyvNq6Nx_95ETTO9DkzeZmjtubfC7qdGvA5iPk,2753
27
+ kodit/domain/value_objects.py,sha256=uIpAdIvq6VefEGa8yq5Uqyuyit72SHtDptnoOUd73u0,18882
28
28
  kodit/domain/services/__init__.py,sha256=Q1GhCK_PqKHYwYE4tkwDz5BIyXkJngLBBOHhzvX8nzo,42
29
- kodit/domain/services/bm25_service.py,sha256=nsfTan3XtDwXuuAu1LUv-6Jukm6qFKVqqCVymjyepZQ,3625
29
+ kodit/domain/services/bm25_service.py,sha256=seRo0V-zW6Uq-Y67j0-zp1xz93gbfQgvlEbpQeYHN1U,3529
30
30
  kodit/domain/services/embedding_service.py,sha256=7drYRC2kjg0WJmo06a2E9N0vDnwInUlBB96twjz2BT8,4526
31
31
  kodit/domain/services/enrichment_service.py,sha256=XsXg3nV-KN4rqtC7Zro_ZiZ6RSq-1eA1MG6IDzFGyBA,1316
32
32
  kodit/domain/services/index_query_service.py,sha256=cDQkgpJ3JbyeZ3z3GTIqH1JzhhKE_LBIwYE6b-lakwU,2172
33
- kodit/domain/services/index_service.py,sha256=uVwDUEQWfZ5yJRvcjaWW7P9gCZttmnlkI51IHz52eew,11554
33
+ kodit/domain/services/index_service.py,sha256=TSvM-UuOtq30hz6eNPgu9AEVFrLDugdYoBgBf1xZDcI,10377
34
34
  kodit/infrastructure/__init__.py,sha256=HzEYIjoXnkz_i_MHO2e0sIVYweUcRnl2RpyBiTbMObU,28
35
35
  kodit/infrastructure/api/__init__.py,sha256=U0TSMPpHrlj1zbAtleuZjU3nXGwudyMe-veNBgvODwM,34
36
36
  kodit/infrastructure/api/client/__init__.py,sha256=6RSYqeuxjDe_zTUq48D0F-VfBBUvDmTkO3K3vD61q3I,349
@@ -41,14 +41,16 @@ kodit/infrastructure/api/client/index_client.py,sha256=OxsakDQBEulwmqZVzwOSSI0Lk
41
41
  kodit/infrastructure/api/client/search_client.py,sha256=f4mM5ZJpAuR7w-i9yASbh4SYMxOq7_f4hXgaQesGquI,2614
42
42
  kodit/infrastructure/api/middleware/__init__.py,sha256=6m7eE5k5buboJbuzyX5E9-Tf99yNwFaeJF0f_6HwLyM,30
43
43
  kodit/infrastructure/api/middleware/auth.py,sha256=QSnMcMLWvfumqN1iG4ePj2vEZb2Dlsgr-WHptkEkkhE,1064
44
- kodit/infrastructure/api/v1/__init__.py,sha256=XYv4_9Z6fo69oMvC2mEbtD6DaMqHth29KHUOelmQFwM,121
45
- kodit/infrastructure/api/v1/dependencies.py,sha256=jaM000IfSnvU8uzwnC1cBZsfsMC-19jWFjObHfqBYuM,2475
46
- kodit/infrastructure/api/v1/routers/__init__.py,sha256=L8hT_SkDzmCXIiWrFQWCkZXQ3UDy_ZMxPr8AIhjSWK0,160
44
+ kodit/infrastructure/api/v1/__init__.py,sha256=hQ03es21FSgzQlmdP5xWZzK80woIvuYGjiZLwFYuYwk,151
45
+ kodit/infrastructure/api/v1/dependencies.py,sha256=MBmCpTtwDAtdsLjJ06Bzod3Vwqon8mMASknZobdoaMU,2919
46
+ kodit/infrastructure/api/v1/routers/__init__.py,sha256=YYyeiuyphIPc-Q_2totF8zfR0BoseOH4ZYFdHP0ed_M,218
47
47
  kodit/infrastructure/api/v1/routers/indexes.py,sha256=_lUir1M0SW6kPHeGqjiPjtSa50rY4PN2es5TZEpSHYE,3442
48
+ kodit/infrastructure/api/v1/routers/queue.py,sha256=EZbR-G0qDO9W5ajV_75GRk2pW1Qdgc0ggOwrGKlBE2A,2138
48
49
  kodit/infrastructure/api/v1/routers/search.py,sha256=da9YTR6VTzU85_6X3aaZemdTHGCEvcPNeKuMFBgmT_A,2452
49
50
  kodit/infrastructure/api/v1/schemas/__init__.py,sha256=_5BVqv4EUi_vvWlAQOE_VfRulUDAF21ZQ7z27y7YOdw,498
50
51
  kodit/infrastructure/api/v1/schemas/context.py,sha256=NlsIn9j1R3se7JkGZivS_CUN4gGP5NYaAtkRe3QH6dk,214
51
52
  kodit/infrastructure/api/v1/schemas/index.py,sha256=NtL09YtO50h-ddpAFxNf-dyxu_Xi5v3yOpKW0W4xsAM,1950
53
+ kodit/infrastructure/api/v1/schemas/queue.py,sha256=oa4wumWOvGzi53Q3cjwIrQJRoentp5nsQSsaj-l-B4U,652
52
54
  kodit/infrastructure/api/v1/schemas/search.py,sha256=CWzg5SIMUJ_4yM-ZfgSLWCanMxov6AyGgQQcOMkRlGw,5618
53
55
  kodit/infrastructure/bm25/__init__.py,sha256=DmGbrEO34FOJy4e685BbyxLA7gPW1eqs2gAxsp6JOuM,34
54
56
  kodit/infrastructure/bm25/bm25_factory.py,sha256=I4eo7qRslnyXIRkBf-StZ5ga2Evrr5J5YFocTChFD3g,884
@@ -57,42 +59,43 @@ kodit/infrastructure/bm25/vectorchord_bm25_repository.py,sha256=p6ht5K-jlDTvEkmo
57
59
  kodit/infrastructure/cloning/__init__.py,sha256=IzIvX-yeRRFZ-lfvPVSEe_qXszO6DGQdjKwwDigexyQ,30
58
60
  kodit/infrastructure/cloning/metadata.py,sha256=GD2UnCC1oR82RD0SVUqk9CJOqzXPxhOAHVOp7jqN6Qc,3571
59
61
  kodit/infrastructure/cloning/git/__init__.py,sha256=20ePcp0qE6BuLsjsv4KYB1DzKhMIMsPXwEqIEZtjTJs,34
60
- kodit/infrastructure/cloning/git/working_copy.py,sha256=qYcrR5qP1rhWZiYGMT1p-1Alavi_YvQLXx4MgIV7eXs,2611
62
+ kodit/infrastructure/cloning/git/working_copy.py,sha256=Lt_NWSoQ1pZAi0u_MKUhrwGeul4XWf3zqCuzG3dn70s,3608
61
63
  kodit/infrastructure/embedding/__init__.py,sha256=F-8nLlWAerYJ0MOIA4tbXHLan8bW5rRR84vzxx6tRKI,39
62
- kodit/infrastructure/embedding/embedding_factory.py,sha256=8LC2jKf2vx-P-TCh8ZanxwF3hT5PSjWA3vuSR6ggcXk,3731
64
+ kodit/infrastructure/embedding/embedding_factory.py,sha256=BNhrrYQAkcnXkuuQy-Q-lwJhyoGONsTsbgN4t0UdGeY,3395
63
65
  kodit/infrastructure/embedding/local_vector_search_repository.py,sha256=ExweyNEL5cP-g3eDhGqZSih7zhdOrop2WdFPPJL-tB4,3505
64
66
  kodit/infrastructure/embedding/vectorchord_vector_search_repository.py,sha256=PIoU0HsDlaoXDXnGjOR0LAkAcW4JiE3ymJy_SBhEopc,8030
65
67
  kodit/infrastructure/embedding/embedding_providers/__init__.py,sha256=qeZ-oAIAxMl5QqebGtO1lq-tHjl_ucAwOXePklcwwGk,34
66
68
  kodit/infrastructure/embedding/embedding_providers/batching.py,sha256=a8CL9PX2VLmbeg616fc_lQzfC4BWTVn32m4SEhXpHxc,3279
67
69
  kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py,sha256=V6OdCuWyQQOvo3OJGRi-gBKDApIcrELydFg7T696P5s,2257
70
+ kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py,sha256=9Q5he_MI8xXENODwCvYCbhVawTjTv1bArGQrmxoWLas,5297
68
71
  kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py,sha256=9aLV1Zg4KMhYWlGRwgAUtswW4aIabNqbsipWhAn64RI,4133
69
- kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py,sha256=CE86s8IicieUjIDWn2xzswteHXCzmw1Qz6Kp4GBIcus,6316
70
72
  kodit/infrastructure/enrichment/__init__.py,sha256=8acZKNzql8Fs0lceFu9U3KoUrOptRBtVIxr_Iw6lz3Y,40
71
- kodit/infrastructure/enrichment/enrichment_factory.py,sha256=jZWGgAvFjEuRUc1oW3iGhgipvX-EnVJZpw6ybzp9NGM,2016
73
+ kodit/infrastructure/enrichment/enrichment_factory.py,sha256=NFGY6u9SJ_GOgiB_RtotbQmte0kGFQUymwzZCbbsx34,1530
74
+ kodit/infrastructure/enrichment/litellm_enrichment_provider.py,sha256=AM4-4KApDndzWzQzzKAedy21iGMhkwylR5VCmV9K-uI,6040
72
75
  kodit/infrastructure/enrichment/local_enrichment_provider.py,sha256=aVU3_kbLJ0BihwGIwvJ00DBe0voHkiKdFSjPxxkVfVA,4150
73
76
  kodit/infrastructure/enrichment/null_enrichment_provider.py,sha256=DhZkJBnkvXg_XSAs-oKiFnKqYFPnmTl3ikdxrqeEfbc,713
74
- kodit/infrastructure/enrichment/openai_enrichment_provider.py,sha256=C0y0NEPu1GpFr22TGi1voxYGsYTV0ZITYuDzvRJ5vW4,5573
75
77
  kodit/infrastructure/enrichment/utils.py,sha256=FE9UCuxxzSdoHrmAC8Si2b5D6Nf6kVqgM1yjUVyCvW0,930
76
78
  kodit/infrastructure/git/__init__.py,sha256=0iMosFzudj4_xNIMe2SRbV6l5bWqkjnUsZoFsoZFuM8,33
77
- kodit/infrastructure/git/git_utils.py,sha256=KERwmhWDR4ooMQKS-nSPxjvdCzoWF9NS6nhdeXyzdtY,571
79
+ kodit/infrastructure/git/git_utils.py,sha256=5lH94AcF7Hac4h6kBzo_B9pzC1S6AK2-Dy13gz--Zf0,781
78
80
  kodit/infrastructure/ignore/__init__.py,sha256=VzFv8XOzHmsu0MEGnWVSF6KsgqLBmvHlRqAkT1Xb1MY,36
79
81
  kodit/infrastructure/ignore/ignore_pattern_provider.py,sha256=zdxun3GodLfXxyssBK8QDUK58xb4fBJ0SKcHUyn3pzM,2131
80
82
  kodit/infrastructure/indexing/__init__.py,sha256=7UPRa2jwCAsa0Orsp6PqXSF8iIXJVzXHMFmrKkI9yH8,38
81
83
  kodit/infrastructure/indexing/fusion_service.py,sha256=2B0guBsuKz19uWcs18sIJpUJPzXoRvULgl7UNWQGysA,1809
82
84
  kodit/infrastructure/mappers/__init__.py,sha256=QPHOjNreXmBPPovZ6elnYFS0vD-IsmrGl4TT01FCKro,77
83
- kodit/infrastructure/mappers/index_mapper.py,sha256=ZSfu8kjTaa8_UY0nTqr4b02NS3VrjqZYkduCN71AL2g,12743
85
+ kodit/infrastructure/mappers/index_mapper.py,sha256=XWtv_him2Sd9dR-Jy_ndy9jYXVtv3LttzmmUGzNK6CE,12825
84
86
  kodit/infrastructure/mappers/task_mapper.py,sha256=QW7uL8rji6QJ7RRdHwbvkWqmwDcUDGTYPLwbwiKlViY,2919
87
+ kodit/infrastructure/reporting/__init__.py,sha256=4Qu38YbDOaeDqLdT_CbK8tOZHTKGrHRXncVKlGRzOeQ,32
88
+ kodit/infrastructure/reporting/log_progress.py,sha256=sNF0oeg56NaTfO3DVg1AXQWwgrHSTaZrOWqPDq-FhVE,2180
89
+ kodit/infrastructure/reporting/tdqm_progress.py,sha256=g01P7PItQqqSXzM5jjXL6uOUIKJQ6O9zaO1WZZ7XKSM,2512
85
90
  kodit/infrastructure/slicing/__init__.py,sha256=x7cjvHA9Ay2weUYE_dpdAaPaStp20M-4U2b5MLgT5KM,37
86
91
  kodit/infrastructure/slicing/language_detection_service.py,sha256=JGJXrq9bLyfnisWJXeP7y1jbZMmKAISdPBlRBCosUcE,684
87
92
  kodit/infrastructure/slicing/slicer.py,sha256=GOqJykd00waOTO1WJHyE5KUgJ2RLx2rOQ7M7T_u5LLg,35600
88
93
  kodit/infrastructure/sqlalchemy/__init__.py,sha256=UXPMSF_hgWaqr86cawRVqM8XdVNumQyyK5B8B97GnlA,33
89
- kodit/infrastructure/sqlalchemy/embedding_repository.py,sha256=dC2Wzj_zQiWExwfScE1LAGiiyxPyg0YepwyLOgDwcs4,7905
90
- kodit/infrastructure/sqlalchemy/entities.py,sha256=2iG_2NoKS26rJXimLFL8whRqFsUvKNGFAXCkQYK5GtE,6951
91
- kodit/infrastructure/sqlalchemy/index_repository.py,sha256=QQNsyLBI09YLUPLguB9qvqPZMxtg1p2twpm7sO_gNlo,23598
92
- kodit/infrastructure/sqlalchemy/task_repository.py,sha256=yazMO6Kw0Pb2b3L8wlGKOFA0QuMFcWBUXYFGZdtZo0w,2874
93
- kodit/infrastructure/ui/__init__.py,sha256=CzbLOBwIZ6B6iAHEd1L8cIBydCj-n_kobxJAhz2I9_Y,32
94
- kodit/infrastructure/ui/progress.py,sha256=SHEUoQA_x36z4nqHrQduVrrWIvFfX6QxAawC7zQ50pw,6433
95
- kodit/infrastructure/ui/spinner.py,sha256=GcP115qtR0VEnGfMEtsGoAUpRzVGUSfiUXfoJJERngA,2357
94
+ kodit/infrastructure/sqlalchemy/embedding_repository.py,sha256=YYxbUEdzDdlKdy0FyAP4EzhJMAIdEnNZiXT6hzPHk9I,7731
95
+ kodit/infrastructure/sqlalchemy/entities.py,sha256=P3BitWqnTxMVXmyez7OX-SB3-UG66XorqvPMjXspwoM,7894
96
+ kodit/infrastructure/sqlalchemy/index_repository.py,sha256=x8MPl0j7GrW_lEZh464EZyb0w935p_EHv2NIMNxjJu0,25680
97
+ kodit/infrastructure/sqlalchemy/task_repository.py,sha256=60ECbxiXC2_UR80f4uPSmJiP_so7PTBzZG_w1WXSiuE,3546
98
+ kodit/infrastructure/sqlalchemy/unit_of_work.py,sha256=gK-C8yk2HYBrAEDrblWxBrldrGb83SBHn-8lURkFeMg,2093
96
99
  kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
97
100
  kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
98
101
  kodit/migrations/env.py,sha256=m57TkFLYjQ4w2aw1YICXkeek27M6qjwRDMHvThWqIL0,2383
@@ -109,8 +112,8 @@ kodit/utils/__init__.py,sha256=DPEB1i8evnLF4Ns3huuAYg-0pKBFKUFuiDzOKG9r-sw,33
109
112
  kodit/utils/dump_openapi.py,sha256=29VdjHpNSaGAg7RjQw0meq1OLhljCx1ElgBlTC8xoF4,1247
110
113
  kodit/utils/generate_api_paths.py,sha256=TMtx9v55podDfUmiWaHgJHLtEWLV2sLL-5ejGFMPzAo,3569
111
114
  kodit/utils/path_utils.py,sha256=thK6YGGNvQThdBaCYCCeCvS1L8x-lwl3AoGht2jnjGw,1645
112
- kodit-0.4.0.dist-info/METADATA,sha256=bwViEYL092ciMPsday_wvQgIEheDQ1frFv26RxsTxcI,7671
113
- kodit-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
114
- kodit-0.4.0.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
115
- kodit-0.4.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
116
- kodit-0.4.0.dist-info/RECORD,,
115
+ kodit-0.4.2.dist-info/METADATA,sha256=bC5eza2ORs3v3w5-bwW1uybuk8b4JdNI13GZQvrP4ps,7702
116
+ kodit-0.4.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
117
+ kodit-0.4.2.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
118
+ kodit-0.4.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
119
+ kodit-0.4.2.dist-info/RECORD,,
@@ -1,27 +0,0 @@
1
- """Domain interfaces."""
2
-
3
- from abc import ABC, abstractmethod
4
-
5
- from kodit.domain.value_objects import ProgressEvent
6
-
7
-
8
- class ProgressCallback(ABC):
9
- """Abstract interface for progress callbacks."""
10
-
11
- @abstractmethod
12
- async def on_progress(self, event: ProgressEvent) -> None:
13
- """On progress hook."""
14
-
15
- @abstractmethod
16
- async def on_complete(self, operation: str) -> None:
17
- """On complete hook."""
18
-
19
-
20
- class NullProgressCallback(ProgressCallback):
21
- """Null implementation of progress callback that does nothing."""
22
-
23
- async def on_progress(self, event: ProgressEvent) -> None:
24
- """Do nothing on progress."""
25
-
26
- async def on_complete(self, operation: str) -> None:
27
- """Do nothing on complete."""
@@ -1,183 +0,0 @@
1
- """OpenAI embedding provider implementation using httpx."""
2
-
3
- import asyncio
4
- from collections.abc import AsyncGenerator
5
- from typing import Any
6
-
7
- import httpx
8
- import structlog
9
- import tiktoken
10
- from tiktoken import Encoding
11
-
12
- from kodit.domain.services.embedding_service import EmbeddingProvider
13
- from kodit.domain.value_objects import EmbeddingRequest, EmbeddingResponse
14
-
15
- from .batching import split_sub_batches
16
-
17
- # Constants
18
- MAX_TOKENS = 8192 # Conservative token limit for the embedding model
19
- BATCH_SIZE = (
20
- 10 # Maximum number of items per API call (keeps existing test expectations)
21
- )
22
- OPENAI_NUM_PARALLEL_TASKS = 10 # Semaphore limit for concurrent OpenAI requests
23
-
24
-
25
- class OpenAIEmbeddingProvider(EmbeddingProvider):
26
- """OpenAI embedding provider that uses OpenAI's embedding API via httpx."""
27
-
28
- def __init__( # noqa: PLR0913
29
- self,
30
- api_key: str | None = None,
31
- base_url: str = "https://api.openai.com",
32
- model_name: str = "text-embedding-3-small",
33
- num_parallel_tasks: int = OPENAI_NUM_PARALLEL_TASKS,
34
- socket_path: str | None = None,
35
- timeout: float = 30.0,
36
- ) -> None:
37
- """Initialize the OpenAI embedding provider.
38
-
39
- Args:
40
- api_key: The OpenAI API key.
41
- base_url: The base URL for the OpenAI API.
42
- model_name: The model name to use for embeddings.
43
- num_parallel_tasks: Maximum number of concurrent requests.
44
- socket_path: Optional Unix socket path for local communication.
45
- timeout: Request timeout in seconds.
46
-
47
- """
48
- self.model_name = model_name
49
- self.num_parallel_tasks = num_parallel_tasks
50
- self.log = structlog.get_logger(__name__)
51
- self.api_key = api_key
52
- self.base_url = base_url
53
- self.socket_path = socket_path
54
- self.timeout = timeout
55
-
56
- # Lazily initialised token encoding
57
- self._encoding: Encoding | None = None
58
-
59
- # Create httpx client with optional Unix socket support
60
- if socket_path:
61
- transport = httpx.AsyncHTTPTransport(uds=socket_path)
62
- self.http_client = httpx.AsyncClient(
63
- transport=transport,
64
- base_url="http://localhost", # Base URL for Unix socket
65
- timeout=timeout,
66
- )
67
- else:
68
- self.http_client = httpx.AsyncClient(
69
- base_url=base_url,
70
- timeout=timeout,
71
- )
72
-
73
- # ---------------------------------------------------------------------
74
- # Helper utilities
75
- # ---------------------------------------------------------------------
76
-
77
- def _get_encoding(self) -> "Encoding":
78
- """Return (and cache) the tiktoken encoding for the chosen model."""
79
- if self._encoding is None:
80
- try:
81
- self._encoding = tiktoken.encoding_for_model(self.model_name)
82
- except KeyError:
83
- # If the model is not supported by tiktoken, use a default encoding
84
- self.log.info(
85
- "Model not supported by tiktoken, using default encoding",
86
- model_name=self.model_name,
87
- default_encoding="o200k_base",
88
- )
89
- self._encoding = tiktoken.get_encoding("o200k_base")
90
-
91
- return self._encoding
92
-
93
- def _split_sub_batches(
94
- self, encoding: "Encoding", data: list[EmbeddingRequest]
95
- ) -> list[list[EmbeddingRequest]]:
96
- """Proxy to the shared batching utility (kept for backward-compat)."""
97
- return split_sub_batches(
98
- encoding,
99
- data,
100
- max_tokens=MAX_TOKENS,
101
- batch_size=BATCH_SIZE,
102
- )
103
-
104
- async def _call_embeddings_api(
105
- self, texts: list[str]
106
- ) -> dict[str, Any]:
107
- """Call the embeddings API using httpx.
108
-
109
- Args:
110
- texts: The texts to embed.
111
-
112
- Returns:
113
- The API response as a dictionary.
114
-
115
- """
116
- headers = {
117
- "Content-Type": "application/json",
118
- }
119
- if self.api_key:
120
- headers["Authorization"] = f"Bearer {self.api_key}"
121
-
122
- data = {
123
- "model": self.model_name,
124
- "input": texts,
125
- }
126
-
127
- response = await self.http_client.post(
128
- "/v1/embeddings",
129
- json=data,
130
- headers=headers,
131
- )
132
- response.raise_for_status()
133
- return response.json()
134
-
135
- async def embed(
136
- self, data: list[EmbeddingRequest]
137
- ) -> AsyncGenerator[list[EmbeddingResponse], None]:
138
- """Embed a list of strings using OpenAI's API."""
139
- if not data:
140
- yield []
141
-
142
- encoding = self._get_encoding()
143
-
144
- # First, split by token limits (and max batch size)
145
- batched_data = self._split_sub_batches(encoding, data)
146
-
147
- # -----------------------------------------------------------------
148
- # Process batches concurrently (but bounded by a semaphore)
149
- # -----------------------------------------------------------------
150
-
151
- sem = asyncio.Semaphore(self.num_parallel_tasks)
152
-
153
- async def _process_batch(
154
- batch: list[EmbeddingRequest],
155
- ) -> list[EmbeddingResponse]:
156
- async with sem:
157
- try:
158
- response = await self._call_embeddings_api(
159
- [item.text for item in batch]
160
- )
161
- embeddings_data = response.get("data", [])
162
-
163
- return [
164
- EmbeddingResponse(
165
- snippet_id=item.snippet_id,
166
- embedding=emb_data.get("embedding", []),
167
- )
168
- for item, emb_data in zip(batch, embeddings_data, strict=True)
169
- ]
170
- except Exception as e:
171
- self.log.exception("Error embedding batch", error=str(e))
172
- # Return no embeddings for this batch if there was an error
173
- return []
174
-
175
- tasks = [_process_batch(batch) for batch in batched_data]
176
- for task in asyncio.as_completed(tasks):
177
- yield await task
178
-
179
- async def close(self) -> None:
180
- """Close the HTTP client."""
181
- if hasattr(self, "http_client"):
182
- await self.http_client.aclose()
183
-