kodit 0.1.15__py3-none-any.whl → 0.1.17__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their public registries, and is provided for informational purposes only.

Potentially problematic release.


This version of kodit might be problematic.

@@ -0,0 +1,67 @@
+"""Fusion functions for combining search results."""
+
+from collections import defaultdict
+from dataclasses import dataclass
+
+
+@dataclass
+class FusionResult:
+    """Result of a fusion operation."""
+
+    id: int
+    score: float
+    original_scores: list[float]
+
+
+@dataclass
+class FusionRequest:
+    """Result of a RRF operation."""
+
+    id: int
+    score: float
+
+
+def reciprocal_rank_fusion(
+    rankings: list[list[FusionRequest]], k: float = 60
+) -> list[FusionResult]:
+    """RRF prioritises results that are present in all results.
+
+    Args:
+        rankings: List of rankers, each containing a list of document ids. Top of the
+            list is considered to be the best result.
+        k: Parameter for RRF.
+
+    Returns:
+        Dictionary of ids and their scores.
+
+    """
+    scores = {}
+    for ranker in rankings:
+        for rank in ranker:
+            scores[rank.id] = float(0)
+
+    for ranker in rankings:
+        for i, rank in enumerate(ranker):
+            scores[rank.id] += 1.0 / (k + i)
+
+    # Create a list of tuples of ids and their scores
+    results = [(rank, scores[rank]) for rank in scores]
+
+    # Sort results by score
+    results.sort(key=lambda x: x[1], reverse=True)
+
+    # Create a map of original scores to ids
+    original_scores_to_ids = defaultdict(list)
+    for ranker in rankings:
+        for rank in ranker:
+            original_scores_to_ids[rank.id].append(rank.score)
+
+    # Rebuild a list of final results with their original scores
+    return [
+        FusionResult(
+            id=result[0],
+            score=result[1],
+            original_scores=original_scores_to_ids[result[0]],
+        )
+        for result in results
+    ]
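
A minimal usage sketch (not code from the package; the ids and scores below are invented) of how the new fusion helper combines rankings from two retrievers. A snippet that appears in every ranking accumulates 1/(k + rank) from each ranker, so it floats to the top:

```python
from kodit.indexing.fusion import FusionRequest, reciprocal_rank_fusion

# Hypothetical rankings: one from BM25 keyword search, one from vector search.
bm25_ranking = [FusionRequest(id=3, score=12.1), FusionRequest(id=7, score=9.4)]
vector_ranking = [FusionRequest(id=7, score=0.91), FusionRequest(id=2, score=0.80)]

fused = reciprocal_rank_fusion([bm25_ranking, vector_ranking], k=60)
for result in fused:
    # Snippet 7 ranks first because it appears in both rankings:
    # 1/(60+1) + 1/(60+0) ≈ 0.0331, versus 1/(60+0) ≈ 0.0167 for snippet 3.
    print(result.id, round(result.score, 4), result.original_scores)
```
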
@@ -10,7 +10,6 @@ from typing import TypeVar
 
 from sqlalchemy import delete, func, select
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm.exc import MultipleResultsFound
 
 from kodit.embedding.embedding_models import Embedding
 from kodit.indexing.indexing_models import Index, Snippet
@@ -125,34 +124,15 @@ class IndexRepository:
         index.updated_at = datetime.now(UTC)
         await self.session.commit()
 
-    async def add_snippet_or_update_content(self, snippet: Snippet) -> None:
+    async def add_snippet(self, snippet: Snippet) -> None:
         """Add a new snippet to the database if it doesn't exist, otherwise update it.
 
         Args:
             snippet: The Snippet instance to add.
 
         """
-        query = select(Snippet).where(
-            Snippet.file_id == snippet.file_id,
-            Snippet.index_id == snippet.index_id,
-        )
-        result = await self.session.execute(query)
-        try:
-            existing_snippet = result.scalar_one_or_none()
-
-            if existing_snippet:
-                existing_snippet.content = snippet.content
-            else:
-                self.session.add(snippet)
-
-            await self.session.commit()
-        except MultipleResultsFound as e:
-            msg = (
-                f"Multiple snippets found for file_id {snippet.file_id}, this "
-                "shouldn't happen. "
-                "Please report this as a bug then delete your index and start again."
-            )
-            raise ValueError(msg) from e
+        self.session.add(snippet)
+        await self.session.commit()
 
     async def delete_all_snippets(self, index_id: int) -> None:
         """Delete all snippets for an index.
@@ -161,6 +141,15 @@ class IndexRepository:
             index_id: The ID of the index to delete snippets for.
 
         """
+        # First get all snippets for this index
+        snippets = await self.get_snippets_for_index(index_id)
+
+        # Delete all embeddings for these snippets, if there are any
+        for snippet in snippets:
+            query = delete(Embedding).where(Embedding.snippet_id == snippet.id)
+            await self.session.execute(query)
+
+        # Now delete the snippets
         query = delete(Snippet).where(Snippet.index_id == index_id)
         await self.session.execute(query)
         await self.session.commit()
@@ -196,3 +185,32 @@ class IndexRepository:
         """
         self.session.add(embedding)
         await self.session.commit()
+
+    async def list_snippets_by_ids(self, ids: list[int]) -> list[tuple[File, Snippet]]:
+        """List snippets by IDs.
+
+        Returns:
+            A list of snippets in the same order as the input IDs.
+
+        """
+        query = (
+            select(Snippet, File)
+            .where(Snippet.id.in_(ids))
+            .join(File, Snippet.file_id == File.id)
+        )
+        rows = await self.session.execute(query)
+
+        # Create a dictionary for O(1) lookup of results by ID
+        id_to_result = {snippet.id: (file, snippet) for snippet, file in rows.all()}
+
+        # Check that all IDs are present
+        if len(id_to_result) != len(ids):
+            # Create a list of missing IDs
+            missing_ids = [
+                snippet_id for snippet_id in ids if snippet_id not in id_to_result
+            ]
+            msg = f"Some IDs are not present: {missing_ids}"
+            raise ValueError(msg)
+
+        # Rebuild the list in the same order that it was passed in
+        return [id_to_result[i] for i in ids]
@@ -13,11 +13,17 @@ import pydantic
 import structlog
 from tqdm.asyncio import tqdm
 
-from kodit.bm25.keyword_search_service import BM25Document, KeywordSearchProvider
+from kodit.bm25.keyword_search_service import (
+    BM25Document,
+    BM25Result,
+    KeywordSearchProvider,
+)
 from kodit.embedding.vector_search_service import (
     VectorSearchRequest,
     VectorSearchService,
 )
+from kodit.enrichment.enrichment_service import EnrichmentService
+from kodit.indexing.fusion import FusionRequest, reciprocal_rank_fusion
 from kodit.indexing.indexing_models import Snippet
 from kodit.indexing.indexing_repository import IndexRepository
 from kodit.snippets.snippets import SnippetService
@@ -42,6 +48,28 @@ class IndexView(pydantic.BaseModel):
     num_snippets: int | None = None
 
 
+class SearchRequest(pydantic.BaseModel):
+    """Request for a search."""
+
+    text_query: str | None = None
+    code_query: str | None = None
+    keywords: list[str] | None = None
+    top_k: int = 10
+
+
+class SearchResult(pydantic.BaseModel):
+    """Data transfer object for search results.
+
+    This model represents a single search result, containing both the file path
+    and the matching snippet content.
+    """
+
+    id: int
+    uri: str
+    content: str
+    original_scores: list[float]
+
+
 class IndexService:
     """Service for managing code indexes.
 
@@ -50,12 +78,14 @@ class IndexService:
     IndexRepository), and provides a clean API for index management.
     """
 
-    def __init__(
+    def __init__(  # noqa: PLR0913
         self,
         repository: IndexRepository,
         source_service: SourceService,
         keyword_search_provider: KeywordSearchProvider,
-        vector_search_service: VectorSearchService,
+        code_search_service: VectorSearchService,
+        text_search_service: VectorSearchService,
+        enrichment_service: EnrichmentService,
     ) -> None:
         """Initialize the index service.
 
@@ -69,7 +99,9 @@ class IndexService:
         self.snippet_service = SnippetService()
         self.log = structlog.get_logger(__name__)
         self.keyword_search_provider = keyword_search_provider
-        self.code_search_service = vector_search_service
+        self.code_search_service = code_search_service
+        self.text_search_service = text_search_service
+        self.enrichment_service = enrichment_service
 
     async def create(self, source_id: int) -> IndexView:
         """Create a new index for a source.
@@ -129,10 +161,15 @@ class IndexService:
             msg = f"Index not found: {index_id}"
             raise ValueError(msg)
 
+        # Delete old snippets so we don't duplicate. In the future should probably check
+        # which files have changed and only change those.
+        await self.repository.delete_all_snippets(index.id)
+
         # Create snippets for supported file types
-        await self._create_snippets(index_id)
+        self.log.info("Creating snippets for files", index_id=index.id)
+        await self._create_snippets(index.id)
 
-        snippets = await self.repository.get_all_snippets(index_id)
+        snippets = await self.repository.get_all_snippets(index.id)
 
         self.log.info("Creating keyword index")
         with Spinner():
@@ -152,9 +189,93 @@ class IndexService:
                 ]
             )
 
+        self.log.info("Enriching snippets", num_snippets=len(snippets))
+        enriched_contents = await self.enrichment_service.enrich(
+            [snippet.content for snippet in snippets]
+        )
+
+        self.log.info("Creating semantic text index")
+        with Spinner():
+            await self.text_search_service.index(
+                [
+                    VectorSearchRequest(snippet.id, enriched_content)
+                    for snippet, enriched_content in zip(
+                        snippets, enriched_contents, strict=True
+                    )
+                ]
+            )
+        # Add the enriched text back to the snippets and write to the database
+        for snippet, enriched_content in zip(
+            snippets, enriched_contents, strict=True
+        ):
+            snippet.content = (
+                enriched_content + "\n\n```\n" + snippet.content + "\n```"
+            )
+            await self.repository.add_snippet(snippet)
+
         # Update index timestamp
         await self.repository.update_index_timestamp(index)
 
+    async def search(self, request: SearchRequest) -> list[SearchResult]:
+        """Search for relevant data."""
+        fusion_list: list[list[FusionRequest]] = []
+        if request.keywords:
+            # Gather results for each keyword
+            result_ids: list[BM25Result] = []
+            for keyword in request.keywords:
+                results = await self.keyword_search_provider.retrieve(
+                    keyword, request.top_k
+                )
+                result_ids.extend(results)
+
+            fusion_list.append(
+                [FusionRequest(id=x.snippet_id, score=x.score) for x in result_ids]
+            )
+
+        # Compute embedding for semantic query
+        if request.code_query:
+            query_embedding = await self.code_search_service.retrieve(
+                request.code_query, top_k=request.top_k
+            )
+            fusion_list.append(
+                [FusionRequest(id=x.snippet_id, score=x.score) for x in query_embedding]
+            )
+
+        if request.text_query:
+            query_embedding = await self.text_search_service.retrieve(
+                request.text_query, top_k=request.top_k
+            )
+            fusion_list.append(
+                [FusionRequest(id=x.snippet_id, score=x.score) for x in query_embedding]
+            )
+
+        if len(fusion_list) == 0:
+            return []
+
+        # Combine all results together with RFF if required
+        final_results = reciprocal_rank_fusion(
+            rankings=fusion_list,
+            k=60,
+        )
+
+        # Only keep top_k results
+        final_results = final_results[: request.top_k]
+
+        # Get snippets from database (up to top_k)
+        search_results = await self.repository.list_snippets_by_ids(
+            [x.id for x in final_results]
+        )
+
+        return [
+            SearchResult(
+                id=snippet.id,
+                uri=file.uri,
+                content=snippet.content,
+                original_scores=fr.original_scores,
+            )
+            for (file, snippet), fr in zip(search_results, final_results, strict=True)
+        ]
+
     async def _create_snippets(
         self,
         index_id: int,
@@ -168,7 +289,6 @@ class IndexService:
 
         """
         files = await self.repository.files_for_index(index_id)
-        self.log.info("Creating snippets for files", index_id=index_id)
         for file in tqdm(files, total=len(files), leave=False):
             # Skip unsupported file types
             if file.mime_type in MIME_BLACKLIST:
@@ -190,4 +310,4 @@ class IndexService:
                     file_id=file.id,
                     content=snippet.text,
                 )
-                await self.repository.add_snippet_or_update_content(s)
+                await self.repository.add_snippet(s)
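
Taken together, these changes replace the old search path with a hybrid `IndexService.search`: BM25 keyword retrieval, code-embedding retrieval, and text-embedding retrieval each produce a ranking, and reciprocal rank fusion merges them. A hedged sketch of calling it, assuming `service` is an `IndexService` already wired up with its repository, search, and enrichment dependencies (as done in kodit/mcp.py below):

```python
from kodit.indexing.indexing_service import IndexService, SearchRequest


async def demo(service: IndexService) -> None:
    # Illustrative query values; any combination of the three query types works,
    # and each one that is set contributes a ranking to the RRF fusion step.
    request = SearchRequest(
        keywords=["reciprocal rank fusion"],
        code_query="def fuse(rankings): ...",
        text_query="combine keyword and semantic search results",
        top_k=5,
    )
    for result in await service.search(request):
        # Each SearchResult carries the snippet URI, content, and the original
        # per-ranker scores that fed into the fused ranking.
        print(result.uri, result.original_scores)
```
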
kodit/mcp.py CHANGED
@@ -16,8 +16,11 @@ from kodit.bm25.keyword_search_factory import keyword_search_factory
 from kodit.config import AppContext
 from kodit.database import Database
 from kodit.embedding.embedding_factory import embedding_factory
-from kodit.search.search_repository import SearchRepository
-from kodit.search.search_service import SearchRequest, SearchResult, SearchService
+from kodit.enrichment.enrichment_factory import enrichment_factory
+from kodit.indexing.indexing_repository import IndexRepository
+from kodit.indexing.indexing_service import IndexService, SearchRequest, SearchResult
+from kodit.source.source_repository import SourceRepository
+from kodit.source.source_service import SourceService
 
 
 @dataclass
@@ -123,32 +126,38 @@ async def search(
 
     mcp_context: MCPContext = ctx.request_context.lifespan_context
 
-    log.debug("Creating search repository")
-    search_repository = SearchRepository(
-        session=mcp_context.session,
+    source_repository = SourceRepository(mcp_context.session)
+    source_service = SourceService(
+        mcp_context.app_context.get_clone_dir(), source_repository
     )
-
-    log.debug("Creating embedding service")
-    embedding_service = embedding_factory(
-        app_context=mcp_context.app_context, session=mcp_context.session
-    )
-
-    log.debug("Creating search service")
-    search_service = SearchService(
-        repository=search_repository,
+    repository = IndexRepository(mcp_context.session)
+    service = IndexService(
+        repository=repository,
+        source_service=source_service,
         keyword_search_provider=keyword_search_factory(
+            mcp_context.app_context, mcp_context.session
+        ),
+        code_search_service=embedding_factory(
+            task_name="code",
            app_context=mcp_context.app_context,
            session=mcp_context.session,
        ),
-        embedding_service=embedding_service,
+        text_search_service=embedding_factory(
+            task_name="text",
+            app_context=mcp_context.app_context,
+            session=mcp_context.session,
+        ),
+        enrichment_service=enrichment_factory(mcp_context.app_context),
     )
 
     search_request = SearchRequest(
         keywords=keywords,
         code_query="\n".join(related_file_contents),
+        text_query=user_intent,
     )
+
     log.debug("Searching for snippets")
-    snippets = await search_service.search(request=search_request)
+    snippets = await service.search(request=search_request)
 
     log.debug("Fusing output")
     output = output_fusion(snippets=snippets)
@@ -0,0 +1,44 @@
+# ruff: noqa
+"""index all the things
+
+Revision ID: c3f5137d30f5
+Revises: 7c3bbc2ab32b
+Create Date: 2025-06-05 17:17:32.440740
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = 'c3f5137d30f5'
+down_revision: Union[str, None] = '7c3bbc2ab32b'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_index(op.f('ix_files_cloned_path'), 'files', ['cloned_path'], unique=False)
+    op.create_index(op.f('ix_files_mime_type'), 'files', ['mime_type'], unique=False)
+    op.create_index(op.f('ix_files_uri'), 'files', ['uri'], unique=False)
+    op.create_index(op.f('ix_snippets_file_id'), 'snippets', ['file_id'], unique=False)
+    op.create_index(op.f('ix_snippets_index_id'), 'snippets', ['index_id'], unique=False)
+    op.create_index(op.f('ix_sources_cloned_path'), 'sources', ['cloned_path'], unique=False)
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index(op.f('ix_sources_cloned_path'), table_name='sources')
+    op.drop_index(op.f('ix_snippets_index_id'), table_name='snippets')
+    op.drop_index(op.f('ix_snippets_file_id'), table_name='snippets')
+    op.drop_index(op.f('ix_files_uri'), table_name='files')
+    op.drop_index(op.f('ix_files_mime_type'), table_name='files')
+    op.drop_index(op.f('ix_files_cloned_path'), table_name='files')
+    # ### end Alembic commands ###
@@ -0,0 +1,26 @@
+(function_declaration
+  name: (identifier) @function.name
+  body: (block) @function.body
+) @function.def
+
+(method_declaration
+  name: (field_identifier) @method.name
+  body: (block) @method.body
+) @method.def
+
+(import_declaration
+  (import_spec
+    path: (interpreted_string_literal) @import.name
+  )
+) @import.statement
+
+(identifier) @ident
+
+(parameter_declaration
+  name: (identifier) @param.name
+)
+
+(package_clause "package" (package_identifier) @name.definition.module)
+
+;; Exclude comments from being captured
+(comment) @comment
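
The query file above drives snippet extraction for Go source via tree-sitter. A rough sketch (not code from kodit) of how such a capture query can be run, assuming py-tree-sitter 0.24.x where `Language.query()` returns a `Query` and `Query.captures()` returns a dict of capture names to nodes; the query API differs across binding versions, so treat this as illustrative only:

```python
# Rough sketch, not kodit's code: run a capture query over a small Go program.
from tree_sitter_language_pack import get_language, get_parser

GO_SOURCE = b"package main\n\nfunc add(a int, b int) int { return a + b }\n"
QUERY = "(function_declaration name: (identifier) @function.name) @function.def"

language = get_language("go")
parser = get_parser("go")
tree = parser.parse(GO_SOURCE)

# Assumes py-tree-sitter 0.24.x: captures() returns {capture_name: [nodes]}.
captures = language.query(QUERY).captures(tree.root_node)
for node in captures.get("function.name", []):
    print(node.text.decode())  # expected to print "add"
```
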
@@ -31,7 +31,7 @@ class Source(Base, CommonMixin):
 
     __tablename__ = "sources"
     uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
-    cloned_path: Mapped[str] = mapped_column(String(1024))
+    cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
 
     def __init__(self, uri: str, cloned_path: str) -> None:
         """Initialize a new Source instance for typing purposes."""
@@ -46,9 +46,9 @@ class File(Base, CommonMixin):
     __tablename__ = "files"
 
     source_id: Mapped[int] = mapped_column(ForeignKey("sources.id"))
-    mime_type: Mapped[str] = mapped_column(String(255), default="")
-    uri: Mapped[str] = mapped_column(String(1024), default="")
-    cloned_path: Mapped[str] = mapped_column(String(1024))
+    mime_type: Mapped[str] = mapped_column(String(255), default="", index=True)
+    uri: Mapped[str] = mapped_column(String(1024), default="", index=True)
+    cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
     sha256: Mapped[str] = mapped_column(String(64), default="", index=True)
     size_bytes: Mapped[int] = mapped_column(Integer, default=0)
 
@@ -0,0 +1,152 @@
+Metadata-Version: 2.4
+Name: kodit
+Version: 0.1.17
+Summary: Code indexing for better AI code generation
+Project-URL: Homepage, https://docs.helixml.tech/kodit/
+Project-URL: Documentation, https://docs.helixml.tech/kodit/
+Project-URL: Repository, https://github.com/helixml/kodit.git
+Project-URL: Issues, https://github.com/helixml/kodit/issues
+Project-URL: Changelog, https://github.com/helixml/kodit/releases
+Author-email: "Helix.ML" <founders@helix.ml>
+Maintainer-email: "Helix.ML" <founders@helix.ml>
+License-Expression: Apache-2.0
+License-File: LICENSE
+Keywords: ai,indexing,mcp,rag
+Classifier: Development Status :: 2 - Pre-Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Software Development :: Code Generators
+Requires-Python: >=3.12
+Requires-Dist: aiofiles>=24.1.0
+Requires-Dist: aiosqlite>=0.20.0
+Requires-Dist: alembic>=1.15.2
+Requires-Dist: asgi-correlation-id>=4.3.4
+Requires-Dist: asyncpg>=0.30.0
+Requires-Dist: better-exceptions>=0.3.3
+Requires-Dist: bm25s[core]>=0.2.12
+Requires-Dist: click>=8.1.8
+Requires-Dist: colorama>=0.4.6
+Requires-Dist: dotenv>=0.9.9
+Requires-Dist: fastapi[standard]>=0.115.12
+Requires-Dist: fastmcp>=2.3.3
+Requires-Dist: gitpython>=3.1.44
+Requires-Dist: hf-xet>=1.1.2
+Requires-Dist: httpx-retries>=0.3.2
+Requires-Dist: httpx>=0.28.1
+Requires-Dist: openai>=1.82.0
+Requires-Dist: posthog>=4.0.1
+Requires-Dist: pydantic-settings>=2.9.1
+Requires-Dist: pytable-formatter>=0.1.1
+Requires-Dist: sentence-transformers>=4.1.0
+Requires-Dist: sqlalchemy[asyncio]>=2.0.40
+Requires-Dist: structlog>=25.3.0
+Requires-Dist: tdqm>=0.0.1
+Requires-Dist: tiktoken>=0.9.0
+Requires-Dist: transformers>=4.51.3
+Requires-Dist: tree-sitter-language-pack>=0.7.3
+Requires-Dist: tree-sitter>=0.24.0
+Requires-Dist: uritools>=5.0.0
+Description-Content-Type: text/markdown
+
+<p align="center">
+  <a href="https://docs.helix.ml/kodit/"><img src="https://docs.helix.ml/images/helix-kodit-logo.png" alt="Helix Kodit Logo" width="300"></a>
+</p>
+
+<h1 align="center">
+  Kodit: A Code Indexing MCP Server
+</h1>
+
+<p align="center">
+  Kodit connects your AI coding assistant to external codebases to provide accurate and up-to-date snippets of code.
+</p>
+
+<div align="center">
+
+[![Documentation](https://img.shields.io/badge/Documentation-6B46C1?style=for-the-badge&logo=readthedocs&logoColor=white)](https://docs.helix.ml/kodit/)
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg?style=for-the-badge)](./LICENSE)
+[![Discussions](https://img.shields.io/badge/Discussions-181717?style=for-the-badge&logo=github&logoColor=white)](https://github.com/helixml/kodit/discussions)
+
+</div>
+
+**Helix Kodit** is an **MCP server** that connects your AI coding assistant to external codebases. It can:
+
+- Improve your AI-assisted code by providing canonical examples direct from the source
+- Index local and public codebases
+- Integrates with any AI coding assistant via MCP
+- Search using keyword and semantic search
+- Integrate with any OpenAI-compatible or custom API/model
+
+If you're an engineer working with AI-powered coding assistants, Kodit helps by
+providing relevant and up-to-date examples of your task so that LLMs make less mistakes
+and produce fewer hallucinations.
+
+## ✨ Features
+
+### Codebase Indexing
+
+Kodit connects to a variety of local and remote codebases to build an index of your
+code. This index is used to build a snippet library, ready for ingestion into an LLM.
+
+- Index local directories and public Git repositories
+- Build comprehensive snippet libraries for LLM ingestion
+- Support for multiple codebase types and languages
+- Efficient indexing and search capabilities
+
+### MCP Server
+
+Relevant snippets are exposed to an AI coding assistant via an MCP server. This allows
+the assistant to request relevant snippets by providing keywords, code, and semantic
+intent. Kodit has been tested to work well with:
+
+- Seamless integration with popular AI coding assistants
+- Tested and verified with:
+  - [Cursor](https://docs.helix.ml/kodit/#integration-with-cursor)
+  - [Cline](https://docs.helix.ml/kodit/#integration-with-cline)
+  - Please contribute more instructions! ... any other assistant is likely to work ...
+
+### Enterprise Ready
+
+Out of the box, Kodit works with a local SQLite database and very small, local models.
+But enterprises can scale out with performant databases and dedicated models. Everything
+can even run securely, privately, with on-premise LLM platforms like
+[Helix](https://helix.ml).
+
+Supported databases:
+
+- SQLite
+- [Vectorchord](https://github.com/tensorchord/VectorChord)
+
+Supported providers:
+
+- Local (which uses tiny CPU-only open-source models)
+- OpenAI
+- Secure, private LLM enclave with [Helix](https://helix.ml).
+- Any other OpenAI compatible API
+
+## 🚀 Quick Start
+
+1. [Install Kodit](https://docs.helix.ml/kodit/#installation)
+2. [Index codebases](https://docs.helix.ml/kodit/#quick-start)
+3. [Integrate with your coding assistant](https://docs.helix.ml/kodit/#integrating-kodit-with-coding-assistants)
+
+### Documentation
+
+- [Installation Guide](https://docs.helix.ml/kodit/#installation)
+- [Usage Guide](https://docs.helix.ml/kodit/#quick-start)
+- [Connecting to Kodit](https://docs.helix.ml/kodit/#integrating-kodit-with-coding-assistants)
+- [Configuration Options](https://docs.helix.ml/kodit/#configuring-kodit)
+- [Contribution Guidelines](.github/CONTRIBUTING.md)
+
+## Roadmap
+
+The roadmap is currently maintained as a [Github Project](https://github.com/orgs/helixml/projects/4).
+
+## 💬 Support
+
+For commercial support, please contact [Helix.ML](founders@helix.ml). To ask a question,
+please [open a discussion](https://github.com/helixml/kodit/discussions).
+
+## License
+
+[Apache 2.0 © 2025 HelixML, Inc.](./LICENSE)