kodit 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release.
This version of kodit might be problematic.
- kodit/_version.py +2 -2
- kodit/app.py +39 -19
- kodit/{infrastructure/indexing → application/services}/auto_indexing_service.py +9 -1
- kodit/application/services/code_indexing_application_service.py +16 -0
- kodit/application/services/sync_scheduler.py +4 -1
- kodit/config.py +22 -1
- kodit/domain/entities.py +5 -0
- kodit/domain/protocols.py +4 -0
- kodit/domain/services/index_query_service.py +5 -1
- kodit/domain/services/index_service.py +11 -0
- kodit/infrastructure/api/__init__.py +1 -0
- kodit/infrastructure/api/middleware/__init__.py +1 -0
- kodit/infrastructure/api/middleware/auth.py +34 -0
- kodit/infrastructure/api/v1/__init__.py +5 -0
- kodit/infrastructure/api/v1/dependencies.py +70 -0
- kodit/infrastructure/api/v1/routers/__init__.py +6 -0
- kodit/infrastructure/api/v1/routers/indexes.py +114 -0
- kodit/infrastructure/api/v1/routers/search.py +74 -0
- kodit/infrastructure/api/v1/schemas/__init__.py +25 -0
- kodit/infrastructure/api/v1/schemas/context.py +11 -0
- kodit/infrastructure/api/v1/schemas/index.py +101 -0
- kodit/infrastructure/api/v1/schemas/search.py +219 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +4 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +4 -1
- kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +2 -9
- kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +4 -10
- kodit/infrastructure/sqlalchemy/index_repository.py +29 -0
- kodit/infrastructure/ui/progress.py +43 -0
- kodit/utils/dump_openapi.py +37 -0
- {kodit-0.3.10.dist-info → kodit-0.3.12.dist-info}/METADATA +16 -1
- {kodit-0.3.10.dist-info → kodit-0.3.12.dist-info}/RECORD +34 -21
- {kodit-0.3.10.dist-info → kodit-0.3.12.dist-info}/WHEEL +0 -0
- {kodit-0.3.10.dist-info → kodit-0.3.12.dist-info}/entry_points.txt +0 -0
- {kodit-0.3.10.dist-info → kodit-0.3.12.dist-info}/licenses/LICENSE +0 -0
kodit/infrastructure/api/v1/schemas/index.py

@@ -0,0 +1,101 @@
+"""JSON:API schemas for index operations."""
+
+from datetime import datetime
+
+from pydantic import BaseModel, Field
+
+
+class IndexAttributes(BaseModel):
+    """Index attributes for JSON:API responses."""
+
+    created_at: datetime
+    updated_at: datetime
+    uri: str
+
+
+class SnippetData(BaseModel):
+    """Snippet data for JSON:API relationships."""
+
+    type: str = "snippet"
+    id: str
+
+
+class IndexData(BaseModel):
+    """Index data for JSON:API responses."""
+
+    type: str = "index"
+    id: str
+    attributes: IndexAttributes
+
+
+class IndexResponse(BaseModel):
+    """JSON:API response for single index."""
+
+    data: IndexData
+
+
+class IndexListResponse(BaseModel):
+    """JSON:API response for index list."""
+
+    data: list[IndexData]
+
+
+class IndexCreateAttributes(BaseModel):
+    """Attributes for creating an index."""
+
+    uri: str = Field(..., description="URI of the source to index")
+
+
+class IndexCreateData(BaseModel):
+    """Data for creating an index."""
+
+    type: str = "index"
+    attributes: IndexCreateAttributes
+
+
+class IndexCreateRequest(BaseModel):
+    """JSON:API request for creating an index."""
+
+    data: IndexCreateData
+
+
+class AuthorData(BaseModel):
+    """Author data for JSON:API relationships."""
+
+    type: str = "author"
+    id: str
+
+
+class AuthorsRelationship(BaseModel):
+    """Authors relationship for JSON:API."""
+
+    data: list[AuthorData]
+
+
+class FileRelationships(BaseModel):
+    """File relationships for JSON:API."""
+
+    authors: AuthorsRelationship
+
+
+class FileAttributes(BaseModel):
+    """File attributes for JSON:API included resources."""
+
+    uri: str
+    sha256: str
+    mime_type: str
+    created_at: datetime
+    updated_at: datetime
+
+
+class AuthorAttributes(BaseModel):
+    """Author attributes for JSON:API included resources."""
+
+    name: str
+    email: str
+
+
+class IndexDetailResponse(BaseModel):
+    """JSON:API response for index details with included resources."""
+
+    data: IndexData
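
As a quick illustration of how these index schemas nest (a sketch, not part of the release; the exact endpoint and the Pydantic v2 `model_dump` call are assumptions):

from kodit.infrastructure.api.v1.schemas.index import (
    IndexCreateAttributes,
    IndexCreateData,
    IndexCreateRequest,
)

# Build the JSON:API body that a client would POST to the new indexes endpoint.
request = IndexCreateRequest(
    data=IndexCreateData(
        attributes=IndexCreateAttributes(uri="https://github.com/helixml/kodit")
    )
)
payload = request.model_dump()
# -> {'data': {'type': 'index', 'attributes': {'uri': 'https://github.com/helixml/kodit'}}}
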
kodit/infrastructure/api/v1/schemas/search.py

@@ -0,0 +1,219 @@
+"""JSON:API schemas for search operations."""
+
+from datetime import datetime
+
+from pydantic import BaseModel, Field
+
+
+class SearchFilters(BaseModel):
+    """Search filters for JSON:API requests."""
+
+    languages: list[str] | None = Field(
+        None, description="Programming languages to filter by"
+    )
+    authors: list[str] | None = Field(None, description="Authors to filter by")
+    start_date: datetime | None = Field(
+        None, description="Filter snippets created after this date"
+    )
+    end_date: datetime | None = Field(
+        None, description="Filter snippets created before this date"
+    )
+    sources: list[str] | None = Field(
+        None, description="Source repositories to filter by"
+    )
+    file_patterns: list[str] | None = Field(
+        None, description="File path patterns to filter by"
+    )
+
+
+class SearchAttributes(BaseModel):
+    """Search attributes for JSON:API requests."""
+
+    keywords: list[str] | None = Field(None, description="Search keywords")
+    code: str | None = Field(None, description="Code search query")
+    text: str | None = Field(None, description="Text search query")
+    limit: int | None = Field(10, description="Maximum number of results to return")
+    filters: SearchFilters | None = Field(None, description="Search filters")
+
+
+class SearchData(BaseModel):
+    """Search data for JSON:API requests."""
+
+    type: str = "search"
+    attributes: SearchAttributes
+
+
+class SearchRequest(BaseModel):
+    """JSON:API request for searching snippets."""
+
+    data: SearchData
+
+    @property
+    def limit(self) -> int | None:
+        """Get the limit from the search request."""
+        return self.data.attributes.limit
+
+    @property
+    def languages(self) -> list[str] | None:
+        """Get the languages from the search request."""
+        return (
+            self.data.attributes.filters.languages
+            if self.data.attributes.filters
+            else None
+        )
+
+    @property
+    def authors(self) -> list[str] | None:
+        """Get the authors from the search request."""
+        return (
+            self.data.attributes.filters.authors
+            if self.data.attributes.filters
+            else None
+        )
+
+    @property
+    def start_date(self) -> datetime | None:
+        """Get the start date from the search request."""
+        return (
+            self.data.attributes.filters.start_date
+            if self.data.attributes.filters
+            else None
+        )
+
+    @property
+    def end_date(self) -> datetime | None:
+        """Get the end date from the search request."""
+        return (
+            self.data.attributes.filters.end_date
+            if self.data.attributes.filters
+            else None
+        )
+
+    @property
+    def sources(self) -> list[str] | None:
+        """Get the sources from the search request."""
+        return (
+            self.data.attributes.filters.sources
+            if self.data.attributes.filters
+            else None
+        )
+
+    @property
+    def file_patterns(self) -> list[str] | None:
+        """Get the file patterns from the search request."""
+        return (
+            self.data.attributes.filters.file_patterns
+            if self.data.attributes.filters
+            else None
+        )
+
+
+class SnippetAttributes(BaseModel):
+    """Snippet attributes for JSON:API responses."""
+
+    content: str
+    created_at: datetime
+    updated_at: datetime
+    original_scores: list[float]
+    source_uri: str
+    relative_path: str
+    language: str
+    authors: list[str]
+    summary: str
+
+
+class SnippetData(BaseModel):
+    """Snippet data for JSON:API responses."""
+
+    type: str = "snippet"
+    id: int
+    attributes: SnippetAttributes
+
+
+class SearchResponse(BaseModel):
+    """JSON:API response for search results."""
+
+    data: list[SnippetData]
+
+
+class FileAttributes(BaseModel):
+    """File attributes for JSON:API included resources."""
+
+    uri: str
+    sha256: str
+    mime_type: str
+    created_at: datetime
+    updated_at: datetime
+
+
+class AuthorData(BaseModel):
+    """Author data for JSON:API relationships."""
+
+    type: str = "author"
+    id: int
+
+
+class AuthorsRelationship(BaseModel):
+    """Authors relationship for JSON:API."""
+
+    data: list[AuthorData]
+
+
+class FileRelationships(BaseModel):
+    """File relationships for JSON:API."""
+
+    authors: AuthorsRelationship
+
+
+class FileDataWithRelationships(BaseModel):
+    """File data with relationships for JSON:API included resources."""
+
+    type: str = "file"
+    id: int
+    attributes: FileAttributes
+    relationships: FileRelationships
+
+
+class AuthorAttributes(BaseModel):
+    """Author attributes for JSON:API included resources."""
+
+    name: str
+    email: str
+
+
+class AuthorDataWithAttributes(BaseModel):
+    """Author data with attributes for JSON:API included resources."""
+
+    type: str = "author"
+    id: int
+    attributes: AuthorAttributes
+
+
+class SearchResponseWithIncluded(BaseModel):
+    """JSON:API response for search results with included resources."""
+
+    data: list[SnippetData]
+    included: list[FileDataWithRelationships | AuthorDataWithAttributes] | None = None
+
+
+class SnippetDetailAttributes(BaseModel):
+    """Snippet detail attributes for JSON:API responses."""
+
+    created_at: datetime
+    updated_at: datetime
+    original_content: dict
+    summary_content: dict
+
+
+class SnippetDetailData(BaseModel):
+    """Snippet detail data for JSON:API responses."""
+
+    type: str = "snippet"
+    id: str
+    attributes: SnippetDetailAttributes
+
+
+class SnippetDetailResponse(BaseModel):
+    """JSON:API response for snippet details."""
+
+    data: SnippetDetailData
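
A rough sketch of how a client might build one of these search requests and how the convenience properties flatten the nested JSON:API document (illustrative only; the field values are invented and Pydantic v2 behaviour is assumed):

from datetime import datetime

from kodit.infrastructure.api.v1.schemas.search import (
    SearchAttributes,
    SearchData,
    SearchFilters,
    SearchRequest,
)

request = SearchRequest(
    data=SearchData(
        attributes=SearchAttributes(
            keywords=["retry", "backoff"],
            code="async def fetch",
            limit=5,
            filters=SearchFilters(
                languages=["python"],
                start_date=datetime(2024, 1, 1),
            ),
        )
    )
)

# The convenience properties flatten the nested JSON:API document:
assert request.limit == 5
assert request.languages == ["python"]
assert request.authors is None  # filters were given, but no author filter was set
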
kodit/infrastructure/bm25/local_bm25_repository.py

@@ -66,6 +66,7 @@ class LocalBM25Repository(BM25Repository):
             stemmer=self.stemmer,
             return_ids=False,
             show_progress=True,
+            lower=True,
         )

     async def index_documents(self, request: IndexRequest) -> None:
@@ -78,9 +79,8 @@ class LocalBM25Repository(BM25Repository):
         vocab = self._tokenize([doc.text for doc in request.documents])
         self._retriever().index(vocab, show_progress=False)
         self._retriever().save(self.index_path)
-
-
-        ]
+        # Replace snippet_ids instead of appending, since the BM25 index is rebuilt
+        self.snippet_ids = [doc.snippet_id for doc in request.documents]
         async with aiofiles.open(self.index_path / SNIPPET_IDS_FILE, "w") as f:
             await f.write(json.dumps(self.snippet_ids))

@@ -120,7 +120,7 @@ class LocalBM25Repository(BM25Repository):

         # Filter results by snippet_ids if provided
         filtered_results = []
-        for result, score in zip(results[0], scores[0], strict=
+        for result, score in zip(results[0], scores[0], strict=True):
             snippet_id = int(result)
             if score > 0.0 and (
                 request.snippet_ids is None or snippet_id in request.snippet_ids
kodit/infrastructure/bm25/vectorchord_bm25_repository.py

@@ -70,6 +70,9 @@ UPDATE_QUERY = f"""
 UPDATE {TABLE_NAME}
 SET embedding = tokenize(passage, '{TOKENIZER_NAME}')
 """  # noqa: S608
+# https://github.com/tensorchord/VectorChord-bm25:
+# We intentionally make it negative so that you can use the
+# default order by to get the most relevant documents first.
 SEARCH_QUERY = f"""
 SELECT
     snippet_id,
@@ -185,7 +188,7 @@ class VectorChordBM25Repository(BM25Repository):

     async def search(self, request: SearchRequest) -> list[SearchResult]:
         """Search documents using BM25."""
-        if not request.query or request.query == "":
+        if not request.query or request.query.strip() == "":
             return []

         if request.snippet_ids is not None:
kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py

@@ -112,15 +112,8 @@ class LocalEmbeddingProvider(EmbeddingProvider):

         except Exception as e:
             self.log.exception("Error generating embeddings", error=str(e))
-            # Return
-
-            EmbeddingResponse(
-                snippet_id=item.snippet_id,
-                embedding=[0.0] * 1536,  # Default embedding size
-            )
-            for item in batch
-            ]
-            yield responses
+            # Return no embeddings for this batch if there was an error
+            yield []

     def _split_sub_batches(
         self, encoding: "Encoding", data: list[EmbeddingRequest]
kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py

@@ -2,10 +2,10 @@

 import asyncio
 from collections.abc import AsyncGenerator
-from typing import Any

 import structlog
 import tiktoken
+from openai import AsyncOpenAI
 from tiktoken import Encoding

 from kodit.domain.services.embedding_service import EmbeddingProvider
@@ -25,7 +25,7 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
     """OpenAI embedding provider that uses OpenAI's embedding API."""

     def __init__(
-        self, openai_client:
+        self, openai_client: AsyncOpenAI, model_name: str = "text-embedding-3-small"
     ) -> None:
         """Initialize the OpenAI embedding provider.

@@ -99,14 +99,8 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
             ]
         except Exception as e:
             self.log.exception("Error embedding batch", error=str(e))
-            #
-            return [
-                EmbeddingResponse(
-                    snippet_id=item.snippet_id,
-                    embedding=[0.0] * 1536,  # Default OpenAI dim
-                )
-                for item in batch
-            ]
+            # Return no embeddings for this batch if there was an error
+            return []

         tasks = [_process_batch(batch) for batch in batched_data]
         for task in asyncio.as_completed(tasks):
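
A construction sketch based on the new `__init__` signature (the client configuration values are placeholders, not taken from this diff):

from openai import AsyncOpenAI

from kodit.infrastructure.embedding.embedding_providers.openai_embedding_provider import (
    OpenAIEmbeddingProvider,
)

# Placeholder credentials; any OpenAI-compatible endpoint should work the same way.
client = AsyncOpenAI(api_key="sk-...", base_url="https://api.openai.com/v1")
provider = OpenAIEmbeddingProvider(
    openai_client=client, model_name="text-embedding-3-small"
)
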
kodit/infrastructure/sqlalchemy/index_repository.py

@@ -577,3 +577,32 @@ class SqlAlchemyIndexRepository(IndexRepository):
                 domain_snippet, index.id
             )
             self._session.add(db_snippet)
+
+    async def delete(self, index: domain_entities.Index) -> None:
+        """Delete everything related to an index."""
+        # Delete all snippets and embeddings
+        await self.delete_snippets(index.id)
+
+        # Delete all author file mappings
+        stmt = delete(db_entities.AuthorFileMapping).where(
+            db_entities.AuthorFileMapping.file_id.in_(
+                [file.id for file in index.source.working_copy.files]
+            )
+        )
+        await self._session.execute(stmt)
+
+        # Delete all files
+        stmt = delete(db_entities.File).where(
+            db_entities.File.source_id == index.source.id
+        )
+        await self._session.execute(stmt)
+
+        # Delete the source
+        stmt = delete(db_entities.Source).where(
+            db_entities.Source.id == index.source.id
+        )
+        await self._session.execute(stmt)
+
+        # Delete the index
+        stmt = delete(db_entities.Index).where(db_entities.Index.id == index.id)
+        await self._session.execute(stmt)
kodit/infrastructure/ui/progress.py

@@ -2,6 +2,7 @@

 from collections.abc import Callable

+import structlog
 from tqdm import tqdm  # type: ignore[import-untyped]

 from kodit.domain.interfaces import ProgressCallback
@@ -42,6 +43,43 @@ class TQDMProgressCallback(ProgressCallback):
         # TQDM will handle cleanup with leave=False


+class LogProgressCallback(ProgressCallback):
+    """Log-based progress callback for server environments."""
+
+    def __init__(self, milestone_interval: int = 10) -> None:
+        """Initialize with milestone logging interval.
+
+        Args:
+            milestone_interval: Percentage interval for logging (default: 10%)
+
+        """
+        self.milestone_interval = milestone_interval
+        self._last_logged_percentage = -1
+        self.log = structlog.get_logger()
+
+    async def on_progress(self, event: ProgressEvent) -> None:
+        """Log progress at milestone intervals."""
+        percentage = int(event.percentage)
+
+        # Log at milestone intervals (0%, 10%, 20%, etc.)
+        milestone = (percentage // self.milestone_interval) * self.milestone_interval
+
+        if milestone > self._last_logged_percentage and milestone <= percentage:
+            self.log.info(
+                "Progress milestone reached",
+                operation=event.operation,
+                percentage=milestone,
+                current=event.current,
+                total=event.total,
+                message=event.message,
+            )
+            self._last_logged_percentage = milestone
+
+    async def on_complete(self, operation: str) -> None:
+        """Log completion of the operation."""
+        self.log.info("Operation completed", operation=operation)
+
+
 class LazyProgressCallback(ProgressCallback):
     """Progress callback that only shows progress when there's actual work to do."""

@@ -125,3 +163,8 @@ def create_multi_stage_progress_callback() -> MultiStageProgressCallback:
     return MultiStageProgressCallback(
         lambda operation: create_progress_bar(operation, "items")
     )
+
+
+def create_log_progress_callback(milestone_interval: int = 10) -> LogProgressCallback:
+    """Create a log-based progress callback for server environments."""
+    return LogProgressCallback(milestone_interval=milestone_interval)
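
A minimal wiring sketch for the new callback (assumed usage; how kodit actually chooses between the TQDM and log callbacks is not shown in this diff):

from kodit.infrastructure.ui.progress import create_log_progress_callback

# In a long-running server there is no terminal to draw a progress bar on, so the
# log callback emits one structured log line per milestone (here every 25%).
progress_callback = create_log_progress_callback(milestone_interval=25)

This fits the structlog-based logging the rest of the module already imports.
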
kodit/utils/dump_openapi.py

@@ -0,0 +1,37 @@
+"""Dump the OpenAPI json schema to a file."""
+
+import argparse
+import json
+from pathlib import Path
+from typing import Any
+
+from openapi_markdown.generator import to_markdown  # type: ignore[import-untyped]
+from uvicorn.importer import import_from_string
+
+parser = argparse.ArgumentParser(prog="dump-openapi.py")
+parser.add_argument(
+    "app", help='App import string. Eg. "kodit.app:app"', default="kodit.app:app"
+)
+parser.add_argument("--out-dir", help="Output directory", default="docs/reference/api")
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+
+    app = import_from_string(args.app)
+    openapi = app.openapi()
+    version = openapi.get("openapi", "unknown version")
+
+    # Remove any dev tags from the version by retaining only the semver part
+    git_tag = openapi["info"]["version"].split(".")[:3]
+    openapi["info"]["version"] = ".".join(git_tag)
+
+    output_json_file = Path(args.out_dir) / "openapi.json"
+
+    with output_json_file.open("w") as f:
+        json.dump(openapi, f, indent=2)
+
+    output_md_file = Path(args.out_dir) / "index.md"
+    templates_dir = Path(args.out_dir) / "templates"
+    options: dict[str, Any] = {}
+
+    to_markdown(str(output_json_file), str(output_md_file), str(templates_dir), options)
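
Assuming the script is run as a module (no console entry point is added in entry_points.txt in this diff), generating the docs would look something like `python -m kodit.utils.dump_openapi kodit.app:app --out-dir docs/reference/api`, which writes both openapi.json and a rendered index.md into the output directory.
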
{kodit-0.3.10.dist-info → kodit-0.3.12.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kodit
-Version: 0.3.10
+Version: 0.3.12
 Summary: Code indexing for better AI code generation
 Project-URL: Homepage, https://docs.helixml.tech/kodit/
 Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -72,6 +72,8 @@ Kodit connects your AI coding assistant to external codebases to provide accurat

 </div>

+:star: _Help us reach more developers and grow the Helix community. Star this repo!_
+
 **Helix Kodit** is an **MCP server** that connects your AI coding assistant to external codebases. It can:

 - Improve your AI-assisted code by providing canonical examples direct from the source
@@ -120,6 +122,19 @@ intent. Kodit has been tested to work well with:
 - **New in 0.3**: Hybrid search combining BM25 keyword search with semantic search
 - **New in 0.4**: Enhanced MCP tools with rich context parameters and metadata

+### Hosted MCP Server
+
+**New in 0.4**: Try Kodit instantly with our hosted MCP server at [https://kodit.helix.ml/mcp](https://kodit.helix.ml/mcp)! No installation required - just add it to your AI coding assistant and start searching popular codebases immediately.
+
+The hosted server provides:
+
+- Pre-indexed popular open source repositories
+- Zero configuration - works out of the box
+- Same powerful search capabilities as self-hosted Kodit
+- Perfect for trying Kodit before setting up your own instance
+
+Find out more in the [hosted Kodit documentation](https://docs.helix.ml/kodit/reference/hosted-kodit/).
+
 ### Enterprise Ready

 Out of the box, Kodit works with a local SQLite database and very small, local models.
|