kodit 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/__init__.py +1 -0
- kodit/application/commands/__init__.py +1 -0
- kodit/application/commands/snippet_commands.py +22 -0
- kodit/application/services/__init__.py +1 -0
- kodit/application/services/indexing_application_service.py +363 -0
- kodit/application/services/snippet_application_service.py +143 -0
- kodit/cli.py +105 -82
- kodit/database.py +0 -22
- kodit/domain/__init__.py +1 -0
- kodit/{source/source_models.py → domain/entities.py} +88 -19
- kodit/domain/enums.py +9 -0
- kodit/domain/interfaces.py +27 -0
- kodit/domain/repositories.py +95 -0
- kodit/domain/services/__init__.py +1 -0
- kodit/domain/services/bm25_service.py +124 -0
- kodit/domain/services/embedding_service.py +155 -0
- kodit/domain/services/enrichment_service.py +48 -0
- kodit/domain/services/ignore_service.py +45 -0
- kodit/domain/services/indexing_service.py +203 -0
- kodit/domain/services/snippet_extraction_service.py +89 -0
- kodit/domain/services/source_service.py +83 -0
- kodit/domain/value_objects.py +215 -0
- kodit/infrastructure/__init__.py +1 -0
- kodit/infrastructure/bm25/__init__.py +1 -0
- kodit/infrastructure/bm25/bm25_factory.py +28 -0
- kodit/{bm25/local_bm25.py → infrastructure/bm25/local_bm25_repository.py} +33 -22
- kodit/{bm25/vectorchord_bm25.py → infrastructure/bm25/vectorchord_bm25_repository.py} +40 -35
- kodit/infrastructure/cloning/__init__.py +1 -0
- kodit/infrastructure/cloning/folder/__init__.py +1 -0
- kodit/infrastructure/cloning/folder/factory.py +119 -0
- kodit/infrastructure/cloning/folder/working_copy.py +38 -0
- kodit/infrastructure/cloning/git/__init__.py +1 -0
- kodit/infrastructure/cloning/git/factory.py +133 -0
- kodit/infrastructure/cloning/git/working_copy.py +32 -0
- kodit/infrastructure/cloning/metadata.py +127 -0
- kodit/infrastructure/embedding/__init__.py +1 -0
- kodit/infrastructure/embedding/embedding_factory.py +87 -0
- kodit/infrastructure/embedding/embedding_providers/__init__.py +1 -0
- kodit/infrastructure/embedding/embedding_providers/batching.py +93 -0
- kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +79 -0
- kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +129 -0
- kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +113 -0
- kodit/infrastructure/embedding/local_vector_search_repository.py +114 -0
- kodit/{embedding/vectorchord_vector_search_service.py → infrastructure/embedding/vectorchord_vector_search_repository.py} +65 -46
- kodit/infrastructure/enrichment/__init__.py +1 -0
- kodit/{enrichment → infrastructure/enrichment}/enrichment_factory.py +28 -12
- kodit/infrastructure/enrichment/legacy_enrichment_models.py +42 -0
- kodit/{enrichment/enrichment_provider → infrastructure/enrichment}/local_enrichment_provider.py +38 -26
- kodit/infrastructure/enrichment/null_enrichment_provider.py +25 -0
- kodit/infrastructure/enrichment/openai_enrichment_provider.py +89 -0
- kodit/infrastructure/git/__init__.py +1 -0
- kodit/{source/git.py → infrastructure/git/git_utils.py} +10 -2
- kodit/infrastructure/ignore/__init__.py +1 -0
- kodit/{source/ignore.py → infrastructure/ignore/ignore_pattern_provider.py} +23 -6
- kodit/infrastructure/indexing/__init__.py +1 -0
- kodit/infrastructure/indexing/fusion_service.py +55 -0
- kodit/infrastructure/indexing/index_repository.py +296 -0
- kodit/infrastructure/indexing/indexing_factory.py +111 -0
- kodit/infrastructure/snippet_extraction/__init__.py +1 -0
- kodit/infrastructure/snippet_extraction/language_detection_service.py +39 -0
- kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +95 -0
- kodit/infrastructure/snippet_extraction/snippet_query_provider.py +45 -0
- kodit/{snippets/method_snippets.py → infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py} +123 -61
- kodit/infrastructure/sqlalchemy/__init__.py +1 -0
- kodit/{embedding → infrastructure/sqlalchemy}/embedding_repository.py +40 -24
- kodit/infrastructure/sqlalchemy/file_repository.py +73 -0
- kodit/infrastructure/sqlalchemy/repository.py +121 -0
- kodit/infrastructure/sqlalchemy/snippet_repository.py +75 -0
- kodit/infrastructure/ui/__init__.py +1 -0
- kodit/infrastructure/ui/progress.py +127 -0
- kodit/{util → infrastructure/ui}/spinner.py +19 -4
- kodit/mcp.py +50 -28
- kodit/migrations/env.py +1 -4
- kodit/reporting.py +78 -0
- {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/METADATA +1 -1
- kodit-0.2.5.dist-info/RECORD +99 -0
- kodit/bm25/__init__.py +0 -1
- kodit/bm25/keyword_search_factory.py +0 -17
- kodit/bm25/keyword_search_service.py +0 -34
- kodit/embedding/__init__.py +0 -1
- kodit/embedding/embedding_factory.py +0 -69
- kodit/embedding/embedding_models.py +0 -28
- kodit/embedding/embedding_provider/__init__.py +0 -1
- kodit/embedding/embedding_provider/embedding_provider.py +0 -92
- kodit/embedding/embedding_provider/hash_embedding_provider.py +0 -86
- kodit/embedding/embedding_provider/local_embedding_provider.py +0 -96
- kodit/embedding/embedding_provider/openai_embedding_provider.py +0 -73
- kodit/embedding/local_vector_search_service.py +0 -87
- kodit/embedding/vector_search_service.py +0 -55
- kodit/enrichment/__init__.py +0 -1
- kodit/enrichment/enrichment_provider/__init__.py +0 -1
- kodit/enrichment/enrichment_provider/enrichment_provider.py +0 -36
- kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +0 -79
- kodit/enrichment/enrichment_service.py +0 -45
- kodit/indexing/__init__.py +0 -1
- kodit/indexing/fusion.py +0 -67
- kodit/indexing/indexing_models.py +0 -43
- kodit/indexing/indexing_repository.py +0 -216
- kodit/indexing/indexing_service.py +0 -344
- kodit/snippets/__init__.py +0 -1
- kodit/snippets/languages/__init__.py +0 -53
- kodit/snippets/snippets.py +0 -50
- kodit/source/__init__.py +0 -1
- kodit/source/source_factories.py +0 -356
- kodit/source/source_repository.py +0 -169
- kodit/source/source_service.py +0 -150
- kodit/util/__init__.py +0 -1
- kodit-0.2.4.dist-info/RECORD +0 -71
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/csharp.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/go.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/javascript.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/python.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/typescript.scm +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/WHEEL +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/entry_points.txt +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Progress UI implementations."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
|
|
5
|
+
from tqdm import tqdm
|
|
6
|
+
|
|
7
|
+
from kodit.domain.interfaces import ProgressCallback
|
|
8
|
+
from kodit.domain.value_objects import ProgressEvent
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TQDMProgressCallback(ProgressCallback):
    """Progress callback that draws incoming events on a tqdm progress bar."""

    def __init__(self, pbar: "tqdm") -> None:
        """Keep a reference to the tqdm bar that events are rendered on."""
        self.pbar = pbar

    async def on_progress(self, event: ProgressEvent) -> None:
        """Copy the event's total, position and message onto the bar.

        Args:
            event: Progress event carrying ``total``, ``current`` and an
                optional ``message``.

        """
        bar = self.pbar

        # Only touch the total when the event reports a different one.
        if event.total != bar.total:
            bar.total = event.total

        # The position is set absolutely, then the bar is redrawn.
        bar.n = event.current
        bar.refresh()

        message = event.message
        if not message:
            return

        # Hold the description at exactly 30 characters so the bar does not
        # jump around: short messages are right-padded with spaces, long
        # messages keep only their last 30 characters.
        bar.set_description(message.ljust(30)[-30:])

    async def on_complete(self, operation: str) -> None:
        """Complete the progress bar; nothing is done explicitly here."""
        # TQDM will handle cleanup with leave=False
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class LazyProgressCallback(ProgressCallback):
    """Progress callback that builds its progress bar on the first event only."""

    def __init__(self, create_pbar_func: Callable[[], tqdm]) -> None:
        """Remember the zero-argument factory used to build the bar later.

        Args:
            create_pbar_func: Callable returning the tqdm bar to render on.

        """
        self.create_pbar_func = create_pbar_func
        self._callback: ProgressCallback | None = None
        self._has_work = False

    async def on_progress(self, event: ProgressEvent) -> None:
        """Forward the event, creating the underlying callback on first use."""
        if not self._has_work:
            # Flag first, then build: the bar is created once, on the first
            # reported event, so no bar appears when there is no work.
            self._has_work = True
            self._callback = TQDMProgressCallback(self.create_pbar_func())

        delegate = self._callback
        if delegate:
            await delegate.on_progress(event)

    async def on_complete(self, operation: str) -> None:
        """Forward completion to the underlying callback, if one was created."""
        delegate = self._callback
        if not delegate:
            return
        await delegate.on_complete(operation)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class MultiStageProgressCallback(ProgressCallback):
    """Progress callback that gives each operation its own progress bar."""

    def __init__(self, create_pbar_func: Callable[[str], tqdm]) -> None:
        """Remember the factory that builds a bar for an operation name.

        Args:
            create_pbar_func: Callable taking the operation name and returning
                the tqdm bar for that operation.

        """
        self.create_pbar_func = create_pbar_func
        self._current_callback: ProgressCallback | None = None
        self._current_operation: str | None = None

    async def on_progress(self, event: ProgressEvent) -> None:
        """Route the event to the bar of its operation, switching bars if needed."""
        stage = event.operation

        # A different operation name than the tracked one starts a new bar.
        if self._current_operation != stage:
            self._current_callback = TQDMProgressCallback(
                self.create_pbar_func(stage)
            )
            self._current_operation = stage

        active = self._current_callback
        if active:
            await active.on_progress(event)

    async def on_complete(self, operation: str) -> None:
        """Finish the tracked operation when ``operation`` matches it."""
        active = self._current_callback
        if not active or self._current_operation != operation:
            # Nothing is tracked, or the completion is for another operation.
            return

        await active.on_complete(operation)
        self._current_callback = None
        self._current_operation = None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def create_progress_bar(desc: str = "Processing", unit: str = "items") -> "tqdm":
    """Build a tqdm progress bar labelled with ``desc`` and counting in ``unit``.

    Args:
        desc: Description shown with the bar.
        unit: Unit label for the counted items.

    Returns:
        A new tqdm bar whose total is left unset.

    """
    from tqdm import tqdm

    options = {
        "desc": desc,
        "unit": unit,
        "leave": False,
        "dynamic_ncols": True,
        "total": None,  # Will be set dynamically
        "position": 0,  # Position at top
        "mininterval": 0.1,  # Update at most every 0.1 seconds
    }
    return tqdm(**options)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def create_lazy_progress_callback() -> LazyProgressCallback:
    """Return a lazy callback whose bar is only built once progress is reported."""

    def _make_files_bar() -> "tqdm":
        # Built on demand by the lazy callback, not at construction time.
        return create_progress_bar("Processing files", "files")

    return LazyProgressCallback(_make_files_bar)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def create_multi_stage_progress_callback() -> MultiStageProgressCallback:
    """Return a multi-stage callback that labels each bar with its operation name."""

    def _make_stage_bar(operation: str) -> "tqdm":
        # The operation name becomes the bar description; the unit is "items".
        return create_progress_bar(operation, "items")

    return MultiStageProgressCallback(_make_stage_bar)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Spinner for long-running tasks."""
|
|
1
|
+
"""Spinner for long-running tasks in the UI layer."""
|
|
2
2
|
|
|
3
3
|
import itertools
|
|
4
4
|
import sys
|
|
@@ -7,10 +7,20 @@ import time
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class Spinner:
|
|
10
|
-
"""Spinner for long-running tasks.
|
|
10
|
+
"""Spinner for long-running tasks.
|
|
11
|
+
|
|
12
|
+
This class provides visual feedback for long-running operations by displaying
|
|
13
|
+
a spinning animation in the terminal. It's designed to be used as a context
|
|
14
|
+
manager for operations that may take some time to complete.
|
|
15
|
+
"""
|
|
11
16
|
|
|
12
17
|
def __init__(self, delay: float = 0.1) -> None:
|
|
13
|
-
"""Initialize the spinner.
|
|
18
|
+
"""Initialize the spinner.
|
|
19
|
+
|
|
20
|
+
Args:
|
|
21
|
+
delay: The delay between spinner updates in seconds.
|
|
22
|
+
|
|
23
|
+
"""
|
|
14
24
|
self.spinner = itertools.cycle(["-", "/", "|", "\\"])
|
|
15
25
|
self.delay = delay
|
|
16
26
|
self.busy = False
|
|
@@ -25,7 +35,12 @@ class Spinner:
|
|
|
25
35
|
sys.stdout.flush()
|
|
26
36
|
|
|
27
37
|
def remove_spinner(self, cleanup: bool = False) -> None: # noqa: FBT001, FBT002
|
|
28
|
-
"""Remove the spinner.
|
|
38
|
+
"""Remove the spinner.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
cleanup: Whether to clean up the spinner display.
|
|
42
|
+
|
|
43
|
+
"""
|
|
29
44
|
with self._screen_lock:
|
|
30
45
|
if self.spinner_visible:
|
|
31
46
|
sys.stdout.write("\b")
|
kodit/mcp.py
CHANGED
|
@@ -12,15 +12,20 @@ from pydantic import Field
|
|
|
12
12
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
13
13
|
|
|
14
14
|
from kodit._version import version
|
|
15
|
-
from kodit.
|
|
15
|
+
from kodit.application.services.snippet_application_service import (
|
|
16
|
+
SnippetApplicationService,
|
|
17
|
+
)
|
|
16
18
|
from kodit.config import AppContext
|
|
17
19
|
from kodit.database import Database
|
|
18
|
-
from kodit.
|
|
19
|
-
from kodit.
|
|
20
|
-
from kodit.indexing.
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
from kodit.
|
|
20
|
+
from kodit.domain.services.source_service import SourceService
|
|
21
|
+
from kodit.domain.value_objects import MultiSearchRequest, MultiSearchResult
|
|
22
|
+
from kodit.infrastructure.indexing.indexing_factory import (
|
|
23
|
+
create_indexing_application_service,
|
|
24
|
+
)
|
|
25
|
+
from kodit.infrastructure.snippet_extraction.snippet_extraction_factory import (
|
|
26
|
+
create_snippet_extraction_domain_service,
|
|
27
|
+
create_snippet_repositories,
|
|
28
|
+
)
|
|
24
29
|
|
|
25
30
|
|
|
26
31
|
@dataclass
|
|
@@ -69,6 +74,32 @@ mcp = FastMCP(
|
|
|
69
74
|
)
|
|
70
75
|
|
|
71
76
|
|
|
77
|
+
def create_snippet_application_service(
    session: AsyncSession,
) -> SnippetApplicationService:
    """Assemble a snippet application service and its collaborators.

    Args:
        session: SQLAlchemy session handed to the repository factory.

    Returns:
        A SnippetApplicationService wired with the extraction domain service
        and the snippet and file repositories.

    """
    # The domain service is built first, then the session-bound repositories.
    extraction_service = create_snippet_extraction_domain_service()
    snippet_repo, file_repo = create_snippet_repositories(session)

    return SnippetApplicationService(
        snippet_extraction_service=extraction_service,
        snippet_repository=snippet_repo,
        file_repository=file_repo,
    )
|
|
101
|
+
|
|
102
|
+
|
|
72
103
|
@mcp.tool()
|
|
73
104
|
async def search(
|
|
74
105
|
ctx: Context,
|
|
@@ -126,31 +157,22 @@ async def search(
|
|
|
126
157
|
|
|
127
158
|
mcp_context: MCPContext = ctx.request_context.lifespan_context
|
|
128
159
|
|
|
129
|
-
source_repository = SourceRepository(mcp_context.session)
|
|
130
160
|
source_service = SourceService(
|
|
131
|
-
mcp_context.app_context.get_clone_dir(),
|
|
161
|
+
clone_dir=mcp_context.app_context.get_clone_dir(),
|
|
162
|
+
session_factory=lambda: mcp_context.session,
|
|
132
163
|
)
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
164
|
+
# Create snippet application service
|
|
165
|
+
snippet_application_service = create_snippet_application_service(
|
|
166
|
+
mcp_context.session
|
|
167
|
+
)
|
|
168
|
+
service = create_indexing_application_service(
|
|
169
|
+
app_context=mcp_context.app_context,
|
|
170
|
+
session=mcp_context.session,
|
|
136
171
|
source_service=source_service,
|
|
137
|
-
|
|
138
|
-
mcp_context.app_context, mcp_context.session
|
|
139
|
-
),
|
|
140
|
-
code_search_service=embedding_factory(
|
|
141
|
-
task_name="code",
|
|
142
|
-
app_context=mcp_context.app_context,
|
|
143
|
-
session=mcp_context.session,
|
|
144
|
-
),
|
|
145
|
-
text_search_service=embedding_factory(
|
|
146
|
-
task_name="text",
|
|
147
|
-
app_context=mcp_context.app_context,
|
|
148
|
-
session=mcp_context.session,
|
|
149
|
-
),
|
|
150
|
-
enrichment_service=enrichment_factory(mcp_context.app_context),
|
|
172
|
+
snippet_application_service=snippet_application_service,
|
|
151
173
|
)
|
|
152
174
|
|
|
153
|
-
search_request =
|
|
175
|
+
search_request = MultiSearchRequest(
|
|
154
176
|
keywords=keywords,
|
|
155
177
|
code_query="\n".join(related_file_contents),
|
|
156
178
|
text_query=user_intent,
|
|
@@ -166,7 +188,7 @@ async def search(
|
|
|
166
188
|
return output
|
|
167
189
|
|
|
168
190
|
|
|
169
|
-
def output_fusion(snippets: list[
|
|
191
|
+
def output_fusion(snippets: list[MultiSearchResult]) -> str:
    """Render the snippets as one string.

    Each snippet becomes its ``uri`` on one line followed by its ``content``;
    snippets are separated by a blank line.
    """
    rendered = [f"{snippet.uri}\n{snippet.content}" for snippet in snippets]
    return "\n\n".join(rendered)
|
|
172
194
|
|
kodit/migrations/env.py
CHANGED
|
@@ -8,10 +8,7 @@ from sqlalchemy import pool
|
|
|
8
8
|
from sqlalchemy.engine import Connection
|
|
9
9
|
from sqlalchemy.ext.asyncio import async_engine_from_config
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
import kodit.indexing.indexing_models
|
|
13
|
-
import kodit.source.source_models
|
|
14
|
-
from kodit.database import Base
|
|
11
|
+
from kodit.domain.entities import Base
|
|
15
12
|
|
|
16
13
|
# this is the Alembic Config object, which provides
|
|
17
14
|
# access to the values within the .ini file in use.
|
kodit/reporting.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Unified logging and progress-reporting helper.
|
|
2
|
+
|
|
3
|
+
This utility consolidates the repeated pattern where services:
|
|
4
|
+
1. Log a message (usually via structlog) and
|
|
5
|
+
2. Emit a ProgressEvent via a ProgressCallback.
|
|
6
|
+
|
|
7
|
+
Using Reporter removes boiler-plate and guarantees consistent telemetry.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import structlog
|
|
11
|
+
|
|
12
|
+
from kodit.domain.interfaces import NullProgressCallback, ProgressCallback
|
|
13
|
+
from kodit.domain.value_objects import ProgressEvent
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Reporter:
    """Helper that pairs debug logging with progress-callback notifications."""

    def __init__(
        self,
        logger: structlog.BoundLogger | None = None,
        progress: ProgressCallback | None = None,
    ) -> None:
        """Set up the reporter, falling back to defaults for missing parts.

        Args:
            logger: Logger to use; a module-named structlog logger is created
                when omitted.
            progress: Progress callback to notify; a NullProgressCallback is
                used when omitted.

        """
        self.log: structlog.BoundLogger = logger or structlog.get_logger(__name__)
        self.progress: ProgressCallback = progress or NullProgressCallback()

    # ---------------------------------------------------------------------
    # Life-cycle helpers
    # ---------------------------------------------------------------------
    async def start(
        self, operation: str, total: int, message: str | None = None
    ) -> None:
        """Log ``operation.start`` and emit a ProgressEvent positioned at zero."""
        self.log.debug(
            "operation.start", operation=operation, total=total, message=message
        )
        initial_event = ProgressEvent(
            operation=operation, current=0, total=total, message=message
        )
        await self.progress.on_progress(initial_event)

    async def step(
        self,
        operation: str,
        current: int,
        total: int,
        message: str | None = None,
    ) -> None:
        """Emit one intermediate ProgressEvent without logging."""
        step_event = ProgressEvent(
            operation=operation, current=current, total=total, message=message
        )
        await self.progress.on_progress(step_event)

    async def done(self, operation: str, message: str | None = None) -> None:
        """Log ``operation.done`` and notify the callback of completion."""
        self.log.debug("operation.done", operation=operation, message=message)
        await self.progress.on_complete(operation)

    async def advance(
        self,
        operation: str,
        current: int,
        total: int,
        message: str | None = None,
        log_every: int | None = None,
    ) -> None:
        """Emit a step, logging first when ``current`` is a multiple of ``log_every``.

        No log line is written when ``log_every`` is ``None`` or zero.
        """
        should_log = log_every and current % log_every == 0
        if should_log:
            self.log.debug(
                "operation.progress",
                operation=operation,
                current=current,
                total=total,
                message=message,
            )
        await self.step(operation, current, total, message)
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
|
|
2
|
+
kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
|
|
3
|
+
kodit/_version.py,sha256=N3oBwJUFmS-AwCjqOcSlRW4GvSq-uJJMaBvoGfv1-hM,511
|
|
4
|
+
kodit/app.py,sha256=qKBWJ0VNSY_M6G3VFfAQ0133q5bnS99cUFD0p396taw,1032
|
|
5
|
+
kodit/cli.py,sha256=SUoo9R-Jut0OlZLajD-nSPdhvHPOZ2SpN35SDyON8tc,11500
|
|
6
|
+
kodit/config.py,sha256=3yh7hfLSILjZK_qJMhcExwRcrWJ0b5Eb1JjjOvMPJZo,4146
|
|
7
|
+
kodit/database.py,sha256=kI9yBm4uunsgV4-QeVoCBL0wLzU4kYmYv5qZilGnbPE,1740
|
|
8
|
+
kodit/log.py,sha256=sHPHYetlMcKTor2VaFLMyao1_fZ_xhuzqXCAt5F5UMU,8575
|
|
9
|
+
kodit/mcp.py,sha256=kiWyZ2Ptluh0jLDQNxKjxNmD18bEG1-zAFVOoZAPsWI,6192
|
|
10
|
+
kodit/middleware.py,sha256=I6FOkqG9-8RH5kR1-0ZoQWfE4qLCB8lZYv8H_OCH29o,2714
|
|
11
|
+
kodit/reporting.py,sha256=icce1ZyiADsA_Qz-mSjgn2H4SSqKuGfLKnw-yrl9nsg,2722
|
|
12
|
+
kodit/application/__init__.py,sha256=mH50wTpgP9dhbKztFsL8Dda9Hi18TSnMVxXtpp4aGOA,35
|
|
13
|
+
kodit/application/commands/__init__.py,sha256=AOVs25fwboBnMCWdgDB7fPbAYTljurAPVMkATIGRKuk,38
|
|
14
|
+
kodit/application/commands/snippet_commands.py,sha256=WzRrnJOnLpIK8-wvN7c-ecGs_4LosQ_jR30dQkFqFBY,600
|
|
15
|
+
kodit/application/services/__init__.py,sha256=p5UQNw-H5sxQvs5Etfte93B3cJ1kKW6DNxK34uFvU1E,38
|
|
16
|
+
kodit/application/services/indexing_application_service.py,sha256=n7ViSfAPshUqe_gebgduRKwIPkB3Cl72CDw2U_qUJXY,13558
|
|
17
|
+
kodit/application/services/snippet_application_service.py,sha256=NJxR3X78lTK0zXpYPdiCCw8NAJdLwyMyynfGnBfvONM,4759
|
|
18
|
+
kodit/domain/__init__.py,sha256=TCpg4Xx-oF4mKV91lo4iXqMEfBT1OoRSYnbG-zVWolA,66
|
|
19
|
+
kodit/domain/entities.py,sha256=6XVuwDIQjkBw5Bm51io5ZUxB6_O4A774CDke2bfKWTY,5584
|
|
20
|
+
kodit/domain/enums.py,sha256=Ik_h3D3eZ0FsSlPsU0ikm-Yv3Rmvzicffi9yBn19UIE,191
|
|
21
|
+
kodit/domain/interfaces.py,sha256=Jkd0Ob4qSvhZHI9jRPFQ1n5Cv0SvU-y3Z-HCw2ikc4I,742
|
|
22
|
+
kodit/domain/repositories.py,sha256=bdKxSKGI6XzrpzeKcv-NDV2JBirbEMRK-Y4UCZmDtoY,2706
|
|
23
|
+
kodit/domain/value_objects.py,sha256=4Vs7Uk1wQgTjnCkZlOlw7E1Q8NiyAmBpFv38Lhs3WZ0,3869
|
|
24
|
+
kodit/domain/services/__init__.py,sha256=Q1GhCK_PqKHYwYE4tkwDz5BIyXkJngLBBOHhzvX8nzo,42
|
|
25
|
+
kodit/domain/services/bm25_service.py,sha256=N3kAcsRjxn22bbYNRhGu-VY6fnLsE-M2WyzEIk7Im3s,3809
|
|
26
|
+
kodit/domain/services/embedding_service.py,sha256=ZgC4n7vuqwW_NOCTRiNxezmCs73OB-G1dTDRGYorGGo,4588
|
|
27
|
+
kodit/domain/services/enrichment_service.py,sha256=XsXg3nV-KN4rqtC7Zro_ZiZ6RSq-1eA1MG6IDzFGyBA,1316
|
|
28
|
+
kodit/domain/services/ignore_service.py,sha256=boEN-IRLmUtwO9ZnuACaVFZbIKrtUG8YwnsXKEDIG28,1136
|
|
29
|
+
kodit/domain/services/indexing_service.py,sha256=FEizu2GkvZA32xHOYXXch0LuHoWg6Z-BbJMPjZslzjc,5853
|
|
30
|
+
kodit/domain/services/snippet_extraction_service.py,sha256=QW_99bXWpr8g6ZI-hp4Aj57VCSrUf71dLwQca5T6pyg,3065
|
|
31
|
+
kodit/domain/services/source_service.py,sha256=CM9TKTvn-9xfsk3AoKvIoPQ5_MridXng2jY8skn6_5Q,2987
|
|
32
|
+
kodit/infrastructure/__init__.py,sha256=HzEYIjoXnkz_i_MHO2e0sIVYweUcRnl2RpyBiTbMObU,28
|
|
33
|
+
kodit/infrastructure/bm25/__init__.py,sha256=DmGbrEO34FOJy4e685BbyxLA7gPW1eqs2gAxsp6JOuM,34
|
|
34
|
+
kodit/infrastructure/bm25/bm25_factory.py,sha256=I4eo7qRslnyXIRkBf-StZ5ga2Evrr5J5YFocTChFD3g,884
|
|
35
|
+
kodit/infrastructure/bm25/local_bm25_repository.py,sha256=HjhzY24FU5R81qfzXfzJl14E3JafLO0l0ug6psutErQ,4276
|
|
36
|
+
kodit/infrastructure/bm25/vectorchord_bm25_repository.py,sha256=0Db9XWFjiS4TFrsNazBMo6FxhX9SxLGNVQB0rDHqnL4,6875
|
|
37
|
+
kodit/infrastructure/cloning/__init__.py,sha256=IzIvX-yeRRFZ-lfvPVSEe_qXszO6DGQdjKwwDigexyQ,30
|
|
38
|
+
kodit/infrastructure/cloning/metadata.py,sha256=C5LLmsUzi29RhSbzVDNqiShbekg7qdp1ihGUyFXy5yM,4277
|
|
39
|
+
kodit/infrastructure/cloning/folder/__init__.py,sha256=w6ykrVtbYJlUDEXAjqgf6w2rMsUMCrrpIbl3QMjubgY,37
|
|
40
|
+
kodit/infrastructure/cloning/folder/factory.py,sha256=qpFXxZuwYbdO17FKurWtm84ahwerDpcaLubClEZrbtk,3955
|
|
41
|
+
kodit/infrastructure/cloning/folder/working_copy.py,sha256=FPhwzuPj40yGoYvwcm9VG8mv8MbJxwfby_N5JS-_daA,1154
|
|
42
|
+
kodit/infrastructure/cloning/git/__init__.py,sha256=20ePcp0qE6BuLsjsv4KYB1DzKhMIMsPXwEqIEZtjTJs,34
|
|
43
|
+
kodit/infrastructure/cloning/git/factory.py,sha256=pjIvl3NIhXNNbRcI86wedz8CP-jV_Z3OVpGl4BUGCmo,4585
|
|
44
|
+
kodit/infrastructure/cloning/git/working_copy.py,sha256=DMW_p7WWGoSeyDI9g55ItwsRomZSotXWRrlopqwszaQ,1115
|
|
45
|
+
kodit/infrastructure/embedding/__init__.py,sha256=F-8nLlWAerYJ0MOIA4tbXHLan8bW5rRR84vzxx6tRKI,39
|
|
46
|
+
kodit/infrastructure/embedding/embedding_factory.py,sha256=1AypjhWJGxvLnZt1SEH_FHPk9P0Vkt9fXdSGzFPp2ow,3432
|
|
47
|
+
kodit/infrastructure/embedding/local_vector_search_repository.py,sha256=UO8A3Eb_djFVrWKKSukAo4u7k8djDD1SlOPHk2pP9ps,3921
|
|
48
|
+
kodit/infrastructure/embedding/vectorchord_vector_search_repository.py,sha256=7P5Uz8heQOi-x0k5wrNSE2biiy8FaC4VTfX0vPdfy6Y,7638
|
|
49
|
+
kodit/infrastructure/embedding/embedding_providers/__init__.py,sha256=qeZ-oAIAxMl5QqebGtO1lq-tHjl_ucAwOXePklcwwGk,34
|
|
50
|
+
kodit/infrastructure/embedding/embedding_providers/batching.py,sha256=a8CL9PX2VLmbeg616fc_lQzfC4BWTVn32m4SEhXpHxc,3279
|
|
51
|
+
kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py,sha256=HfilGGJ-hiw3mHOj6Zf7jlouSIIDaDFqwPe-4vKPREE,2611
|
|
52
|
+
kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py,sha256=U5fc8jUP8wF-nq1zo-CfSbJbLQyE-3muKmRCaYGtytk,4387
|
|
53
|
+
kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py,sha256=LIK9Iir7geraZoqiaNbeHv3hXrghZRDpYGJDEjZaqzQ,4086
|
|
54
|
+
kodit/infrastructure/enrichment/__init__.py,sha256=8acZKNzql8Fs0lceFu9U3KoUrOptRBtVIxr_Iw6lz3Y,40
|
|
55
|
+
kodit/infrastructure/enrichment/enrichment_factory.py,sha256=4saTSHJY9o1LC4wkkL0I_fCsLPh0-Wb7GdZt9-1lXVA,1827
|
|
56
|
+
kodit/infrastructure/enrichment/legacy_enrichment_models.py,sha256=YS-sNEH-b4hoy2ThcqhfkiefsftUsFgIjZjbvfU7j6w,1035
|
|
57
|
+
kodit/infrastructure/enrichment/local_enrichment_provider.py,sha256=8CATNtgMHgBRt24GrYEwaZKrroNCxMJS-39xQJoG3N0,3818
|
|
58
|
+
kodit/infrastructure/enrichment/null_enrichment_provider.py,sha256=5Ksyxl3qDLxUjmOeIdHZ0UAIULy7RcbLXJoT7_CNXoQ,775
|
|
59
|
+
kodit/infrastructure/enrichment/openai_enrichment_provider.py,sha256=fenq4HiJ2UkrzsE2D0A0qpmro38z9mKaIzKKU5v7hnY,3189
|
|
60
|
+
kodit/infrastructure/git/__init__.py,sha256=0iMosFzudj4_xNIMe2SRbV6l5bWqkjnUsZoFsoZFuM8,33
|
|
61
|
+
kodit/infrastructure/git/git_utils.py,sha256=lOujEx41UuWfYSnFWbY4HC2tK5utytyzNkW1e5IPCr0,543
|
|
62
|
+
kodit/infrastructure/ignore/__init__.py,sha256=VzFv8XOzHmsu0MEGnWVSF6KsgqLBmvHlRqAkT1Xb1MY,36
|
|
63
|
+
kodit/infrastructure/ignore/ignore_pattern_provider.py,sha256=9m2XCsgW87UBTfzHr6Z0Ns6WpzwkLir3zyBY3PwsgXk,2225
|
|
64
|
+
kodit/infrastructure/indexing/__init__.py,sha256=7UPRa2jwCAsa0Orsp6PqXSF8iIXJVzXHMFmrKkI9yH8,38
|
|
65
|
+
kodit/infrastructure/indexing/fusion_service.py,sha256=mXUUcx3-8e75mWkxXMfl30HIoFXrTNHzB1w90MmEbak,1806
|
|
66
|
+
kodit/infrastructure/indexing/index_repository.py,sha256=iHK9wcC8893Q6vY8OhW9UrN0gj1_GVzjTVlbewyZdxI,9094
|
|
67
|
+
kodit/infrastructure/indexing/indexing_factory.py,sha256=BbavU9DJZapAe0V9H6Zsk7J6oPY48SJhGP3nrCPvzoQ,3914
|
|
68
|
+
kodit/infrastructure/snippet_extraction/__init__.py,sha256=v6KqrRDjSj0nt87m7UwRGx2GN_fz_14VWq9Q0uABR_s,54
|
|
69
|
+
kodit/infrastructure/snippet_extraction/language_detection_service.py,sha256=Lo9xPLVia-70yP9gzyH4cQcBQzsp7WXjGOa5NBggScg,1158
|
|
70
|
+
kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py,sha256=LGbm614KCPNsM9K8r1z-E763NyAMIZA9ETJ_C61EknA,2759
|
|
71
|
+
kodit/infrastructure/snippet_extraction/snippet_query_provider.py,sha256=pLjFExJx5bX4s6a_mMA4-AfjtfBaC2wjTV3GjYD2HVE,1284
|
|
72
|
+
kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py,sha256=8B14jy_QS9SBA5jNpLtSSOayKP1WgMeCQEsZPuyAs8o,6190
|
|
73
|
+
kodit/infrastructure/snippet_extraction/languages/csharp.scm,sha256=gbBN4RiV1FBuTJF6orSnDFi8H9JwTw-d4piLJYsWUsc,222
|
|
74
|
+
kodit/infrastructure/snippet_extraction/languages/go.scm,sha256=SEX9mTOrhP2KiQW7oflDKkd21u5dK56QbJ4LvTDxY8A,533
|
|
75
|
+
kodit/infrastructure/snippet_extraction/languages/javascript.scm,sha256=Ini5TsVNmcBKQ8aL46a5Id9ut0g9UdmvmVqdMqRJtFk,446
|
|
76
|
+
kodit/infrastructure/snippet_extraction/languages/python.scm,sha256=ee85R9PBzwye3IMTE7-iVoKWd_ViU3EJISTyrFGrVeo,429
|
|
77
|
+
kodit/infrastructure/snippet_extraction/languages/typescript.scm,sha256=U-ujbbv4tylbUBj9wuhL-e5cW6hmgPCNs4xrIX3r_hE,448
|
|
78
|
+
kodit/infrastructure/sqlalchemy/__init__.py,sha256=UXPMSF_hgWaqr86cawRVqM8XdVNumQyyK5B8B97GnlA,33
|
|
79
|
+
kodit/infrastructure/sqlalchemy/embedding_repository.py,sha256=vdjn3E5dFriFkceK8E8QMZzZk83etSf3NQOyS_LCeGY,7548
|
|
80
|
+
kodit/infrastructure/sqlalchemy/file_repository.py,sha256=9O6ysCG_ldZFzDtpcxaTJQ8xya1A2q3CU2NafQo7GS0,2027
|
|
81
|
+
kodit/infrastructure/sqlalchemy/repository.py,sha256=xWVAO9Bx9NKh3fSM-D1KGehT-6MNIYW2YQQcUcTE2xk,4369
|
|
82
|
+
kodit/infrastructure/sqlalchemy/snippet_repository.py,sha256=zMqc5KxD0rP6r2BG2qLVAf3rh3IONkhZkI_pXg-r3a8,2137
|
|
83
|
+
kodit/infrastructure/ui/__init__.py,sha256=CzbLOBwIZ6B6iAHEd1L8cIBydCj-n_kobxJAhz2I9_Y,32
|
|
84
|
+
kodit/infrastructure/ui/progress.py,sha256=BaAeMEgXlSSb0c_t_NPxnThIktkzzCS9kegb5ExULJs,4791
|
|
85
|
+
kodit/infrastructure/ui/spinner.py,sha256=GcP115qtR0VEnGfMEtsGoAUpRzVGUSfiUXfoJJERngA,2357
|
|
86
|
+
kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
|
|
87
|
+
kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
|
|
88
|
+
kodit/migrations/env.py,sha256=j89vEWdSgfnreTAz5ZvFAPlsMGI8SfKti0MlWhm7Jbc,2364
|
|
89
|
+
kodit/migrations/script.py.mako,sha256=zWziKtiwYKEWuwPV_HBNHwa9LCT45_bi01-uSNFaOOE,703
|
|
90
|
+
kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py,sha256=-61qol9PfQKILCDQRA5jEaats9aGZs9Wdtp-j-38SF4,1644
|
|
91
|
+
kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7hRV7fI59veDY9cjtTrs-8,3306
|
|
92
|
+
kodit/migrations/versions/9e53ea8bb3b0_add_authors.py,sha256=a32Zm8KUQyiiLkjKNPYdaJDgjW6VsV-GhaLnPnK_fpI,3884
|
|
93
|
+
kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
|
|
94
|
+
kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py,sha256=rI8LmjF-I2OMxZ2nOIF_NRmqOLXe45hL_iz_nx97DTQ,1680
|
|
95
|
+
kodit-0.2.5.dist-info/METADATA,sha256=UuPIpWyvjccCITCITmMcJhUwIxRd211UvDmVXi8MbY4,5867
|
|
96
|
+
kodit-0.2.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
97
|
+
kodit-0.2.5.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
|
|
98
|
+
kodit-0.2.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
99
|
+
kodit-0.2.5.dist-info/RECORD,,
|
kodit/bm25/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"""BM25 module."""
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
"""Factory for creating keyword search providers."""
|
|
2
|
-
|
|
3
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
4
|
-
|
|
5
|
-
from kodit.bm25.keyword_search_service import KeywordSearchProvider
|
|
6
|
-
from kodit.bm25.local_bm25 import BM25Service
|
|
7
|
-
from kodit.bm25.vectorchord_bm25 import VectorChordBM25
|
|
8
|
-
from kodit.config import AppContext
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def keyword_search_factory(
|
|
12
|
-
app_context: AppContext, session: AsyncSession
|
|
13
|
-
) -> KeywordSearchProvider:
|
|
14
|
-
"""Create a keyword search provider."""
|
|
15
|
-
if app_context.default_search.provider == "vectorchord":
|
|
16
|
-
return VectorChordBM25(session=session)
|
|
17
|
-
return BM25Service(data_dir=app_context.get_data_dir())
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
"""Keyword search service."""
|
|
2
|
-
|
|
3
|
-
from abc import ABC, abstractmethod
|
|
4
|
-
from typing import NamedTuple
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class BM25Document(NamedTuple):
|
|
8
|
-
"""BM25 document."""
|
|
9
|
-
|
|
10
|
-
snippet_id: int
|
|
11
|
-
text: str
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class BM25Result(NamedTuple):
|
|
15
|
-
"""BM25 result."""
|
|
16
|
-
|
|
17
|
-
snippet_id: int
|
|
18
|
-
score: float
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class KeywordSearchProvider(ABC):
|
|
22
|
-
"""Interface for keyword search providers."""
|
|
23
|
-
|
|
24
|
-
@abstractmethod
|
|
25
|
-
async def index(self, corpus: list[BM25Document]) -> None:
|
|
26
|
-
"""Index a new corpus."""
|
|
27
|
-
|
|
28
|
-
@abstractmethod
|
|
29
|
-
async def retrieve(self, query: str, top_k: int = 2) -> list[BM25Result]:
|
|
30
|
-
"""Retrieve from the index."""
|
|
31
|
-
|
|
32
|
-
@abstractmethod
|
|
33
|
-
async def delete(self, snippet_ids: list[int]) -> None:
|
|
34
|
-
"""Delete documents from the index."""
|
kodit/embedding/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"""Embedding module."""
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
"""Embedding service."""
|
|
2
|
-
|
|
3
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
4
|
-
|
|
5
|
-
from kodit.config import AppContext, Endpoint
|
|
6
|
-
from kodit.embedding.embedding_models import EmbeddingType
|
|
7
|
-
from kodit.embedding.embedding_provider.local_embedding_provider import (
|
|
8
|
-
CODE,
|
|
9
|
-
LocalEmbeddingProvider,
|
|
10
|
-
)
|
|
11
|
-
from kodit.embedding.embedding_provider.openai_embedding_provider import (
|
|
12
|
-
OpenAIEmbeddingProvider,
|
|
13
|
-
)
|
|
14
|
-
from kodit.embedding.embedding_repository import EmbeddingRepository
|
|
15
|
-
from kodit.embedding.local_vector_search_service import LocalVectorSearchService
|
|
16
|
-
from kodit.embedding.vector_search_service import (
|
|
17
|
-
VectorSearchService,
|
|
18
|
-
)
|
|
19
|
-
from kodit.embedding.vectorchord_vector_search_service import (
|
|
20
|
-
TaskName,
|
|
21
|
-
VectorChordVectorSearchService,
|
|
22
|
-
)
|
|
23
|
-
from kodit.log import log_event
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
|
|
27
|
-
"""Get the endpoint configuration for the embedding service."""
|
|
28
|
-
return app_context.embedding_endpoint or app_context.default_endpoint or None
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def embedding_factory(
|
|
32
|
-
task_name: TaskName, app_context: AppContext, session: AsyncSession
|
|
33
|
-
) -> VectorSearchService:
|
|
34
|
-
"""Create an embedding service."""
|
|
35
|
-
embedding_repository = EmbeddingRepository(session=session)
|
|
36
|
-
endpoint = _get_endpoint_configuration(app_context)
|
|
37
|
-
|
|
38
|
-
if endpoint and endpoint.type == "openai":
|
|
39
|
-
log_event("kodit.embedding", {"provider": "openai"})
|
|
40
|
-
from openai import AsyncOpenAI
|
|
41
|
-
|
|
42
|
-
embedding_provider = OpenAIEmbeddingProvider(
|
|
43
|
-
openai_client=AsyncOpenAI(
|
|
44
|
-
api_key=endpoint.api_key or "default",
|
|
45
|
-
base_url=endpoint.base_url or "https://api.openai.com/v1",
|
|
46
|
-
),
|
|
47
|
-
model_name=endpoint.model or "text-embedding-3-small",
|
|
48
|
-
)
|
|
49
|
-
else:
|
|
50
|
-
log_event("kodit.embedding", {"provider": "local"})
|
|
51
|
-
embedding_provider = LocalEmbeddingProvider(CODE)
|
|
52
|
-
|
|
53
|
-
if app_context.default_search.provider == "vectorchord":
|
|
54
|
-
log_event("kodit.database", {"provider": "vectorchord"})
|
|
55
|
-
return VectorChordVectorSearchService(task_name, session, embedding_provider)
|
|
56
|
-
if app_context.default_search.provider == "sqlite":
|
|
57
|
-
log_event("kodit.database", {"provider": "sqlite"})
|
|
58
|
-
if task_name == "code":
|
|
59
|
-
embedding_type = EmbeddingType.CODE
|
|
60
|
-
elif task_name == "text":
|
|
61
|
-
embedding_type = EmbeddingType.TEXT
|
|
62
|
-
return LocalVectorSearchService(
|
|
63
|
-
embedding_repository=embedding_repository,
|
|
64
|
-
embedding_provider=embedding_provider,
|
|
65
|
-
embedding_type=embedding_type,
|
|
66
|
-
)
|
|
67
|
-
|
|
68
|
-
msg = f"Invalid semantic search provider: {app_context.default_search.provider}"
|
|
69
|
-
raise ValueError(msg)
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
"""Embedding models."""
|
|
2
|
-
|
|
3
|
-
from enum import Enum
|
|
4
|
-
|
|
5
|
-
from sqlalchemy import JSON, ForeignKey
|
|
6
|
-
from sqlalchemy import Enum as SQLAlchemyEnum
|
|
7
|
-
from sqlalchemy.orm import Mapped, mapped_column
|
|
8
|
-
|
|
9
|
-
from kodit.database import Base, CommonMixin
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class EmbeddingType(Enum):
|
|
13
|
-
"""Embedding type."""
|
|
14
|
-
|
|
15
|
-
CODE = 1
|
|
16
|
-
TEXT = 2
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class Embedding(Base, CommonMixin):
|
|
20
|
-
"""Embedding model."""
|
|
21
|
-
|
|
22
|
-
__tablename__ = "embeddings"
|
|
23
|
-
|
|
24
|
-
snippet_id: Mapped[int] = mapped_column(ForeignKey("snippets.id"), index=True)
|
|
25
|
-
type: Mapped[EmbeddingType] = mapped_column(
|
|
26
|
-
SQLAlchemyEnum(EmbeddingType), index=True
|
|
27
|
-
)
|
|
28
|
-
embedding: Mapped[list[float]] = mapped_column(JSON)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"""Embedding module."""
|