kodit 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/factories/code_indexing_factory.py +56 -29
- kodit/application/services/code_indexing_application_service.py +152 -118
- kodit/cli.py +14 -41
- kodit/domain/entities.py +268 -197
- kodit/domain/protocols.py +61 -0
- kodit/domain/services/embedding_service.py +1 -1
- kodit/domain/services/index_query_service.py +66 -0
- kodit/domain/services/index_service.py +282 -0
- kodit/domain/value_objects.py +143 -65
- kodit/infrastructure/cloning/git/working_copy.py +17 -8
- kodit/infrastructure/cloning/metadata.py +37 -67
- kodit/infrastructure/embedding/embedding_factory.py +1 -1
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
- kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
- kodit/infrastructure/git/git_utils.py +1 -63
- kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
- kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/__init__.py +1 -0
- kodit/infrastructure/mappers/index_mapper.py +344 -0
- kodit/infrastructure/slicing/__init__.py +1 -0
- kodit/infrastructure/slicing/language_detection_service.py +18 -0
- kodit/infrastructure/slicing/slicer.py +894 -0
- kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
- kodit/infrastructure/sqlalchemy/entities.py +203 -0
- kodit/infrastructure/sqlalchemy/index_repository.py +579 -0
- kodit/mcp.py +0 -7
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +36 -0
- kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
- kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +24 -16
- kodit/migrations/versions/85155663351e_initial.py +64 -48
- kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +20 -14
- kodit/utils/__init__.py +1 -0
- kodit/utils/path_utils.py +54 -0
- {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/METADATA +9 -4
- kodit-0.3.4.dist-info/RECORD +89 -0
- kodit/domain/enums.py +0 -9
- kodit/domain/repositories.py +0 -128
- kodit/domain/services/ignore_service.py +0 -45
- kodit/domain/services/indexing_service.py +0 -204
- kodit/domain/services/snippet_extraction_service.py +0 -89
- kodit/domain/services/snippet_service.py +0 -215
- kodit/domain/services/source_service.py +0 -85
- kodit/infrastructure/cloning/folder/__init__.py +0 -1
- kodit/infrastructure/cloning/folder/factory.py +0 -128
- kodit/infrastructure/cloning/folder/working_copy.py +0 -38
- kodit/infrastructure/cloning/git/factory.py +0 -153
- kodit/infrastructure/indexing/index_repository.py +0 -286
- kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
- kodit/infrastructure/snippet_extraction/__init__.py +0 -1
- kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
- kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
- kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
- kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
- kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
- kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
- kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
- kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
- kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -45
- kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
- kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
- kodit/infrastructure/sqlalchemy/repository.py +0 -133
- kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -259
- kodit-0.3.2.dist-info/RECORD +0 -103
- {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/WHEEL +0 -0
- {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/entry_points.txt +0 -0
- {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,204 +0,0 @@
|
|
|
1
|
-
"""Domain service for indexing operations."""
|
|
2
|
-
|
|
3
|
-
from abc import ABC, abstractmethod
|
|
4
|
-
|
|
5
|
-
from kodit.domain.entities import Snippet
|
|
6
|
-
from kodit.domain.value_objects import (
|
|
7
|
-
FusionRequest,
|
|
8
|
-
FusionResult,
|
|
9
|
-
IndexCreateRequest,
|
|
10
|
-
IndexView,
|
|
11
|
-
SnippetWithContext,
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class IndexRepository(ABC):
    """Persistence contract for indexes and the snippets stored under them.

    Every method is abstract, so a concrete repository has to implement all of
    them before it can be instantiated.
    """

    @abstractmethod
    async def create_index(self, source_id: int) -> IndexView:
        """Create an index for the source identified by ``source_id``."""

    @abstractmethod
    async def get_index_by_id(self, index_id: int) -> IndexView | None:
        """Return the index with the given ID, or ``None``."""

    @abstractmethod
    async def get_index_by_source_id(self, source_id: int) -> IndexView | None:
        """Return the index belonging to the given source ID, or ``None``."""

    @abstractmethod
    async def list_indexes(self) -> list[IndexView]:
        """Return every index."""

    @abstractmethod
    async def update_index_timestamp(self, index_id: int) -> None:
        """Update the timestamp recorded for the given index."""

    @abstractmethod
    async def delete_all_snippets(self, index_id: int) -> None:
        """Delete every snippet that belongs to the given index."""

    @abstractmethod
    async def get_snippets_for_index(self, index_id: int) -> list[Snippet]:
        """Return every snippet that belongs to the given index."""

    @abstractmethod
    async def add_snippet(self, snippet: dict) -> None:
        """Add a snippet, supplied as a dictionary, to the database."""

    @abstractmethod
    async def update_snippet_content(self, snippet_id: int, content: str) -> None:
        """Replace the content of the snippet identified by ``snippet_id``."""

    @abstractmethod
    async def list_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
        """Return the snippets whose IDs appear in ``ids``."""
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
class FusionService(ABC):
    """Contract for services that fuse several rankings into one result list."""

    @abstractmethod
    def reciprocal_rank_fusion(
        self, rankings: list[list[FusionRequest]], k: float = 60
    ) -> list[FusionResult]:
        """Combine ``rankings`` using reciprocal rank fusion.

        Args:
            rankings: The ranked lists to fuse.
            k: Parameter for reciprocal rank fusion; defaults to 60.

        Returns:
            The fused results.

        """
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
class IndexingDomainService:
    """Domain-level facade over an index repository and a fusion service.

    Each method forwards its arguments to the injected collaborator and hands
    back whatever that collaborator returns.
    """

    def __init__(
        self, index_repository: IndexRepository, fusion_service: FusionService
    ) -> None:
        """Store the collaborators this service delegates to.

        Args:
            index_repository: Repository used for index and snippet operations.
            fusion_service: Service used to fuse ranked search results.

        """
        self.index_repository = index_repository
        self.fusion_service = fusion_service

    async def create_index(self, request: IndexCreateRequest) -> IndexView:
        """Create an index for the source named in ``request``.

        Args:
            request: Create request; only its ``source_id`` is read here.

        Returns:
            The view of the newly created index.

        """
        repository = self.index_repository
        return await repository.create_index(request.source_id)

    async def get_index(self, index_id: int) -> IndexView | None:
        """Look up an index by its ID.

        Args:
            index_id: ID of the index to fetch.

        Returns:
            The index view, or ``None`` when the repository finds nothing.

        """
        repository = self.index_repository
        return await repository.get_index_by_id(index_id)

    async def get_index_by_source_id(self, source_id: int) -> IndexView | None:
        """Look up an index by the ID of its source.

        Args:
            source_id: ID of the source whose index is wanted.

        Returns:
            The index view, or ``None`` when the repository finds nothing.

        """
        repository = self.index_repository
        return await repository.get_index_by_source_id(source_id)

    async def list_indexes(self) -> list[IndexView]:
        """Return all index views known to the repository."""
        repository = self.index_repository
        return await repository.list_indexes()

    async def update_index_timestamp(self, index_id: int) -> None:
        """Ask the repository to update the timestamp of an index.

        Args:
            index_id: ID of the index to update.

        """
        repository = self.index_repository
        await repository.update_index_timestamp(index_id)

    async def delete_all_snippets(self, index_id: int) -> None:
        """Ask the repository to delete every snippet of an index.

        Args:
            index_id: ID of the index whose snippets are deleted.

        """
        repository = self.index_repository
        await repository.delete_all_snippets(index_id)

    async def get_snippets_for_index(self, index_id: int) -> list[Snippet]:
        """Return the snippets stored for an index.

        Args:
            index_id: ID of the index to read snippets from.

        Returns:
            The Snippet entities of that index.

        """
        repository = self.index_repository
        return await repository.get_snippets_for_index(index_id)

    async def add_snippet(self, snippet: dict) -> None:
        """Hand a snippet to the repository for storage.

        Args:
            snippet: The snippet to add, as a dictionary.

        """
        repository = self.index_repository
        await repository.add_snippet(snippet)

    async def update_snippet_content(self, snippet_id: int, content: str) -> None:
        """Replace the content of an existing snippet.

        Args:
            snippet_id: ID of the snippet to update.
            content: New content for the snippet.

        """
        repository = self.index_repository
        await repository.update_snippet_content(snippet_id, content)

    def perform_fusion(
        self, rankings: list[list[FusionRequest]], k: float = 60
    ) -> list[FusionResult]:
        """Fuse ranked search results through the fusion service.

        Args:
            rankings: The rankings to fuse.
            k: Parameter for reciprocal rank fusion; defaults to 60.

        Returns:
            The fused search results.

        """
        fuse = self.fusion_service.reciprocal_rank_fusion
        return fuse(rankings, k)

    async def get_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
        """Fetch snippets by their IDs.

        Args:
            ids: IDs of the snippets to retrieve.

        Returns:
            The SnippetWithContext objects the repository returns for ``ids``.

        """
        repository = self.index_repository
        return await repository.list_snippets_by_ids(ids)
|
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
"""Domain services for snippet extraction."""
|
|
2
|
-
|
|
3
|
-
from abc import ABC, abstractmethod
|
|
4
|
-
from collections.abc import Mapping
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
from kodit.domain.enums import SnippetExtractionStrategy
|
|
8
|
-
from kodit.domain.value_objects import SnippetExtractionRequest, SnippetExtractionResult
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class LanguageDetectionService(ABC):
    """Contract for services that determine a file's programming language."""

    @abstractmethod
    async def detect_language(self, file_path: Path) -> str:
        """Return the programming language detected for ``file_path``."""
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class SnippetExtractor(ABC):
    """Contract for components that pull snippets out of a single file."""

    @abstractmethod
    async def extract(self, file_path: Path, language: str) -> list[str]:
        """Return the snippets extracted from ``file_path`` for ``language``."""
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class SnippetExtractionService(ABC):
    """Contract for domain services that extract snippets from source code."""

    @abstractmethod
    async def extract_snippets(
        self, request: SnippetExtractionRequest
    ) -> SnippetExtractionResult:
        """Extract snippets from a file with the strategy given in ``request``."""
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class SnippetExtractionDomainService:
    """Domain service implementation for snippet extraction business logic."""

    def __init__(
        self,
        language_detector: LanguageDetectionService,
        snippet_extractors: Mapping[SnippetExtractionStrategy, SnippetExtractor],
    ) -> None:
        """Initialize the snippet extraction domain service.

        Args:
            language_detector: Service for detecting programming languages
            snippet_extractors: Mapping from extraction strategy to the
                extractor implementation that handles it

        """
        self.language_detector = language_detector
        self.snippet_extractors = snippet_extractors

    async def extract_snippets(
        self, request: SnippetExtractionRequest
    ) -> SnippetExtractionResult:
        """Extract snippets from a file using the specified strategy.

        Args:
            request: The snippet extraction request; its ``file_path`` and
                ``strategy`` attributes are read.

        Returns:
            SnippetExtractionResult containing the extracted snippets (with
            whitespace-only snippets removed) and the detected language

        Raises:
            ValueError: If the file doesn't exist or strategy is unsupported

        """
        # Domain logic: validate file exists
        if not request.file_path.exists():
            raise ValueError(f"File does not exist: {request.file_path}")

        # Domain logic: choose the extractor first. Rejecting an unsupported
        # strategy here avoids awaiting language detection for a request that
        # cannot succeed, and keeps a detector failure from masking this error.
        extractor = self.snippet_extractors.get(request.strategy)
        if extractor is None:
            raise ValueError(f"Unsupported extraction strategy: {request.strategy}")

        # Domain logic: detect language, then run the chosen extractor
        language = await self.language_detector.detect_language(request.file_path)
        snippets = await extractor.extract(request.file_path, language)

        # Domain logic: filter out empty (whitespace-only) snippets
        filtered_snippets = [snippet for snippet in snippets if snippet.strip()]

        return SnippetExtractionResult(snippets=filtered_snippets, language=language)
|
|
@@ -1,215 +0,0 @@
|
|
|
1
|
-
"""Domain service for snippet operations."""
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from typing import Any
|
|
5
|
-
|
|
6
|
-
import structlog
|
|
7
|
-
|
|
8
|
-
from kodit.domain.entities import Snippet
|
|
9
|
-
from kodit.domain.enums import SnippetExtractionStrategy
|
|
10
|
-
from kodit.domain.interfaces import ProgressCallback
|
|
11
|
-
from kodit.domain.repositories import FileRepository, SnippetRepository
|
|
12
|
-
from kodit.domain.services.snippet_extraction_service import (
|
|
13
|
-
SnippetExtractionDomainService,
|
|
14
|
-
)
|
|
15
|
-
from kodit.domain.value_objects import (
|
|
16
|
-
MultiSearchRequest,
|
|
17
|
-
MultiSearchResult,
|
|
18
|
-
SnippetExtractionRequest,
|
|
19
|
-
SnippetWithContext,
|
|
20
|
-
)
|
|
21
|
-
from kodit.reporting import Reporter
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class SnippetDomainService:
    """Domain service for snippet-related operations.

    This service consolidates snippet operations that were previously
    spread between application services. It handles:
    - Snippet extraction from files
    - Snippet persistence
    - Snippet querying and filtering
    """

    # MIME types that are never passed to snippet extraction.
    _MIME_BLACKLIST = frozenset({"unknown/unknown"})

    def __init__(
        self,
        snippet_extraction_service: SnippetExtractionDomainService,
        snippet_repository: SnippetRepository,
        file_repository: FileRepository,
    ) -> None:
        """Initialize the snippet domain service.

        Args:
            snippet_extraction_service: Service for extracting snippets from files
            snippet_repository: Repository for snippet persistence
            file_repository: Repository for file operations

        """
        self.snippet_extraction_service = snippet_extraction_service
        self.snippet_repository = snippet_repository
        self.file_repository = file_repository
        self.log = structlog.get_logger(__name__)

    async def extract_and_create_snippets(
        self,
        index_id: int,
        strategy: SnippetExtractionStrategy,
        progress_callback: ProgressCallback | None = None,
    ) -> list[Snippet]:
        """Extract snippets from all files in an index and persist them.

        Files rejected by ``_should_process_file`` are skipped. A file whose
        extraction or persistence raises ``OSError`` or ``ValueError`` is
        logged at debug level and skipped; snippets already saved for that
        file stay in the returned list. A progress step is reported for every
        file, whether it was processed, skipped or failed.

        Args:
            index_id: The ID of the index to create snippets for
            strategy: The extraction strategy to use
            progress_callback: Optional callback for progress reporting

        Returns:
            List of the Snippet entities returned by the repository's ``save``

        """
        files = await self.file_repository.get_files_for_index(index_id)
        total = len(files)
        created_snippets: list[Snippet] = []

        reporter = Reporter(self.log, progress_callback)
        await reporter.start(
            "create_snippets", total, "Creating snippets from files..."
        )

        for position, file in enumerate(files, 1):
            if self._should_process_file(file):
                try:
                    # Extract snippets from file
                    request = SnippetExtractionRequest(Path(file.cloned_path), strategy)
                    result = await self.snippet_extraction_service.extract_snippets(
                        request
                    )

                    # Create and persist snippet entities one at a time so that
                    # a mid-file failure keeps the snippets saved so far.
                    for snippet_content in result.snippets:
                        snippet = Snippet(
                            file_id=file.id,
                            index_id=index_id,
                            content=snippet_content,
                            summary="",  # Initially empty, populated by enrichment
                        )
                        saved_snippet = await self.snippet_repository.save(snippet)
                        created_snippets.append(saved_snippet)
                except (OSError, ValueError) as e:
                    self.log.debug(
                        "Skipping file",
                        file=file.cloned_path,
                        error=str(e),
                    )

            # Report the step outside the branches above: skipped and failed
            # files must advance the progress too, otherwise it stalls.
            await reporter.step(
                "create_snippets",
                current=position,
                total=total,
                message=f"Processing {file.cloned_path}...",
            )

        await reporter.done("create_snippets")
        return created_snippets

    async def get_snippets_for_index(self, index_id: int) -> list[Snippet]:
        """Get all snippets for a specific index.

        Args:
            index_id: The ID of the index

        Returns:
            List of Snippet entities for the index

        """
        # This delegates to the repository but provides a domain-level interface
        return list(await self.snippet_repository.get_by_index(index_id))

    async def update_snippet_summary(self, snippet_id: int, summary: str) -> None:
        """Update the summary of an existing snippet.

        Args:
            snippet_id: The ID of the snippet to update
            summary: The new summary text

        Raises:
            ValueError: If the repository has no snippet with that ID

        """
        # Get the snippet first to ensure it exists
        snippet = await self.snippet_repository.get(snippet_id)
        if snippet is None:
            msg = f"Snippet not found: {snippet_id}"
            raise ValueError(msg)

        # Update the summary
        snippet.summary = summary
        await self.snippet_repository.save(snippet)

    async def delete_snippets_for_index(self, index_id: int) -> None:
        """Delete all snippets for a specific index.

        Args:
            index_id: The ID of the index

        """
        await self.snippet_repository.delete_by_index(index_id)

    async def search_snippets(
        self, request: MultiSearchRequest
    ) -> list[SnippetWithContext]:
        """Search snippets with filters.

        Args:
            request: The search request containing filters

        Returns:
            List of matching snippet items with context

        """
        return list(await self.snippet_repository.search(request))

    async def list_snippets(
        self, file_path: str | None = None, source_uri: str | None = None
    ) -> list[MultiSearchResult]:
        """List snippets with optional filtering.

        Args:
            file_path: Optional file path to filter by
            source_uri: Optional source URI to filter by

        Returns:
            List of search results matching the criteria

        """
        snippet_items = await self.snippet_repository.list_snippets(
            file_path, source_uri
        )
        # Convert SnippetWithContext to MultiSearchResult for unified display format
        return [
            MultiSearchResult(
                id=item.snippet.id,
                content=item.snippet.content,
                original_scores=[],  # No scores for list operation
                source_uri=item.source.uri,
                relative_path=MultiSearchResult.calculate_relative_path(
                    item.file.cloned_path, item.source.cloned_path
                ),
                language=MultiSearchResult.detect_language_from_extension(
                    item.file.extension
                ),
                authors=[author.name for author in item.authors],
                created_at=item.snippet.created_at,
                summary=item.snippet.summary,
            )
            for item in snippet_items
        ]

    def _should_process_file(self, file: Any) -> bool:
        """Check if a file should be processed for snippet extraction.

        Args:
            file: The file to check; only its ``mime_type`` is read

        Returns:
            True if the file's MIME type is not blacklisted

        """
        # Skip unsupported file types
        return file.mime_type not in self._MIME_BLACKLIST
|
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
"""Source service rewritten to work directly with AsyncSession."""
|
|
2
|
-
|
|
3
|
-
from collections.abc import Callable
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
|
|
6
|
-
import structlog
|
|
7
|
-
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
|
8
|
-
|
|
9
|
-
from kodit.domain.entities import Source
|
|
10
|
-
from kodit.domain.interfaces import ProgressCallback
|
|
11
|
-
from kodit.domain.repositories import SourceRepository
|
|
12
|
-
from kodit.infrastructure.cloning.folder.factory import FolderSourceFactory
|
|
13
|
-
from kodit.infrastructure.cloning.folder.working_copy import FolderWorkingCopyProvider
|
|
14
|
-
from kodit.infrastructure.cloning.git.factory import (
|
|
15
|
-
GitSourceFactory,
|
|
16
|
-
GitWorkingCopyProvider,
|
|
17
|
-
)
|
|
18
|
-
from kodit.infrastructure.git.git_utils import is_valid_clone_target
|
|
19
|
-
from kodit.infrastructure.sqlalchemy.repository import SqlAlchemySourceRepository
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class SourceService:
    """Look up stored sources and create new ones from git targets or folders."""

    def __init__(
        self,
        clone_dir: Path,
        session_factory: async_sessionmaker[AsyncSession] | Callable[[], AsyncSession],
    ) -> None:
        """Remember the clone directory and session factory and set up a logger."""
        self.log = structlog.get_logger(__name__)
        self._session_factory = session_factory
        self.clone_dir = clone_dir

    async def get(self, source_id: int) -> Source:
        """Return the source with the given ID.

        Raises:
            ValueError: If the repository returns ``None`` for ``source_id``.

        """
        async with self._session_factory() as session:
            found = await SqlAlchemySourceRepository(session).get(source_id)
            if found is not None:
                return found
            raise ValueError(f"Source not found: {source_id}")

    async def create(
        self, uri_or_path_like: str, progress_callback: ProgressCallback | None = None
    ) -> Source:
        """Create a source from a git clone target or a local directory.

        Args:
            uri_or_path_like: A value accepted by ``is_valid_clone_target`` or
                the path of an existing directory.
            progress_callback: Optional callback handed on to the factory.

        Raises:
            ValueError: If the input is neither a valid clone target nor a
                directory.

        """
        async with self._session_factory() as session:
            git_factory, folder_factory = self._build_factories(
                SqlAlchemySourceRepository(session), session
            )

            # The factories take care of committing, so results are returned as-is.
            if is_valid_clone_target(uri_or_path_like):
                return await git_factory.create(uri_or_path_like, progress_callback)
            if Path(uri_or_path_like).is_dir():
                return await folder_factory.create(uri_or_path_like, progress_callback)
            raise ValueError(f"Unsupported source: {uri_or_path_like}")

    def _build_factories(
        self, repository: SourceRepository, session: AsyncSession
    ) -> tuple[GitSourceFactory, FolderSourceFactory]:
        """Build the git and folder source factories sharing one repository/session."""
        # Git-specific collaborators
        for_git = GitSourceFactory(
            repository=repository,
            working_copy=GitWorkingCopyProvider(self.clone_dir),
            session=session,
        )
        # Folder-specific collaborators
        for_folder = FolderSourceFactory(
            repository=repository,
            working_copy=FolderWorkingCopyProvider(self.clone_dir),
            session=session,
        )
        return for_git, for_folder
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"""Folder cloning infrastructure."""
|