kodit 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/__init__.py +1 -0
- kodit/application/commands/__init__.py +1 -0
- kodit/application/commands/snippet_commands.py +22 -0
- kodit/application/services/__init__.py +1 -0
- kodit/application/services/indexing_application_service.py +387 -0
- kodit/application/services/snippet_application_service.py +149 -0
- kodit/cli.py +118 -82
- kodit/database.py +0 -22
- kodit/domain/__init__.py +1 -0
- kodit/{source/source_models.py → domain/entities.py} +88 -19
- kodit/domain/enums.py +9 -0
- kodit/domain/errors.py +5 -0
- kodit/domain/interfaces.py +27 -0
- kodit/domain/repositories.py +95 -0
- kodit/domain/services/__init__.py +1 -0
- kodit/domain/services/bm25_service.py +124 -0
- kodit/domain/services/embedding_service.py +155 -0
- kodit/domain/services/enrichment_service.py +48 -0
- kodit/domain/services/ignore_service.py +45 -0
- kodit/domain/services/indexing_service.py +203 -0
- kodit/domain/services/snippet_extraction_service.py +89 -0
- kodit/domain/services/source_service.py +85 -0
- kodit/domain/value_objects.py +215 -0
- kodit/infrastructure/__init__.py +1 -0
- kodit/infrastructure/bm25/__init__.py +1 -0
- kodit/infrastructure/bm25/bm25_factory.py +28 -0
- kodit/{bm25/local_bm25.py → infrastructure/bm25/local_bm25_repository.py} +33 -22
- kodit/{bm25/vectorchord_bm25.py → infrastructure/bm25/vectorchord_bm25_repository.py} +40 -35
- kodit/infrastructure/cloning/__init__.py +1 -0
- kodit/infrastructure/cloning/folder/__init__.py +1 -0
- kodit/infrastructure/cloning/folder/factory.py +128 -0
- kodit/infrastructure/cloning/folder/working_copy.py +38 -0
- kodit/infrastructure/cloning/git/__init__.py +1 -0
- kodit/infrastructure/cloning/git/factory.py +147 -0
- kodit/infrastructure/cloning/git/working_copy.py +32 -0
- kodit/infrastructure/cloning/metadata.py +127 -0
- kodit/infrastructure/embedding/__init__.py +1 -0
- kodit/infrastructure/embedding/embedding_factory.py +87 -0
- kodit/infrastructure/embedding/embedding_providers/__init__.py +1 -0
- kodit/infrastructure/embedding/embedding_providers/batching.py +93 -0
- kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +79 -0
- kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +129 -0
- kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +113 -0
- kodit/infrastructure/embedding/local_vector_search_repository.py +114 -0
- kodit/{embedding/vectorchord_vector_search_service.py → infrastructure/embedding/vectorchord_vector_search_repository.py} +65 -46
- kodit/infrastructure/enrichment/__init__.py +1 -0
- kodit/{enrichment → infrastructure/enrichment}/enrichment_factory.py +28 -12
- kodit/infrastructure/enrichment/legacy_enrichment_models.py +42 -0
- kodit/{enrichment/enrichment_provider → infrastructure/enrichment}/local_enrichment_provider.py +38 -26
- kodit/infrastructure/enrichment/null_enrichment_provider.py +25 -0
- kodit/infrastructure/enrichment/openai_enrichment_provider.py +89 -0
- kodit/infrastructure/git/__init__.py +1 -0
- kodit/{source/git.py → infrastructure/git/git_utils.py} +10 -2
- kodit/infrastructure/ignore/__init__.py +1 -0
- kodit/{source/ignore.py → infrastructure/ignore/ignore_pattern_provider.py} +23 -6
- kodit/infrastructure/indexing/__init__.py +1 -0
- kodit/infrastructure/indexing/fusion_service.py +55 -0
- kodit/infrastructure/indexing/index_repository.py +291 -0
- kodit/infrastructure/indexing/indexing_factory.py +113 -0
- kodit/infrastructure/snippet_extraction/__init__.py +1 -0
- kodit/infrastructure/snippet_extraction/language_detection_service.py +39 -0
- kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +95 -0
- kodit/infrastructure/snippet_extraction/snippet_query_provider.py +45 -0
- kodit/{snippets/method_snippets.py → infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py} +123 -61
- kodit/infrastructure/sqlalchemy/__init__.py +1 -0
- kodit/{embedding → infrastructure/sqlalchemy}/embedding_repository.py +40 -26
- kodit/infrastructure/sqlalchemy/file_repository.py +78 -0
- kodit/infrastructure/sqlalchemy/repository.py +133 -0
- kodit/infrastructure/sqlalchemy/snippet_repository.py +79 -0
- kodit/infrastructure/ui/__init__.py +1 -0
- kodit/infrastructure/ui/progress.py +127 -0
- kodit/{util → infrastructure/ui}/spinner.py +19 -4
- kodit/mcp.py +51 -28
- kodit/migrations/env.py +1 -4
- kodit/reporting.py +78 -0
- {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/METADATA +1 -1
- kodit-0.2.6.dist-info/RECORD +100 -0
- kodit/bm25/__init__.py +0 -1
- kodit/bm25/keyword_search_factory.py +0 -17
- kodit/bm25/keyword_search_service.py +0 -34
- kodit/embedding/__init__.py +0 -1
- kodit/embedding/embedding_factory.py +0 -69
- kodit/embedding/embedding_models.py +0 -28
- kodit/embedding/embedding_provider/__init__.py +0 -1
- kodit/embedding/embedding_provider/embedding_provider.py +0 -92
- kodit/embedding/embedding_provider/hash_embedding_provider.py +0 -86
- kodit/embedding/embedding_provider/local_embedding_provider.py +0 -96
- kodit/embedding/embedding_provider/openai_embedding_provider.py +0 -73
- kodit/embedding/local_vector_search_service.py +0 -87
- kodit/embedding/vector_search_service.py +0 -55
- kodit/enrichment/__init__.py +0 -1
- kodit/enrichment/enrichment_provider/__init__.py +0 -1
- kodit/enrichment/enrichment_provider/enrichment_provider.py +0 -36
- kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +0 -79
- kodit/enrichment/enrichment_service.py +0 -45
- kodit/indexing/__init__.py +0 -1
- kodit/indexing/fusion.py +0 -67
- kodit/indexing/indexing_models.py +0 -43
- kodit/indexing/indexing_repository.py +0 -216
- kodit/indexing/indexing_service.py +0 -344
- kodit/snippets/__init__.py +0 -1
- kodit/snippets/languages/__init__.py +0 -53
- kodit/snippets/snippets.py +0 -50
- kodit/source/__init__.py +0 -1
- kodit/source/source_factories.py +0 -356
- kodit/source/source_repository.py +0 -169
- kodit/source/source_service.py +0 -150
- kodit/util/__init__.py +0 -1
- kodit-0.2.4.dist-info/RECORD +0 -71
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/csharp.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/go.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/javascript.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/python.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/typescript.scm +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/WHEEL +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/entry_points.txt +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Domain services for BM25 operations."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
|
|
6
|
+
from kodit.domain.value_objects import (
|
|
7
|
+
BM25DeleteRequest,
|
|
8
|
+
BM25IndexRequest,
|
|
9
|
+
BM25SearchRequest,
|
|
10
|
+
BM25SearchResult,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BM25Repository(ABC):
    """Abstract persistence interface for a BM25 index."""

    @abstractmethod
    async def index_documents(self, request: BM25IndexRequest) -> None:
        """Add the documents carried by ``request`` to the BM25 index."""

    @abstractmethod
    async def search(self, request: BM25SearchRequest) -> Sequence[BM25SearchResult]:
        """Run the BM25 search described by ``request`` and return its results."""

    @abstractmethod
    async def delete_documents(self, request: BM25DeleteRequest) -> None:
        """Remove the documents named by ``request`` from the BM25 index."""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class BM25DomainService:
    """Validate BM25 requests, then hand them to the repository."""

    def __init__(self, repository: BM25Repository) -> None:
        """Create the service around a BM25 repository.

        Args:
            repository: Backend that the validated requests are forwarded to.

        """
        self.repository = repository

    async def index_documents(self, request: BM25IndexRequest) -> None:
        """Index the usable documents of ``request``.

        Documents without a snippet ID, or whose text is empty or only
        whitespace, are dropped before the repository is called.

        Args:
            request: Indexing request holding the candidate documents.

        Raises:
            ValueError: If the request has no documents, or none of them
                survive filtering.

        """
        candidates = request.documents
        if not candidates:
            raise ValueError("Cannot index empty document list")

        indexable = [
            document
            for document in candidates
            if document.snippet_id is not None and (document.text or "").strip()
        ]
        if not indexable:
            raise ValueError("No valid documents to index")

        # The repository only ever receives a request rebuilt from the
        # filtered list, never the caller's original request object.
        filtered_request = BM25IndexRequest(documents=indexable)
        await self.repository.index_documents(filtered_request)

    async def search(self, request: BM25SearchRequest) -> Sequence[BM25SearchResult]:
        """Search the index with a whitespace-trimmed query.

        Args:
            request: Search request providing ``query`` and ``top_k``.

        Returns:
            Whatever sequence of results the repository returns.

        Raises:
            ValueError: If the query is empty or blank, or ``top_k`` is not
                positive.

        """
        query = (request.query or "").strip()
        if not query:
            raise ValueError("Search query cannot be empty")

        limit = request.top_k
        if limit <= 0:
            raise ValueError("Top-k must be positive")

        # Forward a fresh request that carries the trimmed query.
        trimmed_request = BM25SearchRequest(query=query, top_k=limit)
        return await self.repository.search(trimmed_request)

    async def delete_documents(self, request: BM25DeleteRequest) -> None:
        """Delete the documents whose snippet IDs are usable.

        IDs that are ``None`` or not greater than zero are dropped before the
        repository is called.

        Args:
            request: Deletion request holding the candidate snippet IDs.

        Raises:
            ValueError: If the request has no snippet IDs, or none of them
                survive filtering.

        """
        requested = request.snippet_ids
        if not requested:
            raise ValueError("Cannot delete empty snippet ID list")

        deletable = [
            candidate
            for candidate in requested
            if candidate is not None and candidate > 0
        ]
        if not deletable:
            raise ValueError("No valid snippet IDs to delete")

        filtered_request = BM25DeleteRequest(snippet_ids=deletable)
        await self.repository.delete_documents(filtered_request)
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""Domain services for embedding operations."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from collections.abc import AsyncGenerator, Sequence
|
|
5
|
+
|
|
6
|
+
from kodit.domain.entities import EmbeddingType
|
|
7
|
+
from kodit.domain.value_objects import (
|
|
8
|
+
EmbeddingRequest,
|
|
9
|
+
EmbeddingResponse,
|
|
10
|
+
IndexResult,
|
|
11
|
+
VectorIndexRequest,
|
|
12
|
+
VectorSearchQueryRequest,
|
|
13
|
+
VectorSearchResult,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class EmbeddingProvider(ABC):
    """Abstract interface implemented by every embedding provider."""

    @abstractmethod
    def embed(
        self, data: list[EmbeddingRequest]
    ) -> AsyncGenerator[list[EmbeddingResponse], None]:
        """Embed ``data``, producing lists of responses via an async generator."""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class VectorSearchRepository(ABC):
    """Abstract persistence interface for vector search."""

    @abstractmethod
    def index_documents(
        self, request: VectorIndexRequest
    ) -> AsyncGenerator[list[IndexResult], None]:
        """Index the documents of ``request``, producing lists of index results."""

    @abstractmethod
    async def search(
        self, request: VectorSearchQueryRequest
    ) -> Sequence[VectorSearchResult]:
        """Run the vector-similarity search described by ``request``."""

    @abstractmethod
    async def has_embedding(
        self, snippet_id: int, embedding_type: EmbeddingType
    ) -> bool:
        """Report whether ``snippet_id`` has an embedding of ``embedding_type``."""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class EmbeddingDomainService:
    """Domain service that validates vector requests before delegating."""

    def __init__(
        self,
        embedding_provider: EmbeddingProvider,
        vector_search_repository: VectorSearchRepository,
    ) -> None:
        """Initialize the embedding domain service.

        Args:
            embedding_provider: The embedding provider for generating embeddings
            vector_search_repository: The vector search repository for persistence

        """
        # NOTE: no method defined on this class reads ``embedding_provider``;
        # it is only stored on the instance.
        self.embedding_provider = embedding_provider
        self.vector_search_repository = vector_search_repository

    async def index_documents(
        self, request: VectorIndexRequest
    ) -> AsyncGenerator[list[IndexResult], None]:
        """Index the usable documents of ``request``.

        Documents without a snippet ID, or whose text is empty or only
        whitespace, are dropped. An empty request, or one whose documents are
        all dropped, is NOT an error: the generator simply finishes without
        yielding anything and the repository is never called.

        Args:
            request: The indexing request containing documents to index

        Yields:
            Lists of IndexResult, exactly as yielded by the repository

        """
        # Nothing to do for an empty request; end the generator quietly.
        if not request.documents:
            return

        valid_documents = [
            doc
            for doc in request.documents
            if doc.snippet_id is not None and doc.text and doc.text.strip()
        ]

        # Every document was filtered out; again end without yielding.
        if not valid_documents:
            return

        # The repository only sees a request rebuilt from the filtered list.
        validated_request = VectorIndexRequest(documents=valid_documents)
        async for result in self.vector_search_repository.index_documents(
            validated_request
        ):
            yield result

    async def search(
        self, request: VectorSearchQueryRequest
    ) -> Sequence[VectorSearchResult]:
        """Search the repository with a whitespace-trimmed query.

        Args:
            request: The search request providing ``query`` and ``top_k``

        Returns:
            Sequence of search results returned by the repository

        Raises:
            ValueError: If the query is empty or blank, or ``top_k`` is not
                positive

        """
        if not request.query or not request.query.strip():
            raise ValueError("Search query cannot be empty")

        if request.top_k <= 0:
            raise ValueError("Top-k must be positive")

        # Forward a fresh request that carries the trimmed query.
        normalized_request = VectorSearchQueryRequest(
            query=request.query.strip(), top_k=request.top_k
        )

        return await self.vector_search_repository.search(normalized_request)

    async def has_embedding(
        self, snippet_id: int, embedding_type: EmbeddingType
    ) -> bool:
        """Check whether a snippet has an embedding of the given type.

        Args:
            snippet_id: The snippet ID to check
            embedding_type: The type of embedding to check

        Returns:
            The repository's answer for this snippet and embedding type

        Raises:
            ValueError: If ``snippet_id`` is ``None`` or not positive

        """
        if snippet_id is None or snippet_id <= 0:
            raise ValueError("Snippet ID must be positive")

        return await self.vector_search_repository.has_embedding(
            snippet_id, embedding_type
        )
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Domain service for enrichment operations."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from collections.abc import AsyncGenerator
|
|
5
|
+
|
|
6
|
+
from kodit.domain.value_objects import (
|
|
7
|
+
EnrichmentIndexRequest,
|
|
8
|
+
EnrichmentRequest,
|
|
9
|
+
EnrichmentResponse,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class EnrichmentProvider(ABC):
    """Abstract interface implemented by every enrichment provider."""

    @abstractmethod
    def enrich(
        self, requests: list[EnrichmentRequest]
    ) -> AsyncGenerator[EnrichmentResponse, None]:
        """Enrich ``requests``, producing responses via an async generator."""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class EnrichmentDomainService:
    """Thin domain wrapper around an enrichment provider."""

    def __init__(self, enrichment_provider: EnrichmentProvider) -> None:
        """Create the service around an enrichment provider.

        Args:
            enrichment_provider: Provider that performs the actual enrichment.

        """
        self.enrichment_provider = enrichment_provider

    async def enrich_documents(
        self, request: EnrichmentIndexRequest
    ) -> AsyncGenerator[EnrichmentResponse, None]:
        """Stream the provider's responses for the requests in ``request``.

        Args:
            request: Index request whose ``requests`` are passed to the provider.

        Yields:
            Each response, in the order the provider produces them.

        """
        provider_stream = self.enrichment_provider.enrich(request.requests)
        async for enriched in provider_stream:
            yield enriched
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Domain service for ignore patterns."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class IgnorePatternProvider(ABC):
    """Abstract interface implemented by every source of ignore rules."""

    @abstractmethod
    def should_ignore(self, path: Path) -> bool:
        """Decide whether ``path`` is to be ignored.

        Args:
            path: The path being examined.

        Returns:
            True when the path is to be ignored, False when it is not.

        """
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class IgnoreService:
    """Domain service that answers ignore questions via a pattern provider."""

    def __init__(self, ignore_pattern_provider: IgnorePatternProvider) -> None:
        """Create the service around an ignore pattern provider.

        Args:
            ignore_pattern_provider: Provider consulted for every decision.

        """
        self.ignore_pattern_provider = ignore_pattern_provider

    def should_ignore(self, path: Path) -> bool:
        """Ask the provider whether ``path`` is to be ignored.

        Args:
            path: The path being examined.

        Returns:
            The provider's verdict for ``path``.

        """
        provider = self.ignore_pattern_provider
        return provider.should_ignore(path)
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"""Domain service for indexing operations."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
from kodit.domain.entities import Snippet
|
|
6
|
+
from kodit.domain.value_objects import (
|
|
7
|
+
FusionRequest,
|
|
8
|
+
FusionResult,
|
|
9
|
+
IndexCreateRequest,
|
|
10
|
+
IndexView,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class IndexRepository(ABC):
    """Abstract persistence interface for indexes and their snippets."""

    @abstractmethod
    async def create_index(self, source_id: int) -> IndexView:
        """Create an index for the source ``source_id`` and return its view."""

    @abstractmethod
    async def get_index_by_id(self, index_id: int) -> IndexView | None:
        """Look up the index with ID ``index_id``; ``None`` is a valid result."""

    @abstractmethod
    async def get_index_by_source_id(self, source_id: int) -> IndexView | None:
        """Look up the index of source ``source_id``; ``None`` is a valid result."""

    @abstractmethod
    async def list_indexes(self) -> list[IndexView]:
        """Return every index."""

    @abstractmethod
    async def update_index_timestamp(self, index_id: int) -> None:
        """Update the timestamp of the index ``index_id``."""

    @abstractmethod
    async def delete_all_snippets(self, index_id: int) -> None:
        """Delete every snippet belonging to the index ``index_id``."""

    @abstractmethod
    async def get_snippets_for_index(self, index_id: int) -> list[Snippet]:
        """Return every snippet belonging to the index ``index_id``."""

    @abstractmethod
    async def add_snippet(self, snippet: dict) -> None:
        """Store ``snippet`` in the database."""

    @abstractmethod
    async def update_snippet_content(self, snippet_id: int, content: str) -> None:
        """Replace the content of the existing snippet ``snippet_id``."""

    @abstractmethod
    async def list_snippets_by_ids(self, ids: list[int]) -> list[tuple[dict, dict]]:
        """Return the snippets whose IDs are in ``ids`` as pairs of dicts."""
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class FusionService(ABC):
    """Abstract interface for fusing several rankings into one result list."""

    @abstractmethod
    def reciprocal_rank_fusion(
        self, rankings: list[list[FusionRequest]], k: float = 60
    ) -> list[FusionResult]:
        """Fuse ``rankings`` with reciprocal rank fusion, parameterised by ``k``."""
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class IndexingDomainService:
    """Domain facade that delegates to an index repository and a fusion service."""

    def __init__(
        self, index_repository: IndexRepository, fusion_service: FusionService
    ) -> None:
        """Store the collaborators every method delegates to.

        Args:
            index_repository: Backend used for all index and snippet operations.
            fusion_service: Backend used to fuse ranked result lists.

        """
        self.fusion_service = fusion_service
        self.index_repository = index_repository

    async def create_index(self, request: IndexCreateRequest) -> IndexView:
        """Create an index for the source named in ``request``.

        Args:
            request: Create request; only its ``source_id`` is used.

        Returns:
            The view of the index, as returned by the repository.

        """
        repository = self.index_repository
        return await repository.create_index(request.source_id)

    async def get_index(self, index_id: int) -> IndexView | None:
        """Fetch an index by its own ID.

        Args:
            index_id: ID of the index to look up.

        Returns:
            The repository's result: an index view, or None.

        """
        repository = self.index_repository
        return await repository.get_index_by_id(index_id)

    async def get_index_by_source_id(self, source_id: int) -> IndexView | None:
        """Fetch the index that belongs to a source.

        Args:
            source_id: ID of the source whose index is wanted.

        Returns:
            The repository's result: an index view, or None.

        """
        repository = self.index_repository
        return await repository.get_index_by_source_id(source_id)

    async def list_indexes(self) -> list[IndexView]:
        """Fetch every index.

        Returns:
            The list of index views returned by the repository.

        """
        repository = self.index_repository
        return await repository.list_indexes()

    async def update_index_timestamp(self, index_id: int) -> None:
        """Ask the repository to update an index's timestamp.

        Args:
            index_id: ID of the index to update.

        """
        repository = self.index_repository
        await repository.update_index_timestamp(index_id)

    async def delete_all_snippets(self, index_id: int) -> None:
        """Ask the repository to delete every snippet of an index.

        Args:
            index_id: ID of the index whose snippets are deleted.

        """
        repository = self.index_repository
        await repository.delete_all_snippets(index_id)

    async def get_snippets_for_index(self, index_id: int) -> list[Snippet]:
        """Fetch every snippet of an index.

        Args:
            index_id: ID of the index whose snippets are wanted.

        Returns:
            The list of snippets returned by the repository.

        """
        repository = self.index_repository
        return await repository.get_snippets_for_index(index_id)

    async def add_snippet(self, snippet: dict) -> None:
        """Ask the repository to store a snippet.

        Args:
            snippet: The snippet to store.

        """
        repository = self.index_repository
        await repository.add_snippet(snippet)

    async def update_snippet_content(self, snippet_id: int, content: str) -> None:
        """Ask the repository to replace a snippet's content.

        Args:
            snippet_id: ID of the snippet to change.
            content: Content that replaces the current one.

        """
        repository = self.index_repository
        await repository.update_snippet_content(snippet_id, content)

    def perform_fusion(
        self, rankings: list[list[FusionRequest]], k: float = 60
    ) -> list[FusionResult]:
        """Fuse several rankings through the fusion service.

        Args:
            rankings: The rankings to fuse.
            k: Reciprocal rank fusion parameter, passed through unchanged.

        Returns:
            The fused results returned by the fusion service.

        """
        fuser = self.fusion_service
        return fuser.reciprocal_rank_fusion(rankings, k)

    async def get_snippets_by_ids(self, ids: list[int]) -> list[tuple[dict, dict]]:
        """Fetch snippets by their IDs.

        Args:
            ids: IDs of the snippets to fetch.

        Returns:
            List of (file, snippet) tuples.

        """
        repository = self.index_repository
        return await repository.list_snippets_by_ids(ids)
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Domain services for snippet extraction."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from collections.abc import Mapping
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from kodit.domain.enums import SnippetExtractionStrategy
|
|
8
|
+
from kodit.domain.value_objects import SnippetExtractionRequest, SnippetExtractionResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LanguageDetectionService(ABC):
    """Abstract interface for detecting the language of a file."""

    @abstractmethod
    async def detect_language(self, file_path: Path) -> str:
        """Return the programming language detected for ``file_path``."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SnippetExtractor(ABC):
    """Abstract interface implemented by every snippet extractor."""

    @abstractmethod
    async def extract(self, file_path: Path, language: str) -> list[str]:
        """Return the snippets extracted from ``file_path`` for ``language``."""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SnippetExtractionService(ABC):
    """Abstract domain service for extracting snippets from source code."""

    @abstractmethod
    async def extract_snippets(
        self, request: SnippetExtractionRequest
    ) -> SnippetExtractionResult:
        """Extract snippets for ``request`` using the strategy it specifies."""
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class SnippetExtractionDomainService:
    """Domain service implementation for snippet extraction business logic."""

    def __init__(
        self,
        language_detector: LanguageDetectionService,
        snippet_extractors: Mapping[SnippetExtractionStrategy, SnippetExtractor],
    ) -> None:
        """Initialize the snippet extraction domain service.

        Args:
            language_detector: Service for detecting programming languages
            snippet_extractors: Mapping from extraction strategy to the
                extractor implementation that handles it

        """
        self.language_detector = language_detector
        self.snippet_extractors = snippet_extractors

    async def extract_snippets(
        self, request: SnippetExtractionRequest
    ) -> SnippetExtractionResult:
        """Extract snippets from a file using the specified strategy.

        Both request checks run before any awaited work, so an unsupported
        strategy is rejected without invoking the language detector.

        Args:
            request: The snippet extraction request; its ``file_path`` and
                ``strategy`` are used

        Returns:
            SnippetExtractionResult containing the non-blank extracted
            snippets and the detected language

        Raises:
            ValueError: If the file doesn't exist or strategy is unsupported

        """
        # Domain logic: validate file exists
        if not request.file_path.exists():
            raise ValueError(f"File does not exist: {request.file_path}")

        # Domain logic: validate the strategy up front. Doing this before
        # language detection means an unsupported strategy fails fast and its
        # error cannot be masked by a failure inside the detector.
        if request.strategy not in self.snippet_extractors:
            raise ValueError(f"Unsupported extraction strategy: {request.strategy}")
        extractor = self.snippet_extractors[request.strategy]

        # Domain logic: detect language, then extract with the chosen extractor
        language = await self.language_detector.detect_language(request.file_path)
        snippets = await extractor.extract(request.file_path, language)

        # Domain logic: filter out empty (blank or whitespace-only) snippets
        filtered_snippets = [snippet for snippet in snippets if snippet.strip()]

        return SnippetExtractionResult(snippets=filtered_snippets, language=language)
|