kodit 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/factories/__init__.py +1 -0
- kodit/application/factories/code_indexing_factory.py +119 -0
- kodit/application/services/{indexing_application_service.py → code_indexing_application_service.py} +159 -198
- kodit/cli.py +199 -62
- kodit/domain/entities.py +7 -5
- kodit/domain/repositories.py +33 -0
- kodit/domain/services/bm25_service.py +14 -17
- kodit/domain/services/embedding_service.py +10 -14
- kodit/domain/services/snippet_service.py +198 -0
- kodit/domain/value_objects.py +301 -21
- kodit/infrastructure/bm25/local_bm25_repository.py +20 -12
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +31 -11
- kodit/infrastructure/cloning/git/working_copy.py +5 -2
- kodit/infrastructure/cloning/metadata.py +1 -0
- kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +14 -25
- kodit/infrastructure/embedding/local_vector_search_repository.py +26 -38
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +50 -35
- kodit/infrastructure/enrichment/enrichment_factory.py +1 -1
- kodit/infrastructure/indexing/indexing_factory.py +8 -91
- kodit/infrastructure/indexing/snippet_domain_service_factory.py +37 -0
- kodit/infrastructure/snippet_extraction/languages/java.scm +12 -0
- kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +3 -31
- kodit/infrastructure/sqlalchemy/embedding_repository.py +14 -3
- kodit/infrastructure/sqlalchemy/snippet_repository.py +174 -2
- kodit/mcp.py +61 -49
- {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/METADATA +1 -1
- {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/RECORD +31 -30
- kodit/application/commands/__init__.py +0 -1
- kodit/application/commands/snippet_commands.py +0 -22
- kodit/application/services/snippet_application_service.py +0 -149
- kodit/infrastructure/enrichment/legacy_enrichment_models.py +0 -42
- {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/WHEEL +0 -0
- {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/entry_points.txt +0 -0
- {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,149 +0,0 @@
|
|
|
1
|
-
"""Application service for snippet operations."""
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from typing import Any
|
|
5
|
-
|
|
6
|
-
import structlog
|
|
7
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
|
-
|
|
9
|
-
from kodit.application.commands.snippet_commands import (
|
|
10
|
-
CreateIndexSnippetsCommand,
|
|
11
|
-
ExtractSnippetsCommand,
|
|
12
|
-
)
|
|
13
|
-
from kodit.domain.entities import Snippet
|
|
14
|
-
from kodit.domain.enums import SnippetExtractionStrategy
|
|
15
|
-
from kodit.domain.interfaces import ProgressCallback
|
|
16
|
-
from kodit.domain.repositories import FileRepository, SnippetRepository
|
|
17
|
-
from kodit.domain.services.snippet_extraction_service import (
|
|
18
|
-
SnippetExtractionDomainService,
|
|
19
|
-
)
|
|
20
|
-
from kodit.domain.value_objects import SnippetExtractionRequest
|
|
21
|
-
from kodit.reporting import Reporter
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class SnippetApplicationService:
    """Application service for snippet operations.

    Coordinates the snippet extraction domain service with the snippet and
    file repositories, and commits the work through the supplied session.
    """

    # MIME types that are never passed to snippet extraction. Kept as a
    # class-level constant so it is not rebuilt on every call.
    _MIME_BLACKLIST = ("unknown/unknown",)

    def __init__(
        self,
        snippet_extraction_service: SnippetExtractionDomainService,
        snippet_repository: SnippetRepository,
        file_repository: FileRepository,
        session: AsyncSession,
    ) -> None:
        """Initialize the snippet application service.

        Args:
            snippet_extraction_service: Domain service for snippet extraction
            snippet_repository: Repository for snippet persistence
            file_repository: Repository for file operations
            session: The database session for transaction management

        """
        self.snippet_extraction_service = snippet_extraction_service
        self.snippet_repository = snippet_repository
        self.file_repository = file_repository
        self.session = session
        self.log = structlog.get_logger(__name__)

    async def extract_snippets_from_file(
        self, command: ExtractSnippetsCommand
    ) -> list[Snippet]:
        """Application use case: extract snippets from a single file.

        Args:
            command: The extract snippets command

        Returns:
            List of extracted snippets. Each one carries placeholder
            ``file_id=0`` and ``index_id=0``; the caller is expected to
            assign the real IDs.

        """
        request = SnippetExtractionRequest(command.file_path, command.strategy)
        result = await self.snippet_extraction_service.extract_snippets(request)

        # Convert domain result to persistence model (IDs will be set later).
        return [
            Snippet(file_id=0, index_id=0, content=snippet_text)
            for snippet_text in result.snippets
        ]

    def _should_process_file(self, file: Any) -> bool:
        """Check if a file should be processed for snippet extraction.

        Args:
            file: The file to check; only its ``mime_type`` attribute is read

        Returns:
            True if the file's MIME type is not blacklisted

        """
        return file.mime_type not in self._MIME_BLACKLIST

    async def _extract_snippets_from_file(
        self, file: Any, strategy: SnippetExtractionStrategy
    ) -> list[str]:
        """Extract the snippet texts from a single file.

        Args:
            file: The file to extract from; its ``cloned_path`` is used
            strategy: The extraction strategy to apply

        Returns:
            The content of every extracted snippet

        """
        command = ExtractSnippetsCommand(
            file_path=Path(file.cloned_path),
            strategy=strategy,
        )
        snippets = await self.extract_snippets_from_file(command)
        return [snippet.content for snippet in snippets]

    async def _save_snippets_for_file(
        self, file: Any, command: CreateIndexSnippetsCommand
    ) -> None:
        """Extract and save the snippets of one file, skipping it on failure.

        Args:
            file: The file whose snippets should be created
            command: The create index snippets command

        """
        try:
            snippet_contents = await self._extract_snippets_from_file(
                file, command.strategy
            )
            for snippet_content in snippet_contents:
                snippet = Snippet(
                    file_id=file.id,
                    index_id=command.index_id,
                    content=snippet_content,
                )
                await self.snippet_repository.save(snippet)
        except (OSError, ValueError) as e:
            # Best effort: a file that cannot be read or parsed must not
            # abort snippet creation for the rest of the index.
            self.log.debug(
                "Skipping file",
                file=file.cloned_path,
                error=str(e),
            )

    async def create_snippets_for_index(
        self,
        command: CreateIndexSnippetsCommand,
        progress_callback: ProgressCallback | None = None,
    ) -> None:
        """Create snippets for all files in an index.

        Args:
            command: The create index snippets command
            progress_callback: Optional progress callback for reporting progress

        """
        files = await self.file_repository.get_files_for_index(command.index_id)
        total = len(files)

        reporter = Reporter(self.log, progress_callback)
        await reporter.start(
            "create_snippets", total, "Creating snippets from files..."
        )

        for i, file in enumerate(files, 1):
            if self._should_process_file(file):
                await self._save_snippets_for_file(file, command)

            # Report a step for every file, including ones that were skipped
            # or failed, so the reported position always reaches the total.
            await reporter.step(
                "create_snippets",
                current=i,
                total=total,
                message=f"Processing {file.cloned_path}...",
            )

        # Commit all snippet creations in a single transaction
        await self.session.commit()
        await reporter.done("create_snippets")
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
"""Legacy enrichment models for backward compatibility."""
|
|
2
|
-
|
|
3
|
-
from abc import ABC, abstractmethod
|
|
4
|
-
from collections.abc import AsyncGenerator
|
|
5
|
-
from dataclasses import dataclass
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
@dataclass
class EnrichmentRequest:
    """Legacy enrichment request model."""

    # Identifier of the snippet this request refers to.
    snippet_id: int
    # Text submitted for enrichment.
    text: str
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
@dataclass
class EnrichmentResponse:
    """Legacy enrichment response model."""

    # Identifier of the snippet this response refers to.
    snippet_id: int
    # Text produced for the snippet.
    text: str
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class EnrichmentService(ABC):
    """Legacy enrichment service interface.

    Subclasses must implement :meth:`enrich`; the class cannot be
    instantiated directly because that method is abstract.
    """

    @abstractmethod
    def enrich(
        self, data: list[EnrichmentRequest]
    ) -> AsyncGenerator[EnrichmentResponse, None]:
        """Enrich a list of requests.

        Args:
            data: The enrichment requests to process

        Returns:
            An async generator yielding enrichment responses

        """
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
class NullEnrichmentService(EnrichmentService):
    """Null enrichment service for testing."""

    async def enrich(
        self, data: list[EnrichmentRequest]
    ) -> AsyncGenerator[EnrichmentResponse, None]:
        """Return empty responses for all requests.

        Args:
            data: The enrichment requests to answer

        Yields:
            One response per request, in input order, carrying the
            request's ``snippet_id`` and an empty ``text``

        """
        for request in data:
            yield EnrichmentResponse(snippet_id=request.snippet_id, text="")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|