kodit 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: the registry flags this version of kodit as possibly problematic.

Files changed (57)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/code_indexing_factory.py +77 -28
  3. kodit/application/services/code_indexing_application_service.py +148 -119
  4. kodit/cli.py +49 -52
  5. kodit/domain/entities.py +268 -189
  6. kodit/domain/protocols.py +61 -0
  7. kodit/domain/services/embedding_service.py +1 -1
  8. kodit/domain/services/index_query_service.py +66 -0
  9. kodit/domain/services/index_service.py +323 -0
  10. kodit/domain/value_objects.py +225 -92
  11. kodit/infrastructure/cloning/git/working_copy.py +17 -8
  12. kodit/infrastructure/cloning/metadata.py +37 -67
  13. kodit/infrastructure/embedding/embedding_factory.py +1 -1
  14. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  15. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
  16. kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
  17. kodit/infrastructure/git/git_utils.py +1 -63
  18. kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
  19. kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
  20. kodit/infrastructure/indexing/fusion_service.py +1 -1
  21. kodit/infrastructure/mappers/__init__.py +1 -0
  22. kodit/infrastructure/mappers/index_mapper.py +344 -0
  23. kodit/infrastructure/snippet_extraction/factories.py +13 -0
  24. kodit/infrastructure/snippet_extraction/language_detection_service.py +1 -1
  25. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -1
  26. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +1 -1
  27. kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
  28. kodit/infrastructure/sqlalchemy/entities.py +203 -0
  29. kodit/infrastructure/sqlalchemy/file_repository.py +1 -1
  30. kodit/infrastructure/sqlalchemy/index_repository.py +550 -0
  31. kodit/log.py +4 -1
  32. kodit/mcp.py +1 -13
  33. kodit/migrations/env.py +1 -1
  34. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +34 -0
  35. kodit/migrations/versions/4552eb3f23ce_add_summary.py +34 -0
  36. kodit/utils/__init__.py +1 -0
  37. kodit/utils/path_utils.py +54 -0
  38. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/METADATA +1 -1
  39. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/RECORD +42 -45
  40. kodit/domain/enums.py +0 -9
  41. kodit/domain/repositories.py +0 -128
  42. kodit/domain/services/ignore_service.py +0 -45
  43. kodit/domain/services/indexing_service.py +0 -204
  44. kodit/domain/services/snippet_extraction_service.py +0 -89
  45. kodit/domain/services/snippet_service.py +0 -211
  46. kodit/domain/services/source_service.py +0 -85
  47. kodit/infrastructure/cloning/folder/__init__.py +0 -1
  48. kodit/infrastructure/cloning/folder/factory.py +0 -128
  49. kodit/infrastructure/cloning/folder/working_copy.py +0 -38
  50. kodit/infrastructure/cloning/git/factory.py +0 -153
  51. kodit/infrastructure/indexing/index_repository.py +0 -273
  52. kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
  53. kodit/infrastructure/sqlalchemy/repository.py +0 -133
  54. kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -251
  55. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/WHEEL +0 -0
  56. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/entry_points.txt +0 -0
  57. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/licenses/LICENSE +0 -0

kodit/domain/services/index_service.py
@@ -0,0 +1,323 @@
+ """Pure domain service for Index aggregate operations."""
+
+ from abc import ABC, abstractmethod
+ from collections.abc import Mapping
+ from pathlib import Path
+
+ import structlog
+ from pydantic import AnyUrl
+
+ import kodit.domain.entities as domain_entities
+ from kodit.domain.interfaces import ProgressCallback
+ from kodit.domain.services.enrichment_service import EnrichmentDomainService
+ from kodit.domain.value_objects import (
+     EnrichmentIndexRequest,
+     EnrichmentRequest,
+     SnippetExtractionRequest,
+     SnippetExtractionResult,
+     SnippetExtractionStrategy,
+ )
+ from kodit.infrastructure.cloning.git.working_copy import GitWorkingCopyProvider
+ from kodit.infrastructure.cloning.metadata import FileMetadataExtractor
+ from kodit.infrastructure.git.git_utils import is_valid_clone_target
+ from kodit.infrastructure.ignore.ignore_pattern_provider import GitIgnorePatternProvider
+ from kodit.reporting import Reporter
+ from kodit.utils.path_utils import path_from_uri
+
+
+ class LanguageDetectionService(ABC):
+     """Abstract interface for language detection service."""
+
+     @abstractmethod
+     async def detect_language(self, file_path: Path) -> str:
+         """Detect the programming language of a file."""
+
+
+ class SnippetExtractor(ABC):
+     """Abstract interface for snippet extraction."""
+
+     @abstractmethod
+     async def extract(self, file_path: Path, language: str) -> list[str]:
+         """Extract snippets from a file."""
+
+
+ class IndexDomainService:
+     """Pure domain service for Index aggregate operations.
+
+     This service handles the full lifecycle of code indexing:
+     - Creating indexes for source repositories
+     - Cloning and processing source files
+     - Extracting and enriching code snippets
+     - Managing the complete Index aggregate
+     """
+
+     def __init__(
+         self,
+         language_detector: LanguageDetectionService,
+         snippet_extractors: Mapping[SnippetExtractionStrategy, SnippetExtractor],
+         enrichment_service: EnrichmentDomainService,
+         clone_dir: Path,
+     ) -> None:
+         """Initialize the index domain service."""
+         self._clone_dir = clone_dir
+         self._language_detector = language_detector
+         self._snippet_extractors = snippet_extractors
+         self._enrichment_service = enrichment_service
+         self.log = structlog.get_logger(__name__)
+
+     async def prepare_index(
+         self,
+         uri_or_path_like: str,  # Must include user/pass, etc
+         progress_callback: ProgressCallback | None = None,
+     ) -> domain_entities.WorkingCopy:
+         """Prepare an index by scanning files and creating working copy."""
+         sanitized_uri, source_type = self.sanitize_uri(uri_or_path_like)
+         reporter = Reporter(self.log, progress_callback)
+         self.log.info("Preparing source", uri=str(sanitized_uri))
+
+         if source_type == domain_entities.SourceType.FOLDER:
+             await reporter.start("prepare_index", 1, "Scanning source...")
+             local_path = path_from_uri(str(sanitized_uri))
+         elif source_type == domain_entities.SourceType.GIT:
+             source_type = domain_entities.SourceType.GIT
+             git_working_copy_provider = GitWorkingCopyProvider(self._clone_dir)
+             await reporter.start("prepare_index", 1, "Cloning source...")
+             local_path = await git_working_copy_provider.prepare(uri_or_path_like)
+             await reporter.done("prepare_index")
+         else:
+             raise ValueError(f"Unsupported source: {uri_or_path_like}")
+
+         await reporter.done("prepare_index")
+
+         return domain_entities.WorkingCopy(
+             remote_uri=sanitized_uri,
+             cloned_path=local_path,
+             source_type=source_type,
+             files=[],
+         )
+
+     async def extract_snippets_from_index(
+         self,
+         index: domain_entities.Index,
+         strategy: SnippetExtractionStrategy = SnippetExtractionStrategy.METHOD_BASED,
+         progress_callback: ProgressCallback | None = None,
+     ) -> domain_entities.Index:
+         """Extract code snippets from files in the index."""
+         file_count = len(index.source.working_copy.files)
+
+         self.log.info(
+             "Extracting snippets",
+             index_id=index.id,
+             file_count=file_count,
+             strategy=strategy.value,
+         )
+
+         # Only create snippets for files that have been added or modified
+         files = index.source.working_copy.changed_files()
+         index.delete_snippets_for_files(files)
+
+         reporter = Reporter(self.log, progress_callback)
+         await reporter.start(
+             "extract_snippets", len(files), "Extracting code snippets..."
+         )
+
+         new_snippets = []
+         for i, domain_file in enumerate(files, 1):
+             try:
+                 # Extract snippets from file
+                 request = SnippetExtractionRequest(
+                     file_path=domain_file.as_path(), strategy=strategy
+                 )
+                 result = await self._extract_snippets(request)
+                 for snippet_text in result.snippets:
+                     snippet = domain_entities.Snippet(
+                         derives_from=[domain_file],
+                     )
+                     snippet.add_original_content(snippet_text, result.language)
+                     new_snippets.append(snippet)
+
+             except (OSError, ValueError) as e:
+                 self.log.debug(
+                     "Skipping file for snippet extraction",
+                     file_uri=str(domain_file.uri),
+                     error=str(e),
+                 )
+                 continue
+
+             await reporter.step(
+                 "extract_snippets", i, len(files), f"Processed {domain_file.uri.path}"
+             )
+
+         index.snippets.extend(new_snippets)
+         await reporter.done("extract_snippets")
+         return index
+
+     async def enrich_snippets_in_index(
+         self,
+         snippets: list[domain_entities.Snippet],
+         progress_callback: ProgressCallback | None = None,
+     ) -> list[domain_entities.Snippet]:
+         """Enrich snippets with AI-generated summaries."""
+         if not snippets or len(snippets) == 0:
+             return snippets
+
+         reporter = Reporter(self.log, progress_callback)
+         await reporter.start("enrichment", len(snippets), "Enriching snippets...")
+
+         snippet_map = {snippet.id: snippet for snippet in snippets if snippet.id}
+
+         enrichment_request = EnrichmentIndexRequest(
+             requests=[
+                 EnrichmentRequest(snippet_id=snippet_id, text=snippet.original_text())
+                 for snippet_id, snippet in snippet_map.items()
+             ]
+         )
+
+         processed = 0
+         async for result in self._enrichment_service.enrich_documents(
+             enrichment_request
+         ):
+             snippet_map[result.snippet_id].add_summary(result.text)
+
+             processed += 1
+             await reporter.step(
+                 "enrichment", processed, len(snippets), "Enriching snippets..."
+             )
+
+         await reporter.done("enrichment")
+         return list(snippet_map.values())
+
+     async def _extract_snippets(
+         self, request: SnippetExtractionRequest
+     ) -> SnippetExtractionResult:
+         # Domain logic: validate file exists
+         if not request.file_path.exists():
+             raise ValueError(f"File does not exist: {request.file_path}")
+
+         # Domain logic: detect language
+         language = await self._language_detector.detect_language(request.file_path)
+
+         # Domain logic: choose strategy and extractor
+         if request.strategy not in self._snippet_extractors:
+             raise ValueError(f"Unsupported extraction strategy: {request.strategy}")
+
+         extractor = self._snippet_extractors[request.strategy]
+         snippets = await extractor.extract(request.file_path, language)
+
+         # Domain logic: filter out empty snippets
+         filtered_snippets = [snippet for snippet in snippets if snippet.strip()]
+
+         return SnippetExtractionResult(snippets=filtered_snippets, language=language)
+
+     def sanitize_uri(
+         self, uri_or_path_like: str
+     ) -> tuple[AnyUrl, domain_entities.SourceType]:
+         """Convert a URI or path-like string to a URI."""
+         # First, check if it's a local directory (more reliable than git check)
+         if Path(uri_or_path_like).is_dir():
+             return (
+                 domain_entities.WorkingCopy.sanitize_local_path(uri_or_path_like),
+                 domain_entities.SourceType.FOLDER,
+             )
+
+         # Then check if it's git-clonable
+         if is_valid_clone_target(uri_or_path_like):
+             return (
+                 domain_entities.WorkingCopy.sanitize_git_url(uri_or_path_like),
+                 domain_entities.SourceType.GIT,
+             )
+
+         raise ValueError(f"Unsupported source: {uri_or_path_like}")
+
+     async def refresh_working_copy(
+         self,
+         working_copy: domain_entities.WorkingCopy,
+         progress_callback: ProgressCallback | None = None,
+     ) -> domain_entities.WorkingCopy:
+         """Refresh the working copy."""
+         metadata_extractor = FileMetadataExtractor(working_copy.source_type)
+         reporter = Reporter(self.log, progress_callback)
+
+         if working_copy.source_type == domain_entities.SourceType.GIT:
+             git_working_copy_provider = GitWorkingCopyProvider(self._clone_dir)
+             await git_working_copy_provider.sync(str(working_copy.remote_uri))
+
+         current_file_paths = working_copy.list_filesystem_paths(
+             GitIgnorePatternProvider(working_copy.cloned_path)
+         )
+
+         previous_files_map = {file.as_path(): file for file in working_copy.files}
+
+         # Calculate different sets of files
+         deleted_file_paths = set(previous_files_map.keys()) - set(current_file_paths)
+         new_file_paths = set(current_file_paths) - set(previous_files_map.keys())
+         modified_file_paths = set(current_file_paths) & set(previous_files_map.keys())
+         num_files_to_process = (
+             len(deleted_file_paths) + len(new_file_paths) + len(modified_file_paths)
+         )
+         self.log.info(
+             "Refreshing working copy",
+             num_deleted=len(deleted_file_paths),
+             num_new=len(new_file_paths),
+             num_modified=len(modified_file_paths),
+             num_total_changes=num_files_to_process,
+             num_dirty=len(working_copy.dirty_files()),
+         )
+
+         # Setup reporter
+         processed = 0
+         await reporter.start(
+             "refresh_working_copy", num_files_to_process, "Refreshing working copy..."
+         )
+
+         # First check to see if any files have been deleted
+         for file_path in deleted_file_paths:
+             processed += 1
+             await reporter.step(
+                 "refresh_working_copy",
+                 processed,
+                 num_files_to_process,
+                 f"Deleted {file_path.name}",
+             )
+             previous_files_map[
+                 file_path
+             ].file_processing_status = domain_entities.FileProcessingStatus.DELETED
+
+         # Then check to see if there are any new files
+         for file_path in new_file_paths:
+             processed += 1
+             await reporter.step(
+                 "refresh_working_copy",
+                 processed,
+                 num_files_to_process,
+                 f"New {file_path.name}",
+             )
+             try:
+                 working_copy.files.append(
+                     await metadata_extractor.extract(file_path=file_path)
+                 )
+             except (OSError, ValueError) as e:
+                 self.log.info("Skipping file", file=str(file_path), error=str(e))
+                 continue
+
+         # Finally check if there are any modified files
+         for file_path in modified_file_paths:
+             processed += 1
+             await reporter.step(
+                 "refresh_working_copy",
+                 processed,
+                 num_files_to_process,
+                 f"Modified {file_path.name}",
+             )
+             try:
+                 previous_file = previous_files_map[file_path]
+                 new_file = await metadata_extractor.extract(file_path=file_path)
+                 if previous_file.sha256 != new_file.sha256:
+                     previous_file.file_processing_status = (
+                         domain_entities.FileProcessingStatus.MODIFIED
+                     )
+             except (OSError, ValueError) as e:
+                 self.log.info("Skipping file", file=str(file_path), error=str(e))
+                 continue
+
+         return working_copy
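
For orientation: the two ABCs at the top of this new module are the seams that infrastructure plugs into. Below is a minimal sketch of concrete implementations, assuming kodit 0.3.3 is installed and exposes these classes at kodit.domain.services.index_service; the names ExtensionLanguageDetector and WholeFileExtractor are hypothetical, not part of the package.

```python
# Hypothetical implementations of the abstract seams defined in index_service.py.
from pathlib import Path

from kodit.domain.services.index_service import (
    LanguageDetectionService,
    SnippetExtractor,
)


class ExtensionLanguageDetector(LanguageDetectionService):
    """Toy detector: map a file extension straight to a language name."""

    async def detect_language(self, file_path: Path) -> str:
        return {".py": "python", ".go": "go", ".ts": "typescript"}.get(
            file_path.suffix, "unknown"
        )


class WholeFileExtractor(SnippetExtractor):
    """Toy extractor: treat the entire file as a single snippet."""

    async def extract(self, file_path: Path, language: str) -> list[str]:
        return [file_path.read_text(encoding="utf-8", errors="ignore")]
```

In the released package, the real implementations appear to live in the changed files kodit/infrastructure/snippet_extraction/language_detection_service.py and tree_sitter_snippet_extractor.py.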

kodit/domain/value_objects.py
@@ -1,16 +1,129 @@
- """Domain value objects and DTOs."""
+ """Pure domain value objects and DTOs."""
  
+ import json
  from dataclasses import dataclass
  from datetime import datetime
- from enum import Enum
+ from enum import Enum, IntEnum
  from pathlib import Path
- from typing import Any, ClassVar
+ from typing import ClassVar
  
- from sqlalchemy import JSON, DateTime, Integer, Text
- from sqlalchemy.orm import Mapped, mapped_column
+ from pydantic import BaseModel
  
- from kodit.domain.entities import Base
- from kodit.domain.enums import SnippetExtractionStrategy
+
+ class SourceType(IntEnum):
+     """The type of source."""
+
+     UNKNOWN = 0
+     FOLDER = 1
+     GIT = 2
+
+
+ class SnippetContentType(IntEnum):
+     """Type of snippet content."""
+
+     UNKNOWN = 0
+     ORIGINAL = 1
+     SUMMARY = 2
+
+
+ class SnippetContent(BaseModel):
+     """Snippet content domain value object."""
+
+     type: SnippetContentType
+     value: str
+     language: str
+
+
+ class SnippetSearchResult(BaseModel):
+     """Domain result object for snippet searches."""
+
+     snippet_id: int
+     content: str
+     summary: str
+     score: float
+     file_path: Path
+     language: str | None = None
+     authors: list[str] = []
+
+
+ @dataclass(frozen=True)
+ class LanguageExtensions:
+     """Value object for language to file extension mappings."""
+
+     language: str
+     extensions: list[str]
+
+     @classmethod
+     def get_supported_languages(cls) -> list[str]:
+         """Get all supported programming languages."""
+         return [
+             "python",
+             "javascript",
+             "typescript",
+             "java",
+             "c",
+             "cpp",
+             "csharp",
+             "go",
+             "rust",
+             "php",
+             "ruby",
+             "swift",
+             "kotlin",
+             "scala",
+             "r",
+             "sql",
+             "html",
+             "css",
+             "json",
+             "yaml",
+             "xml",
+             "markdown",
+             "shell",
+         ]
+
+     @classmethod
+     def get_extensions_for_language(cls, language: str) -> list[str]:
+         """Get file extensions for a given language."""
+         language_map = {
+             "python": [".py", ".pyw", ".pyi"],
+             "javascript": [".js", ".jsx", ".mjs"],
+             "typescript": [".ts", ".tsx"],
+             "java": [".java"],
+             "c": [".c", ".h"],
+             "cpp": [".cpp", ".cc", ".cxx", ".hpp", ".hxx"],
+             "csharp": [".cs"],
+             "go": [".go"],
+             "rust": [".rs"],
+             "php": [".php"],
+             "ruby": [".rb"],
+             "swift": [".swift"],
+             "kotlin": [".kt", ".kts"],
+             "scala": [".scala", ".sc"],
+             "r": [".r", ".R"],
+             "sql": [".sql"],
+             "html": [".html", ".htm"],
+             "css": [".css", ".scss", ".sass", ".less"],
+             "json": [".json"],
+             "yaml": [".yaml", ".yml"],
+             "xml": [".xml"],
+             "markdown": [".md", ".markdown"],
+             "shell": [".sh", ".bash", ".zsh", ".fish"],
+         }
+         return language_map.get(language.lower(), [])
+
+     @classmethod
+     def is_supported_language(cls, language: str) -> bool:
+         """Check if a language is supported."""
+         return language.lower() in cls.get_supported_languages()
+
+     @classmethod
+     def get_extensions_or_fallback(cls, language: str) -> list[str]:
+         """Get extensions for language or return language as extension if not found."""
+         language_lower = language.lower()
+         if cls.is_supported_language(language_lower):
+             return cls.get_extensions_for_language(language_lower)
+         return [language_lower]
  
  
  class SearchType(Enum):
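
The LanguageExtensions value object added above is pure lookup logic, so its behavior can be read straight off the diff. A short illustrative session, assuming the class is importable from kodit.domain.value_objects as the module path suggests:

```python
from kodit.domain.value_objects import LanguageExtensions

# Known languages resolve to their extension lists, case-insensitively.
print(LanguageExtensions.get_extensions_for_language("TypeScript"))  # ['.ts', '.tsx']

# Unknown languages are not supported, and the fallback returns the
# lowercased language name itself as a pseudo-extension.
print(LanguageExtensions.is_supported_language("cobol"))        # False
print(LanguageExtensions.get_extensions_or_fallback("cobol"))   # ['cobol']
```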
@@ -21,14 +134,6 @@ class SearchType(Enum):
      HYBRID = "hybrid"
  
  
- @dataclass
- class SnippetExtractionRequest:
-     """Domain model for snippet extraction request."""
-
-     file_path: Path
-     strategy: SnippetExtractionStrategy = SnippetExtractionStrategy.METHOD_BASED
-
-
  @dataclass
  class SnippetExtractionResult:
      """Domain model for snippet extraction result."""
@@ -100,6 +205,7 @@ class SnippetSearchFilters:
      created_after: datetime | None = None
      created_before: datetime | None = None
      source_repo: str | None = None
+     file_path: str | None = None
  
      @classmethod
      def from_cli_params(
@@ -175,25 +281,90 @@ class MultiSearchRequest:
  
  @dataclass
  class MultiSearchResult:
-     """Domain model for multi-modal search result."""
+     """Enhanced search result with comprehensive snippet metadata."""
  
      id: int
-     uri: str
      content: str
      original_scores: list[float]
+     source_uri: str
+     relative_path: str
+     language: str
+     authors: list[str]
+     created_at: datetime
+     summary: str
  
      def __str__(self) -> str:
-         """Return formatted string representation for all snippet display."""
+         """Return enhanced formatted string representation."""
          lines = [
-             "-" * 80,
-             f"ID: {self.id} | {self.uri}",
-             f"Original scores: {self.original_scores}",
-             self.content,
-             "-" * 80,
-             "",
+             "---",
+             f"id: {self.id}",
+             f"source: {self.source_uri}",
+             f"path: {self.relative_path}",
+             f"lang: {self.language}",
+             f"created: {self.created_at.isoformat()}",
+             f"authors: {', '.join(self.authors)}",
+             f"scores: {self.original_scores}",
+             "---",
+             f"{self.summary}\n",
+             f"```{self.language}",
+             f"{self.content}",
+             "```\n",
          ]
          return "\n".join(lines)
  
+     def to_json(self) -> str:
+         """Return LLM-optimized JSON representation following the compact schema."""
+         json_obj = {
+             "id": self.id,
+             "source": self.source_uri,
+             "path": self.relative_path,
+             "lang": self.language.lower(),
+             "created": self.created_at.isoformat() if self.created_at else "",
+             "author": ", ".join(self.authors),
+             "score": self.original_scores,
+             "code": self.content,
+             "summary": self.summary,
+         }
+
+         return json.dumps(json_obj, separators=(",", ":"))
+
+     @classmethod
+     def to_jsonlines(cls, results: list["MultiSearchResult"]) -> str:
+         """Convert multiple MultiSearchResult objects to JSON Lines format.
+
+         Args:
+             results: List of MultiSearchResult objects
+             include_summary: Whether to include summary fields
+
+         Returns:
+             JSON Lines string (one JSON object per line)
+
+         """
+         return "\n".join(result.to_json() for result in results)
+
+     @classmethod
+     def to_string(cls, results: list["MultiSearchResult"]) -> str:
+         """Convert multiple MultiSearchResult objects to a string."""
+         return "\n\n".join(str(result) for result in results)
+
+     @staticmethod
+     def calculate_relative_path(file_path: str, source_path: str) -> str:
+         """Calculate relative path from source root."""
+         try:
+             return str(Path(file_path).relative_to(Path(source_path)))
+         except ValueError:
+             # If file_path is not relative to source_path, return the file name
+             return Path(file_path).name
+
+     @staticmethod
+     def detect_language_from_extension(extension: str) -> str:
+         """Detect programming language from file extension."""
+         try:
+             return LanguageMapping.get_language_for_extension(extension).title()
+         except ValueError:
+             # Unknown extension, return a default
+             return "Unknown"
+
  
  @dataclass
  class FusionRequest:
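
In this hunk, __str__ now renders a markdown-style record with a fenced code block, while the new to_json/to_jsonlines methods emit a compact machine-readable form. A sketch of how a caller might use them, with made-up field values (it assumes only that the dataclass is importable as shown above):

```python
from datetime import datetime, timezone

from kodit.domain.value_objects import MultiSearchResult

# Hypothetical search hit; every value here is illustrative.
result = MultiSearchResult(
    id=42,
    content="def add(a, b):\n    return a + b",
    original_scores=[0.91, 0.88],
    source_uri="https://github.com/example/repo",
    relative_path="src/maths.py",
    language="Python",
    authors=["Ada Lovelace"],
    created_at=datetime(2025, 1, 1, tzinfo=timezone.utc),
    summary="Adds two numbers.",
)

print(result)                                    # human-readable markdown record
print(result.to_json())                          # one compact JSON object
print(MultiSearchResult.to_jsonlines([result]))  # JSON Lines, one object per line
```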
@@ -291,39 +462,6 @@ class IndexView:
291
462
  source: str | None = None
292
463
 
293
464
 
294
- @dataclass
295
- class SnippetListItem:
296
- """Domain model for snippet list item with file information."""
297
-
298
- id: int
299
- file_path: str
300
- content: str
301
- source_uri: str
302
-
303
-
304
- @dataclass
305
- class FileInfo:
306
- """Domain model for file information."""
307
-
308
- uri: str
309
-
310
-
311
- @dataclass
312
- class SnippetInfo:
313
- """Domain model for snippet information."""
314
-
315
- id: int
316
- content: str
317
-
318
-
319
- @dataclass
320
- class SnippetWithFile:
321
- """Domain model for snippet with associated file information."""
322
-
323
- file: FileInfo
324
- snippet: SnippetInfo
325
-
326
-
327
465
  class LanguageMapping:
328
466
  """Value object for language-to-extension mappings.
329
467
 
@@ -493,38 +631,33 @@ class LanguageMapping:
493
631
  return [language_lower]
494
632
 
495
633
 
496
- # Database models for value objects
497
- class BM25DocumentModel(Base):
498
- """BM25 document model."""
499
-
500
- __tablename__ = "bm25_documents"
501
-
502
- id: Mapped[int] = mapped_column(Integer, primary_key=True)
503
- content: Mapped[str] = mapped_column(Text, nullable=False)
504
- document_metadata: Mapped[dict[str, Any] | None] = mapped_column(
505
- JSON, nullable=True
506
- )
507
- created_at: Mapped[datetime] = mapped_column(
508
- DateTime(timezone=True), nullable=False
509
- )
510
- updated_at: Mapped[datetime] = mapped_column(
511
- DateTime(timezone=True), nullable=False
512
- )
513
-
514
-
515
- class VectorDocumentModel(Base):
516
- """Vector document model."""
517
-
518
- __tablename__ = "vector_documents"
519
-
520
- id: Mapped[int] = mapped_column(Integer, primary_key=True)
521
- content: Mapped[str] = mapped_column(Text, nullable=False)
522
- document_metadata: Mapped[dict[str, Any] | None] = mapped_column(
523
- JSON, nullable=True
524
- )
525
- created_at: Mapped[datetime] = mapped_column(
526
- DateTime(timezone=True), nullable=False
527
- )
528
- updated_at: Mapped[datetime] = mapped_column(
529
- DateTime(timezone=True), nullable=False
530
- )
634
+ class SnippetQuery(BaseModel):
635
+ """Domain query object for snippet searches."""
636
+
637
+ text: str
638
+ search_type: SearchType = SearchType.HYBRID
639
+ filters: SnippetSearchFilters = SnippetSearchFilters()
640
+ top_k: int = 10
641
+
642
+
643
+ class SnippetExtractionStrategy(str, Enum):
644
+ """Different strategies for extracting snippets from files."""
645
+
646
+ METHOD_BASED = "method_based"
647
+
648
+
649
+ @dataclass
650
+ class SnippetExtractionRequest:
651
+ """Domain model for snippet extraction request."""
652
+
653
+ file_path: Path
654
+ strategy: SnippetExtractionStrategy = SnippetExtractionStrategy.METHOD_BASED
655
+
656
+
657
+ class FileProcessingStatus(IntEnum):
658
+ """File processing status."""
659
+
660
+ CLEAN = 0
661
+ ADDED = 1
662
+ MODIFIED = 2
663
+ DELETED = 3