kodit 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic.

Files changed (57)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/code_indexing_factory.py +77 -28
  3. kodit/application/services/code_indexing_application_service.py +148 -119
  4. kodit/cli.py +49 -52
  5. kodit/domain/entities.py +268 -189
  6. kodit/domain/protocols.py +61 -0
  7. kodit/domain/services/embedding_service.py +1 -1
  8. kodit/domain/services/index_query_service.py +66 -0
  9. kodit/domain/services/index_service.py +323 -0
  10. kodit/domain/value_objects.py +225 -92
  11. kodit/infrastructure/cloning/git/working_copy.py +17 -8
  12. kodit/infrastructure/cloning/metadata.py +37 -67
  13. kodit/infrastructure/embedding/embedding_factory.py +1 -1
  14. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  15. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
  16. kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
  17. kodit/infrastructure/git/git_utils.py +1 -63
  18. kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
  19. kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
  20. kodit/infrastructure/indexing/fusion_service.py +1 -1
  21. kodit/infrastructure/mappers/__init__.py +1 -0
  22. kodit/infrastructure/mappers/index_mapper.py +344 -0
  23. kodit/infrastructure/snippet_extraction/factories.py +13 -0
  24. kodit/infrastructure/snippet_extraction/language_detection_service.py +1 -1
  25. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -1
  26. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +1 -1
  27. kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
  28. kodit/infrastructure/sqlalchemy/entities.py +203 -0
  29. kodit/infrastructure/sqlalchemy/file_repository.py +1 -1
  30. kodit/infrastructure/sqlalchemy/index_repository.py +550 -0
  31. kodit/log.py +4 -1
  32. kodit/mcp.py +1 -13
  33. kodit/migrations/env.py +1 -1
  34. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +34 -0
  35. kodit/migrations/versions/4552eb3f23ce_add_summary.py +34 -0
  36. kodit/utils/__init__.py +1 -0
  37. kodit/utils/path_utils.py +54 -0
  38. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/METADATA +1 -1
  39. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/RECORD +42 -45
  40. kodit/domain/enums.py +0 -9
  41. kodit/domain/repositories.py +0 -128
  42. kodit/domain/services/ignore_service.py +0 -45
  43. kodit/domain/services/indexing_service.py +0 -204
  44. kodit/domain/services/snippet_extraction_service.py +0 -89
  45. kodit/domain/services/snippet_service.py +0 -211
  46. kodit/domain/services/source_service.py +0 -85
  47. kodit/infrastructure/cloning/folder/__init__.py +0 -1
  48. kodit/infrastructure/cloning/folder/factory.py +0 -128
  49. kodit/infrastructure/cloning/folder/working_copy.py +0 -38
  50. kodit/infrastructure/cloning/git/factory.py +0 -153
  51. kodit/infrastructure/indexing/index_repository.py +0 -273
  52. kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
  53. kodit/infrastructure/sqlalchemy/repository.py +0 -133
  54. kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -251
  55. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/WHEEL +0 -0
  56. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/entry_points.txt +0 -0
  57. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/licenses/LICENSE +0 -0
kodit/domain/services/snippet_extraction_service.py (file removed)
@@ -1,89 +0,0 @@
1
- """Domain services for snippet extraction."""
2
-
3
- from abc import ABC, abstractmethod
4
- from collections.abc import Mapping
5
- from pathlib import Path
6
-
7
- from kodit.domain.enums import SnippetExtractionStrategy
8
- from kodit.domain.value_objects import SnippetExtractionRequest, SnippetExtractionResult
9
-
10
-
11
- class LanguageDetectionService(ABC):
12
- """Abstract interface for language detection service."""
13
-
14
- @abstractmethod
15
- async def detect_language(self, file_path: Path) -> str:
16
- """Detect the programming language of a file."""
17
-
18
-
19
- class SnippetExtractor(ABC):
20
- """Abstract interface for snippet extraction."""
21
-
22
- @abstractmethod
23
- async def extract(self, file_path: Path, language: str) -> list[str]:
24
- """Extract snippets from a file."""
25
-
26
-
27
- class SnippetExtractionService(ABC):
28
- """Domain service for extracting snippets from source code."""
29
-
30
- @abstractmethod
31
- async def extract_snippets(
32
- self, request: SnippetExtractionRequest
33
- ) -> SnippetExtractionResult:
34
- """Extract snippets from a file using the specified strategy."""
35
-
36
-
37
- class SnippetExtractionDomainService:
38
- """Domain service implementation for snippet extraction business logic."""
39
-
40
- def __init__(
41
- self,
42
- language_detector: LanguageDetectionService,
43
- snippet_extractors: Mapping[SnippetExtractionStrategy, SnippetExtractor],
44
- ) -> None:
45
- """Initialize the snippet extraction domain service.
46
-
47
- Args:
48
- language_detector: Service for detecting programming languages
49
- snippet_extractors: Dictionary mapping strategies to extractor
50
- implementations
51
-
52
- """
53
- self.language_detector = language_detector
54
- self.snippet_extractors = snippet_extractors
55
-
56
- async def extract_snippets(
57
- self, request: SnippetExtractionRequest
58
- ) -> SnippetExtractionResult:
59
- """Extract snippets from a file using the specified strategy.
60
-
61
- Args:
62
- request: The snippet extraction request
63
-
64
- Returns:
65
- SnippetExtractionResult containing the extracted snippets and
66
- detected language
67
-
68
- Raises:
69
- ValueError: If the file doesn't exist or strategy is unsupported
70
-
71
- """
72
- # Domain logic: validate file exists
73
- if not request.file_path.exists():
74
- raise ValueError(f"File does not exist: {request.file_path}")
75
-
76
- # Domain logic: detect language
77
- language = await self.language_detector.detect_language(request.file_path)
78
-
79
- # Domain logic: choose strategy and extractor
80
- if request.strategy not in self.snippet_extractors:
81
- raise ValueError(f"Unsupported extraction strategy: {request.strategy}")
82
-
83
- extractor = self.snippet_extractors[request.strategy]
84
- snippets = await extractor.extract(request.file_path, language)
85
-
86
- # Domain logic: filter out empty snippets
87
- filtered_snippets = [snippet for snippet in snippets if snippet.strip()]
88
-
89
- return SnippetExtractionResult(snippets=filtered_snippets, language=language)
kodit/domain/services/snippet_service.py (file removed)
@@ -1,211 +0,0 @@
1
- """Domain service for snippet operations."""
2
-
3
- from pathlib import Path
4
- from typing import Any
5
-
6
- import structlog
7
-
8
- from kodit.domain.entities import Snippet
9
- from kodit.domain.enums import SnippetExtractionStrategy
10
- from kodit.domain.interfaces import ProgressCallback
11
- from kodit.domain.repositories import FileRepository, SnippetRepository
12
- from kodit.domain.services.snippet_extraction_service import (
13
- SnippetExtractionDomainService,
14
- )
15
- from kodit.domain.value_objects import (
16
- MultiSearchRequest,
17
- MultiSearchResult,
18
- SnippetExtractionRequest,
19
- SnippetListItem,
20
- )
21
- from kodit.reporting import Reporter
22
-
23
-
24
- class SnippetDomainService:
25
- """Domain service for snippet-related operations.
26
-
27
- This service consolidates snippet operations that were previously
28
- spread between application services. It handles:
29
- - Snippet extraction from files
30
- - Snippet persistence
31
- - Snippet querying and filtering
32
- """
33
-
34
- def __init__(
35
- self,
36
- snippet_extraction_service: SnippetExtractionDomainService,
37
- snippet_repository: SnippetRepository,
38
- file_repository: FileRepository,
39
- ) -> None:
40
- """Initialize the snippet domain service.
41
-
42
- Args:
43
- snippet_extraction_service: Service for extracting snippets from files
44
- snippet_repository: Repository for snippet persistence
45
- file_repository: Repository for file operations
46
-
47
- """
48
- self.snippet_extraction_service = snippet_extraction_service
49
- self.snippet_repository = snippet_repository
50
- self.file_repository = file_repository
51
- self.log = structlog.get_logger(__name__)
52
-
53
- async def extract_and_create_snippets(
54
- self,
55
- index_id: int,
56
- strategy: SnippetExtractionStrategy,
57
- progress_callback: ProgressCallback | None = None,
58
- ) -> list[Snippet]:
59
- """Extract snippets from all files in an index and persist them.
60
-
61
- This method combines the extraction and persistence logic that was
62
- previously split between domain and application services.
63
-
64
- Args:
65
- index_id: The ID of the index to create snippets for
66
- strategy: The extraction strategy to use
67
- progress_callback: Optional callback for progress reporting
68
-
69
- Returns:
70
- List of created Snippet entities with IDs assigned
71
-
72
- """
73
- files = await self.file_repository.get_files_for_index(index_id)
74
- created_snippets = []
75
-
76
- reporter = Reporter(self.log, progress_callback)
77
- await reporter.start(
78
- "create_snippets", len(files), "Creating snippets from files..."
79
- )
80
-
81
- for i, file in enumerate(files, 1):
82
- if not self._should_process_file(file):
83
- continue
84
-
85
- try:
86
- # Extract snippets from file
87
- request = SnippetExtractionRequest(Path(file.cloned_path), strategy)
88
- result = await self.snippet_extraction_service.extract_snippets(request)
89
-
90
- # Create and persist snippet entities
91
- for snippet_content in result.snippets:
92
- snippet = Snippet(
93
- file_id=file.id,
94
- index_id=index_id,
95
- content=snippet_content,
96
- )
97
- saved_snippet = await self.snippet_repository.save(snippet)
98
- created_snippets.append(saved_snippet)
99
-
100
- except (OSError, ValueError) as e:
101
- self.log.debug(
102
- "Skipping file",
103
- file=file.cloned_path,
104
- error=str(e),
105
- )
106
- continue
107
-
108
- await reporter.step(
109
- "create_snippets",
110
- current=i,
111
- total=len(files),
112
- message=f"Processing {file.cloned_path}...",
113
- )
114
-
115
- await reporter.done("create_snippets")
116
- return created_snippets
117
-
118
- async def get_snippets_for_index(self, index_id: int) -> list[Snippet]:
119
- """Get all snippets for a specific index.
120
-
121
- Args:
122
- index_id: The ID of the index
123
-
124
- Returns:
125
- List of Snippet entities for the index
126
-
127
- """
128
- # This delegates to the repository but provides a domain-level interface
129
- return list(await self.snippet_repository.get_by_index(index_id))
130
-
131
- async def update_snippet_content(self, snippet_id: int, content: str) -> None:
132
- """Update the content of an existing snippet.
133
-
134
- Args:
135
- snippet_id: The ID of the snippet to update
136
- content: The new content for the snippet
137
-
138
- """
139
- # Get the snippet first to ensure it exists
140
- snippet = await self.snippet_repository.get(snippet_id)
141
- if not snippet:
142
- msg = f"Snippet not found: {snippet_id}"
143
- raise ValueError(msg)
144
-
145
- # Update the content
146
- snippet.content = content
147
- await self.snippet_repository.save(snippet)
148
-
149
- async def delete_snippets_for_index(self, index_id: int) -> None:
150
- """Delete all snippets for a specific index.
151
-
152
- Args:
153
- index_id: The ID of the index
154
-
155
- """
156
- await self.snippet_repository.delete_by_index(index_id)
157
-
158
- async def search_snippets(
159
- self, request: MultiSearchRequest
160
- ) -> list[SnippetListItem]:
161
- """Search snippets with filters.
162
-
163
- Args:
164
- request: The search request containing filters
165
-
166
- Returns:
167
- List of matching snippet items
168
-
169
- """
170
- return list(await self.snippet_repository.search(request))
171
-
172
- async def list_snippets(
173
- self, file_path: str | None = None, source_uri: str | None = None
174
- ) -> list[MultiSearchResult]:
175
- """List snippets with optional filtering.
176
-
177
- Args:
178
- file_path: Optional file path to filter by
179
- source_uri: Optional source URI to filter by
180
-
181
- Returns:
182
- List of search results matching the criteria
183
-
184
- """
185
- snippet_items = await self.snippet_repository.list_snippets(
186
- file_path, source_uri
187
- )
188
- # Convert SnippetListItem to MultiSearchResult for unified display format
189
- return [
190
- MultiSearchResult(
191
- id=item.id,
192
- uri=item.source_uri,
193
- content=item.content,
194
- original_scores=[],
195
- )
196
- for item in snippet_items
197
- ]
198
-
199
- def _should_process_file(self, file: Any) -> bool:
200
- """Check if a file should be processed for snippet extraction.
201
-
202
- Args:
203
- file: The file to check
204
-
205
- Returns:
206
- True if the file should be processed
207
-
208
- """
209
- # Skip unsupported file types
210
- mime_blacklist = ["unknown/unknown"]
211
- return file.mime_type not in mime_blacklist
kodit/domain/services/source_service.py (file removed)
@@ -1,85 +0,0 @@
1
- """Source service rewritten to work directly with AsyncSession."""
2
-
3
- from collections.abc import Callable
4
- from pathlib import Path
5
-
6
- import structlog
7
- from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
8
-
9
- from kodit.domain.entities import Source
10
- from kodit.domain.interfaces import ProgressCallback
11
- from kodit.domain.repositories import SourceRepository
12
- from kodit.infrastructure.cloning.folder.factory import FolderSourceFactory
13
- from kodit.infrastructure.cloning.folder.working_copy import FolderWorkingCopyProvider
14
- from kodit.infrastructure.cloning.git.factory import (
15
- GitSourceFactory,
16
- GitWorkingCopyProvider,
17
- )
18
- from kodit.infrastructure.git.git_utils import is_valid_clone_target
19
- from kodit.infrastructure.sqlalchemy.repository import SqlAlchemySourceRepository
20
-
21
-
22
- class SourceService:
23
- """Source service."""
24
-
25
- def __init__(
26
- self,
27
- clone_dir: Path,
28
- session_factory: async_sessionmaker[AsyncSession] | Callable[[], AsyncSession],
29
- ) -> None:
30
- """Initialize the source service."""
31
- self.clone_dir = clone_dir
32
- self._session_factory = session_factory
33
- self.log = structlog.get_logger(__name__)
34
-
35
- async def get(self, source_id: int) -> Source:
36
- """Get a source."""
37
- async with self._session_factory() as session:
38
- repo = SqlAlchemySourceRepository(session)
39
-
40
- source = await repo.get(source_id)
41
- if source is None:
42
- raise ValueError(f"Source not found: {source_id}")
43
-
44
- return source
45
-
46
- async def create(
47
- self, uri_or_path_like: str, progress_callback: ProgressCallback | None = None
48
- ) -> Source:
49
- """Create a source."""
50
- async with self._session_factory() as session:
51
- repo = SqlAlchemySourceRepository(session)
52
- git_factory, folder_factory = self._build_factories(repo, session)
53
-
54
- if is_valid_clone_target(uri_or_path_like):
55
- source = await git_factory.create(uri_or_path_like, progress_callback)
56
- elif Path(uri_or_path_like).is_dir():
57
- source = await folder_factory.create(
58
- uri_or_path_like, progress_callback
59
- )
60
- else:
61
- raise ValueError(f"Unsupported source: {uri_or_path_like}")
62
-
63
- # Factories handle their own commits now
64
- return source
65
-
66
- def _build_factories(
67
- self, repository: SourceRepository, session: AsyncSession
68
- ) -> tuple[GitSourceFactory, FolderSourceFactory]:
69
- # Git-specific collaborators
70
- git_wc = GitWorkingCopyProvider(self.clone_dir)
71
- git_factory = GitSourceFactory(
72
- repository=repository,
73
- working_copy=git_wc,
74
- session=session,
75
- )
76
-
77
- # Folder-specific collaborators
78
- fold_wc = FolderWorkingCopyProvider(self.clone_dir)
79
- folder_factory = FolderSourceFactory(
80
- repository=repository,
81
- working_copy=fold_wc,
82
- session=session,
83
- )
84
-
85
- return git_factory, folder_factory
kodit/infrastructure/cloning/folder/__init__.py (file removed)
@@ -1 +0,0 @@
1
- """Folder cloning infrastructure."""
kodit/infrastructure/cloning/folder/factory.py (file removed)
@@ -1,128 +0,0 @@
1
- """Factory for creating folder-based working copies."""
2
-
3
- from pathlib import Path
4
-
5
- import structlog
6
- from sqlalchemy.ext.asyncio import AsyncSession
7
-
8
- from kodit.domain.entities import AuthorFileMapping, Source, SourceType
9
- from kodit.domain.interfaces import NullProgressCallback, ProgressCallback
10
- from kodit.domain.repositories import SourceRepository
11
- from kodit.domain.value_objects import ProgressEvent
12
- from kodit.infrastructure.cloning.folder.working_copy import FolderWorkingCopyProvider
13
- from kodit.infrastructure.cloning.metadata import (
14
- FolderFileMetadataExtractor,
15
- NoOpAuthorExtractor,
16
- )
17
-
18
-
19
- class FolderSourceFactory:
20
- """Factory for creating folder sources."""
21
-
22
- def __init__(
23
- self,
24
- repository: SourceRepository,
25
- working_copy: FolderWorkingCopyProvider,
26
- session: AsyncSession,
27
- ) -> None:
28
- """Initialize the source factory."""
29
- self.log = structlog.get_logger(__name__)
30
- self.repository = repository
31
- self.working_copy = working_copy
32
- self.metadata_extractor = FolderFileMetadataExtractor()
33
- self.author_extractor = NoOpAuthorExtractor()
34
- self.session = session
35
-
36
- async def create(
37
- self, uri: str, progress_callback: ProgressCallback | None = None
38
- ) -> Source:
39
- """Create a folder source from a path."""
40
- # Use null callback if none provided
41
- if progress_callback is None:
42
- progress_callback = NullProgressCallback()
43
-
44
- directory = Path(uri).expanduser().resolve()
45
-
46
- # Check if source already exists
47
- source = await self.repository.get_by_uri(directory.as_uri())
48
- if source:
49
- self.log.info("Source already exists, reusing...", source_id=source.id)
50
- return source
51
-
52
- # Validate directory exists
53
- if not directory.exists():
54
- msg = f"Folder does not exist: {directory}"
55
- raise ValueError(msg)
56
-
57
- # Prepare working copy
58
- clone_path = await self.working_copy.prepare(directory.as_uri())
59
-
60
- # Create source record
61
- source = await self.repository.save(
62
- Source(
63
- uri=directory.as_uri(),
64
- cloned_path=str(clone_path),
65
- source_type=SourceType.FOLDER,
66
- )
67
- )
68
-
69
- # Commit source creation so we get an ID for foreign key relationships
70
- await self.session.commit()
71
-
72
- # Get all files to process
73
- files = [f for f in clone_path.rglob("*") if f.is_file()]
74
-
75
- # Process files
76
- await self._process_files(source, files, progress_callback)
77
-
78
- # Commit file processing
79
- await self.session.commit()
80
-
81
- return source
82
-
83
- async def _process_files(
84
- self, source: Source, files: list[Path], progress_callback: ProgressCallback
85
- ) -> None:
86
- """Process files for a source."""
87
- total_files = len(files)
88
-
89
- # Notify start of operation
90
- await progress_callback.on_progress(
91
- ProgressEvent(
92
- operation="process_files",
93
- current=0,
94
- total=total_files,
95
- message="Processing files...",
96
- )
97
- )
98
-
99
- for i, path in enumerate(files, 1):
100
- if not path.is_file():
101
- continue
102
-
103
- # Extract file metadata
104
- file_record = await self.metadata_extractor.extract(path, source)
105
- await self.repository.create_file(file_record)
106
-
107
- # Extract authors
108
- authors = await self.author_extractor.extract(path, source)
109
- for author in authors:
110
- await self.repository.upsert_author_file_mapping(
111
- AuthorFileMapping(
112
- author_id=author.id,
113
- file_id=file_record.id,
114
- )
115
- )
116
-
117
- # Update progress
118
- await progress_callback.on_progress(
119
- ProgressEvent(
120
- operation="process_files",
121
- current=i,
122
- total=total_files,
123
- message=f"Processing {path.name}...",
124
- )
125
- )
126
-
127
- # Notify completion
128
- await progress_callback.on_complete("process_files")
kodit/infrastructure/cloning/folder/working_copy.py (file removed)
@@ -1,38 +0,0 @@
1
- """Working copy provider for folder-based sources."""
2
-
3
- import shutil
4
- from pathlib import Path
5
-
6
-
7
- class FolderWorkingCopyProvider:
8
- """Working copy provider for folder-based sources."""
9
-
10
- def __init__(self, clone_dir: Path) -> None:
11
- """Initialize the provider."""
12
- self.clone_dir = clone_dir
13
-
14
- async def prepare(self, uri: str) -> Path:
15
- """Prepare a folder working copy."""
16
- # Handle file:// URIs
17
- if uri.startswith("file://"):
18
- from urllib.parse import urlparse
19
-
20
- parsed = urlparse(uri)
21
- directory = Path(parsed.path).expanduser().resolve()
22
- else:
23
- directory = Path(uri).expanduser().resolve()
24
-
25
- # Clone into a local directory
26
- clone_path = self.clone_dir / directory.as_posix().replace("/", "_")
27
- clone_path.mkdir(parents=True, exist_ok=True)
28
-
29
- # Copy all files recursively, preserving directory structure, ignoring
30
- # hidden files
31
- shutil.copytree(
32
- directory,
33
- clone_path,
34
- ignore=shutil.ignore_patterns(".*"),
35
- dirs_exist_ok=True,
36
- )
37
-
38
- return clone_path