kodit 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/factories/code_indexing_factory.py +56 -29
- kodit/application/services/code_indexing_application_service.py +152 -118
- kodit/cli.py +14 -41
- kodit/domain/entities.py +268 -197
- kodit/domain/protocols.py +61 -0
- kodit/domain/services/embedding_service.py +1 -1
- kodit/domain/services/index_query_service.py +66 -0
- kodit/domain/services/index_service.py +282 -0
- kodit/domain/value_objects.py +143 -65
- kodit/infrastructure/cloning/git/working_copy.py +17 -8
- kodit/infrastructure/cloning/metadata.py +37 -67
- kodit/infrastructure/embedding/embedding_factory.py +1 -1
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
- kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
- kodit/infrastructure/git/git_utils.py +1 -63
- kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
- kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/__init__.py +1 -0
- kodit/infrastructure/mappers/index_mapper.py +344 -0
- kodit/infrastructure/slicing/__init__.py +1 -0
- kodit/infrastructure/slicing/language_detection_service.py +18 -0
- kodit/infrastructure/slicing/slicer.py +894 -0
- kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
- kodit/infrastructure/sqlalchemy/entities.py +203 -0
- kodit/infrastructure/sqlalchemy/index_repository.py +579 -0
- kodit/mcp.py +0 -7
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +36 -0
- kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
- kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +24 -16
- kodit/migrations/versions/85155663351e_initial.py +64 -48
- kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +20 -14
- kodit/utils/__init__.py +1 -0
- kodit/utils/path_utils.py +54 -0
- {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/METADATA +9 -4
- kodit-0.3.4.dist-info/RECORD +89 -0
- kodit/domain/enums.py +0 -9
- kodit/domain/repositories.py +0 -128
- kodit/domain/services/ignore_service.py +0 -45
- kodit/domain/services/indexing_service.py +0 -204
- kodit/domain/services/snippet_extraction_service.py +0 -89
- kodit/domain/services/snippet_service.py +0 -215
- kodit/domain/services/source_service.py +0 -85
- kodit/infrastructure/cloning/folder/__init__.py +0 -1
- kodit/infrastructure/cloning/folder/factory.py +0 -128
- kodit/infrastructure/cloning/folder/working_copy.py +0 -38
- kodit/infrastructure/cloning/git/factory.py +0 -153
- kodit/infrastructure/indexing/index_repository.py +0 -286
- kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
- kodit/infrastructure/snippet_extraction/__init__.py +0 -1
- kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
- kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
- kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
- kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
- kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
- kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
- kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
- kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
- kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -45
- kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
- kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
- kodit/infrastructure/sqlalchemy/repository.py +0 -133
- kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -259
- kodit-0.3.2.dist-info/RECORD +0 -103
- {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/WHEEL +0 -0
- {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/entry_points.txt +0 -0
- {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,128 +0,0 @@
|
|
|
1
|
-
"""Factory for creating folder-based working copies."""
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
|
|
5
|
-
import structlog
|
|
6
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
|
-
|
|
8
|
-
from kodit.domain.entities import AuthorFileMapping, Source, SourceType
|
|
9
|
-
from kodit.domain.interfaces import NullProgressCallback, ProgressCallback
|
|
10
|
-
from kodit.domain.repositories import SourceRepository
|
|
11
|
-
from kodit.domain.value_objects import ProgressEvent
|
|
12
|
-
from kodit.infrastructure.cloning.folder.working_copy import FolderWorkingCopyProvider
|
|
13
|
-
from kodit.infrastructure.cloning.metadata import (
|
|
14
|
-
FolderFileMetadataExtractor,
|
|
15
|
-
NoOpAuthorExtractor,
|
|
16
|
-
)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class FolderSourceFactory:
    """Factory for creating folder sources.

    Registers a local directory as a ``Source``: the directory is copied into
    a working copy, a source record is saved, and every file found in the
    working copy is recorded together with its author mappings.
    """

    def __init__(
        self,
        repository: SourceRepository,
        working_copy: FolderWorkingCopyProvider,
        session: AsyncSession,
    ) -> None:
        """Initialize the source factory.

        Args:
            repository: Repository used to look up and save sources, files and
                author/file mappings.
            working_copy: Provider that prepares the working copy of a folder.
            session: Session that is committed after the source is saved and
                again after its files have been processed.

        """
        self.log = structlog.get_logger(__name__)
        self.repository = repository
        self.working_copy = working_copy
        self.metadata_extractor = FolderFileMetadataExtractor()
        self.author_extractor = NoOpAuthorExtractor()
        self.session = session

    async def create(
        self, uri: str, progress_callback: ProgressCallback | None = None
    ) -> Source:
        """Create a folder source from a path.

        Args:
            uri: Filesystem path of the folder; ``~`` is expanded and the path
                is resolved before use.
            progress_callback: Optional receiver of file-processing progress;
                a ``NullProgressCallback`` is used when omitted.

        Returns:
            The existing source when one is already stored for the resolved
            folder URI, otherwise the newly created source.

        Raises:
            ValueError: If the path does not exist or is not a directory.

        """
        # Use null callback if none provided
        if progress_callback is None:
            progress_callback = NullProgressCallback()

        directory = Path(uri).expanduser().resolve()
        directory_uri = directory.as_uri()

        # Check if source already exists
        source = await self.repository.get_by_uri(directory_uri)
        if source:
            self.log.info("Source already exists, reusing...", source_id=source.id)
            return source

        # Validate directory exists
        if not directory.exists():
            msg = f"Folder does not exist: {directory}"
            raise ValueError(msg)

        # A regular file would pass the existence check and only fail later,
        # while the working copy is being prepared; reject it here instead.
        if not directory.is_dir():
            msg = f"Path is not a folder: {directory}"
            raise ValueError(msg)

        # Prepare working copy
        clone_path = await self.working_copy.prepare(directory_uri)

        # Create source record
        source = await self.repository.save(
            Source(
                uri=directory_uri,
                cloned_path=str(clone_path),
                source_type=SourceType.FOLDER,
            )
        )

        # Commit source creation so we get an ID for foreign key relationships
        await self.session.commit()

        # Get all files to process
        files = [f for f in clone_path.rglob("*") if f.is_file()]

        # Process files
        await self._process_files(source, files, progress_callback)

        # Commit file processing
        await self.session.commit()

        return source

    async def _process_files(
        self, source: Source, files: list[Path], progress_callback: ProgressCallback
    ) -> None:
        """Process files for a source.

        Emits a start event, then for each file stores its metadata record and
        author mappings and emits a progress event, and finally signals
        completion of the ``process_files`` operation.

        Args:
            source: The source the files belong to.
            files: Paths of the files to record.
            progress_callback: Receiver of progress and completion events.

        """
        total_files = len(files)

        # Notify start of operation
        await progress_callback.on_progress(
            ProgressEvent(
                operation="process_files",
                current=0,
                total=total_files,
                message="Processing files...",
            )
        )

        for i, path in enumerate(files, 1):
            # Entries that are no longer regular files are skipped entirely,
            # including their progress event.
            if not path.is_file():
                continue

            # Extract file metadata
            file_record = await self.metadata_extractor.extract(path, source)
            await self.repository.create_file(file_record)

            # Extract authors
            authors = await self.author_extractor.extract(path, source)
            for author in authors:
                await self.repository.upsert_author_file_mapping(
                    AuthorFileMapping(
                        author_id=author.id,
                        file_id=file_record.id,
                    )
                )

            # Update progress
            await progress_callback.on_progress(
                ProgressEvent(
                    operation="process_files",
                    current=i,
                    total=total_files,
                    message=f"Processing {path.name}...",
                )
            )

        # Notify completion
        await progress_callback.on_complete("process_files")
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
"""Working copy provider for folder-based sources."""
|
|
2
|
-
|
|
3
|
-
import shutil
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class FolderWorkingCopyProvider:
    """Working copy provider for folder-based sources."""

    def __init__(self, clone_dir: Path) -> None:
        """Initialize the provider.

        Args:
            clone_dir: Directory under which working copies are created.

        """
        self.clone_dir = clone_dir

    async def prepare(self, uri: str) -> Path:
        """Prepare a folder working copy.

        Args:
            uri: Either a ``file://`` URI or a plain filesystem path of the
                folder to copy.

        Returns:
            Path of the working copy inside ``clone_dir``. Its name is the
            resolved source path with every ``/`` replaced by ``_``.

        """
        # Handle file:// URIs
        if uri.startswith("file://"):
            from urllib.parse import urlparse
            from urllib.request import url2pathname

            # The path component of a URI is percent-encoded (Path.as_uri()
            # turns a space into %20, for example), so it has to be decoded
            # before it can be used as a filesystem path.
            location = url2pathname(urlparse(uri).path)
        else:
            location = uri
        directory = Path(location).expanduser().resolve()

        # Clone into a local directory
        clone_path = self.clone_dir / directory.as_posix().replace("/", "_")
        clone_path.mkdir(parents=True, exist_ok=True)

        # Copy all files recursively, preserving directory structure, ignoring
        # hidden files
        shutil.copytree(
            directory,
            clone_path,
            ignore=shutil.ignore_patterns(".*"),
            dirs_exist_ok=True,
        )

        return clone_path
|
|
@@ -1,153 +0,0 @@
|
|
|
1
|
-
"""Factory for creating git-based working copies."""
|
|
2
|
-
|
|
3
|
-
import tempfile
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
|
|
6
|
-
import git
|
|
7
|
-
import structlog
|
|
8
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
9
|
-
|
|
10
|
-
from kodit.domain.entities import AuthorFileMapping, Source, SourceType
|
|
11
|
-
from kodit.domain.interfaces import NullProgressCallback, ProgressCallback
|
|
12
|
-
from kodit.domain.repositories import SourceRepository
|
|
13
|
-
from kodit.domain.services.ignore_service import IgnoreService
|
|
14
|
-
from kodit.domain.value_objects import ProgressEvent
|
|
15
|
-
from kodit.infrastructure.cloning.git.working_copy import GitWorkingCopyProvider
|
|
16
|
-
from kodit.infrastructure.cloning.metadata import (
|
|
17
|
-
GitAuthorExtractor,
|
|
18
|
-
GitFileMetadataExtractor,
|
|
19
|
-
)
|
|
20
|
-
from kodit.infrastructure.git.git_utils import sanitize_git_url
|
|
21
|
-
from kodit.infrastructure.ignore.ignore_pattern_provider import GitIgnorePatternProvider
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class GitSourceFactory:
    """Factory for creating git-based working copies.

    Registers a git repository as a ``Source``: the remote URL is normalised
    and sanitized, a working copy is prepared, a source record is saved, and
    every non-ignored file in the working copy is recorded together with its
    author mappings.
    """

    def __init__(
        self,
        repository: SourceRepository,
        working_copy: GitWorkingCopyProvider,
        session: AsyncSession,
    ) -> None:
        """Initialize the source factory.

        Args:
            repository: Repository used to look up and save sources, files and
                author/file mappings; also handed to the author extractor.
            working_copy: Provider that prepares the git working copy.
            session: Session that is committed after the source is saved,
                after authors are extracted, and after file processing.

        """
        self.log = structlog.get_logger(__name__)
        self.repository = repository
        self.working_copy = working_copy
        self.metadata_extractor = GitFileMetadataExtractor()
        self.author_extractor = GitAuthorExtractor(repository)
        self.session = session

    async def create(
        self, uri: str, progress_callback: ProgressCallback | None = None
    ) -> Source:
        """Create a git source from a URI.

        Args:
            uri: Location of the git repository to clone.
            progress_callback: Optional receiver of file-processing progress;
                a ``NullProgressCallback`` is used when omitted.

        Returns:
            The existing source when one is already stored for the sanitized
            URI, otherwise the newly created source.

        """
        # Use null callback if none provided
        if progress_callback is None:
            progress_callback = NullProgressCallback()

        # Normalize the URI
        # Never log the raw URI in production
        self.log.debug("Normalising git uri", uri="[REDACTED]" + uri[-4:])
        with tempfile.TemporaryDirectory() as temp_dir:
            # clone_from already returns a Repo bound to temp_dir, so reuse it
            # instead of opening a second handle, and close it before the
            # temporary directory is removed.
            probe = git.Repo.clone_from(uri, temp_dir)
            try:
                uri = probe.remote().url
            finally:
                probe.close()

        # Sanitize the URI to remove any credentials
        sanitized_uri = sanitize_git_url(uri)
        self.log.debug("Sanitized git uri", sanitized_uri=sanitized_uri)

        # Check if source already exists
        self.log.debug("Checking if source already exists", uri=sanitized_uri)
        source = await self.repository.get_by_uri(sanitized_uri)

        if source:
            self.log.info("Source already exists, reusing...", source_id=source.id)
            return source

        # Prepare working copy (use original URI for cloning, sanitized for storage)
        clone_path = await self.working_copy.prepare(uri)

        # Create source record
        self.log.debug("Creating source", uri=sanitized_uri, clone_path=str(clone_path))
        source = await self.repository.save(
            Source(
                uri=sanitized_uri,
                cloned_path=str(clone_path),
                source_type=SourceType.GIT,
            )
        )

        # Commit source creation so we get an ID for foreign key relationships
        await self.session.commit()

        # Get files to process using ignore patterns
        ignore_provider = GitIgnorePatternProvider(clone_path)
        ignore_service = IgnoreService(ignore_provider)
        files = [
            f
            for f in clone_path.rglob("*")
            if f.is_file() and not ignore_service.should_ignore(f)
        ]

        # Process files
        self.log.info("Inspecting files", source_id=source.id, num_files=len(files))
        await self._process_files(source, files, progress_callback)

        # Commit file processing
        await self.session.commit()

        return source

    async def _process_files(
        self, source: Source, files: list[Path], progress_callback: ProgressCallback
    ) -> None:
        """Process files for a source.

        Emits a start event, then for each file stores its metadata record and
        author mappings and emits a progress event, and finally signals
        completion of the ``process_files`` operation.

        Args:
            source: The source the files belong to.
            files: Paths of the files to record.
            progress_callback: Receiver of progress and completion events.

        """
        total_files = len(files)

        # Notify start of operation
        await progress_callback.on_progress(
            ProgressEvent(
                operation="process_files",
                current=0,
                total=total_files,
                message="Processing files...",
            )
        )

        for i, path in enumerate(files, 1):
            # Entries that are no longer regular files are skipped entirely,
            # including their progress event.
            if not path.is_file():
                continue

            # Extract file metadata
            file_record = await self.metadata_extractor.extract(path, source)
            await self.repository.create_file(file_record)

            # Extract authors
            authors = await self.author_extractor.extract(path, source)

            # Commit authors so they get IDs before creating mappings
            if authors:
                await self.session.commit()

            for author in authors:
                await self.repository.upsert_author_file_mapping(
                    AuthorFileMapping(
                        author_id=author.id,
                        file_id=file_record.id,
                    )
                )

            # Update progress
            await progress_callback.on_progress(
                ProgressEvent(
                    operation="process_files",
                    current=i,
                    total=total_files,
                    message=f"Processing {path.name}...",
                )
            )

        # Notify completion
        await progress_callback.on_complete("process_files")
|
|
@@ -1,286 +0,0 @@
|
|
|
1
|
-
"""Infrastructure implementation of the index repository."""
|
|
2
|
-
|
|
3
|
-
from datetime import UTC, datetime
|
|
4
|
-
from typing import TypeVar
|
|
5
|
-
|
|
6
|
-
from sqlalchemy import delete, func, select
|
|
7
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
|
-
|
|
9
|
-
from kodit.domain.entities import (
|
|
10
|
-
Author,
|
|
11
|
-
AuthorFileMapping,
|
|
12
|
-
Embedding,
|
|
13
|
-
File,
|
|
14
|
-
Index,
|
|
15
|
-
Snippet,
|
|
16
|
-
Source,
|
|
17
|
-
)
|
|
18
|
-
from kodit.domain.services.indexing_service import IndexRepository
|
|
19
|
-
from kodit.domain.value_objects import (
|
|
20
|
-
IndexView,
|
|
21
|
-
SnippetWithContext,
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
T = TypeVar("T")
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class SQLAlchemyIndexRepository(IndexRepository):
    """SQLAlchemy implementation of the index repository."""

    def __init__(self, session: AsyncSession) -> None:
        """Initialize the index repository.

        Args:
            session: The SQLAlchemy async session to use for database operations.

        """
        self.session = session

    async def create_index(self, source_id: int) -> IndexView:
        """Create a new index for a source.

        If the source already has an index, that index is returned instead of
        creating a second one.

        Args:
            source_id: The ID of the source to create an index for.

        Returns:
            The created (or already existing) index view.

        """
        # Check if index already exists
        existing_index = await self.get_index_by_source_id(source_id)
        if existing_index:
            return existing_index

        index = Index(source_id=source_id)
        self.session.add(index)
        # Flush explicitly so the new row is written before its attributes are
        # read below, rather than depending on the session's autoflush setting.
        await self.session.flush()

        # Get source for the view
        source_query = select(Source).where(Source.id == source_id)
        source_result = await self.session.execute(source_query)
        source = source_result.scalar_one()

        return IndexView(
            id=index.id,
            created_at=index.created_at,
            updated_at=index.updated_at,
            source=source.uri,
            num_snippets=0,
        )

    async def _get_index_view(self, index: Index, source: Source) -> IndexView:
        """Create an IndexView from Index and Source entities.

        Args:
            index: The index entity
            source: The source entity

        Returns:
            The index view, including the current snippet count of the index.

        """
        num_snippets = await self.num_snippets_for_index(index.id)
        return IndexView(
            id=index.id,
            created_at=index.created_at,
            updated_at=index.updated_at,
            source=source.uri,
            num_snippets=num_snippets,
        )

    async def get_index_by_id(self, index_id: int) -> IndexView | None:
        """Get an index by its ID.

        Args:
            index_id: The ID of the index to retrieve.

        Returns:
            The index view if found, None otherwise.

        """
        query = (
            select(Index, Source)
            .join(Source, Index.source_id == Source.id)
            .where(Index.id == index_id)
        )
        result = await self.session.execute(query)
        row = result.first()

        if not row:
            return None

        index, source = row
        return await self._get_index_view(index, source)

    async def get_index_by_source_id(self, source_id: int) -> IndexView | None:
        """Get an index by its source ID.

        Args:
            source_id: The ID of the source to retrieve an index for.

        Returns:
            The index view if found, None otherwise.

        """
        query = (
            select(Index, Source)
            .join(Source, Index.source_id == Source.id)
            .where(Index.source_id == source_id)
        )
        result = await self.session.execute(query)
        row = result.first()

        if not row:
            return None

        index, source = row
        return await self._get_index_view(index, source)

    async def list_indexes(self) -> list[IndexView]:
        """List all indexes.

        Returns:
            A list of index views, one per index that has a source.

        """
        # Inner join, as in the single-index getters: an outer join would
        # yield rows whose index or source is None, and building a view from
        # such a row fails on attribute access.
        query = select(Index, Source).join(Source, Index.source_id == Source.id)
        result = await self.session.execute(query)

        indexes = []
        for index, source in result.tuples():
            index_view = await self._get_index_view(index, source)
            indexes.append(index_view)

        return indexes

    async def update_index_timestamp(self, index_id: int) -> None:
        """Update the timestamp of an index.

        Does nothing when no index with the given ID exists.

        Args:
            index_id: The ID of the index to update.

        """
        query = select(Index).where(Index.id == index_id)
        result = await self.session.execute(query)
        index = result.scalar_one_or_none()

        if index:
            index.updated_at = datetime.now(UTC)

    async def delete_all_snippets(self, index_id: int) -> None:
        """Delete all snippets for an index.

        Embeddings that reference the snippets are deleted first.

        Args:
            index_id: The ID of the index to delete snippets for.

        """
        # Delete all embeddings for these snippets with one statement instead
        # of loading every snippet and issuing a DELETE per snippet
        snippet_ids = select(Snippet.id).where(Snippet.index_id == index_id)
        await self.session.execute(
            delete(Embedding).where(Embedding.snippet_id.in_(snippet_ids))
        )

        # Now delete the snippets
        await self.session.execute(delete(Snippet).where(Snippet.index_id == index_id))

    async def get_snippets_for_index(self, index_id: int) -> list[Snippet]:
        """Get all snippets for an index.

        Args:
            index_id: The ID of the index to get snippets for.

        Returns:
            A list of Snippet entities.

        """
        query = select(Snippet).where(Snippet.index_id == index_id)
        result = await self.session.execute(query)
        return list(result.scalars())

    async def add_snippet(self, snippet: dict) -> None:
        """Add a snippet to the database.

        Args:
            snippet: The snippet to add. Must contain ``file_id``, ``index_id``
                and ``content``; ``summary`` is optional and defaults to an
                empty string.

        """
        db_snippet = Snippet(
            file_id=snippet["file_id"],
            index_id=snippet["index_id"],
            content=snippet["content"],
            summary=snippet.get("summary", ""),
        )
        self.session.add(db_snippet)

    async def update_snippet_content(self, snippet_id: int, content: str) -> None:
        """Update the content of an existing snippet.

        Does nothing when no snippet with the given ID exists.

        Args:
            snippet_id: The ID of the snippet to update.
            content: The new content for the snippet.

        """
        query = select(Snippet).where(Snippet.id == snippet_id)
        result = await self.session.execute(query)
        snippet = result.scalar_one_or_none()

        if snippet:
            snippet.content = content
            # SQLAlchemy will automatically track this change

    async def list_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
        """List snippets by IDs.

        Args:
            ids: The snippet IDs to load. Repeated IDs are allowed and produce
                repeated entries in the result.

        Returns:
            One entry per requested ID, in the order the IDs were passed in,
            each carrying the snippet, its file, its source and its authors.

        Raises:
            ValueError: If any requested ID has no matching snippet.

        """
        if not ids:
            return []

        query = (
            select(Snippet, File, Source, Author)
            .where(Snippet.id.in_(ids))
            .join(File, Snippet.file_id == File.id)
            .join(Source, File.source_id == Source.id)
            .outerjoin(AuthorFileMapping, AuthorFileMapping.file_id == File.id)
            .outerjoin(Author, AuthorFileMapping.author_id == Author.id)
        )
        rows = await self.session.execute(query)

        # Group results by snippet ID and collect authors
        id_to_result: dict[int, SnippetWithContext] = {}
        for snippet, file, source, author in rows.all():
            if snippet.id not in id_to_result:
                id_to_result[snippet.id] = SnippetWithContext(
                    snippet=snippet,
                    file=file,
                    source=source,
                    authors=[],
                )
            # Add author if it exists (outer join might return None)
            if author is not None:
                id_to_result[snippet.id].authors.append(author)

        # Check that all IDs are present. Comparing lengths would reject
        # input that merely repeats an ID, so look for absent IDs directly.
        missing_ids = [
            snippet_id for snippet_id in ids if snippet_id not in id_to_result
        ]
        if missing_ids:
            msg = f"Some IDs are not present: {missing_ids}"
            raise ValueError(msg)

        # Rebuild the list in the same order that it was passed in
        return [id_to_result[i] for i in ids]

    async def num_snippets_for_index(self, index_id: int) -> int:
        """Get the number of snippets for an index.

        Args:
            index_id: The ID of the index.

        Returns:
            The number of snippets.

        """
        query = select(func.count()).where(Snippet.index_id == index_id)
        result = await self.session.execute(query)
        return result.scalar_one()
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
"""Factory for creating snippet domain service."""
|
|
2
|
-
|
|
3
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
4
|
-
|
|
5
|
-
from kodit.domain.services.snippet_service import SnippetDomainService
|
|
6
|
-
from kodit.infrastructure.snippet_extraction.snippet_extraction_factory import (
|
|
7
|
-
create_snippet_extraction_domain_service,
|
|
8
|
-
)
|
|
9
|
-
from kodit.infrastructure.sqlalchemy.file_repository import SqlAlchemyFileRepository
|
|
10
|
-
from kodit.infrastructure.sqlalchemy.snippet_repository import (
|
|
11
|
-
SqlAlchemySnippetRepository,
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def snippet_domain_service_factory(session: AsyncSession) -> SnippetDomainService:
    """Build a snippet domain service wired with all of its dependencies.

    Args:
        session: The database session shared by the snippet and file
            repositories.

    Returns:
        Configured snippet domain service

    """
    # Keyword arguments are evaluated in order: the extraction service is
    # created first, followed by the snippet and file repositories.
    return SnippetDomainService(
        snippet_extraction_service=create_snippet_extraction_domain_service(),
        snippet_repository=SqlAlchemySnippetRepository(session),
        file_repository=SqlAlchemyFileRepository(session),
    )
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"""Infrastructure services for snippet extraction."""
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
"""Infrastructure implementation for language detection."""
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
|
|
5
|
-
from kodit.domain.services.snippet_extraction_service import LanguageDetectionService
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class FileSystemLanguageDetectionService(LanguageDetectionService):
    """Language detection driven by a file-extension lookup table."""

    def __init__(self, language_map: dict[str, str]) -> None:
        """Initialize the language detection service.

        Args:
            language_map: Mapping of file extensions (without the leading dot)
                to programming languages

        """
        self.language_map = language_map

    async def detect_language(self, file_path: Path) -> str:
        """Detect language based on file extension.

        The extension is lower-cased and stripped of its leading dot before it
        is looked up in the language map.

        Args:
            file_path: Path to the file to detect language for

        Returns:
            The detected programming language

        Raises:
            ValueError: If the language is not supported

        """
        extension = file_path.suffix.lower().removeprefix(".")
        detected = self.language_map.get(extension)

        if detected is not None:
            return detected

        msg = f"Unsupported language for file suffix: {extension}"
        raise ValueError(msg)
|