kodit 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/alembic/README +1 -0
- kodit/alembic/__init__.py +1 -0
- kodit/alembic/env.py +83 -0
- kodit/alembic/script.py.mako +30 -0
- kodit/alembic/versions/85155663351e_initial.py +82 -0
- kodit/alembic/versions/__init__.py +1 -0
- kodit/cli.py +122 -0
- kodit/database.py +85 -0
- kodit/indexing/__init__.py +1 -0
- kodit/indexing/models.py +43 -0
- kodit/indexing/repository.py +132 -0
- kodit/indexing/service.py +153 -0
- kodit/logging.py +9 -0
- kodit/retreival/__init__.py +1 -0
- kodit/retreival/repository.py +76 -0
- kodit/retreival/service.py +30 -0
- kodit/sources/__init__.py +1 -0
- kodit/sources/models.py +71 -0
- kodit/sources/repository.py +110 -0
- kodit/sources/service.py +208 -0
- {kodit-0.0.1.dist-info → kodit-0.1.1.dist-info}/METADATA +47 -1
- kodit-0.1.1.dist-info/RECORD +32 -0
- kodit/mcp_test.py +0 -66
- kodit-0.0.1.dist-info/RECORD +0 -15
- {kodit-0.0.1.dist-info → kodit-0.1.1.dist-info}/WHEEL +0 -0
- {kodit-0.0.1.dist-info → kodit-0.1.1.dist-info}/entry_points.txt +0 -0
- {kodit-0.0.1.dist-info → kodit-0.1.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Index service for managing code indexes.
|
|
2
|
+
|
|
3
|
+
This module provides the IndexService class which handles the business logic for
|
|
4
|
+
creating, listing, and running code indexes. It orchestrates the interaction between the
|
|
5
|
+
file system, database operations (via IndexRepository), and provides a clean API for
|
|
6
|
+
index management.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
|
|
11
|
+
import aiofiles
|
|
12
|
+
import pydantic
|
|
13
|
+
import structlog
|
|
14
|
+
from tqdm.asyncio import tqdm
|
|
15
|
+
|
|
16
|
+
from kodit.indexing.models import Snippet
|
|
17
|
+
from kodit.indexing.repository import IndexRepository
|
|
18
|
+
from kodit.sources.service import SourceService
|
|
19
|
+
|
|
20
|
+
# List of MIME types that are supported for indexing and snippet creation.
# Files whose recorded mime_type is not listed here are skipped (with a debug
# log entry) by IndexService._create_snippets.
MIME_WHITELIST = [
    "text/plain",
    "text/markdown",
    "text/x-python",
    "text/x-shellscript",
    "text/x-sql",
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class IndexView(pydantic.BaseModel):
    """Data transfer object for index information.

    This model represents the public interface for index data, providing a clean
    view of index information without exposing internal implementation details.

    Attributes:
        id: Identifier of the index.
        created_at: Timestamp when the index was created.
        updated_at: Timestamp of the last update; None when not supplied.
        source_uri: URI of the source the index belongs to; None when not
            supplied.
        num_snippets: Number of snippets stored for the index; None when not
            supplied.

    """

    id: int
    created_at: datetime
    updated_at: datetime | None = None
    source_uri: str | None = None
    num_snippets: int | None = None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class IndexService:
    """Service for managing code indexes.

    This service handles the business logic for creating, listing, and running code
    indexes. It coordinates between file system operations, database operations (via
    IndexRepository), and provides a clean API for index management.
    """

    def __init__(
        self, repository: IndexRepository, source_service: SourceService
    ) -> None:
        """Initialize the index service.

        Args:
            repository: The repository instance to use for database operations.
            source_service: The source service instance to use for source validation.

        """
        self.repository = repository
        self.source_service = source_service
        self.log = structlog.get_logger(__name__)

    async def create(self, source_id: int) -> IndexView:
        """Create a new index for a source.

        The source is looked up first so that an unknown source fails before
        the repository is asked to create anything.

        Args:
            source_id: The ID of the source to create an index for.

        Returns:
            An IndexView carrying the id and creation time of the new index.

        Raises:
            ValueError: If the source does not exist (raised by the source
                service lookup).

        """
        # NOTE(review): whether a source that already has an index is rejected
        # depends on IndexRepository.create — confirm there.
        source = await self.source_service.get(source_id)

        index = await self.repository.create(source.id)
        return IndexView(
            id=index.id,
            created_at=index.created_at,
        )

    async def list_indexes(self) -> list[IndexView]:
        """List all available indexes with their details.

        Returns:
            A list of IndexView objects, one per index returned by the
            repository, each including its snippet count.

        """
        indexes = await self.repository.list_indexes()

        # Transform database results into DTOs. The snippet count is fetched
        # with one repository call per index.
        return [
            IndexView(
                id=index.id,
                created_at=index.created_at,
                updated_at=index.updated_at,
                num_snippets=await self.repository.num_snippets_for_index(index.id),
            )
            for index in indexes
        ]

    async def run(self, index_id: int) -> None:
        """Run the indexing process for a specific index.

        Args:
            index_id: The ID of the index to run.

        Raises:
            ValueError: If no index with the given ID exists.

        """
        # Get and validate index
        index = await self.repository.get_by_id(index_id)
        if not index:
            msg = f"Index not found: {index_id}"
            raise ValueError(msg)

        # Create snippets for supported file types
        await self._create_snippets(index_id)

        # Update index timestamp
        await self.repository.update_index_timestamp(index)

    async def _create_snippets(
        self,
        index_id: int,
    ) -> None:
        """Create one snippet per supported file of an index.

        Every file the repository reports for the index is read from its cloned
        path; files whose MIME type is not whitelisted, or whose bytes are not
        valid UTF-8, are skipped.

        Args:
            index_id: The ID of the index to create snippets for.

        """
        files = await self.repository.files_for_index(index_id)
        for file in tqdm(files, total=len(files)):
            # Skip unsupported file types
            if file.mime_type not in MIME_WHITELIST:
                self.log.debug("Skipping mime type", mime_type=file.mime_type)
                continue

            async with aiofiles.open(file.cloned_path, "rb") as f:
                raw = await f.read()

            # The MIME type is only a guess, so a "text" file may still hold
            # bytes that are not UTF-8. Skip such a file instead of letting one
            # bad file abort the whole indexing run.
            try:
                text = raw.decode("utf-8")
            except UnicodeDecodeError:
                self.log.warning(
                    "Skipping file that is not valid UTF-8",
                    path=file.cloned_path,
                )
                continue

            snippet = Snippet(
                index_id=index_id,
                file_id=file.id,
                content=text,
            )
            await self.repository.add_snippet(snippet)
|
kodit/logging.py
CHANGED
|
@@ -93,6 +93,15 @@ def configure_logging(log_level: str, log_format: LogFormat) -> None:
|
|
|
93
93
|
logging.getLogger(_log).handlers.clear()
|
|
94
94
|
logging.getLogger(_log).propagate = True
|
|
95
95
|
|
|
96
|
+
# Configure SQLAlchemy loggers to use our structlog setup
|
|
97
|
+
for _log in ["sqlalchemy.engine", "alembic"]:
|
|
98
|
+
engine_logger = logging.getLogger(_log)
|
|
99
|
+
engine_logger.setLevel(logging.WARNING) # Hide INFO logs by default
|
|
100
|
+
if log_level.upper() == "DEBUG":
|
|
101
|
+
engine_logger.setLevel(
|
|
102
|
+
logging.DEBUG
|
|
103
|
+
) # Only show all logs when in DEBUG mode
|
|
104
|
+
|
|
96
105
|
def handle_exception(
|
|
97
106
|
exc_type: type[BaseException],
|
|
98
107
|
exc_value: BaseException,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Retrieval package for code search and retrieval functionality."""
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Repository for retrieving code snippets and search results.
|
|
2
|
+
|
|
3
|
+
This module provides the RetrievalRepository class which handles all database operations
|
|
4
|
+
related to searching and retrieving code snippets, including string-based searches
|
|
5
|
+
and their associated file information.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import TypeVar
|
|
9
|
+
|
|
10
|
+
import pydantic
|
|
11
|
+
from sqlalchemy import select
|
|
12
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
13
|
+
|
|
14
|
+
from kodit.indexing.models import Snippet
|
|
15
|
+
from kodit.sources.models import File
|
|
16
|
+
|
|
17
|
+
T = TypeVar("T")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RetrievalResult(pydantic.BaseModel):
    """Data transfer object for search results.

    This model represents a single search result, containing both the URI of
    the file and the matching snippet content.

    Attributes:
        uri: URI of the file the snippet belongs to.
        content: Text content of the matching snippet.

    """

    uri: str
    content: str
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class RetrievalRepository:
    """Repository for retrieving code snippets and search results.

    This class provides methods for searching and retrieving code snippets from
    the database, including string-based searches and their associated file information.
    """

    def __init__(self, session: AsyncSession) -> None:
        """Initialize the retrieval repository.

        Args:
            session: The SQLAlchemy async session to use for database operations.

        """
        self.session = session

    @staticmethod
    def _escape_like(text: str, escape: str = "\\") -> str:
        """Escape LIKE wildcards so that *text* is matched literally.

        Args:
            text: Raw user text to embed in a LIKE/ILIKE pattern.
            escape: The escape character declared to the database.

        Returns:
            The text with the escape character, ``%`` and ``_`` each prefixed
            by the escape character.

        """
        # The escape character itself must be doubled first, otherwise the
        # escapes added for % and _ would be escaped again.
        return (
            text.replace(escape, escape * 2)
            .replace("%", f"{escape}%")
            .replace("_", f"{escape}_")
        )

    async def string_search(self, query: str) -> list[RetrievalResult]:
        """Search for snippets containing the given query string.

        This method performs a case-insensitive substring search within snippet
        contents and returns at most 10 matches. No ordering is applied, so
        which 10 rows are returned is up to the database.

        Args:
            query: The string to search for within snippet contents. It is
                matched literally: ``%`` and ``_`` are not treated as wildcards.

        Returns:
            A list of RetrievalResult objects containing the matching snippets
            and the URIs of their files.

        """
        pattern = f"%{self._escape_like(query)}%"
        search_query = (
            select(Snippet, File)
            .join(File, Snippet.file_id == File.id)
            .where(Snippet.content.ilike(pattern, escape="\\"))
            .limit(10)
        )
        rows = await self.session.execute(search_query)

        return [
            RetrievalResult(
                uri=file.uri,
                content=snippet.content,
            )
            for snippet, file in rows.all()
        ]
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Retrieval service."""
|
|
2
|
+
|
|
3
|
+
import pydantic
|
|
4
|
+
|
|
5
|
+
from kodit.retreival.repository import RetrievalRepository, RetrievalResult
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class RetrievalRequest(pydantic.BaseModel):
    """Request for a retrieval.

    Attributes:
        query: The text to search for; RetrievalService.retrieve passes it to
            the repository's string search.

    """

    query: str
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Snippet(pydantic.BaseModel):
    """Snippet model.

    NOTE(review): not referenced by the rest of this module as shown; it is a
    pydantic model distinct from kodit.indexing.models.Snippet — confirm it is
    still needed.

    Attributes:
        content: Text content of the snippet.
        file_path: Path of the file the snippet came from.

    """

    content: str
    file_path: str
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class RetrievalService:
    """Service layer that answers retrieval requests via a repository."""

    def __init__(self, repository: RetrievalRepository) -> None:
        """Keep a reference to the repository that performs the search.

        Args:
            repository: The repository used to execute searches.

        """
        self.repository = repository

    async def retrieve(self, request: RetrievalRequest) -> list[RetrievalResult]:
        """Run a string search for the request's query.

        Args:
            request: The retrieval request holding the query text.

        Returns:
            Whatever the repository's string search returns for the query.

        """
        matches = await self.repository.string_search(request.query)
        return matches
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Sources package for managing code source repositories and local directories."""
|
kodit/sources/models.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Source models for managing code sources.
|
|
2
|
+
|
|
3
|
+
This module defines the SQLAlchemy models used for storing and managing code sources.
|
|
4
|
+
It includes models for tracking different types of sources (git repositories and local
|
|
5
|
+
folders) and their relationships.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from sqlalchemy import ForeignKey, Integer, String
|
|
9
|
+
from sqlalchemy.orm import Mapped, mapped_column
|
|
10
|
+
|
|
11
|
+
from kodit.database import Base, CommonMixin
|
|
12
|
+
|
|
13
|
+
# Enable proper type hints for SQLAlchemy models
|
|
14
|
+
__all__ = ["File", "Source"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Source(Base, CommonMixin):
    """Model for tracking code sources.

    Each row records where a source came from and where its local copy lives.

    Attributes:
        uri: The URI of the source (unique, indexed).
        cloned_path: Path to a copy of the source on the local filesystem.

    """

    # NOTE(review): id / created_at / updated_at are presumably supplied by
    # CommonMixin — confirm in kodit.database.

    __tablename__ = "sources"
    uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
    cloned_path: Mapped[str] = mapped_column(String(1024))

    def __init__(self, uri: str, cloned_path: str) -> None:
        """Initialize a new Source instance for typing purposes.

        Args:
            uri: The URI of the source.
            cloned_path: Path to the local copy of the source.

        """
        super().__init__()
        self.uri = uri
        self.cloned_path = cloned_path
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class File(Base, CommonMixin):
    """File model.

    Represents a single file that belongs to a source.

    Attributes:
        source_id: Foreign key to the owning row in ``sources``.
        mime_type: MIME type recorded for the file (empty string by default).
        uri: URI of the file (empty string by default).
        cloned_path: Path of the file inside the local clone.
        sha256: SHA-256 hex digest recorded for the file (indexed).
        size_bytes: Size of the file in bytes.

    """

    __tablename__ = "files"

    source_id: Mapped[int] = mapped_column(ForeignKey("sources.id"))
    mime_type: Mapped[str] = mapped_column(String(255), default="")
    uri: Mapped[str] = mapped_column(String(1024), default="")
    cloned_path: Mapped[str] = mapped_column(String(1024))
    sha256: Mapped[str] = mapped_column(String(64), default="", index=True)
    size_bytes: Mapped[int] = mapped_column(Integer, default=0)

    def __init__(  # noqa: PLR0913
        self,
        source_id: int,
        cloned_path: str,
        mime_type: str = "",
        uri: str = "",
        sha256: str = "",
        size_bytes: int = 0,
    ) -> None:
        """Initialize a new File instance for typing purposes.

        Args:
            source_id: ID of the source the file belongs to.
            cloned_path: Path of the file inside the local clone.
            mime_type: MIME type of the file.
            uri: URI of the file.
            sha256: SHA-256 hex digest of the file.
            size_bytes: Size of the file in bytes.

        """
        super().__init__()
        self.source_id = source_id
        self.cloned_path = cloned_path
        self.mime_type = mime_type
        self.uri = uri
        self.sha256 = sha256
        self.size_bytes = size_bytes
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Source repository for database operations.
|
|
2
|
+
|
|
3
|
+
This module provides the SourceRepository class which handles all database operations
|
|
4
|
+
related to code sources. It manages the creation and retrieval of source records
|
|
5
|
+
from the database, abstracting away the SQLAlchemy implementation details.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from sqlalchemy import func, select
|
|
9
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
10
|
+
|
|
11
|
+
from kodit.sources.models import File, Source
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SourceRepository:
    """Database access for sources and their files.

    Wraps an async SQLAlchemy session and exposes the inserts and lookups the
    source service needs. Inserts are committed immediately.

    Args:
        session: The SQLAlchemy async session to use for database operations.

    """

    def __init__(self, session: AsyncSession) -> None:
        """Remember the session used for every operation."""
        self.session = session

    async def _persist(self, record):  # noqa: ANN001, ANN202
        """Add a record to the session, commit, and hand the record back."""
        self.session.add(record)
        await self.session.commit()
        return record

    async def _single_source(self, statement) -> Source | None:  # noqa: ANN001
        """Execute a select expected to match at most one Source."""
        outcome = await self.session.execute(statement)
        return outcome.scalar_one_or_none()

    async def create_source(self, source: Source) -> Source:
        """Insert a source record and commit.

        Args:
            source: The Source instance to store.

        Returns:
            The same Source instance, after the commit.

        """
        return await self._persist(source)

    async def create_file(self, file: File) -> File:
        """Insert a file record and commit.

        Args:
            file: The File instance to store.

        Returns:
            The same File instance, after the commit.

        """
        return await self._persist(file)

    async def num_files_for_source(self, source_id: int) -> int:
        """Count the files that belong to a source.

        Args:
            source_id: The ID of the source whose files are counted.

        Returns:
            The number of File rows with that source ID.

        """
        count_stmt = select(func.count()).select_from(File)
        count_stmt = count_stmt.where(File.source_id == source_id)
        outcome = await self.session.execute(count_stmt)
        return outcome.scalar_one()

    async def list_sources(self) -> list[Source]:
        """Retrieve sources from the database.

        Returns:
            A list of Source instances; the query is limited to 10 rows.

        """
        outcome = await self.session.execute(select(Source).limit(10))
        sources = outcome.scalars()
        return list(sources)

    async def get_source_by_uri(self, uri: str) -> Source | None:
        """Look up a source by its URI.

        Args:
            uri: The URI of the source to get.

        Returns:
            The source with the given URI, or None if it does not exist.

        """
        return await self._single_source(select(Source).where(Source.uri == uri))

    async def get_source_by_id(self, source_id: int) -> Source | None:
        """Look up a source by its ID.

        Args:
            source_id: The ID of the source to get.

        Returns:
            The source with the given ID, or None if it does not exist.

        """
        return await self._single_source(
            select(Source).where(Source.id == source_id)
        )
|
kodit/sources/service.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""Source service for managing code sources.
|
|
2
|
+
|
|
3
|
+
This module provides the SourceService class which handles the business logic for
|
|
4
|
+
creating and listing code sources. It orchestrates the interaction between the file
|
|
5
|
+
system, database operations (via SourceRepository), and provides a clean API for
|
|
6
|
+
source management.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import mimetypes
|
|
10
|
+
import shutil
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from hashlib import sha256
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
import aiofiles
|
|
16
|
+
import pydantic
|
|
17
|
+
import structlog
|
|
18
|
+
from tqdm import tqdm
|
|
19
|
+
from uritools import isuri, urisplit
|
|
20
|
+
|
|
21
|
+
from kodit.sources.models import File, Source
|
|
22
|
+
from kodit.sources.repository import SourceRepository
|
|
23
|
+
|
|
24
|
+
CLONE_DIR = Path(".kodit/clones").expanduser().resolve()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SourceView(pydantic.BaseModel):
    """View model for displaying source information.

    This model provides a clean interface for displaying source information,
    containing only the essential fields needed for presentation.

    Attributes:
        id: The unique identifier for the source.
        uri: The URI or path of the source.
        cloned_path: Path of the local copy of the source.
        created_at: Timestamp when the source was created.
        num_files: Number of files recorded for the source.

    """

    id: int
    uri: str
    cloned_path: Path
    created_at: datetime
    num_files: int
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class SourceService:
    """Service for managing code sources.

    This service handles the business logic for creating and listing code sources.
    It coordinates between file system operations, database operations (via
    SourceRepository), and provides a clean API for source management.
    """

    def __init__(self, repository: SourceRepository) -> None:
        """Initialize the source service.

        Args:
            repository: The repository instance to use for database operations.

        """
        self.repository = repository
        self.log = structlog.get_logger(__name__)

    async def _to_view(self, source: Source) -> SourceView:
        """Build the SourceView for a stored source, including its file count."""
        return SourceView(
            id=source.id,
            uri=source.uri,
            cloned_path=Path(source.cloned_path),
            created_at=source.created_at,
            num_files=await self.repository.num_files_for_source(source.id),
        )

    async def get(self, source_id: int) -> SourceView:
        """Get a source by ID.

        Args:
            source_id: The ID of the source to get.

        Returns:
            A SourceView describing the source.

        Raises:
            ValueError: If no source with the given ID exists.

        """
        source = await self.repository.get_source_by_id(source_id)
        if not source:
            msg = f"Source not found: {source_id}"
            raise ValueError(msg)
        return await self._to_view(source)

    async def create(self, uri_or_path_like: str) -> SourceView:
        """Create a new source from a local directory path or a file:// URI.

        Args:
            uri_or_path_like: Either a path to an existing local directory or a
                URI. Only the ``file`` URI scheme is supported.

        Returns:
            A SourceView describing the newly created source.

        Raises:
            ValueError: If the source type is not supported or if the folder doesn't
                exist.

        """
        # A plain path to an existing directory wins over URI parsing.
        if Path(uri_or_path_like).is_dir():
            return await self._create_folder_source(Path(uri_or_path_like))

        if isuri(uri_or_path_like):
            parsed = urisplit(uri_or_path_like)
            if parsed.scheme == "file":
                return await self._create_folder_source(Path(parsed.path))

        msg = f"Unsupported source type: {uri_or_path_like}"
        raise ValueError(msg)

    async def _create_folder_source(self, directory: Path) -> SourceView:
        """Create a folder source.

        The directory is copied (hidden entries excluded) into the clone
        directory, a Source row is stored, and one File row is stored per
        copied file.

        Args:
            directory: The path to the local directory.

        Returns:
            A SourceView describing the newly created source.

        Raises:
            ValueError: If the path doesn't exist, is not a directory, or is
                already added.

        """
        # Resolve the directory to an absolute path
        directory = directory.expanduser().resolve()

        # Check if the folder exists
        if not directory.exists():
            msg = f"Folder does not exist: {directory}"
            raise ValueError(msg)

        # A file:// URI may name a regular file; reject it here so the caller
        # gets the documented ValueError rather than an OS error from the copy.
        if not directory.is_dir():
            msg = f"Not a directory: {directory}"
            raise ValueError(msg)

        # Check if the folder is already added
        if await self.repository.get_source_by_uri(directory.as_uri()):
            msg = f"Directory already added: {directory}"
            raise ValueError(msg)

        # Clone into a local directory
        clone_path = CLONE_DIR / directory.as_posix().replace("/", "_")
        clone_path.mkdir(parents=True, exist_ok=True)

        # Copy all files recursively, preserving directory structure, ignoring hidden
        # files
        shutil.copytree(
            directory,
            clone_path,
            ignore=shutil.ignore_patterns(".*"),
            dirs_exist_ok=True,
        )

        source = await self.repository.create_source(
            Source(uri=directory.as_uri(), cloned_path=str(clone_path)),
        )

        # Collect the regular files once: the progress bar total then matches
        # the number of iterations, and the tree is walked a single time.
        files = [path for path in clone_path.rglob("*") if path.is_file()]
        for path in tqdm(files, total=len(files)):
            await self._process_file(source.id, path.absolute())

        return await self._to_view(source)

    async def _process_file(
        self,
        source_id: int,
        cloned_path: Path,
    ) -> None:
        """Record a single cloned file in the database.

        Reads the file, guesses its MIME type from the path, hashes the content
        and stores a File row. Paths that are not regular files are ignored.

        Args:
            source_id: ID of the source the file belongs to.
            cloned_path: Path of the file inside the clone.

        """
        if not cloned_path.is_file():
            return

        async with aiofiles.open(cloned_path, "rb") as f:
            content = await f.read()

        # guess_type returns (type, encoding); the type is None when unknown.
        guessed_type, _encoding = mimetypes.guess_type(cloned_path)

        # Create file record
        file = File(
            source_id=source_id,
            cloned_path=cloned_path.as_posix(),
            mime_type=guessed_type or "application/octet-stream",
            uri=cloned_path.as_uri(),
            sha256=sha256(content).hexdigest(),
            size_bytes=len(content),
        )

        await self.repository.create_file(file)

    async def list_sources(self) -> list[SourceView]:
        """List the sources returned by the repository.

        Returns:
            A list of SourceView objects containing information about each source.

        """
        sources = await self.repository.list_sources()
        return [await self._to_view(source) for source in sources]
|