kodit 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

@@ -0,0 +1,153 @@
1
+ """Index service for managing code indexes.
2
+
3
+ This module provides the IndexService class which handles the business logic for
4
+ creating, listing, and running code indexes. It orchestrates the interaction between the
5
+ file system, database operations (via IndexRepository), and provides a clean API for
6
+ index management.
7
+ """
8
+
9
+ from datetime import datetime
10
+
11
+ import aiofiles
12
+ import pydantic
13
+ import structlog
14
+ from tqdm.asyncio import tqdm
15
+
16
+ from kodit.indexing.models import Snippet
17
+ from kodit.indexing.repository import IndexRepository
18
+ from kodit.sources.service import SourceService
19
+
20
# MIME types eligible for snippet creation; files reporting any other type are
# skipped by the indexer.
MIME_WHITELIST = [
    "text/plain",
    "text/markdown",
    "text/x-python",
    "text/x-shellscript",
    "text/x-sql",
]
28
+
29
+
30
class IndexView(pydantic.BaseModel):
    """Summary of an index as handed out by IndexService.

    Only ``id`` and ``created_at`` are required. The remaining fields default to
    ``None`` and are filled in only by the callers that have the information.
    """

    # Primary key of the index record.
    id: int
    # When the index record was created.
    created_at: datetime
    # When the index was last updated, if known.
    updated_at: datetime | None = None
    # URI of the indexed source, if known.
    source_uri: str | None = None
    # Number of snippets stored for the index, if known.
    num_snippets: int | None = None
42
+
43
+
44
class IndexService:
    """Service for managing code indexes.

    Coordinates the index repository (database operations) and the source service
    (source lookup) to create, list, and run code indexes.
    """

    def __init__(
        self, repository: IndexRepository, source_service: SourceService
    ) -> None:
        """Initialize the index service.

        Args:
            repository: The repository instance to use for database operations.
            source_service: The source service used to look up sources by ID.

        """
        self.repository = repository
        self.source_service = source_service
        self.log = structlog.get_logger(__name__)

    async def create(self, source_id: int) -> IndexView:
        """Create a new index for a source.

        The source is looked up first so that an unknown ID fails before anything
        is written. No duplicate-index check happens in this method; any uniqueness
        guarantee has to come from the repository.

        Args:
            source_id: The ID of the source to create an index for.

        Returns:
            An IndexView holding the ID and creation time of the new index.

        Raises:
            ValueError: If the source does not exist (raised by the source lookup).

        """
        source = await self.source_service.get(source_id)

        index = await self.repository.create(source.id)
        return IndexView(
            id=index.id,
            created_at=index.created_at,
        )

    async def list_indexes(self) -> list[IndexView]:
        """List all available indexes with their details.

        Returns:
            One IndexView per index, including its timestamps and snippet count.

        """
        indexes = await self.repository.list_indexes()

        # Transform database results into DTOs; the snippet count is fetched with
        # one repository call per index.
        return [
            IndexView(
                id=index.id,
                created_at=index.created_at,
                updated_at=index.updated_at,
                num_snippets=await self.repository.num_snippets_for_index(index.id),
            )
            for index in indexes
        ]

    async def run(self, index_id: int) -> None:
        """Run the indexing process for a specific index.

        Args:
            index_id: The ID of the index to run.

        Raises:
            ValueError: If no index with the given ID exists.

        """
        index = await self.repository.get_by_id(index_id)
        if not index:
            msg = f"Index not found: {index_id}"
            raise ValueError(msg)

        # Create snippets for supported file types
        await self._create_snippets(index_id)

        # Record that the index has just been refreshed
        await self.repository.update_index_timestamp(index)

    async def _create_snippets(
        self,
        index_id: int,
    ) -> None:
        """Create one snippet per supported file of an index.

        Files whose MIME type is not in MIME_WHITELIST are skipped. Files that are
        whitelisted but are not valid UTF-8 are skipped with a warning, so that a
        single badly encoded file does not abort the whole indexing run.

        Args:
            index_id: The ID of the index to create snippets for.

        """
        files = await self.repository.files_for_index(index_id)
        for file in tqdm(files, total=len(files)):
            # Skip unsupported file types
            if file.mime_type not in MIME_WHITELIST:
                self.log.debug("Skipping mime type", mime_type=file.mime_type)
                continue

            async with aiofiles.open(file.cloned_path, "rb") as f:
                raw = await f.read()

            # The MIME type is only a hint, so the bytes may still not be UTF-8.
            try:
                text = raw.decode("utf-8")
            except UnicodeDecodeError:
                self.log.warning(
                    "Skipping file that is not valid UTF-8",
                    path=file.cloned_path,
                )
                continue

            snippet = Snippet(
                index_id=index_id,
                file_id=file.id,
                content=text,
            )
            await self.repository.add_snippet(snippet)
kodit/logging.py CHANGED
@@ -93,6 +93,15 @@ def configure_logging(log_level: str, log_format: LogFormat) -> None:
93
93
  logging.getLogger(_log).handlers.clear()
94
94
  logging.getLogger(_log).propagate = True
95
95
 
96
+ # Configure SQLAlchemy loggers to use our structlog setup
97
+ for _log in ["sqlalchemy.engine", "alembic"]:
98
+ engine_logger = logging.getLogger(_log)
99
+ engine_logger.setLevel(logging.WARNING) # Hide INFO logs by default
100
+ if log_level.upper() == "DEBUG":
101
+ engine_logger.setLevel(
102
+ logging.DEBUG
103
+ ) # Only show all logs when in DEBUG mode
104
+
96
105
  def handle_exception(
97
106
  exc_type: type[BaseException],
98
107
  exc_value: BaseException,
@@ -0,0 +1 @@
1
+ """Retrieval package for code search and retrieval functionality."""
@@ -0,0 +1,76 @@
1
+ """Repository for retrieving code snippets and search results.
2
+
3
+ This module provides the RetrievalRepository class which handles all database operations
4
+ related to searching and retrieving code snippets, including string-based searches
5
+ and their associated file information.
6
+ """
7
+
8
+ from typing import TypeVar
9
+
10
+ import pydantic
11
+ from sqlalchemy import select
12
+ from sqlalchemy.ext.asyncio import AsyncSession
13
+
14
+ from kodit.indexing.models import Snippet
15
+ from kodit.sources.models import File
16
+
17
# Generic placeholder type for this module's annotations.
T = TypeVar("T")
18
+
19
+
20
class RetrievalResult(pydantic.BaseModel):
    """A single search hit returned by the retrieval layer.

    Pairs the text of a matching snippet with the URI of the file it belongs to.
    """

    # URI of the file the snippet belongs to.
    uri: str
    # Text of the matching snippet.
    content: str
29
+
30
+
31
class RetrievalRepository:
    """Repository for retrieving code snippets and search results.

    Runs snippet searches against the database and joins each hit with the file
    record it belongs to.
    """

    def __init__(self, session: AsyncSession) -> None:
        """Initialize the retrieval repository.

        Args:
            session: The SQLAlchemy async session to use for database operations.

        """
        self.session = session

    async def string_search(self, query: str) -> list[RetrievalResult]:
        """Search for snippets containing the given query string.

        Performs a case-insensitive substring match on snippet content and returns
        at most 10 rows. No ordering is applied, so which 10 rows come back when
        more match is up to the database.

        The query is matched literally: LIKE wildcards in it (``%`` and ``_``) are
        escaped, so searching for ``my_func`` does not also match ``myXfunc``.

        Args:
            query: The string to search for within snippet contents.

        Returns:
            A list of RetrievalResult objects containing the matching snippets
            and the URIs of their files.

        """
        # Escape the escape character first so the later replacements are not
        # themselves re-escaped.
        escaped = (
            query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )
        search_query = (
            select(Snippet, File)
            .join(File, Snippet.file_id == File.id)
            .where(Snippet.content.ilike(f"%{escaped}%", escape="\\"))
            .limit(10)
        )
        rows = await self.session.execute(search_query)

        return [
            RetrievalResult(
                uri=file.uri,
                content=snippet.content,
            )
            for snippet, file in rows.all()
        ]
@@ -0,0 +1,30 @@
1
+ """Retrieval service."""
2
+
3
+ import pydantic
4
+
5
+ from kodit.retreival.repository import RetrievalRepository, RetrievalResult
6
+
7
+
8
class RetrievalRequest(pydantic.BaseModel):
    """Input to a retrieval call."""

    # Text to search for in snippet contents.
    query: str
12
+
13
+
14
class Snippet(pydantic.BaseModel):
    """Snippet text paired with the path of the file it came from."""

    # Text of the snippet.
    content: str
    # Path of the file containing the snippet.
    file_path: str
19
+
20
+
21
class RetrievalService:
    """Thin service layer in front of the retrieval repository."""

    def __init__(self, repository: RetrievalRepository) -> None:
        """Store the repository that performs the actual searches.

        Args:
            repository: The repository to delegate searches to.

        """
        self.repository = repository

    async def retrieve(self, request: RetrievalRequest) -> list[RetrievalResult]:
        """Run a string search for the request's query.

        Args:
            request: The retrieval request carrying the query string.

        Returns:
            The results of the repository's string search, unchanged.

        """
        matches = await self.repository.string_search(request.query)
        return matches
@@ -0,0 +1 @@
1
+ """Sources package for managing code source repositories and local directories."""
@@ -0,0 +1,71 @@
1
+ """Source models for managing code sources.
2
+
3
+ This module defines the SQLAlchemy models used for storing and managing code sources.
4
+ It includes models for tracking different types of sources (git repositories and local
5
+ folders) and their relationships.
6
+ """
7
+
8
+ from sqlalchemy import ForeignKey, Integer, String
9
+ from sqlalchemy.orm import Mapped, mapped_column
10
+
11
+ from kodit.database import Base, CommonMixin
12
+
13
# Names exported by ``from kodit.sources.models import *``.
__all__ = ["File", "Source"]
15
+
16
+
17
class Source(Base, CommonMixin):
    """A code source that has been copied onto the local filesystem.

    Attributes:
        uri: The URI of the source. Indexed and unique.
        cloned_path: Filesystem path of the local copy of the source.

    Further columns (such as the ``id`` and ``created_at`` values read by the
    source service) are presumably contributed by CommonMixin, which is defined
    elsewhere.

    """

    __tablename__ = "sources"
    uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
    cloned_path: Mapped[str] = mapped_column(String(1024))

    def __init__(self, uri: str, cloned_path: str) -> None:
        """Create a Source with an explicit, typed constructor signature.

        Args:
            uri: The URI of the source.
            cloned_path: Filesystem path of the local copy of the source.

        """
        super().__init__()
        self.uri = uri
        self.cloned_path = cloned_path
41
+
42
+
43
class File(Base, CommonMixin):
    """A single file belonging to a source.

    Attributes:
        source_id: Foreign key to the owning row in ``sources``.
        mime_type: MIME type recorded for the file; empty string by default.
        uri: URI recorded for the file; empty string by default.
        cloned_path: Filesystem path of the file inside the local copy.
        sha256: Hex digest recorded for the file content; indexed.
        size_bytes: Size recorded for the file content, in bytes.

    """

    __tablename__ = "files"

    source_id: Mapped[int] = mapped_column(ForeignKey("sources.id"))
    mime_type: Mapped[str] = mapped_column(String(255), default="")
    uri: Mapped[str] = mapped_column(String(1024), default="")
    cloned_path: Mapped[str] = mapped_column(String(1024))
    sha256: Mapped[str] = mapped_column(String(64), default="", index=True)
    size_bytes: Mapped[int] = mapped_column(Integer, default=0)

    def __init__(  # noqa: PLR0913
        self,
        source_id: int,
        cloned_path: str,
        mime_type: str = "",
        uri: str = "",
        sha256: str = "",
        size_bytes: int = 0,
    ) -> None:
        """Create a File with an explicit, typed constructor signature.

        Args:
            source_id: ID of the source the file belongs to.
            cloned_path: Filesystem path of the file inside the local copy.
            mime_type: MIME type of the file.
            uri: URI of the file.
            sha256: Hex SHA-256 digest of the file content.
            size_bytes: Size of the file content in bytes.

        """
        super().__init__()
        # Required fields first, then the optional metadata.
        self.source_id = source_id
        self.cloned_path = cloned_path
        self.mime_type = mime_type
        self.uri = uri
        self.sha256 = sha256
        self.size_bytes = size_bytes
@@ -0,0 +1,110 @@
1
+ """Source repository for database operations.
2
+
3
+ This module provides the SourceRepository class which handles all database operations
4
+ related to code sources. It manages the creation and retrieval of source records
5
+ from the database, abstracting away the SQLAlchemy implementation details.
6
+ """
7
+
8
+ from sqlalchemy import func, select
9
+ from sqlalchemy.ext.asyncio import AsyncSession
10
+
11
+ from kodit.sources.models import File, Source
12
+
13
+
14
class SourceRepository:
    """Repository for managing source database operations.

    Wraps an async SQLAlchemy session and exposes the queries and writes the
    source service needs for Source and File records.

    Args:
        session: The SQLAlchemy async session to use for database operations.

    """

    def __init__(self, session: AsyncSession) -> None:
        """Initialize the source repository."""
        self.session = session

    async def create_source(self, source: Source) -> Source:
        """Persist a new source record.

        Args:
            source: The Source instance to store.

        Returns:
            The same Source instance, after the session has been committed.

        Note:
            The transaction is committed here rather than left to the caller.

        """
        self.session.add(source)
        await self.session.commit()
        return source

    async def create_file(self, file: File) -> File:
        """Persist a new file record.

        Args:
            file: The File instance to store.

        Returns:
            The same File instance, after the session has been committed.

        """
        self.session.add(file)
        await self.session.commit()
        return file

    async def num_files_for_source(self, source_id: int) -> int:
        """Get the number of files for a source.

        Args:
            source_id: The ID of the source to get the number of files for.

        Returns:
            The number of files for the source.

        """
        count_stmt = select(func.count()).select_from(File)
        count_stmt = count_stmt.where(File.source_id == source_id)
        rows = await self.session.execute(count_stmt)
        return rows.scalar_one()

    async def list_sources(self) -> list[Source]:
        """Retrieve sources from the database.

        Returns:
            A list of Source instances, capped at 10 rows by the query.

        """
        stmt = select(Source).limit(10)
        rows = await self.session.execute(stmt)
        return list(rows.scalars())

    async def get_source_by_uri(self, uri: str) -> Source | None:
        """Get a source by its URI.

        Args:
            uri: The URI of the source to get.

        Returns:
            The source with the given URI, or None if it does not exist.

        """
        stmt = select(Source).where(Source.uri == uri)
        rows = await self.session.execute(stmt)
        return rows.scalar_one_or_none()

    async def get_source_by_id(self, source_id: int) -> Source | None:
        """Get a source by its ID.

        Args:
            source_id: The ID of the source to get.

        Returns:
            The source with the given ID, or None if it does not exist.

        """
        stmt = select(Source).where(Source.id == source_id)
        rows = await self.session.execute(stmt)
        return rows.scalar_one_or_none()
@@ -0,0 +1,208 @@
1
+ """Source service for managing code sources.
2
+
3
+ This module provides the SourceService class which handles the business logic for
4
+ creating and listing code sources. It orchestrates the interaction between the file
5
+ system, database operations (via SourceRepository), and provides a clean API for
6
+ source management.
7
+ """
8
+
9
+ import mimetypes
10
+ import shutil
11
+ from datetime import datetime
12
+ from hashlib import sha256
13
+ from pathlib import Path
14
+
15
+ import aiofiles
16
+ import pydantic
17
+ import structlog
18
+ from tqdm import tqdm
19
+ from uritools import isuri, urisplit
20
+
21
+ from kodit.sources.models import File, Source
22
+ from kodit.sources.repository import SourceRepository
23
+
24
# Directory that receives local copies of sources. The relative path is resolved
# against the working directory at import time.
CLONE_DIR = (Path(".kodit") / "clones").expanduser().resolve()
25
+
26
+
27
class SourceView(pydantic.BaseModel):
    """Presentation model for a source.

    Attributes:
        id: The unique identifier for the source.
        uri: The URI of the source.
        cloned_path: Filesystem path of the local copy of the source.
        created_at: Timestamp when the source was created.
        num_files: Number of file records stored for the source.

    """

    id: int
    uri: str
    cloned_path: Path
    created_at: datetime
    num_files: int
+
46
+
47
class SourceService:
    """Service for managing code sources.

    Handles creating and listing code sources. It copies local folders into the
    clone directory, records the source and its files through SourceRepository,
    and returns SourceView objects to callers.
    """

    def __init__(self, repository: SourceRepository) -> None:
        """Initialize the source service.

        Args:
            repository: The repository instance to use for database operations.

        """
        self.repository = repository
        self.log = structlog.get_logger(__name__)

    async def _to_view(self, source: Source) -> SourceView:
        """Build the SourceView for a stored source, including its file count."""
        return SourceView(
            id=source.id,
            uri=source.uri,
            cloned_path=Path(source.cloned_path),
            created_at=source.created_at,
            num_files=await self.repository.num_files_for_source(source.id),
        )

    async def get(self, source_id: int) -> SourceView:
        """Get a source by ID.

        Args:
            source_id: The ID of the source to get.

        Returns:
            The SourceView for the source.

        Raises:
            ValueError: If no source with the given ID exists.

        """
        source = await self.repository.get_source_by_id(source_id)
        if source is None:
            msg = f"Source not found: {source_id}"
            raise ValueError(msg)
        return await self._to_view(source)

    async def create(self, uri_or_path_like: str) -> SourceView:
        """Create a new source from a local directory path or a ``file:`` URI.

        Args:
            uri_or_path_like: Either a path to an existing local directory or a
                URI with the ``file`` scheme pointing at one.

        Returns:
            The SourceView for the newly created source.

        Raises:
            ValueError: If the input is neither an existing directory nor a
                ``file:`` URI, if the folder does not exist, or if it has already
                been added.

        """
        if Path(uri_or_path_like).is_dir():
            return await self._create_folder_source(Path(uri_or_path_like))
        if isuri(uri_or_path_like):
            parsed = urisplit(uri_or_path_like)
            if parsed.scheme == "file":
                return await self._create_folder_source(Path(parsed.path))
        # Anything else (other URI schemes, non-directory paths) is unsupported.
        msg = f"Unsupported source type: {uri_or_path_like}"
        raise ValueError(msg)

    async def _create_folder_source(self, directory: Path) -> SourceView:
        """Create a folder source.

        Copies the folder (minus entries whose name starts with a dot) into the
        clone directory, stores a Source record, and stores one File record per
        copied file.

        Args:
            directory: The path to the local directory.

        Returns:
            The SourceView for the newly created source.

        Raises:
            ValueError: If the path is not an existing directory or the directory
                has already been added.

        """
        # Resolve the directory to an absolute path
        directory = directory.expanduser().resolve()

        # Require a directory, not merely an existing path. A file:// URI that
        # points at a regular file would otherwise fail inside copytree with
        # NotADirectoryError.
        if not directory.is_dir():
            msg = f"Folder does not exist: {directory}"
            raise ValueError(msg)

        # Check if the folder is already added
        if await self.repository.get_source_by_uri(directory.as_uri()):
            msg = f"Directory already added: {directory}"
            raise ValueError(msg)

        # Clone into a local directory named after the flattened source path
        clone_path = CLONE_DIR / directory.as_posix().replace("/", "_")
        clone_path.mkdir(parents=True, exist_ok=True)

        # Copy all files recursively, preserving directory structure, ignoring
        # hidden files
        shutil.copytree(
            directory,
            clone_path,
            ignore=shutil.ignore_patterns(".*"),
            dirs_exist_ok=True,
        )

        source = await self.repository.create_source(
            Source(uri=directory.as_uri(), cloned_path=str(clone_path)),
        )

        # Collect the regular files once so the progress bar total matches the
        # number of iterations; directories are excluded.
        files = [path for path in clone_path.rglob("*") if path.is_file()]
        for path in tqdm(files, total=len(files)):
            await self._process_file(source.id, path.absolute())

        return await self._to_view(source)

    async def _process_file(
        self,
        source_id: int,
        cloned_path: Path,
    ) -> None:
        """Store a File record for a single copied file.

        Paths that are not regular files are ignored.

        Args:
            source_id: The ID of the source the file belongs to.
            cloned_path: Path of the file inside the clone directory.

        """
        if not cloned_path.is_file():
            return

        async with aiofiles.open(cloned_path, "rb") as f:
            content = await f.read()

        # guess_type returns (type, encoding); the type is None when unknown.
        guessed_type, _ = mimetypes.guess_type(cloned_path)

        file = File(
            source_id=source_id,
            cloned_path=cloned_path.as_posix(),
            mime_type=guessed_type or "application/octet-stream",
            uri=cloned_path.as_uri(),
            sha256=sha256(content).hexdigest(),
            size_bytes=len(content),
        )

        await self.repository.create_file(file)

    async def list_sources(self) -> list[SourceView]:
        """List available sources.

        Returns:
            A list of SourceView objects, one per source returned by the
            repository.

        """
        sources = await self.repository.list_sources()
        return [await self._to_view(source) for source in sources]