kodit 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

kodit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.1.6'
21
- __version_tuple__ = version_tuple = (0, 1, 6)
20
+ __version__ = version = '0.1.8'
21
+ __version_tuple__ = version_tuple = (0, 1, 8)
kodit/cli.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any
8
8
  import click
9
9
  import structlog
10
10
  import uvicorn
11
- from pytable_formatter import Table
11
+ from pytable_formatter import Cell, Table
12
12
  from sqlalchemy.ext.asyncio import AsyncSession
13
13
 
14
14
  from kodit.config import (
@@ -84,110 +84,53 @@ def cli( # noqa: PLR0913
84
84
  ctx.obj = config
85
85
 
86
86
 
87
- @cli.group()
88
- def sources() -> None:
89
- """Manage code sources."""
90
-
91
-
92
- @sources.command(name="list")
93
- @with_app_context
94
- @with_session
95
- async def list_sources(session: AsyncSession, app_context: AppContext) -> None:
96
- """List all code sources."""
97
- repository = SourceRepository(session)
98
- service = SourceService(app_context.get_clone_dir(), repository)
99
- sources = await service.list_sources()
100
-
101
- # Define headers and data
102
- headers = ["ID", "Created At", "URI"]
103
- data = [[source.id, source.created_at, source.uri] for source in sources]
104
-
105
- # Create and display the table
106
- table = Table(headers=headers, data=data)
107
- click.echo(table)
108
-
109
-
110
- @sources.command(name="create")
111
- @click.argument("uri")
112
- @with_app_context
113
- @with_session
114
- async def create_source(
115
- session: AsyncSession, app_context: AppContext, uri: str
116
- ) -> None:
117
- """Add a new code source."""
118
- repository = SourceRepository(session)
119
- service = SourceService(app_context.get_clone_dir(), repository)
120
- source = await service.create(uri)
121
- click.echo(f"Source created: {source.id}")
122
-
123
-
124
- @cli.group()
125
- def indexes() -> None:
126
- """Manage indexes."""
127
-
128
-
129
- @indexes.command(name="create")
130
- @click.argument("source_id")
87
+ @cli.command()
88
+ @click.argument("sources", nargs=-1)
131
89
  @with_app_context
132
90
  @with_session
133
- async def create_index(
134
- session: AsyncSession, app_context: AppContext, source_id: int
91
+ async def index(
92
+ session: AsyncSession,
93
+ app_context: AppContext,
94
+ sources: list[str],
135
95
  ) -> None:
136
- """Create an index for a source."""
96
+ """List indexes, or index data sources."""
137
97
  source_repository = SourceRepository(session)
138
98
  source_service = SourceService(app_context.get_clone_dir(), source_repository)
139
99
  repository = IndexRepository(session)
140
100
  service = IndexService(repository, source_service, app_context.get_data_dir())
141
- index = await service.create(source_id)
142
- click.echo(f"Index created: {index.id}")
143
101
 
144
-
145
- @indexes.command(name="list")
146
- @with_app_context
147
- @with_session
148
- async def list_indexes(session: AsyncSession, app_context: AppContext) -> None:
149
- """List all indexes."""
150
- source_repository = SourceRepository(session)
151
- source_service = SourceService(app_context.get_clone_dir(), source_repository)
152
- repository = IndexRepository(session)
153
- service = IndexService(repository, source_service, app_context.get_data_dir())
154
- indexes = await service.list_indexes()
155
-
156
- # Define headers and data
157
- headers = [
158
- "ID",
159
- "Created At",
160
- "Updated At",
161
- "Num Snippets",
162
- ]
163
- data = [
164
- [
165
- index.id,
166
- index.created_at,
167
- index.updated_at,
168
- index.num_snippets,
102
+ if not sources:
103
+ # No source specified, list all indexes
104
+ indexes = await service.list_indexes()
105
+ headers: list[str | Cell] = [
106
+ "ID",
107
+ "Created At",
108
+ "Updated At",
109
+ "Source",
110
+ "Num Snippets",
169
111
  ]
170
- for index in indexes
171
- ]
172
-
173
- # Create and display the table
174
- table = Table(headers=headers, data=data)
175
- click.echo(table)
176
-
177
-
178
- @indexes.command(name="run")
179
- @click.argument("index_id")
180
- @with_app_context
181
- @with_session
182
- async def run_index(
183
- session: AsyncSession, app_context: AppContext, index_id: int
184
- ) -> None:
185
- """Run an index."""
186
- source_repository = SourceRepository(session)
187
- source_service = SourceService(app_context.get_clone_dir(), source_repository)
188
- repository = IndexRepository(session)
189
- service = IndexService(repository, source_service, app_context.get_data_dir())
190
- await service.run(index_id)
112
+ data = [
113
+ [
114
+ index.id,
115
+ index.created_at,
116
+ index.updated_at,
117
+ index.source,
118
+ index.num_snippets,
119
+ ]
120
+ for index in indexes
121
+ ]
122
+ click.echo(Table(headers=headers, data=data))
123
+ return
124
+ # Handle source indexing
125
+ for source in sources:
126
+ if Path(source).is_file():
127
+ msg = "File indexing is not implemented yet"
128
+ raise click.UsageError(msg)
129
+
130
+ # Index directory
131
+ s = await source_service.create(source)
132
+ index = await service.create(s.id)
133
+ await service.run(index.id)
191
134
 
192
135
 
193
136
  @cli.command()
kodit/indexing/repository.py CHANGED
@@ -8,7 +8,7 @@ and retrieving index information with their associated metadata.
8
8
  from datetime import UTC, datetime
9
9
  from typing import TypeVar
10
10
 
11
- from sqlalchemy import func, select
11
+ from sqlalchemy import delete, func, select
12
12
  from sqlalchemy.ext.asyncio import AsyncSession
13
13
 
14
14
  from kodit.indexing.models import Index, Snippet
@@ -63,6 +63,17 @@ class IndexRepository:
63
63
  result = await self.session.execute(query)
64
64
  return result.scalar_one_or_none()
65
65
 
66
+ async def get_by_source_id(self, source_id: int) -> Index | None:
67
+ """Get an index by its source ID.
68
+
69
+ Args:
70
+ source_id: The ID of the source to retrieve an index for.
71
+
72
+ """
73
+ query = select(Index).where(Index.source_id == source_id)
74
+ result = await self.session.execute(query)
75
+ return result.scalar_one_or_none()
76
+
66
77
  async def files_for_index(self, index_id: int) -> list[File]:
67
78
  """Get all files for an index.
68
79
 
@@ -82,7 +93,7 @@ class IndexRepository:
82
93
  result = await self.session.execute(query)
83
94
  return list(result.scalars())
84
95
 
85
- async def list_indexes(self) -> list[Index]:
96
+ async def list_indexes(self) -> list[tuple[Index, Source]]:
86
97
  """List all indexes.
87
98
 
88
99
  Returns:
@@ -90,9 +101,11 @@ class IndexRepository:
90
101
  and counts of files and snippets.
91
102
 
92
103
  """
93
- query = select(Index).limit(10)
104
+ query = select(Index, Source).join(
105
+ Source, Index.source_id == Source.id, full=True
106
+ )
94
107
  result = await self.session.execute(query)
95
- return list(result.scalars())
108
+ return list(result.tuples())
96
109
 
97
110
  async def num_snippets_for_index(self, index_id: int) -> int:
98
111
  """Get the number of snippets for an index."""
@@ -120,6 +133,17 @@ class IndexRepository:
120
133
  self.session.add(snippet)
121
134
  await self.session.commit()
122
135
 
136
+ async def delete_all_snippets(self, index_id: int) -> None:
137
+ """Delete all snippets for an index.
138
+
139
+ Args:
140
+ index_id: The ID of the index to delete snippets for.
141
+
142
+ """
143
+ query = delete(Snippet).where(Snippet.index_id == index_id)
144
+ await self.session.execute(query)
145
+ await self.session.commit()
146
+
123
147
  async def get_snippets_for_index(self, index_id: int) -> list[Snippet]:
124
148
  """Get all snippets for an index.
125
149
 
kodit/indexing/service.py CHANGED
@@ -33,6 +33,7 @@ class IndexView(pydantic.BaseModel):
33
33
  id: int
34
34
  created_at: datetime
35
35
  updated_at: datetime | None = None
36
+ source: str | None = None
36
37
  num_snippets: int | None = None
37
38
 
38
39
 
@@ -82,7 +83,10 @@ class IndexService:
82
83
  # Check if the source exists
83
84
  source = await self.source_service.get(source_id)
84
85
 
85
- index = await self.repository.create(source.id)
86
+ # Check if the index already exists
87
+ index = await self.repository.get_by_source_id(source.id)
88
+ if not index:
89
+ index = await self.repository.create(source.id)
86
90
  return IndexView(
87
91
  id=index.id,
88
92
  created_at=index.created_at,
@@ -105,8 +109,9 @@ class IndexService:
105
109
  created_at=index.created_at,
106
110
  updated_at=index.updated_at,
107
111
  num_snippets=await self.repository.num_snippets_for_index(index.id),
112
+ source=source.uri,
108
113
  )
109
- for index in indexes
114
+ for index, source in indexes
110
115
  ]
111
116
 
112
117
  async def run(self, index_id: int) -> None:
@@ -117,6 +122,9 @@ class IndexService:
117
122
  msg = f"Index not found: {index_id}"
118
123
  raise ValueError(msg)
119
124
 
125
+ # First delete all old snippets, if they exist
126
+ await self.repository.delete_all_snippets(index_id)
127
+
120
128
  # Create snippets for supported file types
121
129
  await self._create_snippets(index_id)
122
130
 
kodit/sources/service.py CHANGED
@@ -13,6 +13,7 @@ from hashlib import sha256
13
13
  from pathlib import Path
14
14
 
15
15
  import aiofiles
16
+ import git
16
17
  import pydantic
17
18
  import structlog
18
19
  from tqdm import tqdm
@@ -98,8 +99,19 @@ class SourceService:
98
99
  parsed = urisplit(uri_or_path_like)
99
100
  if parsed.scheme == "file":
100
101
  return await self._create_folder_source(Path(parsed.path))
101
- msg = f"Unsupported source type: {uri_or_path_like}"
102
- raise ValueError(msg)
102
+ if parsed.scheme in ("git", "http", "https") and parsed.path.endswith(
103
+ ".git"
104
+ ):
105
+ return await self._create_git_source(uri_or_path_like)
106
+
107
+ # Try adding a .git suffix, sometimes people just pass the url
108
+ if not uri_or_path_like.endswith(".git"):
109
+ uri_or_path_like = uri_or_path_like + ".git"
110
+ try:
111
+ return await self._create_git_source(uri_or_path_like)
112
+ except ValueError:
113
+ pass
114
+
103
115
  msg = f"Unsupported source type: {uri_or_path_like}"
104
116
  raise ValueError(msg)
105
117
 
@@ -110,46 +122,98 @@ class SourceService:
110
122
  directory: The path to the local directory.
111
123
 
112
124
  Raises:
113
- ValueError: If the folder doesn't exist or is already added.
125
+ ValueError: If the folder doesn't exist.
126
+ SourceAlreadyExistsError: If the folder is already added.
114
127
 
115
128
  """
116
- # Resolve the directory to an absolute path
117
- directory = directory.expanduser().resolve()
129
+ source = await self.repository.get_source_by_uri(directory.as_uri())
130
+ if source:
131
+ self.log.info("Source already exists, reusing...", source_id=source.id)
132
+ else:
133
+ # Resolve the directory to an absolute path
134
+ directory = directory.expanduser().resolve()
135
+
136
+ # Check if the folder exists
137
+ if not directory.exists():
138
+ msg = f"Folder does not exist: {directory}"
139
+ raise ValueError(msg)
140
+
141
+ # Check if the folder is already added
142
+ if await self.repository.get_source_by_uri(directory.as_uri()):
143
+ msg = f"Directory already added: {directory}"
144
+ raise ValueError(msg)
145
+
146
+ # Clone into a local directory
147
+ clone_path = self.clone_dir / directory.as_posix().replace("/", "_")
148
+ clone_path.mkdir(parents=True, exist_ok=True)
149
+
150
+ # Copy all files recursively, preserving directory structure, ignoring
151
+ # hidden files
152
+ shutil.copytree(
153
+ directory,
154
+ clone_path,
155
+ ignore=shutil.ignore_patterns(".*"),
156
+ dirs_exist_ok=True,
157
+ )
118
158
 
119
- # Check if the folder exists
120
- if not directory.exists():
121
- msg = f"Folder does not exist: {directory}"
122
- raise ValueError(msg)
159
+ source = await self.repository.create_source(
160
+ Source(uri=directory.as_uri(), cloned_path=str(clone_path)),
161
+ )
123
162
 
124
- # Check if the folder is already added
125
- if await self.repository.get_source_by_uri(directory.as_uri()):
126
- msg = f"Directory already added: {directory}"
127
- raise ValueError(msg)
163
+ # Add all files to the source
164
+ # Count total files for progress bar
165
+ file_count = sum(1 for _ in clone_path.rglob("*") if _.is_file())
128
166
 
129
- # Clone into a local directory
130
- clone_path = self.clone_dir / directory.as_posix().replace("/", "_")
131
- clone_path.mkdir(parents=True, exist_ok=True)
132
-
133
- # Copy all files recursively, preserving directory structure, ignoring hidden
134
- # files
135
- shutil.copytree(
136
- directory,
137
- clone_path,
138
- ignore=shutil.ignore_patterns(".*"),
139
- dirs_exist_ok=True,
140
- )
167
+ # Process each file in the source directory
168
+ for path in tqdm(clone_path.rglob("*"), total=file_count):
169
+ await self._process_file(source.id, path.absolute())
141
170
 
142
- source = await self.repository.create_source(
143
- Source(uri=directory.as_uri(), cloned_path=str(clone_path)),
171
+ return SourceView(
172
+ id=source.id,
173
+ uri=source.uri,
174
+ cloned_path=Path(source.cloned_path),
175
+ created_at=source.created_at,
176
+ num_files=await self.repository.num_files_for_source(source.id),
144
177
  )
145
178
 
146
- # Add all files to the source
147
- # Count total files for progress bar
148
- file_count = sum(1 for _ in clone_path.rglob("*") if _.is_file())
179
+ async def _create_git_source(self, uri: str) -> SourceView:
180
+ """Create a git source.
181
+
182
+ Args:
183
+ uri: The URI of the git repository.
184
+
185
+ Raises:
186
+ ValueError: If the repository cloning fails.
187
+
188
+ """
189
+ # Check if the repository is already added
190
+ source = await self.repository.get_source_by_uri(uri)
191
+
192
+ if source:
193
+ self.log.info("Source already exists, reusing...", source_id=source.id)
194
+ else:
195
+ # Create a unique directory name for the clone
196
+ clone_path = self.clone_dir / uri.replace("/", "_").replace(":", "_")
197
+ clone_path.mkdir(parents=True, exist_ok=True)
198
+
199
+ try:
200
+ # Clone the repository
201
+ git.Repo.clone_from(uri, clone_path)
202
+ except git.GitCommandError as e:
203
+ msg = f"Failed to clone repository: {e}"
204
+ raise ValueError(msg) from e
205
+
206
+ source = await self.repository.create_source(
207
+ Source(uri=uri, cloned_path=str(clone_path)),
208
+ )
209
+
210
+ # Add all files to the source
211
+ # Count total files for progress bar
212
+ file_count = sum(1 for _ in clone_path.rglob("*") if _.is_file())
149
213
 
150
- # Process each file in the source directory
151
- for path in tqdm(clone_path.rglob("*"), total=file_count):
152
- await self._process_file(source.id, path.absolute())
214
+ # Process each file in the source directory
215
+ for path in tqdm(clone_path.rglob("*"), total=file_count):
216
+ await self._process_file(source.id, path.absolute())
153
217
 
154
218
  return SourceView(
155
219
  id=source.id,
kodit-0.1.6.dist-info/METADATA → kodit-0.1.8.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kodit
3
- Version: 0.1.6
3
+ Version: 0.1.8
4
4
  Summary: Code indexing for better AI code generation
5
5
  Project-URL: Homepage, https://docs.helixml.tech/kodit/
6
6
  Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -28,6 +28,7 @@ Requires-Dist: colorama>=0.4.6
28
28
  Requires-Dist: dotenv>=0.9.9
29
29
  Requires-Dist: fastapi[standard]>=0.115.12
30
30
  Requires-Dist: fastmcp>=2.3.3
31
+ Requires-Dist: gitpython>=3.1.44
31
32
  Requires-Dist: httpx-retries>=0.3.2
32
33
  Requires-Dist: httpx>=0.28.1
33
34
  Requires-Dist: posthog>=4.0.1
kodit-0.1.6.dist-info/RECORD → kodit-0.1.8.dist-info/RECORD CHANGED
@@ -1,8 +1,8 @@
1
1
  kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
2
2
  kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
3
- kodit/_version.py,sha256=ESbJO0YD7TYfOUv_WDIJJgWELGepEWsoyhqVifEcXPA,511
3
+ kodit/_version.py,sha256=AjUi5zEL_BoWoXMXR1FnWc3mD6FHX7snDXjDHVLoens,511
4
4
  kodit/app.py,sha256=Mr5BFHOHx5zppwjC4XPWVvHjwgl1yrKbUjTWXKubJQM,891
5
- kodit/cli.py,sha256=CNewAn4aCDBbBRKfTaO5hosklCySvJM7afDN04h3lvY,8307
5
+ kodit/cli.py,sha256=bsfURvGKZzpHkChnTlatI0nXHV3KV_6vJnUJ2fQEAfM,6637
6
6
  kodit/config.py,sha256=nlm9U-nVx5riH2SrU1XY4XcCMhQK4DrwO_1H8bPOBjA,2927
7
7
  kodit/database.py,sha256=vtTlmrXHyHJH3Ek-twZTCqEjB0jun-NncALFze2fqhA,2350
8
8
  kodit/logging.py,sha256=cFEQXWI27LzWScSxly9ApwkbBDamUG17pA-jEfVakXQ,5316
@@ -18,8 +18,8 @@ kodit/bm25/__init__.py,sha256=j8zyriNWhbwE5Lbybzg1hQAhANlU9mKHWw4beeUR6og,19
18
18
  kodit/bm25/bm25.py,sha256=3wyNRSrTaYqV7s4R1D6X0NpCf22PuFK2_uc8YapzYLE,2263
19
19
  kodit/indexing/__init__.py,sha256=cPyi2Iej3G1JFWlWr7X80_UrsMaTu5W5rBwgif1B3xo,75
20
20
  kodit/indexing/models.py,sha256=sZIhGwvL4Dw0QTWFxrjfWctSLkAoDT6fv5DlGz8-Fr8,1258
21
- kodit/indexing/repository.py,sha256=kvAlNfMSQYboF0TB1huw2qoBdLJ4UsEPiM7ZG-e6rrg,4300
22
- kodit/indexing/service.py,sha256=eopx_IZeaUjCI-5LeSqZq7W7m76JZsDcVvOFhLquHpI,5426
21
+ kodit/indexing/repository.py,sha256=ZicLPXPKQxW6NnY_anmZ4nI1-FGkrJsqjg0NK-vvnTY,5117
22
+ kodit/indexing/service.py,sha256=rLWYI70VytlJAyZtQC5Xpqtj9f3EzbivzgeM_1L9BUU,5751
23
23
  kodit/retreival/__init__.py,sha256=33PhJU-3gtsqYq6A1UkaLNKbev_Zee9Lq6dYC59-CsA,69
24
24
  kodit/retreival/repository.py,sha256=1lqGgJHsBmvMGMzEYa-hrdXg2q7rqtYPl1cvBb7jMRE,3119
25
25
  kodit/retreival/service.py,sha256=9wvURtPPJVvPUWNIC2waIrJMxcm1Ka1J_xDEOEedAFU,2007
@@ -32,9 +32,9 @@ kodit/snippets/languages/python.scm,sha256=ee85R9PBzwye3IMTE7-iVoKWd_ViU3EJISTyr
32
32
  kodit/sources/__init__.py,sha256=1NTZyPdjThVQpZO1Mp1ColVsS7sqYanOVLqnoqV9Ipo,83
33
33
  kodit/sources/models.py,sha256=xb42CaNDO1CUB8SIW-xXMrB6Ji8cFw-yeJ550xBEg9Q,2398
34
34
  kodit/sources/repository.py,sha256=mGJrHWH6Uo8YABdoojHFbzaf_jW-2ywJpAHIa1gnc3U,3401
35
- kodit/sources/service.py,sha256=cBCxnOQKwGNi2e13_3Vue8MylAaUxb9XG4IgM636la0,6712
36
- kodit-0.1.6.dist-info/METADATA,sha256=1HHbQJLVCV6-1Kim8-_pwXq_HmSYUgBLObA_Q3Ck2l8,2181
37
- kodit-0.1.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
38
- kodit-0.1.6.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
39
- kodit-0.1.6.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
40
- kodit-0.1.6.dist-info/RECORD,,
35
+ kodit/sources/service.py,sha256=aEW43wpYr3AJvXRuYzCHCNqNWUaQSJy8lqr5TYlGIRs,9225
36
+ kodit-0.1.8.dist-info/METADATA,sha256=2TlteBGPvV-oVpOWibx6PzgpzTOfMyCWyLpyBuMpgzs,2214
37
+ kodit-0.1.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
38
+ kodit-0.1.8.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
39
+ kodit-0.1.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
40
+ kodit-0.1.8.dist-info/RECORD,,
File without changes