kodit 0.1.6__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- {kodit-0.1.6 → kodit-0.1.8}/PKG-INFO +2 -1
- {kodit-0.1.6 → kodit-0.1.8}/docs/_index.md +3 -3
- {kodit-0.1.6 → kodit-0.1.8}/pyproject.toml +1 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/_version.py +2 -2
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/cli.py +39 -96
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/indexing/repository.py +28 -4
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/indexing/service.py +10 -2
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/sources/service.py +97 -33
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/cli_test.py +4 -8
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/indexing/test_service.py +2 -3
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/sources/test_service.py +40 -3
- {kodit-0.1.6 → kodit-0.1.8}/tests/smoke.sh +4 -8
- {kodit-0.1.6 → kodit-0.1.8}/uv.lock +35 -0
- {kodit-0.1.6 → kodit-0.1.8}/.cursor/rules/kodit.mdc +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/.github/CODE_OF_CONDUCT.md +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/.github/CONTRIBUTING.md +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/.github/workflows/docker.yaml +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/.github/workflows/docs.yaml +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/.github/workflows/pypi-test.yaml +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/.github/workflows/pypi.yaml +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/.github/workflows/test.yaml +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/.gitignore +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/.python-version +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/.vscode/launch.json +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/.vscode/settings.json +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/Dockerfile +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/LICENSE +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/README.md +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/alembic.ini +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/docs/developer/index.md +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/.gitignore +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/__init__.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/alembic/README +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/alembic/__init__.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/alembic/env.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/alembic/script.py.mako +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/alembic/versions/85155663351e_initial.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/alembic/versions/__init__.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/app.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/bm25/__init__.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/bm25/bm25.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/config.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/database.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/indexing/__init__.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/indexing/models.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/logging.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/mcp.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/middleware.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/retreival/__init__.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/retreival/repository.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/retreival/service.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/snippets/__init__.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/snippets/languages/__init__.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/snippets/languages/csharp.scm +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/snippets/languages/python.scm +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/snippets/method_snippets.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/snippets/snippets.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/sources/__init__.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/sources/models.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/src/kodit/sources/repository.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/tests/__init__.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/tests/conftest.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/__init__.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/e2e.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/indexing/__init__.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/mcp_test.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/retreival/__init__.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/retreival/test_service.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/snippets/__init__.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/snippets/csharp.cs +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/snippets/detect_language_test.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/snippets/method_extraction_test.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/snippets/python.py +0 -0
- {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/sources/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kodit
|
|
3
|
-
Version: 0.1.6
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: Code indexing for better AI code generation
|
|
5
5
|
Project-URL: Homepage, https://docs.helixml.tech/kodit/
|
|
6
6
|
Project-URL: Documentation, https://docs.helixml.tech/kodit/
|
|
@@ -28,6 +28,7 @@ Requires-Dist: colorama>=0.4.6
|
|
|
28
28
|
Requires-Dist: dotenv>=0.9.9
|
|
29
29
|
Requires-Dist: fastapi[standard]>=0.115.12
|
|
30
30
|
Requires-Dist: fastmcp>=2.3.3
|
|
31
|
+
Requires-Dist: gitpython>=3.1.44
|
|
31
32
|
Requires-Dist: httpx-retries>=0.3.2
|
|
32
33
|
Requires-Dist: httpx>=0.28.1
|
|
33
34
|
Requires-Dist: posthog>=4.0.1
|
|
@@ -52,13 +52,13 @@ Use this if you want to use kodit as a python library:
|
|
|
52
52
|
pip install kodit
|
|
53
53
|
```
|
|
54
54
|
|
|
55
|
-
##
|
|
55
|
+
## Quick Start
|
|
56
56
|
|
|
57
57
|
Kodit has two key parts. A configuration CLI to manage what gets indexed and an MCP
|
|
58
58
|
server to expose your code to an AI coding assistant.
|
|
59
59
|
|
|
60
|
-
1.
|
|
61
|
-
2.
|
|
60
|
+
1. Index a local path: `kodit index /path/to/your/code`
|
|
61
|
+
2. Or index a public git repository: `kodit index https://github.com/pydantic/pydantic-ai`
|
|
62
62
|
3. Test retrieval on your index: `kodit retrieve "test"`
|
|
63
63
|
4. Start an MCP server: `kodit serve`
|
|
64
64
|
|
|
@@ -8,7 +8,7 @@ from typing import Any
|
|
|
8
8
|
import click
|
|
9
9
|
import structlog
|
|
10
10
|
import uvicorn
|
|
11
|
-
from pytable_formatter import Table
|
|
11
|
+
from pytable_formatter import Cell, Table
|
|
12
12
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
13
13
|
|
|
14
14
|
from kodit.config import (
|
|
@@ -84,110 +84,53 @@ def cli( # noqa: PLR0913
|
|
|
84
84
|
ctx.obj = config
|
|
85
85
|
|
|
86
86
|
|
|
87
|
-
@cli.group()
|
|
88
|
-
|
|
89
|
-
"""Manage code sources."""
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
@sources.command(name="list")
|
|
93
|
-
@with_app_context
|
|
94
|
-
@with_session
|
|
95
|
-
async def list_sources(session: AsyncSession, app_context: AppContext) -> None:
|
|
96
|
-
"""List all code sources."""
|
|
97
|
-
repository = SourceRepository(session)
|
|
98
|
-
service = SourceService(app_context.get_clone_dir(), repository)
|
|
99
|
-
sources = await service.list_sources()
|
|
100
|
-
|
|
101
|
-
# Define headers and data
|
|
102
|
-
headers = ["ID", "Created At", "URI"]
|
|
103
|
-
data = [[source.id, source.created_at, source.uri] for source in sources]
|
|
104
|
-
|
|
105
|
-
# Create and display the table
|
|
106
|
-
table = Table(headers=headers, data=data)
|
|
107
|
-
click.echo(table)
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
@sources.command(name="create")
|
|
111
|
-
@click.argument("uri")
|
|
112
|
-
@with_app_context
|
|
113
|
-
@with_session
|
|
114
|
-
async def create_source(
|
|
115
|
-
session: AsyncSession, app_context: AppContext, uri: str
|
|
116
|
-
) -> None:
|
|
117
|
-
"""Add a new code source."""
|
|
118
|
-
repository = SourceRepository(session)
|
|
119
|
-
service = SourceService(app_context.get_clone_dir(), repository)
|
|
120
|
-
source = await service.create(uri)
|
|
121
|
-
click.echo(f"Source created: {source.id}")
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
@cli.group()
|
|
125
|
-
def indexes() -> None:
|
|
126
|
-
"""Manage indexes."""
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
@indexes.command(name="create")
|
|
130
|
-
@click.argument("source_id")
|
|
87
|
+
@cli.command()
|
|
88
|
+
@click.argument("sources", nargs=-1)
|
|
131
89
|
@with_app_context
|
|
132
90
|
@with_session
|
|
133
|
-
async def
|
|
134
|
-
session: AsyncSession,
|
|
91
|
+
async def index(
|
|
92
|
+
session: AsyncSession,
|
|
93
|
+
app_context: AppContext,
|
|
94
|
+
sources: list[str],
|
|
135
95
|
) -> None:
|
|
136
|
-
"""
|
|
96
|
+
"""List indexes, or index data sources."""
|
|
137
97
|
source_repository = SourceRepository(session)
|
|
138
98
|
source_service = SourceService(app_context.get_clone_dir(), source_repository)
|
|
139
99
|
repository = IndexRepository(session)
|
|
140
100
|
service = IndexService(repository, source_service, app_context.get_data_dir())
|
|
141
|
-
index = await service.create(source_id)
|
|
142
|
-
click.echo(f"Index created: {index.id}")
|
|
143
101
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
service = IndexService(repository, source_service, app_context.get_data_dir())
|
|
154
|
-
indexes = await service.list_indexes()
|
|
155
|
-
|
|
156
|
-
# Define headers and data
|
|
157
|
-
headers = [
|
|
158
|
-
"ID",
|
|
159
|
-
"Created At",
|
|
160
|
-
"Updated At",
|
|
161
|
-
"Num Snippets",
|
|
162
|
-
]
|
|
163
|
-
data = [
|
|
164
|
-
[
|
|
165
|
-
index.id,
|
|
166
|
-
index.created_at,
|
|
167
|
-
index.updated_at,
|
|
168
|
-
index.num_snippets,
|
|
102
|
+
if not sources:
|
|
103
|
+
# No source specified, list all indexes
|
|
104
|
+
indexes = await service.list_indexes()
|
|
105
|
+
headers: list[str | Cell] = [
|
|
106
|
+
"ID",
|
|
107
|
+
"Created At",
|
|
108
|
+
"Updated At",
|
|
109
|
+
"Source",
|
|
110
|
+
"Num Snippets",
|
|
169
111
|
]
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
)
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
112
|
+
data = [
|
|
113
|
+
[
|
|
114
|
+
index.id,
|
|
115
|
+
index.created_at,
|
|
116
|
+
index.updated_at,
|
|
117
|
+
index.source,
|
|
118
|
+
index.num_snippets,
|
|
119
|
+
]
|
|
120
|
+
for index in indexes
|
|
121
|
+
]
|
|
122
|
+
click.echo(Table(headers=headers, data=data))
|
|
123
|
+
return
|
|
124
|
+
# Handle source indexing
|
|
125
|
+
for source in sources:
|
|
126
|
+
if Path(source).is_file():
|
|
127
|
+
msg = "File indexing is not implemented yet"
|
|
128
|
+
raise click.UsageError(msg)
|
|
129
|
+
|
|
130
|
+
# Index directory
|
|
131
|
+
s = await source_service.create(source)
|
|
132
|
+
index = await service.create(s.id)
|
|
133
|
+
await service.run(index.id)
|
|
191
134
|
|
|
192
135
|
|
|
193
136
|
@cli.command()
|
|
@@ -8,7 +8,7 @@ and retrieving index information with their associated metadata.
|
|
|
8
8
|
from datetime import UTC, datetime
|
|
9
9
|
from typing import TypeVar
|
|
10
10
|
|
|
11
|
-
from sqlalchemy import func, select
|
|
11
|
+
from sqlalchemy import delete, func, select
|
|
12
12
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
13
13
|
|
|
14
14
|
from kodit.indexing.models import Index, Snippet
|
|
@@ -63,6 +63,17 @@ class IndexRepository:
|
|
|
63
63
|
result = await self.session.execute(query)
|
|
64
64
|
return result.scalar_one_or_none()
|
|
65
65
|
|
|
66
|
+
async def get_by_source_id(self, source_id: int) -> Index | None:
|
|
67
|
+
"""Get an index by its source ID.
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
source_id: The ID of the source to retrieve an index for.
|
|
71
|
+
|
|
72
|
+
"""
|
|
73
|
+
query = select(Index).where(Index.source_id == source_id)
|
|
74
|
+
result = await self.session.execute(query)
|
|
75
|
+
return result.scalar_one_or_none()
|
|
76
|
+
|
|
66
77
|
async def files_for_index(self, index_id: int) -> list[File]:
|
|
67
78
|
"""Get all files for an index.
|
|
68
79
|
|
|
@@ -82,7 +93,7 @@ class IndexRepository:
|
|
|
82
93
|
result = await self.session.execute(query)
|
|
83
94
|
return list(result.scalars())
|
|
84
95
|
|
|
85
|
-
async def list_indexes(self) -> list[Index]:
|
|
96
|
+
async def list_indexes(self) -> list[tuple[Index, Source]]:
|
|
86
97
|
"""List all indexes.
|
|
87
98
|
|
|
88
99
|
Returns:
|
|
@@ -90,9 +101,11 @@ class IndexRepository:
|
|
|
90
101
|
and counts of files and snippets.
|
|
91
102
|
|
|
92
103
|
"""
|
|
93
|
-
query = select(Index).
|
|
104
|
+
query = select(Index, Source).join(
|
|
105
|
+
Source, Index.source_id == Source.id, full=True
|
|
106
|
+
)
|
|
94
107
|
result = await self.session.execute(query)
|
|
95
|
-
return list(result.scalars())
|
|
108
|
+
return list(result.tuples())
|
|
96
109
|
|
|
97
110
|
async def num_snippets_for_index(self, index_id: int) -> int:
|
|
98
111
|
"""Get the number of snippets for an index."""
|
|
@@ -120,6 +133,17 @@ class IndexRepository:
|
|
|
120
133
|
self.session.add(snippet)
|
|
121
134
|
await self.session.commit()
|
|
122
135
|
|
|
136
|
+
async def delete_all_snippets(self, index_id: int) -> None:
|
|
137
|
+
"""Delete all snippets for an index.
|
|
138
|
+
|
|
139
|
+
Args:
|
|
140
|
+
index_id: The ID of the index to delete snippets for.
|
|
141
|
+
|
|
142
|
+
"""
|
|
143
|
+
query = delete(Snippet).where(Snippet.index_id == index_id)
|
|
144
|
+
await self.session.execute(query)
|
|
145
|
+
await self.session.commit()
|
|
146
|
+
|
|
123
147
|
async def get_snippets_for_index(self, index_id: int) -> list[Snippet]:
|
|
124
148
|
"""Get all snippets for an index.
|
|
125
149
|
|
|
@@ -33,6 +33,7 @@ class IndexView(pydantic.BaseModel):
|
|
|
33
33
|
id: int
|
|
34
34
|
created_at: datetime
|
|
35
35
|
updated_at: datetime | None = None
|
|
36
|
+
source: str | None = None
|
|
36
37
|
num_snippets: int | None = None
|
|
37
38
|
|
|
38
39
|
|
|
@@ -82,7 +83,10 @@ class IndexService:
|
|
|
82
83
|
# Check if the source exists
|
|
83
84
|
source = await self.source_service.get(source_id)
|
|
84
85
|
|
|
85
|
-
index
|
|
86
|
+
# Check if the index already exists
|
|
87
|
+
index = await self.repository.get_by_source_id(source.id)
|
|
88
|
+
if not index:
|
|
89
|
+
index = await self.repository.create(source.id)
|
|
86
90
|
return IndexView(
|
|
87
91
|
id=index.id,
|
|
88
92
|
created_at=index.created_at,
|
|
@@ -105,8 +109,9 @@ class IndexService:
|
|
|
105
109
|
created_at=index.created_at,
|
|
106
110
|
updated_at=index.updated_at,
|
|
107
111
|
num_snippets=await self.repository.num_snippets_for_index(index.id),
|
|
112
|
+
source=source.uri,
|
|
108
113
|
)
|
|
109
|
-
for index in indexes
|
|
114
|
+
for index, source in indexes
|
|
110
115
|
]
|
|
111
116
|
|
|
112
117
|
async def run(self, index_id: int) -> None:
|
|
@@ -117,6 +122,9 @@ class IndexService:
|
|
|
117
122
|
msg = f"Index not found: {index_id}"
|
|
118
123
|
raise ValueError(msg)
|
|
119
124
|
|
|
125
|
+
# First delete all old snippets, if they exist
|
|
126
|
+
await self.repository.delete_all_snippets(index_id)
|
|
127
|
+
|
|
120
128
|
# Create snippets for supported file types
|
|
121
129
|
await self._create_snippets(index_id)
|
|
122
130
|
|
|
@@ -13,6 +13,7 @@ from hashlib import sha256
|
|
|
13
13
|
from pathlib import Path
|
|
14
14
|
|
|
15
15
|
import aiofiles
|
|
16
|
+
import git
|
|
16
17
|
import pydantic
|
|
17
18
|
import structlog
|
|
18
19
|
from tqdm import tqdm
|
|
@@ -98,8 +99,19 @@ class SourceService:
|
|
|
98
99
|
parsed = urisplit(uri_or_path_like)
|
|
99
100
|
if parsed.scheme == "file":
|
|
100
101
|
return await self._create_folder_source(Path(parsed.path))
|
|
101
|
-
|
|
102
|
-
|
|
102
|
+
if parsed.scheme in ("git", "http", "https") and parsed.path.endswith(
|
|
103
|
+
".git"
|
|
104
|
+
):
|
|
105
|
+
return await self._create_git_source(uri_or_path_like)
|
|
106
|
+
|
|
107
|
+
# Try adding a .git suffix, sometimes people just pass the url
|
|
108
|
+
if not uri_or_path_like.endswith(".git"):
|
|
109
|
+
uri_or_path_like = uri_or_path_like + ".git"
|
|
110
|
+
try:
|
|
111
|
+
return await self._create_git_source(uri_or_path_like)
|
|
112
|
+
except ValueError:
|
|
113
|
+
pass
|
|
114
|
+
|
|
103
115
|
msg = f"Unsupported source type: {uri_or_path_like}"
|
|
104
116
|
raise ValueError(msg)
|
|
105
117
|
|
|
@@ -110,46 +122,98 @@ class SourceService:
|
|
|
110
122
|
directory: The path to the local directory.
|
|
111
123
|
|
|
112
124
|
Raises:
|
|
113
|
-
ValueError: If the folder doesn't exist
|
|
125
|
+
ValueError: If the folder doesn't exist.
|
|
126
|
+
SourceAlreadyExistsError: If the folder is already added.
|
|
114
127
|
|
|
115
128
|
"""
|
|
116
|
-
|
|
117
|
-
|
|
129
|
+
source = await self.repository.get_source_by_uri(directory.as_uri())
|
|
130
|
+
if source:
|
|
131
|
+
self.log.info("Source already exists, reusing...", source_id=source.id)
|
|
132
|
+
else:
|
|
133
|
+
# Resolve the directory to an absolute path
|
|
134
|
+
directory = directory.expanduser().resolve()
|
|
135
|
+
|
|
136
|
+
# Check if the folder exists
|
|
137
|
+
if not directory.exists():
|
|
138
|
+
msg = f"Folder does not exist: {directory}"
|
|
139
|
+
raise ValueError(msg)
|
|
140
|
+
|
|
141
|
+
# Check if the folder is already added
|
|
142
|
+
if await self.repository.get_source_by_uri(directory.as_uri()):
|
|
143
|
+
msg = f"Directory already added: {directory}"
|
|
144
|
+
raise ValueError(msg)
|
|
145
|
+
|
|
146
|
+
# Clone into a local directory
|
|
147
|
+
clone_path = self.clone_dir / directory.as_posix().replace("/", "_")
|
|
148
|
+
clone_path.mkdir(parents=True, exist_ok=True)
|
|
149
|
+
|
|
150
|
+
# Copy all files recursively, preserving directory structure, ignoring
|
|
151
|
+
# hidden files
|
|
152
|
+
shutil.copytree(
|
|
153
|
+
directory,
|
|
154
|
+
clone_path,
|
|
155
|
+
ignore=shutil.ignore_patterns(".*"),
|
|
156
|
+
dirs_exist_ok=True,
|
|
157
|
+
)
|
|
118
158
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
raise ValueError(msg)
|
|
159
|
+
source = await self.repository.create_source(
|
|
160
|
+
Source(uri=directory.as_uri(), cloned_path=str(clone_path)),
|
|
161
|
+
)
|
|
123
162
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
raise ValueError(msg)
|
|
163
|
+
# Add all files to the source
|
|
164
|
+
# Count total files for progress bar
|
|
165
|
+
file_count = sum(1 for _ in clone_path.rglob("*") if _.is_file())
|
|
128
166
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
# Copy all files recursively, preserving directory structure, ignoring hidden
|
|
134
|
-
# files
|
|
135
|
-
shutil.copytree(
|
|
136
|
-
directory,
|
|
137
|
-
clone_path,
|
|
138
|
-
ignore=shutil.ignore_patterns(".*"),
|
|
139
|
-
dirs_exist_ok=True,
|
|
140
|
-
)
|
|
167
|
+
# Process each file in the source directory
|
|
168
|
+
for path in tqdm(clone_path.rglob("*"), total=file_count):
|
|
169
|
+
await self._process_file(source.id, path.absolute())
|
|
141
170
|
|
|
142
|
-
|
|
143
|
-
|
|
171
|
+
return SourceView(
|
|
172
|
+
id=source.id,
|
|
173
|
+
uri=source.uri,
|
|
174
|
+
cloned_path=Path(source.cloned_path),
|
|
175
|
+
created_at=source.created_at,
|
|
176
|
+
num_files=await self.repository.num_files_for_source(source.id),
|
|
144
177
|
)
|
|
145
178
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
179
|
+
async def _create_git_source(self, uri: str) -> SourceView:
|
|
180
|
+
"""Create a git source.
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
uri: The URI of the git repository.
|
|
184
|
+
|
|
185
|
+
Raises:
|
|
186
|
+
ValueError: If the repository cloning fails.
|
|
187
|
+
|
|
188
|
+
"""
|
|
189
|
+
# Check if the repository is already added
|
|
190
|
+
source = await self.repository.get_source_by_uri(uri)
|
|
191
|
+
|
|
192
|
+
if source:
|
|
193
|
+
self.log.info("Source already exists, reusing...", source_id=source.id)
|
|
194
|
+
else:
|
|
195
|
+
# Create a unique directory name for the clone
|
|
196
|
+
clone_path = self.clone_dir / uri.replace("/", "_").replace(":", "_")
|
|
197
|
+
clone_path.mkdir(parents=True, exist_ok=True)
|
|
198
|
+
|
|
199
|
+
try:
|
|
200
|
+
# Clone the repository
|
|
201
|
+
git.Repo.clone_from(uri, clone_path)
|
|
202
|
+
except git.GitCommandError as e:
|
|
203
|
+
msg = f"Failed to clone repository: {e}"
|
|
204
|
+
raise ValueError(msg) from e
|
|
205
|
+
|
|
206
|
+
source = await self.repository.create_source(
|
|
207
|
+
Source(uri=uri, cloned_path=str(clone_path)),
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
# Add all files to the source
|
|
211
|
+
# Count total files for progress bar
|
|
212
|
+
file_count = sum(1 for _ in clone_path.rglob("*") if _.is_file())
|
|
149
213
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
214
|
+
# Process each file in the source directory
|
|
215
|
+
for path in tqdm(clone_path.rglob("*"), total=file_count):
|
|
216
|
+
await self._process_file(source.id, path.absolute())
|
|
153
217
|
|
|
154
218
|
return SourceView(
|
|
155
219
|
id=source.id,
|
|
@@ -37,9 +37,7 @@ def test_version_command(runner: CliRunner, default_cli_args: list[str]) -> None
|
|
|
37
37
|
def test_cli_vars_work(runner: CliRunner, default_cli_args: list[str]) -> None:
|
|
38
38
|
"""Test that cli args override env vars."""
|
|
39
39
|
runner.env = {"LOG_LEVEL": "INFO"}
|
|
40
|
-
result = runner.invoke(
|
|
41
|
-
cli, [*default_cli_args, "--log-level", "DEBUG", "sources", "list"]
|
|
42
|
-
)
|
|
40
|
+
result = runner.invoke(cli, [*default_cli_args, "--log-level", "DEBUG", "index"])
|
|
43
41
|
assert result.exit_code == 0
|
|
44
42
|
assert result.output.count("debug") > 10 # The db spits out lots of debug messages
|
|
45
43
|
|
|
@@ -47,7 +45,7 @@ def test_cli_vars_work(runner: CliRunner, default_cli_args: list[str]) -> None:
|
|
|
47
45
|
def test_env_vars_work(runner: CliRunner, default_cli_args: list[str]) -> None:
|
|
48
46
|
"""Test that env vars work."""
|
|
49
47
|
runner.env = {"LOG_LEVEL": "DEBUG"}
|
|
50
|
-
result = runner.invoke(cli, [*default_cli_args, "sources", "list"])
|
|
48
|
+
result = runner.invoke(cli, [*default_cli_args, "index"])
|
|
51
49
|
assert result.exit_code == 0
|
|
52
50
|
assert result.output.count("debug") > 10 # The db spits out lots of debug messages
|
|
53
51
|
|
|
@@ -57,9 +55,7 @@ def test_dotenv_file_works(runner: CliRunner, default_cli_args: list[str]) -> No
|
|
|
57
55
|
with tempfile.NamedTemporaryFile(delete=False) as f:
|
|
58
56
|
f.write(b"LOG_LEVEL=DEBUG")
|
|
59
57
|
f.flush()
|
|
60
|
-
result = runner.invoke(
|
|
61
|
-
cli, [*default_cli_args, "--env-file", f.name, "sources", "list"]
|
|
62
|
-
)
|
|
58
|
+
result = runner.invoke(cli, [*default_cli_args, "--env-file", f.name, "index"])
|
|
63
59
|
assert result.exit_code == 0
|
|
64
60
|
assert (
|
|
65
61
|
result.output.count("debug") > 10
|
|
@@ -69,7 +65,7 @@ def test_dotenv_file_works(runner: CliRunner, default_cli_args: list[str]) -> No
|
|
|
69
65
|
def test_dotenv_file_not_found(runner: CliRunner, default_cli_args: list[str]) -> None:
|
|
70
66
|
"""Test that the .env file not found error is raised."""
|
|
71
67
|
result = runner.invoke(
|
|
72
|
-
cli, [*default_cli_args, "--env-file", "nonexistent.env", "sources", "list"]
|
|
68
|
+
cli, [*default_cli_args, "--env-file", "nonexistent.env", "index"]
|
|
73
69
|
)
|
|
74
70
|
assert result.exit_code == 2
|
|
75
71
|
assert "does not exist" in result.output
|
|
@@ -88,9 +88,8 @@ async def test_create_index_already_exists(
|
|
|
88
88
|
# Create first index
|
|
89
89
|
await service.create(source.id)
|
|
90
90
|
|
|
91
|
-
# Try to create second index
|
|
92
|
-
|
|
93
|
-
await service.create(source.id)
|
|
91
|
+
# Try to create second index, should be fine
|
|
92
|
+
await service.create(source.id)
|
|
94
93
|
|
|
95
94
|
|
|
96
95
|
@pytest.mark.asyncio
|
|
@@ -2,7 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
from datetime import UTC, datetime, timedelta
|
|
4
4
|
from pathlib import Path
|
|
5
|
+
import shutil
|
|
5
6
|
|
|
7
|
+
import git
|
|
6
8
|
import pytest
|
|
7
9
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
10
|
|
|
@@ -55,9 +57,8 @@ async def test_create_source_already_added(
|
|
|
55
57
|
# Create a folder source
|
|
56
58
|
await service.create(str(test_dir))
|
|
57
59
|
|
|
58
|
-
# Try to create the same source again
|
|
59
|
-
|
|
60
|
-
await service.create(str(test_dir))
|
|
60
|
+
# Try to create the same source again, should be fine
|
|
61
|
+
await service.create(str(test_dir))
|
|
61
62
|
|
|
62
63
|
|
|
63
64
|
@pytest.mark.asyncio
|
|
@@ -108,3 +109,39 @@ async def test_create_source_list_source(
|
|
|
108
109
|
assert not (cloned_path / ".hidden-file").exists()
|
|
109
110
|
assert (cloned_path / "file1.txt").exists()
|
|
110
111
|
assert (cloned_path / "subdir" / "file2.txt").exists()
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@pytest.mark.asyncio
|
|
115
|
+
async def test_create_git_source(service: SourceService, tmp_path: Path) -> None:
|
|
116
|
+
"""Test creating a git source."""
|
|
117
|
+
# Create a temporary git repository
|
|
118
|
+
repo_path = tmp_path / "test_repo"
|
|
119
|
+
repo_path.mkdir()
|
|
120
|
+
repo = git.Repo.init(repo_path)
|
|
121
|
+
|
|
122
|
+
# Add some files to the repository
|
|
123
|
+
(repo_path / "file1.txt").write_text("Hello, world!")
|
|
124
|
+
(repo_path / "subdir").mkdir()
|
|
125
|
+
(repo_path / "subdir" / "file2.txt").write_text("Hello, world!")
|
|
126
|
+
|
|
127
|
+
# Commit the files
|
|
128
|
+
repo.index.add(["file1.txt", "subdir/file2.txt"])
|
|
129
|
+
repo.index.commit("Initial commit")
|
|
130
|
+
|
|
131
|
+
# Create a git source
|
|
132
|
+
source = await service.create(repo_path.as_uri())
|
|
133
|
+
assert source.id is not None
|
|
134
|
+
assert source.uri == repo_path.as_uri()
|
|
135
|
+
assert source.cloned_path.is_dir()
|
|
136
|
+
assert source.created_at is not None
|
|
137
|
+
assert source.num_files == 2
|
|
138
|
+
|
|
139
|
+
# Check that the files are present in the cloned directory
|
|
140
|
+
cloned_path = Path(source.cloned_path)
|
|
141
|
+
assert cloned_path.exists()
|
|
142
|
+
assert cloned_path.is_dir()
|
|
143
|
+
assert (cloned_path / "file1.txt").exists()
|
|
144
|
+
assert (cloned_path / "subdir" / "file2.txt").exists()
|
|
145
|
+
|
|
146
|
+
# Clean up
|
|
147
|
+
shutil.rmtree(repo_path)
|
|
@@ -24,14 +24,10 @@ echo "print('Hello, world!')" > $tmp_dir/test.py
|
|
|
24
24
|
# Test version command
|
|
25
25
|
$prefix kodit version
|
|
26
26
|
|
|
27
|
-
# Test
|
|
28
|
-
$prefix kodit
|
|
29
|
-
$prefix kodit
|
|
30
|
-
|
|
31
|
-
# Test indexes commands
|
|
32
|
-
$prefix kodit indexes list
|
|
33
|
-
$prefix kodit indexes create 1
|
|
34
|
-
$prefix kodit indexes run 1
|
|
27
|
+
# Test index command
|
|
28
|
+
$prefix kodit index $tmp_dir
|
|
29
|
+
$prefix kodit index https://github.com/winderai/analytics-ai-agent-demo
|
|
30
|
+
$prefix kodit index
|
|
35
31
|
|
|
36
32
|
# Test retrieve command
|
|
37
33
|
$prefix kodit retrieve "Hello"
|
|
@@ -433,6 +433,30 @@ wheels = [
|
|
|
433
433
|
{ url = "https://files.pythonhosted.org/packages/a0/e6/310d1fe6708b7338e1f48915a13d8bf00fd0599acdc7bf98da4fd20fcb66/fastmcp-2.3.4-py3-none-any.whl", hash = "sha256:12a45f72dd95aeaa1a6a56281fff96ca46929def3ccd9f9eb125cb97b722fbab", size = 96393, upload-time = "2025-05-15T00:54:49.714Z" },
|
|
434
434
|
]
|
|
435
435
|
|
|
436
|
+
[[package]]
|
|
437
|
+
name = "gitdb"
|
|
438
|
+
version = "4.0.12"
|
|
439
|
+
source = { registry = "https://pypi.org/simple/" }
|
|
440
|
+
dependencies = [
|
|
441
|
+
{ name = "smmap" },
|
|
442
|
+
]
|
|
443
|
+
sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684, upload-time = "2025-01-02T07:20:46.413Z" }
|
|
444
|
+
wheels = [
|
|
445
|
+
{ url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794, upload-time = "2025-01-02T07:20:43.624Z" },
|
|
446
|
+
]
|
|
447
|
+
|
|
448
|
+
[[package]]
|
|
449
|
+
name = "gitpython"
|
|
450
|
+
version = "3.1.44"
|
|
451
|
+
source = { registry = "https://pypi.org/simple/" }
|
|
452
|
+
dependencies = [
|
|
453
|
+
{ name = "gitdb" },
|
|
454
|
+
]
|
|
455
|
+
sdist = { url = "https://files.pythonhosted.org/packages/c0/89/37df0b71473153574a5cdef8f242de422a0f5d26d7a9e231e6f169b4ad14/gitpython-3.1.44.tar.gz", hash = "sha256:c87e30b26253bf5418b01b0660f818967f3c503193838337fe5e573331249269", size = 214196, upload-time = "2025-01-02T07:32:43.59Z" }
|
|
456
|
+
wheels = [
|
|
457
|
+
{ url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599, upload-time = "2025-01-02T07:32:40.731Z" },
|
|
458
|
+
]
|
|
459
|
+
|
|
436
460
|
[[package]]
|
|
437
461
|
name = "greenlet"
|
|
438
462
|
version = "3.2.2"
|
|
@@ -691,6 +715,7 @@ dependencies = [
|
|
|
691
715
|
{ name = "dotenv" },
|
|
692
716
|
{ name = "fastapi", extra = ["standard"] },
|
|
693
717
|
{ name = "fastmcp" },
|
|
718
|
+
{ name = "gitpython" },
|
|
694
719
|
{ name = "httpx" },
|
|
695
720
|
{ name = "httpx-retries" },
|
|
696
721
|
{ name = "posthog" },
|
|
@@ -727,6 +752,7 @@ requires-dist = [
|
|
|
727
752
|
{ name = "dotenv", specifier = ">=0.9.9" },
|
|
728
753
|
{ name = "fastapi", extras = ["standard"], specifier = ">=0.115.12" },
|
|
729
754
|
{ name = "fastmcp", specifier = ">=2.3.3" },
|
|
755
|
+
{ name = "gitpython", specifier = ">=3.1.44" },
|
|
730
756
|
{ name = "httpx", specifier = ">=0.28.1" },
|
|
731
757
|
{ name = "httpx-retries", specifier = ">=0.3.2" },
|
|
732
758
|
{ name = "posthog", specifier = ">=4.0.1" },
|
|
@@ -1528,6 +1554,15 @@ wheels = [
|
|
|
1528
1554
|
{ url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
|
|
1529
1555
|
]
|
|
1530
1556
|
|
|
1557
|
+
[[package]]
|
|
1558
|
+
name = "smmap"
|
|
1559
|
+
version = "5.0.2"
|
|
1560
|
+
source = { registry = "https://pypi.org/simple/" }
|
|
1561
|
+
sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329, upload-time = "2025-01-02T07:14:40.909Z" }
|
|
1562
|
+
wheels = [
|
|
1563
|
+
{ url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303, upload-time = "2025-01-02T07:14:38.724Z" },
|
|
1564
|
+
]
|
|
1565
|
+
|
|
1531
1566
|
[[package]]
|
|
1532
1567
|
name = "sniffio"
|
|
1533
1568
|
version = "1.3.1"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|