kodit 0.1.6__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (77)
  1. {kodit-0.1.6 → kodit-0.1.8}/PKG-INFO +2 -1
  2. {kodit-0.1.6 → kodit-0.1.8}/docs/_index.md +3 -3
  3. {kodit-0.1.6 → kodit-0.1.8}/pyproject.toml +1 -0
  4. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/_version.py +2 -2
  5. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/cli.py +39 -96
  6. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/indexing/repository.py +28 -4
  7. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/indexing/service.py +10 -2
  8. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/sources/service.py +97 -33
  9. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/cli_test.py +4 -8
  10. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/indexing/test_service.py +2 -3
  11. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/sources/test_service.py +40 -3
  12. {kodit-0.1.6 → kodit-0.1.8}/tests/smoke.sh +4 -8
  13. {kodit-0.1.6 → kodit-0.1.8}/uv.lock +35 -0
  14. {kodit-0.1.6 → kodit-0.1.8}/.cursor/rules/kodit.mdc +0 -0
  15. {kodit-0.1.6 → kodit-0.1.8}/.github/CODE_OF_CONDUCT.md +0 -0
  16. {kodit-0.1.6 → kodit-0.1.8}/.github/CONTRIBUTING.md +0 -0
  17. {kodit-0.1.6 → kodit-0.1.8}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  18. {kodit-0.1.6 → kodit-0.1.8}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  19. {kodit-0.1.6 → kodit-0.1.8}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  20. {kodit-0.1.6 → kodit-0.1.8}/.github/workflows/docker.yaml +0 -0
  21. {kodit-0.1.6 → kodit-0.1.8}/.github/workflows/docs.yaml +0 -0
  22. {kodit-0.1.6 → kodit-0.1.8}/.github/workflows/pypi-test.yaml +0 -0
  23. {kodit-0.1.6 → kodit-0.1.8}/.github/workflows/pypi.yaml +0 -0
  24. {kodit-0.1.6 → kodit-0.1.8}/.github/workflows/test.yaml +0 -0
  25. {kodit-0.1.6 → kodit-0.1.8}/.gitignore +0 -0
  26. {kodit-0.1.6 → kodit-0.1.8}/.python-version +0 -0
  27. {kodit-0.1.6 → kodit-0.1.8}/.vscode/launch.json +0 -0
  28. {kodit-0.1.6 → kodit-0.1.8}/.vscode/settings.json +0 -0
  29. {kodit-0.1.6 → kodit-0.1.8}/Dockerfile +0 -0
  30. {kodit-0.1.6 → kodit-0.1.8}/LICENSE +0 -0
  31. {kodit-0.1.6 → kodit-0.1.8}/README.md +0 -0
  32. {kodit-0.1.6 → kodit-0.1.8}/alembic.ini +0 -0
  33. {kodit-0.1.6 → kodit-0.1.8}/docs/developer/index.md +0 -0
  34. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/.gitignore +0 -0
  35. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/__init__.py +0 -0
  36. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/alembic/README +0 -0
  37. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/alembic/__init__.py +0 -0
  38. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/alembic/env.py +0 -0
  39. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/alembic/script.py.mako +0 -0
  40. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/alembic/versions/85155663351e_initial.py +0 -0
  41. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/alembic/versions/__init__.py +0 -0
  42. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/app.py +0 -0
  43. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/bm25/__init__.py +0 -0
  44. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/bm25/bm25.py +0 -0
  45. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/config.py +0 -0
  46. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/database.py +0 -0
  47. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/indexing/__init__.py +0 -0
  48. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/indexing/models.py +0 -0
  49. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/logging.py +0 -0
  50. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/mcp.py +0 -0
  51. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/middleware.py +0 -0
  52. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/retreival/__init__.py +0 -0
  53. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/retreival/repository.py +0 -0
  54. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/retreival/service.py +0 -0
  55. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/snippets/__init__.py +0 -0
  56. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/snippets/languages/__init__.py +0 -0
  57. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/snippets/languages/csharp.scm +0 -0
  58. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/snippets/languages/python.scm +0 -0
  59. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/snippets/method_snippets.py +0 -0
  60. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/snippets/snippets.py +0 -0
  61. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/sources/__init__.py +0 -0
  62. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/sources/models.py +0 -0
  63. {kodit-0.1.6 → kodit-0.1.8}/src/kodit/sources/repository.py +0 -0
  64. {kodit-0.1.6 → kodit-0.1.8}/tests/__init__.py +0 -0
  65. {kodit-0.1.6 → kodit-0.1.8}/tests/conftest.py +0 -0
  66. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/__init__.py +0 -0
  67. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/e2e.py +0 -0
  68. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/indexing/__init__.py +0 -0
  69. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/mcp_test.py +0 -0
  70. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/retreival/__init__.py +0 -0
  71. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/retreival/test_service.py +0 -0
  72. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/snippets/__init__.py +0 -0
  73. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/snippets/csharp.cs +0 -0
  74. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/snippets/detect_language_test.py +0 -0
  75. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/snippets/method_extraction_test.py +0 -0
  76. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/snippets/python.py +0 -0
  77. {kodit-0.1.6 → kodit-0.1.8}/tests/kodit/sources/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kodit
3
- Version: 0.1.6
3
+ Version: 0.1.8
4
4
  Summary: Code indexing for better AI code generation
5
5
  Project-URL: Homepage, https://docs.helixml.tech/kodit/
6
6
  Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -28,6 +28,7 @@ Requires-Dist: colorama>=0.4.6
28
28
  Requires-Dist: dotenv>=0.9.9
29
29
  Requires-Dist: fastapi[standard]>=0.115.12
30
30
  Requires-Dist: fastmcp>=2.3.3
31
+ Requires-Dist: gitpython>=3.1.44
31
32
  Requires-Dist: httpx-retries>=0.3.2
32
33
  Requires-Dist: httpx>=0.28.1
33
34
  Requires-Dist: posthog>=4.0.1
@@ -52,13 +52,13 @@ Use this if you want to use kodit as a python library:
52
52
  pip install kodit
53
53
  ```
54
54
 
55
- ## Usage
55
+ ## Quick Start
56
56
 
57
57
  Kodit has two key parts. A configuration CLI to manage what gets indexed and an MCP
58
58
  server to expose your code to an AI coding assistant.
59
59
 
60
- 1. Add a source: `kodit sources create /path/to/your/code`
61
- 2. Create an index on that source: `kodit indexes create 1`
60
+ 1. Index a local path: `kodit index /path/to/your/code`
61
+ 2. Or index a public git repository: `kodit index https://github.com/pydantic/pydantic-ai`
62
62
  3. Test retrieval on your index: `kodit retrieve "test"`
63
63
  4. Start an MCP server: `kodit serve`
64
64
 
@@ -43,6 +43,7 @@ dependencies = [
43
43
  "fastmcp>=2.3.3",
44
44
  "pydantic-settings>=2.9.1",
45
45
  "bm25s[core]>=0.2.12",
46
+ "gitpython>=3.1.44",
46
47
  ]
47
48
 
48
49
  [dependency-groups]
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.1.6'
21
- __version_tuple__ = version_tuple = (0, 1, 6)
20
+ __version__ = version = '0.1.8'
21
+ __version_tuple__ = version_tuple = (0, 1, 8)
@@ -8,7 +8,7 @@ from typing import Any
8
8
  import click
9
9
  import structlog
10
10
  import uvicorn
11
- from pytable_formatter import Table
11
+ from pytable_formatter import Cell, Table
12
12
  from sqlalchemy.ext.asyncio import AsyncSession
13
13
 
14
14
  from kodit.config import (
@@ -84,110 +84,53 @@ def cli( # noqa: PLR0913
84
84
  ctx.obj = config
85
85
 
86
86
 
87
- @cli.group()
88
- def sources() -> None:
89
- """Manage code sources."""
90
-
91
-
92
- @sources.command(name="list")
93
- @with_app_context
94
- @with_session
95
- async def list_sources(session: AsyncSession, app_context: AppContext) -> None:
96
- """List all code sources."""
97
- repository = SourceRepository(session)
98
- service = SourceService(app_context.get_clone_dir(), repository)
99
- sources = await service.list_sources()
100
-
101
- # Define headers and data
102
- headers = ["ID", "Created At", "URI"]
103
- data = [[source.id, source.created_at, source.uri] for source in sources]
104
-
105
- # Create and display the table
106
- table = Table(headers=headers, data=data)
107
- click.echo(table)
108
-
109
-
110
- @sources.command(name="create")
111
- @click.argument("uri")
112
- @with_app_context
113
- @with_session
114
- async def create_source(
115
- session: AsyncSession, app_context: AppContext, uri: str
116
- ) -> None:
117
- """Add a new code source."""
118
- repository = SourceRepository(session)
119
- service = SourceService(app_context.get_clone_dir(), repository)
120
- source = await service.create(uri)
121
- click.echo(f"Source created: {source.id}")
122
-
123
-
124
- @cli.group()
125
- def indexes() -> None:
126
- """Manage indexes."""
127
-
128
-
129
- @indexes.command(name="create")
130
- @click.argument("source_id")
87
+ @cli.command()
88
+ @click.argument("sources", nargs=-1)
131
89
  @with_app_context
132
90
  @with_session
133
- async def create_index(
134
- session: AsyncSession, app_context: AppContext, source_id: int
91
+ async def index(
92
+ session: AsyncSession,
93
+ app_context: AppContext,
94
+ sources: list[str],
135
95
  ) -> None:
136
- """Create an index for a source."""
96
+ """List indexes, or index data sources."""
137
97
  source_repository = SourceRepository(session)
138
98
  source_service = SourceService(app_context.get_clone_dir(), source_repository)
139
99
  repository = IndexRepository(session)
140
100
  service = IndexService(repository, source_service, app_context.get_data_dir())
141
- index = await service.create(source_id)
142
- click.echo(f"Index created: {index.id}")
143
101
 
144
-
145
- @indexes.command(name="list")
146
- @with_app_context
147
- @with_session
148
- async def list_indexes(session: AsyncSession, app_context: AppContext) -> None:
149
- """List all indexes."""
150
- source_repository = SourceRepository(session)
151
- source_service = SourceService(app_context.get_clone_dir(), source_repository)
152
- repository = IndexRepository(session)
153
- service = IndexService(repository, source_service, app_context.get_data_dir())
154
- indexes = await service.list_indexes()
155
-
156
- # Define headers and data
157
- headers = [
158
- "ID",
159
- "Created At",
160
- "Updated At",
161
- "Num Snippets",
162
- ]
163
- data = [
164
- [
165
- index.id,
166
- index.created_at,
167
- index.updated_at,
168
- index.num_snippets,
102
+ if not sources:
103
+ # No source specified, list all indexes
104
+ indexes = await service.list_indexes()
105
+ headers: list[str | Cell] = [
106
+ "ID",
107
+ "Created At",
108
+ "Updated At",
109
+ "Source",
110
+ "Num Snippets",
169
111
  ]
170
- for index in indexes
171
- ]
172
-
173
- # Create and display the table
174
- table = Table(headers=headers, data=data)
175
- click.echo(table)
176
-
177
-
178
- @indexes.command(name="run")
179
- @click.argument("index_id")
180
- @with_app_context
181
- @with_session
182
- async def run_index(
183
- session: AsyncSession, app_context: AppContext, index_id: int
184
- ) -> None:
185
- """Run an index."""
186
- source_repository = SourceRepository(session)
187
- source_service = SourceService(app_context.get_clone_dir(), source_repository)
188
- repository = IndexRepository(session)
189
- service = IndexService(repository, source_service, app_context.get_data_dir())
190
- await service.run(index_id)
112
+ data = [
113
+ [
114
+ index.id,
115
+ index.created_at,
116
+ index.updated_at,
117
+ index.source,
118
+ index.num_snippets,
119
+ ]
120
+ for index in indexes
121
+ ]
122
+ click.echo(Table(headers=headers, data=data))
123
+ return
124
+ # Handle source indexing
125
+ for source in sources:
126
+ if Path(source).is_file():
127
+ msg = "File indexing is not implemented yet"
128
+ raise click.UsageError(msg)
129
+
130
+ # Index directory
131
+ s = await source_service.create(source)
132
+ index = await service.create(s.id)
133
+ await service.run(index.id)
191
134
 
192
135
 
193
136
  @cli.command()
@@ -8,7 +8,7 @@ and retrieving index information with their associated metadata.
8
8
  from datetime import UTC, datetime
9
9
  from typing import TypeVar
10
10
 
11
- from sqlalchemy import func, select
11
+ from sqlalchemy import delete, func, select
12
12
  from sqlalchemy.ext.asyncio import AsyncSession
13
13
 
14
14
  from kodit.indexing.models import Index, Snippet
@@ -63,6 +63,17 @@ class IndexRepository:
63
63
  result = await self.session.execute(query)
64
64
  return result.scalar_one_or_none()
65
65
 
66
+ async def get_by_source_id(self, source_id: int) -> Index | None:
67
+ """Get an index by its source ID.
68
+
69
+ Args:
70
+ source_id: The ID of the source to retrieve an index for.
71
+
72
+ """
73
+ query = select(Index).where(Index.source_id == source_id)
74
+ result = await self.session.execute(query)
75
+ return result.scalar_one_or_none()
76
+
66
77
  async def files_for_index(self, index_id: int) -> list[File]:
67
78
  """Get all files for an index.
68
79
 
@@ -82,7 +93,7 @@ class IndexRepository:
82
93
  result = await self.session.execute(query)
83
94
  return list(result.scalars())
84
95
 
85
- async def list_indexes(self) -> list[Index]:
96
+ async def list_indexes(self) -> list[tuple[Index, Source]]:
86
97
  """List all indexes.
87
98
 
88
99
  Returns:
@@ -90,9 +101,11 @@ class IndexRepository:
90
101
  and counts of files and snippets.
91
102
 
92
103
  """
93
- query = select(Index).limit(10)
104
+ query = select(Index, Source).join(
105
+ Source, Index.source_id == Source.id, full=True
106
+ )
94
107
  result = await self.session.execute(query)
95
- return list(result.scalars())
108
+ return list(result.tuples())
96
109
 
97
110
  async def num_snippets_for_index(self, index_id: int) -> int:
98
111
  """Get the number of snippets for an index."""
@@ -120,6 +133,17 @@ class IndexRepository:
120
133
  self.session.add(snippet)
121
134
  await self.session.commit()
122
135
 
136
+ async def delete_all_snippets(self, index_id: int) -> None:
137
+ """Delete all snippets for an index.
138
+
139
+ Args:
140
+ index_id: The ID of the index to delete snippets for.
141
+
142
+ """
143
+ query = delete(Snippet).where(Snippet.index_id == index_id)
144
+ await self.session.execute(query)
145
+ await self.session.commit()
146
+
123
147
  async def get_snippets_for_index(self, index_id: int) -> list[Snippet]:
124
148
  """Get all snippets for an index.
125
149
 
@@ -33,6 +33,7 @@ class IndexView(pydantic.BaseModel):
33
33
  id: int
34
34
  created_at: datetime
35
35
  updated_at: datetime | None = None
36
+ source: str | None = None
36
37
  num_snippets: int | None = None
37
38
 
38
39
 
@@ -82,7 +83,10 @@ class IndexService:
82
83
  # Check if the source exists
83
84
  source = await self.source_service.get(source_id)
84
85
 
85
- index = await self.repository.create(source.id)
86
+ # Check if the index already exists
87
+ index = await self.repository.get_by_source_id(source.id)
88
+ if not index:
89
+ index = await self.repository.create(source.id)
86
90
  return IndexView(
87
91
  id=index.id,
88
92
  created_at=index.created_at,
@@ -105,8 +109,9 @@ class IndexService:
105
109
  created_at=index.created_at,
106
110
  updated_at=index.updated_at,
107
111
  num_snippets=await self.repository.num_snippets_for_index(index.id),
112
+ source=source.uri,
108
113
  )
109
- for index in indexes
114
+ for index, source in indexes
110
115
  ]
111
116
 
112
117
  async def run(self, index_id: int) -> None:
@@ -117,6 +122,9 @@ class IndexService:
117
122
  msg = f"Index not found: {index_id}"
118
123
  raise ValueError(msg)
119
124
 
125
+ # First delete all old snippets, if they exist
126
+ await self.repository.delete_all_snippets(index_id)
127
+
120
128
  # Create snippets for supported file types
121
129
  await self._create_snippets(index_id)
122
130
 
@@ -13,6 +13,7 @@ from hashlib import sha256
13
13
  from pathlib import Path
14
14
 
15
15
  import aiofiles
16
+ import git
16
17
  import pydantic
17
18
  import structlog
18
19
  from tqdm import tqdm
@@ -98,8 +99,19 @@ class SourceService:
98
99
  parsed = urisplit(uri_or_path_like)
99
100
  if parsed.scheme == "file":
100
101
  return await self._create_folder_source(Path(parsed.path))
101
- msg = f"Unsupported source type: {uri_or_path_like}"
102
- raise ValueError(msg)
102
+ if parsed.scheme in ("git", "http", "https") and parsed.path.endswith(
103
+ ".git"
104
+ ):
105
+ return await self._create_git_source(uri_or_path_like)
106
+
107
+ # Try adding a .git suffix, sometimes people just pass the url
108
+ if not uri_or_path_like.endswith(".git"):
109
+ uri_or_path_like = uri_or_path_like + ".git"
110
+ try:
111
+ return await self._create_git_source(uri_or_path_like)
112
+ except ValueError:
113
+ pass
114
+
103
115
  msg = f"Unsupported source type: {uri_or_path_like}"
104
116
  raise ValueError(msg)
105
117
 
@@ -110,46 +122,98 @@ class SourceService:
110
122
  directory: The path to the local directory.
111
123
 
112
124
  Raises:
113
- ValueError: If the folder doesn't exist or is already added.
125
+ ValueError: If the folder doesn't exist.
126
+ SourceAlreadyExistsError: If the folder is already added.
114
127
 
115
128
  """
116
- # Resolve the directory to an absolute path
117
- directory = directory.expanduser().resolve()
129
+ source = await self.repository.get_source_by_uri(directory.as_uri())
130
+ if source:
131
+ self.log.info("Source already exists, reusing...", source_id=source.id)
132
+ else:
133
+ # Resolve the directory to an absolute path
134
+ directory = directory.expanduser().resolve()
135
+
136
+ # Check if the folder exists
137
+ if not directory.exists():
138
+ msg = f"Folder does not exist: {directory}"
139
+ raise ValueError(msg)
140
+
141
+ # Check if the folder is already added
142
+ if await self.repository.get_source_by_uri(directory.as_uri()):
143
+ msg = f"Directory already added: {directory}"
144
+ raise ValueError(msg)
145
+
146
+ # Clone into a local directory
147
+ clone_path = self.clone_dir / directory.as_posix().replace("/", "_")
148
+ clone_path.mkdir(parents=True, exist_ok=True)
149
+
150
+ # Copy all files recursively, preserving directory structure, ignoring
151
+ # hidden files
152
+ shutil.copytree(
153
+ directory,
154
+ clone_path,
155
+ ignore=shutil.ignore_patterns(".*"),
156
+ dirs_exist_ok=True,
157
+ )
118
158
 
119
- # Check if the folder exists
120
- if not directory.exists():
121
- msg = f"Folder does not exist: {directory}"
122
- raise ValueError(msg)
159
+ source = await self.repository.create_source(
160
+ Source(uri=directory.as_uri(), cloned_path=str(clone_path)),
161
+ )
123
162
 
124
- # Check if the folder is already added
125
- if await self.repository.get_source_by_uri(directory.as_uri()):
126
- msg = f"Directory already added: {directory}"
127
- raise ValueError(msg)
163
+ # Add all files to the source
164
+ # Count total files for progress bar
165
+ file_count = sum(1 for _ in clone_path.rglob("*") if _.is_file())
128
166
 
129
- # Clone into a local directory
130
- clone_path = self.clone_dir / directory.as_posix().replace("/", "_")
131
- clone_path.mkdir(parents=True, exist_ok=True)
132
-
133
- # Copy all files recursively, preserving directory structure, ignoring hidden
134
- # files
135
- shutil.copytree(
136
- directory,
137
- clone_path,
138
- ignore=shutil.ignore_patterns(".*"),
139
- dirs_exist_ok=True,
140
- )
167
+ # Process each file in the source directory
168
+ for path in tqdm(clone_path.rglob("*"), total=file_count):
169
+ await self._process_file(source.id, path.absolute())
141
170
 
142
- source = await self.repository.create_source(
143
- Source(uri=directory.as_uri(), cloned_path=str(clone_path)),
171
+ return SourceView(
172
+ id=source.id,
173
+ uri=source.uri,
174
+ cloned_path=Path(source.cloned_path),
175
+ created_at=source.created_at,
176
+ num_files=await self.repository.num_files_for_source(source.id),
144
177
  )
145
178
 
146
- # Add all files to the source
147
- # Count total files for progress bar
148
- file_count = sum(1 for _ in clone_path.rglob("*") if _.is_file())
179
+ async def _create_git_source(self, uri: str) -> SourceView:
180
+ """Create a git source.
181
+
182
+ Args:
183
+ uri: The URI of the git repository.
184
+
185
+ Raises:
186
+ ValueError: If the repository cloning fails.
187
+
188
+ """
189
+ # Check if the repository is already added
190
+ source = await self.repository.get_source_by_uri(uri)
191
+
192
+ if source:
193
+ self.log.info("Source already exists, reusing...", source_id=source.id)
194
+ else:
195
+ # Create a unique directory name for the clone
196
+ clone_path = self.clone_dir / uri.replace("/", "_").replace(":", "_")
197
+ clone_path.mkdir(parents=True, exist_ok=True)
198
+
199
+ try:
200
+ # Clone the repository
201
+ git.Repo.clone_from(uri, clone_path)
202
+ except git.GitCommandError as e:
203
+ msg = f"Failed to clone repository: {e}"
204
+ raise ValueError(msg) from e
205
+
206
+ source = await self.repository.create_source(
207
+ Source(uri=uri, cloned_path=str(clone_path)),
208
+ )
209
+
210
+ # Add all files to the source
211
+ # Count total files for progress bar
212
+ file_count = sum(1 for _ in clone_path.rglob("*") if _.is_file())
149
213
 
150
- # Process each file in the source directory
151
- for path in tqdm(clone_path.rglob("*"), total=file_count):
152
- await self._process_file(source.id, path.absolute())
214
+ # Process each file in the source directory
215
+ for path in tqdm(clone_path.rglob("*"), total=file_count):
216
+ await self._process_file(source.id, path.absolute())
153
217
 
154
218
  return SourceView(
155
219
  id=source.id,
@@ -37,9 +37,7 @@ def test_version_command(runner: CliRunner, default_cli_args: list[str]) -> None
37
37
  def test_cli_vars_work(runner: CliRunner, default_cli_args: list[str]) -> None:
38
38
  """Test that cli args override env vars."""
39
39
  runner.env = {"LOG_LEVEL": "INFO"}
40
- result = runner.invoke(
41
- cli, [*default_cli_args, "--log-level", "DEBUG", "sources", "list"]
42
- )
40
+ result = runner.invoke(cli, [*default_cli_args, "--log-level", "DEBUG", "index"])
43
41
  assert result.exit_code == 0
44
42
  assert result.output.count("debug") > 10 # The db spits out lots of debug messages
45
43
 
@@ -47,7 +45,7 @@ def test_cli_vars_work(runner: CliRunner, default_cli_args: list[str]) -> None:
47
45
  def test_env_vars_work(runner: CliRunner, default_cli_args: list[str]) -> None:
48
46
  """Test that env vars work."""
49
47
  runner.env = {"LOG_LEVEL": "DEBUG"}
50
- result = runner.invoke(cli, [*default_cli_args, "sources", "list"])
48
+ result = runner.invoke(cli, [*default_cli_args, "index"])
51
49
  assert result.exit_code == 0
52
50
  assert result.output.count("debug") > 10 # The db spits out lots of debug messages
53
51
 
@@ -57,9 +55,7 @@ def test_dotenv_file_works(runner: CliRunner, default_cli_args: list[str]) -> No
57
55
  with tempfile.NamedTemporaryFile(delete=False) as f:
58
56
  f.write(b"LOG_LEVEL=DEBUG")
59
57
  f.flush()
60
- result = runner.invoke(
61
- cli, [*default_cli_args, "--env-file", f.name, "sources", "list"]
62
- )
58
+ result = runner.invoke(cli, [*default_cli_args, "--env-file", f.name, "index"])
63
59
  assert result.exit_code == 0
64
60
  assert (
65
61
  result.output.count("debug") > 10
@@ -69,7 +65,7 @@ def test_dotenv_file_works(runner: CliRunner, default_cli_args: list[str]) -> No
69
65
  def test_dotenv_file_not_found(runner: CliRunner, default_cli_args: list[str]) -> None:
70
66
  """Test that the .env file not found error is raised."""
71
67
  result = runner.invoke(
72
- cli, [*default_cli_args, "--env-file", "nonexistent.env", "sources", "list"]
68
+ cli, [*default_cli_args, "--env-file", "nonexistent.env", "index"]
73
69
  )
74
70
  assert result.exit_code == 2
75
71
  assert "does not exist" in result.output
@@ -88,9 +88,8 @@ async def test_create_index_already_exists(
88
88
  # Create first index
89
89
  await service.create(source.id)
90
90
 
91
- # Try to create second index
92
- with pytest.raises(IntegrityError, match="UNIQUE constraint failed"):
93
- await service.create(source.id)
91
+ # Try to create second index, should be fine
92
+ await service.create(source.id)
94
93
 
95
94
 
96
95
  @pytest.mark.asyncio
@@ -2,7 +2,9 @@
2
2
 
3
3
  from datetime import UTC, datetime, timedelta
4
4
  from pathlib import Path
5
+ import shutil
5
6
 
7
+ import git
6
8
  import pytest
7
9
  from sqlalchemy.ext.asyncio import AsyncSession
8
10
 
@@ -55,9 +57,8 @@ async def test_create_source_already_added(
55
57
  # Create a folder source
56
58
  await service.create(str(test_dir))
57
59
 
58
- # Try to create the same source again
59
- with pytest.raises(ValueError, match=f"Directory already added: {test_dir}"):
60
- await service.create(str(test_dir))
60
+ # Try to create the same source again, should be fine
61
+ await service.create(str(test_dir))
61
62
 
62
63
 
63
64
  @pytest.mark.asyncio
@@ -108,3 +109,39 @@ async def test_create_source_list_source(
108
109
  assert not (cloned_path / ".hidden-file").exists()
109
110
  assert (cloned_path / "file1.txt").exists()
110
111
  assert (cloned_path / "subdir" / "file2.txt").exists()
112
+
113
+
114
+ @pytest.mark.asyncio
115
+ async def test_create_git_source(service: SourceService, tmp_path: Path) -> None:
116
+ """Test creating a git source."""
117
+ # Create a temporary git repository
118
+ repo_path = tmp_path / "test_repo"
119
+ repo_path.mkdir()
120
+ repo = git.Repo.init(repo_path)
121
+
122
+ # Add some files to the repository
123
+ (repo_path / "file1.txt").write_text("Hello, world!")
124
+ (repo_path / "subdir").mkdir()
125
+ (repo_path / "subdir" / "file2.txt").write_text("Hello, world!")
126
+
127
+ # Commit the files
128
+ repo.index.add(["file1.txt", "subdir/file2.txt"])
129
+ repo.index.commit("Initial commit")
130
+
131
+ # Create a git source
132
+ source = await service.create(repo_path.as_uri())
133
+ assert source.id is not None
134
+ assert source.uri == repo_path.as_uri()
135
+ assert source.cloned_path.is_dir()
136
+ assert source.created_at is not None
137
+ assert source.num_files == 2
138
+
139
+ # Check that the files are present in the cloned directory
140
+ cloned_path = Path(source.cloned_path)
141
+ assert cloned_path.exists()
142
+ assert cloned_path.is_dir()
143
+ assert (cloned_path / "file1.txt").exists()
144
+ assert (cloned_path / "subdir" / "file2.txt").exists()
145
+
146
+ # Clean up
147
+ shutil.rmtree(repo_path)
@@ -24,14 +24,10 @@ echo "print('Hello, world!')" > $tmp_dir/test.py
24
24
  # Test version command
25
25
  $prefix kodit version
26
26
 
27
- # Test sources commands
28
- $prefix kodit sources list
29
- $prefix kodit sources create $tmp_dir
30
-
31
- # Test indexes commands
32
- $prefix kodit indexes list
33
- $prefix kodit indexes create 1
34
- $prefix kodit indexes run 1
27
+ # Test index command
28
+ $prefix kodit index $tmp_dir
29
+ $prefix kodit index https://github.com/winderai/analytics-ai-agent-demo
30
+ $prefix kodit index
35
31
 
36
32
  # Test retrieve command
37
33
  $prefix kodit retrieve "Hello"
@@ -433,6 +433,30 @@ wheels = [
433
433
  { url = "https://files.pythonhosted.org/packages/a0/e6/310d1fe6708b7338e1f48915a13d8bf00fd0599acdc7bf98da4fd20fcb66/fastmcp-2.3.4-py3-none-any.whl", hash = "sha256:12a45f72dd95aeaa1a6a56281fff96ca46929def3ccd9f9eb125cb97b722fbab", size = 96393, upload-time = "2025-05-15T00:54:49.714Z" },
434
434
  ]
435
435
 
436
+ [[package]]
437
+ name = "gitdb"
438
+ version = "4.0.12"
439
+ source = { registry = "https://pypi.org/simple/" }
440
+ dependencies = [
441
+ { name = "smmap" },
442
+ ]
443
+ sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684, upload-time = "2025-01-02T07:20:46.413Z" }
444
+ wheels = [
445
+ { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794, upload-time = "2025-01-02T07:20:43.624Z" },
446
+ ]
447
+
448
+ [[package]]
449
+ name = "gitpython"
450
+ version = "3.1.44"
451
+ source = { registry = "https://pypi.org/simple/" }
452
+ dependencies = [
453
+ { name = "gitdb" },
454
+ ]
455
+ sdist = { url = "https://files.pythonhosted.org/packages/c0/89/37df0b71473153574a5cdef8f242de422a0f5d26d7a9e231e6f169b4ad14/gitpython-3.1.44.tar.gz", hash = "sha256:c87e30b26253bf5418b01b0660f818967f3c503193838337fe5e573331249269", size = 214196, upload-time = "2025-01-02T07:32:43.59Z" }
456
+ wheels = [
457
+ { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599, upload-time = "2025-01-02T07:32:40.731Z" },
458
+ ]
459
+
436
460
  [[package]]
437
461
  name = "greenlet"
438
462
  version = "3.2.2"
@@ -691,6 +715,7 @@ dependencies = [
691
715
  { name = "dotenv" },
692
716
  { name = "fastapi", extra = ["standard"] },
693
717
  { name = "fastmcp" },
718
+ { name = "gitpython" },
694
719
  { name = "httpx" },
695
720
  { name = "httpx-retries" },
696
721
  { name = "posthog" },
@@ -727,6 +752,7 @@ requires-dist = [
727
752
  { name = "dotenv", specifier = ">=0.9.9" },
728
753
  { name = "fastapi", extras = ["standard"], specifier = ">=0.115.12" },
729
754
  { name = "fastmcp", specifier = ">=2.3.3" },
755
+ { name = "gitpython", specifier = ">=3.1.44" },
730
756
  { name = "httpx", specifier = ">=0.28.1" },
731
757
  { name = "httpx-retries", specifier = ">=0.3.2" },
732
758
  { name = "posthog", specifier = ">=4.0.1" },
@@ -1528,6 +1554,15 @@ wheels = [
1528
1554
  { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
1529
1555
  ]
1530
1556
 
1557
+ [[package]]
1558
+ name = "smmap"
1559
+ version = "5.0.2"
1560
+ source = { registry = "https://pypi.org/simple/" }
1561
+ sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329, upload-time = "2025-01-02T07:14:40.909Z" }
1562
+ wheels = [
1563
+ { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303, upload-time = "2025-01-02T07:14:38.724Z" },
1564
+ ]
1565
+
1531
1566
  [[package]]
1532
1567
  name = "sniffio"
1533
1568
  version = "1.3.1"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes