kodit 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (95) hide show
  1. kodit/_version.py +2 -2
  2. kodit/app.py +53 -23
  3. kodit/application/factories/reporting_factory.py +6 -2
  4. kodit/application/factories/server_factory.py +311 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +543 -0
  7. kodit/application/services/indexing_worker_service.py +13 -44
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +0 -2
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -753
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +1 -94
  14. kodit/database.py +38 -1
  15. kodit/domain/{entities.py → entities/__init__.py} +50 -195
  16. kodit/domain/entities/git.py +190 -0
  17. kodit/domain/factories/__init__.py +1 -0
  18. kodit/domain/factories/git_repo_factory.py +76 -0
  19. kodit/domain/protocols.py +263 -64
  20. kodit/domain/services/bm25_service.py +5 -1
  21. kodit/domain/services/embedding_service.py +3 -0
  22. kodit/domain/services/git_repository_service.py +429 -0
  23. kodit/domain/services/git_service.py +300 -0
  24. kodit/domain/services/task_status_query_service.py +2 -2
  25. kodit/domain/value_objects.py +83 -114
  26. kodit/infrastructure/api/client/__init__.py +0 -2
  27. kodit/infrastructure/api/v1/__init__.py +0 -4
  28. kodit/infrastructure/api/v1/dependencies.py +92 -46
  29. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  30. kodit/infrastructure/api/v1/routers/commits.py +271 -0
  31. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  32. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  33. kodit/infrastructure/api/v1/routers/search.py +31 -14
  34. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  35. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  36. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  37. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  38. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  39. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  40. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  41. kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
  42. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  43. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  44. kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
  45. kodit/infrastructure/cloning/git/working_copy.py +1 -1
  46. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  47. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  48. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  49. kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
  50. kodit/infrastructure/indexing/fusion_service.py +1 -1
  51. kodit/infrastructure/mappers/git_mapper.py +193 -0
  52. kodit/infrastructure/mappers/snippet_mapper.py +106 -0
  53. kodit/infrastructure/mappers/task_mapper.py +5 -44
  54. kodit/infrastructure/reporting/log_progress.py +8 -5
  55. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  56. kodit/infrastructure/slicing/slicer.py +32 -31
  57. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  58. kodit/infrastructure/sqlalchemy/entities.py +394 -158
  59. kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
  60. kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
  61. kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
  62. kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
  63. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
  64. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  65. kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
  66. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  67. kodit/mcp.py +12 -30
  68. kodit/migrations/env.py +1 -0
  69. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  70. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  71. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  72. kodit/py.typed +0 -0
  73. kodit/utils/dump_openapi.py +7 -4
  74. kodit/utils/path_utils.py +29 -0
  75. {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
  76. kodit-0.5.0.dist-info/RECORD +137 -0
  77. kodit/application/factories/code_indexing_factory.py +0 -195
  78. kodit/application/services/auto_indexing_service.py +0 -99
  79. kodit/application/services/code_indexing_application_service.py +0 -410
  80. kodit/domain/services/index_query_service.py +0 -70
  81. kodit/domain/services/index_service.py +0 -269
  82. kodit/infrastructure/api/client/index_client.py +0 -57
  83. kodit/infrastructure/api/v1/routers/indexes.py +0 -164
  84. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  85. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  86. kodit/infrastructure/cloning/__init__.py +0 -1
  87. kodit/infrastructure/cloning/metadata.py +0 -98
  88. kodit/infrastructure/mappers/index_mapper.py +0 -345
  89. kodit/infrastructure/reporting/tdqm_progress.py +0 -38
  90. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  91. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  92. kodit-0.4.3.dist-info/RECORD +0 -125
  93. {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
  94. {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
  95. {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,690 @@
1
+ # ruff: noqa
2
+ """add_new_git_entities
3
+
4
+ Revision ID: 7f15f878c3a1
5
+ Revises: b9cd1c3fd762
6
+ Create Date: 2025-09-18 11:46:54.238720
7
+
8
+ """
9
+
10
+ from typing import Sequence, Union
11
+
12
+ from alembic import op
13
+ import sqlalchemy as sa
14
+ from kodit.infrastructure.sqlalchemy.entities import PathType
15
+
16
+
17
+ # revision identifiers, used by Alembic.
18
+ revision: str = "7f15f878c3a1"
19
+ down_revision: Union[str, None] = "b9cd1c3fd762"
20
+ branch_labels: Union[str, Sequence[str], None] = None
21
+ depends_on: Union[str, Sequence[str], None] = None
22
+
23
+
24
def upgrade() -> None:
    """Replace the legacy source/file/snippet schema with the git-based schema.

    The migration is destructive: the legacy tables (``embeddings``,
    ``author_file_mappings``, ``snippets``, ``indexes``, ``files``,
    ``authors``, ``sources``) and the ``tasks`` table are dropped without
    copying their rows anywhere.

    Steps, in order:

    1. Drop the legacy tables, children before parents, so that foreign key
       constraints are respected on PostgreSQL.
    2. Recreate ``tasks`` with a plain string ``type`` column.
    3. Create the new git tables (``commit_indexes``, ``git_repos``,
       ``snippets_v2``, ``enrichments``, ``git_commits``,
       ``commit_snippets_v2``, ``git_branches``, ``git_tracking_branches``,
       ``git_commit_files``, ``git_tags``, ``snippet_v2_files``), parents
       before children.
    4. Recreate ``embeddings`` with a string ``snippet_id`` (no foreign key).
    """
    # Drop dependent tables first to respect foreign key constraints in PostgreSQL
    # Drop embeddings first (it references snippets)
    op.drop_index(op.f("ix_embeddings_type"), table_name="embeddings")
    op.drop_index(op.f("ix_embeddings_snippet_id"), table_name="embeddings")
    op.drop_table("embeddings")

    # PostgreSQL keeps a named enum type alive after the table using it is
    # dropped, and op.create_table() emits CREATE TYPE unconditionally. Remove
    # the leftover type so that recreating "embeddings" below does not fail
    # with "type already exists". Other dialects have no standalone enum type.
    if op.get_context().dialect.name == "postgresql":
        op.execute("DROP TYPE IF EXISTS embeddingtype")

    # Drop author_file_mappings (references both authors and files)
    op.drop_index(
        "ix_author_file_mappings_author_id", table_name="author_file_mappings"
    )
    op.drop_index("ix_author_file_mappings_file_id", table_name="author_file_mappings")
    op.drop_table("author_file_mappings")

    # Drop snippets (references files and indexes)
    op.drop_index("ix_snippets_file_id", table_name="snippets")
    op.drop_index("ix_snippets_index_id", table_name="snippets")
    op.drop_table("snippets")

    # Drop indexes (references sources)
    op.drop_index("ix_indexes_source_id", table_name="indexes")
    op.drop_table("indexes")

    # Drop files (references sources)
    op.drop_index("ix_files_cloned_path", table_name="files")
    op.drop_index("ix_files_extension", table_name="files")
    op.drop_index("ix_files_mime_type", table_name="files")
    op.drop_index("ix_files_sha256", table_name="files")
    op.drop_index("ix_files_uri", table_name="files")
    op.drop_table("files")

    # Drop authors (no longer has dependencies)
    op.drop_index("ix_authors_email", table_name="authors")
    op.drop_index("ix_authors_name", table_name="authors")
    op.drop_table("authors")

    # Finally drop sources (base table)
    op.drop_index("ix_sources_cloned_path", table_name="sources")
    op.drop_index("ix_sources_type", table_name="sources")
    op.drop_index("ix_sources_uri", table_name="sources")
    op.drop_table("sources")

    # Drop the tasks table and recreate with a string type column
    op.drop_index(op.f("ix_tasks_type"), table_name="tasks")
    op.drop_index(op.f("ix_tasks_dedup_key"), table_name="tasks")
    op.drop_table("tasks")
    op.create_table(
        "tasks",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("dedup_key", sa.String(length=255), nullable=False),
        sa.Column("type", sa.String(length=255), nullable=False),
        sa.Column("payload", sa.JSON(), nullable=False),
        sa.Column("priority", sa.Integer(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(op.f("ix_tasks_dedup_key"), "tasks", ["dedup_key"], unique=False)
    op.create_index(op.f("ix_tasks_type"), "tasks", ["type"], unique=False)

    # Create new tables in correct dependency order
    op.create_table(
        "commit_indexes",
        sa.Column("commit_sha", sa.String(length=64), nullable=False),
        sa.Column(
            "status",
            sa.Enum(
                "PENDING", "IN_PROGRESS", "COMPLETED", "FAILED", name="indexstatustype"
            ),
            nullable=False,
        ),
        sa.Column("indexed_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("error_message", sa.UnicodeText(), nullable=True),
        sa.Column("files_processed", sa.Integer(), nullable=False),
        sa.Column("processing_time_seconds", sa.String(length=50), nullable=False),
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(
        op.f("ix_commit_indexes_commit_sha"),
        "commit_indexes",
        ["commit_sha"],
        unique=True,
    )
    op.create_index(
        op.f("ix_commit_indexes_status"), "commit_indexes", ["status"], unique=False
    )

    op.create_table(
        "git_repos",
        sa.Column("sanitized_remote_uri", sa.String(length=1024), nullable=False),
        sa.Column("remote_uri", sa.String(length=1024), nullable=False),
        sa.Column("cloned_path", PathType(length=1024), nullable=True),
        sa.Column("last_scanned_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(
        op.f("ix_git_repos_sanitized_remote_uri"),
        "git_repos",
        ["sanitized_remote_uri"],
        unique=True,
    )

    op.create_table(
        "snippets_v2",
        sa.Column("sha", sa.String(length=64), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("content", sa.UnicodeText(), nullable=False),
        sa.Column("extension", sa.String(length=255), nullable=False),
        sa.PrimaryKeyConstraint("sha"),
    )
    op.create_index(
        op.f("ix_snippets_v2_extension"), "snippets_v2", ["extension"], unique=False
    )

    op.create_table(
        "enrichments",
        sa.Column("snippet_sha", sa.String(length=64), nullable=False),
        sa.Column(
            "type",
            sa.Enum("UNKNOWN", "SUMMARIZATION", name="enrichmenttype"),
            nullable=False,
        ),
        sa.Column("content", sa.UnicodeText(), nullable=False),
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
        sa.ForeignKeyConstraint(["snippet_sha"], ["snippets_v2.sha"]),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("snippet_sha", "type", name="uix_snippet_enrichment"),
    )
    op.create_index(
        op.f("ix_enrichments_snippet_sha"), "enrichments", ["snippet_sha"], unique=False
    )
    op.create_index(op.f("ix_enrichments_type"), "enrichments", ["type"], unique=False)

    op.create_table(
        "git_commits",
        sa.Column("commit_sha", sa.String(length=64), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("repo_id", sa.Integer(), nullable=False),
        sa.Column("date", sa.DateTime(timezone=True), nullable=False),
        sa.Column("message", sa.UnicodeText(), nullable=False),
        sa.Column("parent_commit_sha", sa.String(length=64), nullable=True),
        sa.Column("author", sa.String(length=255), nullable=False),
        sa.ForeignKeyConstraint(["repo_id"], ["git_repos.id"]),
        sa.PrimaryKeyConstraint("commit_sha"),
    )
    op.create_index(
        op.f("ix_git_commits_author"), "git_commits", ["author"], unique=False
    )
    op.create_index(
        op.f("ix_git_commits_repo_id"), "git_commits", ["repo_id"], unique=False
    )

    op.create_table(
        "commit_snippets_v2",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("commit_sha", sa.String(length=64), nullable=False),
        sa.Column("snippet_sha", sa.String(length=64), nullable=False),
        sa.ForeignKeyConstraint(["commit_sha"], ["git_commits.commit_sha"]),
        sa.ForeignKeyConstraint(["snippet_sha"], ["snippets_v2.sha"]),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("commit_sha", "snippet_sha", name="uix_commit_snippet"),
    )
    op.create_index(
        op.f("ix_commit_snippets_v2_commit_sha"),
        "commit_snippets_v2",
        ["commit_sha"],
        unique=False,
    )
    op.create_index(
        op.f("ix_commit_snippets_v2_snippet_sha"),
        "commit_snippets_v2",
        ["snippet_sha"],
        unique=False,
    )

    op.create_table(
        "git_branches",
        sa.Column("repo_id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(length=255), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("head_commit_sha", sa.String(length=64), nullable=False),
        sa.ForeignKeyConstraint(["head_commit_sha"], ["git_commits.commit_sha"]),
        sa.ForeignKeyConstraint(["repo_id"], ["git_repos.id"]),
        sa.PrimaryKeyConstraint("repo_id", "name"),
        sa.UniqueConstraint("repo_id", "name", name="uix_repo_branch"),
    )
    op.create_index(
        op.f("ix_git_branches_name"), "git_branches", ["name"], unique=False
    )
    op.create_index(
        op.f("ix_git_branches_repo_id"), "git_branches", ["repo_id"], unique=False
    )

    # The primary key is repo_id alone, so each repository can have at most
    # one row here; the composite foreign key ties that row to a branch.
    op.create_table(
        "git_tracking_branches",
        sa.Column("repo_id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(length=255), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
        sa.ForeignKeyConstraint(
            ["repo_id", "name"],
            ["git_branches.repo_id", "git_branches.name"],
        ),
        sa.PrimaryKeyConstraint("repo_id"),
        sa.UniqueConstraint("repo_id", "name", name="uix_repo_tracking_branch"),
    )
    op.create_index(
        op.f("ix_git_tracking_branches_name"),
        "git_tracking_branches",
        ["name"],
        unique=False,
    )
    op.create_index(
        op.f("ix_git_tracking_branches_repo_id"),
        "git_tracking_branches",
        ["repo_id"],
        unique=False,
    )

    op.create_table(
        "git_commit_files",
        sa.Column("commit_sha", sa.String(length=64), nullable=False),
        sa.Column("path", sa.String(length=1024), nullable=False),
        sa.Column("blob_sha", sa.String(length=64), nullable=False),
        sa.Column("mime_type", sa.String(length=255), nullable=False),
        sa.Column("extension", sa.String(length=255), nullable=False),
        sa.Column("size", sa.Integer(), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.ForeignKeyConstraint(["commit_sha"], ["git_commits.commit_sha"]),
        sa.PrimaryKeyConstraint("commit_sha", "path"),
        sa.UniqueConstraint("commit_sha", "path", name="uix_commit_file"),
    )
    op.create_index(
        op.f("ix_git_commit_files_blob_sha"),
        "git_commit_files",
        ["blob_sha"],
        unique=False,
    )
    op.create_index(
        op.f("ix_git_commit_files_extension"),
        "git_commit_files",
        ["extension"],
        unique=False,
    )
    op.create_index(
        op.f("ix_git_commit_files_mime_type"),
        "git_commit_files",
        ["mime_type"],
        unique=False,
    )

    op.create_table(
        "git_tags",
        sa.Column("repo_id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(length=255), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("target_commit_sha", sa.String(length=64), nullable=False),
        sa.ForeignKeyConstraint(["repo_id"], ["git_repos.id"]),
        sa.ForeignKeyConstraint(["target_commit_sha"], ["git_commits.commit_sha"]),
        sa.PrimaryKeyConstraint("repo_id", "name"),
        sa.UniqueConstraint("repo_id", "name", name="uix_repo_tag"),
    )
    op.create_index(op.f("ix_git_tags_name"), "git_tags", ["name"], unique=False)
    op.create_index(op.f("ix_git_tags_repo_id"), "git_tags", ["repo_id"], unique=False)
    op.create_index(
        op.f("ix_git_tags_target_commit_sha"),
        "git_tags",
        ["target_commit_sha"],
        unique=False,
    )

    op.create_table(
        "snippet_v2_files",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("snippet_sha", sa.String(length=64), nullable=False),
        sa.Column("blob_sha", sa.String(length=64), nullable=False),
        sa.Column("commit_sha", sa.String(length=64), nullable=False),
        sa.Column("file_path", sa.String(length=1024), nullable=False),
        sa.ForeignKeyConstraint(
            ["commit_sha", "file_path"],
            ["git_commit_files.commit_sha", "git_commit_files.path"],
        ),
        sa.ForeignKeyConstraint(["snippet_sha"], ["snippets_v2.sha"]),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint(
            "snippet_sha",
            "blob_sha",
            "commit_sha",
            "file_path",
            name="uix_snippet_file",
        ),
    )
    op.create_index(
        op.f("ix_snippet_v2_files_blob_sha"),
        "snippet_v2_files",
        ["blob_sha"],
        unique=False,
    )
    op.create_index(
        op.f("ix_snippet_v2_files_commit_sha"),
        "snippet_v2_files",
        ["commit_sha"],
        unique=False,
    )
    op.create_index(
        op.f("ix_snippet_v2_files_file_path"),
        "snippet_v2_files",
        ["file_path"],
        unique=False,
    )
    op.create_index(
        op.f("ix_snippet_v2_files_snippet_sha"),
        "snippet_v2_files",
        ["snippet_sha"],
        unique=False,
    )

    # Create embeddings table with String snippet_id for snippets_v2 compatibility
    op.create_table(
        "embeddings",
        sa.Column("snippet_id", sa.String(length=64), nullable=False),
        sa.Column(
            "type", sa.Enum("CODE", "TEXT", name="embeddingtype"), nullable=False
        ),
        sa.Column("embedding", sa.JSON(), nullable=False),
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(
        op.f("ix_embeddings_snippet_id"), "embeddings", ["snippet_id"], unique=False
    )
    op.create_index(op.f("ix_embeddings_type"), "embeddings", ["type"], unique=False)
501
+
502
+
503
def downgrade() -> None:
    """Remove the git-based schema and recreate the empty legacy schema.

    Steps, in order:

    1. Drop the string-keyed ``embeddings`` table created by the upgrade.
    2. Drop the git tables, children before parents, so that foreign key
       constraints are respected on PostgreSQL.
    3. On PostgreSQL, drop the enum types owned by the dropped tables so that
       recreating ``embeddings`` here, or re-running the upgrade later, does
       not fail with "type already exists".
    4. Recreate the legacy tables, parents before children, so every foreign
       key target exists when it is referenced.

    No rows are restored: the upgrade discarded the legacy data. The ``tasks``
    table is left untouched, so it keeps the string ``type`` column introduced
    by the upgrade.
    """
    # --- 1. Drop the upgraded embeddings table -------------------------------
    op.drop_index(op.f("ix_embeddings_type"), table_name="embeddings")
    op.drop_index(op.f("ix_embeddings_snippet_id"), table_name="embeddings")
    op.drop_table("embeddings")

    # --- 2. Drop the git tables, children first ------------------------------
    # snippet_v2_files references git_commit_files and snippets_v2
    op.drop_index(
        op.f("ix_snippet_v2_files_snippet_sha"), table_name="snippet_v2_files"
    )
    op.drop_index(op.f("ix_snippet_v2_files_file_path"), table_name="snippet_v2_files")
    op.drop_index(op.f("ix_snippet_v2_files_commit_sha"), table_name="snippet_v2_files")
    op.drop_index(op.f("ix_snippet_v2_files_blob_sha"), table_name="snippet_v2_files")
    op.drop_table("snippet_v2_files")

    # git_tags references git_repos and git_commits
    op.drop_index(op.f("ix_git_tags_target_commit_sha"), table_name="git_tags")
    op.drop_index(op.f("ix_git_tags_repo_id"), table_name="git_tags")
    op.drop_index(op.f("ix_git_tags_name"), table_name="git_tags")
    op.drop_table("git_tags")

    # git_commit_files references git_commits
    op.drop_index(op.f("ix_git_commit_files_mime_type"), table_name="git_commit_files")
    op.drop_index(op.f("ix_git_commit_files_extension"), table_name="git_commit_files")
    op.drop_index(op.f("ix_git_commit_files_blob_sha"), table_name="git_commit_files")
    op.drop_table("git_commit_files")

    # git_tracking_branches references git_branches, so it must go first
    op.drop_index(
        op.f("ix_git_tracking_branches_repo_id"), table_name="git_tracking_branches"
    )
    op.drop_index(
        op.f("ix_git_tracking_branches_name"), table_name="git_tracking_branches"
    )
    op.drop_table("git_tracking_branches")

    # git_branches references git_commits and git_repos
    op.drop_index(op.f("ix_git_branches_repo_id"), table_name="git_branches")
    op.drop_index(op.f("ix_git_branches_name"), table_name="git_branches")
    op.drop_table("git_branches")

    # commit_snippets_v2 references git_commits and snippets_v2
    op.drop_index(
        op.f("ix_commit_snippets_v2_snippet_sha"), table_name="commit_snippets_v2"
    )
    op.drop_index(
        op.f("ix_commit_snippets_v2_commit_sha"), table_name="commit_snippets_v2"
    )
    op.drop_table("commit_snippets_v2")

    # git_commits references git_repos
    op.drop_index(op.f("ix_git_commits_repo_id"), table_name="git_commits")
    op.drop_index(op.f("ix_git_commits_author"), table_name="git_commits")
    op.drop_table("git_commits")

    # enrichments references snippets_v2
    op.drop_index(op.f("ix_enrichments_type"), table_name="enrichments")
    op.drop_index(op.f("ix_enrichments_snippet_sha"), table_name="enrichments")
    op.drop_table("enrichments")

    op.drop_index(op.f("ix_snippets_v2_extension"), table_name="snippets_v2")
    op.drop_table("snippets_v2")

    op.drop_index(op.f("ix_git_repos_sanitized_remote_uri"), table_name="git_repos")
    op.drop_table("git_repos")

    op.drop_index(op.f("ix_commit_indexes_status"), table_name="commit_indexes")
    op.drop_index(op.f("ix_commit_indexes_commit_sha"), table_name="commit_indexes")
    op.drop_table("commit_indexes")

    # --- 3. Drop leftover enum types (PostgreSQL only) -----------------------
    # PostgreSQL keeps named enum types after their tables are dropped, and
    # op.create_table() emits CREATE TYPE unconditionally. Other dialects have
    # no standalone enum type to clean up.
    if op.get_context().dialect.name == "postgresql":
        op.execute("DROP TYPE IF EXISTS embeddingtype")
        op.execute("DROP TYPE IF EXISTS indexstatustype")
        op.execute("DROP TYPE IF EXISTS enrichmenttype")

    # --- 4. Recreate the legacy tables, parents first ------------------------
    # sa.DateTime() is used instead of sa.DATETIME(): it renders DATETIME on
    # SQLite, as before, and a valid timestamp type on PostgreSQL.
    op.create_table(
        "sources",
        sa.Column("uri", sa.VARCHAR(length=1024), nullable=False),
        sa.Column("cloned_path", sa.VARCHAR(length=1024), nullable=False),
        sa.Column("id", sa.INTEGER(), nullable=False),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.Column("type", sa.VARCHAR(length=7), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_sources_uri", "sources", ["uri"], unique=True)
    op.create_index("ix_sources_type", "sources", ["type"], unique=False)
    op.create_index("ix_sources_cloned_path", "sources", ["cloned_path"], unique=False)

    op.create_table(
        "authors",
        sa.Column("name", sa.VARCHAR(length=255), nullable=False),
        sa.Column("email", sa.VARCHAR(length=255), nullable=False),
        sa.Column("id", sa.INTEGER(), nullable=False),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("name", "email", name="uix_author"),
    )
    op.create_index("ix_authors_name", "authors", ["name"], unique=False)
    op.create_index("ix_authors_email", "authors", ["email"], unique=False)

    # indexes references sources
    op.create_table(
        "indexes",
        sa.Column("source_id", sa.INTEGER(), nullable=False),
        sa.Column("id", sa.INTEGER(), nullable=False),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(["source_id"], ["sources.id"]),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_indexes_source_id", "indexes", ["source_id"], unique=True)

    # files references sources
    op.create_table(
        "files",
        sa.Column("source_id", sa.INTEGER(), nullable=False),
        sa.Column("mime_type", sa.VARCHAR(length=255), nullable=False),
        sa.Column("uri", sa.VARCHAR(length=1024), nullable=False),
        sa.Column("cloned_path", sa.VARCHAR(length=1024), nullable=False),
        sa.Column("sha256", sa.VARCHAR(length=64), nullable=False),
        sa.Column("size_bytes", sa.INTEGER(), nullable=False),
        sa.Column("id", sa.INTEGER(), nullable=False),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.Column("extension", sa.VARCHAR(length=255), nullable=False),
        sa.Column("file_processing_status", sa.INTEGER(), nullable=False),
        sa.ForeignKeyConstraint(["source_id"], ["sources.id"]),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_files_uri", "files", ["uri"], unique=False)
    op.create_index("ix_files_sha256", "files", ["sha256"], unique=False)
    op.create_index("ix_files_mime_type", "files", ["mime_type"], unique=False)
    op.create_index("ix_files_extension", "files", ["extension"], unique=False)
    op.create_index("ix_files_cloned_path", "files", ["cloned_path"], unique=False)

    # snippets references files and indexes
    op.create_table(
        "snippets",
        sa.Column("file_id", sa.INTEGER(), nullable=False),
        sa.Column("index_id", sa.INTEGER(), nullable=False),
        sa.Column("content", sa.TEXT(), nullable=False),
        sa.Column("id", sa.INTEGER(), nullable=False),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.Column("summary", sa.TEXT(), nullable=False),
        sa.ForeignKeyConstraint(["file_id"], ["files.id"]),
        sa.ForeignKeyConstraint(["index_id"], ["indexes.id"]),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_snippets_index_id", "snippets", ["index_id"], unique=False)
    op.create_index("ix_snippets_file_id", "snippets", ["file_id"], unique=False)

    # author_file_mappings references authors and files
    op.create_table(
        "author_file_mappings",
        sa.Column("author_id", sa.INTEGER(), nullable=False),
        sa.Column("file_id", sa.INTEGER(), nullable=False),
        sa.Column("id", sa.INTEGER(), nullable=False),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(["author_id"], ["authors.id"]),
        sa.ForeignKeyConstraint(["file_id"], ["files.id"]),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("author_id", "file_id", name="uix_author_file_mapping"),
    )
    op.create_index(
        "ix_author_file_mappings_file_id",
        "author_file_mappings",
        ["file_id"],
        unique=False,
    )
    op.create_index(
        "ix_author_file_mappings_author_id",
        "author_file_mappings",
        ["author_id"],
        unique=False,
    )

    # embeddings references snippets, so it is recreated last
    op.create_table(
        "embeddings",
        sa.Column("snippet_id", sa.INTEGER(), nullable=False),
        sa.Column(
            "type", sa.Enum("CODE", "TEXT", name="embeddingtype"), nullable=False
        ),
        sa.Column("embedding", sa.JSON(), nullable=False),
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(["snippet_id"], ["snippets.id"]),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(
        op.f("ix_embeddings_snippet_id"), "embeddings", ["snippet_id"], unique=False
    )
    op.create_index(op.f("ix_embeddings_type"), "embeddings", ["type"], unique=False)