kodit 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (100) hide show
  1. kodit/_version.py +2 -2
  2. kodit/app.py +59 -24
  3. kodit/application/factories/reporting_factory.py +16 -7
  4. kodit/application/factories/server_factory.py +311 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +543 -0
  7. kodit/application/services/indexing_worker_service.py +13 -46
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +70 -54
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -763
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +3 -96
  14. kodit/database.py +38 -1
  15. kodit/domain/entities/__init__.py +276 -0
  16. kodit/domain/entities/git.py +190 -0
  17. kodit/domain/factories/__init__.py +1 -0
  18. kodit/domain/factories/git_repo_factory.py +76 -0
  19. kodit/domain/protocols.py +270 -46
  20. kodit/domain/services/bm25_service.py +5 -1
  21. kodit/domain/services/embedding_service.py +3 -0
  22. kodit/domain/services/git_repository_service.py +429 -0
  23. kodit/domain/services/git_service.py +300 -0
  24. kodit/domain/services/task_status_query_service.py +19 -0
  25. kodit/domain/value_objects.py +113 -147
  26. kodit/infrastructure/api/client/__init__.py +0 -2
  27. kodit/infrastructure/api/v1/__init__.py +0 -4
  28. kodit/infrastructure/api/v1/dependencies.py +105 -44
  29. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  30. kodit/infrastructure/api/v1/routers/commits.py +271 -0
  31. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  32. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  33. kodit/infrastructure/api/v1/routers/search.py +31 -14
  34. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  35. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  36. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  37. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  38. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  39. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  40. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  41. kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
  42. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  43. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  44. kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
  45. kodit/infrastructure/cloning/git/working_copy.py +10 -3
  46. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  47. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  48. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  49. kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
  50. kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
  51. kodit/infrastructure/indexing/fusion_service.py +1 -1
  52. kodit/infrastructure/mappers/git_mapper.py +193 -0
  53. kodit/infrastructure/mappers/snippet_mapper.py +106 -0
  54. kodit/infrastructure/mappers/task_mapper.py +5 -44
  55. kodit/infrastructure/mappers/task_status_mapper.py +85 -0
  56. kodit/infrastructure/reporting/db_progress.py +23 -0
  57. kodit/infrastructure/reporting/log_progress.py +13 -38
  58. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  59. kodit/infrastructure/slicing/slicer.py +32 -31
  60. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  61. kodit/infrastructure/sqlalchemy/entities.py +428 -131
  62. kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
  63. kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
  64. kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
  65. kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
  66. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
  67. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  68. kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
  69. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  70. kodit/mcp.py +12 -26
  71. kodit/migrations/env.py +1 -1
  72. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  73. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  74. kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
  75. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  76. kodit/py.typed +0 -0
  77. kodit/utils/dump_openapi.py +7 -4
  78. kodit/utils/path_utils.py +29 -0
  79. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
  80. kodit-0.5.0.dist-info/RECORD +137 -0
  81. kodit/application/factories/code_indexing_factory.py +0 -193
  82. kodit/application/services/auto_indexing_service.py +0 -103
  83. kodit/application/services/code_indexing_application_service.py +0 -393
  84. kodit/domain/entities.py +0 -323
  85. kodit/domain/services/index_query_service.py +0 -70
  86. kodit/domain/services/index_service.py +0 -267
  87. kodit/infrastructure/api/client/index_client.py +0 -57
  88. kodit/infrastructure/api/v1/routers/indexes.py +0 -119
  89. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  90. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  91. kodit/infrastructure/cloning/__init__.py +0 -1
  92. kodit/infrastructure/cloning/metadata.py +0 -98
  93. kodit/infrastructure/mappers/index_mapper.py +0 -345
  94. kodit/infrastructure/reporting/tdqm_progress.py +0 -73
  95. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  96. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  97. kodit-0.4.2.dist-info/RECORD +0 -119
  98. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
  99. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
  100. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,106 @@
1
+ """Mapping between domain Git entities and SQLAlchemy entities."""
2
+
3
+ import kodit.domain.entities.git as domain_git_entities
4
+ from kodit.domain.value_objects import Enrichment, EnrichmentType
5
+ from kodit.infrastructure.sqlalchemy import entities as db_entities
6
+
7
+
8
class SnippetMapper:
    """Translate snippets, enrichments and commit indexes between layers.

    Each method converts in one direction between the domain objects
    (``kodit.domain``) and the SQLAlchemy entities (``db_entities``).
    The mapper holds no state of its own.
    """

    def to_domain_snippet_v2(
        self,
        db_snippet: db_entities.SnippetV2,
        db_files: list[db_entities.GitCommitFile],
        db_enrichments: list[db_entities.Enrichment],
    ) -> domain_git_entities.SnippetV2:
        """Assemble a domain SnippetV2 from a database row and its relations.

        Args:
            db_snippet: The stored snippet row.
            db_files: The file rows the snippet derives from.
            db_enrichments: The enrichment rows attached to the snippet.

        Returns:
            A domain SnippetV2 carrying the converted files and enrichments.

        """
        # The database enum is re-created as the domain enum via its value.
        domain_enrichments = [
            Enrichment(
                type=EnrichmentType(row.type.value),
                content=row.content,
            )
            for row in db_enrichments
        ]

        source_files = [
            domain_git_entities.GitFile(
                created_at=db_file.created_at,
                blob_sha=db_file.blob_sha,
                path=db_file.path,
                mime_type=db_file.mime_type,
                size=db_file.size,
                extension=db_file.extension,
            )
            for db_file in db_files
        ]

        return domain_git_entities.SnippetV2(
            sha=db_snippet.sha,
            created_at=db_snippet.created_at,
            updated_at=db_snippet.updated_at,
            derives_from=source_files,
            content=db_snippet.content,
            enrichments=domain_enrichments,
            extension=db_snippet.extension,
        )

    def from_domain_snippet_v2(
        self, domain_snippet: domain_git_entities.SnippetV2
    ) -> db_entities.SnippetV2:
        """Create the database row for a domain SnippetV2.

        Only ``sha``, ``content`` and ``extension`` are copied; timestamps,
        files and enrichments are not set by this method.
        """
        row = db_entities.SnippetV2(
            sha=domain_snippet.sha,
            content=domain_snippet.content,
            extension=domain_snippet.extension,
        )
        return row

    def from_domain_enrichments(
        self, snippet_sha: str, enrichments: list[Enrichment]
    ) -> list[db_entities.Enrichment]:
        """Create database enrichment rows for one snippet.

        Args:
            snippet_sha: SHA of the snippet every produced row points at.
            enrichments: Domain enrichments to convert, in order.

        Returns:
            One database enrichment per input, in the same order.

        """
        # The domain enum is re-created as the database enum via its value.
        return [
            db_entities.Enrichment(
                snippet_sha=snippet_sha,
                type=db_entities.EnrichmentType(item.type.value),
                content=item.content,
            )
            for item in enrichments
        ]

    def to_domain_commit_index(
        self,
        db_commit_index: db_entities.CommitIndex,
        snippets: list[domain_git_entities.SnippetV2],
    ) -> domain_git_entities.CommitIndex:
        """Build a domain CommitIndex from its database row.

        Args:
            db_commit_index: The stored commit index row.
            snippets: Already-converted domain snippets for the commit.

        Returns:
            The domain CommitIndex, with the status wrapped in ``IndexStatus``
            and the processing time coerced to ``float``.

        """
        status = domain_git_entities.IndexStatus(db_commit_index.status)
        elapsed_seconds = float(db_commit_index.processing_time_seconds)
        return domain_git_entities.CommitIndex(
            commit_sha=db_commit_index.commit_sha,
            created_at=db_commit_index.created_at,
            updated_at=db_commit_index.updated_at,
            snippets=snippets,
            status=status,
            indexed_at=db_commit_index.indexed_at,
            error_message=db_commit_index.error_message,
            files_processed=db_commit_index.files_processed,
            processing_time_seconds=elapsed_seconds,
        )

    def from_domain_commit_index(
        self, domain_commit_index: domain_git_entities.CommitIndex
    ) -> db_entities.CommitIndex:
        """Create the database row for a domain CommitIndex.

        The snippets and timestamps of the domain object are not copied.
        """
        source = domain_commit_index
        return db_entities.CommitIndex(
            commit_sha=source.commit_sha,
            status=source.status,
            indexed_at=source.indexed_at,
            error_message=source.error_message,
            files_processed=source.files_processed,
            processing_time_seconds=source.processing_time_seconds,
        )
@@ -1,43 +1,10 @@
1
1
  """Task mapper for the task queue."""
2
2
 
3
- from typing import ClassVar
4
-
5
3
  from kodit.domain.entities import Task
6
- from kodit.domain.value_objects import TaskType
4
+ from kodit.domain.value_objects import TaskOperation
7
5
  from kodit.infrastructure.sqlalchemy import entities as db_entities
8
6
 
9
7
 
10
- class TaskTypeMapper:
11
- """Maps between domain QueuedTaskType and SQLAlchemy TaskType."""
12
-
13
- # Map TaskType enum to QueuedTaskType
14
- TASK_TYPE_MAPPING: ClassVar[dict[db_entities.TaskType, TaskType]] = {
15
- db_entities.TaskType.INDEX_UPDATE: TaskType.INDEX_UPDATE,
16
- }
17
-
18
- @staticmethod
19
- def to_domain_type(task_type: db_entities.TaskType) -> TaskType:
20
- """Convert SQLAlchemy TaskType to domain QueuedTaskType."""
21
- if task_type not in TaskTypeMapper.TASK_TYPE_MAPPING:
22
- raise ValueError(f"Unknown task type: {task_type}")
23
- return TaskTypeMapper.TASK_TYPE_MAPPING[task_type]
24
-
25
- @staticmethod
26
- def from_domain_type(task_type: TaskType) -> db_entities.TaskType:
27
- """Convert domain QueuedTaskType to SQLAlchemy TaskType."""
28
- if task_type not in TaskTypeMapper.TASK_TYPE_MAPPING.values():
29
- raise ValueError(f"Unknown task type: {task_type}")
30
-
31
- # Find value in TASK_TYPE_MAPPING
32
- return next(
33
- (
34
- db_task_type
35
- for db_task_type, domain_task_type in TaskTypeMapper.TASK_TYPE_MAPPING.items() # noqa: E501
36
- if domain_task_type == task_type
37
- )
38
- )
39
-
40
-
41
8
  class TaskMapper:
42
9
  """Maps between domain QueuedTask and SQLAlchemy Task entities.
43
10
 
@@ -52,13 +19,12 @@ class TaskMapper:
52
19
  Since QueuedTask doesn't have status fields, we store processing
53
20
  state in the payload.
54
21
  """
55
- # Get the task type
56
- task_type = TaskTypeMapper.to_domain_type(record.type)
57
-
22
+ if record.type not in TaskOperation.__members__.values():
23
+ raise ValueError(f"Unknown operation: {record.type}")
58
24
  # The dedup_key becomes the id in the domain entity
59
25
  return Task(
60
26
  id=record.dedup_key, # Use dedup_key as the unique identifier
61
- type=task_type,
27
+ type=TaskOperation(record.type),
62
28
  priority=record.priority,
63
29
  payload=record.payload or {},
64
30
  created_at=record.created_at,
@@ -68,14 +34,9 @@ class TaskMapper:
68
34
  @staticmethod
69
35
  def from_domain_task(task: Task) -> db_entities.Task:
70
36
  """Convert domain QueuedTask to SQLAlchemy Task record."""
71
- if task.type not in TaskTypeMapper.TASK_TYPE_MAPPING.values():
72
- raise ValueError(f"Unknown task type: {task.type}")
73
-
74
- # Find value in TASK_TYPE_MAPPING
75
- task_type = TaskTypeMapper.from_domain_type(task.type)
76
37
  return db_entities.Task(
77
38
  dedup_key=task.id,
78
- type=task_type,
39
+ type=task.type.value,
79
40
  payload=task.payload,
80
41
  priority=task.priority,
81
42
  )
@@ -0,0 +1,85 @@
1
+ """Task status mapper."""
2
+
3
+ from kodit.domain import entities as domain_entities
4
+ from kodit.domain.value_objects import ReportingState, TaskOperation, TrackableType
5
+ from kodit.infrastructure.sqlalchemy import entities as db_entities
6
+
7
+
8
class TaskStatusMapper:
    """Convert TaskStatus objects between the domain and database layers."""

    @staticmethod
    def from_domain_task_status(
        task_status: domain_entities.TaskStatus,
    ) -> db_entities.TaskStatus:
        """Create the database row for a domain TaskStatus.

        The parent is stored as the parent's ``id`` (or ``None``). The state
        is stored as its value when it is a ``ReportingState`` and passed
        through unchanged otherwise.
        """
        parent = task_status.parent
        state = task_status.state
        trackable_type = task_status.trackable_type

        return db_entities.TaskStatus(
            id=task_status.id,
            operation=task_status.operation,
            created_at=task_status.created_at,
            updated_at=task_status.updated_at,
            trackable_id=task_status.trackable_id,
            trackable_type=trackable_type.value if trackable_type else None,
            parent=parent.id if parent else None,
            state=state.value if isinstance(state, ReportingState) else state,
            error=task_status.error,
            total=task_status.total,
            current=task_status.current,
            message=task_status.message,
        )

    @staticmethod
    def to_domain_task_status(
        db_status: db_entities.TaskStatus,
    ) -> domain_entities.TaskStatus:
        """Build a domain TaskStatus from a single database row.

        The returned object always has ``parent=None``; use
        ``to_domain_task_status_with_hierarchy`` to link parents.
        """
        stored_trackable_type = db_status.trackable_type
        trackable_type = (
            TrackableType(stored_trackable_type) if stored_trackable_type else None
        )

        return domain_entities.TaskStatus(
            id=db_status.id,
            operation=TaskOperation(db_status.operation),
            state=ReportingState(db_status.state),
            created_at=db_status.created_at,
            updated_at=db_status.updated_at,
            trackable_id=db_status.trackable_id,
            trackable_type=trackable_type,
            parent=None,  # Linked afterwards by the hierarchy-aware converter
            error=db_status.error or None,  # Falsy errors collapse to None
            total=db_status.total,
            current=db_status.current,
            message=db_status.message,
        )

    @staticmethod
    def to_domain_task_status_with_hierarchy(
        db_statuses: list[db_entities.TaskStatus],
    ) -> list[domain_entities.TaskStatus]:
        """Convert database rows to domain objects and link parents.

        Every row is converted first; afterwards each converted status whose
        row references a parent id present in the same batch gets that
        converted object assigned as ``parent``. Parents outside the batch
        are left as ``None``.

        Returns:
            The converted statuses, in the same order as ``db_statuses``.

        """
        converted = [TaskStatusMapper.to_domain_task_status(row) for row in db_statuses]

        # Index by id so parent lookups do not rescan the list.
        by_id = {status.id: status for status in converted}

        for row, status in zip(db_statuses, converted, strict=True):
            parent_id = row.parent
            if not parent_id or parent_id not in by_id:
                continue
            status.parent = by_id[parent_id]

        return converted
@@ -0,0 +1,23 @@
1
+ """Report progress by saving task status to the database."""
2
+
3
+ import structlog
4
+
5
+ from kodit.config import ReportingConfig
6
+ from kodit.domain.entities import TaskStatus
7
+ from kodit.domain.protocols import ReportingModule, TaskStatusRepository
8
+
9
+
10
class DBProgressReportingModule(ReportingModule):
    """Reporting module that saves each task status change via a repository."""

    def __init__(
        self, task_status_repository: TaskStatusRepository, config: ReportingConfig
    ) -> None:
        """Keep the repository and config and create a module-level logger.

        Args:
            task_status_repository: Repository used to save status changes.
            config: Reporting configuration, stored on the instance.

        """
        self._log = structlog.get_logger(__name__)
        self.config = config
        self.task_status_repository = task_status_repository

    async def on_change(self, progress: TaskStatus) -> None:
        """Save the changed task status through the repository."""
        repository = self.task_status_repository
        await repository.save(progress)
@@ -1,13 +1,13 @@
1
1
  """Log progress using structlog."""
2
2
 
3
- import time
4
3
  from datetime import UTC, datetime
5
4
 
6
5
  import structlog
7
6
 
8
7
  from kodit.config import ReportingConfig
8
+ from kodit.domain.entities import TaskStatus
9
9
  from kodit.domain.protocols import ReportingModule
10
- from kodit.domain.value_objects import Progress, ProgressState, ReportingState
10
+ from kodit.domain.value_objects import ReportingState
11
11
 
12
12
 
13
13
  class LoggingReportingModule(ReportingModule):
@@ -19,47 +19,22 @@ class LoggingReportingModule(ReportingModule):
19
19
  self._log = structlog.get_logger(__name__)
20
20
  self._last_log_time: datetime = datetime.now(UTC)
21
21
 
22
- def on_change(self, step: Progress) -> None:
22
+ async def on_change(self, progress: TaskStatus) -> None:
23
23
  """On step changed."""
24
24
  current_time = datetime.now(UTC)
25
- time_since_last_log = current_time - self._last_log_time
25
+ step = progress
26
26
 
27
- if (
28
- step.state != ReportingState.IN_PROGRESS
29
- or time_since_last_log >= self.config.log_time_interval
30
- ):
27
+ if step.state == ReportingState.FAILED:
28
+ self._log.exception(
29
+ step.operation,
30
+ state=step.state,
31
+ completion_percent=step.completion_percent,
32
+ error=step.error,
33
+ )
34
+ else:
31
35
  self._log.info(
32
- step.name,
36
+ step.operation,
33
37
  state=step.state,
34
- message=step.message,
35
38
  completion_percent=step.completion_percent,
36
39
  )
37
40
  self._last_log_time = current_time
38
-
39
-
40
- class LogProgress(Progress):
41
- """Log progress using structlog with time-based throttling."""
42
-
43
- def __init__(self, config: ReportingConfig | None = None) -> None:
44
- """Initialize the log progress."""
45
- self.log = structlog.get_logger()
46
- self.config = config or ReportingConfig()
47
- self.last_log_time: float = 0
48
-
49
- def on_update(self, state: ProgressState) -> None:
50
- """Log the progress with time-based throttling."""
51
- current_time = time.time()
52
- time_since_last_log = current_time - self.last_log_time
53
-
54
- if time_since_last_log >= self.config.log_time_interval.total_seconds():
55
- self.log.info(
56
- "Progress...",
57
- operation=state.operation,
58
- percentage=state.percentage,
59
- message=state.message,
60
- )
61
- self.last_log_time = current_time
62
-
63
- def on_complete(self) -> None:
64
- """Log the completion."""
65
- self.log.info("Completed")
@@ -0,0 +1,21 @@
1
+ """Log progress using telemetry."""
2
+
3
+ import structlog
4
+
5
+ from kodit.domain.entities import TaskStatus
6
+ from kodit.domain.protocols import ReportingModule
7
+ from kodit.log import log_event
8
+
9
+
10
class TelemetryProgressReportingModule(ReportingModule):
    """Reporting module that forwards status changes to ``log_event``."""

    def __init__(self) -> None:
        """Create the module-level logger."""
        self._log = structlog.get_logger(__name__)

    async def on_change(self, progress: TaskStatus) -> None:
        """Call ``log_event`` with the operation of the changed status."""
        operation = progress.operation
        log_event(operation)
@@ -14,7 +14,7 @@ import structlog
14
14
  from tree_sitter import Node, Parser, Tree
15
15
  from tree_sitter_language_pack import get_language
16
16
 
17
- from kodit.domain.entities import File, Snippet
17
+ from kodit.domain.entities.git import GitFile, SnippetV2
18
18
  from kodit.domain.value_objects import LanguageMapping
19
19
 
20
20
 
@@ -149,9 +149,9 @@ class Slicer:
149
149
  """Initialize an empty slicer."""
150
150
  self.log = structlog.get_logger(__name__)
151
151
 
152
- def extract_snippets( # noqa: C901
153
- self, files: list[File], language: str = "python"
154
- ) -> list[Snippet]:
152
+ def extract_snippets_from_git_files( # noqa: C901
153
+ self, files: list[GitFile], language: str = "python"
154
+ ) -> list[SnippetV2]:
155
155
  """Extract code snippets from a list of files.
156
156
 
157
157
  Args:
@@ -187,10 +187,10 @@ class Slicer:
187
187
  raise RuntimeError(f"Failed to load {language} parser: {e}") from e
188
188
 
189
189
  # Create mapping from Paths to File objects and extract paths
190
- path_to_file_map: dict[Path, File] = {}
190
+ path_to_file_map: dict[Path, GitFile] = {}
191
191
  file_paths: list[Path] = []
192
192
  for file in files:
193
- file_path = file.as_path()
193
+ file_path = Path(file.path)
194
194
 
195
195
  # Validate file matches language
196
196
  if not self._file_matches_language(file_path.suffix, language):
@@ -225,7 +225,7 @@ class Slicer:
225
225
  self._build_reverse_call_graph(state)
226
226
 
227
227
  # Extract snippets for all functions
228
- snippets = []
228
+ snippets: list[SnippetV2] = []
229
229
  for qualified_name in state.def_index:
230
230
  snippet_content = self._get_snippet(
231
231
  qualified_name,
@@ -234,7 +234,7 @@ class Slicer:
234
234
  {"max_depth": 2, "max_functions": 8},
235
235
  )
236
236
  if "not found" not in snippet_content:
237
- snippet = self._create_snippet_entity(
237
+ snippet = self._create_snippet_entity_from_git_files(
238
238
  qualified_name, snippet_content, language, state, path_to_file_map
239
239
  )
240
240
  snippets.append(snippet)
@@ -247,8 +247,8 @@ class Slicer:
247
247
  return False
248
248
 
249
249
  try:
250
- return (
251
- language == LanguageMapping.get_language_for_extension(file_extension)
250
+ return language == LanguageMapping.get_language_for_extension(
251
+ file_extension
252
252
  )
253
253
  except ValueError:
254
254
  # Extension not supported, so it doesn't match any language
@@ -614,7 +614,8 @@ class Slicer:
614
614
  if callers:
615
615
  snippet_lines.append("")
616
616
  snippet_lines.append("# === USAGE EXAMPLES ===")
617
- for caller in list(callers)[:2]: # Show up to 2 examples
617
+ # Show up to 2 examples, sorted for deterministic order
618
+ for caller in sorted(callers)[:2]:
618
619
  call_line = self._find_function_call_line(
619
620
  caller, function_name, state, file_contents
620
621
  )
@@ -625,37 +626,37 @@ class Slicer:
625
626
 
626
627
  return "\n".join(snippet_lines)
627
628
 
628
- def _create_snippet_entity(
629
+ def _create_snippet_entity_from_git_files(
629
630
  self,
630
631
  qualified_name: str,
631
632
  snippet_content: str,
632
633
  language: str,
633
634
  state: AnalyzerState,
634
- path_to_file_map: dict[Path, File],
635
- ) -> Snippet:
635
+ path_to_file_map: dict[Path, GitFile],
636
+ ) -> SnippetV2:
636
637
  """Create a Snippet domain entity from extracted content."""
637
638
  # Determine all files that this snippet derives from
638
- derives_from_files = self._find_source_files_for_snippet(
639
+ derives_from_files = self._find_source_files_for_snippet_from_git_files(
639
640
  qualified_name, snippet_content, state, path_to_file_map
640
641
  )
641
642
 
642
643
  # Create the snippet entity
643
- snippet = Snippet(derives_from=derives_from_files)
644
-
645
- # Add the original content
646
- snippet.add_original_content(snippet_content, language)
647
-
648
- return snippet
644
+ return SnippetV2(
645
+ derives_from=derives_from_files,
646
+ content=snippet_content,
647
+ extension=language,
648
+ sha=SnippetV2.compute_sha(snippet_content),
649
+ )
649
650
 
650
- def _find_source_files_for_snippet(
651
+ def _find_source_files_for_snippet_from_git_files(
651
652
  self,
652
653
  qualified_name: str,
653
654
  snippet_content: str,
654
655
  state: AnalyzerState,
655
- path_to_file_map: dict[Path, File],
656
- ) -> list[File]:
656
+ path_to_file_map: dict[Path, GitFile],
657
+ ) -> list[GitFile]:
657
658
  """Find all source files that a snippet derives from."""
658
- source_files: list[File] = []
659
+ source_files: list[GitFile] = []
659
660
  source_file_paths: set[Path] = set()
660
661
 
661
662
  # Add the primary function's file
@@ -835,7 +836,7 @@ class Slicer:
835
836
  # Add direct dependencies
836
837
  to_visit.extend(
837
838
  (callee, depth + 1)
838
- for callee in state.call_graph.get(current, set())
839
+ for callee in sorted(state.call_graph.get(current, set()))
839
840
  if callee not in visited and callee in state.def_index
840
841
  )
841
842
 
@@ -850,26 +851,26 @@ class Slicer:
850
851
  in_degree: dict[str, int] = defaultdict(int)
851
852
  graph: dict[str, set[str]] = defaultdict(set)
852
853
 
853
- for func in functions:
854
- for callee in state.call_graph.get(func, set()):
854
+ for func in sorted(functions):
855
+ for callee in sorted(state.call_graph.get(func, set())):
855
856
  if callee in functions:
856
857
  graph[func].add(callee)
857
858
  in_degree[callee] += 1
858
859
 
859
860
  # Find roots
860
- queue = [f for f in functions if in_degree[f] == 0]
861
+ queue = [f for f in sorted(functions) if in_degree[f] == 0]
861
862
  result = []
862
863
 
863
864
  while queue:
864
865
  current = queue.pop(0)
865
866
  result.append(current)
866
- for neighbor in graph[current]:
867
+ for neighbor in sorted(graph[current]):
867
868
  in_degree[neighbor] -= 1
868
869
  if in_degree[neighbor] == 0:
869
870
  queue.append(neighbor)
870
871
 
871
872
  # Add any remaining (cycles)
872
- for func in functions:
873
+ for func in sorted(functions):
873
874
  if func not in result:
874
875
  result.append(func)
875
876