kodit 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +59 -24
- kodit/application/factories/reporting_factory.py +16 -7
- kodit/application/factories/server_factory.py +311 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +543 -0
- kodit/application/services/indexing_worker_service.py +13 -46
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +70 -54
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -763
- kodit/cli_utils.py +2 -9
- kodit/config.py +3 -96
- kodit/database.py +38 -1
- kodit/domain/entities/__init__.py +276 -0
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +270 -46
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/task_status_query_service.py +19 -0
- kodit/domain/value_objects.py +113 -147
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +105 -44
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +271 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
- kodit/infrastructure/cloning/git/working_copy.py +10 -3
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
- kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +106 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/mappers/task_status_mapper.py +85 -0
- kodit/infrastructure/reporting/db_progress.py +23 -0
- kodit/infrastructure/reporting/log_progress.py +13 -38
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/slicer.py +32 -31
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/entities.py +428 -131
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -26
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_openapi.py +7 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
- kodit-0.5.0.dist-info/RECORD +137 -0
- kodit/application/factories/code_indexing_factory.py +0 -193
- kodit/application/services/auto_indexing_service.py +0 -103
- kodit/application/services/code_indexing_application_service.py +0 -393
- kodit/domain/entities.py +0 -323
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -267
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -119
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -73
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.2.dist-info/RECORD +0 -119
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Mapping between domain Git entities and SQLAlchemy entities."""
|
|
2
|
+
|
|
3
|
+
import kodit.domain.entities.git as domain_git_entities
|
|
4
|
+
from kodit.domain.value_objects import Enrichment, EnrichmentType
|
|
5
|
+
from kodit.infrastructure.sqlalchemy import entities as db_entities
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SnippetMapper:
    """Translate snippets, enrichments and commit indexes between layers.

    Every method builds a new object of the target layer (domain or
    SQLAlchemy) from the fields of its inputs; nothing is persisted here.
    """

    def to_domain_snippet_v2(
        self,
        db_snippet: db_entities.SnippetV2,
        db_files: list[db_entities.GitCommitFile],
        db_enrichments: list[db_entities.Enrichment],
    ) -> domain_git_entities.SnippetV2:
        """Build a domain SnippetV2 from a snippet row and its related rows.

        Args:
            db_snippet: The stored snippet.
            db_files: Commit-file rows the snippet derives from.
            db_enrichments: Enrichment rows belonging to the snippet.

        Returns:
            A domain snippet carrying the converted files and enrichments.

        """
        # The database enum member is re-keyed into the domain enum by value.
        domain_enrichments = [
            Enrichment(
                type=EnrichmentType(row.type.value),
                content=row.content,
            )
            for row in db_enrichments
        ]

        source_files = []
        for db_file in db_files:
            git_file = domain_git_entities.GitFile(
                created_at=db_file.created_at,
                blob_sha=db_file.blob_sha,
                path=db_file.path,
                mime_type=db_file.mime_type,
                size=db_file.size,
                extension=db_file.extension,
            )
            source_files.append(git_file)

        return domain_git_entities.SnippetV2(
            sha=db_snippet.sha,
            created_at=db_snippet.created_at,
            updated_at=db_snippet.updated_at,
            derives_from=source_files,
            content=db_snippet.content,
            enrichments=domain_enrichments,
            extension=db_snippet.extension,
        )

    def from_domain_snippet_v2(
        self, domain_snippet: domain_git_entities.SnippetV2
    ) -> db_entities.SnippetV2:
        """Build a SQLAlchemy SnippetV2 from a domain snippet.

        Only ``sha``, ``content`` and ``extension`` are copied; timestamps,
        source files and enrichments are not set on the returned record.
        """
        record = db_entities.SnippetV2(
            sha=domain_snippet.sha,
            content=domain_snippet.content,
            extension=domain_snippet.extension,
        )
        return record

    def from_domain_enrichments(
        self, snippet_sha: str, enrichments: list[Enrichment]
    ) -> list[db_entities.Enrichment]:
        """Build SQLAlchemy enrichment records for one snippet.

        Args:
            snippet_sha: SHA stored on every record as ``snippet_sha``.
            enrichments: Domain enrichments to convert, in order.

        Returns:
            One database enrichment per input enrichment, in the same order.

        """
        # The domain enum member is re-keyed into the database enum by value.
        return [
            db_entities.Enrichment(
                snippet_sha=snippet_sha,
                type=db_entities.EnrichmentType(item.type.value),
                content=item.content,
            )
            for item in enrichments
        ]

    def to_domain_commit_index(
        self,
        db_commit_index: db_entities.CommitIndex,
        snippets: list[domain_git_entities.SnippetV2],
    ) -> domain_git_entities.CommitIndex:
        """Build a domain CommitIndex from its database row.

        Args:
            db_commit_index: The stored commit index.
            snippets: Already-converted domain snippets to attach.

        Returns:
            The domain commit index with ``status`` wrapped in ``IndexStatus``
            and ``processing_time_seconds`` coerced to ``float``.

        """
        status = domain_git_entities.IndexStatus(db_commit_index.status)
        elapsed_seconds = float(db_commit_index.processing_time_seconds)
        return domain_git_entities.CommitIndex(
            commit_sha=db_commit_index.commit_sha,
            created_at=db_commit_index.created_at,
            updated_at=db_commit_index.updated_at,
            snippets=snippets,
            status=status,
            indexed_at=db_commit_index.indexed_at,
            error_message=db_commit_index.error_message,
            files_processed=db_commit_index.files_processed,
            processing_time_seconds=elapsed_seconds,
        )

    def from_domain_commit_index(
        self, domain_commit_index: domain_git_entities.CommitIndex
    ) -> db_entities.CommitIndex:
        """Build a SQLAlchemy CommitIndex from a domain commit index.

        Fields are copied as-is; snippets and timestamps are not set on the
        returned record.
        """
        source = domain_commit_index
        return db_entities.CommitIndex(
            commit_sha=source.commit_sha,
            status=source.status,
            indexed_at=source.indexed_at,
            error_message=source.error_message,
            files_processed=source.files_processed,
            processing_time_seconds=source.processing_time_seconds,
        )
|
|
@@ -1,43 +1,10 @@
|
|
|
1
1
|
"""Task mapper for the task queue."""
|
|
2
2
|
|
|
3
|
-
from typing import ClassVar
|
|
4
|
-
|
|
5
3
|
from kodit.domain.entities import Task
|
|
6
|
-
from kodit.domain.value_objects import
|
|
4
|
+
from kodit.domain.value_objects import TaskOperation
|
|
7
5
|
from kodit.infrastructure.sqlalchemy import entities as db_entities
|
|
8
6
|
|
|
9
7
|
|
|
10
|
-
class TaskTypeMapper:
|
|
11
|
-
"""Maps between domain QueuedTaskType and SQLAlchemy TaskType."""
|
|
12
|
-
|
|
13
|
-
# Map TaskType enum to QueuedTaskType
|
|
14
|
-
TASK_TYPE_MAPPING: ClassVar[dict[db_entities.TaskType, TaskType]] = {
|
|
15
|
-
db_entities.TaskType.INDEX_UPDATE: TaskType.INDEX_UPDATE,
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
@staticmethod
|
|
19
|
-
def to_domain_type(task_type: db_entities.TaskType) -> TaskType:
|
|
20
|
-
"""Convert SQLAlchemy TaskType to domain QueuedTaskType."""
|
|
21
|
-
if task_type not in TaskTypeMapper.TASK_TYPE_MAPPING:
|
|
22
|
-
raise ValueError(f"Unknown task type: {task_type}")
|
|
23
|
-
return TaskTypeMapper.TASK_TYPE_MAPPING[task_type]
|
|
24
|
-
|
|
25
|
-
@staticmethod
|
|
26
|
-
def from_domain_type(task_type: TaskType) -> db_entities.TaskType:
|
|
27
|
-
"""Convert domain QueuedTaskType to SQLAlchemy TaskType."""
|
|
28
|
-
if task_type not in TaskTypeMapper.TASK_TYPE_MAPPING.values():
|
|
29
|
-
raise ValueError(f"Unknown task type: {task_type}")
|
|
30
|
-
|
|
31
|
-
# Find value in TASK_TYPE_MAPPING
|
|
32
|
-
return next(
|
|
33
|
-
(
|
|
34
|
-
db_task_type
|
|
35
|
-
for db_task_type, domain_task_type in TaskTypeMapper.TASK_TYPE_MAPPING.items() # noqa: E501
|
|
36
|
-
if domain_task_type == task_type
|
|
37
|
-
)
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
|
|
41
8
|
class TaskMapper:
|
|
42
9
|
"""Maps between domain QueuedTask and SQLAlchemy Task entities.
|
|
43
10
|
|
|
@@ -52,13 +19,12 @@ class TaskMapper:
|
|
|
52
19
|
Since QueuedTask doesn't have status fields, we store processing
|
|
53
20
|
state in the payload.
|
|
54
21
|
"""
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
22
|
+
if record.type not in TaskOperation.__members__.values():
|
|
23
|
+
raise ValueError(f"Unknown operation: {record.type}")
|
|
58
24
|
# The dedup_key becomes the id in the domain entity
|
|
59
25
|
return Task(
|
|
60
26
|
id=record.dedup_key, # Use dedup_key as the unique identifier
|
|
61
|
-
type=
|
|
27
|
+
type=TaskOperation(record.type),
|
|
62
28
|
priority=record.priority,
|
|
63
29
|
payload=record.payload or {},
|
|
64
30
|
created_at=record.created_at,
|
|
@@ -68,14 +34,9 @@ class TaskMapper:
|
|
|
68
34
|
@staticmethod
|
|
69
35
|
def from_domain_task(task: Task) -> db_entities.Task:
|
|
70
36
|
"""Convert domain QueuedTask to SQLAlchemy Task record."""
|
|
71
|
-
if task.type not in TaskTypeMapper.TASK_TYPE_MAPPING.values():
|
|
72
|
-
raise ValueError(f"Unknown task type: {task.type}")
|
|
73
|
-
|
|
74
|
-
# Find value in TASK_TYPE_MAPPING
|
|
75
|
-
task_type = TaskTypeMapper.from_domain_type(task.type)
|
|
76
37
|
return db_entities.Task(
|
|
77
38
|
dedup_key=task.id,
|
|
78
|
-
type=
|
|
39
|
+
type=task.type.value,
|
|
79
40
|
payload=task.payload,
|
|
80
41
|
priority=task.priority,
|
|
81
42
|
)
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Task status mapper."""
|
|
2
|
+
|
|
3
|
+
from kodit.domain import entities as domain_entities
|
|
4
|
+
from kodit.domain.value_objects import ReportingState, TaskOperation, TrackableType
|
|
5
|
+
from kodit.infrastructure.sqlalchemy import entities as db_entities
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TaskStatusMapper:
    """Translate TaskStatus objects between the domain and database layers."""

    @staticmethod
    def from_domain_task_status(
        task_status: domain_entities.TaskStatus,
    ) -> db_entities.TaskStatus:
        """Build a database TaskStatus from a domain TaskStatus.

        The parent is stored as its id, the trackable type as its enum value,
        and the state as its enum value when it is a ``ReportingState``
        (otherwise it is passed through unchanged).
        """
        trackable_type = task_status.trackable_type
        stored_trackable_type = trackable_type.value if trackable_type else None

        parent = task_status.parent
        parent_id = parent.id if parent else None

        stored_state = task_status.state
        if isinstance(stored_state, ReportingState):
            stored_state = stored_state.value

        return db_entities.TaskStatus(
            id=task_status.id,
            operation=task_status.operation,
            created_at=task_status.created_at,
            updated_at=task_status.updated_at,
            trackable_id=task_status.trackable_id,
            trackable_type=stored_trackable_type,
            parent=parent_id,
            state=stored_state,
            error=task_status.error,
            total=task_status.total,
            current=task_status.current,
            message=task_status.message,
        )

    @staticmethod
    def to_domain_task_status(
        db_status: db_entities.TaskStatus,
    ) -> domain_entities.TaskStatus:
        """Build a domain TaskStatus from a single database row.

        The returned status always has ``parent=None``; use
        ``to_domain_task_status_with_hierarchy`` to link parents. A falsy
        ``error`` or ``trackable_type`` on the row becomes ``None``.
        """
        raw_trackable_type = db_status.trackable_type
        trackable_type = (
            TrackableType(raw_trackable_type) if raw_trackable_type else None
        )

        return domain_entities.TaskStatus(
            id=db_status.id,
            operation=TaskOperation(db_status.operation),
            state=ReportingState(db_status.state),
            created_at=db_status.created_at,
            updated_at=db_status.updated_at,
            trackable_id=db_status.trackable_id,
            trackable_type=trackable_type,
            parent=None,  # Linked afterwards by the hierarchy-aware converter
            error=db_status.error or None,
            total=db_status.total,
            current=db_status.current,
            message=db_status.message,
        )

    @staticmethod
    def to_domain_task_status_with_hierarchy(
        db_statuses: list[db_entities.TaskStatus],
    ) -> list[domain_entities.TaskStatus]:
        """Convert database rows to domain statuses and link their parents.

        Works in two passes: every row is converted on its own first, then
        each status whose row names a parent id present in the same batch gets
        that converted status assigned as ``parent``. Parents outside the
        batch are left as ``None``.

        Returns:
            The converted statuses, in the same order as ``db_statuses``.

        """
        # Pass 1: plain per-row conversion (parents still unset).
        converted = []
        for row in db_statuses:
            converted.append(TaskStatusMapper.to_domain_task_status(row))

        # Index by id so parent lookup is a dictionary hit.
        by_id = {}
        for status in converted:
            by_id[status.id] = status

        # Pass 2: attach parents that are part of this batch.
        for row, status in zip(db_statuses, converted, strict=True):
            parent_id = row.parent
            if not parent_id:
                continue
            if parent_id in by_id:
                status.parent = by_id[parent_id]

        return converted
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Log progress using structlog."""
|
|
2
|
+
|
|
3
|
+
import structlog
|
|
4
|
+
|
|
5
|
+
from kodit.config import ReportingConfig
|
|
6
|
+
from kodit.domain.entities import TaskStatus
|
|
7
|
+
from kodit.domain.protocols import ReportingModule, TaskStatusRepository
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DBProgressReportingModule(ReportingModule):
    """Reporting module that saves every progress change via a repository."""

    def __init__(
        self, task_status_repository: TaskStatusRepository, config: ReportingConfig
    ) -> None:
        """Store the collaborators and create a module-scoped logger.

        Args:
            task_status_repository: Repository used to save task statuses.
            config: Reporting configuration kept on the instance.

        """
        self._log = structlog.get_logger(__name__)
        self.config = config
        self.task_status_repository = task_status_repository

    async def on_change(self, progress: TaskStatus) -> None:
        """Save the changed task status through the repository."""
        repository = self.task_status_repository
        await repository.save(progress)
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
"""Log progress using structlog."""
|
|
2
2
|
|
|
3
|
-
import time
|
|
4
3
|
from datetime import UTC, datetime
|
|
5
4
|
|
|
6
5
|
import structlog
|
|
7
6
|
|
|
8
7
|
from kodit.config import ReportingConfig
|
|
8
|
+
from kodit.domain.entities import TaskStatus
|
|
9
9
|
from kodit.domain.protocols import ReportingModule
|
|
10
|
-
from kodit.domain.value_objects import
|
|
10
|
+
from kodit.domain.value_objects import ReportingState
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class LoggingReportingModule(ReportingModule):
|
|
@@ -19,47 +19,22 @@ class LoggingReportingModule(ReportingModule):
|
|
|
19
19
|
self._log = structlog.get_logger(__name__)
|
|
20
20
|
self._last_log_time: datetime = datetime.now(UTC)
|
|
21
21
|
|
|
22
|
-
def on_change(self,
|
|
22
|
+
async def on_change(self, progress: TaskStatus) -> None:
|
|
23
23
|
"""On step changed."""
|
|
24
24
|
current_time = datetime.now(UTC)
|
|
25
|
-
|
|
25
|
+
step = progress
|
|
26
26
|
|
|
27
|
-
if
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
27
|
+
if step.state == ReportingState.FAILED:
|
|
28
|
+
self._log.exception(
|
|
29
|
+
step.operation,
|
|
30
|
+
state=step.state,
|
|
31
|
+
completion_percent=step.completion_percent,
|
|
32
|
+
error=step.error,
|
|
33
|
+
)
|
|
34
|
+
else:
|
|
31
35
|
self._log.info(
|
|
32
|
-
step.
|
|
36
|
+
step.operation,
|
|
33
37
|
state=step.state,
|
|
34
|
-
message=step.message,
|
|
35
38
|
completion_percent=step.completion_percent,
|
|
36
39
|
)
|
|
37
40
|
self._last_log_time = current_time
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
class LogProgress(Progress):
|
|
41
|
-
"""Log progress using structlog with time-based throttling."""
|
|
42
|
-
|
|
43
|
-
def __init__(self, config: ReportingConfig | None = None) -> None:
|
|
44
|
-
"""Initialize the log progress."""
|
|
45
|
-
self.log = structlog.get_logger()
|
|
46
|
-
self.config = config or ReportingConfig()
|
|
47
|
-
self.last_log_time: float = 0
|
|
48
|
-
|
|
49
|
-
def on_update(self, state: ProgressState) -> None:
|
|
50
|
-
"""Log the progress with time-based throttling."""
|
|
51
|
-
current_time = time.time()
|
|
52
|
-
time_since_last_log = current_time - self.last_log_time
|
|
53
|
-
|
|
54
|
-
if time_since_last_log >= self.config.log_time_interval.total_seconds():
|
|
55
|
-
self.log.info(
|
|
56
|
-
"Progress...",
|
|
57
|
-
operation=state.operation,
|
|
58
|
-
percentage=state.percentage,
|
|
59
|
-
message=state.message,
|
|
60
|
-
)
|
|
61
|
-
self.last_log_time = current_time
|
|
62
|
-
|
|
63
|
-
def on_complete(self) -> None:
|
|
64
|
-
"""Log the completion."""
|
|
65
|
-
self.log.info("Completed")
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Log progress using telemetry."""
|
|
2
|
+
|
|
3
|
+
import structlog
|
|
4
|
+
|
|
5
|
+
from kodit.domain.entities import TaskStatus
|
|
6
|
+
from kodit.domain.protocols import ReportingModule
|
|
7
|
+
from kodit.log import log_event
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TelemetryProgressReportingModule(ReportingModule):
    """Reporting module that forwards progress changes to ``log_event``."""

    def __init__(self) -> None:
        """Create the module-scoped structlog logger."""
        self._log = structlog.get_logger(__name__)

    async def on_change(self, progress: TaskStatus) -> None:
        """Pass the operation of the changed task status to ``log_event``."""
        operation = progress.operation
        log_event(operation)
|
|
@@ -14,7 +14,7 @@ import structlog
|
|
|
14
14
|
from tree_sitter import Node, Parser, Tree
|
|
15
15
|
from tree_sitter_language_pack import get_language
|
|
16
16
|
|
|
17
|
-
from kodit.domain.entities import
|
|
17
|
+
from kodit.domain.entities.git import GitFile, SnippetV2
|
|
18
18
|
from kodit.domain.value_objects import LanguageMapping
|
|
19
19
|
|
|
20
20
|
|
|
@@ -149,9 +149,9 @@ class Slicer:
|
|
|
149
149
|
"""Initialize an empty slicer."""
|
|
150
150
|
self.log = structlog.get_logger(__name__)
|
|
151
151
|
|
|
152
|
-
def
|
|
153
|
-
self, files: list[
|
|
154
|
-
) -> list[
|
|
152
|
+
def extract_snippets_from_git_files( # noqa: C901
|
|
153
|
+
self, files: list[GitFile], language: str = "python"
|
|
154
|
+
) -> list[SnippetV2]:
|
|
155
155
|
"""Extract code snippets from a list of files.
|
|
156
156
|
|
|
157
157
|
Args:
|
|
@@ -187,10 +187,10 @@ class Slicer:
|
|
|
187
187
|
raise RuntimeError(f"Failed to load {language} parser: {e}") from e
|
|
188
188
|
|
|
189
189
|
# Create mapping from Paths to File objects and extract paths
|
|
190
|
-
path_to_file_map: dict[Path,
|
|
190
|
+
path_to_file_map: dict[Path, GitFile] = {}
|
|
191
191
|
file_paths: list[Path] = []
|
|
192
192
|
for file in files:
|
|
193
|
-
file_path = file.
|
|
193
|
+
file_path = Path(file.path)
|
|
194
194
|
|
|
195
195
|
# Validate file matches language
|
|
196
196
|
if not self._file_matches_language(file_path.suffix, language):
|
|
@@ -225,7 +225,7 @@ class Slicer:
|
|
|
225
225
|
self._build_reverse_call_graph(state)
|
|
226
226
|
|
|
227
227
|
# Extract snippets for all functions
|
|
228
|
-
snippets = []
|
|
228
|
+
snippets: list[SnippetV2] = []
|
|
229
229
|
for qualified_name in state.def_index:
|
|
230
230
|
snippet_content = self._get_snippet(
|
|
231
231
|
qualified_name,
|
|
@@ -234,7 +234,7 @@ class Slicer:
|
|
|
234
234
|
{"max_depth": 2, "max_functions": 8},
|
|
235
235
|
)
|
|
236
236
|
if "not found" not in snippet_content:
|
|
237
|
-
snippet = self.
|
|
237
|
+
snippet = self._create_snippet_entity_from_git_files(
|
|
238
238
|
qualified_name, snippet_content, language, state, path_to_file_map
|
|
239
239
|
)
|
|
240
240
|
snippets.append(snippet)
|
|
@@ -247,8 +247,8 @@ class Slicer:
|
|
|
247
247
|
return False
|
|
248
248
|
|
|
249
249
|
try:
|
|
250
|
-
return (
|
|
251
|
-
|
|
250
|
+
return language == LanguageMapping.get_language_for_extension(
|
|
251
|
+
file_extension
|
|
252
252
|
)
|
|
253
253
|
except ValueError:
|
|
254
254
|
# Extension not supported, so it doesn't match any language
|
|
@@ -614,7 +614,8 @@ class Slicer:
|
|
|
614
614
|
if callers:
|
|
615
615
|
snippet_lines.append("")
|
|
616
616
|
snippet_lines.append("# === USAGE EXAMPLES ===")
|
|
617
|
-
|
|
617
|
+
# Show up to 2 examples, sorted for deterministic order
|
|
618
|
+
for caller in sorted(callers)[:2]:
|
|
618
619
|
call_line = self._find_function_call_line(
|
|
619
620
|
caller, function_name, state, file_contents
|
|
620
621
|
)
|
|
@@ -625,37 +626,37 @@ class Slicer:
|
|
|
625
626
|
|
|
626
627
|
return "\n".join(snippet_lines)
|
|
627
628
|
|
|
628
|
-
def
|
|
629
|
+
def _create_snippet_entity_from_git_files(
|
|
629
630
|
self,
|
|
630
631
|
qualified_name: str,
|
|
631
632
|
snippet_content: str,
|
|
632
633
|
language: str,
|
|
633
634
|
state: AnalyzerState,
|
|
634
|
-
path_to_file_map: dict[Path,
|
|
635
|
-
) ->
|
|
635
|
+
path_to_file_map: dict[Path, GitFile],
|
|
636
|
+
) -> SnippetV2:
|
|
636
637
|
"""Create a Snippet domain entity from extracted content."""
|
|
637
638
|
# Determine all files that this snippet derives from
|
|
638
|
-
derives_from_files = self.
|
|
639
|
+
derives_from_files = self._find_source_files_for_snippet_from_git_files(
|
|
639
640
|
qualified_name, snippet_content, state, path_to_file_map
|
|
640
641
|
)
|
|
641
642
|
|
|
642
643
|
# Create the snippet entity
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
644
|
+
return SnippetV2(
|
|
645
|
+
derives_from=derives_from_files,
|
|
646
|
+
content=snippet_content,
|
|
647
|
+
extension=language,
|
|
648
|
+
sha=SnippetV2.compute_sha(snippet_content),
|
|
649
|
+
)
|
|
649
650
|
|
|
650
|
-
def
|
|
651
|
+
def _find_source_files_for_snippet_from_git_files(
|
|
651
652
|
self,
|
|
652
653
|
qualified_name: str,
|
|
653
654
|
snippet_content: str,
|
|
654
655
|
state: AnalyzerState,
|
|
655
|
-
path_to_file_map: dict[Path,
|
|
656
|
-
) -> list[
|
|
656
|
+
path_to_file_map: dict[Path, GitFile],
|
|
657
|
+
) -> list[GitFile]:
|
|
657
658
|
"""Find all source files that a snippet derives from."""
|
|
658
|
-
source_files: list[
|
|
659
|
+
source_files: list[GitFile] = []
|
|
659
660
|
source_file_paths: set[Path] = set()
|
|
660
661
|
|
|
661
662
|
# Add the primary function's file
|
|
@@ -835,7 +836,7 @@ class Slicer:
|
|
|
835
836
|
# Add direct dependencies
|
|
836
837
|
to_visit.extend(
|
|
837
838
|
(callee, depth + 1)
|
|
838
|
-
for callee in state.call_graph.get(current, set())
|
|
839
|
+
for callee in sorted(state.call_graph.get(current, set()))
|
|
839
840
|
if callee not in visited and callee in state.def_index
|
|
840
841
|
)
|
|
841
842
|
|
|
@@ -850,26 +851,26 @@ class Slicer:
|
|
|
850
851
|
in_degree: dict[str, int] = defaultdict(int)
|
|
851
852
|
graph: dict[str, set[str]] = defaultdict(set)
|
|
852
853
|
|
|
853
|
-
for func in functions:
|
|
854
|
-
for callee in state.call_graph.get(func, set()):
|
|
854
|
+
for func in sorted(functions):
|
|
855
|
+
for callee in sorted(state.call_graph.get(func, set())):
|
|
855
856
|
if callee in functions:
|
|
856
857
|
graph[func].add(callee)
|
|
857
858
|
in_degree[callee] += 1
|
|
858
859
|
|
|
859
860
|
# Find roots
|
|
860
|
-
queue = [f for f in functions if in_degree[f] == 0]
|
|
861
|
+
queue = [f for f in sorted(functions) if in_degree[f] == 0]
|
|
861
862
|
result = []
|
|
862
863
|
|
|
863
864
|
while queue:
|
|
864
865
|
current = queue.pop(0)
|
|
865
866
|
result.append(current)
|
|
866
|
-
for neighbor in graph[current]:
|
|
867
|
+
for neighbor in sorted(graph[current]):
|
|
867
868
|
in_degree[neighbor] -= 1
|
|
868
869
|
if in_degree[neighbor] == 0:
|
|
869
870
|
queue.append(neighbor)
|
|
870
871
|
|
|
871
872
|
# Add any remaining (cycles)
|
|
872
|
-
for func in functions:
|
|
873
|
+
for func in sorted(functions):
|
|
873
874
|
if func not in result:
|
|
874
875
|
result.append(func)
|
|
875
876
|
|