kodit 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +59 -24
- kodit/application/factories/reporting_factory.py +16 -7
- kodit/application/factories/server_factory.py +311 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +543 -0
- kodit/application/services/indexing_worker_service.py +13 -46
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +70 -54
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -763
- kodit/cli_utils.py +2 -9
- kodit/config.py +3 -96
- kodit/database.py +38 -1
- kodit/domain/entities/__init__.py +276 -0
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +270 -46
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/task_status_query_service.py +19 -0
- kodit/domain/value_objects.py +113 -147
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +105 -44
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +271 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
- kodit/infrastructure/cloning/git/working_copy.py +10 -3
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
- kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +106 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/mappers/task_status_mapper.py +85 -0
- kodit/infrastructure/reporting/db_progress.py +23 -0
- kodit/infrastructure/reporting/log_progress.py +13 -38
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/slicer.py +32 -31
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/entities.py +428 -131
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -26
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_openapi.py +7 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
- kodit-0.5.0.dist-info/RECORD +137 -0
- kodit/application/factories/code_indexing_factory.py +0 -193
- kodit/application/services/auto_indexing_service.py +0 -103
- kodit/application/services/code_indexing_application_service.py +0 -393
- kodit/domain/entities.py +0 -323
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -267
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -119
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -73
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.2.dist-info/RECORD +0 -119
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
kodit/cli_utils.py
CHANGED
|
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any
|
|
|
6
6
|
|
|
7
7
|
import click
|
|
8
8
|
|
|
9
|
-
from kodit.infrastructure.api.client import IndexClient, SearchClient
|
|
9
|
+
from kodit.infrastructure.api.client import SearchClient
|
|
10
10
|
|
|
11
11
|
if TYPE_CHECKING:
|
|
12
12
|
from kodit.config import AppContext
|
|
@@ -37,7 +37,7 @@ def with_client(f: Callable) -> Callable:
|
|
|
37
37
|
inner_func = getattr(
|
|
38
38
|
getattr(session_wrapped, "__wrapped__", session_wrapped),
|
|
39
39
|
"__wrapped__",
|
|
40
|
-
session_wrapped
|
|
40
|
+
session_wrapped,
|
|
41
41
|
)
|
|
42
42
|
|
|
43
43
|
# Get database session manually
|
|
@@ -47,13 +47,6 @@ def with_client(f: Callable) -> Callable:
|
|
|
47
47
|
else:
|
|
48
48
|
# Remote mode - use API clients
|
|
49
49
|
clients = {
|
|
50
|
-
"index_client": IndexClient(
|
|
51
|
-
base_url=app_context.remote.server_url or "",
|
|
52
|
-
api_key=app_context.remote.api_key,
|
|
53
|
-
timeout=app_context.remote.timeout,
|
|
54
|
-
max_retries=app_context.remote.max_retries,
|
|
55
|
-
verify_ssl=app_context.remote.verify_ssl,
|
|
56
|
-
),
|
|
57
50
|
"search_client": SearchClient(
|
|
58
51
|
base_url=app_context.remote.server_url or "",
|
|
59
52
|
api_key=app_context.remote.api_key,
|
kodit/config.py
CHANGED
|
@@ -14,9 +14,7 @@ import structlog
|
|
|
14
14
|
from pydantic import BaseModel, Field, field_validator
|
|
15
15
|
from pydantic_settings import (
|
|
16
16
|
BaseSettings,
|
|
17
|
-
EnvSettingsSource,
|
|
18
17
|
NoDecode,
|
|
19
|
-
PydanticBaseSettingsSource,
|
|
20
18
|
SettingsConfigDict,
|
|
21
19
|
)
|
|
22
20
|
|
|
@@ -91,46 +89,12 @@ class Search(BaseModel):
|
|
|
91
89
|
provider: Literal["sqlite", "vectorchord"] = Field(default="sqlite")
|
|
92
90
|
|
|
93
91
|
|
|
94
|
-
class AutoIndexingSource(BaseModel):
|
|
95
|
-
"""Configuration for a single auto-indexing source."""
|
|
96
|
-
|
|
97
|
-
uri: str = Field(description="URI of the source to index (git URL or local path)")
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
class AutoIndexingConfig(BaseModel):
|
|
101
|
-
"""Configuration for auto-indexing."""
|
|
102
|
-
|
|
103
|
-
sources: list[AutoIndexingSource] = Field(
|
|
104
|
-
default_factory=list, description="List of sources to auto-index"
|
|
105
|
-
)
|
|
106
|
-
|
|
107
|
-
@field_validator("sources", mode="before")
|
|
108
|
-
@classmethod
|
|
109
|
-
def parse_sources(cls, v: Any) -> Any:
|
|
110
|
-
"""Parse sources from environment variables or other formats."""
|
|
111
|
-
if v is None:
|
|
112
|
-
return []
|
|
113
|
-
if isinstance(v, list):
|
|
114
|
-
return v
|
|
115
|
-
if isinstance(v, dict):
|
|
116
|
-
# Handle case where env vars are numbered keys like {'0': {'uri': '...'}}
|
|
117
|
-
sources = []
|
|
118
|
-
i = 0
|
|
119
|
-
while str(i) in v:
|
|
120
|
-
source_data = v[str(i)]
|
|
121
|
-
if isinstance(source_data, dict) and "uri" in source_data:
|
|
122
|
-
sources.append(AutoIndexingSource(uri=source_data["uri"]))
|
|
123
|
-
i += 1
|
|
124
|
-
return sources
|
|
125
|
-
return v
|
|
126
|
-
|
|
127
|
-
|
|
128
92
|
class PeriodicSyncConfig(BaseModel):
|
|
129
93
|
"""Configuration for periodic/scheduled syncing."""
|
|
130
94
|
|
|
131
95
|
enabled: bool = Field(default=True, description="Enable periodic sync")
|
|
132
96
|
interval_seconds: float = Field(
|
|
133
|
-
default=1800, description="Interval between
|
|
97
|
+
default=1800, description="Interval between periodic syncs in seconds"
|
|
134
98
|
)
|
|
135
99
|
retry_attempts: int = Field(
|
|
136
100
|
default=3, description="Number of retry attempts for failed syncs"
|
|
@@ -147,36 +111,6 @@ class RemoteConfig(BaseModel):
|
|
|
147
111
|
verify_ssl: bool = Field(default=True, description="Verify SSL certificates")
|
|
148
112
|
|
|
149
113
|
|
|
150
|
-
class CustomAutoIndexingEnvSource(EnvSettingsSource):
|
|
151
|
-
"""Custom environment source for parsing AutoIndexingConfig."""
|
|
152
|
-
|
|
153
|
-
def __call__(self) -> dict[str, Any]:
|
|
154
|
-
"""Load settings from env vars with custom auto-indexing parsing."""
|
|
155
|
-
d: dict[str, Any] = {}
|
|
156
|
-
|
|
157
|
-
# First get the standard env vars
|
|
158
|
-
env_vars = super().__call__()
|
|
159
|
-
d.update(env_vars)
|
|
160
|
-
|
|
161
|
-
# Custom parsing for auto-indexing sources
|
|
162
|
-
auto_indexing_sources = []
|
|
163
|
-
i = 0
|
|
164
|
-
while True:
|
|
165
|
-
# Note: env_vars keys are lowercase due to Pydantic Settings normalization
|
|
166
|
-
uri_key = f"auto_indexing_sources_{i}_uri"
|
|
167
|
-
if uri_key in self.env_vars:
|
|
168
|
-
uri_value = self.env_vars[uri_key]
|
|
169
|
-
auto_indexing_sources.append({"uri": uri_value})
|
|
170
|
-
i += 1
|
|
171
|
-
else:
|
|
172
|
-
break
|
|
173
|
-
|
|
174
|
-
if auto_indexing_sources:
|
|
175
|
-
d["auto_indexing"] = {"sources": auto_indexing_sources}
|
|
176
|
-
|
|
177
|
-
return d
|
|
178
|
-
|
|
179
|
-
|
|
180
114
|
class AppContext(BaseSettings):
|
|
181
115
|
"""Global context for the kodit project. Provides a shared state for the app."""
|
|
182
116
|
|
|
@@ -189,30 +123,6 @@ class AppContext(BaseSettings):
|
|
|
189
123
|
extra="ignore",
|
|
190
124
|
)
|
|
191
125
|
|
|
192
|
-
@classmethod
|
|
193
|
-
def settings_customise_sources(
|
|
194
|
-
cls,
|
|
195
|
-
settings_cls: type[BaseSettings],
|
|
196
|
-
init_settings: PydanticBaseSettingsSource,
|
|
197
|
-
env_settings: PydanticBaseSettingsSource, # noqa: ARG003
|
|
198
|
-
dotenv_settings: PydanticBaseSettingsSource,
|
|
199
|
-
file_secret_settings: PydanticBaseSettingsSource,
|
|
200
|
-
) -> tuple[PydanticBaseSettingsSource, ...]:
|
|
201
|
-
"""Customize settings sources to use custom auto-indexing parsing."""
|
|
202
|
-
custom_env_settings = CustomAutoIndexingEnvSource(
|
|
203
|
-
settings_cls,
|
|
204
|
-
env_nested_delimiter=settings_cls.model_config.get("env_nested_delimiter"),
|
|
205
|
-
env_ignore_empty=settings_cls.model_config.get("env_ignore_empty", False),
|
|
206
|
-
env_parse_none_str=settings_cls.model_config.get("env_parse_none_str", ""),
|
|
207
|
-
env_parse_enums=settings_cls.model_config.get("env_parse_enums", None),
|
|
208
|
-
)
|
|
209
|
-
return (
|
|
210
|
-
init_settings,
|
|
211
|
-
custom_env_settings,
|
|
212
|
-
dotenv_settings,
|
|
213
|
-
file_secret_settings,
|
|
214
|
-
)
|
|
215
|
-
|
|
216
126
|
data_dir: Path = Field(default=DEFAULT_BASE_DIR)
|
|
217
127
|
db_url: str = Field(
|
|
218
128
|
default_factory=lambda data: f"sqlite+aiosqlite:///{data['data_dir']}/kodit.db"
|
|
@@ -231,9 +141,6 @@ class AppContext(BaseSettings):
|
|
|
231
141
|
default_search: Search = Field(
|
|
232
142
|
default=Search(),
|
|
233
143
|
)
|
|
234
|
-
auto_indexing: AutoIndexingConfig | None = Field(
|
|
235
|
-
default=AutoIndexingConfig(), description="Auto-indexing configuration"
|
|
236
|
-
)
|
|
237
144
|
periodic_sync: PeriodicSyncConfig = Field(
|
|
238
145
|
default=PeriodicSyncConfig(), description="Periodic sync configuration"
|
|
239
146
|
)
|
|
@@ -305,7 +212,7 @@ class AppContext(BaseSettings):
|
|
|
305
212
|
with_app_context = click.make_pass_decorator(AppContext)
|
|
306
213
|
|
|
307
214
|
|
|
308
|
-
def wrap_async(f: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
|
|
215
|
+
def wrap_async[T](f: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
|
|
309
216
|
"""Decorate async Click commands.
|
|
310
217
|
|
|
311
218
|
This decorator wraps an async function to run it with asyncio.run().
|
|
@@ -326,7 +233,7 @@ def wrap_async(f: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
|
|
|
326
233
|
return wrapper
|
|
327
234
|
|
|
328
235
|
|
|
329
|
-
def with_session(f: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
|
|
236
|
+
def with_session[T](f: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
|
|
330
237
|
"""Provide a database session to CLI commands."""
|
|
331
238
|
|
|
332
239
|
@wraps(f)
|
kodit/database.py
CHANGED
|
@@ -2,10 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
from collections.abc import Callable
|
|
4
4
|
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
5
6
|
|
|
6
7
|
import structlog
|
|
7
8
|
from alembic import command
|
|
8
9
|
from alembic.config import Config as AlembicConfig
|
|
10
|
+
from sqlalchemy import event
|
|
9
11
|
from sqlalchemy.ext.asyncio import (
|
|
10
12
|
AsyncSession,
|
|
11
13
|
async_sessionmaker,
|
|
@@ -21,7 +23,42 @@ class Database:
|
|
|
21
23
|
def __init__(self, db_url: str) -> None:
|
|
22
24
|
"""Initialize the database."""
|
|
23
25
|
self.log = structlog.get_logger(__name__)
|
|
24
|
-
|
|
26
|
+
|
|
27
|
+
# Configure SQLite-specific connection arguments to prevent locking issues
|
|
28
|
+
connect_args = {}
|
|
29
|
+
if "sqlite" in db_url.lower():
|
|
30
|
+
connect_args = {
|
|
31
|
+
"timeout": 20, # 20 second timeout for database operations
|
|
32
|
+
"check_same_thread": False, # Allow use from different threads
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
self.db_engine = create_async_engine(
|
|
36
|
+
db_url,
|
|
37
|
+
echo=False,
|
|
38
|
+
connect_args=connect_args,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# Configure SQLite pragmas for better concurrency and performance
|
|
42
|
+
if "sqlite" in db_url.lower():
|
|
43
|
+
|
|
44
|
+
@event.listens_for(self.db_engine.sync_engine, "connect")
|
|
45
|
+
def set_sqlite_pragma(
|
|
46
|
+
dbapi_connection: Any, connection_record: Any
|
|
47
|
+
) -> None:
|
|
48
|
+
del (
|
|
49
|
+
connection_record
|
|
50
|
+
) # Unused but required by SQLAlchemy event interface
|
|
51
|
+
cursor = dbapi_connection.cursor()
|
|
52
|
+
# Enable WAL mode for better concurrency
|
|
53
|
+
cursor.execute("PRAGMA journal_mode=WAL")
|
|
54
|
+
# Set busy timeout to prevent immediate locking failures
|
|
55
|
+
cursor.execute("PRAGMA busy_timeout=20000")
|
|
56
|
+
# Enable foreign key constraints
|
|
57
|
+
cursor.execute("PRAGMA foreign_keys=ON")
|
|
58
|
+
# Optimize for speed over safety (acceptable for indexing workloads)
|
|
59
|
+
cursor.execute("PRAGMA synchronous=NORMAL")
|
|
60
|
+
cursor.close()
|
|
61
|
+
|
|
25
62
|
self.db_session_factory = async_sessionmaker(
|
|
26
63
|
self.db_engine,
|
|
27
64
|
class_=AsyncSession,
|
kodit/domain/entities/__init__.py
ADDED
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
"""Pure domain entities using Pydantic."""
|
|
2
|
+
|
|
3
|
+
from datetime import UTC, datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Protocol
|
|
6
|
+
from urllib.parse import urlparse, urlunparse
|
|
7
|
+
|
|
8
|
+
from pydantic import AnyUrl, BaseModel
|
|
9
|
+
|
|
10
|
+
from kodit.domain.value_objects import (
|
|
11
|
+
ReportingState,
|
|
12
|
+
TaskOperation,
|
|
13
|
+
TrackableType,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class IgnorePatternProvider(Protocol):
|
|
18
|
+
"""Protocol for ignore pattern providers."""
|
|
19
|
+
|
|
20
|
+
def should_ignore(self, path: Path) -> bool:
|
|
21
|
+
"""Check if a path should be ignored."""
|
|
22
|
+
...
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Author(BaseModel):
|
|
26
|
+
"""Author domain entity."""
|
|
27
|
+
|
|
28
|
+
id: int | None = None
|
|
29
|
+
name: str
|
|
30
|
+
email: str
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class WorkingCopy(BaseModel):
|
|
34
|
+
"""Working copy value object representing cloned source location."""
|
|
35
|
+
|
|
36
|
+
@classmethod
|
|
37
|
+
def sanitize_local_path(cls, path: str) -> AnyUrl:
|
|
38
|
+
"""Sanitize a local path."""
|
|
39
|
+
return AnyUrl(Path(path).resolve().absolute().as_uri())
|
|
40
|
+
|
|
41
|
+
@classmethod
|
|
42
|
+
def sanitize_git_url(cls, url: str) -> AnyUrl:
|
|
43
|
+
"""Remove credentials from a git URL while preserving the rest of the URL.
|
|
44
|
+
|
|
45
|
+
This function handles various git URL formats:
|
|
46
|
+
- HTTPS URLs with username:password@host
|
|
47
|
+
- HTTPS URLs with username@host (no password)
|
|
48
|
+
- SSH URLs (left unchanged)
|
|
49
|
+
- File URLs (left unchanged)
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
url: The git URL that may contain credentials.
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
The sanitized URL with credentials removed.
|
|
56
|
+
|
|
57
|
+
Examples:
|
|
58
|
+
>>> sanitize_git_url("https://phil:token@dev.azure.com/org/project/_git/repo")
|
|
59
|
+
"https://dev.azure.com/org/project/_git/repo"
|
|
60
|
+
>>> sanitize_git_url("https://username@github.com/user/repo.git")
|
|
61
|
+
"https://github.com/user/repo.git"
|
|
62
|
+
>>> sanitize_git_url("git@github.com:user/repo.git")
|
|
63
|
+
"ssh://git@github.com/user/repo.git"
|
|
64
|
+
|
|
65
|
+
"""
|
|
66
|
+
# Handle different URL types
|
|
67
|
+
if not url:
|
|
68
|
+
raise ValueError("URL is required")
|
|
69
|
+
|
|
70
|
+
if url.startswith("git@"):
|
|
71
|
+
return cls._handle_ssh_url(url)
|
|
72
|
+
if url.startswith("ssh://"):
|
|
73
|
+
return AnyUrl(url)
|
|
74
|
+
if url.startswith("file://"):
|
|
75
|
+
return AnyUrl(url)
|
|
76
|
+
|
|
77
|
+
# Try local path conversion
|
|
78
|
+
local_url = cls._try_local_path_conversion(url)
|
|
79
|
+
if local_url:
|
|
80
|
+
return local_url
|
|
81
|
+
|
|
82
|
+
# Handle HTTPS URLs with credentials
|
|
83
|
+
return cls._sanitize_https_url(url)
|
|
84
|
+
|
|
85
|
+
@classmethod
|
|
86
|
+
def _handle_ssh_url(cls, url: str) -> AnyUrl:
|
|
87
|
+
"""Handle SSH URL conversion."""
|
|
88
|
+
if ":" in url and not url.startswith("ssh://"):
|
|
89
|
+
host_path = url[4:] # Remove "git@"
|
|
90
|
+
if ":" in host_path:
|
|
91
|
+
host, path = host_path.split(":", 1)
|
|
92
|
+
return AnyUrl(f"ssh://git@{host}/{path}")
|
|
93
|
+
return AnyUrl(url)
|
|
94
|
+
|
|
95
|
+
@classmethod
|
|
96
|
+
def _try_local_path_conversion(cls, url: str) -> AnyUrl | None:
|
|
97
|
+
"""Try to convert local paths to file:// URLs."""
|
|
98
|
+
from pathlib import Path
|
|
99
|
+
|
|
100
|
+
try:
|
|
101
|
+
path = Path(url)
|
|
102
|
+
if path.exists() or url.startswith(("/", "./", "../")) or url == ".":
|
|
103
|
+
absolute_path = path.resolve()
|
|
104
|
+
return AnyUrl(f"file://{absolute_path}")
|
|
105
|
+
except OSError:
|
|
106
|
+
# Path operations failed, not a local path
|
|
107
|
+
pass
|
|
108
|
+
return None
|
|
109
|
+
|
|
110
|
+
@classmethod
|
|
111
|
+
def _sanitize_https_url(cls, url: str) -> AnyUrl:
|
|
112
|
+
"""Remove credentials from HTTPS URLs."""
|
|
113
|
+
try:
|
|
114
|
+
parsed = urlparse(url)
|
|
115
|
+
|
|
116
|
+
# If there are no credentials, return the URL as-is
|
|
117
|
+
if not parsed.username:
|
|
118
|
+
return AnyUrl(url)
|
|
119
|
+
|
|
120
|
+
# Reconstruct the URL without credentials
|
|
121
|
+
sanitized_netloc = parsed.hostname
|
|
122
|
+
if parsed.port:
|
|
123
|
+
sanitized_netloc = f"{parsed.hostname}:{parsed.port}"
|
|
124
|
+
|
|
125
|
+
return AnyUrl(
|
|
126
|
+
urlunparse(
|
|
127
|
+
(
|
|
128
|
+
parsed.scheme,
|
|
129
|
+
sanitized_netloc,
|
|
130
|
+
parsed.path,
|
|
131
|
+
parsed.params,
|
|
132
|
+
parsed.query,
|
|
133
|
+
parsed.fragment,
|
|
134
|
+
)
|
|
135
|
+
)
|
|
136
|
+
)
|
|
137
|
+
except Exception as e:
|
|
138
|
+
raise ValueError(f"Invalid URL: {url}") from e
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class Source(BaseModel):
|
|
142
|
+
"""Source domain entity."""
|
|
143
|
+
|
|
144
|
+
id: int | None = None # Is populated by repository
|
|
145
|
+
created_at: datetime | None = None # Is populated by repository
|
|
146
|
+
updated_at: datetime | None = None # Is populated by repository
|
|
147
|
+
working_copy: WorkingCopy
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class Task(BaseModel):
|
|
151
|
+
"""Represents an item in the queue waiting to be processed.
|
|
152
|
+
|
|
153
|
+
If the item exists, that means it is in the queue and waiting to be processed. There
|
|
154
|
+
is no status associated.
|
|
155
|
+
"""
|
|
156
|
+
|
|
157
|
+
id: str # Is a unique key to deduplicate items in the queue
|
|
158
|
+
type: TaskOperation # Task operation
|
|
159
|
+
priority: int # Priority (higher number = higher priority)
|
|
160
|
+
payload: dict[str, Any] # Task-specific data
|
|
161
|
+
|
|
162
|
+
created_at: datetime | None = None # Is populated by repository
|
|
163
|
+
updated_at: datetime | None = None # Is populated by repository
|
|
164
|
+
|
|
165
|
+
@staticmethod
|
|
166
|
+
def create(
|
|
167
|
+
operation: TaskOperation, priority: int, payload: dict[str, Any]
|
|
168
|
+
) -> "Task":
|
|
169
|
+
"""Create a task."""
|
|
170
|
+
return Task(
|
|
171
|
+
id=Task.create_id(operation, payload),
|
|
172
|
+
type=operation,
|
|
173
|
+
priority=priority,
|
|
174
|
+
payload=payload,
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
@staticmethod
|
|
178
|
+
def create_id(operation: TaskOperation, payload: dict[str, Any]) -> str:
|
|
179
|
+
"""Create a unique id for a task."""
|
|
180
|
+
first_id = next(iter(payload.values()), None)
|
|
181
|
+
return f"{operation}:{first_id}"
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class TaskStatus(BaseModel):
|
|
185
|
+
"""Task status domain entity."""
|
|
186
|
+
|
|
187
|
+
id: str
|
|
188
|
+
state: ReportingState
|
|
189
|
+
operation: TaskOperation
|
|
190
|
+
message: str = ""
|
|
191
|
+
|
|
192
|
+
created_at: datetime = datetime.now(UTC)
|
|
193
|
+
updated_at: datetime = datetime.now(UTC)
|
|
194
|
+
total: int = 0
|
|
195
|
+
current: int = 0
|
|
196
|
+
|
|
197
|
+
error: str | None = None
|
|
198
|
+
parent: "TaskStatus | None" = None
|
|
199
|
+
trackable_id: int | None = None
|
|
200
|
+
trackable_type: TrackableType | None = None
|
|
201
|
+
|
|
202
|
+
@staticmethod
|
|
203
|
+
def create(
|
|
204
|
+
operation: TaskOperation,
|
|
205
|
+
parent: "TaskStatus | None" = None,
|
|
206
|
+
trackable_type: TrackableType | None = None,
|
|
207
|
+
trackable_id: int | None = None,
|
|
208
|
+
) -> "TaskStatus":
|
|
209
|
+
"""Create a task status."""
|
|
210
|
+
return TaskStatus(
|
|
211
|
+
id=TaskStatus._create_id(operation, trackable_type, trackable_id),
|
|
212
|
+
operation=operation,
|
|
213
|
+
parent=parent,
|
|
214
|
+
trackable_type=trackable_type,
|
|
215
|
+
trackable_id=trackable_id,
|
|
216
|
+
state=ReportingState.STARTED,
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
@staticmethod
|
|
220
|
+
def _create_id(
|
|
221
|
+
step: TaskOperation,
|
|
222
|
+
trackable_type: TrackableType | None = None,
|
|
223
|
+
trackable_id: int | None = None,
|
|
224
|
+
) -> str:
|
|
225
|
+
"""Create a unique id for a task."""
|
|
226
|
+
result = []
|
|
227
|
+
# Nice to be prefixed by tracking information if it exists
|
|
228
|
+
if trackable_type:
|
|
229
|
+
result.append(str(trackable_type))
|
|
230
|
+
if trackable_id:
|
|
231
|
+
result.append(str(trackable_id))
|
|
232
|
+
result.append(str(step))
|
|
233
|
+
return "-".join(result)
|
|
234
|
+
|
|
235
|
+
@property
|
|
236
|
+
def completion_percent(self) -> float:
|
|
237
|
+
"""Calculate the percentage of completion."""
|
|
238
|
+
if self.total == 0:
|
|
239
|
+
return 0.0
|
|
240
|
+
return min(100.0, max(0.0, (self.current / self.total) * 100.0))
|
|
241
|
+
|
|
242
|
+
def skip(self, message: str) -> None:
|
|
243
|
+
"""Skip the task."""
|
|
244
|
+
self.state = ReportingState.SKIPPED
|
|
245
|
+
self.message = message
|
|
246
|
+
|
|
247
|
+
def fail(self, error: str) -> None:
|
|
248
|
+
"""Fail the task."""
|
|
249
|
+
self.state = ReportingState.FAILED
|
|
250
|
+
self.error = error
|
|
251
|
+
|
|
252
|
+
def set_total(self, total: int) -> None:
|
|
253
|
+
"""Set the total for the step."""
|
|
254
|
+
self.total = total
|
|
255
|
+
|
|
256
|
+
def set_current(self, current: int, message: str | None = None) -> None:
|
|
257
|
+
"""Progress the step."""
|
|
258
|
+
self.state = ReportingState.IN_PROGRESS
|
|
259
|
+
self.current = current
|
|
260
|
+
if message:
|
|
261
|
+
self.message = message
|
|
262
|
+
|
|
263
|
+
def set_tracking_info(
|
|
264
|
+
self, trackable_id: int, trackable_type: TrackableType
|
|
265
|
+
) -> None:
|
|
266
|
+
"""Set the tracking info."""
|
|
267
|
+
self.trackable_id = trackable_id
|
|
268
|
+
self.trackable_type = trackable_type
|
|
269
|
+
|
|
270
|
+
def complete(self) -> None:
|
|
271
|
+
"""Complete the task."""
|
|
272
|
+
if ReportingState.is_terminal(self.state):
|
|
273
|
+
return # Already in terminal state
|
|
274
|
+
|
|
275
|
+
self.state = ReportingState.COMPLETED
|
|
276
|
+
self.current = self.total # Ensure progress shows 100%
|