kodit 0.4.3-py3-none-any.whl → 0.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of kodit might be problematic.
- kodit/_version.py +2 -2
- kodit/app.py +53 -23
- kodit/application/factories/reporting_factory.py +6 -2
- kodit/application/factories/server_factory.py +311 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +543 -0
- kodit/application/services/indexing_worker_service.py +13 -44
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +0 -2
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -753
- kodit/cli_utils.py +2 -9
- kodit/config.py +1 -94
- kodit/database.py +38 -1
- kodit/domain/{entities.py → entities/__init__.py} +50 -195
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +263 -64
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/task_status_query_service.py +2 -2
- kodit/domain/value_objects.py +83 -114
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +92 -46
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +271 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
- kodit/infrastructure/cloning/git/working_copy.py +1 -1
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +106 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/reporting/log_progress.py +8 -5
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/slicer.py +32 -31
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/entities.py +394 -158
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -30
- kodit/migrations/env.py +1 -0
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_openapi.py +7 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
- kodit-0.5.0.dist-info/RECORD +137 -0
- kodit/application/factories/code_indexing_factory.py +0 -195
- kodit/application/services/auto_indexing_service.py +0 -99
- kodit/application/services/code_indexing_application_service.py +0 -410
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -269
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -164
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -38
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.3.dist-info/RECORD +0 -125
- {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
kodit/cli_utils.py
CHANGED
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any

 import click

-from kodit.infrastructure.api.client import
+from kodit.infrastructure.api.client import SearchClient

 if TYPE_CHECKING:
     from kodit.config import AppContext
@@ -37,7 +37,7 @@ def with_client(f: Callable) -> Callable:
         inner_func = getattr(
             getattr(session_wrapped, "__wrapped__", session_wrapped),
             "__wrapped__",
-            session_wrapped
+            session_wrapped,
         )

         # Get database session manually
@@ -47,13 +47,6 @@
         else:
             # Remote mode - use API clients
             clients = {
-                "index_client": IndexClient(
-                    base_url=app_context.remote.server_url or "",
-                    api_key=app_context.remote.api_key,
-                    timeout=app_context.remote.timeout,
-                    max_retries=app_context.remote.max_retries,
-                    verify_ssl=app_context.remote.verify_ssl,
-                ),
                 "search_client": SearchClient(
                     base_url=app_context.remote.server_url or "",
                     api_key=app_context.remote.api_key,
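Note on the `with_client` hunk above: the nested `getattr(..., "__wrapped__", ...)` calls appear to unwrap two decorator layers, since each `functools.wraps` layer records the function it wraps in `__wrapped__`. A minimal standalone sketch of that unwrapping pattern (the decorator and command names below are invented for illustration, not part of kodit):

```python
import functools


def deco(func):
    """A no-op decorator; functools.wraps records the wrapped function in __wrapped__."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)
    return wrapper


@deco
@deco
def command() -> str:
    return "ok"


# Two hops through __wrapped__ reach the original, undecorated function.
once = getattr(command, "__wrapped__", command)
inner_func = getattr(once, "__wrapped__", once)
print(inner_func())                      # "ok"
print(inner_func.__name__ == "command")  # True
```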
kodit/config.py
CHANGED
@@ -14,9 +14,7 @@ import structlog
 from pydantic import BaseModel, Field, field_validator
 from pydantic_settings import (
     BaseSettings,
-    EnvSettingsSource,
     NoDecode,
-    PydanticBaseSettingsSource,
     SettingsConfigDict,
 )

@@ -91,46 +89,12 @@ class Search(BaseModel):
     provider: Literal["sqlite", "vectorchord"] = Field(default="sqlite")


-class AutoIndexingSource(BaseModel):
-    """Configuration for a single auto-indexing source."""
-
-    uri: str = Field(description="URI of the source to index (git URL or local path)")
-
-
-class AutoIndexingConfig(BaseModel):
-    """Configuration for auto-indexing."""
-
-    sources: list[AutoIndexingSource] = Field(
-        default_factory=list, description="List of sources to auto-index"
-    )
-
-    @field_validator("sources", mode="before")
-    @classmethod
-    def parse_sources(cls, v: Any) -> Any:
-        """Parse sources from environment variables or other formats."""
-        if v is None:
-            return []
-        if isinstance(v, list):
-            return v
-        if isinstance(v, dict):
-            # Handle case where env vars are numbered keys like {'0': {'uri': '...'}}
-            sources = []
-            i = 0
-            while str(i) in v:
-                source_data = v[str(i)]
-                if isinstance(source_data, dict) and "uri" in source_data:
-                    sources.append(AutoIndexingSource(uri=source_data["uri"]))
-                i += 1
-            return sources
-        return v
-
-
 class PeriodicSyncConfig(BaseModel):
     """Configuration for periodic/scheduled syncing."""

     enabled: bool = Field(default=True, description="Enable periodic sync")
     interval_seconds: float = Field(
-        default=1800, description="Interval between
+        default=1800, description="Interval between periodic syncs in seconds"
     )
     retry_attempts: int = Field(
         default=3, description="Number of retry attempts for failed syncs"
@@ -147,36 +111,6 @@ class RemoteConfig(BaseModel):
     verify_ssl: bool = Field(default=True, description="Verify SSL certificates")


-class CustomAutoIndexingEnvSource(EnvSettingsSource):
-    """Custom environment source for parsing AutoIndexingConfig."""
-
-    def __call__(self) -> dict[str, Any]:
-        """Load settings from env vars with custom auto-indexing parsing."""
-        d: dict[str, Any] = {}
-
-        # First get the standard env vars
-        env_vars = super().__call__()
-        d.update(env_vars)
-
-        # Custom parsing for auto-indexing sources
-        auto_indexing_sources = []
-        i = 0
-        while True:
-            # Note: env_vars keys are lowercase due to Pydantic Settings normalization
-            uri_key = f"auto_indexing_sources_{i}_uri"
-            if uri_key in self.env_vars:
-                uri_value = self.env_vars[uri_key]
-                auto_indexing_sources.append({"uri": uri_value})
-                i += 1
-            else:
-                break
-
-        if auto_indexing_sources:
-            d["auto_indexing"] = {"sources": auto_indexing_sources}
-
-        return d
-
-
 class AppContext(BaseSettings):
     """Global context for the kodit project. Provides a shared state for the app."""

@@ -189,30 +123,6 @@ class AppContext(BaseSettings):
         extra="ignore",
     )

-    @classmethod
-    def settings_customise_sources(
-        cls,
-        settings_cls: type[BaseSettings],
-        init_settings: PydanticBaseSettingsSource,
-        env_settings: PydanticBaseSettingsSource,  # noqa: ARG003
-        dotenv_settings: PydanticBaseSettingsSource,
-        file_secret_settings: PydanticBaseSettingsSource,
-    ) -> tuple[PydanticBaseSettingsSource, ...]:
-        """Customize settings sources to use custom auto-indexing parsing."""
-        custom_env_settings = CustomAutoIndexingEnvSource(
-            settings_cls,
-            env_nested_delimiter=settings_cls.model_config.get("env_nested_delimiter"),
-            env_ignore_empty=settings_cls.model_config.get("env_ignore_empty", False),
-            env_parse_none_str=settings_cls.model_config.get("env_parse_none_str", ""),
-            env_parse_enums=settings_cls.model_config.get("env_parse_enums", None),
-        )
-        return (
-            init_settings,
-            custom_env_settings,
-            dotenv_settings,
-            file_secret_settings,
-        )
-
     data_dir: Path = Field(default=DEFAULT_BASE_DIR)
     db_url: str = Field(
         default_factory=lambda data: f"sqlite+aiosqlite:///{data['data_dir']}/kodit.db"
@@ -231,9 +141,6 @@ class AppContext(BaseSettings):
     default_search: Search = Field(
         default=Search(),
     )
-    auto_indexing: AutoIndexingConfig | None = Field(
-        default=AutoIndexingConfig(), description="Auto-indexing configuration"
-    )
     periodic_sync: PeriodicSyncConfig = Field(
         default=PeriodicSyncConfig(), description="Periodic sync configuration"
     )
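With auto-indexing removed, `PeriodicSyncConfig` is the remaining scheduling knob in config.py. A minimal sketch of that model copied into a standalone script, using only the fields and defaults visible in the hunk above (the construction and printout at the end are illustrative, not kodit's usage):

```python
from pydantic import BaseModel, Field


class PeriodicSyncConfig(BaseModel):
    """Configuration for periodic/scheduled syncing (fields as in the diff)."""

    enabled: bool = Field(default=True, description="Enable periodic sync")
    interval_seconds: float = Field(
        default=1800, description="Interval between periodic syncs in seconds"
    )
    retry_attempts: int = Field(
        default=3, description="Number of retry attempts for failed syncs"
    )


# Override the default 30-minute interval, e.g. for an hourly sync.
config = PeriodicSyncConfig(interval_seconds=3600)
print(config.model_dump())
# {'enabled': True, 'interval_seconds': 3600.0, 'retry_attempts': 3}
```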
kodit/database.py
CHANGED
@@ -2,10 +2,12 @@

 from collections.abc import Callable
 from pathlib import Path
+from typing import Any

 import structlog
 from alembic import command
 from alembic.config import Config as AlembicConfig
+from sqlalchemy import event
 from sqlalchemy.ext.asyncio import (
     AsyncSession,
     async_sessionmaker,
@@ -21,7 +23,42 @@ class Database:
     def __init__(self, db_url: str) -> None:
         """Initialize the database."""
         self.log = structlog.get_logger(__name__)
-
+
+        # Configure SQLite-specific connection arguments to prevent locking issues
+        connect_args = {}
+        if "sqlite" in db_url.lower():
+            connect_args = {
+                "timeout": 20,  # 20 second timeout for database operations
+                "check_same_thread": False,  # Allow use from different threads
+            }
+
+        self.db_engine = create_async_engine(
+            db_url,
+            echo=False,
+            connect_args=connect_args,
+        )
+
+        # Configure SQLite pragmas for better concurrency and performance
+        if "sqlite" in db_url.lower():
+
+            @event.listens_for(self.db_engine.sync_engine, "connect")
+            def set_sqlite_pragma(
+                dbapi_connection: Any, connection_record: Any
+            ) -> None:
+                del (
+                    connection_record
+                )  # Unused but required by SQLAlchemy event interface
+                cursor = dbapi_connection.cursor()
+                # Enable WAL mode for better concurrency
+                cursor.execute("PRAGMA journal_mode=WAL")
+                # Set busy timeout to prevent immediate locking failures
+                cursor.execute("PRAGMA busy_timeout=20000")
+                # Enable foreign key constraints
+                cursor.execute("PRAGMA foreign_keys=ON")
+                # Optimize for speed over safety (acceptable for indexing workloads)
+                cursor.execute("PRAGMA synchronous=NORMAL")
+                cursor.close()
+
         self.db_session_factory = async_sessionmaker(
             self.db_engine,
             class_=AsyncSession,
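The new `Database.__init__` applies SQLite pragmas through SQLAlchemy's `connect` event (attached to the async engine's `sync_engine` in the diff). The same pattern can be tried in isolation with a plain synchronous engine; the database file name below is an example, and the pragma values mirror the hunk above:

```python
from typing import Any

from sqlalchemy import create_engine, event, text

engine = create_engine(
    "sqlite:///example.db",
    connect_args={"timeout": 20, "check_same_thread": False},
)


@event.listens_for(engine, "connect")
def set_sqlite_pragma(dbapi_connection: Any, connection_record: Any) -> None:
    del connection_record  # unused, but required by the event signature
    cursor = dbapi_connection.cursor()
    cursor.execute("PRAGMA journal_mode=WAL")    # better concurrency via WAL
    cursor.execute("PRAGMA busy_timeout=20000")  # wait up to 20s instead of failing on locks
    cursor.execute("PRAGMA foreign_keys=ON")
    cursor.execute("PRAGMA synchronous=NORMAL")  # speed over durability
    cursor.close()


with engine.connect() as conn:
    print(conn.execute(text("PRAGMA journal_mode")).scalar())  # -> "wal"
```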
kodit/domain/{entities.py → entities/__init__.py}
CHANGED

@@ -1,7 +1,5 @@
 """Pure domain entities using Pydantic."""

-import shutil
-from dataclasses import dataclass
 from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any, Protocol
@@ -10,17 +8,10 @@ from urllib.parse import urlparse, urlunparse
 from pydantic import AnyUrl, BaseModel

 from kodit.domain.value_objects import (
-    FileProcessingStatus,
-    QueuePriority,
     ReportingState,
-    SnippetContent,
-    SnippetContentType,
-    SourceType,
     TaskOperation,
-    TaskType,
     TrackableType,
 )
-from kodit.utils.path_utils import path_from_uri


 class IgnorePatternProvider(Protocol):
@@ -39,37 +30,9 @@ class Author(BaseModel):
     email: str


-class File(BaseModel):
-    """File domain entity."""
-
-    id: int | None = None  # Is populated by repository
-    created_at: datetime | None = None  # Is populated by repository
-    updated_at: datetime | None = None  # Is populated by repository
-    uri: AnyUrl
-    sha256: str
-    authors: list[Author]
-    mime_type: str
-    file_processing_status: FileProcessingStatus
-
-    def as_path(self) -> Path:
-        """Return the file as a path."""
-        return path_from_uri(str(self.uri))
-
-    def extension(self) -> str:
-        """Return the file extension."""
-        return Path(self.as_path()).suffix.lstrip(".")
-
-
 class WorkingCopy(BaseModel):
     """Working copy value object representing cloned source location."""

-    created_at: datetime | None = None  # Is populated by repository
-    updated_at: datetime | None = None  # Is populated by repository
-    remote_uri: AnyUrl
-    cloned_path: Path
-    source_type: SourceType
-    files: list[File]
-
     @classmethod
     def sanitize_local_path(cls, path: str) -> AnyUrl:
         """Sanitize a local path."""
@@ -100,26 +63,54 @@ class WorkingCopy(BaseModel):
         "ssh://git@github.com/user/repo.git"

         """
-        # Handle
+        # Handle different URL types
+        if not url:
+            raise ValueError("URL is required")
+
         if url.startswith("git@"):
-
-            # This maintains the same semantic meaning while making it a valid URL
-            if ":" in url and not url.startswith("ssh://"):
-                host_path = url[4:]  # Remove "git@"
-                if ":" in host_path:
-                    host, path = host_path.split(":", 1)
-                    ssh_url = f"ssh://git@{host}/{path}"
-                    return AnyUrl(ssh_url)
-            return AnyUrl(url)
+            return cls._handle_ssh_url(url)
         if url.startswith("ssh://"):
             return AnyUrl(url)
-
-        # Handle file URLs
         if url.startswith("file://"):
             return AnyUrl(url)

+        # Try local path conversion
+        local_url = cls._try_local_path_conversion(url)
+        if local_url:
+            return local_url
+
+        # Handle HTTPS URLs with credentials
+        return cls._sanitize_https_url(url)
+
+    @classmethod
+    def _handle_ssh_url(cls, url: str) -> AnyUrl:
+        """Handle SSH URL conversion."""
+        if ":" in url and not url.startswith("ssh://"):
+            host_path = url[4:]  # Remove "git@"
+            if ":" in host_path:
+                host, path = host_path.split(":", 1)
+                return AnyUrl(f"ssh://git@{host}/{path}")
+        return AnyUrl(url)
+
+    @classmethod
+    def _try_local_path_conversion(cls, url: str) -> AnyUrl | None:
+        """Try to convert local paths to file:// URLs."""
+        from pathlib import Path
+
+        try:
+            path = Path(url)
+            if path.exists() or url.startswith(("/", "./", "../")) or url == ".":
+                absolute_path = path.resolve()
+                return AnyUrl(f"file://{absolute_path}")
+        except OSError:
+            # Path operations failed, not a local path
+            pass
+        return None
+
+    @classmethod
+    def _sanitize_https_url(cls, url: str) -> AnyUrl:
+        """Remove credentials from HTTPS URLs."""
         try:
-            # Parse the URL
             parsed = urlparse(url)

             # If there are no credentials, return the URL as-is
@@ -127,7 +118,6 @@ class WorkingCopy(BaseModel):
             return AnyUrl(url)

             # Reconstruct the URL without credentials
-            # scheme, netloc (without username/password), path, params, query, fragment
             sanitized_netloc = parsed.hostname
             if parsed.port:
                 sanitized_netloc = f"{parsed.hostname}:{parsed.port}"
@@ -144,65 +134,9 @@
                     )
                 )
             )
-
         except Exception as e:
             raise ValueError(f"Invalid URL: {url}") from e

-    def modified_or_deleted_files(self) -> list[File]:
-        """Return the modified or deleted files."""
-        return [
-            file
-            for file in self.files
-            if file.file_processing_status
-            in (FileProcessingStatus.MODIFIED, FileProcessingStatus.DELETED)
-        ]
-
-    def list_filesystem_paths(
-        self, ignore_provider: IgnorePatternProvider
-    ) -> list[Path]:
-        """List the filesystem paths of the files in the working copy."""
-        if not self.cloned_path.exists():
-            raise ValueError(f"Cloned path does not exist: {self.cloned_path}")
-
-        return [
-            f
-            for f in self.cloned_path.rglob("*")
-            if f.is_file() and not ignore_provider.should_ignore(f)
-        ]
-
-    def dirty_files(self) -> list[File]:
-        """Return the dirty files."""
-        return [
-            file
-            for file in self.files
-            if file.file_processing_status
-            in (FileProcessingStatus.MODIFIED, FileProcessingStatus.ADDED)
-        ]
-
-    def changed_files(self) -> list[File]:
-        """Return the changed files."""
-        return [
-            file
-            for file in self.files
-            if file.file_processing_status != FileProcessingStatus.CLEAN
-        ]
-
-    def clear_file_processing_statuses(self) -> None:
-        """Clear the file processing statuses."""
-        # First remove any files that are marked for deletion
-        self.files = [
-            file
-            for file in self.files
-            if file.file_processing_status != FileProcessingStatus.DELETED
-        ]
-        # Then clear the statuses for the remaining files
-        for file in self.files:
-            file.file_processing_status = FileProcessingStatus.CLEAN
-
-    def delete(self) -> None:
-        """Delete the working copy."""
-        shutil.rmtree(self.cloned_path)
-

 class Source(BaseModel):
     """Source domain entity."""
@@ -213,74 +147,6 @@ class Source(BaseModel):
     working_copy: WorkingCopy


-class Snippet(BaseModel):
-    """Snippet domain entity."""
-
-    id: int | None = None  # Is populated by repository
-    created_at: datetime | None = None  # Is populated by repository
-    updated_at: datetime | None = None  # Is populated by repository
-    derives_from: list[File]
-    original_content: SnippetContent | None = None
-    summary_content: SnippetContent | None = None
-
-    def original_text(self) -> str:
-        """Return the original content of the snippet."""
-        if self.original_content is None:
-            return ""
-        return self.original_content.value
-
-    def summary_text(self) -> str:
-        """Return the summary content of the snippet."""
-        if self.summary_content is None:
-            return ""
-        return self.summary_content.value
-
-    def add_original_content(self, content: str, language: str) -> None:
-        """Add an original content to the snippet."""
-        self.original_content = SnippetContent(
-            type=SnippetContentType.ORIGINAL,
-            value=content,
-            language=language,
-        )
-
-    def add_summary(self, summary: str) -> None:
-        """Add a summary to the snippet."""
-        self.summary_content = SnippetContent(
-            type=SnippetContentType.SUMMARY,
-            value=summary,
-            language="markdown",
-        )
-
-
-class Index(BaseModel):
-    """Index domain entity."""
-
-    id: int
-    created_at: datetime
-    updated_at: datetime
-    source: Source
-    snippets: list[Snippet]
-
-    def delete_snippets_for_files(self, files: list[File]) -> None:
-        """Delete the snippets that derive from a list of files."""
-        self.snippets = [
-            snippet
-            for snippet in self.snippets
-            if not any(file in snippet.derives_from for file in files)
-        ]
-
-
-# FUTURE: Remove this type, use the domain to get the required information.
-@dataclass(frozen=True)
-class SnippetWithContext:
-    """Domain model for snippet with associated context information."""
-
-    source: Source
-    file: File
-    authors: list[Author]
-    snippet: Snippet
-
-
 class Task(BaseModel):
     """Represents an item in the queue waiting to be processed.

@@ -289,7 +155,7 @@ class Task(BaseModel):
     """

     id: str  # Is a unique key to deduplicate items in the queue
-    type:
+    type: TaskOperation  # Task operation
     priority: int  # Priority (higher number = higher priority)
     payload: dict[str, Any]  # Task-specific data

@@ -297,33 +163,22 @@
     updated_at: datetime | None = None  # Is populated by repository

     @staticmethod
-    def create(
+    def create(
+        operation: TaskOperation, priority: int, payload: dict[str, Any]
+    ) -> "Task":
         """Create a task."""
         return Task(
-            id=Task.
-            type=
+            id=Task.create_id(operation, payload),
+            type=operation,
             priority=priority,
             payload=payload,
         )

     @staticmethod
-    def
+    def create_id(operation: TaskOperation, payload: dict[str, Any]) -> str:
         """Create a unique id for a task."""
-
-
-
-        raise ValueError(f"Unknown task type: {task_type}")
-
-    @staticmethod
-    def create_index_update_task(
-        index_id: int, priority: QueuePriority = QueuePriority.USER_INITIATED
-    ) -> "Task":
-        """Create an index update task."""
-        return Task.create(
-            task_type=TaskType.INDEX_UPDATE,
-            priority=priority.value,
-            payload={"index_id": index_id},
-        )
+        first_id = next(iter(payload.values()), None)
+        return f"{operation}:{first_id}"


 class TaskStatus(BaseModel):