agent-brain-rag 2.0.0__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agent_brain_rag-2.0.0.dist-info → agent_brain_rag-3.0.0.dist-info}/METADATA +3 -4
- {agent_brain_rag-2.0.0.dist-info → agent_brain_rag-3.0.0.dist-info}/RECORD +26 -20
- {agent_brain_rag-2.0.0.dist-info → agent_brain_rag-3.0.0.dist-info}/WHEEL +1 -1
- {agent_brain_rag-2.0.0.dist-info → agent_brain_rag-3.0.0.dist-info}/entry_points.txt +0 -1
- agent_brain_server/__init__.py +1 -1
- agent_brain_server/api/main.py +118 -45
- agent_brain_server/api/routers/__init__.py +2 -0
- agent_brain_server/api/routers/health.py +85 -22
- agent_brain_server/api/routers/index.py +108 -36
- agent_brain_server/api/routers/jobs.py +111 -0
- agent_brain_server/config/provider_config.py +63 -19
- agent_brain_server/config/settings.py +10 -4
- agent_brain_server/indexing/bm25_index.py +15 -2
- agent_brain_server/indexing/document_loader.py +45 -4
- agent_brain_server/job_queue/__init__.py +11 -0
- agent_brain_server/job_queue/job_service.py +317 -0
- agent_brain_server/job_queue/job_store.py +427 -0
- agent_brain_server/job_queue/job_worker.py +434 -0
- agent_brain_server/locking.py +101 -8
- agent_brain_server/models/__init__.py +19 -0
- agent_brain_server/models/health.py +15 -0
- agent_brain_server/models/job.py +289 -0
- agent_brain_server/models/query.py +2 -2
- agent_brain_server/project_root.py +1 -1
- agent_brain_server/runtime.py +2 -2
- agent_brain_server/storage_paths.py +3 -3
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
"""Job queue models for indexing job management."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field, computed_field
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class JobStatus(str, Enum):
    """Status of an indexing job."""

    # The str mixin makes members compare equal to their string value,
    # so these constants double as the serialized/wire representation.
    PENDING = "pending"  # enqueued, not yet started
    RUNNING = "running"  # currently executing
    DONE = "done"  # finished successfully
    FAILED = "failed"  # finished with an error
    CANCELLED = "cancelled"  # stopped after a cancellation request
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class JobProgress(BaseModel):
    """Incremental progress counters for a single indexing job."""

    # All counters are constrained to be non-negative.
    files_processed: int = Field(ge=0, default=0, description="Files processed so far")
    files_total: int = Field(ge=0, default=0, description="Total files to process")
    chunks_created: int = Field(ge=0, default=0, description="Chunks created so far")
    current_file: str = Field(default="", description="Currently processing file")
    updated_at: datetime = Field(
        description="Last progress update timestamp",
        default_factory=lambda: datetime.now(timezone.utc),
    )

    @computed_field  # type: ignore[prop-decorator]
    @property
    def percent_complete(self) -> float:
        """Percentage of files processed, rounded to one decimal place."""
        total = self.files_total
        if not total:
            # Total not yet known: avoid dividing by zero.
            return 0.0
        return round((self.files_processed / total) * 100, 1)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class JobRecord(BaseModel):
    """Persistent job record for the queue."""

    id: str = Field(..., description="Unique job identifier (job_<uuid12>)")
    dedupe_key: str = Field(..., description="SHA256 hash for deduplication")

    # -- Normalized request parameters --
    folder_path: str = Field(..., description="Resolved, normalized folder path")
    include_code: bool = Field(description="Whether to index code files", default=False)
    operation: str = Field(
        description="Operation type: 'index' or 'add'", default="index"
    )

    # -- Optional request parameters --
    chunk_size: int = Field(description="Chunk size in tokens", default=512)
    chunk_overlap: int = Field(description="Chunk overlap in tokens", default=50)
    recursive: bool = Field(description="Recursive folder scan", default=True)
    generate_summaries: bool = Field(
        description="Generate LLM summaries", default=False
    )
    supported_languages: Optional[list[str]] = Field(
        description="Languages to index", default=None
    )
    include_patterns: Optional[list[str]] = Field(
        description="File patterns to include", default=None
    )
    exclude_patterns: Optional[list[str]] = Field(
        description="File patterns to exclude", default=None
    )

    # -- Job state --
    status: JobStatus = Field(
        description="Current job status", default=JobStatus.PENDING
    )
    cancel_requested: bool = Field(
        description="Flag for graceful cancellation", default=False
    )

    # -- Timestamps --
    enqueued_at: datetime = Field(
        description="When the job was enqueued",
        default_factory=lambda: datetime.now(timezone.utc),
    )
    started_at: Optional[datetime] = Field(
        description="When the job started running", default=None
    )
    finished_at: Optional[datetime] = Field(
        description="When the job finished (done, failed, or cancelled)", default=None
    )

    # -- Results and metadata --
    error: Optional[str] = Field(description="Error message if failed", default=None)
    retry_count: int = Field(ge=0, default=0, description="Number of retry attempts")
    progress: Optional[JobProgress] = Field(
        description="Progress tracking", default=None
    )
    total_chunks: int = Field(ge=0, default=0, description="Total chunks indexed")
    total_documents: int = Field(ge=0, default=0, description="Total documents indexed")

    @computed_field  # type: ignore[prop-decorator]
    @property
    def execution_time_ms(self) -> Optional[int]:
        """Milliseconds spent executing; None if the job never started.

        For a job that is still running (no finish timestamp yet), the
        elapsed time is measured against the current UTC time.
        """
        start = self.started_at
        if start is None:
            return None
        end = self.finished_at or datetime.now(timezone.utc)
        return int((end - start).total_seconds() * 1000)

    @staticmethod
    def compute_dedupe_key(
        folder_path: str,
        include_code: bool,
        operation: str,
        include_patterns: Optional[list[str]] = None,
        exclude_patterns: Optional[list[str]] = None,
    ) -> str:
        """Compute deduplication key from job parameters.

        Args:
            folder_path: Normalized, resolved folder path.
            include_code: Whether to include code files.
            operation: Operation type (index or add).
            include_patterns: Optional include patterns.
            exclude_patterns: Optional exclude patterns.

        Returns:
            SHA256 hash of normalized parameters.
        """
        # Resolve the path so different spellings of the same folder
        # produce the same key.
        canonical_path = str(Path(folder_path).resolve())

        # Pattern lists are sorted so their ordering cannot change the key.
        key_material = "|".join(
            (
                canonical_path,
                str(include_code),
                operation,
                ",".join(sorted(include_patterns or [])),
                ",".join(sorted(exclude_patterns or [])),
            )
        )

        return hashlib.sha256(key_material.encode()).hexdigest()
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class JobEnqueueResponse(BaseModel):
    """Response when enqueueing a job."""

    job_id: str = Field(..., description="Unique job identifier")
    status: str = Field(description="Job status", default="pending")
    queue_position: int = Field(
        ge=0, default=0, description="Position in the queue (0 = first)"
    )
    queue_length: int = Field(ge=0, default=0, description="Total jobs in queue")
    message: str = Field(..., description="Human-readable status message")
    dedupe_hit: bool = Field(
        description="True if this was a duplicate request", default=False
    )

    # Example payload surfaced in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "job_id": "job_abc123def456",
                    "status": "pending",
                    "queue_position": 2,
                    "queue_length": 5,
                    "message": "Job queued for /path/to/docs",
                    "dedupe_hit": False,
                }
            ]
        }
    }
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
class JobListResponse(BaseModel):
    """Response for listing jobs."""

    # "JobSummary" is a forward reference; the class is defined further
    # down in this module.
    jobs: list["JobSummary"] = Field(description="List of jobs", default_factory=list)
    total: int = Field(ge=0, default=0, description="Total number of jobs")
    pending: int = Field(ge=0, default=0, description="Number of pending jobs")
    running: int = Field(ge=0, default=0, description="Number of running jobs")
    completed: int = Field(ge=0, default=0, description="Number of completed jobs")
    failed: int = Field(ge=0, default=0, description="Number of failed jobs")
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class JobSummary(BaseModel):
    """Summary view of a job for list responses."""

    id: str = Field(..., description="Job identifier")
    status: JobStatus = Field(..., description="Current status")
    folder_path: str = Field(..., description="Folder being indexed")
    operation: str = Field(..., description="Operation type")
    include_code: bool = Field(..., description="Whether indexing code")
    enqueued_at: datetime = Field(..., description="When queued")
    started_at: Optional[datetime] = Field(description="When started", default=None)
    finished_at: Optional[datetime] = Field(description="When finished", default=None)
    progress_percent: float = Field(description="Completion percentage", default=0.0)
    error: Optional[str] = Field(description="Error message if failed", default=None)

    @classmethod
    def from_record(cls, record: JobRecord) -> "JobSummary":
        """Build a summary by projecting the relevant fields of *record*."""
        # A record with no progress object reports 0% complete.
        percent = 0.0
        if record.progress:
            percent = record.progress.percent_complete
        return cls(
            id=record.id,
            status=record.status,
            folder_path=record.folder_path,
            operation=record.operation,
            include_code=record.include_code,
            enqueued_at=record.enqueued_at,
            started_at=record.started_at,
            finished_at=record.finished_at,
            progress_percent=percent,
            error=record.error,
        )
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
class JobDetailResponse(BaseModel):
    """Detailed response for a single job."""

    id: str = Field(..., description="Job identifier")
    status: JobStatus = Field(..., description="Current status")
    folder_path: str = Field(..., description="Folder being indexed")
    operation: str = Field(..., description="Operation type")
    include_code: bool = Field(..., description="Whether indexing code")

    # Timestamps
    enqueued_at: datetime = Field(..., description="When queued")
    started_at: Optional[datetime] = Field(description="When started", default=None)
    finished_at: Optional[datetime] = Field(description="When finished", default=None)
    execution_time_ms: Optional[int] = Field(
        description="Execution time in ms", default=None
    )

    # Progress
    progress: Optional[JobProgress] = Field(
        description="Progress details", default=None
    )

    # Results
    total_documents: int = Field(default=0, description="Documents indexed")
    total_chunks: int = Field(default=0, description="Chunks created")
    error: Optional[str] = Field(description="Error message if failed", default=None)
    retry_count: int = Field(description="Retry attempts", default=0)
    cancel_requested: bool = Field(
        description="Whether cancellation requested", default=False
    )

    @classmethod
    def from_record(cls, record: JobRecord) -> "JobDetailResponse":
        """Create a detail response mirroring every field of *record*."""
        payload = {
            "id": record.id,
            "status": record.status,
            "folder_path": record.folder_path,
            "operation": record.operation,
            "include_code": record.include_code,
            "enqueued_at": record.enqueued_at,
            "started_at": record.started_at,
            "finished_at": record.finished_at,
            # execution_time_ms is a computed field on the record.
            "execution_time_ms": record.execution_time_ms,
            "progress": record.progress,
            "total_documents": record.total_documents,
            "total_chunks": record.total_chunks,
            "error": record.error,
            "retry_count": record.retry_count,
            "cancel_requested": record.cancel_requested,
        }
        return cls(**payload)
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class QueueStats(BaseModel):
    """Statistics about the job queue."""

    # Per-status counters.
    pending: int = Field(ge=0, default=0, description="Pending jobs count")
    running: int = Field(ge=0, default=0, description="Running jobs count")
    completed: int = Field(ge=0, default=0, description="Completed jobs count")
    failed: int = Field(ge=0, default=0, description="Failed jobs count")
    cancelled: int = Field(ge=0, default=0, description="Cancelled jobs count")
    total: int = Field(ge=0, default=0, description="Total jobs count")

    # Details about the currently executing job, if any.
    current_job_id: Optional[str] = Field(
        description="Currently running job ID", default=None
    )
    current_job_running_time_ms: Optional[int] = Field(
        description="Current job running time in ms", default=None
    )
|
|
@@ -34,7 +34,7 @@ class QueryRequest(BaseModel):
|
|
|
34
34
|
description="Number of results to return",
|
|
35
35
|
)
|
|
36
36
|
similarity_threshold: float = Field(
|
|
37
|
-
default=0.
|
|
37
|
+
default=0.3,
|
|
38
38
|
ge=0.0,
|
|
39
39
|
le=1.0,
|
|
40
40
|
description="Minimum similarity score (0-1)",
|
|
@@ -92,7 +92,7 @@ class QueryRequest(BaseModel):
|
|
|
92
92
|
{
|
|
93
93
|
"query": "How do I configure authentication?",
|
|
94
94
|
"top_k": 5,
|
|
95
|
-
"similarity_threshold": 0.
|
|
95
|
+
"similarity_threshold": 0.3,
|
|
96
96
|
"mode": "hybrid",
|
|
97
97
|
"alpha": 0.5,
|
|
98
98
|
},
|
agent_brain_server/runtime.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Runtime state management for
|
|
1
|
+
"""Runtime state management for Agent Brain instances."""
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
import logging
|
|
@@ -15,7 +15,7 @@ logger = logging.getLogger(__name__)
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class RuntimeState(BaseModel):
|
|
18
|
-
"""Runtime state for
|
|
18
|
+
"""Runtime state for an Agent Brain instance."""
|
|
19
19
|
|
|
20
20
|
schema_version: str = "1.0"
|
|
21
21
|
mode: str = "project" # "project" or "shared"
|
|
@@ -5,7 +5,7 @@ from pathlib import Path
|
|
|
5
5
|
|
|
6
6
|
logger = logging.getLogger(__name__)
|
|
7
7
|
|
|
8
|
-
STATE_DIR_NAME = ".claude/
|
|
8
|
+
STATE_DIR_NAME = ".claude/agent-brain"
|
|
9
9
|
|
|
10
10
|
SUBDIRECTORIES = [
|
|
11
11
|
"data",
|
|
@@ -20,7 +20,7 @@ SUBDIRECTORIES = [
|
|
|
20
20
|
def resolve_state_dir(project_root: Path) -> Path:
|
|
21
21
|
"""Resolve the state directory for a project.
|
|
22
22
|
|
|
23
|
-
Returns <project_root>/.claude/
|
|
23
|
+
Returns <project_root>/.claude/agent-brain/
|
|
24
24
|
|
|
25
25
|
Args:
|
|
26
26
|
project_root: Resolved project root path.
|
|
@@ -69,6 +69,6 @@ def resolve_shared_project_dir(project_id: str) -> Path:
|
|
|
69
69
|
Returns:
|
|
70
70
|
Path to shared project data directory.
|
|
71
71
|
"""
|
|
72
|
-
shared_dir = Path.home() / ".
|
|
72
|
+
shared_dir = Path.home() / ".agent-brain" / "projects" / project_id / "data"
|
|
73
73
|
shared_dir.mkdir(parents=True, exist_ok=True)
|
|
74
74
|
return shared_dir
|