agent-brain-rag 2.0.0__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
"""Job queue module for managing indexing jobs."""

from .job_service import JobQueueService
from .job_store import JobQueueStore
from .job_worker import JobWorker

# Public API of the package: persistence store, background worker,
# and the API-facing service.
__all__ = [
    "JobQueueStore",
    "JobWorker",
    "JobQueueService",
]
@@ -0,0 +1,317 @@
1
+ """API-facing service for job queue management.
2
+
3
+ Provides job enqueueing with deduplication, path validation, job listing,
4
+ detail retrieval, and cancellation.
5
+ """
6
+
7
+ import logging
8
+ import uuid
9
+ from datetime import datetime, timezone
10
+ from pathlib import Path
11
+ from typing import Optional
12
+
13
+ from agent_brain_server.job_queue.job_store import JobQueueStore
14
+ from agent_brain_server.models import IndexRequest
15
+ from agent_brain_server.models.job import (
16
+ JobDetailResponse,
17
+ JobEnqueueResponse,
18
+ JobListResponse,
19
+ JobRecord,
20
+ JobStatus,
21
+ JobSummary,
22
+ QueueStats,
23
+ )
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class JobQueueService:
    """API-facing service for job queue operations.

    Provides:
    - Job enqueueing with deduplication
    - Path validation (project root checking)
    - Job listing, detail retrieval, and cancellation
    - Queue statistics

    Backpressure is handled at the router level, not here.
    """

    def __init__(
        self, store: JobQueueStore, project_root: Optional[Path] = None
    ) -> None:
        """Initialize the job queue service.

        Args:
            store: The underlying job queue store for persistence.
            project_root: Root directory for path validation. If None, all paths
                are allowed and path validation is skipped.
        """
        self._store = store
        self._project_root = project_root.resolve() if project_root else None
        # Lazy %-style args: the message is only formatted when INFO is enabled.
        logger.info(
            "JobQueueService initialized with project_root=%s", self._project_root
        )

    @property
    def store(self) -> JobQueueStore:
        """Get the underlying job queue store."""
        return self._store

    @property
    def project_root(self) -> Optional[Path]:
        """Get the project root directory (None disables path validation)."""
        return self._project_root

    def _validate_path(self, path: str, allow_external: bool) -> Path:
        """Validate and resolve a path.

        Args:
            path: The path to validate.
            allow_external: Whether to allow paths outside project root.

        Returns:
            Resolved Path object.

        Raises:
            ValueError: If path is outside project root and allow_external is False.
        """
        resolved = Path(path).resolve()

        # If no project root configured, skip path validation
        if self._project_root is None:
            return resolved

        if not allow_external:
            # relative_to raises ValueError when resolved is not under the root.
            try:
                resolved.relative_to(self._project_root)
            except ValueError as err:
                raise ValueError(
                    f"Path '{resolved}' is outside project root "
                    f"'{self._project_root}'. "
                    "Use allow_external=True to index paths outside the project."
                ) from err

        return resolved

    def _generate_job_id(self) -> str:
        """Generate a unique job ID.

        Returns:
            Job ID in format job_<uuid12> (12 hex chars of a random UUID4).
        """
        return f"job_{uuid.uuid4().hex[:12]}"

    async def enqueue_job(
        self,
        request: IndexRequest,
        operation: str = "index",
        force: bool = False,
        allow_external: bool = False,
    ) -> JobEnqueueResponse:
        """Enqueue an indexing job with deduplication.

        Args:
            request: The indexing request containing folder path and options.
            operation: Operation type - 'index' (replace) or 'add' (append).
            force: If True, skip deduplication check and always create new job.
            allow_external: If True, allow paths outside project root.

        Returns:
            JobEnqueueResponse with job details and queue position.

        Raises:
            ValueError: If path is outside project root and allow_external is False.
        """
        # Validate and resolve path
        resolved_path = self._validate_path(request.folder_path, allow_external)
        folder_path_str = str(resolved_path)

        # Compute deduplication key
        dedupe_key = JobRecord.compute_dedupe_key(
            folder_path=folder_path_str,
            include_code=request.include_code,
            operation=operation,
            include_patterns=request.include_patterns,
            exclude_patterns=request.exclude_patterns,
        )

        # Check for existing job (unless force=True)
        if not force:
            existing_job = await self._store.find_by_dedupe_key(dedupe_key)
            if existing_job is not None:
                # Return existing job info with dedupe_hit=True
                queue_length = await self._store.get_queue_length()
                pending_jobs = await self._store.get_pending_jobs()

                # Position of the existing job among pending jobs; falls back
                # to 0 when the job is no longer pending (e.g. already running).
                position = next(
                    (
                        i
                        for i, job in enumerate(pending_jobs)
                        if job.id == existing_job.id
                    ),
                    0,
                )

                logger.info(
                    "Dedupe hit: returning existing job %s for path %s",
                    existing_job.id,
                    folder_path_str,
                )

                return JobEnqueueResponse(
                    job_id=existing_job.id,
                    status=existing_job.status.value,
                    queue_position=position,
                    queue_length=queue_length,
                    message=f"Existing job found for {folder_path_str}",
                    dedupe_hit=True,
                )

        # Create new job record
        job_id = self._generate_job_id()
        job = JobRecord(
            id=job_id,
            dedupe_key=dedupe_key,
            folder_path=folder_path_str,
            include_code=request.include_code,
            operation=operation,
            chunk_size=request.chunk_size,
            chunk_overlap=request.chunk_overlap,
            recursive=request.recursive,
            generate_summaries=request.generate_summaries,
            supported_languages=request.supported_languages,
            include_patterns=request.include_patterns,
            exclude_patterns=request.exclude_patterns,
            status=JobStatus.PENDING,
            enqueued_at=datetime.now(timezone.utc),
        )

        # Append to queue and get position
        position = await self._store.append_job(job)
        queue_length = await self._store.get_queue_length()

        logger.info(
            "Job %s enqueued at position %s for path %s",
            job_id,
            position,
            folder_path_str,
        )

        return JobEnqueueResponse(
            job_id=job_id,
            status=JobStatus.PENDING.value,
            queue_position=position,
            queue_length=queue_length,
            message=f"Job queued for {folder_path_str}",
            dedupe_hit=False,
        )

    async def get_job(self, job_id: str) -> Optional[JobDetailResponse]:
        """Get detailed information about a specific job.

        Args:
            job_id: The job identifier.

        Returns:
            JobDetailResponse with full job details, or None if not found.
        """
        job = await self._store.get_job(job_id)
        if job is None:
            return None

        return JobDetailResponse.from_record(job)

    async def list_jobs(self, limit: int = 50, offset: int = 0) -> JobListResponse:
        """List jobs with pagination.

        Args:
            limit: Maximum number of jobs to return.
            offset: Number of jobs to skip.

        Returns:
            JobListResponse with job summaries and counts.
        """
        jobs = await self._store.get_all_jobs(limit=limit, offset=offset)
        stats = await self._store.get_queue_stats()

        summaries = [JobSummary.from_record(job) for job in jobs]

        return JobListResponse(
            jobs=summaries,
            total=stats.total,
            pending=stats.pending,
            running=stats.running,
            completed=stats.completed,
            failed=stats.failed,
        )

    async def cancel_job(self, job_id: str) -> dict[str, str]:
        """Request cancellation of a job.

        Only PENDING or RUNNING jobs can be cancelled.
        For RUNNING jobs, sets cancel_requested flag for graceful cancellation.

        Args:
            job_id: The job identifier.

        Returns:
            Dict with status and message.

        Raises:
            KeyError: If job not found.
            ValueError: If job cannot be cancelled (already completed/failed/cancelled).
        """
        job = await self._store.get_job(job_id)
        if job is None:
            raise KeyError(f"Job {job_id} not found")

        if job.status == JobStatus.CANCELLED:
            return {
                "status": "already_cancelled",
                "message": f"Job {job_id} was already cancelled",
            }

        if job.status in (JobStatus.DONE, JobStatus.FAILED):
            raise ValueError(
                f"Cannot cancel job {job_id}: "
                f"job is already in terminal status '{job.status.value}'"
            )

        if job.status == JobStatus.RUNNING:
            # Request graceful cancellation
            # Create new job record with cancel_requested=True
            # (JobRecord is a Pydantic model, so we use model_copy)
            updated_job = job.model_copy(update={"cancel_requested": True})
            await self._store.update_job(updated_job)

            logger.info("Cancellation requested for running job %s", job_id)
            return {
                "status": "cancellation_requested",
                "message": f"Cancellation requested for running job {job_id}. "
                "Job will stop at next checkpoint.",
            }

        if job.status == JobStatus.PENDING:
            # Cancel immediately: mark terminal and stamp the finish time.
            updated_job = job.model_copy(
                update={
                    "status": JobStatus.CANCELLED,
                    "cancel_requested": True,
                    "finished_at": datetime.now(timezone.utc),
                }
            )
            await self._store.update_job(updated_job)

            logger.info("Pending job %s cancelled", job_id)
            return {
                "status": "cancelled",
                "message": f"Job {job_id} cancelled",
            }

        # Should not reach here, but handle gracefully
        return {
            "status": "unknown",
            "message": f"Job {job_id} is in unexpected status: {job.status.value}",
        }

    async def get_queue_stats(self) -> QueueStats:
        """Get statistics about the job queue.

        Returns:
            QueueStats with counts and current job info.
        """
        return await self._store.get_queue_stats()