agent-brain-rag 2.0.0__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,289 @@
1
+ """Job queue models for indexing job management."""
2
+
3
+ import hashlib
4
+ from datetime import datetime, timezone
5
+ from enum import Enum
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from pydantic import BaseModel, Field, computed_field
10
+
11
+
12
class JobStatus(str, Enum):
    """Status of an indexing job.

    Inherits from ``str`` so member values serialize directly as plain
    strings in JSON payloads and pydantic models.
    """

    PENDING = "pending"      # enqueued, waiting to run
    RUNNING = "running"      # currently executing
    DONE = "done"            # finished successfully
    FAILED = "failed"        # finished with an error
    CANCELLED = "cancelled"  # stopped before completion
20
+
21
+
22
class JobProgress(BaseModel):
    """Progress tracking for an indexing job."""

    files_processed: int = Field(default=0, ge=0, description="Files processed so far")
    files_total: int = Field(default=0, ge=0, description="Total files to process")
    chunks_created: int = Field(default=0, ge=0, description="Chunks created so far")
    current_file: str = Field(default="", description="Currently processing file")
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="Last progress update timestamp",
    )

    @computed_field  # type: ignore[prop-decorator]
    @property
    def percent_complete(self) -> float:
        """Fraction of files processed as a percentage, rounded to one decimal.

        Returns 0.0 while the total is unknown (files_total == 0) to avoid
        dividing by zero.
        """
        done, total = self.files_processed, self.files_total
        return 0.0 if total == 0 else round((done / total) * 100, 1)
41
+
42
+
43
class JobRecord(BaseModel):
    """Persistent job record for the queue.

    Captures the normalized request parameters, the job's lifecycle state,
    timestamps, and final results so a queue can persist and resume jobs.
    """

    # Identity
    id: str = Field(..., description="Unique job identifier (job_<uuid12>)")
    dedupe_key: str = Field(..., description="SHA256 hash for deduplication")

    # Request parameters (normalized)
    folder_path: str = Field(..., description="Resolved, normalized folder path")
    include_code: bool = Field(default=False, description="Whether to index code files")
    operation: str = Field(
        default="index", description="Operation type: 'index' or 'add'"
    )

    # Optional request parameters
    chunk_size: int = Field(default=512, description="Chunk size in tokens")
    chunk_overlap: int = Field(default=50, description="Chunk overlap in tokens")
    recursive: bool = Field(default=True, description="Recursive folder scan")
    generate_summaries: bool = Field(
        default=False, description="Generate LLM summaries"
    )
    supported_languages: Optional[list[str]] = Field(
        default=None, description="Languages to index"
    )
    include_patterns: Optional[list[str]] = Field(
        default=None, description="File patterns to include"
    )
    exclude_patterns: Optional[list[str]] = Field(
        default=None, description="File patterns to exclude"
    )

    # Job state
    status: JobStatus = Field(
        default=JobStatus.PENDING, description="Current job status"
    )
    cancel_requested: bool = Field(
        default=False, description="Flag for graceful cancellation"
    )

    # Timestamps (all timezone-aware UTC)
    enqueued_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="When the job was enqueued",
    )
    started_at: Optional[datetime] = Field(
        default=None, description="When the job started running"
    )
    finished_at: Optional[datetime] = Field(
        default=None, description="When the job finished (done, failed, or cancelled)"
    )

    # Results and metadata
    error: Optional[str] = Field(default=None, description="Error message if failed")
    retry_count: int = Field(default=0, ge=0, description="Number of retry attempts")
    progress: Optional[JobProgress] = Field(
        default=None, description="Progress tracking"
    )
    total_chunks: int = Field(default=0, ge=0, description="Total chunks indexed")
    total_documents: int = Field(default=0, ge=0, description="Total documents indexed")

    @computed_field  # type: ignore[prop-decorator]
    @property
    def execution_time_ms(self) -> Optional[int]:
        """Calculate execution time in milliseconds.

        Returns None if the job has not started. For a still-running job
        (finished_at is None) the elapsed time up to "now" (UTC) is returned,
        so repeated reads on a running job yield increasing values.
        """
        if self.started_at is None:
            return None
        end_time = self.finished_at or datetime.now(timezone.utc)
        delta = end_time - self.started_at
        return int(delta.total_seconds() * 1000)

    @staticmethod
    def compute_dedupe_key(
        folder_path: str,
        include_code: bool,
        operation: str,
        include_patterns: Optional[list[str]] = None,
        exclude_patterns: Optional[list[str]] = None,
    ) -> str:
        """Compute deduplication key from job parameters.

        Args:
            folder_path: Normalized, resolved folder path.
            include_code: Whether to include code files.
            operation: Operation type (index or add).
            include_patterns: Optional include patterns.
            exclude_patterns: Optional exclude patterns.

        Returns:
            SHA256 hash of normalized parameters.
        """
        # Normalize the path by resolving symlinks and relative segments.
        # NOTE(review): no case-folding is performed here, so two paths that
        # differ only by letter case produce DIFFERENT keys even on
        # case-insensitive filesystems — confirm this is intended.
        resolved = str(Path(folder_path).resolve())

        # Build dedupe string. Patterns are sorted so that order-insensitive
        # requests dedupe to the same key; None and [] are equivalent.
        parts = [
            resolved,
            str(include_code),
            operation,
            ",".join(sorted(include_patterns or [])),
            ",".join(sorted(exclude_patterns or [])),
        ]
        dedupe_string = "|".join(parts)

        return hashlib.sha256(dedupe_string.encode()).hexdigest()
146
+
147
+
148
class JobEnqueueResponse(BaseModel):
    """Response when enqueueing a job.

    Returned by the enqueue endpoint; on a dedupe hit the job_id refers to
    the pre-existing job rather than a newly created one.
    """

    job_id: str = Field(..., description="Unique job identifier")
    status: str = Field(default="pending", description="Job status")
    queue_position: int = Field(
        default=0, ge=0, description="Position in the queue (0 = first)"
    )
    queue_length: int = Field(default=0, ge=0, description="Total jobs in queue")
    message: str = Field(..., description="Human-readable status message")
    dedupe_hit: bool = Field(
        default=False, description="True if this was a duplicate request"
    )

    # Example payload surfaced in the generated OpenAPI/JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "job_id": "job_abc123def456",
                    "status": "pending",
                    "queue_position": 2,
                    "queue_length": 5,
                    "message": "Job queued for /path/to/docs",
                    "dedupe_hit": False,
                }
            ]
        }
    }
176
+
177
+
178
class JobListResponse(BaseModel):
    """Response for listing jobs.

    The "JobSummary" forward reference is resolved lazily by pydantic since
    JobSummary is defined later in this module.
    """

    jobs: list["JobSummary"] = Field(default_factory=list, description="List of jobs")
    total: int = Field(default=0, ge=0, description="Total number of jobs")
    pending: int = Field(default=0, ge=0, description="Number of pending jobs")
    running: int = Field(default=0, ge=0, description="Number of running jobs")
    completed: int = Field(default=0, ge=0, description="Number of completed jobs")
    failed: int = Field(default=0, ge=0, description="Number of failed jobs")
187
+
188
+
189
class JobSummary(BaseModel):
    """Summary view of a job for list responses."""

    id: str = Field(..., description="Job identifier")
    status: JobStatus = Field(..., description="Current status")
    folder_path: str = Field(..., description="Folder being indexed")
    operation: str = Field(..., description="Operation type")
    include_code: bool = Field(..., description="Whether indexing code")
    enqueued_at: datetime = Field(..., description="When queued")
    started_at: Optional[datetime] = Field(default=None, description="When started")
    finished_at: Optional[datetime] = Field(default=None, description="When finished")
    progress_percent: float = Field(default=0.0, description="Completion percentage")
    error: Optional[str] = Field(default=None, description="Error message if failed")

    @classmethod
    def from_record(cls, record: JobRecord) -> "JobSummary":
        """Build a lightweight summary from a full job record.

        Jobs without progress information report 0.0 percent complete.
        """
        percent = 0.0
        if record.progress is not None:
            percent = record.progress.percent_complete
        return cls(
            id=record.id,
            status=record.status,
            folder_path=record.folder_path,
            operation=record.operation,
            include_code=record.include_code,
            enqueued_at=record.enqueued_at,
            started_at=record.started_at,
            finished_at=record.finished_at,
            progress_percent=percent,
            error=record.error,
        )
220
+
221
+
222
class JobDetailResponse(BaseModel):
    """Detailed response for a single job."""

    id: str = Field(..., description="Job identifier")
    status: JobStatus = Field(..., description="Current status")
    folder_path: str = Field(..., description="Folder being indexed")
    operation: str = Field(..., description="Operation type")
    include_code: bool = Field(..., description="Whether indexing code")

    # Timestamps
    enqueued_at: datetime = Field(..., description="When queued")
    started_at: Optional[datetime] = Field(default=None, description="When started")
    finished_at: Optional[datetime] = Field(default=None, description="When finished")
    execution_time_ms: Optional[int] = Field(
        default=None, description="Execution time in ms"
    )

    # Progress
    progress: Optional[JobProgress] = Field(
        default=None, description="Progress details"
    )

    # Results
    total_documents: int = Field(default=0, description="Documents indexed")
    total_chunks: int = Field(default=0, description="Chunks created")
    error: Optional[str] = Field(default=None, description="Error message if failed")
    retry_count: int = Field(default=0, description="Retry attempts")
    cancel_requested: bool = Field(
        default=False, description="Whether cancellation requested"
    )

    @classmethod
    def from_record(cls, record: JobRecord) -> "JobDetailResponse":
        """Build a detail response mirroring the job record field by field.

        Every field here shares its name with the corresponding JobRecord
        attribute (execution_time_ms is a computed field on the record), so
        the copy is driven by attribute names.
        """
        field_names = (
            "id",
            "status",
            "folder_path",
            "operation",
            "include_code",
            "enqueued_at",
            "started_at",
            "finished_at",
            "execution_time_ms",
            "progress",
            "total_documents",
            "total_chunks",
            "error",
            "retry_count",
            "cancel_requested",
        )
        return cls(**{name: getattr(record, name) for name in field_names})
273
+
274
+
275
class QueueStats(BaseModel):
    """Statistics about the job queue.

    Per-status counters plus a snapshot of the currently running job, if any.
    """

    pending: int = Field(default=0, ge=0, description="Pending jobs count")
    running: int = Field(default=0, ge=0, description="Running jobs count")
    completed: int = Field(default=0, ge=0, description="Completed jobs count")
    failed: int = Field(default=0, ge=0, description="Failed jobs count")
    cancelled: int = Field(default=0, ge=0, description="Cancelled jobs count")
    total: int = Field(default=0, ge=0, description="Total jobs count")
    current_job_id: Optional[str] = Field(
        default=None, description="Currently running job ID"
    )
    current_job_running_time_ms: Optional[int] = Field(
        default=None, description="Current job running time in ms"
    )
@@ -34,7 +34,7 @@ class QueryRequest(BaseModel):
34
34
  description="Number of results to return",
35
35
  )
36
36
  similarity_threshold: float = Field(
37
- default=0.7,
37
+ default=0.3,
38
38
  ge=0.0,
39
39
  le=1.0,
40
40
  description="Minimum similarity score (0-1)",
@@ -92,7 +92,7 @@ class QueryRequest(BaseModel):
92
92
  {
93
93
  "query": "How do I configure authentication?",
94
94
  "top_k": 5,
95
- "similarity_threshold": 0.7,
95
+ "similarity_threshold": 0.3,
96
96
  "mode": "hybrid",
97
97
  "alpha": 0.5,
98
98
  },
@@ -1,4 +1,4 @@
1
- """Project root resolution for per-project doc-serve instances."""
1
+ """Project root resolution for per-project Agent Brain instances."""
2
2
 
3
3
  import logging
4
4
  import subprocess
@@ -1,4 +1,4 @@
1
- """Runtime state management for doc-serve instances."""
1
+ """Runtime state management for Agent Brain instances."""
2
2
 
3
3
  import json
4
4
  import logging
@@ -15,7 +15,7 @@ logger = logging.getLogger(__name__)
15
15
 
16
16
 
17
17
  class RuntimeState(BaseModel):
18
- """Runtime state for a doc-serve instance."""
18
+ """Runtime state for an Agent Brain instance."""
19
19
 
20
20
  schema_version: str = "1.0"
21
21
  mode: str = "project" # "project" or "shared"
@@ -5,7 +5,7 @@ from pathlib import Path
5
5
 
6
6
  logger = logging.getLogger(__name__)
7
7
 
8
- STATE_DIR_NAME = ".claude/doc-serve"
8
+ STATE_DIR_NAME = ".claude/agent-brain"
9
9
 
10
10
  SUBDIRECTORIES = [
11
11
  "data",
@@ -20,7 +20,7 @@ SUBDIRECTORIES = [
20
20
  def resolve_state_dir(project_root: Path) -> Path:
21
21
  """Resolve the state directory for a project.
22
22
 
23
- Returns <project_root>/.claude/doc-serve/
23
+ Returns <project_root>/.claude/agent-brain/
24
24
 
25
25
  Args:
26
26
  project_root: Resolved project root path.
@@ -69,6 +69,6 @@ def resolve_shared_project_dir(project_id: str) -> Path:
69
69
  Returns:
70
70
  Path to shared project data directory.
71
71
  """
72
- shared_dir = Path.home() / ".doc-serve" / "projects" / project_id / "data"
72
+ shared_dir = Path.home() / ".agent-brain" / "projects" / project_id / "data"
73
73
  shared_dir.mkdir(parents=True, exist_ok=True)
74
74
  return shared_dir