agyqueue 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agyqueue/__init__.py +1 -0
- agyqueue/client.py +129 -0
- agyqueue/config.py +72 -0
- agyqueue/dashboard.html +1155 -0
- agyqueue/mcp_server.py +438 -0
- agyqueue/models.py +38 -0
- agyqueue/notifications.py +187 -0
- agyqueue/storage.py +423 -0
- agyqueue/task_queue.py +111 -0
- agyqueue/worker.py +671 -0
- agyqueue-0.1.0.dist-info/METADATA +287 -0
- agyqueue-0.1.0.dist-info/RECORD +15 -0
- agyqueue-0.1.0.dist-info/WHEEL +5 -0
- agyqueue-0.1.0.dist-info/entry_points.txt +2 -0
- agyqueue-0.1.0.dist-info/top_level.txt +1 -0
agyqueue/worker.py
ADDED
|
@@ -0,0 +1,671 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import logging
|
|
3
|
+
import sys
|
|
4
|
+
import os
|
|
5
|
+
import subprocess
|
|
6
|
+
import shutil
|
|
7
|
+
import tempfile
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
from agyqueue.storage import TaskStore
|
|
10
|
+
from agyqueue.task_queue import TaskQueue
|
|
11
|
+
from agyqueue.models import Task, TaskStatus
|
|
12
|
+
|
|
13
|
+
# Send every log record at INFO level or above to stdout, prefixed with
# timestamp, level and logger name.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO,
)

# Logger shared by all functions in this module.
logger = logging.getLogger("agyqueue.worker")
|
|
20
|
+
|
|
21
|
+
class TaskCancelledError(Exception):
    """Signals that a task stopped executing because it was cancelled."""
|
|
24
|
+
|
|
25
|
+
def cancellation_aware_sleep(seconds: float, task_id: str, store: TaskStore, poll_interval: float = 0.2):
    """Sleep for ``seconds`` while watching for cancellation and refreshing the heartbeat.

    The task is re-read from ``store`` on every iteration; roughly every three
    seconds ``store.touch_task`` is called so the task's heartbeat stays fresh.

    Args:
        seconds: Total time to sleep. Zero or negative returns immediately.
        task_id: Identifier of the task being executed.
        store: Task store providing ``get_task`` and ``touch_task``.
        poll_interval: Maximum time between two cancellation checks.

    Raises:
        TaskCancelledError: If the task's status is CANCELLED at any check.
    """
    # A monotonic clock keeps the elapsed-time arithmetic immune to wall-clock
    # adjustments (NTP steps, manual changes) while the worker is sleeping.
    started = time.monotonic()
    last_heartbeat = started
    while time.monotonic() - started < seconds:
        task = store.get_task(task_id)
        if task and task.status == TaskStatus.CANCELLED:
            logger.warning(f"Task {task_id}: Cancellation detected during sleep.")
            raise TaskCancelledError(f"Task {task_id} was cancelled.")

        # Touch task to update updated_at heartbeat
        if time.monotonic() - last_heartbeat > 3.0:
            try:
                store.touch_task(task_id)
                last_heartbeat = time.monotonic()
            except Exception as e:
                # A failed heartbeat must not abort the task itself.
                logger.error(f"Heartbeat update failed: {e}")

        # The store calls above may have run past the deadline, which makes the
        # remaining time negative; time.sleep() rejects negative values, so clamp.
        remaining = seconds - (time.monotonic() - started)
        time.sleep(max(0.0, min(poll_interval, remaining)))
|
|
44
|
+
|
|
45
|
+
def run_cancellation_aware_subprocess(
    args: list[str],
    cwd: str,
    task_id: str,
    store: TaskStore,
    poll_interval: float = 0.2
) -> subprocess.CompletedProcess:
    """Run ``args`` as a subprocess while honouring cancellation and heartbeats.

    The child's stdout and stderr are captured as text. While it runs, the task
    is re-read from ``store`` about every ``poll_interval`` seconds; if it has
    been cancelled the child is terminated (and killed after a two second grace
    period). Roughly every three seconds ``store.touch_task`` refreshes the
    task's heartbeat.

    Args:
        args: Command and arguments, passed to ``subprocess.Popen`` as a list.
        cwd: Working directory for the child process.
        task_id: Identifier of the task on whose behalf the process runs.
        store: Task store providing ``get_task`` and ``touch_task``.
        poll_interval: Seconds to wait for the child between two checks.

    Returns:
        A ``subprocess.CompletedProcess`` with return code, stdout and stderr.

    Raises:
        TaskCancelledError: If the task is cancelled before or during the run.
    """
    task = store.get_task(task_id)
    if task and task.status == TaskStatus.CANCELLED:
        raise TaskCancelledError(f"Task {task_id} was cancelled before starting process.")

    logger.info(f"Task {task_id}: Starting subprocess: {' '.join(args)} in {cwd}")
    process = subprocess.Popen(
        args,
        cwd=cwd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True
    )

    last_heartbeat = time.monotonic()
    try:
        while True:
            # communicate() keeps draining both pipes while it waits. Polling
            # with poll() alone would leave the pipes unread, and a child that
            # writes more than the OS pipe buffer would block forever.
            # Retrying after TimeoutExpired does not lose any output.
            try:
                stdout, stderr = process.communicate(timeout=poll_interval)
            except subprocess.TimeoutExpired:
                pass
            else:
                return subprocess.CompletedProcess(args, process.returncode, stdout, stderr)

            current_task = store.get_task(task_id)
            if current_task and current_task.status == TaskStatus.CANCELLED:
                logger.warning(f"Task {task_id}: Cancellation detected. Terminating process...")
                process.terminate()
                try:
                    process.wait(timeout=2.0)
                except subprocess.TimeoutExpired:
                    logger.warning(f"Task {task_id}: Process failed to terminate. Killing it...")
                    process.kill()
                    process.wait()
                raise TaskCancelledError(f"Task {task_id} was cancelled during execution of process: {' '.join(args)}")

            # Touch task to update updated_at heartbeat
            if time.monotonic() - last_heartbeat > 3.0:
                try:
                    store.touch_task(task_id)
                    last_heartbeat = time.monotonic()
                except Exception as e:
                    # A failed heartbeat must not abort the running process.
                    logger.error(f"Heartbeat update failed: {e}")
    except BaseException:
        # Never leave an orphaned child behind, whatever interrupted the loop.
        if process.poll() is None:
            process.terminate()
            try:
                process.wait(timeout=1.0)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait()
        # The output is being discarded, so release the pipe descriptors now.
        for pipe in (process.stdout, process.stderr):
            if pipe is not None:
                pipe.close()
        raise
|
|
104
|
+
|
|
105
|
+
@contextmanager
def isolated_workspace(source_dir: str):
    """Yield the path of a throw-away workspace derived from ``source_dir``.

    If ``source_dir`` is inside a Git work tree, the workspace is a new
    ``git worktree`` on a freshly created branch. Otherwise (or when the
    ``git`` executable is missing) ``source_dir`` is copied, skipping
    ``.git``, ``.venv``, ``data``, ``db_data``, ``backup`` and SQLite database
    files. On exit the worktree and its branch, or the copy, are removed.

    Args:
        source_dir: Directory to isolate.

    Yields:
        Path of the temporary workspace directory.

    Raises:
        subprocess.CalledProcessError: If ``git worktree add`` fails.
    """
    dir_prefix = "agyqueue-worktree-"
    temp_dir = tempfile.mkdtemp(prefix=dir_prefix)
    # Whole seconds alone collide when two workspaces are created within the
    # same second; mkdtemp's random suffix makes the branch name unique.
    unique_suffix = os.path.basename(temp_dir).removeprefix(dir_prefix)
    branch_name = f"agy-worktree-{int(time.time())}-{unique_suffix}"
    is_git = False
    worktree_added = False

    def run_git(*git_args, check=False):
        """Run a git command inside ``source_dir`` with captured output."""
        return subprocess.run(
            ["git", *git_args],
            cwd=source_dir,
            check=check,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )

    def ignore_patterns(path, names):
        """Return the entries of ``names`` that must not be copied."""
        skipped_names = ('.git', '.venv', 'data', 'db_data', 'backup')
        skipped_suffixes = ('.db', '.db-wal', '.db-shm')
        return [
            name for name in names
            if name in skipped_names or name.endswith(skipped_suffixes)
        ]

    try:
        # Check if source_dir is inside a git repo
        try:
            is_git = run_git("rev-parse", "--is-inside-work-tree").returncode == 0
        except FileNotFoundError:
            logger.info("[Isolation] Git executable not found. Defaulting to copy-based workspace isolation.")

        if is_git:
            logger.info(f"[Isolation] Creating isolated Git worktree at {temp_dir} using branch {branch_name}")
            run_git("worktree", "add", "-b", branch_name, temp_dir, check=True)
            worktree_added = True
        else:
            logger.info(f"[Isolation] Workspace is not a Git repo. Copying workspace to isolated directory {temp_dir}")
            # Copy excluding heavy/unwanted folders (compute-optimized)
            shutil.copytree(source_dir, temp_dir, dirs_exist_ok=True, ignore=ignore_patterns)

        yield temp_dir

    finally:
        if is_git:
            logger.info(f"[Isolation] Cleaning up Git worktree at {temp_dir} and branch {branch_name}")
            removed = False
            if worktree_added:
                removed = run_git("worktree", "remove", "--force", temp_dir).returncode == 0
            # Covers a failed `worktree add` or `worktree remove`, which would
            # otherwise leave the temporary directory behind.
            shutil.rmtree(temp_dir, ignore_errors=True)
            if not removed:
                # Drop stale worktree bookkeeping so the branch can be deleted.
                run_git("worktree", "prune")
            run_git("branch", "-D", branch_name)
        else:
            logger.info(f"[Isolation] Cleaning up temporary directory {temp_dir}")
            shutil.rmtree(temp_dir, ignore_errors=True)
|
|
173
|
+
|
|
174
|
+
# Manifest written first. It has no probes, resource limits or
# securityContext, so the linter below reports all three checks.
_SRE_BASE_MANIFEST = """apiVersion: apps/v1
kind: Deployment
metadata:
  name: web-app
spec:
  replicas: 3
  template:
    spec:
      containers:
      - name: main
        image: nginx:latest
"""

# Manifest written over the base one before the linter is run a second time.
_SRE_PATCHED_MANIFEST = """apiVersion: apps/v1
kind: Deployment
metadata:
  name: web-app
spec:
  replicas: 3
  template:
    spec:
      containers:
      - name: main
        image: nginx:latest
        securityContext:
          runAsNonRoot: true
          runAsUser: 10001
          allowPrivilegeEscalation: false
        resources:
          limits:
            cpu: "500m"
            memory: "512Mi"
          requests:
            cpu: "200m"
            memory: "256Mi"
        livenessProbe:
          httpGet:
            path: /healthz
            port: 8080
          initialDelaySeconds: 15
          periodSeconds: 20
        readinessProbe:
          httpGet:
            path: /ready
            port: 8080
          initialDelaySeconds: 5
          periodSeconds: 10
"""

# Stand-alone script executed inside the isolated workspace. It exits 1 when
# any of its substring checks on deployment.yaml fails, 0 otherwise.
_SRE_LINTER_SCRIPT = """import sys
with open("deployment.yaml", "r") as f:
    content = f.read()

errors = []
if "livenessProbe" not in content:
    errors.append("Reliability Check: Missing livenessProbe")
if "resources" not in content:
    errors.append("Reliability Check: CPU/Memory resource limits are not defined")
if "runAsNonRoot: true" not in content:
    errors.append("Security Check: Container runs as root (runAsNonRoot is not true)")

if errors:
    print("SRE VALIDATION FAILED:")
    for err in errors:
        print(f" - {err}")
    sys.exit(1)
else:
    print("SRE VALIDATION PASSED")
    sys.exit(0)
"""


def execute_sre_task(task_id: str, prompt: str, store: TaskStore, workspace_root: str):
    """Lint a sample Kubernetes manifest, patch it, re-lint it and store a report.

    Inside an isolated workspace the function writes a base ``deployment.yaml``
    and a ``linter.py``, runs the linter, overwrites the manifest with the
    patched version, runs the linter again and saves a Markdown report as the
    task result. Progress is reported through ``store.update_task`` and every
    pause is cancellation-aware.

    Args:
        task_id: Identifier of the task being executed.
        prompt: Task prompt; only logged here.
        store: Task store used for progress updates and the final result.
        workspace_root: Directory from which the isolated workspace is derived.

    Raises:
        TaskCancelledError: If the task is cancelled while it is running.
    """
    import difflib  # only this task renders a diff

    logger.info(f"Starting SRE Task {task_id} with prompt: {prompt}")

    store.update_task(task_id, TaskStatus.RUNNING, 10, "Initializing isolated workspace...")
    cancellation_aware_sleep(2, task_id, store)

    with isolated_workspace(workspace_root) as iso_dir:
        store.update_task(task_id, TaskStatus.RUNNING, 25, "Writing Kubernetes manifest and SRE validation suite...")

        # Explicit encoding keeps the written files independent of the locale.
        manifest_path = os.path.join(iso_dir, "deployment.yaml")
        with open(manifest_path, "w", encoding="utf-8") as f:
            f.write(_SRE_BASE_MANIFEST)

        # Write validation script
        linter_path = os.path.join(iso_dir, "linter.py")
        with open(linter_path, "w", encoding="utf-8") as f:
            f.write(_SRE_LINTER_SCRIPT)

        cancellation_aware_sleep(2, task_id, store)

        store.update_task(task_id, TaskStatus.RUNNING, 50, "Running SRE linter on base manifests...")
        res_initial = run_cancellation_aware_subprocess([sys.executable, "linter.py"], cwd=iso_dir, task_id=task_id, store=store)
        # If the linter itself crashed, stdout is empty and stderr explains why.
        initial_log = res_initial.stdout or res_initial.stderr

        cancellation_aware_sleep(2, task_id, store)

        store.update_task(task_id, TaskStatus.RUNNING, 75, "Applying security & reliability patches to manifest...")
        with open(manifest_path, "w", encoding="utf-8") as f:
            f.write(_SRE_PATCHED_MANIFEST)

        cancellation_aware_sleep(2, task_id, store)

        store.update_task(task_id, TaskStatus.RUNNING, 90, "Re-running SRE linter to validate fixes...")
        res_patched = run_cancellation_aware_subprocess([sys.executable, "linter.py"], cwd=iso_dir, task_id=task_id, store=store)
        patched_log = res_patched.stdout or res_patched.stderr

        cancellation_aware_sleep(2, task_id, store)

        # Derive the diff from the two manifests actually written, so the
        # report cannot drift from the patch that was applied.
        diff_text = "\n".join(
            difflib.unified_diff(
                _SRE_BASE_MANIFEST.splitlines(),
                _SRE_PATCHED_MANIFEST.splitlines(),
                fromfile="deployment.yaml (base)",
                tofile="deployment.yaml (patched)",
                lineterm="",
            )
        )

        # Report the real outcome of the second linter run instead of
        # unconditionally claiming success.
        if res_patched.returncode == 0:
            status_line = "**PASSED** (Ready for production rollout)"
        else:
            status_line = "**FAILED** (Patched manifest still violates SRE checks)"

        report = f"""# SRE Kubernetes Analysis & Validation Report

## 1. Initial Linter Scan (Failing Checks)
```text
{initial_log}
```

## 2. Generated YAML Security Patch
```diff
{diff_text}
```

## 3. Post-Patch Validation Results
```text
{patched_log}
```

**Status**: {status_line}
"""

        store.update_task(
            task_id=task_id,
            status=TaskStatus.COMPLETED,
            progress=100,
            step="Analysis and validation complete.",
            result=report
        )
        logger.info(f"Task {task_id} COMPLETED successfully.")
|
|
329
|
+
|
|
330
|
+
def execute_fastapi_task(task_id: str, prompt: str, store: TaskStore, workspace_root: str):
    """Generate a small module with unit tests, run the tests and store a report.

    Inside an isolated workspace the function writes ``math_app.py`` and
    ``test_math_app.py``, executes the test file with the current interpreter
    and saves a Markdown report containing both sources and the runner output
    as the task result.

    Args:
        task_id: Identifier of the task being executed.
        prompt: Task prompt; only logged here.
        store: Task store used for progress updates and the final result.
        workspace_root: Directory from which the isolated workspace is derived.

    Raises:
        TaskCancelledError: If the task is cancelled while it is running.
    """
    logger.info(f"Starting FastAPI Task {task_id} with prompt: {prompt}")

    store.update_task(task_id, TaskStatus.RUNNING, 15, "Initializing isolated workspace...")
    cancellation_aware_sleep(2, task_id, store)

    app_content = """class SimpleMath:
    def add(self, x, y):
        return x + y

    def divide(self, x, y):
        if y == 0:
            raise ValueError("Division by zero is undefined")
        return x / y
"""

    test_content = """import unittest
from math_app import SimpleMath

class TestSimpleMath(unittest.TestCase):
    def setUp(self):
        self.calc = SimpleMath()

    def test_add(self):
        self.assertEqual(self.calc.add(15, 25), 40)

    def test_divide_valid(self):
        self.assertEqual(self.calc.divide(10, 2), 5)

    def test_divide_invalid(self):
        with self.assertRaises(ValueError):
            self.calc.divide(5, 0)

if __name__ == '__main__':
    unittest.main()
"""

    with isolated_workspace(workspace_root) as iso_dir:
        store.update_task(task_id, TaskStatus.RUNNING, 40, "Writing source code files and unit test suite...")

        # Module under test first, then its test suite.
        generated_files = (
            ("math_app.py", app_content),
            ("test_math_app.py", test_content),
        )
        for file_name, file_body in generated_files:
            target_path = os.path.join(iso_dir, file_name)
            with open(target_path, "w") as handle:
                handle.write(file_body)

        cancellation_aware_sleep(2, task_id, store)

        store.update_task(task_id, TaskStatus.RUNNING, 75, "Running isolated unit test suite...")
        test_command = [sys.executable, "test_math_app.py"]
        res = run_cancellation_aware_subprocess(
            test_command,
            cwd=iso_dir,
            task_id=task_id,
            store=store
        )
        # Prefer stderr when it is non-empty, otherwise fall back to stdout.
        test_output = res.stderr if res.stderr else res.stdout

        cancellation_aware_sleep(2, task_id, store)

        store.update_task(task_id, TaskStatus.RUNNING, 95, "Compiling unit test validation report...")

        if res.returncode == 0:
            verdict = "PASSED"
        else:
            verdict = "FAILED"

        report = f"""# Isolated Test Execution Report

## 1. Generated Source Code (`math_app.py`)
```python
{app_content}
```

## 2. Generated Test Suite (`test_math_app.py`)
```python
{test_content}
```

## 3. Test Runner Output (Captured from isolated execution)
```text
{test_output}
```

**Validation Status**: **{verdict}**
"""

        store.update_task(
            task_id=task_id,
            status=TaskStatus.COMPLETED,
            progress=100,
            step="Unit tests completed.",
            result=report
        )
        logger.info(f"Task {task_id} COMPLETED successfully.")
|
|
417
|
+
|
|
418
|
+
def execute_multi_agent_orchestrator(task_id: str, prompt: str, store: TaskStore, queue: TaskQueue, workspace_root: str):
    """Fan a task out to two subagent tasks, then aggregate their results.

    The function is entered twice for the same task. On the first run (no
    subtasks stored yet) it saves and enqueues ``<task_id>-sre`` and
    ``<task_id>-code`` and leaves the parent in WAITING. On a later run it
    combines the subtask results into one report and marks the parent
    COMPLETED; if some subtask has not reached a terminal state it puts the
    parent back into WAITING and returns.

    Args:
        task_id: Identifier of the orchestrator (parent) task.
        prompt: Prompt forwarded to both subagents.
        store: Task store used to save subtasks and update the parent.
        queue: Queue on which the subtasks are enqueued.
        workspace_root: Not used by this function.

    Raises:
        TaskCancelledError: If the parent is cancelled while aggregating.
    """
    # CANCELLED is terminal too: the code that re-queues the parent counts a
    # cancelled subtask as finished, so treating it as pending here would put
    # the parent back into WAITING with nothing left to wake it.
    terminal_statuses = (TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED)

    subtasks = store.get_subtasks(task_id)

    if not subtasks:
        # First execution: Spawn child subagent tasks
        logger.info(f"Orchestrator {task_id}: Decomposing task into parallel subagent actions...")
        store.update_task(
            task_id=task_id,
            status=TaskStatus.RUNNING,
            progress=20,
            step="Decomposing task: spawning parallel validation subagents..."
        )

        child_specs = (
            # Subtask 1: Manifest Compliance Check
            (f"{task_id}-sre", f"Subagent manifest check: {prompt}", "manifest_compliance"),
            # Subtask 2: API Generation and Verification
            (f"{task_id}-code", f"Subagent code generation and test: {prompt}", "fastapi_gen"),
        )
        for child_id, child_prompt, child_type in child_specs:
            store.save_task(
                Task(
                    task_id=child_id,
                    prompt=child_prompt,
                    task_type=child_type,
                    status=TaskStatus.QUEUED,
                    progress=0,
                    step="Queued by parent orchestrator",
                    parent_id=task_id
                )
            )
            queue.enqueue(child_id)

        # Transition parent to WAITING state
        store.update_task(
            task_id=task_id,
            status=TaskStatus.WAITING,
            progress=40,
            step="Waiting for SRE and CodeGen subagents to complete execution..."
        )
        logger.info(f"Orchestrator {task_id}: Spawned subtasks. Parent transitioned to WAITING.")
        return

    # Resumed execution: aggregate subagent reports
    logger.info(f"Orchestrator {task_id}: Resuming task. Checking subagent results...")

    # Double check if any subtask is not finished (normally checked before queueing)
    unfinished = [s for s in subtasks if s.status not in terminal_statuses]
    if unfinished:
        logger.warning(f"Orchestrator {task_id} woke up but subtasks {[u.task_id for u in unfinished]} are not completed. Re-entering WAITING state.")
        store.update_task(
            task_id=task_id,
            status=TaskStatus.WAITING,
            progress=40,
            step="Waiting for lagging subagents to complete..."
        )
        return

    store.update_task(
        task_id=task_id,
        status=TaskStatus.RUNNING,
        progress=80,
        step="All subagents complete. Aggregating subagent validation reports..."
    )
    # Same cancellation-aware pause the other executors use, so a cancel
    # request is honoured during aggregation as well.
    cancellation_aware_sleep(2, task_id, store)

    sub1_task = store.get_task(f"{task_id}-sre")
    sub2_task = store.get_task(f"{task_id}-code")

    def succeeded(sub):
        """Return True when the subtask exists and finished as COMPLETED."""
        return bool(sub) and sub.status == TaskStatus.COMPLETED

    def section_body(sub):
        """Return the subtask's result, or an error line when it did not complete."""
        if succeeded(sub):
            return sub.result
        return f"Error: {sub.error if sub else 'Not found'}"

    def checkbox(ok):
        """Render a Markdown task-list marker and verdict for ``ok``."""
        return ("x", "Passed") if ok else (" ", "Failed")

    sre_res = section_body(sub1_task)
    code_res = section_body(sub2_task)

    # Derive the summary from the actual subtask outcomes instead of always
    # reporting success.
    sre_mark, sre_verdict = checkbox(succeeded(sub1_task))
    code_mark, code_verdict = checkbox(succeeded(sub2_task))
    if succeeded(sub1_task) and succeeded(sub2_task):
        overall = "COMPLETED SUCCESSFUL"
    else:
        overall = "COMPLETED WITH FAILURES"

    compiled_report = f"""# Multi-Agent Deployment & Monitoring Orchestration Report

## 1. Executive Summary
This report aggregates the validation outputs generated asynchronously by parallel SRE and FastAPI subagents. All workloads were executed in **isolated workspace environments** to guarantee changeset safety.

---

## 2. Subagent A: SRE Kubernetes Analysis & Patch Audit
{sre_res}

---

## 3. Subagent B: FastAPI Application Generation & Unit Test Execution
{code_res}

---

## 4. Orchestration Summary
- [x] **Subagent Isolation Check**: Successful (0-interference copy-on-write temp folders)
- [{sre_mark}] **SRE Kubernetes Compliance**: {sre_verdict}
- [{code_mark}] **API Test Runner Compliance**: {code_verdict}

**Orchestration Status**: **{overall}**
"""

    store.update_task(
        task_id=task_id,
        status=TaskStatus.COMPLETED,
        progress=100,
        step="Multi-agent deployment orchestration complete.",
        result=compiled_report
    )
    logger.info(f"Orchestrator {task_id} finished execution and aggregated results.")
|
|
530
|
+
|
|
531
|
+
def execute_generic_task(task_id: str, prompt: str, store: TaskStore):
    """Walk a task through three fixed progress stages and mark it COMPLETED.

    Each stage updates the task, logs the progress and pauses for two seconds
    with cancellation checks.

    Args:
        task_id: Identifier of the task being executed.
        prompt: Task prompt; echoed in the stored result.
        store: Task store used for progress updates and the final result.

    Raises:
        TaskCancelledError: If the task is cancelled during one of the pauses.
    """
    logger.info(f"Starting Generic Task {task_id} with prompt: {prompt}")

    stages = (
        (33, "Initializing task pipeline..."),
        (66, "Processing workload..."),
        (90, "Finalizing results..."),
    )
    for percent, description in stages:
        store.update_task(task_id, TaskStatus.RUNNING, percent, description)
        logger.info(f"Task {task_id}: {percent}% - {description}")
        cancellation_aware_sleep(2, task_id, store)

    summary = f"### Custom Execution Results\n\nExecuted task for prompt: *\"{prompt}\"*\n\nAll tasks completed successfully."
    store.update_task(
        task_id=task_id,
        status=TaskStatus.COMPLETED,
        progress=100,
        step="Task execution complete.",
        result=summary
    )
    logger.info(f"Task {task_id} COMPLETED.")
|
|
547
|
+
|
|
548
|
+
def _requeue_parent_if_siblings_done(task_id: str, store: TaskStore, queue: TaskQueue, trigger: str):
    """Re-queue the parent of ``task_id`` once every sibling subtask is terminal.

    Does nothing when the task has no parent, when a sibling is still pending,
    or when the parent is not in the WAITING state.
    """
    task_ref = store.get_task(task_id)  # reload to get latest status (COMPLETED/FAILED/CANCELLED)
    if not (task_ref and task_ref.parent_id):
        return

    parent_id = task_ref.parent_id
    terminal_statuses = (TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED)
    if any(s.status not in terminal_statuses for s in store.get_subtasks(parent_id)):
        return

    parent = store.get_task(parent_id)
    if not (parent and parent.status == TaskStatus.WAITING):
        return

    logger.info(f"Subtask {trigger}: All siblings for parent {parent_id} finished. Re-queueing parent orchestrator.")
    store.update_task(
        task_id=parent_id,
        status=TaskStatus.QUEUED,
        progress=60,
        step="All subagents complete. Re-queueing parent task for results aggregation..."
    )
    queue.enqueue(parent_id)


def process_task(task_id: str, store: TaskStore, workspace_root: str):
    """Load a task, dispatch it to the matching executor and record the outcome.

    Dispatch is by substring of the lower-cased task type, checked in this
    order: ``orchestrator``/``multi_agent``; then ``sre``/``k8s``/
    ``kubernetes``/``manifest``/``compliance``; then ``fastapi``/``api``/
    ``code``; anything else runs the generic executor. A cancellation is
    stored as CANCELLED and any other exception as FAILED. Afterwards, if the
    task is a subtask and all of its siblings are terminal, the waiting
    parent is re-queued.

    Args:
        task_id: Identifier of the task to process.
        store: Task store used to load and update tasks.
        workspace_root: Directory passed to executors that need a workspace.
    """
    task = store.get_task(task_id)
    if not task:
        logger.error(f"Task {task_id} not found in database.")
        return

    # Check if task was already cancelled before we start
    if task.status == TaskStatus.CANCELLED:
        logger.info(f"Task {task_id} was cancelled before worker could start it. Skipping.")
        return

    # Update state to RUNNING if not already
    store.update_task(
        task_id=task_id,
        status=TaskStatus.RUNNING,
        progress=task.progress or 5,
        step="Initializing background process..."
    )

    queue = TaskQueue()

    try:
        task_type = task.task_type.lower()
        if "orchestrator" in task_type or "multi_agent" in task_type:
            execute_multi_agent_orchestrator(task_id, task.prompt, store, queue, workspace_root)
        elif "sre" in task_type or "k8s" in task_type or "kubernetes" in task_type or "manifest" in task_type or "compliance" in task_type:
            execute_sre_task(task_id, task.prompt, store, workspace_root)
        elif "fastapi" in task_type or "api" in task_type or "code" in task_type:
            execute_fastapi_task(task_id, task.prompt, store, workspace_root)
        else:
            execute_generic_task(task_id, task.prompt, store)
        outcome = "completion"

    except TaskCancelledError as tce:
        logger.info(f"Task {task_id} cancellation verified by worker.")
        store.update_task(
            task_id=task_id,
            status=TaskStatus.CANCELLED,
            progress=100,
            step="Task execution aborted (cancelled).",
            error=str(tce)
        )
        outcome = "cancellation"

    except Exception as e:
        logger.exception(f"Error processing task {task_id}")
        store.update_task(
            task_id=task_id,
            status=TaskStatus.FAILED,
            progress=100,
            step="Failed during execution",
            error=str(e)
        )
        outcome = "failure"

    # Wake the parent after every outcome. A failed subtask is terminal as
    # well, and skipping the check for it would leave the parent WAITING.
    # The check runs outside the try block above so that a problem while
    # waking the parent cannot overwrite this task's recorded status.
    try:
        _requeue_parent_if_siblings_done(task_id, store, queue, outcome)
    except Exception:
        logger.exception(f"Failed to re-queue parent of task {task_id}")
|
|
635
|
+
|
|
636
|
+
import signal
|
|
637
|
+
|
|
638
|
+
# Set by handle_shutdown() and read by the worker loop in main().
should_shutdown = False


def handle_shutdown(signum, frame):
    """Signal handler: log the received signal and request a worker shutdown.

    Args:
        signum: Number of the signal that was received.
        frame: Current stack frame supplied by the signal machinery; unused.
    """
    global should_shutdown
    logger.info(f"Received signal {signum}. Requesting graceful worker shutdown...")
    # The handler only raises the flag; main() checks it between tasks.
    should_shutdown = True
|
|
644
|
+
|
|
645
|
+
def main():
    """Run the worker loop until SIGINT or SIGTERM requests a shutdown.

    Registers the shutdown handler, creates the queue and the store, then
    repeatedly dequeues a task id (waiting up to one second) and processes it.
    Errors inside the loop are logged and the loop continues.
    """
    logger.info("AgyQueue background worker starting...")

    # Register signal handlers for graceful shutdown (SIGINT and SIGTERM)
    signal.signal(signal.SIGINT, handle_shutdown)
    signal.signal(signal.SIGTERM, handle_shutdown)

    queue = TaskQueue()
    store = TaskStore()

    # Parent directory of the package directory that contains this file.
    # NOTE(review): for an installed wheel this is presumably site-packages;
    # confirm that is the intended workspace root.
    workspace_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    logger.info(f"Workspace root resolved to: {workspace_root}")

    logger.info("Worker ready. Waiting for tasks...")

    while not should_shutdown:
        try:
            task_id = queue.dequeue(timeout=1.0)
            if task_id:
                process_task(task_id, store, workspace_root)
        except Exception as e:
            # Keep the traceback: the message alone rarely identifies the cause.
            logger.exception(f"Error in worker main loop: {e}")
            # Back off before retrying. A dequeue that fails immediately (for
            # example because the queue backend is unreachable) would otherwise
            # spin the loop at full speed and flood the log.
            time.sleep(1.0)

    logger.info("Worker has shut down gracefully.")


if __name__ == "__main__":
    main()
|