juno-code 1.0.47 → 1.0.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -8
- package/dist/bin/cli.d.mts +17 -0
- package/dist/bin/cli.d.ts +17 -0
- package/dist/bin/cli.js +5606 -17514
- package/dist/bin/cli.js.map +1 -1
- package/dist/bin/cli.mjs +5647 -17553
- package/dist/bin/cli.mjs.map +1 -1
- package/dist/bin/feedback-collector.d.mts +2 -0
- package/dist/bin/feedback-collector.d.ts +2 -0
- package/dist/bin/feedback-collector.js.map +1 -1
- package/dist/bin/feedback-collector.mjs.map +1 -1
- package/dist/index.d.mts +2107 -0
- package/dist/index.d.ts +2107 -0
- package/dist/index.js +3760 -14730
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +3760 -14537
- package/dist/index.mjs.map +1 -1
- package/dist/templates/extensions/pi/juno-skill-preprocessor.ts +239 -0
- package/dist/templates/scripts/__pycache__/github.cpython-313.pyc +0 -0
- package/dist/templates/scripts/__pycache__/parallel_runner.cpython-313.pyc +0 -0
- package/dist/templates/scripts/__pycache__/slack_respond.cpython-313.pyc +0 -0
- package/dist/templates/scripts/kanban.sh +18 -4
- package/dist/templates/scripts/parallel_runner.sh +2242 -0
- package/dist/templates/services/README.md +61 -1
- package/dist/templates/services/__pycache__/claude.cpython-313.pyc +0 -0
- package/dist/templates/services/__pycache__/codex.cpython-313.pyc +0 -0
- package/dist/templates/services/__pycache__/pi.cpython-313.pyc +0 -0
- package/dist/templates/services/claude.py +132 -33
- package/dist/templates/services/codex.py +179 -66
- package/dist/templates/services/gemini.py +117 -27
- package/dist/templates/services/pi.py +1753 -0
- package/dist/templates/skills/claude/plan-kanban-tasks/SKILL.md +14 -7
- package/dist/templates/skills/claude/ralph-loop/SKILL.md +18 -22
- package/dist/templates/skills/claude/ralph-loop/references/first_check.md +15 -14
- package/dist/templates/skills/claude/ralph-loop/references/implement.md +17 -17
- package/dist/templates/skills/claude/ralph-loop/scripts/kanban.sh +18 -4
- package/dist/templates/skills/claude/understand-project/SKILL.md +14 -7
- package/dist/templates/skills/codex/ralph-loop/SKILL.md +18 -22
- package/dist/templates/skills/codex/ralph-loop/references/first_check.md +15 -14
- package/dist/templates/skills/codex/ralph-loop/references/implement.md +17 -17
- package/dist/templates/skills/codex/ralph-loop/scripts/kanban.sh +18 -4
- package/dist/templates/skills/pi/.gitkeep +0 -0
- package/dist/templates/skills/pi/plan-kanban-tasks/SKILL.md +32 -0
- package/dist/templates/skills/pi/ralph-loop/SKILL.md +39 -0
- package/dist/templates/skills/pi/ralph-loop/references/first_check.md +21 -0
- package/dist/templates/skills/pi/ralph-loop/references/implement.md +99 -0
- package/dist/templates/skills/pi/understand-project/SKILL.md +46 -0
- package/package.json +20 -42
- package/dist/templates/scripts/__pycache__/attachment_downloader.cpython-38.pyc +0 -0
- package/dist/templates/scripts/__pycache__/github.cpython-38.pyc +0 -0
- package/dist/templates/scripts/__pycache__/slack_fetch.cpython-38.pyc +0 -0
- package/dist/templates/scripts/__pycache__/slack_state.cpython-38.pyc +0 -0
- package/dist/templates/services/__pycache__/claude.cpython-38.pyc +0 -0
- package/dist/templates/services/__pycache__/codex.cpython-38.pyc +0 -0
|
@@ -0,0 +1,2242 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Run juno-code tasks in parallel with queue management, output extraction, and aggregation.
|
|
4
|
+
|
|
5
|
+
Modes:
|
|
6
|
+
Headless (default): ThreadPoolExecutor, output to log files only.
|
|
7
|
+
Tmux windows: Each worker = tmux window, coordinator = window 0.
|
|
8
|
+
Tmux panes: Workers as split panes, coordinator = top pane.
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
# Kanban mode (task IDs as input)
|
|
12
|
+
./parallel_runner.sh --kanban TASK1,TASK2,TASK3 [--parallel 3]
|
|
13
|
+
./parallel_runner.sh --kanban TASK1 TASK2 TASK3 --parallel 2
|
|
14
|
+
./parallel_runner.sh --kanban "TASK1 TASK2 TASK3"
|
|
15
|
+
./parallel_runner.sh --kanban T1 T2 --prompt-file instructions.md
|
|
16
|
+
|
|
17
|
+
# Generic items mode (any list, requires --prompt-file with {{item}})
|
|
18
|
+
./parallel_runner.sh --items "url1,url2,url3" --prompt-file crawl.md
|
|
19
|
+
./parallel_runner.sh --items shop1 shop2 shop3 --prompt-file analyze.md
|
|
20
|
+
|
|
21
|
+
# File mode (structured data)
|
|
22
|
+
./parallel_runner.sh --items-file data.jsonl --prompt-file analyze.md
|
|
23
|
+
./parallel_runner.sh --items-file data.csv --strict --file-format csv
|
|
24
|
+
|
|
25
|
+
# Common options
|
|
26
|
+
./parallel_runner.sh --tmux --kanban T1 T2 T3 --parallel 2
|
|
27
|
+
./parallel_runner.sh --tmux panes --kanban T1 T2 --parallel 2
|
|
28
|
+
./parallel_runner.sh --tmux --kanban T1 T2 --name my-batch
|
|
29
|
+
./parallel_runner.sh -s codex --kanban T1 T2
|
|
30
|
+
./parallel_runner.sh -s pi -m gpt-5 --kanban T1 T2
|
|
31
|
+
./parallel_runner.sh --stop # stop only running session
|
|
32
|
+
./parallel_runner.sh --stop --name my-batch # stop specific session
|
|
33
|
+
./parallel_runner.sh --stop-all # stop all sessions
|
|
34
|
+
|
|
35
|
+
Input modes (exactly one required, unless --stop/--stop-all):
|
|
36
|
+
--kanban Kanban task IDs. {{task_id}} = {{item}} = the ID.
|
|
37
|
+
--kanban-filter Filter string passed to kanban.sh list. Internally runs
|
|
38
|
+
kanban.sh list {filters} -f json --raw and extracts IDs.
|
|
39
|
+
--items Generic item list (comma/space separated). Auto-generates item-001 IDs.
|
|
40
|
+
--items-file Path to file (JSONL, CSV, TSV, XLSX). Format auto-detected by extension.
|
|
41
|
+
|
|
42
|
+
File options (for --items-file):
|
|
43
|
+
--format Force file format: jsonl, csv, tsv, xlsx (default: auto-detect).
|
|
44
|
+
--no-header CSV/TSV/XLSX: treat first row as data, not column headers.
|
|
45
|
+
--chunk-size Records per item (default: 1). >1 groups records into a JSON array.
|
|
46
|
+
--start First record to process, 1-indexed after header (default: 1).
|
|
47
|
+
--end Last record to process, inclusive (default: end of file).
|
|
48
|
+
|
|
49
|
+
Output extraction:
|
|
50
|
+
--file-format Expected output format (e.g., json, csv, md). Sets {{file_format}} placeholder.
|
|
51
|
+
--strict Extract response from fenced code block in output.
|
|
52
|
+
Writes extracted content to {output_dir}/{task_id}.{file_format}.
|
|
53
|
+
Marks task as ERROR if code block not found. Requires --file-format.
|
|
54
|
+
|
|
55
|
+
Arguments:
|
|
56
|
+
--parallel Max concurrent subprocesses (default: 3)
|
|
57
|
+
-s, --service Backend service: claude, codex, pi (default: claude). Env: JUNO_SERVICE.
|
|
58
|
+
-m, --model Model override. Env: JUNO_MODEL.
|
|
59
|
+
--env Environment overrides. KEY=VALUE pairs or path to .env file.
|
|
60
|
+
--prompt-file Path to a file whose content is appended to the prompt.
|
|
61
|
+
Re-read per task. Placeholders: {{task_id}}, {{item}}, {{file_format}}.
|
|
62
|
+
--tmux Run in tmux mode. 'windows' (default) or 'panes' (side-by-side).
|
|
63
|
+
--name Session name (default: auto-generated batch-N). Tmux session = pc-{name}.
|
|
64
|
+
--output-dir Structured output directory. Default: /tmp/juno-code-sessions/{date}/{run_id}.
|
|
65
|
+
--stop Stop a session. Uses --name if provided, otherwise auto-detects.
|
|
66
|
+
--stop-all Stop ALL running sessions.
|
|
67
|
+
"""
|
|
68
|
+
|
|
69
|
+
import argparse
|
|
70
|
+
import csv
|
|
71
|
+
import io
|
|
72
|
+
import json
|
|
73
|
+
import os
|
|
74
|
+
import random
|
|
75
|
+
import re
|
|
76
|
+
import shlex
|
|
77
|
+
import shutil
|
|
78
|
+
import signal
|
|
79
|
+
import string
|
|
80
|
+
import subprocess
|
|
81
|
+
import sys
|
|
82
|
+
import textwrap
|
|
83
|
+
import threading
|
|
84
|
+
import time
|
|
85
|
+
import uuid
|
|
86
|
+
from collections import deque
|
|
87
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
88
|
+
from dataclasses import dataclass
|
|
89
|
+
from datetime import datetime
|
|
90
|
+
from enum import Enum
|
|
91
|
+
from pathlib import Path
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
SCRIPT_DIR = Path(__file__).resolve().parent
|
|
95
|
+
# _log_base is the flat root for discoverable session files (PID, pause, dashboard).
|
|
96
|
+
# LOG_DIR (computed at startup in main()) nests under _log_base/{date}/{run_id}/
|
|
97
|
+
# for per-task logs and the combined log — isolating concurrent/repeated runs.
|
|
98
|
+
_log_base = SCRIPT_DIR / "logs"
|
|
99
|
+
LOG_DIR = _log_base # overwritten in main() with run-ID path
|
|
100
|
+
COMBINED_LOG = LOG_DIR / "parallel_runner.log" # overwritten in main()
|
|
101
|
+
|
|
102
|
+
# 5-char alphanumeric run ID, generated once at startup in main()
|
|
103
|
+
_run_id = ""
|
|
104
|
+
|
|
105
|
+
# Thread-safe lock for writing to the shared combined log
|
|
106
|
+
_log_lock = threading.Lock()
|
|
107
|
+
|
|
108
|
+
# Shared counters for remaining-task tracking
|
|
109
|
+
_completed_count = 0
|
|
110
|
+
_completed_lock = threading.Lock()
|
|
111
|
+
_total_tasks = 0
|
|
112
|
+
|
|
113
|
+
# Per-task elapsed times
|
|
114
|
+
_task_times = {}
|
|
115
|
+
_task_times_lock = threading.Lock()
|
|
116
|
+
|
|
117
|
+
# Shutdown flag for graceful exit (set by signal handlers)
|
|
118
|
+
_shutdown_event = threading.Event()
|
|
119
|
+
|
|
120
|
+
# --- Color system for task identification ---
|
|
121
|
+
# ANSI 256-color codes picked for high contrast between consecutive colors,
|
|
122
|
+
# visibility on both dark and light terminal backgrounds.
|
|
123
|
+
_TASK_COLORS = [
|
|
124
|
+
196, # red
|
|
125
|
+
39, # dodger blue
|
|
126
|
+
208, # orange
|
|
127
|
+
35, # cyan-green
|
|
128
|
+
201, # magenta/pink
|
|
129
|
+
220, # gold
|
|
130
|
+
27, # blue
|
|
131
|
+
118, # bright green
|
|
132
|
+
163, # rose
|
|
133
|
+
45, # turquoise
|
|
134
|
+
214, # dark orange
|
|
135
|
+
99, # purple
|
|
136
|
+
82, # lime
|
|
137
|
+
197, # hot pink
|
|
138
|
+
33, # royal blue
|
|
139
|
+
215, # sandy orange
|
|
140
|
+
48, # sea green
|
|
141
|
+
135, # medium purple
|
|
142
|
+
226, # yellow
|
|
143
|
+
69, # cornflower blue
|
|
144
|
+
]
|
|
145
|
+
_RESET = "\033[0m"
|
|
146
|
+
|
|
147
|
+
# --- Service / model defaults ---
|
|
148
|
+
_VALID_SERVICES = ("claude", "codex", "pi")
|
|
149
|
+
_DEFAULT_SERVICE = "claude"
|
|
150
|
+
_SERVICE_DEFAULT_MODEL = {
|
|
151
|
+
"claude": ":sonnet",
|
|
152
|
+
"codex": ":codex",
|
|
153
|
+
"pi": "openai-codex/gpt-5.3-codex",
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
# Resolved environment overrides from --env args (populated at parse time)
|
|
157
|
+
_env_overrides = {}
|
|
158
|
+
|
|
159
|
+
# Map task_id -> color (assigned at startup)
|
|
160
|
+
_task_color_map = {}
|
|
161
|
+
|
|
162
|
+
# Map task_id -> item value (the full input data for each task)
|
|
163
|
+
# For --kanban: item == task_id. For --items/--items-file: item is the real data.
|
|
164
|
+
_item_map = {}
|
|
165
|
+
|
|
166
|
+
# Python helper script piped via tmux pipe-pane to format output into log files.
|
|
167
|
+
# argv: task_id, task_log_path, combined_log_path, ansi_color_code
|
|
168
|
+
# Combined log gets colored dot; task log stays plain.
|
|
169
|
+
_LOG_PIPE_SCRIPT = r"""#!/usr/bin/env python3
|
|
170
|
+
import re, sys
|
|
171
|
+
from datetime import datetime
|
|
172
|
+
|
|
173
|
+
task_id = sys.argv[1]
|
|
174
|
+
task_log_path = sys.argv[2]
|
|
175
|
+
combined_log_path = sys.argv[3]
|
|
176
|
+
color_code = sys.argv[4] if len(sys.argv) > 4 else '7'
|
|
177
|
+
ansi_re = re.compile(r'\x1b\[[0-9;]*[a-zA-Z]')
|
|
178
|
+
dot = '\033[38;5;%sm\u25cf\033[0m' % color_code # colored dot
|
|
179
|
+
|
|
180
|
+
with open(task_log_path, 'w') as tl, open(combined_log_path, 'a') as cl:
|
|
181
|
+
while True:
|
|
182
|
+
line = sys.stdin.readline()
|
|
183
|
+
if not line:
|
|
184
|
+
break
|
|
185
|
+
line = line.rstrip('\n\r')
|
|
186
|
+
if not line:
|
|
187
|
+
continue
|
|
188
|
+
ts = datetime.now().strftime('%H:%M:%S')
|
|
189
|
+
clean = ansi_re.sub('', line)
|
|
190
|
+
tl.write('[%s] [%s] %s\n' % (ts, task_id, clean))
|
|
191
|
+
tl.flush()
|
|
192
|
+
cl.write('[%s] %s [%s] %s\n' % (ts, dot, task_id, line))
|
|
193
|
+
cl.flush()
|
|
194
|
+
"""
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# ---------------------------------------------------------------------------
|
|
198
|
+
# Per-session file helpers
|
|
199
|
+
# ---------------------------------------------------------------------------
|
|
200
|
+
|
|
201
|
+
def _session_name_to_tmux(name):
|
|
202
|
+
"""Convert session name to tmux session name."""
|
|
203
|
+
return f"pc-{name}"
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _dashboard_file(name):
    """Hidden dashboard-state file for session *name*, in the flat log root."""
    return _log_base / ".dashboard_{}".format(name)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _pause_file(name):
    """Hidden pause-marker file for session *name*, in the flat log root."""
    return _log_base / ".pause_{}".format(name)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _pid_file(name):
    """Hidden orchestrator-PID file for session *name*, in the flat log root."""
    return _log_base / ".orchestrator_pid_{}".format(name)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _orchestrator_log(name):
    """Per-run orchestrator log file for session *name*, under LOG_DIR."""
    return LOG_DIR / "orchestrator_{}.log".format(name)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _tmp_dir(name):
|
|
223
|
+
return Path(f"/tmp/pc-{name}")
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _write_log_pipe_helper(name):
    """Materialize the log-pipe helper script in the session tmp dir.

    Creates the directory if needed, writes ``log_pipe.py`` from the
    module-level _LOG_PIPE_SCRIPT template, and returns its path as a str.
    """
    target = _tmp_dir(name)
    target.mkdir(parents=True, exist_ok=True)
    script = target / "log_pipe.py"
    script.write_text(_LOG_PIPE_SCRIPT)
    return str(script)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _resolve_service_model(args):
    """Resolve (service, model) with precedence: CLI args > env vars > defaults.

    Exits with status 2 when the resolved service is not a known backend.
    """
    service = (getattr(args, "service", None)
               or os.environ.get("JUNO_SERVICE")
               or _DEFAULT_SERVICE).lower()
    if service not in _VALID_SERVICES:
        print(f"ERROR: Invalid service '{service}'. Must be one of: {', '.join(_VALID_SERVICES)}",
              file=sys.stderr)
        sys.exit(2)
    model = (getattr(args, "model", None)
             or os.environ.get("JUNO_MODEL")
             or _SERVICE_DEFAULT_MODEL[service])
    return service, model
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _parse_env_file(path):
|
|
248
|
+
"""Parse a .env file into a dict. Supports KEY=VALUE, comments (#), empty lines."""
|
|
249
|
+
env = {}
|
|
250
|
+
for line_num, raw_line in enumerate(
|
|
251
|
+
Path(path).read_text(encoding="utf-8").splitlines(), 1
|
|
252
|
+
):
|
|
253
|
+
line = raw_line.strip()
|
|
254
|
+
if not line or line.startswith("#"):
|
|
255
|
+
continue
|
|
256
|
+
if "=" not in line:
|
|
257
|
+
print(f"WARNING: Skipping invalid line {line_num} in {path}: {line}",
|
|
258
|
+
file=sys.stderr)
|
|
259
|
+
continue
|
|
260
|
+
key, _, value = line.partition("=")
|
|
261
|
+
key = key.strip()
|
|
262
|
+
value = value.strip()
|
|
263
|
+
if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
|
|
264
|
+
value = value[1:-1]
|
|
265
|
+
env[key] = value
|
|
266
|
+
return env
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _resolve_env_overrides(env_args):
    """Merge --env arguments (KEY=VALUE pairs or .env file paths) into a dict.

    A bare argument must be an existing env file (exit 2 otherwise).  An
    argument containing ``=`` is treated as a file only when the text
    before the first ``=`` exists on disk and does not look like a plain
    env-var name; otherwise it is a literal KEY=VALUE pair.
    """
    merged = {}
    for arg in env_args or ():
        if "=" not in arg:
            if not Path(arg).exists():
                print(f"ERROR: Env file not found: {arg}", file=sys.stderr)
                sys.exit(2)
            merged.update(_parse_env_file(arg))
            continue
        prefix = arg.split("=", 1)[0]
        # Heuristic: a path-like prefix that exists wins over a var name.
        if Path(prefix).exists() and not prefix.replace("_", "").replace("-", "").isalnum():
            merged.update(_parse_env_file(arg))
        else:
            key, _, value = arg.partition("=")
            merged[key.strip()] = value.strip()
    return merged
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _build_process_env(extra_capture_env=None):
    """Build a child-process environment dict.

    Layering order (later wins): os.environ < --env overrides <
    *extra_capture_env*.
    """
    merged = dict(os.environ)
    merged.update(_env_overrides)
    merged.update(extra_capture_env or {})
    return merged
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def _generate_env_exports():
    """Render the merged process environment as shell ``export`` lines.

    Session-specific bookkeeping variables (tmux/screen/shell state) are
    dropped so the exports can be replayed safely in a fresh shell; values
    are shell-quoted.
    """
    skip = {"TERM_SESSION_ID", "TMUX", "TMUX_PANE", "STY", "WINDOW",
            "SHLVL", "OLDPWD", "_"}
    merged = dict(os.environ)
    merged.update(_env_overrides)
    return "\n".join(
        f"export {key}={shlex.quote(value)}"
        for key, value in sorted(merged.items())
        if key not in skip
    )
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
# ---------------------------------------------------------------------------
|
|
314
|
+
# File parsing — multi-format pipeline
|
|
315
|
+
# ---------------------------------------------------------------------------
|
|
316
|
+
|
|
317
|
+
_FORMAT_EXTENSIONS = {
|
|
318
|
+
".jsonl": "jsonl",
|
|
319
|
+
".ndjson": "jsonl",
|
|
320
|
+
".csv": "csv",
|
|
321
|
+
".tsv": "tsv",
|
|
322
|
+
".xlsx": "xlsx",
|
|
323
|
+
}
|
|
324
|
+
_VALID_FORMATS = ("jsonl", "csv", "tsv", "xlsx")
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _detect_format(path, format_override=None):
    """Determine the items-file format from --format or the file extension.

    Exits with status 2 when the override is unknown or the extension is
    not recognized.
    """
    if format_override:
        fmt = format_override.lower()
        if fmt in _VALID_FORMATS:
            return fmt
        print(f"ERROR: Unknown format '{fmt}'. Must be one of: {', '.join(_VALID_FORMATS)}",
              file=sys.stderr)
        sys.exit(2)
    ext = Path(path).suffix.lower()
    fmt = _FORMAT_EXTENSIONS.get(ext)
    if fmt:
        return fmt
    print(f"ERROR: Cannot detect format from extension '{ext}'. "
          f"Use --format to specify one of: {', '.join(_VALID_FORMATS)}",
          file=sys.stderr)
    sys.exit(2)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def _record_to_item(record):
|
|
347
|
+
"""Convert a parsed record to its {{item}} string representation."""
|
|
348
|
+
if isinstance(record, str):
|
|
349
|
+
return record
|
|
350
|
+
return json.dumps(record, ensure_ascii=False)
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def _parse_jsonl(path):
|
|
354
|
+
"""Parse JSONL/NDJSON into a list of parsed values."""
|
|
355
|
+
records = []
|
|
356
|
+
for line_num, raw_line in enumerate(
|
|
357
|
+
Path(path).read_text(encoding="utf-8").splitlines(), 1
|
|
358
|
+
):
|
|
359
|
+
line = raw_line.strip()
|
|
360
|
+
if not line or line.startswith("#"):
|
|
361
|
+
continue
|
|
362
|
+
try:
|
|
363
|
+
records.append(json.loads(line))
|
|
364
|
+
except json.JSONDecodeError as e:
|
|
365
|
+
print(f"ERROR: Invalid JSON at {path}:{line_num}: {e}", file=sys.stderr)
|
|
366
|
+
sys.exit(2)
|
|
367
|
+
return records
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def _parse_csv(path, has_header=True, delimiter=","):
|
|
371
|
+
"""Parse CSV/TSV into a list of records (dicts if header, lists otherwise)."""
|
|
372
|
+
text = Path(path).read_text(encoding="utf-8")
|
|
373
|
+
reader = csv.reader(io.StringIO(text), delimiter=delimiter)
|
|
374
|
+
rows = list(reader)
|
|
375
|
+
if not rows:
|
|
376
|
+
return []
|
|
377
|
+
if has_header:
|
|
378
|
+
headers = rows[0]
|
|
379
|
+
return [dict(zip(headers, row)) for row in rows[1:]]
|
|
380
|
+
return [row for row in rows]
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def _parse_xlsx(path, has_header=True):
    """Parse the active sheet of an XLSX workbook into records.

    ``None`` cells become empty strings.  With *has_header* true the first
    row (stringified) supplies dict keys; otherwise raw row lists are
    returned.  Exits with status 2 when openpyxl is not installed.
    """
    try:
        import openpyxl
    except ImportError:
        print("ERROR: openpyxl is required for XLSX files. Install with:\n"
              "  pip install openpyxl", file=sys.stderr)
        sys.exit(2)
    workbook = openpyxl.load_workbook(path, read_only=True, data_only=True)
    sheet = workbook.active
    rows = [["" if cell is None else cell for cell in row]
            for row in sheet.iter_rows(values_only=True)]
    workbook.close()
    if not rows:
        return []
    if not has_header:
        return rows
    headers = [str(h) for h in rows[0]]
    return [dict(zip(headers, row)) for row in rows[1:]]
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def _parse_items_file(path, format_override=None, has_header=True,
                      start=None, end=None, chunk_size=1):
    """Parse an items file and return the list of {{item}} strings.

    Pipeline: detect format -> parse -> slice (--start/--end, 1-indexed,
    inclusive, counted after the header) -> chunk (--chunk-size records
    per item; chunks > 1 become JSON arrays).  Any error exits with
    status 2.

    Fix: the out-of-range error previously reported ``end_idx`` — which
    is just the user-supplied --end value — as the file's data-row count;
    it now reports the actual number of parsed records.
    """
    file_path = Path(path)
    if not file_path.exists():
        print(f"ERROR: Items file not found: {path}", file=sys.stderr)
        sys.exit(2)

    fmt = _detect_format(path, format_override)

    if fmt == "jsonl":
        records = _parse_jsonl(path)
    elif fmt == "csv":
        records = _parse_csv(path, has_header=has_header, delimiter=",")
    elif fmt == "tsv":
        records = _parse_csv(path, has_header=has_header, delimiter="\t")
    elif fmt == "xlsx":
        records = _parse_xlsx(path, has_header=has_header)
    else:
        print(f"ERROR: Unsupported format '{fmt}'", file=sys.stderr)
        sys.exit(2)

    if not records:
        print(f"ERROR: No records found in {path}", file=sys.stderr)
        sys.exit(2)

    # Slice: --start and --end are 1-indexed, inclusive, after the header.
    total_rows = len(records)
    start_idx = (start - 1) if start and start >= 1 else 0
    end_idx = end if end else total_rows
    records = records[start_idx:end_idx]

    if not records:
        print(f"ERROR: No records in range --start {start} --end {end} "
              f"(file has {total_rows} data rows)", file=sys.stderr)
        sys.exit(2)

    # Chunk: single records pass through _record_to_item; larger chunks
    # are grouped into one JSON array per item.
    items = []
    for i in range(0, len(records), chunk_size):
        chunk = records[i:i + chunk_size]
        if chunk_size == 1:
            items.append(_record_to_item(chunk[0]))
        else:
            items.append(json.dumps(chunk, ensure_ascii=False))

    return items
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
# ---------------------------------------------------------------------------
|
|
454
|
+
# Input resolution
|
|
455
|
+
# ---------------------------------------------------------------------------
|
|
456
|
+
|
|
457
|
+
def _resolve_input(args):
    """Determine the input mode, populate _item_map, and return task IDs.

    Exactly one of --kanban / --items / --items-file must be supplied;
    anything else exits with status 2.  For --kanban the item equals the
    task ID; otherwise zero-padded ``item-NNN`` IDs are generated.
    """
    global _item_map

    active = sum(map(bool, (args.kanban, args.items, args.items_file)))
    if active == 0:
        print("ERROR: One of --kanban, --items, or --items-file is required "
              "(unless using --stop or --stop-all)", file=sys.stderr)
        sys.exit(2)
    if active > 1:
        print("ERROR: Only one of --kanban, --items, or --items-file can be used",
              file=sys.stderr)
        sys.exit(2)

    # Kanban mode: the task ID doubles as the item value.
    if args.kanban:
        _item_map = {tid: tid for tid in args.kanban}
        return args.kanban

    if args.items:
        # Split each value on commas and whitespace; drop empties.
        raw_items = [part
                     for val in args.items
                     for part in val.replace(",", " ").split()
                     if part]
    else:
        raw_items = _parse_items_file(
            args.items_file,
            format_override=getattr(args, "format", None),
            has_header=not getattr(args, "no_header", False),
            start=getattr(args, "start", None),
            end=getattr(args, "end", None),
            chunk_size=getattr(args, "chunk_size", 1) or 1,
        )

    if not raw_items:
        print("ERROR: No items found in input", file=sys.stderr)
        sys.exit(2)

    # Zero-padded IDs, padded to at least 3 digits.
    width = max(len(str(len(raw_items))), 3)
    task_ids = []
    for index, value in enumerate(raw_items, 1):
        tid = f"item-{index:0{width}d}"
        task_ids.append(tid)
        _item_map[tid] = value

    return task_ids
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def _color_for(task_id):
    """ANSI 256-color dot for *task_id* (falls back to color 7 / grey)."""
    code = _task_color_map.get(task_id, 7)
    return "\033[38;5;{}m\u25cf{}".format(code, _RESET)
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def _colored_tag(task_id):
    """Colored dot followed by the bracketed task id, for log lines."""
    dot = _color_for(task_id)
    return f"{dot} [{task_id}]"
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
# ---------------------------------------------------------------------------
|
|
524
|
+
# Structured output capture
|
|
525
|
+
# ---------------------------------------------------------------------------
|
|
526
|
+
|
|
527
|
+
def _resolve_output_dir(args):
    """Pick (and create) the structured-output directory.

    Precedence: --output-dir > $JUNO_OUTPUT_DIR >
    ``/tmp/juno-code-sessions/{date}``.  When a run ID is set it is
    appended as a subdirectory so concurrent runs stay isolated.
    """
    if args.output_dir:
        base = Path(args.output_dir)
    else:
        env_dir = os.environ.get("JUNO_OUTPUT_DIR")
        if env_dir:
            base = Path(env_dir)
        else:
            base = Path("/tmp/juno-code-sessions") / datetime.now().strftime("%Y-%m-%d")
    if _run_id:
        base = base / _run_id
    base.mkdir(parents=True, exist_ok=True)
    return base
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def _task_output_path(output_dir, task_id):
|
|
543
|
+
"""Path for the per-task structured output JSON."""
|
|
544
|
+
return output_dir / f"{task_id}.json"
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def _parse_result_from_log(task_log_path):
|
|
548
|
+
"""Parse the juno-code result event from a task log file.
|
|
549
|
+
|
|
550
|
+
juno-code prints a JSON line with {"type":"result",...} to stdout.
|
|
551
|
+
We walk backwards to find it near the end.
|
|
552
|
+
"""
|
|
553
|
+
try:
|
|
554
|
+
lines = Path(task_log_path).read_text(encoding="utf-8").splitlines()
|
|
555
|
+
except (OSError, UnicodeDecodeError):
|
|
556
|
+
return None
|
|
557
|
+
|
|
558
|
+
for line in reversed(lines):
|
|
559
|
+
idx = line.find('{"type":')
|
|
560
|
+
if idx == -1:
|
|
561
|
+
continue
|
|
562
|
+
candidate = line[idx:]
|
|
563
|
+
try:
|
|
564
|
+
parsed = json.loads(candidate)
|
|
565
|
+
if isinstance(parsed, dict) and parsed.get("type") == "result":
|
|
566
|
+
return parsed
|
|
567
|
+
except json.JSONDecodeError:
|
|
568
|
+
continue
|
|
569
|
+
return None
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def _extract_response(backend_result, file_format):
    """Pull the usable response text out of a backend result dict.

    Preference order: a fenced block matching *file_format*, then the
    last fenced block of any language, then the raw result text (with a
    warning message when a specific format was requested but not found).
    Returns ``(content, error_message)``; both None when there is nothing
    to extract.
    """
    if not isinstance(backend_result, dict):
        return None, None

    raw = backend_result.get("result")
    if not raw:
        return None, None

    if file_format:
        content, _ = _extract_from_fenced_block(raw, file_format)
        if content is not None:
            return content, None

    # Fall back to the last fenced block of any language.
    generic = list(re.finditer(r"```\w*\s*\n(.*?)```", raw, re.DOTALL))
    if generic:
        return generic[-1].group(1).rstrip(), None

    if file_format:
        return raw, f"No ```{file_format} code block found (used raw result)"
    return raw, None
|
|
597
|
+
|
|
598
|
+
|
|
599
|
+
def _write_task_output(output_dir, task_id, exit_code, wall_time, start_time,
                       end_time, worker_id=-1,
                       extracted_response=None, extraction_error=None,
                       backend_result=None, file_format=""):
    """Assemble and persist the per-task structured JSON; return the dict.

    When no pre-extracted response is supplied but a backend result is,
    extraction is attempted from the backend result.
    """
    if extracted_response is None and backend_result is not None:
        extracted_response, extraction_error = _extract_response(
            backend_result, file_format,
        )

    session_id = (backend_result.get("session_id")
                  if isinstance(backend_result, dict) else None)

    record = {
        "task_id": task_id,
        "session_id": session_id,
        "exit_code": exit_code,
        "wall_time_seconds": round(wall_time, 2),
        "start_time": start_time,
        "end_time": end_time,
        "worker_id": worker_id,
        "backend_result": backend_result,
        "extracted_response": extracted_response,
        "extraction_error": extraction_error,
    }

    _task_output_path(output_dir, task_id).write_text(
        json.dumps(record, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
    return record
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def _extract_from_fenced_block(log_text, file_format):
|
|
636
|
+
"""Extract content from the last fenced code block matching the format."""
|
|
637
|
+
pattern = re.compile(
|
|
638
|
+
r"```" + re.escape(file_format) + r"\s*\n(.*?)```",
|
|
639
|
+
re.DOTALL,
|
|
640
|
+
)
|
|
641
|
+
matches = list(pattern.finditer(log_text))
|
|
642
|
+
if not matches:
|
|
643
|
+
return None, f"No ```{file_format} code block found in task output"
|
|
644
|
+
content = matches[-1].group(1)
|
|
645
|
+
content = content.rstrip()
|
|
646
|
+
return content, None
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
def _extract_strict_output(task_id, task_log_path, output_dir, file_format, exit_code):
    """Strict-mode extraction from a finished task's log.

    On success writes ``{task_id}.{file_format}`` in *output_dir*, records
    the content in the task JSON, and returns the original exit code.  On
    any failure records the error in the task JSON and returns 1.
    """
    try:
        log_text = Path(task_log_path).read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        _update_task_json(output_dir, task_id, None, f"Could not read task log: {e}")
        return 1

    content, error = _extract_from_fenced_block(log_text, file_format)
    if error:
        _update_task_json(output_dir, task_id, None, error)
        return 1

    (output_dir / f"{task_id}.{file_format}").write_text(content, encoding="utf-8")
    _update_task_json(output_dir, task_id, content, None)
    return exit_code
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
def _update_task_json(output_dir, task_id, extracted_response, extraction_error):
    """Merge extraction results into an already-written per-task JSON.

    Silently a no-op when the file is missing or unparseable.  A truthy
    *extraction_error* also forces the recorded exit_code to 1.
    """
    json_path = _task_output_path(output_dir, task_id)
    if not json_path.exists():
        return
    try:
        payload = json.loads(json_path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError):
        return
    payload["extracted_response"] = extracted_response
    payload["extraction_error"] = extraction_error
    if extraction_error:
        payload["exit_code"] = 1
    json_path.write_text(
        json.dumps(payload, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
def _write_aggregation(output_dir, task_outputs, wall_time, parallelism,
                       mode="headless", session_name=None, file_format=""):
    """Build and write the aggregation file.

    Combines all per-task outputs into one timestamped aggregation JSON,
    optionally writes a merged output file, and returns a dict with the
    written paths plus failure bookkeeping for the summary printer.
    """
    # Success/failure tallies based on each task's recorded exit code.
    succeeded = sum(1 for t in task_outputs.values() if t["exit_code"] == 0)
    failed = sum(1 for t in task_outputs.values() if t["exit_code"] != 0)

    merged_parts = []
    failed_ids = []
    failed_sessions = {}
    for tid in sorted(task_outputs.keys()):
        t = task_outputs[tid]
        er = t.get("extracted_response")
        br = t.get("backend_result") or {}
        # A task also counts as mergeable when the backend itself reported
        # success, even if the wrapper exit code differs.
        backend_ok = isinstance(br, dict) and br.get("exit_code", -1) == 0
        if er and (t.get("exit_code") == 0 or backend_ok):
            if file_format == "csv" and merged_parts:
                # For CSV, keep the header only from the first chunk; strip
                # the first line of every subsequent chunk. NOTE(review): a
                # later chunk containing only one line is dropped entirely —
                # presumably a header-only chunk has no data; confirm.
                lines = er.split("\n")
                if len(lines) > 1:
                    merged_parts.append("\n".join(lines[1:]))
            else:
                merged_parts.append(er)
        else:
            failed_ids.append(tid)
            sid = t.get("session_id")
            if sid:
                failed_sessions[tid] = sid

    # Row-oriented formats join on a single newline; others get a blank line.
    separator = "\n" if file_format in ("csv", "tsv") else "\n\n"
    merged_extracted = separator.join(merged_parts) if merged_parts else None

    aggregation = {
        "meta": {
            "created_at": datetime.now().isoformat(),
            "run_id": _run_id,
            "total_tasks": len(task_outputs),
            "succeeded": succeeded,
            "failed": failed,
            "wall_time_seconds": round(wall_time, 2),
            "parallelism": parallelism,
            "mode": mode,
            "session_name": session_name,
        },
        "merged_extracted": merged_extracted,
        "tasks": task_outputs,
    }

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    agg_path = output_dir / f"aggregation_{timestamp}.json"
    agg_path.write_text(
        json.dumps(aggregation, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )

    # The merged file is only produced when there is content AND a format
    # (the format supplies the file extension).
    merged_path = None
    if merged_extracted and file_format:
        merged_path = output_dir / f"merged_{timestamp}.{file_format}"
        merged_path.write_text(merged_extracted, encoding="utf-8")

    return {
        "agg_path": str(agg_path),
        "merged_path": str(merged_path) if merged_path else None,
        "failed_ids": failed_ids,
        "failed_sessions": failed_sessions,
        "error_count": len(failed_ids),
    }
|
|
754
|
+
|
|
755
|
+
|
|
756
|
+
def _format_output_summary(agg_result):
    """Format output paths and errors into a multi-line summary string.

    Renders the aggregation/merged file paths, then either an error section
    (failed IDs, session IDs to investigate) or a zero-error line.
    """
    banner = "=" * 60
    lines = [banner, "OUTPUT FILES", banner]
    lines.append(f" Run ID: {_run_id}")
    lines.append(f" Aggregation: {agg_result['agg_path']}")

    if agg_result["merged_path"]:
        lines.append(f" Merged file: {agg_result['merged_path']}")

    if agg_result["error_count"] > 0:
        lines.append(f" Errors: {agg_result['error_count']} chunks failed extraction")
        lines.append(f" Failed IDs: {', '.join(agg_result['failed_ids'])}")
        failed_sessions = agg_result.get("failed_sessions", {})
        if failed_sessions:
            lines.append(" Sessions to investigate:")
            lines.extend(f" {tid}: {sid}" for tid, sid in failed_sessions.items())
        # Failed tasks that never reported a session ID get their own line.
        no_session = [tid for tid in agg_result["failed_ids"] if tid not in failed_sessions]
        if no_session:
            lines.append(f" No session ID: {', '.join(no_session)}")
    else:
        lines.append(" Errors: 0")

    lines.append(banner)
    return "\n".join(lines)
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def _print_output_summary(agg_result):
    """Print output summary to combined log."""
    summary = _format_output_summary(agg_result)
    for entry in summary.splitlines():
        log_combined(entry)
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
# ---------------------------------------------------------------------------
|
|
790
|
+
# Auto-naming
|
|
791
|
+
# ---------------------------------------------------------------------------
|
|
792
|
+
|
|
793
|
+
def _next_batch_name():
    """Find the next available batch-N name.

    Collects every N already in use — from live tmux sessions named
    ``pc-batch-N`` and from orchestrator PID files under ``_log_base`` —
    then returns the smallest unused ``batch-N``. Both scans are
    best-effort: any failure simply means fewer names are excluded.
    """
    existing = set()
    try:
        result = subprocess.run(
            ["tmux", "list-sessions", "-F", "#{session_name}"],
            capture_output=True, text=True,
        )
        for line in result.stdout.strip().splitlines():
            line = line.strip()
            if line.startswith("pc-batch-"):
                try:
                    n = int(line[len("pc-batch-"):])
                    existing.add(n)
                except ValueError:
                    # Non-numeric suffix — not one of our auto-named batches.
                    pass
    except Exception:
        # tmux missing or failing is fine; fall through to PID files.
        pass
    try:
        # PID files cover orchestrators that may be running without tmux.
        for f in _log_base.glob(".orchestrator_pid_batch-*"):
            suffix = f.name[len(".orchestrator_pid_batch-"):]
            try:
                existing.add(int(suffix))
            except ValueError:
                pass
    except Exception:
        pass
    # Smallest positive integer not already taken.
    counter = 1
    while counter in existing:
        counter += 1
    return f"batch-{counter}"
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
# ---------------------------------------------------------------------------
|
|
827
|
+
# Stop commands
|
|
828
|
+
# ---------------------------------------------------------------------------
|
|
829
|
+
|
|
830
|
+
def _list_running_sessions():
    """Return list of (name, pid) tuples for running sessions.

    A session counts as running when its PID file under ``_log_base``
    contains a PID that is alive (probed with ``os.kill(pid, 0)``).
    Stale or unreadable PID files are skipped silently.
    """
    sessions = []
    try:
        for f in _log_base.glob(".orchestrator_pid_*"):
            name = f.name[len(".orchestrator_pid_"):]
            try:
                pid = int(f.read_text().strip())
                # Signal 0 sends nothing but raises if the PID is gone.
                os.kill(pid, 0)
                sessions.append((name, pid))
            except (ValueError, ProcessLookupError, PermissionError):
                # Garbage PID file or dead process — not a running session.
                pass
    except Exception:
        # Directory scan failure: report whatever was found so far.
        pass
    return sessions
|
|
845
|
+
|
|
846
|
+
|
|
847
|
+
def _stop_session(name):
    """Stop a single session by name.

    Teardown order: (1) SIGTERM the orchestrator process from its PID
    file, (2) kill the matching tmux session, (3) remove the PID,
    dashboard and pause marker files, (4) delete the session's tmp dir.
    Returns True if either the process or the tmux session was found
    and stopped.
    """
    stopped = False
    pid_path = _pid_file(name)
    tmux_session = _session_name_to_tmux(name)

    if pid_path.exists():
        try:
            pid = int(pid_path.read_text().strip())
            os.kill(pid, signal.SIGTERM)
            print(f" Sent SIGTERM to orchestrator (PID {pid})")
            stopped = True
        except (ValueError, ProcessLookupError, PermissionError):
            # Stale/corrupt PID file — continue with tmux cleanup anyway.
            pass

    result = subprocess.run(
        ["tmux", "kill-session", "-t", tmux_session],
        capture_output=True, text=True,
    )
    if result.returncode == 0:
        print(f" Killed tmux session '{tmux_session}'")
        stopped = True

    # Remove marker files regardless of whether anything was running,
    # so a later start does not see leftovers.
    for f in [pid_path, _dashboard_file(name), _pause_file(name)]:
        try:
            f.unlink(missing_ok=True)
        except OSError:
            pass

    tmp = _tmp_dir(name)
    if tmp.exists():
        shutil.rmtree(str(tmp), ignore_errors=True)

    return stopped
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
def run_stop(args):
    """Handle --stop command.

    With an explicit --name, stops that session and reports success or
    absence. Without one: stops the single running session if exactly one
    exists, otherwise lists the candidates and exits with status 1.
    """
    if args.name:
        target = args.name
        print(f"Stopping session '{target}'...")
        if not _stop_session(target):
            print(f"No running session found with name '{target}'.")
        else:
            print(f"Session '{target}' stopped.")
        return

    sessions = _list_running_sessions()
    if not sessions:
        print("No running sessions found.")
        return

    if len(sessions) == 1:
        # Exactly one session — unambiguous, stop it without asking.
        only_name, _pid = sessions[0]
        print(f"Stopping session '{only_name}'...")
        _stop_session(only_name)
        print(f"Session '{only_name}' stopped.")
        return

    # Ambiguous: refuse to guess and show the user their options.
    print(f"Multiple sessions running ({len(sessions)}). Specify --name:")
    for sess_name, sess_pid in sessions:
        print(f" --name {sess_name} (PID {sess_pid}, tmux: pc-{sess_name})")
    sys.exit(1)
|
|
909
|
+
|
|
910
|
+
|
|
911
|
+
def run_stop_all():
    """Handle --stop-all command.

    Gathers session names from two sources — PID files (via
    _list_running_sessions) and any live tmux sessions prefixed "pc-" —
    then stops each one. The tmux scan is best-effort: if tmux is
    unavailable, PID-file sessions are still stopped.
    """
    sessions = _list_running_sessions()
    # Fixed: set comprehension instead of set(generator) (ruff C401).
    session_names = {s[0] for s in sessions}
    try:
        result = subprocess.run(
            ["tmux", "list-sessions", "-F", "#{session_name}"],
            capture_output=True, text=True,
        )
        for line in result.stdout.strip().splitlines():
            line = line.strip()
            if line.startswith("pc-"):
                # Strip the "pc-" prefix to recover the logical name.
                name = line[3:]
                session_names.add(name)
    except Exception:
        # tmux missing/failing: fall back to PID-file sessions only.
        pass

    if not session_names:
        print("No running sessions found.")
        return

    print(f"Stopping {len(session_names)} session(s)...")
    for name in sorted(session_names):
        print(f"\n [{name}]")
        _stop_session(name)
    # Fixed: was an f-string with no placeholders (ruff F541).
    print("\nAll sessions stopped.")
|
|
937
|
+
|
|
938
|
+
|
|
939
|
+
# ---------------------------------------------------------------------------
|
|
940
|
+
# CLI parsing
|
|
941
|
+
# ---------------------------------------------------------------------------
|
|
942
|
+
|
|
943
|
+
def parse_args():
    """Parse CLI arguments and resolve them into a ready-to-run namespace.

    Beyond argparse itself this: short-circuits for --stop/--stop-all,
    expands --kanban-filter into concrete task IDs by shelling out to
    kanban.sh, auto-infers --file-format from the --items-file extension,
    validates --strict, resolves --env overrides into the module-level
    _env_overrides, flattens --kanban, and finally funnels all input
    modes through _resolve_input so args.kanban holds the task IDs.

    Fix over previous revision: the kanban.sh subprocess call uses
    timeout=30, which raises subprocess.TimeoutExpired — previously
    uncaught, producing a raw traceback. It is now converted into a
    clean parser.error like every other failure path here.
    """
    parser = argparse.ArgumentParser(
        description="Run juno-code tasks in parallel with queue management and output extraction")

    # --- Input modes ---
    input_group = parser.add_argument_group("input modes (exactly one required unless --stop/--stop-all)")
    input_group.add_argument(
        "--kanban", nargs="+",
        help="Kanban task IDs (comma/space/quoted). {{task_id}} = {{item}} = the ID.",
    )
    input_group.add_argument(
        "--kanban-filter", type=str, default=None,
        help="Filter string passed to kanban.sh list. Internally runs "
             "kanban.sh list {filters} -f json --raw and extracts IDs.",
    )
    input_group.add_argument(
        "--items", nargs="+",
        help="Generic item list (comma/space/quoted). Auto-generates item-NNN IDs.",
    )
    input_group.add_argument(
        "--items-file", type=str, default=None,
        help="Path to file (JSONL, CSV, TSV, XLSX). Format auto-detected by extension.",
    )

    # --- File options ---
    file_group = parser.add_argument_group("file options (for --items-file)")
    file_group.add_argument(
        "--format", type=str, default=None, choices=["jsonl", "csv", "tsv", "xlsx"],
        help="Force file format (default: auto-detect by extension).",
    )
    file_group.add_argument(
        "--no-header", action="store_true", default=False,
        help="CSV/TSV/XLSX: treat first row as data, not column headers.",
    )
    file_group.add_argument(
        "--chunk-size", type=int, default=1,
        help="Records per item (default: 1). >1 groups records into a JSON array.",
    )
    file_group.add_argument(
        "--start", type=int, default=None,
        help="First record to process, 1-indexed after header (default: 1).",
    )
    file_group.add_argument(
        "--end", type=int, default=None,
        help="Last record to process, inclusive (default: end of file).",
    )

    # --- Output extraction ---
    extract_group = parser.add_argument_group("output extraction")
    extract_group.add_argument(
        "--file-format", type=str, default=None,
        help="Expected output format (e.g., json, csv, md). Sets {{file_format}} placeholder.",
    )
    extract_group.add_argument(
        "--strict", action="store_true", default=False,
        help="Extract response from fenced code block. Requires --file-format.",
    )

    # --- Execution options ---
    parser.add_argument(
        "--parallel", type=int, default=3,
        help="Max concurrent subprocesses (default: 3)",
    )
    parser.add_argument(
        "-s", "--service", type=str, default=None, choices=["claude", "codex", "pi"],
        help="Backend service (default: claude). Env: JUNO_SERVICE.",
    )
    parser.add_argument(
        "-m", "--model", type=str, default=None,
        help="Model override. Env: JUNO_MODEL.",
    )
    parser.add_argument(
        "--env", nargs="+", default=None,
        help="Environment overrides. KEY=VALUE or .env file path.",
    )
    parser.add_argument(
        "--prompt-file", type=str, default=None,
        help="Prompt template file. Re-read per task. Placeholders: {{task_id}}, {{item}}, {{file_format}}.",
    )
    parser.add_argument(
        "--tmux", nargs="?", const="windows", default=None, choices=["windows", "panes"],
        help="Run in tmux mode. 'windows' (default) or 'panes'.",
    )
    parser.add_argument(
        "--name", type=str, default=None,
        help="Session name (default: auto-generated batch-N). Tmux session = pc-{name}.",
    )
    parser.add_argument(
        "--output-dir", type=str, default=None,
        help="Structured output directory. Default: /tmp/juno-code-sessions/{date}/{run_id}.",
    )
    parser.add_argument(
        "--stop", action="store_true", default=False,
        help="Stop a session. Uses --name if provided, otherwise auto-detects.",
    )
    parser.add_argument(
        "--stop-all", action="store_true", default=False,
        help="Stop ALL running sessions.",
    )
    args = parser.parse_args()

    # Handle stop commands first — they need no further resolution.
    if args.stop_all:
        return args
    if args.stop:
        return args

    # Resolve --kanban-filter -> --kanban
    if args.kanban_filter:
        if args.kanban:
            parser.error("Cannot use --kanban-filter together with --kanban")
        kanban_script = SCRIPT_DIR / "kanban.sh"
        if not kanban_script.exists():
            parser.error(f"Kanban script not found: {kanban_script}")
        filter_args = shlex.split(args.kanban_filter)
        cmd = [str(kanban_script), "list"] + filter_args + ["-f", "json", "--raw"]
        print(f"Running kanban filter: {' '.join(cmd)}", file=sys.stderr)
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        except subprocess.TimeoutExpired:
            parser.error("kanban.sh list timed out after 30 seconds")
        if result.returncode != 0:
            parser.error(f"kanban.sh list failed (exit {result.returncode}):\n{result.stderr.strip()}")
        # Only the first stdout line carries the JSON payload.
        first_line = result.stdout.strip().split("\n")[0] if result.stdout.strip() else "[]"
        try:
            tasks = json.loads(first_line)
        except json.JSONDecodeError as e:
            parser.error(f"Failed to parse kanban output as JSON: {e}\nOutput: {first_line[:200]}")
        if not isinstance(tasks, list):
            parser.error(f"Expected JSON array from kanban output, got {type(tasks).__name__}")
        ids = [t["id"] for t in tasks if isinstance(t, dict) and "id" in t]
        if not ids:
            parser.error("kanban-filter returned 0 tasks. Check your filters.")
        print(f"kanban-filter resolved {len(ids)} task(s): {', '.join(ids[:10])}"
              f"{'...' if len(ids) > 10 else ''}", file=sys.stderr)
        args.kanban = ids

    # Auto-infer --file-format from --items-file extension
    if not args.file_format and args.items_file:
        ext = Path(args.items_file).suffix.lstrip(".").lower()
        if ext in ("csv", "tsv", "json", "jsonl", "ndjson", "md", "txt", "xlsx"):
            # xlsx content is extracted as CSV; ndjson is a jsonl synonym.
            infer_map = {"xlsx": "csv", "ndjson": "jsonl"}
            args.file_format = infer_map.get(ext, ext)
            print(f"Auto-inferred --file-format={args.file_format} from input file extension",
                  file=sys.stderr)

    if args.strict and not args.file_format:
        parser.error("--strict requires --file-format to be set")

    global _env_overrides
    _env_overrides = _resolve_env_overrides(args.env)

    # Flatten --kanban: each value may itself contain comma/space lists.
    if args.kanban:
        flat = []
        for val in args.kanban:
            for part in val.replace(",", " ").split():
                part = part.strip()
                if part:
                    flat.append(part)
        args.kanban = flat

    # All input modes funnel into a single task-ID list on args.kanban.
    task_ids = _resolve_input(args)
    args.kanban = task_ids
    return args
|
|
1105
|
+
|
|
1106
|
+
|
|
1107
|
+
# ---------------------------------------------------------------------------
|
|
1108
|
+
# Shared helpers
|
|
1109
|
+
# ---------------------------------------------------------------------------
|
|
1110
|
+
|
|
1111
|
+
def log_combined(msg, task_id=None):
    """Write a timestamped line to both stdout and the combined log (thread-safe).

    The console line carries the task's colored tag when *task_id* is
    given; the on-disk combined log always gets the plain (uncolored)
    line. Both writes happen under the shared log lock.
    """
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    plain_line = f"[{now}] {msg}"
    colored_line = f"[{now}] {_colored_tag(task_id)} {msg}" if task_id else plain_line

    with _log_lock:
        print(colored_line, flush=True)
        with open(COMBINED_LOG, "a") as combined:
            combined.write(plain_line + "\n")
|
|
1123
|
+
|
|
1124
|
+
|
|
1125
|
+
def stream_to_log(pipe, task_id, task_log_path):
    """Read lines from a subprocess pipe and write to both per-task and combined logs.

    Runs on a dedicated reader thread per task. The per-task log gets
    an uncolored entry and is flushed per line; the shared combined log
    gets the colored entry, appended under the global log lock.
    """
    # Compute the colored tag once — it is constant for this task.
    tag_colored = _colored_tag(task_id)
    with open(task_log_path, "a") as task_log:
        # Pipe is binary; iterate until EOF (empty bytes sentinel).
        for raw_line in iter(pipe.readline, b""):
            decoded = raw_line.decode("utf-8", errors="replace").rstrip("\n")
            timestamp = datetime.now().strftime("%H:%M:%S")
            plain_entry = f"[{timestamp}] [{task_id}] {decoded}"
            colored_entry = f"[{timestamp}] {tag_colored} {decoded}"

            # Per-task log: no lock needed, this thread is its only writer.
            task_log.write(plain_entry + "\n")
            task_log.flush()

            # Combined log is shared across reader threads — serialize.
            with _log_lock:
                with open(COMBINED_LOG, "a") as f:
                    f.write(colored_entry + "\n")
|
|
1141
|
+
|
|
1142
|
+
|
|
1143
|
+
def format_duration(seconds):
    """Format seconds into a human-readable duration string.

    Under a minute: "12.3s"; under an hour: "5m 42s"; otherwise
    "2h 5m 42s" (seconds rounded to the nearest whole second).
    """
    if seconds < 60:
        return f"{seconds:.1f}s"

    total_minutes = int(seconds // 60)
    remainder_secs = seconds % 60
    if total_minutes < 60:
        return f"{total_minutes}m {remainder_secs:.0f}s"

    whole_hours = int(total_minutes // 60)
    remainder_mins = total_minutes % 60
    return f"{whole_hours}h {remainder_mins}m {remainder_secs:.0f}s"
|
|
1154
|
+
|
|
1155
|
+
|
|
1156
|
+
def resolve_prompt_file(args, pwd):
    """Resolve the prompt file path, exit if not found.

    Returns None when no prompt file was requested. Relative paths are
    anchored at *pwd*. A missing file is fatal (exit code 2) because
    every task depends on the template.
    """
    if not args.prompt_file:
        return None

    candidate = Path(args.prompt_file)
    if not candidate.is_absolute():
        # Relative paths are interpreted against the working directory.
        candidate = Path(pwd) / candidate

    if candidate.exists():
        return str(candidate)

    print(f"ERROR: Prompt file not found: {candidate}", file=sys.stderr)
    sys.exit(2)
|
|
1167
|
+
|
|
1168
|
+
|
|
1169
|
+
def print_summary(task_ids, results, task_times, wall_elapsed, total_tasks):
    """Print final results and stats summary.

    Emits a per-task OK/FAILED line plus aggregate stats (counts, wall
    time, per-task timing extremes) via log_combined, and returns the
    number of failed tasks so the caller can derive the exit status.
    """
    log_combined("")
    log_combined("=" * 60)
    log_combined("RESULTS")
    log_combined("=" * 60)

    ok = 0
    failed = 0
    for task_id in task_ids:
        # Missing entries default to -1 (treated as a failure).
        rc = results.get(task_id, -1)
        elapsed = task_times.get(task_id, 0)
        status_str = "OK" if rc == 0 else f"FAILED (exit {rc})"
        log_combined(f" {status_str} [{format_duration(elapsed)}]", task_id)
        if rc == 0:
            ok += 1
        else:
            failed += 1

    log_combined("-" * 60)
    log_combined("STATS")
    log_combined(f" Total tasks: {total_tasks}")
    log_combined(f" Succeeded: {ok}")
    log_combined(f" Failed: {failed}")
    log_combined(f" Wall time: {format_duration(wall_elapsed)}")
    if task_times:
        avg = sum(task_times.values()) / len(task_times)
        fastest = min(task_times.values())
        slowest = max(task_times.values())
        log_combined(f" Avg per task: {format_duration(avg)}")
        log_combined(f" Fastest task: {format_duration(fastest)}")
        log_combined(f" Slowest task: {format_duration(slowest)}")
    log_combined(f" Run ID: {_run_id}")
    log_combined(f" Per-task logs: {LOG_DIR}/task_<TASK_ID>.log")
    log_combined("=" * 60)

    # Caller uses the failure count to decide the process exit code.
    return failed
|
|
1206
|
+
|
|
1207
|
+
|
|
1208
|
+
# ---------------------------------------------------------------------------
|
|
1209
|
+
# Headless mode
|
|
1210
|
+
# ---------------------------------------------------------------------------
|
|
1211
|
+
|
|
1212
|
+
def run_task(task_id, semaphore, pwd, prompt_file_path=None, output_dir=None,
             service="claude", model=":sonnet", file_format="", strict=False):
    """Run a single juno-code subprocess (called from its own thread).

    Blocks on *semaphore* to honor the parallelism limit, renders the
    prompt template (per-task placeholder substitution), launches
    juno-code with its output streamed to per-task and combined logs,
    then records timing/results and optionally performs strict
    extraction. Returns (task_id, exit_code).
    """
    global _completed_count

    # The semaphore (not the thread pool) caps concurrency: all task
    # threads start, but only `--parallel` of them run at once.
    semaphore.acquire()
    try:
        log_combined(f"Starting juno-code (thread {threading.current_thread().name})", task_id)
        start = time.monotonic()
        start_iso = datetime.now().isoformat()

        task_log_path = LOG_DIR / f"task_{task_id}.log"

        prompt = ""

        if prompt_file_path:
            try:
                # Template is re-read per task so edits mid-run take effect.
                extra = Path(prompt_file_path).read_text(encoding="utf-8")
                extra = extra.replace("{{task_id}}", task_id)
                extra = extra.replace("{{item}}", _item_map.get(task_id, task_id))
                extra = extra.replace("{{file_format}}", file_format)
                prompt += "\n\n---\n\n" + extra
                log_combined(f"Loaded prompt file ({len(extra)} chars)", task_id)
            except Exception as e:
                # Best-effort: a broken template degrades to an empty prompt.
                log_combined(f"WARNING: Could not read prompt file: {e}", task_id)

        # The rendered prompt is handed to juno-code via a temp file (-f).
        tmp_prompt_dir = Path("/tmp/pc-headless")
        tmp_prompt_dir.mkdir(parents=True, exist_ok=True)
        tmp_prompt_path = tmp_prompt_dir / f"prompt_{task_id}.txt"
        tmp_prompt_path.write_text(prompt, encoding="utf-8")

        env = _build_process_env()

        proc = subprocess.Popen(
            [
                "juno-code",
                "-b", "shell",
                "-s", service,
                "-m", model,
                "-i", "1",
                "-v",
                "--no-hooks",
                "-f", str(tmp_prompt_path),
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # merge stderr into the same stream
            cwd=pwd,
            bufsize=0,  # unbuffered so log lines appear promptly
            env=env,
        )

        # Dedicated reader thread tees subprocess output into the logs.
        reader = threading.Thread(
            target=stream_to_log,
            args=(proc.stdout, task_id, task_log_path),
            daemon=True,
        )
        reader.start()

        proc.wait()
        reader.join()  # ensure the log is fully written before parsing it

        try:
            tmp_prompt_path.unlink(missing_ok=True)
        except OSError:
            pass

        elapsed = time.monotonic() - start

        # Snapshot progress counters under their locks.
        with _completed_lock:
            _completed_count += 1
            done = _completed_count
            remaining = _total_tasks - done

        with _task_times_lock:
            _task_times[task_id] = elapsed

        actual_exit_code = proc.returncode
        backend_result = _parse_result_from_log(task_log_path) if output_dir else None

        if output_dir:
            _write_task_output(
                output_dir, task_id, actual_exit_code, elapsed,
                start_iso, datetime.now().isoformat(),
                backend_result=backend_result, file_format=file_format,
            )

        if strict and file_format and output_dir:
            # Strict mode may override the exit code (e.g. missing fenced
            # block turns a success into a failure).
            actual_exit_code = _extract_strict_output(
                task_id, task_log_path, output_dir, file_format, actual_exit_code,
            )

        status = "OK" if actual_exit_code == 0 else f"FAILED (exit {actual_exit_code})"
        log_combined(
            f"Finished - {status} ({format_duration(elapsed)}) "
            f"| Progress: {done}/{_total_tasks} done, {remaining} remaining",
            task_id,
        )
        return task_id, actual_exit_code

    finally:
        semaphore.release()
|
|
1313
|
+
|
|
1314
|
+
|
|
1315
|
+
def run_headless_mode(args, pwd, prompt_file_path, output_dir, service, model):
    """Run tasks in headless mode using ThreadPoolExecutor.

    Prints a run banner, fans every task out to its own thread (real
    concurrency is bounded by a Semaphore of size --parallel), collects
    exit codes, prints the summary, optionally writes the aggregation,
    and exits the process: 1 if any task failed, else 0.
    """
    global _total_tasks
    _total_tasks = len(args.kanban)
    wall_start = time.monotonic()

    log_combined("=" * 60)
    log_combined(f"Starting parallel task execution")
    log_combined(f"Run ID: {_run_id}")
    log_combined(f"PWD: {pwd}")
    log_combined(f"Tasks ({_total_tasks}): {', '.join(args.kanban)}")
    if any(tid.startswith("item-") for tid in args.kanban):
        # Auto-generated item IDs are opaque — show a mapping preview.
        preview = [f" {tid} -> {_item_map[tid][:80]}" for tid in args.kanban[:3]]
        log_combined(f"Items preview:\n" + "\n".join(preview)
                     + (f"\n ... and {len(args.kanban) - 3} more" if len(args.kanban) > 3 else ""))
    log_combined(f"Parallelism: {args.parallel}")
    log_combined(f"Service: {service} | Model: {model}")
    if prompt_file_path:
        log_combined(f"Prompt file: {prompt_file_path} (re-read per task)")
    if output_dir:
        log_combined(f"Output dir: {output_dir}")
    # Legend is colored on the console but plain in the combined log,
    # so it is written manually rather than through log_combined.
    legend = " ".join(f"{_color_for(tid)} {tid}" for tid in args.kanban)
    with _log_lock:
        print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Legend: {legend}", flush=True)
        with open(COMBINED_LOG, "a") as f:
            f.write(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Legend: {', '.join(args.kanban)}\n")
    log_combined("=" * 60)

    # One worker thread per task; the semaphore enforces the real limit.
    semaphore = threading.Semaphore(args.parallel)

    file_format = getattr(args, "file_format", "") or ""
    strict = getattr(args, "strict", False)

    with ThreadPoolExecutor(max_workers=len(args.kanban)) as pool:
        futures = {
            pool.submit(run_task, task_id, semaphore, pwd, prompt_file_path, output_dir,
                        service, model, file_format, strict): task_id
            for task_id in args.kanban
        }

        results = {}
        for future in as_completed(futures):
            task_id = futures[future]
            try:
                tid, returncode = future.result()
                results[tid] = returncode
            except Exception as exc:
                # A crashed worker thread counts as a failed task (-1).
                log_combined(f"EXCEPTION: {exc}", task_id)
                results[task_id] = -1

    wall_elapsed = time.monotonic() - wall_start
    failed = print_summary(args.kanban, results, _task_times, wall_elapsed, _total_tasks)

    if output_dir:
        # Re-read each per-task JSON; synthesize a stub when missing/corrupt
        # so the aggregation always covers every task.
        task_outputs = {}
        for tid in args.kanban:
            cap = _task_output_path(output_dir, tid)
            if cap.exists():
                try:
                    task_outputs[tid] = json.loads(cap.read_text(encoding="utf-8"))
                except (json.JSONDecodeError, OSError):
                    task_outputs[tid] = {"task_id": tid, "exit_code": results.get(tid, -1),
                                         "backend_result": None}
            else:
                task_outputs[tid] = {"task_id": tid, "exit_code": results.get(tid, -1),
                                     "backend_result": None}
        agg_result = _write_aggregation(
            output_dir, task_outputs, wall_elapsed, args.parallel,
            mode="headless", file_format=args.file_format,
        )
        _print_output_summary(agg_result)

    sys.exit(1 if failed > 0 else 0)
|
|
1388
|
+
|
|
1389
|
+
|
|
1390
|
+
# ---------------------------------------------------------------------------
|
|
1391
|
+
# Tmux mode — data structures
|
|
1392
|
+
# ---------------------------------------------------------------------------
|
|
1393
|
+
|
|
1394
|
+
class TaskStatus(Enum):
    """Lifecycle states of a task in tmux mode."""
    PENDING = "pending"
    RUNNING = "running"
    DONE = "done"
    FAILED = "failed"
|
|
1399
|
+
|
|
1400
|
+
|
|
1401
|
+
@dataclass
class TaskState:
    """Per-task bookkeeping record for the tmux-mode scheduler."""
    task_id: str
    status: TaskStatus = TaskStatus.PENDING
    worker_id: int = -1        # -1 until assigned to a worker (presumably)
    start_time: float = 0.0    # 0.0 until the task actually starts
    end_time: float = 0.0
    exit_code: int = -1        # -1 until the task finishes
    sentinel_id: str = ""      # marker used to detect completion in pane output — TODO confirm
    start_time_iso: str = ""   # wall-clock ISO timestamps for reporting
    end_time_iso: str = ""
|
|
1412
|
+
|
|
1413
|
+
|
|
1414
|
+
@dataclass
class WorkerState:
    """State of one tmux worker window/pane."""
    worker_id: int
    tmux_target: str           # tmux target spec, e.g. "session:window"
    current_task: str = ""     # empty when idle
    busy: bool = False
    sentinel_id: str = ""      # sentinel of the currently running task — TODO confirm
|
|
1421
|
+
|
|
1422
|
+
|
|
1423
|
+
# ---------------------------------------------------------------------------
|
|
1424
|
+
# Tmux mode — session creation
|
|
1425
|
+
# ---------------------------------------------------------------------------
|
|
1426
|
+
|
|
1427
|
+
def tmux_run(cmd, check=True):
    """Invoke tmux with the given argument list and return its stripped stdout.

    Raises RuntimeError (including tmux's stderr) when *check* is true
    and tmux exits non-zero; with check=False failures are silent.
    """
    completed = subprocess.run(
        ["tmux", *cmd],
        capture_output=True,
        text=True,
    )
    failed = completed.returncode != 0
    if check and failed:
        raise RuntimeError(f"tmux {' '.join(cmd)} failed: {completed.stderr.strip()}")
    return completed.stdout.strip()
|
|
1437
|
+
|
|
1438
|
+
|
|
1439
|
+
def _ansi256_to_tmux_color(code):
|
|
1440
|
+
"""Convert ANSI 256-color code to tmux colour string."""
|
|
1441
|
+
return f"colour{code}"
|
|
1442
|
+
|
|
1443
|
+
|
|
1444
|
+
def update_pane_border_color(worker_target, task_id):
    """Color the worker pane's border to match the task and title it with the task ID.

    Falls back to color 7 when the task has no assigned color. Both tmux
    calls are best-effort (check=False) — cosmetic failures are ignored.
    """
    ansi_code = _task_color_map.get(task_id, 7)
    border_color = _ansi256_to_tmux_color(ansi_code)

    style_cmd = [
        "select-pane", "-t", worker_target,
        "-P", f"border-style=fg={border_color}",
    ]
    title_cmd = [
        "select-pane", "-t", worker_target,
        "-T", f"{task_id}",
    ]
    tmux_run(style_cmd, check=False)
    tmux_run(title_cmd, check=False)
|
|
1456
|
+
|
|
1457
|
+
|
|
1458
|
+
def create_tmux_session(session_name, mode, num_workers, pwd):
    """Create tmux session with coordinator + worker windows/panes.

    Args:
        session_name: full tmux session name (any same-named session is
            killed first, best-effort).
        mode: "windows" → one window per worker plus a "coordinator"
            window; anything else → one "main" window split into tiled
            panes (pane 0 = coordinator, panes 1..N = workers).
        num_workers: number of worker panes/windows to create.
        pwd: working directory each worker cd's into.

    Returns:
        (coordinator_target, workers) where workers is a list of
        WorkerState with their tmux targets filled in.
    """
    # Best-effort: remove a stale session of the same name; errors ignored.
    subprocess.run(
        ["tmux", "kill-session", "-t", session_name],
        capture_output=True,
    )

    workers = []

    def _setup_status_bar():
        # Configure the session status bar + pane border chrome.
        # All check=False: cosmetic settings must never fail the run.
        tmux_run(["set-option", "-t", session_name, "status", "on"], check=False)
        tmux_run(["set-option", "-t", session_name, "status-style", "bg=black,fg=white"], check=False)
        tmux_run(["set-option", "-t", session_name, "status-left", f"#[fg=cyan,bold] {session_name} #[default]"], check=False)
        tmux_run(["set-option", "-t", session_name, "status-left-length", "25"], check=False)
        tmux_run(["set-option", "-t", session_name, "status-right", "Starting..."], check=False)
        tmux_run(["set-option", "-t", session_name, "status-right-length", "120"], check=False)
        tmux_run(["set-option", "-t", session_name, "pane-border-status", "top"], check=False)
        tmux_run(["set-option", "-t", session_name, "pane-border-format",
                  " #{pane_index}: #{pane_title} "], check=False)
        tmux_run(["set-option", "-t", session_name, "pane-border-indicators", "colour"], check=False)

    if mode == "windows":
        # Detached session; -x/-y give it a sane size before any client attaches.
        tmux_run([
            "new-session", "-d", "-s", session_name,
            "-n", "coordinator", "-x", "200", "-y", "50",
        ])
        tmux_run(["set-option", "-t", session_name, "remain-on-exit", "off"])
        _setup_status_bar()

        # One window per worker, each cd'd into the project directory.
        for i in range(num_workers):
            name = f"worker-{i}"
            tmux_run(["new-window", "-t", session_name, "-n", name])
            target = f"{session_name}:{name}"
            tmux_run(["send-keys", "-t", target, f"cd {shlex.quote(pwd)}", "Enter"])
            workers.append(WorkerState(worker_id=i, tmux_target=target))

        # Leave the coordinator window focused for the attaching user.
        tmux_run(["select-window", "-t", f"{session_name}:coordinator"])
        coordinator_target = f"{session_name}:coordinator"

    else:  # panes
        tmux_run([
            "new-session", "-d", "-s", session_name,
            "-n", "main", "-x", "200", "-y", "50",
        ])
        tmux_run(["set-option", "-t", session_name, "remain-on-exit", "off"])
        # FIX-005: Increase scrollback for panes mode to prevent sentinel eviction
        tmux_run(["set-option", "-t", session_name, "history-limit", "50000"], check=False)
        _setup_status_bar()

        base = f"{session_name}:main"

        # Split first, re-tiling after each split so every split has room.
        for i in range(num_workers):
            tmux_run(["split-window", "-t", base, "-v"])
            tmux_run(["select-layout", "-t", base, "tiled"])

        # Pane 0 is the coordinator; workers occupy panes 1..num_workers.
        for i in range(num_workers):
            pane_idx = i + 1
            target = f"{base}.{pane_idx}"
            tmux_run(["send-keys", "-t", target, f"cd {shlex.quote(pwd)}", "Enter"])
            workers.append(WorkerState(worker_id=i, tmux_target=target))

        coordinator_target = f"{base}.0"

    return coordinator_target, workers
|
|
1522
|
+
|
|
1523
|
+
|
|
1524
|
+
# ---------------------------------------------------------------------------
|
|
1525
|
+
# Tmux mode — command building & dispatch
|
|
1526
|
+
# ---------------------------------------------------------------------------
|
|
1527
|
+
|
|
1528
|
+
def write_runner_script(task_id, pwd, prompt_file_path, session_name_short,
                        output_dir=None, service="claude", model=":sonnet",
                        file_format=""):
    """Write prompt file + bash runner script for a task.

    Materializes three files in the session temp dir: the rendered prompt,
    an env-export shell snippet, and a bash runner that sources the env,
    runs juno-code against the prompt, and echoes a unique sentinel line
    (``___DONE_<id>_<exit>___``) that check_worker_done() later scans for.

    Args:
        task_id: task identifier, used in file names and template expansion.
        pwd: working directory the runner cd's into.
        prompt_file_path: optional template file; ``{{task_id}}``,
            ``{{item}}`` and ``{{file_format}}`` placeholders are expanded.
        session_name_short: short session name, selects the temp dir.
        output_dir: unused here; kept for signature parity with
            dispatch_task (callers pass it through).
        service / model: forwarded to the juno-code CLI.
        file_format: substituted into the prompt template.

    Returns:
        (runner_script_path, sentinel_id) as (str, str).
    """
    prompt = ""

    if prompt_file_path:
        try:
            extra = Path(prompt_file_path).read_text(encoding="utf-8")
            extra = extra.replace("{{task_id}}", task_id)
            extra = extra.replace("{{item}}", _item_map.get(task_id, task_id))
            extra = extra.replace("{{file_format}}", file_format)
            prompt += "\n\n---\n\n" + extra
        except Exception as exc:
            # Best-effort by design (the task still runs with an empty
            # prompt), but no longer silent: an unreadable prompt file is
            # almost always a configuration error worth surfacing.
            print(
                f"WARNING: could not read prompt file {prompt_file_path!r} "
                f"for task {task_id}: {exc}",
                file=sys.stderr,
            )

    # Short random id the runner echoes on completion; uniqueness per
    # dispatch lets the poller ignore sentinels from earlier tasks.
    sentinel_id = uuid.uuid4().hex[:12]

    tmp = _tmp_dir(session_name_short)
    tmp.mkdir(parents=True, exist_ok=True)

    prompt_path = tmp / f"prompt_{task_id}.txt"
    prompt_path.write_text(prompt)

    env_exports = _generate_env_exports()

    env_path = tmp / f"env_{task_id}.sh"
    env_path.write_text(env_exports + "\n")

    # All interpolated values are shlex-quoted so paths with spaces or
    # shell metacharacters cannot break (or inject into) the script.
    runner_path = tmp / f"run_{task_id}.sh"
    runner_path.write_text(textwrap.dedent("""\
        #!/bin/bash
        source %(env_path)s
        cd %(pwd)s
        juno-code -b shell -s %(service)s -m %(model)s -i 1 -v --no-hooks \\
          -f %(prompt_path)s
        echo "___DONE_%(sentinel_id)s_${?}___"
    """) % {
        "env_path": shlex.quote(str(env_path)),
        "pwd": shlex.quote(pwd),
        "prompt_path": shlex.quote(str(prompt_path)),
        "sentinel_id": sentinel_id,
        "service": shlex.quote(service),
        "model": shlex.quote(model),
    })

    return str(runner_path), sentinel_id
|
|
1575
|
+
|
|
1576
|
+
|
|
1577
|
+
def dispatch_task(worker, task_id, task_state, pwd, prompt_file_path,
                  session_name_short, output_dir=None, service="claude",
                  model=":sonnet", file_format=""):
    """Send a task command to a worker's tmux pane/window.

    Writes the per-task runner script, re-wires the pane's output pipe to
    the log helper, styles the pane border, launches the runner via
    send-keys, and records the dispatch in worker/task state and the
    combined log.

    Args:
        worker: WorkerState to run on (must be idle; marked busy here).
        task_id: task identifier.
        task_state: TaskState to mutate (status, timing, sentinel).
        pwd / prompt_file_path / session_name_short / output_dir /
        service / model / file_format: forwarded to write_runner_script().
    """
    runner_path, sentinel_id = write_runner_script(
        task_id, pwd, prompt_file_path, session_name_short, output_dir,
        service, model, file_format)

    # FIX-003: Stop old pipe-pane explicitly before starting new one
    tmux_run(["pipe-pane", "-t", worker.tmux_target], check=False)
    # Brief pause so tmux tears the old pipe down before we attach a new one.
    time.sleep(0.1)

    # Pipe all pane output through the log_pipe.py helper, which tees it
    # into the per-task log and the combined log with the task's color.
    task_log = str(LOG_DIR / f"task_{task_id}.log")
    helper = str(_tmp_dir(session_name_short) / "log_pipe.py")
    color_code = str(_task_color_map.get(task_id, 7))
    tmux_run([
        "pipe-pane", "-t", worker.tmux_target,
        "-o", "python3 -u %s %s %s %s %s" % (
            shlex.quote(helper),
            shlex.quote(task_id),
            shlex.quote(task_log),
            shlex.quote(str(COMBINED_LOG)),
            shlex.quote(color_code),
        ),
    ])

    update_pane_border_color(worker.tmux_target, task_id)

    # Actually start the task in the worker's shell.
    tmux_run(["send-keys", "-t", worker.tmux_target, f"bash {shlex.quote(runner_path)}", "Enter"])

    # Record the dispatch; the sentinel ties this worker's output back to
    # this specific task when check_worker_done() scans the pane.
    worker.busy = True
    worker.current_task = task_id
    worker.sentinel_id = sentinel_id
    task_state.status = TaskStatus.RUNNING
    task_state.worker_id = worker.worker_id
    task_state.start_time = time.monotonic()
    task_state.start_time_iso = datetime.now().isoformat()
    task_state.sentinel_id = sentinel_id

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    tag = _colored_tag(task_id)
    with open(COMBINED_LOG, "a") as f:
        f.write(f"[{timestamp}] {tag} Dispatched to worker-{worker.worker_id}\n")
|
|
1620
|
+
|
|
1621
|
+
|
|
1622
|
+
# ---------------------------------------------------------------------------
|
|
1623
|
+
# Tmux mode — completion detection
|
|
1624
|
+
# ---------------------------------------------------------------------------
|
|
1625
|
+
|
|
1626
|
+
# Completion marker echoed by each runner script:
# ___DONE_<hex sentinel id>_<exit code>___  (see write_runner_script).
_SENTINEL_RE = re.compile(r"___DONE_([a-f0-9]+)_(\d+)___")
|
|
1627
|
+
|
|
1628
|
+
|
|
1629
|
+
def check_worker_done(worker):
    """Check if a worker's current task has finished by looking for the sentinel.

    Captures the last 20 lines of the worker's pane and scans them for the
    ``___DONE_<id>_<exit>___`` marker carrying this worker's sentinel id.

    Returns:
        (True, exit_code) when the matching sentinel is found,
        (False, -1) when the worker is idle, capture fails, or no
        matching sentinel is visible yet.
    """
    not_done = (False, -1)

    if not worker.busy:
        return not_done

    try:
        # FIX-004: -J joins wrapped lines (prevents sentinel wrap in narrow panes)
        capture_args = [
            "capture-pane", "-t", worker.tmux_target,
            "-p", "-J", "-S", "-20",
        ]
        output = tmux_run(capture_args, check=False)
    except Exception:
        return not_done

    # Only the sentinel belonging to the currently dispatched task counts;
    # stale markers from earlier tasks in the scrollback are skipped.
    hit = next(
        (m for m in _SENTINEL_RE.finditer(output)
         if m.group(1) == worker.sentinel_id),
        None,
    )
    if hit is not None:
        return True, int(hit.group(2))

    return not_done
|
|
1649
|
+
|
|
1650
|
+
|
|
1651
|
+
# ---------------------------------------------------------------------------
|
|
1652
|
+
# Tmux mode — dashboard
|
|
1653
|
+
# ---------------------------------------------------------------------------
|
|
1654
|
+
|
|
1655
|
+
def update_tmux_status_bar(task_states, paused, wall_start, session_name):
    """Update tmux bottom status bar with progress summary.

    Renders "Progress: done/total (pct%) | NOK [NF] | N running | elapsed
    [PAUSED]" using tmux #[...] style markup into the session's
    status-right. Best-effort: all tmux calls use check=False and any
    exception is swallowed so status-bar cosmetics never break the loop.
    """
    now = time.monotonic()
    done = sum(1 for t in task_states.values() if t.status == TaskStatus.DONE)
    failed = sum(1 for t in task_states.values() if t.status == TaskStatus.FAILED)
    running = sum(1 for t in task_states.values() if t.status == TaskStatus.RUNNING)
    total = len(task_states)
    completed = done + failed
    # Guard the empty-task-set case to avoid ZeroDivisionError.
    pct = int(completed / total * 100) if total > 0 else 0
    elapsed = format_duration(now - wall_start)

    pause_tag = " #[fg=yellow,bold]PAUSED#[default]" if paused else ""
    # Failures only shown when present, in red.
    fail_tag = f" #[fg=red]{failed}F#[default]" if failed > 0 else ""

    status = (
        f"#[fg=cyan,bold]Progress:#[default] {completed}/{total} ({pct}%)"
        f" | #[fg=green]{done}OK#[default]{fail_tag}"
        f" | #[fg=blue]{running} running#[default]"
        f" | {elapsed}{pause_tag}"
    )

    try:
        # Re-assert length/style each refresh in case something reset them.
        tmux_run(["set-option", "-t", session_name, "status-right-length", "120"], check=False)
        tmux_run(["set-option", "-t", session_name, "status-right", status], check=False)
        tmux_run(["set-option", "-t", session_name, "status-style", "bg=black,fg=white"], check=False)
    except Exception:
        pass
|
|
1682
|
+
|
|
1683
|
+
|
|
1684
|
+
def update_dashboard_file(task_states, workers, paused, wall_start, session_name_short, session_name):
    """Write dashboard content to .dashboard_{name} file.

    Renders the full-text dashboard (header, recent completions, help,
    running workers, progress bar + ETA) that the coordinator pane `cat`s
    in a loop, then refreshes the tmux status bar. Rewrites the file
    wholesale on every call.
    """
    now = time.monotonic()
    lines = []
    dashboard_path = _dashboard_file(session_name_short)
    pause_path = _pause_file(session_name_short)

    pending = sum(1 for t in task_states.values() if t.status == TaskStatus.PENDING)
    running = sum(1 for t in task_states.values() if t.status == TaskStatus.RUNNING)
    done = sum(1 for t in task_states.values() if t.status == TaskStatus.DONE)
    failed = sum(1 for t in task_states.values() if t.status == TaskStatus.FAILED)
    total = len(task_states)

    # Other concurrently running runner sessions (for the header note).
    other_sessions = _list_running_sessions()
    total_sessions = len(other_sessions)

    lines.append("=" * 58)
    session_header = f" PARALLEL RUNNER — {session_name_short.upper()}"
    if total_sessions > 1:
        session_header += f" ({total_sessions} sessions active)"
    lines.append(session_header)
    lines.append("=" * 58)

    # Recent completions — newest first, capped at 4 entries.
    finished = sorted(
        [t for t in task_states.values() if t.status in (TaskStatus.DONE, TaskStatus.FAILED)],
        key=lambda t: t.end_time,
        reverse=True,
    )[:4]
    lines.append("")
    lines.append(" RECENT COMPLETIONS:")
    if finished:
        for t in finished:
            # start_time == 0.0 means the task never actually started.
            elapsed_t = format_duration(t.end_time - t.start_time) if t.start_time > 0 else "?"
            dot = _color_for(t.task_id)
            if t.status == TaskStatus.DONE:
                status = f"\033[32mOK\033[0m"
            else:
                status = f"\033[31mFAIL(exit {t.exit_code})\033[0m"
            log_path = f"logs/task_{t.task_id}.log"
            lines.append(f" {dot} {t.task_id}: {status} [{elapsed_t}] -> {log_path}")
    else:
        lines.append(" (none yet)")

    # Help — pause/resume is driven by the existence of the pause file.
    lines.append("")
    if not paused:
        lines.append(f" Pause: touch {pause_path}")
    else:
        lines.append(f" Resume: rm {pause_path}")
    lines.append(" Logs: .juno_task/scripts/logs/task_<TASK_ID>.log")
    lines.append(" Detach: Ctrl-b d (orchestrator keeps running)")
    lines.append(f" Reattach: tmux attach -t {session_name}")
    lines.append(f" Stop: Ctrl-c (or --stop --name {session_name_short})")

    if paused:
        lines.append("")
        lines.append(" \033[33;1m*** PAUSED ***\033[0m")

    # Running workers
    lines.append("")
    lines.append(" RUNNING:")
    running_tasks = [t for t in task_states.values() if t.status == TaskStatus.RUNNING]
    if running_tasks:
        for t in running_tasks:
            elapsed_t = format_duration(now - t.start_time)
            dot = _color_for(t.task_id)
            lines.append(f" {dot} worker-{t.worker_id}: {t.task_id} ({elapsed_t})")
    else:
        lines.append(" (none)")

    # Progress bar
    lines.append("")
    lines.append("-" * 58)
    elapsed = format_duration(now - wall_start)
    lines.append(f" Pending: {pending} | Running: {running} | Done: {done} | Failed: {failed} | Total: {total}")
    if total > 0:
        completed = done + failed
        remaining = pending + running
        pct = int(completed / total * 100)
        bar_width = 40
        filled = int(bar_width * completed / total)
        # Full block / light shade characters for the filled/empty segments.
        bar = "\u2588" * filled + "\u2591" * (bar_width - filled)
        eta_str = ""
        if completed > 0 and remaining > 0:
            # ETA = average duration of properly-timed finished tasks,
            # scaled by remaining work spread across all workers.
            finished_tasks = [t for t in task_states.values()
                              if t.status in (TaskStatus.DONE, TaskStatus.FAILED)
                              and t.start_time > 0 and t.end_time > 0]
            if finished_tasks:
                avg_task = sum(t.end_time - t.start_time for t in finished_tasks) / len(finished_tasks)
                active_workers = max(len(workers), 1)
                eta_secs = avg_task * remaining / active_workers
                eta_str = f" ETA: ~{format_duration(eta_secs)}"
        lines.append(f" [{bar}] {pct}% ({completed}/{total}) Wall: {elapsed}{eta_str}")
    lines.append("-" * 58)

    dashboard_path.write_text("\n".join(lines) + "\n")
    update_tmux_status_bar(task_states, paused, wall_start, session_name)
|
|
1782
|
+
|
|
1783
|
+
|
|
1784
|
+
# ---------------------------------------------------------------------------
|
|
1785
|
+
# Tmux mode — orchestration loop
|
|
1786
|
+
# ---------------------------------------------------------------------------
|
|
1787
|
+
|
|
1788
|
+
def orchestration_loop(task_states, workers, task_queue, pwd, prompt_file_path,
                       wall_start, session_name_short, session_name,
                       output_dir=None, service="claude", model=":sonnet",
                       file_format="", strict=False):
    """Main orchestration loop — polls workers, dispatches tasks, updates dashboard.

    Each iteration (every ~2s): detect finished workers via their sentinel,
    record results, refill idle workers from the queue (unless paused or
    shutting down), refresh the dashboard, and exit when every task is
    DONE/FAILED, the tmux session disappears, or a shutdown is signalled.
    Afterwards it writes the summary/aggregation and notifies tmux.

    Returns:
        0 when every task succeeded, 1 when any task failed.
    """
    all_task_ids = list(task_states.keys())
    pause_path = _pause_file(session_name_short)

    while True:
        if _shutdown_event.is_set():
            print(f"[{datetime.now()}] Shutdown signal received, stopping dispatch...",
                  file=sys.stderr)
            break

        # If the user killed the tmux session there is nothing left to drive.
        result = subprocess.run(
            ["tmux", "has-session", "-t", session_name],
            capture_output=True,
        )
        if result.returncode != 0:
            print(f"[{datetime.now()}] Tmux session '{session_name}' gone, shutting down...",
                  file=sys.stderr)
            break

        # Pausing is file-based: `touch` the pause file to stop new dispatch.
        paused = pause_path.exists()

        # Check each worker for completion
        for worker in workers:
            if not worker.busy:
                continue

            done, exit_code = check_worker_done(worker)
            if not done:
                continue

            # Task finished — record timing and outcome.
            task_id = worker.current_task
            ts = task_states[task_id]
            ts.end_time = time.monotonic()
            ts.end_time_iso = datetime.now().isoformat()
            ts.exit_code = exit_code

            if exit_code == 0:
                ts.status = TaskStatus.DONE
            else:
                ts.status = TaskStatus.FAILED

            # Free the worker before any slower post-processing below.
            worker.busy = False
            worker.current_task = ""
            worker.sentinel_id = ""

            elapsed = ts.end_time - ts.start_time
            status_str = "OK" if exit_code == 0 else f"FAILED (exit {exit_code})"

            # FIX-002: Parse result from log, with capture-pane fallback
            task_log_path = LOG_DIR / f"task_{task_id}.log"
            backend_result = _parse_result_from_log(task_log_path) if output_dir else None

            # Fallback: if the piped log yielded nothing, scrape the full
            # pane scrollback, strip ANSI escapes, and re-parse.
            if output_dir and backend_result is None:
                try:
                    scrollback = tmux_run([
                        "capture-pane", "-t", worker.tmux_target,
                        "-p", "-S", "-",
                    ], check=False)
                    if scrollback:
                        ansi_re = re.compile(r'\x1b\[[0-9;]*[a-zA-Z]')
                        clean = ansi_re.sub('', scrollback)
                        task_log_path.write_text(clean, encoding="utf-8")
                        backend_result = _parse_result_from_log(task_log_path)
                except Exception:
                    pass

            # Strict mode may downgrade a "successful" exit to a failure
            # when the expected output file could not be extracted.
            if strict and file_format and output_dir:
                exit_code = _extract_strict_output(
                    task_id, task_log_path, output_dir, file_format, exit_code,
                )
                ts.exit_code = exit_code
                if exit_code != 0 and ts.status != TaskStatus.FAILED:
                    ts.status = TaskStatus.FAILED

            if output_dir:
                _write_task_output(
                    output_dir, task_id, exit_code, elapsed,
                    ts.start_time_iso, ts.end_time_iso, ts.worker_id,
                    backend_result=backend_result, file_format=file_format,
                )

            # Clean up temp files
            tmp = _tmp_dir(session_name_short)
            for tmp_f in [tmp / f"prompt_{task_id}.txt", tmp / f"run_{task_id}.sh",
                          tmp / f"env_{task_id}.sh"]:
                try:
                    tmp_f.unlink(missing_ok=True)
                except OSError:
                    pass

            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            tag = _colored_tag(task_id)
            with open(COMBINED_LOG, "a") as f:
                f.write(
                    f"[{timestamp}] {tag} Finished - {status_str} "
                    f"({format_duration(elapsed)})\n"
                )

        # Dispatch new tasks to free workers
        if not paused and not _shutdown_event.is_set():
            for worker in workers:
                if worker.busy:
                    continue
                if not task_queue:
                    break
                next_task_id = task_queue.popleft()
                dispatch_task(
                    worker, next_task_id, task_states[next_task_id],
                    pwd, prompt_file_path, session_name_short, output_dir,
                    service, model, file_format,
                )

        # Update dashboard
        update_dashboard_file(task_states, workers, paused, wall_start,
                              session_name_short, session_name)

        # Check if all done
        all_done = all(
            ts.status in (TaskStatus.DONE, TaskStatus.FAILED)
            for ts in task_states.values()
        )
        if all_done:
            break

        # On shutdown, drain in-flight tasks before exiting the loop.
        if _shutdown_event.is_set():
            any_busy = any(w.busy for w in workers)
            if not any_busy:
                break

        time.sleep(2)

    # Close all pipe-panes
    for worker in workers:
        tmux_run(["pipe-pane", "-t", worker.tmux_target], check=False)

    shutil.rmtree(str(_tmp_dir(session_name_short)), ignore_errors=True)

    # Final dashboard update
    update_dashboard_file(task_states, workers, False, wall_start,
                          session_name_short, session_name)

    # Build results
    results = {}
    task_times = {}
    for tid, ts in task_states.items():
        results[tid] = ts.exit_code
        # Only tasks that actually ran (both timestamps set) get timings.
        if ts.start_time > 0 and ts.end_time > 0:
            task_times[tid] = ts.end_time - ts.start_time

    wall_elapsed = time.monotonic() - wall_start

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    ok = sum(1 for ts in task_states.values() if ts.status == TaskStatus.DONE)
    failed = sum(1 for ts in task_states.values() if ts.status == TaskStatus.FAILED)

    # Human-readable summary appended to both combined log and dashboard.
    summary_lines = [
        "",
        "=" * 60,
        "RESULTS",
        "=" * 60,
    ]
    for tid in all_task_ids:
        rc = results.get(tid, -1)
        el = task_times.get(tid, 0)
        st = "OK" if rc == 0 else f"FAILED (exit {rc})"
        tag = _colored_tag(tid)
        summary_lines.append(f" {tag} {st} [{format_duration(el)}]")

    summary_lines.extend([
        "-" * 60,
        "STATS",
        f" Total tasks: {len(task_states)}",
        f" Succeeded: {ok}",
        f" Failed: {failed}",
        f" Wall time: {format_duration(wall_elapsed)}",
    ])
    if task_times:
        avg = sum(task_times.values()) / len(task_times)
        fastest = min(task_times.values())
        slowest = max(task_times.values())
        summary_lines.extend([
            f" Avg per task: {format_duration(avg)}",
            f" Fastest task: {format_duration(fastest)}",
            f" Slowest task: {format_duration(slowest)}",
        ])
    summary_lines.extend([
        f" Run ID: {_run_id}",
        f" Per-task logs: {LOG_DIR}/task_<TASK_ID>.log",
        "=" * 60,
    ])

    with open(COMBINED_LOG, "a") as f:
        for line in summary_lines:
            f.write(f"[{timestamp}] {line}\n")

    dashboard_path = _dashboard_file(session_name_short)
    with open(dashboard_path, "a") as f:
        f.write("\n")
        for line in summary_lines:
            f.write(f" {line}\n")

    # Write aggregation
    if output_dir:
        # Collect per-task JSON outputs; missing or corrupt files fall
        # back to a stub record carrying just the exit code.
        task_outputs = {}
        for tid in all_task_ids:
            cap = _task_output_path(output_dir, tid)
            if cap.exists():
                try:
                    task_outputs[tid] = json.loads(cap.read_text(encoding="utf-8"))
                except (json.JSONDecodeError, OSError):
                    task_outputs[tid] = {"task_id": tid, "exit_code": results.get(tid, -1),
                                         "backend_result": None}
            else:
                task_outputs[tid] = {"task_id": tid, "exit_code": results.get(tid, -1),
                                     "backend_result": None}
        agg_result = _write_aggregation(
            output_dir, task_outputs, wall_elapsed, len(workers),
            mode=f"tmux/{session_name}", session_name=session_name_short,
            file_format=file_format,
        )
        summary_text = _format_output_summary(agg_result)
        with open(COMBINED_LOG, "a") as f:
            f.write(f"[{timestamp}] {summary_text}\n")
        with open(dashboard_path, "a") as f:
            f.write(f"\n{summary_text}\n")

    # Tmux notification
    try:
        tmux_run([
            "display-message", "-t", session_name,
            f"All {len(task_states)} tasks complete! ({ok} OK, {failed} failed)",
        ], check=False)
    except Exception:
        pass

    return 1 if failed > 0 else 0
|
|
2028
|
+
|
|
2029
|
+
|
|
2030
|
+
# ---------------------------------------------------------------------------
|
|
2031
|
+
# Tmux mode — entry point
|
|
2032
|
+
# ---------------------------------------------------------------------------
|
|
2033
|
+
|
|
2034
|
+
def run_tmux_mode(args, pwd, prompt_file_path, output_dir, service, model):
    """Set up tmux session and run orchestrator.

    Creates the tmux session and worker panes/windows, seeds initial
    dispatches, then forks: the child detaches (setsid) and runs the
    orchestration loop as a daemon, while the parent exec's
    `tmux attach-session` so the user lands in the live session.

    Note: never returns in the parent (os.execvp replaces the process);
    the child exits via os._exit with the loop's status.
    """
    num_workers = args.parallel
    mode = args.tmux

    session_name_short = args.name if args.name else _next_batch_name()
    session_name = _session_name_to_tmux(session_name_short)

    if mode == "panes" and num_workers > 5:
        print(
            f"WARNING: --parallel {num_workers} with panes mode may make panes too small. "
            f"Consider using 'windows' mode or reducing --parallel to 5.",
            file=sys.stderr,
        )

    _log_base.mkdir(parents=True, exist_ok=True)
    LOG_DIR.mkdir(parents=True, exist_ok=True)

    # Kill stale orchestrator daemon for THIS session name only
    pid_path = _pid_file(session_name_short)
    if pid_path.exists():
        try:
            old_pid = int(pid_path.read_text().strip())
            os.kill(old_pid, signal.SIGTERM)
            print(f"Killed stale orchestrator for '{session_name_short}' (PID {old_pid})")
            time.sleep(0.3)
        except (ValueError, ProcessLookupError, PermissionError):
            # Corrupt pid file, already-dead process, or not ours — ignore.
            pass

    # Start from a clean slate: remove leftover control files.
    for f in [_dashboard_file(session_name_short), _pause_file(session_name_short), pid_path]:
        if f.exists():
            f.unlink()

    # Log startup
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open(COMBINED_LOG, "a") as f:
        f.write(f"[{timestamp}] {'=' * 60}\n")
        f.write(f"[{timestamp}] Starting parallel task execution (tmux {mode} mode)\n")
        f.write(f"[{timestamp}] Run ID: {_run_id}\n")
        f.write(f"[{timestamp}] Session: {session_name} (name: {session_name_short})\n")
        f.write(f"[{timestamp}] PWD: {pwd}\n")
        f.write(f"[{timestamp}] Tasks ({len(args.kanban)}): {', '.join(args.kanban)}\n")
        # For ad-hoc "item-*" tasks, show a preview of the first few items.
        if any(tid.startswith("item-") for tid in args.kanban):
            for tid in args.kanban[:3]:
                f.write(f"[{timestamp}] {tid} -> {_item_map[tid][:80]}\n")
            if len(args.kanban) > 3:
                f.write(f"[{timestamp}] ... and {len(args.kanban) - 3} more\n")
        f.write(f"[{timestamp}] Parallelism: {num_workers}\n")
        f.write(f"[{timestamp}] Service: {service} | Model: {model}\n")
        if prompt_file_path:
            f.write(f"[{timestamp}] Prompt file: {prompt_file_path} (re-read per task)\n")
        if output_dir:
            f.write(f"[{timestamp}] Output dir: {output_dir}\n")
        f.write(f"[{timestamp}] {'=' * 60}\n")

    _write_log_pipe_helper(session_name_short)

    print(f"Creating tmux session '{session_name}' ({mode} mode, {num_workers} workers)...")

    coordinator_target, workers = create_tmux_session(session_name, mode, num_workers, pwd)

    task_states = {}
    for tid in args.kanban:
        task_states[tid] = TaskState(task_id=tid)
    task_queue = deque(args.kanban)

    wall_start = time.monotonic()

    # Start coordinator dashboard
    # The coordinator pane just re-`cat`s the dashboard file every 2s;
    # its INT trap forwards Ctrl-C to the orchestrator daemon via the pid file.
    pid_path_str = shlex.quote(str(pid_path))
    dashboard_file_str = shlex.quote(str(_dashboard_file(session_name_short)))
    dashboard_cmd = (
        f"trap 'kill $(cat {pid_path_str}) 2>/dev/null; exit' INT; "
        f"while true; do printf '\\033[H'; cat {dashboard_file_str} 2>/dev/null "
        f"|| echo 'Waiting for dashboard...'; printf '\\033[J'; sleep 2; done"
    )
    tmux_run(["send-keys", "-t", coordinator_target, dashboard_cmd, "Enter"])

    update_dashboard_file(task_states, workers, False, wall_start,
                          session_name_short, session_name)

    # Fork: child = orchestrator daemon, parent = tmux attach
    pid = os.fork()

    if pid == 0:
        # Child process — orchestrator daemon
        try:
            # New session so the daemon survives the parent's terminal going away.
            os.setsid()
        except OSError:
            pass

        # Publish our PID so --stop and the coordinator trap can signal us.
        pid_path.write_text(str(os.getpid()))

        # Redirect stdout/stderr to the orchestrator log file.
        log_fd = os.open(
            str(_orchestrator_log(session_name_short)),
            os.O_WRONLY | os.O_CREAT | os.O_APPEND,
        )
        os.dup2(log_fd, 1)
        os.dup2(log_fd, 2)
        os.close(log_fd)

        signal.signal(signal.SIGHUP, signal.SIG_IGN)

        def _shutdown_handler(signum, frame):
            # Flag-only handler: the loop drains in-flight work before exiting.
            sig_name = "SIGTERM" if signum == signal.SIGTERM else "SIGINT"
            print(f"[{datetime.now()}] Received {sig_name}, initiating graceful shutdown...",
                  file=sys.stderr)
            _shutdown_event.set()

        signal.signal(signal.SIGTERM, _shutdown_handler)
        signal.signal(signal.SIGINT, _shutdown_handler)

        try:
            # Seed one task per worker; the loop keeps them refilled after this.
            for worker in workers:
                if not task_queue:
                    break
                next_task_id = task_queue.popleft()
                file_format = getattr(args, "file_format", "") or ""
                dispatch_task(
                    worker, next_task_id, task_states[next_task_id],
                    pwd, prompt_file_path, session_name_short, output_dir,
                    service, model, file_format,
                )

            update_dashboard_file(task_states, workers, False, wall_start,
                                  session_name_short, session_name)

            file_format = getattr(args, "file_format", "") or ""
            strict = getattr(args, "strict", False)
            exit_code = orchestration_loop(
                task_states, workers, task_queue,
                pwd, prompt_file_path, wall_start,
                session_name_short, session_name, output_dir,
                service, model, file_format, strict,
            )
        except Exception:
            import traceback
            traceback.print_exc()
            exit_code = 1

        # Remove the pid file so future runs don't SIGTERM a recycled PID.
        if pid_path.exists():
            try:
                pid_path.unlink()
            except OSError:
                pass

        # os._exit: skip atexit/stdio teardown shared with the parent.
        os._exit(exit_code)

    else:
        # Parent process — attach to tmux session
        print(f"Orchestrator daemon started (PID {pid})")
        print(f"Run ID: {_run_id}")
        print(f"Session: {session_name} (name: {session_name_short})")
        print(f"Tasks: {', '.join(args.kanban)}")
        print(f"Workers: {num_workers}")
        print(f"Logs: {LOG_DIR}/")
        print(f"Pause: touch {_pause_file(session_name_short)}")
        print(f"Stop: --stop --name {session_name_short}")
        print(f"Attaching to tmux session...")
        print()

        time.sleep(0.5)

        # Replaces this process; nothing after this line runs in the parent.
        os.execvp("tmux", ["tmux", "attach-session", "-t", session_name])
|
|
2198
|
+
|
|
2199
|
+
|
|
2200
|
+
# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------
|
2204
|
+
def main():
    """CLI entry point.

    Parses arguments, handles the stop subcommands, sets up the per-run
    log directory, and then hands off to tmux or headless execution.
    """
    global LOG_DIR, COMBINED_LOG, _run_id

    args = parse_args()

    # Stop subcommands short-circuit before any run setup happens.
    if args.stop_all:
        run_stop_all()
        return
    if args.stop:
        run_stop(args)
        return

    pwd = os.getcwd()

    # Fresh 5-char lowercase-alphanumeric run ID; per-run logs live under
    # <log base>/<YYYY-MM-DD>/<run_id>/.
    alphabet = string.ascii_lowercase + string.digits
    _run_id = "".join(random.choices(alphabet, k=5))
    today = datetime.now().strftime("%Y-%m-%d")
    LOG_DIR = _log_base / today / _run_id
    COMBINED_LOG = LOG_DIR / "parallel_runner.log"

    # Give every kanban task a display color, cycling through the palette.
    for idx, task_id in enumerate(args.kanban):
        _task_color_map[task_id] = _TASK_COLORS[idx % len(_TASK_COLORS)]

    service, model = _resolve_service_model(args)
    prompt_file_path = resolve_prompt_file(args, pwd)
    output_dir = _resolve_output_dir(args)

    _log_base.mkdir(parents=True, exist_ok=True)
    LOG_DIR.mkdir(parents=True, exist_ok=True)

    # tmux mode daemonizes and attaches an interactive session; otherwise
    # the orchestrator runs headless in the current process.
    if args.tmux:
        run_tmux_mode(args, pwd, prompt_file_path, output_dir, service, model)
    else:
        run_headless_mode(args, pwd, prompt_file_path, output_dir, service, model)
|
|
2241
|
+
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()