quadwork 1.3.0 → 1.5.0

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (93)
  1. package/README.md +189 -82
  2. package/bin/quadwork.js +82 -0
  3. package/out/404.html +1 -1
  4. package/out/__next.__PAGE__.txt +3 -3
  5. package/out/__next._full.txt +12 -12
  6. package/out/__next._head.txt +4 -4
  7. package/out/__next._index.txt +6 -6
  8. package/out/__next._tree.txt +2 -2
  9. package/out/_next/static/chunks/{134b1p_egmf1c.js → 0-y13tz~pmpno.js} +1 -1
  10. package/out/_next/static/chunks/{0swlbn4q4u71z.js → 0.9m84as-sc_r.js} +14 -14
  11. package/out/_next/static/chunks/05.po0c1knrbu.css +2 -0
  12. package/out/_next/static/chunks/084lff9v4p_vh.js +1 -0
  13. package/out/_next/static/chunks/{0md7hgvwnovzq.js → 0e.ktwt1nyj...js} +1 -1
  14. package/out/_next/static/chunks/{0e~ue9ca5zrep.js → 0za4cvk8.n0-y.js} +1 -1
  15. package/out/_next/static/chunks/17y2walb2um9w.js +1 -0
  16. package/out/_not-found/__next._full.txt +11 -11
  17. package/out/_not-found/__next._head.txt +4 -4
  18. package/out/_not-found/__next._index.txt +6 -6
  19. package/out/_not-found/__next._not-found.__PAGE__.txt +2 -2
  20. package/out/_not-found/__next._not-found.txt +3 -3
  21. package/out/_not-found/__next._tree.txt +2 -2
  22. package/out/_not-found.html +1 -1
  23. package/out/_not-found.txt +11 -11
  24. package/out/app-shell/__next._full.txt +11 -11
  25. package/out/app-shell/__next._head.txt +4 -4
  26. package/out/app-shell/__next._index.txt +6 -6
  27. package/out/app-shell/__next._tree.txt +2 -2
  28. package/out/app-shell/__next.app-shell.__PAGE__.txt +2 -2
  29. package/out/app-shell/__next.app-shell.txt +3 -3
  30. package/out/app-shell.html +1 -1
  31. package/out/app-shell.txt +11 -11
  32. package/out/index.html +1 -1
  33. package/out/index.txt +12 -12
  34. package/out/project/_/__next._full.txt +12 -12
  35. package/out/project/_/__next._head.txt +4 -4
  36. package/out/project/_/__next._index.txt +6 -6
  37. package/out/project/_/__next._tree.txt +2 -2
  38. package/out/project/_/__next.project.$d$id.__PAGE__.txt +3 -3
  39. package/out/project/_/__next.project.$d$id.txt +3 -3
  40. package/out/project/_/__next.project.txt +3 -3
  41. package/out/project/_/memory/__next._full.txt +12 -12
  42. package/out/project/_/memory/__next._head.txt +4 -4
  43. package/out/project/_/memory/__next._index.txt +6 -6
  44. package/out/project/_/memory/__next._tree.txt +2 -2
  45. package/out/project/_/memory/__next.project.$d$id.memory.__PAGE__.txt +3 -3
  46. package/out/project/_/memory/__next.project.$d$id.memory.txt +3 -3
  47. package/out/project/_/memory/__next.project.$d$id.txt +3 -3
  48. package/out/project/_/memory/__next.project.txt +3 -3
  49. package/out/project/_/memory.html +1 -1
  50. package/out/project/_/memory.txt +12 -12
  51. package/out/project/_/queue/__next._full.txt +12 -12
  52. package/out/project/_/queue/__next._head.txt +4 -4
  53. package/out/project/_/queue/__next._index.txt +6 -6
  54. package/out/project/_/queue/__next._tree.txt +2 -2
  55. package/out/project/_/queue/__next.project.$d$id.queue.__PAGE__.txt +3 -3
  56. package/out/project/_/queue/__next.project.$d$id.queue.txt +3 -3
  57. package/out/project/_/queue/__next.project.$d$id.txt +3 -3
  58. package/out/project/_/queue/__next.project.txt +3 -3
  59. package/out/project/_/queue.html +1 -1
  60. package/out/project/_/queue.txt +12 -12
  61. package/out/project/_.html +1 -1
  62. package/out/project/_.txt +12 -12
  63. package/out/settings/__next._full.txt +12 -12
  64. package/out/settings/__next._head.txt +4 -4
  65. package/out/settings/__next._index.txt +6 -6
  66. package/out/settings/__next._tree.txt +2 -2
  67. package/out/settings/__next.settings.__PAGE__.txt +3 -3
  68. package/out/settings/__next.settings.txt +3 -3
  69. package/out/settings.html +1 -1
  70. package/out/settings.txt +12 -12
  71. package/out/setup/__next._full.txt +12 -12
  72. package/out/setup/__next._head.txt +4 -4
  73. package/out/setup/__next._index.txt +6 -6
  74. package/out/setup/__next._tree.txt +2 -2
  75. package/out/setup/__next.setup.__PAGE__.txt +3 -3
  76. package/out/setup/__next.setup.txt +3 -3
  77. package/out/setup.html +1 -1
  78. package/out/setup.txt +12 -12
  79. package/package.json +5 -2
  80. package/server/index.js +274 -12
  81. package/server/queue-watcher.js +47 -10
  82. package/server/queue-watcher.test.js +64 -0
  83. package/server/routes.batchProgress.test.js +94 -0
  84. package/server/routes.js +752 -33
  85. package/server/routes.parseActiveBatch.test.js +88 -0
  86. package/server/routes.telegramBridge.test.js +70 -0
  87. package/templates/CLAUDE.md +0 -1
  88. package/out/_next/static/chunks/06mbme.sc_26-.css +0 -2
  89. package/out/_next/static/chunks/0caq73v0knw_w.js +0 -1
  90. package/out/_next/static/chunks/0omuxbg.tg-il.js +0 -1
  91. /package/out/_next/static/{na3L7KeOGKGsbamYVibRj → OzDK1Fplm2eUu23bzILlU}/_buildManifest.js +0 -0
  92. /package/out/_next/static/{na3L7KeOGKGsbamYVibRj → OzDK1Fplm2eUu23bzILlU}/_clientMiddlewareManifest.js +0 -0
  93. /package/out/_next/static/{na3L7KeOGKGsbamYVibRj → OzDK1Fplm2eUu23bzILlU}/_ssgManifest.js +0 -0
package/server/routes.js CHANGED
@@ -267,6 +267,18 @@ router.put("/api/loop-guard", async (req, res) => {
   if (!tomlPath || !fs.existsSync(tomlPath)) {
     return res.status(404).json({ error: "config.toml not found for project" });
   }
+  // Capture the previous value before rewriting so we can decide
+  // whether the /continue auto-resume should fire (only when the
+  // operator is RAISING the limit — lowering it means they want
+  // the runaway loop to stay paused).
+  let previousValue = null;
+  try {
+    const previousContent = fs.readFileSync(tomlPath, "utf-8");
+    const prevMatch = previousContent.match(/^\s*max_agent_hops\s*=\s*(\d+)/m);
+    if (prevMatch) previousValue = parseInt(prevMatch[1], 10);
+  } catch {
+    // fall through — previousValue stays null, auto-resume will skip
+  }
   try {
     let content = fs.readFileSync(tomlPath, "utf-8");
     if (/^\s*max_agent_hops\s*=/m.test(content)) {
@@ -289,14 +301,77 @@ router.put("/api/loop-guard", async (req, res) => {
   // /api/chat does (#230): re-sync the session token from AC and
   // retry once. Other failures stay non-fatal — the persisted value
   // still takes effect on next AC restart.
+  //
+  // #417 / quadwork#309: the update_settings ws event correctly
+  // updates router.max_hops in the running AC (verified in AC's
+  // app.py:1249), AND writes settings.json via _save_settings. But
+  // AC's router stays paused once it has tripped the guard — raising
+  // max_hops at runtime does NOT resurrect an already-paused channel
+  // (router.py:76-77 → `paused = True`). The operator typically
+  // raises the limit precisely BECAUSE the channel is stuck paused,
+  // so we immediately follow the update_settings event with a
+  // `/continue` chat message (the same path AC's own slash command
+  // handler uses at app.py:1106-1110) to resume routing. This is the
+  // whole fix: the previous version updated max_hops live but left
+  // the channel frozen, which made the widget look like a no-op.
   let live = false;
+  let autoResumed = false;
+  // Only auto-resume when ALL of:
+  //   (a) operator is RAISING the limit (lowering = "make it
+  //       stricter", must leave a paused runaway alone)
+  //   (b) the router is currently paused (AC's continue_routing
+  //       resets hop_count + paused + guard_emitted unconditionally,
+  //       so firing it on an actively-running chain would silently
+  //       extend the chain beyond the new limit — t2a finding)
+  //   (c) previousValue is known (null means we can't prove it's a
+  //       raise, so err on the side of not touching router state)
+  const isRaising = previousValue !== null && value > previousValue;
+  const ensureLive = async (sessionToken) => {
+    await sendWsEvent(base, sessionToken, { type: "update_settings", data: { max_agent_hops: value } });
+    if (isRaising) {
+      // Check AC's /api/status before firing /continue so we don't
+      // reset hop_count on a running (unpaused) chain. The endpoint
+      // exposes `paused: true` iff ANY channel is currently paused.
+      let isPaused = false;
+      try {
+        // AC's security middleware (app.py:212-224) only accepts
+        // bearer auth for /api/messages, /api/send, and /api/rules/*.
+        // /api/status requires an x-session-token header (or ?token=),
+        // so pass that instead — a bearer header silently 403s and
+        // leaves isPaused stuck at false, defeating the gate.
+        const statusUrl = `${base}/api/status`;
+        const statusRes = await fetch(statusUrl, {
+          headers: sessionToken ? { "x-session-token": sessionToken } : {},
+          signal: AbortSignal.timeout(5000),
+        });
+        if (statusRes.ok) {
+          const statusJson = await statusRes.json();
+          isPaused = !!(statusJson && statusJson.paused);
+        }
+      } catch {
+        // Status fetch failed — err toward "don't auto-resume". The
+        // operator can always type /continue manually.
+      }
+      if (isPaused) {
+        // Resume paused channels. /continue is routed by AC's ws
+        // message handler when the buffer starts with /continue;
+        // the handler calls router.continue_routing() which
+        // unpauses AND resets hop_count — which is why we gate on
+        // isPaused to avoid wiping the counter on a live chain.
+        await sendWsEvent(base, sessionToken, { type: "message", text: "/continue", channel: "general", sender: "user" });
+        autoResumed = true;
+      }
+    }
+    live = true;
+  };
+  let base = null;
   try {
-    const { url: base, token: sessionToken } = getChattrConfig(projectId);
+    const chattr = getChattrConfig(projectId);
+    base = chattr.url;
+    const sessionToken = chattr.token;
     if (base) {
-      const event = { type: "update_settings", data: { max_agent_hops: value } };
       try {
-        await sendWsEvent(base, sessionToken, event);
-        live = true;
+        await ensureLive(sessionToken);
      } catch (err) {
        if (err && err.code === "EAGENTCHATTR_401") {
          console.warn(`[loop-guard] ws auth failed for ${projectId}, re-syncing session token and retrying...`);
@@ -305,8 +380,7 @@ router.put("/api/loop-guard", async (req, res) => {
          const { token: refreshed } = getChattrConfig(projectId);
          if (refreshed && refreshed !== sessionToken) {
            try {
-              await sendWsEvent(base, refreshed, event);
-              live = true;
+              await ensureLive(refreshed);
            } catch (retryErr) {
              console.warn(`[loop-guard] retry after token resync failed: ${retryErr.message || retryErr}`);
            }
@@ -320,7 +394,7 @@ router.put("/api/loop-guard", async (req, res) => {
    console.warn(`[loop-guard] live update failed for ${projectId}: ${err.message || err}`);
  }

-  res.json({ ok: true, value, live });
+  res.json({ ok: true, value, live, previousValue, resumed: autoResumed });
 });

 // #412 / quadwork#279: project history export + import.
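A minimal sketch of how a dashboard client might exercise the updated endpoint and read the new response fields. The host/port, project id, and the request-body shape ({ value }) are assumptions here; the body parsing happens above the first hunk and is not part of this diff.

    // Hypothetical client call against a local quadwork server.
    (async () => {
      const res = await fetch("http://127.0.0.1:8400/api/loop-guard?project=demo", {
        method: "PUT",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ value: 40 }), // assumed request shape
      });
      const data = await res.json();
      // data.live: the running AC accepted the live update
      // data.previousValue: max_agent_hops before the rewrite (null if unreadable)
      // data.resumed: true only when a paused router was auto-resumed
      console.log(data);
    })();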
@@ -532,6 +606,214 @@ router.post("/api/project-history", async (req, res) => {
   res.json({ ok: errors.length === 0, imported, skipped, total: body.messages.length, errors });
 });

+// #424 / quadwork#304 Phase 4: list + restore auto-snapshots.
+// snapshotProjectHistory() in server/index.js writes envelope
+// files to ~/.quadwork/{id}/history-snapshots/{ISO}.json before
+// destructive restart/update operations. These endpoints let the
+// Project History widget surface them with a restore button so
+// the operator can roll back a bad /clear or botched update.
+router.get("/api/project-history/snapshots", (req, res) => {
+  const projectId = req.query.project;
+  if (!projectId) return res.status(400).json({ error: "Missing project" });
+  const snapDir = path.join(CONFIG_DIR, projectId, "history-snapshots");
+  if (!fs.existsSync(snapDir)) return res.json({ snapshots: [] });
+  try {
+    const entries = fs.readdirSync(snapDir)
+      .filter((f) => f.endsWith(".json"))
+      .map((f) => {
+        const st = fs.statSync(path.join(snapDir, f));
+        return { name: f, size: st.size, mtime: st.mtimeMs };
+      })
+      .sort((a, b) => b.mtime - a.mtime);
+    res.json({ snapshots: entries });
+  } catch (err) {
+    res.status(500).json({ error: "Failed to list snapshots", detail: err.message });
+  }
+});
+
+router.post("/api/project-history/restore", async (req, res) => {
+  const projectId = req.query.project;
+  const name = req.query.name || req.body?.name;
+  if (!projectId || !name) return res.status(400).json({ error: "Missing project or name" });
+  // Prevent path traversal — only allow basenames from the snapshot
+  // directory; reject anything with a separator or ".." segment.
+  if (name !== path.basename(name) || name.includes("..") || !name.endsWith(".json")) {
+    return res.status(400).json({ error: "Invalid snapshot name" });
+  }
+  const snapPath = path.join(CONFIG_DIR, projectId, "history-snapshots", name);
+  if (!fs.existsSync(snapPath)) {
+    return res.status(404).json({ error: "Snapshot not found" });
+  }
+  let body;
+  try {
+    const text = fs.readFileSync(snapPath, "utf-8");
+    body = JSON.parse(text);
+  } catch (err) {
+    return res.status(500).json({ error: "Failed to read snapshot", detail: err.message });
+  }
+  // Post the snapshot back through the existing import endpoint
+  // with both bypass flags — the snapshot contains real agent
+  // senders (so allow_agent_senders) and may match a previous
+  // restore's exported_at (so allow_duplicate). This is the
+  // legitimate disaster-recovery case the #297 denylist expected.
+  try {
+    const cfg = JSON.parse(fs.readFileSync(CONFIG_PATH, "utf-8"));
+    const qwPort = cfg.port || 8400;
+    const r = await fetch(`http://127.0.0.1:${qwPort}/api/project-history?project=${encodeURIComponent(projectId)}`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ ...body, allow_agent_senders: true, allow_duplicate: true }),
+    });
+    const data = await r.json().catch(() => null);
+    if (!r.ok) {
+      return res.status(r.status).json(data || { error: `import returned ${r.status}` });
+    }
+    res.json({ ok: true, ...(data || {}) });
+  } catch (err) {
+    res.status(502).json({ error: "Restore failed", detail: err.message });
+  }
+});
+
+// #430 / quadwork#312: AI team work-hours tracking.
+//
+// The frontend's TerminalGrid detects per-agent activity transitions
+// (idle → active, active → idle) via the existing activity ref and
+// POSTs them to /api/activity/log. We buffer `start` events in
+// memory keyed by `${project}/${agent}`; an `end` event looks up the
+// matching buffered start, computes the duration, and appends a
+// complete session row to ~/.quadwork/{project}/activity.jsonl.
+//
+// /api/activity/stats aggregates across all projects with a 30s
+// cache so the dashboard can poll it every minute without thrashing
+// the filesystem.
+
+const _activityStarts = new Map(); // `${project}/${agent}` → startTimestamp
+const _activityStatsCache = { ts: 0, data: null };
+const ACTIVITY_STATS_TTL_MS = 30000;
+
+function activityLogPath(projectId) {
+  return path.join(CONFIG_DIR, projectId, "activity.jsonl");
+}
+
+router.post("/api/activity/log", (req, res) => {
+  const { project, agent, type, timestamp } = req.body || {};
+  if (typeof project !== "string" || !project) return res.status(400).json({ error: "Missing project" });
+  if (typeof agent !== "string" || !agent) return res.status(400).json({ error: "Missing agent" });
+  if (type !== "start" && type !== "end") return res.status(400).json({ error: "type must be start|end" });
+  const ts = typeof timestamp === "number" && Number.isFinite(timestamp) ? timestamp : Date.now();
+  const key = `${project}/${agent}`;
+
+  if (type === "start") {
+    // Only remember the first start per session — duplicate starts
+    // are possible if the frontend re-mounts mid-stream; ignore
+    // them so the session duration reflects the original onset.
+    if (!_activityStarts.has(key)) _activityStarts.set(key, ts);
+    return res.json({ ok: true });
+  }
+
+  // type === "end"
+  const start = _activityStarts.get(key);
+  if (start === undefined) {
+    // Orphan end (missed start — probably happens on server
+    // restart while a session was live). Drop it silently so we
+    // don't write a row with an unknown start timestamp.
+    return res.json({ ok: true, dropped: "orphan" });
+  }
+  _activityStarts.delete(key);
+  const row = { agent, start, end: ts, duration_ms: Math.max(0, ts - start) };
+  try {
+    const p = activityLogPath(project);
+    fs.mkdirSync(path.dirname(p), { recursive: true });
+    fs.appendFileSync(p, JSON.stringify(row) + "\n");
+    // Invalidate the stats cache so the next read sees the new row.
+    _activityStatsCache.ts = 0;
+  } catch (err) {
+    console.warn(`[activity] failed to append ${project}/${agent}: ${err.message || err}`);
+  }
+  res.json({ ok: true, duration_ms: row.duration_ms });
+});
+
+// Aggregate all activity.jsonl files under ~/.quadwork/*/activity.jsonl.
+// `today`, `week`, `month` boundaries use the operator's local
+// timezone rather than UTC — "this week" should mean the week the
+// operator is living in, not a UTC-offset week that starts at
+// 16:00 local time.
+function computeActivityStats() {
+  if (Date.now() - _activityStatsCache.ts < ACTIVITY_STATS_TTL_MS && _activityStatsCache.data) {
+    return _activityStatsCache.data;
+  }
+  const now = new Date();
+  const startOfToday = new Date(now.getFullYear(), now.getMonth(), now.getDate()).getTime();
+  // Start of this week = local Monday 00:00. JS: getDay() → 0-Sun..6-Sat.
+  const day = now.getDay();
+  const mondayOffset = day === 0 ? -6 : 1 - day; // Sun → -6, Mon → 0, Tue → -1, …
+  const startOfWeek = new Date(now.getFullYear(), now.getMonth(), now.getDate() + mondayOffset).getTime();
+  const startOfMonth = new Date(now.getFullYear(), now.getMonth(), 1).getTime();
+
+  const totals = { today_ms: 0, week_ms: 0, month_ms: 0, total_ms: 0 };
+  const byProject = {};
+  // #430 / quadwork#312: only count projects registered in
+  // config.json, not every directory under ~/.quadwork/. Stray
+  // folders from deleted / unconfigured projects must not inflate
+  // the stats — that's explicit in #312's acceptance.
+  let projectIds = [];
+  try {
+    const cfg = JSON.parse(fs.readFileSync(CONFIG_PATH, "utf-8"));
+    if (Array.isArray(cfg.projects)) {
+      projectIds = cfg.projects.map((p) => p && p.id).filter((id) => typeof id === "string" && id);
+    }
+  } catch {
+    // config unreadable → no projects → empty stats (safe fallback)
+  }
+  for (const projectId of projectIds) {
+    const p = activityLogPath(projectId);
+    if (!fs.existsSync(p)) continue;
+    const projectTotals = { today_ms: 0, week_ms: 0, month_ms: 0, total_ms: 0 };
+    let text;
+    try { text = fs.readFileSync(p, "utf-8"); } catch { continue; }
+    for (const line of text.split("\n")) {
+      if (!line.trim()) continue;
+      let row;
+      try { row = JSON.parse(line); } catch { continue; }
+      const d = row && typeof row.duration_ms === "number" ? row.duration_ms : 0;
+      const start = row && typeof row.start === "number" ? row.start : 0;
+      if (d <= 0 || !start) continue;
+      projectTotals.total_ms += d;
+      if (start >= startOfToday) projectTotals.today_ms += d;
+      if (start >= startOfWeek) projectTotals.week_ms += d;
+      if (start >= startOfMonth) projectTotals.month_ms += d;
+    }
+    byProject[projectId] = {
+      today: Math.round(projectTotals.today_ms / 3600) / 1000,
+      week: Math.round(projectTotals.week_ms / 3600) / 1000,
+      month: Math.round(projectTotals.month_ms / 3600) / 1000,
+      total: Math.round(projectTotals.total_ms / 3600) / 1000,
+    };
+    totals.today_ms += projectTotals.today_ms;
+    totals.week_ms += projectTotals.week_ms;
+    totals.month_ms += projectTotals.month_ms;
+    totals.total_ms += projectTotals.total_ms;
+  }
+  const data = {
+    today: Math.round(totals.today_ms / 3600) / 1000,
+    week: Math.round(totals.week_ms / 3600) / 1000,
+    month: Math.round(totals.month_ms / 3600) / 1000,
+    total: Math.round(totals.total_ms / 3600) / 1000,
+    by_project: byProject,
+  };
+  _activityStatsCache.ts = Date.now();
+  _activityStatsCache.data = data;
+  return data;
+}
+
+router.get("/api/activity/stats", (_req, res) => {
+  try {
+    res.json(computeActivityStats());
+  } catch (err) {
+    res.status(500).json({ error: "Failed to compute activity stats", detail: err.message });
+  }
+});
+
 router.post("/api/chat", async (req, res) => {
   const projectId = req.query.project || req.body.project;
   const { url: base, token: sessionToken } = getChattrConfig(projectId);
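To make the new activity flow concrete, a sketch of the start/end round trip and the stats poll, assuming a local quadwork server and illustrative project/agent ids:

    const BASE = "http://127.0.0.1:8400"; // assumed host/port

    async function logTransition(project, agent, type) {
      await fetch(`${BASE}/api/activity/log`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ project, agent, type, timestamp: Date.now() }),
      });
    }

    (async () => {
      await logTransition("demo", "dev", "start"); // buffered in memory
      // ... agent works ...
      await logTransition("demo", "dev", "end"); // appends one JSONL row, e.g.
      // {"agent":"dev","start":1730000000000,"end":1730000042000,"duration_ms":42000}

      const stats = await (await fetch(`${BASE}/api/activity/stats`)).json();
      // today/week/month/total are hours to 3 decimal places:
      // Math.round(ms / 3600) / 1000 equals ms / 3,600,000 rounded to 0.001 h.
      console.log(stats.by_project.demo);
    })();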
@@ -836,15 +1118,128 @@ router.get("/api/github/merged-prs", (req, res) => {
 // deterministic from issue/PR state — no agent inference.
 //
 // Progress mapping (from upstream issue):
-//   queued      0%    issue exists, no linked PR
+//   queued      0%    issue OPEN, no linked PR
 //   in_review   20%   PR open, 0 approvals
 //   approved1   50%   PR open, 1 approval
 //   ready       80%   PR open, 2+ approvals
 //   merged      100%  PR merged AND issue closed
+//   closed      100%  issue CLOSED with no linked PR (superseded,
+//                     not planned, or runbook-only tasks) — #350
 //
 // Cached for 10s per project to avoid hammering gh on every poll.

 const _batchProgressCache = new Map(); // projectId -> { ts, data }
+
+// #429 / quadwork#316: persistent batch snapshot on disk so the
+// Batch Progress panel keeps showing merged items after Head moves
+// them from Active Batch to Done. The in-memory `_batchProgressCache`
+// above is a 10s TTL cache of the rendered rows; this new cache is
+// the *set of issue numbers* we currently consider "the active
+// batch", and it survives restarts + lives across polls.
+function batchSnapshotPath(projectId) {
+  return path.join(CONFIG_DIR, projectId, "batch-progress-cache.json");
+}
+function readBatchSnapshot(projectId) {
+  try {
+    return JSON.parse(fs.readFileSync(batchSnapshotPath(projectId), "utf-8"));
+  } catch {
+    return null;
+  }
+}
+function writeBatchSnapshot(projectId, snapshot) {
+  try {
+    const p = batchSnapshotPath(projectId);
+    fs.mkdirSync(path.dirname(p), { recursive: true });
+    fs.writeFileSync(p, JSON.stringify(snapshot));
+  } catch {
+    // Non-fatal — panel still works from the live parse.
+  }
+}
+function deleteBatchSnapshot(projectId) {
+  try {
+    fs.unlinkSync(batchSnapshotPath(projectId));
+  } catch {
+    // Non-fatal — file may already be gone.
+  }
+}
+
+// #334: verify the snapshot's first issue number still exists on
+// GitHub before trusting the snapshot. A soft existence check is
+// enough — if the first issue genuinely 404s, treat the whole
+// snapshot as stale (most likely a leftover from a prior
+// project/repo that was purged) and let the caller drop it. One
+// gh call per cache miss, wrapped in the existing
+// BATCH_PROGRESS_TTL_MS cache upstream.
+//
+// Returns one of:
+//   "fresh"   — first issue resolved, snapshot is trustworthy
+//   "gone"    — first issue confirmed 404; snapshot should be dropped
+//   "unknown" — transient error (auth/network/timeout); leave
+//               snapshot alone and let the next cache miss retry
+async function checkBatchSnapshotFreshness(repo, snapshot) {
+  if (!snapshot || !Array.isArray(snapshot.issueNumbers) || snapshot.issueNumbers.length === 0) {
+    return "gone";
+  }
+  const first = snapshot.issueNumbers[0];
+  try {
+    await ghJsonExecAsync([
+      "issue",
+      "view",
+      String(first),
+      "-R",
+      repo,
+      "--json",
+      "number",
+    ]);
+    return "fresh";
+  } catch (err) {
+    // gh surfaces a 404 via stderr text on a non-zero exit. Only
+    // the unambiguous "not found" / "could not resolve" shapes
+    // count as genuinely gone; anything else (network, auth,
+    // timeout) is transient and must NOT delete the snapshot.
+    const msg = String((err && (err.stderr || err.message)) || "").toLowerCase();
+    if (msg.includes("could not resolve") || msg.includes("not found") || msg.includes("no issue")) {
+      return "gone";
+    }
+    return "unknown";
+  }
+}
+
+// Decide which batch to render, combining the live parse of
+// OVERNIGHT-QUEUE.md with the persistent snapshot. The snapshot is
+// replaced whenever a new batch starts (explicit Batch: N bump OR
+// the live Active Batch contains items the snapshot doesn't); in
+// all other cases the snapshot wins, so items Head moved to Done
+// stay visible until the operator starts the next batch.
+function resolveDisplayedBatch(queueText, projectId, { queueReadOk = true } = {}) {
+  // Queue file deleted / unreadable → fall back to empty state per
+  // #316's edge case. Returning the snapshot here would "heal" a
+  // genuinely missing file into stale data the operator can't
+  // reconcile without nuking ~/.quadwork/{id}/batch-progress-cache.json
+  // manually.
+  if (!queueReadOk) return { batchNumber: null, issueNumbers: [] };
+  const current = parseActiveBatch(queueText);
+  const snapshot = readBatchSnapshot(projectId);
+  const hasExplicitBump =
+    current.batchNumber !== null &&
+    (!snapshot || snapshot.batchNumber === null || current.batchNumber > snapshot.batchNumber);
+  const hasNewItems =
+    current.issueNumbers.length > 0 &&
+    (!snapshot || current.issueNumbers.some((n) => !snapshot.issueNumbers.includes(n)));
+  let next;
+  if (hasExplicitBump || hasNewItems) {
+    next = { batchNumber: current.batchNumber, issueNumbers: current.issueNumbers.slice() };
+  } else if (snapshot && Array.isArray(snapshot.issueNumbers) && snapshot.issueNumbers.length > 0) {
+    next = {
+      batchNumber: snapshot.batchNumber ?? null,
+      issueNumbers: snapshot.issueNumbers.slice(),
+    };
+  } else {
+    next = { batchNumber: current.batchNumber, issueNumbers: current.issueNumbers.slice() };
+  }
+  if (next.issueNumbers.length > 0) writeBatchSnapshot(projectId, next);
+  return next;
+}
 const BATCH_PROGRESS_TTL_MS = 10000;

 function parseActiveBatch(queueText) {
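To make the snapshot-vs-live precedence concrete, a sketch of the three outcomes of resolveDisplayedBatch. The parsed values are illustrative; the exact OVERNIGHT-QUEUE.md format is handled by parseActiveBatch below.

    // 1) No snapshot yet: the live parse wins and is persisted.
    //    live parse → { batchNumber: 7, issueNumbers: [101, 102] }
    //    returns      { batchNumber: 7, issueNumbers: [101, 102] }
    //
    // 2) Head moves #101 to Done; the live parse now sees only #102.
    //    No batch bump, no new items, so the snapshot wins and the
    //    merged item stays visible:
    //    live parse → { batchNumber: 7, issueNumbers: [102] }
    //    returns      { batchNumber: 7, issueNumbers: [101, 102] }
    //
    // 3) Operator starts Batch 8 with #110: explicit bump, snapshot replaced.
    //    live parse → { batchNumber: 8, issueNumbers: [110] }
    //    returns      { batchNumber: 8, issueNumbers: [110] }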
@@ -860,10 +1255,11 @@ function parseActiveBatch(queueText) {
   const batchNumber = batchMatch ? parseInt(batchMatch[1], 10) : null;
   // Only collect issue numbers from lines that look like list-item
   // entries — i.e. lines whose first content token is either `#N`
-  // or `[#N]` after an optional list marker. This rejects prose
-  // like "Tracking umbrella: #293", "next after #294 merged", and
-  // similar dependency / commentary references that t2a flagged on
-  // realproject7/dropcast's queue.
+  // or `[#N]` after an optional list marker, and optionally after
+  // a GitHub-flavored markdown checkbox token `[ ]` / `[x]` / `[X]`.
+  // This rejects prose like "Tracking umbrella: #293", "next after
+  // #294 merged", and similar dependency / commentary references
+  // that t2a flagged on realproject7/dropcast's queue.
   //
   // Accepted line shapes:
   //   - #295 sub-A heartbeat
@@ -872,12 +1268,22 @@ function parseActiveBatch(queueText) {
   //     #295 sub-A heartbeat
   //   - [#295] sub-A heartbeat
   //     [#295] sub-A heartbeat
+  //   - [ ] #295 sub-A heartbeat   (#342/quadwork#341: GFM checkbox)
+  //   - [x] #295 sub-A heartbeat   (checked)
+  //   - [X] #295 sub-A heartbeat   (checked, uppercase)
   //
   // Rejected:
   //   Tracking umbrella: #293
   //   Assigned next after #294 merged.
   //   See #295 for context.
-  const ITEM_LINE_RE = /^\s*(?:[-*]\s+|\d+\.\s+)?\[?#(\d{1,6})\]?\b/;
+  //
+  // The previous regex permitted an optional `[` *immediately*
+  // before `#`, which happened to match `[#295]` but not `[ ] #295`
+  // (a space between `[` and `#`), so Head-generated queues that
+  // used GFM checkbox syntax produced zero issue numbers and the
+  // Current Batch panel showed empty. #341 adds an explicit optional
+  // checkbox token after the list marker.
+  const ITEM_LINE_RE = /^\s*(?:[-*]\s+|\d+\.\s+)?(?:\[[ xX]\]\s+)?\[?#(\d{1,6})\]?\b/;
   const seen = new Set();
   const issueNumbers = [];
   for (const line of section.split("\n")) {
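The new regex can be checked directly against the comment's accepted and rejected shapes; this snippet copies ITEM_LINE_RE from the hunk above and is runnable as-is:

    const ITEM_LINE_RE = /^\s*(?:[-*]\s+|\d+\.\s+)?(?:\[[ xX]\]\s+)?\[?#(\d{1,6})\]?\b/;

    const accepted = [
      "- #295 sub-A heartbeat",
      "  [#295] sub-A heartbeat",
      "- [ ] #295 sub-A heartbeat", // GFM checkbox (missed by the old regex)
      "- [x] #295 sub-A heartbeat",
      "1. #295 numbered list item",
    ];
    const rejected = [
      "Tracking umbrella: #293",
      "Assigned next after #294 merged.",
      "See #295 for context.",
    ];

    for (const line of accepted) console.log(ITEM_LINE_RE.exec(line)[1]); // "295"
    for (const line of rejected) console.log(ITEM_LINE_RE.exec(line)); // null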
@@ -909,6 +1315,32 @@ async function ghJsonExecAsync(args) {
   return JSON.parse(stdout);
 }

+// #350: pure helper for the "no linked PR" branch of
+// progressForItemAsync. Takes the issue JSON (shape: { number,
+// title, state, url, ... }) and returns the batch-progress row
+// for an item that has no closedByPullRequestsReferences. Exported
+// from module.exports below for unit tests — no other callers.
+function buildNoPrRow(issue) {
+  if (issue && issue.state === "CLOSED") {
+    return {
+      issue_number: issue.number,
+      title: issue.title,
+      url: issue.url,
+      status: "closed",
+      progress: 100,
+      label: "Closed (no PR) ✓",
+    };
+  }
+  return {
+    issue_number: issue.number,
+    title: issue.title,
+    url: issue.url,
+    status: "queued",
+    progress: 0,
+    label: "Issue · queued",
+  };
+}
+
 async function progressForItemAsync(repo, issueNumber) {
   // Pull issue state + linked PRs in one call. closedByPullRequestsReferences
   // is gh's serializer for the GraphQL `closedByPullRequestsReferences`
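Since buildNoPrRow is pure and exported at the bottom of this diff, its two branches are easy to pin down with node:assert. A sketch, assuming the helper is in scope; the issue objects follow the gh --json shape the comment describes, and the URLs are placeholders:

    const assert = require("node:assert");

    const closedRow = buildNoPrRow({
      number: 350,
      title: "Runbook-only task",
      state: "CLOSED",
      url: "https://github.com/example/repo/issues/350",
    });
    assert.strictEqual(closedRow.status, "closed");
    assert.strictEqual(closedRow.progress, 100);
    assert.strictEqual(closedRow.label, "Closed (no PR) ✓");

    const openRow = buildNoPrRow({
      number: 351,
      title: "Not started yet",
      state: "OPEN",
      url: "https://github.com/example/repo/issues/351",
    });
    assert.strictEqual(openRow.status, "queued");
    assert.strictEqual(openRow.progress, 0);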
@@ -935,16 +1367,14 @@ async function progressForItemAsync(repo, issueNumber) {
   const pr = linked.length > 0
     ? linked.slice().sort((a, b) => (b.number || 0) - (a.number || 0))[0]
     : null;
-  // No linked PR yet → queued.
+  // No linked PR. #350: before falling into the "queued" bucket,
+  // honor the issue's own state — a CLOSED issue with no linked
+  // PR is fully done (superseded, not planned, runbook-only, etc.)
+  // and should render at 100% with a ✓ label instead of a
+  // misleading "0% · queued" row. Only truly OPEN issues with no
+  // linked PR are still queued.
   if (!pr) {
-    return {
-      issue_number: issue.number,
-      title: issue.title,
-      url: issue.url,
-      status: "queued",
-      progress: 0,
-      label: "Issue · queued",
-    };
+    return buildNoPrRow(issue);
   }
   // Re-fetch the PR to get reviewDecision + reviews + state, since
   // the issue's closedByPullRequestsReferences edge only carries
@@ -1048,15 +1478,23 @@ async function progressForItemAsync(repo, issueNumber) {
 }

 function summarizeItems(items) {
-  let merged = 0, ready = 0, approved1 = 0, inReview = 0, queued = 0;
+  // #350: "closed" (CLOSED issue with no linked PR — superseded,
+  // not planned, runbook-only) counts toward the complete tally
+  // alongside "merged". The panel tally now reads "X/N complete"
+  // when the batch mixes both kinds of completion, otherwise
+  // "X/N merged" for the classic all-via-PR case.
+  let merged = 0, closed = 0, ready = 0, approved1 = 0, inReview = 0, queued = 0;
   for (const it of items) {
     if (it.status === "merged") merged++;
+    else if (it.status === "closed") closed++;
     else if (it.status === "ready") ready++;
     else if (it.status === "approved1") approved1++;
     else if (it.status === "in_review") inReview++;
     else if (it.status === "queued") queued++;
   }
-  const parts = [`${merged}/${items.length} merged`];
+  const done = merged + closed;
+  const doneLabel = closed > 0 ? "complete" : "merged";
+  const parts = [`${done}/${items.length} ${doneLabel}`];
   if (ready > 0) parts.push(`${ready} ready to merge`);
   if (approved1 > 0) parts.push(`${approved1} needs 2nd approval`);
   if (inReview > 0) parts.push(`${inReview} in review`);
@@ -1078,10 +1516,41 @@ router.get("/api/batch-progress", async (req, res) => {

   const queuePath = path.join(CONFIG_DIR, projectId, "OVERNIGHT-QUEUE.md");
   let queueText = "";
-  try { queueText = fs.readFileSync(queuePath, "utf-8"); }
-  catch { /* missing file → empty active batch */ }
+  let queueReadOk = false;
+  try {
+    queueText = fs.readFileSync(queuePath, "utf-8");
+    queueReadOk = true;
+  } catch {
+    // Missing / unreadable file — pass queueReadOk=false so the
+    // resolver bypasses the snapshot and returns the empty state
+    // per #316's edge case.
+  }
+
+  // #334 / quadwork#334: validate the on-disk snapshot against
+  // GitHub before resolveDisplayedBatch can serve it. A snapshot
+  // whose first issue 404s is almost certainly a leftover from a
+  // prior project/repo that was purged; drop the file so the
+  // resolver falls through to the live queue parse (which will
+  // typically also be empty) instead of serving stale data
+  // indefinitely. We only run the check on cache-miss paths (this
+  // route already sits behind BATCH_PROGRESS_TTL_MS) and only
+  // when we'd actually rely on the snapshot — i.e. the live queue
+  // read succeeded, so the existing #316 bypass for unreadable
+  // queue files keeps precedence.
+  if (queueReadOk) {
+    const existing = readBatchSnapshot(projectId);
+    if (existing && Array.isArray(existing.issueNumbers) && existing.issueNumbers.length > 0) {
+      const freshness = await checkBatchSnapshotFreshness(repo, existing);
+      if (freshness === "gone") deleteBatchSnapshot(projectId);
+      // "unknown" → leave the file alone; transient failure will
+      // retry on the next cache miss.
+    }
+  }

-  const { batchNumber, issueNumbers } = parseActiveBatch(queueText);
+  // #429 / quadwork#316: resolve the displayed batch through the
+  // snapshot-aware helper so merged items stay visible after Head
+  // moves them from Active Batch to Done, until a new batch starts.
+  const { batchNumber, issueNumbers } = resolveDisplayedBatch(queueText, projectId, { queueReadOk });
   if (issueNumbers.length === 0) {
     const data = { batch_number: batchNumber, items: [], summary: "", complete: false };
     _batchProgressCache.set(projectId, { ts: Date.now(), data });
@@ -1109,7 +1578,10 @@ router.get("/api/batch-progress", async (req, res) => {
     };
   });
   const summary = summarizeItems(items);
-  const complete = items.length > 0 && items.every((it) => it.status === "merged");
+  // #350: treat CLOSED-without-PR items as complete alongside merged
+  // so batches that mix runbook/superseded closes with real PRs
+  // still flip to the COMPLETE state once everything is done.
+  const complete = items.length > 0 && items.every((it) => it.status === "merged" || it.status === "closed");
   const data = { batch_number: batchNumber, items, summary, complete };
   _batchProgressCache.set(projectId, { ts: Date.now(), data });
   res.json(data);
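Putting the #350 pieces together, a sketch of the response shape for a batch that mixes one merged PR with one closed-without-PR issue. Titles, URLs, and the merged row's label are illustrative (the merged path is outside these hunks); with a single summary part the tally string is unaffected by the join separator:

    const exampleResponse = {
      batch_number: 7,
      items: [
        { issue_number: 101, title: "...", url: "...", status: "merged", progress: 100, label: "..." },
        { issue_number: 102, title: "...", url: "...", status: "closed", progress: 100, label: "Closed (no PR) ✓" },
      ],
      summary: "2/2 complete", // done = merged + closed; "complete" because closed > 0
      complete: true, // every item is merged or closed
    };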
@@ -1602,6 +2074,12 @@ router.post("/api/setup", (req, res) => {
     return res.json({ ok: true, message: "Project already in config" });
   }
   // Match CLI wizard agent structure: { cwd, command, auto_approve, mcp_inject }
+  // #343: default Codex-backed agents to reasoning_effort="medium"
+  // instead of the upstream xhigh/high default. high/xhigh is the
+  // provider-side capacity-failure hot spot; medium is the safe
+  // default for fresh installs so new projects don't hit
+  // "Selected model is at capacity" out of the box. Operators can
+  // bump individual agents back up via the Agent Models widget.
   const agents = {};
   for (const agentId of ["head", "reviewer1", "reviewer2", "dev"]) {
     const cmd = (backends && backends[agentId]) || "claude";
@@ -1612,6 +2090,7 @@ router.post("/api/setup", (req, res) => {
       command: cmd,
       auto_approve: autoApprove,
       mcp_inject: injectMode,
+      ...(cliBase === "codex" ? { reasoning_effort: "medium" } : {}),
     };
   }
   // Use pre-assigned ports/token from agentchattr-config step if provided,
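The resulting config.json entry for a codex-backed agent would look like the sketch below. The cwd, auto_approve, and mcp_inject values are illustrative; the reasoning_effort key is the #343 addition:

    const devAgent = {
      cwd: "/home/op/projects/demo",
      command: "codex",
      auto_approve: true,
      mcp_inject: "auto",
      reasoning_effort: "medium", // omitted entirely for non-codex backends
    };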
@@ -1821,6 +2300,66 @@ function telegramConfigToml(projectId) {
   return path.join(CONFIG_DIR, `telegram-${projectId}.toml`);
 }

+// #353: per-project log file for the bridge subprocess. The start
+// handler redirects stdout + stderr here so crashes (ImportError,
+// config parse, auth failure) are recoverable instead of
+// /dev/null'd by `stdio: "ignore"`.
+function telegramBridgeLog(projectId) {
+  return path.join(CONFIG_DIR, `telegram-bridge-${projectId}.log`);
+}
+
+// Tail the last N lines of a file without reading the whole thing
+// into memory if it is huge. For the bridge log we care about the
+// final crash frame, not historical output.
+function readLastLines(filePath, n) {
+  try {
+    if (!fs.existsSync(filePath)) return "";
+    const stat = fs.statSync(filePath);
+    const readBytes = Math.min(stat.size, 64 * 1024);
+    if (readBytes === 0) return "";
+    const buf = Buffer.alloc(readBytes);
+    const fd = fs.openSync(filePath, "r");
+    try {
+      fs.readSync(fd, buf, 0, readBytes, Math.max(0, stat.size - readBytes));
+    } finally {
+      fs.closeSync(fd);
+    }
+    const text = buf.toString("utf-8");
+    const lines = text.split(/\r?\n/).filter((l) => l.length > 0);
+    return lines.slice(-n).join("\n");
+  } catch {
+    return "";
+  }
+}
+
+// Verify that the bridge's Python runtime has its required modules
+// available. Cheap pre-flight so a missing `requests` install
+// produces a readable error instead of a silent Start → Stopped
+// flicker. Returns { ok: true } on success, { ok: false, error }
+// otherwise. Keep the import list small and close to what the
+// bridge actually needs; add modules here if the bridge gains new
+// hard deps.
+function checkTelegramBridgePythonDeps() {
+  try {
+    // Only check the third-party module the bridge actually needs
+    // at import time — `requests`. Toml parsing differs between
+    // Python versions (tomllib on 3.11+, tomli on 3.10-), and any
+    // genuine toml import failure will now be captured in the
+    // bridge log file on spawn, so this pre-flight stays narrow
+    // and avoids false negatives on older Python installs.
+    execFileSync("python3", ["-c", "import requests"], {
+      encoding: "utf-8",
+      timeout: 10000,
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+    return { ok: true };
+  } catch (err) {
+    const stderr = (err && err.stderr && err.stderr.toString && err.stderr.toString()) || "";
+    const msg = stderr.trim() || (err && err.message) || "python3 import check failed";
+    return { ok: false, error: msg };
+  }
+}
+
 function isTelegramRunning(projectId) {
   const pf = telegramPidFile(projectId);
   if (!fs.existsSync(pf)) return false;
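readLastLines reads at most the final 64 KiB of the file, so a long-lived bridge log is never slurped whole. A quick runnable check, assuming the helper is in scope (it is exported at the bottom of this diff; the temp path is illustrative):

    const fs = require("node:fs");
    const os = require("node:os");
    const path = require("node:path");

    const tmp = path.join(os.tmpdir(), "bridge-demo.log");
    fs.writeFileSync(tmp, Array.from({ length: 100 }, (_, i) => `line ${i}`).join("\n"));
    console.log(readLastLines(tmp, 3)); // "line 97\nline 98\nline 99"
    console.log(readLastLines(path.join(os.tmpdir(), "missing.log"), 3)); // ""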
@@ -1919,12 +2458,29 @@ router.get("/api/telegram", async (req, res) => {
      }
    } catch { /* non-fatal — widget will just show no username */ }
  }
+  // #353: if the bridge is not running but a log file exists with
+  // content, tail it and expose it as `last_error` so the widget
+  // can surface runtime crashes (bad token mid-session, network
+  // failure, config parse error) that happen after the initial
+  // 500 ms post-spawn liveness check and would otherwise just
+  // revert the pill to Stopped with no explanation.
+  const running = isTelegramRunning(projectId);
+  let lastError = "";
+  if (!running) {
+    const logPath = telegramBridgeLog(projectId);
+    try {
+      if (fs.existsSync(logPath) && fs.statSync(logPath).size > 0) {
+        lastError = readLastLines(logPath, 20);
+      }
+    } catch {}
+  }
  res.json({
-    running: isTelegramRunning(projectId),
+    running,
    configured,
    chat_id: chatId,
    bot_username: botUsername,
    bridge_installed: bridgeInstalled,
+    last_error: lastError,
  });
 });

@@ -1947,15 +2503,39 @@ router.post("/api/telegram", async (req, res) => {
      }
    }
    case "install": {
+      // #353: pip3 can exit 0 on some systems (PEP 668 externally-
+      // managed environments, non-writable site-packages) even when
+      // the subsequent import still fails. After the pip step, run
+      // a post-install import check and surface both the pip output
+      // and the import error together if the check fails — that's
+      // the signal the operator needs to know whether to pick a
+      // virtualenv, use --user, or --break-system-packages.
+      let pipOutput = "";
      try {
        if (!fs.existsSync(BRIDGE_DIR)) {
          execFileSync("gh", ["repo", "clone", "realproject7/agentchattr-telegram", BRIDGE_DIR], { encoding: "utf-8", timeout: 30000 });
        }
-        execFileSync("pip3", ["install", "-r", path.join(BRIDGE_DIR, "requirements.txt")], { encoding: "utf-8", timeout: 30000 });
-        return res.json({ ok: true });
+        pipOutput = execFileSync(
+          "pip3",
+          ["install", "-r", path.join(BRIDGE_DIR, "requirements.txt")],
+          { encoding: "utf-8", timeout: 60000 },
+        );
      } catch (err) {
        return res.json({ ok: false, error: err.message || "Install failed" });
      }
+      const depCheck = checkTelegramBridgePythonDeps();
+      if (!depCheck.ok) {
+        return res.json({
+          ok: false,
+          error:
+            "pip3 reported success but the bridge's Python deps still fail to import. " +
+            "This usually means pip installed into a location python3 cannot see " +
+            "(externally-managed environment / PEP 668 / mismatched interpreter).\n\n" +
+            `Import error: ${depCheck.error}\n\n` +
+            `pip output tail:\n${pipOutput.split("\n").slice(-10).join("\n")}`,
+        });
+      }
+      return res.json({ ok: true });
    }
    case "start": {
      const projectId = body.project_id;
@@ -1969,14 +2549,75 @@ router.post("/api/telegram", async (req, res) => {
      const tomlContent = `[telegram]\nbot_token = "${tg.bot_token}"\nchat_id = "${tg.chat_id}"\n\n[agentchattr]\nurl = "${tg.agentchattr_url}"\n`;
      fs.writeFileSync(tomlPath, tomlContent, { mode: 0o600 });
      fs.chmodSync(tomlPath, 0o600);
+      // #353: pre-flight import check so a fresh install with no
+      // `requests` module produces a readable error instead of the
+      // Start → Running → Stopped flicker that the v1 code path
+      // produced with `stdio: "ignore"`.
+      const depCheck = checkTelegramBridgePythonDeps();
+      if (!depCheck.ok) {
+        return res.json({
+          ok: false,
+          error:
+            "Bridge Python dependencies not installed. Click \"Install Bridge\" to install them, " +
+            "or run: pip3 install -r " + path.join(BRIDGE_DIR, "requirements.txt") + "\n\n" +
+            `Import error: ${depCheck.error}`,
+        });
+      }
+      // #353: capture stdout + stderr to a per-project log file so
+      // bridge crashes (bad token, network failure, config parse
+      // error, etc.) are recoverable. The handle must be opened
+      // BEFORE spawn and passed through stdio so the detached
+      // child keeps writing after the parent unrefs it.
+      const logPath = telegramBridgeLog(projectId);
+      // #353 follow-up: truncate the log at the start of every
+      // spawn so the status endpoint's last_error tail only ever
+      // reflects the *current* session. Otherwise a previous
+      // crash's trace would linger forever and the widget would
+      // keep surfacing a stale error even after the operator
+      // fixed the underlying problem and restarted cleanly.
+      try { fs.writeFileSync(logPath, ""); } catch {}
+      let outFd, errFd;
+      try {
+        outFd = fs.openSync(logPath, "a");
+        errFd = fs.openSync(logPath, "a");
+      } catch (err) {
+        return res.json({ ok: false, error: `Could not open bridge log file: ${err.message}` });
+      }
+      let child;
      try {
-        const child = spawn("python3", [bridgeScript, "--config", tomlPath], { detached: true, stdio: "ignore" });
+        child = spawn("python3", [bridgeScript, "--config", tomlPath], {
+          detached: true,
+          stdio: ["ignore", outFd, errFd],
+        });
        child.unref();
        if (child.pid) fs.writeFileSync(telegramPidFile(projectId), String(child.pid));
-        return res.json({ ok: true, running: true, pid: child.pid });
      } catch (err) {
+        try { fs.closeSync(outFd); } catch {}
+        try { fs.closeSync(errFd); } catch {}
        return res.json({ ok: false, error: err.message || "Start failed" });
      }
+      // Close our copies of the fds in the parent now that the
+      // child has inherited them — otherwise the parent holds the
+      // log file open forever.
+      try { fs.closeSync(outFd); } catch {}
+      try { fs.closeSync(errFd); } catch {}
+      // #353: liveness check — wait 500ms, then verify the child
+      // is still running. If it already died, tail the log file
+      // and return those lines as the error.
+      await new Promise((r) => setTimeout(r, 500));
+      let alive = true;
+      try { process.kill(child.pid, 0); } catch { alive = false; }
+      if (!alive) {
+        const tail = readLastLines(logPath, 20);
+        try { fs.unlinkSync(telegramPidFile(projectId)); } catch {}
+        return res.json({
+          ok: false,
+          error:
+            "Bridge crashed on start (exited within 500ms).\n\n" +
+            `Last log lines (${logPath}):\n${tail || "(log empty)"}`,
+        });
+      }
+      return res.json({ ok: true, running: true, pid: child.pid });
    }
    case "stop": {
      const projectId = body.project_id;
@@ -2050,4 +2691,82 @@ router.post("/api/telegram", async (req, res) => {
  }
 });

+// #343: per-agent model + reasoning-effort settings endpoint.
+// GET returns the rows the dashboard Agent Models widget needs;
+// PUT persists a single row back to config.json. Kept narrow on
+// purpose — only `model` and `reasoning_effort` are writable
+// here, and codex is the only backend that accepts
+// reasoning_effort today. The launch-time wiring lives in
+// server/index.js buildAgentArgs; this endpoint is purely
+// config storage.
+const ALLOWED_REASONING_EFFORTS = new Set(["minimal", "low", "medium", "high"]);
+
+router.get("/api/project/:projectId/agent-models", (req, res) => {
+  try {
+    const cfg = JSON.parse(fs.readFileSync(CONFIG_PATH, "utf-8"));
+    const project = cfg.projects?.find((p) => p.id === req.params.projectId);
+    if (!project) return res.status(404).json({ error: "Unknown project" });
+    const rows = ["head", "reviewer1", "reviewer2", "dev"].map((agentId) => {
+      const a = project.agents?.[agentId] || {};
+      const command = a.command || "claude";
+      const cliBase = command.split("/").pop().split(" ")[0];
+      return {
+        agent_id: agentId,
+        backend: cliBase,
+        model: a.model || "",
+        reasoning_effort: a.reasoning_effort || "",
+        reasoning_supported: cliBase === "codex",
+      };
+    });
+    return res.json({ agents: rows });
+  } catch (err) {
+    return res.status(500).json({ error: err.message || "read failed" });
+  }
+});
+
+router.put("/api/project/:projectId/agent-models/:agentId", (req, res) => {
+  const { projectId, agentId } = req.params;
+  if (!["head", "reviewer1", "reviewer2", "dev"].includes(agentId)) {
+    return res.json({ ok: false, error: "Unknown agent" });
+  }
+  const body = req.body || {};
+  // Accept empty string as "clear override → fall back to CLI default".
+  const model = typeof body.model === "string" ? body.model.trim() : undefined;
+  const reasoning = typeof body.reasoning_effort === "string" ? body.reasoning_effort.trim() : undefined;
+  if (reasoning && reasoning !== "" && !ALLOWED_REASONING_EFFORTS.has(reasoning)) {
+    return res.json({ ok: false, error: `Invalid reasoning_effort: ${reasoning}` });
+  }
+  try {
+    const raw = fs.readFileSync(CONFIG_PATH, "utf-8");
+    const cfg = JSON.parse(raw);
+    const project = cfg.projects?.find((p) => p.id === projectId);
+    if (!project) return res.status(404).json({ ok: false, error: "Unknown project" });
+    if (!project.agents) project.agents = {};
+    const a = project.agents[agentId] || {};
+    if (model !== undefined) {
+      if (model === "") delete a.model;
+      else a.model = model;
+    }
+    if (reasoning !== undefined) {
+      if (reasoning === "") delete a.reasoning_effort;
+      else a.reasoning_effort = reasoning;
+    }
+    project.agents[agentId] = a;
+    fs.writeFileSync(CONFIG_PATH, JSON.stringify(cfg, null, 2));
+    return res.json({ ok: true, agent: { agent_id: agentId, model: a.model || "", reasoning_effort: a.reasoning_effort || "" } });
+  } catch (err) {
+    return res.json({ ok: false, error: err.message || "write failed" });
+  }
+});
+
 module.exports = router;
+// #341: export parseActiveBatch for unit tests. No production callers
+// outside this file; the export is strictly for the node:assert
+// script at server/routes.parseActiveBatch.test.js.
+module.exports.parseActiveBatch = parseActiveBatch;
+// #350: same pattern — expose the no-linked-PR row builder and
+// summarizeItems for the batch-progress fixture test.
+module.exports.buildNoPrRow = buildNoPrRow;
+module.exports.summarizeItems = summarizeItems;
+// #353: expose readLastLines for the telegram-bridge test.
+module.exports.readLastLines = readLastLines;
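A sketch of the kind of node:assert check the new test files can now run against these exports. The require path is an assumption, and the exact summary string depends on how the parts are joined (that join sits outside this diff's hunks), so the check below only pins the leading tally:

    const assert = require("node:assert");
    const routes = require("./routes.js"); // assumes server deps resolve, as in the shipped *.test.js files

    const items = [
      { status: "merged" },
      { status: "closed" },
      { status: "in_review" },
    ];
    // merged + closed = 2 of 3 done; closed > 0 flips the label to "complete".
    assert.ok(routes.summarizeItems(items).startsWith("2/3 complete"));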