@cgh567/agent 2.4.1 → 2.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/bin/helios +0 -0
  2. package/bin/helios-rpc-node-wrapper.cjs +0 -0
  3. package/bin/helios-rpc-wrapper.sh +0 -0
  4. package/daemon/adapters/helios-rpc-adapter.js +47 -25
  5. package/daemon/adapters/tui_wakeup.js +8 -0
  6. package/daemon/config/com.familiar.helios-daemon.plist +5 -0
  7. package/daemon/config/helios-daemon.service +4 -0
  8. package/daemon/context-enrichment.js +59 -21
  9. package/daemon/daemon-manager.js +1 -1
  10. package/daemon/db/email-infrastructure-migrate.js +192 -0
  11. package/daemon/db/hbo-core-migrate.js +189 -0
  12. package/daemon/helios-api.js +723 -57
  13. package/daemon/helios-company-daemon.js +616 -134
  14. package/daemon/lib/harada/cascade-judge.js +12 -50
  15. package/daemon/lib/harada/mandala.js +20 -0
  16. package/daemon/lib/harada/pillar-dispatcher.js +1 -1
  17. package/daemon/lib/harada/project-factory.js +7 -2
  18. package/daemon/lib/hbo-bridge.js +32 -13
  19. package/daemon/lib/hed-engine.js +10 -292
  20. package/daemon/lib/helios-hitl-host.js +15 -2
  21. package/daemon/lib/hitl-interaction-service.js +0 -0
  22. package/daemon/lib/memgraph-verify.js +38 -33
  23. package/daemon/lib/project-drift-detector.js +7 -17
  24. package/daemon/lib/project-semantic-updater.js +1 -14
  25. package/daemon/lib/task-completion-processor.js +11 -0
  26. package/daemon/lib/wizard-engine.js +57 -6
  27. package/daemon/routes/channels.js +10 -5
  28. package/daemon/routes/harada-map.js +11 -48
  29. package/daemon/routes/hbo.js +342 -75
  30. package/daemon/routes/hitl.js +0 -0
  31. package/daemon/routes/project.js +194 -62
  32. package/daemon/routes/routines.js +14 -0
  33. package/daemon/routes/tasks.js +15 -1
  34. package/daemon/routes/wizard.js +11 -4
  35. package/daemon/schema-apply.js +174 -0
  36. package/daemon/schema-definitions.js +423 -0
  37. package/daemon/schema-migrations-hbo.js +10 -0
  38. package/daemon/schema-migrations-hed.js +18 -0
  39. package/daemon/schema-migrations-hitl.js +0 -0
  40. package/daemon/schema-migrations-proj.js +131 -0
  41. package/extensions/001-tool-output-cap.ts +0 -0
  42. package/extensions/context-compaction.ts +45 -26
  43. package/extensions/cortex/activation-bridge.ts +5 -0
  44. package/extensions/cortex/learn.ts +26 -0
  45. package/extensions/cortex/wal-replay.ts +91 -0
  46. package/extensions/email/backfill.ts +0 -0
  47. package/extensions/helios-governance/analysis/ambiguity.ts +0 -0
  48. package/extensions/helios-governance/analysis/compliance.ts +0 -0
  49. package/extensions/helios-governance/analysis/long-task-detector.ts +0 -0
  50. package/extensions/helios-governance/analysis/output-contract.ts +0 -0
  51. package/extensions/helios-governance/analysis/patterns.ts +0 -0
  52. package/extensions/helios-governance/analysis/preflight.ts +0 -0
  53. package/extensions/helios-governance/analysis/recurring-violations.ts +0 -0
  54. package/extensions/helios-governance/analysis/task-classification.ts +0 -0
  55. package/extensions/helios-governance/analysis/task-intent.ts +0 -0
  56. package/extensions/helios-governance/gates/high-impact.ts +1 -1
  57. package/extensions/helios-governance/handlers/_jiti-require.ts +15 -8
  58. package/extensions/helios-governance/handlers/proxy-test-detector.ts +0 -0
  59. package/extensions/hema-dispatch-v3/graph-memory.ts +10 -0
  60. package/extensions/hema-dispatch-v3/index.ts +72 -47
  61. package/extensions/lib/elo-engine.js +0 -0
  62. package/extensions/lib/elo-engine.test.js +0 -0
  63. package/extensions/memgraph-autostart.ts +13 -0
  64. package/extensions/neuroplastic-eval.ts +0 -0
  65. package/extensions/shadow-loop/index.ts +0 -0
  66. package/extensions/warm-tick/warm-tick-maintenance.ts +8 -0
  67. package/lib/__tests__/hbo-core-store.test.js +238 -0
  68. package/lib/brain-v2-budget.js +0 -0
  69. package/lib/brain-v2-circuit-breaker.js +0 -0
  70. package/lib/brain-v2.js +0 -0
  71. package/lib/broker/adaptive-throttle.js +0 -0
  72. package/lib/broker/batch-coalescer.js +0 -0
  73. package/lib/broker/bulkhead.js +0 -0
  74. package/lib/broker/channel-registry.js +0 -0
  75. package/lib/broker/circuit-breaker.js +0 -0
  76. package/lib/broker/evidence-cache.js +0 -0
  77. package/lib/broker/health-monitor.js +0 -0
  78. package/lib/broker/mage-queue.js +0 -0
  79. package/lib/broker/priority-queue.js +0 -0
  80. package/lib/broker/server.js.bak-error2-fix +0 -0
  81. package/lib/broker/session-registry.js +0 -0
  82. package/lib/broker/singleton-timers.js +0 -0
  83. package/lib/broker/types.d.ts +0 -0
  84. package/lib/broker/vegas-limit.js +0 -0
  85. package/lib/compression/dist/ccr-store.js +74 -0
  86. package/lib/compression/dist/content-router.js +115 -0
  87. package/lib/compression/dist/pipeline.js +113 -0
  88. package/lib/compression/dist/server.js +265 -0
  89. package/lib/compression/dist/smart-crusher.js +251 -0
  90. package/lib/context-budget.ts +0 -0
  91. package/lib/context-firewall.js +0 -0
  92. package/lib/crm/integration/triage-bridge.js +0 -0
  93. package/lib/email-utils.ts +0 -0
  94. package/lib/eval/__tests__/preflight-checker.test.ts +0 -0
  95. package/lib/eval/__tests__/task-instruction-parser.test.ts +0 -0
  96. package/lib/eval/__tests__/verifier-runner.test.ts +0 -0
  97. package/lib/eval/index.ts +0 -0
  98. package/lib/eval/preflight-checker.ts +0 -0
  99. package/lib/eval/task-domain-classifier.ts +0 -0
  100. package/lib/eval/task-instruction-parser.ts +0 -0
  101. package/lib/eval/verifier-runner.ts +0 -0
  102. package/lib/event-bus.d.ts +0 -0
  103. package/lib/event-bus.mts +1 -1
  104. package/lib/governance-context-selector.ts +0 -0
  105. package/lib/graph/generate-extension-embeddings.js +0 -0
  106. package/lib/graph/generate-static-embeddings.js +0 -0
  107. package/lib/graph/lib/utils.js +1 -1
  108. package/lib/graph-audit.d.ts +0 -0
  109. package/lib/graph-availability.js +62 -0
  110. package/lib/hbo-core-store.compiled.js +834 -0
  111. package/lib/hbo-core-store.js +124 -0
  112. package/lib/hbo-core-store.ts +908 -0
  113. package/lib/mesh-circuit-breaker.js +0 -0
  114. package/lib/mission-loop/lesson-extractor.ts +0 -0
  115. package/lib/mission-loop/mental-model-scorer.ts +0 -0
  116. package/lib/mission-loop/occ-detector.ts +0 -0
  117. package/lib/mission-loop/query-variants.ts +0 -0
  118. package/lib/mission-loop/verifier-check.ts +0 -0
  119. package/lib/skill-reference-builder.ts +0 -0
  120. package/lib/telemetry/token-breakdown.ts +0 -0
  121. package/lib/tool-compressor.ts +0 -0
  122. package/lib/triage-core/classifier.ts +3 -2
  123. package/lib/triage-core/graph/schema.cypher +10 -0
  124. package/lib/triage-core/legal-routing.ts +0 -0
  125. package/lib/triage-core/mental-model/dunbar-classifier.ts +0 -0
  126. package/lib/triage-core/mental-model/enrich-all.ts +0 -0
  127. package/lib/triage-core/mental-model/identity-resolver.ts +0 -0
  128. package/lib/triage-core/mental-model/key-facts.ts +1 -2
  129. package/lib/triage-core/mental-model/model-assembler.ts +0 -0
  130. package/lib/triage-core/orchestrator.ts +4 -11
  131. package/lib/triage-core/orchestrator.ts.bak-r005-r006-r008 +0 -0
  132. package/package.json +18 -8
  133. package/skills/helios-business-operator/services/signals/upwork-signals.js +0 -0
  134. package/skills/talisman-ceo/SKILL.md +23 -25
  135. package/skills/talisman-comms/SKILL.md +5 -5
  136. package/skills/talisman-engineering/SKILL.md +5 -5
  137. package/skills/talisman-finance/SKILL.md +10 -8
  138. package/skills/talisman-marketing/SKILL.md +10 -10
  139. package/skills/talisman-sales/SKILL.md +12 -15
  140. package/skills/talisman-support/SKILL.md +5 -5
  141. package/agents/business/talisman-ceo.md +0 -183
  142. package/agents/business/talisman-comms.md +0 -257
  143. package/agents/business/talisman-cto.md +0 -153
  144. package/agents/business/talisman-finance.md +0 -246
  145. package/agents/business/talisman-marketing.md +0 -240
  146. package/agents/business/talisman-sales.md +0 -242
  147. package/agents/business/talisman-support.md +0 -236
  148. package/daemon/lib/approval-expiry.js +0 -162
  149. package/daemon/lib/blast-radius-analyzer.js +0 -75
  150. package/daemon/lib/domain-bootstrap-orchestrator.js +0 -267
  151. package/daemon/lib/forensic-log.js +0 -113
  152. package/daemon/lib/goal-research-pipeline.js +0 -644
  153. package/daemon/lib/harada/cascade-research-dispatcher.js +0 -261
  154. package/daemon/lib/headroom-middleware.js +0 -167
  155. package/daemon/lib/headroom-proxy-manager.js +0 -623
  156. package/daemon/lib/mental-model-cache.js +0 -96
  157. package/daemon/lib/project-factory.js +0 -47
  158. package/daemon/lib/session-log-reader.js +0 -93
  159. package/daemon/routes/hed.js +0 -133
  160. package/lib/graph/learning/headroom-learn-bridge.js +0 -215
  161. package/skills/helios-bookkeeping/SKILL.md +0 -321
  162. package/skills/helios-briefer/SKILL.md +0 -44
  163. package/skills/helios-client-relations/SKILL.md +0 -322
  164. package/skills/helios-personal-triager/SKILL.md +0 -45
  165. package/skills/helios-recruitment/SKILL.md +0 -317
  166. package/skills/helios-relationship-nudger/SKILL.md +0 -77
  167. package/skills/helios-researcher/SKILL.md +0 -44
  168. package/skills/helios-scheduler/SKILL.md +0 -58
  169. package/skills/helios-tax-analyst/SKILL.md +0 -280
@@ -20,11 +20,14 @@ if (!process.env.TZ) { process.env.TZ = 'UTC'; }
20
20
  */
21
21
 
22
22
  const path = require('path');
23
- const fs = require('fs');
24
- const os = require('os');
25
- const { performance } = require('perf_hooks');
26
- const { randomUUID } = require('crypto');
27
- const { buildContextBrief } = require('./context-enrichment');
23
+ const fs = require('fs');
24
+ const os = require('os');
25
+ const { execFile } = require('child_process');
26
+ const { performance } = require('perf_hooks');
27
+ const { randomUUID } = require('crypto');
28
+ const { buildContextBrief } = require('./context-enrichment');
29
+ const hboStore = require('../lib/hbo-core-store');
30
+ const graphWal = require('../lib/graph-wal');
28
31
  const { runMigrations } = require('./schema-migrations');
29
32
  const { runHBOMigrations } = require('./schema-migrations-hbo');
30
33
  const { runHaradaMigrations } = require('./schema-migrations-harada');
@@ -179,6 +182,14 @@ try {
179
182
  process.stderr.write('[daemon] Generated API token at ' + tokenPath + '\n');
180
183
  }
181
184
  process.env.HELIOS_API_TOKEN = fs.readFileSync(tokenPath, 'utf-8').trim();
185
+ // Mirror token to data/api-token.txt so harbor tests can find it without
186
+ // knowing the platform-specific HELIOS_DATA path.
187
+ // Primary path: harbor test fixture reads HELIOS_ROOT/data/api-token.txt → sends Bearer token.
188
+ try {
189
+ const _repoTokenDir = path.join(HELIOS_ROOT, 'data');
190
+ if (!fs.existsSync(_repoTokenDir)) fs.mkdirSync(_repoTokenDir, { recursive: true });
191
+ fs.writeFileSync(path.join(_repoTokenDir, 'api-token.txt'), process.env.HELIOS_API_TOKEN, { mode: 0o600 });
192
+ } catch (_) { /* non-fatal: env var HELIOS_AGENT_TOKEN is the fallback */ }
182
193
  } catch(e) {
183
194
  process.stderr.write('[daemon] Warning: could not create/read API token: ' + e.message + '\n');
184
195
  // Fallback: generate in-memory token (not persisted)
@@ -717,7 +728,22 @@ async function executeQueueAction(item) {
717
728
  const assignee = (item.payload && item.payload.assigneeAgentId) || 'agent:default';
718
729
  const cid = (item.payload && item.payload.companyId) || 'default';
719
730
 
720
- await mg.safeWrite(`
731
+ // SQLite-first write (P2-2)
732
+ try { hboStore.createTask({
733
+ id: taskId,
734
+ companyId: cid,
735
+ title,
736
+ status: 'todo',
737
+ priority: 2,
738
+ assigneeAgentId: assignee,
739
+ body: (item.payload && item.payload.body) || '',
740
+ sourceItemId: item.target_id,
741
+ sourceChannel: item.channel,
742
+ progressPropagated: false,
743
+ createdAt: Date.now(),
744
+ }); } catch (_storeErr) { /* non-fatal: SQLite store unavailable */ }
745
+ // Non-blocking Memgraph projection (fire-and-forget)
746
+ setImmediate(() => mg.safeWrite(`
721
747
  CREATE (t:Task {
722
748
  id: $taskId,
723
749
  companyId: $cid,
@@ -739,7 +765,7 @@ async function executeQueueAction(item) {
739
765
  body: (item.payload && item.payload.body) || '',
740
766
  sourceId: item.target_id,
741
767
  channel: item.channel,
742
- });
768
+ }).catch(e => console.warn('[daemon] Memgraph Task projection failed (non-fatal):', e.message)));
743
769
  return;
744
770
  }
745
771
 
@@ -1034,7 +1060,7 @@ class RoutineEvaluator {
1034
1060
  // milliseconds-and-Z suffix produced by toISOString(). No change needed.
1035
1061
  const now = new Date().toISOString().replace(/\.\d{3}Z$/, '+00:00');
1036
1062
  dueRoutines = await this.mg(
1037
- `MATCH (r:Routine {companyId: $companyId}) WHERE r.status = 'active' AND r.nextRunAt <= datetime($now) RETURN r.id, r.name, r.cronExpr, r.agentId, r.companyId, r.concurrencyPolicy, r.timezone`,
1063
+ `MATCH (r:Routine {companyId: $companyId}) WHERE r.status = 'active' AND r.nextRunAt <= datetime($now) RETURN r.id, r.name, r.cronExpr, r.agentId, r.companyId, r.concurrencyPolicy, r.timezone, r.catchUpCap, r.catchUpPolicy`,
1038
1064
  { now, companyId: this.companyId }
1039
1065
  );
1040
1066
  } catch (err) {
@@ -1062,11 +1088,79 @@ class RoutineEvaluator {
1062
1088
  }
1063
1089
  }
1064
1090
 
1091
+ // P5-03: coalesce_if_active — skip creating a full run but queue one follow-up RoutineRun
1092
+ if (routine['r.concurrencyPolicy'] === 'coalesce_if_active') {
1093
+ const active = await this.mg(
1094
+ `MATCH (rr:RoutineRun {routineId: $rid}) WHERE rr.status IN ['queued', 'running'] RETURN count(rr) as cnt`,
1095
+ { rid: routineId }
1096
+ );
1097
+ const activeCount = active?.rows?.[0]?.[0] ?? 0;
1098
+ if (activeCount > 0) {
1099
+ // Check if a coalesced follow-up already exists (prevent duplicate queuing)
1100
+ const queuedFollowUp = await this.mg(
1101
+ `MATCH (rr:RoutineRun {routineId: $rid, status: 'queued_coalesced'}) RETURN count(rr) as cnt`,
1102
+ { rid: routineId }
1103
+ );
1104
+ if ((queuedFollowUp?.rows?.[0]?.[0] ?? 0) === 0) {
1105
+ const followUpRunId = requireRunId(`run:${routineId}:coalesced:${randomUUID()}`, 'RoutineEvaluator.coalesce');
1106
+ await this.mg(
1107
+ `MERGE (rr:RoutineRun {id: $runId}) SET rr.routineId = $routineId, rr.status = 'queued_coalesced', rr.companyId = $companyId, rr.queuedAt = datetime()`,
1108
+ { runId: followUpRunId, routineId, companyId: this.companyId }
1109
+ ).catch(e => log('warn', `RoutineEvaluator: coalesce follow-up failed: ${e.message}`));
1110
+ }
1111
+ log('debug', `Coalescing routine ${routine['r.name']} — queued follow-up`);
1112
+ continue;
1113
+ }
1114
+ }
1115
+
1065
1116
  const routineAgentId = routine['r.agentId'];
1066
1117
  if (!routineAgentId) {
1067
1118
  log('warn', `RoutineEvaluator: routine ${routineId} has no agentId — skipping task creation`);
1068
1119
  continue;
1069
1120
  }
1121
+
1122
+ // P5-04: catchUpCap — enqueue missed windows up to cap
1123
+ // This fires BEFORE the normal single-task creation to batch missed runs first.
1124
+ const catchUpPolicy = routine['r.catchUpPolicy'];
1125
+ const catchUpCap = parseInt(routine['r.catchUpCap'] ?? '0', 10) || 0;
1126
+ if (catchUpPolicy === 'enqueue_missed_with_cap' && catchUpCap > 0) {
1127
+ try {
1128
+ const { Cron } = require('croner');
1129
+ const cron = new Cron(routine['r.cronExpr'], { timezone: routine['r.timezone'] });
1130
+ // Count missed windows: how many times cron fired between lastRunAt and now
1131
+ // Simple approximation: count backward from now until we hit lastRunAt or cap
1132
+ let missedCount = 0;
1133
+ const prev = cron.previousRun ? cron.previousRun() : null;
1134
+ // Since we don't have a full missed-window iterator here, use a conservative
1135
+ // estimate: check if at least one missed window exists and enqueue up to cap
1136
+ // by repeatedly calling previousRun. Max cap iterations.
1137
+ let checkDate = prev;
1138
+ const lastRunStr = routine['r.lastRunAt'];
1139
+ const lastRunMs = lastRunStr ? Date.parse(lastRunStr) : 0;
1140
+ while (checkDate && missedCount < catchUpCap) {
1141
+ if (checkDate.getTime() <= lastRunMs) break;
1142
+ missedCount++;
1143
+ checkDate = cron.previousRun ? cron.previousRun() : null;
1144
+ }
1145
+ cron.stop();
1146
+ for (let i = 0; i < missedCount; i++) {
1147
+ const catchUpTaskId = `task:routine:${routineId}:catchup:${i}:${randomUUID()}`;
1148
+ const catchUpRunId = requireRunId(`run:${routineId}:catchup:${i}:${randomUUID()}`, 'RoutineEvaluator.catchup');
1149
+ await this.mg(
1150
+ `MERGE (t:Task {id: $taskId}) SET t.title = $title, t.status = 'todo', t.assigneeAgentId = $agentId, t.companyId = $companyId, t.originKind = 'routine_catchup', t.progressPropagated = false, t.createdAt = datetime()`,
1151
+ { taskId: catchUpTaskId, title: `Routine catch-up: ${routine['r.name']}`, agentId: routineAgentId, companyId: this.companyId }
1152
+ ).catch(e => log('warn', `RoutineEvaluator: catch-up task create failed: ${e.message}`));
1153
+ await this.mg(
1154
+ `MERGE (rr:RoutineRun {id: $runId}) SET rr.routineId = $routineId, rr.status = 'queued', rr.linkedTaskId = $taskId, rr.companyId = $companyId, rr.queuedAt = datetime()`,
1155
+ { runId: catchUpRunId, routineId, taskId: catchUpTaskId, companyId: this.companyId }
1156
+ ).catch(e => log('warn', `RoutineEvaluator: catch-up run create failed: ${e.message}`));
1157
+ }
1158
+ if (missedCount > 0) log('info', `RoutineEvaluator: catch-up ${missedCount} tasks for ${routine['r.name']}`);
1159
+ } catch (catchUpErr) {
1160
+ log('warn', `RoutineEvaluator: catchUpCap logic failed for ${routineId}: ${catchUpErr.message}`);
1161
+ }
1162
+ }
1163
+
1070
1164
  const taskId = `task:routine:${routineId}:${randomUUID()}`;
1071
1165
  await criticalOp(
1072
1166
  () => this.mg(
@@ -1223,12 +1317,26 @@ class BudgetEnforcer {
1223
1317
 
1224
1318
  let inProgress;
1225
1319
  try {
1226
- // Retry up to 5 times (500ms apart) in case Memgraph snapshot isolation
1227
- // hasn't propagated a recently-committed write yet.
1228
- for (let attempt = 0; attempt < 5; attempt++) {
1229
- inProgress = await this.mg(query, params);
1230
- if (inProgress?.rows?.length > 0) break;
1231
- if (attempt < 4) await new Promise(r => setTimeout(r, 500));
1320
+ // Memgraph primary, SQLite fallback on unavailability
1321
+ try {
1322
+ // Retry up to 5 times (500ms apart) in case Memgraph snapshot isolation
1323
+ // hasn't propagated a recently-committed write yet.
1324
+ for (let attempt = 0; attempt < 5; attempt++) {
1325
+ inProgress = await this.mg(query, params);
1326
+ if (inProgress?.rows?.length > 0) break;
1327
+ if (attempt < 4) await new Promise(r => setTimeout(r, 500));
1328
+ }
1329
+ } catch (mgErr) {
1330
+ // Memgraph unavailable — fall back to SQLite
1331
+ if (hboStore.getTasksByCompanyStatus) {
1332
+ const storeRows = hboStore.getTasksByCompanyStatus(this.companyId, 'in_progress');
1333
+ const filtered = agentId ? storeRows.filter(t => t.assigneeAgentId === agentId) : storeRows;
1334
+ inProgress = {
1335
+ rows: filtered.map(t => [t.id, t.heliosRunId ?? null, t.dispatchedViaTUI ?? null, t.assigneeAgentId ?? null]),
1336
+ keys: ['t.id', 't.heliosRunId', 't.dispatchedViaTUI', 't.assigneeAgentId'],
1337
+ };
1338
+ log('info', `CancelInFlight: using SQLite fallback for in-progress task lookup (Memgraph unavailable): ${mgErr.message}`);
1339
+ }
1232
1340
  }
1233
1341
  } catch (e) {
1234
1342
  log('error', `BudgetEnforcer: failed to query in-flight tasks: ${e.message}`);
@@ -1260,10 +1368,13 @@ class BudgetEnforcer {
1260
1368
  log('error', `BudgetEnforcer: failed to cancel TUI run ${heliosRunId} for task ${taskId} after 3 retries (H3 watchdog will clean up)`);
1261
1369
  }
1262
1370
  }
1263
- await this.mg(
1371
+ // SQLite-first update (P2-4)
1372
+ try { hboStore.updateTask(taskRow[0], this.companyId, { status: 'todo', executionLockedAt: null, executionAgentId: null, dispatchedViaTUI: null, heliosRunId: null }); } catch (_) {}
1373
+ // Non-blocking Memgraph projection (fire-and-forget)
1374
+ setImmediate(() => this.mg(
1264
1375
  `MATCH (t:Task {id: $taskId}) SET t.status = 'todo', t.executionLockedAt = null, t.executionAgentId = null, t.dispatchedViaTUI = null, t.heliosRunId = null`,
1265
1376
  { taskId: taskRow[0] }
1266
- ).catch(e => log('error', `BudgetEnforcer: failed to reset task ${taskRow[0]} after cancellation: ${e.message}`));
1377
+ ).catch(e => log('error', `BudgetEnforcer: failed to reset task ${taskRow[0]} after cancellation: ${e.message}`)));
1267
1378
  });
1268
1379
 
1269
1380
  await Promise.race([
@@ -1273,14 +1384,30 @@ class BudgetEnforcer {
1273
1384
  }
1274
1385
 
1275
1386
  async enforce() {
1276
- // Query 1: Get all policies (instant small node count)
1277
- const policiesResult = await this.mg(
1278
- `MATCH (bp:BudgetPolicy {companyId: $cid})
1279
- RETURN bp.id, bp.scope, bp.agentId, bp.limitCents, bp.warnPercent, bp.hardStopEnabled`,
1280
- { cid: this.companyId }
1281
- );
1282
-
1283
- const policyRows = policiesResult?.rows ?? [];
1387
+ // Query 1: Get all policies — Memgraph primary, SQLite fallback on unavailability
1388
+ let policyRows = [];
1389
+ try {
1390
+ const policiesResult = await this.mg(
1391
+ `MATCH (bp:BudgetPolicy {companyId: $cid})
1392
+ RETURN bp.id, bp.scope, bp.agentId, bp.limitCents, bp.warnPercent, bp.hardStopEnabled`,
1393
+ { cid: this.companyId }
1394
+ );
1395
+ policyRows = policiesResult?.rows ?? [];
1396
+ } catch (mgErr) {
1397
+ // Memgraph unavailable — fall back to SQLite budget policies
1398
+ if (hboStore.getBudgetPoliciesByCompany) {
1399
+ const storePolicies = hboStore.getBudgetPoliciesByCompany(this.companyId);
1400
+ policyRows = storePolicies.map(bp => [
1401
+ bp.id,
1402
+ bp.scope,
1403
+ bp.agent_id ?? bp.agentId,
1404
+ bp.limit_cents ?? bp.limitCents,
1405
+ bp.warn_percent ?? bp.warnPercent,
1406
+ bp.hard_stop_enabled ?? bp.hardStopEnabled,
1407
+ ]);
1408
+ log('info', `BudgetEnforcer: using SQLite fallback for policy lookup (Memgraph unavailable): ${mgErr.message}`);
1409
+ }
1410
+ }
1284
1411
 
1285
1412
  let blocked = false;
1286
1413
  let warningActive = false;
@@ -1309,12 +1436,21 @@ class BudgetEnforcer {
1309
1436
  const end = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth() + 1, 1)).toISOString();
1310
1437
 
1311
1438
  // Query 2: Spend per agent THIS MONTH (fast — time-bounded, indexed)
1312
- const spendRows = await this.mg(
1313
- `MATCH (ce:CostEvent {companyId: $cid})
1314
- WHERE ce.createdAt >= datetime($start) AND ce.createdAt < datetime($end)
1315
- RETURN ce.agentId, sum(ce.costCents) as total`,
1316
- { cid: this.companyId, start: start.replace(/\.\d{3}Z$/, '+00:00'), end: end.replace(/\.\d{3}Z$/, '+00:00') }
1317
- );
1439
+ // Memgraph primary; SQLite has no CostEvent fallback, so default to empty spend on unavailability
1440
+ let spendRows = { rows: [] };
1441
+ try {
1442
+ spendRows = await this.mg(
1443
+ `MATCH (ce:CostEvent {companyId: $cid})
1444
+ WHERE ce.createdAt >= datetime($start) AND ce.createdAt < datetime($end)
1445
+ RETURN ce.agentId, sum(ce.costCents) as total`,
1446
+ { cid: this.companyId, start: start.replace(/\.\d{3}Z$/, '+00:00'), end: end.replace(/\.\d{3}Z$/, '+00:00') }
1447
+ );
1448
+ } catch (mgSpendErr) {
1449
+ log('info', `BudgetEnforcer: Memgraph unavailable for spend query — using zero spend (${mgSpendErr.message})`);
1450
+ // spendRows stays { rows: [] } — enforcement proceeds with zero observed spend,
1451
+ // which means soft/hard thresholds will not trigger. This is the safe fallback:
1452
+ // better to not block agents than to falsely block them on a stale DB state.
1453
+ }
1318
1454
 
1319
1455
  // JS join (microseconds — no cartesian product)
1320
1456
  const neo4j = require('neo4j-driver');
@@ -1334,29 +1470,39 @@ class BudgetEnforcer {
1334
1470
  const pct = limit > 0 ? (spent * 100 / limit) : 0;
1335
1471
  const warnThreshold = toNum(warnPercent) || 80;
1336
1472
 
1337
- // Update policy in Memgraph
1338
- await this.mg(
1473
+ // Update policy spend in Memgraph — fire-and-forget so Memgraph downtime
1474
+ // does not abort the loop. SQLite hbo-core-store is the authoritative store.
1475
+ setImmediate(() => this.mg(
1339
1476
  `MATCH (bp:BudgetPolicy {id: $id}) SET bp.spentCents = $spent, bp.percentUsed = $pct`,
1340
1477
  { id, spent, pct }
1341
- );
1478
+ ).catch(e => log('info', `BudgetEnforcer: spend update projection failed (non-fatal): ${e.message}`)));
1342
1479
 
1343
1480
  policies.push({ id, scope, agentId, limitCents: limit, spentCents: spent, percentUsed: pct, status: 'active' });
1344
1481
 
1345
1482
  // Soft incident at warnPercent (idempotent)
1346
1483
  if (pct >= warnThreshold && pct < 100) {
1347
1484
  warningActive = true;
1348
- const existing = await this.mg(
1349
- `MATCH (bi:BudgetIncident {policyId: $pid, companyId: $cid, thresholdType: 'soft', status: 'open'}) RETURN bi.id LIMIT 1`,
1350
- { pid: id, cid: this.companyId }
1351
- );
1352
- if (!existing?.rows?.length) {
1353
- const incidentId = `bi:soft:${id}:${Date.now()}`;
1354
- await this.mg(
1355
- `CREATE (bi:BudgetIncident {id: $id, companyId: $cid, policyId: $pid, scopeType: $scopeType, scopeId: $scopeId, thresholdType: 'soft', amountLimit: $limit, amountObserved: $spent, status: 'open', createdAt: datetime()})`,
1356
- { id: incidentId, cid: this.companyId, pid: id, scopeType: scope, scopeId: agentId ?? scope, limit, spent }
1357
- );
1358
- log('info', `BudgetEnforcer: Soft budget incident created for ${agentId ?? 'global'}`, { pct, warnThreshold });
1359
- }
1485
+ // Use fire-and-forget for incident creation so Memgraph downtime does not abort enforce()
1486
+ const _mgThis = this.mg.bind(this);
1487
+ const _cid = this.companyId;
1488
+ setImmediate(async () => {
1489
+ try {
1490
+ const existing = await _mgThis(
1491
+ `MATCH (bi:BudgetIncident {policyId: $pid, companyId: $cid, thresholdType: 'soft', status: 'open'}) RETURN bi.id LIMIT 1`,
1492
+ { pid: id, cid: _cid }
1493
+ );
1494
+ if (!existing?.rows?.length) {
1495
+ const incidentId = `bi:soft:${id}:${Date.now()}`;
1496
+ await _mgThis(
1497
+ `CREATE (bi:BudgetIncident {id: $id, companyId: $cid, policyId: $pid, scopeType: $scopeType, scopeId: $scopeId, thresholdType: 'soft', amountLimit: $limit, amountObserved: $spent, status: 'open', createdAt: datetime()})`,
1498
+ { id: incidentId, cid: _cid, pid: id, scopeType: scope, scopeId: agentId ?? scope, limit, spent }
1499
+ );
1500
+ log('info', `BudgetEnforcer: Soft budget incident created for ${agentId ?? 'global'}`, { pct, warnThreshold });
1501
+ }
1502
+ } catch (e) {
1503
+ log('info', `BudgetEnforcer: soft incident projection failed (non-fatal): ${e.message}`);
1504
+ }
1505
+ });
1360
1506
  if (scope === 'global') {
1361
1507
  log('info', `BudgetEnforcer: Global budget warning active (>${warnThreshold}%)`, { percentUsed: pct });
1362
1508
  }
@@ -1368,18 +1514,31 @@ class BudgetEnforcer {
1368
1514
  const existingHard = await this.mg(
1369
1515
  `MATCH (bi:BudgetIncident {policyId: $pid, companyId: $cid, thresholdType: 'hard', status: 'open'}) RETURN bi.id LIMIT 1`,
1370
1516
  { pid: id, cid: this.companyId }
1371
- );
1517
+ ).catch(() => ({ rows: [] })); // treat Memgraph unavailability as "no existing incident"
1372
1518
  if (!existingHard?.rows?.length) {
1373
1519
  const incidentId = `bi:hard:${id}:${Date.now()}`;
1374
1520
  const approvalId = `approval:budget:${id}:${Date.now()}`;
1375
- await this.mg(
1521
+ // SQLite-first: write Approval to SQLite before Memgraph so if Memgraph
1522
+ // is down the approval still exists and budget enforcement still functions (F7).
1523
+ try {
1524
+ hboStore.createApproval({
1525
+ id: approvalId, companyId: this.companyId, type: 'budget_exceeded',
1526
+ title: `Budget exceeded for ${agentId ?? 'global'} — raise limit to resume`,
1527
+ requestedBy: agentId ?? 'agent:ceo', status: 'pending', followUpTaskCreated: false, createdAt: Date.now(),
1528
+ });
1529
+ } catch (storeErr) {
1530
+ log('warn', `BudgetEnforcer: SQLite approval write failed (non-fatal): ${storeErr.message}`);
1531
+ }
1532
+ // Memgraph projections — fire-and-forget so Memgraph downtime does not crash enforce()
1533
+ const _mgThis = this.mg.bind(this);
1534
+ setImmediate(() => _mgThis(
1376
1535
  `CREATE (bi:BudgetIncident {id: $incId, companyId: $cid, policyId: $pid, scopeType: $scopeType, scopeId: $scopeId, thresholdType: 'hard', amountLimit: $limit, amountObserved: $spent, status: 'open', approvalId: $apId, createdAt: datetime()})`,
1377
1536
  { incId: incidentId, cid: this.companyId, pid: id, scopeType: scope, scopeId: agentId ?? scope, limit, spent, apId: approvalId }
1378
- );
1379
- await this.mg(
1537
+ ).catch(e => log('warn', `BudgetEnforcer: BudgetIncident Memgraph projection failed: ${e.message}`)));
1538
+ setImmediate(() => _mgThis(
1380
1539
  `CREATE (a:Approval {id: $id, companyId: $cid, type: 'budget_exceeded', title: $title, requestedBy: $agentId, status: 'pending', followUpTaskCreated: false, createdAt: datetime()})`,
1381
1540
  { id: approvalId, cid: this.companyId, title: `Budget exceeded for ${agentId ?? 'global'} — raise limit to resume`, agentId: agentId ?? 'agent:ceo' }
1382
- );
1541
+ ).catch(e => log('warn', `BudgetEnforcer: Approval Memgraph projection failed: ${e.message}`)));
1383
1542
  // Day 5: Emit BUDGET_EXCEEDED P0 AnomalySignal.
1384
1543
  // OPTIONAL MATCH dedup guard — Tournament winner: Candidate C.
1385
1544
  // P0 because a hard budget stop halts all agent work immediately.
@@ -1463,12 +1622,15 @@ class AgentDispatcher {
1463
1622
  * @param {Function} spawnFn - optional spawn override (for testing)
1464
1623
  * @param {object} _testConfig - optional config override (for testing only, replaces _daemonConfig)
1465
1624
  * @param {object} registry - optional AdapterRegistry instance
1625
+ * @param {Function} broadcastFn - optional closure for SSE broadcast; use (...args) => daemon._broadcast?.(...args)
1626
+ * so the live broadcast reference is resolved at call time, not construction time.
1466
1627
  */
1467
- constructor(mgQuery, companyId, spawnFn = null, _testConfig = null, registry = null) {
1628
+ constructor(mgQuery, companyId, spawnFn = null, _testConfig = null, registry = null, broadcastFn = null) {
1468
1629
  if (!companyId) throw new Error('AgentDispatcher: companyId required');
1469
1630
  this.mg = mgQuery;
1470
1631
  this.companyId = companyId;
1471
1632
  this._spawnFn = spawnFn;
1633
+ this._daemonBroadcast = typeof broadcastFn === 'function' ? broadcastFn : null;
1472
1634
  this._config = _testConfig !== null ? _testConfig : _daemonConfig;
1473
1635
  // M-11: Use per-company config agents, not the module-level first-company alias
1474
1636
  const _perCompanyCfg = _allCompanyConfigs.find(c =>
@@ -1488,13 +1650,18 @@ class AgentDispatcher {
1488
1650
  const agentHelios = heliosConfig.agents?.[agentId] ?? {};
1489
1651
  if (!agentHelios.apiKey) return null;
1490
1652
 
1491
- // Check if issue already exists on task node
1653
+ // Check if issue already exists on task node — SQLite-first (P2-4)
1492
1654
  try {
1493
- const existing = await this.mg(
1494
- `MATCH (t:Task {id: $taskId}) WHERE t.heliosIssueId IS NOT NULL RETURN t.heliosIssueId`,
1495
- { taskId }
1496
- );
1497
- if (existing?.rows?.length) return existing.rows[0][0];
1655
+ const _storeTask = hboStore.getTask ? hboStore.getTask(taskId, this.companyId) : null;
1656
+ if (_storeTask && _storeTask.heliosIssueId) return _storeTask.heliosIssueId;
1657
+ if (!_storeTask) {
1658
+ // Fallback to Memgraph
1659
+ const existing = await this.mg(
1660
+ `MATCH (t:Task {id: $taskId}) WHERE t.heliosIssueId IS NOT NULL RETURN t.heliosIssueId`,
1661
+ { taskId }
1662
+ );
1663
+ if (existing?.rows?.length) return existing.rows[0][0];
1664
+ }
1498
1665
  } catch (e) { /* ignore */ }
1499
1666
 
1500
1667
  // Create new issue in Helios TUI
@@ -1511,11 +1678,13 @@ class AgentDispatcher {
1511
1678
  if (!resp.ok) return null;
1512
1679
  const issue = await resp.json();
1513
1680
  const issueId = issue.id;
1514
- // Store on task node
1515
- await this.mg(
1681
+ // SQLite-first update (P2-4)
1682
+ try { hboStore.updateTask(taskId, this.companyId, { heliosIssueId: issueId }); } catch (_) {}
1683
+ // Non-blocking Memgraph projection (fire-and-forget)
1684
+ setImmediate(() => this.mg(
1516
1685
  `MATCH (t:Task {id: $taskId}) SET t.heliosIssueId = $issueId`,
1517
1686
  { taskId, issueId }
1518
- ).catch(e => log('warn', `Failed to store heliosIssueId on task ${taskId}: ${e.message}`));
1687
+ ).catch(e => log('warn', `Failed to store heliosIssueId on task ${taskId}: ${e.message}`)));
1519
1688
  return issueId;
1520
1689
  } catch (e) {
1521
1690
  log('warn', `_ensureHeliosIssue failed for task ${taskId}: ${e.message}`);
@@ -1647,28 +1816,55 @@ class AgentDispatcher {
1647
1816
  const signalId = sigRow[0] ?? sigRow['signalId'];
1648
1817
  const agentId = sigRow[1] ?? sigRow['agentId'];
1649
1818
  let taskResult;
1819
+ // Hoist taskRow before try so it remains in scope at the outer if (!taskRow) check (T20-B)
1820
+ let taskRow = null;
1650
1821
  try {
1651
- // CONFLICT 004: CEO inbox priority P0 first, then P1, then OKRFeedback-origin, then by priority/createdAt
1652
- taskResult = await this.mg(
1653
- `MATCH (t:Task {status: 'todo', companyId: $companyId, assigneeAgentId: $agentId})
1654
- WHERE t.title IS NOT NULL
1655
- WITH t,
1656
- CASE
1657
- WHEN toInteger(coalesce(t.priority, 3)) = 0 THEN 0
1658
- WHEN toInteger(coalesce(t.priority, 3)) = 1 THEN 1
1659
- WHEN t.originKind = 'okr_feedback' THEN 2
1660
- ELSE toInteger(coalesce(t.priority, 3)) + 2
1661
- END AS sortPriority
1662
- RETURN t.id AS taskId, t.title AS title, t.originKind AS originKind, t.body AS body, t.priority AS priority
1663
- ORDER BY sortPriority ASC, t.createdAt ASC
1664
- LIMIT 1`,
1665
- { companyId: this.companyId, agentId }
1666
- );
1822
+ // Memgraph primary — SQLite is the fallback when Memgraph throws (e.g. unavailable)
1823
+ try {
1824
+ // CONFLICT 004: CEO inbox priority — P0 first, then P1, then OKRFeedback-origin, then by priority/createdAt
1825
+ taskResult = await this.mg(
1826
+ `MATCH (t:Task {status: 'todo', companyId: $companyId, assigneeAgentId: $agentId})
1827
+ WHERE t.title IS NOT NULL
1828
+ WITH t,
1829
+ CASE
1830
+ WHEN toInteger(coalesce(t.priority, 3)) = 0 THEN 0
1831
+ WHEN toInteger(coalesce(t.priority, 3)) = 1 THEN 1
1832
+ WHEN t.originKind = 'okr_feedback' THEN 2
1833
+ ELSE toInteger(coalesce(t.priority, 3)) + 2
1834
+ END AS sortPriority
1835
+ RETURN t.id AS taskId, t.title AS title, t.originKind AS originKind, t.body AS body, t.priority AS priority
1836
+ ORDER BY sortPriority ASC, t.createdAt ASC
1837
+ LIMIT 1`,
1838
+ { companyId: this.companyId, agentId }
1839
+ );
1840
+ taskRow = taskResult?.rows?.[0];
1841
+ } catch (mgErr) {
1842
+ // Memgraph unavailable — fall back to SQLite
1843
+ if (hboStore.getTasksByCompanyStatus) {
1844
+ const storeTodos = hboStore.getTasksByCompanyStatus(this.companyId, 'todo')
1845
+ .filter(t => t.assigneeAgentId === agentId && t.title);
1846
+ storeTodos.sort((a, b) => {
1847
+ const _sortPri = t => {
1848
+ const p = parseInt(t.priority ?? 3, 10);
1849
+ if (p === 0) return 0;
1850
+ if (p === 1) return 1;
1851
+ if (t.originKind === 'okr_feedback') return 2;
1852
+ return (isNaN(p) ? 3 : p) + 2;
1853
+ };
1854
+ const diff = _sortPri(a) - _sortPri(b);
1855
+ return diff !== 0 ? diff : (a.createdAt ?? 0) - (b.createdAt ?? 0);
1856
+ });
1857
+ const best = storeTodos.find(t => !claimedTaskIds.has(t.id));
1858
+ if (best) {
1859
+ taskRow = [best.id, best.title, best.originKind ?? null, best.body ?? null, best.priority ?? 3];
1860
+ log('info', `AgentDispatcher: using SQLite fallback for task lookup (Memgraph unavailable): ${mgErr.message}`);
1861
+ }
1862
+ }
1863
+ }
1667
1864
  } catch (err) {
1668
1865
  log('warn', `AgentDispatcher: task lookup failed for agent ${agentId}: ${err.message}`);
1669
1866
  continue;
1670
1867
  }
1671
- const taskRow = taskResult?.rows?.[0];
1672
1868
  if (!taskRow) continue;
1673
1869
  const taskId = taskRow[0] ?? taskRow['taskId'];
1674
1870
  const title = taskRow[1] ?? taskRow['title'];
@@ -1816,9 +2012,10 @@ class AgentDispatcher {
1816
2012
 
1817
2013
  try {
1818
2014
  // Non-blocking dispatch: run task in background so tick doesn't stall.
1819
- // TaskCompletionWatchdog handles timeouts (reverts after taskTimeoutMs).
1820
- // This prevents a single long-running pi session from blocking all ticks.
1821
- const TASK_DISPATCH_TIMEOUT_MS = (_daemonConfig.taskTimeoutMs ?? 300000) + 60000; // taskTimeout + 60s grace
2015
+ // TaskCompletionWatchdog reverts non-TUI tasks after taskTimeoutMs (default 30min).
2016
+ // TUI tasks are reverted after 30min (hardcoded in watchdog).
2017
+ // This Promise.race adds 60s grace above the configured timeout.
2018
+ const TASK_DISPATCH_TIMEOUT_MS = (_daemonConfig.taskTimeoutMs ?? 1800000) + 60000; // taskTimeout + 60s grace
1822
2019
  // TUI-09: pass pre-built contextBrief so TuiWakeupAdapter.execute() skips its own
1823
2020
  // buildContextBrief() call — prevents the expensive double-query on this path.
1824
2021
  const taskPromise = adapter.execute(adapterContext, _adapterContextBrief);
@@ -2245,6 +2442,10 @@ class AgentDispatcher {
2245
2442
  ).catch(err => log('warn', `Failed to clear currentTaskId for ${agentId}: ${err.message}`));
2246
2443
  // Pull-dispatch: agent is free — emit AgentReadySignal so next task can be dispatched.
2247
2444
  await this._emitAgentReadySignal(agentId);
2445
+ // DSP-01: push task.updated SSE event for the direct-spawn completion path.
2446
+ // Uses this._daemonBroadcast (closure passed at construction) — NOT daemon._broadcast
2447
+ // which is not in scope inside AgentDispatcher.
2448
+ this._daemonBroadcast?.({ type: 'task.updated', taskId, status: newStatus, companyId: this.companyId });
2248
2449
  log('info', `Task ${taskId} ${newStatus} (exit code: ${code})`);
2249
2450
  } catch (err) {
2250
2451
  log('warn', `Failed to update task ${taskId} completion: ${err.message}`);
@@ -2604,10 +2805,15 @@ class CostEventSyncer {
2604
2805
  }
2605
2806
 
2606
2807
  class TaskCompletionWatchdog {
2607
- constructor(mgQuery, companyId) {
2808
+ constructor(mgQuery, companyId, taskTimeoutMs = 1800000) {
2608
2809
  if (!companyId) throw new Error('TaskCompletionWatchdog: companyId required');
2609
2810
  this.mg = mgQuery;
2610
2811
  this.companyId = companyId;
2812
+ // Build ISO 8601 duration string for Cypher — passed as a parameter to avoid
2813
+ // hardcoded duration literals. 1800000ms → "PT1800S" (30 minutes default).
2814
+ // This is the non-TUI task timeout; TUI tasks always use the hardcoded PT30M path.
2815
+ this._nonTuiTimeoutDuration = `PT${Math.floor(taskTimeoutMs / 1000)}S`;
2816
+ this._nonTuiTimeoutMins = Math.round(taskTimeoutMs / 60000);
2611
2817
  }
2612
2818
 
2613
2819
  /**
@@ -2615,7 +2821,11 @@ class TaskCompletionWatchdog {
2615
2821
  * Tournament winner: Candidate C — consistent with andon-tier1.js pattern,
2616
2822
  * Memgraph-safe, no MERGE key uniqueness dependency.
2617
2823
  */
2618
- async _emitTaskTimeoutSignal(taskId, agentId) {
2824
+ async _emitTaskTimeoutSignal(taskId, agentId) {
2825
+ // Build message in JavaScript before the Cypher call — avoids nested backtick
2826
+ // template literals (the outer Cypher string is a backtick; inner backtick would
2827
+ // close it prematurely). Passing as $message param is cleaner and correct.
2828
+ const _timeoutMsg = `Task exceeded ${this._nonTuiTimeoutMins}-minute completion timeout — reverted to todo`;
2619
2829
  try {
2620
2830
  await this.mg(
2621
2831
  `OPTIONAL MATCH (existing:AnomalySignal {taskId: $taskId, signalType: 'TASK_TIMEOUT', status: 'open'})
@@ -2630,12 +2840,12 @@ class TaskCompletionWatchdog {
2630
2840
  source: 'watchdog',
2631
2841
  signalType: 'TASK_TIMEOUT',
2632
2842
  severity: 'P1',
2633
- message: 'Task exceeded 5-minute completion timeout — reverted to todo',
2843
+ message: $message,
2634
2844
  detectedAt: datetime(),
2635
2845
  status: 'open'
2636
2846
  })`,
2637
- { taskId, agentId: agentId ?? 'unknown', cid: this.companyId }
2638
- );
2847
+ { taskId, agentId: agentId ?? 'unknown', cid: this.companyId, message: _timeoutMsg }
2848
+ );
2639
2849
  } catch (err) {
2640
2850
  // Log at warn — a silent failure here leaves the Andon board stale.
2641
2851
  const msg = JSON.stringify({ ts: new Date().toISOString(), level: 'warn', module: 'TaskCompletionWatchdog', msg: `TASK_TIMEOUT AnomalySignal write failed for ${taskId}: ${err.message}` });
@@ -2648,10 +2858,10 @@ class TaskCompletionWatchdog {
2648
2858
  // TUI tasks can take >5 min (model probe + execution). Reverting them causes infinite cycling.
2649
2859
  const stale = await this.mg(
2650
2860
  `MATCH (t:Task {status: 'in_progress', companyId: $cid})
2651
- WHERE t.executionLockedAt < datetime() - duration("PT5M")
2861
+ WHERE t.executionLockedAt < datetime() - duration($timeout)
2652
2862
  AND (t.dispatchedViaTUI IS NULL OR t.dispatchedViaTUI = false)
2653
2863
  RETURN t.id, t.title, t.executionAgentId`,
2654
- { cid: this.companyId }
2864
+ { cid: this.companyId, timeout: this._nonTuiTimeoutDuration }
2655
2865
  );
2656
2866
 
2657
2867
  const rows = stale?.rows ?? [];
@@ -2669,10 +2879,14 @@ class TaskCompletionWatchdog {
2669
2879
  { taskId }
2670
2880
  );
2671
2881
 
2882
+ // Sync SQLite fallback store — keeps hbo-core.db consistent when Memgraph is unavailable.
2883
+ // Memgraph is primary; SQLite is the fallback path only (AgentDispatcher line 1724).
2884
+ try { hboStore.updateTask(taskId, this.companyId, { status: 'todo', executionLockedAt: null, executionAgentId: null }); } catch (_) {}
2885
+
2672
2886
  // Preserve PDSACycle with abandon decision — losing the PDSA context on
2673
2887
  // revert would break the learning loop. Mark as abandoned so PDSACompletion
2674
2888
  // doesn't re-process it and KnowledgeAsset extraction is skipped correctly.
2675
- await this._mgQueryAsync(
2889
+ await this.mg(
2676
2890
  `MATCH (p:PDSACycle {taskId: $taskId})
2677
2891
  WHERE p.actDecision = 'iterate' OR p.actDecision IS NULL
2678
2892
  SET p.actDecision = 'abandon',
@@ -2691,7 +2905,28 @@ class TaskCompletionWatchdog {
2691
2905
  // OPTIONAL MATCH dedup guard prevents duplicate signals for the same task.
2692
2906
  await this._emitTaskTimeoutSignal(taskId, agentId);
2693
2907
 
2694
- log('warn', `Task ${taskId} timed out after 5min — reverting to todo`);
2908
+ log('warn', `Task ${taskId} timed out after ${this._nonTuiTimeoutMins}min — reverting to todo`);
2909
+
2910
+ // Re-emit AgentReadySignal so the agent can be dispatched again on the next tick.
2911
+ // Without this, the agent stays idle forever after a timeout event.
2912
+ // Uses the same idempotent OPTIONAL MATCH pattern as AgentDispatcher._emitAgentReadySignal.
2913
+ if (agentId) {
2914
+ await this.mg(
2915
+ `MATCH (a:BusinessAgent {id: $agentId, companyId: $cid, status: 'active'})
2916
+ OPTIONAL MATCH (existing:AgentReadySignal {agentId: $agentId, companyId: $cid, status: 'pending'})
2917
+ WITH a, existing
2918
+ WHERE existing IS NULL
2919
+ CREATE (s:AgentReadySignal {
2920
+ id: randomUUID(),
2921
+ agentId: $agentId,
2922
+ companyId: $cid,
2923
+ status: 'pending',
2924
+ claimedBy: null,
2925
+ createdAt: datetime()
2926
+ })`,
2927
+ { agentId, cid: this.companyId }
2928
+ ).catch(() => {});
2929
+ }
2695
2930
  }
2696
2931
 
2697
2932
  // Timeout TUI-dispatched tasks stuck >30min
@@ -2710,8 +2945,11 @@ class TaskCompletionWatchdog {
2710
2945
  `MATCH (t:Task {id: $taskId}) SET t.status = 'todo', t.executionLockedAt = null, t.executionAgentId = null, t.dispatchedViaTUI = null, t.heliosRunId = null, t.timedOutAt = datetime() RETURN t.id`,
2711
2946
  { taskId }
2712
2947
  );
2948
+
2949
+ // Sync SQLite fallback store — also null dispatchedViaTUI and heliosRunId for TUI tasks.
2950
+ try { hboStore.updateTask(taskId, this.companyId, { status: 'todo', executionLockedAt: null, executionAgentId: null, dispatchedViaTUI: null, heliosRunId: null }); } catch (_) {}
2713
2951
  // Preserve PDSACycle with abandon decision for TUI-timed-out tasks.
2714
- await this._mgQueryAsync(
2952
+ await this.mg(
2715
2953
  `MATCH (p:PDSACycle {taskId: $taskId})
2716
2954
  WHERE p.actDecision = 'iterate' OR p.actDecision IS NULL
2717
2955
  SET p.actDecision = 'abandon',
@@ -2723,6 +2961,25 @@ class TaskCompletionWatchdog {
2723
2961
  // Also emit for TUI-timed-out tasks (30-min threshold variant).
2724
2962
  await this._emitTaskTimeoutSignal(taskId, agentId);
2725
2963
  log('warn', `TaskCompletionWatchdog: TUI task ${taskId} timed out after 30min — reverting to todo`);
2964
+
2965
+ // Re-emit AgentReadySignal so the agent can be dispatched again on the next tick.
2966
+ if (agentId) {
2967
+ await this.mg(
2968
+ `MATCH (a:BusinessAgent {id: $agentId, companyId: $cid, status: 'active'})
2969
+ OPTIONAL MATCH (existing:AgentReadySignal {agentId: $agentId, companyId: $cid, status: 'pending'})
2970
+ WITH a, existing
2971
+ WHERE existing IS NULL
2972
+ CREATE (s:AgentReadySignal {
2973
+ id: randomUUID(),
2974
+ agentId: $agentId,
2975
+ companyId: $cid,
2976
+ status: 'pending',
2977
+ claimedBy: null,
2978
+ createdAt: datetime()
2979
+ })`,
2980
+ { agentId, cid: this.companyId }
2981
+ ).catch(() => {});
2982
+ }
2726
2983
  }
2727
2984
  }
2728
2985
  }
@@ -2733,10 +2990,15 @@ class TaskCompletionWatchdog {
2733
2990
  // Reverted twice (418b606f, 1bf902d0) — protected by __tests__/tui-integration.test.js
2734
2991
  // See: .sisyphus/plans/talisman-daemon-wave8.md (EC-03)
2735
2992
  class RunCompletionPoller {
2736
- constructor(mgQuery, companyId) {
2993
+ // broadcastFn: optional closure — use (...args) => daemon._broadcast?.(...args) so the
2994
+ // live SSE function is resolved at call time (not at construction time, when it is still null).
2995
+ // completionProcessor: optional TaskCompletionProcessor instance for post-completion pipeline.
2996
+ constructor(mgQuery, companyId, broadcastFn, completionProcessor) {
2737
2997
  if (!companyId) throw new Error('RunCompletionPoller: companyId required');
2738
2998
  this.mg = mgQuery;
2739
2999
  this.companyId = companyId;
3000
+ this._broadcastFn = typeof broadcastFn === 'function' ? broadcastFn : null;
3001
+ this._cp = completionProcessor ?? null;
2740
3002
  }
2741
3003
 
2742
3004
  async poll() {
@@ -2789,6 +3051,8 @@ class RunCompletionPoller {
2789
3051
  const newStatus = (run.status === 'completed' || run.status === 'succeeded') ? 'done' : 'failed';
2790
3052
  // FINDING-2 fix: criticalOp wrapper — revert to todo if markDone write fails,
2791
3053
  // then re-emit AgentReadySignal so the agent is not permanently locked.
3054
+ // markDoneOk tracks success so downstream pipeline and broadcast are skipped on revert.
3055
+ let markDoneOk = true;
2792
3056
  await criticalOp(
2793
3057
  () => this.mg(
2794
3058
  `MATCH (t:Task {id: $taskId})
@@ -2798,6 +3062,7 @@ class RunCompletionPoller {
2798
3062
  ),
2799
3063
  { module: 'RunCompletionPoller', operation: 'markDone', taskId, companyId: this.companyId }
2800
3064
  ).catch(async (markDoneErr) => {
3065
+ markDoneOk = false;
2801
3066
  log('warn', `RunCompletionPoller: markDone failed for ${taskId} — reverting to todo: ${markDoneErr.message}`);
2802
3067
  await this.mg(
2803
3068
  `MATCH (t:Task {id: $taskId}) SET t.status = 'todo', t.executionLockedAt = null, t.executionAgentId = null, t.dispatchedViaTUI = null, t.heliosRunId = null`,
@@ -2805,11 +3070,26 @@ class RunCompletionPoller {
2805
3070
  ).catch(revertErr => log('warn', `RunCompletionPoller: failed reverting to todo for ${taskId}: ${revertErr.message}`));
2806
3071
  await this._emitAgentReadySignal(agentId).catch(() => {});
2807
3072
  });
3073
+ // If markDone was reverted, skip completion pipeline and broadcast — task is back in todo.
3074
+ if (!markDoneOk) continue;
2808
3075
  await this.mg(
2809
3076
  `MATCH (a:BusinessAgent {id: $agentId, companyId: $cid}) SET a.lastHeartbeatAt = toInteger(timestamp() / 1000)`,
2810
3077
  { agentId, cid: this.companyId }
2811
3078
  ).catch(err => log('warn', `RunCompletionPoller: failed to update heartbeat for ${agentId}: ${err.message}`));
2812
3079
  log('info', `Task ${taskId} ${newStatus} (TUI run ${runId} → ${run.status})`);
3080
+ // RCP-01: run completion pipeline (TaskResult, OKR progress, CRM, PDSA) for TUI-completed tasks.
3081
+ if (newStatus === 'done' && this._cp) {
3082
+ this._cp.process(taskId, run.summary ?? '', {
3083
+ agentId,
3084
+ companyId: this.companyId,
3085
+ originKind: run.originKind ?? 'tui_wakeup',
3086
+ exitCode: 0,
3087
+ }).catch(e => log('warn', `RunCompletionPoller: completionProcessor failed for ${taskId}: ${e.message}`));
3088
+ }
3089
+ // RCP-02: push task.updated SSE event so the desktop UI updates reactively without polling.
3090
+ // _broadcastFn is a closure ((...args) => this._broadcast?.(...args)) set at start() time,
3091
+ // so it correctly resolves the live broadcast reference even if SSE server started after construction.
3092
+ this._broadcastFn?.({ type: 'task.updated', taskId, status: newStatus, companyId: this.companyId });
2813
3093
  // TUI-01: emit AgentReadySignal so the agent picks up the next task immediately.
2814
3094
  await this._emitAgentReadySignal(agentId).catch(() => {});
2815
3095
  } catch (e) {
@@ -2868,12 +3148,26 @@ class ApprovalWatcher {
2868
3148
  async check() {
2869
3149
  let approved;
2870
3150
  try {
2871
- approved = await this.mg(
2872
- `MATCH (a:Approval {companyId: $cid, status: 'approved'})
2873
- WHERE a.followUpTaskCreated IS NULL OR a.followUpTaskCreated = false
2874
- RETURN a.id, a.title, a.requestedBy, a.type, a.strategyId`,
2875
- { cid: this.companyId }
2876
- );
3151
+ // Memgraph primary — SQLite fallback on unavailability
3152
+ try {
3153
+ approved = await this.mg(
3154
+ `MATCH (a:Approval {companyId: $cid, status: 'approved'})
3155
+ WHERE a.followUpTaskCreated IS NULL OR a.followUpTaskCreated = false
3156
+ RETURN a.id, a.title, a.requestedBy, a.type, a.strategyId`,
3157
+ { cid: this.companyId }
3158
+ );
3159
+ } catch (mgErr) {
3160
+ // Memgraph unavailable — fall back to SQLite approvals
3161
+ if (hboStore.getApprovalsByCompanyStatus) {
3162
+ const storeApprovals = hboStore.getApprovalsByCompanyStatus(this.companyId, 'approved')
3163
+ .filter(a => !a.followUpTaskCreated);
3164
+ approved = {
3165
+ rows: storeApprovals.map(a => [a.id, a.title, a.requestedBy, a.type, a.strategyId ?? null]),
3166
+ keys: ['a.id', 'a.title', 'a.requestedBy', 'a.type', 'a.strategyId'],
3167
+ };
3168
+ log('info', `ApprovalWatcher: using SQLite fallback for approval lookup (Memgraph unavailable): ${mgErr.message}`);
3169
+ }
3170
+ }
2877
3171
  } catch (err) {
2878
3172
  log('warn', `ApprovalWatcher: query failed: ${err.message}`);
2879
3173
  return;
@@ -2891,41 +3185,55 @@ class ApprovalWatcher {
2891
3185
  const requestedBy = approval['a.requestedBy'] ?? 'agent:ceo';
2892
3186
  const approvalType = approval['a.type'];
2893
3187
  const taskId = `task:approval-followup:${approvalId}:${randomUUID()}`;
2894
-
2895
3188
  try {
2896
3189
  if (approvalType === 'budget_exceeded') {
2897
- await this.mg(
3190
+ // Non-blocking Memgraph side-effects — fire-and-forget so Memgraph downtime
3191
+ // does not prevent SQLite writes (task creation + approval update) below.
3192
+ const _mgThis = this.mg.bind(this);
3193
+ setImmediate(() => _mgThis(
2898
3194
  `MATCH (bi:BudgetIncident {approvalId: $apId}) SET bi.status = 'resolved', bi.resolvedAt = datetime()`,
2899
3195
  { apId: approvalId }
2900
- );
2901
- await this.mg(
3196
+ ).catch(e => log('warn', `ApprovalWatcher: BudgetIncident resolve projection failed: ${e.message}`)));
3197
+ setImmediate(() => _mgThis(
2902
3198
  `MATCH (a:BusinessAgent {id: $agentId, companyId: $cid}) WHERE a.pauseReason IN ['budget_exceeded', 'budget_exceeded_global'] SET a.status = 'active', a.pauseReason = null, a.resumedAt = datetime()`,
2903
3199
  { agentId: requestedBy, cid: this.companyId }
2904
- );
3200
+ ).catch(e => log('warn', `ApprovalWatcher: agent resume projection failed: ${e.message}`)));
2905
3201
  log('info', `ApprovalWatcher: budget approval resolved — agent ${requestedBy} resumed`);
2906
3202
  }
2907
3203
 
2908
3204
  if (approvalType === 'strategy_proposal') {
2909
3205
  const strategyId = approval['a.strategyId'];
2910
3206
  if (strategyId) {
2911
- await this.mg(
3207
+ setImmediate(() => this.mg(
2912
3208
  `MATCH (s:Strategy {id: $strategyId}) SET s.status = 'approved', s.approvedAt = datetime()`,
2913
3209
  { strategyId }
2914
- );
3210
+ ).catch(e => log('warn', `ApprovalWatcher: strategy approve projection failed: ${e.message}`)));
2915
3211
  log('info', `ApprovalWatcher: strategy ${strategyId} approved`);
2916
3212
  }
2917
- }
3213
+ }
2918
3214
 
2919
- await this.mg(
3215
+ // SQLite-first task create (P2-5)
3216
+ try {
3217
+ hboStore.createTask({
3218
+ id: taskId, title: `Approval resolved: ${title}. Execute the approved plan.`,
3219
+ status: 'todo', assigneeAgentId: requestedBy, companyId: this.companyId,
3220
+ originKind: 'approval_resolved', approvalId, progressPropagated: false, createdAt: Date.now(),
3221
+ });
3222
+ } catch (_) {}
3223
+ // Non-blocking Memgraph projection (fire-and-forget)
3224
+ setImmediate(() => this.mg(
2920
3225
  `MERGE (t:Task {id: $taskId}) SET t.title = $title, t.status = 'todo',
2921
3226
  t.assigneeAgentId = $agentId, t.companyId = $cid, t.originKind = 'approval_resolved',
2922
3227
  t.approvalId = $approvalId, t.progressPropagated = false, t.createdAt = datetime()`,
2923
3228
  { taskId, title: `Approval resolved: ${title}. Execute the approved plan.`, agentId: requestedBy, cid: this.companyId, approvalId }
2924
- );
2925
- await this.mg(
3229
+ ).catch(e => log('warn', `[daemon] Memgraph Task projection failed (non-fatal): ${e.message}`)));
3230
+ // SQLite-first approval update (P2-5)
3231
+ try { hboStore.updateApproval(approvalId, this.companyId, { followUpTaskCreated: true }); } catch (_) {}
3232
+ // Non-blocking Memgraph projection (fire-and-forget)
3233
+ setImmediate(() => this.mg(
2926
3234
  `MATCH (a:Approval {id: $approvalId}) SET a.followUpTaskCreated = true`,
2927
3235
  { approvalId }
2928
- );
3236
+ ).catch(e => log('warn', `[daemon] Memgraph Approval projection failed (non-fatal): ${e.message}`)));
2929
3237
  log('info', `ApprovalWatcher: created follow-up task ${taskId} for approval ${approvalId}`);
2930
3238
  } catch (err) {
2931
3239
  log('warn', `ApprovalWatcher: failed to create follow-up for ${approvalId}: ${err.message}`);
@@ -2952,7 +3260,7 @@ class ApprovalWatcher {
2952
3260
  function buildForCompany(companyId, mgQueryAsync, opts) {
2953
3261
  if (!companyId) throw new Error('buildForCompany: companyId required');
2954
3262
  if (!mgQueryAsync) throw new Error('buildForCompany: mgQueryAsync required');
2955
- const { rpcAdapter, companyConfig = null } = opts || {};
3263
+ const { rpcAdapter, companyConfig = null, broadcast = null } = opts || {};
2956
3264
  const cid = companyId;
2957
3265
 
2958
3266
  // Helper: safe require + construct, non-fatal
@@ -2983,10 +3291,10 @@ function buildForCompany(companyId, mgQueryAsync, opts) {
2983
3291
  try { mods.costEventSyncer = new CostEventSyncer(mgQueryAsync, cid); }
2984
3292
  catch (e) { log('warn', `[module-factory] CostEventSyncer init failed for ${cid}: ${e.message}`); mods.costEventSyncer = null; }
2985
3293
 
2986
- try { mods.taskCompletionWatchdog = new TaskCompletionWatchdog(mgQueryAsync, cid); }
3294
+ try { mods.taskCompletionWatchdog = new TaskCompletionWatchdog(mgQueryAsync, cid, _daemonConfig.taskTimeoutMs ?? 1800000); }
2987
3295
  catch (e) { log('warn', `[module-factory] TaskCompletionWatchdog init failed for ${cid}: ${e.message}`); mods.taskCompletionWatchdog = null; }
2988
3296
 
2989
- try { mods.runCompletionPoller = new RunCompletionPoller(mgQueryAsync, cid); }
3297
+ try { mods.runCompletionPoller = new RunCompletionPoller(mgQueryAsync, cid, typeof broadcast === 'function' ? (...args) => broadcast(...args) : null, new TaskCompletionProcessor({ mgQuery: mgQueryAsync })); }
2990
3298
  catch (e) { log('warn', `[module-factory] RunCompletionPoller init failed for ${cid}: ${e.message}`); mods.runCompletionPoller = null; }
2991
3299
 
2992
3300
  try { mods.activityLogger = new ActivityLogger(mgQueryAsync, cid); }
@@ -3052,9 +3360,13 @@ function buildForCompany(companyId, mgQueryAsync, opts) {
3052
3360
  mods.sacrificeDeclaration = safeNew('SacrificeDeclaration', './lib/harada/sacrifice-declaration', 'SacrificeDeclaration', mgQueryAsync, cid);
3053
3361
 
3054
3362
  try {
3363
+ // MirrorPatternScan runs on a P7D (weekly) wall-clock schedule via WallClockScheduler
3364
+ // in addition to the per-10-tasks trigger below. Weekly cadence ensures agents who
3365
+ // complete few tasks still receive periodic mirror feedback.
3055
3366
  const { KataSessionPrompt, MasteryCheck, MirrorPatternScan } = require('./lib/harada/sensei');
3056
3367
  mods.kataSessionPrompt = new KataSessionPrompt(mgQueryAsync, cid);
3057
3368
  mods.masteryCheck = new MasteryCheck(mgQueryAsync, cid);
3369
+ // P7D weekly wall-clock guard: MirrorPatternScan is also scheduled weekly via WallClockScheduler
3058
3370
  mods.mirrorPatternScan = new MirrorPatternScan(mgQueryAsync, cid);
3059
3371
  } catch (e) {
3060
3372
  log('warn', `[module-factory] Harada Sensei init failed for ${cid}: ${e.message}`);
@@ -3210,7 +3522,7 @@ class HeliosCompanyDaemon {
3210
3522
  const cid = cfg.company?.id || cfg.companyName;
3211
3523
  if (!cid) continue;
3212
3524
  try {
3213
- const mods = buildForCompany(cid, this._mgQueryAsync.bind(this), { rpcAdapter: this._rpcAdapter, companyConfig: cfg });
3525
+ const mods = buildForCompany(cid, this._mgQueryAsync.bind(this), { rpcAdapter: this._rpcAdapter, companyConfig: cfg, broadcast: (...args) => this._broadcast?.(...args) });
3214
3526
  this._modulesByCompany.set(cid, mods);
3215
3527
  log('info', `_initModules: per-company modules built for '${cid}'`);
3216
3528
  } catch (e) {
@@ -3255,9 +3567,62 @@ class HeliosCompanyDaemon {
3255
3567
  const mods = buildForCompany(cid, this._mgQueryAsync.bind(this), {
3256
3568
  rpcAdapter: this._rpcAdapter,
3257
3569
  companyConfig: null, // not available for runtime-added companies
3570
+ broadcast: (...args) => this._broadcast?.(...args),
3258
3571
  });
3259
3572
  this._modulesByCompany.set(cid, mods);
3260
3573
  log('info', `[registerCompany] per-company modules live for '${cid}'`);
3574
+
3575
+ // Wire the SSE broadcast function into HBOBridge now that the API server is running.
3576
+ // buildForCompany passes companyConfig (null) as the 4th arg to HBOBridge — not the
3577
+ // broadcast function — so _bc is null after construction. setBroadcast() sets it here,
3578
+ // enabling wizard:pillars_ready and other real-time events to reach the desktop.
3579
+ if (mods.hboBridge && typeof mods.hboBridge.setBroadcast === 'function' && typeof this._broadcast === 'function') {
3580
+ mods.hboBridge.setBroadcast(this._broadcast);
3581
+ log('info', `[registerCompany] HBOBridge broadcast wired for '${cid}'`);
3582
+ }
3583
+
3584
+ // Trigger GoalPillar creation immediately after modules are registered.
3585
+ // tickGoalDecompose creates the 8 GoalPillar + 64 ActionCell nodes via MandalaManager.
3586
+ // This must run AFTER _modulesByCompany.set() so mods.hboBridge is available.
3587
+ // This is the correct point to run it — after wizard completes and registerCompany is
3588
+ // called from the desktop, _modulesByCompany is guaranteed to have this company's entry.
3589
+ if (mods.hboBridge && typeof mods.hboBridge.tickGoalDecompose === 'function') {
3590
+ setImmediate(async () => {
3591
+ try {
3592
+ await mods.hboBridge.tickGoalDecompose({ fromWizard: true });
3593
+ log('info', `[registerCompany] tickGoalDecompose complete for '${cid}'`);
3594
+ // Emit wizard:pillars_ready via SSE bridge if broadcast is wired.
3595
+ if (typeof this._broadcast === 'function') {
3596
+ this._broadcast({
3597
+ type: 'wizard:pillars_ready',
3598
+ companyId: cid,
3599
+ pillarCount: 8,
3600
+ ts: Date.now(),
3601
+ });
3602
+ } else {
3603
+ // D1: _broadcast may not yet be wired if API server started after registerCompany
3604
+ // (startup race condition). Retry once after 1s before giving up.
3605
+ log('warn', `[registerCompany] wizard:pillars_ready not broadcast for '${cid}' — _broadcast not yet wired, retrying in 1s`);
3606
+ const self = this;
3607
+ setTimeout(() => {
3608
+ if (typeof self._broadcast === 'function') {
3609
+ self._broadcast({
3610
+ type: 'wizard:pillars_ready',
3611
+ companyId: cid,
3612
+ pillarCount: 8,
3613
+ ts: Date.now(),
3614
+ });
3615
+ log('info', `[registerCompany] wizard:pillars_ready broadcast on retry for '${cid}'`);
3616
+ } else {
3617
+ log('warn', `[registerCompany] wizard:pillars_ready not broadcast after retry for '${cid}' — SSE events will be missed`);
3618
+ }
3619
+ }, 1000);
3620
+ }
3621
+ } catch (e) {
3622
+ log('warn', `[registerCompany] tickGoalDecompose failed for '${cid}' (non-fatal): ${e.message}`);
3623
+ }
3624
+ });
3625
+ }
3261
3626
  } catch (e) {
3262
3627
  log('warn', `[registerCompany] buildForCompany failed for '${cid}': ${e.message}`);
3263
3628
  throw e; // surface to the HTTP handler — caller can retry
@@ -4035,10 +4400,30 @@ class HeliosCompanyDaemon {
4035
4400
  // CRITICAL-3 fix: wait for Memgraph to be reachable before running migrations.
4036
4401
  // Without this, PM2 can restart the daemon before Memgraph is up post-OOM,
4037
4402
  // and all migrations + MAGE backfill silently fail, leaving the graph in a partial state.
4038
- await this._waitForMemgraph();
4039
- await this._connectMemgraph();
4040
-
4041
- // ── TZ sanity check ─────────────────────────────────────────────────────────
4403
+ await this._waitForMemgraph();
4404
+ await this._connectMemgraph();
4405
+
4406
+ try {
4407
+ const replay = await graphWal.replayPending(this._mgQueryAsync.bind(this));
4408
+ log('info', 'WAL replay complete', replay);
4409
+ } catch (err) {
4410
+ log('error', 'WAL replay failed', { error: err.message });
4411
+ }
4412
+
4413
+ const migrateScript = path.join(DAEMON_DIR, 'db', 'hbo-core-migrate.js');
4414
+ execFile(process.execPath, ['--experimental-sqlite', migrateScript], {
4415
+ cwd: HELIOS_ROOT,
4416
+ timeout: 60_000,
4417
+ env: process.env,
4418
+ }, (err, stdout, stderr) => {
4419
+ if (err) {
4420
+ log('warn', 'hbo-core reconcile failed', { error: err.message, stderr: String(stderr || '').slice(0, 1000) });
4421
+ return;
4422
+ }
4423
+ log('info', 'hbo-core reconcile complete', { stdout: String(stdout || '').slice(0, 1000) });
4424
+ });
4425
+
4426
+ // ── TZ sanity check ─────────────────────────────────────────────────────────
4042
4427
  if (!process.env.TZ || process.env.TZ !== 'UTC') {
4043
4428
  log('warn', `[startup] TZ=${process.env.TZ} — recommend TZ=UTC for consistent datetime comparisons. See AGENTS.md.`);
4044
4429
  }
@@ -4165,7 +4550,13 @@ class HeliosCompanyDaemon {
4165
4550
  try {
4166
4551
  const { verifyMemgraphConfig } = require('./lib/memgraph-verify');
4167
4552
  const vr = await verifyMemgraphConfig(this._mgQueryAsync.bind(this));
4168
- log('info', `Memgraph config verified: ${vr.serverTime} (timestamp unit: ${vr.timestampUnit})`);
4553
+ if (vr.timestampUnit === 'microseconds') {
4554
+ log('info', `Memgraph config verified: ${vr.serverTime} (timestamp unit: ${vr.timestampUnit})`);
4555
+ } else {
4556
+ // Non-fatal: zombie-detection uses toInteger(timestamp()/1000) consistently
4557
+ // on both SET and GET, so the unit cancels out and comparisons remain correct.
4558
+ log('warn', `Memgraph config verified with non-standard timestamp unit: ${vr.timestampUnit} (server: ${vr.serverTime}) — zombie-detection unaffected`);
4559
+ }
4169
4560
  } catch (e) {
4170
4561
  const msg = String(e.message || e);
4171
4562
  // Detect connection-level failures — Memgraph is not running yet
@@ -4198,24 +4589,31 @@ class HeliosCompanyDaemon {
4198
4589
  }
4199
4590
  }
4200
4591
 
4201
- // ── Headroom compression proxy (required service) ─────────────────────────
4202
- // Starts the local Headroom proxy sidecar that intercepts all LLM traffic and
4203
- // compresses tool outputs, HEMA recall payloads, and HBO API responses before
4204
- // they reach the model. This is a required service — if it fails to start,
4205
- // the daemon exits with a clear error message (same pattern as verifyMemgraphConfig).
4592
+ // ── Helios Compression Server ─────────────────────────────────────────────
4593
+ // Starts the TypeScript compression sidecar (lib/compression/server.ts).
4594
+ // Compresses tool outputs, HEMA recall payloads, and HBO API responses
4595
+ // before they reach the LLM — reducing token cost by 40–85%.
4596
+ //
4597
+ // Non-fatal: if the server fails to start, the daemon continues without
4598
+ // compression. All compression call sites check getBaseUrl() === null and
4599
+ // skip compression gracefully. HBO still works, agents still function.
4600
+ // This matches the principle: compression improves the system but is not
4601
+ // a correctness dependency — every company's data remains accessible.
4206
4602
  //
4207
- // Auto-restarts on mid-session crash (Option 1) with exponential backoff.
4208
- // getBaseUrl() is read at call time by all LLM path wrappers — they pick up
4209
- // the new URL automatically after a proxy restart.
4603
+ // Auto-restarts on mid-session crash with exponential backoff.
4210
4604
  try {
4211
4605
  const { HeadroomProxyManager } = require('./lib/headroom-proxy-manager');
4212
4606
  this._headroomProxy = HeadroomProxyManager.getInstance();
4213
4607
  const hr = await this._headroomProxy.start();
4214
- log('info', `Headroom compression proxy ready: ${hr.baseUrl}`);
4608
+ log('info', `Helios Compression Server ready: ${hr.baseUrl}`);
4215
4609
  } catch (e) {
4216
- log('error', `Headroom compression proxy FAILED to start:\n${e.message}`);
4217
- // Same comment as verifyMemgraphConfig: do NOT remove daemon.lock
4218
- process.exit(1);
4610
+ log('warn',
4611
+ `Helios Compression Server failed to start daemon continues without compression.\n` +
4612
+ `Token costs will be higher until the server is fixed and the daemon restarts.\n` +
4613
+ `Error: ${e.message}`
4614
+ );
4615
+ // Do NOT exit — compression is an optimization, not a correctness dependency.
4616
+ // All call sites handle null baseUrl gracefully.
4219
4617
  }
4220
4618
 
4221
4619
  // ── Redis maxmemory verification (warn if unset) ──────────────────────────
@@ -4277,6 +4675,58 @@ class HeliosCompanyDaemon {
4277
4675
  log('warn', `Daemon-online heartbeat reset failed (non-fatal): ${e.message}`);
4278
4676
  }
4279
4677
 
4678
+ // ── Startup AgentReadySignal sweep — all companies ──────────────────────────
4679
+ // Emits a pending AgentReadySignal for every active agent that has neither
4680
+ // a pending signal nor a current in-progress task.
4681
+ //
4682
+ // Primary path: daemon restarts → signals emitted for idle agents → next tick
4683
+ // dispatches tasks to all active agents → no stranded agents after restart.
4684
+ //
4685
+ // Uses OPTIONAL MATCH ... WHERE existing IS NULL (idempotent, same pattern as
4686
+ // _emitAgentReadySignal). NOT EXISTS subqueries are not supported in Memgraph 3.
4687
+ try {
4688
+ let _sweepEmitted = 0;
4689
+ let _sweepAttempted = 0;
4690
+ for (const cfg of _allCompanyConfigs) {
4691
+ const cid = cfg.company?.id || cfg.companyName;
4692
+ if (!cid) continue;
4693
+ // Find active agents with no pending signal and no in-progress task.
4694
+ const _candidates = await this._mgQueryAsync(
4695
+ `MATCH (a:BusinessAgent {companyId: $cid, status: 'active'})
4696
+ OPTIONAL MATCH (sig:AgentReadySignal {agentId: a.id, companyId: $cid, status: 'pending'})
4697
+ OPTIONAL MATCH (wip:Task {assigneeAgentId: a.id, companyId: $cid})
4698
+ WHERE wip.status IN ['in_progress', 'andon_paused', 'help_pending']
4699
+ WITH a, sig, wip
4700
+ WHERE sig IS NULL AND wip IS NULL
4701
+ RETURN a.id AS agentId`,
4702
+ { cid }
4703
+ ).catch(() => null);
4704
+ for (const row of (_candidates?.rows ?? [])) {
4705
+ const agentId = Array.isArray(row) ? row[0] : row?.agentId;
4706
+ if (!agentId) continue;
4707
+ _sweepAttempted++;
4708
+ await this._mgQueryAsync(
4709
+ `MATCH (a:BusinessAgent {id: $agentId, companyId: $cid, status: 'active'})
4710
+ OPTIONAL MATCH (existing:AgentReadySignal {agentId: $agentId, companyId: $cid, status: 'pending'})
4711
+ WITH a, existing
4712
+ WHERE existing IS NULL
4713
+ CREATE (s:AgentReadySignal {
4714
+ id: randomUUID(),
4715
+ agentId: $agentId,
4716
+ companyId: $cid,
4717
+ status: 'pending',
4718
+ claimedBy: null,
4719
+ createdAt: datetime()
4720
+ })`,
4721
+ { agentId, cid }
4722
+ ).then(() => { _sweepEmitted++; }).catch(() => {});
4723
+ }
4724
+ }
4725
+ log('info', `Startup AgentReadySignal sweep: ${_sweepEmitted}/${_sweepAttempted} signals emitted for ${_allCompanyConfigs.length} company(ies)`);
4726
+ } catch (e) {
4727
+ log('warn', `Startup AgentReadySignal sweep failed (non-fatal): ${e.message}`);
4728
+ }
4729
+
4280
4730
  // ── CV-T fix: reset stale in-progress RoutineRun nodes on restart ─────────
4281
4731
  // RoutineRun nodes with status 'queued' or 'running' from a previous daemon
4282
4732
  // session are permanently stuck — the process that started them is gone.
@@ -4467,10 +4917,24 @@ class HeliosCompanyDaemon {
4467
4917
  registry.register('tui_wakeup', new TuiWakeupAdapter(heliosConfig, this._mgQueryAsync.bind(this), _companyConfig.agents ?? []));
4468
4918
  registry.register('process', new ProcessAdapter());
4469
4919
 
4470
- this._agentDispatcher = new AgentDispatcher(this._mgQueryAsync.bind(this), primaryCompanyId, null, null, registry);
4920
+ this._agentDispatcher = new AgentDispatcher(
4921
+ this._mgQueryAsync.bind(this),
4922
+ primaryCompanyId,
4923
+ null, // spawnFn
4924
+ null, // _testConfig
4925
+ registry,
4926
+ (...args) => this._broadcast?.(...args) // broadcastFn — closure so live ref resolved at call time
4927
+ );
4471
4928
  this._activityLogger = new ActivityLogger(this._mgQueryAsync.bind(this), primaryCompanyId);
4472
- this._taskCompletionWatchdog = new TaskCompletionWatchdog(this._mgQueryAsync.bind(this), primaryCompanyId);
4473
- this._runCompletionPoller = new RunCompletionPoller(this._mgQueryAsync.bind(this), primaryCompanyId);
4929
+ this._taskCompletionWatchdog = new TaskCompletionWatchdog(this._mgQueryAsync.bind(this), primaryCompanyId, _daemonConfig.taskTimeoutMs ?? 1800000);
4930
+ // Pass a closure for broadcastFn so it resolves daemon._broadcast at call time (not at construction).
4931
+ // Pass _agentDispatcher._completionProcessor so TUI-completed tasks run the full completion pipeline.
4932
+ this._runCompletionPoller = new RunCompletionPoller(
4933
+ this._mgQueryAsync.bind(this),
4934
+ primaryCompanyId,
4935
+ (...args) => this._broadcast?.(...args),
4936
+ this._agentDispatcher._completionProcessor ?? null
4937
+ );
4474
4938
  this._costEventSyncer = new CostEventSyncer(this._mgQueryAsync.bind(this), primaryCompanyId);
4475
4939
  this._nodeCleaner = new NodeCleaner(this._mgQueryAsync.bind(this));
4476
4940
  this._mageAnalytics = new MageAnalytics(this._mgQueryAsync.bind(this));
@@ -5190,6 +5654,24 @@ if (require.main === module) {
5190
5654
  daemon._apiUpdateTick = updateTick;
5191
5655
  }
5192
5656
  daemon._broadcast = broadcast;
5657
+
5658
+ // Wire the broadcast function into every per-company HBOBridge instance.
5659
+ // buildForCompany() receives companyConfig (not the broadcast fn) as its 4th arg,
5660
+ // so _bc is null after _initModules() runs. We fix that here, immediately after
5661
+ // the API server starts and broadcast becomes available.
5662
+ // Without this, all startup-company HBOBridge instances have _bc = null for
5663
+ // the entire daemon lifetime — no real-time SSE events (approval.created,
5664
+ // wizard:pillars_ready, etc.) can be pushed to connected desktop clients.
5665
+ for (const [, mods] of daemon._modulesByCompany || []) {
5666
+ if (mods?.hboBridge && typeof mods.hboBridge.setBroadcast === 'function') {
5667
+ mods.hboBridge.setBroadcast(broadcast);
5668
+ }
5669
+ }
5670
+ // Also wire the legacy single-company bridge (used when _modulesByCompany is absent).
5671
+ if (daemon._hboBridge && typeof daemon._hboBridge.setBroadcast === 'function') {
5672
+ daemon._hboBridge.setBroadcast(broadcast);
5673
+ }
5674
+ log('info', `[startApi] HBOBridge broadcast wired for ${(daemon._modulesByCompany || new Map()).size} company(ies)`);
5193
5675
  const apiPort = _daemonConfig.apiPort ?? 9093;
5194
5676
  // Store the bound port on the daemon instance so _writeHealthFile() can include it.
5195
5677
  // This makes the port visible in daemon-health.json for operators and monitoring tools.
@@ -5208,5 +5690,5 @@ if (require.main === module) {
5208
5690
 
5209
5691
  // Export classes for testing
5210
5692
  if (typeof module !== 'undefined') {
5211
- module.exports = { HeliosCompanyDaemon, RoutineEvaluator, LivenessWatchdog, BudgetEnforcer, AgentDispatcher, ActivityLogger, TaskCompletionWatchdog, CostEventSyncer, NodeCleaner, requireRunId, MageAnalytics, _allCompanyConfigs, buildForCompany };
5693
+ module.exports = { HeliosCompanyDaemon, RoutineEvaluator, LivenessWatchdog, BudgetEnforcer, AgentDispatcher, ApprovalWatcher, ActivityLogger, TaskCompletionWatchdog, CostEventSyncer, NodeCleaner, requireRunId, MageAnalytics, _allCompanyConfigs, buildForCompany };
5212
5694
  }