@cgh567/agent 2.4.1 → 2.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/helios +0 -0
- package/bin/helios-rpc-node-wrapper.cjs +0 -0
- package/bin/helios-rpc-wrapper.sh +0 -0
- package/daemon/adapters/helios-rpc-adapter.js +47 -25
- package/daemon/adapters/tui_wakeup.js +8 -0
- package/daemon/config/com.familiar.helios-daemon.plist +5 -0
- package/daemon/config/helios-daemon.service +4 -0
- package/daemon/context-enrichment.js +59 -21
- package/daemon/daemon-manager.js +1 -1
- package/daemon/db/email-infrastructure-migrate.js +192 -0
- package/daemon/db/hbo-core-migrate.js +189 -0
- package/daemon/helios-api.js +723 -57
- package/daemon/helios-company-daemon.js +616 -134
- package/daemon/lib/harada/cascade-judge.js +12 -50
- package/daemon/lib/harada/mandala.js +20 -0
- package/daemon/lib/harada/pillar-dispatcher.js +1 -1
- package/daemon/lib/harada/project-factory.js +7 -2
- package/daemon/lib/hbo-bridge.js +32 -13
- package/daemon/lib/hed-engine.js +10 -292
- package/daemon/lib/helios-hitl-host.js +15 -2
- package/daemon/lib/hitl-interaction-service.js +0 -0
- package/daemon/lib/memgraph-verify.js +38 -33
- package/daemon/lib/project-drift-detector.js +7 -17
- package/daemon/lib/project-semantic-updater.js +1 -14
- package/daemon/lib/task-completion-processor.js +11 -0
- package/daemon/lib/wizard-engine.js +57 -6
- package/daemon/routes/channels.js +10 -5
- package/daemon/routes/harada-map.js +11 -48
- package/daemon/routes/hbo.js +342 -75
- package/daemon/routes/hitl.js +0 -0
- package/daemon/routes/project.js +194 -62
- package/daemon/routes/routines.js +14 -0
- package/daemon/routes/tasks.js +15 -1
- package/daemon/routes/wizard.js +11 -4
- package/daemon/schema-apply.js +174 -0
- package/daemon/schema-definitions.js +423 -0
- package/daemon/schema-migrations-hbo.js +10 -0
- package/daemon/schema-migrations-hed.js +18 -0
- package/daemon/schema-migrations-hitl.js +0 -0
- package/daemon/schema-migrations-proj.js +131 -0
- package/extensions/001-tool-output-cap.ts +0 -0
- package/extensions/context-compaction.ts +45 -26
- package/extensions/cortex/activation-bridge.ts +5 -0
- package/extensions/cortex/learn.ts +26 -0
- package/extensions/cortex/wal-replay.ts +91 -0
- package/extensions/email/backfill.ts +0 -0
- package/extensions/helios-governance/analysis/ambiguity.ts +0 -0
- package/extensions/helios-governance/analysis/compliance.ts +0 -0
- package/extensions/helios-governance/analysis/long-task-detector.ts +0 -0
- package/extensions/helios-governance/analysis/output-contract.ts +0 -0
- package/extensions/helios-governance/analysis/patterns.ts +0 -0
- package/extensions/helios-governance/analysis/preflight.ts +0 -0
- package/extensions/helios-governance/analysis/recurring-violations.ts +0 -0
- package/extensions/helios-governance/analysis/task-classification.ts +0 -0
- package/extensions/helios-governance/analysis/task-intent.ts +0 -0
- package/extensions/helios-governance/gates/high-impact.ts +1 -1
- package/extensions/helios-governance/handlers/_jiti-require.ts +15 -8
- package/extensions/helios-governance/handlers/proxy-test-detector.ts +0 -0
- package/extensions/hema-dispatch-v3/graph-memory.ts +10 -0
- package/extensions/hema-dispatch-v3/index.ts +72 -47
- package/extensions/lib/elo-engine.js +0 -0
- package/extensions/lib/elo-engine.test.js +0 -0
- package/extensions/memgraph-autostart.ts +13 -0
- package/extensions/neuroplastic-eval.ts +0 -0
- package/extensions/shadow-loop/index.ts +0 -0
- package/extensions/warm-tick/warm-tick-maintenance.ts +8 -0
- package/lib/__tests__/hbo-core-store.test.js +238 -0
- package/lib/brain-v2-budget.js +0 -0
- package/lib/brain-v2-circuit-breaker.js +0 -0
- package/lib/brain-v2.js +0 -0
- package/lib/broker/adaptive-throttle.js +0 -0
- package/lib/broker/batch-coalescer.js +0 -0
- package/lib/broker/bulkhead.js +0 -0
- package/lib/broker/channel-registry.js +0 -0
- package/lib/broker/circuit-breaker.js +0 -0
- package/lib/broker/evidence-cache.js +0 -0
- package/lib/broker/health-monitor.js +0 -0
- package/lib/broker/mage-queue.js +0 -0
- package/lib/broker/priority-queue.js +0 -0
- package/lib/broker/server.js.bak-error2-fix +0 -0
- package/lib/broker/session-registry.js +0 -0
- package/lib/broker/singleton-timers.js +0 -0
- package/lib/broker/types.d.ts +0 -0
- package/lib/broker/vegas-limit.js +0 -0
- package/lib/compression/dist/ccr-store.js +74 -0
- package/lib/compression/dist/content-router.js +115 -0
- package/lib/compression/dist/pipeline.js +113 -0
- package/lib/compression/dist/server.js +265 -0
- package/lib/compression/dist/smart-crusher.js +251 -0
- package/lib/context-budget.ts +0 -0
- package/lib/context-firewall.js +0 -0
- package/lib/crm/integration/triage-bridge.js +0 -0
- package/lib/email-utils.ts +0 -0
- package/lib/eval/__tests__/preflight-checker.test.ts +0 -0
- package/lib/eval/__tests__/task-instruction-parser.test.ts +0 -0
- package/lib/eval/__tests__/verifier-runner.test.ts +0 -0
- package/lib/eval/index.ts +0 -0
- package/lib/eval/preflight-checker.ts +0 -0
- package/lib/eval/task-domain-classifier.ts +0 -0
- package/lib/eval/task-instruction-parser.ts +0 -0
- package/lib/eval/verifier-runner.ts +0 -0
- package/lib/event-bus.d.ts +0 -0
- package/lib/event-bus.mts +1 -1
- package/lib/governance-context-selector.ts +0 -0
- package/lib/graph/generate-extension-embeddings.js +0 -0
- package/lib/graph/generate-static-embeddings.js +0 -0
- package/lib/graph/lib/utils.js +1 -1
- package/lib/graph-audit.d.ts +0 -0
- package/lib/graph-availability.js +62 -0
- package/lib/hbo-core-store.compiled.js +834 -0
- package/lib/hbo-core-store.js +124 -0
- package/lib/hbo-core-store.ts +908 -0
- package/lib/mesh-circuit-breaker.js +0 -0
- package/lib/mission-loop/lesson-extractor.ts +0 -0
- package/lib/mission-loop/mental-model-scorer.ts +0 -0
- package/lib/mission-loop/occ-detector.ts +0 -0
- package/lib/mission-loop/query-variants.ts +0 -0
- package/lib/mission-loop/verifier-check.ts +0 -0
- package/lib/skill-reference-builder.ts +0 -0
- package/lib/telemetry/token-breakdown.ts +0 -0
- package/lib/tool-compressor.ts +0 -0
- package/lib/triage-core/classifier.ts +3 -2
- package/lib/triage-core/graph/schema.cypher +10 -0
- package/lib/triage-core/legal-routing.ts +0 -0
- package/lib/triage-core/mental-model/dunbar-classifier.ts +0 -0
- package/lib/triage-core/mental-model/enrich-all.ts +0 -0
- package/lib/triage-core/mental-model/identity-resolver.ts +0 -0
- package/lib/triage-core/mental-model/key-facts.ts +1 -2
- package/lib/triage-core/mental-model/model-assembler.ts +0 -0
- package/lib/triage-core/orchestrator.ts +4 -11
- package/lib/triage-core/orchestrator.ts.bak-r005-r006-r008 +0 -0
- package/package.json +18 -8
- package/skills/helios-business-operator/services/signals/upwork-signals.js +0 -0
- package/skills/talisman-ceo/SKILL.md +23 -25
- package/skills/talisman-comms/SKILL.md +5 -5
- package/skills/talisman-engineering/SKILL.md +5 -5
- package/skills/talisman-finance/SKILL.md +10 -8
- package/skills/talisman-marketing/SKILL.md +10 -10
- package/skills/talisman-sales/SKILL.md +12 -15
- package/skills/talisman-support/SKILL.md +5 -5
- package/agents/business/talisman-ceo.md +0 -183
- package/agents/business/talisman-comms.md +0 -257
- package/agents/business/talisman-cto.md +0 -153
- package/agents/business/talisman-finance.md +0 -246
- package/agents/business/talisman-marketing.md +0 -240
- package/agents/business/talisman-sales.md +0 -242
- package/agents/business/talisman-support.md +0 -236
- package/daemon/lib/approval-expiry.js +0 -162
- package/daemon/lib/blast-radius-analyzer.js +0 -75
- package/daemon/lib/domain-bootstrap-orchestrator.js +0 -267
- package/daemon/lib/forensic-log.js +0 -113
- package/daemon/lib/goal-research-pipeline.js +0 -644
- package/daemon/lib/harada/cascade-research-dispatcher.js +0 -261
- package/daemon/lib/headroom-middleware.js +0 -167
- package/daemon/lib/headroom-proxy-manager.js +0 -623
- package/daemon/lib/mental-model-cache.js +0 -96
- package/daemon/lib/project-factory.js +0 -47
- package/daemon/lib/session-log-reader.js +0 -93
- package/daemon/routes/hed.js +0 -133
- package/lib/graph/learning/headroom-learn-bridge.js +0 -215
- package/skills/helios-bookkeeping/SKILL.md +0 -321
- package/skills/helios-briefer/SKILL.md +0 -44
- package/skills/helios-client-relations/SKILL.md +0 -322
- package/skills/helios-personal-triager/SKILL.md +0 -45
- package/skills/helios-recruitment/SKILL.md +0 -317
- package/skills/helios-relationship-nudger/SKILL.md +0 -77
- package/skills/helios-researcher/SKILL.md +0 -44
- package/skills/helios-scheduler/SKILL.md +0 -58
- package/skills/helios-tax-analyst/SKILL.md +0 -280
|
@@ -20,11 +20,14 @@ if (!process.env.TZ) { process.env.TZ = 'UTC'; }
|
|
|
20
20
|
*/
|
|
21
21
|
|
|
22
22
|
const path = require('path');
|
|
23
|
-
const fs = require('fs');
|
|
24
|
-
const os = require('os');
|
|
25
|
-
const {
|
|
26
|
-
const {
|
|
27
|
-
const {
|
|
23
|
+
const fs = require('fs');
|
|
24
|
+
const os = require('os');
|
|
25
|
+
const { execFile } = require('child_process');
|
|
26
|
+
const { performance } = require('perf_hooks');
|
|
27
|
+
const { randomUUID } = require('crypto');
|
|
28
|
+
const { buildContextBrief } = require('./context-enrichment');
|
|
29
|
+
const hboStore = require('../lib/hbo-core-store');
|
|
30
|
+
const graphWal = require('../lib/graph-wal');
|
|
28
31
|
const { runMigrations } = require('./schema-migrations');
|
|
29
32
|
const { runHBOMigrations } = require('./schema-migrations-hbo');
|
|
30
33
|
const { runHaradaMigrations } = require('./schema-migrations-harada');
|
|
@@ -179,6 +182,14 @@ try {
|
|
|
179
182
|
process.stderr.write('[daemon] Generated API token at ' + tokenPath + '\n');
|
|
180
183
|
}
|
|
181
184
|
process.env.HELIOS_API_TOKEN = fs.readFileSync(tokenPath, 'utf-8').trim();
|
|
185
|
+
// Mirror token to data/api-token.txt so harbor tests can find it without
|
|
186
|
+
// knowing the platform-specific HELIOS_DATA path.
|
|
187
|
+
// Primary path: harbor test fixture reads HELIOS_ROOT/data/api-token.txt → sends Bearer token.
|
|
188
|
+
try {
|
|
189
|
+
const _repoTokenDir = path.join(HELIOS_ROOT, 'data');
|
|
190
|
+
if (!fs.existsSync(_repoTokenDir)) fs.mkdirSync(_repoTokenDir, { recursive: true });
|
|
191
|
+
fs.writeFileSync(path.join(_repoTokenDir, 'api-token.txt'), process.env.HELIOS_API_TOKEN, { mode: 0o600 });
|
|
192
|
+
} catch (_) { /* non-fatal: env var HELIOS_AGENT_TOKEN is the fallback */ }
|
|
182
193
|
} catch(e) {
|
|
183
194
|
process.stderr.write('[daemon] Warning: could not create/read API token: ' + e.message + '\n');
|
|
184
195
|
// Fallback: generate in-memory token (not persisted)
|
|
@@ -717,7 +728,22 @@ async function executeQueueAction(item) {
|
|
|
717
728
|
const assignee = (item.payload && item.payload.assigneeAgentId) || 'agent:default';
|
|
718
729
|
const cid = (item.payload && item.payload.companyId) || 'default';
|
|
719
730
|
|
|
720
|
-
|
|
731
|
+
// SQLite-first write (P2-2)
|
|
732
|
+
try { hboStore.createTask({
|
|
733
|
+
id: taskId,
|
|
734
|
+
companyId: cid,
|
|
735
|
+
title,
|
|
736
|
+
status: 'todo',
|
|
737
|
+
priority: 2,
|
|
738
|
+
assigneeAgentId: assignee,
|
|
739
|
+
body: (item.payload && item.payload.body) || '',
|
|
740
|
+
sourceItemId: item.target_id,
|
|
741
|
+
sourceChannel: item.channel,
|
|
742
|
+
progressPropagated: false,
|
|
743
|
+
createdAt: Date.now(),
|
|
744
|
+
}); } catch (_storeErr) { /* non-fatal: SQLite store unavailable */ }
|
|
745
|
+
// Non-blocking Memgraph projection (fire-and-forget)
|
|
746
|
+
setImmediate(() => mg.safeWrite(`
|
|
721
747
|
CREATE (t:Task {
|
|
722
748
|
id: $taskId,
|
|
723
749
|
companyId: $cid,
|
|
@@ -739,7 +765,7 @@ async function executeQueueAction(item) {
|
|
|
739
765
|
body: (item.payload && item.payload.body) || '',
|
|
740
766
|
sourceId: item.target_id,
|
|
741
767
|
channel: item.channel,
|
|
742
|
-
});
|
|
768
|
+
}).catch(e => console.warn('[daemon] Memgraph Task projection failed (non-fatal):', e.message)));
|
|
743
769
|
return;
|
|
744
770
|
}
|
|
745
771
|
|
|
@@ -1034,7 +1060,7 @@ class RoutineEvaluator {
|
|
|
1034
1060
|
// milliseconds-and-Z suffix produced by toISOString(). No change needed.
|
|
1035
1061
|
const now = new Date().toISOString().replace(/\.\d{3}Z$/, '+00:00');
|
|
1036
1062
|
dueRoutines = await this.mg(
|
|
1037
|
-
`MATCH (r:Routine {companyId: $companyId}) WHERE r.status = 'active' AND r.nextRunAt <= datetime($now) RETURN r.id, r.name, r.cronExpr, r.agentId, r.companyId, r.concurrencyPolicy, r.timezone`,
|
|
1063
|
+
`MATCH (r:Routine {companyId: $companyId}) WHERE r.status = 'active' AND r.nextRunAt <= datetime($now) RETURN r.id, r.name, r.cronExpr, r.agentId, r.companyId, r.concurrencyPolicy, r.timezone, r.catchUpCap, r.catchUpPolicy`,
|
|
1038
1064
|
{ now, companyId: this.companyId }
|
|
1039
1065
|
);
|
|
1040
1066
|
} catch (err) {
|
|
@@ -1062,11 +1088,79 @@ class RoutineEvaluator {
|
|
|
1062
1088
|
}
|
|
1063
1089
|
}
|
|
1064
1090
|
|
|
1091
|
+
// P5-03: coalesce_if_active — skip creating a full run but queue one follow-up RoutineRun
|
|
1092
|
+
if (routine['r.concurrencyPolicy'] === 'coalesce_if_active') {
|
|
1093
|
+
const active = await this.mg(
|
|
1094
|
+
`MATCH (rr:RoutineRun {routineId: $rid}) WHERE rr.status IN ['queued', 'running'] RETURN count(rr) as cnt`,
|
|
1095
|
+
{ rid: routineId }
|
|
1096
|
+
);
|
|
1097
|
+
const activeCount = active?.rows?.[0]?.[0] ?? 0;
|
|
1098
|
+
if (activeCount > 0) {
|
|
1099
|
+
// Check if a coalesced follow-up already exists (prevent duplicate queuing)
|
|
1100
|
+
const queuedFollowUp = await this.mg(
|
|
1101
|
+
`MATCH (rr:RoutineRun {routineId: $rid, status: 'queued_coalesced'}) RETURN count(rr) as cnt`,
|
|
1102
|
+
{ rid: routineId }
|
|
1103
|
+
);
|
|
1104
|
+
if ((queuedFollowUp?.rows?.[0]?.[0] ?? 0) === 0) {
|
|
1105
|
+
const followUpRunId = requireRunId(`run:${routineId}:coalesced:${randomUUID()}`, 'RoutineEvaluator.coalesce');
|
|
1106
|
+
await this.mg(
|
|
1107
|
+
`MERGE (rr:RoutineRun {id: $runId}) SET rr.routineId = $routineId, rr.status = 'queued_coalesced', rr.companyId = $companyId, rr.queuedAt = datetime()`,
|
|
1108
|
+
{ runId: followUpRunId, routineId, companyId: this.companyId }
|
|
1109
|
+
).catch(e => log('warn', `RoutineEvaluator: coalesce follow-up failed: ${e.message}`));
|
|
1110
|
+
}
|
|
1111
|
+
log('debug', `Coalescing routine ${routine['r.name']} — queued follow-up`);
|
|
1112
|
+
continue;
|
|
1113
|
+
}
|
|
1114
|
+
}
|
|
1115
|
+
|
|
1065
1116
|
const routineAgentId = routine['r.agentId'];
|
|
1066
1117
|
if (!routineAgentId) {
|
|
1067
1118
|
log('warn', `RoutineEvaluator: routine ${routineId} has no agentId — skipping task creation`);
|
|
1068
1119
|
continue;
|
|
1069
1120
|
}
|
|
1121
|
+
|
|
1122
|
+
// P5-04: catchUpCap — enqueue missed windows up to cap
|
|
1123
|
+
// This fires BEFORE the normal single-task creation to batch missed runs first.
|
|
1124
|
+
const catchUpPolicy = routine['r.catchUpPolicy'];
|
|
1125
|
+
const catchUpCap = parseInt(routine['r.catchUpCap'] ?? '0', 10) || 0;
|
|
1126
|
+
if (catchUpPolicy === 'enqueue_missed_with_cap' && catchUpCap > 0) {
|
|
1127
|
+
try {
|
|
1128
|
+
const { Cron } = require('croner');
|
|
1129
|
+
const cron = new Cron(routine['r.cronExpr'], { timezone: routine['r.timezone'] });
|
|
1130
|
+
// Count missed windows: how many times cron fired between lastRunAt and now
|
|
1131
|
+
// Simple approximation: count backward from now until we hit lastRunAt or cap
|
|
1132
|
+
let missedCount = 0;
|
|
1133
|
+
const prev = cron.previousRun ? cron.previousRun() : null;
|
|
1134
|
+
// Since we don't have a full missed-window iterator here, use a conservative
|
|
1135
|
+
// estimate: check if at least one missed window exists and enqueue up to cap
|
|
1136
|
+
// by repeatedly calling previousRun. Max cap iterations.
|
|
1137
|
+
let checkDate = prev;
|
|
1138
|
+
const lastRunStr = routine['r.lastRunAt'];
|
|
1139
|
+
const lastRunMs = lastRunStr ? Date.parse(lastRunStr) : 0;
|
|
1140
|
+
while (checkDate && missedCount < catchUpCap) {
|
|
1141
|
+
if (checkDate.getTime() <= lastRunMs) break;
|
|
1142
|
+
missedCount++;
|
|
1143
|
+
checkDate = cron.previousRun ? cron.previousRun() : null;
|
|
1144
|
+
}
|
|
1145
|
+
cron.stop();
|
|
1146
|
+
for (let i = 0; i < missedCount; i++) {
|
|
1147
|
+
const catchUpTaskId = `task:routine:${routineId}:catchup:${i}:${randomUUID()}`;
|
|
1148
|
+
const catchUpRunId = requireRunId(`run:${routineId}:catchup:${i}:${randomUUID()}`, 'RoutineEvaluator.catchup');
|
|
1149
|
+
await this.mg(
|
|
1150
|
+
`MERGE (t:Task {id: $taskId}) SET t.title = $title, t.status = 'todo', t.assigneeAgentId = $agentId, t.companyId = $companyId, t.originKind = 'routine_catchup', t.progressPropagated = false, t.createdAt = datetime()`,
|
|
1151
|
+
{ taskId: catchUpTaskId, title: `Routine catch-up: ${routine['r.name']}`, agentId: routineAgentId, companyId: this.companyId }
|
|
1152
|
+
).catch(e => log('warn', `RoutineEvaluator: catch-up task create failed: ${e.message}`));
|
|
1153
|
+
await this.mg(
|
|
1154
|
+
`MERGE (rr:RoutineRun {id: $runId}) SET rr.routineId = $routineId, rr.status = 'queued', rr.linkedTaskId = $taskId, rr.companyId = $companyId, rr.queuedAt = datetime()`,
|
|
1155
|
+
{ runId: catchUpRunId, routineId, taskId: catchUpTaskId, companyId: this.companyId }
|
|
1156
|
+
).catch(e => log('warn', `RoutineEvaluator: catch-up run create failed: ${e.message}`));
|
|
1157
|
+
}
|
|
1158
|
+
if (missedCount > 0) log('info', `RoutineEvaluator: catch-up ${missedCount} tasks for ${routine['r.name']}`);
|
|
1159
|
+
} catch (catchUpErr) {
|
|
1160
|
+
log('warn', `RoutineEvaluator: catchUpCap logic failed for ${routineId}: ${catchUpErr.message}`);
|
|
1161
|
+
}
|
|
1162
|
+
}
|
|
1163
|
+
|
|
1070
1164
|
const taskId = `task:routine:${routineId}:${randomUUID()}`;
|
|
1071
1165
|
await criticalOp(
|
|
1072
1166
|
() => this.mg(
|
|
@@ -1223,12 +1317,26 @@ class BudgetEnforcer {
|
|
|
1223
1317
|
|
|
1224
1318
|
let inProgress;
|
|
1225
1319
|
try {
|
|
1226
|
-
//
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1320
|
+
// Memgraph primary — SQLite fallback on unavailability
|
|
1321
|
+
try {
|
|
1322
|
+
// Retry up to 5 times (500ms apart) in case Memgraph snapshot isolation
|
|
1323
|
+
// hasn't propagated a recently-committed write yet.
|
|
1324
|
+
for (let attempt = 0; attempt < 5; attempt++) {
|
|
1325
|
+
inProgress = await this.mg(query, params);
|
|
1326
|
+
if (inProgress?.rows?.length > 0) break;
|
|
1327
|
+
if (attempt < 4) await new Promise(r => setTimeout(r, 500));
|
|
1328
|
+
}
|
|
1329
|
+
} catch (mgErr) {
|
|
1330
|
+
// Memgraph unavailable — fall back to SQLite
|
|
1331
|
+
if (hboStore.getTasksByCompanyStatus) {
|
|
1332
|
+
const storeRows = hboStore.getTasksByCompanyStatus(this.companyId, 'in_progress');
|
|
1333
|
+
const filtered = agentId ? storeRows.filter(t => t.assigneeAgentId === agentId) : storeRows;
|
|
1334
|
+
inProgress = {
|
|
1335
|
+
rows: filtered.map(t => [t.id, t.heliosRunId ?? null, t.dispatchedViaTUI ?? null, t.assigneeAgentId ?? null]),
|
|
1336
|
+
keys: ['t.id', 't.heliosRunId', 't.dispatchedViaTUI', 't.assigneeAgentId'],
|
|
1337
|
+
};
|
|
1338
|
+
log('info', `CancelInFlight: using SQLite fallback for in-progress task lookup (Memgraph unavailable): ${mgErr.message}`);
|
|
1339
|
+
}
|
|
1232
1340
|
}
|
|
1233
1341
|
} catch (e) {
|
|
1234
1342
|
log('error', `BudgetEnforcer: failed to query in-flight tasks: ${e.message}`);
|
|
@@ -1260,10 +1368,13 @@ class BudgetEnforcer {
|
|
|
1260
1368
|
log('error', `BudgetEnforcer: failed to cancel TUI run ${heliosRunId} for task ${taskId} after 3 retries (H3 watchdog will clean up)`);
|
|
1261
1369
|
}
|
|
1262
1370
|
}
|
|
1263
|
-
|
|
1371
|
+
// SQLite-first update (P2-4)
|
|
1372
|
+
try { hboStore.updateTask(taskRow[0], this.companyId, { status: 'todo', executionLockedAt: null, executionAgentId: null, dispatchedViaTUI: null, heliosRunId: null }); } catch (_) {}
|
|
1373
|
+
// Non-blocking Memgraph projection (fire-and-forget)
|
|
1374
|
+
setImmediate(() => this.mg(
|
|
1264
1375
|
`MATCH (t:Task {id: $taskId}) SET t.status = 'todo', t.executionLockedAt = null, t.executionAgentId = null, t.dispatchedViaTUI = null, t.heliosRunId = null`,
|
|
1265
1376
|
{ taskId: taskRow[0] }
|
|
1266
|
-
).catch(e => log('error', `BudgetEnforcer: failed to reset task ${taskRow[0]} after cancellation: ${e.message}`));
|
|
1377
|
+
).catch(e => log('error', `BudgetEnforcer: failed to reset task ${taskRow[0]} after cancellation: ${e.message}`)));
|
|
1267
1378
|
});
|
|
1268
1379
|
|
|
1269
1380
|
await Promise.race([
|
|
@@ -1273,14 +1384,30 @@ class BudgetEnforcer {
|
|
|
1273
1384
|
}
|
|
1274
1385
|
|
|
1275
1386
|
async enforce() {
|
|
1276
|
-
// Query 1: Get all policies
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1387
|
+
// Query 1: Get all policies — Memgraph primary, SQLite fallback on unavailability
|
|
1388
|
+
let policyRows = [];
|
|
1389
|
+
try {
|
|
1390
|
+
const policiesResult = await this.mg(
|
|
1391
|
+
`MATCH (bp:BudgetPolicy {companyId: $cid})
|
|
1392
|
+
RETURN bp.id, bp.scope, bp.agentId, bp.limitCents, bp.warnPercent, bp.hardStopEnabled`,
|
|
1393
|
+
{ cid: this.companyId }
|
|
1394
|
+
);
|
|
1395
|
+
policyRows = policiesResult?.rows ?? [];
|
|
1396
|
+
} catch (mgErr) {
|
|
1397
|
+
// Memgraph unavailable — fall back to SQLite budget policies
|
|
1398
|
+
if (hboStore.getBudgetPoliciesByCompany) {
|
|
1399
|
+
const storePolicies = hboStore.getBudgetPoliciesByCompany(this.companyId);
|
|
1400
|
+
policyRows = storePolicies.map(bp => [
|
|
1401
|
+
bp.id,
|
|
1402
|
+
bp.scope,
|
|
1403
|
+
bp.agent_id ?? bp.agentId,
|
|
1404
|
+
bp.limit_cents ?? bp.limitCents,
|
|
1405
|
+
bp.warn_percent ?? bp.warnPercent,
|
|
1406
|
+
bp.hard_stop_enabled ?? bp.hardStopEnabled,
|
|
1407
|
+
]);
|
|
1408
|
+
log('info', `BudgetEnforcer: using SQLite fallback for policy lookup (Memgraph unavailable): ${mgErr.message}`);
|
|
1409
|
+
}
|
|
1410
|
+
}
|
|
1284
1411
|
|
|
1285
1412
|
let blocked = false;
|
|
1286
1413
|
let warningActive = false;
|
|
@@ -1309,12 +1436,21 @@ class BudgetEnforcer {
|
|
|
1309
1436
|
const end = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth() + 1, 1)).toISOString();
|
|
1310
1437
|
|
|
1311
1438
|
// Query 2: Spend per agent THIS MONTH (fast — time-bounded, indexed)
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1439
|
+
// Memgraph primary — SQLite has no CostEvent fallback so default to empty spend on unavailability
|
|
1440
|
+
let spendRows = { rows: [] };
|
|
1441
|
+
try {
|
|
1442
|
+
spendRows = await this.mg(
|
|
1443
|
+
`MATCH (ce:CostEvent {companyId: $cid})
|
|
1444
|
+
WHERE ce.createdAt >= datetime($start) AND ce.createdAt < datetime($end)
|
|
1445
|
+
RETURN ce.agentId, sum(ce.costCents) as total`,
|
|
1446
|
+
{ cid: this.companyId, start: start.replace(/\.\d{3}Z$/, '+00:00'), end: end.replace(/\.\d{3}Z$/, '+00:00') }
|
|
1447
|
+
);
|
|
1448
|
+
} catch (mgSpendErr) {
|
|
1449
|
+
log('info', `BudgetEnforcer: Memgraph unavailable for spend query — using zero spend (${mgSpendErr.message})`);
|
|
1450
|
+
// spendRows stays { rows: [] } — enforcement proceeds with zero observed spend,
|
|
1451
|
+
// which means soft/hard thresholds will not trigger. This is the safe fallback:
|
|
1452
|
+
// better to not block agents than to falsely block them on a stale DB state.
|
|
1453
|
+
}
|
|
1318
1454
|
|
|
1319
1455
|
// JS join (microseconds — no cartesian product)
|
|
1320
1456
|
const neo4j = require('neo4j-driver');
|
|
@@ -1334,29 +1470,39 @@ class BudgetEnforcer {
|
|
|
1334
1470
|
const pct = limit > 0 ? (spent * 100 / limit) : 0;
|
|
1335
1471
|
const warnThreshold = toNum(warnPercent) || 80;
|
|
1336
1472
|
|
|
1337
|
-
// Update policy in Memgraph
|
|
1338
|
-
|
|
1473
|
+
// Update policy spend in Memgraph — fire-and-forget so Memgraph downtime
|
|
1474
|
+
// does not abort the loop. SQLite hbo-core-store is the authoritative store.
|
|
1475
|
+
setImmediate(() => this.mg(
|
|
1339
1476
|
`MATCH (bp:BudgetPolicy {id: $id}) SET bp.spentCents = $spent, bp.percentUsed = $pct`,
|
|
1340
1477
|
{ id, spent, pct }
|
|
1341
|
-
);
|
|
1478
|
+
).catch(e => log('info', `BudgetEnforcer: spend update projection failed (non-fatal): ${e.message}`)));
|
|
1342
1479
|
|
|
1343
1480
|
policies.push({ id, scope, agentId, limitCents: limit, spentCents: spent, percentUsed: pct, status: 'active' });
|
|
1344
1481
|
|
|
1345
1482
|
// Soft incident at warnPercent (idempotent)
|
|
1346
1483
|
if (pct >= warnThreshold && pct < 100) {
|
|
1347
1484
|
warningActive = true;
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
)
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1485
|
+
// Use fire-and-forget for incident creation so Memgraph downtime does not abort enforce()
|
|
1486
|
+
const _mgThis = this.mg.bind(this);
|
|
1487
|
+
const _cid = this.companyId;
|
|
1488
|
+
setImmediate(async () => {
|
|
1489
|
+
try {
|
|
1490
|
+
const existing = await _mgThis(
|
|
1491
|
+
`MATCH (bi:BudgetIncident {policyId: $pid, companyId: $cid, thresholdType: 'soft', status: 'open'}) RETURN bi.id LIMIT 1`,
|
|
1492
|
+
{ pid: id, cid: _cid }
|
|
1493
|
+
);
|
|
1494
|
+
if (!existing?.rows?.length) {
|
|
1495
|
+
const incidentId = `bi:soft:${id}:${Date.now()}`;
|
|
1496
|
+
await _mgThis(
|
|
1497
|
+
`CREATE (bi:BudgetIncident {id: $id, companyId: $cid, policyId: $pid, scopeType: $scopeType, scopeId: $scopeId, thresholdType: 'soft', amountLimit: $limit, amountObserved: $spent, status: 'open', createdAt: datetime()})`,
|
|
1498
|
+
{ id: incidentId, cid: _cid, pid: id, scopeType: scope, scopeId: agentId ?? scope, limit, spent }
|
|
1499
|
+
);
|
|
1500
|
+
log('info', `BudgetEnforcer: Soft budget incident created for ${agentId ?? 'global'}`, { pct, warnThreshold });
|
|
1501
|
+
}
|
|
1502
|
+
} catch (e) {
|
|
1503
|
+
log('info', `BudgetEnforcer: soft incident projection failed (non-fatal): ${e.message}`);
|
|
1504
|
+
}
|
|
1505
|
+
});
|
|
1360
1506
|
if (scope === 'global') {
|
|
1361
1507
|
log('info', `BudgetEnforcer: Global budget warning active (>${warnThreshold}%)`, { percentUsed: pct });
|
|
1362
1508
|
}
|
|
@@ -1368,18 +1514,31 @@ class BudgetEnforcer {
|
|
|
1368
1514
|
const existingHard = await this.mg(
|
|
1369
1515
|
`MATCH (bi:BudgetIncident {policyId: $pid, companyId: $cid, thresholdType: 'hard', status: 'open'}) RETURN bi.id LIMIT 1`,
|
|
1370
1516
|
{ pid: id, cid: this.companyId }
|
|
1371
|
-
);
|
|
1517
|
+
).catch(() => ({ rows: [] })); // treat Memgraph unavailability as "no existing incident"
|
|
1372
1518
|
if (!existingHard?.rows?.length) {
|
|
1373
1519
|
const incidentId = `bi:hard:${id}:${Date.now()}`;
|
|
1374
1520
|
const approvalId = `approval:budget:${id}:${Date.now()}`;
|
|
1375
|
-
|
|
1521
|
+
// SQLite-first: write Approval to SQLite before Memgraph so if Memgraph
|
|
1522
|
+
// is down the approval still exists and budget enforcement still functions (F7).
|
|
1523
|
+
try {
|
|
1524
|
+
hboStore.createApproval({
|
|
1525
|
+
id: approvalId, companyId: this.companyId, type: 'budget_exceeded',
|
|
1526
|
+
title: `Budget exceeded for ${agentId ?? 'global'} — raise limit to resume`,
|
|
1527
|
+
requestedBy: agentId ?? 'agent:ceo', status: 'pending', followUpTaskCreated: false, createdAt: Date.now(),
|
|
1528
|
+
});
|
|
1529
|
+
} catch (storeErr) {
|
|
1530
|
+
log('warn', `BudgetEnforcer: SQLite approval write failed (non-fatal): ${storeErr.message}`);
|
|
1531
|
+
}
|
|
1532
|
+
// Memgraph projections — fire-and-forget so Memgraph downtime does not crash enforce()
|
|
1533
|
+
const _mgThis = this.mg.bind(this);
|
|
1534
|
+
setImmediate(() => _mgThis(
|
|
1376
1535
|
`CREATE (bi:BudgetIncident {id: $incId, companyId: $cid, policyId: $pid, scopeType: $scopeType, scopeId: $scopeId, thresholdType: 'hard', amountLimit: $limit, amountObserved: $spent, status: 'open', approvalId: $apId, createdAt: datetime()})`,
|
|
1377
1536
|
{ incId: incidentId, cid: this.companyId, pid: id, scopeType: scope, scopeId: agentId ?? scope, limit, spent, apId: approvalId }
|
|
1378
|
-
);
|
|
1379
|
-
|
|
1537
|
+
).catch(e => log('warn', `BudgetEnforcer: BudgetIncident Memgraph projection failed: ${e.message}`)));
|
|
1538
|
+
setImmediate(() => _mgThis(
|
|
1380
1539
|
`CREATE (a:Approval {id: $id, companyId: $cid, type: 'budget_exceeded', title: $title, requestedBy: $agentId, status: 'pending', followUpTaskCreated: false, createdAt: datetime()})`,
|
|
1381
1540
|
{ id: approvalId, cid: this.companyId, title: `Budget exceeded for ${agentId ?? 'global'} — raise limit to resume`, agentId: agentId ?? 'agent:ceo' }
|
|
1382
|
-
);
|
|
1541
|
+
).catch(e => log('warn', `BudgetEnforcer: Approval Memgraph projection failed: ${e.message}`)));
|
|
1383
1542
|
// Day 5: Emit BUDGET_EXCEEDED P0 AnomalySignal.
|
|
1384
1543
|
// OPTIONAL MATCH dedup guard — Tournament winner: Candidate C.
|
|
1385
1544
|
// P0 because a hard budget stop halts all agent work immediately.
|
|
@@ -1463,12 +1622,15 @@ class AgentDispatcher {
|
|
|
1463
1622
|
* @param {Function} spawnFn - optional spawn override (for testing)
|
|
1464
1623
|
* @param {object} _testConfig - optional config override (for testing only, replaces _daemonConfig)
|
|
1465
1624
|
* @param {object} registry - optional AdapterRegistry instance
|
|
1625
|
+
* @param {Function} broadcastFn - optional closure for SSE broadcast; use (...args) => daemon._broadcast?.(...args)
|
|
1626
|
+
* so the live broadcast reference is resolved at call time, not construction time.
|
|
1466
1627
|
*/
|
|
1467
|
-
constructor(mgQuery, companyId, spawnFn = null, _testConfig = null, registry = null) {
|
|
1628
|
+
constructor(mgQuery, companyId, spawnFn = null, _testConfig = null, registry = null, broadcastFn = null) {
|
|
1468
1629
|
if (!companyId) throw new Error('AgentDispatcher: companyId required');
|
|
1469
1630
|
this.mg = mgQuery;
|
|
1470
1631
|
this.companyId = companyId;
|
|
1471
1632
|
this._spawnFn = spawnFn;
|
|
1633
|
+
this._daemonBroadcast = typeof broadcastFn === 'function' ? broadcastFn : null;
|
|
1472
1634
|
this._config = _testConfig !== null ? _testConfig : _daemonConfig;
|
|
1473
1635
|
// M-11: Use per-company config agents, not the module-level first-company alias
|
|
1474
1636
|
const _perCompanyCfg = _allCompanyConfigs.find(c =>
|
|
@@ -1488,13 +1650,18 @@ class AgentDispatcher {
|
|
|
1488
1650
|
const agentHelios = heliosConfig.agents?.[agentId] ?? {};
|
|
1489
1651
|
if (!agentHelios.apiKey) return null;
|
|
1490
1652
|
|
|
1491
|
-
// Check if issue already exists on task node
|
|
1653
|
+
// Check if issue already exists on task node — SQLite-first (P2-4)
|
|
1492
1654
|
try {
|
|
1493
|
-
const
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1655
|
+
const _storeTask = hboStore.getTask ? hboStore.getTask(taskId, this.companyId) : null;
|
|
1656
|
+
if (_storeTask && _storeTask.heliosIssueId) return _storeTask.heliosIssueId;
|
|
1657
|
+
if (!_storeTask) {
|
|
1658
|
+
// Fallback to Memgraph
|
|
1659
|
+
const existing = await this.mg(
|
|
1660
|
+
`MATCH (t:Task {id: $taskId}) WHERE t.heliosIssueId IS NOT NULL RETURN t.heliosIssueId`,
|
|
1661
|
+
{ taskId }
|
|
1662
|
+
);
|
|
1663
|
+
if (existing?.rows?.length) return existing.rows[0][0];
|
|
1664
|
+
}
|
|
1498
1665
|
} catch (e) { /* ignore */ }
|
|
1499
1666
|
|
|
1500
1667
|
// Create new issue in Helios TUI
|
|
@@ -1511,11 +1678,13 @@ class AgentDispatcher {
|
|
|
1511
1678
|
if (!resp.ok) return null;
|
|
1512
1679
|
const issue = await resp.json();
|
|
1513
1680
|
const issueId = issue.id;
|
|
1514
|
-
//
|
|
1515
|
-
|
|
1681
|
+
// SQLite-first update (P2-4)
|
|
1682
|
+
try { hboStore.updateTask(taskId, this.companyId, { heliosIssueId: issueId }); } catch (_) {}
|
|
1683
|
+
// Non-blocking Memgraph projection (fire-and-forget)
|
|
1684
|
+
setImmediate(() => this.mg(
|
|
1516
1685
|
`MATCH (t:Task {id: $taskId}) SET t.heliosIssueId = $issueId`,
|
|
1517
1686
|
{ taskId, issueId }
|
|
1518
|
-
).catch(e => log('warn', `Failed to store heliosIssueId on task ${taskId}: ${e.message}`));
|
|
1687
|
+
).catch(e => log('warn', `Failed to store heliosIssueId on task ${taskId}: ${e.message}`)));
|
|
1519
1688
|
return issueId;
|
|
1520
1689
|
} catch (e) {
|
|
1521
1690
|
log('warn', `_ensureHeliosIssue failed for task ${taskId}: ${e.message}`);
|
|
@@ -1647,28 +1816,55 @@ class AgentDispatcher {
|
|
|
1647
1816
|
const signalId = sigRow[0] ?? sigRow['signalId'];
|
|
1648
1817
|
const agentId = sigRow[1] ?? sigRow['agentId'];
|
|
1649
1818
|
let taskResult;
|
|
1819
|
+
// Hoist taskRow before try so it remains in scope at the outer if (!taskRow) check (T20-B)
|
|
1820
|
+
let taskRow = null;
|
|
1650
1821
|
try {
|
|
1651
|
-
//
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
|
|
1666
|
-
|
|
1822
|
+
// Memgraph primary — SQLite is the fallback when Memgraph throws (e.g. unavailable)
|
|
1823
|
+
try {
|
|
1824
|
+
// CONFLICT 004: CEO inbox priority — P0 first, then P1, then OKRFeedback-origin, then by priority/createdAt
|
|
1825
|
+
taskResult = await this.mg(
|
|
1826
|
+
`MATCH (t:Task {status: 'todo', companyId: $companyId, assigneeAgentId: $agentId})
|
|
1827
|
+
WHERE t.title IS NOT NULL
|
|
1828
|
+
WITH t,
|
|
1829
|
+
CASE
|
|
1830
|
+
WHEN toInteger(coalesce(t.priority, 3)) = 0 THEN 0
|
|
1831
|
+
WHEN toInteger(coalesce(t.priority, 3)) = 1 THEN 1
|
|
1832
|
+
WHEN t.originKind = 'okr_feedback' THEN 2
|
|
1833
|
+
ELSE toInteger(coalesce(t.priority, 3)) + 2
|
|
1834
|
+
END AS sortPriority
|
|
1835
|
+
RETURN t.id AS taskId, t.title AS title, t.originKind AS originKind, t.body AS body, t.priority AS priority
|
|
1836
|
+
ORDER BY sortPriority ASC, t.createdAt ASC
|
|
1837
|
+
LIMIT 1`,
|
|
1838
|
+
{ companyId: this.companyId, agentId }
|
|
1839
|
+
);
|
|
1840
|
+
taskRow = taskResult?.rows?.[0];
|
|
1841
|
+
} catch (mgErr) {
|
|
1842
|
+
// Memgraph unavailable — fall back to SQLite
|
|
1843
|
+
if (hboStore.getTasksByCompanyStatus) {
|
|
1844
|
+
const storeTodos = hboStore.getTasksByCompanyStatus(this.companyId, 'todo')
|
|
1845
|
+
.filter(t => t.assigneeAgentId === agentId && t.title);
|
|
1846
|
+
storeTodos.sort((a, b) => {
|
|
1847
|
+
const _sortPri = t => {
|
|
1848
|
+
const p = parseInt(t.priority ?? 3, 10);
|
|
1849
|
+
if (p === 0) return 0;
|
|
1850
|
+
if (p === 1) return 1;
|
|
1851
|
+
if (t.originKind === 'okr_feedback') return 2;
|
|
1852
|
+
return (isNaN(p) ? 3 : p) + 2;
|
|
1853
|
+
};
|
|
1854
|
+
const diff = _sortPri(a) - _sortPri(b);
|
|
1855
|
+
return diff !== 0 ? diff : (a.createdAt ?? 0) - (b.createdAt ?? 0);
|
|
1856
|
+
});
|
|
1857
|
+
const best = storeTodos.find(t => !claimedTaskIds.has(t.id));
|
|
1858
|
+
if (best) {
|
|
1859
|
+
taskRow = [best.id, best.title, best.originKind ?? null, best.body ?? null, best.priority ?? 3];
|
|
1860
|
+
log('info', `AgentDispatcher: using SQLite fallback for task lookup (Memgraph unavailable): ${mgErr.message}`);
|
|
1861
|
+
}
|
|
1862
|
+
}
|
|
1863
|
+
}
|
|
1667
1864
|
} catch (err) {
|
|
1668
1865
|
log('warn', `AgentDispatcher: task lookup failed for agent ${agentId}: ${err.message}`);
|
|
1669
1866
|
continue;
|
|
1670
1867
|
}
|
|
1671
|
-
const taskRow = taskResult?.rows?.[0];
|
|
1672
1868
|
if (!taskRow) continue;
|
|
1673
1869
|
const taskId = taskRow[0] ?? taskRow['taskId'];
|
|
1674
1870
|
const title = taskRow[1] ?? taskRow['title'];
|
|
@@ -1816,9 +2012,10 @@ class AgentDispatcher {
|
|
|
1816
2012
|
|
|
1817
2013
|
try {
|
|
1818
2014
|
// Non-blocking dispatch: run task in background so tick doesn't stall.
|
|
1819
|
-
// TaskCompletionWatchdog
|
|
1820
|
-
//
|
|
1821
|
-
|
|
2015
|
+
// TaskCompletionWatchdog reverts non-TUI tasks after taskTimeoutMs (default 30min).
|
|
2016
|
+
// TUI tasks are reverted after 30min (hardcoded in watchdog).
|
|
2017
|
+
// This Promise.race adds 60s grace above the configured timeout.
|
|
2018
|
+
const TASK_DISPATCH_TIMEOUT_MS = (_daemonConfig.taskTimeoutMs ?? 1800000) + 60000; // taskTimeout + 60s grace
|
|
1822
2019
|
// TUI-09: pass pre-built contextBrief so TuiWakeupAdapter.execute() skips its own
|
|
1823
2020
|
// buildContextBrief() call — prevents the expensive double-query on this path.
|
|
1824
2021
|
const taskPromise = adapter.execute(adapterContext, _adapterContextBrief);
|
|
@@ -2245,6 +2442,10 @@ class AgentDispatcher {
|
|
|
2245
2442
|
).catch(err => log('warn', `Failed to clear currentTaskId for ${agentId}: ${err.message}`));
|
|
2246
2443
|
// Pull-dispatch: agent is free — emit AgentReadySignal so next task can be dispatched.
|
|
2247
2444
|
await this._emitAgentReadySignal(agentId);
|
|
2445
|
+
// DSP-01: push task.updated SSE event for the direct-spawn completion path.
|
|
2446
|
+
// Uses this._daemonBroadcast (closure passed at construction) — NOT daemon._broadcast
|
|
2447
|
+
// which is not in scope inside AgentDispatcher.
|
|
2448
|
+
this._daemonBroadcast?.({ type: 'task.updated', taskId, status: newStatus, companyId: this.companyId });
|
|
2248
2449
|
log('info', `Task ${taskId} ${newStatus} (exit code: ${code})`);
|
|
2249
2450
|
} catch (err) {
|
|
2250
2451
|
log('warn', `Failed to update task ${taskId} completion: ${err.message}`);
|
|
@@ -2604,10 +2805,15 @@ class CostEventSyncer {
|
|
|
2604
2805
|
}
|
|
2605
2806
|
|
|
2606
2807
|
class TaskCompletionWatchdog {
|
|
2607
|
-
constructor(mgQuery, companyId) {
|
|
2808
|
+
constructor(mgQuery, companyId, taskTimeoutMs = 1800000) {
|
|
2608
2809
|
if (!companyId) throw new Error('TaskCompletionWatchdog: companyId required');
|
|
2609
2810
|
this.mg = mgQuery;
|
|
2610
2811
|
this.companyId = companyId;
|
|
2812
|
+
// Build ISO 8601 duration string for Cypher — passed as a parameter to avoid
|
|
2813
|
+
// hardcoded duration literals. 1800000ms → "PT1800S" (30 minutes default).
|
|
2814
|
+
// This is the non-TUI task timeout; TUI tasks always use the hardcoded PT30M path.
|
|
2815
|
+
this._nonTuiTimeoutDuration = `PT${Math.floor(taskTimeoutMs / 1000)}S`;
|
|
2816
|
+
this._nonTuiTimeoutMins = Math.round(taskTimeoutMs / 60000);
|
|
2611
2817
|
}
|
|
2612
2818
|
|
|
2613
2819
|
/**
|
|
@@ -2615,7 +2821,11 @@ class TaskCompletionWatchdog {
|
|
|
2615
2821
|
* Tournament winner: Candidate C — consistent with andon-tier1.js pattern,
|
|
2616
2822
|
* Memgraph-safe, no MERGE key uniqueness dependency.
|
|
2617
2823
|
*/
|
|
2618
|
-
|
|
2824
|
+
async _emitTaskTimeoutSignal(taskId, agentId) {
|
|
2825
|
+
// Build message in JavaScript before the Cypher call — avoids nested backtick
|
|
2826
|
+
// template literals (the outer Cypher string is a backtick; inner backtick would
|
|
2827
|
+
// close it prematurely). Passing as $message param is cleaner and correct.
|
|
2828
|
+
const _timeoutMsg = `Task exceeded ${this._nonTuiTimeoutMins}-minute completion timeout — reverted to todo`;
|
|
2619
2829
|
try {
|
|
2620
2830
|
await this.mg(
|
|
2621
2831
|
`OPTIONAL MATCH (existing:AnomalySignal {taskId: $taskId, signalType: 'TASK_TIMEOUT', status: 'open'})
|
|
@@ -2630,12 +2840,12 @@ class TaskCompletionWatchdog {
|
|
|
2630
2840
|
source: 'watchdog',
|
|
2631
2841
|
signalType: 'TASK_TIMEOUT',
|
|
2632
2842
|
severity: 'P1',
|
|
2633
|
-
|
|
2843
|
+
message: $message,
|
|
2634
2844
|
detectedAt: datetime(),
|
|
2635
2845
|
status: 'open'
|
|
2636
2846
|
})`,
|
|
2637
|
-
{ taskId, agentId: agentId ?? 'unknown', cid: this.companyId }
|
|
2638
|
-
);
|
|
2847
|
+
{ taskId, agentId: agentId ?? 'unknown', cid: this.companyId, message: _timeoutMsg }
|
|
2848
|
+
);
|
|
2639
2849
|
} catch (err) {
|
|
2640
2850
|
// Log at warn — a silent failure here leaves the Andon board stale.
|
|
2641
2851
|
const msg = JSON.stringify({ ts: new Date().toISOString(), level: 'warn', module: 'TaskCompletionWatchdog', msg: `TASK_TIMEOUT AnomalySignal write failed for ${taskId}: ${err.message}` });
|
|
@@ -2648,10 +2858,10 @@ class TaskCompletionWatchdog {
|
|
|
2648
2858
|
// TUI tasks can take >5 min (model probe + execution). Reverting them causes infinite cycling.
|
|
2649
2859
|
const stale = await this.mg(
|
|
2650
2860
|
`MATCH (t:Task {status: 'in_progress', companyId: $cid})
|
|
2651
|
-
WHERE t.executionLockedAt < datetime() - duration(
|
|
2861
|
+
WHERE t.executionLockedAt < datetime() - duration($timeout)
|
|
2652
2862
|
AND (t.dispatchedViaTUI IS NULL OR t.dispatchedViaTUI = false)
|
|
2653
2863
|
RETURN t.id, t.title, t.executionAgentId`,
|
|
2654
|
-
{ cid: this.companyId }
|
|
2864
|
+
{ cid: this.companyId, timeout: this._nonTuiTimeoutDuration }
|
|
2655
2865
|
);
|
|
2656
2866
|
|
|
2657
2867
|
const rows = stale?.rows ?? [];
|
|
@@ -2669,10 +2879,14 @@ class TaskCompletionWatchdog {
|
|
|
2669
2879
|
{ taskId }
|
|
2670
2880
|
);
|
|
2671
2881
|
|
|
2882
|
+
// Sync SQLite fallback store — keeps hbo-core.db consistent when Memgraph is unavailable.
|
|
2883
|
+
// Memgraph is primary; SQLite is the fallback path only (see the SQLite fallback in AgentDispatcher's task lookup).
|
|
2884
|
+
try { hboStore.updateTask(taskId, this.companyId, { status: 'todo', executionLockedAt: null, executionAgentId: null }); } catch (_) {}
|
|
2885
|
+
|
|
2672
2886
|
// Preserve PDSACycle with abandon decision — losing the PDSA context on
|
|
2673
2887
|
// revert would break the learning loop. Mark as abandoned so PDSACompletion
|
|
2674
2888
|
// doesn't re-process it and KnowledgeAsset extraction is skipped correctly.
|
|
2675
|
-
await this.
|
|
2889
|
+
await this.mg(
|
|
2676
2890
|
`MATCH (p:PDSACycle {taskId: $taskId})
|
|
2677
2891
|
WHERE p.actDecision = 'iterate' OR p.actDecision IS NULL
|
|
2678
2892
|
SET p.actDecision = 'abandon',
|
|
@@ -2691,7 +2905,28 @@ class TaskCompletionWatchdog {
|
|
|
2691
2905
|
// OPTIONAL MATCH dedup guard prevents duplicate signals for the same task.
|
|
2692
2906
|
await this._emitTaskTimeoutSignal(taskId, agentId);
|
|
2693
2907
|
|
|
2694
|
-
log('warn', `Task ${taskId} timed out after
|
|
2908
|
+
log('warn', `Task ${taskId} timed out after ${this._nonTuiTimeoutMins}min — reverting to todo`);
|
|
2909
|
+
|
|
2910
|
+
// Re-emit AgentReadySignal so the agent can be dispatched again on the next tick.
|
|
2911
|
+
// Without this, the agent stays idle forever after a timeout event.
|
|
2912
|
+
// Uses the same idempotent OPTIONAL MATCH pattern as AgentDispatcher._emitAgentReadySignal.
|
|
2913
|
+
if (agentId) {
|
|
2914
|
+
await this.mg(
|
|
2915
|
+
`MATCH (a:BusinessAgent {id: $agentId, companyId: $cid, status: 'active'})
|
|
2916
|
+
OPTIONAL MATCH (existing:AgentReadySignal {agentId: $agentId, companyId: $cid, status: 'pending'})
|
|
2917
|
+
WITH a, existing
|
|
2918
|
+
WHERE existing IS NULL
|
|
2919
|
+
CREATE (s:AgentReadySignal {
|
|
2920
|
+
id: randomUUID(),
|
|
2921
|
+
agentId: $agentId,
|
|
2922
|
+
companyId: $cid,
|
|
2923
|
+
status: 'pending',
|
|
2924
|
+
claimedBy: null,
|
|
2925
|
+
createdAt: datetime()
|
|
2926
|
+
})`,
|
|
2927
|
+
{ agentId, cid: this.companyId }
|
|
2928
|
+
).catch(() => {});
|
|
2929
|
+
}
|
|
2695
2930
|
}
|
|
2696
2931
|
|
|
2697
2932
|
// Timeout TUI-dispatched tasks stuck >30min
|
|
@@ -2710,8 +2945,11 @@ class TaskCompletionWatchdog {
|
|
|
2710
2945
|
`MATCH (t:Task {id: $taskId}) SET t.status = 'todo', t.executionLockedAt = null, t.executionAgentId = null, t.dispatchedViaTUI = null, t.heliosRunId = null, t.timedOutAt = datetime() RETURN t.id`,
|
|
2711
2946
|
{ taskId }
|
|
2712
2947
|
);
|
|
2948
|
+
|
|
2949
|
+
// Sync SQLite fallback store — also null dispatchedViaTUI and heliosRunId for TUI tasks.
|
|
2950
|
+
try { hboStore.updateTask(taskId, this.companyId, { status: 'todo', executionLockedAt: null, executionAgentId: null, dispatchedViaTUI: null, heliosRunId: null }); } catch (_) {}
|
|
2713
2951
|
// Preserve PDSACycle with abandon decision for TUI-timed-out tasks.
|
|
2714
|
-
await this.
|
|
2952
|
+
await this.mg(
|
|
2715
2953
|
`MATCH (p:PDSACycle {taskId: $taskId})
|
|
2716
2954
|
WHERE p.actDecision = 'iterate' OR p.actDecision IS NULL
|
|
2717
2955
|
SET p.actDecision = 'abandon',
|
|
@@ -2723,6 +2961,25 @@ class TaskCompletionWatchdog {
|
|
|
2723
2961
|
// Also emit for TUI-timed-out tasks (30-min threshold variant).
|
|
2724
2962
|
await this._emitTaskTimeoutSignal(taskId, agentId);
|
|
2725
2963
|
log('warn', `TaskCompletionWatchdog: TUI task ${taskId} timed out after 30min — reverting to todo`);
|
|
2964
|
+
|
|
2965
|
+
// Re-emit AgentReadySignal so the agent can be dispatched again on the next tick.
|
|
2966
|
+
if (agentId) {
|
|
2967
|
+
await this.mg(
|
|
2968
|
+
`MATCH (a:BusinessAgent {id: $agentId, companyId: $cid, status: 'active'})
|
|
2969
|
+
OPTIONAL MATCH (existing:AgentReadySignal {agentId: $agentId, companyId: $cid, status: 'pending'})
|
|
2970
|
+
WITH a, existing
|
|
2971
|
+
WHERE existing IS NULL
|
|
2972
|
+
CREATE (s:AgentReadySignal {
|
|
2973
|
+
id: randomUUID(),
|
|
2974
|
+
agentId: $agentId,
|
|
2975
|
+
companyId: $cid,
|
|
2976
|
+
status: 'pending',
|
|
2977
|
+
claimedBy: null,
|
|
2978
|
+
createdAt: datetime()
|
|
2979
|
+
})`,
|
|
2980
|
+
{ agentId, cid: this.companyId }
|
|
2981
|
+
).catch(() => {});
|
|
2982
|
+
}
|
|
2726
2983
|
}
|
|
2727
2984
|
}
|
|
2728
2985
|
}
|
|
@@ -2733,10 +2990,15 @@ class TaskCompletionWatchdog {
|
|
|
2733
2990
|
// Reverted twice (418b606f, 1bf902d0) — protected by __tests__/tui-integration.test.js
|
|
2734
2991
|
// See: .sisyphus/plans/talisman-daemon-wave8.md (EC-03)
|
|
2735
2992
|
class RunCompletionPoller {
|
|
2736
|
-
|
|
2993
|
+
// broadcastFn: optional closure — use (...args) => daemon._broadcast?.(...args) so the
|
|
2994
|
+
// live SSE function is resolved at call time (not at construction time, when it is still null).
|
|
2995
|
+
// completionProcessor: optional TaskCompletionProcessor instance for post-completion pipeline.
|
|
2996
|
+
constructor(mgQuery, companyId, broadcastFn, completionProcessor) {
|
|
2737
2997
|
if (!companyId) throw new Error('RunCompletionPoller: companyId required');
|
|
2738
2998
|
this.mg = mgQuery;
|
|
2739
2999
|
this.companyId = companyId;
|
|
3000
|
+
this._broadcastFn = typeof broadcastFn === 'function' ? broadcastFn : null;
|
|
3001
|
+
this._cp = completionProcessor ?? null;
|
|
2740
3002
|
}
|
|
2741
3003
|
|
|
2742
3004
|
async poll() {
|
|
@@ -2789,6 +3051,8 @@ class RunCompletionPoller {
|
|
|
2789
3051
|
const newStatus = (run.status === 'completed' || run.status === 'succeeded') ? 'done' : 'failed';
|
|
2790
3052
|
// FINDING-2 fix: criticalOp wrapper — revert to todo if markDone write fails,
|
|
2791
3053
|
// then re-emit AgentReadySignal so the agent is not permanently locked.
|
|
3054
|
+
// markDoneOk tracks success so downstream pipeline and broadcast are skipped on revert.
|
|
3055
|
+
let markDoneOk = true;
|
|
2792
3056
|
await criticalOp(
|
|
2793
3057
|
() => this.mg(
|
|
2794
3058
|
`MATCH (t:Task {id: $taskId})
|
|
@@ -2798,6 +3062,7 @@ class RunCompletionPoller {
|
|
|
2798
3062
|
),
|
|
2799
3063
|
{ module: 'RunCompletionPoller', operation: 'markDone', taskId, companyId: this.companyId }
|
|
2800
3064
|
).catch(async (markDoneErr) => {
|
|
3065
|
+
markDoneOk = false;
|
|
2801
3066
|
log('warn', `RunCompletionPoller: markDone failed for ${taskId} — reverting to todo: ${markDoneErr.message}`);
|
|
2802
3067
|
await this.mg(
|
|
2803
3068
|
`MATCH (t:Task {id: $taskId}) SET t.status = 'todo', t.executionLockedAt = null, t.executionAgentId = null, t.dispatchedViaTUI = null, t.heliosRunId = null`,
|
|
@@ -2805,11 +3070,26 @@ class RunCompletionPoller {
|
|
|
2805
3070
|
).catch(revertErr => log('warn', `RunCompletionPoller: failed reverting to todo for ${taskId}: ${revertErr.message}`));
|
|
2806
3071
|
await this._emitAgentReadySignal(agentId).catch(() => {});
|
|
2807
3072
|
});
|
|
3073
|
+
// If markDone was reverted, skip completion pipeline and broadcast — task is back in todo.
|
|
3074
|
+
if (!markDoneOk) continue;
|
|
2808
3075
|
await this.mg(
|
|
2809
3076
|
`MATCH (a:BusinessAgent {id: $agentId, companyId: $cid}) SET a.lastHeartbeatAt = toInteger(timestamp() / 1000)`,
|
|
2810
3077
|
{ agentId, cid: this.companyId }
|
|
2811
3078
|
).catch(err => log('warn', `RunCompletionPoller: failed to update heartbeat for ${agentId}: ${err.message}`));
|
|
2812
3079
|
log('info', `Task ${taskId} ${newStatus} (TUI run ${runId} → ${run.status})`);
|
|
3080
|
+
// RCP-01: run completion pipeline (TaskResult, OKR progress, CRM, PDSA) for TUI-completed tasks.
|
|
3081
|
+
if (newStatus === 'done' && this._cp) {
|
|
3082
|
+
this._cp.process(taskId, run.summary ?? '', {
|
|
3083
|
+
agentId,
|
|
3084
|
+
companyId: this.companyId,
|
|
3085
|
+
originKind: run.originKind ?? 'tui_wakeup',
|
|
3086
|
+
exitCode: 0,
|
|
3087
|
+
}).catch(e => log('warn', `RunCompletionPoller: completionProcessor failed for ${taskId}: ${e.message}`));
|
|
3088
|
+
}
|
|
3089
|
+
// RCP-02: push task.updated SSE event so the desktop UI updates reactively without polling.
|
|
3090
|
+
// _broadcastFn is the closure handed to the constructor (e.g. (...args) => daemon._broadcast?.(...args)),
|
|
3091
|
+
// so it correctly resolves the live broadcast reference even if SSE server started after construction.
|
|
3092
|
+
this._broadcastFn?.({ type: 'task.updated', taskId, status: newStatus, companyId: this.companyId });
|
|
2813
3093
|
// TUI-01: emit AgentReadySignal so the agent picks up the next task immediately.
|
|
2814
3094
|
await this._emitAgentReadySignal(agentId).catch(() => {});
|
|
2815
3095
|
} catch (e) {
|
|
@@ -2868,12 +3148,26 @@ class ApprovalWatcher {
|
|
|
2868
3148
|
async check() {
|
|
2869
3149
|
let approved;
|
|
2870
3150
|
try {
|
|
2871
|
-
|
|
2872
|
-
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
|
|
2876
|
-
|
|
3151
|
+
// Memgraph primary — SQLite fallback on unavailability
|
|
3152
|
+
try {
|
|
3153
|
+
approved = await this.mg(
|
|
3154
|
+
`MATCH (a:Approval {companyId: $cid, status: 'approved'})
|
|
3155
|
+
WHERE a.followUpTaskCreated IS NULL OR a.followUpTaskCreated = false
|
|
3156
|
+
RETURN a.id, a.title, a.requestedBy, a.type, a.strategyId`,
|
|
3157
|
+
{ cid: this.companyId }
|
|
3158
|
+
);
|
|
3159
|
+
} catch (mgErr) {
|
|
3160
|
+
// Memgraph unavailable — fall back to SQLite approvals
|
|
3161
|
+
if (hboStore.getApprovalsByCompanyStatus) {
|
|
3162
|
+
const storeApprovals = hboStore.getApprovalsByCompanyStatus(this.companyId, 'approved')
|
|
3163
|
+
.filter(a => !a.followUpTaskCreated);
|
|
3164
|
+
approved = {
|
|
3165
|
+
rows: storeApprovals.map(a => [a.id, a.title, a.requestedBy, a.type, a.strategyId ?? null]),
|
|
3166
|
+
keys: ['a.id', 'a.title', 'a.requestedBy', 'a.type', 'a.strategyId'],
|
|
3167
|
+
};
|
|
3168
|
+
log('info', `ApprovalWatcher: using SQLite fallback for approval lookup (Memgraph unavailable): ${mgErr.message}`);
|
|
3169
|
+
}
|
|
3170
|
+
}
|
|
2877
3171
|
} catch (err) {
|
|
2878
3172
|
log('warn', `ApprovalWatcher: query failed: ${err.message}`);
|
|
2879
3173
|
return;
|
|
@@ -2891,41 +3185,55 @@ class ApprovalWatcher {
|
|
|
2891
3185
|
const requestedBy = approval['a.requestedBy'] ?? 'agent:ceo';
|
|
2892
3186
|
const approvalType = approval['a.type'];
|
|
2893
3187
|
const taskId = `task:approval-followup:${approvalId}:${randomUUID()}`;
|
|
2894
|
-
|
|
2895
3188
|
try {
|
|
2896
3189
|
if (approvalType === 'budget_exceeded') {
|
|
2897
|
-
|
|
3190
|
+
// Non-blocking Memgraph side-effects — fire-and-forget so Memgraph downtime
|
|
3191
|
+
// does not prevent SQLite writes (task creation + approval update) below.
|
|
3192
|
+
const _mgThis = this.mg.bind(this);
|
|
3193
|
+
setImmediate(() => _mgThis(
|
|
2898
3194
|
`MATCH (bi:BudgetIncident {approvalId: $apId}) SET bi.status = 'resolved', bi.resolvedAt = datetime()`,
|
|
2899
3195
|
{ apId: approvalId }
|
|
2900
|
-
);
|
|
2901
|
-
|
|
3196
|
+
).catch(e => log('warn', `ApprovalWatcher: BudgetIncident resolve projection failed: ${e.message}`)));
|
|
3197
|
+
setImmediate(() => _mgThis(
|
|
2902
3198
|
`MATCH (a:BusinessAgent {id: $agentId, companyId: $cid}) WHERE a.pauseReason IN ['budget_exceeded', 'budget_exceeded_global'] SET a.status = 'active', a.pauseReason = null, a.resumedAt = datetime()`,
|
|
2903
3199
|
{ agentId: requestedBy, cid: this.companyId }
|
|
2904
|
-
);
|
|
3200
|
+
).catch(e => log('warn', `ApprovalWatcher: agent resume projection failed: ${e.message}`)));
|
|
2905
3201
|
log('info', `ApprovalWatcher: budget approval resolved — agent ${requestedBy} resumed`);
|
|
2906
3202
|
}
|
|
2907
3203
|
|
|
2908
3204
|
if (approvalType === 'strategy_proposal') {
|
|
2909
3205
|
const strategyId = approval['a.strategyId'];
|
|
2910
3206
|
if (strategyId) {
|
|
2911
|
-
|
|
3207
|
+
setImmediate(() => this.mg(
|
|
2912
3208
|
`MATCH (s:Strategy {id: $strategyId}) SET s.status = 'approved', s.approvedAt = datetime()`,
|
|
2913
3209
|
{ strategyId }
|
|
2914
|
-
);
|
|
3210
|
+
).catch(e => log('warn', `ApprovalWatcher: strategy approve projection failed: ${e.message}`)));
|
|
2915
3211
|
log('info', `ApprovalWatcher: strategy ${strategyId} approved`);
|
|
2916
3212
|
}
|
|
2917
|
-
}
|
|
3213
|
+
}
|
|
2918
3214
|
|
|
2919
|
-
|
|
3215
|
+
// SQLite-first task create (P2-5)
|
|
3216
|
+
try {
|
|
3217
|
+
hboStore.createTask({
|
|
3218
|
+
id: taskId, title: `Approval resolved: ${title}. Execute the approved plan.`,
|
|
3219
|
+
status: 'todo', assigneeAgentId: requestedBy, companyId: this.companyId,
|
|
3220
|
+
originKind: 'approval_resolved', approvalId, progressPropagated: false, createdAt: Date.now(),
|
|
3221
|
+
});
|
|
3222
|
+
} catch (_) {}
|
|
3223
|
+
// Non-blocking Memgraph projection (fire-and-forget)
|
|
3224
|
+
setImmediate(() => this.mg(
|
|
2920
3225
|
`MERGE (t:Task {id: $taskId}) SET t.title = $title, t.status = 'todo',
|
|
2921
3226
|
t.assigneeAgentId = $agentId, t.companyId = $cid, t.originKind = 'approval_resolved',
|
|
2922
3227
|
t.approvalId = $approvalId, t.progressPropagated = false, t.createdAt = datetime()`,
|
|
2923
3228
|
{ taskId, title: `Approval resolved: ${title}. Execute the approved plan.`, agentId: requestedBy, cid: this.companyId, approvalId }
|
|
2924
|
-
);
|
|
2925
|
-
|
|
3229
|
+
).catch(e => log('warn', `[daemon] Memgraph Task projection failed (non-fatal): ${e.message}`)));
|
|
3230
|
+
// SQLite-first approval update (P2-5)
|
|
3231
|
+
try { hboStore.updateApproval(approvalId, this.companyId, { followUpTaskCreated: true }); } catch (_) {}
|
|
3232
|
+
// Non-blocking Memgraph projection (fire-and-forget)
|
|
3233
|
+
setImmediate(() => this.mg(
|
|
2926
3234
|
`MATCH (a:Approval {id: $approvalId}) SET a.followUpTaskCreated = true`,
|
|
2927
3235
|
{ approvalId }
|
|
2928
|
-
);
|
|
3236
|
+
).catch(e => log('warn', `[daemon] Memgraph Approval projection failed (non-fatal): ${e.message}`)));
|
|
2929
3237
|
log('info', `ApprovalWatcher: created follow-up task ${taskId} for approval ${approvalId}`);
|
|
2930
3238
|
} catch (err) {
|
|
2931
3239
|
log('warn', `ApprovalWatcher: failed to create follow-up for ${approvalId}: ${err.message}`);
|
|
@@ -2952,7 +3260,7 @@ class ApprovalWatcher {
|
|
|
2952
3260
|
function buildForCompany(companyId, mgQueryAsync, opts) {
|
|
2953
3261
|
if (!companyId) throw new Error('buildForCompany: companyId required');
|
|
2954
3262
|
if (!mgQueryAsync) throw new Error('buildForCompany: mgQueryAsync required');
|
|
2955
|
-
const { rpcAdapter, companyConfig = null } = opts || {};
|
|
3263
|
+
const { rpcAdapter, companyConfig = null, broadcast = null } = opts || {};
|
|
2956
3264
|
const cid = companyId;
|
|
2957
3265
|
|
|
2958
3266
|
// Helper: safe require + construct, non-fatal
|
|
@@ -2983,10 +3291,10 @@ function buildForCompany(companyId, mgQueryAsync, opts) {
|
|
|
2983
3291
|
try { mods.costEventSyncer = new CostEventSyncer(mgQueryAsync, cid); }
|
|
2984
3292
|
catch (e) { log('warn', `[module-factory] CostEventSyncer init failed for ${cid}: ${e.message}`); mods.costEventSyncer = null; }
|
|
2985
3293
|
|
|
2986
|
-
try { mods.taskCompletionWatchdog = new TaskCompletionWatchdog(mgQueryAsync, cid); }
|
|
3294
|
+
try { mods.taskCompletionWatchdog = new TaskCompletionWatchdog(mgQueryAsync, cid, _daemonConfig.taskTimeoutMs ?? 1800000); }
|
|
2987
3295
|
catch (e) { log('warn', `[module-factory] TaskCompletionWatchdog init failed for ${cid}: ${e.message}`); mods.taskCompletionWatchdog = null; }
|
|
2988
3296
|
|
|
2989
|
-
try { mods.runCompletionPoller = new RunCompletionPoller(mgQueryAsync, cid); }
|
|
3297
|
+
try { mods.runCompletionPoller = new RunCompletionPoller(mgQueryAsync, cid, typeof broadcast === 'function' ? (...args) => broadcast(...args) : null, new TaskCompletionProcessor({ mgQuery: mgQueryAsync })); }
|
|
2990
3298
|
catch (e) { log('warn', `[module-factory] RunCompletionPoller init failed for ${cid}: ${e.message}`); mods.runCompletionPoller = null; }
|
|
2991
3299
|
|
|
2992
3300
|
try { mods.activityLogger = new ActivityLogger(mgQueryAsync, cid); }
|
|
@@ -3052,9 +3360,13 @@ function buildForCompany(companyId, mgQueryAsync, opts) {
|
|
|
3052
3360
|
mods.sacrificeDeclaration = safeNew('SacrificeDeclaration', './lib/harada/sacrifice-declaration', 'SacrificeDeclaration', mgQueryAsync, cid);
|
|
3053
3361
|
|
|
3054
3362
|
try {
|
|
3363
|
+
// MirrorPatternScan runs on a P7D (weekly) wall-clock schedule via WallClockScheduler
|
|
3364
|
+
// in addition to the per-10-tasks trigger below. Weekly cadence ensures agents who
|
|
3365
|
+
// complete few tasks still receive periodic mirror feedback.
|
|
3055
3366
|
const { KataSessionPrompt, MasteryCheck, MirrorPatternScan } = require('./lib/harada/sensei');
|
|
3056
3367
|
mods.kataSessionPrompt = new KataSessionPrompt(mgQueryAsync, cid);
|
|
3057
3368
|
mods.masteryCheck = new MasteryCheck(mgQueryAsync, cid);
|
|
3369
|
+
// P7D weekly wall-clock guard: MirrorPatternScan is also scheduled weekly via WallClockScheduler
|
|
3058
3370
|
mods.mirrorPatternScan = new MirrorPatternScan(mgQueryAsync, cid);
|
|
3059
3371
|
} catch (e) {
|
|
3060
3372
|
log('warn', `[module-factory] Harada Sensei init failed for ${cid}: ${e.message}`);
|
|
@@ -3210,7 +3522,7 @@ class HeliosCompanyDaemon {
|
|
|
3210
3522
|
const cid = cfg.company?.id || cfg.companyName;
|
|
3211
3523
|
if (!cid) continue;
|
|
3212
3524
|
try {
|
|
3213
|
-
const mods = buildForCompany(cid, this._mgQueryAsync.bind(this), { rpcAdapter: this._rpcAdapter, companyConfig: cfg });
|
|
3525
|
+
const mods = buildForCompany(cid, this._mgQueryAsync.bind(this), { rpcAdapter: this._rpcAdapter, companyConfig: cfg, broadcast: (...args) => this._broadcast?.(...args) });
|
|
3214
3526
|
this._modulesByCompany.set(cid, mods);
|
|
3215
3527
|
log('info', `_initModules: per-company modules built for '${cid}'`);
|
|
3216
3528
|
} catch (e) {
|
|
@@ -3255,9 +3567,62 @@ class HeliosCompanyDaemon {
|
|
|
3255
3567
|
const mods = buildForCompany(cid, this._mgQueryAsync.bind(this), {
|
|
3256
3568
|
rpcAdapter: this._rpcAdapter,
|
|
3257
3569
|
companyConfig: null, // not available for runtime-added companies
|
|
3570
|
+
broadcast: (...args) => this._broadcast?.(...args),
|
|
3258
3571
|
});
|
|
3259
3572
|
this._modulesByCompany.set(cid, mods);
|
|
3260
3573
|
log('info', `[registerCompany] per-company modules live for '${cid}'`);
|
|
3574
|
+
|
|
3575
|
+
// Wire the SSE broadcast function into HBOBridge now that the API server is running.
|
|
3576
|
+
// buildForCompany passes companyConfig (null) as the 4th arg to HBOBridge — not the
|
|
3577
|
+
// broadcast function — so _bc is null after construction. setBroadcast() sets it here,
|
|
3578
|
+
// enabling wizard:pillars_ready and other real-time events to reach the desktop.
|
|
3579
|
+
if (mods.hboBridge && typeof mods.hboBridge.setBroadcast === 'function' && typeof this._broadcast === 'function') {
|
|
3580
|
+
mods.hboBridge.setBroadcast(this._broadcast);
|
|
3581
|
+
log('info', `[registerCompany] HBOBridge broadcast wired for '${cid}'`);
|
|
3582
|
+
}
|
|
3583
|
+
|
|
3584
|
+
// Trigger GoalPillar creation immediately after modules are registered.
|
|
3585
|
+
// tickGoalDecompose creates the 8 GoalPillar + 64 ActionCell nodes via MandalaManager.
|
|
3586
|
+
// This must run AFTER _modulesByCompany.set() so mods.hboBridge is available.
|
|
3587
|
+
// This is the correct point to run it — after wizard completes and registerCompany is
|
|
3588
|
+
// called from the desktop, _modulesByCompany is guaranteed to have this company's entry.
|
|
3589
|
+
if (mods.hboBridge && typeof mods.hboBridge.tickGoalDecompose === 'function') {
|
|
3590
|
+
setImmediate(async () => {
|
|
3591
|
+
try {
|
|
3592
|
+
await mods.hboBridge.tickGoalDecompose({ fromWizard: true });
|
|
3593
|
+
log('info', `[registerCompany] tickGoalDecompose complete for '${cid}'`);
|
|
3594
|
+
// Emit wizard:pillars_ready via SSE bridge if broadcast is wired.
|
|
3595
|
+
if (typeof this._broadcast === 'function') {
|
|
3596
|
+
this._broadcast({
|
|
3597
|
+
type: 'wizard:pillars_ready',
|
|
3598
|
+
companyId: cid,
|
|
3599
|
+
pillarCount: 8,
|
|
3600
|
+
ts: Date.now(),
|
|
3601
|
+
});
|
|
3602
|
+
} else {
|
|
3603
|
+
// D1: _broadcast may not yet be wired if API server started after registerCompany
|
|
3604
|
+
// (startup race condition). Retry once after 1s before giving up.
|
|
3605
|
+
log('warn', `[registerCompany] wizard:pillars_ready not broadcast for '${cid}' — _broadcast not yet wired, retrying in 1s`);
|
|
3606
|
+
const self = this;
|
|
3607
|
+
setTimeout(() => {
|
|
3608
|
+
if (typeof self._broadcast === 'function') {
|
|
3609
|
+
self._broadcast({
|
|
3610
|
+
type: 'wizard:pillars_ready',
|
|
3611
|
+
companyId: cid,
|
|
3612
|
+
pillarCount: 8,
|
|
3613
|
+
ts: Date.now(),
|
|
3614
|
+
});
|
|
3615
|
+
log('info', `[registerCompany] wizard:pillars_ready broadcast on retry for '${cid}'`);
|
|
3616
|
+
} else {
|
|
3617
|
+
log('warn', `[registerCompany] wizard:pillars_ready not broadcast after retry for '${cid}' — SSE events will be missed`);
|
|
3618
|
+
}
|
|
3619
|
+
}, 1000);
|
|
3620
|
+
}
|
|
3621
|
+
} catch (e) {
|
|
3622
|
+
log('warn', `[registerCompany] tickGoalDecompose failed for '${cid}' (non-fatal): ${e.message}`);
|
|
3623
|
+
}
|
|
3624
|
+
});
|
|
3625
|
+
}
|
|
3261
3626
|
} catch (e) {
|
|
3262
3627
|
log('warn', `[registerCompany] buildForCompany failed for '${cid}': ${e.message}`);
|
|
3263
3628
|
throw e; // surface to the HTTP handler — caller can retry
|
|
@@ -4035,10 +4400,30 @@ class HeliosCompanyDaemon {
|
|
|
4035
4400
|
// CRITICAL-3 fix: wait for Memgraph to be reachable before running migrations.
|
|
4036
4401
|
// Without this, PM2 can restart the daemon before Memgraph is up post-OOM,
|
|
4037
4402
|
// and all migrations + MAGE backfill silently fail, leaving the graph in a partial state.
|
|
4038
|
-
await this._waitForMemgraph();
|
|
4039
|
-
await this._connectMemgraph();
|
|
4040
|
-
|
|
4041
|
-
|
|
4403
|
+
await this._waitForMemgraph();
|
|
4404
|
+
await this._connectMemgraph();
|
|
4405
|
+
|
|
4406
|
+
try {
|
|
4407
|
+
const replay = await graphWal.replayPending(this._mgQueryAsync.bind(this));
|
|
4408
|
+
log('info', 'WAL replay complete', replay);
|
|
4409
|
+
} catch (err) {
|
|
4410
|
+
log('error', 'WAL replay failed', { error: err.message });
|
|
4411
|
+
}
|
|
4412
|
+
|
|
4413
|
+
const migrateScript = path.join(DAEMON_DIR, 'db', 'hbo-core-migrate.js');
|
|
4414
|
+
execFile(process.execPath, ['--experimental-sqlite', migrateScript], {
|
|
4415
|
+
cwd: HELIOS_ROOT,
|
|
4416
|
+
timeout: 60_000,
|
|
4417
|
+
env: process.env,
|
|
4418
|
+
}, (err, stdout, stderr) => {
|
|
4419
|
+
if (err) {
|
|
4420
|
+
log('warn', 'hbo-core reconcile failed', { error: err.message, stderr: String(stderr || '').slice(0, 1000) });
|
|
4421
|
+
return;
|
|
4422
|
+
}
|
|
4423
|
+
log('info', 'hbo-core reconcile complete', { stdout: String(stdout || '').slice(0, 1000) });
|
|
4424
|
+
});
|
|
4425
|
+
|
|
4426
|
+
// ── TZ sanity check ─────────────────────────────────────────────────────────
|
|
4042
4427
|
if (!process.env.TZ || process.env.TZ !== 'UTC') {
|
|
4043
4428
|
log('warn', `[startup] TZ=${process.env.TZ} — recommend TZ=UTC for consistent datetime comparisons. See AGENTS.md.`);
|
|
4044
4429
|
}
|
|
@@ -4165,7 +4550,13 @@ class HeliosCompanyDaemon {
|
|
|
4165
4550
|
try {
|
|
4166
4551
|
const { verifyMemgraphConfig } = require('./lib/memgraph-verify');
|
|
4167
4552
|
const vr = await verifyMemgraphConfig(this._mgQueryAsync.bind(this));
|
|
4168
|
-
|
|
4553
|
+
if (vr.timestampUnit === 'microseconds') {
|
|
4554
|
+
log('info', `Memgraph config verified: ${vr.serverTime} (timestamp unit: ${vr.timestampUnit})`);
|
|
4555
|
+
} else {
|
|
4556
|
+
// Non-fatal: zombie-detection uses toInteger(timestamp()/1000) consistently
|
|
4557
|
+
// on both SET and GET, so the unit cancels out and comparisons remain correct.
|
|
4558
|
+
log('warn', `Memgraph config verified with non-standard timestamp unit: ${vr.timestampUnit} (server: ${vr.serverTime}) — zombie-detection unaffected`);
|
|
4559
|
+
}
|
|
4169
4560
|
} catch (e) {
|
|
4170
4561
|
const msg = String(e.message || e);
|
|
4171
4562
|
// Detect connection-level failures — Memgraph is not running yet
|
|
@@ -4198,24 +4589,31 @@ class HeliosCompanyDaemon {
|
|
|
4198
4589
|
}
|
|
4199
4590
|
}
|
|
4200
4591
|
|
|
4201
|
-
// ──
|
|
4202
|
-
// Starts the
|
|
4203
|
-
//
|
|
4204
|
-
// they reach the
|
|
4205
|
-
//
|
|
4592
|
+
// ── Helios Compression Server ─────────────────────────────────────────────
|
|
4593
|
+
// Starts the TypeScript compression sidecar (lib/compression/server.ts).
|
|
4594
|
+
// Compresses tool outputs, HEMA recall payloads, and HBO API responses
|
|
4595
|
+
// before they reach the LLM — reducing token cost by 40–85%.
|
|
4596
|
+
//
|
|
4597
|
+
// Non-fatal: if the server fails to start, the daemon continues without
|
|
4598
|
+
// compression. All compression call sites check getBaseUrl() === null and
|
|
4599
|
+
// skip compression gracefully. HBO still works, agents still function.
|
|
4600
|
+
// This matches the principle: compression improves the system but is not
|
|
4601
|
+
// a correctness dependency — every company's data remains accessible.
|
|
4206
4602
|
//
|
|
4207
|
-
// Auto-restarts on mid-session crash
|
|
4208
|
-
// getBaseUrl() is read at call time by all LLM path wrappers — they pick up
|
|
4209
|
-
// the new URL automatically after a proxy restart.
|
|
4603
|
+
// Auto-restarts on mid-session crash with exponential backoff.
|
|
4210
4604
|
try {
|
|
4211
4605
|
const { HeadroomProxyManager } = require('./lib/headroom-proxy-manager');
|
|
4212
4606
|
this._headroomProxy = HeadroomProxyManager.getInstance();
|
|
4213
4607
|
const hr = await this._headroomProxy.start();
|
|
4214
|
-
log('info', `
|
|
4608
|
+
log('info', `Helios Compression Server ready: ${hr.baseUrl}`);
|
|
4215
4609
|
} catch (e) {
|
|
4216
|
-
log('
|
|
4217
|
-
|
|
4218
|
-
|
|
4610
|
+
log('warn',
|
|
4611
|
+
`Helios Compression Server failed to start — daemon continues without compression.\n` +
|
|
4612
|
+
`Token costs will be higher until the server is fixed and the daemon restarts.\n` +
|
|
4613
|
+
`Error: ${e.message}`
|
|
4614
|
+
);
|
|
4615
|
+
// Do NOT exit — compression is an optimization, not a correctness dependency.
|
|
4616
|
+
// All call sites handle null baseUrl gracefully.
|
|
4219
4617
|
}
|
|
4220
4618
|
|
|
4221
4619
|
// ── Redis maxmemory verification (warn if unset) ──────────────────────────
|
|
@@ -4277,6 +4675,58 @@ class HeliosCompanyDaemon {
|
|
|
4277
4675
|
log('warn', `Daemon-online heartbeat reset failed (non-fatal): ${e.message}`);
|
|
4278
4676
|
}
|
|
4279
4677
|
|
|
4678
|
+
// ── Startup AgentReadySignal sweep — all companies ──────────────────────────
|
|
4679
|
+
// Emits a pending AgentReadySignal for every active agent that has neither
|
|
4680
|
+
// a pending signal nor an open task (status in_progress, andon_paused, or help_pending).
|
|
4681
|
+
//
|
|
4682
|
+
// Primary path: daemon restarts → signals emitted for idle agents → next tick
|
|
4683
|
+
// dispatches tasks to all active agents → no stranded agents after restart.
|
|
4684
|
+
//
|
|
4685
|
+
// Uses OPTIONAL MATCH ... WHERE existing IS NULL (idempotent, same pattern as
|
|
4686
|
+
// _emitAgentReadySignal). NOT EXISTS subqueries are not supported in Memgraph 3.
|
|
4687
|
+
try {
|
|
4688
|
+
let _sweepEmitted = 0;
|
|
4689
|
+
let _sweepAttempted = 0;
|
|
4690
|
+
for (const cfg of _allCompanyConfigs) {
|
|
4691
|
+
const cid = cfg.company?.id || cfg.companyName;
|
|
4692
|
+
if (!cid) continue;
|
|
4693
|
+
// Find active agents with no pending signal and no task in in_progress, andon_paused, or help_pending status.
|
|
4694
|
+
const _candidates = await this._mgQueryAsync(
|
|
4695
|
+
`MATCH (a:BusinessAgent {companyId: $cid, status: 'active'})
|
|
4696
|
+
OPTIONAL MATCH (sig:AgentReadySignal {agentId: a.id, companyId: $cid, status: 'pending'})
|
|
4697
|
+
OPTIONAL MATCH (wip:Task {assigneeAgentId: a.id, companyId: $cid})
|
|
4698
|
+
WHERE wip.status IN ['in_progress', 'andon_paused', 'help_pending']
|
|
4699
|
+
WITH a, sig, wip
|
|
4700
|
+
WHERE sig IS NULL AND wip IS NULL
|
|
4701
|
+
RETURN a.id AS agentId`,
|
|
4702
|
+
{ cid }
|
|
4703
|
+
).catch(() => null);
|
|
4704
|
+
for (const row of (_candidates?.rows ?? [])) {
|
|
4705
|
+
const agentId = Array.isArray(row) ? row[0] : row?.agentId;
|
|
4706
|
+
if (!agentId) continue;
|
|
4707
|
+
_sweepAttempted++;
|
|
4708
|
+
await this._mgQueryAsync(
|
|
4709
|
+
`MATCH (a:BusinessAgent {id: $agentId, companyId: $cid, status: 'active'})
|
|
4710
|
+
OPTIONAL MATCH (existing:AgentReadySignal {agentId: $agentId, companyId: $cid, status: 'pending'})
|
|
4711
|
+
WITH a, existing
|
|
4712
|
+
WHERE existing IS NULL
|
|
4713
|
+
CREATE (s:AgentReadySignal {
|
|
4714
|
+
id: randomUUID(),
|
|
4715
|
+
agentId: $agentId,
|
|
4716
|
+
companyId: $cid,
|
|
4717
|
+
status: 'pending',
|
|
4718
|
+
claimedBy: null,
|
|
4719
|
+
createdAt: datetime()
|
|
4720
|
+
})`,
|
|
4721
|
+
{ agentId, cid }
|
|
4722
|
+
).then(() => { _sweepEmitted++; }).catch(() => {});
|
|
4723
|
+
}
|
|
4724
|
+
}
|
|
4725
|
+
log('info', `Startup AgentReadySignal sweep: ${_sweepEmitted}/${_sweepAttempted} signals emitted for ${_allCompanyConfigs.length} company(ies)`);
|
|
4726
|
+
} catch (e) {
|
|
4727
|
+
log('warn', `Startup AgentReadySignal sweep failed (non-fatal): ${e.message}`);
|
|
4728
|
+
}
|
|
4729
|
+
|
|
4280
4730
|
// ── CV-T fix: reset stale in-progress RoutineRun nodes on restart ─────────
|
|
4281
4731
|
// RoutineRun nodes with status 'queued' or 'running' from a previous daemon
|
|
4282
4732
|
// session are permanently stuck — the process that started them is gone.
|
|
@@ -4467,10 +4917,24 @@ class HeliosCompanyDaemon {
|
|
|
4467
4917
|
registry.register('tui_wakeup', new TuiWakeupAdapter(heliosConfig, this._mgQueryAsync.bind(this), _companyConfig.agents ?? []));
|
|
4468
4918
|
registry.register('process', new ProcessAdapter());
|
|
4469
4919
|
|
|
4470
|
-
this._agentDispatcher = new AgentDispatcher(
|
|
4920
|
+
this._agentDispatcher = new AgentDispatcher(
|
|
4921
|
+
this._mgQueryAsync.bind(this),
|
|
4922
|
+
primaryCompanyId,
|
|
4923
|
+
null, // spawnFn
|
|
4924
|
+
null, // _testConfig
|
|
4925
|
+
registry,
|
|
4926
|
+
(...args) => this._broadcast?.(...args) // broadcastFn — closure so live ref resolved at call time
|
|
4927
|
+
);
|
|
4471
4928
|
this._activityLogger = new ActivityLogger(this._mgQueryAsync.bind(this), primaryCompanyId);
|
|
4472
|
-
this._taskCompletionWatchdog = new TaskCompletionWatchdog(this._mgQueryAsync.bind(this), primaryCompanyId);
|
|
4473
|
-
|
|
4929
|
+
this._taskCompletionWatchdog = new TaskCompletionWatchdog(this._mgQueryAsync.bind(this), primaryCompanyId, _daemonConfig.taskTimeoutMs ?? 1800000);
|
|
4930
|
+
// Pass a closure for broadcastFn so it resolves daemon._broadcast at call time (not at construction).
|
|
4931
|
+
// Pass _agentDispatcher._completionProcessor so TUI-completed tasks run the full completion pipeline.
|
|
4932
|
+
this._runCompletionPoller = new RunCompletionPoller(
|
|
4933
|
+
this._mgQueryAsync.bind(this),
|
|
4934
|
+
primaryCompanyId,
|
|
4935
|
+
(...args) => this._broadcast?.(...args),
|
|
4936
|
+
this._agentDispatcher._completionProcessor ?? null
|
|
4937
|
+
);
|
|
4474
4938
|
this._costEventSyncer = new CostEventSyncer(this._mgQueryAsync.bind(this), primaryCompanyId);
|
|
4475
4939
|
this._nodeCleaner = new NodeCleaner(this._mgQueryAsync.bind(this));
|
|
4476
4940
|
this._mageAnalytics = new MageAnalytics(this._mgQueryAsync.bind(this));
|
|
@@ -5190,6 +5654,24 @@ if (require.main === module) {
|
|
|
5190
5654
|
daemon._apiUpdateTick = updateTick;
|
|
5191
5655
|
}
|
|
5192
5656
|
daemon._broadcast = broadcast;
|
|
5657
|
+
|
|
5658
|
+
// Wire the broadcast function into every per-company HBOBridge instance.
|
|
5659
|
+
// buildForCompany() receives companyConfig (not the broadcast fn) as its 4th arg,
|
|
5660
|
+
// so _bc is null after _initModules() runs. We fix that here, immediately after
|
|
5661
|
+
// the API server starts and broadcast becomes available.
|
|
5662
|
+
// Without this, all startup-company HBOBridge instances have _bc = null for
|
|
5663
|
+
// the entire daemon lifetime — no real-time SSE events (approval.created,
|
|
5664
|
+
// wizard:pillars_ready, etc.) can be pushed to connected desktop clients.
|
|
5665
|
+
for (const [, mods] of daemon._modulesByCompany || []) {
|
|
5666
|
+
if (mods?.hboBridge && typeof mods.hboBridge.setBroadcast === 'function') {
|
|
5667
|
+
mods.hboBridge.setBroadcast(broadcast);
|
|
5668
|
+
}
|
|
5669
|
+
}
|
|
5670
|
+
// Also wire the legacy single-company bridge (used when _modulesByCompany is absent).
|
|
5671
|
+
if (daemon._hboBridge && typeof daemon._hboBridge.setBroadcast === 'function') {
|
|
5672
|
+
daemon._hboBridge.setBroadcast(broadcast);
|
|
5673
|
+
}
|
|
5674
|
+
log('info', `[startApi] HBOBridge broadcast wired for ${(daemon._modulesByCompany || new Map()).size} company(ies)`);
|
|
5193
5675
|
const apiPort = _daemonConfig.apiPort ?? 9093;
|
|
5194
5676
|
// Store the bound port on the daemon instance so _writeHealthFile() can include it.
|
|
5195
5677
|
// This makes the port visible in daemon-health.json for operators and monitoring tools.
|
|
@@ -5208,5 +5690,5 @@ if (require.main === module) {
|
|
|
5208
5690
|
|
|
5209
5691
|
// Export classes, helper functions, and the loaded company configs for testing
|
|
5210
5692
|
if (typeof module !== 'undefined') {
|
|
5211
|
-
module.exports = { HeliosCompanyDaemon, RoutineEvaluator, LivenessWatchdog, BudgetEnforcer, AgentDispatcher, ActivityLogger, TaskCompletionWatchdog, CostEventSyncer, NodeCleaner, requireRunId, MageAnalytics, _allCompanyConfigs, buildForCompany };
|
|
5693
|
+
module.exports = { HeliosCompanyDaemon, RoutineEvaluator, LivenessWatchdog, BudgetEnforcer, AgentDispatcher, ApprovalWatcher, ActivityLogger, TaskCompletionWatchdog, CostEventSyncer, NodeCleaner, requireRunId, MageAnalytics, _allCompanyConfigs, buildForCompany };
|
|
5212
5694
|
}
|