@sireai/optimus 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +16 -0
- package/LICENSE +21 -0
- package/README.md +104 -0
- package/dist/cli/optimus.d.ts +2 -0
- package/dist/cli/optimus.js +2951 -0
- package/dist/cli/optimus.js.map +1 -0
- package/dist/cli/self-update.d.ts +49 -0
- package/dist/cli/self-update.js +264 -0
- package/dist/cli/self-update.js.map +1 -0
- package/dist/config/load-config.d.ts +3 -0
- package/dist/config/load-config.js +321 -0
- package/dist/config/load-config.js.map +1 -0
- package/dist/config/optimus-paths.d.ts +13 -0
- package/dist/config/optimus-paths.js +44 -0
- package/dist/config/optimus-paths.js.map +1 -0
- package/dist/index.d.ts +25 -0
- package/dist/index.js +27 -0
- package/dist/index.js.map +1 -0
- package/dist/integrations/jira/jira-cli.d.ts +1 -0
- package/dist/integrations/jira/jira-cli.js +278 -0
- package/dist/integrations/jira/jira-cli.js.map +1 -0
- package/dist/integrations/jira/jira-client.d.ts +99 -0
- package/dist/integrations/jira/jira-client.js +521 -0
- package/dist/integrations/jira/jira-client.js.map +1 -0
- package/dist/integrations/jira/jira-submit.d.ts +71 -0
- package/dist/integrations/jira/jira-submit.js +351 -0
- package/dist/integrations/jira/jira-submit.js.map +1 -0
- package/dist/problem-solving-core/codex/codex-auth-resolver.d.ts +23 -0
- package/dist/problem-solving-core/codex/codex-auth-resolver.js +136 -0
- package/dist/problem-solving-core/codex/codex-auth-resolver.js.map +1 -0
- package/dist/problem-solving-core/codex/codex-connectivity-checks.d.ts +6 -0
- package/dist/problem-solving-core/codex/codex-connectivity-checks.js +81 -0
- package/dist/problem-solving-core/codex/codex-connectivity-checks.js.map +1 -0
- package/dist/problem-solving-core/codex/codex-failure-classifier.d.ts +2 -0
- package/dist/problem-solving-core/codex/codex-failure-classifier.js +49 -0
- package/dist/problem-solving-core/codex/codex-failure-classifier.js.map +1 -0
- package/dist/problem-solving-core/codex/codex-global-config.d.ts +17 -0
- package/dist/problem-solving-core/codex/codex-global-config.js +100 -0
- package/dist/problem-solving-core/codex/codex-global-config.js.map +1 -0
- package/dist/problem-solving-core/codex/codex-preflight.d.ts +13 -0
- package/dist/problem-solving-core/codex/codex-preflight.js +142 -0
- package/dist/problem-solving-core/codex/codex-preflight.js.map +1 -0
- package/dist/problem-solving-core/codex/codex-provider-profile.d.ts +14 -0
- package/dist/problem-solving-core/codex/codex-provider-profile.js +68 -0
- package/dist/problem-solving-core/codex/codex-provider-profile.js.map +1 -0
- package/dist/problem-solving-core/codex/codex-required-env.d.ts +3 -0
- package/dist/problem-solving-core/codex/codex-required-env.js +21 -0
- package/dist/problem-solving-core/codex/codex-required-env.js.map +1 -0
- package/dist/problem-solving-core/codex/codex-runner.d.ts +37 -0
- package/dist/problem-solving-core/codex/codex-runner.js +926 -0
- package/dist/problem-solving-core/codex/codex-runner.js.map +1 -0
- package/dist/problem-solving-core/codex/evolution-skill-guard.d.ts +36 -0
- package/dist/problem-solving-core/codex/evolution-skill-guard.js +143 -0
- package/dist/problem-solving-core/codex/evolution-skill-guard.js.map +1 -0
- package/dist/problem-solving-core/codex/repo-memory-service.d.ts +24 -0
- package/dist/problem-solving-core/codex/repo-memory-service.js +114 -0
- package/dist/problem-solving-core/codex/repo-memory-service.js.map +1 -0
- package/dist/problem-solving-core/codex/skill-sync-service.d.ts +35 -0
- package/dist/problem-solving-core/codex/skill-sync-service.js +280 -0
- package/dist/problem-solving-core/codex/skill-sync-service.js.map +1 -0
- package/dist/task-environment/cancellation/task-abort-registry.d.ts +17 -0
- package/dist/task-environment/cancellation/task-abort-registry.js +51 -0
- package/dist/task-environment/cancellation/task-abort-registry.js.map +1 -0
- package/dist/task-environment/cancellation/task-cancellation-service.d.ts +25 -0
- package/dist/task-environment/cancellation/task-cancellation-service.js +54 -0
- package/dist/task-environment/cancellation/task-cancellation-service.js.map +1 -0
- package/dist/task-environment/cancellation/task-cleanup-service.d.ts +22 -0
- package/dist/task-environment/cancellation/task-cleanup-service.js +67 -0
- package/dist/task-environment/cancellation/task-cleanup-service.js.map +1 -0
- package/dist/task-environment/delivery/commit-message/bugfix-commit-message-template.d.ts +13 -0
- package/dist/task-environment/delivery/commit-message/bugfix-commit-message-template.js +83 -0
- package/dist/task-environment/delivery/commit-message/bugfix-commit-message-template.js.map +1 -0
- package/dist/task-environment/delivery/commit-message/commit-message-builder.d.ts +6 -0
- package/dist/task-environment/delivery/commit-message/commit-message-builder.js +15 -0
- package/dist/task-environment/delivery/commit-message/commit-message-builder.js.map +1 -0
- package/dist/task-environment/delivery/commit-message/commit-message-template-types.d.ts +16 -0
- package/dist/task-environment/delivery/commit-message/commit-message-template-types.js +2 -0
- package/dist/task-environment/delivery/commit-message/commit-message-template-types.js.map +1 -0
- package/dist/task-environment/delivery/feishu-analysis-doc-service.d.ts +50 -0
- package/dist/task-environment/delivery/feishu-analysis-doc-service.js +454 -0
- package/dist/task-environment/delivery/feishu-analysis-doc-service.js.map +1 -0
- package/dist/task-environment/delivery/feishu-card-renderer.d.ts +38 -0
- package/dist/task-environment/delivery/feishu-card-renderer.js +449 -0
- package/dist/task-environment/delivery/feishu-card-renderer.js.map +1 -0
- package/dist/task-environment/delivery/feishu-content/feishu-content-renderer.d.ts +34 -0
- package/dist/task-environment/delivery/feishu-content/feishu-content-renderer.js +201 -0
- package/dist/task-environment/delivery/feishu-content/feishu-content-renderer.js.map +1 -0
- package/dist/task-environment/delivery/feishu-content/feishu-copy-config.d.ts +27 -0
- package/dist/task-environment/delivery/feishu-content/feishu-copy-config.js +74 -0
- package/dist/task-environment/delivery/feishu-content/feishu-copy-config.js.map +1 -0
- package/dist/task-environment/delivery/feishu-notifier.d.ts +45 -0
- package/dist/task-environment/delivery/feishu-notifier.js +250 -0
- package/dist/task-environment/delivery/feishu-notifier.js.map +1 -0
- package/dist/task-environment/delivery/feishu-templates/analysis-message-template.d.ts +6 -0
- package/dist/task-environment/delivery/feishu-templates/analysis-message-template.js +39 -0
- package/dist/task-environment/delivery/feishu-templates/analysis-message-template.js.map +1 -0
- package/dist/task-environment/delivery/feishu-templates/bugfix-message-template.d.ts +6 -0
- package/dist/task-environment/delivery/feishu-templates/bugfix-message-template.js +40 -0
- package/dist/task-environment/delivery/feishu-templates/bugfix-message-template.js.map +1 -0
- package/dist/task-environment/delivery/feishu-templates/default-message-template.d.ts +6 -0
- package/dist/task-environment/delivery/feishu-templates/default-message-template.js +33 -0
- package/dist/task-environment/delivery/feishu-templates/default-message-template.js.map +1 -0
- package/dist/task-environment/delivery/feishu-templates/patch-message-template.d.ts +6 -0
- package/dist/task-environment/delivery/feishu-templates/patch-message-template.js +40 -0
- package/dist/task-environment/delivery/feishu-templates/patch-message-template.js.map +1 -0
- package/dist/task-environment/delivery/feishu-templates/template-registry.d.ts +2 -0
- package/dist/task-environment/delivery/feishu-templates/template-registry.js +11 -0
- package/dist/task-environment/delivery/feishu-templates/template-registry.js.map +1 -0
- package/dist/task-environment/delivery/feishu-templates/template-types.d.ts +20 -0
- package/dist/task-environment/delivery/feishu-templates/template-types.js +2 -0
- package/dist/task-environment/delivery/feishu-templates/template-types.js.map +1 -0
- package/dist/task-environment/delivery/task-delivery-dispatcher.d.ts +14 -0
- package/dist/task-environment/delivery/task-delivery-dispatcher.js +109 -0
- package/dist/task-environment/delivery/task-delivery-dispatcher.js.map +1 -0
- package/dist/task-environment/delivery/task-delivery-service.d.ts +33 -0
- package/dist/task-environment/delivery/task-delivery-service.js +432 -0
- package/dist/task-environment/delivery/task-delivery-service.js.map +1 -0
- package/dist/task-environment/delivery/task-publication-service.d.ts +97 -0
- package/dist/task-environment/delivery/task-publication-service.js +1369 -0
- package/dist/task-environment/delivery/task-publication-service.js.map +1 -0
- package/dist/task-environment/execution-addresses.d.ts +40 -0
- package/dist/task-environment/execution-addresses.js +63 -0
- package/dist/task-environment/execution-addresses.js.map +1 -0
- package/dist/task-environment/intake/cli-file-intake.d.ts +12 -0
- package/dist/task-environment/intake/cli-file-intake.js +56 -0
- package/dist/task-environment/intake/cli-file-intake.js.map +1 -0
- package/dist/task-environment/intake/manual-problem-intake.d.ts +3 -0
- package/dist/task-environment/intake/manual-problem-intake.js +57 -0
- package/dist/task-environment/intake/manual-problem-intake.js.map +1 -0
- package/dist/task-environment/intake/polling-problem-intake.d.ts +14 -0
- package/dist/task-environment/intake/polling-problem-intake.js +232 -0
- package/dist/task-environment/intake/polling-problem-intake.js.map +1 -0
- package/dist/task-environment/observability/logger.d.ts +76 -0
- package/dist/task-environment/observability/logger.js +604 -0
- package/dist/task-environment/observability/logger.js.map +1 -0
- package/dist/task-environment/observability/runtime-panel.d.ts +82 -0
- package/dist/task-environment/observability/runtime-panel.js +1008 -0
- package/dist/task-environment/observability/runtime-panel.js.map +1 -0
- package/dist/task-environment/observability/sound-notifier.d.ts +18 -0
- package/dist/task-environment/observability/sound-notifier.js +71 -0
- package/dist/task-environment/observability/sound-notifier.js.map +1 -0
- package/dist/task-environment/orchestration/execution-context-assembler.d.ts +41 -0
- package/dist/task-environment/orchestration/execution-context-assembler.js +464 -0
- package/dist/task-environment/orchestration/execution-context-assembler.js.map +1 -0
- package/dist/task-environment/orchestration/git-change-classifier.d.ts +19 -0
- package/dist/task-environment/orchestration/git-change-classifier.js +106 -0
- package/dist/task-environment/orchestration/git-change-classifier.js.map +1 -0
- package/dist/task-environment/orchestration/harness-registry.d.ts +27 -0
- package/dist/task-environment/orchestration/harness-registry.js +116 -0
- package/dist/task-environment/orchestration/harness-registry.js.map +1 -0
- package/dist/task-environment/orchestration/harness-resolver.d.ts +8 -0
- package/dist/task-environment/orchestration/harness-resolver.js +39 -0
- package/dist/task-environment/orchestration/harness-resolver.js.map +1 -0
- package/dist/task-environment/orchestration/task-orchestrator.d.ts +45 -0
- package/dist/task-environment/orchestration/task-orchestrator.js +1122 -0
- package/dist/task-environment/orchestration/task-orchestrator.js.map +1 -0
- package/dist/task-environment/orchestration/task-package-assembler.d.ts +4 -0
- package/dist/task-environment/orchestration/task-package-assembler.js +10 -0
- package/dist/task-environment/orchestration/task-package-assembler.js.map +1 -0
- package/dist/task-environment/orchestration/triage-agent.d.ts +54 -0
- package/dist/task-environment/orchestration/triage-agent.js +636 -0
- package/dist/task-environment/orchestration/triage-agent.js.map +1 -0
- package/dist/task-environment/orchestration/triage-runner.d.ts +65 -0
- package/dist/task-environment/orchestration/triage-runner.js +655 -0
- package/dist/task-environment/orchestration/triage-runner.js.map +1 -0
- package/dist/task-environment/publication-target.d.ts +12 -0
- package/dist/task-environment/publication-target.js +174 -0
- package/dist/task-environment/publication-target.js.map +1 -0
- package/dist/task-environment/runtime/blocking-event-queue.d.ts +7 -0
- package/dist/task-environment/runtime/blocking-event-queue.js +27 -0
- package/dist/task-environment/runtime/blocking-event-queue.js.map +1 -0
- package/dist/task-environment/runtime/optimus-runtime.d.ts +69 -0
- package/dist/task-environment/runtime/optimus-runtime.js +751 -0
- package/dist/task-environment/runtime/optimus-runtime.js.map +1 -0
- package/dist/task-environment/storage/sqlite-event-store.d.ts +52 -0
- package/dist/task-environment/storage/sqlite-event-store.js +288 -0
- package/dist/task-environment/storage/sqlite-event-store.js.map +1 -0
- package/dist/task-environment/storage/sqlite-task-store.d.ts +122 -0
- package/dist/task-environment/storage/sqlite-task-store.js +1182 -0
- package/dist/task-environment/storage/sqlite-task-store.js.map +1 -0
- package/dist/types.d.ts +629 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/embedded-skills/shared/repo-inspection/SKILL.md +9 -0
- package/embedded-skills/shared/repo-inspection/skill.json +5 -0
- package/embedded-skills/task/bugfix/android-debug-protocol/SKILL.md +10 -0
- package/embedded-skills/task/bugfix/android-debug-protocol/skill.json +6 -0
- package/harness/AGENTS.md +30 -0
- package/harness/CHECKLIST.md +44 -0
- package/harness/CONSTRAINTS.md +60 -0
- package/harness/FRAMEWORK.md +28 -0
- package/harness/GOAL.md +28 -0
- package/harness/HANDOFF.md +45 -0
- package/harness/TASK_PLAN.md +79 -0
- package/optimus.config.template.json +34 -0
- package/package.json +109 -0
- package/task-harnesses/bugfix/ACCEPT.md +47 -0
- package/task-harnesses/bugfix/CONSTRAINTS.md +46 -0
- package/task-harnesses/bugfix/CONTEXT.md +29 -0
- package/task-harnesses/bugfix/EVOLUTION.md +82 -0
- package/task-harnesses/bugfix/ROLE.md +29 -0
- package/task-harnesses/bugfix/STANDARD.md +250 -0
- package/task-harnesses/bugfix/manifest.json +13 -0
- package/task-harnesses/registry.json +8 -0
|
@@ -0,0 +1,751 @@
|
|
|
1
|
+
import { watch } from "node:fs";
|
|
2
|
+
import { mkdir, rm, stat, writeFile } from "node:fs/promises";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { OptimusLogger } from "../observability/logger.js";
|
|
5
|
+
import { RuntimePanel } from "../observability/runtime-panel.js";
|
|
6
|
+
import { SoundNotifier } from "../observability/sound-notifier.js";
|
|
7
|
+
import { CliFileIntake } from "../intake/cli-file-intake.js";
|
|
8
|
+
import { PollingProblemIntake } from "../intake/polling-problem-intake.js";
|
|
9
|
+
import { BlockingEventQueue } from "./blocking-event-queue.js";
|
|
10
|
+
import { SQLiteEventStore } from "../storage/sqlite-event-store.js";
|
|
11
|
+
import { TriageRunner } from "../orchestration/triage-runner.js";
|
|
12
|
+
import { SkillSyncService } from "../../problem-solving-core/codex/skill-sync-service.js";
|
|
13
|
+
import { TaskAbortRegistry } from "../cancellation/task-abort-registry.js";
|
|
14
|
+
import { TaskCancellationService } from "../cancellation/task-cancellation-service.js";
|
|
15
|
+
import { TaskCleanupService } from "../cancellation/task-cleanup-service.js";
|
|
16
|
+
import { FeishuNotifier } from "../delivery/feishu-notifier.js";
|
|
17
|
+
export class OptimusRuntime {
|
|
18
|
+
store;
|
|
19
|
+
taskOrchestrator;
|
|
20
|
+
config;
|
|
21
|
+
logger;
|
|
22
|
+
get intakeStateStore() {
|
|
23
|
+
return this.eventStore;
|
|
24
|
+
}
|
|
25
|
+
// Resident runtime state.
|
|
26
|
+
eventStore;
|
|
27
|
+
eventQueue = new BlockingEventQueue();
|
|
28
|
+
cliFileIntake;
|
|
29
|
+
pollingProblemIntake;
|
|
30
|
+
triageRunner;
|
|
31
|
+
runtimePanel;
|
|
32
|
+
soundNotifier;
|
|
33
|
+
skillSyncService;
|
|
34
|
+
cleanupService;
|
|
35
|
+
cancellationService;
|
|
36
|
+
abortRegistry = new TaskAbortRegistry();
|
|
37
|
+
runningTaskIds = new Set();
|
|
38
|
+
running = false;
|
|
39
|
+
cliWatcher;
|
|
40
|
+
pollTimers = new Map();
|
|
41
|
+
maintenanceTimer;
|
|
42
|
+
cliSignalTimer;
|
|
43
|
+
lastCliSignalMtimeMs = 0;
|
|
44
|
+
eventLoopPromise;
|
|
45
|
+
backgroundDeliveryRetryLastAttemptAt = new Map();
|
|
46
|
+
// Construction wires runtime collaborators once; hot-path state lives on queue and task sets.
|
|
47
|
+
constructor(store, taskOrchestrator, config, dependencies = {}) {
|
|
48
|
+
this.store = store;
|
|
49
|
+
this.taskOrchestrator = taskOrchestrator;
|
|
50
|
+
this.config = config;
|
|
51
|
+
this.logger = new OptimusLogger(config);
|
|
52
|
+
this.runtimePanel = new RuntimePanel(config.storage.rootDir);
|
|
53
|
+
this.soundNotifier = new SoundNotifier(config);
|
|
54
|
+
this.eventStore = new SQLiteEventStore(config.storage.rootDir);
|
|
55
|
+
this.cliFileIntake = new CliFileIntake(config.intake.cliInboxDir);
|
|
56
|
+
this.pollingProblemIntake = new PollingProblemIntake(this.eventQueue, this.eventStore, config);
|
|
57
|
+
this.skillSyncService = new SkillSyncService(config);
|
|
58
|
+
this.cleanupService = new TaskCleanupService(store, config);
|
|
59
|
+
this.cancellationService = new TaskCancellationService(store, this.abortRegistry, this.cleanupService, config);
|
|
60
|
+
// Tests can inject a stubbed resident triage runner so runtime coverage stays deterministic and offline.
|
|
61
|
+
// Production still uses the real resident Codex triage runner by default.
|
|
62
|
+
this.triageRunner = dependencies.triageRunner ?? new TriageRunner(config);
|
|
63
|
+
}
|
|
64
|
+
// Runtime lifecycle: startup, steady-state processing, and shutdown signaling.
|
|
65
|
+
async start() {
|
|
66
|
+
this.runtimePanel.start();
|
|
67
|
+
await this.store.init();
|
|
68
|
+
await this.eventStore.init();
|
|
69
|
+
await this.cliFileIntake.init();
|
|
70
|
+
this.lastCliSignalMtimeMs = await this.readCliSignalMtimeMs();
|
|
71
|
+
await this.ensureRuntimeDirs();
|
|
72
|
+
await this.logger.writeEvolutionSnapshot({
|
|
73
|
+
ok: true,
|
|
74
|
+
taskType: null,
|
|
75
|
+
createdSkillIds: [],
|
|
76
|
+
updatedSkillIds: [],
|
|
77
|
+
violations: [],
|
|
78
|
+
allowedEvolutionSkillDir: null,
|
|
79
|
+
updatedAt: new Date().toISOString()
|
|
80
|
+
});
|
|
81
|
+
const skillSyncStartedAt = new Date().toISOString();
|
|
82
|
+
const installedSkills = await this.skillSyncService.syncBuiltinSkills();
|
|
83
|
+
await this.logger.writeSkillSyncSnapshot({
|
|
84
|
+
ok: true,
|
|
85
|
+
startedAt: skillSyncStartedAt,
|
|
86
|
+
finishedAt: new Date().toISOString(),
|
|
87
|
+
installedCount: installedSkills.length,
|
|
88
|
+
sharedCount: installedSkills.filter((skill) => skill.level === "shared").length,
|
|
89
|
+
taskLevelCount: installedSkills.filter((skill) => skill.level === "task").length,
|
|
90
|
+
embeddedRootDir: this.config.skills.embeddedRootDir,
|
|
91
|
+
evolutionRootDir: this.config.skills.evolutionRootDir,
|
|
92
|
+
storeDir: this.config.skills.storeDir,
|
|
93
|
+
disabledSkills: this.config.skills.disabledSkills,
|
|
94
|
+
skills: installedSkills.map((skill) => ({
|
|
95
|
+
id: skill.id,
|
|
96
|
+
level: skill.level,
|
|
97
|
+
sourceKind: skill.sourceKind,
|
|
98
|
+
version: skill.version,
|
|
99
|
+
taskTypes: skill.taskTypes,
|
|
100
|
+
storeDir: skill.storeDir
|
|
101
|
+
}))
|
|
102
|
+
});
|
|
103
|
+
await this.recoverStaleActiveTasks();
|
|
104
|
+
try {
|
|
105
|
+
await this.triageRunner.initialize();
|
|
106
|
+
await this.logger.writeTriageStatusSnapshot(this.triageRunner.getStatusSnapshot());
|
|
107
|
+
}
|
|
108
|
+
catch (error) {
|
|
109
|
+
await this.logger.writeTriageStatusSnapshot({
|
|
110
|
+
ok: false,
|
|
111
|
+
startedAt: new Date().toISOString(),
|
|
112
|
+
failureCategory: this.triageRunner.classifyFailure(error),
|
|
113
|
+
reason: error instanceof Error ? error.message : "Unknown triage startup failure"
|
|
114
|
+
});
|
|
115
|
+
throw error;
|
|
116
|
+
}
|
|
117
|
+
await this.logger.info("runtime.started", {
|
|
118
|
+
workerConcurrency: this.config.runtime.workerConcurrency,
|
|
119
|
+
startupTimeoutMs: this.config.runtime.startupTimeoutMs,
|
|
120
|
+
idleTimeoutMs: this.config.runtime.idleTimeoutMs,
|
|
121
|
+
stallWarningMs: this.config.runtime.stallWarningMs,
|
|
122
|
+
runTimeoutMs: this.config.runtime.runTimeoutMs,
|
|
123
|
+
pollingEnabled: this.config.intake.pollingEnabled,
|
|
124
|
+
pollingIntervalMs: this.config.intake.pollingIntervalMs,
|
|
125
|
+
cliWatchEnabled: this.config.intake.cliWatchEnabled,
|
|
126
|
+
cliInboxDir: this.config.intake.cliInboxDir,
|
|
127
|
+
cliSignalWatchIntervalMs: this.config.intake.cliSignalWatchIntervalMs,
|
|
128
|
+
healthLogIntervalMs: this.config.observability.healthLogIntervalMs,
|
|
129
|
+
consoleLevel: this.config.observability.consoleLevel,
|
|
130
|
+
fileLevel: this.config.observability.fileLevel
|
|
131
|
+
});
|
|
132
|
+
await this.logger.info("runtime.ready", {
|
|
133
|
+
installedSkillCount: installedSkills.length,
|
|
134
|
+
workerConcurrency: this.config.runtime.workerConcurrency,
|
|
135
|
+
cliInboxDir: this.config.intake.cliInboxDir,
|
|
136
|
+
taskHarnessRootDir: this.config.runtime.taskHarnessRootDir,
|
|
137
|
+
codexHomeDir: this.config.codex.homeDir
|
|
138
|
+
});
|
|
139
|
+
this.running = true;
|
|
140
|
+
await this.bootstrapCliInbox();
|
|
141
|
+
this.startIntakeAdapters();
|
|
142
|
+
this.startMaintenanceTimer();
|
|
143
|
+
this.eventLoopPromise = this.runEventLoop();
|
|
144
|
+
await this.eventLoopPromise;
|
|
145
|
+
}
|
|
146
|
+
// Stop pushes a sentinel event so a blocking pop can exit without busy waiting.
|
|
147
|
+
stop() {
|
|
148
|
+
this.running = false;
|
|
149
|
+
this.runtimePanel.stop();
|
|
150
|
+
this.soundNotifier.stop();
|
|
151
|
+
this.cliWatcher?.close();
|
|
152
|
+
for (const timer of this.pollTimers.values()) {
|
|
153
|
+
clearInterval(timer);
|
|
154
|
+
}
|
|
155
|
+
this.pollTimers.clear();
|
|
156
|
+
if (this.maintenanceTimer) {
|
|
157
|
+
clearInterval(this.maintenanceTimer);
|
|
158
|
+
}
|
|
159
|
+
if (this.cliSignalTimer) {
|
|
160
|
+
clearInterval(this.cliSignalTimer);
|
|
161
|
+
}
|
|
162
|
+
this.eventQueue.push(this.createRuntimeStopEvent());
|
|
163
|
+
}
|
|
164
|
+
// The event loop remains the single admission point before anything enters the scheduler queue.
|
|
165
|
+
async runEventLoop() {
|
|
166
|
+
while (this.running) {
|
|
167
|
+
const event = await this.eventQueue.pop();
|
|
168
|
+
if (event.type === "task.cancel_requested" && event.content.title === "__runtime_stop__") {
|
|
169
|
+
continue;
|
|
170
|
+
}
|
|
171
|
+
await this.handleEvent(event);
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
// Runtime first persists the raw event, then routes control events or triages business events into queued tasks.
|
|
175
|
+
async handleEvent(event) {
|
|
176
|
+
const existingTask = await this.store.findTaskBySourceEventId(event.eventId);
|
|
177
|
+
if (existingTask) {
|
|
178
|
+
await this.logger.warn("event.duplicate_ignored", {
|
|
179
|
+
eventId: event.eventId,
|
|
180
|
+
taskId: existingTask.taskId,
|
|
181
|
+
eventType: event.type,
|
|
182
|
+
source: event.content.source,
|
|
183
|
+
status: existingTask.status
|
|
184
|
+
});
|
|
185
|
+
return;
|
|
186
|
+
}
|
|
187
|
+
await this.eventStore.append(event);
|
|
188
|
+
await this.logger.info("event.received", {
|
|
189
|
+
eventId: event.eventId,
|
|
190
|
+
eventType: event.type,
|
|
191
|
+
source: event.content.source,
|
|
192
|
+
queueDepth: this.eventQueue.size()
|
|
193
|
+
});
|
|
194
|
+
if (event.type === "task.retry_requested") {
|
|
195
|
+
await this.handleRetryEvent(event);
|
|
196
|
+
return;
|
|
197
|
+
}
|
|
198
|
+
if (event.type === "task.cancel_requested") {
|
|
199
|
+
await this.handleCancelEvent(event);
|
|
200
|
+
return;
|
|
201
|
+
}
|
|
202
|
+
await this.eventStore.markTriaging(event.eventId);
|
|
203
|
+
await this.logger.info("event.triage.started", {
|
|
204
|
+
eventId: event.eventId,
|
|
205
|
+
eventType: event.type,
|
|
206
|
+
source: event.content.source,
|
|
207
|
+
stage: "triage_start"
|
|
208
|
+
});
|
|
209
|
+
let triageDecision;
|
|
210
|
+
try {
|
|
211
|
+
triageDecision = await this.triageRunner.triage(event);
|
|
212
|
+
}
|
|
213
|
+
catch (error) {
|
|
214
|
+
const failureCategory = this.triageRunner.classifyFailure(error);
|
|
215
|
+
await this.eventStore.completeTriage(event.eventId, "failed");
|
|
216
|
+
await this.logger.error("triage.failed", {
|
|
217
|
+
eventId: event.eventId,
|
|
218
|
+
eventType: event.type,
|
|
219
|
+
source: event.content.source,
|
|
220
|
+
failureCategory,
|
|
221
|
+
reason: error instanceof Error ? error.message : "Unknown triage failure"
|
|
222
|
+
});
|
|
223
|
+
return;
|
|
224
|
+
}
|
|
225
|
+
if (triageDecision.decision === "rejected") {
|
|
226
|
+
await this.eventStore.completeTriage(event.eventId, "rejected", triageDecision);
|
|
227
|
+
await this.logger.warn("triage.rejected", {
|
|
228
|
+
eventId: event.eventId,
|
|
229
|
+
reason: triageDecision.reason,
|
|
230
|
+
summary: triageDecision.summary
|
|
231
|
+
});
|
|
232
|
+
return;
|
|
233
|
+
}
|
|
234
|
+
if (triageDecision.taskPackage.idempotencyKey) {
|
|
235
|
+
const duplicatedTask = await this.store.findTaskByIdempotencyKey(triageDecision.taskPackage.idempotencyKey);
|
|
236
|
+
if (duplicatedTask) {
|
|
237
|
+
await this.eventStore.completeTriage(event.eventId, "accepted", triageDecision);
|
|
238
|
+
await this.logger.warn("task.idempotent_duplicate_ignored", {
|
|
239
|
+
eventId: event.eventId,
|
|
240
|
+
taskId: duplicatedTask.taskId,
|
|
241
|
+
taskType: duplicatedTask.taskType,
|
|
242
|
+
status: duplicatedTask.status,
|
|
243
|
+
idempotencyKey: triageDecision.taskPackage.idempotencyKey
|
|
244
|
+
});
|
|
245
|
+
return;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
const task = await this.store.createTaskFromPackage(triageDecision.taskPackage, { sourceEventId: event.eventId });
|
|
249
|
+
await this.eventStore.completeTriage(event.eventId, "accepted", triageDecision);
|
|
250
|
+
await this.logger.info("event.triage.finished", {
|
|
251
|
+
eventId: event.eventId,
|
|
252
|
+
eventType: event.type,
|
|
253
|
+
source: event.content.source,
|
|
254
|
+
stage: "triage_finish",
|
|
255
|
+
status: "accepted",
|
|
256
|
+
taskId: task.taskId,
|
|
257
|
+
taskType: task.taskType
|
|
258
|
+
});
|
|
259
|
+
await this.logger.info("task.received", {
|
|
260
|
+
eventId: event.eventId,
|
|
261
|
+
taskId: task.taskId,
|
|
262
|
+
taskPackageId: task.taskPackageId,
|
|
263
|
+
taskType: task.taskType,
|
|
264
|
+
status: task.status
|
|
265
|
+
});
|
|
266
|
+
await this.dispatchAvailableTasks();
|
|
267
|
+
}
|
|
268
|
+
async handleRetryEvent(event) {
|
|
269
|
+
const retryTargetId = event.content.sourceRef?.trim();
|
|
270
|
+
if (!retryTargetId) {
|
|
271
|
+
await this.logger.warn("task.retry_ignored", {
|
|
272
|
+
eventId: event.eventId,
|
|
273
|
+
reason: "missing_source_ref"
|
|
274
|
+
});
|
|
275
|
+
return;
|
|
276
|
+
}
|
|
277
|
+
const retriedTask = await this.store.retryTask(retryTargetId, event.eventId);
|
|
278
|
+
if (!retriedTask) {
|
|
279
|
+
await this.logger.warn("task.retry_ignored", {
|
|
280
|
+
eventId: event.eventId,
|
|
281
|
+
taskId: retryTargetId,
|
|
282
|
+
reason: "task_not_retryable"
|
|
283
|
+
});
|
|
284
|
+
return;
|
|
285
|
+
}
|
|
286
|
+
await this.logger.info("task.retry_enqueued", {
|
|
287
|
+
eventId: event.eventId,
|
|
288
|
+
taskId: retriedTask.taskId,
|
|
289
|
+
status: retriedTask.status
|
|
290
|
+
});
|
|
291
|
+
await this.dispatchAvailableTasks();
|
|
292
|
+
}
|
|
293
|
+
async handleCancelEvent(event) {
|
|
294
|
+
const cancelTargetId = event.content.sourceRef?.trim();
|
|
295
|
+
if (!cancelTargetId) {
|
|
296
|
+
await this.logger.warn("task.cancel_ignored", {
|
|
297
|
+
eventId: event.eventId,
|
|
298
|
+
reason: "missing_source_ref"
|
|
299
|
+
});
|
|
300
|
+
return;
|
|
301
|
+
}
|
|
302
|
+
const cancelResult = await this.cancellationService.requestCancel({
|
|
303
|
+
eventId: event.eventId,
|
|
304
|
+
taskId: cancelTargetId,
|
|
305
|
+
reason: event.content.description?.trim() || `Manual cancellation requested for ${cancelTargetId}.`,
|
|
306
|
+
cancelSource: this.resolveCancelSource(event),
|
|
307
|
+
force: event.content.metadata?.forceCancel === true,
|
|
308
|
+
keepArtifacts: event.content.metadata?.keepArtifacts === true
|
|
309
|
+
});
|
|
310
|
+
if (!cancelResult) {
|
|
311
|
+
await this.logger.warn("task.cancel_ignored", {
|
|
312
|
+
eventId: event.eventId,
|
|
313
|
+
taskId: cancelTargetId,
|
|
314
|
+
reason: "task_not_cancelable"
|
|
315
|
+
});
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
// Dispatch respects global concurrency and lets the store arbitrate task claiming.
|
|
319
|
+
async dispatchAvailableTasks() {
|
|
320
|
+
const availableSlots = this.config.runtime.workerConcurrency - this.runningTaskIds.size;
|
|
321
|
+
if (availableSlots <= 0) {
|
|
322
|
+
return;
|
|
323
|
+
}
|
|
324
|
+
const tasks = await this.store.listRunnableTasks({
|
|
325
|
+
limit: availableSlots,
|
|
326
|
+
excludeTaskIds: Array.from(this.runningTaskIds)
|
|
327
|
+
});
|
|
328
|
+
for (const task of tasks) {
|
|
329
|
+
const claimedTask = await this.store.claimTask(task.taskId);
|
|
330
|
+
if (!claimedTask) {
|
|
331
|
+
continue;
|
|
332
|
+
}
|
|
333
|
+
this.runningTaskIds.add(claimedTask.taskId);
|
|
334
|
+
void this.runClaimedTask(claimedTask);
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
// Claimed tasks are still run under the orchestrator; runtime only manages queueing and concurrency.
|
|
338
|
+
async runClaimedTask(task) {
|
|
339
|
+
await this.store.appendEvent(task.taskId, "runtime.dispatch", "Task picked by background runtime worker.", this.buildTaskEventOptions(task.activeRunId, task.sourceEventId));
|
|
340
|
+
await this.logger.info("task.dispatched", {
|
|
341
|
+
taskId: task.taskId,
|
|
342
|
+
stage: "dispatch",
|
|
343
|
+
taskType: task.taskType,
|
|
344
|
+
sourceEventId: task.sourceEventId,
|
|
345
|
+
runId: task.activeRunId,
|
|
346
|
+
status: task.status,
|
|
347
|
+
activeRunId: task.activeRunId,
|
|
348
|
+
runCount: task.runCount,
|
|
349
|
+
activeWorkers: this.runningTaskIds.size
|
|
350
|
+
});
|
|
351
|
+
const abortController = new AbortController();
|
|
352
|
+
const unregisterAbortHandle = task.activeRunId
|
|
353
|
+
? this.abortRegistry.register({
|
|
354
|
+
taskId: task.taskId,
|
|
355
|
+
runId: task.activeRunId,
|
|
356
|
+
controller: abortController
|
|
357
|
+
})
|
|
358
|
+
: undefined;
|
|
359
|
+
try {
|
|
360
|
+
await this.taskOrchestrator.run({
|
|
361
|
+
...task,
|
|
362
|
+
...(task.activeRunId ? { abortSignal: abortController.signal } : {})
|
|
363
|
+
});
|
|
364
|
+
}
|
|
365
|
+
finally {
|
|
366
|
+
unregisterAbortHandle?.();
|
|
367
|
+
const latestTask = await this.store.getTask(task.taskId);
|
|
368
|
+
if (latestTask?.status === "canceled") {
|
|
369
|
+
await this.cleanupService.cleanupCanceledTask({
|
|
370
|
+
taskId: task.taskId,
|
|
371
|
+
cancelSource: "runtime"
|
|
372
|
+
});
|
|
373
|
+
}
|
|
374
|
+
this.runningTaskIds.delete(task.taskId);
|
|
375
|
+
await this.dispatchAvailableTasks();
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
// Manual and automated intake adapters feed one shared runtime queue.
|
|
379
|
+
startIntakeAdapters() {
|
|
380
|
+
if (this.config.intake.cliWatchEnabled) {
|
|
381
|
+
try {
|
|
382
|
+
this.cliWatcher = watch(this.cliFileIntake.getSignalPath(), () => {
|
|
383
|
+
void this.drainCliInbox();
|
|
384
|
+
});
|
|
385
|
+
this.cliWatcher.on("error", (error) => {
|
|
386
|
+
void this.handleCliWatcherFailure(error);
|
|
387
|
+
});
|
|
388
|
+
}
|
|
389
|
+
catch (error) {
|
|
390
|
+
void this.handleCliWatcherFailure(error);
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
const pollingSources = this.pollingProblemIntake.listConfiguredSources();
|
|
394
|
+
for (const source of pollingSources) {
|
|
395
|
+
const intervalMs = source.intervalMs ?? this.config.intake.pollingIntervalMs;
|
|
396
|
+
const timer = setInterval(() => {
|
|
397
|
+
void this.pollQualityProblems(source.id);
|
|
398
|
+
}, intervalMs);
|
|
399
|
+
this.pollTimers.set(source.id, timer);
|
|
400
|
+
if (source.enabled) {
|
|
401
|
+
void this.pollQualityProblems(source.id);
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
// Maintenance stays off the hot path so health work never blocks dispatch.
|
|
406
|
+
startMaintenanceTimer() {
|
|
407
|
+
this.maintenanceTimer = setInterval(() => {
|
|
408
|
+
void this.runMaintenanceCycle();
|
|
409
|
+
}, this.config.observability.healthLogIntervalMs);
|
|
410
|
+
}
|
|
411
|
+
async bootstrapCliInbox() {
|
|
412
|
+
await this.drainCliInbox();
|
|
413
|
+
}
|
|
414
|
+
// CLI inbox files are normalized into runtime events and removed after successful enqueue.
|
|
415
|
+
async drainCliInbox() {
|
|
416
|
+
const files = await this.cliFileIntake.listInboxFiles();
|
|
417
|
+
for (const file of files) {
|
|
418
|
+
try {
|
|
419
|
+
const event = await this.cliFileIntake.readEventFromFile(file);
|
|
420
|
+
this.eventQueue.push(event);
|
|
421
|
+
await this.cliFileIntake.deleteInboxFile(file);
|
|
422
|
+
await this.logger.info("intake.cli.accepted", {
|
|
423
|
+
eventId: event.eventId,
|
|
424
|
+
eventType: event.type,
|
|
425
|
+
source: event.content.source,
|
|
426
|
+
queueDepth: this.eventQueue.size()
|
|
427
|
+
});
|
|
428
|
+
}
|
|
429
|
+
catch (error) {
|
|
430
|
+
await this.logger.error("intake.cli.failed", {
|
|
431
|
+
file,
|
|
432
|
+
reason: error instanceof Error ? error.message : "Unknown inbox parse error"
|
|
433
|
+
});
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
// When fs watching fails, runtime degrades to signal polling instead of dropping intake.
|
|
438
|
+
/**
 * Switches CLI intake from file watching to interval polling after a watcher failure.
 *
 * Closes and clears the current watcher (if any), starts the signal-polling timer unless one
 * is already running, and logs "intake.cli.watch_fallback" with the failure reason.
 *
 * @param {unknown} error - The value thrown or emitted by the watcher.
 * @returns {Promise<void>}
 */
async handleCliWatcherFailure(error) {
    const failedWatcher = this.cliWatcher;
    failedWatcher?.close();
    this.cliWatcher = undefined;
    const fallbackIntervalMs = this.config.intake.cliSignalWatchIntervalMs;
    // Guard against stacking timers when several failures are reported.
    if (!this.cliSignalTimer) {
        const pollSignal = () => {
            void this.pollCliSignal();
        };
        this.cliSignalTimer = setInterval(pollSignal, fallbackIntervalMs);
    }
    let reason = "Unknown cli watcher error";
    if (error instanceof Error) {
        reason = error.message;
    }
    await this.logger.warn("intake.cli.watch_fallback", {
        reason,
        fallbackIntervalMs
    });
}
|
|
451
|
+
async pollCliSignal() {
|
|
452
|
+
const currentMtimeMs = await this.readCliSignalMtimeMs();
|
|
453
|
+
if (currentMtimeMs <= this.lastCliSignalMtimeMs) {
|
|
454
|
+
return;
|
|
455
|
+
}
|
|
456
|
+
this.lastCliSignalMtimeMs = currentMtimeMs;
|
|
457
|
+
await this.drainCliInbox();
|
|
458
|
+
}
|
|
459
|
+
async readCliSignalMtimeMs() {
|
|
460
|
+
const metadata = await stat(this.cliFileIntake.getSignalPath());
|
|
461
|
+
return metadata.mtimeMs;
|
|
462
|
+
}
|
|
463
|
+
resolveCancelSource(event) {
|
|
464
|
+
if (event.content.source === "system") {
|
|
465
|
+
return "runtime";
|
|
466
|
+
}
|
|
467
|
+
if (event.content.source === "api") {
|
|
468
|
+
return "api";
|
|
469
|
+
}
|
|
470
|
+
return "cli";
|
|
471
|
+
}
|
|
472
|
+
async resolveRunnablePoller(pollerId) {
|
|
473
|
+
const source = this.pollingProblemIntake.listConfiguredSources().find((candidate) => candidate.id === pollerId);
|
|
474
|
+
if (!source || !source.enabled) {
|
|
475
|
+
return undefined;
|
|
476
|
+
}
|
|
477
|
+
const state = (await this.eventStore.listPollerStates()).find((candidate) => candidate.pollerId === pollerId);
|
|
478
|
+
if (state && !state.enabled) {
|
|
479
|
+
return undefined;
|
|
480
|
+
}
|
|
481
|
+
return source;
|
|
482
|
+
}
|
|
483
|
+
async runPollerOnce(pollerId) {
|
|
484
|
+
await this.pollQualityProblems(pollerId);
|
|
485
|
+
}
|
|
486
|
+
async pollQualityProblems(pollerId) {
|
|
487
|
+
const source = await this.resolveRunnablePoller(pollerId);
|
|
488
|
+
if (!source) {
|
|
489
|
+
return;
|
|
490
|
+
}
|
|
491
|
+
const intervalMs = source.intervalMs ?? this.config.intake.pollingIntervalMs;
|
|
492
|
+
await this.eventStore.recordPollerRunStarted(source, intervalMs);
|
|
493
|
+
try {
|
|
494
|
+
const result = await this.pollingProblemIntake.pollAndEnqueue(source);
|
|
495
|
+
await this.eventStore.recordPollerRunSucceeded(source, intervalMs, result);
|
|
496
|
+
await this.logger.info("intake.poll.tick", {
|
|
497
|
+
pollerId: source.id,
|
|
498
|
+
pollingIntervalMs: intervalMs,
|
|
499
|
+
pollingProvider: source.type,
|
|
500
|
+
acceptedCount: result.acceptedCount,
|
|
501
|
+
duplicateCount: result.duplicateCount,
|
|
502
|
+
updatedCount: result.updatedCount,
|
|
503
|
+
skippedCount: result.skippedCount,
|
|
504
|
+
checkpoint: result.checkpoint ? JSON.stringify(result.checkpoint) : undefined,
|
|
505
|
+
eventType: source.eventType ?? "problem.discovered",
|
|
506
|
+
queueDepth: this.eventQueue.size()
|
|
507
|
+
});
|
|
508
|
+
}
|
|
509
|
+
catch (error) {
|
|
510
|
+
const reason = error instanceof Error ? error.message : "Unknown polling failure";
|
|
511
|
+
await this.eventStore.recordPollerRunFailed(source, intervalMs, reason);
|
|
512
|
+
await this.logger.error("intake.poll.failed", {
|
|
513
|
+
pollerId: source.id,
|
|
514
|
+
pollingIntervalMs: intervalMs,
|
|
515
|
+
pollingProvider: source.type,
|
|
516
|
+
eventType: source.eventType ?? "problem.discovered",
|
|
517
|
+
reason
|
|
518
|
+
});
|
|
519
|
+
}
|
|
520
|
+
}
|
|
521
|
+
async recoverStaleActiveTasks() {
|
|
522
|
+
const recovered = await this.store.recoverActiveTasks();
|
|
523
|
+
if (recovered.length === 0) {
|
|
524
|
+
return;
|
|
525
|
+
}
|
|
526
|
+
await this.logger.warn("runtime.recovery.requeued", {
|
|
527
|
+
recoveredCount: recovered.length,
|
|
528
|
+
recoveredTaskIds: recovered.map((item) => item.taskId),
|
|
529
|
+
recoveredRunIds: recovered.map((item) => item.runId)
|
|
530
|
+
});
|
|
531
|
+
}
|
|
532
|
+
// Active run monitoring upgrades raw progress fields into health judgments for the scheduler.
|
|
533
|
+
async monitorActiveRuns() {
|
|
534
|
+
const runs = await this.store.listTaskRuns();
|
|
535
|
+
const activeRuns = runs.filter((run) => !run.endedAt);
|
|
536
|
+
const now = Date.now();
|
|
537
|
+
for (const run of activeRuns) {
|
|
538
|
+
const lastSignalAt = Date.parse(run.lastEventAt ?? run.startedAt);
|
|
539
|
+
const elapsedMs = now - Date.parse(run.startedAt);
|
|
540
|
+
const idleMs = Number.isNaN(lastSignalAt) ? 0 : now - lastSignalAt;
|
|
541
|
+
if (run.status === "cancel_requested") {
|
|
542
|
+
this.abortRegistry.cancelRun(run.runId, `Cancellation requested for ${run.taskId}.`);
|
|
543
|
+
continue;
|
|
544
|
+
}
|
|
545
|
+
if (run.status === "timed_out") {
|
|
546
|
+
this.abortRegistry.cancelRun(run.runId, `Run previously marked timed_out for ${run.taskId}.`);
|
|
547
|
+
continue;
|
|
548
|
+
}
|
|
549
|
+
if (elapsedMs >= this.config.runtime.runTimeoutMs) {
|
|
550
|
+
this.abortRegistry.cancelRun(run.runId, `Run exceeded hard timeout after ${elapsedMs}ms.`);
|
|
551
|
+
await this.store.updateTaskRun(run.taskId, run.runId, {
|
|
552
|
+
runStatus: "timed_out",
|
|
553
|
+
health: "timed_out",
|
|
554
|
+
failureCategory: "timeout",
|
|
555
|
+
summary: "Run exceeded the configured hard runtime timeout.",
|
|
556
|
+
eventType: "runtime.timeout.hard"
|
|
557
|
+
});
|
|
558
|
+
const task = await this.store.getTask(run.taskId);
|
|
559
|
+
if (task?.activeRunId === run.runId) {
|
|
560
|
+
await this.logger.error("task.timed_out", {
|
|
561
|
+
taskId: run.taskId,
|
|
562
|
+
runId: run.runId,
|
|
563
|
+
durationMs: elapsedMs,
|
|
564
|
+
timeoutKind: "hard_timeout",
|
|
565
|
+
failureCategory: "timeout"
|
|
566
|
+
});
|
|
567
|
+
}
|
|
568
|
+
continue;
|
|
569
|
+
}
|
|
570
|
+
const waitingForFirstSignal = run.status === "leased" || run.status === "bootstrapping" || run.status === "sdk_starting";
|
|
571
|
+
if (waitingForFirstSignal && elapsedMs >= this.config.runtime.startupTimeoutMs) {
|
|
572
|
+
this.abortRegistry.cancelRun(run.runId, `Run exceeded startup timeout after ${elapsedMs}ms without first progress signal.`);
|
|
573
|
+
await this.store.updateTaskRun(run.taskId, run.runId, {
|
|
574
|
+
runStatus: "timed_out",
|
|
575
|
+
health: "timed_out",
|
|
576
|
+
failureCategory: "timeout",
|
|
577
|
+
summary: "Run exceeded the configured startup timeout before the first progress signal.",
|
|
578
|
+
eventType: "runtime.timeout.startup"
|
|
579
|
+
});
|
|
580
|
+
const task = await this.store.getTask(run.taskId);
|
|
581
|
+
if (task?.activeRunId === run.runId) {
|
|
582
|
+
await this.logger.error("task.timed_out", {
|
|
583
|
+
taskId: run.taskId,
|
|
584
|
+
runId: run.runId,
|
|
585
|
+
durationMs: elapsedMs,
|
|
586
|
+
timeoutKind: "startup_timeout",
|
|
587
|
+
failureCategory: "timeout"
|
|
588
|
+
});
|
|
589
|
+
}
|
|
590
|
+
continue;
|
|
591
|
+
}
|
|
592
|
+
if (!waitingForFirstSignal && idleMs >= this.config.runtime.idleTimeoutMs) {
|
|
593
|
+
this.abortRegistry.cancelRun(run.runId, `Run exceeded idle timeout after ${idleMs}ms without new progress events.`);
|
|
594
|
+
await this.store.updateTaskRun(run.taskId, run.runId, {
|
|
595
|
+
runStatus: "timed_out",
|
|
596
|
+
health: "timed_out",
|
|
597
|
+
failureCategory: "timeout",
|
|
598
|
+
summary: "Run exceeded the configured idle timeout without new progress events.",
|
|
599
|
+
eventType: "runtime.timeout.idle"
|
|
600
|
+
});
|
|
601
|
+
const task = await this.store.getTask(run.taskId);
|
|
602
|
+
if (task?.activeRunId === run.runId) {
|
|
603
|
+
await this.logger.error("task.timed_out", {
|
|
604
|
+
taskId: run.taskId,
|
|
605
|
+
runId: run.runId,
|
|
606
|
+
durationMs: elapsedMs,
|
|
607
|
+
idleMs,
|
|
608
|
+
timeoutKind: "idle_timeout",
|
|
609
|
+
failureCategory: "timeout"
|
|
610
|
+
});
|
|
611
|
+
}
|
|
612
|
+
continue;
|
|
613
|
+
}
|
|
614
|
+
if (!waitingForFirstSignal && idleMs >= this.config.runtime.stallWarningMs) {
|
|
615
|
+
await this.store.updateTaskRun(run.taskId, run.runId, {
|
|
616
|
+
health: "suspected_stall",
|
|
617
|
+
eventType: "runtime.stall_warning"
|
|
618
|
+
});
|
|
619
|
+
continue;
|
|
620
|
+
}
|
|
621
|
+
if (run.health !== "healthy_running" && run.health !== "waiting_start") {
|
|
622
|
+
await this.store.updateTaskRun(run.taskId, run.runId, {
|
|
623
|
+
health: run.status === "sdk_starting" || run.status === "bootstrapping" || run.status === "leased"
|
|
624
|
+
? "waiting_start"
|
|
625
|
+
: "healthy_running",
|
|
626
|
+
eventType: "runtime.health_refresh"
|
|
627
|
+
});
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
async runMaintenanceCycle() {
|
|
632
|
+
await this.logHealth();
|
|
633
|
+
await this.retryFailedDeliveryAttempts();
|
|
634
|
+
}
|
|
635
|
+
async retryFailedDeliveryAttempts() {
|
|
636
|
+
if (!this.config.delivery.retry.backgroundEnabled) {
|
|
637
|
+
return;
|
|
638
|
+
}
|
|
639
|
+
const retryableAttempts = await this.store.listRetryableDeliveryAttempts(this.config.delivery.retry.backgroundScanLimit);
|
|
640
|
+
for (const attempt of retryableAttempts) {
|
|
641
|
+
const retryKey = `${attempt.taskId}:${attempt.channel}`;
|
|
642
|
+
const now = Date.now();
|
|
643
|
+
const lastAttemptAt = this.backgroundDeliveryRetryLastAttemptAt.get(retryKey) ?? 0;
|
|
644
|
+
if (now - lastAttemptAt < this.config.delivery.retry.backgroundCooldownMs) {
|
|
645
|
+
continue;
|
|
646
|
+
}
|
|
647
|
+
this.backgroundDeliveryRetryLastAttemptAt.set(retryKey, now);
|
|
648
|
+
await this.retryFailedDeliveryAttempt(attempt);
|
|
649
|
+
}
|
|
650
|
+
}
|
|
651
|
+
async retryFailedDeliveryAttempt(attempt) {
|
|
652
|
+
if (attempt.channel !== "feishu") {
|
|
653
|
+
return;
|
|
654
|
+
}
|
|
655
|
+
const bundle = await this.store.getLatestTaskDeliveryBundle(attempt.taskId);
|
|
656
|
+
if (!bundle) {
|
|
657
|
+
await this.logger.warn("task.delivery_retry.skipped", {
|
|
658
|
+
taskId: attempt.taskId,
|
|
659
|
+
channel: attempt.channel,
|
|
660
|
+
reason: "bundle_missing"
|
|
661
|
+
});
|
|
662
|
+
return;
|
|
663
|
+
}
|
|
664
|
+
const publicationAttempts = await this.store.listTaskPublicationAttempts(attempt.taskId);
|
|
665
|
+
const notifier = new FeishuNotifier({
|
|
666
|
+
publicationAttempts,
|
|
667
|
+
retryPolicy: this.config.delivery.retry
|
|
668
|
+
});
|
|
669
|
+
const retriedAttempts = await notifier.dispatch(bundle);
|
|
670
|
+
await this.store.appendTaskDeliveryAttempts(retriedAttempts);
|
|
671
|
+
const success = retriedAttempts.every((candidate) => candidate.status === "dispatched");
|
|
672
|
+
await this.logger.info("task.delivery_retry.executed", {
|
|
673
|
+
taskId: attempt.taskId,
|
|
674
|
+
channel: attempt.channel,
|
|
675
|
+
status: success ? "dispatched" : "failed",
|
|
676
|
+
retriedAttemptCount: retriedAttempts.length,
|
|
677
|
+
previousAttemptCreatedAt: attempt.createdAt,
|
|
678
|
+
...(attempt.error ? { previousError: attempt.error } : {})
|
|
679
|
+
});
|
|
680
|
+
}
|
|
681
|
+
// Health logs summarize runtime pressure and state distribution for operators.
|
|
682
|
+
async logHealth() {
|
|
683
|
+
await this.monitorActiveRuns();
|
|
684
|
+
const tasks = await this.store.listTasks();
|
|
685
|
+
const summary = tasks.reduce((accumulator, task) => {
|
|
686
|
+
accumulator[task.status] = (accumulator[task.status] ?? 0) + 1;
|
|
687
|
+
return accumulator;
|
|
688
|
+
}, {});
|
|
689
|
+
const runs = await this.store.listTaskRuns();
|
|
690
|
+
const activeRuns = runs.filter((run) => !run.endedAt);
|
|
691
|
+
const stalledRuns = activeRuns.filter((run) => run.health === "suspected_stall");
|
|
692
|
+
const recentTasks = tasks
|
|
693
|
+
.slice(-this.config.observability.maxRecentTasksInHealthLog)
|
|
694
|
+
.map((task) => `${task.taskId}:${task.status}:${task.activeRunId ?? "-"}`)
|
|
695
|
+
.join(",");
|
|
696
|
+
await this.logger.info("runtime.health", {
|
|
697
|
+
totalTasks: tasks.length,
|
|
698
|
+
queueDepth: this.eventQueue.size(),
|
|
699
|
+
activeWorkers: this.runningTaskIds.size,
|
|
700
|
+
activeRuns: activeRuns.length,
|
|
701
|
+
stalledRuns: stalledRuns.length,
|
|
702
|
+
queued: summary.queued ?? 0,
|
|
703
|
+
preparing: summary.preparing ?? 0,
|
|
704
|
+
running: summary.running ?? 0,
|
|
705
|
+
patchGenerated: summary.patch_generated ?? 0,
|
|
706
|
+
validating: summary.validating ?? 0,
|
|
707
|
+
needsHuman: summary.needs_human ?? 0,
|
|
708
|
+
failed: summary.failed ?? 0,
|
|
709
|
+
completed: summary.completed ?? 0,
|
|
710
|
+
recentTasks
|
|
711
|
+
});
|
|
712
|
+
}
|
|
713
|
+
createRuntimeStopEvent() {
|
|
714
|
+
return {
|
|
715
|
+
eventId: `event-stop-${Date.now()}`,
|
|
716
|
+
type: "task.cancel_requested",
|
|
717
|
+
content: {
|
|
718
|
+
source: "system",
|
|
719
|
+
title: "__runtime_stop__",
|
|
720
|
+
description: "runtime stop signal",
|
|
721
|
+
content: "runtime stop signal"
|
|
722
|
+
},
|
|
723
|
+
createdAt: new Date().toISOString()
|
|
724
|
+
};
|
|
725
|
+
}
|
|
726
|
+
async ensureRuntimeDirs() {
|
|
727
|
+
await rm(join(this.config.storage.rootDir, "data", "tasks.db"), { force: true });
|
|
728
|
+
await mkdir(join(this.config.storage.rootDir, "artifacts"), { recursive: true });
|
|
729
|
+
await mkdir(join(this.config.storage.rootDir, "workspaces"), { recursive: true });
|
|
730
|
+
await mkdir(this.config.skills.evolutionRootDir, { recursive: true });
|
|
731
|
+
}
|
|
732
|
+
buildTaskEventOptions(runId, sourceEventId) {
|
|
733
|
+
const options = {};
|
|
734
|
+
if (runId) {
|
|
735
|
+
options.runId = runId;
|
|
736
|
+
}
|
|
737
|
+
if (sourceEventId) {
|
|
738
|
+
options.sourceEventId = sourceEventId;
|
|
739
|
+
}
|
|
740
|
+
return options;
|
|
741
|
+
}
|
|
742
|
+
// Manual submission writes into the same inbox path consumed by the resident runtime.
|
|
743
|
+
async writeManualSubmission(payload) {
|
|
744
|
+
await this.cliFileIntake.init();
|
|
745
|
+
const path = this.cliFileIntake.buildInboxFilePath();
|
|
746
|
+
await writeFile(path, JSON.stringify(payload, null, 2), "utf8");
|
|
747
|
+
await this.cliFileIntake.touchSignal();
|
|
748
|
+
return path;
|
|
749
|
+
}
|
|
750
|
+
}
|
|
751
|
+
//# sourceMappingURL=optimus-runtime.js.map
|