@iamoberlin/chorus 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/scheduler.ts CHANGED
@@ -8,7 +8,9 @@
8
8
  import type { OpenClawPluginService, PluginLogger } from "openclaw/plugin-sdk";
9
9
  import type { ChorusConfig } from "./config.js";
10
10
  import { CHOIRS, shouldRunChoir, CASCADE_ORDER, type Choir } from "./choirs.js";
11
+ import { deliverChoirOutput } from "./delivery.js";
11
12
  import { recordExecution, type ChoirExecution } from "./metrics.js";
13
+ import { recordCost, estimateCost } from "./economics.js";
12
14
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
13
15
  import { join } from "path";
14
16
  import { homedir } from "os";
@@ -22,8 +24,8 @@ interface AgentTurnResult {
22
24
  }
23
25
 
24
26
  // ── Delivery ─────────────────────────────────────────────────
25
- // Choir agents handle their own delivery via OpenClaw messaging tools.
26
- // The scheduler's job is execution and scheduling not routing messages.
27
+ // Delivery is handled via shared channel adapter to keep behavior consistent
28
+ // across scheduler and manual CLI execution paths.
27
29
 
28
30
  interface ChoirContext {
29
31
  choirId: string;
@@ -41,6 +43,29 @@ interface ChoirRunState {
41
43
  const CHORUS_DIR = join(homedir(), ".chorus");
42
44
  const RUN_STATE_PATH = join(CHORUS_DIR, "run-state.json");
43
45
 
46
+ // ── 429 Circuit Breaker ──────────────────────────────────────
47
+ // Exponential backoff on rate limit errors. Skips choir execution
48
+ // when backing off, resets on successful runs.
49
/**
 * Mutable state for the rate-limit circuit breaker.
 */
interface CircuitBreakerState {
  /** Epoch timestamp in milliseconds; choir execution is skipped until this time. */
  backoffUntil: number;
  /** Number of rate-limit failures seen since the last reset. */
  consecutiveFailures: number;
}

const MAX_BACKOFF_MS = 16 * 60 * 1000; // 16 minutes cap
const BASE_BACKOFF_MS = 60 * 1000; // 1 minute base

// Matches a standalone "429" that is not part of a longer digit run, so
// values such as a port number "14290" do not count as a rate-limit status.
const RATE_LIMIT_STATUS_PATTERN = /(?<!\d)429(?!\d)/;
const RATE_LIMIT_PHRASES: readonly string[] = ["rate limit", "rate_limit", "too many requests"];

/**
 * Decides whether a thrown value looks like an HTTP 429 / rate-limit failure.
 *
 * Structured fields (`status`, `statusCode`, `code`, `response.status`) are
 * checked first, because `String(err)` on a plain object is "[object Object]"
 * and carries no status. The textual form (plus a string `message` property,
 * when present) is then scanned for a standalone "429" or a known phrase.
 *
 * @param err - Any caught value; may be an Error, a plain object, a string, or nullish.
 * @returns `true` when the value carries a 429 status or rate-limit wording.
 */
function isRateLimitError(err: unknown): boolean {
  let text = String(err);

  if (typeof err === "object" && err !== null) {
    const record = err as Record<string, unknown>;
    const statusCandidates: unknown[] = [record.status, record.statusCode, record.code];

    const response = record.response;
    if (typeof response === "object" && response !== null) {
      statusCandidates.push((response as Record<string, unknown>).status);
    }
    if (statusCandidates.some((value) => value === 429 || value === "429")) {
      return true;
    }

    // Plain objects stringify to "[object Object]"; include their message text.
    if (typeof record.message === "string") {
      text += ` ${record.message}`;
    }
  }

  const haystack = text.toLowerCase();
  return (
    RATE_LIMIT_STATUS_PATTERN.test(haystack) ||
    RATE_LIMIT_PHRASES.some((phrase) => haystack.includes(phrase))
  );
}

/**
 * Computes the exponential backoff delay for the given failure count.
 *
 * The delay doubles per failure starting at BASE_BACKOFF_MS (exponent capped
 * at 4), receives ±25% jitter, and is clamped to MAX_BACKOFF_MS. At the cap
 * the jitter can therefore only shorten the delay, never lengthen it.
 *
 * @param failures - Consecutive failure count; values below 1 (or NaN) are treated as 1.
 * @param random - Source of uniform numbers in [0, 1); injectable for deterministic tests.
 * @returns Delay in whole milliseconds, never above MAX_BACKOFF_MS.
 */
function computeBackoffMs(failures: number, random: () => number = Math.random): number {
  // A NaN result would make `Date.now() < backoffUntil` false and silently
  // disable the breaker, so fall back to the first-failure delay instead.
  const attempts = Number.isNaN(failures) ? 1 : Math.max(1, failures);
  const baseDelay = BASE_BACKOFF_MS * Math.pow(2, Math.min(attempts - 1, 4));
  // Add jitter: ±25%
  const jitter = baseDelay * 0.25 * (random() * 2 - 1);
  return Math.round(Math.min(baseDelay + jitter, MAX_BACKOFF_MS));
}
68
+
44
69
  // Load persisted run state from disk
45
70
  function loadRunState(log: PluginLogger): Map<string, ChoirRunState> {
46
71
  const state = new Map<string, ChoirRunState>();
@@ -106,12 +131,15 @@ export function createChoirScheduler(
106
131
  // Load persisted state instead of starting fresh
107
132
  const runState = loadRunState(log);
108
133
 
134
+ // Circuit breaker state
135
+ const circuitBreaker: CircuitBreakerState = { backoffUntil: 0, consecutiveFailures: 0 };
136
+
109
137
  // CLI fallback for executing choirs when plugin API is unavailable
110
138
  async function executeChoirViaCli(choir: Choir, prompt: string): Promise<string> {
111
139
  const result = await new Promise<{ status: number | null; stdout: string; stderr: string }>((resolve) => {
112
140
  const child = spawn('openclaw', [
113
141
  'agent',
114
- '--session-id', `chorus:${choir.id}`,
142
+ '--session-id', `chorus-${choir.id}`,
115
143
  '--message', prompt,
116
144
  '--json',
117
145
  ], { stdio: ['pipe', 'pipe', 'pipe'] });
@@ -190,7 +218,7 @@ export function createChoirScheduler(
190
218
  if (typeof api.runAgentTurn === 'function') {
191
219
  try {
192
220
  const result: AgentTurnResult = await api.runAgentTurn({
193
- sessionLabel: `chorus:${choir.id}`,
221
+ sessionLabel: `chorus-${choir.id}`,
194
222
  message: prompt,
195
223
  isolated: true,
196
224
  timeoutSeconds: 300,
@@ -208,6 +236,17 @@ export function createChoirScheduler(
208
236
  output = result.text;
209
237
  }
210
238
  } catch (apiErr) {
239
+ if (isRateLimitError(apiErr)) {
240
+ circuitBreaker.consecutiveFailures++;
241
+ const backoffMs = computeBackoffMs(circuitBreaker.consecutiveFailures);
242
+ circuitBreaker.backoffUntil = Date.now() + backoffMs;
243
+ log.warn(`[chorus] ⚡ 429 RATE LIMIT on ${choir.name} — backing off ${(backoffMs/1000).toFixed(0)}s (failure #${circuitBreaker.consecutiveFailures})`);
244
+ execution.durationMs = Date.now() - startTime;
245
+ execution.success = false;
246
+ execution.error = "429_rate_limit_backoff";
247
+ recordExecution(execution);
248
+ return; // Abort this choir; checkAndRunChoirs checks the breaker at the start of each cycle and skips the whole cycle while backoff is active
249
+ }
211
250
  log.warn(`[chorus] API runAgentTurn failed for ${choir.name}, falling back to CLI: ${apiErr}`);
212
251
  output = await executeChoirViaCli(choir, prompt);
213
252
  }
@@ -221,6 +260,21 @@ export function createChoirScheduler(
221
260
  execution.outputLength = output.length;
222
261
  execution.tokensUsed = estimateTokens(output);
223
262
 
263
+ // Record economics cost (only for runs that produced output)
264
+ if (execution.success) {
265
+ const inputTokensEst = Math.ceil(prompt.length / 4);
266
+ const outputTokensEst = execution.tokensUsed || Math.ceil(output.length / 4);
267
+ recordCost({
268
+ choirId: choir.id,
269
+ timestamp: new Date().toISOString(),
270
+ inputTokens: inputTokensEst,
271
+ outputTokens: outputTokensEst,
272
+ model: "default",
273
+ inferenceMs: execution.durationMs,
274
+ costUsd: estimateCost(inputTokensEst, outputTokensEst),
275
+ });
276
+ }
277
+
224
278
  // Parse output for metrics (findings, alerts, improvements)
225
279
  execution.findings = countFindings(output);
226
280
  execution.alerts = countAlerts(output);
@@ -243,8 +297,18 @@ export function createChoirScheduler(
243
297
  // Persist state to disk after each run
244
298
  saveRunState(runState, log);
245
299
 
300
+ // Reset circuit breaker on success
301
+ if (circuitBreaker.consecutiveFailures > 0) {
302
+ log.info(`[chorus] ✅ Circuit breaker reset after ${circuitBreaker.consecutiveFailures} failures`);
303
+ circuitBreaker.consecutiveFailures = 0;
304
+ circuitBreaker.backoffUntil = 0;
305
+ }
306
+
246
307
  log.info(`[chorus] ${choir.emoji} ${choir.name} completed (${(execution.durationMs/1000).toFixed(1)}s)`);
247
308
 
309
+ // Deliver output via shared adapter for consistent routing/format behavior
310
+ await deliverChoirOutput(api, choir, output, log);
311
+
248
312
  // Log illumination flow
249
313
  if (choir.passesTo.length > 0) {
250
314
  log.debug(`[chorus] Illumination ready for: ${choir.passesTo.join(", ")}`);
@@ -318,10 +382,20 @@ export function createChoirScheduler(
318
382
  return improvements.slice(0, 5); // Cap at 5
319
383
  }
320
384
 
385
+ // Track choirs currently executing to prevent duplicate runs
386
+ const executing = new Set<string>();
387
+
321
388
  // Check and run due choirs
322
389
  async function checkAndRunChoirs(): Promise<void> {
323
390
  const now = new Date();
324
391
 
392
+ // Circuit breaker: skip entire cycle if backing off from rate limit
393
+ if (Date.now() < circuitBreaker.backoffUntil) {
394
+ const remainingSec = ((circuitBreaker.backoffUntil - Date.now()) / 1000).toFixed(0);
395
+ log.debug(`[chorus] ⚡ Circuit breaker active — ${remainingSec}s remaining`);
396
+ return;
397
+ }
398
+
325
399
  // Check choirs in cascade order (important for illumination flow)
326
400
  for (const choirId of CASCADE_ORDER) {
327
401
  const choir = CHOIRS[choirId];
@@ -332,10 +406,20 @@ export function createChoirScheduler(
332
406
  continue;
333
407
  }
334
408
 
409
+ // Skip if already executing (prevents duplicate runs for slow choirs)
410
+ if (executing.has(choirId)) {
411
+ continue;
412
+ }
413
+
335
414
  // Check if due based on interval
336
415
  const state = runState.get(choirId);
337
416
  if (shouldRunChoir(choir, now, state?.lastRun)) {
338
- await executeChoir(choir);
417
+ executing.add(choirId);
418
+ try {
419
+ await executeChoir(choir);
420
+ } finally {
421
+ executing.delete(choirId);
422
+ }
339
423
  }
340
424
  }
341
425
  }