@elisym/cli 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -8,7 +8,7 @@ import { generateSecretKey, getPublicKey, nip19, verifyEvent } from 'nostr-tools
8
8
  import YAML from 'yaml';
9
9
  import { Command } from 'commander';
10
10
  import { createHash } from 'node:crypto';
11
- import { LlmHealthMonitor, startLlmRecovery, createFreeLlmLimiterSet, ScriptBillingExhaustedError, FREE_LLM_GLOBAL_KEY, freeLlmCustomerKey } from '@elisym/sdk/llm-health';
11
+ import { LlmHealthMonitor, startLlmRecovery, createFreeLlmLimiterSet, ScriptBillingExhaustedError, FREE_LLM_GLOBAL_KEY, freeLlmCustomerKey, LlmHealthError } from '@elisym/sdk/llm-health';
12
12
  import { lookup } from 'node:dns/promises';
13
13
  import { Socket } from 'node:net';
14
14
  import pino from 'pino';
@@ -1959,6 +1959,7 @@ var payment = new SolanaPaymentStrategy();
1959
1959
  var LEDGER_GC_INTERVAL_MS = 60 * 60 * 1e3;
1960
1960
  var LEDGER_RETENTION_MS = 30 * 24 * 60 * 60 * 1e3;
1961
1961
  var TOTAL_JOB_TIMEOUT_MS = 5 * 60 * 1e3;
1962
+ var MAX_PAID_AGE_MS = 24 * 60 * 60 * 1e3;
1962
1963
  var SIG_PATH_TIMEOUT_MS = 60 * 1e3;
1963
1964
  function resolveJobPrice(tags, skills) {
1964
1965
  const skill = skills.route(tags);
@@ -2276,9 +2277,15 @@ var AgentRuntime = class {
2276
2277
  const log = this.callbacks.onLog ?? console.log;
2277
2278
  log(`[${job.jobId.slice(0, 8)}] Error: ${e.message}`);
2278
2279
  const currentStatus = this.ledger.getStatus(job.jobId);
2279
- if (currentStatus !== "executed") {
2280
+ const keepPaidForRecovery = e instanceof AgentUnavailableError && currentStatus === "paid";
2281
+ if (currentStatus !== "executed" && !keepPaidForRecovery) {
2280
2282
  this.ledger.markFailed(job.jobId);
2281
2283
  }
2284
+ if (keepPaidForRecovery) {
2285
+ log(
2286
+ `[${job.jobId.slice(0, 8)}] Keeping status=paid; recovery will re-execute when LLM pair recovers (24h cutoff).`
2287
+ );
2288
+ }
2282
2289
  this.callbacks.onJobError?.(job.jobId, e.message);
2283
2290
  let safeMessage;
2284
2291
  if (e instanceof AgentUnavailableError) {
@@ -2543,9 +2550,35 @@ var AgentRuntime = class {
2543
2550
  }
2544
2551
  const log = this.callbacks.onLog ?? console.log;
2545
2552
  log(`Recovering ${pending.length} pending jobs...`);
2553
+ if (this.healthMonitor) {
2554
+ const snap = this.healthMonitor.snapshot();
2555
+ const unhealthyKeys = new Set(
2556
+ snap.filter(
2557
+ (entry) => entry.status === "invalid" || entry.status === "billing" || entry.status === "unavailable"
2558
+ ).map((entry) => `${entry.provider}::${entry.model}`)
2559
+ );
2560
+ if (unhealthyKeys.size > 0) {
2561
+ const hasPaidOnUnhealthy = pending.some((entry) => {
2562
+ if (entry.status !== "paid") {
2563
+ return false;
2564
+ }
2565
+ const skill = this.skills.route(entry.tags);
2566
+ const pair = resolveHealthPair(skill);
2567
+ return pair !== null && unhealthyKeys.has(`${pair.provider}::${pair.model}`);
2568
+ });
2569
+ if (hasPaidOnUnhealthy) {
2570
+ void this.healthMonitor.refreshUnhealthy().catch(() => {
2571
+ });
2572
+ }
2573
+ }
2574
+ }
2546
2575
  for (const entry of pending) {
2547
- if (entry.retry_count >= this.config.recoveryMaxRetries) {
2576
+ const ageMs = (Math.floor(Date.now() / 1e3) - entry.created_at) * 1e3;
2577
+ const expired = ageMs > MAX_PAID_AGE_MS;
2578
+ const exhaustedRetries = entry.retry_count >= this.config.recoveryMaxRetries;
2579
+ if (expired || exhaustedRetries) {
2548
2580
  this.ledger.markFailed(entry.job_id);
2581
+ const reason = expired ? "Job permanently failed: agent did not recover within 24 hours" : "Job permanently failed after maximum retries";
2549
2582
  if (entry.raw_event_json) {
2550
2583
  try {
2551
2584
  const rawEvent = JSON.parse(entry.raw_event_json);
@@ -2559,7 +2592,7 @@ var AgentRuntime = class {
2559
2592
  encrypted: false,
2560
2593
  rawEvent
2561
2594
  },
2562
- { type: "error", message: "Job permanently failed after maximum retries" }
2595
+ { type: "error", message: reason }
2563
2596
  ).catch(() => {
2564
2597
  });
2565
2598
  } catch {
@@ -2592,7 +2625,6 @@ var AgentRuntime = class {
2592
2625
  this.jobAbortControllers.add(recoveryAbort);
2593
2626
  const timeout = setTimeout(() => recoveryAbort.abort(), TOTAL_JOB_TIMEOUT_MS);
2594
2627
  try {
2595
- this.ledger.incrementRetry(entry.job_id);
2596
2628
  const rawEvent = JSON.parse(entry.raw_event_json);
2597
2629
  const fakeJob = {
2598
2630
  jobId: entry.job_id,
@@ -2604,6 +2636,7 @@ var AgentRuntime = class {
2604
2636
  rawEvent
2605
2637
  };
2606
2638
  if (entry.status === "executed" && entry.result !== void 0) {
2639
+ this.ledger.incrementRetry(entry.job_id);
2607
2640
  await this.transport.deliverResult(fakeJob, entry.result, entry.net_amount);
2608
2641
  this.ledger.markDelivered(entry.job_id);
2609
2642
  log(`[${entry.job_id.slice(0, 8)}] Recovery: re-delivered`);
@@ -2614,6 +2647,21 @@ var AgentRuntime = class {
2614
2647
  this.ledger.markFailed(entry.job_id);
2615
2648
  return;
2616
2649
  }
2650
+ const healthPair = resolveHealthPair(skill);
2651
+ if (this.healthMonitor && healthPair) {
2652
+ try {
2653
+ await this.healthMonitor.assertReady(healthPair.provider, healthPair.model);
2654
+ } catch (err) {
2655
+ if (err instanceof LlmHealthError) {
2656
+ log(
2657
+ `[${entry.job_id.slice(0, 8)}] Recovery: pair ${healthPair.provider}/${healthPair.model} still unhealthy (${err.reason}); waiting for recovery probe.`
2658
+ );
2659
+ return;
2660
+ }
2661
+ throw err;
2662
+ }
2663
+ }
2664
+ this.ledger.incrementRetry(entry.job_id);
2617
2665
  if (skill.priceSubunits > 0 && !entry.net_amount) {
2618
2666
  if (entry.payment_request) {
2619
2667
  const verified = await this.reVerifyPayment(