@elisym/cli 0.13.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -8,7 +8,7 @@ import { generateSecretKey, getPublicKey, nip19, verifyEvent } from 'nostr-tools
8
8
  import YAML from 'yaml';
9
9
  import { Command } from 'commander';
10
10
  import { createHash } from 'node:crypto';
11
- import { LlmHealthMonitor, startLlmRecovery, createFreeLlmLimiterSet, ScriptBillingExhaustedError, FREE_LLM_GLOBAL_KEY, freeLlmCustomerKey } from '@elisym/sdk/llm-health';
11
+ import { LlmHealthMonitor, startLlmRecovery, createFreeLlmLimiterSet, ScriptBillingExhaustedError, FREE_LLM_GLOBAL_KEY, freeLlmCustomerKey, LlmHealthError } from '@elisym/sdk/llm-health';
12
12
  import { lookup } from 'node:dns/promises';
13
13
  import { Socket } from 'node:net';
14
14
  import pino from 'pino';
@@ -1959,6 +1959,7 @@ var payment = new SolanaPaymentStrategy();
1959
1959
  var LEDGER_GC_INTERVAL_MS = 60 * 60 * 1e3;
1960
1960
  var LEDGER_RETENTION_MS = 30 * 24 * 60 * 60 * 1e3;
1961
1961
  var TOTAL_JOB_TIMEOUT_MS = 5 * 60 * 1e3;
1962
+ var MAX_PAID_AGE_MS = 24 * 60 * 60 * 1e3;
1962
1963
  var SIG_PATH_TIMEOUT_MS = 60 * 1e3;
1963
1964
  function resolveJobPrice(tags, skills) {
1964
1965
  const skill = skills.route(tags);
@@ -1969,6 +1970,22 @@ function resolveJobAsset(tags, skills) {
1969
1970
  return skill?.asset ?? NATIVE_SOL;
1970
1971
  }
1971
1972
  var BILLING_BODY_MARKERS3 = ["credit balance", "billing", "insufficient", "insufficient_quota"];
1973
/**
 * Lower-case substrings that, when present in a script failure message,
 * indicate a billing problem (exhausted credit / quota) or an invalid or
 * missing API credential.
 */
var SCRIPT_BILLING_INVALID_MARKERS = [
  "credit balance",
  "billing",
  "insufficient",
  "insufficient_quota",
  "x-api-key",
  "invalid api key",
  "invalid_api_key",
  "authentication_error",
  "unauthorized",
  "unauthenticated"
];

/**
 * Report whether a script failure message contains any billing/invalid-key
 * marker. Matching is a case-insensitive substring search.
 *
 * @param {unknown} message - Failure text to inspect (normally a string).
 * @returns {boolean} `true` when at least one marker occurs in the message;
 *   `false` otherwise, including when `message` is not a string.
 */
function scriptMessageLooksLikeBillingOrInvalid(message) {
  // An Error-like value may carry a missing or non-string `message`; treat
  // that as "no signal" instead of throwing from inside error handling.
  if (typeof message !== "string") {
    return false;
  }
  const lower = message.toLowerCase();
  return SCRIPT_BILLING_INVALID_MARKERS.some((marker) => lower.includes(marker));
}
1972
1989
  var AGENT_UNAVAILABLE_MESSAGE = "Agent temporarily unavailable";
1973
1990
  var AgentUnavailableError = class extends Error {
1974
1991
  constructor() {
@@ -2069,6 +2086,32 @@ var AgentRuntime = class {
2069
2086
  * Anything else is a transient/skill error and does NOT touch health
2070
2087
  * state - the recovery loop should not be poisoned by skill bugs.
2071
2088
  */
2089
+ /**
2090
+ * Build a "and N other model(s) for the same provider" suffix for
2091
+ * cascade-narrating log lines. The SDK monitor cascades `invalid` /
2092
+ * `billing` flips across every sibling pair sharing the same provider
2093
+ * (shared API key); this helper just narrates that to the operator log
2094
+ * so they can see why unrelated skills are now refusing jobs.
2095
+ */
2096
+ cascadeSuffix(provider, triggeringModel) {
2097
+ if (!this.healthMonitor) {
2098
+ return "";
2099
+ }
2100
+ let siblings = 0;
2101
+ for (const entry of this.healthMonitor.snapshot()) {
2102
+ if (entry.provider !== provider) {
2103
+ continue;
2104
+ }
2105
+ if (entry.model === triggeringModel) {
2106
+ continue;
2107
+ }
2108
+ siblings += 1;
2109
+ }
2110
+ if (siblings === 0) {
2111
+ return "";
2112
+ }
2113
+ return ` (cascading to ${siblings} other model(s) for ${provider} sharing the same API key)`;
2114
+ }
2072
2115
  markHealthFromExecuteError(skill, err, log, jobId) {
2073
2116
  if (!this.healthMonitor) {
2074
2117
  return false;
@@ -2084,7 +2127,7 @@ var AgentRuntime = class {
2084
2127
  return false;
2085
2128
  }
2086
2129
  log(
2087
- `${tag} Script signaled billing-exhausted (exit ${err.exitCode}). Marking ${provider}/${model} unhealthy; future jobs against this pair will be refused until recovery probe succeeds.`
2130
+ `${tag} Script signaled billing-exhausted (exit ${err.exitCode}). Marking ${provider}/${model} unhealthy${this.cascadeSuffix(provider, model)}; future jobs against this pair will be refused until recovery probe succeeds.`
2088
2131
  );
2089
2132
  this.healthMonitor.markUnhealthyFromJob(provider, model, "billing", err.message);
2090
2133
  return true;
@@ -2108,11 +2151,32 @@ var AgentRuntime = class {
2108
2151
  const provider = skill.resolvedTriple.provider;
2109
2152
  const model = skill.resolvedTriple.model;
2110
2153
  log(
2111
- `${tag} LLM provider returned HTTP ${status} (${reason}). Marking ${provider}/${model} unhealthy; future jobs against this pair will be refused until recovery probe succeeds.`
2154
+ `${tag} LLM provider returned HTTP ${status} (${reason}). Marking ${provider}/${model} unhealthy${this.cascadeSuffix(provider, model)}; future jobs against this pair will be refused until recovery probe succeeds.`
2112
2155
  );
2113
2156
  this.healthMonitor.markUnhealthyFromJob(provider, model, reason, body);
2114
2157
  return true;
2115
2158
  }
2159
+ if (skill.mode !== "llm") {
2160
+ const message = err instanceof Error ? err.message : String(err);
2161
+ if (!scriptMessageLooksLikeBillingOrInvalid(message)) {
2162
+ return false;
2163
+ }
2164
+ const provider = skill.llmOverride?.provider;
2165
+ const model = skill.llmOverride?.model;
2166
+ if (!provider || !model) {
2167
+ log(
2168
+ `${tag} Script failure looks like billing/invalid ("${message.slice(0, 120)}") but skill "${skill.name}" did not declare provider/model in SKILL.md - cannot gate future jobs.`
2169
+ );
2170
+ return false;
2171
+ }
2172
+ const lower = message.toLowerCase();
2173
+ const reason = lower.includes("credit balance") || lower.includes("billing") || lower.includes("insufficient") ? "billing" : "invalid";
2174
+ log(
2175
+ `${tag} Script failure carries ${reason} signal in stderr. Marking ${provider}/${model} unhealthy${this.cascadeSuffix(provider, model)}; future jobs against this pair will be refused until recovery probe succeeds.`
2176
+ );
2177
+ this.healthMonitor.markUnhealthyFromJob(provider, model, reason, message.slice(0, 200));
2178
+ return true;
2179
+ }
2116
2180
  return false;
2117
2181
  }
2118
2182
  /** Fetch on-chain protocol config (fee, treasury). Always fetches fresh to avoid stale treasury. */
@@ -2276,9 +2340,15 @@ var AgentRuntime = class {
2276
2340
  const log = this.callbacks.onLog ?? console.log;
2277
2341
  log(`[${job.jobId.slice(0, 8)}] Error: ${e.message}`);
2278
2342
  const currentStatus = this.ledger.getStatus(job.jobId);
2279
- if (currentStatus !== "executed") {
2343
+ const keepPaidForRecovery = e instanceof AgentUnavailableError && currentStatus === "paid";
2344
+ if (currentStatus !== "executed" && !keepPaidForRecovery) {
2280
2345
  this.ledger.markFailed(job.jobId);
2281
2346
  }
2347
+ if (keepPaidForRecovery) {
2348
+ log(
2349
+ `[${job.jobId.slice(0, 8)}] Keeping status=paid; recovery will re-execute when LLM pair recovers (24h cutoff).`
2350
+ );
2351
+ }
2282
2352
  this.callbacks.onJobError?.(job.jobId, e.message);
2283
2353
  let safeMessage;
2284
2354
  if (e instanceof AgentUnavailableError) {
@@ -2543,9 +2613,35 @@ var AgentRuntime = class {
2543
2613
  }
2544
2614
  const log = this.callbacks.onLog ?? console.log;
2545
2615
  log(`Recovering ${pending.length} pending jobs...`);
2616
+ if (this.healthMonitor) {
2617
+ const snap = this.healthMonitor.snapshot();
2618
+ const unhealthyKeys = new Set(
2619
+ snap.filter(
2620
+ (entry) => entry.status === "invalid" || entry.status === "billing" || entry.status === "unavailable"
2621
+ ).map((entry) => `${entry.provider}::${entry.model}`)
2622
+ );
2623
+ if (unhealthyKeys.size > 0) {
2624
+ const hasPaidOnUnhealthy = pending.some((entry) => {
2625
+ if (entry.status !== "paid") {
2626
+ return false;
2627
+ }
2628
+ const skill = this.skills.route(entry.tags);
2629
+ const pair = resolveHealthPair(skill);
2630
+ return pair !== null && unhealthyKeys.has(`${pair.provider}::${pair.model}`);
2631
+ });
2632
+ if (hasPaidOnUnhealthy) {
2633
+ void this.healthMonitor.refreshUnhealthy().catch(() => {
2634
+ });
2635
+ }
2636
+ }
2637
+ }
2546
2638
  for (const entry of pending) {
2547
- if (entry.retry_count >= this.config.recoveryMaxRetries) {
2639
+ const ageMs = (Math.floor(Date.now() / 1e3) - entry.created_at) * 1e3;
2640
+ const expired = ageMs > MAX_PAID_AGE_MS;
2641
+ const exhaustedRetries = entry.retry_count >= this.config.recoveryMaxRetries;
2642
+ if (expired || exhaustedRetries) {
2548
2643
  this.ledger.markFailed(entry.job_id);
2644
+ const reason = expired ? "Job permanently failed: agent did not recover within 24 hours" : "Job permanently failed after maximum retries";
2549
2645
  if (entry.raw_event_json) {
2550
2646
  try {
2551
2647
  const rawEvent = JSON.parse(entry.raw_event_json);
@@ -2559,7 +2655,7 @@ var AgentRuntime = class {
2559
2655
  encrypted: false,
2560
2656
  rawEvent
2561
2657
  },
2562
- { type: "error", message: "Job permanently failed after maximum retries" }
2658
+ { type: "error", message: reason }
2563
2659
  ).catch(() => {
2564
2660
  });
2565
2661
  } catch {
@@ -2592,7 +2688,6 @@ var AgentRuntime = class {
2592
2688
  this.jobAbortControllers.add(recoveryAbort);
2593
2689
  const timeout = setTimeout(() => recoveryAbort.abort(), TOTAL_JOB_TIMEOUT_MS);
2594
2690
  try {
2595
- this.ledger.incrementRetry(entry.job_id);
2596
2691
  const rawEvent = JSON.parse(entry.raw_event_json);
2597
2692
  const fakeJob = {
2598
2693
  jobId: entry.job_id,
@@ -2604,6 +2699,7 @@ var AgentRuntime = class {
2604
2699
  rawEvent
2605
2700
  };
2606
2701
  if (entry.status === "executed" && entry.result !== void 0) {
2702
+ this.ledger.incrementRetry(entry.job_id);
2607
2703
  await this.transport.deliverResult(fakeJob, entry.result, entry.net_amount);
2608
2704
  this.ledger.markDelivered(entry.job_id);
2609
2705
  log(`[${entry.job_id.slice(0, 8)}] Recovery: re-delivered`);
@@ -2614,6 +2710,21 @@ var AgentRuntime = class {
2614
2710
  this.ledger.markFailed(entry.job_id);
2615
2711
  return;
2616
2712
  }
2713
+ const healthPair = resolveHealthPair(skill);
2714
+ if (this.healthMonitor && healthPair) {
2715
+ try {
2716
+ await this.healthMonitor.assertReady(healthPair.provider, healthPair.model);
2717
+ } catch (err) {
2718
+ if (err instanceof LlmHealthError) {
2719
+ log(
2720
+ `[${entry.job_id.slice(0, 8)}] Recovery: pair ${healthPair.provider}/${healthPair.model} still unhealthy (${err.reason}); waiting for recovery probe.`
2721
+ );
2722
+ return;
2723
+ }
2724
+ throw err;
2725
+ }
2726
+ }
2727
+ this.ledger.incrementRetry(entry.job_id);
2617
2728
  if (skill.priceSubunits > 0 && !entry.net_amount) {
2618
2729
  if (entry.payment_request) {
2619
2730
  const verified = await this.reVerifyPayment(