@elisym/cli 0.16.0 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -406,7 +406,16 @@ function createOpenAICompatibleProvider(config) {
406
406
  },
407
407
  body: JSON.stringify({
408
408
  model,
409
- max_tokens: 1,
409
+ // Some xAI / DeepSeek model variants are reasoning models that
410
+ // burn output tokens on internal chain-of-thought before any
411
+ // visible reply; with `max_tokens: 1` the API returns HTTP 400
412
+ // and we falsely flip the (provider, model) pair to unhealthy
413
+ // on a valid key. 256 leaves room for the minimum reasoning
414
+ // budget on every current variant; for non-reasoning models
415
+ // the probe still stops at the first natural token, so the
416
+ // extra ceiling does not change the per-probe cost in
417
+ // practice.
418
+ max_tokens: 256,
410
419
  messages: [{ role: "user", content: "." }]
411
420
  })
412
421
  },
@@ -693,7 +702,17 @@ async function verifyKeyDeep2(apiKey, model, signal) {
693
702
  },
694
703
  body: JSON.stringify({
695
704
  model,
696
- ...reasoning ? { max_completion_tokens: 1 } : { max_tokens: 1 },
705
+ // GPT-5 reasoning models count internal chain-of-thought tokens
706
+ // against `max_completion_tokens`. With a budget of 1 the model
707
+ // exhausts it on reasoning before producing any visible content
708
+ // and the API responds HTTP 400 ("max_tokens reached") - which
709
+ // would falsely flip the (provider, model) pair to unhealthy on
710
+ // an otherwise valid key. 256 covers a "." prompt's reasoning
711
+ // budget on every current GPT-5 variant we tested with margin;
712
+ // non-reasoning models still take max_tokens: 1 since they emit
713
+ // visible content directly. Probe cost stays sub-cent per
714
+ // startup probe (reasoning tokens are billed at output rates).
715
+ ...reasoning ? { max_completion_tokens: 256 } : { max_tokens: 1 },
697
716
  messages: [{ role: "user", content: "." }]
698
717
  })
699
718
  },
@@ -2158,23 +2177,26 @@ var AgentRuntime = class {
2158
2177
  }
2159
2178
  if (skill.mode !== "llm") {
2160
2179
  const message = err instanceof Error ? err.message : String(err);
2161
- if (!scriptMessageLooksLikeBillingOrInvalid(message)) {
2162
- return false;
2163
- }
2164
2180
  const provider = skill.llmOverride?.provider;
2165
2181
  const model = skill.llmOverride?.model;
2166
2182
  if (!provider || !model) {
2167
2183
  log(
2168
- `${tag} Script failure looks like billing/invalid ("${message.slice(0, 120)}") but skill "${skill.name}" did not declare provider/model in SKILL.md - cannot gate future jobs.`
2184
+ `${tag} Script "${skill.name}" failed ("${message.slice(0, 120)}") but did not declare provider/model in SKILL.md - cannot gate future jobs.`
2169
2185
  );
2170
2186
  return false;
2171
2187
  }
2172
2188
  const lower = message.toLowerCase();
2173
- const reason = lower.includes("credit balance") || lower.includes("billing") || lower.includes("insufficient") ? "billing" : "invalid";
2189
+ const looksBillingOrInvalid = scriptMessageLooksLikeBillingOrInvalid(message);
2190
+ const reason = looksBillingOrInvalid && (lower.includes("credit balance") || lower.includes("billing") || lower.includes("insufficient")) ? "billing" : "invalid";
2191
+ const cascade = looksBillingOrInvalid;
2192
+ const cascadeNote = cascade ? this.cascadeSuffix(provider, model) : " (no cascade)";
2193
+ const signalNote = looksBillingOrInvalid ? `${reason} signal in stderr` : `generic exit (no billing/invalid markers, classified as ${reason}, skill-local)`;
2174
2194
  log(
2175
- `${tag} Script failure carries ${reason} signal in stderr. Marking ${provider}/${model} unhealthy${this.cascadeSuffix(provider, model)}; future jobs against this pair will be refused until recovery probe succeeds.`
2195
+ `${tag} Script failure (${signalNote}). Marking ${provider}/${model} unhealthy${cascadeNote}; future jobs against this pair will be refused until recovery probe succeeds.`
2176
2196
  );
2177
- this.healthMonitor.markUnhealthyFromJob(provider, model, reason, message.slice(0, 200));
2197
+ this.healthMonitor.markUnhealthyFromJob(provider, model, reason, message.slice(0, 200), {
2198
+ cascade
2199
+ });
2178
2200
  return true;
2179
2201
  }
2180
2202
  return false;