@elisym/cli 0.17.0 → 0.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -406,7 +406,16 @@ function createOpenAICompatibleProvider(config) {
406
406
  },
407
407
  body: JSON.stringify({
408
408
  model,
409
- max_tokens: 1,
409
+ // Some xAI / DeepSeek model variants are reasoning models that
410
+ // burn output tokens on internal chain-of-thought before any
411
+ // visible reply; with `max_tokens: 1` the API returns HTTP 400
412
+ // and we falsely flip the (provider, model) pair to unhealthy
413
+ // on a valid key. 256 leaves room for the minimum reasoning
414
+ // budget on every current variant; for non-reasoning models
415
+ // the probe still ends at the model's natural stop after a
416
+ // short reply, so the extra ceiling barely changes the
417
+ // per-probe cost in practice.
418
+ max_tokens: 256,
410
419
  messages: [{ role: "user", content: "." }]
411
420
  })
412
421
  },
@@ -693,7 +702,17 @@ async function verifyKeyDeep2(apiKey, model, signal) {
693
702
  },
694
703
  body: JSON.stringify({
695
704
  model,
696
- ...reasoning ? { max_completion_tokens: 1 } : { max_tokens: 1 },
705
+ // GPT-5 reasoning models count internal chain-of-thought tokens
706
+ // against `max_completion_tokens`. With a budget of 1 the model
707
+ // exhausts it on reasoning before producing any visible content
708
+ // and the API responds HTTP 400 ("max_tokens reached") - which
709
+ // would falsely flip the (provider, model) pair to unhealthy on
710
+ // an otherwise valid key. 256 covers, with margin, the reasoning
711
+ // budget of a "." prompt on every current GPT-5 variant we tested;
712
+ // non-reasoning models still take max_tokens: 1 since they emit
713
+ // visible content directly. Probe cost stays sub-cent per
714
+ // startup probe (reasoning tokens are billed at output rates).
715
+ ...reasoning ? { max_completion_tokens: 256 } : { max_tokens: 1 },
697
716
  messages: [{ role: "user", content: "." }]
698
717
  })
699
718
  },
@@ -2016,8 +2035,10 @@ function resolveHealthPair(skill) {
2016
2035
  return null;
2017
2036
  }
2018
2037
  var RATE_LIMIT_WINDOW_MS = 10 * 60 * 1e3;
2019
- var MAX_JOBS_PER_CUSTOMER = 20;
2020
- var GLOBAL_MAX_JOBS_PER_WINDOW = 200;
2038
+ var FREE_MAX_JOBS_PER_CUSTOMER = 20;
2039
+ var FREE_GLOBAL_MAX_JOBS_PER_WINDOW = 200;
2040
+ var PAID_MAX_JOBS_PER_CUSTOMER = 200;
2041
+ var PAID_GLOBAL_MAX_JOBS_PER_WINDOW = 2e3;
2021
2042
  var MAX_TRACKED_CUSTOMERS = 1e3;
2022
2043
  var GLOBAL_LIMITER_KEY = "__global__";
2023
2044
  var AgentRuntime = class {
@@ -2041,16 +2062,32 @@ var AgentRuntime = class {
2041
2062
  recoveryInterval = null;
2042
2063
  gcInterval = null;
2043
2064
  stopped = false;
2044
- /** Per-customer sliding-window rate limiter (keyed on customer pubkey). */
2045
- customerLimiter = createSlidingWindowLimiter({
2065
+ /** Per-customer sliding-window rate limiter for free skills. */
2066
+ freeCustomerLimiter = createSlidingWindowLimiter({
2046
2067
  windowMs: RATE_LIMIT_WINDOW_MS,
2047
- maxPerWindow: MAX_JOBS_PER_CUSTOMER,
2068
+ maxPerWindow: FREE_MAX_JOBS_PER_CUSTOMER,
2048
2069
  maxKeys: MAX_TRACKED_CUSTOMERS
2049
2070
  });
2050
- /** Global sliding-window rate limiter (Sybil protection). */
2051
- globalLimiter = createSlidingWindowLimiter({
2071
+ /** Global sliding-window rate limiter for free skills (Sybil protection). */
2072
+ freeGlobalLimiter = createSlidingWindowLimiter({
2052
2073
  windowMs: RATE_LIMIT_WINDOW_MS,
2053
- maxPerWindow: GLOBAL_MAX_JOBS_PER_WINDOW,
2074
+ maxPerWindow: FREE_GLOBAL_MAX_JOBS_PER_WINDOW,
2075
+ maxKeys: 1
2076
+ });
2077
+ /**
2078
+ * Per-customer sliding-window limiter for paid skills (10x looser than free).
2079
+ * Payment is the primary economic deterrent; this cap exists to bound the
2080
+ * "claim paid skill but never pay" queue-spam vector.
2081
+ */
2082
+ paidCustomerLimiter = createSlidingWindowLimiter({
2083
+ windowMs: RATE_LIMIT_WINDOW_MS,
2084
+ maxPerWindow: PAID_MAX_JOBS_PER_CUSTOMER,
2085
+ maxKeys: MAX_TRACKED_CUSTOMERS
2086
+ });
2087
+ /** Global sliding-window limiter for paid skills (Sybil protection, 10x free). */
2088
+ paidGlobalLimiter = createSlidingWindowLimiter({
2089
+ windowMs: RATE_LIMIT_WINDOW_MS,
2090
+ maxPerWindow: PAID_GLOBAL_MAX_JOBS_PER_WINDOW,
2054
2091
  maxKeys: 1
2055
2092
  });
2056
2093
  /**
@@ -2228,17 +2265,20 @@ var AgentRuntime = class {
2228
2265
  });
2229
2266
  return;
2230
2267
  }
2231
- if (!this.customerLimiter.peek(job.customerId).allowed) {
2268
+ const matched = this.skills.route(job.tags);
2269
+ const isPaid = matched ? matched.priceSubunits > 0 : false;
2270
+ const customerLimiter = isPaid ? this.paidCustomerLimiter : this.freeCustomerLimiter;
2271
+ const globalLimiter = isPaid ? this.paidGlobalLimiter : this.freeGlobalLimiter;
2272
+ if (!customerLimiter.peek(job.customerId).allowed) {
2232
2273
  this.transport.sendFeedback(job, { type: "error", message: "Rate limited, try again later" }).catch(() => {
2233
2274
  });
2234
2275
  return;
2235
2276
  }
2236
- if (!this.globalLimiter.peek(GLOBAL_LIMITER_KEY).allowed) {
2277
+ if (!globalLimiter.peek(GLOBAL_LIMITER_KEY).allowed) {
2237
2278
  this.transport.sendFeedback(job, { type: "error", message: "Server busy, try again later" }).catch(() => {
2238
2279
  });
2239
2280
  return;
2240
2281
  }
2241
- const matched = this.skills.route(job.tags);
2242
2282
  const isFreeLlm = matched?.mode === "llm" && matched.priceSubunits === 0;
2243
2283
  let perCustomerLimiter;
2244
2284
  let perSkillKey;
@@ -2259,8 +2299,8 @@ var AgentRuntime = class {
2259
2299
  return;
2260
2300
  }
2261
2301
  }
2262
- this.customerLimiter.check(job.customerId);
2263
- this.globalLimiter.check(GLOBAL_LIMITER_KEY);
2302
+ customerLimiter.check(job.customerId);
2303
+ globalLimiter.check(GLOBAL_LIMITER_KEY);
2264
2304
  if (isFreeLlm && perCustomerLimiter && perSkillKey) {
2265
2305
  this.freeLlmLimiters.globalLimiter.check(FREE_LLM_GLOBAL_KEY);
2266
2306
  perCustomerLimiter.check(perSkillKey);
@@ -2294,8 +2334,10 @@ var AgentRuntime = class {
2294
2334
  }
2295
2335
  /** Drop expired hits from every sliding-window limiter. */
2296
2336
  cleanupRateLimits() {
2297
- this.customerLimiter.prune();
2298
- this.globalLimiter.prune();
2337
+ this.freeCustomerLimiter.prune();
2338
+ this.freeGlobalLimiter.prune();
2339
+ this.paidCustomerLimiter.prune();
2340
+ this.paidGlobalLimiter.prune();
2299
2341
  this.freeLlmLimiters.globalLimiter.prune();
2300
2342
  this.freeLlmLimiters.prunePerCustomer();
2301
2343
  }