npm - @elisym/cli - Versions diffs - 0.17.0 → 0.17.2 - Mend

@elisym/cli 0.17.0 → 0.17.2

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry they were published to. It is provided for informational purposes only.

Files changed (3)

package/dist/index.js CHANGED Viewed

@@ -406,7 +406,16 @@ function createOpenAICompatibleProvider(config) {
           },
           body: JSON.stringify({
             model,
-            max_tokens: 1,
+            // Some xAI / DeepSeek model variants are reasoning models that
+            // burn output tokens on internal chain-of-thought before any
+            // visible reply; with `max_tokens: 1` the API returns HTTP 400
+            // and we falsely flip the (provider, model) pair to unhealthy
+            // on a valid key. 256 leaves room for the minimum reasoning
+            // budget on every current variant; for non-reasoning models
+            // the probe still stops at the first natural token, so the
+            // extra ceiling does not change the per-probe cost in
+            // practice.
+            max_tokens: 256,
             messages: [{ role: "user", content: "." }]
           })
         },
@@ -693,7 +702,17 @@ async function verifyKeyDeep2(apiKey, model, signal) {
         },
         body: JSON.stringify({
           model,
-          ...reasoning ? { max_completion_tokens: 1 } : { max_tokens: 1 },
+          // GPT-5 reasoning models count internal chain-of-thought tokens
+          // against `max_completion_tokens`. With a budget of 1 the model
+          // exhausts it on reasoning before producing any visible content
+          // and the API responds HTTP 400 ("max_tokens reached") - which
+          // would falsely flip the (provider, model) pair to unhealthy on
+          // an otherwise valid key. 256 covers a "." prompt's reasoning
+          // budget on every current GPT-5 variant we tested with margin;
+          // non-reasoning models still take max_tokens: 1 since they emit
+          // visible content directly. Probe cost stays sub-cent per
+          // startup probe (reasoning tokens are billed at output rates).
+          ...reasoning ? { max_completion_tokens: 256 } : { max_tokens: 1 },
           messages: [{ role: "user", content: "." }]
         })
       },
@@ -2016,8 +2035,10 @@ function resolveHealthPair(skill) {
   return null;
 }
 var RATE_LIMIT_WINDOW_MS = 10 * 60 * 1e3;
-var MAX_JOBS_PER_CUSTOMER = 20;
-var GLOBAL_MAX_JOBS_PER_WINDOW = 200;
+var FREE_MAX_JOBS_PER_CUSTOMER = 20;
+var FREE_GLOBAL_MAX_JOBS_PER_WINDOW = 200;
+var PAID_MAX_JOBS_PER_CUSTOMER = 200;
+var PAID_GLOBAL_MAX_JOBS_PER_WINDOW = 2e3;
 var MAX_TRACKED_CUSTOMERS = 1e3;
 var GLOBAL_LIMITER_KEY = "__global__";
 var AgentRuntime = class {
@@ -2041,16 +2062,32 @@ var AgentRuntime = class {
   recoveryInterval = null;
   gcInterval = null;
   stopped = false;
-  /** Per-customer sliding-window rate limiter (keyed on customer pubkey). */
-  customerLimiter = createSlidingWindowLimiter({
+  /** Per-customer sliding-window rate limiter for free skills. */
+  freeCustomerLimiter = createSlidingWindowLimiter({
     windowMs: RATE_LIMIT_WINDOW_MS,
-    maxPerWindow: MAX_JOBS_PER_CUSTOMER,
+    maxPerWindow: FREE_MAX_JOBS_PER_CUSTOMER,
     maxKeys: MAX_TRACKED_CUSTOMERS
   });
-  /** Global sliding-window rate limiter (Sybil protection). */
-  globalLimiter = createSlidingWindowLimiter({
+  /** Global sliding-window rate limiter for free skills (Sybil protection). */
+  freeGlobalLimiter = createSlidingWindowLimiter({
     windowMs: RATE_LIMIT_WINDOW_MS,
-    maxPerWindow: GLOBAL_MAX_JOBS_PER_WINDOW,
+    maxPerWindow: FREE_GLOBAL_MAX_JOBS_PER_WINDOW,
+    maxKeys: 1
+  });
+  /**
+   * Per-customer sliding-window limiter for paid skills (10x looser than free).
+   * Payment is the primary economic deterrent; this cap exists to bound the
+   * "claim paid skill but never pay" queue-spam vector.
+   */
+  paidCustomerLimiter = createSlidingWindowLimiter({
+    windowMs: RATE_LIMIT_WINDOW_MS,
+    maxPerWindow: PAID_MAX_JOBS_PER_CUSTOMER,
+    maxKeys: MAX_TRACKED_CUSTOMERS
+  });
+  /** Global sliding-window limiter for paid skills (Sybil protection, 10x free). */
+  paidGlobalLimiter = createSlidingWindowLimiter({
+    windowMs: RATE_LIMIT_WINDOW_MS,
+    maxPerWindow: PAID_GLOBAL_MAX_JOBS_PER_WINDOW,
     maxKeys: 1
   });
   /**
@@ -2228,17 +2265,20 @@ var AgentRuntime = class {
         });
         return;
       }
-      if (!this.customerLimiter.peek(job.customerId).allowed) {
+      const matched = this.skills.route(job.tags);
+      const isPaid = matched ? matched.priceSubunits > 0 : false;
+      const customerLimiter = isPaid ? this.paidCustomerLimiter : this.freeCustomerLimiter;
+      const globalLimiter = isPaid ? this.paidGlobalLimiter : this.freeGlobalLimiter;
+      if (!customerLimiter.peek(job.customerId).allowed) {
         this.transport.sendFeedback(job, { type: "error", message: "Rate limited, try again later" }).catch(() => {
         });
         return;
       }
-      if (!this.globalLimiter.peek(GLOBAL_LIMITER_KEY).allowed) {
+      if (!globalLimiter.peek(GLOBAL_LIMITER_KEY).allowed) {
         this.transport.sendFeedback(job, { type: "error", message: "Server busy, try again later" }).catch(() => {
         });
         return;
       }
-      const matched = this.skills.route(job.tags);
       const isFreeLlm = matched?.mode === "llm" && matched.priceSubunits === 0;
       let perCustomerLimiter;
       let perSkillKey;
@@ -2259,8 +2299,8 @@ var AgentRuntime = class {
           return;
         }
       }
-      this.customerLimiter.check(job.customerId);
-      this.globalLimiter.check(GLOBAL_LIMITER_KEY);
+      customerLimiter.check(job.customerId);
+      globalLimiter.check(GLOBAL_LIMITER_KEY);
       if (isFreeLlm && perCustomerLimiter && perSkillKey) {
         this.freeLlmLimiters.globalLimiter.check(FREE_LLM_GLOBAL_KEY);
         perCustomerLimiter.check(perSkillKey);
@@ -2294,8 +2334,10 @@ var AgentRuntime = class {
   }
   /** Drop expired hits from every sliding-window limiter. */
   cleanupRateLimits() {
-    this.customerLimiter.prune();
-    this.globalLimiter.prune();
+    this.freeCustomerLimiter.prune();
+    this.freeGlobalLimiter.prune();
+    this.paidCustomerLimiter.prune();
+    this.paidGlobalLimiter.prune();
     this.freeLlmLimiters.globalLimiter.prune();
     this.freeLlmLimiters.prunePerCustomer();
   }