@elisym/cli 0.17.0 → 0.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +59 -17
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -406,7 +406,16 @@ function createOpenAICompatibleProvider(config) {
|
|
|
406
406
|
},
|
|
407
407
|
body: JSON.stringify({
|
|
408
408
|
model,
|
|
409
|
-
|
|
409
|
+
// Some xAI / DeepSeek model variants are reasoning models that
|
|
410
|
+
// burn output tokens on internal chain-of-thought before any
|
|
411
|
+
// visible reply; with `max_tokens: 1` the API returns HTTP 400
|
|
412
|
+
// and we falsely flip the (provider, model) pair to unhealthy
|
|
413
|
+
// on a valid key. 256 leaves room for the minimum reasoning
|
|
414
|
+
// budget on every current variant; for non-reasoning models
|
|
415
|
+
// the probe still stops at the first natural token, so the
|
|
416
|
+
// extra ceiling does not change the per-probe cost in
|
|
417
|
+
// practice.
|
|
418
|
+
max_tokens: 256,
|
|
410
419
|
messages: [{ role: "user", content: "." }]
|
|
411
420
|
})
|
|
412
421
|
},
|
|
@@ -693,7 +702,17 @@ async function verifyKeyDeep2(apiKey, model, signal) {
|
|
|
693
702
|
},
|
|
694
703
|
body: JSON.stringify({
|
|
695
704
|
model,
|
|
696
|
-
|
|
705
|
+
// GPT-5 reasoning models count internal chain-of-thought tokens
|
|
706
|
+
// against `max_completion_tokens`. With a budget of 1 the model
|
|
707
|
+
// exhausts it on reasoning before producing any visible content
|
|
708
|
+
// and the API responds HTTP 400 ("max_tokens reached") - which
|
|
709
|
+
// would falsely flip the (provider, model) pair to unhealthy on
|
|
710
|
+
// an otherwise valid key. 256 covers a "." prompt's reasoning
|
|
711
|
+
// budget, with margin, on every current GPT-5 variant we tested;
|
|
712
|
+
// non-reasoning models still take max_tokens: 1 since they emit
|
|
713
|
+
// visible content directly. Probe cost stays sub-cent per
|
|
714
|
+
// startup probe (reasoning tokens are billed at output rates).
|
|
715
|
+
...reasoning ? { max_completion_tokens: 256 } : { max_tokens: 1 },
|
|
697
716
|
messages: [{ role: "user", content: "." }]
|
|
698
717
|
})
|
|
699
718
|
},
|
|
@@ -2016,8 +2035,10 @@ function resolveHealthPair(skill) {
|
|
|
2016
2035
|
return null;
|
|
2017
2036
|
}
|
|
2018
2037
|
var RATE_LIMIT_WINDOW_MS = 10 * 60 * 1e3;
|
|
2019
|
-
var
|
|
2020
|
-
var
|
|
2038
|
+
var FREE_MAX_JOBS_PER_CUSTOMER = 20;
|
|
2039
|
+
var FREE_GLOBAL_MAX_JOBS_PER_WINDOW = 200;
|
|
2040
|
+
var PAID_MAX_JOBS_PER_CUSTOMER = 200;
|
|
2041
|
+
var PAID_GLOBAL_MAX_JOBS_PER_WINDOW = 2e3;
|
|
2021
2042
|
var MAX_TRACKED_CUSTOMERS = 1e3;
|
|
2022
2043
|
var GLOBAL_LIMITER_KEY = "__global__";
|
|
2023
2044
|
var AgentRuntime = class {
|
|
@@ -2041,16 +2062,32 @@ var AgentRuntime = class {
|
|
|
2041
2062
|
recoveryInterval = null;
|
|
2042
2063
|
gcInterval = null;
|
|
2043
2064
|
stopped = false;
|
|
2044
|
-
/** Per-customer sliding-window rate limiter
|
|
2045
|
-
|
|
2065
|
+
/** Per-customer sliding-window rate limiter for free skills. */
|
|
2066
|
+
freeCustomerLimiter = createSlidingWindowLimiter({
|
|
2046
2067
|
windowMs: RATE_LIMIT_WINDOW_MS,
|
|
2047
|
-
maxPerWindow:
|
|
2068
|
+
maxPerWindow: FREE_MAX_JOBS_PER_CUSTOMER,
|
|
2048
2069
|
maxKeys: MAX_TRACKED_CUSTOMERS
|
|
2049
2070
|
});
|
|
2050
|
-
/** Global sliding-window rate limiter (Sybil protection). */
|
|
2051
|
-
|
|
2071
|
+
/** Global sliding-window rate limiter for free skills (Sybil protection). */
|
|
2072
|
+
freeGlobalLimiter = createSlidingWindowLimiter({
|
|
2052
2073
|
windowMs: RATE_LIMIT_WINDOW_MS,
|
|
2053
|
-
maxPerWindow:
|
|
2074
|
+
maxPerWindow: FREE_GLOBAL_MAX_JOBS_PER_WINDOW,
|
|
2075
|
+
maxKeys: 1
|
|
2076
|
+
});
|
|
2077
|
+
/**
|
|
2078
|
+
* Per-customer sliding-window limiter for paid skills (10x looser than free).
|
|
2079
|
+
* Payment is the primary economic deterrent; this cap exists to bound the
|
|
2080
|
+
* "claim paid skill but never pay" queue-spam vector.
|
|
2081
|
+
*/
|
|
2082
|
+
paidCustomerLimiter = createSlidingWindowLimiter({
|
|
2083
|
+
windowMs: RATE_LIMIT_WINDOW_MS,
|
|
2084
|
+
maxPerWindow: PAID_MAX_JOBS_PER_CUSTOMER,
|
|
2085
|
+
maxKeys: MAX_TRACKED_CUSTOMERS
|
|
2086
|
+
});
|
|
2087
|
+
/** Global sliding-window limiter for paid skills (Sybil protection, 10x free). */
|
|
2088
|
+
paidGlobalLimiter = createSlidingWindowLimiter({
|
|
2089
|
+
windowMs: RATE_LIMIT_WINDOW_MS,
|
|
2090
|
+
maxPerWindow: PAID_GLOBAL_MAX_JOBS_PER_WINDOW,
|
|
2054
2091
|
maxKeys: 1
|
|
2055
2092
|
});
|
|
2056
2093
|
/**
|
|
@@ -2228,17 +2265,20 @@ var AgentRuntime = class {
|
|
|
2228
2265
|
});
|
|
2229
2266
|
return;
|
|
2230
2267
|
}
|
|
2231
|
-
|
|
2268
|
+
const matched = this.skills.route(job.tags);
|
|
2269
|
+
const isPaid = matched ? matched.priceSubunits > 0 : false;
|
|
2270
|
+
const customerLimiter = isPaid ? this.paidCustomerLimiter : this.freeCustomerLimiter;
|
|
2271
|
+
const globalLimiter = isPaid ? this.paidGlobalLimiter : this.freeGlobalLimiter;
|
|
2272
|
+
if (!customerLimiter.peek(job.customerId).allowed) {
|
|
2232
2273
|
this.transport.sendFeedback(job, { type: "error", message: "Rate limited, try again later" }).catch(() => {
|
|
2233
2274
|
});
|
|
2234
2275
|
return;
|
|
2235
2276
|
}
|
|
2236
|
-
if (!
|
|
2277
|
+
if (!globalLimiter.peek(GLOBAL_LIMITER_KEY).allowed) {
|
|
2237
2278
|
this.transport.sendFeedback(job, { type: "error", message: "Server busy, try again later" }).catch(() => {
|
|
2238
2279
|
});
|
|
2239
2280
|
return;
|
|
2240
2281
|
}
|
|
2241
|
-
const matched = this.skills.route(job.tags);
|
|
2242
2282
|
const isFreeLlm = matched?.mode === "llm" && matched.priceSubunits === 0;
|
|
2243
2283
|
let perCustomerLimiter;
|
|
2244
2284
|
let perSkillKey;
|
|
@@ -2259,8 +2299,8 @@ var AgentRuntime = class {
|
|
|
2259
2299
|
return;
|
|
2260
2300
|
}
|
|
2261
2301
|
}
|
|
2262
|
-
|
|
2263
|
-
|
|
2302
|
+
customerLimiter.check(job.customerId);
|
|
2303
|
+
globalLimiter.check(GLOBAL_LIMITER_KEY);
|
|
2264
2304
|
if (isFreeLlm && perCustomerLimiter && perSkillKey) {
|
|
2265
2305
|
this.freeLlmLimiters.globalLimiter.check(FREE_LLM_GLOBAL_KEY);
|
|
2266
2306
|
perCustomerLimiter.check(perSkillKey);
|
|
@@ -2294,8 +2334,10 @@ var AgentRuntime = class {
|
|
|
2294
2334
|
}
|
|
2295
2335
|
/** Drop expired hits from every sliding-window limiter. */
|
|
2296
2336
|
cleanupRateLimits() {
|
|
2297
|
-
this.
|
|
2298
|
-
this.
|
|
2337
|
+
this.freeCustomerLimiter.prune();
|
|
2338
|
+
this.freeGlobalLimiter.prune();
|
|
2339
|
+
this.paidCustomerLimiter.prune();
|
|
2340
|
+
this.paidGlobalLimiter.prune();
|
|
2299
2341
|
this.freeLlmLimiters.globalLimiter.prune();
|
|
2300
2342
|
this.freeLlmLimiters.prunePerCustomer();
|
|
2301
2343
|
}
|