omnius 1.0.135 → 1.0.136
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +107 -17
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1374,7 +1374,7 @@ var init_model_broker = __esm({
|
|
|
1374
1374
|
DEFAULT_IDLE_EVICT_MS = 5 * 60 * 1e3;
|
|
1375
1375
|
DEFAULT_POLL_MS = 4e3;
|
|
1376
1376
|
DEFAULT_INFLIGHT_WAIT_MS = 6e4;
|
|
1377
|
-
DEFAULT_SLOT_CAPACITY =
|
|
1377
|
+
DEFAULT_SLOT_CAPACITY = 8;
|
|
1378
1378
|
DEFAULT_QUEUE_CAPACITY = 50;
|
|
1379
1379
|
THROUGHPUT_EMA_ALPHA = 0.2;
|
|
1380
1380
|
THROUGHPUT_INITIAL_TPS = 25;
|
|
@@ -1977,6 +1977,33 @@ var init_model_broker = __esm({
|
|
|
1977
1977
|
return Promise.resolve(slot);
|
|
1978
1978
|
}
|
|
1979
1979
|
return new Promise((resolve55, reject) => {
|
|
1980
|
+
if (this._slotQueue.length >= this.queueCapacity) {
|
|
1981
|
+
const newPrio = spec.priority ?? 0;
|
|
1982
|
+
let victim = -1;
|
|
1983
|
+
let victimPrio = Infinity;
|
|
1984
|
+
for (let i2 = this._slotQueue.length - 1; i2 >= 0; i2--) {
|
|
1985
|
+
const p2 = this._slotQueue[i2].spec.priority ?? 0;
|
|
1986
|
+
if (p2 < victimPrio) {
|
|
1987
|
+
victimPrio = p2;
|
|
1988
|
+
victim = i2;
|
|
1989
|
+
}
|
|
1990
|
+
if (victimPrio === 0)
|
|
1991
|
+
break;
|
|
1992
|
+
}
|
|
1993
|
+
if (victim >= 0 && victimPrio < newPrio) {
|
|
1994
|
+
const dropped = this._slotQueue.splice(victim, 1)[0];
|
|
1995
|
+
if (dropped.onSignalAbort && dropped.spec.signal) {
|
|
1996
|
+
dropped.spec.signal.removeEventListener("abort", dropped.onSignalAbort);
|
|
1997
|
+
}
|
|
1998
|
+
try {
|
|
1999
|
+
dropped.reject(new Error("broker queue shed: capacity reached, lower-priority entry displaced"));
|
|
2000
|
+
} catch {
|
|
2001
|
+
}
|
|
2002
|
+
} else {
|
|
2003
|
+
reject(new Error(`broker queue full (capacity=${this.queueCapacity}); caller priority ${newPrio} insufficient to displace`));
|
|
2004
|
+
return;
|
|
2005
|
+
}
|
|
2006
|
+
}
|
|
1980
2007
|
const entry = { spec, resolve: resolve55, reject, enqueuedAt: Date.now() };
|
|
1981
2008
|
if (spec.signal) {
|
|
1982
2009
|
const onAbort = () => {
|
|
@@ -618138,6 +618165,32 @@ function estimatePromptTokensFromRequest(request) {
|
|
|
618138
618165
|
}
|
|
618139
618166
|
return Math.ceil(chars / 4);
|
|
618140
618167
|
}
|
|
618168
|
+
function isLikelyTruncatedRouterJson(text) {
|
|
618169
|
+
if (typeof text !== "string") return false;
|
|
618170
|
+
const stripped = text.replace(/^\s*<think>[\s\S]*?<\/think>\s*/i, "").trim();
|
|
618171
|
+
if (!stripped.startsWith("{")) return false;
|
|
618172
|
+
let depth = 0;
|
|
618173
|
+
let inString = false;
|
|
618174
|
+
let escape2 = false;
|
|
618175
|
+
for (let i2 = 0; i2 < stripped.length; i2++) {
|
|
618176
|
+
const ch = stripped[i2];
|
|
618177
|
+
if (escape2) {
|
|
618178
|
+
escape2 = false;
|
|
618179
|
+
continue;
|
|
618180
|
+
}
|
|
618181
|
+
if (inString) {
|
|
618182
|
+
if (ch === "\\") escape2 = true;
|
|
618183
|
+
else if (ch === '"') inString = false;
|
|
618184
|
+
continue;
|
|
618185
|
+
}
|
|
618186
|
+
if (ch === '"') inString = true;
|
|
618187
|
+
else if (ch === "{") depth++;
|
|
618188
|
+
else if (ch === "}") depth--;
|
|
618189
|
+
}
|
|
618190
|
+
if (depth <= 0) return false;
|
|
618191
|
+
const hits = (stripped.includes('"route"') ? 1 : 0) + (stripped.includes('"should_reply"') ? 1 : 0) + (stripped.includes('"confidence"') ? 1 : 0) + (stripped.includes('"reason"') ? 1 : 0) + (stripped.includes('"silent_disposition"') ? 1 : 0) + (stripped.includes('"mental_note"') ? 1 : 0);
|
|
618192
|
+
return hits >= 3;
|
|
618193
|
+
}
|
|
618141
618194
|
function telegramRouterTimeoutMs(configTimeoutMs, _minMs, _legacyMaxMs) {
|
|
618142
618195
|
void _minMs;
|
|
618143
618196
|
void _legacyMaxMs;
|
|
@@ -623949,7 +624002,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
623949
624002
|
],
|
|
623950
624003
|
tools: [],
|
|
623951
624004
|
temperature: 0,
|
|
623952
|
-
|
|
624005
|
+
// Reflection has 12 string fields; 650 was tight enough to truncate.
|
|
624006
|
+
maxTokens: 1500,
|
|
623953
624007
|
timeoutMs: telegramRouterTimeoutMs(timeoutMs),
|
|
623954
624008
|
think: false
|
|
623955
624009
|
},
|
|
@@ -624039,7 +624093,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
624039
624093
|
const promptTokens = estimatePromptTokensFromRequest(request);
|
|
624040
624094
|
const broker = getModelBroker();
|
|
624041
624095
|
const trainCtx = await broker.getNctxTrain(model).catch(() => null);
|
|
624042
|
-
const
|
|
624096
|
+
const completionHeadroom = 4096;
|
|
624097
|
+
const targetCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, Math.max(2048, promptTokens + completionHeadroom)) : Math.max(2048, promptTokens + completionHeadroom);
|
|
624043
624098
|
const requestWithCtx = { ...request, numCtx: targetCtx };
|
|
624044
624099
|
const slot = await broker.acquireInferenceSlot({
|
|
624045
624100
|
model,
|
|
@@ -624049,10 +624104,12 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
624049
624104
|
promptTokens,
|
|
624050
624105
|
priority: kind === "router" || kind === "router-repair" || kind === "router-strict-retry" ? 1 : 0
|
|
624051
624106
|
});
|
|
624052
|
-
|
|
624053
|
-
|
|
624054
|
-
|
|
624055
|
-
|
|
624107
|
+
if (process.env["OMNIUS_BROKER_TRACE"] === "1") {
|
|
624108
|
+
this.tuiWrite(() => renderTelegramSubAgentEvent(
|
|
624109
|
+
sessionKey,
|
|
624110
|
+
`inference admitted [${kind}] model=${model} prompt~${promptTokens}t num_ctx=${targetCtx} slot=${slot.info.id}${slot.info.reserved ? " reserved" : ""}`
|
|
624111
|
+
));
|
|
624112
|
+
}
|
|
624056
624113
|
const streamFn = backend.chatCompletionStream;
|
|
624057
624114
|
const id = this.registerTelegramInference(kind, sessionKey, model);
|
|
624058
624115
|
let completionTokens = 0;
|
|
@@ -624274,7 +624331,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
624274
624331
|
getTelegramThinkingVisible() {
|
|
624275
624332
|
return this.telegramThinkingVisible;
|
|
624276
624333
|
}
|
|
624277
|
-
async repairTelegramInteractionDecision(backend, rawOutput, forcedRoute, timeoutMs, diagnostics) {
|
|
624334
|
+
async repairTelegramInteractionDecision(backend, rawOutput, forcedRoute, timeoutMs, diagnostics, sessionKey = "__router__") {
|
|
624278
624335
|
const rawPreview = telegramRouterRawPreview(rawOutput, 4e3);
|
|
624279
624336
|
if (!rawPreview || telegramDecisionOutputHasDanglingJson(rawOutput)) {
|
|
624280
624337
|
if (diagnostics) {
|
|
@@ -624309,10 +624366,10 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
|
|
|
624309
624366
|
],
|
|
624310
624367
|
tools: [],
|
|
624311
624368
|
temperature: 0,
|
|
624312
|
-
maxTokens:
|
|
624369
|
+
maxTokens: 1500,
|
|
624313
624370
|
timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 2e4),
|
|
624314
624371
|
think: false
|
|
624315
|
-
});
|
|
624372
|
+
}, diagnostics, "router-repair", sessionKey);
|
|
624316
624373
|
const repairedText = result.choices[0]?.message?.content ?? "";
|
|
624317
624374
|
if (telegramDecisionRecoverableFlag(repairedText) === false) {
|
|
624318
624375
|
if (diagnostics) diagnostics.repairStatus = "no-recoverable-output";
|
|
@@ -624344,7 +624401,7 @@ ${repairedText}`,
|
|
|
624344
624401
|
return null;
|
|
624345
624402
|
}
|
|
624346
624403
|
}
|
|
624347
|
-
async retryTelegramInteractionDecisionStrict(backend, userPrompt, rawOutput, forcedRoute, timeoutMs, diagnostics) {
|
|
624404
|
+
async retryTelegramInteractionDecisionStrict(backend, userPrompt, rawOutput, forcedRoute, timeoutMs, diagnostics, sessionKey = "__router__") {
|
|
624348
624405
|
const invalidPreview = telegramRouterRawPreview(rawOutput, 1200) ?? "(empty assistant content)";
|
|
624349
624406
|
const routeInstruction = forcedRoute ? `The operator selected Telegram mode "${forcedRoute}". The route field must be "${forcedRoute}", but should_reply must still be inferred from context.` : `Infer route live from context.`;
|
|
624350
624407
|
const trimmedUserPrompt = userPrompt.length > 4e3 ? `…
|
|
@@ -624376,10 +624433,10 @@ ${userPrompt.slice(-4e3)}` : userPrompt;
|
|
|
624376
624433
|
],
|
|
624377
624434
|
tools: [],
|
|
624378
624435
|
temperature: 0,
|
|
624379
|
-
maxTokens:
|
|
624436
|
+
maxTokens: 2400,
|
|
624380
624437
|
timeoutMs: telegramRouterTimeoutMs(timeoutMs, 1e4, 3e4),
|
|
624381
624438
|
think: false
|
|
624382
|
-
});
|
|
624439
|
+
}, diagnostics, "router-strict-retry", sessionKey);
|
|
624383
624440
|
const retryText = result.choices[0]?.message?.content ?? "";
|
|
624384
624441
|
if (diagnostics) diagnostics.strictRetryPreview = telegramRouterRawPreview(retryText, 320);
|
|
624385
624442
|
const parsed = parseTelegramInteractionDecision(retryText, forcedRoute, {
|
|
@@ -624762,10 +624819,14 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
|
|
|
624762
624819
|
],
|
|
624763
624820
|
tools: [],
|
|
624764
624821
|
temperature: 0,
|
|
624765
|
-
|
|
624822
|
+
// Router JSON schema has ~18 string-valued fields when reflection is
|
|
624823
|
+
// embedded (consolidated mode). 1000 tokens was the documented cause
|
|
624824
|
+
// of truncated JSON → repair → strict-retry cascade. 2400 is enough
|
|
624825
|
+
// for normal verbose values without slowing the call appreciably.
|
|
624826
|
+
maxTokens: 2400,
|
|
624766
624827
|
timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
|
|
624767
624828
|
think: false
|
|
624768
|
-
}, diagnostics);
|
|
624829
|
+
}, diagnostics, "router", sessionKey);
|
|
624769
624830
|
const text = result.choices[0]?.message?.content ?? "";
|
|
624770
624831
|
const routerLatencyMs = Date.now() - routerStartMs;
|
|
624771
624832
|
try {
|
|
@@ -624788,12 +624849,40 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
|
|
|
624788
624849
|
if (parsed) {
|
|
624789
624850
|
return this.applyTelegramSilentReflectionNotes(parsed, reflectionNotes);
|
|
624790
624851
|
}
|
|
624852
|
+
if (isLikelyTruncatedRouterJson(text)) {
|
|
624853
|
+
if (diagnostics) diagnostics.repairStatus = "skipped-truncation-rerun";
|
|
624854
|
+
try {
|
|
624855
|
+
const reissued = await this.telegramRouterJsonCompletion(backend, {
|
|
624856
|
+
messages: [
|
|
624857
|
+
{
|
|
624858
|
+
role: "system",
|
|
624859
|
+
content: "You perform live Telegram route and stimulation inference. Output strict JSON only."
|
|
624860
|
+
},
|
|
624861
|
+
{ role: "user", content: userPrompt }
|
|
624862
|
+
],
|
|
624863
|
+
tools: [],
|
|
624864
|
+
temperature: 0,
|
|
624865
|
+
maxTokens: 4096,
|
|
624866
|
+
timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
|
|
624867
|
+
think: false
|
|
624868
|
+
}, diagnostics, "router", sessionKey);
|
|
624869
|
+
const reissuedText = reissued.choices[0]?.message?.content ?? "";
|
|
624870
|
+
const reparsed = parseTelegramInteractionDecision(reissuedText, forcedRoute, {
|
|
624871
|
+
defaultShouldReply: false
|
|
624872
|
+
});
|
|
624873
|
+
if (reparsed) {
|
|
624874
|
+
return this.applyTelegramSilentReflectionNotes(reparsed, reflectionNotes);
|
|
624875
|
+
}
|
|
624876
|
+
} catch {
|
|
624877
|
+
}
|
|
624878
|
+
}
|
|
624791
624879
|
const repaired = await this.repairTelegramInteractionDecision(
|
|
624792
624880
|
backend,
|
|
624793
624881
|
text,
|
|
624794
624882
|
forcedRoute,
|
|
624795
624883
|
config.timeoutMs ?? 3e4,
|
|
624796
|
-
diagnostics
|
|
624884
|
+
diagnostics,
|
|
624885
|
+
sessionKey
|
|
624797
624886
|
);
|
|
624798
624887
|
if (repaired) {
|
|
624799
624888
|
return this.applyTelegramSilentReflectionNotes(repaired, reflectionNotes);
|
|
@@ -624804,7 +624893,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
|
|
|
624804
624893
|
text,
|
|
624805
624894
|
forcedRoute,
|
|
624806
624895
|
config.timeoutMs ?? 3e4,
|
|
624807
|
-
diagnostics
|
|
624896
|
+
diagnostics,
|
|
624897
|
+
sessionKey
|
|
624808
624898
|
);
|
|
624809
624899
|
if (strictRetry) {
|
|
624810
624900
|
return this.applyTelegramSilentReflectionNotes(strictRetry, reflectionNotes);
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.135",
|
|
3
|
+
"version": "1.0.136",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.135",
|
|
9
|
+
"version": "1.0.136",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED