omnius 1.0.135 → 1.0.136

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1374,7 +1374,7 @@ var init_model_broker = __esm({
1374
1374
  DEFAULT_IDLE_EVICT_MS = 5 * 60 * 1e3;
1375
1375
  DEFAULT_POLL_MS = 4e3;
1376
1376
  DEFAULT_INFLIGHT_WAIT_MS = 6e4;
1377
- DEFAULT_SLOT_CAPACITY = 4;
1377
+ DEFAULT_SLOT_CAPACITY = 8;
1378
1378
  DEFAULT_QUEUE_CAPACITY = 50;
1379
1379
  THROUGHPUT_EMA_ALPHA = 0.2;
1380
1380
  THROUGHPUT_INITIAL_TPS = 25;
@@ -1977,6 +1977,33 @@ var init_model_broker = __esm({
1977
1977
  return Promise.resolve(slot);
1978
1978
  }
1979
1979
  return new Promise((resolve55, reject) => {
1980
+ if (this._slotQueue.length >= this.queueCapacity) {
1981
+ const newPrio = spec.priority ?? 0;
1982
+ let victim = -1;
1983
+ let victimPrio = Infinity;
1984
+ for (let i2 = this._slotQueue.length - 1; i2 >= 0; i2--) {
1985
+ const p2 = this._slotQueue[i2].spec.priority ?? 0;
1986
+ if (p2 < victimPrio) {
1987
+ victimPrio = p2;
1988
+ victim = i2;
1989
+ }
1990
+ if (victimPrio === 0)
1991
+ break;
1992
+ }
1993
+ if (victim >= 0 && victimPrio < newPrio) {
1994
+ const dropped = this._slotQueue.splice(victim, 1)[0];
1995
+ if (dropped.onSignalAbort && dropped.spec.signal) {
1996
+ dropped.spec.signal.removeEventListener("abort", dropped.onSignalAbort);
1997
+ }
1998
+ try {
1999
+ dropped.reject(new Error("broker queue shed: capacity reached, lower-priority entry displaced"));
2000
+ } catch {
2001
+ }
2002
+ } else {
2003
+ reject(new Error(`broker queue full (capacity=${this.queueCapacity}); caller priority ${newPrio} insufficient to displace`));
2004
+ return;
2005
+ }
2006
+ }
1980
2007
  const entry = { spec, resolve: resolve55, reject, enqueuedAt: Date.now() };
1981
2008
  if (spec.signal) {
1982
2009
  const onAbort = () => {
@@ -618138,6 +618165,32 @@ function estimatePromptTokensFromRequest(request) {
618138
618165
  }
618139
618166
  return Math.ceil(chars / 4);
618140
618167
  }
618168
/**
 * Heuristically decide whether `text` is a router JSON object that was cut
 * off before its top-level `{ ... }` was closed (e.g. the completion hit its
 * token limit).
 *
 * The check is purely lexical; the text is never passed to `JSON.parse`:
 *   1. A leading `<think>...</think>` block and a leading Markdown code
 *      fence (such as ```json) are removed.
 *   2. The remainder must start with `{`.
 *   3. Braces are counted outside of string literals. If the top-level object
 *      closes, the object itself is complete and the function returns false,
 *      regardless of any trailing text after it.
 *   4. Otherwise the text must mention at least three of the known router
 *      keys to be treated as truncated router output.
 *
 * @param {unknown} text - Raw assistant output to inspect.
 * @returns {boolean} True when the text looks like an unterminated router
 *   JSON object; false for non-strings, non-objects, complete objects, or
 *   objects that do not resemble router output.
 */
function isLikelyTruncatedRouterJson(text) {
  if (typeof text !== "string") return false;

  const stripped = text
    .replace(/^\s*<think>[\s\S]*?<\/think>\s*/i, "")
    // Models frequently wrap JSON in a Markdown fence; ignore the opener.
    .replace(/^\s*```[a-z]*\s*/i, "")
    .trim();
  if (!stripped.startsWith("{")) return false;

  let depth = 0;
  let inString = false;
  let escaped = false;
  for (const ch of stripped) {
    if (escaped) {
      // Character following a backslash inside a string is always literal.
      escaped = false;
      continue;
    }
    if (inString) {
      if (ch === "\\") escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      // The top-level object closed: whatever follows, it was not truncated.
      // Returning here also prevents depth from ever going negative.
      if (depth === 0) return false;
    }
  }

  // Reaching this point means the opening brace was never balanced.
  const routerKeys = [
    '"route"',
    '"should_reply"',
    '"confidence"',
    '"reason"',
    '"silent_disposition"',
    '"mental_note"',
  ];
  const hits = routerKeys.filter((key) => stripped.includes(key)).length;
  return hits >= 3;
}
618141
618194
  function telegramRouterTimeoutMs(configTimeoutMs, _minMs, _legacyMaxMs) {
618142
618195
  void _minMs;
618143
618196
  void _legacyMaxMs;
@@ -623949,7 +624002,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
623949
624002
  ],
623950
624003
  tools: [],
623951
624004
  temperature: 0,
623952
- maxTokens: 650,
624005
+ // Reflection has 12 string fields; 650 was tight enough to truncate.
624006
+ maxTokens: 1500,
623953
624007
  timeoutMs: telegramRouterTimeoutMs(timeoutMs),
623954
624008
  think: false
623955
624009
  },
@@ -624039,7 +624093,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
624039
624093
  const promptTokens = estimatePromptTokensFromRequest(request);
624040
624094
  const broker = getModelBroker();
624041
624095
  const trainCtx = await broker.getNctxTrain(model).catch(() => null);
624042
- const targetCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, Math.max(2048, promptTokens + 1024)) : Math.max(2048, promptTokens + 1024);
624096
+ const completionHeadroom = 4096;
624097
+ const targetCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, Math.max(2048, promptTokens + completionHeadroom)) : Math.max(2048, promptTokens + completionHeadroom);
624043
624098
  const requestWithCtx = { ...request, numCtx: targetCtx };
624044
624099
  const slot = await broker.acquireInferenceSlot({
624045
624100
  model,
@@ -624049,10 +624104,12 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
624049
624104
  promptTokens,
624050
624105
  priority: kind === "router" || kind === "router-repair" || kind === "router-strict-retry" ? 1 : 0
624051
624106
  });
624052
- this.tuiWrite(() => renderTelegramSubAgentEvent(
624053
- sessionKey,
624054
- `inference admitted [${kind}] model=${model} prompt~${promptTokens}t num_ctx=${targetCtx} slot=${slot.info.id}${slot.info.reserved ? " reserved" : ""}`
624055
- ));
624107
+ if (process.env["OMNIUS_BROKER_TRACE"] === "1") {
624108
+ this.tuiWrite(() => renderTelegramSubAgentEvent(
624109
+ sessionKey,
624110
+ `inference admitted [${kind}] model=${model} prompt~${promptTokens}t num_ctx=${targetCtx} slot=${slot.info.id}${slot.info.reserved ? " reserved" : ""}`
624111
+ ));
624112
+ }
624056
624113
  const streamFn = backend.chatCompletionStream;
624057
624114
  const id = this.registerTelegramInference(kind, sessionKey, model);
624058
624115
  let completionTokens = 0;
@@ -624274,7 +624331,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
624274
624331
  getTelegramThinkingVisible() {
624275
624332
  return this.telegramThinkingVisible;
624276
624333
  }
624277
- async repairTelegramInteractionDecision(backend, rawOutput, forcedRoute, timeoutMs, diagnostics) {
624334
+ async repairTelegramInteractionDecision(backend, rawOutput, forcedRoute, timeoutMs, diagnostics, sessionKey = "__router__") {
624278
624335
  const rawPreview = telegramRouterRawPreview(rawOutput, 4e3);
624279
624336
  if (!rawPreview || telegramDecisionOutputHasDanglingJson(rawOutput)) {
624280
624337
  if (diagnostics) {
@@ -624309,10 +624366,10 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
624309
624366
  ],
624310
624367
  tools: [],
624311
624368
  temperature: 0,
624312
- maxTokens: 500,
624369
+ maxTokens: 1500,
624313
624370
  timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 2e4),
624314
624371
  think: false
624315
- });
624372
+ }, diagnostics, "router-repair", sessionKey);
624316
624373
  const repairedText = result.choices[0]?.message?.content ?? "";
624317
624374
  if (telegramDecisionRecoverableFlag(repairedText) === false) {
624318
624375
  if (diagnostics) diagnostics.repairStatus = "no-recoverable-output";
@@ -624344,7 +624401,7 @@ ${repairedText}`,
624344
624401
  return null;
624345
624402
  }
624346
624403
  }
624347
- async retryTelegramInteractionDecisionStrict(backend, userPrompt, rawOutput, forcedRoute, timeoutMs, diagnostics) {
624404
+ async retryTelegramInteractionDecisionStrict(backend, userPrompt, rawOutput, forcedRoute, timeoutMs, diagnostics, sessionKey = "__router__") {
624348
624405
  const invalidPreview = telegramRouterRawPreview(rawOutput, 1200) ?? "(empty assistant content)";
624349
624406
  const routeInstruction = forcedRoute ? `The operator selected Telegram mode "${forcedRoute}". The route field must be "${forcedRoute}", but should_reply must still be inferred from context.` : `Infer route live from context.`;
624350
624407
  const trimmedUserPrompt = userPrompt.length > 4e3 ? `…
@@ -624376,10 +624433,10 @@ ${userPrompt.slice(-4e3)}` : userPrompt;
624376
624433
  ],
624377
624434
  tools: [],
624378
624435
  temperature: 0,
624379
- maxTokens: 1200,
624436
+ maxTokens: 2400,
624380
624437
  timeoutMs: telegramRouterTimeoutMs(timeoutMs, 1e4, 3e4),
624381
624438
  think: false
624382
- });
624439
+ }, diagnostics, "router-strict-retry", sessionKey);
624383
624440
  const retryText = result.choices[0]?.message?.content ?? "";
624384
624441
  if (diagnostics) diagnostics.strictRetryPreview = telegramRouterRawPreview(retryText, 320);
624385
624442
  const parsed = parseTelegramInteractionDecision(retryText, forcedRoute, {
@@ -624762,10 +624819,14 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
624762
624819
  ],
624763
624820
  tools: [],
624764
624821
  temperature: 0,
624765
- maxTokens: 1e3,
624822
+ // Router JSON schema has ~18 string-valued fields when reflection is
624823
+ // embedded (consolidated mode). 1000 tokens was the documented cause
624824
+ // of truncated JSON → repair → strict-retry cascade. 2400 is enough
624825
+ // for normal verbose values without slowing the call appreciably.
624826
+ maxTokens: 2400,
624766
624827
  timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
624767
624828
  think: false
624768
- }, diagnostics);
624829
+ }, diagnostics, "router", sessionKey);
624769
624830
  const text = result.choices[0]?.message?.content ?? "";
624770
624831
  const routerLatencyMs = Date.now() - routerStartMs;
624771
624832
  try {
@@ -624788,12 +624849,40 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
624788
624849
  if (parsed) {
624789
624850
  return this.applyTelegramSilentReflectionNotes(parsed, reflectionNotes);
624790
624851
  }
624852
+ if (isLikelyTruncatedRouterJson(text)) {
624853
+ if (diagnostics) diagnostics.repairStatus = "skipped-truncation-rerun";
624854
+ try {
624855
+ const reissued = await this.telegramRouterJsonCompletion(backend, {
624856
+ messages: [
624857
+ {
624858
+ role: "system",
624859
+ content: "You perform live Telegram route and stimulation inference. Output strict JSON only."
624860
+ },
624861
+ { role: "user", content: userPrompt }
624862
+ ],
624863
+ tools: [],
624864
+ temperature: 0,
624865
+ maxTokens: 4096,
624866
+ timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
624867
+ think: false
624868
+ }, diagnostics, "router", sessionKey);
624869
+ const reissuedText = reissued.choices[0]?.message?.content ?? "";
624870
+ const reparsed = parseTelegramInteractionDecision(reissuedText, forcedRoute, {
624871
+ defaultShouldReply: false
624872
+ });
624873
+ if (reparsed) {
624874
+ return this.applyTelegramSilentReflectionNotes(reparsed, reflectionNotes);
624875
+ }
624876
+ } catch {
624877
+ }
624878
+ }
624791
624879
  const repaired = await this.repairTelegramInteractionDecision(
624792
624880
  backend,
624793
624881
  text,
624794
624882
  forcedRoute,
624795
624883
  config.timeoutMs ?? 3e4,
624796
- diagnostics
624884
+ diagnostics,
624885
+ sessionKey
624797
624886
  );
624798
624887
  if (repaired) {
624799
624888
  return this.applyTelegramSilentReflectionNotes(repaired, reflectionNotes);
@@ -624804,7 +624893,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
624804
624893
  text,
624805
624894
  forcedRoute,
624806
624895
  config.timeoutMs ?? 3e4,
624807
- diagnostics
624896
+ diagnostics,
624897
+ sessionKey
624808
624898
  );
624809
624899
  if (strictRetry) {
624810
624900
  return this.applyTelegramSilentReflectionNotes(strictRetry, reflectionNotes);
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.135",
3
+ "version": "1.0.136",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.135",
9
+ "version": "1.0.136",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.135",
3
+ "version": "1.0.136",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",