omnius 1.0.173 → 1.0.174

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -562106,6 +562106,7 @@ ${description}`
562106
562106
  if (effectiveThink === true && (effectiveMaxTokens ?? 0) < 4096) {
562107
562107
  effectiveMaxTokens = 4096;
562108
562108
  }
562109
+ const responseFormat = request.responseFormat ?? request.response_format;
562109
562110
  const body = {
562110
562111
  model: this.model,
562111
562112
  messages: cleanedMessages,
@@ -562116,6 +562117,16 @@ ${description}`
562116
562117
  stream_options: { include_usage: true },
562117
562118
  think: effectiveThink
562118
562119
  };
562120
+ if (responseFormat !== void 0) {
562121
+ body["response_format"] = responseFormat;
562122
+ }
562123
+ const reqNumCtx = request.numCtx;
562124
+ if (Number.isFinite(reqNumCtx) && (reqNumCtx ?? 0) > 0) {
562125
+ const opts = body["options"] ?? {};
562126
+ opts["num_ctx"] = reqNumCtx;
562127
+ body["options"] = opts;
562128
+ body["num_ctx"] = reqNumCtx;
562129
+ }
562119
562130
  let poolSlot = shouldUseOllamaPoolForBaseUrl(this.baseUrl) ? await getOllamaPool({ baseInstanceUrl: this.baseUrl }).acquire({
562120
562131
  model: this.model
562121
562132
  }) : null;
@@ -633170,7 +633181,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
633170
633181
  backend,
633171
633182
  { ...suppressed, responseFormat: TELEGRAM_INTERACTION_DECISION_RESPONSE_FORMAT },
633172
633183
  inferenceKind,
633173
- sessionKey
633184
+ sessionKey,
633185
+ { stream: false, reason: "router-json" }
633174
633186
  );
633175
633187
  const visible = jsonModeResult.choices.some(
633176
633188
  (choice) => stripTelegramHiddenThinking(choice.message.content ?? "").trim().length > 0
@@ -633192,7 +633204,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
633192
633204
  backend,
633193
633205
  suppressed,
633194
633206
  inferenceKind,
633195
- sessionKey
633207
+ sessionKey,
633208
+ { stream: false, reason: "router-plain-retry" }
633196
633209
  );
633197
633210
  if (diagnostics) {
633198
633211
  const plainVisible = plainResult.choices.some(
@@ -633231,7 +633244,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
633231
633244
  * content tokens emitted, the registry shows it, and the
633232
633245
  * hard-deadline retire path becomes diagnosable instead of opaque
633233
633246
  */
633234
- async telegramObservableInference(backend, request, kind, sessionKey) {
633247
+ async telegramObservableInference(backend, request, kind, sessionKey, options2 = {}) {
633235
633248
  const model = this.agentConfig?.model ?? "?";
633236
633249
  const promptTokens = estimatePromptTokensFromRequest(request);
633237
633250
  const broker = getModelBroker();
@@ -633259,7 +633272,17 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
633259
633272
  let completionTokens = 0;
633260
633273
  try {
633261
633274
  let result;
633262
- if (typeof streamFn !== "function") {
633275
+ const streamAllowed = options2.stream !== false;
633276
+ if (!streamAllowed) {
633277
+ result = await backend.chatCompletion(requestWithCtx);
633278
+ this.updateTelegramInferenceFinal(id, result);
633279
+ if (!streamAllowed && process.env["OMNIUS_BROKER_TRACE"] === "1") {
633280
+ this.tuiWrite(() => renderTelegramSubAgentEvent(
633281
+ sessionKey,
633282
+ `inference ${id}: non-stream direct (${options2.reason ?? "requested"}) ${this.telegramInferenceRequestDiagnostic(requestWithCtx)}`
633283
+ ));
633284
+ }
633285
+ } else if (typeof streamFn !== "function") {
633263
633286
  result = await backend.chatCompletion(requestWithCtx);
633264
633287
  this.updateTelegramInferenceFinal(id, result);
633265
633288
  } else {
@@ -633289,6 +633312,28 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
633289
633312
  this.deregisterTelegramInference(id);
633290
633313
  }
633291
633314
  }
633315
+ telegramBackendDiagnostic() {
633316
+ const config = this.agentConfig;
633317
+ if (!config) return "backend=unconfigured model=?";
633318
+ return `backend=${config.backendType} url=${config.backendUrl} model=${config.model}`;
633319
+ }
633320
+ telegramInferenceRequestDiagnostic(request) {
633321
+ const responseFormat = request.responseFormat ?? request.response_format;
633322
+ const responseFormatType = responseFormat && typeof responseFormat["type"] === "string" ? responseFormat["type"] : responseFormat ? "present" : "none";
633323
+ const numCtx = request.numCtx;
633324
+ const think = request.think;
633325
+ const tools = Array.isArray(request.tools) ? request.tools.length : 0;
633326
+ return `${this.telegramBackendDiagnostic()} response_format=${responseFormatType} num_ctx=${Number.isFinite(numCtx) ? numCtx : "unset"} think=${think === void 0 ? "default" : String(think)} tools=${tools} timeoutMs=${Number.isFinite(request.timeoutMs) ? request.timeoutMs : "unset"}`;
633327
+ }
633328
+ telegramStreamInactivityDiagnostic(request, inferenceId, inactivityMs, contentChars, thinkingChars) {
633329
+ const entry = this.telegramActiveInferences.get(inferenceId);
633330
+ const now = performance.now();
633331
+ const elapsed = entry ? `${((now - entry.startTs) / 1e3).toFixed(1)}s` : "unknown";
633332
+ const idle = entry ? `${((now - entry.lastTokenAt) / 1e3).toFixed(1)}s` : "unknown";
633333
+ const ttfb = entry?.firstChunkAt !== void 0 ? `${((entry.firstChunkAt - entry.startTs) / 1e3).toFixed(1)}s` : "never";
633334
+ const phase = entry?.firstChunkAt === void 0 ? "before-first-chunk" : "mid-stream";
633335
+ return `stream-inactivity: no chunks for ${(inactivityMs / 1e3).toFixed(0)}s (phase=${phase}; elapsed=${elapsed}; idle=${idle}; ttfb=${ttfb}; content=${contentChars}c thinking=${thinkingChars}c; ${this.telegramInferenceRequestDiagnostic(request)}; stream_endpoint=no-sse-chunk)`;
633336
+ }
633292
633337
  /**
633293
633338
  * Drive a chatCompletionStream to exhaustion, accumulating tokens into a
633294
633339
  * chatCompletion-shaped result. Live-emits content + thinking tokens
@@ -633325,7 +633370,13 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
633325
633370
  const inactivityPromise = new Promise((_, reject) => {
633326
633371
  timeoutHandle = setTimeout(
633327
633372
  () => reject(new Error(
633328
- `stream-inactivity: no chunks for ${(inactivityMs / 1e3).toFixed(0)}s (content=${contentBuf.length}c thinking=${thinkingBuf.length}c so far) — Ollama likely cold-loading the model or wedged; falling back to non-stream`
633373
+ this.telegramStreamInactivityDiagnostic(
633374
+ request,
633375
+ inferenceId,
633376
+ inactivityMs,
633377
+ contentBuf.length,
633378
+ thinkingBuf.length
633379
+ )
633329
633380
  )),
633330
633381
  inactivityMs
633331
633382
  );
@@ -634180,6 +634231,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
634180
634231
  composeTelegramRouterDiagnosticNote(invalidRouterPreview, failureNarrative, headline) {
634181
634232
  const segments = [];
634182
634233
  segments.push(headline);
634234
+ segments.push(this.telegramBackendDiagnostic());
634183
634235
  if (failureNarrative.summary) segments.push(failureNarrative.summary);
634184
634236
  if (invalidRouterPreview) segments.push(`invalid router output preview: ${invalidRouterPreview}`);
634185
634237
  if (failureNarrative.detail) segments.push(`router-failure trace: ${failureNarrative.detail}`);
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.173",
3
+ "version": "1.0.174",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.173",
9
+ "version": "1.0.174",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.173",
3
+ "version": "1.0.174",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",