omnius 1.0.134 → 1.0.136

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1365,7 +1365,7 @@ async function vramSnapshotMB() {
1365
1365
  function getModelBroker() {
1366
1366
  return ModelBroker.getInstance();
1367
1367
  }
1368
- var DEFAULT_RAM_HEADROOM_MB, DEFAULT_VRAM_HEADROOM_MB, DEFAULT_IDLE_EVICT_MS, DEFAULT_POLL_MS, DEFAULT_INFLIGHT_WAIT_MS, ModelBroker, _nvSmiAvailable;
1368
+ var DEFAULT_RAM_HEADROOM_MB, DEFAULT_VRAM_HEADROOM_MB, DEFAULT_IDLE_EVICT_MS, DEFAULT_POLL_MS, DEFAULT_INFLIGHT_WAIT_MS, DEFAULT_SLOT_CAPACITY, DEFAULT_QUEUE_CAPACITY, THROUGHPUT_EMA_ALPHA, THROUGHPUT_INITIAL_TPS, STUCK_INFLIGHT_DIAGNOSTIC_MS, ModelBroker, _nvSmiAvailable;
1369
1369
  var init_model_broker = __esm({
1370
1370
  "packages/execution/dist/model-broker.js"() {
1371
1371
  "use strict";
@@ -1374,6 +1374,11 @@ var init_model_broker = __esm({
1374
1374
  DEFAULT_IDLE_EVICT_MS = 5 * 60 * 1e3;
1375
1375
  DEFAULT_POLL_MS = 4e3;
1376
1376
  DEFAULT_INFLIGHT_WAIT_MS = 6e4;
1377
+ DEFAULT_SLOT_CAPACITY = 8;
1378
+ DEFAULT_QUEUE_CAPACITY = 50;
1379
+ THROUGHPUT_EMA_ALPHA = 0.2;
1380
+ THROUGHPUT_INITIAL_TPS = 25;
1381
+ STUCK_INFLIGHT_DIAGNOSTIC_MS = 5 * 60 * 1e3;
1377
1382
  ModelBroker = class _ModelBroker {
1378
1383
  static _instance = null;
1379
1384
  /** Loaded model registry keyed by `${host}:${name}`. */
@@ -1398,6 +1403,22 @@ var init_model_broker = __esm({
1398
1403
  ramHeadroomMB = DEFAULT_RAM_HEADROOM_MB;
1399
1404
  vramHeadroomMB = DEFAULT_VRAM_HEADROOM_MB;
1400
1405
  idleEvictMs = DEFAULT_IDLE_EVICT_MS;
1406
+ /** Inference slot capacity (auto-tunes from Ollama pool size when known). */
1407
+ slotCapacity = DEFAULT_SLOT_CAPACITY;
1408
+ /** Maximum queue depth before queue pressure is emitted. */
1409
+ queueCapacity = DEFAULT_QUEUE_CAPACITY;
1410
+ // ── Inference slot tracking ─────────────────────────────────────────
1411
+ /** Active slots keyed by slot id. */
1412
+ _activeSlots = /* @__PURE__ */ new Map();
1413
+ /** Reserved slots per sessionKey (1 reserved slot per active chat). */
1414
+ _reservedBySession = /* @__PURE__ */ new Map();
1415
+ // sessionKey -> slot id
1416
+ /** Shared (non-reserved) queue. FIFO with priority insertion. */
1417
+ _slotQueue = [];
1418
+ /** Per-model throughput tracking. */
1419
+ _throughput = /* @__PURE__ */ new Map();
1420
+ /** Monotonic counter for slot ids. */
1421
+ _slotIdSeq = 0;
1401
1422
  static getInstance() {
1402
1423
  if (!_ModelBroker._instance)
1403
1424
  _ModelBroker._instance = new _ModelBroker();
@@ -1876,7 +1897,30 @@ var init_model_broker = __esm({
1876
1897
  inflight: [...this._inflight.entries()].map(([key, v]) => ({ key, owner: v.owner, startedMs: v.startedMs })),
1877
1898
  ramMB: ram,
1878
1899
  vramMB: vram,
1879
- lastPollAt: Date.now()
1900
+ lastPollAt: Date.now(),
1901
+ slots: this.buildSlotsSnapshot()
1902
+ };
1903
+ }
1904
+ buildSlotsSnapshot() {
1905
+ const byModel = {};
1906
+ for (const slot of this._activeSlots.values()) {
1907
+ const k = slot.model;
1908
+ if (!byModel[k])
1909
+ byModel[k] = { inUse: 0, tokensPerSec: 0, samples: 0 };
1910
+ byModel[k].inUse += 1;
1911
+ }
1912
+ for (const [model, tp] of this._throughput) {
1913
+ if (!byModel[model])
1914
+ byModel[model] = { inUse: 0, tokensPerSec: 0, samples: 0 };
1915
+ byModel[model].tokensPerSec = tp.tokensPerSec;
1916
+ byModel[model].samples = tp.samples;
1917
+ }
1918
+ return {
1919
+ inUse: this._activeSlots.size,
1920
+ capacity: this.slotCapacity,
1921
+ queueDepth: this._slotQueue.length,
1922
+ queueCapacity: this.queueCapacity,
1923
+ byModel
1880
1924
  };
1881
1925
  }
1882
1926
  async checkPressure(snap) {
@@ -1890,6 +1934,196 @@ var init_model_broker = __esm({
1890
1934
  this.emit("pressure", "vram", v.free, this.vramHeadroomMB);
1891
1935
  }
1892
1936
  }
1937
+ const queueThreshold = Math.floor(this.queueCapacity * 0.8);
1938
+ if (this._slotQueue.length >= queueThreshold) {
1939
+ this.emit("pressure", "queue", this._slotQueue.length, queueThreshold);
1940
+ }
1941
+ const now = Date.now();
1942
+ for (const slot of this._activeSlots.values()) {
1943
+ if (now - slot.acquiredAt > STUCK_INFLIGHT_DIAGNOSTIC_MS) {
1944
+ }
1945
+ }
1946
+ }
1947
+ // ------------------------------------------------------------------
1948
+ // Inference slot admission control (replaces timeouts)
1949
+ // ------------------------------------------------------------------
1950
+ /**
1951
+ * Acquire an inference slot. Blocks (queues with backpressure) until a slot
1952
+ * is available. Never times out — work either completes or is cancelled
1953
+ * via the caller-provided AbortSignal before admission.
1954
+ *
1955
+ * Two-tier admission:
1956
+ * 1. Reserved: 1 slot per sessionKey kept warm even when shared pool full
1957
+ * 2. Shared: queue with FIFO+priority ordering; size-bounded by queueCapacity
1958
+ *
1959
+ * Backpressure: when queue exceeds 80% capacity, emit `pressure: "queue"` —
1960
+ * upstream callers (e.g. Telegram poll loop) should slow ingress.
1961
+ */
1962
+ acquireInferenceSlot(spec) {
1963
+ if (this._activeSlots.size < this.slotCapacity) {
1964
+ return Promise.resolve(this.admitSlot(
1965
+ spec,
1966
+ /*reserved*/
1967
+ false
1968
+ ));
1969
+ }
1970
+ if (spec.sessionKey && !this._reservedBySession.has(spec.sessionKey) && this._activeSlots.size < this.slotCapacity + 1) {
1971
+ const slot = this.admitSlot(
1972
+ spec,
1973
+ /*reserved*/
1974
+ true
1975
+ );
1976
+ this._reservedBySession.set(spec.sessionKey, slot.info.id);
1977
+ return Promise.resolve(slot);
1978
+ }
1979
+ return new Promise((resolve55, reject) => {
1980
+ if (this._slotQueue.length >= this.queueCapacity) {
1981
+ const newPrio = spec.priority ?? 0;
1982
+ let victim = -1;
1983
+ let victimPrio = Infinity;
1984
+ for (let i2 = this._slotQueue.length - 1; i2 >= 0; i2--) {
1985
+ const p2 = this._slotQueue[i2].spec.priority ?? 0;
1986
+ if (p2 < victimPrio) {
1987
+ victimPrio = p2;
1988
+ victim = i2;
1989
+ }
1990
+ if (victimPrio === 0)
1991
+ break;
1992
+ }
1993
+ if (victim >= 0 && victimPrio < newPrio) {
1994
+ const dropped = this._slotQueue.splice(victim, 1)[0];
1995
+ if (dropped.onSignalAbort && dropped.spec.signal) {
1996
+ dropped.spec.signal.removeEventListener("abort", dropped.onSignalAbort);
1997
+ }
1998
+ try {
1999
+ dropped.reject(new Error("broker queue shed: capacity reached, lower-priority entry displaced"));
2000
+ } catch {
2001
+ }
2002
+ } else {
2003
+ reject(new Error(`broker queue full (capacity=${this.queueCapacity}); caller priority ${newPrio} insufficient to displace`));
2004
+ return;
2005
+ }
2006
+ }
2007
+ const entry = { spec, resolve: resolve55, reject, enqueuedAt: Date.now() };
2008
+ if (spec.signal) {
2009
+ const onAbort = () => {
2010
+ const idx = this._slotQueue.indexOf(entry);
2011
+ if (idx >= 0)
2012
+ this._slotQueue.splice(idx, 1);
2013
+ reject(new Error("inference slot acquisition aborted by caller signal"));
2014
+ };
2015
+ if (spec.signal.aborted) {
2016
+ onAbort();
2017
+ return;
2018
+ }
2019
+ spec.signal.addEventListener("abort", onAbort, { once: true });
2020
+ entry.onSignalAbort = onAbort;
2021
+ }
2022
+ const prio = spec.priority ?? 0;
2023
+ let insertAt = this._slotQueue.length;
2024
+ for (let i2 = this._slotQueue.length - 1; i2 >= 0; i2--) {
2025
+ const p2 = this._slotQueue[i2].spec.priority ?? 0;
2026
+ if (p2 >= prio) {
2027
+ insertAt = i2 + 1;
2028
+ break;
2029
+ }
2030
+ if (i2 === 0)
2031
+ insertAt = 0;
2032
+ }
2033
+ this._slotQueue.splice(insertAt, 0, entry);
2034
+ const threshold = Math.floor(this.queueCapacity * 0.8);
2035
+ if (this._slotQueue.length === threshold) {
2036
+ this.emit("pressure", "queue", this._slotQueue.length, threshold);
2037
+ }
2038
+ });
2039
+ }
2040
+ /** Admit a slot — internal, called from acquire fast path and from drainQueue. */
2041
+ admitSlot(spec, reserved) {
2042
+ const id = `slot-${++this._slotIdSeq}-${Date.now().toString(36)}`;
2043
+ const info = {
2044
+ id,
2045
+ model: spec.model,
2046
+ domain: spec.domain,
2047
+ owner: spec.owner,
2048
+ sessionKey: spec.sessionKey,
2049
+ acquiredAt: Date.now(),
2050
+ promptTokens: spec.promptTokens ?? 0,
2051
+ reserved
2052
+ };
2053
+ this._activeSlots.set(id, info);
2054
+ this.emit("slotAcquired", info);
2055
+ let released = false;
2056
+ const broker = this;
2057
+ return {
2058
+ info,
2059
+ release(outcome) {
2060
+ if (released)
2061
+ return;
2062
+ released = true;
2063
+ broker.releaseSlot(info, outcome);
2064
+ }
2065
+ };
2066
+ }
2067
+ releaseSlot(info, outcome) {
2068
+ this._activeSlots.delete(info.id);
2069
+ if (info.sessionKey && this._reservedBySession.get(info.sessionKey) === info.id) {
2070
+ this._reservedBySession.delete(info.sessionKey);
2071
+ }
2072
+ if (outcome.ok && (outcome.completionTokens ?? 0) > 0) {
2073
+ const wallMs = Date.now() - info.acquiredAt;
2074
+ if (wallMs > 100) {
2075
+ const tps = outcome.completionTokens * 1e3 / wallMs;
2076
+ const cur = this._throughput.get(info.model) ?? {
2077
+ tokensPerSec: THROUGHPUT_INITIAL_TPS,
2078
+ samples: 0,
2079
+ lastReleaseAt: 0
2080
+ };
2081
+ cur.tokensPerSec = cur.samples === 0 ? tps : cur.tokensPerSec * (1 - THROUGHPUT_EMA_ALPHA) + tps * THROUGHPUT_EMA_ALPHA;
2082
+ cur.samples += 1;
2083
+ cur.lastReleaseAt = Date.now();
2084
+ this._throughput.set(info.model, cur);
2085
+ this.emit("throughputUpdated", info.model, cur.tokensPerSec);
2086
+ }
2087
+ }
2088
+ this.emit("slotReleased", info, outcome);
2089
+ this.drainSlotQueue();
2090
+ }
2091
+ drainSlotQueue() {
2092
+ while (this._slotQueue.length > 0 && this._activeSlots.size < this.slotCapacity) {
2093
+ const entry = this._slotQueue.shift();
2094
+ if (entry.onSignalAbort && entry.spec.signal) {
2095
+ entry.spec.signal.removeEventListener("abort", entry.onSignalAbort);
2096
+ }
2097
+ if (entry.spec.signal?.aborted) {
2098
+ try {
2099
+ entry.reject(new Error("aborted before admission"));
2100
+ } catch {
2101
+ }
2102
+ continue;
2103
+ }
2104
+ const slot = this.admitSlot(
2105
+ entry.spec,
2106
+ /*reserved*/
2107
+ false
2108
+ );
2109
+ try {
2110
+ entry.resolve(slot);
2111
+ } catch {
2112
+ }
2113
+ }
2114
+ }
2115
+ /** Snapshot of throughput EMAs (for /broker and debugging). */
2116
+ throughputByModel() {
2117
+ const out = {};
2118
+ for (const [model, tp] of this._throughput) {
2119
+ out[model] = { tokensPerSec: tp.tokensPerSec, samples: tp.samples };
2120
+ }
2121
+ return out;
2122
+ }
2123
+ /** Tune the shared slot capacity at runtime (e.g. when Ollama pool resizes). */
2124
+ setSlotCapacity(n2) {
2125
+ this.slotCapacity = Math.max(1, Math.floor(n2));
2126
+ this.drainSlotQueue();
1893
2127
  }
1894
2128
  keyOf(spec) {
1895
2129
  return `${spec.host}:${spec.name}`;
@@ -1899,6 +2133,115 @@ var init_model_broker = __esm({
1899
2133
  }
1900
2134
  });
1901
2135
 
2136
+ // packages/execution/dist/broker-mediated-backend.js
2137
/**
 * Wrap a chat backend so that every completion first acquires an inference
 * slot from the model broker and releases it when the call finishes.
 *
 * The returned object inherits from `backend` (via `Object.create`) and
 * overrides `chatCompletion` and, when the backend has one,
 * `chatCompletionStream`.
 *
 * @param backend Object exposing `chatCompletion(request)` and optionally
 *   `chatCompletionStream(request)` and `model`.
 * @param options2 `{ domain, owner, sessionKey, priority?, clampNumCtx? }`.
 *   Unless `clampNumCtx` is `false`, `request.numCtx` is capped at the value
 *   returned by `broker.getNctxTrain(model)`; when the request carries no
 *   `numCtx`, one is derived from `estimateContextNeed(request)`.
 * @returns The wrapped backend.
 */
function wrapWithBroker(backend, options2) {
  const broker = getModelBroker();
  const clampEnabled = options2.clampNumCtx !== false;
  const wrapped = Object.create(backend);

  const modelFor = (request) => backend.model || request.model || "unknown";

  // Returns the request to forward to the backend, with numCtx clamped when possible.
  const clampRequest = async (request, model) => {
    if (!clampEnabled) {
      return request;
    }
    // A failed lookup is deliberately treated as "unknown" and the request passes through.
    const trainCtx = await broker.getNctxTrain(model).catch(() => null);
    if (!(trainCtx && trainCtx > 0)) {
      return request;
    }
    const requestedNumCtx = request.numCtx;
    const target = requestedNumCtx ? Math.min(requestedNumCtx, trainCtx) : Math.min(trainCtx, estimateContextNeed(request));
    return target > 0 ? { ...request, numCtx: target } : request;
  };

  const acquireSlot = (request, model) => broker.acquireInferenceSlot({
    model,
    domain: options2.domain,
    owner: options2.owner,
    sessionKey: options2.sessionKey,
    promptTokens: estimatePromptTokens(request),
    priority: options2.priority ?? 0
  });

  const describeError = (err) => err instanceof Error ? err.message : String(err);

  wrapped.chatCompletion = async (request) => {
    const model = modelFor(request);
    const effectiveRequest = await clampRequest(request, model);
    const slot = await acquireSlot(request, model);
    try {
      const result = await backend.chatCompletion(effectiveRequest);
      slot.release({ ok: true, completionTokens: result.usage?.completion_tokens ?? 0 });
      return result;
    } catch (err) {
      slot.release({ ok: false, error: describeError(err) });
      throw err;
    }
  };

  if (typeof backend.chatCompletionStream === "function") {
    const streamFn = backend.chatCompletionStream.bind(backend);
    wrapped.chatCompletionStream = async function* (request) {
      const model = modelFor(request);
      const effectiveRequest = await clampRequest(request, model);
      const slot = await acquireSlot(request, model);
      let released = false;
      const releaseOnce = (outcome) => {
        if (released) {
          return;
        }
        released = true;
        slot.release(outcome);
      };
      let completionTokens = 0;
      try {
        for await (const chunk of streamFn(effectiveRequest)) {
          // Keep the most recent non-zero usage count seen on any chunk.
          const usage = chunk.usage;
          if (usage?.completion_tokens) {
            completionTokens = usage.completion_tokens;
          }
          yield chunk;
        }
        releaseOnce({ ok: true, completionTokens });
      } catch (err) {
        releaseOnce({ ok: false, error: describeError(err) });
        throw err;
      } finally {
        // When the consumer stops iterating early the generator is closed via
        // return(), which skips both paths above; without this the slot
        // would stay active forever.
        releaseOnce({ ok: false, error: "stream closed by consumer before completion" });
      }
    };
  }
  return wrapped;
}
2213
/**
 * Rough token estimate for a chat request: total characters divided by 4,
 * rounded up.
 *
 * Counted characters: string message content, the `text` of each part in
 * array content, 8 extra per message, and 600 per entry in `request.tools`.
 *
 * @param request Chat request; `messages` and `tools` are read when they are arrays.
 * @returns Estimated token count (0 for a missing or empty request).
 */
function estimatePromptTokens(request) {
  const contentChars = (content) => {
    if (typeof content === "string") {
      return content.length;
    }
    if (!Array.isArray(content)) {
      return 0;
    }
    return content.reduce(
      (sum, part) => typeof part?.text === "string" ? sum + part.text.length : sum,
      0
    );
  };
  const messages = Array.isArray(request?.messages) ? request.messages : [];
  const tools = Array.isArray(request?.tools) ? request.tools : [];
  let chars = 0;
  for (const message of messages) {
    chars += contentChars(message.content) + 8;
  }
  chars += tools.length * 600;
  return Math.ceil(chars / 4);
}
2233
/**
 * Estimate the context window a request needs: estimated prompt tokens plus
 * the reply budget (`request.maxTokens`, default 1024) plus 512, never less
 * than 2048.
 */
function estimateContextNeed(request) {
  const MIN_CONTEXT = 2048;
  const DEFAULT_REPLY_BUDGET = 1024;
  const EXTRA_MARGIN = 512;
  const promptSize = estimatePromptTokens(request);
  const replyBudget = request?.maxTokens ?? DEFAULT_REPLY_BUDGET;
  return Math.max(MIN_CONTEXT, promptSize + replyBudget + EXTRA_MARGIN);
}
2238
+ var init_broker_mediated_backend = __esm({
2239
+ "packages/execution/dist/broker-mediated-backend.js"() {
2240
+ "use strict";
2241
+ init_model_broker();
2242
+ }
2243
+ });
2244
+
1902
2245
  // packages/execution/dist/tools/security-classifier.js
1903
2246
  function classifyTool(name10) {
1904
2247
  for (const rule of RULES) {
@@ -525416,6 +525759,7 @@ __export(dist_exports, {
525416
525759
  videoGenerationQualityLadder: () => videoGenerationQualityLadder,
525417
525760
  videoGenerationSetupPlan: () => videoGenerationSetupPlan,
525418
525761
  worktreeHasChanges: () => worktreeHasChanges,
525762
+ wrapWithBroker: () => wrapWithBroker,
525419
525763
  writeProvenanceFile: () => writeProvenanceFile,
525420
525764
  writeTodos: () => writeTodos
525421
525765
  });
@@ -525424,6 +525768,7 @@ var init_dist5 = __esm({
525424
525768
  "use strict";
525425
525769
  init_tool_executor();
525426
525770
  init_model_broker();
525771
+ init_broker_mediated_backend();
525427
525772
  init_security_classifier();
525428
525773
  init_tool_manifest();
525429
525774
  init_provenance();
@@ -555275,6 +555620,13 @@ ${description}`
555275
555620
  if (responseFormat !== void 0) {
555276
555621
  body["response_format"] = responseFormat;
555277
555622
  }
555623
+ const reqNumCtx = request.numCtx;
555624
+ if (Number.isFinite(reqNumCtx) && (reqNumCtx ?? 0) > 0) {
555625
+ const opts = body["options"] ?? {};
555626
+ opts["num_ctx"] = reqNumCtx;
555627
+ body["options"] = opts;
555628
+ body["num_ctx"] = reqNumCtx;
555629
+ }
555278
555630
  let poolSlot = shouldUseOllamaPoolForBaseUrl(this.baseUrl) ? await getOllamaPool({ baseInstanceUrl: this.baseUrl }).acquire({
555279
555631
  model: this.model
555280
555632
  }) : null;
@@ -615178,6 +615530,300 @@ var init_stimulation = __esm({
615178
615530
  }
615179
615531
  });
615180
615532
 
615533
+ // packages/cli/src/tui/pid-controller.ts
615534
/** Clamp `x` to the range [0, 1]; anything that is not a finite number becomes 0. */
function clamp018(x) {
  if (!Number.isFinite(x) || x < 0) {
    return 0;
  }
  return x > 1 ? 1 : x;
}
615540
/** Return the shared PidRegistry, creating it on first use. */
function getPidRegistry() {
  _registry2 ||= new PidRegistry();
  return _registry2;
}
615544
+ var DEFAULT_PID_CONFIG, PidRegistry, _registry2;
615545
/**
 * Lazy module initialiser for the PID controller registry.
 *
 * Defines `DEFAULT_PID_CONFIG` and `PidRegistry`, a keyed collection of
 * latency controllers. Each controller smooths incoming latency samples,
 * compares the smoothed value with `setpointMs`, and nudges an output that
 * is clamped to [0, 1].
 */
var init_pid_controller = __esm({
  "packages/cli/src/tui/pid-controller.ts"() {
    "use strict";
    DEFAULT_PID_CONFIG = {
      kp: 1e-4,
      ki: 1e-5,
      kd: 0,
      setpointMs: 8e3,
      initialOutput: 1,
      pvEmaAlpha: 0.3,
      // Bounds the accumulated error so the integral term cannot grow without limit.
      integralClamp: 5e3
    };
    PidRegistry = class {
      _controllers = /* @__PURE__ */ new Map();
      /**
       * Get or create the controller state for `key`.
       * `configOverride` is merged over the defaults only when the controller
       * is created; it is ignored for a key that already exists.
       */
      get(key, configOverride) {
        let st = this._controllers.get(key);
        if (!st) {
          const config = { ...DEFAULT_PID_CONFIG, ...configOverride ?? {} };
          st = {
            output: config.initialOutput,
            // Start the smoothed value at the setpoint, i.e. zero initial error.
            pv: config.setpointMs,
            integral: 0,
            lastError: 0,
            lastSampleAt: 0,
            samples: 0,
            config
          };
          this._controllers.set(key, st);
        }
        return st;
      }
      /** Current output for `key`, or the default initial output for an unknown key. */
      output(key) {
        return this._controllers.get(key)?.output ?? DEFAULT_PID_CONFIG.initialOutput;
      }
      /**
       * Record a latency sample (ms) and update the controller.
       *
       * A sample that is not a finite number is ignored and the current
       * output is returned unchanged: folding NaN or Infinity into the state
       * would make `pv` and `integral` NaN permanently and pin the output at 0.
       *
       * @returns The controller output after the update.
       */
      sample(key, latencyMs, configOverride) {
        const st = this.get(key, configOverride);
        if (!Number.isFinite(latencyMs)) {
          return st.output;
        }
        const now = Date.now();
        // The first sample replaces the seeded value; later ones are blended in.
        st.pv = st.samples === 0 ? latencyMs : st.pv * (1 - st.config.pvEmaAlpha) + latencyMs * st.config.pvEmaAlpha;
        const error = st.config.setpointMs - st.pv;
        st.integral += error;
        if (st.integral > st.config.integralClamp) st.integral = st.config.integralClamp;
        if (st.integral < -st.config.integralClamp) st.integral = -st.config.integralClamp;
        // With no previous sample, assume one second elapsed.
        const dt = st.lastSampleAt > 0 ? now - st.lastSampleAt : 1e3;
        const derivative = dt > 0 ? (error - st.lastError) / dt : 0;
        const u = st.config.kp * error + st.config.ki * st.integral + st.config.kd * derivative;
        // The control term is added to the previous output, not used as the output itself.
        st.output = clamp018(st.output + u);
        st.lastError = error;
        st.lastSampleAt = now;
        st.samples += 1;
        return st.output;
      }
      /** One `{ key, output, pv, setpoint, samples }` record per controller. */
      snapshot() {
        return [...this._controllers.entries()].map(([key, st]) => ({
          key,
          output: st.output,
          pv: st.pv,
          setpoint: st.config.setpointMs,
          samples: st.samples
        }));
      }
      /** Drop all controllers (test-only). */
      reset() {
        this._controllers.clear();
      }
    };
    _registry2 = null;
  }
});
615622
+
615623
+ // packages/cli/src/tui/component-benefit.ts
615624
/** Return the shared ComponentBenefitRegistry, creating it on first use. */
function getComponentBenefitRegistry() {
  _registry3 ||= new ComponentBenefitRegistry();
  return _registry3;
}
615628
+ var EMA_ALPHA, MIN_SAMPLES_TO_TRUST, ComponentBenefitRegistry, _registry3;
615629
/**
 * Lazy module initialiser for the component-benefit registry.
 *
 * `ComponentBenefitRegistry` keeps, per chat and per component, a running
 * score in [0, 1] of how often the component's "needle" text showed up in
 * the decision text it was sampled for.
 */
var init_component_benefit = __esm({
  "packages/cli/src/tui/component-benefit.ts"() {
    "use strict";
    EMA_ALPHA = 0.2;
    MIN_SAMPLES_TO_TRUST = 3;
    ComponentBenefitRegistry = class {
      /** chatKey -> (componentKey -> { score, samples, hits, lastSeenAt }). */
      _byChat = /* @__PURE__ */ new Map();
      /** Score of a component in a chat; 0.5 until MIN_SAMPLES_TO_TRUST samples exist. */
      score(chatKey, componentKey) {
        const entry = this._byChat.get(chatKey)?.get(componentKey);
        return !entry || entry.samples < MIN_SAMPLES_TO_TRUST ? 0.5 : entry.score;
      }
      /**
       * Record one batch of samples. A sample counts as a hit when its
       * lower-cased needle has at least 3 characters and occurs in the
       * lower-cased decision text.
       */
      recordOutcome(chatKey, samples, decisionText) {
        const haystack = decisionText.toLowerCase();
        let components = this._byChat.get(chatKey);
        if (!components) {
          components = /* @__PURE__ */ new Map();
          this._byChat.set(chatKey, components);
        }
        const seenAt = Date.now();
        for (const sample of samples) {
          const needle = sample.needle.toLowerCase();
          const hit = needle.length >= 3 && haystack.includes(needle);
          let entry = components.get(sample.key);
          if (!entry) {
            entry = { score: 0.5, samples: 0, hits: 0, lastSeenAt: seenAt };
            components.set(sample.key, entry);
          }
          const observed = hit ? 1 : 0;
          // The first observation replaces the neutral seed; later ones are blended in.
          if (entry.samples === 0) {
            entry.score = observed;
          } else {
            entry.score = entry.score * (1 - EMA_ALPHA) + observed * EMA_ALPHA;
          }
          entry.samples += 1;
          entry.hits += hit ? 1 : 0;
          entry.lastSeenAt = seenAt;
        }
      }
      /** Flat list of score records, for one chat when `chatKey` is given, else for all chats. */
      snapshot(chatKey) {
        let chats;
        if (chatKey) {
          const only = this._byChat.get(chatKey);
          chats = only ? [[chatKey, only]] : [];
        } else {
          chats = [...this._byChat.entries()];
        }
        const rows = [];
        for (const [cKey, components] of chats) {
          for (const [componentKey, entry] of components) {
            rows.push({ chatKey: cKey, componentKey, score: entry.score, samples: entry.samples, hits: entry.hits });
          }
        }
        return rows;
      }
      /** Forget all recorded scores. */
      reset() {
        this._byChat.clear();
      }
    };
    _registry3 = null;
  }
});
615685
+
615686
+ // packages/cli/src/tui/soul-observations.ts
615687
/**
 * Return the shared SoulObservationStream. On first use the stream is
 * created and handed to `subscribeBrokerEvents`.
 */
function getSoulObservationStream() {
  if (_stream) {
    return _stream;
  }
  _stream = new SoulObservationStream();
  subscribeBrokerEvents(_stream);
  return _stream;
}
615694
/**
 * Forward model-broker events into the observation stream. Runs at most once
 * per process; later calls return immediately.
 *
 * Mapping: `slotReleased` becomes `inference.completed` or
 * `inference.degraded` depending on `outcome.ok`; `pressure` becomes
 * `broker.pressure`; `evicted` becomes `model.evicted`.
 */
function subscribeBrokerEvents(stream) {
  if (_brokerSubscribed) return;
  _brokerSubscribed = true;
  const broker = getModelBroker();

  const onSlotReleased = (info, outcome) => {
    if (!outcome.ok) {
      stream.emit({
        kind: "inference.degraded",
        model: info.model,
        sessionKey: info.sessionKey,
        reason: outcome.error ?? "unknown",
        ts: Date.now()
      });
      return;
    }
    stream.emit({
      kind: "inference.completed",
      model: info.model,
      sessionKey: info.sessionKey,
      latencyMs: Date.now() - info.acquiredAt,
      promptTokens: info.promptTokens,
      completionTokens: outcome.completionTokens ?? 0,
      ts: Date.now()
    });
  };
  const onPressure = (kind, value2, threshold) => {
    stream.emit({ kind: "broker.pressure", pressure: kind, value: value2, threshold, ts: Date.now() });
  };
  const onEvicted = (m2, reason) => {
    stream.emit({ kind: "model.evicted", host: m2.host, name: m2.name, reason, ts: Date.now() });
  };

  broker.on("slotReleased", onSlotReleased);
  broker.on("pressure", onPressure);
  broker.on("evicted", onEvicted);
}
615726
/**
 * Build the "System Observations" text block from the broker snapshot, the
 * PID registry snapshot and, when `sessionKey` is given, that session's
 * recent observation events.
 *
 * Sections are added only when they have something to report: slot capacity
 * and throughput, RAM / VRAM headroom, queue pressure (at 80% of queue
 * capacity), PID controllers with at least 3 samples whose output is outside
 * 0.95..1.05 (first 4 only), and recent send refusals, rate limits and
 * inbound reactions for the session.
 *
 * @param sessionKey Optional session to include send/reaction history for.
 * @returns The heading plus one line per section joined by newlines, or ""
 *   when no section applies.
 */
function formatSystemObservations(sessionKey) {
  const stream = getSoulObservationStream();
  const broker = getModelBroker();
  const snap = broker.snapshot();
  const pidSnap = getPidRegistry().snapshot();
  const { slots } = snap;
  const lines = [];

  // Capacity and per-model throughput.
  let utilPct = 0;
  if (slots.capacity > 0) {
    utilPct = Math.round(slots.inUse / slots.capacity * 100);
  }
  const throughputParts = [];
  for (const [model, stats] of Object.entries(slots.byModel)) {
    if (stats.samples > 0) {
      throughputParts.push(`${model}=${stats.tokensPerSec.toFixed(1)}t/s (${stats.samples}s)`);
    }
  }
  const tpsByModel = throughputParts.join(", ");
  if (slots.inUse > 0 || slots.queueDepth > 0 || tpsByModel) {
    lines.push(`Capacity: ${slots.inUse}/${slots.capacity} slots in use (${utilPct}%), queue=${slots.queueDepth}/${slots.queueCapacity}${tpsByModel ? `; throughput: ${tpsByModel}` : ""}.`);
  }

  // Memory headroom.
  if (snap.ramMB.free < (broker.ramHeadroomMB ?? 0)) {
    lines.push(`RAM pressure: ${snap.ramMB.free}MB free (below ${broker.ramHeadroomMB}MB headroom).`);
  }
  if (snap.vramMB && snap.vramMB.free < (broker.vramHeadroomMB ?? 0)) {
    lines.push(`VRAM pressure: ${snap.vramMB.free}MB free (below ${broker.vramHeadroomMB}MB headroom).`);
  }

  // Queue pressure.
  const queueThreshold = Math.floor(slots.queueCapacity * 0.8);
  if (slots.queueDepth >= queueThreshold) {
    lines.push(`Queue pressure: ${slots.queueDepth}/${slots.queueCapacity} entries — prefer brief replies or single-emoji reactions to keep the queue draining.`);
  }

  // PID controllers that have moved away from full output.
  const interesting = pidSnap.filter((ctrl) => ctrl.samples >= 3 && (ctrl.output < 0.95 || ctrl.output > 1.05));
  if (interesting.length > 0) {
    const described = interesting
      .slice(0, 4)
      .map((ctrl) => `${ctrl.key}: u=${ctrl.output.toFixed(2)} (pv=${Math.round(ctrl.pv)}ms, sp=${ctrl.setpoint}ms)`);
    lines.push(`Context tier PID state: ${described.join(", ")}.`);
  }

  // Per-session send outcomes and reactions.
  const recent = sessionKey ? stream.recentForSession(sessionKey, 15) : [];
  if (recent.length > 0) {
    const sends = recent.filter((event) => event.kind.startsWith("telegram.send."));
    const reactions = recent.filter((event) => event.kind.startsWith("emoji."));
    const countSends = (kind) => sends.filter((event) => event.kind === kind).length;
    const forbidden = countSends("telegram.send.forbidden");
    const rateLimited = countSends("telegram.send.rate_limited");
    if (forbidden > 0) {
      lines.push(`This chat has refused ${forbidden} recent send attempt(s) (e.g. no rights to post). Treat as a strong silence signal.`);
    }
    if (rateLimited > 0) {
      lines.push(`This chat rate-limited ${rateLimited} recent send(s). Slow cadence.`);
    }
    if (reactions.length > 0) {
      const reactSummary = reactions
        .filter((event) => event.kind === "emoji.reaction.received")
        .map((event) => event.emoji)
        .join("");
      if (reactSummary) {
        lines.push(`Recent inbound reactions in this chat: ${reactSummary}`);
      }
    }
  }

  if (lines.length === 0) {
    return "";
  }
  return ["## System Observations (broker, PID, capacity, send outcomes)", ...lines].join("\n");
}
615771
+ var PER_SESSION_BUFFER, GLOBAL_BUFFER, SoulObservationStream, _stream, _brokerSubscribed;
615772
/**
 * Lazy module initialiser for the observation stream.
 *
 * `SoulObservationStream` keeps bounded in-memory buffers of events — one per
 * session key (60 entries) and one global (200 entries) — and notifies
 * live subscribers of every event.
 */
var init_soul_observations = __esm({
  "packages/cli/src/tui/soul-observations.ts"() {
    "use strict";
    init_dist5();
    init_pid_controller();
    PER_SESSION_BUFFER = 60;
    GLOBAL_BUFFER = 200;
    SoulObservationStream = class {
      _bySession = /* @__PURE__ */ new Map();
      _global = [];
      _listeners = /* @__PURE__ */ new Set();
      /**
       * Record an event. Events carrying a truthy `sessionKey` also go into
       * that session's buffer. The oldest entry is dropped once a buffer
       * exceeds its limit. Exceptions thrown by listeners are ignored so one
       * bad subscriber cannot block the others.
       */
      emit(event) {
        if ("sessionKey" in event && event.sessionKey) {
          let sessionEvents = this._bySession.get(event.sessionKey);
          if (!sessionEvents) {
            sessionEvents = [];
            this._bySession.set(event.sessionKey, sessionEvents);
          }
          sessionEvents.push(event);
          if (sessionEvents.length > PER_SESSION_BUFFER) {
            sessionEvents.shift();
          }
        }
        this._global.push(event);
        if (this._global.length > GLOBAL_BUFFER) {
          this._global.shift();
        }
        this._listeners.forEach((listener) => {
          try {
            listener(event);
          } catch {
          }
        });
      }
      /** Subscribe to all events; returns a function that removes the listener. */
      subscribe(listener) {
        this._listeners.add(listener);
        return () => this._listeners.delete(listener);
      }
      /** Up to `limit` most recent events for a session, oldest first. */
      recentForSession(sessionKey, limit = 20) {
        const sessionEvents = this._bySession.get(sessionKey) ?? [];
        return sessionEvents.slice(-limit);
      }
      /** Up to `limit` most recent events overall, oldest first. */
      recentGlobal(limit = 30) {
        return this._global.slice(-limit);
      }
      /** Clear all buffered events; subscribed listeners are kept. */
      reset() {
        this._bySession.clear();
        this._global.length = 0;
      }
    };
    _stream = null;
    _brokerSubscribed = false;
  }
});
615826
+
615181
615827
  // packages/cli/src/tui/telegram-channel-dmn.ts
615182
615828
  import { existsSync as existsSync115, mkdirSync as mkdirSync65, readdirSync as readdirSync40, readFileSync as readFileSync94, writeFileSync as writeFileSync59 } from "node:fs";
615183
615829
  import { join as join129 } from "node:path";
@@ -615276,7 +615922,7 @@ function buildReplyOpportunities(input, openQuestions) {
615276
615922
  function daydreamOpportunityId(input, trigger) {
615277
615923
  return createHash23("sha1").update(`${input.sessionKey}:${input.generatedAtMs}:${trigger}`).digest("hex").slice(0, 16);
615278
615924
  }
615279
- function clamp018(value2) {
615925
/** Clamp `value2` to [0, 1]; values that are not finite numbers become 0. */
function clamp019(value2) {
  return Number.isFinite(value2) ? Math.max(0, Math.min(1, value2)) : 0;
}
@@ -615287,7 +615933,7 @@ function pushStimulationSignal(signals, signal, source, weight) {
615287
615933
  const cleanSignal = compactLine2(signal, 120);
615288
615934
  const cleanSource = compactLine2(source, 180);
615289
615935
  if (!cleanSignal || signals.some((entry) => entry.signal === cleanSignal && entry.source === cleanSource)) return;
615290
- signals.push({ signal: cleanSignal, source: cleanSource, weight: clamp018(weight) });
615936
+ signals.push({ signal: cleanSignal, source: cleanSource, weight: clamp019(weight) });
615291
615937
  }
615292
615938
  function buildMetaAnalysisSignals(input) {
615293
615939
  const chatLabel = input.chatTitle || input.chatId;
@@ -615362,7 +616008,7 @@ function buildCuriosityThreads(input, openQuestions, stimulationSignals) {
615362
616008
  question: text.endsWith("?") || text.endsWith("?") ? text : `What should be learned or clarified from: ${text || entry.mediaSummary || "recent media"}?`,
615363
616009
  rationale: "Human curiosity, uncertainty, or multimodal content makes this a useful idle exploration target.",
615364
616010
  sourceMessages: messageId,
615365
- intensity: clamp018(0.5 + replyBoost + mediaBoost + questionBoost)
616011
+ intensity: clamp019(0.5 + replyBoost + mediaBoost + questionBoost)
615366
616012
  });
615367
616013
  }
615368
616014
  for (const question of openQuestions.slice(-4)) {
@@ -615382,7 +616028,7 @@ function buildCuriosityThreads(input, openQuestions, stimulationSignals) {
615382
616028
  question: `Is there a useful clarification or memory consolidation around ${strongest.source}?`,
615383
616029
  rationale: "Strongest stimulation signal can seed a low-intrusion reflection target.",
615384
616030
  sourceMessages: [],
615385
- intensity: clamp018(strongest.weight * 0.72)
616031
+ intensity: clamp019(strongest.weight * 0.72)
615386
616032
  });
615387
616033
  }
615388
616034
  return threads.sort((a2, b) => b.intensity - a2.intensity).slice(0, 8);
@@ -615456,7 +616102,7 @@ function buildOutreachPlans(input, curiosityThreads) {
615456
616102
  purpose: "Continue the public thread only when the live model judges that the group would benefit from a concise follow-up.",
615457
616103
  draftIntent: "Ask one concrete clarification, offer one useful synthesis, or stay silent if the room has moved on.",
615458
616104
  gate: "model_decision",
615459
- confidence: clamp018(thread.intensity * 0.86)
616105
+ confidence: clamp019(thread.intensity * 0.86)
615460
616106
  });
615461
616107
  const participant = participantForThread(input, thread);
615462
616108
  if (!participant) continue;
@@ -615468,7 +616114,7 @@ function buildOutreachPlans(input, curiosityThreads) {
615468
616114
  purpose: "Offer a one-to-one follow-up only if private contact is allowed and the issue is personal, unresolved, or better handled outside the group.",
615469
616115
  draftIntent: "Reference the public thread briefly, ask permission to continue privately, and do not reveal hidden meta-analysis.",
615470
616116
  gate: "admin_review",
615471
- confidence: clamp018(thread.intensity * 0.58)
616117
+ confidence: clamp019(thread.intensity * 0.58)
615472
616118
  });
615473
616119
  }
615474
616120
  return plans.slice(0, 8);
@@ -616571,7 +617217,7 @@ function numberOr(value2, fallback) {
/**
 * Type guard for usable numeric values.
 *
 * @param {unknown} value2 - Candidate value of any type.
 * @returns {boolean} `true` only when `value2` is a primitive number that is
 *   neither `NaN` nor `±Infinity`; `false` for everything else (including
 *   numeric strings, which are not coerced).
 */
function isNumber(value2) {
  return typeof value2 === "number" && Number.isFinite(value2);
}
/**
 * Clamp a score into the closed interval [0, 1].
 *
 * Anything that is not a finite number is treated as 0 before clamping. Note
 * that this includes `+Infinity`, which therefore yields 0 rather than 1, and
 * numeric strings, which are not coerced.
 *
 * @param {unknown} value2 - Raw score.
 * @returns {number} A number in [0, 1].
 */
function clamp0110(value2) {
  return Math.max(0, Math.min(1, Number.isFinite(value2) ? value2 : 0));
}
616577
617223
  function iso(ts) {
@@ -616718,8 +617364,8 @@ function normalizeRelationship(raw) {
616718
617364
  kind: value2.kind,
616719
617365
  fromKey: String(value2.fromKey),
616720
617366
  toKey: String(value2.toKey),
616721
- confidence: clamp019(numberOr(value2.confidence, 0)),
616722
- weight: clamp019(numberOr(value2.weight, 0)),
617367
+ confidence: clamp0110(numberOr(value2.confidence, 0)),
617368
+ weight: clamp0110(numberOr(value2.weight, 0)),
616723
617369
  firstSeenAt: numberOr(value2.firstSeenAt, Date.now()),
616724
617370
  lastSeenAt: numberOr(value2.lastSeenAt, Date.now()),
616725
617371
  evidenceMessageIds: Array.isArray(value2.evidenceMessageIds) ? value2.evidenceMessageIds.filter(isNumber).slice(-40) : [],
@@ -616738,7 +617384,7 @@ function normalizePreferences(raw) {
616738
617384
  if (!evidence || typeof evidence !== "object") continue;
616739
617385
  out[actorKey][key] = {
616740
617386
  value: Math.max(-1, Math.min(1, numberOr(evidence.value, 0))),
616741
- confidence: clamp019(numberOr(evidence.confidence, 0)),
617387
+ confidence: clamp0110(numberOr(evidence.confidence, 0)),
616742
617388
  updatedAt: numberOr(evidence.updatedAt, Date.now()),
616743
617389
  evidenceMessageIds: Array.isArray(evidence.evidenceMessageIds) ? evidence.evidenceMessageIds.filter(isNumber).slice(-12) : [],
616744
617390
  note: compactOptional(evidence.note, 220)
@@ -616796,7 +617442,7 @@ function normalizeOutcome(raw) {
616796
617442
  replyToMessageId: typeof value2.replyToMessageId === "number" ? value2.replyToMessageId : void 0,
616797
617443
  route: value2.route === "action" ? "action" : "chat",
616798
617444
  shouldReply: value2.shouldReply === true,
616799
- confidence: clamp019(numberOr(value2.confidence, 0)),
617445
+ confidence: clamp0110(numberOr(value2.confidence, 0)),
616800
617446
  reason: compact2(value2.reason || "", 280),
616801
617447
  source: compact2(value2.source || "unknown", 80),
616802
617448
  silentDisposition: compactOptional(value2.silentDisposition, 280),
@@ -616808,7 +617454,7 @@ function normalizeOutcome(raw) {
616808
617454
  scenarioNote: compactOptional(value2.scenarioNote, 360),
616809
617455
  scenarioId: compactOptional(value2.scenarioId, 160),
616810
617456
  scenarioLabel: compactOptional(value2.scenarioLabel, 160),
616811
- scenarioConfidence: typeof value2.scenarioConfidence === "number" && Number.isFinite(value2.scenarioConfidence) ? clamp019(value2.scenarioConfidence) : void 0,
617457
+ scenarioConfidence: typeof value2.scenarioConfidence === "number" && Number.isFinite(value2.scenarioConfidence) ? clamp0110(value2.scenarioConfidence) : void 0,
616812
617458
  scenarioObjective: compactOptional(value2.scenarioObjective, 360),
616813
617459
  scenarioStateLoop: compactOptional(value2.scenarioStateLoop, 360),
616814
617460
  salienceSignals: Array.isArray(value2.salienceSignals) ? value2.salienceSignals.map(String).slice(0, 16) : [],
@@ -616826,7 +617472,7 @@ function normalizeDaydreamOpportunity(raw) {
616826
617472
  artifactId: String(value2.artifactId || "unknown"),
616827
617473
  generatedAt: String(value2.generatedAt || (/* @__PURE__ */ new Date()).toISOString()),
616828
617474
  trigger: compact2(value2.trigger || "", 240),
616829
- confidence: clamp019(numberOr(value2.confidence, 0)),
617475
+ confidence: clamp0110(numberOr(value2.confidence, 0)),
616830
617476
  lifecycle,
616831
617477
  firstSeenAt: numberOr(value2.firstSeenAt, Date.now()),
616832
617478
  updatedAt: numberOr(value2.updatedAt, Date.now()),
@@ -616883,7 +617529,7 @@ function commitTelegramSocialDecision(state, input) {
616883
617529
  replyToMessageId: input.replyToMessageId,
616884
617530
  route: input.route,
616885
617531
  shouldReply: input.shouldReply,
616886
- confidence: clamp019(input.confidence),
617532
+ confidence: clamp0110(input.confidence),
616887
617533
  reason: compact2(input.reason, 280),
616888
617534
  source: compact2(input.source, 80),
616889
617535
  silentDisposition: compactOptional(input.silentDisposition, 280),
@@ -616895,7 +617541,7 @@ function commitTelegramSocialDecision(state, input) {
616895
617541
  scenarioNote: compactOptional(input.scenarioNote, 360),
616896
617542
  scenarioId: compactOptional(input.scenarioId, 160),
616897
617543
  scenarioLabel: compactOptional(input.scenarioLabel, 160),
616898
- scenarioConfidence: input.scenarioConfidence === void 0 ? void 0 : clamp019(input.scenarioConfidence),
617544
+ scenarioConfidence: input.scenarioConfidence === void 0 ? void 0 : clamp0110(input.scenarioConfidence),
616899
617545
  scenarioObjective: compactOptional(input.scenarioObjective, 360),
616900
617546
  scenarioStateLoop: compactOptional(input.scenarioStateLoop, 360),
616901
617547
  salienceSignals: [...new Set((input.salienceSignals ?? []).map(String))].slice(0, 16),
@@ -616919,7 +617565,7 @@ function registerDaydreamOpportunities(state, opportunities, now = Date.now()) {
616919
617565
  artifactId: opportunity.artifactId || "unknown",
616920
617566
  generatedAt: opportunity.generatedAt || new Date(now).toISOString(),
616921
617567
  trigger: compact2(opportunity.trigger, 240),
616922
- confidence: clamp019(opportunity.confidence),
617568
+ confidence: clamp0110(opportunity.confidence),
616923
617569
  lifecycle: "proposed",
616924
617570
  firstSeenAt: now,
616925
617571
  updatedAt: now,
@@ -616929,7 +617575,7 @@ function registerDaydreamOpportunities(state, opportunities, now = Date.now()) {
616929
617575
  };
616930
617576
  if (existing) {
616931
617577
  item.trigger = compact2(opportunity.trigger, 240) || item.trigger;
616932
- item.confidence = clamp019(opportunity.confidence);
617578
+ item.confidence = clamp0110(opportunity.confidence);
616933
617579
  item.updatedAt = now;
616934
617580
  }
616935
617581
  state.daydreamOpportunities[id] = item;
@@ -617069,8 +617715,8 @@ function upsertRelationship(state, kind, fromKey, toKey, messageId, confidence2,
617069
617715
  evidenceMessageIds: [],
617070
617716
  source
617071
617717
  };
617072
- edge.confidence = Math.max(edge.confidence, clamp019(confidence2));
617073
- edge.weight = Math.min(1, edge.weight + 0.12 + clamp019(confidence2) * 0.2);
617718
+ edge.confidence = Math.max(edge.confidence, clamp0110(confidence2));
617719
+ edge.weight = Math.min(1, edge.weight + 0.12 + clamp0110(confidence2) * 0.2);
617074
617720
  edge.lastSeenAt = now;
617075
617721
  edge.evidenceMessageIds = appendUnique(edge.evidenceMessageIds, messageId, 40);
617076
617722
  edge.note = compactOptional(note, 260) || edge.note;
@@ -617112,7 +617758,7 @@ function setPreference(vector, key, value2, confidence2, messageId, now, note) {
617112
617758
  const existing = vector[key];
617113
617759
  vector[key] = {
617114
617760
  value: existing ? existing.value * 0.7 + value2 * 0.3 : value2,
617115
- confidence: Math.max(existing?.confidence ?? 0, clamp019(confidence2)),
617761
+ confidence: Math.max(existing?.confidence ?? 0, clamp0110(confidence2)),
617116
617762
  updatedAt: now,
617117
617763
  evidenceMessageIds: appendUnique(existing?.evidenceMessageIds ?? [], messageId, 12),
617118
617764
  note
@@ -617503,9 +618149,57 @@ function parseTelegramSilentReflectionNotes(text) {
617503
618149
  }
617504
618150
  return null;
617505
618151
  }
617506
- function telegramRouterTimeoutMs(configTimeoutMs, minMs = 12e4, _legacyMaxMs) {
617507
- const configured = Number.isFinite(configTimeoutMs) && (configTimeoutMs ?? 0) > 0 ? configTimeoutMs : 3e5;
617508
- return Math.max(configured, minMs, 12e4);
/**
 * Cheap, tokenizer-free estimate of how many prompt tokens a chat request
 * will occupy.
 *
 * The estimate is purely character based:
 *   - every string `content`, or every string `text` of an array-style
 *     content part, contributes its length;
 *   - every message adds a flat 8 characters of overhead;
 *   - every tool definition adds a flat 600 characters;
 *   - the character total is divided by 4 and rounded up.
 *
 * Missing or malformed pieces (no request, no `messages`, null message
 * entries, non-text parts) contribute nothing instead of throwing, matching
 * the tolerance the part loop already has via `part?.text`.
 *
 * @param {{ messages?: Array<{ content?: unknown }>, tools?: unknown[] }} request
 * @returns {number} Non-negative integer token estimate.
 */
function estimatePromptTokensFromRequest(request) {
  const CHARS_PER_TOKEN = 4;
  const PER_MESSAGE_OVERHEAD_CHARS = 8;
  const PER_TOOL_CHARS = 600;

  const messages = Array.isArray(request?.messages) ? request.messages : [];
  let chars = 0;
  for (const message of messages) {
    const content = message?.content;
    if (typeof content === "string") {
      chars += content.length;
    } else if (Array.isArray(content)) {
      for (const part of content) {
        if (typeof part?.text === "string") chars += part.text.length;
      }
    }
    chars += PER_MESSAGE_OVERHEAD_CHARS;
  }

  const tools = request?.tools;
  if (Array.isArray(tools) && tools.length > 0) {
    chars += tools.length * PER_TOOL_CHARS;
  }
  return Math.ceil(chars / CHARS_PER_TOKEN);
}
/**
 * Heuristically decide whether `text` looks like a router-decision JSON
 * object that was cut off before it was closed.
 *
 * Steps:
 *   1. Non-strings are rejected.
 *   2. One leading `<think>…</think>` block (case-insensitive) is stripped.
 *   3. The remainder must start with `{`.
 *   4. A single pass tracks brace depth, ignoring braces inside double-quoted
 *      strings and honouring backslash escapes. Only `{`/`}` are counted;
 *      square brackets are not tracked.
 *   5. The object counts as truncated only if at least one `{` is still open
 *      at the end AND at least three of the known router field names appear
 *      somewhere in the text (so arbitrary unbalanced text is not flagged).
 *
 * @param {unknown} text - Raw model output.
 * @returns {boolean} `true` when the output looks like truncated router JSON.
 */
function isLikelyTruncatedRouterJson(text) {
  if (typeof text !== "string") return false;

  const stripped = text.replace(/^\s*<think>[\s\S]*?<\/think>\s*/i, "").trim();
  if (!stripped.startsWith("{")) return false;

  let depth = 0;
  let inString = false;
  let escaped = false;
  // All characters of interest are ASCII, so code-point iteration is
  // equivalent to indexing by UTF-16 unit here.
  for (const ch of stripped) {
    if (escaped) {
      // Character following a backslash inside a string: consume verbatim.
      escaped = false;
    } else if (inString) {
      if (ch === "\\") escaped = true;
      else if (ch === '"') inString = false;
    } else if (ch === '"') {
      inString = true;
    } else if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
    }
  }
  // Balanced (or over-closed) braces mean the object was not cut off.
  if (depth <= 0) return false;

  const routerFieldMarkers = [
    '"route"',
    '"should_reply"',
    '"confidence"',
    '"reason"',
    '"silent_disposition"',
    '"mental_note"'
  ];
  const hits = routerFieldMarkers.filter((marker) => stripped.includes(marker)).length;
  return hits >= 3;
}
/**
 * Resolve the timeout (in ms) handed to Telegram router inference requests.
 *
 * The result is never below the liveness window:
 *   - the liveness window is `OMNIUS_TG_INFERENCE_LIVENESS_MS` when that
 *     environment variable parses (base 10) to an integer >= 10000,
 *     otherwise 600000;
 *   - a finite numeric `configTimeoutMs` at or above the window is returned
 *     as-is; anything else (smaller, non-finite, non-number, missing) yields
 *     the window.
 *
 * @param {number} [configTimeoutMs] - Timeout from configuration.
 * @param {number} [_minMs] - Ignored; kept so existing call sites that still
 *   pass a minimum keep working.
 * @param {number} [_legacyMaxMs] - Ignored; kept for the same reason.
 * @returns {number} Timeout in milliseconds.
 */
function telegramRouterTimeoutMs(configTimeoutMs, _minMs, _legacyMaxMs) {
  const DEFAULT_LIVENESS_MS = 6e5;
  const MIN_ENV_LIVENESS_MS = 1e4;

  const envValue = Number.parseInt(process.env["OMNIUS_TG_INFERENCE_LIVENESS_MS"] ?? "", 10);
  const livenessMs = Number.isFinite(envValue) && envValue >= MIN_ENV_LIVENESS_MS ? envValue : DEFAULT_LIVENESS_MS;

  // Number.isFinite is false for undefined/null/strings, so no extra
  // nullish guard is needed before comparing.
  return Number.isFinite(configTimeoutMs) ? Math.max(configTimeoutMs, livenessMs) : livenessMs;
}
617510
618204
  function telegramThinkSuppressedRequest(request) {
617511
618205
  const messages2 = Array.isArray(request.messages) ? request.messages.slice() : [];
@@ -618960,6 +619654,9 @@ var init_telegram_bridge = __esm({
618960
619654
  init_telegram_creative_tools();
618961
619655
  init_omnius_directory();
618962
619656
  init_stimulation();
619657
+ init_pid_controller();
619658
+ init_component_benefit();
619659
+ init_soul_observations();
618963
619660
  init_identity_memory_tool();
618964
619661
  init_visual_identity_association();
618965
619662
  init_telegram_channel_dmn();
@@ -622925,6 +623622,12 @@ ${lines.join("\n")}`);
622925
623622
  this.ensureTelegramConversationLoaded(sessionKey);
622926
623623
  const history = this.chatHistory.get(sessionKey) ?? [];
622927
623624
  const participants = [...this.chatParticipants.get(sessionKey)?.values() ?? []].sort((a2, b) => b.lastSeenTs - a2.lastSeenTs);
623625
+ const modelKey = this.agentConfig?.model ?? "?";
623626
+ const pidReg = getPidRegistry();
623627
+ const tier1Ratio = pidReg.output(`tier1.${modelKey}`);
623628
+ const tier2Ratio = pidReg.output(`tier2.${modelKey}`);
623629
+ const benefitReg = getComponentBenefitRegistry();
623630
+ const sampledComponents = [];
622928
623631
  const isGroup = msg.chatType !== "private";
622929
623632
  const retainedCount = history.length;
622930
623633
  const olderCount = Math.max(0, retainedCount - maxRecent);
@@ -622958,14 +623661,28 @@ ${lines.join("\n")}`);
622958
623661
  sections.push(socialStateContext);
622959
623662
  }
622960
623663
  if (participants.length > 0) {
622961
- const participantLines = participants.slice(0, 12).map((profile) => {
623664
+ const fullCount = Math.min(12, participants.length);
623665
+ const tier1Count = Math.max(1, Math.round(fullCount * tier1Ratio));
623666
+ const sortedByBenefit = participants.slice(0, fullCount).sort((a2, b) => {
623667
+ const scoreA = benefitReg.score(sessionKey, `tier1.participant.${a2.username ?? a2.fromUserId}`);
623668
+ const scoreB = benefitReg.score(sessionKey, `tier1.participant.${b.username ?? b.fromUserId}`);
623669
+ return scoreB - scoreA;
623670
+ });
623671
+ const selected = sortedByBenefit.slice(0, tier1Count);
623672
+ const participantLines = selected.map((profile) => {
622962
623673
  const label = profile.username && profile.username !== "unknown" ? `@${profile.username}` : profile.firstName || `user:${profile.fromUserId}`;
622963
623674
  const tones = [...profile.toneTags].slice(0, 5).join(", ") || "neutral";
622964
623675
  const direct = profile.directAddressCount ? `, direct-addresses:${profile.directAddressCount}` : "";
622965
623676
  const replies = profile.replyCount ? `, replies:${profile.replyCount}` : "";
623677
+ sampledComponents.push({
623678
+ key: `tier1.participant.${profile.username ?? profile.fromUserId}`,
623679
+ needle: profile.username ?? String(profile.fromUserId)
623680
+ });
622966
623681
  return `- ${label} [${telegramActorKindLabel(profile)}]: messages:${profile.messageCount}${direct}${replies}; tone:${tones}; last=${telegramContextJsonString(profile.lastMessage, 180)}`;
622967
623682
  });
622968
- sections.push(`### Participants And Relationship Signals
623683
+ const shed = fullCount - tier1Count;
623684
+ const tierNote = shed > 0 ? ` (tier1 u=${tier1Ratio.toFixed(2)}; ${shed} participants shed by benefit)` : "";
623685
+ sections.push(`### Participants And Relationship Signals${tierNote}
622969
623686
  ${participantLines.join("\n")}`);
622970
623687
  }
622971
623688
  const associativeContext = this.relevantTelegramAssociativeMemoryContext(
@@ -622997,16 +623714,32 @@ ${participantLines.join("\n")}`);
622997
623714
  }
622998
623715
  const memoryCards = this.relevantTelegramMemoryCards(sessionKey, msg, isGroup ? 10 : 6);
622999
623716
  if (memoryCards.length > 0) {
623000
- const cardLines = memoryCards.map(({ card, score }) => {
623001
- const tags = card.tags.length ? ` tags:${card.tags.slice(0, 8).join(",")}` : "";
623002
- const speakers = card.speakers.length ? ` speakers:${card.speakers.join(", ")}` : "";
623003
- const relevance = score > 0 ? ` relevance:${score.toFixed(2)}` : " relevance:recent";
623004
- const notes2 = card.notes.slice(-3).map((note) => ` - note=${telegramContextJsonString(note, 220)}`).join("\n");
623005
- return `- ${card.title} (${card.id};${relevance};${speakers}${tags})
623717
+ const fullMC = memoryCards.length;
623718
+ const tier2Count = Math.max(0, Math.round(fullMC * tier2Ratio));
623719
+ const sortedMC = [...memoryCards].sort((a2, b) => {
623720
+ const scoreA = benefitReg.score(sessionKey, `tier2.memory_card.${a2.card.id}`);
623721
+ const scoreB = benefitReg.score(sessionKey, `tier2.memory_card.${b.card.id}`);
623722
+ return scoreB - scoreA;
623723
+ });
623724
+ const selectedMC = sortedMC.slice(0, tier2Count);
623725
+ if (selectedMC.length > 0) {
623726
+ const cardLines = selectedMC.map(({ card, score }) => {
623727
+ const tags = card.tags.length ? ` tags:${card.tags.slice(0, 8).join(",")}` : "";
623728
+ const speakers = card.speakers.length ? ` speakers:${card.speakers.join(", ")}` : "";
623729
+ const relevance = score > 0 ? ` relevance:${score.toFixed(2)}` : " relevance:recent";
623730
+ const notes2 = card.notes.slice(-3).map((note) => ` - note=${telegramContextJsonString(note, 220)}`).join("\n");
623731
+ sampledComponents.push({
623732
+ key: `tier2.memory_card.${card.id}`,
623733
+ needle: card.id
623734
+ });
623735
+ return `- ${card.title} (${card.id};${relevance};${speakers}${tags})
623006
623736
  ${notes2}`;
623007
- });
623008
- sections.push(`### Zettelkasten Memory Recall (untrusted conversation notes)
623737
+ });
623738
+ const shed = fullMC - tier2Count;
623739
+ const tierNote = shed > 0 ? ` (tier2 u=${tier2Ratio.toFixed(2)}; ${shed} cards shed by benefit)` : "";
623740
+ sections.push(`### Zettelkasten Memory Recall (untrusted conversation notes)${tierNote}
623009
623741
  ${cardLines.join("\n")}`);
623742
+ }
623010
623743
  }
623011
623744
  const channelDaydream = this.formatLatestTelegramChannelDaydreamContext(sessionKey);
623012
623745
  if (channelDaydream) {
@@ -623079,6 +623812,7 @@ ${lines.join("\n")}`);
623079
623812
  `- If the current sender asks what you see or remember, answer from this stream instead of saying the history is gone.`
623080
623813
  ].join("\n")
623081
623814
  );
623815
+ this.telegramStashContextSamples(sessionKey, sampledComponents);
623082
623816
  return sections.join("\n\n");
623083
623817
  }
623084
623818
  maybeLogTelegramGroupSkip(msg, reason) {
@@ -623141,6 +623875,25 @@ ${lines.join("\n")}`);
623141
623875
  nextAnalysisAfterMessages: decision2.nextCheckAfterMessages
623142
623876
  });
623143
623877
  }
623878
+ /**
623879
+ * Collect the per-component benefit samples that were tagged when assembling
623880
+ * the last context stream for this session. Returns the same shape the
623881
+ * component-benefit registry consumes; an empty list means no tier-1/tier-2
623882
+ * components were emitted (early return — benefit tracking skipped).
623883
+ *
623884
+ * Tags are stored on `_telegramLastContextSamples` (a per-session Map keyed
623885
+ * by sessionKey) so the post-call feedback knows what to score without
623886
+ * re-running the context assembly.
623887
+ */
623888
+ telegramComponentSamplesForSession(sessionKey) {
623889
+ return this._telegramLastContextSamples.get(sessionKey) ?? [];
623890
+ }
623891
+ /** Per-session cache of last emitted context-component samples. */
623892
+ _telegramLastContextSamples = /* @__PURE__ */ new Map();
623893
+ /** Stash samples for the next post-call feedback cycle. */
623894
+ telegramStashContextSamples(sessionKey, samples) {
623895
+ this._telegramLastContextSamples.set(sessionKey, samples);
623896
+ }
623144
623897
  buildTelegramRouterPersonaContext(sessionKey, msg, toolContext, selfIdentityContext) {
623145
623898
  const baseContract = toolContext === "telegram-admin-dm" ? ADMIN_DM_PROMPT : toolContext === "telegram-admin-group" ? ADMIN_GROUP_PROMPT : TELEGRAM_SAFETY_PROMPT;
623146
623899
  return buildSoulContext({
@@ -623249,7 +624002,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
623249
624002
  ],
623250
624003
  tools: [],
623251
624004
  temperature: 0,
623252
- maxTokens: 650,
624005
+ // Reflection has 12 string fields; 650 was tight enough to truncate.
624006
+ maxTokens: 1500,
623253
624007
  timeoutMs: telegramRouterTimeoutMs(timeoutMs),
623254
624008
  think: false
623255
624009
  },
@@ -623335,30 +624089,58 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
623335
624089
  * hard-deadline retire path becomes diagnosable instead of opaque
623336
624090
  */
623337
624091
  async telegramObservableInference(backend, request, kind, sessionKey) {
624092
+ const model = this.agentConfig?.model ?? "?";
624093
+ const promptTokens = estimatePromptTokensFromRequest(request);
624094
+ const broker = getModelBroker();
624095
+ const trainCtx = await broker.getNctxTrain(model).catch(() => null);
624096
+ const completionHeadroom = 4096;
624097
+ const targetCtx = trainCtx && trainCtx > 0 ? Math.min(trainCtx, Math.max(2048, promptTokens + completionHeadroom)) : Math.max(2048, promptTokens + completionHeadroom);
624098
+ const requestWithCtx = { ...request, numCtx: targetCtx };
624099
+ const slot = await broker.acquireInferenceSlot({
624100
+ model,
624101
+ domain: "chat",
624102
+ owner: `telegram-bridge/${kind}`,
624103
+ sessionKey,
624104
+ promptTokens,
624105
+ priority: kind === "router" || kind === "router-repair" || kind === "router-strict-retry" ? 1 : 0
624106
+ });
624107
+ if (process.env["OMNIUS_BROKER_TRACE"] === "1") {
624108
+ this.tuiWrite(() => renderTelegramSubAgentEvent(
624109
+ sessionKey,
624110
+ `inference admitted [${kind}] model=${model} prompt~${promptTokens}t num_ctx=${targetCtx} slot=${slot.info.id}${slot.info.reserved ? " reserved" : ""}`
624111
+ ));
624112
+ }
623338
624113
  const streamFn = backend.chatCompletionStream;
623339
- const id = this.registerTelegramInference(kind, sessionKey, this.agentConfig?.model ?? "?");
624114
+ const id = this.registerTelegramInference(kind, sessionKey, model);
624115
+ let completionTokens = 0;
623340
624116
  try {
624117
+ let result;
623341
624118
  if (typeof streamFn !== "function") {
623342
- const r2 = await backend.chatCompletion(request);
623343
- this.updateTelegramInferenceFinal(id, r2);
623344
- return r2;
623345
- }
623346
- try {
623347
- const result = await this.streamTelegramInferenceToCompletion(
623348
- streamFn.bind(backend),
623349
- request,
623350
- id
623351
- );
623352
- return result;
623353
- } catch (streamErr) {
623354
- const r2 = await backend.chatCompletion(request);
623355
- this.updateTelegramInferenceFinal(id, r2);
623356
- this.tuiWrite(() => renderTelegramSubAgentEvent(
623357
- sessionKey,
623358
- `inference ${id}: stream errored (${streamErr instanceof Error ? streamErr.message : String(streamErr)}); fell back to non-stream`
623359
- ));
623360
- return r2;
624119
+ result = await backend.chatCompletion(requestWithCtx);
624120
+ this.updateTelegramInferenceFinal(id, result);
624121
+ } else {
624122
+ try {
624123
+ result = await this.streamTelegramInferenceToCompletion(
624124
+ streamFn.bind(backend),
624125
+ requestWithCtx,
624126
+ id
624127
+ );
624128
+ } catch (streamErr) {
624129
+ result = await backend.chatCompletion(requestWithCtx);
624130
+ this.updateTelegramInferenceFinal(id, result);
624131
+ this.tuiWrite(() => renderTelegramSubAgentEvent(
624132
+ sessionKey,
624133
+ `inference ${id}: stream errored (${streamErr instanceof Error ? streamErr.message : String(streamErr)}); fell back to non-stream`
624134
+ ));
624135
+ }
623361
624136
  }
624137
+ const usage = result.usage;
624138
+ completionTokens = usage?.completion_tokens ?? 0;
624139
+ slot.release({ ok: true, completionTokens });
624140
+ return result;
624141
+ } catch (err) {
624142
+ slot.release({ ok: false, error: err instanceof Error ? err.message : String(err) });
624143
+ throw err;
623362
624144
  } finally {
623363
624145
  this.deregisterTelegramInference(id);
623364
624146
  }
@@ -623549,7 +624331,7 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
623549
624331
  getTelegramThinkingVisible() {
623550
624332
  return this.telegramThinkingVisible;
623551
624333
  }
623552
- async repairTelegramInteractionDecision(backend, rawOutput, forcedRoute, timeoutMs, diagnostics) {
624334
+ async repairTelegramInteractionDecision(backend, rawOutput, forcedRoute, timeoutMs, diagnostics, sessionKey = "__router__") {
623553
624335
  const rawPreview = telegramRouterRawPreview(rawOutput, 4e3);
623554
624336
  if (!rawPreview || telegramDecisionOutputHasDanglingJson(rawOutput)) {
623555
624337
  if (diagnostics) {
@@ -623584,10 +624366,10 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`,
623584
624366
  ],
623585
624367
  tools: [],
623586
624368
  temperature: 0,
623587
- maxTokens: 500,
624369
+ maxTokens: 1500,
623588
624370
  timeoutMs: telegramRouterTimeoutMs(timeoutMs, 8e3, 2e4),
623589
624371
  think: false
623590
- });
624372
+ }, diagnostics, "router-repair", sessionKey);
623591
624373
  const repairedText = result.choices[0]?.message?.content ?? "";
623592
624374
  if (telegramDecisionRecoverableFlag(repairedText) === false) {
623593
624375
  if (diagnostics) diagnostics.repairStatus = "no-recoverable-output";
@@ -623619,7 +624401,7 @@ ${repairedText}`,
623619
624401
  return null;
623620
624402
  }
623621
624403
  }
623622
- async retryTelegramInteractionDecisionStrict(backend, userPrompt, rawOutput, forcedRoute, timeoutMs, diagnostics) {
624404
+ async retryTelegramInteractionDecisionStrict(backend, userPrompt, rawOutput, forcedRoute, timeoutMs, diagnostics, sessionKey = "__router__") {
623623
624405
  const invalidPreview = telegramRouterRawPreview(rawOutput, 1200) ?? "(empty assistant content)";
623624
624406
  const routeInstruction = forcedRoute ? `The operator selected Telegram mode "${forcedRoute}". The route field must be "${forcedRoute}", but should_reply must still be inferred from context.` : `Infer route live from context.`;
623625
624407
  const trimmedUserPrompt = userPrompt.length > 4e3 ? `…
@@ -623651,10 +624433,10 @@ ${userPrompt.slice(-4e3)}` : userPrompt;
623651
624433
  ],
623652
624434
  tools: [],
623653
624435
  temperature: 0,
623654
- maxTokens: 1200,
624436
+ maxTokens: 2400,
623655
624437
  timeoutMs: telegramRouterTimeoutMs(timeoutMs, 1e4, 3e4),
623656
624438
  think: false
623657
- });
624439
+ }, diagnostics, "router-strict-retry", sessionKey);
623658
624440
  const retryText = result.choices[0]?.message?.content ?? "";
623659
624441
  if (diagnostics) diagnostics.strictRetryPreview = telegramRouterRawPreview(retryText, 320);
623660
624442
  const parsed = parseTelegramInteractionDecision(retryText, forcedRoute, {
@@ -623718,33 +624500,15 @@ ${retryText}`,
623718
624500
  /**
623719
624501
  * Internal: start an actual router inference for a sessionKey, store its
623720
624502
  * in-flight promise, and on completion fire any queued trailing call.
624503
+ *
624504
+ * No watchdog timeout — the broker's admission control guarantees the
624505
+ * inference fits available compute. Inflight work always completes; new
624506
+ * work waits in the broker's bounded queue with backpressure to upstream.
624507
+ * Only the fetch-level I/O liveness AbortSignal can interrupt, and only
624508
+ * on TCP-dead.
623721
624509
  */
623722
624510
  startCoalescedTelegramRouterCall(sessionKey, msg, toolContext) {
623723
- const HARD_DEADLINE_MS = this.telegramRouterHardDeadlineMs();
623724
- const inner = this.inferTelegramInteractionDecision(msg, toolContext);
623725
- const promise = new Promise((resolve55, reject) => {
623726
- let settled = false;
623727
- const guard = setTimeout(() => {
623728
- if (settled) return;
623729
- settled = true;
623730
- reject(new Error(`router-coalescer: hard deadline exceeded (${Math.round(HARD_DEADLINE_MS / 1e3)}s); inner inference did not settle`));
623731
- }, HARD_DEADLINE_MS);
623732
- if (typeof guard.unref === "function") guard.unref();
623733
- inner.then(
623734
- (v) => {
623735
- if (settled) return;
623736
- settled = true;
623737
- clearTimeout(guard);
623738
- resolve55(v);
623739
- },
623740
- (e2) => {
623741
- if (settled) return;
623742
- settled = true;
623743
- clearTimeout(guard);
623744
- reject(e2);
623745
- }
623746
- );
623747
- });
624511
+ const promise = this.inferTelegramInteractionDecision(msg, toolContext);
623748
624512
  this.telegramRouterSessionState.set(sessionKey, { inFlight: promise });
623749
624513
  const onSettled = () => {
623750
624514
  let state;
@@ -623765,11 +624529,6 @@ ${retryText}`,
623765
624529
  promise.then(onSettled, onSettled);
623766
624530
  return promise;
623767
624531
  }
623768
- telegramRouterHardDeadlineMs() {
623769
- const raw = Number.parseInt(process.env["OMNIUS_TG_ROUTER_HARD_DEADLINE_MS"] ?? "", 10);
623770
- if (Number.isFinite(raw) && raw >= 5e3 && raw <= 18e4) return raw;
623771
- return 6e4;
623772
- }
623773
624532
  /**
623774
624533
  * Forcibly cancel every in-flight + trailing router-coalescer entry.
623775
624534
  * Used on bridge stop() and by the watchdog if it detects the coalescer
@@ -623965,30 +624724,52 @@ ${stimulationProbe.context}`,
623965
624724
  "",
623966
624725
  context2
623967
624726
  ].filter(Boolean).join("\n");
623968
- const reflectionNotes = await this.inferTelegramSilentReflectionNotes(
623969
- backend,
623970
- sessionKey,
623971
- msg,
623972
- toolContext,
623973
- personaContext,
623974
- observationContext,
623975
- config.timeoutMs
623976
- );
623977
- const reflectionContext = [
623978
- "## Silent Reflection Deliverables (must inform the attention decision)",
623979
- `silent_disposition: ${reflectionNotes.silentDisposition ?? "heard and retained"}`,
623980
- `mental_note: ${reflectionNotes.mentalNote ?? "no additional observation"}`,
623981
- `memory_note: ${reflectionNotes.memoryNote ?? "message retained in scoped memory"}`,
623982
- `relationship_note: ${reflectionNotes.relationshipNote ?? "no relationship change inferred"}`,
623983
- `procedure_note: ${reflectionNotes.procedureNote ?? "active voice-soul tree loaded; no procedure change inferred"}`,
623984
- `voice_note: ${reflectionNotes.voiceNote ?? "final voice unchanged unless reply is emitted"}`,
623985
- `scenario_note: ${reflectionNotes.scenarioNote ?? "scenario classification unavailable"}`,
623986
- `scenario_id: ${reflectionNotes.scenarioId ?? "unclassified"}`,
623987
- `scenario_label: ${reflectionNotes.scenarioLabel ?? "Unclassified"}`,
623988
- `scenario_confidence: ${reflectionNotes.scenarioConfidence !== void 0 ? reflectionNotes.scenarioConfidence.toFixed(2) : "0.00"}`,
623989
- `scenario_objective: ${reflectionNotes.scenarioObjective ?? "pending model-derived classifier output"}`,
623990
- `scenario_state_loop: ${reflectionNotes.scenarioStateLoop ?? "pending model-derived classifier output"}`
623991
- ].join("\n");
624727
+ const brokerSnap = getModelBroker().snapshot();
624728
+ const idleSlotRatio = brokerSnap.slots.capacity > 0 ? 1 - brokerSnap.slots.inUse / brokerSnap.slots.capacity : 1;
624729
+ const consolidatedMode = idleSlotRatio < 0.5 || process.env["OMNIUS_TG_FORCE_CONSOLIDATED"] === "1";
624730
+ let reflectionNotes;
624731
+ let reflectionContext;
624732
+ if (consolidatedMode) {
624733
+ reflectionNotes = this.fallbackTelegramSilentReflectionNotes(msg, "consolidated mode: reflection computed inline by router");
624734
+ reflectionContext = [
624735
+ "## Consolidated Reflection (you produce these fields as part of the same JSON)",
624736
+ "Before emitting your final decision, internally reflect on:",
624737
+ " silent_disposition: what happens silently with this message",
624738
+ " mental_note: concise observation of the turn",
624739
+ " memory_note: what scoped memory should retain or connect",
624740
+ " relationship_note: relationship/thread implication",
624741
+ " procedure_note: active tree/branch/abort implication",
624742
+ " voice_note: final voice implication if a reply happens",
624743
+ " scenario_note: identified scenario and transition state",
624744
+ " scenario_id / scenario_label / scenario_confidence / scenario_objective / scenario_state_loop",
624745
+ "Use these as your attention substrate, then decide route/should_reply/confidence. Return all fields in ONE JSON."
624746
+ ].join("\n");
624747
+ } else {
624748
+ reflectionNotes = await this.inferTelegramSilentReflectionNotes(
624749
+ backend,
624750
+ sessionKey,
624751
+ msg,
624752
+ toolContext,
624753
+ personaContext,
624754
+ observationContext,
624755
+ config.timeoutMs
624756
+ );
624757
+ reflectionContext = [
624758
+ "## Silent Reflection Deliverables (must inform the attention decision)",
624759
+ `silent_disposition: ${reflectionNotes.silentDisposition ?? "heard and retained"}`,
624760
+ `mental_note: ${reflectionNotes.mentalNote ?? "no additional observation"}`,
624761
+ `memory_note: ${reflectionNotes.memoryNote ?? "message retained in scoped memory"}`,
624762
+ `relationship_note: ${reflectionNotes.relationshipNote ?? "no relationship change inferred"}`,
624763
+ `procedure_note: ${reflectionNotes.procedureNote ?? "active voice-soul tree loaded; no procedure change inferred"}`,
624764
+ `voice_note: ${reflectionNotes.voiceNote ?? "final voice unchanged unless reply is emitted"}`,
624765
+ `scenario_note: ${reflectionNotes.scenarioNote ?? "scenario classification unavailable"}`,
624766
+ `scenario_id: ${reflectionNotes.scenarioId ?? "unclassified"}`,
624767
+ `scenario_label: ${reflectionNotes.scenarioLabel ?? "Unclassified"}`,
624768
+ `scenario_confidence: ${reflectionNotes.scenarioConfidence !== void 0 ? reflectionNotes.scenarioConfidence.toFixed(2) : "0.00"}`,
624769
+ `scenario_objective: ${reflectionNotes.scenarioObjective ?? "pending model-derived classifier output"}`,
624770
+ `scenario_state_loop: ${reflectionNotes.scenarioStateLoop ?? "pending model-derived classifier output"}`
624771
+ ].join("\n");
624772
+ }
623992
624773
  const userPrompt = [
623993
624774
  `You are the Telegram live routing and reply-discretion model.`,
623994
624775
  `The attention decision must happen after reading the silent reflection deliverables below. The notes are not decorations: they are the decision substrate.`,
@@ -624020,10 +624801,13 @@ ${stimulationProbe.context}`,
624020
624801
  ``,
624021
624802
  observationContext,
624022
624803
  ``,
624804
+ formatSystemObservations(sessionKey),
624805
+ ``,
624023
624806
  `Current Telegram message text (untrusted user data):
624024
624807
  ${this.quoteTelegramContextBlock(msg.text, 1200)}`
624025
624808
  ].filter(Boolean).join("\n");
624026
624809
  const diagnostics = {};
624810
+ const routerStartMs = Date.now();
624027
624811
  try {
624028
624812
  const result = await this.telegramRouterJsonCompletion(backend, {
624029
624813
  messages: [
@@ -624035,23 +624819,70 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
624035
624819
  ],
624036
624820
  tools: [],
624037
624821
  temperature: 0,
624038
- maxTokens: 1e3,
624822
+ // Router JSON schema has ~18 string-valued fields when reflection is
624823
+ // embedded (consolidated mode). 1000 tokens was the documented cause
624824
+ // of truncated JSON → repair → strict-retry cascade. 2400 is enough
624825
+ // for normal verbose values without slowing the call appreciably.
624826
+ maxTokens: 2400,
624039
624827
  timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
624040
624828
  think: false
624041
- }, diagnostics);
624829
+ }, diagnostics, "router", sessionKey);
624042
624830
  const text = result.choices[0]?.message?.content ?? "";
624831
+ const routerLatencyMs = Date.now() - routerStartMs;
624832
+ try {
624833
+ const pidReg = getPidRegistry();
624834
+ const modelKey = this.agentConfig?.model ?? "?";
624835
+ pidReg.sample(`tier1.${modelKey}`, routerLatencyMs);
624836
+ pidReg.sample(`tier2.${modelKey}`, routerLatencyMs);
624837
+ } catch {
624838
+ }
624839
+ try {
624840
+ const samples = this.telegramComponentSamplesForSession(sessionKey);
624841
+ if (samples.length > 0) {
624842
+ getComponentBenefitRegistry().recordOutcome(sessionKey, samples, text);
624843
+ }
624844
+ } catch {
624845
+ }
624043
624846
  const parsed = parseTelegramInteractionDecision(text, forcedRoute, {
624044
624847
  defaultShouldReply: false
624045
624848
  });
624046
624849
  if (parsed) {
624047
624850
  return this.applyTelegramSilentReflectionNotes(parsed, reflectionNotes);
624048
624851
  }
624852
+ if (isLikelyTruncatedRouterJson(text)) {
624853
+ if (diagnostics) diagnostics.repairStatus = "skipped-truncation-rerun";
624854
+ try {
624855
+ const reissued = await this.telegramRouterJsonCompletion(backend, {
624856
+ messages: [
624857
+ {
624858
+ role: "system",
624859
+ content: "You perform live Telegram route and stimulation inference. Output strict JSON only."
624860
+ },
624861
+ { role: "user", content: userPrompt }
624862
+ ],
624863
+ tools: [],
624864
+ temperature: 0,
624865
+ maxTokens: 4096,
624866
+ timeoutMs: telegramRouterTimeoutMs(config.timeoutMs),
624867
+ think: false
624868
+ }, diagnostics, "router", sessionKey);
624869
+ const reissuedText = reissued.choices[0]?.message?.content ?? "";
624870
+ const reparsed = parseTelegramInteractionDecision(reissuedText, forcedRoute, {
624871
+ defaultShouldReply: false
624872
+ });
624873
+ if (reparsed) {
624874
+ return this.applyTelegramSilentReflectionNotes(reparsed, reflectionNotes);
624875
+ }
624876
+ } catch {
624877
+ }
624878
+ }
624049
624879
  const repaired = await this.repairTelegramInteractionDecision(
624050
624880
  backend,
624051
624881
  text,
624052
624882
  forcedRoute,
624053
624883
  config.timeoutMs ?? 3e4,
624054
- diagnostics
624884
+ diagnostics,
624885
+ sessionKey
624055
624886
  );
624056
624887
  if (repaired) {
624057
624888
  return this.applyTelegramSilentReflectionNotes(repaired, reflectionNotes);
@@ -624062,7 +624893,8 @@ ${this.quoteTelegramContextBlock(msg.text, 1200)}`
624062
624893
  text,
624063
624894
  forcedRoute,
624064
624895
  config.timeoutMs ?? 3e4,
624065
- diagnostics
624896
+ diagnostics,
624897
+ sessionKey
624066
624898
  );
624067
624899
  if (strictRetry) {
624068
624900
  return this.applyTelegramSilentReflectionNotes(strictRetry, reflectionNotes);
@@ -628274,11 +629106,18 @@ ${text}`.trim());
628274
629106
  };
628275
629107
  const replyParameters = idx === 0 ? telegramReplyParameters(replyToMessageId) : void 0;
628276
629108
  if (replyParameters) body["reply_parameters"] = replyParameters;
629109
+ const sessionKeyForObs = String(chatId);
628277
629110
  try {
628278
629111
  const result = await this.apiCall("sendMessage", body);
628279
629112
  if (result.ok === false) throw new Error(String(result.description || "Telegram sendMessage failed"));
628280
629113
  this.state.messagesSent++;
628281
629114
  if (sentId === null) sentId = result.result?.message_id ?? null;
629115
+ getSoulObservationStream().emit({
629116
+ kind: "telegram.send.success",
629117
+ sessionKey: sessionKeyForObs,
629118
+ messageId: result.result?.message_id ?? void 0,
629119
+ ts: Date.now()
629120
+ });
628282
629121
  } catch {
628283
629122
  const plain = chunk.replace(/<[^>]+>/g, "");
628284
629123
  const fallbackBody = { chat_id: chatId, text: plain };
@@ -628288,8 +629127,32 @@ ${text}`.trim());
628288
629127
  if (result.ok === false) throw new Error(String(result.description || "Telegram sendMessage failed"));
628289
629128
  this.state.messagesSent++;
628290
629129
  if (sentId === null) sentId = result.result?.message_id ?? null;
629130
+ getSoulObservationStream().emit({
629131
+ kind: "telegram.send.success",
629132
+ sessionKey: sessionKeyForObs,
629133
+ messageId: result.result?.message_id ?? void 0,
629134
+ ts: Date.now()
629135
+ });
628291
629136
  } catch (err) {
628292
629137
  this.tuiWrite(() => renderWarning(`Failed to send Telegram message: ${err instanceof Error ? err.message : String(err)}`));
629138
+ const errStr = err instanceof Error ? err.message : String(err);
629139
+ const lc = errStr.toLowerCase();
629140
+ if (/(not enough rights|forbidden|chat_write_forbidden|user_banned|kicked|chat_admin_required)/.test(lc)) {
629141
+ getSoulObservationStream().emit({
629142
+ kind: "telegram.send.forbidden",
629143
+ sessionKey: sessionKeyForObs,
629144
+ reason: errStr,
629145
+ ts: Date.now()
629146
+ });
629147
+ } else if (/too many requests|retry after/.test(lc)) {
629148
+ const m2 = lc.match(/retry after (\d+)/);
629149
+ getSoulObservationStream().emit({
629150
+ kind: "telegram.send.rate_limited",
629151
+ sessionKey: sessionKeyForObs,
629152
+ retryAfterSec: m2 ? parseInt(m2[1], 10) : void 0,
629153
+ ts: Date.now()
629154
+ });
629155
+ }
628293
629156
  }
628294
629157
  }
628295
629158
  }
@@ -630726,7 +631589,7 @@ __export(voicechat_exports, {
630726
631589
  VoiceChatSession: () => VoiceChatSession
630727
631590
  });
630728
631591
  import { EventEmitter as EventEmitter13 } from "node:events";
630729
- function clamp0110(x) {
631592
+ function clamp0111(x) {
630730
631593
  return x < 0 ? 0 : x > 1 ? 1 : x;
630731
631594
  }
630732
631595
  function alnumRatio(s2) {
@@ -630765,9 +631628,9 @@ function computeSignalFromText(text, confidence2) {
630765
631628
  else score = 0.15;
630766
631629
  score -= repeatingCharPenalty(t2) * 0.4;
630767
631630
  if (typeof confidence2 === "number" && !Number.isNaN(confidence2)) {
630768
- score = 0.7 * score + 0.3 * clamp0110(confidence2);
631631
+ score = 0.7 * score + 0.3 * clamp0111(confidence2);
630769
631632
  }
630770
- return clamp0110(score);
631633
+ return clamp0111(score);
630771
631634
  }
630772
631635
  function truncateForLog(s2, n2) {
630773
631636
  return s2.length <= n2 ? s2 : s2.slice(0, n2 - 1) + "…";
@@ -631037,7 +631900,7 @@ Rules:
631037
631900
  }, MAX_SEGMENT_MS);
631038
631901
  }
631039
631902
  this.captureBuffer = text;
631040
- this.lastSignalScore = typeof snr === "number" && !Number.isNaN(snr) ? clamp0110(snr) : computeSignalFromText(text, confidence2);
631903
+ this.lastSignalScore = typeof snr === "number" && !Number.isNaN(snr) ? clamp0111(snr) : computeSignalFromText(text, confidence2);
631041
631904
  this.emit("snr", { score: this.lastSignalScore });
631042
631905
  this.onPartialTranscript(text);
631043
631906
  if (this.silenceTimer) clearTimeout(this.silenceTimer);