npm - @jsonstudio/llms - Versions diffs - 0.6.1172 → 0.6.1354 - Mend

@jsonstudio/llms 0.6.1172 → 0.6.1354

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (160) hide show

package/dist/router/virtual-router/engine.d.ts CHANGED Viewed

@@ -17,6 +17,7 @@ export declare class VirtualRouterEngine {
     private readonly contextAdvisor;
     private contextRouting;
     private routeStats;
+    private readonly aliasQueueStore;
     private readonly debug;
     private healthConfig;
     private readonly statsCenter;

package/dist/router/virtual-router/engine.js CHANGED Viewed

@@ -10,7 +10,7 @@ import { parseRoutingInstructions, applyRoutingInstructions, cleanMessagesFromRo
 import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync, saveRoutingInstructionStateSync } from './sticky-session-store.js';
 import { buildHitReason, formatVirtualRouterHit } from './engine-logging.js';
 import { selectDirectProviderModel, selectFromStickyPool, selectProviderImpl } from './engine-selection.js';
-import { applyQuotaDepletedImpl, applyQuotaRecoveryImpl, applySeriesCooldownImpl, handleProviderFailureImpl, mapProviderErrorImpl } from './engine-health.js';
+import { applyQuotaDepletedImpl, applyQuotaRecoveryImpl, handleProviderFailureImpl, mapProviderErrorImpl } from './engine-health.js';
 import { mergeStopMessageFromPersisted } from './stop-message-state-sync.js';
 export class VirtualRouterEngine {
     routing = {};
@@ -22,6 +22,9 @@ export class VirtualRouterEngine {
     contextAdvisor = new ContextAdvisor();
     contextRouting;
     routeStats = new Map();
+    // Alias selection state (global within this VirtualRouterEngine instance).
+    // Used by alias-selection strategies to avoid rapid cross-alias switching.
+    aliasQueueStore = new Map();
     debug = console; // thin hook; host may monkey-patch for colored logging
     healthConfig = null;
     statsCenter = getStatsCenter();
@@ -161,7 +164,13 @@ export class VirtualRouterEngine {
                         : undefined;
                     return Boolean(entryText) && entryText === sessionText && entryMax === sessionMax;
                 });
-                if (allSame) {
+                const used = typeof sessionState.stopMessageUsed === 'number' && Number.isFinite(sessionState.stopMessageUsed)
+                    ? Math.max(0, Math.floor(sessionState.stopMessageUsed))
+                    : 0;
+                const hasLastUsedAt = typeof sessionState.stopMessageLastUsedAt === 'number' &&
+                    Number.isFinite(sessionState.stopMessageLastUsedAt);
+                const alreadyArmed = used === 0 && !hasLastUsedAt;
+                if (allSame && alreadyArmed) {
                     instructions = parsedInstructions.filter((entry) => entry.type !== 'stopMessageSet');
                 }
             }
@@ -213,9 +222,16 @@ export class VirtualRouterEngine {
                             typeof maxRepeats === 'number' &&
                             Math.floor(sessionState.stopMessageMaxRepeats) === Math.floor(maxRepeats);
                         const isSameInstruction = Boolean(text) && sameText && sameMax;
+                        const used = typeof sessionState.stopMessageUsed === 'number' && Number.isFinite(sessionState.stopMessageUsed)
+                            ? Math.max(0, Math.floor(sessionState.stopMessageUsed))
+                            : 0;
+                        const hasLastUsedAt = typeof sessionState.stopMessageLastUsedAt === 'number' &&
+                            Number.isFinite(sessionState.stopMessageLastUsedAt);
+                        const shouldRearm = !isSameInstruction || used > 0 || hasLastUsedAt;
                         nextSessionState.stopMessageText = text || undefined;
                         nextSessionState.stopMessageMaxRepeats = maxRepeats;
-                        if (!isSameInstruction) {
+                        nextSessionState.stopMessageSource = 'explicit';
+                        if (shouldRearm) {
                             nextSessionState.stopMessageUsed = 0;
                             nextSessionState.stopMessageUpdatedAt =
                                 typeof routingState.stopMessageUpdatedAt === 'number'
@@ -426,7 +442,26 @@ export class VirtualRouterEngine {
                 else if (routingState.preferTarget) {
                     // Auto-clear only when the target becomes invalid or blocked by explicit routing instructions.
                     // Do NOT clear for temporary unavailability (e.g. 429 cooldown, quota cooldown, transient health).
-                    const shouldAutoClear = candidateKeys.length === 0 || eligibleKeys.length > 0;
+                    const shouldAutoClear = (() => {
+                        if (candidateKeys.length === 0) {
+                            return true;
+                        }
+                        // Prefer selection failed despite eligible keys existing: treat as a hard block (e.g. routing rules).
+                        if (eligibleKeys.length > 0) {
+                            return true;
+                        }
+                        // If quota explicitly marks the preferred target as out-of-pool, clear the prefer instruction so
+                        // the router can fall back to other targets without repeatedly retrying an impossible preference.
+                        if (selectionDeps.quotaView) {
+                            for (const key of candidateKeys) {
+                                const entry = selectionDeps.quotaView(key);
+                                if (entry && entry.inPool === false) {
+                                    return true;
+                                }
+                            }
+                        }
+                        return false;
+                    })();
                     if (shouldAutoClear) {
                         routingState = {
                             ...routingState,
@@ -588,10 +623,18 @@ export class VirtualRouterEngine {
                 // ignore persistence errors
             }
         }
-        // 当 Host 注入 quotaView 时，VirtualRouter 的入池/优先级决策应以 quota 为准；
-        // 此时不再在 engine-health 内部进行 429/backoff/series cooldown 等健康决策，
-        // 以避免与 daemon/quota-center 的长期熔断策略重复维护并导致日志噪声。
+        // When Host injects quotaView, pool decisions primarily follow quota;
+        // however explicit host-provided health signals (quota recovery/depleted/series cooldown)
+        // must still be applied so retry selection can avoid obviously blocked runtimes.
         if (this.quotaView) {
+            const handledByQuota = applyQuotaRecoveryImpl(event, this.healthManager, (key) => this.clearProviderCooldown(key), this.debug);
+            if (handledByQuota) {
+                return;
+            }
+            const handledByQuotaDepleted = applyQuotaDepletedImpl(event, this.healthManager, (key, ttl) => this.markProviderCooldown(key, ttl), this.debug);
+            if (handledByQuotaDepleted) {
+                return;
+            }
             return;
         }
         // 配额恢复事件优先处理：一旦识别到 virtualRouterQuotaRecovery，
@@ -604,7 +647,6 @@ export class VirtualRouterEngine {
         if (handledByQuotaDepleted) {
             return;
         }
-        applySeriesCooldownImpl(event, this.providerRegistry, this.healthManager, (key, ttl) => this.markProviderCooldown(key, ttl), this.debug);
         const derived = mapProviderErrorImpl(event, this.providerHealthConfig());
         if (!derived) {
             return;
@@ -678,7 +720,8 @@ export class VirtualRouterEngine {
             loadBalancer: this.loadBalancer,
             isProviderCoolingDown: (key) => this.isProviderCoolingDown(key),
             resolveStickyKey: (m) => this.resolveStickyKey(m),
-            quotaView: this.quotaView
+            quotaView: this.quotaView,
+            aliasQueueStore: this.aliasQueueStore
         }, { routingState });
     }
     incrementRouteStat(routeName, providerKey) {
@@ -1015,7 +1058,9 @@ export class VirtualRouterEngine {
             contextAdvisor: this.contextAdvisor,
             loadBalancer: this.loadBalancer,
             isProviderCoolingDown: (key) => this.isProviderCoolingDown(key),
-            resolveStickyKey: (m) => this.resolveStickyKey(m)
+            resolveStickyKey: (m) => this.resolveStickyKey(m),
+            quotaView: this.quotaView,
+            aliasQueueStore: this.aliasQueueStore
         }, { routingState: state });
     }
     extractProviderId(providerKey) {

package/dist/router/virtual-router/routing-instructions.js CHANGED Viewed

@@ -504,10 +504,15 @@ export function applyRoutingInstructions(instructions, currentState) {
                     const sameMax = typeof newState.stopMessageMaxRepeats === 'number' &&
                         Math.floor(newState.stopMessageMaxRepeats) === maxRepeats;
                     const isSameInstruction = sameText && sameMax;
+                    const used = typeof newState.stopMessageUsed === 'number' && Number.isFinite(newState.stopMessageUsed)
+                        ? Math.max(0, Math.floor(newState.stopMessageUsed))
+                        : 0;
+                    const hasLastUsedAt = typeof newState.stopMessageLastUsedAt === 'number' && Number.isFinite(newState.stopMessageLastUsedAt);
+                    const shouldRearm = !isSameInstruction || used > 0 || hasLastUsedAt;
                     newState.stopMessageText = text;
                     newState.stopMessageMaxRepeats = maxRepeats;
                     newState.stopMessageSource = 'explicit';
-                    if (!isSameInstruction) {
+                    if (shouldRearm) {
                         newState.stopMessageUsed = 0;
                         newState.stopMessageUpdatedAt = Date.now();
                         newState.stopMessageLastUsedAt = undefined;

package/dist/router/virtual-router/stop-message-state-sync.d.ts CHANGED Viewed

@@ -10,6 +10,11 @@ type StopMessageSubset = Pick<RoutingInstructionState, 'stopMessageSource' | 'st
  * Strategy:
  * - If existing has a newer stopMessageUpdatedAt than persisted → keep existing config.
  * - Otherwise → adopt persisted fully.
+ *
+ * Note:
+ * - We intentionally do NOT merge counters from an older persisted config into a newer in-memory config.
+ *   A stopMessage "set" is expected to re-arm/reset counters; allowing older lastUsedAt to overwrite
+ *   would make re-arming flaky until the async persistence catches up.
  */
 export declare function mergeStopMessageFromPersisted(existing: StopMessageSubset, persisted: StopMessageSubset | null): StopMessageSubset;
 export {};

package/dist/router/virtual-router/stop-message-state-sync.js CHANGED Viewed

@@ -21,6 +21,11 @@ function lastUsedAtOf(state) {
  * Strategy:
  * - If existing has a newer stopMessageUpdatedAt than persisted → keep existing config.
  * - Otherwise → adopt persisted fully.
+ *
+ * Note:
+ * - We intentionally do NOT merge counters from an older persisted config into a newer in-memory config.
+ *   A stopMessage "set" is expected to re-arm/reset counters; allowing older lastUsedAt to overwrite
+ *   would make re-arming flaky until the async persistence catches up.
  */
 export function mergeStopMessageFromPersisted(existing, persisted) {
     if (!persisted) {
@@ -40,18 +45,5 @@ export function mergeStopMessageFromPersisted(existing, persisted) {
             stopMessageLastUsedAt: persisted.stopMessageLastUsedAt
         };
     }
-    // Keep existing config, but still allow persisted usage counters to move forward if they are newer.
-    const existingLastUsedAt = lastUsedAtOf(existing);
-    const persistedLastUsedAt = lastUsedAtOf(persisted);
-    const countersAreNewer = persistedLastUsedAt !== null &&
-        (existingLastUsedAt === null || persistedLastUsedAt > existingLastUsedAt);
-    return {
-        ...existing,
-        ...(countersAreNewer
-            ? {
-                stopMessageUsed: persisted.stopMessageUsed,
-                stopMessageLastUsedAt: persisted.stopMessageLastUsedAt
-            }
-            : {})
-    };
+    return { ...existing };
 }

package/dist/router/virtual-router/types.d.ts CHANGED Viewed

@@ -22,7 +22,7 @@ export interface RoutePoolTier {
      * Optional force flag for this route pool.
      * Currently interpreted for:
      * - routing.vision: force dedicated vision backend handling.
-     * - routing.web_search / routing.search: force server-side web_search flow.
+     * - routing.web_search: force server-side web_search flow.
      */
     force?: boolean;
 }
@@ -100,6 +100,15 @@ export interface VirtualRouterClassifierConfig {
 export interface LoadBalancingPolicy {
     strategy: 'round-robin' | 'weighted' | 'sticky';
     weights?: Record<string, number>;
+    /**
+     * Alias-level selection strategy (provider auth aliases).
+     *
+     * Use this when a provider exposes multiple auth aliases for the same model, and the upstream
+     * gateway behaves poorly when requests rapidly switch across keys (e.g. repeated 429 "no capacity"
+     * despite quota). Strategies are applied inside VirtualRouter selection only; providers remain
+     * transport-only.
+     */
+    aliasSelection?: AliasSelectionConfig;
     /**
      * AWRR: health-weighted selection.
      * - Deterministic (no randomness)
@@ -143,6 +152,21 @@ export interface HealthWeightedLoadBalancingConfig {
      */
     recoverToBestOnRetry?: boolean;
 }
+export type AliasSelectionStrategy = 'none' | 'sticky-queue';
+export interface AliasSelectionConfig {
+    /**
+     * Global on/off switch. When false, no alias-level selection is applied.
+     */
+    enabled?: boolean;
+    /**
+     * Default strategy used when a provider has no explicit override.
+     */
+    defaultStrategy?: AliasSelectionStrategy;
+    /**
+     * Per-provider overrides keyed by providerId (e.g. "antigravity").
+     */
+    providers?: Record<string, AliasSelectionStrategy>;
+}
 export interface ContextWeightedLoadBalancingConfig {
     /**
      * When false, context-weighted logic is disabled.

package/dist/servertool/clock/config.d.ts CHANGED Viewed

@@ -5,3 +5,11 @@ export declare const CLOCK_CONFIG_DEFAULTS: {
     readonly tickMs: 60000;
 };
 export declare function normalizeClockConfig(raw: unknown): ClockConfigSnapshot | null;
+/**
+ * Resolve the effective clock config for a request/session.
+ *
+ * - If ROUTECODEX_DISABLE_CLOCK (or, when that is unset, LLMSWITCH_DISABLE_CLOCK) is "1"/"true"/"yes"/"on" -> return null.
+ * - If a config object exists and enabled=true -> return normalized config.
+ * - If the config is present but disabled/invalid, or absent (undefined) -> return null (opt-in only).
+ */
+export declare function resolveClockConfig(raw: unknown): ClockConfigSnapshot | null;

package/dist/servertool/clock/config.js CHANGED Viewed

@@ -3,6 +3,11 @@ export const CLOCK_CONFIG_DEFAULTS = {
     dueWindowMs: 60_000,
     tickMs: 60_000
 };
+function isClockDisabledByEnv() {
+    const raw = process.env.ROUTECODEX_DISABLE_CLOCK ?? process.env.LLMSWITCH_DISABLE_CLOCK ?? '';
+    const v = String(raw).trim().toLowerCase();
+    return v === '1' || v === 'true' || v === 'yes' || v === 'on';
+}
 export function normalizeClockConfig(raw) {
     if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
         return null;
@@ -25,3 +30,20 @@ export function normalizeClockConfig(raw) {
         : CLOCK_CONFIG_DEFAULTS.tickMs;
     return { enabled: true, retentionMs, dueWindowMs, tickMs };
 }
+/**
+ * Resolve the effective clock config for a request/session.
+ *
+ * - If ROUTECODEX_DISABLE_CLOCK (or, when that is unset, LLMSWITCH_DISABLE_CLOCK) is "1"/"true"/"yes"/"on" -> return null.
+ * - If a config object exists and enabled=true -> return normalized config.
+ * - If the config is present but disabled/invalid, or absent (undefined) -> return null (opt-in only).
+ */
+export function resolveClockConfig(raw) {
+    if (isClockDisabledByEnv()) {
+        return null;
+    }
+    const normalized = normalizeClockConfig(raw);
+    if (normalized) {
+        return normalized;
+    }
+    return null;
+}

package/dist/servertool/clock/log.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+export declare const CLOCK_LOG_GOLD = "\u001B[38;5;220m";
+export declare const CLOCK_LOG_RESET = "\u001B[0m";
+export declare function logClock(message: string, extra?: Record<string, unknown>): void;

package/dist/servertool/clock/log.js ADDED Viewed

@@ -0,0 +1,13 @@
+export const CLOCK_LOG_GOLD = '\x1b[38;5;220m';
+export const CLOCK_LOG_RESET = '\x1b[0m';
+export function logClock(message, extra) {
+    try {
+        // eslint-disable-next-line no-console
+        console.log(`${CLOCK_LOG_GOLD}[servertool][clock] ${message}` +
+            (extra ? ` ${JSON.stringify(extra)}` : '') +
+            CLOCK_LOG_RESET);
+    }
+    catch {
+        // best-effort logging
+    }
+}

package/dist/servertool/clock/task-store.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 export type { ClockConfigSnapshot, ClockReservation, ClockScheduleItem, ClockSessionState, ClockTask } from './types.js';
-export { normalizeClockConfig } from './config.js';
+export { normalizeClockConfig, resolveClockConfig } from './config.js';
 export { startClockDaemonIfNeeded, stopClockDaemonForTests } from './daemon.js';
 export { loadClockSessionState, clearClockSession } from './session-store.js';
 export { cancelClockTask, clearClockTasks, commitClockReservation, findNextUndeliveredDueAtMs, listClockTasks, parseDueAtMs, reserveDueTasksForRequest, scheduleClockTasks, selectDueUndeliveredTasks } from './tasks.js';

package/dist/servertool/clock/task-store.js CHANGED Viewed

@@ -1,4 +1,4 @@
-export { normalizeClockConfig } from './config.js';
+export { normalizeClockConfig, resolveClockConfig } from './config.js';
 export { startClockDaemonIfNeeded, stopClockDaemonForTests } from './daemon.js';
 export { loadClockSessionState, clearClockSession } from './session-store.js';
 export { cancelClockTask, clearClockTasks, commitClockReservation, findNextUndeliveredDueAtMs, listClockTasks, parseDueAtMs, reserveDueTasksForRequest, scheduleClockTasks, selectDueUndeliveredTasks } from './tasks.js';

package/dist/servertool/clock/tasks.js CHANGED Viewed

@@ -15,7 +15,7 @@ function safeJson(value) {
 }
 function safeQuoted(text) {
     const normalized = String(text ?? '');
-    const escaped = normalized.replace(/\"/g, '\\"');
+    const escaped = normalized.replace(/"/g, '\\"');
     return `"${escaped}"`;
 }
 function buildTaskId() {

package/dist/servertool/engine.js CHANGED Viewed

@@ -1,11 +1,39 @@
 import { runServerSideToolEngine } from './server-side-tools.js';
 import { ProviderProtocolError } from '../conversion/shared/errors.js';
+import { ensureRuntimeMetadata, readRuntimeMetadata } from '../conversion/shared/runtime-metadata.js';
 import { createHash } from 'node:crypto';
 import { loadRoutingInstructionStateSync, saveRoutingInstructionStateSync } from '../router/virtual-router/sticky-session-store.js';
 import { deserializeRoutingInstructionState, serializeRoutingInstructionState } from '../router/virtual-router/routing-instructions.js';
 import { applyHubFollowupPolicyShadow } from './followup-shadow.js';
 import { buildServerToolFollowupChatPayloadFromInjection } from './handlers/followup-request-builder.js';
-import { findNextUndeliveredDueAtMs, listClockTasks, normalizeClockConfig } from './clock/task-store.js';
+import { findNextUndeliveredDueAtMs, listClockTasks, resolveClockConfig } from './clock/task-store.js';
+import { savePendingServerToolInjection } from './pending-session.js';
+function stripToolHistoryFromFollowupMessages(raw) {
+    const messages = Array.isArray(raw) ? raw : null;
+    if (!messages) {
+        return raw;
+    }
+    const out = [];
+    for (const msg of messages) {
+        if (!msg || typeof msg !== 'object' || Array.isArray(msg)) {
+            out.push(msg);
+            continue;
+        }
+        const record = msg;
+        const role = typeof record.role === 'string' ? record.role.trim().toLowerCase() : '';
+        // Drop tool-role messages entirely for a "no-tools" recovery followup.
+        if (role === 'tool') {
+            continue;
+        }
+        // Remove OpenAI tool call fields that could trigger Gemini strict validation.
+        const cloned = { ...record };
+        delete cloned.tool_calls;
+        delete cloned.tool_call_id;
+        delete cloned.name;
+        out.push(cloned);
+    }
+    return out;
+}
 function parseTimeoutMs(raw, fallback) {
     const n = typeof raw === 'string' ? Number(raw.trim()) : typeof raw === 'number' ? raw : NaN;
     if (!Number.isFinite(n) || n <= 0) {
@@ -207,14 +235,15 @@ async function shouldDisableServerToolTimeoutForClockHold(args) {
         return false;
     }
     const record = args.adapterContext;
-    const clockConfig = normalizeClockConfig(record.clock);
-    if (!clockConfig) {
-        return false;
-    }
+    const rt = readRuntimeMetadata(record);
     const sessionId = typeof record.sessionId === 'string' ? record.sessionId.trim() : '';
     if (!sessionId) {
         return false;
     }
+    const clockConfig = resolveClockConfig(rt?.clock);
+    if (!clockConfig) {
+        return false;
+    }
     // If already within due window, clock_auto won't need long hold.
     try {
         const tasks = await listClockTasks(sessionId, clockConfig);
@@ -282,6 +311,29 @@ export async function runServerToolOrchestration(options) {
     const flowId = engineResult.execution.flowId ?? 'unknown';
     const totalSteps = 5;
     logProgress(1, totalSteps, 'matched', { flowId });
+    // Mixed tools: persist servertool outputs for next request, but return remaining tool_calls to client.
+    if (engineResult.pendingInjection) {
+        const sessionId = engineResult.pendingInjection.sessionId;
+        if (sessionId && sessionId.trim()) {
+            try {
+                await savePendingServerToolInjection(sessionId.trim(), {
+                    createdAtMs: Date.now(),
+                    afterToolCallIds: engineResult.pendingInjection.afterToolCallIds,
+                    messages: engineResult.pendingInjection.messages,
+                    sourceRequestId: options.requestId
+                });
+            }
+            catch {
+                // best-effort: do not fail the response conversion just because persistence failed
+            }
+        }
+        logProgress(5, totalSteps, 'completed (mixed tools; no reenter)', { flowId });
+        return {
+            chat: engineResult.finalChatResponse,
+            executed: true,
+            flowId: engineResult.execution.flowId
+        };
+    }
     if (!engineResult.execution.followup || !options.reenterPipeline) {
         logProgress(5, totalSteps, 'completed (no followup)', { flowId });
         return {
@@ -328,7 +380,41 @@ export async function runServerToolOrchestration(options) {
         }
         return null;
     })();
-    if (!followupPayloadRaw) {
+    // Prevent nested followup execution on serverToolFollowup hops.
+    // Followup responses should still be eligible for servertool triggers (e.g. clock/web_search parsing),
+    // but they must not start a new followup flow inside an existing followup hop.
+    //
+    // Exception: allow continuing the same flow when serverToolLoopState.flowId matches.
+    const followupSeedPayload = (() => {
+        if (!followupPayloadRaw) {
+            return null;
+        }
+        try {
+            const rt = readRuntimeMetadata(options.adapterContext);
+            const followupFlagRaw = rt?.serverToolFollowup;
+            const isFollowup = followupFlagRaw === true ||
+                (typeof followupFlagRaw === 'string' && followupFlagRaw.trim().toLowerCase() === 'true');
+            if (!isFollowup) {
+                return followupPayloadRaw;
+            }
+            const loopState = rt?.serverToolLoopState;
+            const loopFlowId = loopState && typeof loopState === 'object' && !Array.isArray(loopState)
+                ? String(loopState.flowId || '').trim()
+                : '';
+            const flowId = typeof engineResult.execution?.flowId === 'string' && engineResult.execution.flowId.trim().length
+                ? engineResult.execution.flowId.trim()
+                : '';
+            if (loopFlowId && flowId && loopFlowId === flowId) {
+                return followupPayloadRaw;
+            }
+            return null;
+        }
+        catch {
+            // best-effort: if metadata is malformed, avoid nested followups
+            return null;
+        }
+    })();
+    if (!followupSeedPayload) {
         logProgress(5, totalSteps, 'completed (missing followup payload)', { flowId });
         return {
             chat: engineResult.finalChatResponse,
@@ -336,7 +422,7 @@ export async function runServerToolOrchestration(options) {
             flowId: engineResult.execution.flowId
         };
     }
-    const loopState = buildServerToolLoopState(options.adapterContext, engineResult.execution.flowId, followupPayloadRaw);
+    const loopState = buildServerToolLoopState(options.adapterContext, engineResult.execution.flowId, followupSeedPayload);
     if (applyAutoLimit && loopState && typeof loopState.repeatCount === 'number' && loopState.repeatCount >= 3) {
         logProgress(5, totalSteps, 'completed (auto limit hit)', { flowId });
         return {
@@ -354,11 +440,14 @@ export async function runServerToolOrchestration(options) {
         };
     }
     const metadata = {
-        serverToolFollowup: true,
         stream: false,
-        ...(loopState ? { serverToolLoopState: loopState } : {}),
         ...(engineResult.execution.followup.metadata ?? {})
     };
+    const rt = ensureRuntimeMetadata(metadata);
+    rt.serverToolFollowup = true;
+    if (loopState) {
+        rt.serverToolLoopState = loopState;
+    }
     // Followup re-enters HubPipeline at chat-process entry with a canonical "chat-like" body.
     // This avoids re-running per-protocol inbound parse/semantic-map for each client protocol.
     metadata.__hubEntry = 'chat_process';
@@ -366,18 +455,44 @@ export async function runServerToolOrchestration(options) {
     // - clear any inherited routeHint
     // - do not inherit sticky target
     // - record original entry endpoint for downstream formatting/debug
-    metadata.preserveRouteHint = preserveRouteHint;
+    rt.preserveRouteHint = preserveRouteHint;
     // Use empty string (falsy) to avoid VirtualRouter calling `.trim()` on non-string values.
     metadata.routeHint = '';
-    metadata.disableStickyRoutes = true;
-    metadata.serverToolOriginalEntryEndpoint =
+    rt.disableStickyRoutes = true;
+    rt.serverToolOriginalEntryEndpoint =
         (typeof options.entryEndpoint === 'string' && options.entryEndpoint.trim().length
             ? options.entryEndpoint
             : followupEntryEndpoint);
+    // For stateful auto-followups, keep the same providerKey/alias.
+    // Otherwise the followup requestId suffix could cause round-robin alias switching or
+    // route re-evaluation (e.g. "continue" prompt being treated as a new intent).
+    if (isStopMessageFlow || isGeminiEmptyReplyContinue) {
+        const providerKeyRaw = options.adapterContext.providerKey;
+        const providerKey = typeof providerKeyRaw === 'string' && providerKeyRaw.trim().length ? providerKeyRaw.trim() : '';
+        if (providerKey) {
+            metadata.__shadowCompareForcedProviderKey = providerKey;
+        }
+    }
     const retryEmptyFollowupOnce = isStopMessageFlow || isGeminiEmptyReplyContinue;
     const maxAttempts = retryEmptyFollowupOnce ? 2 : 1;
     const followupRequestId = buildFollowupRequestId(options.requestId, engineResult.execution.followup.requestIdSuffix);
-    let followupPayload = coerceFollowupPayloadStream(followupPayloadRaw, metadata.stream === true);
+    let followupPayload = coerceFollowupPayloadStream(followupSeedPayload, metadata.stream === true);
+    if (isGeminiEmptyReplyContinue) {
+        // For gemini_empty_reply_continue, the goal is to recover text output from an empty/malformed reply.
+        // Force the followup to be non-tool-calling to avoid repeated MALFORMED_FUNCTION_CALL loops.
+        const paramsRaw = followupPayload.parameters;
+        const params = paramsRaw && typeof paramsRaw === 'object' && !Array.isArray(paramsRaw) ? { ...paramsRaw } : {};
+        params.tool_choice = 'none';
+        params.parallel_tool_calls = false;
+        // Ensure we don't override the tool_choice->toolConfig mapping with an inherited tool_config.
+        delete params.tool_config;
+        delete params.toolConfig;
+        followupPayload.parameters = params;
+        // Additionally, strip tool-call history. Gemini/CloudCode can strict-validate
+        // (history tool calls) ↔ (current tool declarations). We keep tools declared (so the
+        // session can continue), but remove history tool artifacts to avoid malformed loops.
+        followupPayload.messages = stripToolHistoryFromFollowupMessages(followupPayload.messages);
+    }
     followupPayload = applyHubFollowupPolicyShadow({
         requestId: followupRequestId,
         entryEndpoint: followupEntryEndpoint,
@@ -499,7 +614,8 @@ function reserveStopMessageUsage(adapterContext) {
     }
     let state = loadRoutingInstructionStateSync(stickyKey);
     if (!state || !state.stopMessageText || !state.stopMessageMaxRepeats) {
-        const fallback = resolveStopMessageSnapshot(adapterContext.stopMessageState);
+        const rt = readRuntimeMetadata(adapterContext);
+        const fallback = resolveStopMessageSnapshot(rt?.stopMessageState);
         if (!fallback) {
             return null;
         }
@@ -518,13 +634,18 @@ function reserveStopMessageUsage(adapterContext) {
         : 0;
     const nextUsed = used + 1;
     state.stopMessageUsed = nextUsed;
-    state.stopMessageLastUsedAt = Date.now();
+    const now = Date.now();
+    state.stopMessageLastUsedAt = now;
     if (nextUsed >= maxRepeats) {
+        // Auto-clear after reaching max repeats. This avoids leaving an "exhausted" stopMessage
+        // stuck in sticky state and ensures a fresh `<**stopMessage:...**>` can re-arm cleanly.
         state.stopMessageText = undefined;
         state.stopMessageMaxRepeats = undefined;
         state.stopMessageUsed = undefined;
-        state.stopMessageUpdatedAt = undefined;
-        state.stopMessageLastUsedAt = undefined;
+        state.stopMessageSource = undefined;
+        // Keep monotonic timestamps as a tombstone to prevent accidental re-application from replayed history.
+        state.stopMessageUpdatedAt = now;
+        state.stopMessageLastUsedAt = now;
     }
     saveRoutingInstructionStateSync(stickyKey, state);
     return { stickyKey, previousState };
@@ -544,11 +665,13 @@ function disableStopMessageAfterFailedFollowup(adapterContext, reservation) {
         if (!state) {
             return;
         }
+        const now = Date.now();
         state.stopMessageText = undefined;
         state.stopMessageMaxRepeats = undefined;
         state.stopMessageUsed = undefined;
-        state.stopMessageUpdatedAt = undefined;
-        state.stopMessageLastUsedAt = undefined;
+        state.stopMessageSource = undefined;
+        state.stopMessageUpdatedAt = now;
+        state.stopMessageLastUsedAt = now;
         saveRoutingInstructionStateSync(key, state);
     }
     catch {
@@ -715,7 +838,8 @@ function readServerToolLoopState(adapterContext) {
     if (!adapterContext || typeof adapterContext !== 'object') {
         return null;
     }
-    const raw = adapterContext.serverToolLoopState;
+    const rt = readRuntimeMetadata(adapterContext);
+    const raw = rt?.serverToolLoopState;
     if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
         return null;
     }
@@ -774,7 +898,8 @@ function getStopMessageSource(adapterContext) {
     if (!adapterContext || typeof adapterContext !== 'object') {
         return undefined;
     }
-    const raw = adapterContext.stopMessageState;
+    const rt = readRuntimeMetadata(adapterContext);
+    const raw = rt?.stopMessageState;
     if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
         return undefined;
     }

package/dist/servertool/handlers/clock-auto.js CHANGED Viewed

@@ -1,6 +1,8 @@
 import { registerServerToolHandler } from '../registry.js';
 import { extractCapturedChatSeed } from './followup-request-builder.js';
-import { findNextUndeliveredDueAtMs, listClockTasks, normalizeClockConfig, startClockDaemonIfNeeded } from '../clock/task-store.js';
+import { readRuntimeMetadata } from '../../conversion/shared/runtime-metadata.js';
+import { findNextUndeliveredDueAtMs, listClockTasks, resolveClockConfig, startClockDaemonIfNeeded } from '../clock/task-store.js';
+import { logClock } from '../clock/log.js';
 const FLOW_ID = 'clock_hold_flow';
 function resolveClientConnectionState(value) {
     if (!value || typeof value !== 'object' || Array.isArray(value)) {
@@ -96,15 +98,17 @@ const handler = async (ctx) => {
         (typeof clientDisconnectedRaw === 'string' && clientDisconnectedRaw.trim().toLowerCase() === 'true')) {
         return null;
     }
-    const clockConfig = normalizeClockConfig(record.clock);
-    if (!clockConfig) {
-        return null;
-    }
-    await startClockDaemonIfNeeded(clockConfig);
+    const rt = readRuntimeMetadata(ctx.adapterContext);
     const sessionId = resolveSessionId(ctx.adapterContext);
     if (!sessionId) {
         return null;
     }
+    // Clock is opt-in: resolveClockConfig returns null when the config is absent, disabled/invalid, or disabled via env.
+    const clockConfig = resolveClockConfig(rt?.clock);
+    if (!clockConfig) {
+        return null;
+    }
+    await startClockDaemonIfNeeded(clockConfig);
     const seed = extractCapturedChatSeed(record.capturedChatRequest);
     if (!seed) {
         return null;
@@ -117,6 +121,7 @@ const handler = async (ctx) => {
     }
     // Wait until the "due window" is reached (now >= dueAt - dueWindowMs).
     const thresholdMs = nextDueAtMs - clockConfig.dueWindowMs;
+    logClock('hold_start', { sessionId, nextDueAtMs, thresholdMs });
     while (Date.now() < thresholdMs) {
         const state = resolveClientConnectionState(ctx.adapterContext.clientConnectionState);
         if (state?.disconnected === true) {