npm - @jsonstudio/rcc - Versions diffs - 0.90.814 → 0.90.872 - Mend

@jsonstudio/rcc 0.90.814 → 0.90.872

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (217) hide show

package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-execute-chat-process-entry.js CHANGED Viewed

@@ -1,4 +1,5 @@
 import { shouldRecordSnapshots } from "../../snapshot-utils.js";
+import { ensureRuntimeMetadata } from "../../runtime-metadata.js";
 import { REQUEST_STAGE_HOOKS } from "./hub-pipeline-stage-hooks.js";
 import { buildReqInboundSkippedNodeWithNative, coerceStandardizedRequestFromPayloadWithNative, findMappableSemanticsKeysWithNative, liftResponsesResumeIntoSemanticsWithNative, prepareRuntimeMetadataForServertoolsWithNative, syncResponsesContextFromCanonicalMessagesWithNative, } from "../../../router/virtual-router/engine-selection/native-hub-pipeline-orchestration-semantics.js";
 import { runReqProcessStage1ToolGovernance } from "./stages/req_process/req_process_stage1_tool_governance/index.js";
@@ -7,6 +8,7 @@ import { deriveWorkingRequestFlags, estimateInputTokensForWorkingRequest, propag
 import { annotatePassthroughAuditSkipped, appendPassthroughGovernanceSkippedNode, appendToolGovernanceNodeResult, propagateClockReservationToMetadata, } from "./hub-pipeline-chat-process-governance-utils.js";
 import { createSnapshotRecorder } from "../snapshot-recorder.js";
 import { executeRouteAndBuildOutbound } from "./hub-pipeline-route-and-outbound.js";
+import { peekHubStageTopSummary } from "./hub-stage-timing.js";
 export async function executeChatProcessEntryPipeline(args) {
     const { normalized, routerEngine, config } = args;
     const hooks = REQUEST_STAGE_HOOKS[normalized.providerProtocol];
@@ -119,7 +121,6 @@ export async function executeChatProcessEntryPipeline(args) {
         normalizedMetadata: normalized.metadata ??
             (normalized.metadata = {}),
     });
-    const normalizedMeta = normalized.metadata;
     // responsesResume is a client-protocol semantic (/v1/responses tool loop) and must live in chat.semantics.
     // Do not read it from metadata once entering chat_process.
     const { responsesResume, hasImageAttachment, serverToolRequired } = deriveWorkingRequestFlags(workingRequest);
@@ -145,6 +146,11 @@ export async function executeChatProcessEntryPipeline(args) {
             enabled: false,
         },
     });
+    const hubStageTop = peekHubStageTopSummary(normalized.id);
+    if (hubStageTop.length) {
+        const rt = ensureRuntimeMetadata(outbound.metadata);
+        rt.hubStageTop = hubStageTop;
+    }
     return {
         requestId: normalized.id,
         providerPayload: outbound.providerPayload,

package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-execute-request-stage.js CHANGED Viewed

@@ -1,5 +1,7 @@
 import { executeRequestStageInbound, } from "./hub-pipeline-execute-request-stage-inbound.js";
 import { executeRouteAndBuildOutbound, } from "./hub-pipeline-route-and-outbound.js";
+import { ensureRuntimeMetadata } from "../../runtime-metadata.js";
+import { peekHubStageTopSummary } from "./hub-stage-timing.js";
 export async function executeRequestStagePipeline(args) {
     const { normalized, hooks, routerEngine, config } = args;
     const inbound = await executeRequestStageInbound({
@@ -30,6 +32,11 @@ export async function executeRequestStagePipeline(args) {
             requestId: normalized.id,
         },
     });
+    const hubStageTop = peekHubStageTopSummary(normalized.id);
+    if (hubStageTop.length) {
+        const rt = ensureRuntimeMetadata(outbound.metadata);
+        rt.hubStageTop = hubStageTop;
+    }
     return {
         requestId: normalized.id,
         providerPayload: outbound.providerPayload,

package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-heavy-input-fastpath.d.ts ADDED Viewed

@@ -0,0 +1,24 @@
+import type { ProcessedRequest, StandardizedRequest } from "../types/standardized.js";
+export declare function isHeavyInputFastpathEnabled(): boolean; // Reports the env-driven fast-path toggle (defaults to enabled).
+export declare function shouldUseHeavyInputFastpath(metadata?: Record<string, unknown>): { // Evaluates request metadata against the fast-path config.
+    enabled: boolean; // Fast-path toggle read from the environment.
+    hit: boolean; // True when enabled and the request is flagged heavy or its estimate meets the threshold.
+    threshold: number; // Input-token threshold in effect.
+    estimatedInputTokens?: number; // Present only when a positive estimate was found in metadata.
+};
+export declare function markHeavyInputFastpath(options: { // Flags metadata as heavy input; does nothing without metadata or when the fast path is disabled.
+    metadata?: Record<string, unknown>; // Mutated in place.
+    estimatedInputTokens?: number; // Stored (floored, at least 1) only when finite and positive.
+    reason: "rough_estimate" | "full_estimate" | "metadata_threshold"; // Recorded as the fast-path reason.
+}): void;
+export declare function buildCapturedChatRequestInput(args: { // Collects the request fields used as input for the captured chat request snapshot.
+    workingRequest: StandardizedRequest | ProcessedRequest; // Source of model, messages, tools and parameters.
+    normalizedMetadata?: Record<string, unknown>; // May be flagged as heavy input as a side effect.
+}): {
+    model?: unknown;
+    messages?: unknown;
+    tools?: unknown;
+    parameters?: unknown;
+};
+export declare function roughEstimateInputTokensFromRequest(request: StandardizedRequest | ProcessedRequest): number; // Character-count based token estimate; always at least 1.
+export declare function resolveHeavyInputTokenThreshold(): number; // Returns the configured input-token threshold.

package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-heavy-input-fastpath.js ADDED Viewed

@@ -0,0 +1,203 @@
+import { ensureRuntimeMetadata, readRuntimeMetadata } from "../../runtime-metadata.js";
+const TRUTHY = new Set(["1", "true", "yes", "on"]); // Lower-cased env values read as boolean true.
+const FALSY = new Set(["0", "false", "no", "off"]); // Lower-cased env values read as boolean false.
+const DEFAULT_INPUT_TOKEN_THRESHOLD = 120_000; // Used when no threshold env var yields a positive integer.
+function readBooleanEnv(names, fallback) { // Returns the first recognised boolean among the env vars in `names`, else `fallback`.
+    for (const name of names) {
+        const raw = process.env[name];
+        if (raw === undefined) {
+            continue;
+        }
+        const normalized = String(raw).trim().toLowerCase(); // Compare case-insensitively, ignoring surrounding whitespace.
+        if (TRUTHY.has(normalized)) {
+            return true;
+        }
+        if (FALSY.has(normalized)) {
+            return false;
+        }
+    } // A set-but-unrecognised value is skipped, so the next name in the list is tried.
+    return fallback;
+}
+function readPositiveIntEnv(names, fallback) { // Returns the first env var in `names` that parses to a positive integer, else `fallback`.
+    for (const name of names) {
+        const raw = process.env[name];
+        if (raw === undefined) {
+            continue;
+        }
+        const parsed = Number.parseInt(String(raw).trim(), 10); // Base-10 parse; NaN when the value has no leading digits.
+        if (Number.isFinite(parsed) && parsed > 0) { // Zero, negative and non-numeric values are ignored.
+            return parsed;
+        }
+    }
+    return fallback;
+}
+function getConfig() { // Reads the fast-path settings from process.env on every call; nothing is cached here.
+    return {
+        enabled: readBooleanEnv([ // Falls back to true, so the fast path is on unless an env var disables it.
+            "ROUTECODEX_HUB_FASTPATH_HEAVY_INPUT", // Checked first; wins when it holds a recognised value.
+            "RCC_HUB_FASTPATH_HEAVY_INPUT",
+        ], true),
+        inputTokenThreshold: readPositiveIntEnv([ // Same lookup order as above, for the token threshold.
+            "ROUTECODEX_HUB_FASTPATH_INPUT_TOKEN_THRESHOLD",
+            "RCC_HUB_FASTPATH_INPUT_TOKEN_THRESHOLD",
+        ], DEFAULT_INPUT_TOKEN_THRESHOLD),
+    };
+}
+export function isHeavyInputFastpathEnabled() { // Exposes the env-driven enabled flag from getConfig().
+    return getConfig().enabled;
+}
+function readEstimatedInputTokens(metadata) { // Returns a positive integer token estimate found in metadata, or undefined.
+    if (!metadata || typeof metadata !== "object") {
+        return undefined;
+    }
+    const candidate = typeof metadata.estimatedInputTokens === "number" && // Preferred source: top-level estimatedInputTokens.
+        Number.isFinite(metadata.estimatedInputTokens)
+        ? Math.max(0, Math.floor(metadata.estimatedInputTokens))
+        : undefined;
+    if (candidate && candidate > 0) {
+        return candidate;
+    }
+    const rt = readRuntimeMetadata(metadata); // Fallback source: runtime metadata (helper is defined in another module).
+    const rtCandidate = typeof rt
+        ?.hubFastpathEstimatedInputTokens === "number" &&
+        Number.isFinite(rt.hubFastpathEstimatedInputTokens)
+        ? Math.max(0, Math.floor(rt.hubFastpathEstimatedInputTokens))
+        : undefined;
+    return rtCandidate && rtCandidate > 0 ? rtCandidate : undefined; // Estimates that floor to 0 are treated as missing.
+}
+export function shouldUseHeavyInputFastpath(metadata) { // Decides whether the request described by `metadata` takes the heavy-input fast path.
+    const config = getConfig();
+    const estimatedInputTokens = readEstimatedInputTokens(metadata);
+    const rt = metadata ? readRuntimeMetadata(metadata) : undefined;
+    const runtimeForced = rt && // True only when the runtime flag is literally `true` (presumably the flag written by markHeavyInputFastpath).
+        typeof rt.hubFastpathHeavyInput ===
+            "boolean" &&
+        rt.hubFastpathHeavyInput === true;
+    const hit = config.enabled && // Disabled config never hits, even when the runtime flag is set.
+        (runtimeForced ||
+            (typeof estimatedInputTokens === "number" &&
+                estimatedInputTokens >= config.inputTokenThreshold)); // Threshold comparison is inclusive.
+    return {
+        enabled: config.enabled,
+        hit,
+        threshold: config.inputTokenThreshold,
+        ...(typeof estimatedInputTokens === "number" // Key is omitted entirely when no estimate is available.
+            ? { estimatedInputTokens }
+            : {}),
+    };
+}
+export function markHeavyInputFastpath(options) { // Records on `options.metadata` that the request is heavy input, with reason and threshold.
+    const { metadata, estimatedInputTokens, reason } = options;
+    if (!metadata || typeof metadata !== "object") { // Nothing to annotate.
+        return;
+    }
+    const config = getConfig();
+    if (!config.enabled) { // Leave metadata untouched when the fast path is disabled.
+        return;
+    }
+    const rt = ensureRuntimeMetadata(metadata); // Helper from another module; presumably creates the runtime slot when absent.
+    rt.hubFastpathHeavyInput = true;
+    rt.hubFastpathReason = reason;
+    rt.hubFastpathInputTokenThreshold =
+        config.inputTokenThreshold;
+    if (typeof estimatedInputTokens === "number" && // Only finite, positive estimates are persisted.
+        Number.isFinite(estimatedInputTokens) &&
+        estimatedInputTokens > 0) {
+        const rounded = Math.max(1, Math.floor(estimatedInputTokens)); // Fractions below 1 are stored as 1.
+        metadata.estimatedInputTokens = rounded; // Written to both the top-level key and the runtime metadata.
+        rt.hubFastpathEstimatedInputTokens = rounded;
+    }
+}
+export function buildCapturedChatRequestInput(args) { // Returns the request fields used for the captured chat snapshot, flagging heavy input on the way.
+    const { workingRequest, normalizedMetadata } = args;
+    const fastpath = shouldUseHeavyInputFastpath(normalizedMetadata);
+    if (fastpath.hit) { // Side effect only: annotates normalizedMetadata; the returned payload is the same either way.
+        markHeavyInputFastpath({
+            metadata: normalizedMetadata,
+            estimatedInputTokens: fastpath.estimatedInputTokens,
+            reason: "metadata_threshold",
+        });
+    }
+    // Hard rule: captured request must preserve full semantic payload.
+    return { // Fields are passed by reference; no cloning happens in this function.
+        model: workingRequest.model,
+        messages: workingRequest.messages,
+        tools: workingRequest.tools,
+        parameters: workingRequest.parameters,
+    };
+}
+function estimateContentChars(content, cap) { // Approximates the character count of message content, never exceeding `cap`.
+    if (cap <= 0 || content === undefined || content === null) {
+        return 0;
+    }
+    if (typeof content === "string") {
+        return Math.min(content.length, cap);
+    }
+    if (Array.isArray(content)) {
+        let used = 0;
+        for (const part of content) {
+            if (used >= cap) { // Stop scanning once the budget is spent.
+                break;
+            }
+            if (typeof part === "string") {
+                used += Math.min(part.length, cap - used);
+                continue;
+            }
+            if (!part || typeof part !== "object") { // Non-string primitives and null parts contribute nothing.
+                continue;
+            }
+            const record = part;
+            if (typeof record.text === "string") { // Only the first matching text field is counted per part.
+                used += Math.min(record.text.length, cap - used);
+            }
+            else if (typeof record.input_text === "string") {
+                used += Math.min(record.input_text.length, cap - used);
+            }
+            else if (typeof record.output_text === "string") {
+                used += Math.min(record.output_text.length, cap - used);
+            }
+            else {
+                used += Math.min(64, cap - used); // Flat 64-char allowance for object parts without a text field.
+            }
+        }
+        return used;
+    }
+    return Math.min(64, cap); // Content that is neither string nor array gets the same flat allowance.
+}
+export function roughEstimateInputTokensFromRequest(request) { // Cheap token estimate from capped character counts of messages and tools; returns at least 1.
+    const config = getConfig();
+    let chars = 0;
+    const charCap = Math.max(config.inputTokenThreshold * 8, 16_384); // Message scanning stops once this many characters are counted.
+    const messages = Array.isArray(request.messages) ? request.messages : [];
+    for (const message of messages) {
+        if (chars >= charCap) {
+            break;
+        }
+        if (!message || typeof message !== "object") {
+            chars += 16; // Flat allowance for entries that are not objects.
+            continue;
+        }
+        const record = message;
+        if (typeof record.role === "string") {
+            chars += Math.min(record.role.length, charCap - chars);
+        }
+        if (typeof record.name === "string") {
+            chars += Math.min(record.name.length, Math.max(0, charCap - chars));
+        }
+        if (typeof record.tool_call_id === "string") {
+            chars += Math.min(record.tool_call_id.length, Math.max(0, charCap - chars));
+        }
+        chars += estimateContentChars(record.content, Math.max(0, charCap - chars));
+        if (Array.isArray(record.tool_calls)) {
+            chars += Math.min(record.tool_calls.length * 128, Math.max(0, charCap - chars)); // 128 chars per tool call; call contents are not inspected.
+        }
+    }
+    if (Array.isArray(request.tools)) {
+        chars += request.tools.length * 256; // 256 chars per tool definition; this addition is not limited by charCap.
+    }
+    const estimated = Math.max(Math.ceil(chars / 3.5), messages.length * 8 + (Array.isArray(request.tools) ? request.tools.length * 32 : 0)); // Presumably ~3.5 chars per token, with a per-message/per-tool floor.
+    return Math.max(1, Math.floor(estimated));
+}
+export function resolveHeavyInputTokenThreshold() { // Exposes the env-driven input-token threshold from getConfig().
+    return getConfig().inputTokenThreshold;
+}

package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-route-and-outbound.js CHANGED Viewed

@@ -1,4 +1,3 @@
-import { jsonClone } from "../types/json.js";
 import { runReqProcessStage2RouteSelect } from "./stages/req_process/req_process_stage2_route_select/index.js";
 import { buildAdapterContextFromNormalized } from "./hub-pipeline-adapter-context.js";
 import { extractSessionIdentifiersFromMetadata } from "./session-identifiers.js";
@@ -8,6 +7,7 @@ import { logHubStageTiming } from "./hub-stage-timing.js";
 import { shouldRecordSnapshots } from "../../snapshot-utils.js";
 import { createSnapshotRecorder } from "../snapshot-recorder.js";
 import { applyOutboundStreamPreferenceWithNative, applyHasImageAttachmentFlagWithNative, buildCapturedChatRequestSnapshotWithNative, buildHubPipelineResultMetadataWithNative, buildReqOutboundNodeResultWithNative, buildRouterMetadataInputWithNative, resolveOutboundStreamIntentWithNative, syncSessionIdentifiersToMetadataWithNative, } from "../../../router/virtual-router/engine-selection/native-hub-pipeline-orchestration-semantics.js";
+import { buildCapturedChatRequestInput } from "./hub-pipeline-heavy-input-fastpath.js";
 export async function executeRouteAndBuildOutbound(args) {
     const { normalized, hooks, routerEngine, config, nodeResults, inboundRecorder, activeProcessMode, responsesResume, serverToolRequired, hasImageAttachment, passthroughAudit, rawRequest, contextSnapshot, semanticMapper, effectivePolicy, shadowCompareBaselineMode, routeSelectTiming, } = args;
     let { workingRequest } = args;
@@ -16,6 +16,11 @@ export async function executeRouteAndBuildOutbound(args) {
     // so that the later AdapterContext (response-side servertool) can also read the same sessionId /
     // conversationId, which sticky-session logic (e.g. stopMessage) relies on.
     const normalizedMetadata = normalized.metadata;
+    const routeRuntimeDirectives = normalizedMetadata &&
+        typeof normalizedMetadata.__rt === "object" &&
+        !Array.isArray(normalizedMetadata.__rt)
+        ? normalizedMetadata.__rt
+        : undefined;
     if (normalizedMetadata && typeof normalizedMetadata === "object") {
         const next = syncSessionIdentifiersToMetadataWithNative({
             metadata: normalizedMetadata,
@@ -43,6 +48,11 @@ export async function executeRouteAndBuildOutbound(args) {
         conversationId: sessionIdentifiers.conversationId,
         metadata: normalizedMetadata,
     });
+    if (routeRuntimeDirectives) {
+        metadataInput.__rt = {
+            ...routeRuntimeDirectives,
+        };
+    }
     if (routeSelectTiming?.enabled) {
         logHubStageTiming(routeSelectTiming.requestId ?? normalized.id, "req_process.stage2_route_select", "start");
     }
@@ -60,7 +70,8 @@ export async function executeRouteAndBuildOutbound(args) {
     try {
         const logger = (normalized.metadata &&
             normalized.metadata.logger);
-        if (logger &&
+        if (routeRuntimeDirectives?.disableVirtualRouterHitLog !== true &&
+            logger &&
             typeof logger.logVirtualRouterHit === "function" &&
             routing.decision?.routeName &&
             routing.target?.providerKey) {
@@ -141,16 +152,10 @@ export async function executeRouteAndBuildOutbound(args) {
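     // Note: we no longer branch on processMode (passthrough/chat) here. Even when a
     // route marks processMode as passthrough, we still keep one normalized Chat request
     // snapshot so passively triggered servertools such as stopMessage can use it in the response stage.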
-    const capturedChatRequest = buildCapturedChatRequestSnapshotWithNative({
-        model: workingRequest.model,
-        messages: jsonClone(workingRequest.messages),
-        tools: workingRequest.tools
-            ? jsonClone(workingRequest.tools)
-            : workingRequest.tools,
-        parameters: workingRequest.parameters
-            ? jsonClone(workingRequest.parameters)
-            : workingRequest.parameters,
-    });
+    const capturedChatRequest = buildCapturedChatRequestSnapshotWithNative(buildCapturedChatRequestInput({
+        workingRequest,
+        normalizedMetadata: normalized.metadata,
+    }));
     const metadata = buildHubPipelineResultMetadataWithNative({
         normalized: {
             metadata: normalized.metadata,

package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-stage-timing.d.ts CHANGED Viewed

@@ -1,5 +1,16 @@
 export declare function isHubStageTimingDetailEnabled(): boolean;
 export declare function clearHubStageTiming(requestId: string | undefined | null): void;
+export type HubStageTopSummaryEntry = { // One row of the per-request stage timing summary.
+    stage: string; // Stage name as passed to the timing logger.
+    totalMs: number; // Sum of recorded durations for the stage, rounded.
+    count: number; // Number of recorded durations.
+    avgMs: number; // Rounded totalMs divided by count (0 when count is 0).
+    maxMs: number; // Largest single recorded duration, rounded.
+};
+export declare function peekHubStageTopSummary(requestId: string | undefined | null, options?: { // Returns the slowest stages recorded for a request; empty when none.
+    topN?: number; // Maximum number of rows; values below 1 are raised to 1.
+    minMs?: number; // Rows with totalMs below this are dropped; negatives are raised to 0.
+}): HubStageTopSummaryEntry[];
 export declare function logHubStageTiming(requestId: string, stage: string, phase: 'start' | 'completed' | 'error', details?: Record<string, unknown>): void;
 export declare function measureHubStage<T>(requestId: string, stage: string, fn: () => Promise<T> | T, options?: {
     startDetails?: Record<string, unknown>;

package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-stage-timing.js CHANGED Viewed

@@ -1,9 +1,13 @@
 const truthy = new Set(['1', 'true', 'yes', 'on']);
 const falsy = new Set(['0', 'false', 'no', 'off']);
+// Native alignment note: timing integrates with *WithNative stage orchestration flow.
 const REQUEST_TIMELINES = new Map();
 const REQUEST_TIMELINE_TTL_MS = 30 * 60 * 1000;
 const REQUEST_TIMELINE_MAX = 4096;
 const DEFAULT_HUB_STAGE_LOG_MIN_MS = 50;
+const DEFAULT_HUB_STAGE_TOP_N = 5; // Default row limit for the top-stage summary.
+const DEFAULT_HUB_STAGE_TOP_MIN_MS = 5; // Default minimum totalMs for a stage to appear in the summary.
+const REQUEST_STAGE_BREAKDOWNS = new Map(); // requestId -> Map(stage -> { totalMs, count, maxMs }).
 function resolveBool(raw, fallback) {
     if (raw === undefined) {
         return fallback;
@@ -62,6 +66,7 @@ function prune(nowMs) {
     for (const [key, timeline] of REQUEST_TIMELINES.entries()) {
         if (nowMs - timeline.lastAtMs >= REQUEST_TIMELINE_TTL_MS) {
             REQUEST_TIMELINES.delete(key);
+            REQUEST_STAGE_BREAKDOWNS.delete(key);
         }
     }
     while (REQUEST_TIMELINES.size > REQUEST_TIMELINE_MAX) {
@@ -70,6 +75,7 @@ function prune(nowMs) {
             break;
         }
         REQUEST_TIMELINES.delete(oldestKey);
+        REQUEST_STAGE_BREAKDOWNS.delete(oldestKey);
     }
 }
 function touchTiming(requestId) {
@@ -125,8 +131,82 @@ export function clearHubStageTiming(requestId) {
         return;
     }
     REQUEST_TIMELINES.delete(requestId);
+    REQUEST_STAGE_BREAKDOWNS.delete(requestId);
+}
+function recordHubStageElapsed(requestId, stage, elapsedMs) { // Accumulates one duration into the per-request, per-stage breakdown.
+    if (!requestId || !stage || !Number.isFinite(elapsedMs) || elapsedMs < 0) { // Ignore unusable samples.
+        return;
+    }
+    const nowMs = Date.now();
+    prune(nowMs); // NOTE(review): the visible parts of prune() only evict breakdowns whose requestId is also in REQUEST_TIMELINES - confirm ids without a timeline cannot accumulate here.
+    const byStage = REQUEST_STAGE_BREAKDOWNS.get(requestId) ?? new Map();
+    if (!REQUEST_STAGE_BREAKDOWNS.has(requestId)) { // Register the freshly created per-request map.
+        REQUEST_STAGE_BREAKDOWNS.set(requestId, byStage);
+    }
+    const existing = byStage.get(stage);
+    if (!existing) { // First sample for this stage.
+        byStage.set(stage, {
+            totalMs: elapsedMs,
+            count: 1,
+            maxMs: elapsedMs
+        });
+        return;
+    }
+    existing.totalMs += elapsedMs; // Later samples update the stored stats in place.
+    existing.count += 1;
+    existing.maxMs = Math.max(existing.maxMs, elapsedMs);
+}
+function readIntEnv(name, fallback) { // Returns env var `name` parsed as a positive base-10 integer, else `fallback`.
+    const raw = process.env[name];
+    const parsed = Number.parseInt(String(raw ?? '').trim(), 10); // An unset variable parses as NaN.
+    if (Number.isFinite(parsed) && parsed > 0) { // NOTE(review): 0 is rejected, so ROUTECODEX_HUB_STAGE_TOP_MIN_MS=0 yields the default instead - confirm intended.
+        return parsed;
+    }
+    return fallback;
+}
+export function peekHubStageTopSummary(requestId, options) { // Returns the highest-totalMs stages recorded for `requestId`; reads the breakdown without modifying it.
+    if (!requestId) {
+        return [];
+    }
+    const byStage = REQUEST_STAGE_BREAKDOWNS.get(requestId);
+    if (!byStage || !byStage.size) { // Nothing recorded for this request.
+        return [];
+    }
+    const topN = Math.max(1, options?.topN ?? readIntEnv('ROUTECODEX_HUB_STAGE_TOP_N', DEFAULT_HUB_STAGE_TOP_N)); // Explicit option beats env var beats default; at least 1.
+    const minMs = Math.max(0, options?.minMs ?? readIntEnv('ROUTECODEX_HUB_STAGE_TOP_MIN_MS', DEFAULT_HUB_STAGE_TOP_MIN_MS)); // Same precedence; never negative.
+    return Array.from(byStage.entries())
+        .map(([stage, stats]) => {
+        const totalMs = Math.max(0, Math.round(stats.totalMs));
+        const count = Math.max(0, Math.floor(stats.count));
+        const maxMs = Math.max(0, Math.round(stats.maxMs));
+        const avgMs = count > 0 ? Math.max(0, Math.round(totalMs / count)) : 0; // Average is derived from the already-rounded total.
+        return {
+            stage,
+            totalMs,
+            count,
+            avgMs,
+            maxMs
+        };
+    })
+        .filter((entry) => entry.totalMs >= minMs) // Drop stages below the reporting floor.
+        .sort((a, b) => b.totalMs - a.totalMs) // Slowest first.
+        .slice(0, topN);
 }
 export function logHubStageTiming(requestId, stage, phase, details) {
+    const stageElapsedMs = phase === 'completed' || phase === 'error'
+        ? (typeof details?.elapsedMs === 'number'
+            ? details.elapsedMs
+            : typeof details?.nativeMs === 'number'
+                ? details.nativeMs
+                : undefined)
+        : undefined;
+    if (requestId &&
+        stage &&
+        typeof stageElapsedMs === 'number' &&
+        Number.isFinite(stageElapsedMs) &&
+        stageElapsedMs >= 0) {
+        recordHubStageElapsed(requestId, stage, stageElapsedMs);
+    }
     if (!isHubStageTimingEnabled() || !requestId || !stage) {
         return;
     }
@@ -200,10 +280,11 @@ export async function measureHubStage(requestId, stage, fn, options) {
         return value;
     }
     catch (error) {
+        const elapsedMs = Math.max(0, Date.now() - startedAt);
         const mapped = options?.mapErrorDetails?.(error);
         const message = error instanceof Error ? error.message : String(error ?? 'unknown');
         logHubStageTiming(requestId, stage, 'error', mapped ?? {
-            elapsedMs: Math.max(0, Date.now() - startedAt),
+            elapsedMs,
             message
         });
         throw error;

package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage2_semantic_map/index.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { isJsonObject, jsonClone, } from "../../../../types/json.js";
+import { isJsonObject, } from "../../../../types/json.js";
 import { applyHubOperationTableInbound } from "../../../../operation-table/operation-table-runner.js";
 import { recordStage } from "../../../stages/utils.js";
 import { liftReqInboundSemantics } from "./semantic-lift.js";
@@ -7,6 +7,28 @@ import { chatEnvelopeToStandardizedWithNative } from "../../../../../../router/v
 import { normalizeReqInboundShellLikeToolCallsWithNative } from "../../../../../../router/virtual-router/engine-selection/native-hub-pipeline-req-inbound-semantics-tools.js";
 import { fixApplyPatchToolCallsWithNative } from "../../../../../../router/virtual-router/engine-selection/native-compat-action-semantics.js";
 import { isHubStageTimingDetailEnabled, logHubStageTiming, } from "../../../hub-stage-timing.js";
+function buildSlimResponsesContextForSemantics(context) { // Returns a shallow copy of `context` without its `input` and `__captured_tool_results` keys.
+    if (!context || typeof context !== "object" || Array.isArray(context)) { // Only plain object contexts are slimmed.
+        return undefined;
+    }
+    // Keep semantic essentials only; avoid carrying full `input` history through
+    // chat_process and req_process stages (it can be huge and is not required for
+    // non-responses outbound paths).
+    //
+    // NOTE:
+    // Build the filtered object directly rather than spread-cloning and deleting the
+    // heavy keys afterwards. The copy is shallow: remaining values are shared by
+    // reference with `context`, and the loop still visits every top-level key.
+    const src = context;
+    const out = {};
+    for (const [key, value] of Object.entries(src)) {
+        if (key === "input" || key === "__captured_tool_results") { // The two heavy keys are left out.
+            continue;
+        }
+        out[key] = value;
+    }
+    return out;
+}
 export async function runReqInboundStage2SemanticMap(options) {
     const requestId = options.adapterContext.requestId || "unknown";
     const forceDetailLog = isHubStageTimingDetailEnabled();
@@ -25,8 +47,12 @@ export async function runReqInboundStage2SemanticMap(options) {
         const contextNode = responsesNode && isJsonObject(responsesNode.context)
             ? responsesNode.context
             : undefined;
-        return contextNode ? jsonClone(contextNode) : undefined;
+        // Perf: keep reference instead of deep clone to avoid multi-pass cloning on
+        // heavy /v1/responses histories.
+        return contextNode;
     })();
+    const semanticsResponsesContext = buildSlimResponsesContextForSemantics(preservedResponsesContext) ??
+        preservedResponsesContext;
     logHubStageTiming(requestId, "req_inbound.stage2_operation_table_inbound", "start");
     const operationTableStart = Date.now();
     applyHubOperationTableInbound({
@@ -52,11 +78,11 @@ export async function runReqInboundStage2SemanticMap(options) {
         elapsedMs: Date.now() - semanticLiftStart,
         forceLog: forceDetailLog,
     });
-    if (preservedResponsesContext) {
+    if (semanticsResponsesContext) {
         const currentSemantics = chatEnvelope.semantics;
         if (!currentSemantics || typeof currentSemantics !== "object") {
             chatEnvelope.semantics = {
-                responses: { context: jsonClone(preservedResponsesContext) },
+                responses: { context: semanticsResponsesContext },
             };
         }
         else {
@@ -69,19 +95,24 @@ export async function runReqInboundStage2SemanticMap(options) {
                     ...semantics,
                     responses: {
                         ...responsesNode,
-                        context: jsonClone(preservedResponsesContext),
+                        context: semanticsResponsesContext,
                     },
                 };
             }
         }
     }
-    normalizeReqInboundShellLikeToolCallsWithNative(chatEnvelope);
-    const fixedApplyPatch = fixApplyPatchToolCallsWithNative({
-        messages: (Array.isArray(chatEnvelope.messages)
-            ? chatEnvelope.messages
-            : []),
-    });
-    chatEnvelope.messages = fixedApplyPatch.messages;
+    // openai-responses path already ran request_inbound bridge policy in
+    // buildChatRequestFromResponses (including call-id/apply-patch compat actions).
+    // Skip duplicate message-wide normalization passes here to reduce heavy-input cost.
+    if (options.formatEnvelope.protocol !== "openai-responses") {
+        normalizeReqInboundShellLikeToolCallsWithNative(chatEnvelope);
+        const fixedApplyPatch = fixApplyPatchToolCallsWithNative({
+            messages: (Array.isArray(chatEnvelope.messages)
+                ? chatEnvelope.messages
+                : []),
+        });
+        chatEnvelope.messages = fixedApplyPatch.messages;
+    }
     logHubStageTiming(requestId, "req_inbound.stage2_validate_chat_envelope", "start");
     const validateStart = Date.now();
     validateChatEnvelopeWithNative(chatEnvelope, {
@@ -107,7 +138,7 @@ export async function runReqInboundStage2SemanticMap(options) {
         const envelopeSemantics = chatEnvelope.semantics;
         const existing = standardizedRequest.semantics;
         if (!existing || typeof existing !== "object") {
-            standardizedRequest.semantics = jsonClone(envelopeSemantics);
+            standardizedRequest.semantics = envelopeSemantics;
         }
         else {
             const existingObj = existing;
@@ -118,11 +149,13 @@ export async function runReqInboundStage2SemanticMap(options) {
                 ? envelopeResponses.context
                 : undefined;
             if (envelopeContext) {
+                const slimContext = buildSlimResponsesContextForSemantics(envelopeContext) ??
+                    envelopeContext;
                 const nextResponses = {
                     ...(isJsonObject(existingObj.responses)
                         ? existingObj.responses
                         : {}),
-                    context: jsonClone(envelopeContext),
+                    context: slimContext,
                 };
                 standardizedRequest.semantics = {
                     ...existingObj,

package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage1_semantic_map/index.js CHANGED Viewed

@@ -19,6 +19,49 @@ export async function runReqOutboundStage1SemanticMap(options) {
         request: options.request,
         adapterContext: options.adapterContext
     });
+    // Perf: when outbound target is not /v1/responses, the large responses.context
+    // semantic snapshot is not needed for provider request mapping and can cause
+    // expensive deep traversals in downstream native mappers/policy actions.
+    if (providerProtocol !== 'openai-responses') {
+        const semantics = chatEnvelope.semantics;
+        const responsesNode = semantics && typeof semantics.responses === 'object' && semantics.responses !== null && !Array.isArray(semantics.responses)
+            ? semantics.responses
+            : undefined;
+        if (responsesNode && Object.prototype.hasOwnProperty.call(responsesNode, 'context')) {
+            const { context: _unusedContext, ...restResponses } = responsesNode;
+            if (Object.keys(restResponses).length > 0) {
+                chatEnvelope.semantics = {
+                    ...(semantics ?? {}),
+                    responses: restResponses
+                };
+            }
+            else if (semantics && Object.keys(semantics).length > 0) {
+                const { responses: _unusedResponses, ...restSemantics } = semantics;
+                chatEnvelope.semantics =
+                    Object.keys(restSemantics).length > 0
+                        ? restSemantics
+                        : undefined;
+            }
+        }
+    }
+    if (providerProtocol === 'openai-responses'
+        && options.contextSnapshot
+        && typeof options.contextSnapshot === 'object'
+        && !Array.isArray(options.contextSnapshot)) {
+        const semantics = chatEnvelope.semantics && typeof chatEnvelope.semantics === 'object' && !Array.isArray(chatEnvelope.semantics)
+            ? chatEnvelope.semantics
+            : {};
+        const responsesNode = semantics.responses && typeof semantics.responses === 'object' && !Array.isArray(semantics.responses)
+            ? semantics.responses
+            : {};
+        chatEnvelope.semantics = {
+            ...semantics,
+            responses: {
+                ...responsesNode,
+                context: options.contextSnapshot
+            }
+        };
+    }
     logHubStageTiming(requestId, 'req_outbound.stage1_native_to_chat_envelope', 'completed', {
         elapsedMs: Date.now() - toChatStart,
         forceLog: forceDetailLog