@jsonstudio/rcc 0.90.814 → 0.90.872
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -0
- package/configsamples/provider-default/ali-coding-plan/config.v2.json +76 -0
- package/configsamples/provider-default/antigravity/config.v2.json +142 -0
- package/configsamples/provider-default/ark-coding-plan/config.v2.json +64 -0
- package/configsamples/provider-default/crs/config.v2.json +54 -0
- package/configsamples/provider-default/deepseek-web/config.v2.json +56 -0
- package/configsamples/provider-default/gemini/config.v2.json +43 -0
- package/configsamples/provider-default/gemini-cli/config.v2.json +45 -0
- package/configsamples/provider-default/gemini-native/config.v2.json +208 -0
- package/configsamples/provider-default/glm/config.v2.json +33 -0
- package/configsamples/provider-default/glm-anthropic/config.v2.json +29 -0
- package/configsamples/provider-default/kimi/config.v2.json +25 -0
- package/configsamples/provider-default/lmstudio/config.v2.json +79 -0
- package/configsamples/provider-default/lmstudio-proxy/config.v2.json +78 -0
- package/configsamples/provider-default/manifest.json +31 -0
- package/configsamples/provider-default/meituan/config.v2.json +20 -0
- package/configsamples/provider-default/mimo/config.v2.json +26 -0
- package/configsamples/provider-default/modelscope/config.v2.json +81 -0
- package/configsamples/provider-default/my-openai/config.v2.json +20 -0
- package/configsamples/provider-default/nvidia/config.v2.json +32 -0
- package/configsamples/provider-default/opencode-zen-free/config.v2.json +56 -0
- package/configsamples/provider-default/openrouter/config.v2.json +210 -0
- package/configsamples/provider-default/qwen/config.v2.json +38 -0
- package/configsamples/provider-default/qwenchat/config.v2.json +53 -0
- package/configsamples/provider-default/tab/config.v2.json +26 -0
- package/configsamples/provider-default/tabglm/config.v2.json +77 -0
- package/dist/build-info.js +2 -2
- package/dist/cli/commands/config.d.ts +5 -0
- package/dist/cli/commands/config.js +369 -1
- package/dist/cli/commands/config.js.map +1 -1
- package/dist/cli/commands/examples.js +3 -0
- package/dist/cli/commands/examples.js.map +1 -1
- package/dist/cli/commands/init.js +25 -1
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/launcher-kernel.js +122 -46
- package/dist/cli/commands/launcher-kernel.js.map +1 -1
- package/dist/cli/commands/start.js +60 -3
- package/dist/cli/commands/start.js.map +1 -1
- package/dist/cli/config/bundled-provider-pack.d.ts +20 -0
- package/dist/cli/config/bundled-provider-pack.js +146 -0
- package/dist/cli/config/bundled-provider-pack.js.map +1 -0
- package/dist/cli/register/status-config-commands.d.ts +2 -0
- package/dist/cli/register/status-config-commands.js.map +1 -1
- package/dist/cli.js +81 -28
- package/dist/cli.js.map +1 -1
- package/dist/debug/snapshot-store.js +2 -1
- package/dist/debug/snapshot-store.js.map +1 -1
- package/dist/index.js +23 -14
- package/dist/index.js.map +1 -1
- package/dist/manager/modules/quota/provider-quota-daemon.model-backoff.js +1 -1
- package/dist/manager/modules/quota/provider-quota-daemon.model-backoff.js.map +1 -1
- package/dist/manager/quota/provider-quota-center.js +1 -1
- package/dist/manager/quota/provider-quota-center.js.map +1 -1
- package/dist/manager/storage/file-store.js +10 -0
- package/dist/manager/storage/file-store.js.map +1 -1
- package/dist/modules/llmswitch/bridge/snapshot-recorder-runtime.js +18 -1
- package/dist/modules/llmswitch/bridge/snapshot-recorder-runtime.js.map +1 -1
- package/dist/modules/llmswitch/bridge/snapshot-recorder.js +132 -51
- package/dist/modules/llmswitch/bridge/snapshot-recorder.js.map +1 -1
- package/dist/provider-sdk/provider-runtime-inference.js +2 -2
- package/dist/provider-sdk/provider-runtime-inference.js.map +1 -1
- package/dist/providers/auth/deepseek-account-token-acquirer.js +32 -3
- package/dist/providers/auth/deepseek-account-token-acquirer.js.map +1 -1
- package/dist/providers/core/api/provider-types.d.ts +11 -0
- package/dist/providers/core/config/service-profiles.js +1 -1
- package/dist/providers/core/runtime/deepseek-http-provider.d.ts +2 -0
- package/dist/providers/core/runtime/deepseek-http-provider.js +31 -1
- package/dist/providers/core/runtime/deepseek-http-provider.js.map +1 -1
- package/dist/providers/core/runtime/qwenchat-http-provider-helpers.d.ts +3 -2
- package/dist/providers/core/runtime/qwenchat-http-provider-helpers.js +513 -96
- package/dist/providers/core/runtime/qwenchat-http-provider-helpers.js.map +1 -1
- package/dist/providers/core/runtime/standard-tool-text-harvest.d.ts +8 -0
- package/dist/providers/core/runtime/standard-tool-text-harvest.js +16 -0
- package/dist/providers/core/runtime/standard-tool-text-harvest.js.map +1 -0
- package/dist/providers/core/runtime/standard-tool-text-request-transform.d.ts +4 -1
- package/dist/providers/core/runtime/standard-tool-text-request-transform.js +121 -3
- package/dist/providers/core/runtime/standard-tool-text-request-transform.js.map +1 -1
- package/dist/providers/core/utils/snapshot-writer.js +5 -2
- package/dist/providers/core/utils/snapshot-writer.js.map +1 -1
- package/dist/providers/profile/provider-profile-loader.js +52 -1
- package/dist/providers/profile/provider-profile-loader.js.map +1 -1
- package/dist/providers/profile/provider-profile.d.ts +3 -0
- package/dist/server/handlers/handler-response-utils.js +1 -0
- package/dist/server/handlers/handler-response-utils.js.map +1 -1
- package/dist/server/handlers/images-handler.d.ts +9 -0
- package/dist/server/handlers/images-handler.js +258 -0
- package/dist/server/handlers/images-handler.js.map +1 -0
- package/dist/server/handlers/types.d.ts +7 -0
- package/dist/server/runtime/http-server/antigravity-startup-tasks.d.ts +3 -0
- package/dist/server/runtime/http-server/antigravity-startup-tasks.js +16 -0
- package/dist/server/runtime/http-server/antigravity-startup-tasks.js.map +1 -1
- package/dist/server/runtime/http-server/daemon-admin/auth-handler.js +3 -18
- package/dist/server/runtime/http-server/daemon-admin/auth-handler.js.map +1 -1
- package/dist/server/runtime/http-server/daemon-admin/providers-handler-utils.d.ts +7 -0
- package/dist/server/runtime/http-server/daemon-admin/providers-handler-utils.js +17 -0
- package/dist/server/runtime/http-server/daemon-admin/providers-handler-utils.js.map +1 -1
- package/dist/server/runtime/http-server/daemon-admin/providers-handler.js +50 -17
- package/dist/server/runtime/http-server/daemon-admin/providers-handler.js.map +1 -1
- package/dist/server/runtime/http-server/daemon-admin-routes.js +32 -2
- package/dist/server/runtime/http-server/daemon-admin-routes.js.map +1 -1
- package/dist/server/runtime/http-server/executor/provider-response-converter.js +42 -13
- package/dist/server/runtime/http-server/executor/provider-response-converter.js.map +1 -1
- package/dist/server/runtime/http-server/executor/provider-response-utils.js +41 -3
- package/dist/server/runtime/http-server/executor/provider-response-utils.js.map +1 -1
- package/dist/server/runtime/http-server/executor/usage-aggregator.js +7 -7
- package/dist/server/runtime/http-server/executor/usage-aggregator.js.map +1 -1
- package/dist/server/runtime/http-server/executor/usage-logger.d.ts +9 -0
- package/dist/server/runtime/http-server/executor/usage-logger.js +35 -2
- package/dist/server/runtime/http-server/executor/usage-logger.js.map +1 -1
- package/dist/server/runtime/http-server/executor-metadata.js +12 -4
- package/dist/server/runtime/http-server/executor-metadata.js.map +1 -1
- package/dist/server/runtime/http-server/executor-pipeline.js +24 -15
- package/dist/server/runtime/http-server/executor-pipeline.js.map +1 -1
- package/dist/server/runtime/http-server/executor-provider.d.ts +6 -1
- package/dist/server/runtime/http-server/executor-provider.js +137 -5
- package/dist/server/runtime/http-server/executor-provider.js.map +1 -1
- package/dist/server/runtime/http-server/http-server-bootstrap.js +6 -0
- package/dist/server/runtime/http-server/http-server-bootstrap.js.map +1 -1
- package/dist/server/runtime/http-server/http-server-lifecycle.js +23 -15
- package/dist/server/runtime/http-server/http-server-lifecycle.js.map +1 -1
- package/dist/server/runtime/http-server/http-server-runtime-providers.js +14 -4
- package/dist/server/runtime/http-server/http-server-runtime-providers.js.map +1 -1
- package/dist/server/runtime/http-server/http-server-runtime-setup.js +83 -1
- package/dist/server/runtime/http-server/http-server-runtime-setup.js.map +1 -1
- package/dist/server/runtime/http-server/hub-shadow-compare.js +2 -41
- package/dist/server/runtime/http-server/hub-shadow-compare.js.map +1 -1
- package/dist/server/runtime/http-server/provider-routing-scope.d.ts +9 -0
- package/dist/server/runtime/http-server/provider-routing-scope.js +20 -0
- package/dist/server/runtime/http-server/provider-routing-scope.js.map +1 -0
- package/dist/server/runtime/http-server/provider-traffic-governor.d.ts +67 -0
- package/dist/server/runtime/http-server/provider-traffic-governor.js +467 -0
- package/dist/server/runtime/http-server/provider-traffic-governor.js.map +1 -0
- package/dist/server/runtime/http-server/request-executor.d.ts +8 -0
- package/dist/server/runtime/http-server/request-executor.js +446 -21
- package/dist/server/runtime/http-server/request-executor.js.map +1 -1
- package/dist/server/runtime/http-server/routes.js +13 -0
- package/dist/server/runtime/http-server/routes.js.map +1 -1
- package/dist/server/runtime/http-server/session-client-registry.js +30 -4
- package/dist/server/runtime/http-server/session-client-registry.js.map +1 -1
- package/dist/server/runtime/http-server/session-client-route-utils.d.ts +7 -0
- package/dist/server/runtime/http-server/session-client-route-utils.js +38 -0
- package/dist/server/runtime/http-server/session-client-route-utils.js.map +1 -1
- package/dist/server/runtime/http-server/session-client-routes.js +12 -2
- package/dist/server/runtime/http-server/session-client-routes.js.map +1 -1
- package/dist/server/utils/request-id-manager.js +42 -5
- package/dist/server/utils/request-id-manager.js.map +1 -1
- package/dist/server/utils/stage-logger.d.ts +1 -0
- package/dist/server/utils/stage-logger.js +27 -0
- package/dist/server/utils/stage-logger.js.map +1 -1
- package/dist/utils/errorsamples.js +3 -1
- package/dist/utils/errorsamples.js.map +1 -1
- package/dist/utils/sensitive-redaction.d.ts +1 -0
- package/dist/utils/sensitive-redaction.js +122 -0
- package/dist/utils/sensitive-redaction.js.map +1 -0
- package/docs/INSTALLATION_AND_QUICKSTART.md +14 -1
- package/docs/PORTS.md +12 -0
- package/docs/lmstudio-tool-calling.md +25 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/compat/actions/qwenchat-web-request.d.ts +3 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/compat/actions/qwenchat-web-request.js +62 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/compat/profiles/chat-qwenchat-web.json +47 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/operation-table/operation-table-runner.js +68 -7
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper-from-chat.js +138 -3
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-chat-process-request-utils.js +24 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-execute-chat-process-entry.js +7 -1
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-execute-request-stage.js +7 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-heavy-input-fastpath.d.ts +24 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-heavy-input-fastpath.js +203 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-route-and-outbound.js +17 -12
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-stage-timing.d.ts +11 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-stage-timing.js +82 -1
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage2_semantic_map/index.js +47 -14
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage1_semantic_map/index.js +43 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/stages/resp_outbound/resp_outbound_stage1_client_remap/client-remap-protocol-switch.js +222 -19
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/policy/policy-engine.js +2 -2
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/process/chat-process-pending-tool-sync.js +24 -7
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/response/provider-response.js +90 -1
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/snapshot-recorder.d.ts +1 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/snapshot-recorder.js +252 -1
- package/node_modules/@jsonstudio/llms/dist/conversion/responses/responses-openai-bridge/utils.js +5 -3
- package/node_modules/@jsonstudio/llms/dist/conversion/responses/responses-openai-bridge.js +44 -4
- package/node_modules/@jsonstudio/llms/dist/conversion/shared/anthropic-message-utils-openai-request.d.ts +3 -1
- package/node_modules/@jsonstudio/llms/dist/conversion/shared/anthropic-message-utils-openai-request.js +20 -23
- package/node_modules/@jsonstudio/llms/dist/conversion/shared/tool-governor.js +68 -30
- package/node_modules/@jsonstudio/llms/dist/conversion/snapshot-utils.js +48 -8
- package/node_modules/@jsonstudio/llms/dist/native/router_hotpath_napi.node +0 -0
- package/node_modules/@jsonstudio/llms/dist/quota/quota-state.js +2 -2
- package/node_modules/@jsonstudio/llms/dist/router/virtual-router/engine/routing-state/store.js +35 -2
- package/node_modules/@jsonstudio/llms/dist/router/virtual-router/engine.js +9 -9
- package/node_modules/@jsonstudio/llms/dist/router/virtual-router/sticky-session-store.js +104 -18
- package/node_modules/@jsonstudio/llms/dist/servertool/engine.js +79 -32
- package/node_modules/@jsonstudio/llms/dist/servertool/handlers/vision.js +49 -0
- package/node_modules/@jsonstudio/llms/dist/servertool/pending-session.js +48 -2
- package/node_modules/@jsonstudio/llms/dist/servertool/server-side-tools.js +14 -1
- package/node_modules/@jsonstudio/llms/dist/servertool/types.d.ts +1 -0
- package/node_modules/@jsonstudio/llms/package.json +1 -1
- package/node_modules/ajv/dist/compile/jtd/serialize.js +9 -2
- package/node_modules/ajv/dist/compile/jtd/serialize.js.map +1 -1
- package/node_modules/ajv/dist/core.d.ts +1 -0
- package/node_modules/ajv/dist/core.js.map +1 -1
- package/node_modules/ajv/dist/vocabularies/validation/pattern.js +13 -4
- package/node_modules/ajv/dist/vocabularies/validation/pattern.js.map +1 -1
- package/node_modules/ajv/lib/compile/jtd/serialize.ts +13 -2
- package/node_modules/ajv/lib/core.ts +1 -0
- package/node_modules/ajv/lib/vocabularies/validation/pattern.ts +15 -4
- package/node_modules/ajv/package.json +2 -1
- package/package.json +15 -10
- package/scripts/ci/repo-sanity.mjs +23 -2
- package/scripts/ci/secrets-check.mjs +48 -0
- package/scripts/ci/silent-failure-audit.mjs +192 -0
- package/scripts/mock-provider/run-regressions.mjs +1 -0
- package/scripts/pack-mode.mjs +32 -36
- package/scripts/publish-rcc.mjs +38 -60
- package/scripts/tests/apply-patch-loop.mjs +1 -0
- package/scripts/tests/blackbox-rcc-vs-routecodex-antigravity.mjs +2 -0
- package/scripts/tools-dev/responses-debug-client/src/index.ts +8 -3
- package/scripts/verify-e2e-toolcall.mjs +1 -0
- package/scripts/verify-install-e2e.mjs +2 -1
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { shouldRecordSnapshots } from "../../snapshot-utils.js";
|
|
2
|
+
import { ensureRuntimeMetadata } from "../../runtime-metadata.js";
|
|
2
3
|
import { REQUEST_STAGE_HOOKS } from "./hub-pipeline-stage-hooks.js";
|
|
3
4
|
import { buildReqInboundSkippedNodeWithNative, coerceStandardizedRequestFromPayloadWithNative, findMappableSemanticsKeysWithNative, liftResponsesResumeIntoSemanticsWithNative, prepareRuntimeMetadataForServertoolsWithNative, syncResponsesContextFromCanonicalMessagesWithNative, } from "../../../router/virtual-router/engine-selection/native-hub-pipeline-orchestration-semantics.js";
|
|
4
5
|
import { runReqProcessStage1ToolGovernance } from "./stages/req_process/req_process_stage1_tool_governance/index.js";
|
|
@@ -7,6 +8,7 @@ import { deriveWorkingRequestFlags, estimateInputTokensForWorkingRequest, propag
|
|
|
7
8
|
import { annotatePassthroughAuditSkipped, appendPassthroughGovernanceSkippedNode, appendToolGovernanceNodeResult, propagateClockReservationToMetadata, } from "./hub-pipeline-chat-process-governance-utils.js";
|
|
8
9
|
import { createSnapshotRecorder } from "../snapshot-recorder.js";
|
|
9
10
|
import { executeRouteAndBuildOutbound } from "./hub-pipeline-route-and-outbound.js";
|
|
11
|
+
import { peekHubStageTopSummary } from "./hub-stage-timing.js";
|
|
10
12
|
export async function executeChatProcessEntryPipeline(args) {
|
|
11
13
|
const { normalized, routerEngine, config } = args;
|
|
12
14
|
const hooks = REQUEST_STAGE_HOOKS[normalized.providerProtocol];
|
|
@@ -119,7 +121,6 @@ export async function executeChatProcessEntryPipeline(args) {
|
|
|
119
121
|
normalizedMetadata: normalized.metadata ??
|
|
120
122
|
(normalized.metadata = {}),
|
|
121
123
|
});
|
|
122
|
-
const normalizedMeta = normalized.metadata;
|
|
123
124
|
// responsesResume is a client-protocol semantic (/v1/responses tool loop) and must live in chat.semantics.
|
|
124
125
|
// Do not read it from metadata once entering chat_process.
|
|
125
126
|
const { responsesResume, hasImageAttachment, serverToolRequired } = deriveWorkingRequestFlags(workingRequest);
|
|
@@ -145,6 +146,11 @@ export async function executeChatProcessEntryPipeline(args) {
|
|
|
145
146
|
enabled: false,
|
|
146
147
|
},
|
|
147
148
|
});
|
|
149
|
+
const hubStageTop = peekHubStageTopSummary(normalized.id);
|
|
150
|
+
if (hubStageTop.length) {
|
|
151
|
+
const rt = ensureRuntimeMetadata(outbound.metadata);
|
|
152
|
+
rt.hubStageTop = hubStageTop;
|
|
153
|
+
}
|
|
148
154
|
return {
|
|
149
155
|
requestId: normalized.id,
|
|
150
156
|
providerPayload: outbound.providerPayload,
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { executeRequestStageInbound, } from "./hub-pipeline-execute-request-stage-inbound.js";
|
|
2
2
|
import { executeRouteAndBuildOutbound, } from "./hub-pipeline-route-and-outbound.js";
|
|
3
|
+
import { ensureRuntimeMetadata } from "../../runtime-metadata.js";
|
|
4
|
+
import { peekHubStageTopSummary } from "./hub-stage-timing.js";
|
|
3
5
|
export async function executeRequestStagePipeline(args) {
|
|
4
6
|
const { normalized, hooks, routerEngine, config } = args;
|
|
5
7
|
const inbound = await executeRequestStageInbound({
|
|
@@ -30,6 +32,11 @@ export async function executeRequestStagePipeline(args) {
|
|
|
30
32
|
requestId: normalized.id,
|
|
31
33
|
},
|
|
32
34
|
});
|
|
35
|
+
const hubStageTop = peekHubStageTopSummary(normalized.id);
|
|
36
|
+
if (hubStageTop.length) {
|
|
37
|
+
const rt = ensureRuntimeMetadata(outbound.metadata);
|
|
38
|
+
rt.hubStageTop = hubStageTop;
|
|
39
|
+
}
|
|
33
40
|
return {
|
|
34
41
|
requestId: normalized.id,
|
|
35
42
|
providerPayload: outbound.providerPayload,
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { ProcessedRequest, StandardizedRequest } from "../types/standardized.js";
|
|
2
|
+
/**
 * Reports whether the heavy-input fastpath is switched on.
 * The implementation reads ROUTECODEX_HUB_FASTPATH_HEAVY_INPUT, then
 * RCC_HUB_FASTPATH_HEAVY_INPUT, and defaults to `true` when neither holds a
 * recognised boolean token.
 */
export declare function isHeavyInputFastpathEnabled(): boolean;
|
|
3
|
+
/**
 * Decides whether a request should take the heavy-input fastpath.
 *
 * `hit` is true only when the feature is enabled and either the runtime
 * metadata already carries `hubFastpathHeavyInput === true` or the estimated
 * input token count found in `metadata` is at least `threshold`.
 * `estimatedInputTokens` is present in the result only when a positive
 * estimate could be read from the metadata.
 */
export declare function shouldUseHeavyInputFastpath(metadata?: Record<string, unknown>): {
|
|
4
|
+
enabled: boolean;
|
|
5
|
+
hit: boolean;
|
|
6
|
+
threshold: number;
|
|
7
|
+
estimatedInputTokens?: number;
|
|
8
|
+
};
|
|
9
|
+
/**
 * Stamps the heavy-input fastpath markers onto `options.metadata` (mutating it).
 *
 * Does nothing when `metadata` is missing / not an object or when the feature
 * is disabled. Otherwise records the flag, the `reason` and the active
 * threshold in the runtime metadata, and, when `estimatedInputTokens` is a
 * finite number greater than zero, stores its floored value (minimum 1) both
 * on `metadata.estimatedInputTokens` and in the runtime metadata.
 */
export declare function markHeavyInputFastpath(options: {
|
|
10
|
+
metadata?: Record<string, unknown>;
|
|
11
|
+
estimatedInputTokens?: number;
|
|
12
|
+
reason: "rough_estimate" | "full_estimate" | "metadata_threshold";
|
|
13
|
+
}): void;
|
|
14
|
+
/**
 * Builds the input for the captured chat request snapshot.
 *
 * Returns the `model`, `messages`, `tools` and `parameters` of
 * `workingRequest` by reference (nothing is cloned or trimmed). As a side
 * effect, when the fastpath check on `normalizedMetadata` hits, the metadata
 * is marked with reason "metadata_threshold".
 */
export declare function buildCapturedChatRequestInput(args: {
|
|
15
|
+
workingRequest: StandardizedRequest | ProcessedRequest;
|
|
16
|
+
normalizedMetadata?: Record<string, unknown>;
|
|
17
|
+
}): {
|
|
18
|
+
model?: unknown;
|
|
19
|
+
messages?: unknown;
|
|
20
|
+
tools?: unknown;
|
|
21
|
+
parameters?: unknown;
|
|
22
|
+
};
|
|
23
|
+
/**
 * Cheap, character-count based estimate of the request's input tokens.
 * The implementation divides a capped character tally by 3.5, applies a
 * per-message / per-tool lower bound, and never returns less than 1.
 */
export declare function roughEstimateInputTokensFromRequest(request: StandardizedRequest | ProcessedRequest): number;
|
|
24
|
+
/**
 * Returns the input-token threshold currently in effect for the fastpath:
 * the value of ROUTECODEX_HUB_FASTPATH_INPUT_TOKEN_THRESHOLD or
 * RCC_HUB_FASTPATH_INPUT_TOKEN_THRESHOLD when valid, otherwise 120000.
 */
export declare function resolveHeavyInputTokenThreshold(): number;
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import { ensureRuntimeMetadata, readRuntimeMetadata } from "../../runtime-metadata.js";
|
|
2
|
+
const TRUTHY = new Set(["1", "true", "yes", "on"]);
|
|
3
|
+
const FALSY = new Set(["0", "false", "no", "off"]);
|
|
4
|
+
const DEFAULT_INPUT_TOKEN_THRESHOLD = 120_000;
|
|
5
|
+
/**
 * Resolve a boolean flag from a prioritised list of environment variables.
 *
 * Variables are inspected in order. The first one whose trimmed, lower-cased
 * value is a member of TRUTHY or FALSY decides the result; variables that are
 * unset or hold an unrecognised value are passed over.
 *
 * @param {Iterable<string>} names - Environment variable names, highest priority first.
 * @param {boolean} fallback - Value returned when no variable is decisive.
 * @returns {boolean} The resolved flag.
 */
function readBooleanEnv(names, fallback) {
  for (const key of names) {
    const value = process.env[key];
    const token =
      value === undefined ? undefined : String(value).trim().toLowerCase();
    if (token !== undefined && (TRUTHY.has(token) || FALSY.has(token))) {
      return TRUTHY.has(token);
    }
  }
  return fallback;
}
|
|
21
|
+
/**
 * Resolve a positive integer from a prioritised list of environment variables.
 *
 * Variables are inspected in order; the first one holding a strictly positive
 * decimal integer wins. Unset variables and values that are not a plain run of
 * decimal digits (optionally prefixed with "+") are passed over.
 *
 * @param {Iterable<string>} names - Environment variable names, highest priority first.
 * @param {number} fallback - Value returned when no variable holds a valid integer.
 * @returns {number} The parsed integer, or `fallback`.
 */
function readPositiveIntEnv(names, fallback) {
  for (const name of names) {
    const raw = process.env[name];
    if (raw === undefined) {
      continue;
    }
    const text = String(raw).trim();
    // Number.parseInt accepts a numeric prefix and silently drops the rest
    // ("1e6" -> 1, "120_000" -> 120), which would turn a typo into an absurdly
    // low value. Require the whole string to be decimal digits instead.
    if (!/^\+?\d+$/.test(text)) {
      continue;
    }
    const parsed = Number(text);
    if (Number.isFinite(parsed) && parsed > 0) {
      return parsed;
    }
  }
  return fallback;
}
|
|
34
|
+
/**
 * Snapshot the fastpath configuration from the environment.
 *
 * The environment is re-read on every call, so changes made to the variables
 * at runtime are picked up by the next caller.
 *
 * @returns {{ enabled: boolean, inputTokenThreshold: number }} Feature switch
 *   (defaults to `true`) and token threshold (defaults to
 *   DEFAULT_INPUT_TOKEN_THRESHOLD).
 */
function getConfig() {
  const enabled = readBooleanEnv(
    ["ROUTECODEX_HUB_FASTPATH_HEAVY_INPUT", "RCC_HUB_FASTPATH_HEAVY_INPUT"],
    true,
  );
  const inputTokenThreshold = readPositiveIntEnv(
    [
      "ROUTECODEX_HUB_FASTPATH_INPUT_TOKEN_THRESHOLD",
      "RCC_HUB_FASTPATH_INPUT_TOKEN_THRESHOLD",
    ],
    DEFAULT_INPUT_TOKEN_THRESHOLD,
  );
  return { enabled, inputTokenThreshold };
}
|
|
46
|
+
/**
 * Tell whether the heavy-input fastpath feature is switched on.
 *
 * @returns {boolean} The `enabled` field of the current configuration.
 */
export function isHeavyInputFastpathEnabled() {
  const { enabled } = getConfig();
  return enabled;
}
|
|
49
|
+
/**
 * Extract a positive estimated-input-token count from request metadata.
 *
 * `metadata.estimatedInputTokens` is preferred; when it does not yield a
 * positive count, the runtime metadata's `hubFastpathEstimatedInputTokens`
 * is consulted instead. Values are floored; anything that is not a finite
 * number or floors to zero or below is treated as absent.
 *
 * @param {object|undefined} metadata - Request metadata, possibly missing.
 * @returns {number|undefined} A positive integer estimate, or `undefined`.
 */
function readEstimatedInputTokens(metadata) {
  if (!metadata || typeof metadata !== "object") {
    return undefined;
  }
  const asPositiveCount = (value) => {
    if (typeof value !== "number" || !Number.isFinite(value)) {
      return undefined;
    }
    const whole = Math.floor(value);
    return whole > 0 ? whole : undefined;
  };
  const direct = asPositiveCount(metadata.estimatedInputTokens);
  if (direct !== undefined) {
    return direct;
  }
  // Only touch the runtime metadata when the top-level field was unusable.
  const runtime = readRuntimeMetadata(metadata);
  return asPositiveCount(runtime?.hubFastpathEstimatedInputTokens);
}
|
|
68
|
+
/**
 * Decide whether the heavy-input fastpath applies to a request.
 *
 * The fastpath hits when the feature is enabled and either the runtime
 * metadata already carries `hubFastpathHeavyInput === true`, or the estimated
 * input token count read from `metadata` reaches the configured threshold.
 *
 * @param {object} [metadata] - Request metadata, possibly missing.
 * @returns {{ enabled: boolean, hit: boolean, threshold: number, estimatedInputTokens?: number }}
 *   The decision; `estimatedInputTokens` is included only when an estimate was found.
 */
export function shouldUseHeavyInputFastpath(metadata) {
  const config = getConfig();
  const estimatedInputTokens = readEstimatedInputTokens(metadata);
  const runtime = metadata ? readRuntimeMetadata(metadata) : undefined;
  const hasEstimate = typeof estimatedInputTokens === "number";
  const forcedByRuntime = runtime?.hubFastpathHeavyInput === true;
  const overThreshold =
    hasEstimate && estimatedInputTokens >= config.inputTokenThreshold;
  const verdict = {
    enabled: config.enabled,
    hit: config.enabled && (forcedByRuntime || overThreshold),
    threshold: config.inputTokenThreshold,
  };
  if (hasEstimate) {
    verdict.estimatedInputTokens = estimatedInputTokens;
  }
  return verdict;
}
|
|
89
|
+
/**
 * Record on the request metadata that the heavy-input fastpath was taken.
 *
 * Mutates `options.metadata`: the runtime metadata receives the flag, the
 * reason and the active threshold. When a finite estimate greater than zero is
 * supplied, its floored value (at least 1) is written both to
 * `metadata.estimatedInputTokens` and to the runtime metadata.
 * The call is a no-op when `metadata` is missing / not an object or when the
 * feature is disabled.
 *
 * @param {{ metadata?: object, estimatedInputTokens?: number, reason: string }} options
 * @returns {void}
 */
export function markHeavyInputFastpath(options) {
  const { metadata, estimatedInputTokens, reason } = options;
  const usable = Boolean(metadata) && typeof metadata === "object";
  if (!usable) {
    return;
  }
  const { enabled, inputTokenThreshold } = getConfig();
  if (!enabled) {
    return;
  }
  const runtime = ensureRuntimeMetadata(metadata);
  Object.assign(runtime, {
    hubFastpathHeavyInput: true,
    hubFastpathReason: reason,
    hubFastpathInputTokenThreshold: inputTokenThreshold,
  });
  const hasEstimate =
    typeof estimatedInputTokens === "number" &&
    Number.isFinite(estimatedInputTokens) &&
    estimatedInputTokens > 0;
  if (!hasEstimate) {
    return;
  }
  // Fractions below one still count as a single token.
  const rounded = Math.max(1, Math.floor(estimatedInputTokens));
  metadata.estimatedInputTokens = rounded;
  runtime.hubFastpathEstimatedInputTokens = rounded;
}
|
|
111
|
+
/**
 * Assemble the input used to snapshot the captured chat request.
 *
 * When the fastpath check on `normalizedMetadata` hits, the metadata is marked
 * with reason "metadata_threshold" as a side effect. The returned object always
 * carries the request's `model`, `messages`, `tools` and `parameters` by
 * reference — nothing is cloned or trimmed.
 *
 * @param {{ workingRequest: object, normalizedMetadata?: object }} args
 * @returns {{ model: unknown, messages: unknown, tools: unknown, parameters: unknown }}
 */
export function buildCapturedChatRequestInput(args) {
  const { workingRequest, normalizedMetadata } = args;
  const { hit, estimatedInputTokens } =
    shouldUseHeavyInputFastpath(normalizedMetadata);
  if (hit) {
    markHeavyInputFastpath({
      metadata: normalizedMetadata,
      estimatedInputTokens,
      reason: "metadata_threshold",
    });
  }
  // Hard rule: captured request must preserve full semantic payload.
  const { model, messages, tools, parameters } = workingRequest;
  return { model, messages, tools, parameters };
}
|
|
129
|
+
/**
 * Count the characters of a message `content` value, never exceeding `cap`.
 *
 * - `null` / `undefined` content, or a non-positive cap, counts as 0.
 * - A string counts its length.
 * - An array is walked part by part: string parts count their length; object
 *   parts count the length of the first string found among `text`,
 *   `input_text`, `output_text`, or a flat 64 when none is a string; any
 *   other part is ignored. Walking stops once the cap is reached.
 * - Any other value counts as a flat 64.
 *
 * @param {unknown} content - Message content in any of the shapes above.
 * @param {number} cap - Upper bound for the returned count.
 * @returns {number} Character count, at most `cap`.
 */
function estimateContentChars(content, cap) {
  if (cap <= 0 || content === undefined || content === null) {
    return 0;
  }
  if (typeof content === "string") {
    return Math.min(content.length, cap);
  }
  if (!Array.isArray(content)) {
    return Math.min(64, cap);
  }
  const textFields = ["text", "input_text", "output_text"];
  let total = 0;
  for (const item of content) {
    if (total >= cap) {
      break;
    }
    let weight;
    if (typeof item === "string") {
      weight = item.length;
    } else if (item && typeof item === "object") {
      const field = textFields.find((key) => typeof item[key] === "string");
      // Parts without a textual field (e.g. images) get a flat placeholder weight.
      weight = field === undefined ? 64 : item[field].length;
    } else {
      continue;
    }
    total += Math.min(weight, cap - total);
  }
  return total;
}
|
|
167
|
+
/**
 * Produce a cheap, character-based estimate of a request's input tokens.
 *
 * Message characters (role, name, tool_call_id, content, plus a flat 128 per
 * tool call) are tallied up to a cap of max(threshold * 8, 16384); entries
 * that are not objects add a flat 16. Each declared tool then adds 256
 * characters on top of the tally. The estimate is the larger of
 * ceil(chars / 3.5) and a floor of 8 per message + 32 per tool, never below 1.
 *
 * @param {{ messages?: unknown, tools?: unknown }} request - The chat request.
 * @returns {number} Estimated input tokens (integer, at least 1).
 */
export function roughEstimateInputTokensFromRequest(request) {
  const { inputTokenThreshold } = getConfig();
  const charCap = Math.max(inputTokenThreshold * 8, 16_384);
  const messages = Array.isArray(request.messages) ? request.messages : [];
  const toolCount = Array.isArray(request.tools) ? request.tools.length : 0;
  let chars = 0;
  const remaining = () => Math.max(0, charCap - chars);
  const addCapped = (amount) => {
    chars += Math.min(amount, remaining());
  };
  for (const entry of messages) {
    if (chars >= charCap) {
      break;
    }
    if (!entry || typeof entry !== "object") {
      chars += 16;
      continue;
    }
    for (const key of ["role", "name", "tool_call_id"]) {
      if (typeof entry[key] === "string") {
        addCapped(entry[key].length);
      }
    }
    chars += estimateContentChars(entry.content, remaining());
    if (Array.isArray(entry.tool_calls)) {
      addCapped(entry.tool_calls.length * 128);
    }
  }
  // Tool definitions are added on top of the message tally without capping.
  chars += toolCount * 256;
  const byChars = Math.ceil(chars / 3.5);
  const byCount = messages.length * 8 + toolCount * 32;
  return Math.max(1, Math.floor(Math.max(byChars, byCount)));
}
|
|
201
|
+
/**
 * Return the input-token threshold currently in effect for the fastpath.
 *
 * @returns {number} The `inputTokenThreshold` field of the current configuration.
 */
export function resolveHeavyInputTokenThreshold() {
  const { inputTokenThreshold } = getConfig();
  return inputTokenThreshold;
}
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { jsonClone } from "../types/json.js";
|
|
2
1
|
import { runReqProcessStage2RouteSelect } from "./stages/req_process/req_process_stage2_route_select/index.js";
|
|
3
2
|
import { buildAdapterContextFromNormalized } from "./hub-pipeline-adapter-context.js";
|
|
4
3
|
import { extractSessionIdentifiersFromMetadata } from "./session-identifiers.js";
|
|
@@ -8,6 +7,7 @@ import { logHubStageTiming } from "./hub-stage-timing.js";
|
|
|
8
7
|
import { shouldRecordSnapshots } from "../../snapshot-utils.js";
|
|
9
8
|
import { createSnapshotRecorder } from "../snapshot-recorder.js";
|
|
10
9
|
import { applyOutboundStreamPreferenceWithNative, applyHasImageAttachmentFlagWithNative, buildCapturedChatRequestSnapshotWithNative, buildHubPipelineResultMetadataWithNative, buildReqOutboundNodeResultWithNative, buildRouterMetadataInputWithNative, resolveOutboundStreamIntentWithNative, syncSessionIdentifiersToMetadataWithNative, } from "../../../router/virtual-router/engine-selection/native-hub-pipeline-orchestration-semantics.js";
|
|
10
|
+
import { buildCapturedChatRequestInput } from "./hub-pipeline-heavy-input-fastpath.js";
|
|
11
11
|
export async function executeRouteAndBuildOutbound(args) {
|
|
12
12
|
const { normalized, hooks, routerEngine, config, nodeResults, inboundRecorder, activeProcessMode, responsesResume, serverToolRequired, hasImageAttachment, passthroughAudit, rawRequest, contextSnapshot, semanticMapper, effectivePolicy, shadowCompareBaselineMode, routeSelectTiming, } = args;
|
|
13
13
|
let { workingRequest } = args;
|
|
@@ -16,6 +16,11 @@ export async function executeRouteAndBuildOutbound(args) {
|
|
|
16
16
|
// 便于后续 AdapterContext(响应侧 servertool)也能访问到相同的 sessionId /
|
|
17
17
|
// conversationId,用于 sticky-session 相关逻辑(例如 stopMessage)。
|
|
18
18
|
const normalizedMetadata = normalized.metadata;
|
|
19
|
+
const routeRuntimeDirectives = normalizedMetadata &&
|
|
20
|
+
typeof normalizedMetadata.__rt === "object" &&
|
|
21
|
+
!Array.isArray(normalizedMetadata.__rt)
|
|
22
|
+
? normalizedMetadata.__rt
|
|
23
|
+
: undefined;
|
|
19
24
|
if (normalizedMetadata && typeof normalizedMetadata === "object") {
|
|
20
25
|
const next = syncSessionIdentifiersToMetadataWithNative({
|
|
21
26
|
metadata: normalizedMetadata,
|
|
@@ -43,6 +48,11 @@ export async function executeRouteAndBuildOutbound(args) {
|
|
|
43
48
|
conversationId: sessionIdentifiers.conversationId,
|
|
44
49
|
metadata: normalizedMetadata,
|
|
45
50
|
});
|
|
51
|
+
if (routeRuntimeDirectives) {
|
|
52
|
+
metadataInput.__rt = {
|
|
53
|
+
...routeRuntimeDirectives,
|
|
54
|
+
};
|
|
55
|
+
}
|
|
46
56
|
if (routeSelectTiming?.enabled) {
|
|
47
57
|
logHubStageTiming(routeSelectTiming.requestId ?? normalized.id, "req_process.stage2_route_select", "start");
|
|
48
58
|
}
|
|
@@ -60,7 +70,8 @@ export async function executeRouteAndBuildOutbound(args) {
|
|
|
60
70
|
try {
|
|
61
71
|
const logger = (normalized.metadata &&
|
|
62
72
|
normalized.metadata.logger);
|
|
63
|
-
if (
|
|
73
|
+
if (routeRuntimeDirectives?.disableVirtualRouterHitLog !== true &&
|
|
74
|
+
logger &&
|
|
64
75
|
typeof logger.logVirtualRouterHit === "function" &&
|
|
65
76
|
routing.decision?.routeName &&
|
|
66
77
|
routing.target?.providerKey) {
|
|
@@ -141,16 +152,10 @@ export async function executeRouteAndBuildOutbound(args) {
|
|
|
141
152
|
// 注意:这里不再根据 processMode(passthrough/chat) 做分支判断——即使某些
|
|
142
153
|
// route 将 processMode 标记为 passthrough,我们仍然需要保留一次规范化后的
|
|
143
154
|
// Chat 请求快照,供 stopMessage 等被动触发型 servertool 在响应阶段使用。
|
|
144
|
-
const capturedChatRequest = buildCapturedChatRequestSnapshotWithNative({
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
? jsonClone(workingRequest.tools)
|
|
149
|
-
: workingRequest.tools,
|
|
150
|
-
parameters: workingRequest.parameters
|
|
151
|
-
? jsonClone(workingRequest.parameters)
|
|
152
|
-
: workingRequest.parameters,
|
|
153
|
-
});
|
|
155
|
+
const capturedChatRequest = buildCapturedChatRequestSnapshotWithNative(buildCapturedChatRequestInput({
|
|
156
|
+
workingRequest,
|
|
157
|
+
normalizedMetadata: normalized.metadata,
|
|
158
|
+
}));
|
|
154
159
|
const metadata = buildHubPipelineResultMetadataWithNative({
|
|
155
160
|
normalized: {
|
|
156
161
|
metadata: normalized.metadata,
|
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
export declare function isHubStageTimingDetailEnabled(): boolean;
|
|
2
2
|
export declare function clearHubStageTiming(requestId: string | undefined | null): void;
|
|
3
|
+
/**
 * One row of the per-request stage timing summary returned by
 * `peekHubStageTopSummary`.
 *
 * - `stage`   name of the stage the samples were recorded under.
 * - `totalMs` sum of the recorded elapsed times for the stage, rounded.
 * - `count`   number of samples recorded for the stage.
 * - `avgMs`   rounded `totalMs / count` (0 when `count` is 0).
 * - `maxMs`   largest single recorded sample, rounded.
 */
export type HubStageTopSummaryEntry = {
|
|
4
|
+
stage: string;
|
|
5
|
+
totalMs: number;
|
|
6
|
+
count: number;
|
|
7
|
+
avgMs: number;
|
|
8
|
+
maxMs: number;
|
|
9
|
+
};
|
|
10
|
+
/**
 * Returns the most expensive stages recorded so far for a request, without
 * clearing the recorded data.
 *
 * Entries are ordered by `totalMs`, largest first. An empty array is returned
 * when `requestId` is empty or nothing has been recorded for it.
 *
 * @param requestId Request whose stage breakdown should be summarised.
 * @param options.topN Maximum number of entries to return (at least 1). When
 *   omitted, the implementation reads `ROUTECODEX_HUB_STAGE_TOP_N` and then
 *   falls back to a built-in default.
 * @param options.minMs Entries whose `totalMs` is below this value are left
 *   out. When omitted, the implementation reads
 *   `ROUTECODEX_HUB_STAGE_TOP_MIN_MS` and then falls back to a built-in default.
 */
export declare function peekHubStageTopSummary(requestId: string | undefined | null, options?: {
|
|
11
|
+
topN?: number;
|
|
12
|
+
minMs?: number;
|
|
13
|
+
}): HubStageTopSummaryEntry[];
|
|
3
14
|
export declare function logHubStageTiming(requestId: string, stage: string, phase: 'start' | 'completed' | 'error', details?: Record<string, unknown>): void;
|
|
4
15
|
export declare function measureHubStage<T>(requestId: string, stage: string, fn: () => Promise<T> | T, options?: {
|
|
5
16
|
startDetails?: Record<string, unknown>;
|
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
const truthy = new Set(['1', 'true', 'yes', 'on']);
|
|
2
2
|
const falsy = new Set(['0', 'false', 'no', 'off']);
|
|
3
|
+
// Native alignment note: timing integrates with *WithNative stage orchestration flow.
|
|
3
4
|
const REQUEST_TIMELINES = new Map();
|
|
4
5
|
const REQUEST_TIMELINE_TTL_MS = 30 * 60 * 1000;
|
|
5
6
|
const REQUEST_TIMELINE_MAX = 4096;
|
|
6
7
|
const DEFAULT_HUB_STAGE_LOG_MIN_MS = 50;
|
|
8
|
+
const DEFAULT_HUB_STAGE_TOP_N = 5;
|
|
9
|
+
const DEFAULT_HUB_STAGE_TOP_MIN_MS = 5;
|
|
10
|
+
const REQUEST_STAGE_BREAKDOWNS = new Map();
|
|
7
11
|
function resolveBool(raw, fallback) {
|
|
8
12
|
if (raw === undefined) {
|
|
9
13
|
return fallback;
|
|
@@ -62,6 +66,7 @@ function prune(nowMs) {
|
|
|
62
66
|
for (const [key, timeline] of REQUEST_TIMELINES.entries()) {
|
|
63
67
|
if (nowMs - timeline.lastAtMs >= REQUEST_TIMELINE_TTL_MS) {
|
|
64
68
|
REQUEST_TIMELINES.delete(key);
|
|
69
|
+
REQUEST_STAGE_BREAKDOWNS.delete(key);
|
|
65
70
|
}
|
|
66
71
|
}
|
|
67
72
|
while (REQUEST_TIMELINES.size > REQUEST_TIMELINE_MAX) {
|
|
@@ -70,6 +75,7 @@ function prune(nowMs) {
|
|
|
70
75
|
break;
|
|
71
76
|
}
|
|
72
77
|
REQUEST_TIMELINES.delete(oldestKey);
|
|
78
|
+
REQUEST_STAGE_BREAKDOWNS.delete(oldestKey);
|
|
73
79
|
}
|
|
74
80
|
}
|
|
75
81
|
function touchTiming(requestId) {
|
|
@@ -125,8 +131,82 @@ export function clearHubStageTiming(requestId) {
|
|
|
125
131
|
return;
|
|
126
132
|
}
|
|
127
133
|
REQUEST_TIMELINES.delete(requestId);
|
|
134
|
+
REQUEST_STAGE_BREAKDOWNS.delete(requestId);
|
|
135
|
+
}
|
|
136
|
+
/**
 * Adds one elapsed-time sample to the per-request, per-stage breakdown.
 *
 * Samples are aggregated in `REQUEST_STAGE_BREAKDOWNS` as
 * `{ totalMs, count, maxMs }` records keyed by request id and then by stage
 * name. The call is ignored when `requestId` or `stage` is empty, or when
 * `elapsedMs` is not a finite, non-negative number.
 *
 * @param {string} requestId Request the sample belongs to.
 * @param {string} stage Name of the stage that was measured.
 * @param {number} elapsedMs Elapsed time of this sample, in milliseconds.
 * @returns {void}
 */
function recordHubStageElapsed(requestId, stage, elapsedMs) {
    if (!requestId || !stage || !Number.isFinite(elapsedMs) || elapsedMs < 0) {
        return;
    }
    prune(Date.now());
    let byStage = REQUEST_STAGE_BREAKDOWNS.get(requestId);
    if (!byStage) {
        // prune() is driven by REQUEST_TIMELINES, so a breakdown whose request
        // has no timeline entry is never evicted by it. Bound this map here as
        // well so it cannot grow without limit when callers never clear it.
        // Map iteration follows insertion order, so the first key is the oldest.
        while (REQUEST_STAGE_BREAKDOWNS.size >= REQUEST_TIMELINE_MAX) {
            const oldestKey = REQUEST_STAGE_BREAKDOWNS.keys().next().value;
            if (oldestKey === undefined) {
                break;
            }
            REQUEST_STAGE_BREAKDOWNS.delete(oldestKey);
        }
        byStage = new Map();
        REQUEST_STAGE_BREAKDOWNS.set(requestId, byStage);
    }
    const stats = byStage.get(stage);
    if (!stats) {
        // First sample for this stage: it is both the total and the maximum.
        byStage.set(stage, {
            totalMs: elapsedMs,
            count: 1,
            maxMs: elapsedMs
        });
        return;
    }
    stats.totalMs += elapsedMs;
    stats.count += 1;
    stats.maxMs = Math.max(stats.maxMs, elapsedMs);
}
|
|
159
|
+
/**
 * Reads an integer setting from the process environment.
 *
 * The raw value is trimmed and parsed as a base-10 integer. The parsed value
 * is used only when it is finite and not below `minimum`; otherwise
 * `fallback` is returned (this includes an unset or non-numeric variable).
 *
 * @param {string} name Environment variable name.
 * @param {number} fallback Value returned when the variable is unusable.
 * @param {number} [minimum=1] Smallest parsed value that is accepted.
 * @returns {number} The parsed integer or `fallback`.
 */
function readIntEnv(name, fallback, minimum = 1) {
    const raw = process.env[name];
    const parsed = Number.parseInt(String(raw ?? '').trim(), 10);
    if (Number.isFinite(parsed) && parsed >= minimum) {
        return parsed;
    }
    return fallback;
}
/**
 * Resolves a numeric option: uses the caller's value when it converts to a
 * real number, otherwise asks `readDefault` for the configured default.
 *
 * @param {unknown} value Caller-supplied option value (may be absent).
 * @param {() => number} readDefault Produces the default lazily.
 * @returns {number}
 */
function resolveHubStageSummaryOption(value, readDefault) {
    if (value === undefined || value === null) {
        return readDefault();
    }
    const numeric = Number(value);
    // A NaN limit would make slice()/the threshold filter drop every entry.
    return Number.isNaN(numeric) ? readDefault() : numeric;
}
/**
 * Returns the most expensive stages recorded for a request, without clearing
 * the recorded data.
 *
 * Each entry carries the rounded total, sample count, rounded average and
 * rounded maximum for one stage. Entries whose total is below the threshold
 * are dropped, the rest are sorted by total (largest first) and cut to the
 * requested length.
 *
 * @param {string | undefined | null} requestId Request to summarise.
 * @param {{ topN?: number, minMs?: number }} [options]
 *   `topN` limits the number of entries (clamped to at least 1) and defaults
 *   to `ROUTECODEX_HUB_STAGE_TOP_N`, then `DEFAULT_HUB_STAGE_TOP_N`.
 *   `minMs` is the minimum total to report (clamped to at least 0) and
 *   defaults to `ROUTECODEX_HUB_STAGE_TOP_MIN_MS` (0 is accepted), then
 *   `DEFAULT_HUB_STAGE_TOP_MIN_MS`.
 * @returns {Array<{ stage: string, totalMs: number, count: number, avgMs: number, maxMs: number }>}
 *   Empty when `requestId` is empty or nothing has been recorded for it.
 */
export function peekHubStageTopSummary(requestId, options) {
    if (!requestId) {
        return [];
    }
    const byStage = REQUEST_STAGE_BREAKDOWNS.get(requestId);
    if (!byStage || !byStage.size) {
        return [];
    }
    const topN = Math.max(1, resolveHubStageSummaryOption(options?.topN, () => readIntEnv('ROUTECODEX_HUB_STAGE_TOP_N', DEFAULT_HUB_STAGE_TOP_N)));
    // The threshold is clamped to >= 0 below, so 0 is a meaningful setting
    // ("report every stage") and must be accepted from the environment too.
    const minMs = Math.max(0, resolveHubStageSummaryOption(options?.minMs, () => readIntEnv('ROUTECODEX_HUB_STAGE_TOP_MIN_MS', DEFAULT_HUB_STAGE_TOP_MIN_MS, 0)));
    return Array.from(byStage.entries())
        .map(([stage, stats]) => {
            const totalMs = Math.max(0, Math.round(stats.totalMs));
            const count = Math.max(0, Math.floor(stats.count));
            const maxMs = Math.max(0, Math.round(stats.maxMs));
            const avgMs = count > 0 ? Math.max(0, Math.round(totalMs / count)) : 0;
            return {
                stage,
                totalMs,
                count,
                avgMs,
                maxMs
            };
        })
        .filter((entry) => entry.totalMs >= minMs)
        .sort((a, b) => b.totalMs - a.totalMs)
        .slice(0, topN);
}
|
|
129
195
|
export function logHubStageTiming(requestId, stage, phase, details) {
|
|
196
|
+
const stageElapsedMs = phase === 'completed' || phase === 'error'
|
|
197
|
+
? (typeof details?.elapsedMs === 'number'
|
|
198
|
+
? details.elapsedMs
|
|
199
|
+
: typeof details?.nativeMs === 'number'
|
|
200
|
+
? details.nativeMs
|
|
201
|
+
: undefined)
|
|
202
|
+
: undefined;
|
|
203
|
+
if (requestId &&
|
|
204
|
+
stage &&
|
|
205
|
+
typeof stageElapsedMs === 'number' &&
|
|
206
|
+
Number.isFinite(stageElapsedMs) &&
|
|
207
|
+
stageElapsedMs >= 0) {
|
|
208
|
+
recordHubStageElapsed(requestId, stage, stageElapsedMs);
|
|
209
|
+
}
|
|
130
210
|
if (!isHubStageTimingEnabled() || !requestId || !stage) {
|
|
131
211
|
return;
|
|
132
212
|
}
|
|
@@ -200,10 +280,11 @@ export async function measureHubStage(requestId, stage, fn, options) {
|
|
|
200
280
|
return value;
|
|
201
281
|
}
|
|
202
282
|
catch (error) {
|
|
283
|
+
const elapsedMs = Math.max(0, Date.now() - startedAt);
|
|
203
284
|
const mapped = options?.mapErrorDetails?.(error);
|
|
204
285
|
const message = error instanceof Error ? error.message : String(error ?? 'unknown');
|
|
205
286
|
logHubStageTiming(requestId, stage, 'error', mapped ?? {
|
|
206
|
-
elapsedMs
|
|
287
|
+
elapsedMs,
|
|
207
288
|
message
|
|
208
289
|
});
|
|
209
290
|
throw error;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { isJsonObject,
|
|
1
|
+
import { isJsonObject, } from "../../../../types/json.js";
|
|
2
2
|
import { applyHubOperationTableInbound } from "../../../../operation-table/operation-table-runner.js";
|
|
3
3
|
import { recordStage } from "../../../stages/utils.js";
|
|
4
4
|
import { liftReqInboundSemantics } from "./semantic-lift.js";
|
|
@@ -7,6 +7,28 @@ import { chatEnvelopeToStandardizedWithNative } from "../../../../../../router/v
|
|
|
7
7
|
import { normalizeReqInboundShellLikeToolCallsWithNative } from "../../../../../../router/virtual-router/engine-selection/native-hub-pipeline-req-inbound-semantics-tools.js";
|
|
8
8
|
import { fixApplyPatchToolCallsWithNative } from "../../../../../../router/virtual-router/engine-selection/native-compat-action-semantics.js";
|
|
9
9
|
import { isHubStageTimingDetailEnabled, logHubStageTiming, } from "../../../hub-stage-timing.js";
|
|
10
|
+
/**
 * Builds a shallow copy of a responses context without its heavy fields.
 *
 * The `input` and `__captured_tool_results` keys are left out so that the
 * full request history is not carried along with the semantics object. All
 * other own enumerable properties are copied by reference (no deep clone).
 *
 * @param {unknown} context Candidate responses context.
 * @returns {Record<string, unknown> | undefined} The slimmed copy, or
 *   `undefined` when `context` is not a non-array object.
 */
function buildSlimResponsesContextForSemantics(context) {
    if (!context || typeof context !== "object" || Array.isArray(context)) {
        return undefined;
    }
    // Copy only the wanted keys instead of cloning everything and deleting the
    // heavy ones afterwards, so the copy never references the heavy fields.
    //
    // Object.fromEntries defines own data properties. Plain `out[key] = value`
    // would run the `__proto__` setter for a JSON-parsed context that carries
    // an own "__proto__" key, replacing the copy's prototype with request data.
    return Object.fromEntries(Object.entries(context).filter(([key]) => key !== "input" && key !== "__captured_tool_results"));
}
|
|
10
32
|
export async function runReqInboundStage2SemanticMap(options) {
|
|
11
33
|
const requestId = options.adapterContext.requestId || "unknown";
|
|
12
34
|
const forceDetailLog = isHubStageTimingDetailEnabled();
|
|
@@ -25,8 +47,12 @@ export async function runReqInboundStage2SemanticMap(options) {
|
|
|
25
47
|
const contextNode = responsesNode && isJsonObject(responsesNode.context)
|
|
26
48
|
? responsesNode.context
|
|
27
49
|
: undefined;
|
|
28
|
-
|
|
50
|
+
// Perf: keep reference instead of deep clone to avoid multi-pass cloning on
|
|
51
|
+
// heavy /v1/responses histories.
|
|
52
|
+
return contextNode;
|
|
29
53
|
})();
|
|
54
|
+
const semanticsResponsesContext = buildSlimResponsesContextForSemantics(preservedResponsesContext) ??
|
|
55
|
+
preservedResponsesContext;
|
|
30
56
|
logHubStageTiming(requestId, "req_inbound.stage2_operation_table_inbound", "start");
|
|
31
57
|
const operationTableStart = Date.now();
|
|
32
58
|
applyHubOperationTableInbound({
|
|
@@ -52,11 +78,11 @@ export async function runReqInboundStage2SemanticMap(options) {
|
|
|
52
78
|
elapsedMs: Date.now() - semanticLiftStart,
|
|
53
79
|
forceLog: forceDetailLog,
|
|
54
80
|
});
|
|
55
|
-
if (
|
|
81
|
+
if (semanticsResponsesContext) {
|
|
56
82
|
const currentSemantics = chatEnvelope.semantics;
|
|
57
83
|
if (!currentSemantics || typeof currentSemantics !== "object") {
|
|
58
84
|
chatEnvelope.semantics = {
|
|
59
|
-
responses: { context:
|
|
85
|
+
responses: { context: semanticsResponsesContext },
|
|
60
86
|
};
|
|
61
87
|
}
|
|
62
88
|
else {
|
|
@@ -69,19 +95,24 @@ export async function runReqInboundStage2SemanticMap(options) {
|
|
|
69
95
|
...semantics,
|
|
70
96
|
responses: {
|
|
71
97
|
...responsesNode,
|
|
72
|
-
context:
|
|
98
|
+
context: semanticsResponsesContext,
|
|
73
99
|
},
|
|
74
100
|
};
|
|
75
101
|
}
|
|
76
102
|
}
|
|
77
103
|
}
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
104
|
+
// openai-responses path already ran request_inbound bridge policy in
|
|
105
|
+
// buildChatRequestFromResponses (including call-id/apply-patch compat actions).
|
|
106
|
+
// Skip duplicate message-wide normalization passes here to reduce heavy-input cost.
|
|
107
|
+
if (options.formatEnvelope.protocol !== "openai-responses") {
|
|
108
|
+
normalizeReqInboundShellLikeToolCallsWithNative(chatEnvelope);
|
|
109
|
+
const fixedApplyPatch = fixApplyPatchToolCallsWithNative({
|
|
110
|
+
messages: (Array.isArray(chatEnvelope.messages)
|
|
111
|
+
? chatEnvelope.messages
|
|
112
|
+
: []),
|
|
113
|
+
});
|
|
114
|
+
chatEnvelope.messages = fixedApplyPatch.messages;
|
|
115
|
+
}
|
|
85
116
|
logHubStageTiming(requestId, "req_inbound.stage2_validate_chat_envelope", "start");
|
|
86
117
|
const validateStart = Date.now();
|
|
87
118
|
validateChatEnvelopeWithNative(chatEnvelope, {
|
|
@@ -107,7 +138,7 @@ export async function runReqInboundStage2SemanticMap(options) {
|
|
|
107
138
|
const envelopeSemantics = chatEnvelope.semantics;
|
|
108
139
|
const existing = standardizedRequest.semantics;
|
|
109
140
|
if (!existing || typeof existing !== "object") {
|
|
110
|
-
standardizedRequest.semantics =
|
|
141
|
+
standardizedRequest.semantics = envelopeSemantics;
|
|
111
142
|
}
|
|
112
143
|
else {
|
|
113
144
|
const existingObj = existing;
|
|
@@ -118,11 +149,13 @@ export async function runReqInboundStage2SemanticMap(options) {
|
|
|
118
149
|
? envelopeResponses.context
|
|
119
150
|
: undefined;
|
|
120
151
|
if (envelopeContext) {
|
|
152
|
+
const slimContext = buildSlimResponsesContextForSemantics(envelopeContext) ??
|
|
153
|
+
envelopeContext;
|
|
121
154
|
const nextResponses = {
|
|
122
155
|
...(isJsonObject(existingObj.responses)
|
|
123
156
|
? existingObj.responses
|
|
124
157
|
: {}),
|
|
125
|
-
context:
|
|
158
|
+
context: slimContext,
|
|
126
159
|
};
|
|
127
160
|
standardizedRequest.semantics = {
|
|
128
161
|
...existingObj,
|
|
@@ -19,6 +19,49 @@ export async function runReqOutboundStage1SemanticMap(options) {
|
|
|
19
19
|
request: options.request,
|
|
20
20
|
adapterContext: options.adapterContext
|
|
21
21
|
});
|
|
22
|
+
// Perf: when outbound target is not /v1/responses, the large responses.context
|
|
23
|
+
// semantic snapshot is not needed for provider request mapping and can cause
|
|
24
|
+
// expensive deep traversals in downstream native mappers/policy actions.
|
|
25
|
+
if (providerProtocol !== 'openai-responses') {
|
|
26
|
+
const semantics = chatEnvelope.semantics;
|
|
27
|
+
const responsesNode = semantics && typeof semantics.responses === 'object' && semantics.responses !== null && !Array.isArray(semantics.responses)
|
|
28
|
+
? semantics.responses
|
|
29
|
+
: undefined;
|
|
30
|
+
if (responsesNode && Object.prototype.hasOwnProperty.call(responsesNode, 'context')) {
|
|
31
|
+
const { context: _unusedContext, ...restResponses } = responsesNode;
|
|
32
|
+
if (Object.keys(restResponses).length > 0) {
|
|
33
|
+
chatEnvelope.semantics = {
|
|
34
|
+
...(semantics ?? {}),
|
|
35
|
+
responses: restResponses
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
else if (semantics && Object.keys(semantics).length > 0) {
|
|
39
|
+
const { responses: _unusedResponses, ...restSemantics } = semantics;
|
|
40
|
+
chatEnvelope.semantics =
|
|
41
|
+
Object.keys(restSemantics).length > 0
|
|
42
|
+
? restSemantics
|
|
43
|
+
: undefined;
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
if (providerProtocol === 'openai-responses'
|
|
48
|
+
&& options.contextSnapshot
|
|
49
|
+
&& typeof options.contextSnapshot === 'object'
|
|
50
|
+
&& !Array.isArray(options.contextSnapshot)) {
|
|
51
|
+
const semantics = chatEnvelope.semantics && typeof chatEnvelope.semantics === 'object' && !Array.isArray(chatEnvelope.semantics)
|
|
52
|
+
? chatEnvelope.semantics
|
|
53
|
+
: {};
|
|
54
|
+
const responsesNode = semantics.responses && typeof semantics.responses === 'object' && !Array.isArray(semantics.responses)
|
|
55
|
+
? semantics.responses
|
|
56
|
+
: {};
|
|
57
|
+
chatEnvelope.semantics = {
|
|
58
|
+
...semantics,
|
|
59
|
+
responses: {
|
|
60
|
+
...responsesNode,
|
|
61
|
+
context: options.contextSnapshot
|
|
62
|
+
}
|
|
63
|
+
};
|
|
64
|
+
}
|
|
22
65
|
logHubStageTiming(requestId, 'req_outbound.stage1_native_to_chat_envelope', 'completed', {
|
|
23
66
|
elapsedMs: Date.now() - toChatStart,
|
|
24
67
|
forceLog: forceDetailLog
|