@jsonstudio/rcc 0.90.814 → 0.90.876
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -0
- package/configsamples/provider-default/ali-coding-plan/config.v2.json +76 -0
- package/configsamples/provider-default/antigravity/config.v2.json +142 -0
- package/configsamples/provider-default/ark-coding-plan/config.v2.json +64 -0
- package/configsamples/provider-default/crs/config.v2.json +54 -0
- package/configsamples/provider-default/deepseek-web/config.v2.json +56 -0
- package/configsamples/provider-default/gemini/config.v2.json +43 -0
- package/configsamples/provider-default/gemini-cli/config.v2.json +45 -0
- package/configsamples/provider-default/gemini-native/config.v2.json +208 -0
- package/configsamples/provider-default/glm/config.v2.json +33 -0
- package/configsamples/provider-default/glm-anthropic/config.v2.json +29 -0
- package/configsamples/provider-default/kimi/config.v2.json +25 -0
- package/configsamples/provider-default/lmstudio/config.v2.json +79 -0
- package/configsamples/provider-default/lmstudio-proxy/config.v2.json +78 -0
- package/configsamples/provider-default/manifest.json +31 -0
- package/configsamples/provider-default/meituan/config.v2.json +20 -0
- package/configsamples/provider-default/mimo/config.v2.json +26 -0
- package/configsamples/provider-default/modelscope/config.v2.json +81 -0
- package/configsamples/provider-default/my-openai/config.v2.json +20 -0
- package/configsamples/provider-default/nvidia/config.v2.json +32 -0
- package/configsamples/provider-default/opencode-zen-free/config.v2.json +56 -0
- package/configsamples/provider-default/openrouter/config.v2.json +210 -0
- package/configsamples/provider-default/qwen/config.v2.json +38 -0
- package/configsamples/provider-default/qwenchat/config.v2.json +53 -0
- package/configsamples/provider-default/tab/config.v2.json +26 -0
- package/configsamples/provider-default/tabglm/config.v2.json +77 -0
- package/dist/build-info.js +2 -2
- package/dist/cli/commands/config.d.ts +5 -0
- package/dist/cli/commands/config.js +369 -1
- package/dist/cli/commands/config.js.map +1 -1
- package/dist/cli/commands/examples.js +3 -0
- package/dist/cli/commands/examples.js.map +1 -1
- package/dist/cli/commands/init.js +25 -1
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/launcher-kernel.js +122 -46
- package/dist/cli/commands/launcher-kernel.js.map +1 -1
- package/dist/cli/commands/start.js +60 -3
- package/dist/cli/commands/start.js.map +1 -1
- package/dist/cli/config/bundled-provider-pack.d.ts +20 -0
- package/dist/cli/config/bundled-provider-pack.js +146 -0
- package/dist/cli/config/bundled-provider-pack.js.map +1 -0
- package/dist/cli/register/status-config-commands.d.ts +2 -0
- package/dist/cli/register/status-config-commands.js.map +1 -1
- package/dist/cli.js +81 -28
- package/dist/cli.js.map +1 -1
- package/dist/debug/snapshot-store.js +2 -1
- package/dist/debug/snapshot-store.js.map +1 -1
- package/dist/index.js +23 -14
- package/dist/index.js.map +1 -1
- package/dist/manager/modules/quota/provider-quota-daemon.model-backoff.js +1 -1
- package/dist/manager/modules/quota/provider-quota-daemon.model-backoff.js.map +1 -1
- package/dist/manager/quota/provider-quota-center.js +1 -1
- package/dist/manager/quota/provider-quota-center.js.map +1 -1
- package/dist/manager/storage/file-store.js +10 -0
- package/dist/manager/storage/file-store.js.map +1 -1
- package/dist/modules/llmswitch/bridge/snapshot-recorder-runtime.js +18 -1
- package/dist/modules/llmswitch/bridge/snapshot-recorder-runtime.js.map +1 -1
- package/dist/modules/llmswitch/bridge/snapshot-recorder.js +132 -51
- package/dist/modules/llmswitch/bridge/snapshot-recorder.js.map +1 -1
- package/dist/provider-sdk/provider-runtime-inference.js +2 -2
- package/dist/provider-sdk/provider-runtime-inference.js.map +1 -1
- package/dist/providers/auth/deepseek-account-token-acquirer.js +32 -3
- package/dist/providers/auth/deepseek-account-token-acquirer.js.map +1 -1
- package/dist/providers/core/api/provider-types.d.ts +11 -0
- package/dist/providers/core/config/service-profiles.js +1 -1
- package/dist/providers/core/runtime/deepseek-http-provider.d.ts +2 -0
- package/dist/providers/core/runtime/deepseek-http-provider.js +31 -1
- package/dist/providers/core/runtime/deepseek-http-provider.js.map +1 -1
- package/dist/providers/core/runtime/qwenchat-http-provider-helpers.d.ts +3 -2
- package/dist/providers/core/runtime/qwenchat-http-provider-helpers.js +513 -96
- package/dist/providers/core/runtime/qwenchat-http-provider-helpers.js.map +1 -1
- package/dist/providers/core/runtime/standard-tool-text-harvest.d.ts +8 -0
- package/dist/providers/core/runtime/standard-tool-text-harvest.js +24 -0
- package/dist/providers/core/runtime/standard-tool-text-harvest.js.map +1 -0
- package/dist/providers/core/runtime/standard-tool-text-request-transform.d.ts +4 -1
- package/dist/providers/core/runtime/standard-tool-text-request-transform.js +129 -3
- package/dist/providers/core/runtime/standard-tool-text-request-transform.js.map +1 -1
- package/dist/providers/core/utils/snapshot-writer.js +5 -2
- package/dist/providers/core/utils/snapshot-writer.js.map +1 -1
- package/dist/providers/profile/provider-profile-loader.js +52 -1
- package/dist/providers/profile/provider-profile-loader.js.map +1 -1
- package/dist/providers/profile/provider-profile.d.ts +3 -0
- package/dist/server/handlers/handler-response-utils.js +1 -0
- package/dist/server/handlers/handler-response-utils.js.map +1 -1
- package/dist/server/handlers/images-handler.d.ts +9 -0
- package/dist/server/handlers/images-handler.js +258 -0
- package/dist/server/handlers/images-handler.js.map +1 -0
- package/dist/server/handlers/types.d.ts +7 -0
- package/dist/server/runtime/http-server/antigravity-startup-tasks.d.ts +3 -0
- package/dist/server/runtime/http-server/antigravity-startup-tasks.js +16 -0
- package/dist/server/runtime/http-server/antigravity-startup-tasks.js.map +1 -1
- package/dist/server/runtime/http-server/daemon-admin/auth-handler.js +3 -18
- package/dist/server/runtime/http-server/daemon-admin/auth-handler.js.map +1 -1
- package/dist/server/runtime/http-server/daemon-admin/providers-handler-utils.d.ts +7 -0
- package/dist/server/runtime/http-server/daemon-admin/providers-handler-utils.js +17 -0
- package/dist/server/runtime/http-server/daemon-admin/providers-handler-utils.js.map +1 -1
- package/dist/server/runtime/http-server/daemon-admin/providers-handler.js +50 -17
- package/dist/server/runtime/http-server/daemon-admin/providers-handler.js.map +1 -1
- package/dist/server/runtime/http-server/daemon-admin-routes.js +32 -2
- package/dist/server/runtime/http-server/daemon-admin-routes.js.map +1 -1
- package/dist/server/runtime/http-server/executor/provider-response-converter.js +42 -13
- package/dist/server/runtime/http-server/executor/provider-response-converter.js.map +1 -1
- package/dist/server/runtime/http-server/executor/provider-response-utils.js +41 -3
- package/dist/server/runtime/http-server/executor/provider-response-utils.js.map +1 -1
- package/dist/server/runtime/http-server/executor/usage-aggregator.js +7 -7
- package/dist/server/runtime/http-server/executor/usage-aggregator.js.map +1 -1
- package/dist/server/runtime/http-server/executor/usage-logger.d.ts +9 -0
- package/dist/server/runtime/http-server/executor/usage-logger.js +35 -2
- package/dist/server/runtime/http-server/executor/usage-logger.js.map +1 -1
- package/dist/server/runtime/http-server/executor-metadata.js +12 -4
- package/dist/server/runtime/http-server/executor-metadata.js.map +1 -1
- package/dist/server/runtime/http-server/executor-pipeline.js +24 -15
- package/dist/server/runtime/http-server/executor-pipeline.js.map +1 -1
- package/dist/server/runtime/http-server/executor-provider.d.ts +6 -1
- package/dist/server/runtime/http-server/executor-provider.js +137 -5
- package/dist/server/runtime/http-server/executor-provider.js.map +1 -1
- package/dist/server/runtime/http-server/http-server-bootstrap.js +6 -0
- package/dist/server/runtime/http-server/http-server-bootstrap.js.map +1 -1
- package/dist/server/runtime/http-server/http-server-lifecycle.js +23 -15
- package/dist/server/runtime/http-server/http-server-lifecycle.js.map +1 -1
- package/dist/server/runtime/http-server/http-server-runtime-providers.js +14 -4
- package/dist/server/runtime/http-server/http-server-runtime-providers.js.map +1 -1
- package/dist/server/runtime/http-server/http-server-runtime-setup.js +83 -1
- package/dist/server/runtime/http-server/http-server-runtime-setup.js.map +1 -1
- package/dist/server/runtime/http-server/hub-shadow-compare.js +2 -41
- package/dist/server/runtime/http-server/hub-shadow-compare.js.map +1 -1
- package/dist/server/runtime/http-server/provider-routing-scope.d.ts +9 -0
- package/dist/server/runtime/http-server/provider-routing-scope.js +20 -0
- package/dist/server/runtime/http-server/provider-routing-scope.js.map +1 -0
- package/dist/server/runtime/http-server/provider-traffic-governor.d.ts +67 -0
- package/dist/server/runtime/http-server/provider-traffic-governor.js +467 -0
- package/dist/server/runtime/http-server/provider-traffic-governor.js.map +1 -0
- package/dist/server/runtime/http-server/request-executor.d.ts +8 -0
- package/dist/server/runtime/http-server/request-executor.js +446 -21
- package/dist/server/runtime/http-server/request-executor.js.map +1 -1
- package/dist/server/runtime/http-server/routes.js +13 -0
- package/dist/server/runtime/http-server/routes.js.map +1 -1
- package/dist/server/runtime/http-server/session-client-registry.js +30 -4
- package/dist/server/runtime/http-server/session-client-registry.js.map +1 -1
- package/dist/server/runtime/http-server/session-client-route-utils.d.ts +7 -0
- package/dist/server/runtime/http-server/session-client-route-utils.js +38 -0
- package/dist/server/runtime/http-server/session-client-route-utils.js.map +1 -1
- package/dist/server/runtime/http-server/session-client-routes.js +12 -2
- package/dist/server/runtime/http-server/session-client-routes.js.map +1 -1
- package/dist/server/utils/request-id-manager.js +42 -5
- package/dist/server/utils/request-id-manager.js.map +1 -1
- package/dist/server/utils/stage-logger.d.ts +1 -0
- package/dist/server/utils/stage-logger.js +27 -0
- package/dist/server/utils/stage-logger.js.map +1 -1
- package/dist/utils/errorsamples.js +3 -1
- package/dist/utils/errorsamples.js.map +1 -1
- package/dist/utils/sensitive-redaction.d.ts +1 -0
- package/dist/utils/sensitive-redaction.js +122 -0
- package/dist/utils/sensitive-redaction.js.map +1 -0
- package/dist/utils/snapshot-writer.js +162 -2
- package/dist/utils/snapshot-writer.js.map +1 -1
- package/docs/INSTALLATION_AND_QUICKSTART.md +14 -1
- package/docs/PORTS.md +12 -0
- package/docs/lmstudio-tool-calling.md +25 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/compat/actions/qwenchat-web-request.d.ts +3 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/compat/actions/qwenchat-web-request.js +62 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/compat/profiles/chat-qwenchat-web.json +47 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/node-support.js +5 -2
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/operation-table/operation-table-runner.js +68 -7
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper-from-chat.js +138 -3
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-chat-process-request-utils.js +24 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-execute-chat-process-entry.js +7 -1
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-execute-request-stage.js +7 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-heavy-input-fastpath.d.ts +24 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-heavy-input-fastpath.js +203 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline-route-and-outbound.js +17 -12
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-stage-timing.d.ts +11 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-stage-timing.js +82 -1
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage2_semantic_map/index.js +47 -14
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage1_semantic_map/index.js +43 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/stages/resp_outbound/resp_outbound_stage1_client_remap/client-remap-protocol-switch.js +222 -19
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/policy/policy-engine.js +2 -2
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/process/chat-process-pending-tool-sync.js +24 -7
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/response/provider-response.js +90 -1
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/snapshot-recorder.d.ts +1 -0
- package/node_modules/@jsonstudio/llms/dist/conversion/hub/snapshot-recorder.js +252 -1
- package/node_modules/@jsonstudio/llms/dist/conversion/responses/responses-openai-bridge/utils.js +5 -3
- package/node_modules/@jsonstudio/llms/dist/conversion/responses/responses-openai-bridge.js +44 -4
- package/node_modules/@jsonstudio/llms/dist/conversion/shared/anthropic-message-utils-openai-request.d.ts +3 -1
- package/node_modules/@jsonstudio/llms/dist/conversion/shared/anthropic-message-utils-openai-request.js +20 -23
- package/node_modules/@jsonstudio/llms/dist/conversion/shared/tool-governor.js +68 -30
- package/node_modules/@jsonstudio/llms/dist/conversion/snapshot-utils.js +194 -10
- package/node_modules/@jsonstudio/llms/dist/native/router_hotpath_napi.node +0 -0
- package/node_modules/@jsonstudio/llms/dist/quota/quota-state.js +2 -2
- package/node_modules/@jsonstudio/llms/dist/router/virtual-router/engine/routing-state/store.js +35 -2
- package/node_modules/@jsonstudio/llms/dist/router/virtual-router/engine.js +9 -9
- package/node_modules/@jsonstudio/llms/dist/router/virtual-router/sticky-session-store.js +104 -18
- package/node_modules/@jsonstudio/llms/dist/servertool/engine.js +79 -32
- package/node_modules/@jsonstudio/llms/dist/servertool/handlers/vision.js +49 -0
- package/node_modules/@jsonstudio/llms/dist/servertool/pending-session.js +48 -2
- package/node_modules/@jsonstudio/llms/dist/servertool/server-side-tools.js +14 -1
- package/node_modules/@jsonstudio/llms/dist/servertool/types.d.ts +1 -0
- package/node_modules/@jsonstudio/llms/package.json +1 -1
- package/node_modules/ajv/dist/compile/jtd/serialize.js +9 -2
- package/node_modules/ajv/dist/compile/jtd/serialize.js.map +1 -1
- package/node_modules/ajv/dist/core.d.ts +1 -0
- package/node_modules/ajv/dist/core.js.map +1 -1
- package/node_modules/ajv/dist/vocabularies/validation/pattern.js +13 -4
- package/node_modules/ajv/dist/vocabularies/validation/pattern.js.map +1 -1
- package/node_modules/ajv/lib/compile/jtd/serialize.ts +13 -2
- package/node_modules/ajv/lib/core.ts +1 -0
- package/node_modules/ajv/lib/vocabularies/validation/pattern.ts +15 -4
- package/node_modules/ajv/package.json +2 -1
- package/package.json +15 -10
- package/scripts/ci/repo-sanity.mjs +23 -2
- package/scripts/ci/secrets-check.mjs +48 -0
- package/scripts/ci/silent-failure-audit.mjs +192 -0
- package/scripts/mock-provider/run-regressions.mjs +1 -0
- package/scripts/monitor/memory-guard.mjs +207 -0
- package/scripts/pack-mode.mjs +32 -36
- package/scripts/publish-rcc.mjs +38 -60
- package/scripts/tests/apply-patch-loop.mjs +1 -0
- package/scripts/tests/blackbox-rcc-vs-routecodex-antigravity.mjs +2 -0
- package/scripts/tools-dev/responses-debug-client/src/index.ts +8 -3
- package/scripts/verify-e2e-toolcall.mjs +1 -0
- package/scripts/verify-install-e2e.mjs +2 -1
package/node_modules/@jsonstudio/llms/dist/conversion/hub/operation-table/operation-table-runner.js
CHANGED
|
@@ -10,7 +10,10 @@ const INBOUND_BRIDGE_SPECS = {
|
|
|
10
10
|
'gemini-chat': { protocol: 'gemini-chat', stage: 'request_inbound', messages: 'chat_envelope' }
|
|
11
11
|
};
|
|
12
12
|
const OUTBOUND_BRIDGE_SPECS = {
|
|
13
|
-
|
|
13
|
+
// openai-chat outbound post-map hooks do not write back `state.messages` into payload.
|
|
14
|
+
// Feeding full payload.messages here only adds O(n) scan cost on large histories.
|
|
15
|
+
// Keep hooks metadata-only by not passing message arrays.
|
|
16
|
+
'openai-chat': { protocol: 'openai-chat', stage: 'request_outbound', messages: 'none', includeCapturedToolResults: true },
|
|
14
17
|
// Keep parity: openai-responses outbound actions should not touch normalized messages.
|
|
15
18
|
'openai-responses': { protocol: 'openai-responses', stage: 'request_outbound', messages: 'none', moduleType: 'openai-responses' },
|
|
16
19
|
'anthropic-messages': { protocol: 'anthropic-messages', stage: 'request_outbound', messages: 'none', includeCapturedToolResults: true },
|
|
@@ -41,24 +44,82 @@ function buildCapturedToolResults(toolOutputs) {
|
|
|
41
44
|
name: entry.name
|
|
42
45
|
}));
|
|
43
46
|
}
|
|
44
|
-
function
|
|
45
|
-
|
|
46
|
-
|
|
47
|
+
function hasToolSignalsInMessages(messages) {
|
|
48
|
+
if (!Array.isArray(messages) || messages.length === 0) {
|
|
49
|
+
return false;
|
|
50
|
+
}
|
|
51
|
+
for (const message of messages) {
|
|
52
|
+
if (!message || typeof message !== 'object') {
|
|
53
|
+
continue;
|
|
54
|
+
}
|
|
55
|
+
const role = typeof message.role === 'string' ? message.role.trim().toLowerCase() : '';
|
|
56
|
+
if (role === 'tool') {
|
|
57
|
+
return true;
|
|
58
|
+
}
|
|
59
|
+
if (typeof message.tool_call_id === 'string' && message.tool_call_id.trim().length > 0) {
|
|
60
|
+
return true;
|
|
61
|
+
}
|
|
62
|
+
const toolCalls = message.tool_calls;
|
|
63
|
+
if (Array.isArray(toolCalls) && toolCalls.length > 0) {
|
|
64
|
+
return true;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
return false;
|
|
68
|
+
}
|
|
69
|
+
function filterToolOnlyActionsWhenNoToolSignals(stage, actions, messages) {
|
|
47
70
|
if (!actions?.length) {
|
|
48
|
-
return;
|
|
71
|
+
return actions;
|
|
49
72
|
}
|
|
50
|
-
|
|
73
|
+
if (stage !== 'request_outbound' && stage !== 'request_inbound') {
|
|
74
|
+
return actions;
|
|
75
|
+
}
|
|
76
|
+
if (hasToolSignalsInMessages(messages)) {
|
|
77
|
+
return actions;
|
|
78
|
+
}
|
|
79
|
+
const toolOnlyActions = new Set([
|
|
80
|
+
'tools.capture-results',
|
|
81
|
+
'tools.normalize-call-ids',
|
|
82
|
+
'compat.fix-apply-patch',
|
|
83
|
+
'tools.ensure-placeholders'
|
|
84
|
+
]);
|
|
85
|
+
return actions.filter((action) => {
|
|
86
|
+
const name = typeof action?.name === 'string' ? action.name.trim().toLowerCase() : '';
|
|
87
|
+
return !toolOnlyActions.has(name);
|
|
88
|
+
});
|
|
89
|
+
}
|
|
90
|
+
function applyBridgePolicy(spec, options) {
|
|
91
|
+
const bridgePolicy = resolveBridgePolicy({ protocol: spec.protocol, moduleType: spec.moduleType ?? spec.protocol });
|
|
92
|
+
const resolvedActions = resolvePolicyActions(bridgePolicy, spec.stage);
|
|
51
93
|
const messages = spec.messages === 'chat_envelope'
|
|
52
94
|
? options.chatEnvelope.messages
|
|
53
95
|
: spec.messages === 'format_payload_messages'
|
|
54
96
|
? extractPayloadMessages(options.payload)
|
|
55
97
|
: undefined;
|
|
98
|
+
const actions = filterToolOnlyActionsWhenNoToolSignals(spec.stage, resolvedActions, messages);
|
|
99
|
+
if (!actions?.length) {
|
|
100
|
+
return;
|
|
101
|
+
}
|
|
102
|
+
const metadata = options.chatEnvelope.metadata;
|
|
103
|
+
const rawRequestForActionState = (() => {
|
|
104
|
+
if (spec.messages !== 'format_payload_messages') {
|
|
105
|
+
return options.payload;
|
|
106
|
+
}
|
|
107
|
+
if (!messages || !Array.isArray(messages) || messages.length === 0) {
|
|
108
|
+
return options.payload;
|
|
109
|
+
}
|
|
110
|
+
// Performance stop-bleed: avoid duplicating a very large messages[] payload in both
|
|
111
|
+
// `state.messages` and `state.rawRequest.messages`. Bridge actions still receive the
|
|
112
|
+
// canonical messages via `state.messages`.
|
|
113
|
+
const compact = { ...options.payload };
|
|
114
|
+
delete compact.messages;
|
|
115
|
+
return compact;
|
|
116
|
+
})();
|
|
56
117
|
const capturedToolResults = spec.includeCapturedToolResults
|
|
57
118
|
? buildCapturedToolResults(options.chatEnvelope.toolOutputs)
|
|
58
119
|
: undefined;
|
|
59
120
|
const actionState = createBridgeActionState({
|
|
60
121
|
...(messages ? { messages } : {}),
|
|
61
|
-
rawRequest:
|
|
122
|
+
rawRequest: rawRequestForActionState,
|
|
62
123
|
metadata,
|
|
63
124
|
...(capturedToolResults ? { capturedToolResults } : {})
|
|
64
125
|
});
|
|
@@ -1,10 +1,88 @@
|
|
|
1
1
|
import { isJsonObject, jsonClone } from '../../types/json.js';
|
|
2
2
|
import { buildAnthropicRequestFromOpenAIChat } from '../../../codecs/anthropic-openai-codec.js';
|
|
3
|
+
import { buildAnthropicFromOpenAIChatWithNative } from '../../../../router/virtual-router/engine-selection/native-compat-action-semantics.js';
|
|
3
4
|
import { encodeMetadataPassthrough } from '../../../metadata-passthrough.js';
|
|
5
|
+
import { isHubStageTimingDetailEnabled, logHubStageTiming } from '../../pipeline/hub-stage-timing.js';
|
|
4
6
|
import { applyEffortBudget, buildAnthropicThinkingFromConfig, mergeAnthropicOutputConfig, mergeAnthropicThinkingConfig, normalizeAnthropicThinkingConfigFromUnknown, resolveConfiguredAnthropicThinkingBudgets, resolveConfiguredAnthropicThinkingConfig } from './anthropic-thinking-config.js';
|
|
5
7
|
import { appendDroppedFieldAudit, appendLossyFieldAudit, hasExplicitEmptyToolsSemantics, isResponsesOrigin, } from './anthropic-semantics-audit.js';
|
|
6
8
|
import { ANTHROPIC_TOP_LEVEL_FIELDS, PASSTHROUGH_METADATA_PREFIX, PASSTHROUGH_PARAMETERS, RESPONSES_DROPPED_PARAMETER_KEYS, sanitizeAnthropicPayload, } from './anthropic-mapper-config.js';
|
|
9
|
+
const TRUTHY = new Set(['1', 'true', 'yes', 'on']);
|
|
10
|
+
const FALSY = new Set(['0', 'false', 'no', 'off']);
|
|
11
|
+
const DEFAULT_HEAVY_INPUT_THRESHOLD = 120_000;
|
|
12
|
+
function readBooleanEnv(names, fallback) {
|
|
13
|
+
for (const name of names) {
|
|
14
|
+
const raw = process.env[name];
|
|
15
|
+
if (raw === undefined) {
|
|
16
|
+
continue;
|
|
17
|
+
}
|
|
18
|
+
const normalized = String(raw).trim().toLowerCase();
|
|
19
|
+
if (TRUTHY.has(normalized)) {
|
|
20
|
+
return true;
|
|
21
|
+
}
|
|
22
|
+
if (FALSY.has(normalized)) {
|
|
23
|
+
return false;
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
return fallback;
|
|
27
|
+
}
|
|
28
|
+
function readPositiveIntEnv(names, fallback) {
|
|
29
|
+
for (const name of names) {
|
|
30
|
+
const raw = process.env[name];
|
|
31
|
+
if (raw === undefined) {
|
|
32
|
+
continue;
|
|
33
|
+
}
|
|
34
|
+
const parsed = Number.parseInt(String(raw).trim(), 10);
|
|
35
|
+
if (Number.isFinite(parsed) && parsed > 0) {
|
|
36
|
+
return parsed;
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
return fallback;
|
|
40
|
+
}
|
|
41
|
+
function shouldUseNativeBuild(ctx) {
|
|
42
|
+
const enabled = readBooleanEnv([
|
|
43
|
+
'ROUTECODEX_HUB_FASTPATH_ANTHROPIC_NATIVE_BUILD',
|
|
44
|
+
'RCC_HUB_FASTPATH_ANTHROPIC_NATIVE_BUILD',
|
|
45
|
+
// backward-compatible manual knob
|
|
46
|
+
'ROUTECODEX_HUB_ANTHROPIC_NATIVE_BUILD',
|
|
47
|
+
'RCC_HUB_ANTHROPIC_NATIVE_BUILD',
|
|
48
|
+
], false);
|
|
49
|
+
if (!enabled) {
|
|
50
|
+
return false;
|
|
51
|
+
}
|
|
52
|
+
const threshold = readPositiveIntEnv([
|
|
53
|
+
'ROUTECODEX_HUB_FASTPATH_INPUT_TOKEN_THRESHOLD',
|
|
54
|
+
'RCC_HUB_FASTPATH_INPUT_TOKEN_THRESHOLD',
|
|
55
|
+
], DEFAULT_HEAVY_INPUT_THRESHOLD);
|
|
56
|
+
const rt = ctx.__rt;
|
|
57
|
+
if (rt &&
|
|
58
|
+
typeof rt === 'object' &&
|
|
59
|
+
rt.hubFastpathHeavyInput === true) {
|
|
60
|
+
return true;
|
|
61
|
+
}
|
|
62
|
+
const estimatedInputTokens = ctx.estimatedInputTokens;
|
|
63
|
+
return (typeof estimatedInputTokens === 'number' &&
|
|
64
|
+
Number.isFinite(estimatedInputTokens) &&
|
|
65
|
+
estimatedInputTokens >= threshold);
|
|
66
|
+
}
|
|
67
|
+
function hasAnthropicSystemSemantic(chat) {
|
|
68
|
+
try {
|
|
69
|
+
const sysNode = chat.semantics && typeof chat.semantics === 'object'
|
|
70
|
+
? chat.semantics.system
|
|
71
|
+
: undefined;
|
|
72
|
+
if (!sysNode || typeof sysNode !== 'object' || Array.isArray(sysNode)) {
|
|
73
|
+
return false;
|
|
74
|
+
}
|
|
75
|
+
return Object.prototype.hasOwnProperty.call(sysNode, 'blocks');
|
|
76
|
+
}
|
|
77
|
+
catch {
|
|
78
|
+
return false;
|
|
79
|
+
}
|
|
80
|
+
}
|
|
7
81
|
export function buildAnthropicFormatEnvelopeFromChat(chat, ctx) {
|
|
82
|
+
const requestId = typeof ctx.requestId === 'string' && ctx.requestId.trim().length
|
|
83
|
+
? ctx.requestId
|
|
84
|
+
: 'unknown';
|
|
85
|
+
const forceDetailLog = isHubStageTimingDetailEnabled();
|
|
8
86
|
const model = chat.parameters?.model;
|
|
9
87
|
if (typeof model !== 'string' || !model.trim()) {
|
|
10
88
|
throw new Error('ChatEnvelope.parameters.model is required for anthropic-messages outbound conversion');
|
|
@@ -114,15 +192,72 @@ export function buildAnthropicFormatEnvelopeFromChat(chat, ctx) {
|
|
|
114
192
|
catch {
|
|
115
193
|
// ignore
|
|
116
194
|
}
|
|
117
|
-
const
|
|
118
|
-
|
|
195
|
+
const useNativeBuild = shouldUseNativeBuild(ctx);
|
|
196
|
+
let payloadSource;
|
|
197
|
+
if (useNativeBuild) {
|
|
198
|
+
logHubStageTiming(requestId, 'req_outbound.anthropic.build_request_native', 'start');
|
|
199
|
+
const nativeBuildStartedAt = Date.now();
|
|
200
|
+
try {
|
|
201
|
+
payloadSource = buildAnthropicFromOpenAIChatWithNative(baseRequest, {
|
|
202
|
+
requestId: typeof ctx.requestId === 'string' && ctx.requestId.trim().length
|
|
203
|
+
? ctx.requestId
|
|
204
|
+
: undefined,
|
|
205
|
+
entryEndpoint: typeof ctx.entryEndpoint === 'string' && ctx.entryEndpoint.trim().length
|
|
206
|
+
? ctx.entryEndpoint
|
|
207
|
+
: undefined,
|
|
208
|
+
});
|
|
209
|
+
if (hasAnthropicSystemSemantic(chat) &&
|
|
210
|
+
!Object.prototype.hasOwnProperty.call(payloadSource, 'system')) {
|
|
211
|
+
throw new Error('native_missing_system_semantic_replay');
|
|
212
|
+
}
|
|
213
|
+
logHubStageTiming(requestId, 'req_outbound.anthropic.build_request_native', 'completed', {
|
|
214
|
+
elapsedMs: Date.now() - nativeBuildStartedAt,
|
|
215
|
+
forceLog: forceDetailLog,
|
|
216
|
+
});
|
|
217
|
+
}
|
|
218
|
+
catch {
|
|
219
|
+
logHubStageTiming(requestId, 'req_outbound.anthropic.build_request_native', 'completed', {
|
|
220
|
+
elapsedMs: Date.now() - nativeBuildStartedAt,
|
|
221
|
+
forceLog: true,
|
|
222
|
+
fallbackToJs: true,
|
|
223
|
+
});
|
|
224
|
+
logHubStageTiming(requestId, 'req_outbound.anthropic.build_request_js_fallback', 'start');
|
|
225
|
+
const jsFallbackStartedAt = Date.now();
|
|
226
|
+
payloadSource = buildAnthropicRequestFromOpenAIChat(baseRequest, {
|
|
227
|
+
requestId,
|
|
228
|
+
});
|
|
229
|
+
logHubStageTiming(requestId, 'req_outbound.anthropic.build_request_js_fallback', 'completed', {
|
|
230
|
+
elapsedMs: Date.now() - jsFallbackStartedAt,
|
|
231
|
+
forceLog: forceDetailLog,
|
|
232
|
+
});
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
else {
|
|
236
|
+
logHubStageTiming(requestId, 'req_outbound.anthropic.build_request_js', 'start');
|
|
237
|
+
const jsBuildStartedAt = Date.now();
|
|
238
|
+
payloadSource = buildAnthropicRequestFromOpenAIChat(baseRequest, {
|
|
239
|
+
requestId,
|
|
240
|
+
});
|
|
241
|
+
logHubStageTiming(requestId, 'req_outbound.anthropic.build_request_js', 'completed', {
|
|
242
|
+
elapsedMs: Date.now() - jsBuildStartedAt,
|
|
243
|
+
forceLog: forceDetailLog,
|
|
244
|
+
});
|
|
245
|
+
}
|
|
246
|
+
logHubStageTiming(requestId, 'req_outbound.anthropic.payload_sanitize', 'start');
|
|
247
|
+
const sanitizeStartedAt = Date.now();
|
|
248
|
+
const payload = sanitizeAnthropicPayload({
|
|
249
|
+
...payloadSource,
|
|
250
|
+
});
|
|
119
251
|
if (baseRequest.thinking !== undefined) {
|
|
120
252
|
payload.thinking = jsonClone(baseRequest.thinking);
|
|
121
253
|
}
|
|
122
254
|
if (baseRequest.output_config !== undefined) {
|
|
123
255
|
payload.output_config = jsonClone(baseRequest.output_config);
|
|
124
256
|
}
|
|
125
|
-
|
|
257
|
+
logHubStageTiming(requestId, 'req_outbound.anthropic.payload_sanitize', 'completed', {
|
|
258
|
+
elapsedMs: Date.now() - sanitizeStartedAt,
|
|
259
|
+
forceLog: forceDetailLog,
|
|
260
|
+
});
|
|
126
261
|
return {
|
|
127
262
|
protocol: 'anthropic-messages',
|
|
128
263
|
direction: 'response',
|
|
@@ -3,6 +3,7 @@ import { buildPassthroughAuditWithNative, readResponsesResumeFromRequestSemantic
|
|
|
3
3
|
import { readRuntimeMetadata } from "../../runtime-metadata.js";
|
|
4
4
|
import { computeRequestTokens } from "../../../router/virtual-router/token-estimator.js";
|
|
5
5
|
import { estimateSessionBoundTokens } from "../process/chat-process-session-usage.js";
|
|
6
|
+
import { isHeavyInputFastpathEnabled, markHeavyInputFastpath, resolveHeavyInputTokenThreshold, roughEstimateInputTokensFromRequest, } from "./hub-pipeline-heavy-input-fastpath.js";
|
|
6
7
|
export function sanitizeStandardizedRequestMessages(standardizedRequest) {
|
|
7
8
|
return {
|
|
8
9
|
...standardizedRequest,
|
|
@@ -37,12 +38,35 @@ export function resolveActiveProcessModeAndAudit(args) {
|
|
|
37
38
|
export function estimateInputTokensForWorkingRequest(args) {
|
|
38
39
|
const { workingRequest, normalizedMetadata } = args;
|
|
39
40
|
try {
|
|
41
|
+
const fastpathEnabled = isHeavyInputFastpathEnabled();
|
|
42
|
+
const threshold = resolveHeavyInputTokenThreshold();
|
|
43
|
+
if (fastpathEnabled && threshold > 0) {
|
|
44
|
+
const roughEstimate = roughEstimateInputTokensFromRequest(workingRequest);
|
|
45
|
+
if (roughEstimate >= threshold) {
|
|
46
|
+
if (normalizedMetadata && typeof normalizedMetadata === "object") {
|
|
47
|
+
normalizedMetadata.estimatedInputTokens = roughEstimate;
|
|
48
|
+
markHeavyInputFastpath({
|
|
49
|
+
metadata: normalizedMetadata,
|
|
50
|
+
estimatedInputTokens: roughEstimate,
|
|
51
|
+
reason: "rough_estimate",
|
|
52
|
+
});
|
|
53
|
+
}
|
|
54
|
+
return;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
40
57
|
const estimatedTokens = estimateSessionBoundTokens(workingRequest, normalizedMetadata) ?? computeRequestTokens(workingRequest, "");
|
|
41
58
|
if (typeof estimatedTokens === "number" &&
|
|
42
59
|
Number.isFinite(estimatedTokens) &&
|
|
43
60
|
estimatedTokens > 0) {
|
|
44
61
|
if (normalizedMetadata && typeof normalizedMetadata === "object") {
|
|
45
62
|
normalizedMetadata.estimatedInputTokens = estimatedTokens;
|
|
63
|
+
if (fastpathEnabled && estimatedTokens >= threshold) {
|
|
64
|
+
markHeavyInputFastpath({
|
|
65
|
+
metadata: normalizedMetadata,
|
|
66
|
+
estimatedInputTokens: estimatedTokens,
|
|
67
|
+
reason: "full_estimate",
|
|
68
|
+
});
|
|
69
|
+
}
|
|
46
70
|
}
|
|
47
71
|
}
|
|
48
72
|
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { shouldRecordSnapshots } from "../../snapshot-utils.js";
|
|
2
|
+
import { ensureRuntimeMetadata } from "../../runtime-metadata.js";
|
|
2
3
|
import { REQUEST_STAGE_HOOKS } from "./hub-pipeline-stage-hooks.js";
|
|
3
4
|
import { buildReqInboundSkippedNodeWithNative, coerceStandardizedRequestFromPayloadWithNative, findMappableSemanticsKeysWithNative, liftResponsesResumeIntoSemanticsWithNative, prepareRuntimeMetadataForServertoolsWithNative, syncResponsesContextFromCanonicalMessagesWithNative, } from "../../../router/virtual-router/engine-selection/native-hub-pipeline-orchestration-semantics.js";
|
|
4
5
|
import { runReqProcessStage1ToolGovernance } from "./stages/req_process/req_process_stage1_tool_governance/index.js";
|
|
@@ -7,6 +8,7 @@ import { deriveWorkingRequestFlags, estimateInputTokensForWorkingRequest, propag
|
|
|
7
8
|
import { annotatePassthroughAuditSkipped, appendPassthroughGovernanceSkippedNode, appendToolGovernanceNodeResult, propagateClockReservationToMetadata, } from "./hub-pipeline-chat-process-governance-utils.js";
|
|
8
9
|
import { createSnapshotRecorder } from "../snapshot-recorder.js";
|
|
9
10
|
import { executeRouteAndBuildOutbound } from "./hub-pipeline-route-and-outbound.js";
|
|
11
|
+
import { peekHubStageTopSummary } from "./hub-stage-timing.js";
|
|
10
12
|
export async function executeChatProcessEntryPipeline(args) {
|
|
11
13
|
const { normalized, routerEngine, config } = args;
|
|
12
14
|
const hooks = REQUEST_STAGE_HOOKS[normalized.providerProtocol];
|
|
@@ -119,7 +121,6 @@ export async function executeChatProcessEntryPipeline(args) {
|
|
|
119
121
|
normalizedMetadata: normalized.metadata ??
|
|
120
122
|
(normalized.metadata = {}),
|
|
121
123
|
});
|
|
122
|
-
const normalizedMeta = normalized.metadata;
|
|
123
124
|
// responsesResume is a client-protocol semantic (/v1/responses tool loop) and must live in chat.semantics.
|
|
124
125
|
// Do not read it from metadata once entering chat_process.
|
|
125
126
|
const { responsesResume, hasImageAttachment, serverToolRequired } = deriveWorkingRequestFlags(workingRequest);
|
|
@@ -145,6 +146,11 @@ export async function executeChatProcessEntryPipeline(args) {
|
|
|
145
146
|
enabled: false,
|
|
146
147
|
},
|
|
147
148
|
});
|
|
149
|
+
const hubStageTop = peekHubStageTopSummary(normalized.id);
|
|
150
|
+
if (hubStageTop.length) {
|
|
151
|
+
const rt = ensureRuntimeMetadata(outbound.metadata);
|
|
152
|
+
rt.hubStageTop = hubStageTop;
|
|
153
|
+
}
|
|
148
154
|
return {
|
|
149
155
|
requestId: normalized.id,
|
|
150
156
|
providerPayload: outbound.providerPayload,
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { executeRequestStageInbound, } from "./hub-pipeline-execute-request-stage-inbound.js";
|
|
2
2
|
import { executeRouteAndBuildOutbound, } from "./hub-pipeline-route-and-outbound.js";
|
|
3
|
+
import { ensureRuntimeMetadata } from "../../runtime-metadata.js";
|
|
4
|
+
import { peekHubStageTopSummary } from "./hub-stage-timing.js";
|
|
3
5
|
export async function executeRequestStagePipeline(args) {
|
|
4
6
|
const { normalized, hooks, routerEngine, config } = args;
|
|
5
7
|
const inbound = await executeRequestStageInbound({
|
|
@@ -30,6 +32,11 @@ export async function executeRequestStagePipeline(args) {
|
|
|
30
32
|
requestId: normalized.id,
|
|
31
33
|
},
|
|
32
34
|
});
|
|
35
|
+
const hubStageTop = peekHubStageTopSummary(normalized.id);
|
|
36
|
+
if (hubStageTop.length) {
|
|
37
|
+
const rt = ensureRuntimeMetadata(outbound.metadata);
|
|
38
|
+
rt.hubStageTop = hubStageTop;
|
|
39
|
+
}
|
|
33
40
|
return {
|
|
34
41
|
requestId: normalized.id,
|
|
35
42
|
providerPayload: outbound.providerPayload,
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { ProcessedRequest, StandardizedRequest } from "../types/standardized.js";
|
|
2
|
+
/**
 * Reports whether the heavy-input fastpath feature is switched on.
 *
 * @returns The `enabled` flag of the fastpath configuration.
 */
export declare function isHeavyInputFastpathEnabled(): boolean;
|
|
3
|
+
/**
 * Decides whether the heavy-input fastpath applies to a request, based on its metadata.
 *
 * @param metadata - Request metadata; may be omitted.
 * @returns The decision plus the configuration values it was derived from.
 */
export declare function shouldUseHeavyInputFastpath(metadata?: Record<string, unknown>): {
    /** Whether the fastpath feature is switched on. */
    enabled: boolean;
    /** True when the feature is on and the request was flagged or its estimate reaches the threshold. */
    hit: boolean;
    /** Input-token threshold in effect for this decision. */
    threshold: number;
    /** Estimated input tokens found in the metadata; absent when no positive estimate was recorded. */
    estimatedInputTokens?: number;
};
|
|
9
|
+
/**
 * Flags a request's metadata as taking the heavy-input fastpath.
 *
 * Does nothing when `metadata` is missing or not an object, or when the feature is disabled.
 *
 * @param options - Target metadata, the optional token estimate to record, and the reason for the flag.
 */
export declare function markHeavyInputFastpath(options: {
    /** Metadata object that receives the fastpath markers; mutated in place. */
    metadata?: Record<string, unknown>;
    /** Token estimate to record; only finite, positive values are stored. */
    estimatedInputTokens?: number;
    /** Which check produced the flag. */
    reason: "rough_estimate" | "full_estimate" | "metadata_threshold";
}): void;
|
|
14
|
+
/**
 * Builds the input for the captured chat-request snapshot.
 *
 * The returned fields are taken from `workingRequest` as-is; the fastpath state only
 * affects the markers written to `normalizedMetadata`.
 *
 * @param args - The working request and, optionally, the normalized request metadata.
 * @returns The request's `model`, `messages`, `tools` and `parameters`.
 */
export declare function buildCapturedChatRequestInput(args: {
    /** Request whose payload fields are captured. */
    workingRequest: StandardizedRequest | ProcessedRequest;
    /** Metadata consulted for, and updated with, the fastpath decision. */
    normalizedMetadata?: Record<string, unknown>;
}): {
    model?: unknown;
    messages?: unknown;
    tools?: unknown;
    parameters?: unknown;
};
|
|
23
|
+
/**
 * Produces a cheap, character-count-based estimate of a request's input tokens.
 *
 * @param request - Request whose `messages` and `tools` are measured.
 * @returns An integer estimate that is always at least 1.
 */
export declare function roughEstimateInputTokensFromRequest(request: StandardizedRequest | ProcessedRequest): number;
|
|
24
|
+
/**
 * Returns the input-token threshold at or above which a request counts as heavy input.
 *
 * @returns The `inputTokenThreshold` value of the fastpath configuration.
 */
export declare function resolveHeavyInputTokenThreshold(): number;
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import { ensureRuntimeMetadata, readRuntimeMetadata } from "../../runtime-metadata.js";
|
|
2
|
+
// Environment-variable spellings (compared after trim + lower-casing) that switch a flag on.
const TRUTHY = new Set(["1", "true", "yes", "on"]);
// Environment-variable spellings that switch a flag off.
const FALSY = new Set(["0", "false", "no", "off"]);
// Input-token threshold used when no environment override yields a positive integer.
const DEFAULT_INPUT_TOKEN_THRESHOLD = 120_000;
/**
 * Resolves a boolean flag from the environment.
 *
 * The names are tried in order; the first variable holding a recognised spelling decides
 * the result. Unset variables and unrecognised values are skipped.
 *
 * @param {string[]} names - Environment variable names, highest priority first.
 * @param {boolean} fallback - Value returned when no variable yields a decision.
 * @returns {boolean} The resolved flag.
 */
function readBooleanEnv(names, fallback) {
    for (const envName of names) {
        const value = process.env[envName];
        if (value === undefined) {
            continue;
        }
        const token = String(value).trim().toLowerCase();
        const isOn = TRUTHY.has(token);
        // The two sets are disjoint, so a recognised token is "on" exactly when it is in TRUTHY.
        if (isOn || FALSY.has(token)) {
            return isOn;
        }
    }
    return fallback;
}
|
|
21
|
+
/**
 * Reads a strictly positive integer from the environment.
 *
 * The names are tried in order; the first variable whose trimmed value is a plain decimal
 * integer greater than zero wins. Unset variables and unusable values are skipped.
 *
 * @param {string[]} names - Environment variable names, highest priority first.
 * @param {number} fallback - Value returned when no variable yields a positive integer.
 * @returns {number} The parsed integer, or `fallback`.
 */
function readPositiveIntEnv(names, fallback) {
    for (const name of names) {
        const raw = process.env[name];
        if (raw === undefined) {
            continue;
        }
        const text = String(raw).trim();
        // Number.parseInt stops at the first non-digit ("1e5" -> 1, "120k" -> 120,
        // "120_000" -> 120), which would silently apply a wildly wrong value.
        // Only accept values that are decimal integers from start to end.
        if (!/^\+?\d+$/.test(text)) {
            continue;
        }
        const parsed = Number.parseInt(text, 10);
        if (Number.isFinite(parsed) && parsed > 0) {
            return parsed;
        }
    }
    return fallback;
}
|
|
34
|
+
/**
 * Reads the heavy-input fastpath configuration from the environment.
 *
 * The environment is consulted on every call, so changes to the variables take effect
 * without a restart. For each setting the ROUTECODEX_-prefixed variable is checked before
 * the RCC_-prefixed one.
 *
 * @returns {{ enabled: boolean, inputTokenThreshold: number }} The feature flag (on unless
 *   overridden) and the token threshold (DEFAULT_INPUT_TOKEN_THRESHOLD unless overridden).
 */
function getConfig() {
    const enabled = readBooleanEnv(["ROUTECODEX_HUB_FASTPATH_HEAVY_INPUT", "RCC_HUB_FASTPATH_HEAVY_INPUT"], true);
    const inputTokenThreshold = readPositiveIntEnv(["ROUTECODEX_HUB_FASTPATH_INPUT_TOKEN_THRESHOLD", "RCC_HUB_FASTPATH_INPUT_TOKEN_THRESHOLD"], DEFAULT_INPUT_TOKEN_THRESHOLD);
    return { enabled, inputTokenThreshold };
}
|
|
46
|
+
/**
 * Reports whether the heavy-input fastpath feature is switched on.
 *
 * @returns {boolean} The `enabled` flag of the current configuration.
 */
export function isHeavyInputFastpathEnabled() {
    const { enabled } = getConfig();
    return enabled;
}
|
|
49
|
+
/**
 * Looks up a previously recorded input-token estimate.
 *
 * `metadata.estimatedInputTokens` is preferred; when it holds no positive value the
 * `hubFastpathEstimatedInputTokens` field of the runtime metadata is used instead.
 * Values are floored, and anything that is not a finite number of at least 1 after
 * flooring is treated as absent.
 *
 * @param {Record<string, unknown> | undefined} metadata - Request metadata.
 * @returns {number | undefined} The positive integer estimate, or `undefined`.
 */
function readEstimatedInputTokens(metadata) {
    if (!metadata || typeof metadata !== "object") {
        return undefined;
    }
    // Normalises a raw value to a positive integer, or undefined when unusable.
    const toPositiveFloor = (value) => {
        if (typeof value !== "number" || !Number.isFinite(value)) {
            return undefined;
        }
        const floored = Math.max(0, Math.floor(value));
        return floored > 0 ? floored : undefined;
    };
    const direct = toPositiveFloor(metadata.estimatedInputTokens);
    if (direct !== undefined) {
        return direct;
    }
    const runtime = readRuntimeMetadata(metadata);
    return toPositiveFloor(runtime?.hubFastpathEstimatedInputTokens);
}
|
|
68
|
+
/**
 * Decides whether the heavy-input fastpath applies to a request.
 *
 * With the feature enabled, the fastpath is hit when the runtime metadata carries
 * `hubFastpathHeavyInput === true`, or when a recorded token estimate reaches the
 * configured threshold. With the feature disabled it is never hit.
 *
 * @param {Record<string, unknown>} [metadata] - Request metadata.
 * @returns {{ enabled: boolean, hit: boolean, threshold: number, estimatedInputTokens?: number }}
 *   The decision; `estimatedInputTokens` is present only when an estimate was found.
 */
export function shouldUseHeavyInputFastpath(metadata) {
    const { enabled, inputTokenThreshold: threshold } = getConfig();
    const estimatedInputTokens = readEstimatedInputTokens(metadata);
    const runtime = metadata ? readRuntimeMetadata(metadata) : undefined;
    const hasEstimate = typeof estimatedInputTokens === "number";
    const flaggedAtRuntime = runtime?.hubFastpathHeavyInput === true;
    const reachesThreshold = hasEstimate && estimatedInputTokens >= threshold;
    const decision = {
        enabled,
        hit: enabled && (flaggedAtRuntime || reachesThreshold),
        threshold,
    };
    if (hasEstimate) {
        decision.estimatedInputTokens = estimatedInputTokens;
    }
    return decision;
}
|
|
89
|
+
/**
 * Flags a request's metadata as taking the heavy-input fastpath.
 *
 * Does nothing when `metadata` is missing or not an object, or when the feature is
 * disabled. Otherwise the runtime metadata receives `hubFastpathHeavyInput`,
 * `hubFastpathReason` and `hubFastpathInputTokenThreshold`. A finite, positive estimate
 * is floored (minimum 1) and stored both on `metadata.estimatedInputTokens` and on the
 * runtime metadata as `hubFastpathEstimatedInputTokens`.
 *
 * @param {{ metadata?: Record<string, unknown>, estimatedInputTokens?: number, reason: string }} options
 *   Target metadata (mutated in place), optional estimate, and the reason for the flag.
 * @returns {void}
 */
export function markHeavyInputFastpath(options) {
    const { metadata, estimatedInputTokens, reason } = options;
    const isRecord = Boolean(metadata) && typeof metadata === "object";
    if (!isRecord) {
        return;
    }
    const { enabled, inputTokenThreshold } = getConfig();
    if (!enabled) {
        return;
    }
    const runtime = ensureRuntimeMetadata(metadata);
    Object.assign(runtime, {
        hubFastpathHeavyInput: true,
        hubFastpathReason: reason,
        hubFastpathInputTokenThreshold: inputTokenThreshold,
    });
    const hasUsableEstimate = typeof estimatedInputTokens === "number" &&
        Number.isFinite(estimatedInputTokens) &&
        estimatedInputTokens > 0;
    if (!hasUsableEstimate) {
        return;
    }
    // Fractional estimates below 1 still count as one token.
    const wholeTokens = Math.max(1, Math.floor(estimatedInputTokens));
    metadata.estimatedInputTokens = wholeTokens;
    runtime.hubFastpathEstimatedInputTokens = wholeTokens;
}
|
|
111
|
+
/**
 * Builds the input for the captured chat-request snapshot.
 *
 * When the fastpath decision for `normalizedMetadata` is a hit, the metadata is (re)marked
 * with reason "metadata_threshold" before the input is assembled.
 * NOTE(review): this overwrites any reason recorded by an earlier marking on the same
 * metadata — confirm that is intended.
 *
 * @param {{ workingRequest: object, normalizedMetadata?: Record<string, unknown> }} args
 *   The working request and the normalized request metadata.
 * @returns {{ model?: unknown, messages?: unknown, tools?: unknown, parameters?: unknown }}
 *   The four payload fields of `workingRequest`, passed through by reference.
 */
export function buildCapturedChatRequestInput(args) {
    const { workingRequest, normalizedMetadata } = args;
    const { hit, estimatedInputTokens } = shouldUseHeavyInputFastpath(normalizedMetadata);
    if (hit) {
        markHeavyInputFastpath({
            metadata: normalizedMetadata,
            estimatedInputTokens,
            reason: "metadata_threshold",
        });
    }
    // Hard rule: the captured request must preserve the full semantic payload, so the
    // fields are handed over untouched regardless of the fastpath decision.
    const { model, messages, tools, parameters } = workingRequest;
    return { model, messages, tools, parameters };
}
|
|
129
|
+
/**
 * Estimates how many characters a message `content` value contributes, never exceeding `cap`.
 *
 * - `null` / `undefined` content, or a non-positive cap, counts as 0.
 * - A string counts its length.
 * - An array is summed part by part: string parts count their length; object parts count
 *   the first string found among `text`, `input_text`, `output_text`, or a flat 64 when
 *   none is a string; other parts are ignored.
 * - Any other value counts as a flat 64.
 *
 * @param {unknown} content - The message content to measure.
 * @param {number} cap - Maximum number of characters that may be reported.
 * @returns {number} The capped character estimate.
 */
function estimateContentChars(content, cap) {
    if (cap <= 0 || content === undefined || content === null) {
        return 0;
    }
    if (typeof content === "string") {
        return Math.min(content.length, cap);
    }
    if (!Array.isArray(content)) {
        return Math.min(64, cap);
    }
    const textKeys = ["text", "input_text", "output_text"];
    let total = 0;
    for (const part of content) {
        if (total >= cap) {
            break;
        }
        let size;
        if (typeof part === "string") {
            size = part.length;
        }
        else if (!part || typeof part !== "object") {
            continue;
        }
        else {
            // Flat allowance for parts without a recognised text field.
            size = 64;
            for (const key of textKeys) {
                if (typeof part[key] === "string") {
                    size = part[key].length;
                    break;
                }
            }
        }
        total += Math.min(size, cap - total);
    }
    return total;
}
|
|
167
|
+
/**
 * Produces a cheap, character-count-based estimate of a request's input tokens.
 *
 * Characters are accumulated per message (role, name, tool_call_id, content, and a flat
 * 128 per tool call) until a cap of max(threshold * 8, 16384) is reached, so very large
 * requests are not walked in full. Each declared tool then adds a flat 256 characters.
 * The estimate is chars / 3.5 rounded up, with a floor of 8 tokens per message plus
 * 32 per tool, and is never below 1.
 *
 * @param {{ messages?: unknown, tools?: unknown }} request - Request to measure.
 * @returns {number} The integer token estimate (at least 1).
 */
export function roughEstimateInputTokensFromRequest(request) {
    const { inputTokenThreshold } = getConfig();
    const charCap = Math.max(inputTokenThreshold * 8, 16_384);
    const messages = Array.isArray(request.messages) ? request.messages : [];
    let chars = 0;
    // Character budget still available under the cap (never negative).
    const remaining = () => Math.max(0, charCap - chars);
    for (const message of messages) {
        if (chars >= charCap) {
            break;
        }
        if (!message || typeof message !== "object") {
            // Flat allowance for a malformed message entry.
            chars += 16;
            continue;
        }
        for (const field of ["role", "name", "tool_call_id"]) {
            const value = message[field];
            if (typeof value === "string") {
                chars += Math.min(value.length, remaining());
            }
        }
        chars += estimateContentChars(message.content, remaining());
        if (Array.isArray(message.tool_calls)) {
            chars += Math.min(message.tool_calls.length * 128, remaining());
        }
    }
    const toolCount = Array.isArray(request.tools) ? request.tools.length : 0;
    // Tool definitions are added on top of the cap on purpose: they are not walked, only counted.
    chars += toolCount * 256;
    const fromChars = Math.ceil(chars / 3.5);
    const structuralFloor = messages.length * 8 + toolCount * 32;
    return Math.max(1, Math.floor(Math.max(fromChars, structuralFloor)));
}
|
|
201
|
+
/**
 * Returns the input-token threshold at or above which a request counts as heavy input.
 *
 * @returns {number} The `inputTokenThreshold` of the current configuration.
 */
export function resolveHeavyInputTokenThreshold() {
    const { inputTokenThreshold } = getConfig();
    return inputTokenThreshold;
}
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { jsonClone } from "../types/json.js";
|
|
2
1
|
import { runReqProcessStage2RouteSelect } from "./stages/req_process/req_process_stage2_route_select/index.js";
|
|
3
2
|
import { buildAdapterContextFromNormalized } from "./hub-pipeline-adapter-context.js";
|
|
4
3
|
import { extractSessionIdentifiersFromMetadata } from "./session-identifiers.js";
|
|
@@ -8,6 +7,7 @@ import { logHubStageTiming } from "./hub-stage-timing.js";
|
|
|
8
7
|
import { shouldRecordSnapshots } from "../../snapshot-utils.js";
|
|
9
8
|
import { createSnapshotRecorder } from "../snapshot-recorder.js";
|
|
10
9
|
import { applyOutboundStreamPreferenceWithNative, applyHasImageAttachmentFlagWithNative, buildCapturedChatRequestSnapshotWithNative, buildHubPipelineResultMetadataWithNative, buildReqOutboundNodeResultWithNative, buildRouterMetadataInputWithNative, resolveOutboundStreamIntentWithNative, syncSessionIdentifiersToMetadataWithNative, } from "../../../router/virtual-router/engine-selection/native-hub-pipeline-orchestration-semantics.js";
|
|
10
|
+
import { buildCapturedChatRequestInput } from "./hub-pipeline-heavy-input-fastpath.js";
|
|
11
11
|
export async function executeRouteAndBuildOutbound(args) {
|
|
12
12
|
const { normalized, hooks, routerEngine, config, nodeResults, inboundRecorder, activeProcessMode, responsesResume, serverToolRequired, hasImageAttachment, passthroughAudit, rawRequest, contextSnapshot, semanticMapper, effectivePolicy, shadowCompareBaselineMode, routeSelectTiming, } = args;
|
|
13
13
|
let { workingRequest } = args;
|
|
@@ -16,6 +16,11 @@ export async function executeRouteAndBuildOutbound(args) {
|
|
|
16
16
|
// 便于后续 AdapterContext(响应侧 servertool)也能访问到相同的 sessionId /
|
|
17
17
|
// conversationId,用于 sticky-session 相关逻辑(例如 stopMessage)。
|
|
18
18
|
const normalizedMetadata = normalized.metadata;
|
|
19
|
+
const routeRuntimeDirectives = normalizedMetadata &&
|
|
20
|
+
typeof normalizedMetadata.__rt === "object" &&
|
|
21
|
+
!Array.isArray(normalizedMetadata.__rt)
|
|
22
|
+
? normalizedMetadata.__rt
|
|
23
|
+
: undefined;
|
|
19
24
|
if (normalizedMetadata && typeof normalizedMetadata === "object") {
|
|
20
25
|
const next = syncSessionIdentifiersToMetadataWithNative({
|
|
21
26
|
metadata: normalizedMetadata,
|
|
@@ -43,6 +48,11 @@ export async function executeRouteAndBuildOutbound(args) {
|
|
|
43
48
|
conversationId: sessionIdentifiers.conversationId,
|
|
44
49
|
metadata: normalizedMetadata,
|
|
45
50
|
});
|
|
51
|
+
if (routeRuntimeDirectives) {
|
|
52
|
+
metadataInput.__rt = {
|
|
53
|
+
...routeRuntimeDirectives,
|
|
54
|
+
};
|
|
55
|
+
}
|
|
46
56
|
if (routeSelectTiming?.enabled) {
|
|
47
57
|
logHubStageTiming(routeSelectTiming.requestId ?? normalized.id, "req_process.stage2_route_select", "start");
|
|
48
58
|
}
|
|
@@ -60,7 +70,8 @@ export async function executeRouteAndBuildOutbound(args) {
|
|
|
60
70
|
try {
|
|
61
71
|
const logger = (normalized.metadata &&
|
|
62
72
|
normalized.metadata.logger);
|
|
63
|
-
if (
|
|
73
|
+
if (routeRuntimeDirectives?.disableVirtualRouterHitLog !== true &&
|
|
74
|
+
logger &&
|
|
64
75
|
typeof logger.logVirtualRouterHit === "function" &&
|
|
65
76
|
routing.decision?.routeName &&
|
|
66
77
|
routing.target?.providerKey) {
|
|
@@ -141,16 +152,10 @@ export async function executeRouteAndBuildOutbound(args) {
|
|
|
141
152
|
// 注意:这里不再根据 processMode(passthrough/chat) 做分支判断——即使某些
|
|
142
153
|
// route 将 processMode 标记为 passthrough,我们仍然需要保留一次规范化后的
|
|
143
154
|
// Chat 请求快照,供 stopMessage 等被动触发型 servertool 在响应阶段使用。
|
|
144
|
-
const capturedChatRequest = buildCapturedChatRequestSnapshotWithNative({
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
? jsonClone(workingRequest.tools)
|
|
149
|
-
: workingRequest.tools,
|
|
150
|
-
parameters: workingRequest.parameters
|
|
151
|
-
? jsonClone(workingRequest.parameters)
|
|
152
|
-
: workingRequest.parameters,
|
|
153
|
-
});
|
|
155
|
+
const capturedChatRequest = buildCapturedChatRequestSnapshotWithNative(buildCapturedChatRequestInput({
|
|
156
|
+
workingRequest,
|
|
157
|
+
normalizedMetadata: normalized.metadata,
|
|
158
|
+
}));
|
|
154
159
|
const metadata = buildHubPipelineResultMetadataWithNative({
|
|
155
160
|
normalized: {
|
|
156
161
|
metadata: normalized.metadata,
|