@jsonstudio/llms 0.6.3539 → 0.6.3541

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import { buildNativeReqOutboundCompatAdapterContext } from '../../hub/pipeline/compat/native-adapter-context.js';
2
2
  import { loadNativeRouterHotpathBindingForInternalUse } from '../../../router/virtual-router/engine-selection/native-router-hotpath.js';
3
3
  import { isNativeDisabledByEnv, makeNativeRequiredError } from '../../../router/virtual-router/engine-selection/native-router-hotpath-policy.js';
4
+ import { providerErrorCenter } from '../../../router/virtual-router/error-center.js';
4
5
  const CAPABILITY = 'runRespInboundStage3CompatJson';
5
6
  const PROFILE = 'chat:deepseek-web';
6
7
  const DEFAULT_PROVIDER_PROTOCOL = 'openai-chat';
@@ -28,6 +29,44 @@ const readToolProtocol = (value) => {
28
29
  const normalized = value.trim().toLowerCase();
29
30
  return normalized === 'native' || normalized === 'text' ? normalized : undefined;
30
31
  };
32
/**
 * Build the `runtime` metadata record attached to a compat error event.
 *
 * String fields are copied only when they are non-empty after trimming, and
 * the trimmed value is what gets stored. The adapter context's `routeId` is
 * stored under the key `routeName`, and `pipelineId` is always the module's
 * `PROFILE` constant.
 *
 * @param {unknown} adapterContext - Read only when it is a non-null object.
 * @param {unknown} payload - When it is a non-null object, its `model` is stored as `target`.
 * @param {unknown} details - Optional diagnostics; attached as a shallow copy
 *   when it is a non-empty object so the emitted record never aliases the
 *   caller's object. Non-object values (e.g. strings) are ignored.
 * @returns {Record<string, unknown>} The assembled runtime metadata.
 */
function buildRuntimeMetadata(adapterContext, payload, details) {
    const contextRecord = adapterContext && typeof adapterContext === 'object'
        ? adapterContext
        : undefined;
    const runtime = {};
    const assignString = (key, value) => {
        if (typeof value !== 'string') {
            return;
        }
        const trimmed = value.trim();
        if (trimmed) {
            runtime[key] = trimmed;
        }
    };
    assignString('requestId', contextRecord?.requestId);
    assignString('providerProtocol', contextRecord?.providerProtocol);
    assignString('providerId', contextRecord?.providerId);
    assignString('providerKey', contextRecord?.providerKey);
    assignString('runtimeKey', contextRecord?.runtimeKey);
    assignString('routeName', contextRecord?.routeId);
    assignString('pipelineId', PROFILE);
    if (payload && typeof payload === 'object') {
        assignString('target', payload.model);
    }
    // Guard on typeof: Object.keys() on a string would enumerate its indices.
    if (details && typeof details === 'object' && Object.keys(details).length > 0) {
        runtime.details = Array.isArray(details) ? [...details] : { ...details };
    }
    return runtime;
}
57
// Error objects that were already reported, so a caller that catches and
// re-emits the same error does not produce a duplicate event.
const reportedCompatErrors = new WeakSet();

/**
 * Report a compat failure to `providerErrorCenter`, then rethrow it.
 *
 * This function never returns normally: callers rely on the `throw` to stop
 * their own control flow. Each error object is emitted at most once; a repeat
 * call with the same object only rethrows.
 *
 * @param {unknown} error - The failure to report and rethrow.
 * @param {unknown} adapterContext - Forwarded to `buildRuntimeMetadata`.
 * @param {unknown} payload - Forwarded to `buildRuntimeMetadata`.
 * @param {Record<string, unknown>} [details] - Extra diagnostics merged into the event's `details`.
 * @throws Always rethrows `error` itself.
 */
function emitCompatError(error, adapterContext, payload, details) {
    // WeakSet only accepts objects, so primitives are never tracked.
    const trackable = typeof error === 'object' && error !== null;
    const alreadyReported = trackable && reportedCompatErrors.has(error);
    if (!alreadyReported) {
        if (trackable) {
            reportedCompatErrors.add(error);
        }
        providerErrorCenter.emit({
            code: 'DEEPSEEK_WEB_COMPAT_ERROR',
            message: typeof error?.message === 'string' ? error.message : String(error),
            stage: 'compat:deepseek-web-response',
            runtime: buildRuntimeMetadata(adapterContext, payload, details),
            details: {
                compatibilityProfile: PROFILE,
                ...(details ?? {})
            }
        });
    }
    throw error;
}
31
70
  function resolveDeepseekNode(adapterContext, config) {
32
71
  const nativeContext = buildNativeReqOutboundCompatAdapterContext(adapterContext);
33
72
  const baseNode = isRecord(nativeContext.deepseek) ? nativeContext.deepseek : {};
@@ -72,31 +111,49 @@ function parseCompatOutput(raw) {
72
111
  }
73
112
  return parsed;
74
113
  }
75
/**
 * Run the native compat capability named by `CAPABILITY` on `input` and
 * return the parsed result.
 *
 * Every failure path goes through `emitCompatError`, which reports the error
 * and rethrows it, so each such call ends this function.
 *
 * @param {{ payload?: unknown }} input - Compat input; serialized with JSON.stringify for the native call.
 * @param {unknown} adapterContext - Forwarded to `emitCompatError` for error metadata.
 * @returns {unknown} Whatever `parseCompatOutput` returns for the native output.
 * @throws When native is disabled, the export is missing, serialization
 *   fails, the native call or parsing throws, or the native result is empty.
 */
function callDeepSeekWebResponseCompat(input, adapterContext) {
    if (isNativeDisabledByEnv()) {
        emitCompatError(makeNativeRequiredError(CAPABILITY, 'native disabled'), adapterContext, input.payload, {
            reason: 'native disabled'
        });
    }
    const binding = loadNativeRouterHotpathBindingForInternalUse();
    const fn = binding?.[CAPABILITY];
    if (typeof fn !== 'function') {
        emitCompatError(makeNativeRequiredError(CAPABILITY), adapterContext, input.payload, {
            reason: 'missing native export'
        });
    }
    let inputJson;
    try {
        inputJson = JSON.stringify(input);
    }
    catch {
        emitCompatError(makeNativeRequiredError(CAPABILITY, 'json stringify failed'), adapterContext, input.payload, {
            reason: 'json stringify failed'
        });
    }
    let raw;
    let parsed;
    try {
        raw = fn(inputJson);
        if (typeof raw === 'string' && raw) {
            parsed = parseCompatOutput(raw);
        }
    }
    catch (error) {
        const compatError = error instanceof Error ? error : new Error(String(error));
        emitCompatError(compatError, adapterContext, input.payload, {
            reason: 'native compat execution failed'
        });
    }
    // Checked outside the try block so the "empty result" error is reported
    // once, instead of being caught above and reported a second time.
    if (typeof raw !== 'string' || !raw) {
        emitCompatError(makeNativeRequiredError(CAPABILITY, 'empty result'), adapterContext, input.payload, {
            reason: 'empty result'
        });
    }
    return parsed;
}
97
152
  export function applyDeepSeekWebResponseTransform(payload, adapterContext, config) {
98
153
  if (!payload || typeof payload !== 'object') {
99
- return payload;
154
+ emitCompatError(new Error('[deepseek-web] invalid compat payload: expected object'), adapterContext, payload, {
155
+ reason: 'payload is not an object'
156
+ });
100
157
  }
101
- return callDeepSeekWebResponseCompat(buildCompatInput(payload, adapterContext, config)).payload;
158
+ return callDeepSeekWebResponseCompat(buildCompatInput(payload, adapterContext, config), adapterContext).payload;
102
159
  }
@@ -1,6 +1,7 @@
1
1
  const COOLDOWN_SCHEDULE_429_MS = [3_000, 10_000, 31_000, 61_000];
2
2
  const COOLDOWN_SCHEDULE_FATAL_MS = [3_000, 10_000, 31_000, 61_000];
3
3
  const COOLDOWN_SCHEDULE_DEFAULT_MS = [3_000, 10_000, 31_000, 61_000];
4
+ const COOLDOWN_SCHEDULE_TRANSIENT_KEEP_POOL_MS = [3_000, 5_000, 10_000, 31_000];
4
5
  const ERROR_CHAIN_WINDOW_MS = 10 * 60_000;
5
6
  const NETWORK_ERROR_CODES = [
6
7
  'ECONNRESET',
@@ -84,10 +85,26 @@ function computeCooldownMsBySeries(series, consecutive) {
84
85
  const idx = Math.min(consecutive - 1, schedule.length - 1);
85
86
  return schedule[idx] ?? null;
86
87
  }
88
/**
 * Decide whether a provider keeps `inPool` while its cooldown is running.
 *
 * True only for the first or second consecutive error of a series accepted
 * by `shouldAccumulateBySeries`.
 *
 * @param {string} series - Error series identifier (e.g. 'ENET', 'E5XX').
 * @param {number} consecutive - Consecutive error count, including the current error.
 * @returns {boolean}
 */
function shouldKeepProviderInPoolDuringCooldown(series, consecutive) {
    if (consecutive <= 0) {
        return false;
    }
    // Reuse the series predicate so the two series lists cannot drift apart.
    return shouldAccumulateBySeries(series) && consecutive <= 2;
}

/**
 * Whether errors of this series are counted together as one chain,
 * regardless of the specific error code.
 *
 * @param {string} series - Error series identifier.
 * @returns {boolean} True for 'ENET', 'E5XX' and 'EOTHER'.
 */
function shouldAccumulateBySeries(series) {
    return series === 'ENET' || series === 'E5XX' || series === 'EOTHER';
}

/**
 * Cooldown duration for an error that keeps the provider in the pool.
 *
 * @param {string} series - Error series identifier.
 * @param {number} consecutive - Consecutive error count, including the current error.
 * @returns {number | null} Milliseconds from COOLDOWN_SCHEDULE_TRANSIENT_KEEP_POOL_MS,
 *   or null when the keep-in-pool rule does not apply.
 */
function computeTransientKeepPoolCooldownMs(series, consecutive) {
    if (!shouldKeepProviderInPoolDuringCooldown(series, consecutive)) {
        return null;
    }
    // The keep-in-pool rule caps `consecutive` at 2, so only the first two
    // schedule entries are reachable from here.
    const idx = Math.min(consecutive - 1, COOLDOWN_SCHEDULE_TRANSIENT_KEEP_POOL_MS.length - 1);
    return COOLDOWN_SCHEDULE_TRANSIENT_KEEP_POOL_MS[idx] ?? null;
}
87
104
  export function tickQuotaStateTime(state, nowMs) {
88
105
  let next = state;
89
106
  if (typeof next.cooldownUntil === 'number' && next.cooldownUntil <= nowMs) {
90
- next = { ...next, cooldownUntil: null };
107
+ next = { ...next, cooldownUntil: null, cooldownKeepsPool: undefined };
91
108
  }
92
109
  if (typeof next.blacklistUntil === 'number' && next.blacklistUntil <= nowMs) {
93
110
  next = { ...next, blacklistUntil: null };
@@ -107,14 +124,15 @@ export function tickQuotaStateTime(state, nowMs) {
107
124
  return next;
108
125
  }
109
126
  if (inCooldown) {
110
- if (next.inPool !== false || next.reason !== 'cooldown') {
111
- next = { ...next, inPool: false, reason: 'cooldown' };
127
+ const keepInPool = next.cooldownKeepsPool === true;
128
+ if (next.inPool !== keepInPool || next.reason !== 'cooldown') {
129
+ next = { ...next, inPool: keepInPool, reason: 'cooldown' };
112
130
  }
113
131
  return next;
114
132
  }
115
133
  // TTLs expired: only auto-reset "cooldown/blacklist" back to ok.
116
134
  if (next.reason === 'cooldown' || next.reason === 'blacklist') {
117
- next = { ...next, inPool: true, reason: 'ok' };
135
+ next = { ...next, inPool: true, reason: 'ok', cooldownKeepsPool: undefined };
118
136
  }
119
137
  return next;
120
138
  }
@@ -131,7 +149,9 @@ export function applyErrorEvent(state, event, nowMs = event.timestampMs ?? Date.
131
149
  const withinChainWindow = typeof lastAt === 'number' &&
132
150
  nowMs - lastAt >= 0 &&
133
151
  nowMs - lastAt <= ERROR_CHAIN_WINDOW_MS;
134
- const sameErrorKey = withinChainWindow && state.lastErrorCode === errorKey;
152
+ const sameErrorKey = withinChainWindow &&
153
+ (state.lastErrorCode === errorKey ||
154
+ (shouldAccumulateBySeries(series) && state.lastErrorSeries === series));
135
155
  const schedule = series === 'E429'
136
156
  ? COOLDOWN_SCHEDULE_429_MS
137
157
  : series === 'EFATAL'
@@ -139,7 +159,7 @@ export function applyErrorEvent(state, event, nowMs = event.timestampMs ?? Date.
139
159
  : COOLDOWN_SCHEDULE_DEFAULT_MS;
140
160
  const rawNextCount = sameErrorKey ? state.consecutiveErrorCount + 1 : 1;
141
161
  const nextCount = rawNextCount > schedule.length ? 1 : rawNextCount;
142
- const cooldownMs = computeCooldownMsBySeries(series, nextCount);
162
+ const cooldownMs = computeTransientKeepPoolCooldownMs(series, nextCount) ?? computeCooldownMsBySeries(series, nextCount);
143
163
  const nextUntil = cooldownMs ? nowMs + cooldownMs : null;
144
164
  const existingUntil = typeof state.cooldownUntil === 'number' ? state.cooldownUntil : null;
145
165
  const cooldownUntil = typeof nextUntil === 'number' && Number.isFinite(nextUntil)
@@ -149,12 +169,14 @@ export function applyErrorEvent(state, event, nowMs = event.timestampMs ?? Date.
149
169
  : existingUntil;
150
170
  const inCooldown = typeof cooldownUntil === 'number' && cooldownUntil > nowMs;
151
171
  const inBlacklist = typeof state.blacklistUntil === 'number' && state.blacklistUntil > nowMs;
152
- const inPool = !inCooldown && !inBlacklist;
172
+ const cooldownKeepsPool = shouldKeepProviderInPoolDuringCooldown(series, nextCount);
173
+ const inPool = !inBlacklist && (!inCooldown || cooldownKeepsPool);
153
174
  return {
154
175
  ...state,
155
176
  inPool,
156
177
  reason: inBlacklist ? 'blacklist' : inCooldown ? 'cooldown' : 'ok',
157
178
  cooldownUntil,
179
+ cooldownKeepsPool: inCooldown ? cooldownKeepsPool : undefined,
158
180
  lastErrorSeries: series,
159
181
  lastErrorCode: errorKey,
160
182
  lastErrorAtMs: nowMs,
@@ -26,6 +26,7 @@ export interface QuotaState {
26
26
  authIssue?: QuotaAuthIssue;
27
27
  priorityTier: number;
28
28
  cooldownUntil: number | null;
29
+ cooldownKeepsPool?: boolean;
29
30
  blacklistUntil: number | null;
30
31
  lastErrorSeries: ErrorSeries | null;
31
32
  lastErrorCode: string | null;
@@ -1,4 +1,5 @@
1
1
  import { buildChatRequestFromResponses, captureResponsesContext } from '../../conversion/responses/responses-openai-bridge.js';
2
+ import { stripHistoricalImageAttachments } from '../../conversion/hub/process/chat-process-media.js';
2
3
  import { cloneJson } from '../server-side-tools.js';
3
4
  import { trimOpenAiMessagesForFollowup } from './followup-message-trimmer.js';
4
5
  function extractResponsesTopLevelParameters(record) {
@@ -74,9 +75,13 @@ export function normalizeFollowupParameters(value) {
74
75
  return undefined;
75
76
  }
76
77
  const cloned = cloneJson(value);
77
- // Followup requests are always non-streaming (servertool orchestration enforces this),
78
- // so remove any inherited stream hints to avoid conflicting flags.
78
+ // Followup requests are always re-entered as a fresh hop:
79
+ // - non-streaming (servertool orchestration enforces this)
80
+ // - no inherited tool-selection hints, otherwise the resumed turn can be biased toward
81
+ // immediately calling tools again instead of consuming the tool outputs that were just injected.
82
+ // Keep `parallel_tool_calls` inherited; provider compat can still disable it selectively.
79
83
  delete cloned.stream;
84
+ delete cloned.tool_choice;
80
85
  return Object.keys(cloned).length ? cloned : undefined;
81
86
  }
82
87
  export function dropToolByFunctionName(tools, dropName) {
@@ -418,6 +423,11 @@ export function buildServerToolFollowupChatPayloadFromInjection(args) {
418
423
  return null;
419
424
  }
420
425
  let messages = Array.isArray(seed.messages) ? cloneJson(seed.messages) : [];
426
+ // ServerTool followups must enter marker/routing/chat-process analysis with the same
427
+ // historical-media invariants as normal chat-process requests:
428
+ // only the latest live user turn may keep inline image payloads; earlier user turns
429
+ // are scrubbed to placeholders before any followup ops append new assistant/user items.
430
+ messages = stripHistoricalImageAttachments(messages);
421
431
  const ops = Array.isArray(args.injection?.ops) ? args.injection.ops : [];
422
432
  // Followup is a normal request hop: inherit tool schema from the captured request and
423
433
  // let compat/tool-governance apply standard sanitization rules.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jsonstudio/llms",
3
- "version": "0.6.3539",
3
+ "version": "0.6.3541",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",