@jsonstudio/llms 0.6.2125 → 0.6.2172

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/dist/conversion/compat/actions/deepseek-web-response.js +27 -3
  2. package/dist/conversion/compat/actions/strip-orphan-function-calls-tag.js +1 -1
  3. package/dist/conversion/hub/pipeline/stages/resp_process/resp_process_stage1_tool_governance/index.js +9 -3
  4. package/dist/conversion/hub/process/chat-process.js +15 -18
  5. package/dist/conversion/responses/responses-openai-bridge.js +13 -12
  6. package/dist/conversion/shared/bridge-message-utils.js +92 -39
  7. package/dist/router/virtual-router/classifier.js +29 -5
  8. package/dist/router/virtual-router/engine/routing-pools/index.js +111 -5
  9. package/dist/router/virtual-router/engine-selection/multimodal-capability.d.ts +3 -0
  10. package/dist/router/virtual-router/engine-selection/multimodal-capability.js +26 -0
  11. package/dist/router/virtual-router/engine-selection/route-utils.js +6 -2
  12. package/dist/router/virtual-router/engine-selection/selection-deps.d.ts +1 -0
  13. package/dist/router/virtual-router/engine-selection/tier-selection.js +2 -0
  14. package/dist/router/virtual-router/engine.d.ts +2 -0
  15. package/dist/router/virtual-router/engine.js +57 -14
  16. package/dist/router/virtual-router/features.js +12 -4
  17. package/dist/router/virtual-router/message-utils.d.ts +8 -0
  18. package/dist/router/virtual-router/message-utils.js +170 -45
  19. package/dist/router/virtual-router/token-counter.js +51 -10
  20. package/dist/router/virtual-router/types.d.ts +3 -0
  21. package/dist/servertool/clock/session-scope.d.ts +3 -0
  22. package/dist/servertool/clock/session-scope.js +52 -0
  23. package/dist/servertool/engine.js +68 -8
  24. package/dist/servertool/handlers/clock-auto.js +2 -8
  25. package/dist/servertool/handlers/clock.js +3 -9
  26. package/dist/servertool/handlers/stop-message-auto/blocked-report.d.ts +16 -0
  27. package/dist/servertool/handlers/stop-message-auto/blocked-report.js +349 -0
  28. package/dist/servertool/handlers/stop-message-auto/iflow-followup.d.ts +23 -0
  29. package/dist/servertool/handlers/stop-message-auto/iflow-followup.js +503 -0
  30. package/dist/servertool/handlers/stop-message-auto/routing-state.d.ts +38 -0
  31. package/dist/servertool/handlers/stop-message-auto/routing-state.js +149 -0
  32. package/dist/servertool/handlers/stop-message-auto/runtime-utils.d.ts +67 -0
  33. package/dist/servertool/handlers/stop-message-auto/runtime-utils.js +387 -0
  34. package/dist/servertool/handlers/stop-message-auto.d.ts +1 -7
  35. package/dist/servertool/handlers/stop-message-auto.js +69 -971
  36. package/dist/servertool/handlers/web-search.js +117 -0
  37. package/package.json +1 -1
@@ -651,17 +651,41 @@ function normalizeFunctionResultsMarkupText(value) {
651
651
  changed: true
652
652
  };
653
653
  }
654
+ function normalizeCommentaryMarkupText(value) {
655
+ const capturedCommentary = [];
656
+ let changed = false;
657
+ const stripped = value.replace(/<\s*commentary\s*>([\s\S]*?)<\s*\/\s*commentary\s*>/gi, (_match, inner) => {
658
+ changed = true;
659
+ const text = typeof inner === 'string' ? inner.trim() : '';
660
+ if (text.length) {
661
+ capturedCommentary.push(text);
662
+ }
663
+ return '';
664
+ });
665
+ if (!changed) {
666
+ return { text: value, changed: false };
667
+ }
668
+ const collapsed = stripped.replace(/\n{3,}/g, '\n\n').trim();
669
+ if (collapsed.length) {
670
+ return { text: collapsed, changed: true };
671
+ }
672
+ return {
673
+ text: capturedCommentary.join('\n\n').trim(),
674
+ changed: true
675
+ };
676
+ }
654
677
  function harvestFunctionResultsMarkup(message) {
655
678
  let harvested = false;
656
679
  const apply = (input) => {
657
680
  if (typeof input !== 'string') {
658
681
  return undefined;
659
682
  }
660
- const normalized = normalizeFunctionResultsMarkupText(input);
661
- if (normalized.changed) {
683
+ const functionResultsNormalized = normalizeFunctionResultsMarkupText(input);
684
+ if (functionResultsNormalized.changed) {
662
685
  harvested = true;
663
686
  }
664
- return normalized.text;
687
+ const commentaryNormalized = normalizeCommentaryMarkupText(functionResultsNormalized.text);
688
+ return commentaryNormalized.text;
665
689
  };
666
690
  if (typeof message.content === 'string') {
667
691
  const next = apply(message.content);
@@ -1,7 +1,7 @@
1
1
  function isRecord(value) {
2
2
  return Boolean(value) && typeof value === 'object' && !Array.isArray(value);
3
3
  }
4
- const ORPHAN_TAG_RE = /^\s*(?:[•*+-]\s*)?<\/?\s*function_calls\s*\/?\s*>\s*$/i;
4
+ const ORPHAN_TAG_RE = /^\s*(?:[•*+-]\s*)?(?:<\/?\s*function_calls\s*\/?\s*>|<\/\s*(?:parameter|function|tool_call)\s*>)\s*$/i;
5
5
  function stripOrphanTagLines(text) {
6
6
  const raw = String(text ?? '');
7
7
  if (!raw)
@@ -2,6 +2,7 @@ import { runChatResponseToolFilters } from '../../../../../shared/tool-filter-pi
2
2
  import { normalizeApplyPatchToolCallsOnResponse } from '../../../../../shared/tool-governor.js';
3
3
  import { buildChatResponseFromResponses } from '../../../../../shared/responses-response-utils.js';
4
4
  import { normalizeAssistantTextToToolCalls } from '../../../../../shared/text-markup-normalizer.js';
5
+ import { stripOrphanFunctionCallsTag } from '../../../../../compat/actions/strip-orphan-function-calls-tag.js';
5
6
  import { ToolGovernanceEngine } from '../../../../tool-governance/index.js';
6
7
  import { recordStage } from '../../../stages/utils.js';
7
8
  const toolGovernanceEngine = new ToolGovernanceEngine();
@@ -92,14 +93,19 @@ function maybeHarvestEmptyToolCallsFromJsonContent(payload) {
92
93
  }
93
94
  return payload;
94
95
  }
96
+ function sanitizeResponseShapeBeforeGovernance(payload) {
97
+ return stripOrphanFunctionCallsTag(payload);
98
+ }
95
99
  export async function runRespProcessStage1ToolGovernance(options) {
96
100
  const canonicalInput = coerceToCanonicalChatCompletion(options.payload);
97
- maybeHarvestEmptyToolCallsFromJsonContent(canonicalInput);
101
+ const shapeSanitizedInput = sanitizeResponseShapeBeforeGovernance(canonicalInput);
102
+ maybeHarvestEmptyToolCallsFromJsonContent(shapeSanitizedInput);
98
103
  recordStage(options.stageRecorder, 'chat_process.resp.stage6.canonicalize_chat_completion', {
99
104
  converted: canonicalInput !== options.payload,
100
- canonicalPayload: canonicalInput
105
+ shapeSanitized: shapeSanitizedInput !== canonicalInput,
106
+ canonicalPayload: shapeSanitizedInput
101
107
  });
102
- const filtered = await runChatResponseToolFilters(canonicalInput, {
108
+ const filtered = await runChatResponseToolFilters(shapeSanitizedInput, {
103
109
  entryEndpoint: options.entryEndpoint,
104
110
  requestId: options.requestId,
105
111
  profile: 'openai-chat'
@@ -8,6 +8,7 @@ import { clearClockSession, parseDueAtMs, resolveClockConfig, reserveDueTasksFor
8
8
  import { logClock } from '../../../servertool/clock/log.js';
9
9
  import { logContinueExecution } from '../../../servertool/continue-execution/log.js';
10
10
  import { buildTimeTagLine, getClockTimeSnapshot } from '../../../servertool/clock/ntp.js';
11
+ import { resolveClockSessionScope } from '../../../servertool/clock/session-scope.js';
11
12
  import { clearPendingServerToolInjection, loadPendingServerToolInjection } from '../../../servertool/pending-session.js';
12
13
  import { loadRoutingInstructionStateSync } from '../../../router/virtual-router/sticky-session-store.js';
13
14
  import { isJsonObject } from '../types/json.js';
@@ -941,8 +942,10 @@ function appendDirectiveToUserContent(content, directive) {
941
942
  return next;
942
943
  }
943
944
  function resolveSessionIdForClock(metadata, request) {
944
- const candidate = readString(metadata.sessionId) ?? readString(request.metadata?.sessionId);
945
- return candidate && candidate.trim() ? candidate.trim() : null;
945
+ const requestMetadata = request.metadata && typeof request.metadata === 'object' && !Array.isArray(request.metadata)
946
+ ? request.metadata
947
+ : null;
948
+ return resolveClockSessionScope(metadata, requestMetadata);
946
949
  }
947
950
  function stripClockClearDirectiveFromText(text) {
948
951
  const pattern = /<\*\*\s*clock\s*:\s*clear\s*\*\*>/gi;
@@ -1279,7 +1282,7 @@ async function maybeInjectClockRemindersAndApplyDirectives(request, metadata, re
1279
1282
  markerToolMessages = markerToolMessages.concat(buildClockMarkerScheduleMessages(requestId, index, marker, {
1280
1283
  ok: false,
1281
1284
  action: 'schedule',
1282
- message: 'clock requires sessionId (x-session-id header or metadata.sessionId).'
1285
+ message: 'clock requires session scope (sessionId/conversationId or clockDaemonId).'
1283
1286
  }));
1284
1287
  continue;
1285
1288
  }
@@ -1613,26 +1616,20 @@ function detectWebSearchIntent(request) {
1613
1616
  if (!messages.length) {
1614
1617
  return { hasIntent: false, googlePreferred: false };
1615
1618
  }
1616
- // 从末尾向前找到最近一条 user 消息,忽略 tool / assistant 的工具调用轮次,
1617
- // 以便在 Responses / 多轮工具调用场景下仍然根据“最近一条用户输入”判断意图。
1618
- let lastUser;
1619
- for (let idx = messages.length - 1; idx >= 0; idx -= 1) {
1620
- const candidate = messages[idx];
1621
- if (candidate && candidate.role === 'user') {
1622
- lastUser = candidate;
1623
- break;
1624
- }
1625
- }
1626
- if (!lastUser) {
1619
+ // 仅使用“当前请求最后一条消息且角色为 user”的文本判断 web_search 意图:
1620
+ // - 不读取系统提示词(system/developer)。
1621
+ // - 不读取历史 user 消息(避免历史上下文误触发)。
1622
+ const lastMessage = messages[messages.length - 1];
1623
+ if (!lastMessage || lastMessage.role !== 'user') {
1627
1624
  return { hasIntent: false, googlePreferred: false };
1628
1625
  }
1629
1626
  // 支持多模态 content:既可能是纯文本字符串,也可能是带 image_url 的分段数组。
1630
1627
  let content = '';
1631
- if (typeof lastUser.content === 'string') {
1632
- content = lastUser.content;
1628
+ if (typeof lastMessage.content === 'string') {
1629
+ content = lastMessage.content;
1633
1630
  }
1634
- else if (Array.isArray(lastUser.content)) {
1635
- const parts = lastUser.content;
1631
+ else if (Array.isArray(lastMessage.content)) {
1632
+ const parts = lastMessage.content;
1636
1633
  const texts = [];
1637
1634
  for (const part of parts) {
1638
1635
  if (typeof part === 'string') {
@@ -205,6 +205,7 @@ function appendLocalImageBlockOnLatestUserInput(messages, context) {
205
205
  }
206
206
  }
207
207
  }
208
+ const unreadableImageNotices = [];
208
209
  for (const imagePath of imagePaths) {
209
210
  let dataUrl = '';
210
211
  try {
@@ -213,21 +214,21 @@ function appendLocalImageBlockOnLatestUserInput(messages, context) {
213
214
  catch (error) {
214
215
  const reason = error instanceof Error
215
216
  ? `${error.code ?? 'READ_FAILED'}: ${error.message}`
216
- : String(error);
217
- throw new ProviderProtocolError('Failed to load local image path from latest user input', {
218
- code: 'MALFORMED_REQUEST',
219
- protocol: 'openai-responses',
220
- providerType: 'responses',
221
- details: {
222
- context: 'buildChatRequestFromResponses.localImagePathAutoload',
223
- requestId: context.requestId,
224
- path: imagePath,
225
- reason
226
- }
227
- });
217
+ : String(error ?? 'READ_FAILED');
218
+ unreadableImageNotices.push(`[local_image_unreadable] 文件不可读,已跳过该图片路径: ${imagePath} (${reason})`);
219
+ continue;
228
220
  }
229
221
  normalizedContent.push({ type: 'image_url', image_url: { url: dataUrl } });
230
222
  }
223
+ if (unreadableImageNotices.length) {
224
+ normalizedContent.push({
225
+ type: 'text',
226
+ text: unreadableImageNotices.join('\n')
227
+ });
228
+ }
229
+ if (!messageHasImageContent(normalizedContent) && !unreadableImageNotices.length) {
230
+ return;
231
+ }
231
232
  latestUserMessage.content = normalizedContent;
232
233
  }
233
234
  // NOTE: 自修复提示已移除(统一标准:不做模糊兜底)。
@@ -60,8 +60,8 @@ function collectText(value) {
60
60
  }
61
61
  return '';
62
62
  }
63
- function extractImageBlocksFromContent(content) {
64
- const images = [];
63
+ function extractMediaBlocksFromContent(content) {
64
+ const media = [];
65
65
  const visit = (value) => {
66
66
  if (!value)
67
67
  return;
@@ -75,14 +75,27 @@ function extractImageBlocksFromContent(content) {
75
75
  }
76
76
  const record = value;
77
77
  const typeValue = typeof record.type === 'string' ? record.type.toLowerCase() : '';
78
+ let kind = null;
78
79
  if (typeValue === 'image' || typeValue === 'image_url' || typeValue === 'input_image') {
80
+ kind = 'image';
81
+ }
82
+ else if (typeValue === 'video' || typeValue === 'video_url' || typeValue === 'input_video') {
83
+ kind = 'video';
84
+ }
85
+ else if (record.video_url !== undefined) {
86
+ kind = 'video';
87
+ }
88
+ else if (record.image_url !== undefined) {
89
+ kind = 'image';
90
+ }
91
+ if (kind) {
79
92
  let url = '';
80
- const imageUrl = record.image_url;
81
- if (typeof imageUrl === 'string') {
82
- url = imageUrl;
93
+ const mediaUrl = kind === 'video' ? record.video_url : record.image_url;
94
+ if (typeof mediaUrl === 'string') {
95
+ url = mediaUrl;
83
96
  }
84
- else if (imageUrl && typeof imageUrl === 'object' && typeof imageUrl.url === 'string') {
85
- url = imageUrl.url;
97
+ else if (mediaUrl && typeof mediaUrl === 'object' && typeof mediaUrl.url === 'string') {
98
+ url = mediaUrl.url;
86
99
  }
87
100
  else if (typeof record.url === 'string') {
88
101
  url = record.url;
@@ -96,13 +109,13 @@ function extractImageBlocksFromContent(content) {
96
109
  const trimmed = url.trim();
97
110
  if (trimmed.length) {
98
111
  let detail;
99
- if (imageUrl && typeof imageUrl === 'object' && typeof imageUrl.detail === 'string') {
100
- detail = imageUrl.detail.trim() || undefined;
112
+ if (mediaUrl && typeof mediaUrl === 'object' && typeof mediaUrl.detail === 'string') {
113
+ detail = mediaUrl.detail.trim() || undefined;
101
114
  }
102
115
  else if (typeof record.detail === 'string') {
103
116
  detail = record.detail.trim() || undefined;
104
117
  }
105
- images.push({ url: trimmed, detail });
118
+ media.push({ kind, url: trimmed, detail });
106
119
  }
107
120
  return;
108
121
  }
@@ -111,7 +124,7 @@ function extractImageBlocksFromContent(content) {
111
124
  }
112
125
  };
113
126
  visit(content);
114
- return images;
127
+ return media;
115
128
  }
116
129
  function extractUserTextFromEntry(entry) {
117
130
  if (!entry || typeof entry !== 'object')
@@ -141,7 +154,7 @@ export function convertMessagesToBridgeInput(options) {
141
154
  const role = coerceBridgeRole(m.role || 'user');
142
155
  const content = m.content;
143
156
  const collectedText = collectText(content);
144
- const imageBlocks = extractImageBlocksFromContent(content);
157
+ const mediaBlocks = extractMediaBlocksFromContent(content);
145
158
  const text = role === 'system' ? collectedText : collectedText.trim();
146
159
  if (role === 'system') {
147
160
  if (collectedText && collectedText.length) {
@@ -218,20 +231,26 @@ export function convertMessagesToBridgeInput(options) {
218
231
  }
219
232
  continue;
220
233
  }
221
- if (typeof text === 'string' || imageBlocks.length) {
234
+ if (typeof text === 'string' || mediaBlocks.length) {
222
235
  const tRole = role === 'assistant' ? 'output_text' : 'input_text';
223
236
  const blocks = [];
224
237
  if (typeof text === 'string' && text.length) {
225
238
  blocks.push({ type: tRole, text });
226
239
  }
227
- for (const img of imageBlocks) {
228
- const block = {
229
- type: 'input_image',
230
- image_url: img.url
231
- };
232
- if (img.detail) {
233
- block.detail = img.detail;
240
+ for (const media of mediaBlocks) {
241
+ const block = media.kind === 'video'
242
+ ? {
243
+ type: 'input_video',
244
+ video_url: media.url
245
+ }
246
+ : {
247
+ type: 'input_image',
248
+ image_url: media.url
249
+ };
250
+ if (media.detail) {
251
+ block.detail = media.detail;
234
252
  }
253
+ ;
235
254
  blocks.push(block);
236
255
  }
237
256
  if (blocks.length) {
@@ -324,7 +343,7 @@ function processMessageBlocks(blocks, normalizeFunctionName, tools, toolNameById
324
343
  const toolMessages = [];
325
344
  let currentLastCall = lastToolCallId;
326
345
  const reasoningSegments = [];
327
- const images = [];
346
+ const mediaBlocks = [];
328
347
  for (const block of blocks) {
329
348
  if (!block || typeof block !== 'object')
330
349
  continue;
@@ -347,17 +366,45 @@ function processMessageBlocks(blocks, normalizeFunctionName, tools, toolNameById
347
366
  toolMessages.push(tm);
348
367
  currentLastCall = nested.lastCallId;
349
368
  reasoningSegments.push(...nested.reasoningSegments);
350
- if (nested.images.length)
351
- images.push(...nested.images);
369
+ if (nested.mediaBlocks.length)
370
+ mediaBlocks.push(...nested.mediaBlocks);
371
+ continue;
372
+ }
373
+ if (type === 'input_image' || type === 'image' || type === 'image_url') {
374
+ let url = '';
375
+ if (typeof block.image_url === 'string') {
376
+ url = block.image_url.trim();
377
+ }
378
+ else if (block.image_url && typeof block.image_url.url === 'string') {
379
+ url = block.image_url.url.trim();
380
+ }
381
+ else if (typeof block.url === 'string') {
382
+ url = block.url.trim();
383
+ }
384
+ if (url) {
385
+ const detail = typeof block.detail === 'string' && block.detail.trim()
386
+ ? block.detail.trim()
387
+ : undefined;
388
+ mediaBlocks.push({ kind: 'image', url, detail });
389
+ }
352
390
  continue;
353
391
  }
354
- if (type === 'input_image') {
355
- const url = typeof block.image_url === 'string' ? block.image_url.trim() : '';
392
+ if (type === 'input_video' || type === 'video' || type === 'video_url') {
393
+ let url = '';
394
+ if (typeof block.video_url === 'string') {
395
+ url = block.video_url.trim();
396
+ }
397
+ else if (block.video_url && typeof block.video_url.url === 'string') {
398
+ url = block.video_url.url.trim();
399
+ }
400
+ else if (typeof block.url === 'string') {
401
+ url = block.url.trim();
402
+ }
356
403
  if (url) {
357
404
  const detail = typeof block.detail === 'string' && block.detail.trim()
358
405
  ? block.detail.trim()
359
406
  : undefined;
360
- images.push({ url, detail });
407
+ mediaBlocks.push({ kind: 'video', url, detail });
361
408
  }
362
409
  continue;
363
410
  }
@@ -421,7 +468,7 @@ function processMessageBlocks(blocks, normalizeFunctionName, tools, toolNameById
421
468
  }
422
469
  }
423
470
  const text = textParts.length ? textParts.join('\n').trim() : null;
424
- return { text, images, toolCalls, toolMessages, lastCallId: currentLastCall, reasoningSegments };
471
+ return { text, mediaBlocks, toolCalls, toolMessages, lastCallId: currentLastCall, reasoningSegments };
425
472
  }
426
473
  export function convertBridgeInputToChatMessages(options) {
427
474
  const { input, tools, normalizeFunctionName, toolResultFallbackText } = options;
@@ -547,17 +594,20 @@ export function convertBridgeInputToChatMessages(options) {
547
594
  for (const msg of nested.toolMessages)
548
595
  messages.push(msg);
549
596
  const normalizedRole = coerceBridgeRole((explicit.role ?? entry.role) || 'user');
550
- if (nested.images.length) {
597
+ if (nested.mediaBlocks.length) {
551
598
  const contentBlocks = [];
552
599
  if (typeof nested.text === 'string' && nested.text.trim().length) {
553
600
  contentBlocks.push({ type: 'text', text: nested.text });
554
601
  }
555
- for (const img of nested.images) {
556
- const imgBlock = { type: 'image_url', image_url: { url: img.url } };
557
- if (img.detail) {
558
- imgBlock.image_url.detail = img.detail;
602
+ for (const media of nested.mediaBlocks) {
603
+ const mediaBlock = media.kind === 'video'
604
+ ? { type: 'video_url', video_url: { url: media.url } }
605
+ : { type: 'image_url', image_url: { url: media.url } };
606
+ if (media.detail) {
607
+ const key = media.kind === 'video' ? 'video_url' : 'image_url';
608
+ mediaBlock[key].detail = media.detail;
559
609
  }
560
- contentBlocks.push(imgBlock);
610
+ contentBlocks.push(mediaBlock);
561
611
  }
562
612
  const msg = {
563
613
  role: normalizedRole,
@@ -590,17 +640,20 @@ export function convertBridgeInputToChatMessages(options) {
590
640
  for (const msg of nested.toolMessages)
591
641
  messages.push(msg);
592
642
  const normalizedRole = coerceBridgeRole(entry.role || 'user');
593
- if (nested.images.length) {
643
+ if (nested.mediaBlocks.length) {
594
644
  const contentBlocks = [];
595
645
  if (typeof nested.text === 'string' && nested.text.trim().length) {
596
646
  contentBlocks.push({ type: 'text', text: nested.text });
597
647
  }
598
- for (const img of nested.images) {
599
- const imgBlock = { type: 'image_url', image_url: { url: img.url } };
600
- if (img.detail) {
601
- imgBlock.image_url.detail = img.detail;
648
+ for (const media of nested.mediaBlocks) {
649
+ const mediaBlock = media.kind === 'video'
650
+ ? { type: 'video_url', video_url: { url: media.url } }
651
+ : { type: 'image_url', image_url: { url: media.url } };
652
+ if (media.detail) {
653
+ const key = media.kind === 'video' ? 'video_url' : 'image_url';
654
+ mediaBlock[key].detail = media.detail;
602
655
  }
603
- contentBlocks.push(imgBlock);
656
+ contentBlocks.push(mediaBlock);
604
657
  }
605
658
  const msg = {
606
659
  role: normalizedRole,
@@ -114,13 +114,15 @@ function detectWebSearchIntent(text) {
114
114
  return false;
115
115
  }
116
116
  const normalized = text.toLowerCase();
117
+ if (isNegativeWebSearchContext(normalized, text)) {
118
+ return false;
119
+ }
117
120
  const directKeywords = [
118
121
  'web search',
119
122
  'web_search',
120
123
  'websearch',
121
124
  'search the web',
122
125
  'internet search',
123
- 'search online',
124
126
  '搜索网页',
125
127
  '联网搜索',
126
128
  '上网搜索',
@@ -133,21 +135,43 @@ function detectWebSearchIntent(text) {
133
135
  return true;
134
136
  }
135
137
  const enVerb = ['search', 'find', 'lookup', 'look up', 'google'];
136
- const enNoun = ['web', 'internet', 'online', 'news', 'latest', 'today'];
138
+ const enNoun = ['web', 'internet', 'online', 'google', 'bing'];
137
139
  const hasEnVerb = enVerb.some((keyword) => normalized.includes(keyword));
138
140
  const hasEnNoun = enNoun.some((keyword) => normalized.includes(keyword));
139
141
  if (hasEnVerb && hasEnNoun) {
140
142
  return true;
141
143
  }
142
- const zhVerb = ['搜索', '查找', '查询', ''];
143
- const zhNoun = ['网络', '联网', '网页', '新闻', '资讯', '实时', '最新', '今天'];
144
+ const zhVerb = ['搜索', '查找', '', '上网查', '上网搜', '联网查', '联网搜'];
145
+ const zhNoun = ['网络', '联网', '网页', '网上', '互联网', '谷歌', '百度'];
144
146
  const hasZhVerb = zhVerb.some((keyword) => text.includes(keyword));
145
147
  const hasZhNoun = zhNoun.some((keyword) => text.includes(keyword));
146
- if (text.includes('上网') || (hasZhVerb && hasZhNoun)) {
148
+ if ((text.includes('上网') || text.includes('联网')) && (text.includes('搜') || text.includes('查'))) {
149
+ return true;
150
+ }
151
+ if (hasZhVerb && hasZhNoun) {
147
152
  return true;
148
153
  }
149
154
  return false;
150
155
  }
156
+ function isNegativeWebSearchContext(normalized, originalText) {
157
+ const englishPatterns = [
158
+ /prefer\s+resources?\s+over\s+web[\s_-]?search/u,
159
+ /prefer[\s\S]{0,40}web[\s_-]?search/u,
160
+ /do\s+not[\s\S]{0,20}web[\s_-]?search/u,
161
+ /don't[\s\S]{0,20}web[\s_-]?search/u,
162
+ /without[\s\S]{0,20}web[\s_-]?search/u,
163
+ /cannot[\s\S]{0,20}web[\s_-]?search/u
164
+ ];
165
+ if (englishPatterns.some((pattern) => pattern.test(normalized))) {
166
+ return true;
167
+ }
168
+ const chinesePatterns = [
169
+ /不能.{0,20}(上网|联网|web[_ -]?search|搜索网页)/u,
170
+ /不要.{0,20}(上网|联网|web[_ -]?search|搜索网页)/u,
171
+ /避免.{0,20}(上网|联网|web[_ -]?search|搜索网页)/u
172
+ ];
173
+ return chinesePatterns.some((pattern) => pattern.test(originalText));
174
+ }
151
175
  function normalizeList(source, fallback) {
152
176
  if (!source || source.length === 0) {
153
177
  return fallback;