npm - @zhin.js/agent - Versions diffs - 0.0.13 → 0.0.15 - Mend

@zhin.js/agent 0.0.13 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

package/CHANGELOG.md +18 -0
package/lib/builtin-tools.d.ts +17 -2
package/lib/builtin-tools.d.ts.map +1 -1
package/lib/builtin-tools.js +57 -14
package/lib/builtin-tools.js.map +1 -1
package/lib/init/create-zhin-agent.d.ts.map +1 -1
package/lib/init/create-zhin-agent.js +5 -2
package/lib/init/create-zhin-agent.js.map +1 -1
package/lib/init/register-ai-trigger.d.ts.map +1 -1
package/lib/init/register-ai-trigger.js +85 -24
package/lib/init/register-ai-trigger.js.map +1 -1
package/lib/init/register-builtin-tools.d.ts.map +1 -1
package/lib/init/register-builtin-tools.js +8 -5
package/lib/init/register-builtin-tools.js.map +1 -1
package/lib/zhin-agent/exec-policy.d.ts.map +1 -1
package/lib/zhin-agent/exec-policy.js +5 -3
package/lib/zhin-agent/exec-policy.js.map +1 -1
package/lib/zhin-agent/index.d.ts.map +1 -1
package/lib/zhin-agent/index.js +31 -6
package/lib/zhin-agent/index.js.map +1 -1
package/lib/zhin-agent/prompt.d.ts.map +1 -1
package/lib/zhin-agent/prompt.js +12 -1
package/lib/zhin-agent/prompt.js.map +1 -1
package/lib/zhin-agent/tool-collector.d.ts.map +1 -1
package/lib/zhin-agent/tool-collector.js +10 -3
package/lib/zhin-agent/tool-collector.js.map +1 -1
package/package.json +3 -3
package/src/builtin-tools.ts +61 -15
package/src/init/create-zhin-agent.ts +5 -2
package/src/init/register-ai-trigger.ts +85 -23
package/src/init/register-builtin-tools.ts +14 -5
package/src/zhin-agent/exec-policy.ts +5 -3
package/src/zhin-agent/index.ts +38 -6
package/src/zhin-agent/prompt.ts +11 -1
package/src/zhin-agent/tool-collector.ts +10 -3
package/tests/ai/multimodal.test.ts +106 -0
package/tests/ai/zhin-agent.test.ts +130 -1
package/tests/file-policy.test.ts +1 -1

package/src/init/register-ai-trigger.ts CHANGED Viewed

@@ -8,27 +8,80 @@ import type { ContentPart } from '@zhin.js/core';
 import type { OutputElement } from '../output.js';
 import type { AIServiceRefs } from './shared-refs.js';
-function extractImageUrls(message: Message<any>): string[] {
-  const urls: string[] = [];
-  const raw = typeof message.$raw === 'string' ? message.$raw : JSON.stringify(message.$raw || '');
-  const xmlMatches = raw.match(/<image[^>]+url="([^"]+)"/g);
-  if (xmlMatches) {
-    for (const m of xmlMatches) {
-      const urlMatch = m.match(/url="([^"]+)"/);
-      if (urlMatch) urls.push(urlMatch[1]);
+/**
+ * Extract multimodal ContentPart[] from a Message's structured $content segments.
+ * Handles image, video, audio, and face/sticker types.
+ * Falls back to raw string parsing for image URLs when $content has no media segments.
+ */
+function extractMediaParts(message: Message<any>): ContentPart[] {
+  const parts: ContentPart[] = [];
+  // 1. Extract from structured $content segments
+  if (Array.isArray(message.$content)) {
+    for (const seg of message.$content) {
+      if (typeof seg === 'string' || !seg || !seg.type) continue;
+      const { type, data } = seg;
+      switch (type) {
+        case 'image': {
+          const url = data?.url || data?.file || data?.src;
+          if (url) parts.push({ type: 'image_url', image_url: { url } });
+          break;
+        }
+        case 'video': {
+          const url = data?.url || data?.file || data?.src;
+          if (url) parts.push({ type: 'video_url', video_url: { url } });
+          break;
+        }
+        case 'audio':
+        case 'record':
+        case 'voice': {
+          const dataStr = data?.data || data?.base64;
+          if (dataStr) {
+            const fmt = data?.format === 'wav' ? 'wav' : 'mp3';
+            parts.push({ type: 'audio', audio: { data: dataStr, format: fmt } });
+          } else {
+            const url = data?.url || data?.file || data?.src;
+            if (url) {
+              // Audio URL: describe as text since most LLMs can't play audio URLs directly
+              parts.push({ type: 'text', text: `[用户发送了一段语音: ${url}]` });
+            }
+          }
+          break;
+        }
+        case 'face':
+        case 'sticker':
+        case 'emoji': {
+          const id = String(data?.id ?? data?.face_id ?? '');
+          const text = data?.text || data?.name || data?.describe;
+          if (id) parts.push({ type: 'face', face: { id, text } });
+          break;
+        }
+      }
     }
   }
-  const cqMatches = raw.match(/\[CQ:image[^\]]*url=([^\],]+)/g);
-  if (cqMatches) {
-    for (const m of cqMatches) {
-      const urlMatch = m.match(/url=([^\],]+)/);
-      if (urlMatch) urls.push(urlMatch[1]);
+  // 2. Fallback: parse image URLs from $raw for adapters that don't use structured $content
+  if (parts.length === 0) {
+    const raw = typeof message.$raw === 'string' ? message.$raw : JSON.stringify(message.$raw || '');
+    const xmlMatches = raw.match(/<image[^>]+url="([^"]+)"/g);
+    if (xmlMatches) {
+      for (const m of xmlMatches) {
+        const urlMatch = m.match(/url="([^"]+)"/);
+        if (urlMatch) parts.push({ type: 'image_url', image_url: { url: urlMatch[1] } });
+      }
+    }
+    const cqMatches = raw.match(/\[CQ:image[^\]]*url=([^\],]+)/g);
+    if (cqMatches) {
+      for (const m of cqMatches) {
+        const urlMatch = m.match(/url=([^\],]+)/);
+        if (urlMatch) parts.push({ type: 'image_url', image_url: { url: urlMatch[1] } });
+      }
     }
   }
-  return urls;
+  return parts;
 }
 function renderOutput(elements: OutputElement[]): string {
@@ -76,14 +129,25 @@ export function registerAITrigger(refs: AIServiceRefs): void {
       return;
     }
+    const dispatcherSvc = root.inject('dispatcher') as
+      | { replyWithPolish?: (m: Message<any>, s: 'ai' | 'command', c: unknown) => Promise<unknown> }
+      | undefined;
     const handleAIMessage = async (
       message: Message<any>,
       content: string,
     ) => {
+      const replyOutbound = async (payload: unknown) => {
+        if (dispatcherSvc && typeof dispatcherSvc.replyWithPolish === 'function') {
+          return dispatcherSvc.replyWithPolish(message, 'ai', payload as any);
+        }
+        return message.$reply(payload as any);
+      };
       const t0 = performance.now();
       if (!ai.isReady()) return;
       if (triggerConfig.thinkingMessage)
-        await message.$reply(triggerConfig.thinkingMessage);
+        await replyOutbound(triggerConfig.thinkingMessage);
       const permissions = inferSenderPermissions(message, triggerConfig);
       const toolContext: ToolContext = {
@@ -116,14 +180,12 @@ export function registerAITrigger(refs: AIServiceRefs): void {
         let responseText: string;
         if (refs.zhinAgent) {
-          const imageUrls = extractImageUrls(message);
+          const mediaParts = extractMediaParts(message);
           let elements: OutputElement[];
-          if (imageUrls.length > 0) {
+          if (mediaParts.length > 0) {
             const parts: ContentPart[] = [];
             if (content) parts.push({ type: 'text', text: content });
-            for (const url of imageUrls) {
-              parts.push({ type: 'image_url', image_url: { url } });
-            }
+            parts.push(...mediaParts);
             elements = await Promise.race([
               refs.zhinAgent.processMultimodal(parts, toolContext),
               timeout,
@@ -143,12 +205,12 @@ export function registerAITrigger(refs: AIServiceRefs): void {
           responseText = typeof response === 'string' ? response : '';
         }
-        if (responseText) await message.$reply(parseRichMediaContent(responseText));
+        if (responseText) await replyOutbound(parseRichMediaContent(responseText));
         logger.info(`[AI Handler] 总耗时: ${(performance.now() - t0).toFixed(0)}ms`);
       } catch (error) {
         const msg = error instanceof Error ? error.message : String(error);
         logger.warn(`[AI Handler] 失败 (${(performance.now() - t0).toFixed(0)}ms): ${msg}`);
-        await message.$reply(triggerConfig.errorTemplate.replace('{error}', msg));
+        await replyOutbound(triggerConfig.errorTemplate.replace('{error}', msg));
       }
     };

package/src/init/register-builtin-tools.ts CHANGED Viewed

@@ -6,7 +6,13 @@ import * as fs from 'fs';
 import * as os from 'os';
 import * as path from 'path';
 import { getPlugin, type Tool, type SkillFeature } from '@zhin.js/core';
-import { createBuiltinTools, discoverWorkspaceSkills, loadAlwaysSkillsContent, buildSkillsSummaryXML } from '../builtin-tools.js';
+import {
+  collectPluginSkillSearchRoots,
+  createBuiltinTools,
+  discoverWorkspaceSkills,
+  loadAlwaysSkillsContent,
+  buildSkillsSummaryXML,
+} from '../builtin-tools.js';
 import { resolveSkillInstructionMaxChars, DEFAULT_CONFIG } from '../zhin-agent/config.js';
 import { loadBootstrapFiles, buildContextFiles, buildBootstrapContextSection } from '../bootstrap.js';
 import { triggerAIHook, createAIHookEvent } from '../hooks.js';
@@ -24,7 +30,10 @@ export function registerBuiltinTools(refs: AIServiceRefs): void {
     const agentCfg = ai.getAgentConfig();
     const fullCfg = { ...DEFAULT_CONFIG, ...agentCfg } as Required<import('../zhin-agent/config.js').ZhinAgentConfig>;
     const modelName = provider.models[0] || '';
-    const builtinTools = createBuiltinTools({ skillInstructionMaxChars: resolveSkillInstructionMaxChars(fullCfg, modelName) });
+    const builtinTools = createBuiltinTools({
+      skillInstructionMaxChars: resolveSkillInstructionMaxChars(fullCfg, modelName),
+      pluginSkillRootsResolver: () => collectPluginSkillSearchRoots(root),
+    });
     const disposers: (() => void)[] = [];
     for (const tool of builtinTools) disposers.push(toolService.addTool(tool, root.name));
     const cronTools = createCronTools();
@@ -39,7 +48,7 @@ export function registerBuiltinTools(refs: AIServiceRefs): void {
       if (!skillFeature) return 0;
       const existing = skillFeature.getByPlugin(root.name);
       for (const s of existing) skillFeature.remove(s);
-      const skills = await discoverWorkspaceSkills();
+      const skills = await discoverWorkspaceSkills(root);
       if (skills.length === 0) return 0;
       const allRegisteredTools = toolService.getAll();
       const toolNameIndex = new Map<string, Tool>();
@@ -111,7 +120,7 @@ export function registerBuiltinTools(refs: AIServiceRefs): void {
       // Step 3: inject always-on skills content + XML summary
       try {
-        const skillsForContext = await discoverWorkspaceSkills();
+        const skillsForContext = await discoverWorkspaceSkills(root);
         const alwaysContent = await loadAlwaysSkillsContent(skillsForContext);
         const skillsXml = buildSkillsSummaryXML(skillsForContext);
         if (refs.zhinAgent) {
@@ -140,7 +149,7 @@ export function registerBuiltinTools(refs: AIServiceRefs): void {
           skillReloadDebounce = null;
           try {
             const count = await syncWorkspaceSkills();
-            const skillsForContext = await discoverWorkspaceSkills();
+            const skillsForContext = await discoverWorkspaceSkills(root);
             const alwaysContent = await loadAlwaysSkillsContent(skillsForContext);
             const skillsXml = buildSkillsSummaryXML(skillsForContext);
             if (refs.zhinAgent) {

package/src/zhin-agent/exec-policy.ts CHANGED Viewed

@@ -41,12 +41,14 @@ export function checkExecPolicy(config: Required<ZhinAgentConfig>, command: stri
   // allowlist
   const list = resolveExecAllowlist(config);
   const cmd = (command || '').trim();
+  // 提取命令的第一个 token（实际可执行程序名）进行白名单匹配
+  const cmdName = cmd.split(/[\s;|&]/)[0];
   const allowed = list.some(pattern => {
     try {
-      const re = new RegExp(pattern);
-      return re.test(cmd);
+      const re = new RegExp(`^${pattern}$`);
+      return re.test(cmdName);
     } catch {
-      return cmd === pattern || cmd.startsWith(pattern);
+      return cmdName === pattern;
     }
   });
   if (!allowed) {

package/src/zhin-agent/index.ts CHANGED Viewed

@@ -385,17 +385,49 @@ ${preData ? `\nPre-fetched data:\n${preData}\n` : ''}`;
     const profileSummary = await this.userProfiles.buildProfileSummary(userId);
     const personaEnhanced = buildEnhancedPersona(this.config, profileSummary, '');
-    const textContent = parts
-      .filter((p): p is Extract<ContentPart, { type: 'text' }> => p.type === 'text')
-      .map(p => p.text)
-      .join(' ') || '[多模态消息]';
+    // Build text summary describing the multimodal content
+    const textFragments: string[] = [];
+    const llmParts: ContentPart[] = [];
+    /** Full multimodal ContentPart union (core/ai may export a narrower type in some builds) */
+    type MultimodalPart =
+      | ContentPart
+      | { type: 'video_url'; video_url: { url: string } }
+      | { type: 'face'; face: { id: string; text?: string } };
+    for (const p of parts as MultimodalPart[]) {
+      switch (p.type) {
+        case 'text':
+          textFragments.push(p.text);
+          llmParts.push(p);
+          break;
+        case 'image_url':
+          textFragments.push('[图片]');
+          llmParts.push(p);
+          break;
+        case 'video_url':
+          textFragments.push('[视频]');
+          // Most LLMs don't support video natively; describe it as a URL for context
+          llmParts.push({ type: 'text', text: `[用户发送了一个视频: ${p.video_url.url}]` });
+          break;
+        case 'audio':
+          textFragments.push('[音频]');
+          llmParts.push(p);
+          break;
+        case 'face':
+          textFragments.push(p.face.text || `[表情:${p.face.id}]`);
+          llmParts.push({ type: 'text', text: p.face.text ? `[表情: ${p.face.text}]` : `[表情ID: ${p.face.id}]` });
+          break;
+      }
+    }
+    const textContent = textFragments.join(' ') || '[多模态消息]';
     const visionModel = this.config.visionModel || this.provider.models[0];
     const messages: ChatMessage[] = [
       { role: 'system', content: personaEnhanced },
       ...historyMessages,
-      { role: 'user', content: parts },
+      { role: 'user', content: llmParts },
     ];
     let reply = '';
@@ -413,7 +445,7 @@ ${preData ? `\nPre-fetched data:\n${preData}\n` : ''}`;
       reply = typeof msg === 'string' ? msg : '';
     }
-    if (!reply) reply = '抱歉，我无法理解这张图片。';
+    if (!reply) reply = '抱歉，我无法理解这条消息。';
     await this.saveToSession(sessionId, textContent, reply, sceneId);
     return parseOutput(reply);
   }

package/src/zhin-agent/prompt.ts CHANGED Viewed

@@ -15,7 +15,17 @@ export function contentToText(c: string | ContentPart[] | ContentPart | null | u
   if (c == null) return '';
   if (typeof c === 'string') return c;
   const parts = Array.isArray(c) ? c : [c as ContentPart];
-  return parts.map(p => (p?.type === 'text' ? p.text : '')).join('');
+  return parts.map(p => {
+    if (!p) return '';
+    switch (p.type) {
+      case 'text': return p.text;
+      case 'image_url': return '[图片]';
+      case 'audio': return '[音频]';
+      case 'video_url': return '[视频]';
+      case 'face': return (p as Extract<ContentPart, { type: 'face' }>).face.text || '[表情]';
+      default: return '';
+    }
+  }).join('');
 }
 export function buildUserMessageWithHistory(history: ChatMessage[], currentContent: string): string {

package/src/zhin-agent/tool-collector.ts CHANGED Viewed

@@ -106,16 +106,23 @@ export function collectRelevantTools(
   const collected: AgentTool[] = [];
   const collectedNames = new Set<string>();
-  // 0. Detect if user mentions a known skill name
+  // 0. Detect if user mentions a known skill（名称或关键词，与 SkillFeature / SKILL.md 注入一致）
   let mentionedSkill: string | null = null;
   if (skillRegistry && skillRegistry.size > 0) {
     const msgLower = message.toLowerCase();
-    for (const skill of skillRegistry.getAll()) {
+    outer: for (const skill of skillRegistry.getAll()) {
       if (msgLower.includes(skill.name.toLowerCase())) {
         mentionedSkill = skill.name;
-        logger.debug(`[技能检测] 用户提到技能: ${mentionedSkill}`);
+        logger.debug(`[技能检测] 用户提到技能(名称): ${mentionedSkill}`);
         break;
       }
+      for (const kw of skill.keywords || []) {
+        if (kw && msgLower.includes(String(kw).toLowerCase())) {
+          mentionedSkill = skill.name;
+          logger.debug(`[技能检测] 用户提到技能(关键词→${skill.name}): ${kw}`);
+          break outer;
+        }
+      }
     }
   }

package/tests/ai/multimodal.test.ts ADDED Viewed

@@ -0,0 +1,106 @@
+/**
+ * 多模态功能测试
+ *
+ * 测试 ContentPart 类型扩展、contentToText 辅助函数等多模态相关功能
+ */
+import { describe, it, expect } from 'vitest';
+import type { ContentPart } from '@zhin.js/core';
+import { contentToText } from '@zhin.js/agent';
+describe('contentToText 多模态支持', () => {
+  it('应处理纯文本', () => {
+    expect(contentToText('hello')).toBe('hello');
+  });
+  it('应处理 null 和 undefined', () => {
+    expect(contentToText(null)).toBe('');
+    expect(contentToText(undefined)).toBe('');
+  });
+  it('应处理 text ContentPart', () => {
+    const parts: ContentPart[] = [{ type: 'text', text: '你好' }];
+    expect(contentToText(parts)).toBe('你好');
+  });
+  it('应将 image_url ContentPart 转为 [图片]', () => {
+    const parts: ContentPart[] = [
+      { type: 'image_url', image_url: { url: 'https://example.com/img.jpg' } },
+    ];
+    expect(contentToText(parts)).toBe('[图片]');
+  });
+  it('应将 video_url ContentPart 转为 [视频]', () => {
+    const parts: ContentPart[] = [
+      { type: 'video_url', video_url: { url: 'https://example.com/video.mp4' } },
+    ];
+    expect(contentToText(parts)).toBe('[视频]');
+  });
+  it('应将 audio ContentPart 转为 [音频]', () => {
+    const parts: ContentPart[] = [
+      { type: 'audio', audio: { data: 'base64data', format: 'mp3' } },
+    ];
+    expect(contentToText(parts)).toBe('[音频]');
+  });
+  it('应将 face ContentPart 转为表情文字', () => {
+    const parts: ContentPart[] = [
+      { type: 'face', face: { id: '178', text: '笑哭' } },
+    ];
+    expect(contentToText(parts)).toBe('笑哭');
+  });
+  it('应将无文字 face ContentPart 转为 [表情]', () => {
+    const parts: ContentPart[] = [
+      { type: 'face', face: { id: '178' } },
+    ];
+    expect(contentToText(parts)).toBe('[表情]');
+  });
+  it('应正确处理混合内容', () => {
+    const parts: ContentPart[] = [
+      { type: 'text', text: '看看这个' },
+      { type: 'image_url', image_url: { url: 'https://example.com/img.jpg' } },
+      { type: 'face', face: { id: '1', text: '微笑' } },
+    ];
+    expect(contentToText(parts)).toBe('看看这个[图片]微笑');
+  });
+  it('应处理单个 ContentPart（非数组）', () => {
+    const part: ContentPart = { type: 'text', text: '单个' };
+    expect(contentToText(part)).toBe('单个');
+  });
+});
+describe('ContentPart 类型完整性', () => {
+  it('应支持所有多模态类型', () => {
+    const textPart: ContentPart = { type: 'text', text: 'hello' };
+    const imagePart: ContentPart = { type: 'image_url', image_url: { url: 'https://img.png' } };
+    const audioPart: ContentPart = { type: 'audio', audio: { data: 'data', format: 'mp3' } };
+    const videoPart: ContentPart = { type: 'video_url', video_url: { url: 'https://vid.mp4' } };
+    const facePart: ContentPart = { type: 'face', face: { id: '1', text: '微笑' } };
+    expect(textPart.type).toBe('text');
+    expect(imagePart.type).toBe('image_url');
+    expect(audioPart.type).toBe('audio');
+    expect(videoPart.type).toBe('video_url');
+    expect(facePart.type).toBe('face');
+  });
+  it('image_url 应支持 detail 参数', () => {
+    const part: ContentPart = {
+      type: 'image_url',
+      image_url: { url: 'https://img.png', detail: 'high' },
+    };
+    if (part.type === 'image_url') {
+      expect(part.image_url.detail).toBe('high');
+    }
+  });
+  it('face 的 text 应为可选', () => {
+    const part: ContentPart = { type: 'face', face: { id: '100' } };
+    if (part.type === 'face') {
+      expect(part.face.text).toBeUndefined();
+    }
+  });
+});

package/tests/ai/zhin-agent.test.ts CHANGED Viewed

@@ -6,7 +6,7 @@
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import { ZhinAgent } from '@zhin.js/agent';
 import { SkillFeature } from '@zhin.js/core';
-import type { AIProvider, AgentTool } from '@zhin.js/core';
+import type { AIProvider, AgentTool, ContentPart } from '@zhin.js/core';
 import type { Tool, ToolContext } from '@zhin.js/core';
 // Mock AIProvider
@@ -21,6 +21,27 @@ function createMockProvider(response: string = '你好！'): AIProvider {
   };
 }
+// Mock AIProvider with chatStream support (for multimodal tests)
+function createStreamMockProvider(response: string = '你好！'): AIProvider {
+  return {
+    name: 'mock',
+    models: ['mock-model'],
+    chat: vi.fn(async () => ({
+      choices: [{ message: { role: 'assistant' as const, content: response }, finish_reason: 'stop' }],
+    } as ChatResponse)),
+    chatStream: vi.fn(async function* () {
+      yield {
+        id: 'chunk-1',
+        object: 'chat.completion.chunk' as const,
+        created: Date.now(),
+        model: 'mock-model',
+        choices: [{ index: 0, delta: { content: response }, finish_reason: null }],
+      };
+    }),
+    listModels: vi.fn(async () => ['mock-model']),
+  };
+}
 function makeToolContext(overrides: Partial<ToolContext> = {}): ToolContext {
   return {
     platform: 'test',
@@ -174,4 +195,112 @@ describe('ZhinAgent', () => {
       expect(() => agent.dispose()).not.toThrow();
     });
   });
+  describe('processMultimodal', () => {
+    let streamAgent: ZhinAgent;
+    let streamProvider: AIProvider;
+    beforeEach(() => {
+      streamProvider = createStreamMockProvider();
+      streamAgent = new ZhinAgent(streamProvider, {
+        persona: '测试助手',
+        maxIterations: 3,
+      });
+    });
+    afterEach(() => {
+      streamAgent.dispose();
+    });
+    it('应处理图片+文本的多模态消息', async () => {
+      const context = makeToolContext();
+      const parts: ContentPart[] = [
+        { type: 'text', text: '这是什么？' },
+        { type: 'image_url', image_url: { url: 'https://example.com/cat.jpg' } },
+      ];
+      const result = await streamAgent.processMultimodal(parts, context);
+      expect(result).toBeDefined();
+      expect(Array.isArray(result)).toBe(true);
+      expect(result.length).toBeGreaterThan(0);
+    });
+    it('应处理视频类型的多模态消息', async () => {
+      const context = makeToolContext();
+      const parts: ContentPart[] = [
+        { type: 'text', text: '这个视频讲的是什么？' },
+        { type: 'video_url', video_url: { url: 'https://example.com/video.mp4' } },
+      ];
+      const result = await streamAgent.processMultimodal(parts, context);
+      expect(result).toBeDefined();
+      expect(Array.isArray(result)).toBe(true);
+      expect(result.length).toBeGreaterThan(0);
+    });
+    it('应处理表情类型的多模态消息', async () => {
+      const context = makeToolContext();
+      const parts: ContentPart[] = [
+        { type: 'text', text: '你好' },
+        { type: 'face', face: { id: '178', text: '笑哭' } },
+      ];
+      const result = await streamAgent.processMultimodal(parts, context);
+      expect(result).toBeDefined();
+      expect(Array.isArray(result)).toBe(true);
+      expect(result.length).toBeGreaterThan(0);
+    });
+    it('应处理混合多种媒体类型的多模态消息', async () => {
+      const context = makeToolContext();
+      const parts: ContentPart[] = [
+        { type: 'text', text: '看看这些' },
+        { type: 'image_url', image_url: { url: 'https://example.com/pic.jpg' } },
+        { type: 'video_url', video_url: { url: 'https://example.com/clip.mp4' } },
+        { type: 'face', face: { id: '1', text: '微笑' } },
+      ];
+      const result = await streamAgent.processMultimodal(parts, context);
+      expect(result).toBeDefined();
+      expect(Array.isArray(result)).toBe(true);
+    });
+    it('无文本时应使用默认描述', async () => {
+      const context = makeToolContext();
+      const parts: ContentPart[] = [
+        { type: 'image_url', image_url: { url: 'https://example.com/img.jpg' } },
+      ];
+      const result = await streamAgent.processMultimodal(parts, context);
+      expect(result).toBeDefined();
+      expect(Array.isArray(result)).toBe(true);
+    });
+    it('速率限制在多模态处理中应生效', async () => {
+      const strictAgent = new ZhinAgent(streamProvider, {
+        rateLimit: { maxRequestsPerMinute: 1, cooldownSeconds: 5 },
+      });
+      const context = makeToolContext();
+      const parts: ContentPart[] = [
+        { type: 'text', text: '第一次' },
+        { type: 'image_url', image_url: { url: 'https://example.com/1.jpg' } },
+      ];
+      // 第一次
+      await strictAgent.processMultimodal(parts, context);
+      // 第二次应被限制
+      const result = await strictAgent.processMultimodal(parts, context);
+      expect(result).toBeDefined();
+      expect(Array.isArray(result)).toBe(true);
+      strictAgent.dispose();
+    });
+  });
 });

package/tests/file-policy.test.ts CHANGED Viewed

@@ -54,6 +54,7 @@ describe('file-policy', () => {
         '/home/user/.gcloud/properties',
         '/home/user/.kube/config',
         '/root/.ssh/authorized_keys',
+        'data/memory/notes.md', // data 目录为敏感目录，禁止访问
       ];
       for (const p of blockedPaths) {
@@ -85,7 +86,6 @@ describe('file-policy', () => {
         '/home/user/project/README.md',
         '/home/user/project/tsconfig.json',
         './src/utils.ts',
-        'data/memory/notes.md',
         '/tmp/test.txt',
       ];