opc-agent 4.1.0 → 4.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +20 -20
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +14 -14
  3. package/.github/PULL_REQUEST_TEMPLATE.md +13 -13
  4. package/CHANGELOG.md +48 -48
  5. package/CONTRIBUTING.md +36 -36
  6. package/README.zh-CN.md +497 -497
  7. package/dist/channels/wechat.js +6 -6
  8. package/dist/deploy/index.js +56 -56
  9. package/dist/studio/server.js +30 -1
  10. package/dist/studio-ui/index.html +230 -10
  11. package/dist/ui/components.js +105 -105
  12. package/examples/README.md +22 -22
  13. package/examples/basic-agent.ts +90 -90
  14. package/examples/brain-integration.ts +71 -71
  15. package/examples/multi-channel.ts +74 -74
  16. package/fix-sidebar.mjs +188 -188
  17. package/install.ps1 +154 -154
  18. package/install.sh +164 -164
  19. package/package.json +1 -1
  20. package/scripts/install.ps1 +31 -31
  21. package/scripts/install.sh +40 -40
  22. package/serve-studio.js +13 -13
  23. package/serve-test.js +25 -25
  24. package/src/channels/dingtalk.ts +46 -46
  25. package/src/channels/email.ts +351 -351
  26. package/src/channels/feishu.ts +349 -349
  27. package/src/channels/googlechat.ts +42 -42
  28. package/src/channels/imessage.ts +31 -31
  29. package/src/channels/irc.ts +82 -82
  30. package/src/channels/line.ts +32 -32
  31. package/src/channels/matrix.ts +33 -33
  32. package/src/channels/mattermost.ts +57 -57
  33. package/src/channels/msteams.ts +32 -32
  34. package/src/channels/nostr.ts +32 -32
  35. package/src/channels/qq.ts +33 -33
  36. package/src/channels/signal.ts +32 -32
  37. package/src/channels/sms.ts +33 -33
  38. package/src/channels/telegram.ts +616 -616
  39. package/src/channels/twitch.ts +65 -65
  40. package/src/channels/voice-call.ts +100 -100
  41. package/src/channels/websocket.ts +399 -399
  42. package/src/channels/wechat.ts +329 -329
  43. package/src/channels/whatsapp.ts +32 -32
  44. package/src/cli/chat.ts +99 -99
  45. package/src/cli/setup.ts +314 -314
  46. package/src/core/agent.ts +476 -476
  47. package/src/core/api-server.ts +277 -277
  48. package/src/core/audio.ts +98 -98
  49. package/src/core/collaboration.ts +275 -275
  50. package/src/core/context-discovery.ts +85 -85
  51. package/src/core/context-refs.ts +140 -140
  52. package/src/core/gateway.ts +106 -106
  53. package/src/core/heartbeat.ts +51 -51
  54. package/src/core/hooks.ts +105 -105
  55. package/src/core/ide-bridge.ts +133 -133
  56. package/src/core/node-network.ts +86 -86
  57. package/src/core/profiles.ts +122 -122
  58. package/src/core/scheduler.ts +187 -187
  59. package/src/core/session-manager.ts +137 -137
  60. package/src/core/subagent.ts +98 -98
  61. package/src/core/vision.ts +180 -180
  62. package/src/core/workflow-graph.ts +365 -365
  63. package/src/daemon.ts +96 -96
  64. package/src/deploy/index.ts +255 -255
  65. package/src/doctor.ts +156 -156
  66. package/src/eval/index.ts +211 -211
  67. package/src/eval/suites/basic.json +16 -16
  68. package/src/eval/suites/memory.json +12 -12
  69. package/src/eval/suites/safety.json +14 -14
  70. package/src/hub/brain-seed.ts +54 -54
  71. package/src/hub/client.ts +60 -60
  72. package/src/mcp/servers/calculator-mcp.ts +65 -65
  73. package/src/mcp/servers/crypto-mcp.ts +73 -73
  74. package/src/mcp/servers/database-mcp.ts +72 -72
  75. package/src/mcp/servers/datetime-mcp.ts +69 -69
  76. package/src/mcp/servers/filesystem.ts +66 -66
  77. package/src/mcp/servers/github-mcp.ts +58 -58
  78. package/src/mcp/servers/index.ts +63 -63
  79. package/src/mcp/servers/json-mcp.ts +102 -102
  80. package/src/mcp/servers/memory-mcp.ts +56 -56
  81. package/src/mcp/servers/regex-mcp.ts +53 -53
  82. package/src/mcp/servers/web-mcp.ts +49 -49
  83. package/src/memory/context-compressor.ts +189 -189
  84. package/src/memory/seed-loader.ts +212 -212
  85. package/src/memory/user-profiler.ts +215 -215
  86. package/src/plugins/content-filter.ts +23 -23
  87. package/src/plugins/logger.ts +18 -18
  88. package/src/plugins/rate-limiter.ts +38 -38
  89. package/src/protocols/a2a/client.ts +132 -132
  90. package/src/protocols/a2a/index.ts +8 -8
  91. package/src/protocols/a2a/server.ts +333 -333
  92. package/src/protocols/a2a/types.ts +88 -88
  93. package/src/protocols/a2a/utils.ts +50 -50
  94. package/src/protocols/agui/client.ts +83 -83
  95. package/src/protocols/agui/index.ts +4 -4
  96. package/src/protocols/agui/server.ts +218 -218
  97. package/src/protocols/agui/types.ts +153 -153
  98. package/src/protocols/index.ts +2 -2
  99. package/src/protocols/mcp/agent-tools.ts +134 -134
  100. package/src/protocols/mcp/index.ts +8 -8
  101. package/src/protocols/mcp/server.ts +262 -262
  102. package/src/protocols/mcp/types.ts +69 -69
  103. package/src/providers/index.ts +632 -632
  104. package/src/publish/index.ts +376 -376
  105. package/src/scheduler/cron-engine.ts +191 -191
  106. package/src/scheduler/index.ts +2 -2
  107. package/src/schema/oad.ts +217 -217
  108. package/src/security/approval.ts +131 -131
  109. package/src/security/approvals.ts +143 -143
  110. package/src/security/elevated.ts +105 -105
  111. package/src/security/guardrails.ts +248 -248
  112. package/src/security/index.ts +9 -9
  113. package/src/security/keys.ts +87 -87
  114. package/src/security/secrets.ts +129 -129
  115. package/src/skills/builtin/index.ts +408 -408
  116. package/src/skills/marketplace.ts +113 -113
  117. package/src/skills/types.ts +42 -42
  118. package/src/studio/server.ts +31 -1
  119. package/src/studio/templates-data.ts +178 -178
  120. package/src/studio-ui/index.html +230 -10
  121. package/src/telemetry/index.ts +324 -324
  122. package/src/tools/builtin/browser.ts +299 -299
  123. package/src/tools/builtin/datetime.ts +41 -41
  124. package/src/tools/builtin/file.ts +107 -107
  125. package/src/tools/builtin/home-assistant.ts +116 -116
  126. package/src/tools/builtin/rl-tools.ts +243 -243
  127. package/src/tools/builtin/shell.ts +43 -43
  128. package/src/tools/builtin/vision.ts +64 -64
  129. package/src/tools/builtin/web-search.ts +126 -126
  130. package/src/tools/builtin/web.ts +35 -35
  131. package/src/tools/document-processor.ts +213 -213
  132. package/src/tools/image-generator.ts +150 -150
  133. package/src/tools/integrations/calendar.ts +73 -73
  134. package/src/tools/integrations/code-exec.ts +39 -39
  135. package/src/tools/integrations/csv-analyzer.ts +92 -92
  136. package/src/tools/integrations/database.ts +44 -44
  137. package/src/tools/integrations/email-send.ts +76 -76
  138. package/src/tools/integrations/git-tool.ts +42 -42
  139. package/src/tools/integrations/github-tool.ts +76 -76
  140. package/src/tools/integrations/image-gen.ts +56 -56
  141. package/src/tools/integrations/index.ts +92 -92
  142. package/src/tools/integrations/jira.ts +83 -83
  143. package/src/tools/integrations/notion.ts +71 -71
  144. package/src/tools/integrations/npm-tool.ts +48 -48
  145. package/src/tools/integrations/pdf-reader.ts +58 -58
  146. package/src/tools/integrations/slack.ts +65 -65
  147. package/src/tools/integrations/summarizer.ts +49 -49
  148. package/src/tools/integrations/translator.ts +48 -48
  149. package/src/tools/integrations/trello.ts +60 -60
  150. package/src/tools/integrations/vector-search.ts +42 -42
  151. package/src/tools/integrations/web-scraper.ts +47 -47
  152. package/src/tools/integrations/web-search.ts +58 -58
  153. package/src/tools/integrations/webhook.ts +38 -38
  154. package/src/tools/mcp-client.ts +131 -131
  155. package/src/tools/web-scraper.ts +179 -179
  156. package/src/tools/web-search.ts +180 -180
  157. package/src/ui/components.ts +127 -127
  158. package/srv-out.txt +1 -1
  159. package/templates/ecommerce-assistant/README.md +45 -45
  160. package/templates/ecommerce-assistant/oad.yaml +47 -47
  161. package/templates/tech-support/README.md +43 -43
  162. package/templates/tech-support/oad.yaml +45 -45
  163. package/test-agent/Dockerfile +9 -9
  164. package/test-agent/README.md +50 -50
  165. package/test-agent/agent.yaml +23 -23
  166. package/test-agent/docker-compose.yml +11 -11
  167. package/test-agent/oad.yaml +31 -31
  168. package/test-agent/package-lock.json +1492 -1492
  169. package/test-agent/package.json +17 -17
  170. package/test-agent/src/index.ts +24 -24
  171. package/test-agent/src/skills/echo.ts +15 -15
  172. package/test-agent/tsconfig.json +24 -24
  173. package/test-full.js +43 -43
  174. package/test-sidebar.js +22 -22
  175. package/test-studio3.js +75 -75
  176. package/test-studio4.js +41 -41
  177. package/tests/a2a-protocol.test.ts +285 -285
  178. package/tests/agui-protocol.test.ts +246 -246
  179. package/tests/api-server.test.ts +148 -148
  180. package/tests/approvals.test.ts +89 -89
  181. package/tests/audio.test.ts +40 -40
  182. package/tests/brain-seed-extended.test.ts +490 -490
  183. package/tests/brain-seed.test.ts +239 -239
  184. package/tests/browser.test.ts +179 -179
  185. package/tests/channels/discord.test.ts +79 -79
  186. package/tests/channels/email.test.ts +148 -148
  187. package/tests/channels/feishu.test.ts +123 -123
  188. package/tests/channels/telegram.test.ts +129 -129
  189. package/tests/channels/websocket.test.ts +53 -53
  190. package/tests/channels/wechat.test.ts +170 -170
  191. package/tests/channels-extra.test.ts +45 -45
  192. package/tests/chat-cli.test.ts +160 -160
  193. package/tests/cli.test.ts +46 -46
  194. package/tests/context-compressor.test.ts +172 -172
  195. package/tests/context-refs.test.ts +121 -121
  196. package/tests/cron-engine.test.ts +101 -101
  197. package/tests/daemon.test.ts +135 -135
  198. package/tests/deepbrain-wire.test.ts +234 -234
  199. package/tests/deploy-and-dag.test.ts +196 -196
  200. package/tests/doctor.test.ts +38 -38
  201. package/tests/document-processor.test.ts +69 -69
  202. package/tests/e2e-nocode.test.ts +442 -442
  203. package/tests/elevated.test.ts +69 -69
  204. package/tests/eval.test.ts +173 -173
  205. package/tests/gateway.test.ts +63 -63
  206. package/tests/guardrails.test.ts +177 -177
  207. package/tests/home-assistant.test.ts +40 -40
  208. package/tests/hooks.test.ts +79 -79
  209. package/tests/ide-bridge.test.ts +38 -38
  210. package/tests/image-generator.test.ts +84 -84
  211. package/tests/init-role.test.ts +124 -124
  212. package/tests/integrations.test.ts +249 -249
  213. package/tests/mcp-client.test.ts +92 -92
  214. package/tests/mcp-server.test.ts +178 -178
  215. package/tests/mcp-servers.test.ts +260 -260
  216. package/tests/node-network.test.ts +74 -74
  217. package/tests/plugin-a2a-enhanced.test.ts +230 -230
  218. package/tests/profiles.test.ts +61 -61
  219. package/tests/publish.test.ts +231 -231
  220. package/tests/rl-tools.test.ts +93 -93
  221. package/tests/sandbox-manager.test.ts +46 -46
  222. package/tests/scheduler.test.ts +200 -200
  223. package/tests/secrets.test.ts +107 -107
  224. package/tests/security-enhanced.test.ts +233 -233
  225. package/tests/settings-api.test.ts +148 -148
  226. package/tests/setup.test.ts +73 -73
  227. package/tests/subagent.test.ts +193 -193
  228. package/tests/telegram-discord.test.ts +60 -60
  229. package/tests/telemetry.test.ts +186 -186
  230. package/tests/user-profiler.test.ts +169 -169
  231. package/tests/v090-features.test.ts +254 -254
  232. package/tests/vision.test.ts +61 -61
  233. package/tests/voice-call.test.ts +47 -47
  234. package/tests/voice-enhanced.test.ts +169 -169
  235. package/tests/voice-interaction.test.ts +38 -38
  236. package/tests/web-search.test.ts +155 -155
  237. package/tests/workflow-graph.test.ts +279 -279
  238. package/tutorial/customer-service-agent/README.md +612 -612
  239. package/tutorial/customer-service-agent/SOUL.md +26 -26
  240. package/tutorial/customer-service-agent/agent.yaml +63 -63
  241. package/tutorial/customer-service-agent/package.json +19 -19
  242. package/tutorial/customer-service-agent/src/index.ts +69 -69
  243. package/tutorial/customer-service-agent/src/skills/faq.ts +27 -27
  244. package/tutorial/customer-service-agent/src/skills/ticket.ts +22 -22
  245. package/tutorial/customer-service-agent/tsconfig.json +14 -14
@@ -1,98 +1,98 @@
1
- import { BaseAgent } from './agent';
2
- import { InMemoryStore } from '../memory';
3
- import type { Message } from './types';
4
-
5
- export interface SubAgentConfig {
6
- name: string;
7
- task: string;
8
- systemPrompt?: string;
9
- provider?: string;
10
- model?: string;
11
- timeout?: number;
12
- isolated?: boolean;
13
- }
14
-
15
- export interface SubAgentResult {
16
- id: string;
17
- name: string;
18
- status: 'completed' | 'failed' | 'timeout';
19
- result: string;
20
- duration: number;
21
- }
22
-
23
- interface SubAgentEntry {
24
- agent: BaseAgent;
25
- status: string;
26
- name: string;
27
- }
28
-
29
- export class SubAgentManager {
30
- private agents: Map<string, SubAgentEntry> = new Map();
31
-
32
- async spawn(config: SubAgentConfig, parentProvider?: any): Promise<SubAgentResult> {
33
- const id = `sub_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
34
- const timeout = config.timeout ?? 300000;
35
- const isolated = config.isolated !== false;
36
-
37
- const agent = new BaseAgent({
38
- name: config.name,
39
- systemPrompt: config.systemPrompt ?? 'You are a helpful sub-agent.',
40
- provider: config.provider ?? 'openai',
41
- model: config.model,
42
- memory: isolated ? new InMemoryStore() : undefined,
43
- });
44
-
45
- this.agents.set(id, { agent, status: 'running', name: config.name });
46
-
47
- const message: Message = {
48
- id: `msg_${Date.now()}`,
49
- role: 'user',
50
- content: config.task,
51
- timestamp: Date.now(),
52
- metadata: { subAgentId: id },
53
- };
54
-
55
- const start = Date.now();
56
-
57
- try {
58
- const result = await Promise.race([
59
- agent.handleMessage(message),
60
- new Promise<never>((_, reject) =>
61
- setTimeout(() => reject(new Error('SubAgent timeout')), timeout),
62
- ),
63
- ]);
64
-
65
- const duration = Date.now() - start;
66
- this.agents.set(id, { agent, status: 'completed', name: config.name });
67
-
68
- return { id, name: config.name, status: 'completed', result: result.content, duration };
69
- } catch (err) {
70
- const duration = Date.now() - start;
71
- const isTimeout = (err as Error).message.includes('timeout');
72
- const status = isTimeout ? 'timeout' : 'failed';
73
- this.agents.set(id, { agent, status, name: config.name });
74
-
75
- return { id, name: config.name, status, result: (err as Error).message, duration };
76
- }
77
- }
78
-
79
- async spawnParallel(configs: SubAgentConfig[], parentProvider?: any): Promise<SubAgentResult[]> {
80
- return Promise.all(configs.map((c) => this.spawn(c, parentProvider)));
81
- }
82
-
83
- list(): Array<{ id: string; name: string; status: string }> {
84
- return Array.from(this.agents.entries()).map(([id, entry]) => ({
85
- id,
86
- name: entry.name,
87
- status: entry.status,
88
- }));
89
- }
90
-
91
- kill(id: string): boolean {
92
- const entry = this.agents.get(id);
93
- if (!entry) return false;
94
- entry.status = 'killed';
95
- this.agents.set(id, entry);
96
- return true;
97
- }
98
- }
1
+ import { BaseAgent } from './agent';
2
+ import { InMemoryStore } from '../memory';
3
+ import type { Message } from './types';
4
+
5
+ export interface SubAgentConfig {
6
+ name: string;
7
+ task: string;
8
+ systemPrompt?: string;
9
+ provider?: string;
10
+ model?: string;
11
+ timeout?: number;
12
+ isolated?: boolean;
13
+ }
14
+
15
+ export interface SubAgentResult {
16
+ id: string;
17
+ name: string;
18
+ status: 'completed' | 'failed' | 'timeout';
19
+ result: string;
20
+ duration: number;
21
+ }
22
+
23
+ interface SubAgentEntry {
24
+ agent: BaseAgent;
25
+ status: string;
26
+ name: string;
27
+ }
28
+
29
+ export class SubAgentManager {
30
+ private agents: Map<string, SubAgentEntry> = new Map();
31
+
32
+ async spawn(config: SubAgentConfig, parentProvider?: any): Promise<SubAgentResult> {
33
+ const id = `sub_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
34
+ const timeout = config.timeout ?? 300000;
35
+ const isolated = config.isolated !== false;
36
+
37
+ const agent = new BaseAgent({
38
+ name: config.name,
39
+ systemPrompt: config.systemPrompt ?? 'You are a helpful sub-agent.',
40
+ provider: config.provider ?? 'openai',
41
+ model: config.model,
42
+ memory: isolated ? new InMemoryStore() : undefined,
43
+ });
44
+
45
+ this.agents.set(id, { agent, status: 'running', name: config.name });
46
+
47
+ const message: Message = {
48
+ id: `msg_${Date.now()}`,
49
+ role: 'user',
50
+ content: config.task,
51
+ timestamp: Date.now(),
52
+ metadata: { subAgentId: id },
53
+ };
54
+
55
+ const start = Date.now();
56
+
57
+ try {
58
+ const result = await Promise.race([
59
+ agent.handleMessage(message),
60
+ new Promise<never>((_, reject) =>
61
+ setTimeout(() => reject(new Error('SubAgent timeout')), timeout),
62
+ ),
63
+ ]);
64
+
65
+ const duration = Date.now() - start;
66
+ this.agents.set(id, { agent, status: 'completed', name: config.name });
67
+
68
+ return { id, name: config.name, status: 'completed', result: result.content, duration };
69
+ } catch (err) {
70
+ const duration = Date.now() - start;
71
+ const isTimeout = (err as Error).message.includes('timeout');
72
+ const status = isTimeout ? 'timeout' : 'failed';
73
+ this.agents.set(id, { agent, status, name: config.name });
74
+
75
+ return { id, name: config.name, status, result: (err as Error).message, duration };
76
+ }
77
+ }
78
+
79
+ async spawnParallel(configs: SubAgentConfig[], parentProvider?: any): Promise<SubAgentResult[]> {
80
+ return Promise.all(configs.map((c) => this.spawn(c, parentProvider)));
81
+ }
82
+
83
+ list(): Array<{ id: string; name: string; status: string }> {
84
+ return Array.from(this.agents.entries()).map(([id, entry]) => ({
85
+ id,
86
+ name: entry.name,
87
+ status: entry.status,
88
+ }));
89
+ }
90
+
91
+ kill(id: string): boolean {
92
+ const entry = this.agents.get(id);
93
+ if (!entry) return false;
94
+ entry.status = 'killed';
95
+ this.agents.set(id, entry);
96
+ return true;
97
+ }
98
+ }
@@ -1,180 +1,180 @@
1
- import * as fs from 'fs';
2
- import * as path from 'path';
3
-
4
- // ─── Types ───────────────────────────────────────────────────
5
-
6
- export interface ImageInput {
7
- type: 'base64' | 'url' | 'file';
8
- data: string;
9
- mimeType?: string;
10
- }
11
-
12
- export interface VisionResult {
13
- description: string;
14
- text_content?: string;
15
- objects?: string[];
16
- }
17
-
18
- // ─── MIME detection from magic bytes ─────────────────────────
19
-
20
- const MAGIC_BYTES: Array<{ bytes: number[]; mime: string }> = [
21
- { bytes: [0x89, 0x50, 0x4E, 0x47], mime: 'image/png' },
22
- { bytes: [0xFF, 0xD8, 0xFF], mime: 'image/jpeg' },
23
- { bytes: [0x47, 0x49, 0x46, 0x38], mime: 'image/gif' },
24
- { bytes: [0x52, 0x49, 0x46, 0x46], mime: 'image/webp' }, // RIFF header (WebP)
25
- ];
26
-
27
- export function detectMimeType(buffer: Buffer): string {
28
- for (const { bytes, mime } of MAGIC_BYTES) {
29
- if (bytes.every((b, i) => buffer[i] === b)) {
30
- return mime;
31
- }
32
- }
33
- return 'application/octet-stream';
34
- }
35
-
36
- // ─── VisionManager ───────────────────────────────────────────
37
-
38
- export interface VisionManagerConfig {
39
- model?: string;
40
- apiKey?: string;
41
- baseURL?: string;
42
- maxImageSize?: number; // bytes, default 20MB
43
- }
44
-
45
- export class VisionManager {
46
- private config: VisionManagerConfig;
47
-
48
- constructor(config: VisionManagerConfig = {}) {
49
- this.config = {
50
- model: config.model ?? 'gpt-4o',
51
- apiKey: config.apiKey ?? process.env.OPENAI_API_KEY ?? '',
52
- baseURL: config.baseURL ?? 'https://api.openai.com/v1',
53
- maxImageSize: config.maxImageSize ?? 20 * 1024 * 1024,
54
- };
55
- }
56
-
57
- /**
58
- * Load image data as base64, detecting MIME type.
59
- */
60
- async loadImage(input: ImageInput): Promise<{ base64: string; mimeType: string }> {
61
- if (input.type === 'base64') {
62
- const buf = Buffer.from(input.data, 'base64');
63
- return { base64: input.data, mimeType: input.mimeType ?? detectMimeType(buf) };
64
- }
65
- if (input.type === 'file') {
66
- const buf = fs.readFileSync(input.data);
67
- if (buf.length > this.config.maxImageSize!) {
68
- throw new Error(`Image exceeds max size: ${buf.length} > ${this.config.maxImageSize}`);
69
- }
70
- return { base64: buf.toString('base64'), mimeType: input.mimeType ?? detectMimeType(buf) };
71
- }
72
- // URL — return as-is for API
73
- return { base64: '', mimeType: input.mimeType ?? 'image/jpeg' };
74
- }
75
-
76
- /**
77
- * Prepare messages in OpenAI multimodal format.
78
- */
79
- prepareMessage(images: ImageInput[], text: string): Array<Record<string, unknown>> {
80
- const content: Array<Record<string, unknown>> = [];
81
- if (text) {
82
- content.push({ type: 'text', text });
83
- }
84
- for (const img of images) {
85
- if (img.type === 'url') {
86
- content.push({
87
- type: 'image_url',
88
- image_url: { url: img.data },
89
- });
90
- } else {
91
- // base64 or file — will need to be loaded first
92
- const mime = img.mimeType ?? 'image/jpeg';
93
- const data = img.type === 'base64' ? img.data : fs.readFileSync(img.data).toString('base64');
94
- content.push({
95
- type: 'image_url',
96
- image_url: { url: `data:${mime};base64,${data}` },
97
- });
98
- }
99
- }
100
- return [{ role: 'user', content }];
101
- }
102
-
103
- /**
104
- * Analyze an image with optional prompt.
105
- */
106
- async analyze(image: ImageInput, prompt?: string): Promise<VisionResult> {
107
- const messages = this.prepareMessage([image], prompt ?? 'Describe this image in detail.');
108
-
109
- if (!this.config.apiKey) {
110
- throw new Error('Vision API key not configured. Set OPENAI_API_KEY or pass apiKey in config.');
111
- }
112
-
113
- const response = await fetch(`${this.config.baseURL}/chat/completions`, {
114
- method: 'POST',
115
- headers: {
116
- 'Content-Type': 'application/json',
117
- 'Authorization': `Bearer ${this.config.apiKey}`,
118
- },
119
- body: JSON.stringify({
120
- model: this.config.model,
121
- messages,
122
- max_tokens: 1000,
123
- }),
124
- });
125
-
126
- if (!response.ok) {
127
- throw new Error(`Vision API error: ${response.status} ${response.statusText}`);
128
- }
129
-
130
- const data = await response.json() as any;
131
- const text = data.choices?.[0]?.message?.content ?? '';
132
-
133
- return {
134
- description: text,
135
- text_content: undefined,
136
- objects: [],
137
- };
138
- }
139
-
140
- /**
141
- * Extract text (OCR) from an image.
142
- */
143
- async extractText(image: ImageInput): Promise<string> {
144
- const result = await this.analyze(image, 'Extract all visible text from this image. Return only the text, nothing else.');
145
- return result.description;
146
- }
147
-
148
- /**
149
- * Compare multiple images.
150
- */
151
- async compareImages(images: ImageInput[], prompt?: string): Promise<string> {
152
- if (images.length < 2) throw new Error('Need at least 2 images to compare');
153
-
154
- const messages = this.prepareMessage(images, prompt ?? 'Compare these images and describe the differences.');
155
-
156
- if (!this.config.apiKey) {
157
- throw new Error('Vision API key not configured.');
158
- }
159
-
160
- const response = await fetch(`${this.config.baseURL}/chat/completions`, {
161
- method: 'POST',
162
- headers: {
163
- 'Content-Type': 'application/json',
164
- 'Authorization': `Bearer ${this.config.apiKey}`,
165
- },
166
- body: JSON.stringify({
167
- model: this.config.model,
168
- messages,
169
- max_tokens: 1000,
170
- }),
171
- });
172
-
173
- if (!response.ok) {
174
- throw new Error(`Vision API error: ${response.status} ${response.statusText}`);
175
- }
176
-
177
- const data = await response.json() as any;
178
- return data.choices?.[0]?.message?.content ?? '';
179
- }
180
- }
1
+ import * as fs from 'fs';
2
+ import * as path from 'path';
3
+
4
+ // ─── Types ───────────────────────────────────────────────────
5
+
6
+ export interface ImageInput {
7
+ type: 'base64' | 'url' | 'file';
8
+ data: string;
9
+ mimeType?: string;
10
+ }
11
+
12
+ export interface VisionResult {
13
+ description: string;
14
+ text_content?: string;
15
+ objects?: string[];
16
+ }
17
+
18
+ // ─── MIME detection from magic bytes ─────────────────────────
19
+
20
+ const MAGIC_BYTES: Array<{ bytes: number[]; mime: string }> = [
21
+ { bytes: [0x89, 0x50, 0x4E, 0x47], mime: 'image/png' },
22
+ { bytes: [0xFF, 0xD8, 0xFF], mime: 'image/jpeg' },
23
+ { bytes: [0x47, 0x49, 0x46, 0x38], mime: 'image/gif' },
24
+ { bytes: [0x52, 0x49, 0x46, 0x46], mime: 'image/webp' }, // RIFF header (WebP)
25
+ ];
26
+
27
+ export function detectMimeType(buffer: Buffer): string {
28
+ for (const { bytes, mime } of MAGIC_BYTES) {
29
+ if (bytes.every((b, i) => buffer[i] === b)) {
30
+ return mime;
31
+ }
32
+ }
33
+ return 'application/octet-stream';
34
+ }
35
+
36
+ // ─── VisionManager ───────────────────────────────────────────
37
+
38
+ export interface VisionManagerConfig {
39
+ model?: string;
40
+ apiKey?: string;
41
+ baseURL?: string;
42
+ maxImageSize?: number; // bytes, default 20MB
43
+ }
44
+
45
+ export class VisionManager {
46
+ private config: VisionManagerConfig;
47
+
48
+ constructor(config: VisionManagerConfig = {}) {
49
+ this.config = {
50
+ model: config.model ?? 'gpt-4o',
51
+ apiKey: config.apiKey ?? process.env.OPENAI_API_KEY ?? '',
52
+ baseURL: config.baseURL ?? 'https://api.openai.com/v1',
53
+ maxImageSize: config.maxImageSize ?? 20 * 1024 * 1024,
54
+ };
55
+ }
56
+
57
+ /**
58
+ * Load image data as base64, detecting MIME type.
59
+ */
60
+ async loadImage(input: ImageInput): Promise<{ base64: string; mimeType: string }> {
61
+ if (input.type === 'base64') {
62
+ const buf = Buffer.from(input.data, 'base64');
63
+ return { base64: input.data, mimeType: input.mimeType ?? detectMimeType(buf) };
64
+ }
65
+ if (input.type === 'file') {
66
+ const buf = fs.readFileSync(input.data);
67
+ if (buf.length > this.config.maxImageSize!) {
68
+ throw new Error(`Image exceeds max size: ${buf.length} > ${this.config.maxImageSize}`);
69
+ }
70
+ return { base64: buf.toString('base64'), mimeType: input.mimeType ?? detectMimeType(buf) };
71
+ }
72
+ // URL — return as-is for API
73
+ return { base64: '', mimeType: input.mimeType ?? 'image/jpeg' };
74
+ }
75
+
76
+ /**
77
+ * Prepare messages in OpenAI multimodal format.
78
+ */
79
+ prepareMessage(images: ImageInput[], text: string): Array<Record<string, unknown>> {
80
+ const content: Array<Record<string, unknown>> = [];
81
+ if (text) {
82
+ content.push({ type: 'text', text });
83
+ }
84
+ for (const img of images) {
85
+ if (img.type === 'url') {
86
+ content.push({
87
+ type: 'image_url',
88
+ image_url: { url: img.data },
89
+ });
90
+ } else {
91
+ // base64 or file — will need to be loaded first
92
+ const mime = img.mimeType ?? 'image/jpeg';
93
+ const data = img.type === 'base64' ? img.data : fs.readFileSync(img.data).toString('base64');
94
+ content.push({
95
+ type: 'image_url',
96
+ image_url: { url: `data:${mime};base64,${data}` },
97
+ });
98
+ }
99
+ }
100
+ return [{ role: 'user', content }];
101
+ }
102
+
103
+ /**
104
+ * Analyze an image with optional prompt.
105
+ */
106
+ async analyze(image: ImageInput, prompt?: string): Promise<VisionResult> {
107
+ const messages = this.prepareMessage([image], prompt ?? 'Describe this image in detail.');
108
+
109
+ if (!this.config.apiKey) {
110
+ throw new Error('Vision API key not configured. Set OPENAI_API_KEY or pass apiKey in config.');
111
+ }
112
+
113
+ const response = await fetch(`${this.config.baseURL}/chat/completions`, {
114
+ method: 'POST',
115
+ headers: {
116
+ 'Content-Type': 'application/json',
117
+ 'Authorization': `Bearer ${this.config.apiKey}`,
118
+ },
119
+ body: JSON.stringify({
120
+ model: this.config.model,
121
+ messages,
122
+ max_tokens: 1000,
123
+ }),
124
+ });
125
+
126
+ if (!response.ok) {
127
+ throw new Error(`Vision API error: ${response.status} ${response.statusText}`);
128
+ }
129
+
130
+ const data = await response.json() as any;
131
+ const text = data.choices?.[0]?.message?.content ?? '';
132
+
133
+ return {
134
+ description: text,
135
+ text_content: undefined,
136
+ objects: [],
137
+ };
138
+ }
139
+
140
+ /**
141
+ * Extract text (OCR) from an image.
142
+ */
143
+ async extractText(image: ImageInput): Promise<string> {
144
+ const result = await this.analyze(image, 'Extract all visible text from this image. Return only the text, nothing else.');
145
+ return result.description;
146
+ }
147
+
148
+ /**
149
+ * Compare multiple images.
150
+ */
151
+ async compareImages(images: ImageInput[], prompt?: string): Promise<string> {
152
+ if (images.length < 2) throw new Error('Need at least 2 images to compare');
153
+
154
+ const messages = this.prepareMessage(images, prompt ?? 'Compare these images and describe the differences.');
155
+
156
+ if (!this.config.apiKey) {
157
+ throw new Error('Vision API key not configured.');
158
+ }
159
+
160
+ const response = await fetch(`${this.config.baseURL}/chat/completions`, {
161
+ method: 'POST',
162
+ headers: {
163
+ 'Content-Type': 'application/json',
164
+ 'Authorization': `Bearer ${this.config.apiKey}`,
165
+ },
166
+ body: JSON.stringify({
167
+ model: this.config.model,
168
+ messages,
169
+ max_tokens: 1000,
170
+ }),
171
+ });
172
+
173
+ if (!response.ok) {
174
+ throw new Error(`Vision API error: ${response.status} ${response.statusText}`);
175
+ }
176
+
177
+ const data = await response.json() as any;
178
+ return data.choices?.[0]?.message?.content ?? '';
179
+ }
180
+ }