opc-agent 4.1.0 → 4.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/ISSUE_TEMPLATE/bug_report.md +20 -20
- package/.github/ISSUE_TEMPLATE/feature_request.md +14 -14
- package/.github/PULL_REQUEST_TEMPLATE.md +13 -13
- package/CHANGELOG.md +48 -48
- package/CONTRIBUTING.md +36 -36
- package/README.zh-CN.md +497 -497
- package/USABILITY-ISSUES.md +73 -0
- package/dist/channels/web.js +8 -2
- package/dist/channels/wechat.js +6 -6
- package/dist/cli.js +200 -85
- package/dist/core/runtime.js +37 -15
- package/dist/deploy/index.js +56 -56
- package/dist/doctor.d.ts +1 -0
- package/dist/doctor.js +105 -10
- package/dist/memory/deepbrain.d.ts +1 -1
- package/dist/memory/deepbrain.js +95 -4
- package/dist/scheduler/cron-engine.js +3 -36
- package/dist/studio/server.js +30 -1
- package/dist/studio-ui/index.html +230 -10
- package/dist/ui/components.js +105 -105
- package/examples/README.md +22 -22
- package/examples/basic-agent.ts +90 -90
- package/examples/brain-integration.ts +71 -71
- package/examples/multi-channel.ts +74 -74
- package/fix-sidebar.mjs +188 -188
- package/install.ps1 +154 -154
- package/install.sh +164 -164
- package/package.json +1 -1
- package/scripts/install.ps1 +31 -31
- package/scripts/install.sh +40 -40
- package/serve-studio.js +13 -13
- package/serve-test.js +25 -25
- package/src/channels/dingtalk.ts +46 -46
- package/src/channels/email.ts +351 -351
- package/src/channels/feishu.ts +349 -349
- package/src/channels/googlechat.ts +42 -42
- package/src/channels/imessage.ts +31 -31
- package/src/channels/irc.ts +82 -82
- package/src/channels/line.ts +32 -32
- package/src/channels/matrix.ts +33 -33
- package/src/channels/mattermost.ts +57 -57
- package/src/channels/msteams.ts +32 -32
- package/src/channels/nostr.ts +32 -32
- package/src/channels/qq.ts +33 -33
- package/src/channels/signal.ts +32 -32
- package/src/channels/sms.ts +33 -33
- package/src/channels/telegram.ts +616 -616
- package/src/channels/twitch.ts +65 -65
- package/src/channels/voice-call.ts +100 -100
- package/src/channels/web.ts +8 -2
- package/src/channels/websocket.ts +399 -399
- package/src/channels/wechat.ts +329 -329
- package/src/channels/whatsapp.ts +32 -32
- package/src/cli/chat.ts +99 -99
- package/src/cli/setup.ts +314 -314
- package/src/cli.ts +195 -92
- package/src/core/agent.ts +476 -476
- package/src/core/api-server.ts +277 -277
- package/src/core/audio.ts +98 -98
- package/src/core/collaboration.ts +275 -275
- package/src/core/context-discovery.ts +85 -85
- package/src/core/context-refs.ts +140 -140
- package/src/core/gateway.ts +106 -106
- package/src/core/heartbeat.ts +51 -51
- package/src/core/hooks.ts +105 -105
- package/src/core/ide-bridge.ts +133 -133
- package/src/core/node-network.ts +86 -86
- package/src/core/profiles.ts +122 -122
- package/src/core/runtime.ts +25 -0
- package/src/core/scheduler.ts +187 -187
- package/src/core/session-manager.ts +137 -137
- package/src/core/subagent.ts +98 -98
- package/src/core/vision.ts +180 -180
- package/src/core/workflow-graph.ts +365 -365
- package/src/daemon.ts +96 -96
- package/src/deploy/index.ts +255 -255
- package/src/doctor.ts +98 -11
- package/src/eval/index.ts +211 -211
- package/src/eval/suites/basic.json +16 -16
- package/src/eval/suites/memory.json +12 -12
- package/src/eval/suites/safety.json +14 -14
- package/src/hub/brain-seed.ts +54 -54
- package/src/hub/client.ts +60 -60
- package/src/mcp/servers/calculator-mcp.ts +65 -65
- package/src/mcp/servers/crypto-mcp.ts +73 -73
- package/src/mcp/servers/database-mcp.ts +72 -72
- package/src/mcp/servers/datetime-mcp.ts +69 -69
- package/src/mcp/servers/filesystem.ts +66 -66
- package/src/mcp/servers/github-mcp.ts +58 -58
- package/src/mcp/servers/index.ts +63 -63
- package/src/mcp/servers/json-mcp.ts +102 -102
- package/src/mcp/servers/memory-mcp.ts +56 -56
- package/src/mcp/servers/regex-mcp.ts +53 -53
- package/src/mcp/servers/web-mcp.ts +49 -49
- package/src/memory/context-compressor.ts +189 -189
- package/src/memory/deepbrain.ts +99 -5
- package/src/memory/seed-loader.ts +212 -212
- package/src/memory/user-profiler.ts +215 -215
- package/src/plugins/content-filter.ts +23 -23
- package/src/plugins/logger.ts +18 -18
- package/src/plugins/rate-limiter.ts +38 -38
- package/src/protocols/a2a/client.ts +132 -132
- package/src/protocols/a2a/index.ts +8 -8
- package/src/protocols/a2a/server.ts +333 -333
- package/src/protocols/a2a/types.ts +88 -88
- package/src/protocols/a2a/utils.ts +50 -50
- package/src/protocols/agui/client.ts +83 -83
- package/src/protocols/agui/index.ts +4 -4
- package/src/protocols/agui/server.ts +218 -218
- package/src/protocols/agui/types.ts +153 -153
- package/src/protocols/index.ts +2 -2
- package/src/protocols/mcp/agent-tools.ts +134 -134
- package/src/protocols/mcp/index.ts +8 -8
- package/src/protocols/mcp/server.ts +262 -262
- package/src/protocols/mcp/types.ts +69 -69
- package/src/providers/index.ts +632 -632
- package/src/publish/index.ts +376 -376
- package/src/scheduler/cron-engine.ts +191 -191
- package/src/scheduler/index.ts +2 -2
- package/src/schema/oad.ts +217 -217
- package/src/security/approval.ts +131 -131
- package/src/security/approvals.ts +143 -143
- package/src/security/elevated.ts +105 -105
- package/src/security/guardrails.ts +248 -248
- package/src/security/index.ts +9 -9
- package/src/security/keys.ts +87 -87
- package/src/security/secrets.ts +129 -129
- package/src/skills/builtin/index.ts +408 -408
- package/src/skills/marketplace.ts +113 -113
- package/src/skills/types.ts +42 -42
- package/src/studio/server.ts +31 -1
- package/src/studio/templates-data.ts +178 -178
- package/src/studio-ui/index.html +230 -10
- package/src/telemetry/index.ts +324 -324
- package/src/tools/builtin/browser.ts +299 -299
- package/src/tools/builtin/datetime.ts +41 -41
- package/src/tools/builtin/file.ts +107 -107
- package/src/tools/builtin/home-assistant.ts +116 -116
- package/src/tools/builtin/rl-tools.ts +243 -243
- package/src/tools/builtin/shell.ts +43 -43
- package/src/tools/builtin/vision.ts +64 -64
- package/src/tools/builtin/web-search.ts +126 -126
- package/src/tools/builtin/web.ts +35 -35
- package/src/tools/document-processor.ts +213 -213
- package/src/tools/image-generator.ts +150 -150
- package/src/tools/integrations/calendar.ts +73 -73
- package/src/tools/integrations/code-exec.ts +39 -39
- package/src/tools/integrations/csv-analyzer.ts +92 -92
- package/src/tools/integrations/database.ts +44 -44
- package/src/tools/integrations/email-send.ts +76 -76
- package/src/tools/integrations/git-tool.ts +42 -42
- package/src/tools/integrations/github-tool.ts +76 -76
- package/src/tools/integrations/image-gen.ts +56 -56
- package/src/tools/integrations/index.ts +92 -92
- package/src/tools/integrations/jira.ts +83 -83
- package/src/tools/integrations/notion.ts +71 -71
- package/src/tools/integrations/npm-tool.ts +48 -48
- package/src/tools/integrations/pdf-reader.ts +58 -58
- package/src/tools/integrations/slack.ts +65 -65
- package/src/tools/integrations/summarizer.ts +49 -49
- package/src/tools/integrations/translator.ts +48 -48
- package/src/tools/integrations/trello.ts +60 -60
- package/src/tools/integrations/vector-search.ts +42 -42
- package/src/tools/integrations/web-scraper.ts +47 -47
- package/src/tools/integrations/web-search.ts +58 -58
- package/src/tools/integrations/webhook.ts +38 -38
- package/src/tools/mcp-client.ts +131 -131
- package/src/tools/web-scraper.ts +179 -179
- package/src/tools/web-search.ts +180 -180
- package/src/ui/components.ts +127 -127
- package/srv-out.txt +1 -1
- package/templates/ecommerce-assistant/README.md +45 -45
- package/templates/ecommerce-assistant/oad.yaml +47 -47
- package/templates/tech-support/README.md +43 -43
- package/templates/tech-support/oad.yaml +45 -45
- package/test-agent/Dockerfile +9 -9
- package/test-agent/README.md +50 -50
- package/test-agent/agent.yaml +23 -23
- package/test-agent/docker-compose.yml +11 -11
- package/test-agent/oad.yaml +31 -31
- package/test-agent/package-lock.json +1492 -1492
- package/test-agent/package.json +17 -17
- package/test-agent/src/index.ts +24 -24
- package/test-agent/src/skills/echo.ts +15 -15
- package/test-agent/tsconfig.json +24 -24
- package/test-full.js +43 -43
- package/test-sidebar.js +22 -22
- package/test-studio3.js +75 -75
- package/test-studio4.js +41 -41
- package/tests/a2a-protocol.test.ts +285 -285
- package/tests/agui-protocol.test.ts +246 -246
- package/tests/api-server.test.ts +148 -148
- package/tests/approvals.test.ts +89 -89
- package/tests/audio.test.ts +40 -40
- package/tests/brain-seed-extended.test.ts +490 -490
- package/tests/brain-seed.test.ts +239 -239
- package/tests/browser.test.ts +179 -179
- package/tests/channels/discord.test.ts +79 -79
- package/tests/channels/email.test.ts +148 -148
- package/tests/channels/feishu.test.ts +123 -123
- package/tests/channels/telegram.test.ts +129 -129
- package/tests/channels/websocket.test.ts +53 -53
- package/tests/channels/wechat.test.ts +170 -170
- package/tests/channels-extra.test.ts +45 -45
- package/tests/chat-cli.test.ts +160 -160
- package/tests/cli.test.ts +46 -46
- package/tests/context-compressor.test.ts +172 -172
- package/tests/context-refs.test.ts +121 -121
- package/tests/cron-engine.test.ts +101 -101
- package/tests/daemon.test.ts +135 -135
- package/tests/deepbrain-wire.test.ts +234 -234
- package/tests/deploy-and-dag.test.ts +196 -196
- package/tests/doctor.test.ts +38 -38
- package/tests/document-processor.test.ts +69 -69
- package/tests/e2e-nocode.test.ts +442 -442
- package/tests/elevated.test.ts +69 -69
- package/tests/eval.test.ts +173 -173
- package/tests/gateway.test.ts +63 -63
- package/tests/guardrails.test.ts +177 -177
- package/tests/home-assistant.test.ts +40 -40
- package/tests/hooks.test.ts +79 -79
- package/tests/ide-bridge.test.ts +38 -38
- package/tests/image-generator.test.ts +84 -84
- package/tests/init-role.test.ts +124 -124
- package/tests/integrations.test.ts +249 -249
- package/tests/mcp-client.test.ts +92 -92
- package/tests/mcp-server.test.ts +178 -178
- package/tests/mcp-servers.test.ts +260 -260
- package/tests/node-network.test.ts +74 -74
- package/tests/plugin-a2a-enhanced.test.ts +230 -230
- package/tests/profiles.test.ts +61 -61
- package/tests/publish.test.ts +231 -231
- package/tests/rl-tools.test.ts +93 -93
- package/tests/sandbox-manager.test.ts +46 -46
- package/tests/scheduler.test.ts +200 -200
- package/tests/secrets.test.ts +107 -107
- package/tests/security-enhanced.test.ts +233 -233
- package/tests/settings-api.test.ts +148 -148
- package/tests/setup.test.ts +73 -73
- package/tests/subagent.test.ts +193 -193
- package/tests/telegram-discord.test.ts +60 -60
- package/tests/telemetry.test.ts +186 -186
- package/tests/user-profiler.test.ts +169 -169
- package/tests/v090-features.test.ts +254 -254
- package/tests/vision.test.ts +61 -61
- package/tests/voice-call.test.ts +47 -47
- package/tests/voice-enhanced.test.ts +169 -169
- package/tests/voice-interaction.test.ts +38 -38
- package/tests/web-search.test.ts +155 -155
- package/tests/workflow-graph.test.ts +279 -279
- package/tutorial/customer-service-agent/README.md +612 -612
- package/tutorial/customer-service-agent/SOUL.md +26 -26
- package/tutorial/customer-service-agent/agent.yaml +63 -63
- package/tutorial/customer-service-agent/package.json +19 -19
- package/tutorial/customer-service-agent/src/index.ts +69 -69
- package/tutorial/customer-service-agent/src/skills/faq.ts +27 -27
- package/tutorial/customer-service-agent/src/skills/ticket.ts +22 -22
- package/tutorial/customer-service-agent/tsconfig.json +14 -14
package/src/core/subagent.ts
CHANGED
|
@@ -1,98 +1,98 @@
|
|
|
1
|
-
import { BaseAgent } from './agent';
|
|
2
|
-
import { InMemoryStore } from '../memory';
|
|
3
|
-
import type { Message } from './types';
|
|
4
|
-
|
|
5
|
-
export interface SubAgentConfig {
|
|
6
|
-
name: string;
|
|
7
|
-
task: string;
|
|
8
|
-
systemPrompt?: string;
|
|
9
|
-
provider?: string;
|
|
10
|
-
model?: string;
|
|
11
|
-
timeout?: number;
|
|
12
|
-
isolated?: boolean;
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
export interface SubAgentResult {
|
|
16
|
-
id: string;
|
|
17
|
-
name: string;
|
|
18
|
-
status: 'completed' | 'failed' | 'timeout';
|
|
19
|
-
result: string;
|
|
20
|
-
duration: number;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
interface SubAgentEntry {
|
|
24
|
-
agent: BaseAgent;
|
|
25
|
-
status: string;
|
|
26
|
-
name: string;
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
export class SubAgentManager {
|
|
30
|
-
private agents: Map<string, SubAgentEntry> = new Map();
|
|
31
|
-
|
|
32
|
-
async spawn(config: SubAgentConfig, parentProvider?: any): Promise<SubAgentResult> {
|
|
33
|
-
const id = `sub_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
|
|
34
|
-
const timeout = config.timeout ?? 300000;
|
|
35
|
-
const isolated = config.isolated !== false;
|
|
36
|
-
|
|
37
|
-
const agent = new BaseAgent({
|
|
38
|
-
name: config.name,
|
|
39
|
-
systemPrompt: config.systemPrompt ?? 'You are a helpful sub-agent.',
|
|
40
|
-
provider: config.provider ?? 'openai',
|
|
41
|
-
model: config.model,
|
|
42
|
-
memory: isolated ? new InMemoryStore() : undefined,
|
|
43
|
-
});
|
|
44
|
-
|
|
45
|
-
this.agents.set(id, { agent, status: 'running', name: config.name });
|
|
46
|
-
|
|
47
|
-
const message: Message = {
|
|
48
|
-
id: `msg_${Date.now()}`,
|
|
49
|
-
role: 'user',
|
|
50
|
-
content: config.task,
|
|
51
|
-
timestamp: Date.now(),
|
|
52
|
-
metadata: { subAgentId: id },
|
|
53
|
-
};
|
|
54
|
-
|
|
55
|
-
const start = Date.now();
|
|
56
|
-
|
|
57
|
-
try {
|
|
58
|
-
const result = await Promise.race([
|
|
59
|
-
agent.handleMessage(message),
|
|
60
|
-
new Promise<never>((_, reject) =>
|
|
61
|
-
setTimeout(() => reject(new Error('SubAgent timeout')), timeout),
|
|
62
|
-
),
|
|
63
|
-
]);
|
|
64
|
-
|
|
65
|
-
const duration = Date.now() - start;
|
|
66
|
-
this.agents.set(id, { agent, status: 'completed', name: config.name });
|
|
67
|
-
|
|
68
|
-
return { id, name: config.name, status: 'completed', result: result.content, duration };
|
|
69
|
-
} catch (err) {
|
|
70
|
-
const duration = Date.now() - start;
|
|
71
|
-
const isTimeout = (err as Error).message.includes('timeout');
|
|
72
|
-
const status = isTimeout ? 'timeout' : 'failed';
|
|
73
|
-
this.agents.set(id, { agent, status, name: config.name });
|
|
74
|
-
|
|
75
|
-
return { id, name: config.name, status, result: (err as Error).message, duration };
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
async spawnParallel(configs: SubAgentConfig[], parentProvider?: any): Promise<SubAgentResult[]> {
|
|
80
|
-
return Promise.all(configs.map((c) => this.spawn(c, parentProvider)));
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
list(): Array<{ id: string; name: string; status: string }> {
|
|
84
|
-
return Array.from(this.agents.entries()).map(([id, entry]) => ({
|
|
85
|
-
id,
|
|
86
|
-
name: entry.name,
|
|
87
|
-
status: entry.status,
|
|
88
|
-
}));
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
kill(id: string): boolean {
|
|
92
|
-
const entry = this.agents.get(id);
|
|
93
|
-
if (!entry) return false;
|
|
94
|
-
entry.status = 'killed';
|
|
95
|
-
this.agents.set(id, entry);
|
|
96
|
-
return true;
|
|
97
|
-
}
|
|
98
|
-
}
|
|
1
|
+
import { BaseAgent } from './agent';
|
|
2
|
+
import { InMemoryStore } from '../memory';
|
|
3
|
+
import type { Message } from './types';
|
|
4
|
+
|
|
5
|
+
/**
 * Options describing a single sub-agent run.
 */
export interface SubAgentConfig {
  /** Name given to the sub-agent; it is echoed back in the run result. */
  name: string;
  /** Task text, delivered to the sub-agent as the content of one user message. */
  task: string;
  /** System prompt override; a generic helper prompt is used when omitted. */
  systemPrompt?: string;
  /** Provider identifier; `'openai'` is used when omitted. */
  provider?: string;
  /** Model identifier, handed to the agent unchanged. */
  model?: string;
  /** Maximum run time in milliseconds; 300000 is used when omitted. */
  timeout?: number;
  /** Unless explicitly `false`, the sub-agent is given its own fresh in-memory store. */
  isolated?: boolean;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Outcome of one sub-agent run.
 */
export interface SubAgentResult {
  /** Generated identifier of the sub-agent that produced this result. */
  id: string;
  /** Name taken from the configuration the sub-agent was spawned with. */
  name: string;
  /** How the run ended. */
  status: 'completed' | 'failed' | 'timeout';
  /** Reply content when the run completed; the error message otherwise. */
  result: string;
  /** Elapsed wall-clock time of the run in milliseconds. */
  duration: number;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Registry record kept for every spawned sub-agent.
 */
interface SubAgentEntry {
  /** The agent instance created for the run. */
  agent: BaseAgent;
  /** Last recorded lifecycle state, e.g. `'running'`, `'completed'` or `'killed'`. */
  status: string;
  /** Name the sub-agent was spawned with. */
  name: string;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Spawns sub-agents, runs a single task on each, and keeps a registry of every
 * sub-agent created through this manager.
 */
export class SubAgentManager {
  /** Spawned sub-agents keyed by their generated id; entries are never removed. */
  private agents: Map<string, SubAgentEntry> = new Map();

  /**
   * Creates a sub-agent, sends it `config.task` as one user message and waits
   * for the reply, bounded by `config.timeout` (300000 ms when omitted).
   *
   * Errors raised while the message is being handled are reported through the
   * returned result (`status` of `'failed'` or `'timeout'`), not by rejecting.
   *
   * @param config - Name, task and optional overrides for the sub-agent.
   * @param parentProvider - Accepted for API compatibility; not used by this method.
   * @returns The run outcome including its elapsed time in milliseconds.
   */
  async spawn(config: SubAgentConfig, parentProvider?: any): Promise<SubAgentResult> {
    const id = `sub_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    const timeout = config.timeout ?? 300000;
    const isolated = config.isolated !== false;

    const agent = new BaseAgent({
      name: config.name,
      systemPrompt: config.systemPrompt ?? 'You are a helpful sub-agent.',
      provider: config.provider ?? 'openai',
      model: config.model,
      memory: isolated ? new InMemoryStore() : undefined,
    });

    this.agents.set(id, { agent, status: 'running', name: config.name });

    const message: Message = {
      id: `msg_${Date.now()}`,
      role: 'user',
      content: config.task,
      timestamp: Date.now(),
      metadata: { subAgentId: id },
    };

    const start = Date.now();
    let timer: ReturnType<typeof setTimeout> | undefined;

    try {
      const result = await Promise.race([
        agent.handleMessage(message),
        new Promise<never>((_, reject) => {
          timer = setTimeout(() => reject(new Error('SubAgent timeout')), timeout);
        }),
      ]);

      const duration = Date.now() - start;
      this.recordOutcome(id, agent, config.name, 'completed');

      return { id, name: config.name, status: 'completed', result: result.content, duration };
    } catch (err) {
      const duration = Date.now() - start;
      // Anything can be thrown; never assume the value is an Error instance.
      const reason = err instanceof Error ? err.message : String(err);
      const status = reason.includes('timeout') ? 'timeout' : 'failed';
      this.recordOutcome(id, agent, config.name, status);

      return { id, name: config.name, status, result: reason, duration };
    } finally {
      // Without this the losing timer stays armed for up to `timeout` ms after
      // the run has settled and keeps the Node.js event loop alive.
      clearTimeout(timer);
    }
  }

  /**
   * Spawns one sub-agent per configuration and runs them concurrently.
   *
   * @param configs - Configurations to run.
   * @param parentProvider - Forwarded unchanged to every `spawn` call.
   * @returns Results in the same order as `configs`.
   */
  async spawnParallel(configs: SubAgentConfig[], parentProvider?: any): Promise<SubAgentResult[]> {
    return Promise.all(configs.map((c) => this.spawn(c, parentProvider)));
  }

  /**
   * Lists every sub-agent spawned so far with its last recorded status.
   *
   * @returns One `{ id, name, status }` record per registry entry.
   */
  list(): Array<{ id: string; name: string; status: string }> {
    return Array.from(this.agents.entries()).map(([id, entry]) => ({
      id,
      name: entry.name,
      status: entry.status,
    }));
  }

  /**
   * Marks a sub-agent as killed in the registry. Only the recorded status is
   * changed; nothing in this method interrupts a run that is still in flight.
   *
   * @param id - Identifier returned by `spawn` / `list`.
   * @returns `false` when the id is unknown, `true` otherwise.
   */
  kill(id: string): boolean {
    const entry = this.agents.get(id);
    if (!entry) return false;
    entry.status = 'killed';
    return true;
  }

  /** Stores the final status of a run, unless the entry was marked killed in the meantime. */
  private recordOutcome(id: string, agent: BaseAgent, name: string, status: string): void {
    if (this.agents.get(id)?.status === 'killed') return;
    this.agents.set(id, { agent, status, name });
  }
}
|
package/src/core/vision.ts
CHANGED
|
@@ -1,180 +1,180 @@
|
|
|
1
|
-
import * as fs from 'fs';
|
|
2
|
-
import * as path from 'path';
|
|
3
|
-
|
|
4
|
-
// ─── Types ───────────────────────────────────────────────────
|
|
5
|
-
|
|
6
|
-
export interface ImageInput {
|
|
7
|
-
type: 'base64' | 'url' | 'file';
|
|
8
|
-
data: string;
|
|
9
|
-
mimeType?: string;
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
export interface VisionResult {
|
|
13
|
-
description: string;
|
|
14
|
-
text_content?: string;
|
|
15
|
-
objects?: string[];
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
// ─── MIME detection from magic bytes ─────────────────────────
|
|
19
|
-
|
|
20
|
-
const MAGIC_BYTES: Array<{ bytes: number[]; mime: string }> = [
|
|
21
|
-
{ bytes: [0x89, 0x50, 0x4E, 0x47], mime: 'image/png' },
|
|
22
|
-
{ bytes: [0xFF, 0xD8, 0xFF], mime: 'image/jpeg' },
|
|
23
|
-
{ bytes: [0x47, 0x49, 0x46, 0x38], mime: 'image/gif' },
|
|
24
|
-
{ bytes: [0x52, 0x49, 0x46, 0x46], mime: 'image/webp' }, // RIFF header (WebP)
|
|
25
|
-
];
|
|
26
|
-
|
|
27
|
-
export function detectMimeType(buffer: Buffer): string {
|
|
28
|
-
for (const { bytes, mime } of MAGIC_BYTES) {
|
|
29
|
-
if (bytes.every((b, i) => buffer[i] === b)) {
|
|
30
|
-
return mime;
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
return 'application/octet-stream';
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
// ─── VisionManager ───────────────────────────────────────────
|
|
37
|
-
|
|
38
|
-
export interface VisionManagerConfig {
|
|
39
|
-
model?: string;
|
|
40
|
-
apiKey?: string;
|
|
41
|
-
baseURL?: string;
|
|
42
|
-
maxImageSize?: number; // bytes, default 20MB
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
export class VisionManager {
|
|
46
|
-
private config: VisionManagerConfig;
|
|
47
|
-
|
|
48
|
-
constructor(config: VisionManagerConfig = {}) {
|
|
49
|
-
this.config = {
|
|
50
|
-
model: config.model ?? 'gpt-4o',
|
|
51
|
-
apiKey: config.apiKey ?? process.env.OPENAI_API_KEY ?? '',
|
|
52
|
-
baseURL: config.baseURL ?? 'https://api.openai.com/v1',
|
|
53
|
-
maxImageSize: config.maxImageSize ?? 20 * 1024 * 1024,
|
|
54
|
-
};
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
/**
|
|
58
|
-
* Load image data as base64, detecting MIME type.
|
|
59
|
-
*/
|
|
60
|
-
async loadImage(input: ImageInput): Promise<{ base64: string; mimeType: string }> {
|
|
61
|
-
if (input.type === 'base64') {
|
|
62
|
-
const buf = Buffer.from(input.data, 'base64');
|
|
63
|
-
return { base64: input.data, mimeType: input.mimeType ?? detectMimeType(buf) };
|
|
64
|
-
}
|
|
65
|
-
if (input.type === 'file') {
|
|
66
|
-
const buf = fs.readFileSync(input.data);
|
|
67
|
-
if (buf.length > this.config.maxImageSize!) {
|
|
68
|
-
throw new Error(`Image exceeds max size: ${buf.length} > ${this.config.maxImageSize}`);
|
|
69
|
-
}
|
|
70
|
-
return { base64: buf.toString('base64'), mimeType: input.mimeType ?? detectMimeType(buf) };
|
|
71
|
-
}
|
|
72
|
-
// URL — return as-is for API
|
|
73
|
-
return { base64: '', mimeType: input.mimeType ?? 'image/jpeg' };
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
/**
|
|
77
|
-
* Prepare messages in OpenAI multimodal format.
|
|
78
|
-
*/
|
|
79
|
-
prepareMessage(images: ImageInput[], text: string): Array<Record<string, unknown>> {
|
|
80
|
-
const content: Array<Record<string, unknown>> = [];
|
|
81
|
-
if (text) {
|
|
82
|
-
content.push({ type: 'text', text });
|
|
83
|
-
}
|
|
84
|
-
for (const img of images) {
|
|
85
|
-
if (img.type === 'url') {
|
|
86
|
-
content.push({
|
|
87
|
-
type: 'image_url',
|
|
88
|
-
image_url: { url: img.data },
|
|
89
|
-
});
|
|
90
|
-
} else {
|
|
91
|
-
// base64 or file — will need to be loaded first
|
|
92
|
-
const mime = img.mimeType ?? 'image/jpeg';
|
|
93
|
-
const data = img.type === 'base64' ? img.data : fs.readFileSync(img.data).toString('base64');
|
|
94
|
-
content.push({
|
|
95
|
-
type: 'image_url',
|
|
96
|
-
image_url: { url: `data:${mime};base64,${data}` },
|
|
97
|
-
});
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
return [{ role: 'user', content }];
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
/**
|
|
104
|
-
* Analyze an image with optional prompt.
|
|
105
|
-
*/
|
|
106
|
-
async analyze(image: ImageInput, prompt?: string): Promise<VisionResult> {
|
|
107
|
-
const messages = this.prepareMessage([image], prompt ?? 'Describe this image in detail.');
|
|
108
|
-
|
|
109
|
-
if (!this.config.apiKey) {
|
|
110
|
-
throw new Error('Vision API key not configured. Set OPENAI_API_KEY or pass apiKey in config.');
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
const response = await fetch(`${this.config.baseURL}/chat/completions`, {
|
|
114
|
-
method: 'POST',
|
|
115
|
-
headers: {
|
|
116
|
-
'Content-Type': 'application/json',
|
|
117
|
-
'Authorization': `Bearer ${this.config.apiKey}`,
|
|
118
|
-
},
|
|
119
|
-
body: JSON.stringify({
|
|
120
|
-
model: this.config.model,
|
|
121
|
-
messages,
|
|
122
|
-
max_tokens: 1000,
|
|
123
|
-
}),
|
|
124
|
-
});
|
|
125
|
-
|
|
126
|
-
if (!response.ok) {
|
|
127
|
-
throw new Error(`Vision API error: ${response.status} ${response.statusText}`);
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
const data = await response.json() as any;
|
|
131
|
-
const text = data.choices?.[0]?.message?.content ?? '';
|
|
132
|
-
|
|
133
|
-
return {
|
|
134
|
-
description: text,
|
|
135
|
-
text_content: undefined,
|
|
136
|
-
objects: [],
|
|
137
|
-
};
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
/**
|
|
141
|
-
* Extract text (OCR) from an image.
|
|
142
|
-
*/
|
|
143
|
-
async extractText(image: ImageInput): Promise<string> {
|
|
144
|
-
const result = await this.analyze(image, 'Extract all visible text from this image. Return only the text, nothing else.');
|
|
145
|
-
return result.description;
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
/**
|
|
149
|
-
* Compare multiple images.
|
|
150
|
-
*/
|
|
151
|
-
async compareImages(images: ImageInput[], prompt?: string): Promise<string> {
|
|
152
|
-
if (images.length < 2) throw new Error('Need at least 2 images to compare');
|
|
153
|
-
|
|
154
|
-
const messages = this.prepareMessage(images, prompt ?? 'Compare these images and describe the differences.');
|
|
155
|
-
|
|
156
|
-
if (!this.config.apiKey) {
|
|
157
|
-
throw new Error('Vision API key not configured.');
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
const response = await fetch(`${this.config.baseURL}/chat/completions`, {
|
|
161
|
-
method: 'POST',
|
|
162
|
-
headers: {
|
|
163
|
-
'Content-Type': 'application/json',
|
|
164
|
-
'Authorization': `Bearer ${this.config.apiKey}`,
|
|
165
|
-
},
|
|
166
|
-
body: JSON.stringify({
|
|
167
|
-
model: this.config.model,
|
|
168
|
-
messages,
|
|
169
|
-
max_tokens: 1000,
|
|
170
|
-
}),
|
|
171
|
-
});
|
|
172
|
-
|
|
173
|
-
if (!response.ok) {
|
|
174
|
-
throw new Error(`Vision API error: ${response.status} ${response.statusText}`);
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
const data = await response.json() as any;
|
|
178
|
-
return data.choices?.[0]?.message?.content ?? '';
|
|
179
|
-
}
|
|
180
|
-
}
|
|
1
|
+
import * as fs from 'fs';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
|
|
4
|
+
// ─── Types ───────────────────────────────────────────────────
|
|
5
|
+
|
|
6
|
+
/**
 * An image handed to the vision helpers.
 */
export interface ImageInput {
  /**
   * How `data` is interpreted: a base64 payload, a URL that is forwarded
   * as-is, or a path of a file that is read from disk.
   */
  type: 'base64' | 'url' | 'file';
  /** Base64 text, URL or file path, depending on `type`. */
  data: string;
  /** Explicit MIME type; when present it is used instead of any default or detection. */
  mimeType?: string;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Result of analysing one image.
 */
export interface VisionResult {
  /** Text of the model's reply; empty when the response carried no content. */
  description: string;
  /** Extracted text, when available. */
  text_content?: string;
  /** Names of detected objects, when available. */
  objects?: string[];
}
|
|
17
|
+
|
|
18
|
+
// ─── MIME detection from magic bytes ─────────────────────────
|
|
19
|
+
|
|
20
|
+
/**
 * Known image signatures. Every `{ offset, bytes }` part of an entry has to
 * match for that entry's MIME type to be reported.
 */
const MAGIC_BYTES: Array<{ parts: Array<{ offset: number; bytes: number[] }>; mime: string }> = [
  { parts: [{ offset: 0, bytes: [0x89, 0x50, 0x4E, 0x47] }], mime: 'image/png' },
  { parts: [{ offset: 0, bytes: [0xFF, 0xD8, 0xFF] }], mime: 'image/jpeg' },
  { parts: [{ offset: 0, bytes: [0x47, 0x49, 0x46, 0x38] }], mime: 'image/gif' },
  // WebP is a RIFF container: "RIFF", a 4-byte size, then the form type "WEBP".
  // Checking "RIFF" alone would also match other RIFF files such as WAV or AVI.
  {
    parts: [
      { offset: 0, bytes: [0x52, 0x49, 0x46, 0x46] },
      { offset: 8, bytes: [0x57, 0x45, 0x42, 0x50] },
    ],
    mime: 'image/webp',
  },
];

/**
 * Identifies PNG, JPEG, GIF and WebP data from its leading bytes.
 *
 * @param buffer - Raw bytes; only the first 12 are inspected.
 * @returns The matching image MIME type, or `'application/octet-stream'` when
 *   no signature matches (including buffers too short to hold one).
 */
export function detectMimeType(buffer: Buffer): string {
  for (const { parts, mime } of MAGIC_BYTES) {
    // Reading past the end of the buffer yields undefined, which never equals a byte.
    const matches = parts.every(({ offset, bytes }) =>
      bytes.every((b, i) => buffer[offset + i] === b),
    );
    if (matches) {
      return mime;
    }
  }
  return 'application/octet-stream';
}
|
|
35
|
+
|
|
36
|
+
// ─── VisionManager ───────────────────────────────────────────
|
|
37
|
+
|
|
38
|
+
/**
 * Settings accepted by `VisionManager`; every field is optional.
 */
export interface VisionManagerConfig {
  /** Model name sent with each request; `'gpt-4o'` when omitted. */
  model?: string;
  /** API key for the Bearer header; falls back to the `OPENAI_API_KEY` environment variable. */
  apiKey?: string;
  /** Base URL of the API; `'https://api.openai.com/v1'` when omitted. */
  baseURL?: string;
  /** Largest accepted image file in bytes; 20 MB when omitted. */
  maxImageSize?: number;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Sends images plus a text prompt to a chat-completions endpoint and returns
 * the model's textual reply.
 */
export class VisionManager {
  /** Effective settings with every default already applied. */
  private config: Required<VisionManagerConfig>;

  /**
   * @param config - Optional overrides; omitted fields fall back to
   *   `'gpt-4o'`, the `OPENAI_API_KEY` environment variable,
   *   `'https://api.openai.com/v1'` and a 20 MB file-size limit.
   */
  constructor(config: VisionManagerConfig = {}) {
    this.config = {
      model: config.model ?? 'gpt-4o',
      apiKey: config.apiKey ?? process.env.OPENAI_API_KEY ?? '',
      baseURL: config.baseURL ?? 'https://api.openai.com/v1',
      maxImageSize: config.maxImageSize ?? 20 * 1024 * 1024,
    };
  }

  /**
   * Load image data as base64, detecting MIME type.
   *
   * @param input - Image to load.
   * @returns The base64 payload and MIME type. For `url` inputs nothing is
   *   fetched: `base64` is empty and the MIME type defaults to `'image/jpeg'`.
   * @throws Error when a file is larger than `maxImageSize`, or cannot be read.
   */
  async loadImage(input: ImageInput): Promise<{ base64: string; mimeType: string }> {
    if (input.type === 'base64') {
      const buf = Buffer.from(input.data, 'base64');
      return { base64: input.data, mimeType: input.mimeType ?? detectMimeType(buf) };
    }
    if (input.type === 'file') {
      const buf = this.readImageFile(input.data);
      return { base64: buf.toString('base64'), mimeType: input.mimeType ?? detectMimeType(buf) };
    }
    // URL — return as-is for API
    return { base64: '', mimeType: input.mimeType ?? 'image/jpeg' };
  }

  /**
   * Prepare messages in OpenAI multimodal format.
   *
   * URL images are referenced directly; base64 and file images are embedded
   * as `data:` URLs. When no `mimeType` is given the type is sniffed from the
   * image bytes, falling back to `'image/jpeg'` for unrecognised data.
   *
   * @param images - Images to attach, in order.
   * @param text - Prompt text; omitted from the message when empty.
   * @returns A single-element array holding one `user` message.
   * @throws Error when a file is larger than `maxImageSize`, or cannot be read.
   */
  prepareMessage(images: ImageInput[], text: string): Array<Record<string, unknown>> {
    const content: Array<Record<string, unknown>> = [];
    if (text) {
      content.push({ type: 'text', text });
    }
    for (const img of images) {
      if (img.type === 'url') {
        content.push({
          type: 'image_url',
          image_url: { url: img.data },
        });
        continue;
      }

      let data: string;
      let mime: string;
      if (img.type === 'base64') {
        data = img.data;
        // 32 base64 characters decode to 24 bytes, enough for every known signature.
        mime = img.mimeType ?? this.sniffImageMime(Buffer.from(img.data.slice(0, 32), 'base64'));
      } else {
        const bytes = this.readImageFile(img.data);
        data = bytes.toString('base64');
        mime = img.mimeType ?? this.sniffImageMime(bytes);
      }
      content.push({
        type: 'image_url',
        image_url: { url: `data:${mime};base64,${data}` },
      });
    }
    return [{ role: 'user', content }];
  }

  /**
   * Analyze an image with optional prompt.
   *
   * @param image - Image to describe.
   * @param prompt - Instruction for the model; a generic "describe" prompt when omitted.
   * @returns The reply as `description`; `text_content` and `objects` are not populated.
   * @throws Error when no API key is configured or the API answers with a non-2xx status.
   */
  async analyze(image: ImageInput, prompt?: string): Promise<VisionResult> {
    const messages = this.prepareMessage([image], prompt ?? 'Describe this image in detail.');
    const text = await this.requestCompletion(
      messages,
      'Vision API key not configured. Set OPENAI_API_KEY or pass apiKey in config.',
    );

    return {
      description: text,
      text_content: undefined,
      objects: [],
    };
  }

  /**
   * Extract text (OCR) from an image.
   *
   * @param image - Image to read.
   * @returns The model's reply to a text-extraction prompt.
   */
  async extractText(image: ImageInput): Promise<string> {
    const result = await this.analyze(image, 'Extract all visible text from this image. Return only the text, nothing else.');
    return result.description;
  }

  /**
   * Compare multiple images.
   *
   * @param images - At least two images.
   * @param prompt - Instruction for the model; a generic "compare" prompt when omitted.
   * @returns The model's reply, or an empty string when the response carried no content.
   * @throws Error when fewer than two images are given, no API key is
   *   configured, or the API answers with a non-2xx status.
   */
  async compareImages(images: ImageInput[], prompt?: string): Promise<string> {
    if (images.length < 2) throw new Error('Need at least 2 images to compare');

    const messages = this.prepareMessage(images, prompt ?? 'Compare these images and describe the differences.');
    return this.requestCompletion(messages, 'Vision API key not configured.');
  }

  /** Reads an image file synchronously and rejects it when it exceeds `maxImageSize`. */
  private readImageFile(filePath: string): Buffer {
    const buf = fs.readFileSync(filePath);
    if (buf.length > this.config.maxImageSize) {
      throw new Error(`Image exceeds max size: ${buf.length} > ${this.config.maxImageSize}`);
    }
    return buf;
  }

  /** Sniffs the MIME type from leading bytes, keeping `'image/jpeg'` as the fallback for unknown data. */
  private sniffImageMime(bytes: Buffer): string {
    const detected = detectMimeType(bytes);
    return detected.startsWith('image/') ? detected : 'image/jpeg';
  }

  /** Posts `messages` to the chat-completions endpoint and returns the first choice's text. */
  private async requestCompletion(
    messages: Array<Record<string, unknown>>,
    missingKeyMessage: string,
  ): Promise<string> {
    if (!this.config.apiKey) {
      throw new Error(missingKeyMessage);
    }

    const response = await fetch(`${this.config.baseURL}/chat/completions`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${this.config.apiKey}`,
      },
      body: JSON.stringify({
        model: this.config.model,
        messages,
        max_tokens: 1000,
      }),
    });

    if (!response.ok) {
      throw new Error(`Vision API error: ${response.status} ${response.statusText}`);
    }

    const data = (await response.json()) as {
      choices?: Array<{ message?: { content?: string | null } }>;
    };
    return data.choices?.[0]?.message?.content ?? '';
  }
}
|