@johpaz/hive-sdk 0.0.12 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. package/.github/CODEOWNERS +9 -0
  2. package/.github/workflows/publish.yml +89 -0
  3. package/.github/workflows/version-bump.yml +102 -0
  4. package/CHANGELOG.md +38 -0
  5. package/README.md +158 -0
  6. package/bun.lock +543 -0
  7. package/bunfig.toml +7 -0
  8. package/docs/API-AGENTS.md +316 -0
  9. package/docs/API-CONTEXT-COMPILER.md +252 -0
  10. package/docs/API-DAG-SCHEDULER.md +273 -0
  11. package/docs/API-TOOLS-SKILLS-CHANNELS.md +293 -0
  12. package/docs/API-WORKERS-EVENTS.md +152 -0
  13. package/docs/INDEX.md +141 -0
  14. package/docs/README.md +68 -0
  15. package/package.json +54 -105
  16. package/packages/cli/package.json +17 -0
  17. package/packages/cli/src/commands/init.ts +56 -0
  18. package/packages/cli/src/commands/run.ts +45 -0
  19. package/packages/cli/src/commands/test.ts +42 -0
  20. package/packages/cli/src/commands/trace.ts +55 -0
  21. package/packages/cli/src/index.ts +43 -0
  22. package/packages/core/package.json +58 -0
  23. package/packages/core/src/ace/Curator.ts +158 -0
  24. package/packages/core/src/ace/Reflector.ts +200 -0
  25. package/packages/core/src/ace/Tracer.ts +100 -0
  26. package/packages/core/src/ace/index.ts +4 -0
  27. package/packages/core/src/agent/AgentRunner.ts +699 -0
  28. package/packages/core/src/agent/Compaction.ts +221 -0
  29. package/packages/core/src/agent/ContextCompiler.ts +567 -0
  30. package/packages/core/src/agent/ContextGuard.ts +91 -0
  31. package/packages/core/src/agent/ConversationStore.ts +244 -0
  32. package/packages/core/src/agent/Hooks.ts +166 -0
  33. package/packages/core/src/agent/NativeTools.ts +31 -0
  34. package/packages/core/src/agent/PromptBuilder.ts +169 -0
  35. package/packages/core/src/agent/Service.ts +267 -0
  36. package/packages/core/src/agent/StuckLoop.ts +133 -0
  37. package/packages/core/src/agent/index.ts +12 -0
  38. package/packages/core/src/agent/providers/LLMClient.ts +149 -0
  39. package/packages/core/src/agent/providers/anthropic.ts +212 -0
  40. package/packages/core/src/agent/providers/gemini.ts +215 -0
  41. package/packages/core/src/agent/providers/index.ts +199 -0
  42. package/packages/core/src/agent/providers/interface.ts +195 -0
  43. package/packages/core/src/agent/providers/ollama.ts +175 -0
  44. package/packages/core/src/agent/providers/openai-compat.ts +231 -0
  45. package/packages/core/src/agent/providers.ts +1 -0
  46. package/packages/core/src/agent/selectors/PlaybookSelector.ts +147 -0
  47. package/packages/core/src/agent/selectors/SkillSelector.ts +478 -0
  48. package/packages/core/src/agent/selectors/ToolSelector.ts +577 -0
  49. package/packages/core/src/agent/selectors/index.ts +6 -0
  50. package/packages/core/src/api/createAgent.test.ts +48 -0
  51. package/packages/core/src/api/createAgent.ts +122 -0
  52. package/packages/core/src/api/index.ts +2 -0
  53. package/packages/core/src/canvas/CanvasManager.ts +390 -0
  54. package/packages/core/src/canvas/a2ui-tools.ts +255 -0
  55. package/packages/core/src/canvas/canvas-tools.ts +448 -0
  56. package/packages/core/src/canvas/emitter.ts +149 -0
  57. package/packages/core/src/canvas/index.ts +6 -0
  58. package/packages/core/src/config/index.ts +2 -0
  59. package/packages/core/src/config/loader.ts +554 -0
  60. package/packages/core/src/ethics/EthicsGuard.test.ts +54 -0
  61. package/packages/core/src/ethics/EthicsGuard.ts +66 -0
  62. package/packages/core/src/ethics/index.ts +2 -0
  63. package/packages/core/src/gateway/channel-notify.test.ts +14 -0
  64. package/packages/core/src/gateway/channel-notify.ts +12 -0
  65. package/packages/core/src/gateway/index.ts +1 -0
  66. package/packages/core/src/index.ts +37 -0
  67. package/packages/core/src/mcp/MCPClient.ts +439 -0
  68. package/packages/core/src/mcp/MCPToolAdapter.ts +176 -0
  69. package/packages/core/src/mcp/config.ts +13 -0
  70. package/packages/core/src/mcp/hot-reload.ts +147 -0
  71. package/packages/core/src/mcp/index.ts +11 -0
  72. package/packages/core/src/mcp/logger.ts +42 -0
  73. package/packages/core/src/mcp/singleton.ts +21 -0
  74. package/packages/core/src/mcp/transports/index.ts +67 -0
  75. package/packages/core/src/mcp/transports/sse.ts +241 -0
  76. package/packages/core/src/mcp/transports/websocket.ts +159 -0
  77. package/packages/core/src/memory/Scratchpad.test.ts +47 -0
  78. package/packages/core/src/memory/Scratchpad.ts +37 -0
  79. package/packages/core/src/memory/Storage.ts +6 -0
  80. package/packages/core/src/memory/index.ts +2 -0
  81. package/packages/core/src/multimodal/VisionService.ts +293 -0
  82. package/packages/core/src/multimodal/index.ts +2 -0
  83. package/packages/core/src/multimodal/types.ts +28 -0
  84. package/packages/core/src/security/Pairing.ts +250 -0
  85. package/packages/core/src/security/RateLimit.ts +270 -0
  86. package/packages/core/src/security/index.ts +4 -0
  87. package/packages/core/src/skills/SkillLoader.ts +388 -0
  88. package/packages/core/src/skills/bundled-data.generated.ts +3332 -0
  89. package/packages/core/src/skills/defineSkill.ts +18 -0
  90. package/packages/core/src/skills/index.ts +4 -0
  91. package/packages/core/src/state/index.ts +2 -0
  92. package/packages/core/src/state/store.ts +312 -0
  93. package/packages/core/src/storage/SQLiteStorage.ts +407 -0
  94. package/packages/core/src/storage/crypto.ts +101 -0
  95. package/packages/core/src/storage/index.ts +10 -0
  96. package/packages/core/src/storage/onboarding.ts +1603 -0
  97. package/packages/core/src/storage/schema.ts +689 -0
  98. package/packages/core/src/storage/seed.ts +740 -0
  99. package/packages/core/src/storage/usage.ts +374 -0
  100. package/packages/core/src/swarm/AgentBus.ts +460 -0
  101. package/packages/core/src/swarm/AgentExecutor.ts +53 -0
  102. package/packages/core/src/swarm/Coordinator.ts +251 -0
  103. package/packages/core/src/swarm/EventBridge.ts +122 -0
  104. package/packages/core/src/swarm/EventBus.ts +169 -0
  105. package/packages/core/src/swarm/TaskGraph.ts +192 -0
  106. package/packages/core/src/swarm/TaskNode.ts +97 -0
  107. package/packages/core/src/swarm/TaskResult.ts +22 -0
  108. package/packages/core/src/swarm/WorkerPool.ts +236 -0
  109. package/packages/core/src/swarm/errors.ts +37 -0
  110. package/packages/core/src/swarm/index.ts +30 -0
  111. package/packages/core/src/swarm/presets/HiveLearnPreset.ts +99 -0
  112. package/packages/core/src/swarm/presets/ResearchPreset.ts +97 -0
  113. package/packages/core/src/swarm/presets/index.ts +4 -0
  114. package/packages/core/src/swarm/strategies/ParallelStrategy.ts +21 -0
  115. package/packages/core/src/swarm/strategies/PriorityStrategy.ts +46 -0
  116. package/packages/core/src/swarm/strategies/index.ts +3 -0
  117. package/packages/core/src/swarm/types.ts +164 -0
  118. package/packages/core/src/tools/ToolExecutor.ts +58 -0
  119. package/packages/core/src/tools/ToolRegistry.test.ts +98 -0
  120. package/packages/core/src/tools/ToolRegistry.ts +61 -0
  121. package/packages/core/src/tools/agents/get-available-models.ts +118 -0
  122. package/packages/core/src/tools/agents/index.ts +715 -0
  123. package/packages/core/src/tools/bridge-events.ts +26 -0
  124. package/packages/core/src/tools/canvas/index.ts +375 -0
  125. package/packages/core/src/tools/cli/index.ts +142 -0
  126. package/packages/core/src/tools/codebridge/index.ts +342 -0
  127. package/packages/core/src/tools/core/index.ts +476 -0
  128. package/packages/core/src/tools/cron/index.ts +626 -0
  129. package/packages/core/src/tools/filesystem/fs-delete.ts +78 -0
  130. package/packages/core/src/tools/filesystem/fs-edit.ts +106 -0
  131. package/packages/core/src/tools/filesystem/fs-exists.ts +63 -0
  132. package/packages/core/src/tools/filesystem/fs-glob.ts +108 -0
  133. package/packages/core/src/tools/filesystem/fs-list.ts +129 -0
  134. package/packages/core/src/tools/filesystem/fs-read.ts +72 -0
  135. package/packages/core/src/tools/filesystem/fs-write.ts +67 -0
  136. package/packages/core/src/tools/filesystem/index.ts +34 -0
  137. package/packages/core/src/tools/filesystem/workspace-guard.ts +62 -0
  138. package/packages/core/src/tools/index.ts +231 -0
  139. package/packages/core/src/tools/meeting/index.ts +363 -0
  140. package/packages/core/src/tools/office/index.ts +47 -0
  141. package/packages/core/src/tools/office/office-escribir-docx.ts +192 -0
  142. package/packages/core/src/tools/office/office-escribir-pdf.ts +172 -0
  143. package/packages/core/src/tools/office/office-escribir-pptx.ts +174 -0
  144. package/packages/core/src/tools/office/office-escribir-xlsx.ts +116 -0
  145. package/packages/core/src/tools/office/office-leer-docx.ts +93 -0
  146. package/packages/core/src/tools/office/office-leer-pdf.ts +114 -0
  147. package/packages/core/src/tools/office/office-leer-pptx.ts +136 -0
  148. package/packages/core/src/tools/office/office-leer-xlsx.ts +124 -0
  149. package/packages/core/src/tools/projects/index.ts +37 -0
  150. package/packages/core/src/tools/projects/project-create.ts +94 -0
  151. package/packages/core/src/tools/projects/project-done.ts +66 -0
  152. package/packages/core/src/tools/projects/project-fail.ts +66 -0
  153. package/packages/core/src/tools/projects/project-list.ts +96 -0
  154. package/packages/core/src/tools/projects/project-update.ts +72 -0
  155. package/packages/core/src/tools/projects/task-create.ts +68 -0
  156. package/packages/core/src/tools/projects/task-evaluate.ts +93 -0
  157. package/packages/core/src/tools/projects/task-update.ts +93 -0
  158. package/packages/core/src/tools/types.ts +39 -0
  159. package/packages/core/src/tools/voice/index.ts +104 -0
  160. package/packages/core/src/tools/web/browser-click.ts +78 -0
  161. package/packages/core/src/tools/web/browser-extract.ts +139 -0
  162. package/packages/core/src/tools/web/browser-navigate.ts +106 -0
  163. package/packages/core/src/tools/web/browser-screenshot.ts +87 -0
  164. package/packages/core/src/tools/web/browser-script.ts +88 -0
  165. package/packages/core/src/tools/web/browser-service.ts +554 -0
  166. package/packages/core/src/tools/web/browser-type.ts +101 -0
  167. package/packages/core/src/tools/web/browser-wait.ts +136 -0
  168. package/packages/core/src/tools/web/index.ts +41 -0
  169. package/packages/core/src/tools/web/web-fetch.ts +78 -0
  170. package/packages/core/src/tools/web/web-search.ts +123 -0
  171. package/packages/core/src/utils/benchmark.ts +80 -0
  172. package/packages/core/src/utils/crypto.ts +73 -0
  173. package/packages/core/src/utils/date.ts +42 -0
  174. package/packages/core/src/utils/index.ts +10 -0
  175. package/packages/core/src/utils/logger.ts +389 -0
  176. package/packages/core/src/utils/retry.ts +70 -0
  177. package/packages/core/src/utils/toon.ts +253 -0
  178. package/packages/core/src/voice/index.ts +656 -0
  179. package/test/setup-db.ts +216 -0
  180. package/tsconfig.json +39 -0
  181. package/src/agents.ts +0 -1
  182. package/src/canvas.ts +0 -1
  183. package/src/channels.ts +0 -1
  184. package/src/config.ts +0 -1
  185. package/src/events.ts +0 -1
  186. package/src/gateway.ts +0 -1
  187. package/src/index.ts +0 -304
  188. package/src/mcp.ts +0 -1
  189. package/src/multimodal.ts +0 -1
  190. package/src/scheduler.ts +0 -1
  191. package/src/security.ts +0 -1
  192. package/src/skills.ts +0 -1
  193. package/src/state.ts +0 -1
  194. package/src/storage.ts +0 -1
  195. package/src/tools.ts +0 -1
  196. package/src/tts.ts +0 -1
  197. package/src/types.ts +0 -82
  198. package/src/utils.ts +0 -1
  199. package/src/voice.ts +0 -1
@@ -0,0 +1,47 @@
1
+ import { describe, it, expect, beforeAll, afterAll } from "bun:test";
2
+ import { Scratchpad } from "./Scratchpad.ts";
3
+ import { getDb, initializeDatabase, DatabaseService, dbService } from "../storage/SQLiteStorage.ts";
4
+
5
/**
 * Scratchpad test suite.
 *
 * Every case runs against the database handle returned by getDb() after
 * initializeDatabase() has completed, and all cases share one thread id while
 * using their own keys.
 */
describe("Scratchpad", () => {
  const THREAD = "test-thread";
  let pad: Scratchpad;

  // Initialise the database once and bind a Scratchpad to its handle.
  beforeAll(async () => {
    await initializeDatabase();
    pad = new Scratchpad(getDb());
  });

  // Close the shared database service once the suite is done.
  afterAll(() => {
    dbService.close();
  });

  it("writes and reads a note", () => {
    pad.write(THREAD, "test-1", "hello world");

    expect(pad.read(THREAD, "test-1")).toBe("hello world");
  });

  it("lists notes as key-value map", () => {
    pad.write(THREAD, "list-a", "aaa");
    pad.write(THREAD, "list-b", "bbb");

    const listed = pad.list(THREAD);

    expect(listed["list-a"]).toBe("aaa");
    expect(listed["list-b"]).toBe("bbb");
  });

  it("deletes a note", () => {
    pad.write(THREAD, "to-delete", "delete me");
    pad.delete(THREAD, "to-delete");

    expect(pad.read(THREAD, "to-delete")).toBeNull();
  });

  it("clear removes all notes for a thread", () => {
    pad.write(THREAD, "clear-a", "a");
    pad.write(THREAD, "clear-b", "b");
    pad.clear(THREAD);

    const remainingKeys = Object.keys(pad.list(THREAD));
    expect(remainingKeys.length).toBe(0);
  });
});
@@ -0,0 +1,37 @@
1
+ import type { Database } from "bun:sqlite";
2
+
3
/**
 * Per-thread key/value note store persisted in the `scratchpad` table.
 *
 * Each note is addressed by a `(threadId, key)` pair. All statements use
 * positional parameters, so thread ids, keys and values are never spliced
 * into the SQL text.
 */
export class Scratchpad {
  /**
   * @param db Open database handle that contains a `scratchpad` table with
   *           `thread_id`, `key`, `value` and `updated_at` columns.
   */
  constructor(private readonly db: Database) {}

  /**
   * Stores `value` under `(threadId, key)` and stamps `updated_at` with the
   * database's current time.
   *
   * NOTE(review): replacing an existing note relies on the table declaring a
   * uniqueness constraint on (thread_id, key); the schema is not visible here.
   */
  write(threadId: string, key: string, value: string): void {
    this.db.run(
      `INSERT OR REPLACE INTO scratchpad (thread_id, key, value, updated_at) VALUES (?, ?, ?, datetime('now'))`,
      [threadId, key, value]
    );
  }

  /**
   * Looks up a single note.
   *
   * @returns The stored value, or `null` when no row matches.
   */
  read(threadId: string, key: string): string | null {
    // Typed as the selected column only; a missing row may surface as null or undefined.
    const row = this.db
      .query(`SELECT value FROM scratchpad WHERE thread_id = ? AND key = ?`)
      .get(threadId, key) as { value: string } | null | undefined;
    return row?.value ?? null;
  }

  /**
   * Returns every note of a thread as a plain `key -> value` object.
   * A thread without notes yields an empty object.
   */
  list(threadId: string): Record<string, string> {
    const rows = this.db
      .query(`SELECT key, value FROM scratchpad WHERE thread_id = ?`)
      .all(threadId) as Array<{ key: string; value: string }>;
    return Object.fromEntries(rows.map(({ key, value }) => [key, value]));
  }

  /** Removes the note stored under `(threadId, key)`, if any. */
  delete(threadId: string, key: string): void {
    this.db.run(
      `DELETE FROM scratchpad WHERE thread_id = ? AND key = ?`,
      [threadId, key]
    );
  }

  /** Removes every note that belongs to `threadId`. */
  clear(threadId: string): void {
    this.db.run(`DELETE FROM scratchpad WHERE thread_id = ?`, [threadId]);
  }
}
@@ -0,0 +1,6 @@
1
/**
 * Minimal contract for a SQL-style storage backend.
 *
 * Only the call signatures are fixed here; result shapes and error behaviour
 * are left to the implementing class.
 */
export interface IStorage {
  /** Runs `sql` with the given positional `params` and returns the backend's result. */
  query(sql: string, ...params: any[]): any;

  /** Runs `sql` with the given positional `params` without returning a result. */
  run(sql: string, ...params: any[]): void;

  /**
   * Invokes `fn` through the backend's transaction mechanism and hands back
   * whatever `fn` returns.
   */
  transaction<TResult>(fn: () => TResult): TResult;

  /** Releases the underlying storage handle. */
  close(): void;
}
@@ -0,0 +1,2 @@
1
+ export type { IStorage } from "./Storage.ts";
2
+ export { Scratchpad } from "./Scratchpad.ts";
@@ -0,0 +1,293 @@
1
+ import { getDb } from "../storage/SQLiteStorage.ts"
2
+ import { decryptApiKey } from "../storage/crypto.ts"
3
+ import { logger } from "../utils/logger.ts"
4
+ import type { ImageInput, DocumentInput, VisionConfig } from "./types"
5
+ import type { ContentPart } from "./types"
6
+
7
+ const log = logger.child("multimodal")
8
+
9
/**
 * Singleton helper for image/document handling.
 *
 * Responsibilities visible in this class:
 *  - read per-channel vision settings and model capabilities from the database,
 *  - normalize raw channel payloads into ImageInput / DocumentInput,
 *  - turn an image into message content parts,
 *  - obtain an image description/transcription ("OCR") from OpenAI, Gemini or
 *    Anthropic over HTTP.
 */
class MultimodalService {
  private static instance: MultimodalService

  /**
   * Instruction sent with every OCR request (Spanish: describe the image in
   * detail and transcribe any text exactly).
   */
  private static readonly OCR_PROMPT =
    "Describe el contenido de esta imagen en detalle. Si hay texto, transcríbelo exactamente."

  /** MIME type assumed for images whose type is not supplied. */
  private static readonly DEFAULT_IMAGE_MIME = "image/jpeg"

  private constructor() {}

  /** Returns the shared instance, creating it on first use. */
  static getInstance(): MultimodalService {
    if (!MultimodalService.instance) {
      MultimodalService.instance = new MultimodalService()
    }
    return MultimodalService.instance
  }

  /**
   * Reads the vision settings stored on a channel row.
   *
   * @returns The stored settings, or an all-disabled config when the channel
   *          does not exist.
   */
  getChannelVisionConfig(channelId: string): VisionConfig {
    const db = getDb()
    const result = db.query(`
      SELECT vision_enabled, ocr_provider, vision_provider, vision_model_id
      FROM channels WHERE id = ?
    `).get(channelId) as {
      vision_enabled: number
      ocr_provider: string | null
      vision_provider: string | null
      vision_model_id: string | null
    } | null | undefined

    if (!result) {
      return { visionEnabled: false, ocrProvider: null, visionProvider: null, visionModelId: null }
    }

    return {
      // Stored as an integer flag; only the value 1 counts as enabled.
      visionEnabled: result.vision_enabled === 1,
      ocrProvider: result.ocr_provider,
      visionProvider: result.vision_provider,
      visionModelId: result.vision_model_id,
    }
  }

  /**
   * Converts an image into content parts: an optional leading text part for
   * the caption, followed by the image itself (URL reference or base64).
   *
   * @param image         Image to convert.
   * @param visionModelId Accepted for interface compatibility; not used here.
   */
  async processImage(image: ImageInput, visionModelId?: string): Promise<ContentPart[]> {
    const parts: ContentPart[] = []

    if (image.caption) {
      parts.push({ type: "text", text: image.caption })
    }

    if (image.type === "url") {
      parts.push({ type: "image_url", image_url: { url: image.data as string } })
    } else if (image.type === "base64") {
      parts.push({
        type: "image_base64",
        base64: image.data as string,
        mimeType: image.mimeType || MultimodalService.DEFAULT_IMAGE_MIME,
      })
    } else if (image.type === "buffer") {
      parts.push({
        type: "image_base64",
        base64: Buffer.from(image.data as Buffer).toString("base64"),
        mimeType: image.mimeType || MultimodalService.DEFAULT_IMAGE_MIME,
      })
    }

    return parts
  }

  /**
   * Describes/transcribes an image with the requested provider.
   *
   * @param providerId "openai", "gemini" or "anthropic". Missing or unknown
   *                   values fall back to OpenAI (unknown ones log a warning).
   * @throws Error when the provider has no API key or its HTTP call fails.
   */
  async ocrImage(image: ImageInput, providerId?: string): Promise<string> {
    const resolved = providerId || "openai"

    if (resolved === "openai") {
      return this.ocrWithOpenAI(image)
    } else if (resolved === "gemini") {
      return this.ocrWithGemini(image)
    } else if (resolved === "anthropic") {
      return this.ocrWithAnthropic(image)
    }

    log.warn(`Unknown OCR provider ${resolved}, defaulting to OpenAI`)
    return this.ocrWithOpenAI(image)
  }

  /**
   * Builds an ImageInput from a raw channel payload, preferring `url`, then
   * `base64`, then `buffer`.
   *
   * @param channelType Only used to label the error message.
   * @throws Error when the payload is null/undefined or carries none of the
   *         three sources.
   */
  normalizeImageFromChannel(channelType: string, imageData: unknown): ImageInput {
    // A null/undefined payload is treated as "no source" so callers get the
    // descriptive error below instead of a property-access TypeError.
    const data = (imageData ?? {}) as {
      url?: string
      base64?: string
      buffer?: Buffer
      mimeType?: string
      caption?: string
    }

    if (data.url) {
      return { type: "url", data: data.url, mimeType: data.mimeType, caption: data.caption }
    }
    if (data.base64) {
      return {
        type: "base64",
        data: data.base64,
        mimeType: data.mimeType || MultimodalService.DEFAULT_IMAGE_MIME,
        caption: data.caption,
      }
    }
    if (data.buffer) {
      return {
        type: "buffer",
        data: data.buffer,
        mimeType: data.mimeType || MultimodalService.DEFAULT_IMAGE_MIME,
        caption: data.caption,
      }
    }

    throw new Error(`${channelType} image missing url, base64, or buffer`)
  }

  /**
   * Builds a DocumentInput from a raw channel payload, preferring `url`, then
   * `base64`, then `buffer`. The MIME type defaults to "application/pdf".
   *
   * @param channelType Only used to label the error message.
   * @throws Error when the payload is null/undefined or carries none of the
   *         three sources.
   */
  normalizeDocumentFromChannel(channelType: string, docData: unknown): DocumentInput {
    const data = (docData ?? {}) as {
      url?: string
      base64?: string
      buffer?: Buffer
      mimeType?: string
      fileName?: string
    }

    if (data.url) {
      return { type: "url", data: data.url, mimeType: data.mimeType || "application/pdf", fileName: data.fileName }
    }
    if (data.base64) {
      return { type: "base64", data: data.base64, mimeType: data.mimeType || "application/pdf", fileName: data.fileName }
    }
    if (data.buffer) {
      return { type: "buffer", data: data.buffer, mimeType: data.mimeType || "application/pdf", fileName: data.fileName }
    }

    throw new Error(`${channelType} document missing url, base64, or buffer`)
  }

  /**
   * Returns a URL for the image: the original URL for "url" inputs, otherwise
   * a `data:` URL built from the base64/buffer content.
   *
   * @throws Error when the image type is none of url/base64/buffer.
   */
  async resolveImageUrl(image: ImageInput): Promise<string> {
    if (image.type === "url") return image.data as string
    if (image.type === "base64") {
      const mime = image.mimeType || MultimodalService.DEFAULT_IMAGE_MIME
      return `data:${mime};base64,${image.data as string}`
    }
    if (image.type === "buffer") {
      const base64 = Buffer.from(image.data as Buffer).toString("base64")
      const mime = image.mimeType || MultimodalService.DEFAULT_IMAGE_MIME
      return `data:${mime};base64,${base64}`
    }
    throw new Error("Cannot resolve image URL")
  }

  /**
   * Loads and decrypts the API key stored for a provider.
   *
   * @returns The decrypted key, or `null` when no key is stored or decryption
   *          fails (the failure is logged).
   */
  private async getProviderApiKey(providerId: string): Promise<string | null> {
    const db = getDb()
    const provider = db.query(`
      SELECT api_key_encrypted, api_key_iv FROM providers WHERE id = ?
    `).get(providerId) as { api_key_encrypted: string; api_key_iv: string } | null | undefined

    if (!provider?.api_key_encrypted) return null

    try {
      return await decryptApiKey(provider.api_key_encrypted, provider.api_key_iv)
    } catch (error) {
      log.error(`Failed to decrypt API key for provider ${providerId}: ${(error as Error).message}`)
      return null
    }
  }

  /** OCR through the OpenAI chat-completions endpoint (model gpt-4o-mini). */
  private async ocrWithOpenAI(image: ImageInput): Promise<string> {
    // The stored provider key wins; the environment variable is the fallback.
    const key = await this.getProviderApiKey("openai") || process.env.OPENAI_API_KEY
    if (!key) throw new Error("OPENAI_API_KEY not configured for OCR")

    const imageUrl = await this.resolveImageUrl(image)

    const response = await fetch("https://api.openai.com/v1/chat/completions", {
      method: "POST",
      headers: { "Content-Type": "application/json", "Authorization": `Bearer ${key}` },
      body: JSON.stringify({
        model: "gpt-4o-mini",
        messages: [{
          role: "user",
          content: [
            { type: "text", text: MultimodalService.OCR_PROMPT },
            { type: "image_url", image_url: { url: imageUrl } },
          ],
        }],
        max_tokens: 1000,
      }),
    })

    if (!response.ok) {
      const error = await response.text()
      throw new Error(`OpenAI OCR failed: ${error}`)
    }

    const data = await response.json() as { choices?: Array<{ message?: { content?: string } }> }
    // A body without `choices` yields an empty result instead of a TypeError.
    return data.choices?.[0]?.message?.content || ""
  }

  /** OCR through the Gemini generateContent endpoint (model gemini-2.0-flash). */
  private async ocrWithGemini(image: ImageInput): Promise<string> {
    const key = await this.getProviderApiKey("gemini") || process.env.GEMINI_API_KEY
    if (!key) throw new Error("GEMINI_API_KEY not configured for OCR")

    // This request sends the image inline as base64, so URL inputs are downloaded first.
    let mimeType = image.mimeType || MultimodalService.DEFAULT_IMAGE_MIME
    let base64Data: string
    if (image.type === "url") {
      const imgResponse = await fetch(image.data as string)
      if (!imgResponse.ok) {
        // Do not forward an error page to the model as if it were image bytes.
        throw new Error(`Gemini OCR failed: could not download image (HTTP ${imgResponse.status})`)
      }
      if (!image.mimeType) {
        // Prefer the server-declared image type over the JPEG default.
        const declared = imgResponse.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase()
        if (declared?.startsWith("image/")) mimeType = declared
      }
      base64Data = Buffer.from(await imgResponse.arrayBuffer()).toString("base64")
    } else if (image.type === "base64") {
      base64Data = image.data as string
    } else {
      base64Data = Buffer.from(image.data as Buffer).toString("base64")
    }
    const imagePart = { inlineData: { data: base64Data, mimeType } }

    const response = await fetch(
      "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent",
      {
        method: "POST",
        // The key travels in a header rather than the query string so it does
        // not end up in URLs captured by logs or proxies.
        headers: { "Content-Type": "application/json", "x-goog-api-key": key },
        body: JSON.stringify({
          contents: [{ parts: [{ text: MultimodalService.OCR_PROMPT }, imagePart] }],
        }),
      },
    )

    if (!response.ok) {
      const error = await response.text()
      throw new Error(`Gemini OCR failed: ${error}`)
    }

    const data = await response.json() as {
      candidates?: Array<{ content?: { parts?: Array<{ text?: string }> } }>
    }
    return data.candidates?.[0]?.content?.parts?.[0]?.text || ""
  }

  /** OCR through the Anthropic messages endpoint (model claude-haiku-4-5-20251001). */
  private async ocrWithAnthropic(image: ImageInput): Promise<string> {
    const key = await this.getProviderApiKey("anthropic") || process.env.ANTHROPIC_API_KEY
    if (!key) throw new Error("ANTHROPIC_API_KEY not configured for OCR")

    const imageUrl = await this.resolveImageUrl(image)

    // data: URLs are split back into media type + payload; anything else is
    // passed through as a URL source.
    let source:
      | { type: "base64"; media_type: string; data: string }
      | { type: "url"; url: string }
    if (imageUrl.startsWith("data:")) {
      const match = imageUrl.match(/^data:([^;]+);base64,(.+)$/)
      if (!match) {
        throw new Error("Invalid base64 data URL")
      }
      source = { type: "base64", media_type: match[1] as string, data: match[2] as string }
    } else {
      source = { type: "url", url: imageUrl }
    }

    const response = await fetch("https://api.anthropic.com/v1/messages", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-api-key": key,
        "anthropic-version": "2023-06-01",
        "anthropic-dangerous-direct-browser-access": "true",
      },
      body: JSON.stringify({
        model: "claude-haiku-4-5-20251001",
        max_tokens: 1000,
        messages: [{
          role: "user",
          content: [
            { type: "image", source },
            { type: "text", text: MultimodalService.OCR_PROMPT },
          ],
        }],
      }),
    })

    if (!response.ok) {
      const error = await response.text()
      throw new Error(`Anthropic OCR failed: ${error}`)
    }

    const data = await response.json() as { content?: Array<{ type: string; text?: string }> }
    // First non-empty text block of the reply.
    const textBlock = data.content?.find(b => b.type === "text" && b.text)
    return textBlock?.text || ""
  }

  /**
   * Reports, per provider, whether an API key is available either as a
   * non-empty stored key or through the matching environment variable.
   */
  getConfiguredVisionProviders(): Record<string, boolean> {
    const db = getDb()
    const hasDbKey = (providerId: string): boolean => {
      const row = db.query(
        `SELECT api_key_encrypted FROM providers WHERE id = ? AND api_key_encrypted IS NOT NULL AND api_key_encrypted != ''`
      ).get(providerId) as { api_key_encrypted: string } | null | undefined
      return !!row
    }

    return {
      openai: hasDbKey("openai") || !!(process.env.OPENAI_API_KEY),
      gemini: hasDbKey("gemini") || !!(process.env.GEMINI_API_KEY),
      anthropic: hasDbKey("anthropic") || !!(process.env.ANTHROPIC_API_KEY),
    }
  }

  /**
   * Tells whether the stored capability list of a model contains "vision".
   *
   * @returns `false` when the model is unknown, has no capabilities, or its
   *          `capabilities` column is not a JSON array.
   */
  modelSupportsVision(providerId: string, modelId: string): boolean {
    const db = getDb()
    const model = db
      .query(`SELECT capabilities FROM models WHERE id = ? AND provider_id = ?`)
      .get(modelId, providerId) as { capabilities: string } | null | undefined
    if (!model?.capabilities) return false
    try {
      const parsed: unknown = JSON.parse(model.capabilities)
      // Guard against non-array JSON: String.prototype.includes would do a
      // substring match (e.g. "television" contains "vision").
      return Array.isArray(parsed) && parsed.includes("vision")
    } catch {
      return false
    }
  }
}
292
+
293
+ export const multimodalService = MultimodalService.getInstance()
@@ -0,0 +1,2 @@
1
+ export type { ContentPart, ImageInput, DocumentInput, VisionConfig, MultimodalMessageType } from "./types.ts";
2
+ export { multimodalService } from "./VisionService.ts";
@@ -0,0 +1,28 @@
1
/** Plain text fragment of a message. */
type TextContentPart = { type: "text"; text: string }

/** Image referenced by URL. */
type ImageUrlContentPart = { type: "image_url"; image_url: { url: string } }

/** Image carried inline as base64 together with its MIME type. */
type ImageBase64ContentPart = { type: "image_base64"; base64: string; mimeType: string }

/** Document carried inline as base64, optionally with its file name. */
type DocumentContentPart = { type: "document"; base64: string; mimeType: string; fileName?: string }

/** One piece of multimodal message content, discriminated by `type`. */
export type ContentPart =
  | TextContentPart
  | ImageUrlContentPart
  | ImageBase64ContentPart
  | DocumentContentPart

/** An image supplied as a URL, a base64 string, or a Buffer. */
export interface ImageInput {
  /** Says how `data` is to be interpreted. */
  type: "url" | "base64" | "buffer"
  /** The URL, the base64 text, or the Buffer itself. */
  data: string | Buffer
  /** MIME type of the image, when known. */
  mimeType?: string
  /** Optional text accompanying the image. */
  caption?: string
}

/** A document supplied as a URL, a base64 string, or a Buffer. */
export interface DocumentInput {
  /** Says how `data` is to be interpreted. */
  type: "url" | "base64" | "buffer"
  /** The URL, the base64 text, or the Buffer itself. */
  data: string | Buffer
  /** MIME type of the document (required, unlike ImageInput). */
  mimeType: string
  /** Optional original file name. */
  fileName?: string
}

/** Vision-related settings of a channel; `null` marks an unset value. */
export interface VisionConfig {
  visionEnabled: boolean
  ocrProvider: string | null
  visionProvider: string | null
  visionModelId: string | null
}

/** Kinds of message payload. */
export type MultimodalMessageType = "text" | "image" | "document" | "audio"
@@ -0,0 +1,250 @@
1
+ import crypto from "crypto";
2
+ import { eventBus } from "../swarm/EventBus.ts";
3
+ import { logger } from "../utils/logger.ts";
4
+
5
+ export interface PairingCode {
6
+ code: string;
7
+ channel: string;
8
+ userId: string;
9
+ createdAt: number;
10
+ expiresAt: number;
11
+ attempts: number;
12
+ }
13
+
14
+ export interface PairingConfig {
15
+ codeLength?: number;
16
+ expirationMs?: number;
17
+ maxAttempts?: number;
18
+ }
19
+
20
+ export interface PairingStats {
21
+ pendingCodes: number;
22
+ totalAllowlist: number;
23
+ byChannel: Record<string, { pending: number; allowed: number }>;
24
+ }
25
+
26
/**
 * In-memory pairing workflow: issues short-lived codes for a
 * `(channel, userId)` pair, lets them be approved or rejected, and keeps a
 * per-channel allowlist of approved users.
 *
 * State lives only in this instance's maps. Expired codes are purged lazily
 * (on generateCode / listPending / validateCode) and by a periodic timer.
 */
export class PairingService {
  /** How often a colliding code is regenerated before giving up. */
  private static readonly MAX_CODE_GENERATION_RETRIES = 5;

  /** Period of the background purge of expired codes. */
  private static readonly CLEANUP_INTERVAL_MS = 60 * 1000;

  private readonly codes = new Map<string, PairingCode>();
  private readonly allowlist = new Map<string, Set<string>>();
  private readonly config: Required<PairingConfig>;
  private readonly log = logger.child("pairing");
  private cleanupTimer: ReturnType<typeof setInterval> | undefined;

  /**
   * @param config Optional overrides; defaults are an 8-character code,
   *               a 10-minute lifetime and 3 attempts.
   */
  constructor(config: PairingConfig = {}) {
    this.config = {
      codeLength: config.codeLength ?? 8,
      expirationMs: config.expirationMs ?? 10 * 60 * 1000,
      maxAttempts: config.maxAttempts ?? 3,
    };

    this.startCleanup();
  }

  /**
   * Issues a new pairing code for `(channel, userId)` and emits
   * "pairing:requested".
   *
   * @returns The generated code.
   */
  generateCode(channel: string, userId: string): string {
    this.cleanup();

    // Regenerate on collision so a new request does not silently replace
    // another user's pending code.
    let code = this.generateSecureCode();
    for (
      let retry = 0;
      this.codes.has(code) && retry < PairingService.MAX_CODE_GENERATION_RETRIES;
      retry++
    ) {
      code = this.generateSecureCode();
    }
    if (this.codes.has(code)) {
      // Code space exhausted for this length; keep the original best-effort
      // behaviour (overwrite) rather than failing the request.
      this.log.warn("Could not generate a unique pairing code; replacing an existing pending code");
    }

    const now = Date.now();
    const record: PairingCode = {
      code,
      channel,
      userId,
      createdAt: now,
      expiresAt: now + this.config.expirationMs,
      attempts: 0,
    };

    this.codes.set(code, record);

    this.log.info(`Generated pairing code for ${channel}:${userId}`);

    eventBus.emit("pairing:requested", {
      channel,
      userId,
      code,
      expiresAt: record.expiresAt,
    });

    return code;
  }

  /**
   * Looks up a pending code.
   *
   * @returns The record, or `null` when the code is unknown or expired. An
   *          expired code is removed and "pairing:expired" is emitted.
   */
  validateCode(code: string): PairingCode | null {
    const record = this.codes.get(code);
    if (!record) return null;

    if (Date.now() > record.expiresAt) {
      this.codes.delete(code);
      eventBus.emit("pairing:expired", {
        code,
        channel: record.channel,
        userId: record.userId,
      });
      return null;
    }

    return record;
  }

  /**
   * Approves a pending code: adds its user to the channel allowlist, consumes
   * the code and emits "pairing:approved".
   *
   * @returns `{ success: true }`, or `{ success: false, error }` when the code
   *          is unknown or expired.
   */
  approve(code: string): { success: boolean; error?: string } {
    const record = this.validateCode(code);
    if (!record) {
      return { success: false, error: "Invalid or expired code" };
    }

    let allowedUsers = this.allowlist.get(record.channel);
    if (!allowedUsers) {
      allowedUsers = new Set();
      this.allowlist.set(record.channel, allowedUsers);
    }
    allowedUsers.add(record.userId);

    this.codes.delete(code);

    this.log.info(`Approved pairing for ${record.channel}:${record.userId}`);

    eventBus.emit("pairing:approved", {
      channel: record.channel,
      userId: record.userId,
    });

    return { success: true };
  }

  /**
   * Rejects a pending code, removing it and emitting "pairing:rejected".
   *
   * @returns `false` when the code is unknown, `true` otherwise.
   */
  reject(code: string, reason: string): boolean {
    const record = this.codes.get(code);
    if (!record) return false;

    this.codes.delete(code);

    this.log.info(`Rejected pairing for ${record.channel}:${record.userId}: ${reason}`);

    eventBus.emit("pairing:rejected", {
      channel: record.channel,
      userId: record.userId,
      reason,
    });

    return true;
  }

  /**
   * Records one attempt against a code.
   *
   * @returns `false` when the code is unknown or this attempt brought the
   *          count up to `maxAttempts` (the code is then deleted); `true`
   *          otherwise.
   */
  attempt(code: string): boolean {
    const record = this.codes.get(code);
    if (!record) return false;

    record.attempts++;

    if (record.attempts >= this.config.maxAttempts) {
      this.codes.delete(code);
      this.log.warn(`Code ${code} exhausted attempts`);
      return false;
    }

    return true;
  }

  /** Tells whether `userId` is on the allowlist of `channel`. */
  isAllowed(channel: string, userId: string): boolean {
    return this.allowlist.get(channel)?.has(userId) ?? false;
  }

  /**
   * Removes a user from a channel's allowlist, dropping the channel entry
   * once it becomes empty.
   *
   * @returns `true` when the user was present.
   */
  removeFromAllowlist(channel: string, userId: string): boolean {
    const channelAllowlist = this.allowlist.get(channel);
    if (!channelAllowlist) return false;

    const removed = channelAllowlist.delete(userId);

    if (channelAllowlist.size === 0) {
      this.allowlist.delete(channel);
    }

    if (removed) {
      this.log.info(`Removed ${userId} from allowlist for ${channel}`);
    }

    return removed;
  }

  /**
   * Lists allowlisted users, either for one channel or, when `channel` is
   * omitted, for every channel.
   */
  listAllowed(channel?: string): { channel: string; userId: string }[] {
    const result: { channel: string; userId: string }[] = [];

    if (channel) {
      for (const userId of this.allowlist.get(channel) ?? []) {
        result.push({ channel, userId });
      }
    } else {
      for (const [ch, users] of this.allowlist) {
        for (const userId of users) {
          result.push({ channel: ch, userId });
        }
      }
    }

    return result;
  }

  /** Returns all pending codes after purging the expired ones. */
  listPending(): PairingCode[] {
    this.cleanup();
    return Array.from(this.codes.values());
  }

  /** Summarises pending codes and allowlisted users, overall and per channel. */
  getStats(): PairingStats {
    const byChannel: Record<string, { pending: number; allowed: number }> = {};
    let totalAllowlist = 0;

    for (const [channel, users] of this.allowlist) {
      byChannel[channel] = { pending: 0, allowed: users.size };
      totalAllowlist += users.size;
    }

    for (const record of this.codes.values()) {
      let entry = byChannel[record.channel];
      if (!entry) {
        entry = { pending: 0, allowed: 0 };
        byChannel[record.channel] = entry;
      }
      entry.pending++;
    }

    return {
      pendingCodes: this.codes.size,
      totalAllowlist,
      byChannel,
    };
  }

  /** Drops every pending code and every allowlist entry. */
  clear(): void {
    this.codes.clear();
    this.allowlist.clear();
    this.log.info("All pairing data cleared");
  }

  /**
   * Stops the periodic purge of expired codes. Safe to call more than once.
   * Lazy purging on generateCode / listPending / validateCode keeps working.
   */
  stopCleanup(): void {
    if (this.cleanupTimer !== undefined) {
      clearInterval(this.cleanupTimer);
      this.cleanupTimer = undefined;
    }
  }

  /** Builds an upper-case hex code of `codeLength` characters from random bytes. */
  private generateSecureCode(): string {
    // Two hex characters per byte; round up and trim for odd lengths.
    const bytes = crypto.randomBytes(Math.ceil(this.config.codeLength / 2));
    return bytes.toString("hex").toUpperCase().slice(0, this.config.codeLength);
  }

  /** Deletes expired codes, emitting "pairing:expired" for each one. */
  private cleanup(): void {
    const now = Date.now();
    for (const [code, record] of this.codes) {
      if (now > record.expiresAt) {
        this.codes.delete(code);
        eventBus.emit("pairing:expired", {
          code,
          channel: record.channel,
          userId: record.userId,
        });
      }
    }
  }

  /** Starts the periodic purge of expired codes. */
  private startCleanup(): void {
    this.cleanupTimer = setInterval(() => {
      this.cleanup();
    }, PairingService.CLEANUP_INTERVAL_MS);

    // Housekeeping must not keep the process alive on its own. Timer objects
    // expose unref() in Node/Bun; the guard covers runtimes whose timers are
    // plain numbers.
    const handle: unknown = this.cleanupTimer;
    if (
      typeof handle === "object" &&
      handle !== null &&
      "unref" in handle &&
      typeof handle.unref === "function"
    ) {
      handle.unref();
    }
  }
}
249
+
250
+ export const pairingService = new PairingService();