@framers/agentos 0.1.112 → 0.1.114

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (552)
  1. package/README.md +39 -5
  2. package/dist/api/AgentOS.d.ts +45 -12
  3. package/dist/api/AgentOS.d.ts.map +1 -1
  4. package/dist/api/AgentOS.js +225 -78
  5. package/dist/api/AgentOS.js.map +1 -1
  6. package/dist/api/AgentOSOrchestrator.d.ts +8 -0
  7. package/dist/api/AgentOSOrchestrator.d.ts.map +1 -1
  8. package/dist/api/AgentOSOrchestrator.js +350 -59
  9. package/dist/api/AgentOSOrchestrator.js.map +1 -1
  10. package/dist/api/StreamChunkEmitter.d.ts.map +1 -1
  11. package/dist/api/StreamChunkEmitter.js +2 -0
  12. package/dist/api/StreamChunkEmitter.js.map +1 -1
  13. package/dist/api/agency.d.ts.map +1 -1
  14. package/dist/api/agency.js +47 -1
  15. package/dist/api/agency.js.map +1 -1
  16. package/dist/api/agent.d.ts +18 -5
  17. package/dist/api/agent.d.ts.map +1 -1
  18. package/dist/api/agent.js +48 -9
  19. package/dist/api/agent.js.map +1 -1
  20. package/dist/api/agentExport.d.ts +202 -0
  21. package/dist/api/agentExport.d.ts.map +1 -0
  22. package/dist/api/agentExport.js +323 -0
  23. package/dist/api/agentExport.js.map +1 -0
  24. package/dist/api/editImage.d.ts +119 -0
  25. package/dist/api/editImage.d.ts.map +1 -0
  26. package/dist/api/editImage.js +150 -0
  27. package/dist/api/editImage.js.map +1 -0
  28. package/dist/api/embedText.d.ts +137 -0
  29. package/dist/api/embedText.d.ts.map +1 -0
  30. package/dist/api/embedText.js +229 -0
  31. package/dist/api/embedText.js.map +1 -0
  32. package/dist/api/externalToolRegistry.d.ts +44 -0
  33. package/dist/api/externalToolRegistry.d.ts.map +1 -0
  34. package/dist/api/externalToolRegistry.js +245 -0
  35. package/dist/api/externalToolRegistry.js.map +1 -0
  36. package/dist/api/generateImage.d.ts +1 -1
  37. package/dist/api/generateImage.d.ts.map +1 -1
  38. package/dist/api/generateImage.js +17 -13
  39. package/dist/api/generateImage.js.map +1 -1
  40. package/dist/api/generateObject.d.ts +185 -0
  41. package/dist/api/generateObject.d.ts.map +1 -0
  42. package/dist/api/generateObject.js +249 -0
  43. package/dist/api/generateObject.js.map +1 -0
  44. package/dist/api/generateText.d.ts +13 -3
  45. package/dist/api/generateText.d.ts.map +1 -1
  46. package/dist/api/generateText.js +20 -5
  47. package/dist/api/generateText.js.map +1 -1
  48. package/dist/api/interfaces/IAgentOS.d.ts +29 -1
  49. package/dist/api/interfaces/IAgentOS.d.ts.map +1 -1
  50. package/dist/api/model.d.ts +7 -7
  51. package/dist/api/model.d.ts.map +1 -1
  52. package/dist/api/model.js +22 -16
  53. package/dist/api/model.js.map +1 -1
  54. package/dist/api/processRequestWithExternalTools.d.ts +26 -0
  55. package/dist/api/processRequestWithExternalTools.d.ts.map +1 -0
  56. package/dist/api/processRequestWithExternalTools.js +52 -0
  57. package/dist/api/processRequestWithExternalTools.js.map +1 -0
  58. package/dist/api/processRequestWithRegisteredTools.d.ts +56 -0
  59. package/dist/api/processRequestWithRegisteredTools.d.ts.map +1 -0
  60. package/dist/api/processRequestWithRegisteredTools.js +125 -0
  61. package/dist/api/processRequestWithRegisteredTools.js.map +1 -0
  62. package/dist/api/provider-defaults.d.ts.map +1 -1
  63. package/dist/api/provider-defaults.js +28 -0
  64. package/dist/api/provider-defaults.js.map +1 -1
  65. package/dist/api/resumeExternalToolRequestWithRegisteredTools.d.ts +71 -0
  66. package/dist/api/resumeExternalToolRequestWithRegisteredTools.d.ts.map +1 -0
  67. package/dist/api/resumeExternalToolRequestWithRegisteredTools.js +159 -0
  68. package/dist/api/resumeExternalToolRequestWithRegisteredTools.js.map +1 -0
  69. package/dist/api/strategies/agentGraphBuilder.d.ts +170 -0
  70. package/dist/api/strategies/agentGraphBuilder.d.ts.map +1 -0
  71. package/dist/api/strategies/agentGraphBuilder.js +299 -0
  72. package/dist/api/strategies/agentGraphBuilder.js.map +1 -0
  73. package/dist/api/strategies/debate.d.ts +12 -1
  74. package/dist/api/strategies/debate.d.ts.map +1 -1
  75. package/dist/api/strategies/debate.js +41 -5
  76. package/dist/api/strategies/debate.js.map +1 -1
  77. package/dist/api/strategies/graphCompiler.d.ts +84 -0
  78. package/dist/api/strategies/graphCompiler.d.ts.map +1 -0
  79. package/dist/api/strategies/graphCompiler.js +617 -0
  80. package/dist/api/strategies/graphCompiler.js.map +1 -0
  81. package/dist/api/strategies/hierarchical.d.ts +15 -1
  82. package/dist/api/strategies/hierarchical.d.ts.map +1 -1
  83. package/dist/api/strategies/hierarchical.js +53 -8
  84. package/dist/api/strategies/hierarchical.js.map +1 -1
  85. package/dist/api/strategies/index.d.ts +29 -4
  86. package/dist/api/strategies/index.d.ts.map +1 -1
  87. package/dist/api/strategies/index.js +28 -4
  88. package/dist/api/strategies/index.js.map +1 -1
  89. package/dist/api/strategies/parallel.d.ts +15 -4
  90. package/dist/api/strategies/parallel.d.ts.map +1 -1
  91. package/dist/api/strategies/parallel.js +53 -16
  92. package/dist/api/strategies/parallel.js.map +1 -1
  93. package/dist/api/strategies/review-loop.d.ts +15 -1
  94. package/dist/api/strategies/review-loop.d.ts.map +1 -1
  95. package/dist/api/strategies/review-loop.js +36 -10
  96. package/dist/api/strategies/review-loop.js.map +1 -1
  97. package/dist/api/strategies/sequential.d.ts +11 -1
  98. package/dist/api/strategies/sequential.d.ts.map +1 -1
  99. package/dist/api/strategies/sequential.js +39 -8
  100. package/dist/api/strategies/sequential.js.map +1 -1
  101. package/dist/api/strategies/shared.d.ts +72 -8
  102. package/dist/api/strategies/shared.d.ts.map +1 -1
  103. package/dist/api/strategies/shared.js +92 -12
  104. package/dist/api/strategies/shared.js.map +1 -1
  105. package/dist/api/streamObject.d.ts +166 -0
  106. package/dist/api/streamObject.d.ts.map +1 -0
  107. package/dist/api/streamObject.js +268 -0
  108. package/dist/api/streamObject.js.map +1 -0
  109. package/dist/api/streamText.d.ts +1 -1
  110. package/dist/api/streamText.d.ts.map +1 -1
  111. package/dist/api/streamText.js +26 -8
  112. package/dist/api/streamText.js.map +1 -1
  113. package/dist/api/toolAdapter.d.ts +44 -8
  114. package/dist/api/toolAdapter.d.ts.map +1 -1
  115. package/dist/api/toolAdapter.js +224 -45
  116. package/dist/api/toolAdapter.js.map +1 -1
  117. package/dist/api/types/AgentOSExternalToolRequest.d.ts +35 -0
  118. package/dist/api/types/AgentOSExternalToolRequest.d.ts.map +1 -0
  119. package/dist/api/types/AgentOSExternalToolRequest.js +2 -0
  120. package/dist/api/types/AgentOSExternalToolRequest.js.map +1 -0
  121. package/dist/api/types/AgentOSResponse.d.ts +25 -0
  122. package/dist/api/types/AgentOSResponse.d.ts.map +1 -1
  123. package/dist/api/types/AgentOSResponse.js +20 -0
  124. package/dist/api/types/AgentOSResponse.js.map +1 -1
  125. package/dist/api/types/AgentOSToolResult.d.ts +11 -0
  126. package/dist/api/types/AgentOSToolResult.d.ts.map +1 -0
  127. package/dist/api/types/AgentOSToolResult.js +2 -0
  128. package/dist/api/types/AgentOSToolResult.js.map +1 -0
  129. package/dist/api/types.d.ts +81 -4
  130. package/dist/api/types.d.ts.map +1 -1
  131. package/dist/api/types.js.map +1 -1
  132. package/dist/api/upscaleImage.d.ts +92 -0
  133. package/dist/api/upscaleImage.d.ts.map +1 -0
  134. package/dist/api/upscaleImage.js +133 -0
  135. package/dist/api/upscaleImage.js.map +1 -0
  136. package/dist/api/variateImage.d.ts +102 -0
  137. package/dist/api/variateImage.d.ts.map +1 -0
  138. package/dist/api/variateImage.js +154 -0
  139. package/dist/api/variateImage.js.map +1 -0
  140. package/dist/cognitive_substrate/GMI.d.ts +16 -2
  141. package/dist/cognitive_substrate/GMI.d.ts.map +1 -1
  142. package/dist/cognitive_substrate/GMI.js +188 -56
  143. package/dist/cognitive_substrate/GMI.js.map +1 -1
  144. package/dist/cognitive_substrate/IGMI.d.ts +10 -0
  145. package/dist/cognitive_substrate/IGMI.d.ts.map +1 -1
  146. package/dist/cognitive_substrate/IGMI.js.map +1 -1
  147. package/dist/config/AgentOSConfig.d.ts +19 -2
  148. package/dist/config/AgentOSConfig.d.ts.map +1 -1
  149. package/dist/config/AgentOSConfig.js +46 -29
  150. package/dist/config/AgentOSConfig.js.map +1 -1
  151. package/dist/core/guardrails/IGuardrailService.d.ts +1 -1
  152. package/dist/core/images/IImageProvider.d.ts +93 -0
  153. package/dist/core/images/IImageProvider.d.ts.map +1 -1
  154. package/dist/core/images/IImageProvider.js.map +1 -1
  155. package/dist/core/images/ImageOperationError.d.ts +52 -0
  156. package/dist/core/images/ImageOperationError.d.ts.map +1 -0
  157. package/dist/core/images/ImageOperationError.js +58 -0
  158. package/dist/core/images/ImageOperationError.js.map +1 -0
  159. package/dist/core/images/imageToBuffer.d.ts +41 -0
  160. package/dist/core/images/imageToBuffer.d.ts.map +1 -0
  161. package/dist/core/images/imageToBuffer.js +95 -0
  162. package/dist/core/images/imageToBuffer.js.map +1 -0
  163. package/dist/core/images/index.d.ts +4 -0
  164. package/dist/core/images/index.d.ts.map +1 -1
  165. package/dist/core/images/index.js +8 -0
  166. package/dist/core/images/index.js.map +1 -1
  167. package/dist/core/images/providers/FalImageProvider.d.ts +208 -0
  168. package/dist/core/images/providers/FalImageProvider.d.ts.map +1 -0
  169. package/dist/core/images/providers/FalImageProvider.js +301 -0
  170. package/dist/core/images/providers/FalImageProvider.js.map +1 -0
  171. package/dist/core/images/providers/FluxImageProvider.d.ts +197 -0
  172. package/dist/core/images/providers/FluxImageProvider.d.ts.map +1 -0
  173. package/dist/core/images/providers/FluxImageProvider.js +271 -0
  174. package/dist/core/images/providers/FluxImageProvider.js.map +1 -0
  175. package/dist/core/images/providers/OpenAIImageProvider.d.ts +33 -1
  176. package/dist/core/images/providers/OpenAIImageProvider.d.ts.map +1 -1
  177. package/dist/core/images/providers/OpenAIImageProvider.js +125 -0
  178. package/dist/core/images/providers/OpenAIImageProvider.js.map +1 -1
  179. package/dist/core/images/providers/ReplicateImageProvider.d.ts +26 -1
  180. package/dist/core/images/providers/ReplicateImageProvider.d.ts.map +1 -1
  181. package/dist/core/images/providers/ReplicateImageProvider.js +118 -0
  182. package/dist/core/images/providers/ReplicateImageProvider.js.map +1 -1
  183. package/dist/core/images/providers/StabilityImageProvider.d.ts +41 -1
  184. package/dist/core/images/providers/StabilityImageProvider.d.ts.map +1 -1
  185. package/dist/core/images/providers/StabilityImageProvider.js +180 -7
  186. package/dist/core/images/providers/StabilityImageProvider.js.map +1 -1
  187. package/dist/core/images/providers/StableDiffusionLocalProvider.d.ts +29 -1
  188. package/dist/core/images/providers/StableDiffusionLocalProvider.d.ts.map +1 -1
  189. package/dist/core/images/providers/StableDiffusionLocalProvider.js +124 -0
  190. package/dist/core/images/providers/StableDiffusionLocalProvider.js.map +1 -1
  191. package/dist/core/llm/IPromptEngine.d.ts +2 -2
  192. package/dist/core/llm/IPromptEngine.d.ts.map +1 -1
  193. package/dist/core/llm/IPromptEngine.js +2 -2
  194. package/dist/core/llm/IPromptEngine.js.map +1 -1
  195. package/dist/core/llm/providers/AIModelProviderManager.d.ts +7 -1
  196. package/dist/core/llm/providers/AIModelProviderManager.d.ts.map +1 -1
  197. package/dist/core/llm/providers/AIModelProviderManager.js +24 -0
  198. package/dist/core/llm/providers/AIModelProviderManager.js.map +1 -1
  199. package/dist/core/llm/providers/errors/AnthropicProviderError.d.ts +42 -0
  200. package/dist/core/llm/providers/errors/AnthropicProviderError.d.ts.map +1 -0
  201. package/dist/core/llm/providers/errors/AnthropicProviderError.js +45 -0
  202. package/dist/core/llm/providers/errors/AnthropicProviderError.js.map +1 -0
  203. package/dist/core/llm/providers/errors/GeminiProviderError.d.ts +45 -0
  204. package/dist/core/llm/providers/errors/GeminiProviderError.d.ts.map +1 -0
  205. package/dist/core/llm/providers/errors/GeminiProviderError.js +46 -0
  206. package/dist/core/llm/providers/errors/GeminiProviderError.js.map +1 -0
  207. package/dist/core/llm/providers/errors/OllamaProviderError.d.ts +1 -1
  208. package/dist/core/llm/providers/errors/OllamaProviderError.d.ts.map +1 -1
  209. package/dist/core/llm/providers/errors/OllamaProviderError.js +1 -1
  210. package/dist/core/llm/providers/errors/OllamaProviderError.js.map +1 -1
  211. package/dist/core/llm/providers/errors/OpenAIProviderError.d.ts +1 -1
  212. package/dist/core/llm/providers/errors/OpenAIProviderError.js +1 -1
  213. package/dist/core/llm/providers/errors/OpenRouterProviderError.d.ts +1 -1
  214. package/dist/core/llm/providers/errors/OpenRouterProviderError.js +1 -1
  215. package/dist/core/llm/providers/implementations/AnthropicProvider.d.ts +340 -0
  216. package/dist/core/llm/providers/implementations/AnthropicProvider.d.ts.map +1 -0
  217. package/dist/core/llm/providers/implementations/AnthropicProvider.js +959 -0
  218. package/dist/core/llm/providers/implementations/AnthropicProvider.js.map +1 -0
  219. package/dist/core/llm/providers/implementations/GeminiProvider.d.ts +339 -0
  220. package/dist/core/llm/providers/implementations/GeminiProvider.d.ts.map +1 -0
  221. package/dist/core/llm/providers/implementations/GeminiProvider.js +1004 -0
  222. package/dist/core/llm/providers/implementations/GeminiProvider.js.map +1 -0
  223. package/dist/core/llm/providers/implementations/GroqProvider.d.ts +105 -0
  224. package/dist/core/llm/providers/implementations/GroqProvider.d.ts.map +1 -0
  225. package/dist/core/llm/providers/implementations/GroqProvider.js +134 -0
  226. package/dist/core/llm/providers/implementations/GroqProvider.js.map +1 -0
  227. package/dist/core/llm/providers/implementations/MistralProvider.d.ts +105 -0
  228. package/dist/core/llm/providers/implementations/MistralProvider.d.ts.map +1 -0
  229. package/dist/core/llm/providers/implementations/MistralProvider.js +146 -0
  230. package/dist/core/llm/providers/implementations/MistralProvider.js.map +1 -0
  231. package/dist/core/llm/providers/implementations/TogetherProvider.d.ts +107 -0
  232. package/dist/core/llm/providers/implementations/TogetherProvider.d.ts.map +1 -0
  233. package/dist/core/llm/providers/implementations/TogetherProvider.js +138 -0
  234. package/dist/core/llm/providers/implementations/TogetherProvider.js.map +1 -0
  235. package/dist/core/llm/providers/implementations/XAIProvider.d.ts +102 -0
  236. package/dist/core/llm/providers/implementations/XAIProvider.d.ts.map +1 -0
  237. package/dist/core/llm/providers/implementations/XAIProvider.js +123 -0
  238. package/dist/core/llm/providers/implementations/XAIProvider.js.map +1 -0
  239. package/dist/core/orchestration/AgentOrchestrator.d.ts.map +1 -1
  240. package/dist/core/orchestration/AgentOrchestrator.js +26 -5
  241. package/dist/core/orchestration/AgentOrchestrator.js.map +1 -1
  242. package/dist/core/tools/IToolOrchestrator.d.ts +2 -2
  243. package/dist/core/tools/IToolOrchestrator.d.ts.map +1 -1
  244. package/dist/core/tools/ToolExecutor.d.ts +3 -0
  245. package/dist/core/tools/ToolExecutor.d.ts.map +1 -1
  246. package/dist/core/tools/ToolExecutor.js +2 -1
  247. package/dist/core/tools/ToolExecutor.js.map +1 -1
  248. package/dist/core/tools/ToolOrchestrator.d.ts +7 -7
  249. package/dist/core/tools/ToolOrchestrator.d.ts.map +1 -1
  250. package/dist/core/tools/ToolOrchestrator.js +135 -36
  251. package/dist/core/tools/ToolOrchestrator.js.map +1 -1
  252. package/dist/core/tools/permissions/ToolPermissionManager.d.ts +6 -5
  253. package/dist/core/tools/permissions/ToolPermissionManager.d.ts.map +1 -1
  254. package/dist/core/tools/permissions/ToolPermissionManager.js +47 -21
  255. package/dist/core/tools/permissions/ToolPermissionManager.js.map +1 -1
  256. package/dist/core/vision/VisionPipeline.d.ts +437 -0
  257. package/dist/core/vision/VisionPipeline.d.ts.map +1 -0
  258. package/dist/core/vision/VisionPipeline.js +1113 -0
  259. package/dist/core/vision/VisionPipeline.js.map +1 -0
  260. package/dist/core/vision/index.d.ts +97 -0
  261. package/dist/core/vision/index.d.ts.map +1 -0
  262. package/dist/core/vision/index.js +182 -0
  263. package/dist/core/vision/index.js.map +1 -0
  264. package/dist/core/vision/providers/LLMVisionProvider.d.ts +135 -0
  265. package/dist/core/vision/providers/LLMVisionProvider.d.ts.map +1 -0
  266. package/dist/core/vision/providers/LLMVisionProvider.js +136 -0
  267. package/dist/core/vision/providers/LLMVisionProvider.js.map +1 -0
  268. package/dist/core/vision/providers/PipelineVisionProvider.d.ts +154 -0
  269. package/dist/core/vision/providers/PipelineVisionProvider.d.ts.map +1 -0
  270. package/dist/core/vision/providers/PipelineVisionProvider.js +160 -0
  271. package/dist/core/vision/providers/PipelineVisionProvider.js.map +1 -0
  272. package/dist/core/vision/types.d.ts +286 -0
  273. package/dist/core/vision/types.d.ts.map +1 -0
  274. package/dist/core/vision/types.js +24 -0
  275. package/dist/core/vision/types.js.map +1 -0
  276. package/dist/discovery/CapabilityDiscoveryEngine.d.ts +1 -1
  277. package/dist/discovery/CapabilityDiscoveryEngine.d.ts.map +1 -1
  278. package/dist/discovery/CapabilityDiscoveryEngine.js +1 -1
  279. package/dist/discovery/CapabilityDiscoveryEngine.js.map +1 -1
  280. package/dist/emergent/ComposableToolBuilder.d.ts +15 -4
  281. package/dist/emergent/ComposableToolBuilder.d.ts.map +1 -1
  282. package/dist/emergent/ComposableToolBuilder.js +29 -14
  283. package/dist/emergent/ComposableToolBuilder.js.map +1 -1
  284. package/dist/emergent/EmergentCapabilityEngine.d.ts +3 -3
  285. package/dist/emergent/EmergentCapabilityEngine.d.ts.map +1 -1
  286. package/dist/emergent/EmergentCapabilityEngine.js +15 -12
  287. package/dist/emergent/EmergentCapabilityEngine.js.map +1 -1
  288. package/dist/emergent/EmergentJudge.d.ts +20 -0
  289. package/dist/emergent/EmergentJudge.d.ts.map +1 -1
  290. package/dist/emergent/EmergentJudge.js +121 -26
  291. package/dist/emergent/EmergentJudge.js.map +1 -1
  292. package/dist/emergent/EmergentToolRegistry.d.ts +17 -0
  293. package/dist/emergent/EmergentToolRegistry.d.ts.map +1 -1
  294. package/dist/emergent/EmergentToolRegistry.js +26 -0
  295. package/dist/emergent/EmergentToolRegistry.js.map +1 -1
  296. package/dist/emergent/ForgeToolMetaTool.d.ts +1 -1
  297. package/dist/emergent/ForgeToolMetaTool.d.ts.map +1 -1
  298. package/dist/emergent/ForgeToolMetaTool.js +15 -2
  299. package/dist/emergent/ForgeToolMetaTool.js.map +1 -1
  300. package/dist/emergent/SandboxedToolForge.d.ts +2 -2
  301. package/dist/emergent/SandboxedToolForge.d.ts.map +1 -1
  302. package/dist/emergent/SandboxedToolForge.js +13 -23
  303. package/dist/emergent/SandboxedToolForge.js.map +1 -1
  304. package/dist/emergent/SkillExporter.d.ts +119 -0
  305. package/dist/emergent/SkillExporter.d.ts.map +1 -0
  306. package/dist/emergent/SkillExporter.js +344 -0
  307. package/dist/emergent/SkillExporter.js.map +1 -0
  308. package/dist/emergent/index.d.ts +1 -0
  309. package/dist/emergent/index.d.ts.map +1 -1
  310. package/dist/emergent/index.js +1 -0
  311. package/dist/emergent/index.js.map +1 -1
  312. package/dist/emergent/types.d.ts +4 -4
  313. package/dist/index.d.ts +30 -5
  314. package/dist/index.d.ts.map +1 -1
  315. package/dist/index.js +19 -2
  316. package/dist/index.js.map +1 -1
  317. package/dist/memory/facade/Memory.d.ts.map +1 -1
  318. package/dist/memory/facade/Memory.js +8 -0
  319. package/dist/memory/facade/Memory.js.map +1 -1
  320. package/dist/memory/facade/types.d.ts +10 -0
  321. package/dist/memory/facade/types.d.ts.map +1 -1
  322. package/dist/memory/index.d.ts +15 -7
  323. package/dist/memory/index.d.ts.map +1 -1
  324. package/dist/memory/index.js +7 -0
  325. package/dist/memory/index.js.map +1 -1
  326. package/dist/memory/ingestion/DoclingLoader.d.ts +3 -3
  327. package/dist/memory/ingestion/DoclingLoader.d.ts.map +1 -1
  328. package/dist/memory/ingestion/DoclingLoader.js +12 -8
  329. package/dist/memory/ingestion/DoclingLoader.js.map +1 -1
  330. package/dist/memory/ingestion/FolderScanner.d.ts +7 -7
  331. package/dist/memory/ingestion/FolderScanner.d.ts.map +1 -1
  332. package/dist/memory/ingestion/FolderScanner.js +6 -6
  333. package/dist/memory/ingestion/FolderScanner.js.map +1 -1
  334. package/dist/memory/ingestion/LoaderRegistry.d.ts +8 -8
  335. package/dist/memory/ingestion/LoaderRegistry.d.ts.map +1 -1
  336. package/dist/memory/ingestion/LoaderRegistry.js +9 -11
  337. package/dist/memory/ingestion/LoaderRegistry.js.map +1 -1
  338. package/dist/memory/ingestion/MultimodalAggregator.d.ts +1 -1
  339. package/dist/memory/ingestion/MultimodalAggregator.js +1 -1
  340. package/dist/memory/ingestion/OcrPdfLoader.d.ts +2 -2
  341. package/dist/memory/ingestion/OcrPdfLoader.d.ts.map +1 -1
  342. package/dist/memory/ingestion/OcrPdfLoader.js +12 -8
  343. package/dist/memory/ingestion/OcrPdfLoader.js.map +1 -1
  344. package/dist/memory/ingestion/PdfLoader.d.ts +8 -8
  345. package/dist/memory/ingestion/PdfLoader.d.ts.map +1 -1
  346. package/dist/memory/ingestion/PdfLoader.js +13 -10
  347. package/dist/memory/ingestion/PdfLoader.js.map +1 -1
  348. package/dist/memory/io/MarkdownExporter.d.ts +1 -1
  349. package/dist/memory/io/MarkdownExporter.d.ts.map +1 -1
  350. package/dist/memory/io/MarkdownExporter.js +1 -1
  351. package/dist/memory/io/MarkdownExporter.js.map +1 -1
  352. package/dist/memory/observation/MemoryObserver.d.ts +63 -1
  353. package/dist/memory/observation/MemoryObserver.d.ts.map +1 -1
  354. package/dist/memory/observation/MemoryObserver.js +115 -4
  355. package/dist/memory/observation/MemoryObserver.js.map +1 -1
  356. package/dist/memory/observation/ObservationCompressor.d.ts +88 -0
  357. package/dist/memory/observation/ObservationCompressor.d.ts.map +1 -0
  358. package/dist/memory/observation/ObservationCompressor.js +207 -0
  359. package/dist/memory/observation/ObservationCompressor.js.map +1 -0
  360. package/dist/memory/observation/ObservationReflector.d.ts +82 -0
  361. package/dist/memory/observation/ObservationReflector.d.ts.map +1 -0
  362. package/dist/memory/observation/ObservationReflector.js +212 -0
  363. package/dist/memory/observation/ObservationReflector.js.map +1 -0
  364. package/dist/memory/observation/temporal.d.ts +54 -0
  365. package/dist/memory/observation/temporal.d.ts.map +1 -0
  366. package/dist/memory/observation/temporal.js +115 -0
  367. package/dist/memory/observation/temporal.js.map +1 -0
  368. package/dist/memory/tools/MemoryAddTool.d.ts +2 -2
  369. package/dist/memory/tools/MemoryAddTool.d.ts.map +1 -1
  370. package/dist/memory/tools/MemoryAddTool.js +8 -3
  371. package/dist/memory/tools/MemoryAddTool.js.map +1 -1
  372. package/dist/memory/tools/MemorySearchTool.d.ts +3 -3
  373. package/dist/memory/tools/MemorySearchTool.d.ts.map +1 -1
  374. package/dist/memory/tools/MemorySearchTool.js +11 -9
  375. package/dist/memory/tools/MemorySearchTool.js.map +1 -1
  376. package/dist/memory/tools/scopeContext.d.ts +11 -0
  377. package/dist/memory/tools/scopeContext.d.ts.map +1 -0
  378. package/dist/memory/tools/scopeContext.js +46 -0
  379. package/dist/memory/tools/scopeContext.js.map +1 -0
  380. package/dist/orchestration/builders/AgentGraph.d.ts +12 -11
  381. package/dist/orchestration/builders/AgentGraph.d.ts.map +1 -1
  382. package/dist/orchestration/builders/AgentGraph.js +12 -11
  383. package/dist/orchestration/builders/AgentGraph.js.map +1 -1
  384. package/dist/orchestration/builders/VoiceNodeBuilder.d.ts +82 -25
  385. package/dist/orchestration/builders/VoiceNodeBuilder.d.ts.map +1 -1
  386. package/dist/orchestration/builders/VoiceNodeBuilder.js +86 -26
  387. package/dist/orchestration/builders/VoiceNodeBuilder.js.map +1 -1
  388. package/dist/orchestration/builders/WorkflowBuilder.d.ts +1 -1
  389. package/dist/orchestration/builders/WorkflowBuilder.d.ts.map +1 -1
  390. package/dist/orchestration/builders/WorkflowBuilder.js +1 -1
  391. package/dist/orchestration/builders/WorkflowBuilder.js.map +1 -1
  392. package/dist/orchestration/checkpoint/InMemoryCheckpointStore.d.ts +7 -54
  393. package/dist/orchestration/checkpoint/InMemoryCheckpointStore.d.ts.map +1 -1
  394. package/dist/orchestration/checkpoint/InMemoryCheckpointStore.js +8 -56
  395. package/dist/orchestration/checkpoint/InMemoryCheckpointStore.js.map +1 -1
  396. package/dist/orchestration/events/GraphEvent.d.ts +67 -5
  397. package/dist/orchestration/events/GraphEvent.d.ts.map +1 -1
  398. package/dist/orchestration/events/GraphEvent.js.map +1 -1
  399. package/dist/orchestration/runtime/GraphRuntime.d.ts.map +1 -1
  400. package/dist/orchestration/runtime/GraphRuntime.js +151 -1
  401. package/dist/orchestration/runtime/GraphRuntime.js.map +1 -1
  402. package/dist/orchestration/runtime/LoopController.d.ts +3 -3
  403. package/dist/orchestration/runtime/LoopController.d.ts.map +1 -1
  404. package/dist/orchestration/runtime/LoopController.js.map +1 -1
  405. package/dist/orchestration/runtime/StateManager.d.ts +3 -3
  406. package/dist/orchestration/runtime/StateManager.js +3 -3
  407. package/dist/orchestration/runtime/VoiceNodeExecutor.d.ts +103 -26
  408. package/dist/orchestration/runtime/VoiceNodeExecutor.d.ts.map +1 -1
  409. package/dist/orchestration/runtime/VoiceNodeExecutor.js +155 -43
  410. package/dist/orchestration/runtime/VoiceNodeExecutor.js.map +1 -1
  411. package/dist/orchestration/runtime/VoiceTransportAdapter.d.ts +95 -33
  412. package/dist/orchestration/runtime/VoiceTransportAdapter.d.ts.map +1 -1
  413. package/dist/orchestration/runtime/VoiceTransportAdapter.js +83 -29
  414. package/dist/orchestration/runtime/VoiceTransportAdapter.js.map +1 -1
  415. package/dist/orchestration/runtime/VoiceTurnCollector.d.ts +73 -20
  416. package/dist/orchestration/runtime/VoiceTurnCollector.d.ts.map +1 -1
  417. package/dist/orchestration/runtime/VoiceTurnCollector.js +84 -23
  418. package/dist/orchestration/runtime/VoiceTurnCollector.js.map +1 -1
  419. package/dist/query-router/KeywordFallback.d.ts +70 -0
  420. package/dist/query-router/KeywordFallback.d.ts.map +1 -0
  421. package/dist/query-router/KeywordFallback.js +132 -0
  422. package/dist/query-router/KeywordFallback.js.map +1 -0
  423. package/dist/query-router/QueryClassifier.d.ts +140 -0
  424. package/dist/query-router/QueryClassifier.d.ts.map +1 -0
  425. package/dist/query-router/QueryClassifier.js +223 -0
  426. package/dist/query-router/QueryClassifier.js.map +1 -0
  427. package/dist/query-router/QueryDispatcher.d.ts +139 -0
  428. package/dist/query-router/QueryDispatcher.d.ts.map +1 -0
  429. package/dist/query-router/QueryDispatcher.js +297 -0
  430. package/dist/query-router/QueryDispatcher.js.map +1 -0
  431. package/dist/query-router/QueryGenerator.d.ts +184 -0
  432. package/dist/query-router/QueryGenerator.d.ts.map +1 -0
  433. package/dist/query-router/QueryGenerator.js +241 -0
  434. package/dist/query-router/QueryGenerator.js.map +1 -0
  435. package/dist/query-router/QueryRouter.d.ts +292 -0
  436. package/dist/query-router/QueryRouter.d.ts.map +1 -0
  437. package/dist/query-router/QueryRouter.js +803 -0
  438. package/dist/query-router/QueryRouter.js.map +1 -0
  439. package/dist/query-router/TopicExtractor.d.ts +73 -0
  440. package/dist/query-router/TopicExtractor.d.ts.map +1 -0
  441. package/dist/query-router/TopicExtractor.js +95 -0
  442. package/dist/query-router/TopicExtractor.js.map +1 -0
  443. package/dist/query-router/index.d.ts +40 -0
  444. package/dist/query-router/index.d.ts.map +1 -0
  445. package/dist/query-router/index.js +46 -0
  446. package/dist/query-router/index.js.map +1 -0
  447. package/dist/query-router/types.d.ts +508 -0
  448. package/dist/query-router/types.d.ts.map +1 -0
  449. package/dist/query-router/types.js +39 -0
  450. package/dist/query-router/types.js.map +1 -0
  451. package/dist/rag/index.d.ts +5 -0
  452. package/dist/rag/index.d.ts.map +1 -1
  453. package/dist/rag/index.js +7 -0
  454. package/dist/rag/index.js.map +1 -1
  455. package/dist/rag/multimodal/LLMVisionAdapter.d.ts +43 -0
  456. package/dist/rag/multimodal/LLMVisionAdapter.d.ts.map +1 -0
  457. package/dist/rag/multimodal/LLMVisionAdapter.js +46 -0
  458. package/dist/rag/multimodal/LLMVisionAdapter.js.map +1 -0
  459. package/dist/rag/multimodal/MultimodalIndexer.d.ts +244 -0
  460. package/dist/rag/multimodal/MultimodalIndexer.d.ts.map +1 -0
  461. package/dist/rag/multimodal/MultimodalIndexer.js +411 -0
  462. package/dist/rag/multimodal/MultimodalIndexer.js.map +1 -0
  463. package/dist/rag/multimodal/MultimodalMemoryBridge.d.ts +448 -0
  464. package/dist/rag/multimodal/MultimodalMemoryBridge.d.ts.map +1 -0
  465. package/dist/rag/multimodal/MultimodalMemoryBridge.js +941 -0
  466. package/dist/rag/multimodal/MultimodalMemoryBridge.js.map +1 -0
  467. package/dist/rag/multimodal/SpeechProviderAdapter.d.ts +139 -0
  468. package/dist/rag/multimodal/SpeechProviderAdapter.d.ts.map +1 -0
  469. package/dist/rag/multimodal/SpeechProviderAdapter.js +143 -0
  470. package/dist/rag/multimodal/SpeechProviderAdapter.js.map +1 -0
  471. package/dist/rag/multimodal/createMultimodalIndexerFromResolver.d.ts +172 -0
  472. package/dist/rag/multimodal/createMultimodalIndexerFromResolver.d.ts.map +1 -0
  473. package/dist/rag/multimodal/createMultimodalIndexerFromResolver.js +152 -0
  474. package/dist/rag/multimodal/createMultimodalIndexerFromResolver.js.map +1 -0
  475. package/dist/rag/multimodal/index.d.ts +44 -0
  476. package/dist/rag/multimodal/index.d.ts.map +1 -0
  477. package/dist/rag/multimodal/index.js +42 -0
  478. package/dist/rag/multimodal/index.js.map +1 -0
  479. package/dist/rag/multimodal/types.d.ts +276 -0
  480. package/dist/rag/multimodal/types.d.ts.map +1 -0
  481. package/dist/rag/multimodal/types.js +26 -0
  482. package/dist/rag/multimodal/types.js.map +1 -0
  483. package/dist/social-posting/SocialPostManager.d.ts +3 -3
  484. package/dist/social-posting/SocialPostManager.d.ts.map +1 -1
  485. package/dist/social-posting/SocialPostManager.js +3 -5
  486. package/dist/social-posting/SocialPostManager.js.map +1 -1
  487. package/dist/speech/FallbackProxy.d.ts +6 -6
  488. package/dist/speech/FallbackProxy.d.ts.map +1 -1
  489. package/dist/speech/FallbackProxy.js +3 -3
  490. package/dist/speech/FallbackProxy.js.map +1 -1
  491. package/dist/speech/SpeechProviderResolver.d.ts +8 -8
  492. package/dist/speech/SpeechProviderResolver.d.ts.map +1 -1
  493. package/dist/speech/SpeechProviderResolver.js +22 -11
  494. package/dist/speech/SpeechProviderResolver.js.map +1 -1
  495. package/dist/speech/SpeechRuntime.d.ts +1 -5
  496. package/dist/speech/SpeechRuntime.d.ts.map +1 -1
  497. package/dist/speech/SpeechRuntime.js +17 -9
  498. package/dist/speech/SpeechRuntime.js.map +1 -1
  499. package/dist/speech/providers/AssemblyAISTTProvider.d.ts +4 -4
  500. package/dist/speech/providers/AssemblyAISTTProvider.js +4 -4
  501. package/dist/speech/providers/AzureSpeechTTSProvider.d.ts +3 -3
  502. package/dist/speech/providers/AzureSpeechTTSProvider.js +2 -2
  503. package/dist/speech/providers/AzureSpeechTTSProvider.js.map +1 -1
  504. package/dist/speech/providers/BuiltInAdaptiveVadProvider.d.ts +9 -9
  505. package/dist/speech/providers/BuiltInAdaptiveVadProvider.d.ts.map +1 -1
  506. package/dist/speech/providers/BuiltInAdaptiveVadProvider.js +5 -5
  507. package/dist/speech/providers/BuiltInAdaptiveVadProvider.js.map +1 -1
  508. package/dist/speech/providers/DeepgramBatchSTTProvider.d.ts +2 -2
  509. package/dist/speech/providers/DeepgramBatchSTTProvider.js +2 -2
  510. package/dist/speech/providers/OpenAITextToSpeechProvider.d.ts +3 -3
  511. package/dist/speech/providers/OpenAITextToSpeechProvider.js +2 -2
  512. package/dist/speech/providers/OpenAIWhisperSpeechToTextProvider.d.ts +1 -1
  513. package/dist/speech/providers/OpenAIWhisperSpeechToTextProvider.d.ts.map +1 -1
  514. package/dist/speech/providers/OpenAIWhisperSpeechToTextProvider.js +1 -1
  515. package/dist/speech/providers/OpenAIWhisperSpeechToTextProvider.js.map +1 -1
  516. package/dist/voice/TelephonyStreamTransport.d.ts +6 -6
  517. package/dist/voice/TelephonyStreamTransport.d.ts.map +1 -1
  518. package/dist/voice/TelephonyStreamTransport.js +5 -5
  519. package/dist/voice/TelephonyStreamTransport.js.map +1 -1
  520. package/dist/voice-pipeline/AcousticEndpointDetector.d.ts +4 -4
  521. package/dist/voice-pipeline/AcousticEndpointDetector.d.ts.map +1 -1
  522. package/dist/voice-pipeline/AcousticEndpointDetector.js +4 -4
  523. package/dist/voice-pipeline/AcousticEndpointDetector.js.map +1 -1
  524. package/dist/voice-pipeline/HardCutBargeinHandler.d.ts +3 -3
  525. package/dist/voice-pipeline/HardCutBargeinHandler.js +3 -3
  526. package/dist/voice-pipeline/HeuristicEndpointDetector.d.ts +3 -3
  527. package/dist/voice-pipeline/HeuristicEndpointDetector.d.ts.map +1 -1
  528. package/dist/voice-pipeline/HeuristicEndpointDetector.js +3 -3
  529. package/dist/voice-pipeline/HeuristicEndpointDetector.js.map +1 -1
  530. package/dist/voice-pipeline/SoftFadeBargeinHandler.d.ts +5 -5
  531. package/dist/voice-pipeline/SoftFadeBargeinHandler.js +1 -1
  532. package/dist/voice-pipeline/VoiceInterruptError.d.ts +6 -6
  533. package/dist/voice-pipeline/VoiceInterruptError.d.ts.map +1 -1
  534. package/dist/voice-pipeline/VoiceInterruptError.js +4 -4
  535. package/dist/voice-pipeline/VoiceInterruptError.js.map +1 -1
  536. package/dist/voice-pipeline/VoicePipelineOrchestrator.d.ts +9 -9
  537. package/dist/voice-pipeline/VoicePipelineOrchestrator.d.ts.map +1 -1
  538. package/dist/voice-pipeline/VoicePipelineOrchestrator.js +8 -8
  539. package/dist/voice-pipeline/VoicePipelineOrchestrator.js.map +1 -1
  540. package/dist/voice-pipeline/WebRTCStreamTransport.d.ts +421 -0
  541. package/dist/voice-pipeline/WebRTCStreamTransport.d.ts.map +1 -0
  542. package/dist/voice-pipeline/WebRTCStreamTransport.js +573 -0
  543. package/dist/voice-pipeline/WebRTCStreamTransport.js.map +1 -0
  544. package/dist/voice-pipeline/WebSocketStreamTransport.d.ts +8 -8
  545. package/dist/voice-pipeline/WebSocketStreamTransport.js +5 -5
  546. package/dist/voice-pipeline/index.d.ts +1 -0
  547. package/dist/voice-pipeline/index.d.ts.map +1 -1
  548. package/dist/voice-pipeline/index.js +2 -0
  549. package/dist/voice-pipeline/index.js.map +1 -1
  550. package/dist/voice-pipeline/types.d.ts +43 -43
  551. package/dist/voice-pipeline/types.d.ts.map +1 -1
  552. package/package.json +19 -1
@@ -0,0 +1,1113 @@
1
+ /**
2
+ * @module core/vision/VisionPipeline
3
+ *
4
+ * Unified vision pipeline with progressive enhancement.
5
+ *
6
+ * Processes images through configurable tiers:
7
+ *
8
+ * ```
9
+ * ┌─────────────────────────────────────────────────────────────────────┐
10
+ * │ Image Buffer / URL │
11
+ * │ ↓ │
12
+ * │ Preprocessing (sharp: resize, grayscale, sharpen, normalize) │
13
+ * │ ↓ │
14
+ * │ Tier 1 — Local OCR (PaddleOCR or Tesseract.js) │
15
+ * │ ↓ confidence < threshold? │
16
+ * │ Tier 2 — Local Vision (TrOCR / Florence-2) │
17
+ * │ ↓ still below threshold? │
18
+ * │ Tier 3 — Cloud Vision (GPT-4o / Claude / Gemini via generateText) │
19
+ * │ ↓ │
20
+ * │ Merge: highest-confidence text wins, structured layout preserved │
21
+ * │ │
22
+ * │ [parallel] CLIP embedding runs alongside all tiers │
23
+ * └─────────────────────────────────────────────────────────────────────┘
24
+ * ```
25
+ *
26
+ * ## Dependency loading
27
+ *
28
+ * All heavy ML dependencies (ppu-paddle-ocr, tesseract.js,
29
+ * \@huggingface/transformers) are loaded lazily via dynamic `import()`.
30
+ * If a dependency is missing, the pipeline throws a helpful error
31
+ * with installation instructions — it never crashes on missing
32
+ * optional peer deps at module load time.
33
+ *
34
+ * ## Strategy behaviours
35
+ *
36
+ * | Strategy | Tier 1 | Tier 2 | Tier 3 | Notes |
37
+ * |----------|--------|--------|--------|-------|
38
+ * | progressive | Always | If low confidence | If still low | Default |
39
+ * | local-only | Always | Always | Never | Air-gapped |
40
+ * | cloud-only | Never | Never | Always | Best quality |
41
+ * | parallel | Always | Always | Always | Merge best |
42
+ *
43
+ * @see {@link VisionPipelineConfig} for configuration options.
44
+ * @see {@link VisionResult} for the output shape.
45
+ * @see {@link createVisionPipeline} for the auto-detecting factory.
46
+ *
47
+ * @example
48
+ * ```typescript
49
+ * const pipeline = new VisionPipeline({
50
+ * strategy: 'progressive',
51
+ * ocr: 'paddle',
52
+ * handwriting: true,
53
+ * documentAI: true,
54
+ * embedding: true,
55
+ * cloudProvider: 'openai',
56
+ * confidenceThreshold: 0.8,
57
+ * });
58
+ *
59
+ * const result = await pipeline.process(imageBuffer);
60
+ * console.log(result.text); // extracted text
61
+ * console.log(result.category); // 'printed-text' | 'handwritten' | etc.
62
+ * console.log(result.embedding); // CLIP vector for search
63
+ * console.log(result.layout); // structured document layout
64
+ * ```
65
+ */
66
+ // ---------------------------------------------------------------------------
67
+ // Constants
68
+ // ---------------------------------------------------------------------------
69
+ /**
70
+ * Default confidence threshold for the progressive strategy.
71
+ * OCR results above this threshold are accepted without cloud escalation.
72
+ */
73
const DEFAULT_CONFIDENCE_THRESHOLD = 0.7;
/**
 * Fixed confidence attached to every cloud vision tier result.
 * The cloud call in this module only yields text, so a constant
 * score is used in place of a measured one.
 */
const CLOUD_VISION_CONFIDENCE = 0.95;
/**
 * Instruction text sent together with the image to the cloud vision
 * provider. It asks for a description, a verbatim transcription of any
 * visible text, the content type, and preserved reading order.
 */
const CLOUD_VISION_PROMPT = [
    'Describe this image in detail. Extract all visible text exactly as written. ',
    'Identify the type of content (printed document, handwritten note, photograph, ',
    'diagram, screenshot, etc.). If the image contains a document, preserve the ',
    'logical reading order and structure.',
].join('');
89
+ // ---------------------------------------------------------------------------
90
+ // VisionPipeline
91
+ // ---------------------------------------------------------------------------
92
+ /**
93
+ * Unified vision pipeline with progressive enhancement.
94
+ *
95
+ * Processes images through up to three tiers of increasing capability:
96
+ * 1. Local OCR (PaddleOCR / Tesseract.js) — fast, free, offline
97
+ * 2. Local Vision Models (TrOCR / Florence-2 / CLIP) — offline but slower
98
+ * 3. Cloud Vision LLMs (GPT-4o, Claude, Gemini) — best quality, API cost
99
+ *
100
+ * All heavy dependencies are loaded lazily on first use. The pipeline
101
+ * never imports ML libraries at module load time, so it's safe to
102
+ * instantiate even when optional peer deps are missing — errors only
103
+ * surface when a tier that needs them actually runs.
104
+ *
105
+ * @see {@link createVisionPipeline} for automatic provider detection.
106
+ */
107
+ export class VisionPipeline {
108
+ // -------------------------------------------------------------------------
109
+ // Constructor
110
+ // -------------------------------------------------------------------------
111
+ /**
112
+ * Create a new vision pipeline.
113
+ *
114
+ * @param config - Pipeline configuration. All heavy dependencies are loaded
115
+ * lazily, so construction is synchronous and never imports ML libraries.
116
+ *
117
+ * @example
118
+ * ```typescript
119
+ * const pipeline = new VisionPipeline({
120
+ * strategy: 'progressive',
121
+ * ocr: 'paddle',
122
+ * handwriting: true,
123
+ * cloudProvider: 'openai',
124
+ * });
125
+ * ```
126
+ */
127
+ constructor(config) {
128
+ /** Whether dispose() has been called. Guards against use-after-free. */
129
+ this._disposed = false;
130
+ this._config = { ...config };
131
+ }
132
+ // -------------------------------------------------------------------------
133
+ // Public API
134
+ // -------------------------------------------------------------------------
135
+ /**
136
+ * Process an image through the configured tiers.
137
+ *
138
+ * Automatically detects content type (printed text, handwritten, diagram,
139
+ * etc.) and routes through the appropriate processing tiers based on the
140
+ * configured {@link VisionStrategy}.
141
+ *
142
+ * @param image - Image data as a Buffer or file-path / URL string.
143
+ * Buffers are preprocessed with sharp (if configured). URL strings
144
+ * are passed directly to providers that support them.
145
+ * @param options - Optional overrides for this specific invocation.
146
+ * @param options.forceCategory - Force a specific content category
147
+ * instead of auto-detecting from OCR confidence heuristics.
148
+ * @param options.tiers - Run only these specific tiers, ignoring
149
+ * the strategy's normal routing logic.
150
+ * @returns Aggregated vision result with text, confidence, embeddings, etc.
151
+ *
152
+ * @throws {Error} If all configured tiers fail to produce a result.
153
+ * @throws {Error} If a required dependency (e.g. ppu-paddle-ocr) is missing.
154
+ * @throws {Error} If `dispose()` was already called.
155
+ *
156
+ * @example
157
+ * ```typescript
158
+ * // Full progressive pipeline
159
+ * const result = await pipeline.process(imageBuffer);
160
+ *
161
+ * // Force handwriting mode
162
+ * const hw = await pipeline.process(scanBuffer, {
163
+ * forceCategory: 'handwritten',
164
+ * });
165
+ *
166
+ * // Only run OCR and embedding, skip everything else
167
+ * const partial = await pipeline.process(imageBuffer, {
168
+ * tiers: ['ocr', 'embedding'],
169
+ * });
170
+ * ```
171
+ */
172
+ async process(image, options) {
173
+ this._assertNotDisposed();
174
+ const startTime = Date.now();
175
+ const { strategy } = this._config;
176
+ const threshold = this._config.confidenceThreshold ?? DEFAULT_CONFIDENCE_THRESHOLD;
177
+ // Preprocess the image (resize, grayscale, etc.) if it's a Buffer
178
+ const preprocessed = Buffer.isBuffer(image)
179
+ ? await this._preprocess(image)
180
+ : image;
181
+ const tierResults = [];
182
+ let embedding;
183
+ let layout;
184
+ const activeTiers = [];
185
+ // Determine which tiers to run based on strategy (or explicit override)
186
+ const requestedTiers = options?.tiers;
187
+ // -----------------------------------------------------------------------
188
+ // CLIP embedding — runs in parallel with everything else when enabled,
189
+ // because it doesn't affect the text extraction path.
190
+ // -----------------------------------------------------------------------
191
+ const embeddingPromise = this._shouldRunTier('embedding', strategy, requestedTiers)
192
+ ? this._runClipEmbedding(preprocessed).catch(() => undefined)
193
+ : Promise.resolve(undefined);
194
+ // -----------------------------------------------------------------------
195
+ // Strategy: cloud-only — skip all local tiers
196
+ // -----------------------------------------------------------------------
197
+ if (strategy === 'cloud-only' && !requestedTiers) {
198
+ const cloudResult = await this._runCloudVision(preprocessed);
199
+ tierResults.push(cloudResult);
200
+ activeTiers.push('cloud-vision');
201
+ embedding = await embeddingPromise;
202
+ if (embedding)
203
+ activeTiers.push('embedding');
204
+ return this._assembleResult(tierResults, activeTiers, embedding, layout, options?.forceCategory, startTime);
205
+ }
206
+ // -----------------------------------------------------------------------
207
+ // Tier 1 — Local OCR (PaddleOCR or Tesseract.js)
208
+ // -----------------------------------------------------------------------
209
+ let ocrResult;
210
+ if (this._shouldRunTier('ocr', strategy, requestedTiers)) {
211
+ ocrResult = await this._runOcr(preprocessed);
212
+ tierResults.push(ocrResult);
213
+ activeTiers.push('ocr');
214
+ // In progressive mode, if OCR confidence is high enough, we can
215
+ // skip expensive downstream tiers and return early.
216
+ if (strategy === 'progressive' &&
217
+ !requestedTiers &&
218
+ ocrResult.confidence >= threshold) {
219
+ embedding = await embeddingPromise;
220
+ if (embedding)
221
+ activeTiers.push('embedding');
222
+ return this._assembleResult(tierResults, activeTiers, embedding, layout, options?.forceCategory, startTime);
223
+ }
224
+ }
225
+ // -----------------------------------------------------------------------
226
+ // Content category detection — decides which Tier 2 models to invoke
227
+ // -----------------------------------------------------------------------
228
+ const category = options?.forceCategory ?? this._detectCategory(ocrResult);
229
+ // -----------------------------------------------------------------------
230
+ // Tier 2a — Handwriting recognition (TrOCR)
231
+ // Triggered when content appears handwritten (low OCR confidence +
232
+ // single-char region heuristic) or when forced via forceCategory.
233
+ // -----------------------------------------------------------------------
234
+ if (this._shouldRunTier('handwriting', strategy, requestedTiers) &&
235
+ (category === 'handwritten' || category === 'mixed')) {
236
+ try {
237
+ const hwResult = await this._runTrOcr(preprocessed);
238
+ tierResults.push(hwResult);
239
+ activeTiers.push('handwriting');
240
+ }
241
+ catch {
242
+ // TrOCR failure is non-fatal — we still have OCR or cloud fallback
243
+ }
244
+ }
245
+ // -----------------------------------------------------------------------
246
+ // Tier 2b — Document understanding (Florence-2)
247
+ // Triggered for complex layouts (many regions with varying sizes).
248
+ // -----------------------------------------------------------------------
249
+ if (this._shouldRunTier('document-ai', strategy, requestedTiers) &&
250
+ (category === 'document-layout' || category === 'mixed')) {
251
+ try {
252
+ const docResult = await this._runFlorence2(preprocessed);
253
+ tierResults.push(docResult.tierResult);
254
+ activeTiers.push('document-ai');
255
+ layout = docResult.layout;
256
+ }
257
+ catch {
258
+ // Florence-2 failure is non-fatal
259
+ }
260
+ }
261
+ // -----------------------------------------------------------------------
262
+ // Tier 3 — Cloud Vision (GPT-4o / Claude / Gemini)
263
+ // In progressive mode: only if we're still below threshold.
264
+ // In parallel mode: always runs.
265
+ // In local-only mode: never runs.
266
+ // -----------------------------------------------------------------------
267
+ const bestLocalConfidence = this._bestConfidence(tierResults);
268
+ if (this._shouldRunCloudVision(strategy, bestLocalConfidence, threshold, requestedTiers)) {
269
+ try {
270
+ const cloudResult = await this._runCloudVision(preprocessed);
271
+ tierResults.push(cloudResult);
272
+ activeTiers.push('cloud-vision');
273
+ }
274
+ catch {
275
+ // Cloud failure is non-fatal if we have local results
276
+ if (tierResults.length === 0) {
277
+ throw new Error('VisionPipeline: cloud vision failed and no local results available.');
278
+ }
279
+ }
280
+ }
281
+ // -----------------------------------------------------------------------
282
+ // Collect CLIP embedding (was running in parallel)
283
+ // -----------------------------------------------------------------------
284
+ embedding = await embeddingPromise;
285
+ if (embedding)
286
+ activeTiers.push('embedding');
287
+ // -----------------------------------------------------------------------
288
+ // Assemble final result
289
+ // -----------------------------------------------------------------------
290
+ return this._assembleResult(tierResults, activeTiers, embedding, layout, options?.forceCategory ?? category, startTime);
291
+ }
292
+ /**
293
+ * Extract text only — fastest path using OCR tier exclusively.
294
+ *
295
+ * Ignores all other tiers (handwriting, document-ai, cloud, embedding).
296
+ * Useful when you just need the text content and don't need confidence
297
+ * scoring, layout analysis, or embeddings.
298
+ *
299
+ * @param image - Image data as a Buffer or file-path / URL string.
300
+ * @returns Extracted text, or empty string if OCR produces no output.
301
+ *
302
+ * @throws {Error} If the configured OCR engine is missing.
303
+ *
304
+ * @example
305
+ * ```typescript
306
+ * const text = await pipeline.extractText(receiptImage);
307
+ * console.log(text); // "ACME STORE\n...\nTotal: $42.99"
308
+ * ```
309
+ */
310
+ async extractText(image) {
311
+ this._assertNotDisposed();
312
+ const preprocessed = Buffer.isBuffer(image)
313
+ ? await this._preprocess(image)
314
+ : image;
315
+ const result = await this._runOcr(preprocessed);
316
+ return result.text;
317
+ }
318
+ /**
319
+ * Generate an image embedding using CLIP — embedding tier only.
320
+ *
321
+ * Useful for building image similarity search indexes without running
322
+ * the full OCR + vision pipeline.
323
+ *
324
+ * @param image - Image data as a Buffer or file-path / URL string.
325
+ * @returns CLIP embedding vector (typically 512 or 768 dimensions).
326
+ *
327
+ * @throws {Error} If `@huggingface/transformers` is not installed.
328
+ * @throws {Error} If CLIP model loading fails.
329
+ *
330
+ * @example
331
+ * ```typescript
332
+ * const embedding = await pipeline.embed(photoBuffer);
333
+ * await vectorStore.upsert('images', [{
334
+ * id: 'photo-1',
335
+ * embedding,
336
+ * metadata: { source: 'upload' },
337
+ * }]);
338
+ * ```
339
+ */
340
+ async embed(image) {
341
+ this._assertNotDisposed();
342
+ const preprocessed = Buffer.isBuffer(image)
343
+ ? await this._preprocess(image)
344
+ : image;
345
+ const result = await this._runClipEmbedding(preprocessed);
346
+ if (!result) {
347
+ throw new Error('VisionPipeline: CLIP embedding returned empty result.');
348
+ }
349
+ return result;
350
+ }
351
+ /**
352
+ * Analyze document layout using Florence-2 — document-ai tier only.
353
+ *
354
+ * Returns structured {@link DocumentLayout} with semantic blocks
355
+ * (text, tables, figures, headings, lists, code) and their bounding
356
+ * boxes within each page.
357
+ *
358
+ * @param image - Image data as a Buffer or file-path / URL string.
359
+ * @returns Structured document layout with pages and blocks.
360
+ *
361
+ * @throws {Error} If `@huggingface/transformers` is not installed.
362
+ * @throws {Error} If Florence-2 model loading fails.
363
+ *
364
+ * @example
365
+ * ```typescript
366
+ * const layout = await pipeline.analyzeLayout(documentScan);
367
+ * for (const page of layout.pages) {
368
+ * for (const block of page.blocks) {
369
+ * console.log(`${block.type}: ${block.content.slice(0, 50)}...`);
370
+ * }
371
+ * }
372
+ * ```
373
+ */
374
+ async analyzeLayout(image) {
375
+ this._assertNotDisposed();
376
+ const preprocessed = Buffer.isBuffer(image)
377
+ ? await this._preprocess(image)
378
+ : image;
379
+ const result = await this._runFlorence2(preprocessed);
380
+ return result.layout;
381
+ }
382
+ /**
383
+ * Shut down the pipeline and release all loaded model resources.
384
+ *
385
+ * After calling dispose(), any further calls to `process()`,
386
+ * `extractText()`, `embed()`, or `analyzeLayout()` will throw.
387
+ *
388
+ * @example
389
+ * ```typescript
390
+ * const pipeline = new VisionPipeline({ strategy: 'progressive' });
391
+ * try {
392
+ * const result = await pipeline.process(image);
393
+ * } finally {
394
+ * await pipeline.dispose();
395
+ * }
396
+ * ```
397
+ */
398
+ async dispose() {
399
+ this._disposed = true;
400
+ // Release PaddleOCR resources
401
+ if (this._paddleOcr?.dispose) {
402
+ try {
403
+ await this._paddleOcr.dispose();
404
+ }
405
+ catch {
406
+ // Swallow disposal errors — we're tearing down anyway
407
+ }
408
+ }
409
+ this._paddleOcr = undefined;
410
+ // Terminate Tesseract worker
411
+ if (this._tesseract?.terminate) {
412
+ try {
413
+ await this._tesseract.terminate();
414
+ }
415
+ catch {
416
+ // Swallow disposal errors
417
+ }
418
+ }
419
+ this._tesseract = undefined;
420
+ // Release HuggingFace pipelines by dropping references.
421
+ // The transformers library doesn't expose explicit dispose(),
422
+ // so we rely on GC to reclaim WASM/ONNX memory.
423
+ this._trOcrPipeline = undefined;
424
+ this._florencePipeline = undefined;
425
+ this._clipPipeline = undefined;
426
+ }
427
+ // -------------------------------------------------------------------------
428
+ // Preprocessing
429
+ // -------------------------------------------------------------------------
430
+ /**
431
+ * Apply configured preprocessing to an image buffer using sharp.
432
+ *
433
+ * @param image - Raw image buffer.
434
+ * @returns Preprocessed image buffer, or the original if no preprocessing
435
+ * is configured or sharp is unavailable.
436
+ */
437
+ async _preprocess(image) {
438
+ const pp = this._config.preprocessing;
439
+ if (!pp)
440
+ return image;
441
+ // Only import sharp when preprocessing is actually needed.
442
+ // sharp is already a project dependency, but we guard the import
443
+ // to keep the pipeline functional even if sharp fails to load
444
+ // (e.g. in environments without native bindings).
445
+ let sharp;
446
+ try {
447
+ // @ts-ignore — sharp is an optional native dependency, may not be installed in CI
448
+ sharp = (await import('sharp')).default;
449
+ }
450
+ catch {
451
+ // sharp not available — return original image unmodified.
452
+ // This is a soft failure because preprocessing is an optimization,
453
+ // not a hard requirement.
454
+ return image;
455
+ }
456
+ let pipeline = sharp(image);
457
+ // Resize while preserving aspect ratio — never upscale
458
+ if (pp.resize) {
459
+ pipeline = pipeline.resize({
460
+ width: pp.resize.maxWidth,
461
+ height: pp.resize.maxHeight,
462
+ fit: 'inside',
463
+ withoutEnlargement: true,
464
+ });
465
+ }
466
+ // Convert to grayscale (improves OCR contrast on colored backgrounds)
467
+ if (pp.grayscale) {
468
+ pipeline = pipeline.grayscale();
469
+ }
470
+ // Sharpen (helps blurry scans and camera captures)
471
+ if (pp.sharpen) {
472
+ pipeline = pipeline.sharpen();
473
+ }
474
+ // Normalize brightness/contrast via histogram stretching
475
+ if (pp.normalize) {
476
+ pipeline = pipeline.normalize();
477
+ }
478
+ return pipeline.toBuffer();
479
+ }
480
+ // -------------------------------------------------------------------------
481
+ // Tier 1 — Local OCR
482
+ // -------------------------------------------------------------------------
483
+ /**
484
+ * Run OCR on the image using the configured engine (PaddleOCR or Tesseract.js).
485
+ *
486
+ * @param image - Preprocessed image buffer or URL string.
487
+ * @returns Tier result with extracted text, confidence, and regions.
488
+ * @throws {Error} If OCR engine is 'none' or neither engine is available.
489
+ */
490
+ async _runOcr(image) {
491
+ const ocrEngine = this._config.ocr ?? 'paddle';
492
+ if (ocrEngine === 'none') {
493
+ throw new Error('VisionPipeline: OCR is set to "none" but OCR tier was requested.');
494
+ }
495
+ if (ocrEngine === 'paddle') {
496
+ return this._runPaddleOcr(image);
497
+ }
498
+ return this._runTesseract(image);
499
+ }
500
+ /**
501
+ * Run PaddleOCR for text extraction.
502
+ *
503
+ * Lazily loads and initializes the ppu-paddle-ocr library on first call.
504
+ * Subsequent calls reuse the cached service instance.
505
+ *
506
+ * @param image - Image buffer or URL string.
507
+ * @returns Tier result with PaddleOCR output.
508
+ * @throws {Error} If ppu-paddle-ocr is not installed.
509
+ */
510
+ async _runPaddleOcr(image) {
511
+ const start = Date.now();
512
+ const ocr = await this._loadPaddleOcr();
513
+ // PaddleOCR expects a Buffer; convert URL/path to buffer if needed
514
+ const imageBuffer = Buffer.isBuffer(image) ? image : await this._urlToBuffer(image);
515
+ const ocrResult = await ocr.recognize(imageBuffer);
516
+ // Normalize PaddleOCR output into our standard shape.
517
+ // PaddleOCR returns an array of detected text regions with bounding
518
+ // boxes and per-region confidence scores.
519
+ const regions = (ocrResult?.regions ?? ocrResult?.data ?? []).map((r) => ({
520
+ text: r.text ?? r.content ?? '',
521
+ confidence: r.confidence ?? r.score ?? 0,
522
+ bbox: {
523
+ x: r.bbox?.[0]?.[0] ?? r.box?.[0]?.[0] ?? 0,
524
+ y: r.bbox?.[0]?.[1] ?? r.box?.[0]?.[1] ?? 0,
525
+ width: (r.bbox?.[1]?.[0] ?? r.box?.[1]?.[0] ?? 0) - (r.bbox?.[0]?.[0] ?? r.box?.[0]?.[0] ?? 0),
526
+ height: (r.bbox?.[2]?.[1] ?? r.box?.[2]?.[1] ?? 0) - (r.bbox?.[0]?.[1] ?? r.box?.[0]?.[1] ?? 0),
527
+ },
528
+ }));
529
+ const text = regions.map((r) => r.text).join('\n');
530
+ const avgConfidence = regions.length > 0
531
+ ? regions.reduce((sum, r) => sum + r.confidence, 0) / regions.length
532
+ : 0;
533
+ return {
534
+ tier: 'ocr',
535
+ provider: 'paddle',
536
+ text,
537
+ confidence: avgConfidence,
538
+ durationMs: Date.now() - start,
539
+ regions,
540
+ };
541
+ }
542
+ /**
543
+ * Run Tesseract.js for text extraction.
544
+ *
545
+ * Lazily loads the tesseract.js library and creates a worker on first call.
546
+ * The worker is reused for subsequent calls and terminated on dispose().
547
+ *
548
+ * @param image - Image buffer or URL string.
549
+ * @returns Tier result with Tesseract output.
550
+ * @throws {Error} If tesseract.js is not installed.
551
+ */
552
+ async _runTesseract(image) {
553
+ const start = Date.now();
554
+ const worker = await this._loadTesseract();
555
+ // Tesseract.js accepts Buffer, URL, or base64 string
556
+ const input = Buffer.isBuffer(image) ? image : image;
557
+ const result = await worker.recognize(input);
558
+ // Normalize Tesseract output into our standard shape.
559
+ // Tesseract returns paragraphs → lines → words with bounding boxes.
560
+ const regions = (result.data?.words ?? []).map((w) => ({
561
+ text: w.text ?? '',
562
+ confidence: (w.confidence ?? 0) / 100, // Tesseract uses 0-100 scale
563
+ bbox: {
564
+ x: w.bbox?.x0 ?? 0,
565
+ y: w.bbox?.y0 ?? 0,
566
+ width: (w.bbox?.x1 ?? 0) - (w.bbox?.x0 ?? 0),
567
+ height: (w.bbox?.y1 ?? 0) - (w.bbox?.y0 ?? 0),
568
+ },
569
+ }));
570
+ const text = result.data?.text ?? '';
571
+ // Tesseract confidence is 0-100; normalize to 0-1
572
+ const confidence = (result.data?.confidence ?? 0) / 100;
573
+ return {
574
+ tier: 'ocr',
575
+ provider: 'tesseract',
576
+ text,
577
+ confidence,
578
+ durationMs: Date.now() - start,
579
+ regions,
580
+ };
581
+ }
582
+ // -------------------------------------------------------------------------
583
+ // Tier 2a — Handwriting recognition (TrOCR)
584
+ // -------------------------------------------------------------------------
585
+ /**
586
+ * Run TrOCR handwriting recognition via @huggingface/transformers.
587
+ *
588
+ * TrOCR is a transformer model specifically trained for handwritten
589
+ * text recognition. It excels where standard OCR engines (PaddleOCR,
590
+ * Tesseract) produce low-confidence, garbled output on cursive text.
591
+ *
592
+ * @param image - Preprocessed image buffer or URL string.
593
+ * @returns Tier result with handwriting-recognized text.
594
+ * @throws {Error} If @huggingface/transformers is not installed.
595
+ */
596
+ async _runTrOcr(image) {
597
+ const start = Date.now();
598
+ const pipe = await this._loadTrOcr();
599
+ // The image-to-text pipeline accepts Buffer, URL, or base64 data URL
600
+ const input = Buffer.isBuffer(image)
601
+ ? `data:image/png;base64,${image.toString('base64')}`
602
+ : image;
603
+ const output = await pipe(input);
604
+ // The pipeline returns an array of { generated_text: string }
605
+ const text = Array.isArray(output)
606
+ ? output.map((o) => o.generated_text ?? '').join('\n')
607
+ : output?.generated_text ?? '';
608
+ return {
609
+ tier: 'handwriting',
610
+ provider: 'trocr',
611
+ text,
612
+ // TrOCR doesn't output per-token confidence for the full sequence,
613
+ // so we assign a moderate default. The progressive strategy will
614
+ // still prefer cloud results if they exist.
615
+ confidence: text.length > 0 ? 0.75 : 0,
616
+ durationMs: Date.now() - start,
617
+ };
618
+ }
619
+ // -------------------------------------------------------------------------
620
+ // Tier 2b — Document understanding (Florence-2)
621
+ // -------------------------------------------------------------------------
622
+ /**
623
+ * Run Florence-2 document understanding via @huggingface/transformers.
624
+ *
625
+ * Florence-2 detects semantic blocks (text, tables, figures, headings,
626
+ * lists, code) and their bounding boxes, producing a structured
627
+ * {@link DocumentLayout} alongside extracted text.
628
+ *
629
+ * @param image - Preprocessed image buffer or URL string.
630
+ * @returns Tier result plus structured document layout.
631
+ * @throws {Error} If @huggingface/transformers is not installed.
632
+ */
633
+ async _runFlorence2(image) {
634
+ const start = Date.now();
635
+ const pipe = await this._loadFlorence2();
636
+ // Florence-2 uses a VQA-style interface — we ask it to describe
637
+ // the document layout.
638
+ const input = Buffer.isBuffer(image)
639
+ ? `data:image/png;base64,${image.toString('base64')}`
640
+ : image;
641
+ const output = await pipe(input, 'Describe the document layout in detail.');
642
+ // Parse Florence-2 output into our structured layout format.
643
+ // The model returns a description — we extract block annotations
644
+ // if the model provides them, or fall back to a single text block.
645
+ const text = Array.isArray(output)
646
+ ? output.map((o) => o.generated_text ?? '').join('\n')
647
+ : output?.generated_text ?? '';
648
+ const blocks = [{
649
+ type: 'text',
650
+ content: text,
651
+ bbox: { x: 0, y: 0, width: 0, height: 0 },
652
+ confidence: 0.8,
653
+ }];
654
+ const layout = {
655
+ pages: [{
656
+ pageNumber: 1,
657
+ width: 0,
658
+ height: 0,
659
+ blocks,
660
+ }],
661
+ };
662
+ return {
663
+ tierResult: {
664
+ tier: 'document-ai',
665
+ provider: 'florence-2',
666
+ text,
667
+ confidence: text.length > 0 ? 0.8 : 0,
668
+ durationMs: Date.now() - start,
669
+ },
670
+ layout,
671
+ };
672
+ }
673
+ // -------------------------------------------------------------------------
674
+ // Tier 2c — Image embeddings (CLIP)
675
+ // -------------------------------------------------------------------------
676
+ /**
677
+ * Generate a CLIP image embedding via @huggingface/transformers.
678
+ *
679
+ * CLIP embeddings enable cross-modal similarity search — the embedding
680
+ * lives in the same vector space as text embeddings from the same model,
681
+ * so you can search images with text queries and vice versa.
682
+ *
683
+ * @param image - Preprocessed image buffer or URL string.
684
+ * @returns Embedding vector (typically 512 or 768 dimensions), or undefined
685
+ * if CLIP is not available.
686
+ * @throws {Error} If @huggingface/transformers is not installed.
687
+ */
688
+ async _runClipEmbedding(image) {
689
+ const pipe = await this._loadClip();
690
+ const input = Buffer.isBuffer(image)
691
+ ? `data:image/png;base64,${image.toString('base64')}`
692
+ : image;
693
+ const output = await pipe(input);
694
+ // The feature-extraction pipeline returns a nested tensor-like structure.
695
+ // We extract the flat float array from it.
696
+ if (Array.isArray(output)) {
697
+ // output is [[number, number, ...]] — flatten one level
698
+ const flat = Array.isArray(output[0]) ? output[0] : output;
699
+ return flat.map((v) => Number(v));
700
+ }
701
+ // Handle tensor-like output with .data or .tolist()
702
+ if (output?.data) {
703
+ return Array.from(output.data);
704
+ }
705
+ if (typeof output?.tolist === 'function') {
706
+ const list = output.tolist();
707
+ return Array.isArray(list[0]) ? list[0] : list;
708
+ }
709
+ return undefined;
710
+ }
711
+ // -------------------------------------------------------------------------
712
+ // Tier 3 — Cloud Vision
713
+ // -------------------------------------------------------------------------
714
+ /**
715
+ * Run cloud vision LLM for image understanding.
716
+ *
717
+ * Uses the existing `generateText()` API with a multimodal message
718
+ * containing the image as a base64 data URL. This works with any
719
+ * vision-capable provider (OpenAI GPT-4o, Anthropic Claude, Google
720
+ * Gemini, Ollama with LLaVA, etc.).
721
+ *
722
+ * @param image - Image buffer or URL string.
723
+ * @returns Tier result with cloud vision description.
724
+ * @throws {Error} If no cloud provider is configured.
725
+ * @throws {Error} If the cloud API call fails.
726
+ */
727
+ async _runCloudVision(image) {
728
+ const start = Date.now();
729
+ if (!this._config.cloudProvider) {
730
+ throw new Error('VisionPipeline: cloud vision requested but no cloudProvider is configured. ' +
731
+ 'Set cloudProvider in the pipeline config (e.g. "openai", "anthropic").');
732
+ }
733
+ // Import the high-level API to avoid coupling to any specific provider
734
+ const { generateText } = await import('../../api/generateText.js');
735
+ // Build the base64 data URL for the image
736
+ const base64 = Buffer.isBuffer(image)
737
+ ? image.toString('base64')
738
+ : image;
739
+ const imageUrl = Buffer.isBuffer(image)
740
+ ? `data:image/png;base64,${base64}`
741
+ : image;
742
+ // Use the multimodal message format supported by the IProvider interface.
743
+ // The `content` array with image_url parts is the standard format
744
+ // across OpenAI, Anthropic, and Gemini providers.
745
+ const result = await generateText({
746
+ provider: this._config.cloudProvider,
747
+ model: this._config.cloudModel,
748
+ messages: [{
749
+ role: 'user',
750
+ // The generateText API passes content through to the provider as-is
751
+ // when it's an array (multimodal message). All major providers support
752
+ // the OpenAI-style content parts array.
753
+ content: JSON.stringify([
754
+ { type: 'text', text: CLOUD_VISION_PROMPT },
755
+ { type: 'image_url', image_url: { url: imageUrl } },
756
+ ]),
757
+ }],
758
+ });
759
+ return {
760
+ tier: 'cloud-vision',
761
+ provider: this._config.cloudProvider,
762
+ text: result.text,
763
+ confidence: CLOUD_VISION_CONFIDENCE,
764
+ durationMs: Date.now() - start,
765
+ };
766
+ }
767
+ // -------------------------------------------------------------------------
768
+ // Lazy loader methods (optional peer dependency pattern)
769
+ // -------------------------------------------------------------------------
770
+ /**
771
+ * Lazily load and initialize PaddleOCR.
772
+ *
773
+ * @returns Initialized PaddleOCR service instance.
774
+ * @throws {Error} If ppu-paddle-ocr is not installed, with install instructions.
775
+ */
776
+ async _loadPaddleOcr() {
777
+ if (this._paddleOcr)
778
+ return this._paddleOcr;
779
+ try {
780
+ const mod = await import('ppu-paddle-ocr');
781
+ // ppu-paddle-ocr exports vary by version — handle both default and named
782
+ const PaddleOcrCls = mod.PaddleOcrService ?? mod.default?.PaddleOcrService ?? mod.default;
783
+ const instance = new PaddleOcrCls();
784
+ // PaddleOCR requires async initialization to load ONNX models
785
+ if (typeof instance.init === 'function') {
786
+ await instance.init();
787
+ }
788
+ this._paddleOcr = instance;
789
+ return instance;
790
+ }
791
+ catch (err) {
792
+ // Distinguish between "not installed" and "runtime init failure"
793
+ if (err?.code === 'ERR_MODULE_NOT_FOUND' || err?.code === 'MODULE_NOT_FOUND') {
794
+ throw new Error('ppu-paddle-ocr is not installed. Install with:\n' +
795
+ ' npm install ppu-paddle-ocr\n\n' +
796
+ 'Or switch to Tesseract.js by setting ocr: "tesseract" in the pipeline config.');
797
+ }
798
+ throw err;
799
+ }
800
+ }
801
+ /**
802
+ * Lazily load and initialize a Tesseract.js worker.
803
+ *
804
+ * @returns Initialized Tesseract worker ready for recognition.
805
+ * @throws {Error} If tesseract.js is not installed, with install instructions.
806
+ */
807
+ async _loadTesseract() {
808
+ if (this._tesseract)
809
+ return this._tesseract;
810
+ try {
811
+ const mod = await import('tesseract.js');
812
+ const Tesseract = mod.default ?? mod;
813
+ // createWorker() handles downloading trained data on first run
814
+ const worker = await Tesseract.createWorker('eng');
815
+ this._tesseract = worker;
816
+ return worker;
817
+ }
818
+ catch (err) {
819
+ if (err?.code === 'ERR_MODULE_NOT_FOUND' || err?.code === 'MODULE_NOT_FOUND') {
820
+ throw new Error('tesseract.js is not installed. Install with:\n' +
821
+ ' npm install tesseract.js\n\n' +
822
+ 'Or switch to PaddleOCR by setting ocr: "paddle" in the pipeline config.');
823
+ }
824
+ throw err;
825
+ }
826
+ }
827
+ /**
828
+ * Lazily load the TrOCR image-to-text pipeline from @huggingface/transformers.
829
+ *
830
+ * @returns HuggingFace image-to-text pipeline configured with TrOCR weights.
831
+ * @throws {Error} If @huggingface/transformers is not installed.
832
+ */
833
+ async _loadTrOcr() {
834
+ if (this._trOcrPipeline)
835
+ return this._trOcrPipeline;
836
+ try {
837
+ const { pipeline } = await import('@huggingface/transformers');
838
+ // TrOCR is an image-to-text model for handwriting recognition.
839
+ // microsoft/trocr-base-handwritten is the standard pretrained checkpoint.
840
+ this._trOcrPipeline = await pipeline('image-to-text', 'microsoft/trocr-base-handwritten');
841
+ return this._trOcrPipeline;
842
+ }
843
+ catch (err) {
844
+ if (err?.code === 'ERR_MODULE_NOT_FOUND' || err?.code === 'MODULE_NOT_FOUND') {
845
+ throw new Error('@huggingface/transformers is not installed. Install with:\n' +
846
+ ' npm install @huggingface/transformers\n\n' +
847
+ 'This is required for handwriting recognition (TrOCR).');
848
+ }
849
+ throw err;
850
+ }
851
+ }
852
+ /**
853
+ * Lazily load the Florence-2 document understanding pipeline.
854
+ *
855
+ * @returns HuggingFace pipeline configured for Florence-2 document analysis.
856
+ * @throws {Error} If @huggingface/transformers is not installed.
857
+ */
858
+ async _loadFlorence2() {
859
+ if (this._florencePipeline)
860
+ return this._florencePipeline;
861
+ try {
862
+ const { pipeline } = await import('@huggingface/transformers');
863
+ // Florence-2 uses the image-to-text task with a VQA-style interface.
864
+ // microsoft/Florence-2-base is the standard pretrained checkpoint.
865
+ this._florencePipeline = await pipeline('image-to-text', 'microsoft/Florence-2-base');
866
+ return this._florencePipeline;
867
+ }
868
+ catch (err) {
869
+ if (err?.code === 'ERR_MODULE_NOT_FOUND' || err?.code === 'MODULE_NOT_FOUND') {
870
+ throw new Error('@huggingface/transformers is not installed. Install with:\n' +
871
+ ' npm install @huggingface/transformers\n\n' +
872
+ 'This is required for document understanding (Florence-2).');
873
+ }
874
+ throw err;
875
+ }
876
+ }
877
+ /**
878
+ * Lazily load the CLIP feature-extraction pipeline for image embeddings.
879
+ *
880
+ * @returns HuggingFace feature-extraction pipeline configured with CLIP.
881
+ * @throws {Error} If @huggingface/transformers is not installed.
882
+ */
883
+ async _loadClip() {
884
+ if (this._clipPipeline)
885
+ return this._clipPipeline;
886
+ try {
887
+ const { pipeline } = await import('@huggingface/transformers');
888
+ // CLIP ViT-B/32 is the standard model for image embeddings.
889
+ // It produces 512-dimensional vectors in the same space as
890
+ // CLIP text embeddings, enabling cross-modal search.
891
+ this._clipPipeline = await pipeline('feature-extraction', 'Xenova/clip-vit-base-patch32');
892
+ return this._clipPipeline;
893
+ }
894
+ catch (err) {
895
+ if (err?.code === 'ERR_MODULE_NOT_FOUND' || err?.code === 'MODULE_NOT_FOUND') {
896
+ throw new Error('@huggingface/transformers is not installed. Install with:\n' +
897
+ ' npm install @huggingface/transformers\n\n' +
898
+ 'This is required for CLIP image embeddings.');
899
+ }
900
+ throw err;
901
+ }
902
+ }
903
+ // -------------------------------------------------------------------------
904
+ // Content category heuristics
905
+ // -------------------------------------------------------------------------
906
+ /**
907
+ * Detect the content category from OCR results using heuristics.
908
+ *
909
+ * This avoids running expensive classification models just to decide
910
+ * which Tier 2 model to invoke. The heuristics are deliberately
911
+ * conservative — when in doubt, they return 'mixed' which triggers
912
+ * both handwriting and document-ai tiers.
913
+ *
914
+ * @param ocrResult - Result from Tier 1 OCR, or undefined if OCR was skipped.
915
+ * @returns Detected content category.
916
+ */
917
+ _detectCategory(ocrResult) {
918
+ if (!ocrResult)
919
+ return 'mixed';
920
+ // High confidence + clean text → printed document
921
+ if (ocrResult.confidence > 0.85)
922
+ return 'printed-text';
923
+ // Low confidence + many single-character detections is a strong
924
+ // handwriting signal: OCR struggles with cursive and often splits
925
+ // connected strokes into individual character guesses.
926
+ const singleCharRegions = ocrResult.regions?.filter((r) => r.text.trim().length === 1);
927
+ if (ocrResult.confidence < 0.5 &&
928
+ singleCharRegions &&
929
+ singleCharRegions.length > 0) {
930
+ return 'handwritten';
931
+ }
932
+ // Many regions with varying sizes suggests a complex document layout
933
+ // with headers, body text, sidebars, tables, etc.
934
+ if (ocrResult.regions && ocrResult.regions.length > 20) {
935
+ return 'document-layout';
936
+ }
937
+ // Moderate confidence but few regions — probably a photograph or
938
+ // diagram with some incidental text.
939
+ if (ocrResult.confidence < 0.6 && (ocrResult.regions?.length ?? 0) < 5) {
940
+ return 'photograph';
941
+ }
942
+ return 'mixed';
943
+ }
944
+ // -------------------------------------------------------------------------
945
+ // Routing helpers
946
+ // -------------------------------------------------------------------------
947
+ /**
948
+ * Determine whether a specific tier should run based on the strategy
949
+ * and any explicit tier overrides.
950
+ *
951
+ * @param tier - The tier to check.
952
+ * @param strategy - The pipeline's configured strategy.
953
+ * @param requestedTiers - Explicit tier overrides from the caller, if any.
954
+ * @returns True if the tier should run.
955
+ */
956
+ _shouldRunTier(tier, strategy, requestedTiers) {
957
+ // Explicit tier list takes precedence over strategy
958
+ if (requestedTiers)
959
+ return requestedTiers.includes(tier);
960
+ // Strategy-based routing
961
+ switch (tier) {
962
+ case 'ocr':
963
+ // OCR runs in all strategies except cloud-only
964
+ return strategy !== 'cloud-only';
965
+ case 'handwriting':
966
+ // Handwriting only runs if explicitly enabled in config
967
+ if (!this._config.handwriting)
968
+ return false;
969
+ // Runs in progressive (conditionally), local-only, and parallel
970
+ return strategy !== 'cloud-only';
971
+ case 'document-ai':
972
+ // Document AI only runs if explicitly enabled in config
973
+ if (!this._config.documentAI)
974
+ return false;
975
+ return strategy !== 'cloud-only';
976
+ case 'embedding':
977
+ // Embedding only runs if explicitly enabled in config
978
+ if (!this._config.embedding)
979
+ return false;
980
+ return true; // CLIP runs regardless of strategy
981
+ case 'cloud-vision':
982
+ // Cloud vision routing is handled separately in _shouldRunCloudVision
983
+ return false;
984
+ default:
985
+ return false;
986
+ }
987
+ }
988
+ /**
989
+ * Determine whether cloud vision should run based on strategy, current
990
+ * confidence, and threshold.
991
+ *
992
+ * Cloud vision is the most expensive tier, so we're careful about when
993
+ * to invoke it — only when local results are insufficient.
994
+ *
995
+ * @param strategy - Pipeline strategy.
996
+ * @param bestLocalConfidence - Best confidence from local tiers so far.
997
+ * @param threshold - Confidence threshold for cloud escalation.
998
+ * @param requestedTiers - Explicit tier overrides, if any.
999
+ * @returns True if cloud vision should run.
1000
+ */
1001
+ _shouldRunCloudVision(strategy, bestLocalConfidence, threshold, requestedTiers) {
1002
+ // Explicit tier list takes precedence
1003
+ if (requestedTiers)
1004
+ return requestedTiers.includes('cloud-vision');
1005
+ // No cloud provider configured — can't run
1006
+ if (!this._config.cloudProvider)
1007
+ return false;
1008
+ switch (strategy) {
1009
+ case 'cloud-only':
1010
+ // Already handled at the top of process() — shouldn't reach here
1011
+ return true;
1012
+ case 'local-only':
1013
+ // Never call cloud
1014
+ return false;
1015
+ case 'parallel':
1016
+ // Always run cloud alongside local
1017
+ return true;
1018
+ case 'progressive':
1019
+ // Only escalate when local confidence is below threshold
1020
+ return bestLocalConfidence < threshold;
1021
+ default:
1022
+ return false;
1023
+ }
1024
+ }
1025
+ /**
1026
+ * Find the highest confidence among a set of tier results.
1027
+ *
1028
+ * @param tierResults - Results from tiers that have run so far.
1029
+ * @returns Best confidence score, or 0 if no results.
1030
+ */
1031
+ _bestConfidence(tierResults) {
1032
+ if (tierResults.length === 0)
1033
+ return 0;
1034
+ return Math.max(...tierResults.map((r) => r.confidence));
1035
+ }
1036
+ // -------------------------------------------------------------------------
1037
+ // Result assembly
1038
+ // -------------------------------------------------------------------------
1039
+ /**
1040
+ * Assemble the final {@link VisionResult} from individual tier outputs.
1041
+ *
1042
+ * The winning tier is the one with the highest confidence. Layout data
1043
+ * from Florence-2 is always included when available, regardless of
1044
+ * which tier's text wins.
1045
+ *
1046
+ * @param tierResults - All tier results collected during processing.
1047
+ * @param activeTiers - Which tiers actually ran (for metadata).
1048
+ * @param embedding - CLIP embedding, if generated.
1049
+ * @param layout - Florence-2 document layout, if generated.
1050
+ * @param forcedCategory - Caller-specified category override.
1051
+ * @param startTime - Timestamp when processing started (for duration).
1052
+ * @returns Assembled vision result.
1053
+ */
1054
+ _assembleResult(tierResults, activeTiers, embedding, layout, forcedCategory, startTime) {
1055
+ // Pick the tier result with the highest confidence for the primary text
1056
+ const winner = tierResults.reduce((best, current) => (current.confidence > best.confidence ? current : best), tierResults[0] ?? { text: '', confidence: 0, regions: undefined });
1057
+ // Detect category from the OCR result (first tier), unless forced
1058
+ const ocrResult = tierResults.find((r) => r.tier === 'ocr');
1059
+ const category = forcedCategory ?? this._detectCategory(ocrResult);
1060
+ return {
1061
+ text: winner?.text ?? '',
1062
+ confidence: winner?.confidence ?? 0,
1063
+ category,
1064
+ tiers: activeTiers,
1065
+ tierResults,
1066
+ embedding,
1067
+ layout,
1068
+ regions: winner?.regions,
1069
+ durationMs: Date.now() - startTime,
1070
+ };
1071
+ }
1072
+ // -------------------------------------------------------------------------
1073
+ // Utility methods
1074
+ // -------------------------------------------------------------------------
1075
+ /**
1076
+ * Convert a URL or file path to a Buffer by reading the file or
1077
+ * fetching the URL.
1078
+ *
1079
+ * @param url - URL string (http://, https://, file://, or bare path).
1080
+ * @returns Image data as a Buffer.
1081
+ */
1082
+ async _urlToBuffer(url) {
1083
+ // Handle data URLs by extracting the base64 payload
1084
+ if (url.startsWith('data:')) {
1085
+ const commaIdx = url.indexOf(',');
1086
+ if (commaIdx === -1)
1087
+ throw new Error(`VisionPipeline: invalid data URL.`);
1088
+ return Buffer.from(url.slice(commaIdx + 1), 'base64');
1089
+ }
1090
+ // Handle http/https URLs
1091
+ if (url.startsWith('http://') || url.startsWith('https://')) {
1092
+ const { default: axios } = await import('axios');
1093
+ const response = await axios.get(url, { responseType: 'arraybuffer' });
1094
+ return Buffer.from(response.data);
1095
+ }
1096
+ // Handle file:// URLs and bare file paths
1097
+ const { readFile } = await import('node:fs/promises');
1098
+ const filePath = url.startsWith('file://') ? url.slice(7) : url;
1099
+ return readFile(filePath);
1100
+ }
1101
+ /**
1102
+ * Guard method that throws if the pipeline has been disposed.
1103
+ * Called at the top of every public method to prevent use-after-free.
1104
+ *
1105
+ * @throws {Error} If dispose() has been called.
1106
+ */
1107
+ _assertNotDisposed() {
1108
+ if (this._disposed) {
1109
+ throw new Error('VisionPipeline: pipeline has been disposed. Create a new instance.');
1110
+ }
1111
+ }
1112
+ }
1113
+ //# sourceMappingURL=VisionPipeline.js.map