agentic-flow 2.0.2 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/.claude/agents/test-neural.md +14 -0
  2. package/.claude/settings.json +9 -133
  3. package/README.md +622 -1862
  4. package/dist/.tsbuildinfo +1 -1
  5. package/dist/billing/mcp/tools.js +0 -1
  6. package/dist/billing/mcp/tools.js.map +1 -1
  7. package/dist/cli/commands/hooks.d.ts.map +1 -1
  8. package/dist/cli/commands/hooks.js +4 -79
  9. package/dist/cli/commands/hooks.js.map +1 -1
  10. package/dist/cli-proxy.js +1 -89
  11. package/dist/cli-proxy.js.map +1 -1
  12. package/dist/core/agentdb-fast.js +3 -3
  13. package/dist/core/agentdb-fast.js.map +1 -1
  14. package/dist/core/agentdb-wrapper-enhanced.d.ts.map +1 -1
  15. package/dist/core/agentdb-wrapper-enhanced.js +4 -20
  16. package/dist/core/agentdb-wrapper-enhanced.js.map +1 -1
  17. package/dist/core/agentdb-wrapper.d.ts +2 -3
  18. package/dist/core/agentdb-wrapper.d.ts.map +1 -1
  19. package/dist/core/agentdb-wrapper.js +1 -15
  20. package/dist/core/agentdb-wrapper.js.map +1 -1
  21. package/dist/core/attention-native.d.ts +0 -4
  22. package/dist/core/attention-native.d.ts.map +1 -1
  23. package/dist/core/attention-native.js +2 -14
  24. package/dist/core/attention-native.js.map +1 -1
  25. package/dist/federation/SecurityManager.d.ts +2 -11
  26. package/dist/federation/SecurityManager.d.ts.map +1 -1
  27. package/dist/federation/SecurityManager.js +17 -50
  28. package/dist/federation/SecurityManager.js.map +1 -1
  29. package/dist/federation/integrations/supabase-adapter-debug.js +3 -3
  30. package/dist/federation/integrations/supabase-adapter-debug.js.map +1 -1
  31. package/dist/hooks/swarm-learning-optimizer.js +5 -5
  32. package/dist/hooks/swarm-learning-optimizer.js.map +1 -1
  33. package/dist/intelligence/IntelligenceStore.d.ts +26 -35
  34. package/dist/intelligence/IntelligenceStore.d.ts.map +1 -1
  35. package/dist/intelligence/IntelligenceStore.js +123 -308
  36. package/dist/intelligence/IntelligenceStore.js.map +1 -1
  37. package/dist/intelligence/RuVectorIntelligence.d.ts +1 -26
  38. package/dist/intelligence/RuVectorIntelligence.d.ts.map +1 -1
  39. package/dist/intelligence/RuVectorIntelligence.js +10 -49
  40. package/dist/intelligence/RuVectorIntelligence.js.map +1 -1
  41. package/dist/intelligence/agent-booster-enhanced.d.ts +0 -1
  42. package/dist/intelligence/agent-booster-enhanced.d.ts.map +1 -1
  43. package/dist/intelligence/agent-booster-enhanced.js +3 -24
  44. package/dist/intelligence/agent-booster-enhanced.js.map +1 -1
  45. package/dist/intelligence/index.d.ts +3 -29
  46. package/dist/intelligence/index.d.ts.map +1 -1
  47. package/dist/intelligence/index.js +3 -13
  48. package/dist/intelligence/index.js.map +1 -1
  49. package/dist/mcp/claudeFlowSdkServer.d.ts.map +1 -1
  50. package/dist/mcp/claudeFlowSdkServer.js +3 -9
  51. package/dist/mcp/claudeFlowSdkServer.js.map +1 -1
  52. package/dist/mcp/fastmcp/tools/hooks/intelligence-bridge.js +5 -5
  53. package/dist/mcp/fastmcp/tools/hooks/intelligence-bridge.js.map +1 -1
  54. package/dist/mcp/fastmcp/tools/swarm/init.d.ts.map +1 -1
  55. package/dist/mcp/fastmcp/tools/swarm/init.js +7 -36
  56. package/dist/mcp/fastmcp/tools/swarm/init.js.map +1 -1
  57. package/dist/mcp/fastmcp/tools/swarm/spawn.d.ts.map +1 -1
  58. package/dist/mcp/fastmcp/tools/swarm/spawn.js +8 -47
  59. package/dist/mcp/fastmcp/tools/swarm/spawn.js.map +1 -1
  60. package/dist/mcp/tools/agent-booster-tools.d.ts +1 -1
  61. package/dist/mcp/tools/agent-booster-tools.d.ts.map +1 -1
  62. package/dist/mcp/tools/agent-booster-tools.js +4 -10
  63. package/dist/mcp/tools/agent-booster-tools.js.map +1 -1
  64. package/dist/mcp/tools/sona-tools.d.ts.map +1 -1
  65. package/dist/mcp/tools/sona-tools.js +0 -6
  66. package/dist/mcp/tools/sona-tools.js.map +1 -1
  67. package/dist/optimizations/agent-booster-migration.d.ts.map +1 -1
  68. package/dist/optimizations/agent-booster-migration.js +2 -5
  69. package/dist/optimizations/agent-booster-migration.js.map +1 -1
  70. package/dist/proxy/anthropic-to-openrouter.js.map +1 -1
  71. package/dist/proxy/anthropic-to-requesty.js.map +1 -1
  72. package/dist/proxy/quic-proxy.d.ts +1 -0
  73. package/dist/proxy/quic-proxy.d.ts.map +1 -1
  74. package/dist/proxy/quic-proxy.js +2 -2
  75. package/dist/proxy/quic-proxy.js.map +1 -1
  76. package/dist/reasoningbank/AdvancedMemory.js +1 -1
  77. package/dist/reasoningbank/AdvancedMemory.js.map +1 -1
  78. package/dist/reasoningbank/HybridBackend.d.ts.map +1 -1
  79. package/dist/reasoningbank/HybridBackend.js +5 -2
  80. package/dist/reasoningbank/HybridBackend.js.map +1 -1
  81. package/dist/reasoningbank/backend-selector.d.ts +1 -11
  82. package/dist/reasoningbank/backend-selector.d.ts.map +1 -1
  83. package/dist/reasoningbank/backend-selector.js +5 -45
  84. package/dist/reasoningbank/backend-selector.js.map +1 -1
  85. package/dist/reasoningbank/core/consolidate.d.ts.map +1 -1
  86. package/dist/reasoningbank/core/consolidate.js +45 -113
  87. package/dist/reasoningbank/core/consolidate.js.map +1 -1
  88. package/dist/reasoningbank/index-new.d.ts +6 -1
  89. package/dist/reasoningbank/index-new.d.ts.map +1 -1
  90. package/dist/reasoningbank/index-new.js +6 -1
  91. package/dist/reasoningbank/index-new.js.map +1 -1
  92. package/dist/reasoningbank/index.d.ts +6 -2
  93. package/dist/reasoningbank/index.d.ts.map +1 -1
  94. package/dist/reasoningbank/index.js +6 -2
  95. package/dist/reasoningbank/index.js.map +1 -1
  96. package/dist/reasoningbank/utils/embeddings.d.ts +0 -1
  97. package/dist/reasoningbank/utils/embeddings.d.ts.map +1 -1
  98. package/dist/reasoningbank/utils/embeddings.js +26 -53
  99. package/dist/reasoningbank/utils/embeddings.js.map +1 -1
  100. package/dist/router/index.d.ts +17 -0
  101. package/dist/router/index.d.ts.map +1 -0
  102. package/dist/router/index.js +19 -0
  103. package/dist/router/index.js.map +1 -0
  104. package/dist/router/providers/onnx-local-optimized.d.ts +0 -2
  105. package/dist/router/providers/onnx-local-optimized.d.ts.map +1 -1
  106. package/dist/router/providers/onnx-local-optimized.js +0 -10
  107. package/dist/router/providers/onnx-local-optimized.js.map +1 -1
  108. package/dist/router/providers/onnx-local.d.ts +0 -1
  109. package/dist/router/providers/onnx-local.d.ts.map +1 -1
  110. package/dist/router/providers/onnx-local.js +5 -22
  111. package/dist/router/providers/onnx-local.js.map +1 -1
  112. package/dist/services/embedding-service.js.map +1 -1
  113. package/dist/services/sona-agent-training.d.ts +0 -1
  114. package/dist/services/sona-agent-training.d.ts.map +1 -1
  115. package/dist/services/sona-agent-training.js.map +1 -1
  116. package/dist/services/sona-agentdb-integration.d.ts.map +1 -1
  117. package/dist/services/sona-agentdb-integration.js +6 -9
  118. package/dist/services/sona-agentdb-integration.js.map +1 -1
  119. package/dist/services/sona-service.d.ts.map +1 -1
  120. package/dist/services/sona-service.js +5 -6
  121. package/dist/services/sona-service.js.map +1 -1
  122. package/dist/utils/cli.d.ts +1 -1
  123. package/dist/utils/cli.d.ts.map +1 -1
  124. package/dist/utils/cli.js +0 -21
  125. package/dist/utils/cli.js.map +1 -1
  126. package/package.json +7 -19
  127. package/scripts/postinstall.js +4 -45
  128. package/wasm/reasoningbank/reasoningbank_wasm_bg.js +2 -2
  129. package/wasm/reasoningbank/reasoningbank_wasm_bg.wasm +0 -0
  130. package/.claude/agents/.claude-flow/metrics/agent-metrics.json +0 -1
  131. package/.claude/agents/.claude-flow/metrics/performance.json +0 -87
  132. package/.claude/agents/.claude-flow/metrics/task-metrics.json +0 -10
  133. package/.claude/skills/.claude-flow/metrics/agent-metrics.json +0 -1
  134. package/.claude/skills/.claude-flow/metrics/performance.json +0 -87
  135. package/.claude/skills/.claude-flow/metrics/task-metrics.json +0 -10
  136. package/.claude/skills/agentic-flow-quickstart/skill.md +0 -69
  137. package/.claude/skills/hooks-automation/skill.md +0 -155
  138. package/.claude/skills/memory-patterns/skill.md +0 -110
  139. package/.claude/skills/skill-builder/.claude-flow/metrics/agent-metrics.json +0 -1
  140. package/.claude/skills/skill-builder/.claude-flow/metrics/performance.json +0 -87
  141. package/.claude/skills/skill-builder/.claude-flow/metrics/task-metrics.json +0 -10
  142. package/.claude/skills/sparc-methodology/skill.md +0 -137
  143. package/.claude/skills/swarm-coordination/skill.md +0 -94
  144. package/.claude/skills/worker-benchmarks/skill.md +0 -135
  145. package/.claude/skills/worker-integration/skill.md +0 -154
  146. package/.claude/statusline.mjs +0 -109
  147. package/.claude/statusline.sh +0 -71
  148. package/dist/agentdb/benchmarks/comprehensive-benchmark.js +0 -664
  149. package/dist/agentdb/benchmarks/frontier-benchmark.js +0 -419
  150. package/dist/agentdb/benchmarks/reflexion-benchmark.js +0 -370
  151. package/dist/agentdb/cli/agentdb-cli.js +0 -717
  152. package/dist/agentdb/controllers/CausalMemoryGraph.js +0 -322
  153. package/dist/agentdb/controllers/CausalRecall.js +0 -281
  154. package/dist/agentdb/controllers/EmbeddingService.d.ts +0 -37
  155. package/dist/agentdb/controllers/EmbeddingService.d.ts.map +0 -1
  156. package/dist/agentdb/controllers/EmbeddingService.js +0 -119
  157. package/dist/agentdb/controllers/EmbeddingService.js.map +0 -1
  158. package/dist/agentdb/controllers/ExplainableRecall.js +0 -387
  159. package/dist/agentdb/controllers/NightlyLearner.js +0 -382
  160. package/dist/agentdb/controllers/ReflexionMemory.js +0 -239
  161. package/dist/agentdb/controllers/SkillLibrary.js +0 -276
  162. package/dist/agentdb/controllers/frontier-index.js +0 -9
  163. package/dist/agentdb/controllers/index.js +0 -8
  164. package/dist/agentdb/optimizations/BatchOperations.js +0 -198
  165. package/dist/agentdb/optimizations/QueryOptimizer.js +0 -225
  166. package/dist/agentdb/optimizations/index.js +0 -7
  167. package/dist/agentdb/tests/frontier-features.test.js +0 -665
  168. package/dist/benchmarks/embeddings-benchmark.d.ts +0 -38
  169. package/dist/benchmarks/embeddings-benchmark.d.ts.map +0 -1
  170. package/dist/benchmarks/embeddings-benchmark.js +0 -282
  171. package/dist/benchmarks/embeddings-benchmark.js.map +0 -1
  172. package/dist/cli/commands/embeddings.d.ts +0 -12
  173. package/dist/cli/commands/embeddings.d.ts.map +0 -1
  174. package/dist/cli/commands/embeddings.js +0 -386
  175. package/dist/cli/commands/embeddings.js.map +0 -1
  176. package/dist/cli/commands/init.d.ts +0 -8
  177. package/dist/cli/commands/init.d.ts.map +0 -1
  178. package/dist/cli/commands/init.js +0 -514
  179. package/dist/cli/commands/init.js.map +0 -1
  180. package/dist/cli/commands/workers.d.ts +0 -9
  181. package/dist/cli/commands/workers.d.ts.map +0 -1
  182. package/dist/cli/commands/workers.js +0 -991
  183. package/dist/cli/commands/workers.js.map +0 -1
  184. package/dist/cli/skills-manager.js +0 -1297
  185. package/dist/cli/update-message.js +0 -175
  186. package/dist/embeddings/index.d.ts +0 -17
  187. package/dist/embeddings/index.d.ts.map +0 -1
  188. package/dist/embeddings/index.js +0 -17
  189. package/dist/embeddings/index.js.map +0 -1
  190. package/dist/embeddings/neural-substrate.d.ts +0 -206
  191. package/dist/embeddings/neural-substrate.d.ts.map +0 -1
  192. package/dist/embeddings/neural-substrate.js +0 -629
  193. package/dist/embeddings/neural-substrate.js.map +0 -1
  194. package/dist/embeddings/optimized-embedder.d.ts +0 -103
  195. package/dist/embeddings/optimized-embedder.d.ts.map +0 -1
  196. package/dist/embeddings/optimized-embedder.js +0 -730
  197. package/dist/embeddings/optimized-embedder.js.map +0 -1
  198. package/dist/examples/embedding-geometry.d.ts +0 -105
  199. package/dist/examples/embedding-geometry.d.ts.map +0 -1
  200. package/dist/examples/embedding-geometry.js +0 -528
  201. package/dist/examples/embedding-geometry.js.map +0 -1
  202. package/dist/memory/SharedMemoryPool.d.ts +0 -129
  203. package/dist/memory/SharedMemoryPool.d.ts.map +0 -1
  204. package/dist/memory/SharedMemoryPool.js +0 -243
  205. package/dist/memory/SharedMemoryPool.js.map +0 -1
  206. package/dist/memory/index.d.ts +0 -8
  207. package/dist/memory/index.d.ts.map +0 -1
  208. package/dist/memory/index.js +0 -7
  209. package/dist/memory/index.js.map +0 -1
  210. package/dist/proxy/http3-proxy-old.js +0 -331
  211. package/dist/proxy/proxy/anthropic-to-gemini.js +0 -439
  212. package/dist/proxy/utils/logger.js +0 -59
  213. package/dist/reasoningbank/agentdb-adapter.js +0 -125
  214. package/dist/reasoningbank/core/database.js +0 -250
  215. package/dist/reasoningbank/core/memory-engine.js +0 -335
  216. package/dist/swarm/ipfs-swarm.d.ts +0 -265
  217. package/dist/swarm/ipfs-swarm.d.ts.map +0 -1
  218. package/dist/swarm/ipfs-swarm.js +0 -508
  219. package/dist/swarm/ipfs-swarm.js.map +0 -1
  220. package/dist/swarm/p2p-free-swarm.d.ts +0 -344
  221. package/dist/swarm/p2p-free-swarm.d.ts.map +0 -1
  222. package/dist/swarm/p2p-free-swarm.js +0 -603
  223. package/dist/swarm/p2p-free-swarm.js.map +0 -1
  224. package/dist/swarm/real-p2p-swarm.d.ts +0 -183
  225. package/dist/swarm/real-p2p-swarm.d.ts.map +0 -1
  226. package/dist/swarm/real-p2p-swarm.js +0 -469
  227. package/dist/swarm/real-p2p-swarm.js.map +0 -1
  228. package/dist/utils/adaptive-pool-sizing.js +0 -414
  229. package/dist/utils/agentdbCommands.js +0 -175
  230. package/dist/utils/circular-rate-limiter.js +0 -391
  231. package/dist/utils/dynamic-compression.js +0 -298
  232. package/dist/utils/http2-multiplexing.js +0 -319
  233. package/dist/utils/index.d.ts +0 -6
  234. package/dist/utils/index.d.ts.map +0 -1
  235. package/dist/utils/index.js +0 -6
  236. package/dist/utils/index.js.map +0 -1
  237. package/dist/utils/lazy-auth.js +0 -311
  238. package/dist/utils/model-cache.d.ts +0 -61
  239. package/dist/utils/model-cache.d.ts.map +0 -1
  240. package/dist/utils/model-cache.js +0 -176
  241. package/dist/utils/model-cache.js.map +0 -1
  242. package/dist/utils/server-push.js +0 -251
  243. package/dist/utils/suppress-warnings.d.ts +0 -19
  244. package/dist/utils/suppress-warnings.d.ts.map +0 -1
  245. package/dist/utils/suppress-warnings.js +0 -59
  246. package/dist/utils/suppress-warnings.js.map +0 -1
  247. package/dist/utils/zero-copy-buffer.js +0 -286
  248. package/dist/workers/consolidated-phases.d.ts +0 -40
  249. package/dist/workers/consolidated-phases.d.ts.map +0 -1
  250. package/dist/workers/consolidated-phases.js +0 -497
  251. package/dist/workers/consolidated-phases.js.map +0 -1
  252. package/dist/workers/custom-worker-config.d.ts +0 -133
  253. package/dist/workers/custom-worker-config.d.ts.map +0 -1
  254. package/dist/workers/custom-worker-config.js +0 -215
  255. package/dist/workers/custom-worker-config.js.map +0 -1
  256. package/dist/workers/custom-worker-factory.d.ts +0 -89
  257. package/dist/workers/custom-worker-factory.d.ts.map +0 -1
  258. package/dist/workers/custom-worker-factory.js +0 -404
  259. package/dist/workers/custom-worker-factory.js.map +0 -1
  260. package/dist/workers/dispatch-service.d.ts +0 -123
  261. package/dist/workers/dispatch-service.d.ts.map +0 -1
  262. package/dist/workers/dispatch-service.js +0 -1024
  263. package/dist/workers/dispatch-service.js.map +0 -1
  264. package/dist/workers/hooks-integration.d.ts +0 -79
  265. package/dist/workers/hooks-integration.d.ts.map +0 -1
  266. package/dist/workers/hooks-integration.js +0 -286
  267. package/dist/workers/hooks-integration.js.map +0 -1
  268. package/dist/workers/index.d.ts +0 -42
  269. package/dist/workers/index.d.ts.map +0 -1
  270. package/dist/workers/index.js +0 -52
  271. package/dist/workers/index.js.map +0 -1
  272. package/dist/workers/mcp-tools.d.ts +0 -56
  273. package/dist/workers/mcp-tools.d.ts.map +0 -1
  274. package/dist/workers/mcp-tools.js +0 -359
  275. package/dist/workers/mcp-tools.js.map +0 -1
  276. package/dist/workers/phase-executors.d.ts +0 -22
  277. package/dist/workers/phase-executors.d.ts.map +0 -1
  278. package/dist/workers/phase-executors.js +0 -445
  279. package/dist/workers/phase-executors.js.map +0 -1
  280. package/dist/workers/resource-governor.d.ts +0 -75
  281. package/dist/workers/resource-governor.d.ts.map +0 -1
  282. package/dist/workers/resource-governor.js +0 -187
  283. package/dist/workers/resource-governor.js.map +0 -1
  284. package/dist/workers/ruvector-integration.d.ts +0 -163
  285. package/dist/workers/ruvector-integration.d.ts.map +0 -1
  286. package/dist/workers/ruvector-integration.js +0 -543
  287. package/dist/workers/ruvector-integration.js.map +0 -1
  288. package/dist/workers/ruvector-native-integration.d.ts +0 -91
  289. package/dist/workers/ruvector-native-integration.d.ts.map +0 -1
  290. package/dist/workers/ruvector-native-integration.js +0 -254
  291. package/dist/workers/ruvector-native-integration.js.map +0 -1
  292. package/dist/workers/trigger-detector.d.ts +0 -68
  293. package/dist/workers/trigger-detector.d.ts.map +0 -1
  294. package/dist/workers/trigger-detector.js +0 -281
  295. package/dist/workers/trigger-detector.js.map +0 -1
  296. package/dist/workers/types.d.ts +0 -145
  297. package/dist/workers/types.d.ts.map +0 -1
  298. package/dist/workers/types.js +0 -6
  299. package/dist/workers/types.js.map +0 -1
  300. package/dist/workers/worker-agent-integration.d.ts +0 -140
  301. package/dist/workers/worker-agent-integration.d.ts.map +0 -1
  302. package/dist/workers/worker-agent-integration.js +0 -471
  303. package/dist/workers/worker-agent-integration.js.map +0 -1
  304. package/dist/workers/worker-benchmarks.d.ts +0 -88
  305. package/dist/workers/worker-benchmarks.d.ts.map +0 -1
  306. package/dist/workers/worker-benchmarks.js +0 -452
  307. package/dist/workers/worker-benchmarks.js.map +0 -1
  308. package/dist/workers/worker-registry.d.ts +0 -85
  309. package/dist/workers/worker-registry.d.ts.map +0 -1
  310. package/dist/workers/worker-registry.js +0 -547
  311. package/dist/workers/worker-registry.js.map +0 -1
  312. package/docs/.claude-flow/metrics/agent-metrics.json +0 -1
  313. package/docs/.claude-flow/metrics/performance.json +0 -87
  314. package/docs/.claude-flow/metrics/task-metrics.json +0 -10
  315. package/docs/embeddings/EMBEDDING_GEOMETRY.md +0 -935
  316. /package/.claude/agents/analysis/{analyze-code-quality.md → code-review/analyze-code-quality.md} +0 -0
  317. /package/.claude/agents/architecture/{arch-system-design.md → system-design/arch-system-design.md} +0 -0
  318. /package/.claude/agents/data/{data-ml-model.md → ml/data-ml-model.md} +0 -0
  319. /package/.claude/agents/development/{dev-backend-api.md → backend/dev-backend-api.md} +0 -0
  320. /package/.claude/agents/devops/{ops-cicd-github.md → ci-cd/ops-cicd-github.md} +0 -0
  321. /package/.claude/agents/documentation/{docs-api-openapi.md → api-docs/docs-api-openapi.md} +0 -0
  322. /package/.claude/agents/specialized/{spec-mobile-react-native.md → mobile/spec-mobile-react-native.md} +0 -0
  323. /package/.claude/agents/testing/{tdd-london-swarm.md → unit/tdd-london-swarm.md} +0 -0
  324. /package/.claude/agents/testing/{production-validator.md → validation/production-validator.md} +0 -0
@@ -1 +1 @@
1
- {"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../../../src/reasoningbank/utils/embeddings.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,iFAAiF;AACjF,kFAAkF;AAClF,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,6BAA6B;AACnE,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,8BAA8B;AAErE,IAAI,iBAAiB,GAAQ,IAAI,CAAC;AAClC,IAAI,qBAAqB,GAAyB,IAAI,CAAC;AACvD,MAAM,cAAc,GAAG,IAAI,GAAG,EAAwB,CAAC;AACvD,8DAA8D;AAC9D,MAAM,eAAe,GAAG,IAAI,GAAG,EAA0B,CAAC;AAE1D;;;GAGG;AACH,KAAK,UAAU,oBAAoB;IACjC,sBAAsB;IACtB,IAAI,iBAAiB;QAAE,OAAO;IAE9B,sDAAsD;IACtD,IAAI,qBAAqB,EAAE,CAAC;QAC1B,OAAO,qBAAqB,CAAC;IAC/B,CAAC;IAED,mEAAmE;IACnE,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,KAAK,KAAK;QACzC,OAAO,CAAC,GAAG,CAAC,YAAY,EAAE,QAAQ,CAAC,KAAK,CAAC;QACzC,OAAO,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAChC,OAAO,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;IAEpD,IAAI,QAAQ,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,kBAAkB,EAAE,CAAC;QAChD,OAAO,CAAC,GAAG,CAAC,qEAAqE,CAAC,CAAC;QACnF,OAAO,CAAC,GAAG,CAAC,gFAAgF,CAAC,CAAC;QAC9F,OAAO;IACT,CAAC;IAED,qEAAqE;IACrE,qBAAqB,GAAG,CAAC,KAAK,IAAI,EAAE;QAClC,OAAO,CAAC,GAAG,CAAC,8EAA8E,CAAC,CAAC;QAC5F,OAAO,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;QAEnE,IAAI,CAAC;YACH,iBAAiB,GAAG,MAAM,QAAQ,CAChC,oBAAoB,EACpB,yBAAyB,EACzB,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,kBAAkB;aACvC,CAAC;YACF,OAAO,CAAC,GAAG,CAAC,kDAAkD,CAAC,CAAC;QAClE,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,OAAO,CAAC,KAAK,CAAC,oCAAoC,EAAE,KAAK,EAAE,OAAO,IAAI,KAAK,CAAC,CAAC;YAC7E,OAAO,CAAC,IAAI,CAAC,oDAAoD,CAAC,CAAC;YACnE,qCAAqC;YACrC,qBAAqB,GAAG,IAAI,CAAC;QAC/B,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;IAEL,OAAO,qBAAqB,CAAC;AAC/B,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,IAAY;IACjD,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,cAAc;IACd,MAAM,QAAQ,GAAG,SAAS,IAAI,EAAE,CAAC;IACjC,IAAI,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;QACjC,OAAO,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAE,CAAC;IACvC,CAAC;IAED,IAAI,SAAuB,CAAC;IAE5B,uBAAuB;IACvB,MAAM,oBAAoB,EAAE,CAAC;IAE7B,IAAI,iBAAiB,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,MAAM,GAAG,MAAM,iBAAiB,CAAC,IAAI,EAAE;gBAC3C,OAAO,EAAE,MAAM;gBACf,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YACH,SAAS,GAAG,IAAI,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAC5C,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,OAAO,CAAC,KAAK,CAAC,iCAAiC,EAAE,KAAK,EAAE,OAAO,IAAI,KAAK,CAAC,CAAC;YAC1E,SAAS,GAAG,SAAS,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC,WAAW;QAC/C,CAAC;IACH,CAAC;SAAM,CAAC;QACN,oCAAoC;QACpC,MAAM,IAAI,GAAG,MAAM,EAAE,UAAU,EAAE,UAAU,IAAI,GAAG,CAAC;QACnD,SAAS,GAAG,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACpC,CAAC;IAED,sDAAsD;IACtD,MAAM,aAAa,GAAG,eAAe,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACpD,IAAI,aAAa,EAAE,CAAC;QAClB,YAAY,CAAC,aAAa,CAAC,CAAC;QAC5B,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IACnC,CAAC;IAED,sCAAsC;IACtC,2DAA2D;IAC3D,IAAI,cAAc,CAAC,IAAI,IAAI,IAAI,EAAE,CAAC;QAChC,8DAA8D;QAC9D,MAAM,QAAQ,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;QACpD,IAAI,QAAQ,EAAE,CAAC;YACb,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YAChC,uBAAuB;YACvB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YAC5C,IAAI,KAAK,EAAE,CAAC;gBACV,YAAY,CAAC,KAAK,CAAC,CAAC;gBACpB,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YACnC,CAAC;QACH,CAAC;IACH,CAAC;IACD,cAAc,CAAC,GAAG,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;IAExC,6CAA6C;IAC7C,MAAM,GAAG,GAAG,MAAM,EAAE,UAAU,EAAE,iBAAiB,IAAI,IAAI,CAAC;IAC1D,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE;QAC9B,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAChC,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IACnC,CAAC,EAAE,GAAG,GAAG,IAAI,CAAC,CAAC;IAEf,2CAA2C;IAC3C,eAAe,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAEvC,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,KAAe;IACzD,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAChE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,sBAAsB;IACpC,OAAO,GAAG,CAAC,CAAC,uCAAuC;AACrD,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,IAAY,EAAE,IAAY;IAC3C,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAC9B,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC;IAEnC,wDAAwD;IACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9B,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;IACvE,CAAC;IAED,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,GAAW;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,IAAI,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAChD,IAAI,IAAI,CAAC,CAAC;IACZ,CAAC;IACD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,GAAiB;IAClC,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IACD,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAErB,IAAI,GAAG,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IAE1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;IAChB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,mBAAmB;IACjC,iDAAiD;IACjD,KAAK,MAAM,KAAK,IAAI,eAAe,CAAC,MAAM,EAAE,EAAE,CAAC;QAC7C,YAAY,CAAC,KAAK,CAAC,CAAC;IACtB,CAAC;IACD,eAAe,CAAC,KAAK,EAAE,CAAC;IACxB,cAAc,CAAC,KAAK,EAAE,CAAC;AACzB,CAAC","sourcesContent":["/**\n * Embedding generation for semantic similarity\n * Uses local transformers.js - no API key required!\n */\n\nimport { pipeline, env } from '@xenova/transformers';\nimport { loadConfig } from './config.js';\n\n// Configure transformers.js to use WASM backend only (avoid ONNX runtime issues)\n// The native ONNX runtime causes \"DefaultLogger not registered\" errors in Node.js\nenv.backends.onnx.wasm.proxy = false; // Disable ONNX runtime proxy\nenv.backends.onnx.wasm.numThreads = 1; // Single thread for stability\n\nlet embeddingPipeline: any = null;\nlet initializationPromise: Promise<void> | null = null;\nconst embeddingCache = new Map<string, Float32Array>();\n// MEMORY LEAK FIX: Track TTL timers so they can be cleaned up\nconst embeddingTimers = new Map<string, NodeJS.Timeout>();\n\n/**\n * Initialize the embedding pipeline (lazy load)\n * RACE CONDITION FIX: Use promise-based initialization instead of busy-wait\n */\nasync function initializeEmbeddings(): Promise<void> {\n // Already initialized\n if (embeddingPipeline) return;\n\n // Initialization in progress - await existing promise\n if (initializationPromise) {\n return initializationPromise;\n }\n\n // Detect npx environment (known transformer initialization issues)\n const isNpxEnv = process.env.npm_lifecycle_event === 'npx' ||\n process.env.npm_execpath?.includes('npx') ||\n process.cwd().includes('/_npx/') ||\n process.cwd().includes('\\\\_npx\\\\');\n\n if (isNpxEnv && !process.env.FORCE_TRANSFORMERS) {\n console.log('[Embeddings] NPX environment detected - using hash-based embeddings');\n console.log('[Embeddings] For semantic search, install globally: npm install -g claude-flow');\n return;\n }\n\n // RACE CONDITION FIX: Create promise for concurrent callers to await\n initializationPromise = (async () => {\n console.log('[Embeddings] Initializing local embedding model (Xenova/all-MiniLM-L6-v2)...');\n console.log('[Embeddings] First run will download ~23MB model...');\n\n try {\n embeddingPipeline = await pipeline(\n 'feature-extraction',\n 'Xenova/all-MiniLM-L6-v2',\n { quantized: true } // Smaller, faster\n );\n console.log('[Embeddings] Local model ready! (384 dimensions)');\n } catch (error: any) {\n console.error('[Embeddings] Failed to initialize:', error?.message || error);\n console.warn('[Embeddings] Falling back to hash-based embeddings');\n // Reset promise so retry is possible\n initializationPromise = null;\n }\n })();\n\n return initializationPromise;\n}\n\n/**\n * Compute embedding for text using local model\n */\nexport async function computeEmbedding(text: string): Promise<Float32Array> {\n const config = loadConfig();\n\n // Check cache\n const cacheKey = `local:${text}`;\n if (embeddingCache.has(cacheKey)) {\n return embeddingCache.get(cacheKey)!;\n }\n\n let embedding: Float32Array;\n\n // Initialize if needed\n await initializeEmbeddings();\n\n if (embeddingPipeline) {\n try {\n // Use transformers.js for real embeddings\n const output = await embeddingPipeline(text, {\n pooling: 'mean',\n normalize: true\n });\n embedding = new Float32Array(output.data);\n } catch (error: any) {\n console.error('[Embeddings] Generation failed:', error?.message || error);\n embedding = hashEmbed(text, 384); // Fallback\n }\n } else {\n // Fallback to hash-based embeddings\n const dims = config?.embeddings?.dimensions || 384;\n embedding = hashEmbed(text, dims);\n }\n\n // MEMORY LEAK FIX: Clear existing timer if key exists\n const existingTimer = embeddingTimers.get(cacheKey);\n if (existingTimer) {\n clearTimeout(existingTimer);\n embeddingTimers.delete(cacheKey);\n }\n\n // Cache with LRU (limit 1000 entries)\n // PERFORMANCE FIX: Use proper LRU by tracking access order\n if (embeddingCache.size >= 1000) {\n // Find and remove oldest entry (first key in iteration order)\n const firstKey = embeddingCache.keys().next().value;\n if (firstKey) {\n embeddingCache.delete(firstKey);\n // Also clear its timer\n const timer = embeddingTimers.get(firstKey);\n if (timer) {\n clearTimeout(timer);\n embeddingTimers.delete(firstKey);\n }\n }\n }\n embeddingCache.set(cacheKey, embedding);\n\n // Set TTL for cache entry with tracked timer\n const ttl = config?.embeddings?.cache_ttl_seconds || 3600;\n const timerId = setTimeout(() => {\n embeddingCache.delete(cacheKey);\n embeddingTimers.delete(cacheKey);\n }, ttl * 1000);\n\n // MEMORY LEAK FIX: Track timer for cleanup\n embeddingTimers.set(cacheKey, timerId);\n\n return embedding;\n}\n\n/**\n * Batch compute embeddings (more efficient)\n */\nexport async function computeEmbeddingBatch(texts: string[]): Promise<Float32Array[]> {\n return Promise.all(texts.map(text => computeEmbedding(text)));\n}\n\n/**\n * Get embedding dimensions\n */\nexport function getEmbeddingDimensions(): number {\n return 384; // all-MiniLM-L6-v2 uses 384 dimensions\n}\n\n/**\n * Deterministic hash-based embedding (fallback)\n */\nfunction hashEmbed(text: string, dims: number): Float32Array {\n const hash = simpleHash(text);\n const vec = new Float32Array(dims);\n\n // Generate deterministic pseudo-random vector from hash\n for (let i = 0; i < dims; i++) {\n vec[i] = Math.sin(hash * (i + 1) * 0.01) + Math.cos(hash * i * 0.02);\n }\n\n return normalize(vec);\n}\n\n/**\n * Simple string hash function\n */\nfunction simpleHash(str: string): number {\n let hash = 0;\n for (let i = 0; i < str.length; i++) {\n hash = ((hash << 5) - hash) + str.charCodeAt(i);\n hash |= 0;\n }\n return Math.abs(hash);\n}\n\n/**\n * Normalize vector to unit length\n */\nfunction normalize(vec: Float32Array): Float32Array {\n let mag = 0;\n for (let i = 0; i < vec.length; i++) {\n mag += vec[i] * vec[i];\n }\n mag = Math.sqrt(mag);\n\n if (mag === 0) return vec;\n\n for (let i = 0; i < vec.length; i++) {\n vec[i] /= mag;\n }\n return vec;\n}\n\n/**\n * Clear embedding cache\n * MEMORY LEAK FIX: Also clear all TTL timers\n */\nexport function clearEmbeddingCache(): void {\n // Clear all timers first to prevent memory leaks\n for (const timer of embeddingTimers.values()) {\n clearTimeout(timer);\n }\n embeddingTimers.clear();\n embeddingCache.clear();\n}\n"]}
1
+ {"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../../../src/reasoningbank/utils/embeddings.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,iFAAiF;AACjF,kFAAkF;AAClF,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,6BAA6B;AACnE,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,8BAA8B;AAErE,IAAI,iBAAiB,GAAQ,IAAI,CAAC;AAClC,IAAI,cAAc,GAAG,KAAK,CAAC;AAC3B,MAAM,cAAc,GAAG,IAAI,GAAG,EAAwB,CAAC;AAEvD;;GAEG;AACH,KAAK,UAAU,oBAAoB;IACjC,IAAI,iBAAiB;QAAE,OAAO;IAC9B,IAAI,cAAc,EAAE,CAAC;QACnB,sCAAsC;QACtC,OAAO,cAAc,EAAE,CAAC;YACtB,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;QACzD,CAAC;QACD,OAAO;IACT,CAAC;IAED,mEAAmE;IACnE,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,KAAK,KAAK;QACzC,OAAO,CAAC,GAAG,CAAC,YAAY,EAAE,QAAQ,CAAC,KAAK,CAAC;QACzC,OAAO,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAChC,OAAO,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;IAEpD,IAAI,QAAQ,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,kBAAkB,EAAE,CAAC;QAChD,OAAO,CAAC,GAAG,CAAC,qEAAqE,CAAC,CAAC;QACnF,OAAO,CAAC,GAAG,CAAC,gFAAgF,CAAC,CAAC;QAC9F,cAAc,GAAG,KAAK,CAAC;QACvB,OAAO;IACT,CAAC;IAED,cAAc,GAAG,IAAI,CAAC;IACtB,OAAO,CAAC,GAAG,CAAC,8EAA8E,CAAC,CAAC;IAC5F,OAAO,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;IAEnE,IAAI,CAAC;QACH,iBAAiB,GAAG,MAAM,QAAQ,CAChC,oBAAoB,EACpB,yBAAyB,EACzB,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,kBAAkB;SACvC,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,kDAAkD,CAAC,CAAC;IAClE,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,OAAO,CAAC,KAAK,CAAC,oCAAoC,EAAE,KAAK,EAAE,OAAO,IAAI,KAAK,CAAC,CAAC;QAC7E,OAAO,CAAC,IAAI,CAAC,oDAAoD,CAAC,CAAC;IACrE,CAAC;YAAS,CAAC;QACT,cAAc,GAAG,KAAK,CAAC;IACzB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,IAAY;IACjD,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,cAAc;IACd,MAAM,QAAQ,GAAG,SAAS,IAAI,EAAE,CAAC;IACjC,IAAI,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;QACjC,OAAO,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAE,CAAC;IACvC,CAAC;IAED,IAAI,SAAuB,CAAC;IAE5B,uBAAuB;IACvB,MAAM,oBAAoB,EAAE,CAAC;IAE7B,IAAI,iBAAiB,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,MAAM,GAAG,MAAM,iBAAiB,CAAC,IAAI,EAAE;gBAC3C,OAAO,EAAE,MAAM;gBACf,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YACH,SAAS,GAAG,IAAI,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAC5C,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,OAAO,CAAC,KAAK,CAAC,iCAAiC,EAAE,KAAK,EAAE,OAAO,IAAI,KAAK,CAAC,CAAC;YAC1E,SAAS,GAAG,SAAS,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC,WAAW;QAC/C,CAAC;IACH,CAAC;SAAM,CAAC;QACN,oCAAoC;QACpC,MAAM,IAAI,GAAG,MAAM,EAAE,UAAU,EAAE,UAAU,IAAI,GAAG,CAAC;QACnD,SAAS,GAAG,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACpC,CAAC;IAED,sCAAsC;IACtC,IAAI,cAAc,CAAC,IAAI,GAAG,IAAI,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;QACpD,IAAI,QAAQ,EAAE,CAAC;YACb,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IACD,cAAc,CAAC,GAAG,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;IAExC,0BAA0B;IAC1B,MAAM,GAAG,GAAG,MAAM,EAAE,UAAU,EAAE,iBAAiB,IAAI,IAAI,CAAC;IAC1D,UAAU,CACR,GAAG,EAAE,CAAC,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,EACrC,GAAG,GAAG,IAAI,CACX,CAAC;IAEF,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,KAAe;IACzD,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAChE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,sBAAsB;IACpC,OAAO,GAAG,CAAC,CAAC,uCAAuC;AACrD,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,IAAY,EAAE,IAAY;IAC3C,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAC9B,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC;IAEnC,wDAAwD;IACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9B,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;IACvE,CAAC;IAED,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,GAAW;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,IAAI,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAChD,IAAI,IAAI,CAAC,CAAC;IACZ,CAAC;IACD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,GAAiB;IAClC,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IACD,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAErB,IAAI,GAAG,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IAE1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;IAChB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB;IACjC,cAAc,CAAC,KAAK,EAAE,CAAC;AACzB,CAAC","sourcesContent":["/**\n * Embedding generation for semantic similarity\n * Uses local transformers.js - no API key required!\n */\n\nimport { pipeline, env } from '@xenova/transformers';\nimport { loadConfig } from './config.js';\n\n// Configure transformers.js to use WASM backend only (avoid ONNX runtime issues)\n// The native ONNX runtime causes \"DefaultLogger not registered\" errors in Node.js\nenv.backends.onnx.wasm.proxy = false; // Disable ONNX runtime proxy\nenv.backends.onnx.wasm.numThreads = 1; // Single thread for stability\n\nlet embeddingPipeline: any = null;\nlet isInitializing = false;\nconst embeddingCache = new Map<string, Float32Array>();\n\n/**\n * Initialize the embedding pipeline (lazy load)\n */\nasync function initializeEmbeddings(): Promise<void> {\n if (embeddingPipeline) return;\n if (isInitializing) {\n // Wait for initialization to complete\n while (isInitializing) {\n await new Promise(resolve => setTimeout(resolve, 100));\n }\n return;\n }\n\n // Detect npx environment (known transformer initialization issues)\n const isNpxEnv = process.env.npm_lifecycle_event === 'npx' ||\n process.env.npm_execpath?.includes('npx') ||\n process.cwd().includes('/_npx/') ||\n process.cwd().includes('\\\\_npx\\\\');\n\n if (isNpxEnv && !process.env.FORCE_TRANSFORMERS) {\n console.log('[Embeddings] NPX environment detected - using hash-based embeddings');\n console.log('[Embeddings] For semantic search, install globally: npm install -g claude-flow');\n isInitializing = false;\n return;\n }\n\n isInitializing = true;\n console.log('[Embeddings] Initializing local embedding model (Xenova/all-MiniLM-L6-v2)...');\n console.log('[Embeddings] First run will download ~23MB model...');\n\n try {\n embeddingPipeline = await pipeline(\n 'feature-extraction',\n 'Xenova/all-MiniLM-L6-v2',\n { quantized: true } // Smaller, faster\n );\n console.log('[Embeddings] Local model ready! (384 dimensions)');\n } catch (error: any) {\n console.error('[Embeddings] Failed to initialize:', error?.message || error);\n console.warn('[Embeddings] Falling back to hash-based embeddings');\n } finally {\n isInitializing = false;\n }\n}\n\n/**\n * Compute embedding for text using local model\n */\nexport async function computeEmbedding(text: string): Promise<Float32Array> {\n const config = loadConfig();\n\n // Check cache\n const cacheKey = `local:${text}`;\n if (embeddingCache.has(cacheKey)) {\n return embeddingCache.get(cacheKey)!;\n }\n\n let embedding: Float32Array;\n\n // Initialize if needed\n await initializeEmbeddings();\n\n if (embeddingPipeline) {\n try {\n // Use transformers.js for real embeddings\n const output = await embeddingPipeline(text, {\n pooling: 'mean',\n normalize: true\n });\n embedding = new Float32Array(output.data);\n } catch (error: any) {\n console.error('[Embeddings] Generation failed:', error?.message || error);\n embedding = hashEmbed(text, 384); // Fallback\n }\n } else {\n // Fallback to hash-based embeddings\n const dims = config?.embeddings?.dimensions || 384;\n embedding = hashEmbed(text, dims);\n }\n\n // Cache with LRU (limit 1000 entries)\n if (embeddingCache.size > 1000) {\n const firstKey = embeddingCache.keys().next().value;\n if (firstKey) {\n embeddingCache.delete(firstKey);\n }\n }\n embeddingCache.set(cacheKey, embedding);\n\n // Set TTL for cache entry\n const ttl = config?.embeddings?.cache_ttl_seconds || 3600;\n setTimeout(\n () => embeddingCache.delete(cacheKey),\n ttl * 1000\n );\n\n return embedding;\n}\n\n/**\n * Batch compute embeddings (more efficient)\n */\nexport async function computeEmbeddingBatch(texts: string[]): Promise<Float32Array[]> {\n return Promise.all(texts.map(text => computeEmbedding(text)));\n}\n\n/**\n * Get embedding dimensions\n */\nexport function getEmbeddingDimensions(): number {\n return 384; // all-MiniLM-L6-v2 uses 384 dimensions\n}\n\n/**\n * Deterministic hash-based embedding (fallback)\n */\nfunction hashEmbed(text: string, dims: number): Float32Array {\n const hash = simpleHash(text);\n const vec = new Float32Array(dims);\n\n // Generate deterministic pseudo-random vector from hash\n for (let i = 0; i < dims; i++) {\n vec[i] = Math.sin(hash * (i + 1) * 0.01) + Math.cos(hash * i * 0.02);\n }\n\n return normalize(vec);\n}\n\n/**\n * Simple string hash function\n */\nfunction simpleHash(str: string): number {\n let hash = 0;\n for (let i = 0; i < str.length; i++) {\n hash = ((hash << 5) - hash) + str.charCodeAt(i);\n hash |= 0;\n }\n return Math.abs(hash);\n}\n\n/**\n * Normalize vector to unit length\n */\nfunction normalize(vec: Float32Array): Float32Array {\n let mag = 0;\n for (let i = 0; i < vec.length; i++) {\n mag += vec[i] * vec[i];\n }\n mag = Math.sqrt(mag);\n\n if (mag === 0) return vec;\n\n for (let i = 0; i < vec.length; i++) {\n vec[i] /= mag;\n }\n return vec;\n}\n\n/**\n * Clear embedding cache\n */\nexport function clearEmbeddingCache(): void {\n embeddingCache.clear();\n}\n"]}
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Router module - Multi-model routing for agentic-flow
3
+ *
4
+ * Provides intelligent routing between LLM providers:
5
+ * - Anthropic
6
+ * - OpenRouter
7
+ * - Gemini
8
+ * - ONNX Local
9
+ */
10
+ export { ModelRouter } from './router.js';
11
+ export type { LLMProvider, ProviderType, ChatParams, ChatResponse, StreamChunk, Message, ContentBlock, Tool, ProviderConfig, RouterConfig, RoutingConfig, RoutingRule, ToolCallingConfig, MonitoringConfig, CacheConfig, RouterMetrics, ProviderError } from './types.js';
12
+ export { OPENROUTER_MODELS, ANTHROPIC_TO_OPENROUTER, OPENROUTER_TO_ANTHROPIC, getOpenRouterModel, getAnthropicModel } from './model-mapping.js';
13
+ export { OpenRouterProvider } from './providers/openrouter.js';
14
+ export { AnthropicProvider } from './providers/anthropic.js';
15
+ export { GeminiProvider } from './providers/gemini.js';
16
+ export { ONNXLocalProvider } from './providers/onnx-local.js';
17
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/router/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAGH,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAG1C,YAAY,EACV,WAAW,EACX,YAAY,EACZ,UAAU,EACV,YAAY,EACZ,WAAW,EACX,OAAO,EACP,YAAY,EACZ,IAAI,EACJ,cAAc,EACd,YAAY,EACZ,aAAa,EACb,WAAW,EACX,iBAAiB,EACjB,gBAAgB,EAChB,WAAW,EACX,aAAa,EACb,aAAa,EACd,MAAM,YAAY,CAAC;AAGpB,OAAO,EACL,iBAAiB,EACjB,uBAAuB,EACvB,uBAAuB,EACvB,kBAAkB,EAClB,iBAAiB,EAClB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC"}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Router module - Multi-model routing for agentic-flow
3
+ *
4
+ * Provides intelligent routing between LLM providers:
5
+ * - Anthropic
6
+ * - OpenRouter
7
+ * - Gemini
8
+ * - ONNX Local
9
+ */
10
+ // Main router class
11
+ export { ModelRouter } from './router.js';
12
+ // Model mappings
13
+ export { OPENROUTER_MODELS, ANTHROPIC_TO_OPENROUTER, OPENROUTER_TO_ANTHROPIC, getOpenRouterModel, getAnthropicModel } from './model-mapping.js';
14
+ // Providers
15
+ export { OpenRouterProvider } from './providers/openrouter.js';
16
+ export { AnthropicProvider } from './providers/anthropic.js';
17
+ export { GeminiProvider } from './providers/gemini.js';
18
+ export { ONNXLocalProvider } from './providers/onnx-local.js';
19
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/router/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,oBAAoB;AACpB,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAuB1C,iBAAiB;AACjB,OAAO,EACL,iBAAiB,EACjB,uBAAuB,EACvB,uBAAuB,EACvB,kBAAkB,EAClB,iBAAiB,EAClB,MAAM,oBAAoB,CAAC;AAE5B,YAAY;AACZ,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC","sourcesContent":["/**\n * Router module - Multi-model routing for agentic-flow\n *\n * Provides intelligent routing between LLM providers:\n * - Anthropic\n * - OpenRouter\n * - Gemini\n * - ONNX Local\n */\n\n// Main router class\nexport { ModelRouter } from './router.js';\n\n// Types\nexport type {\n LLMProvider,\n ProviderType,\n ChatParams,\n ChatResponse,\n StreamChunk,\n Message,\n ContentBlock,\n Tool,\n ProviderConfig,\n RouterConfig,\n RoutingConfig,\n RoutingRule,\n ToolCallingConfig,\n MonitoringConfig,\n CacheConfig,\n RouterMetrics,\n ProviderError\n} from './types.js';\n\n// Model mappings\nexport {\n OPENROUTER_MODELS,\n ANTHROPIC_TO_OPENROUTER,\n OPENROUTER_TO_ANTHROPIC,\n getOpenRouterModel,\n getAnthropicModel\n} from './model-mapping.js';\n\n// Providers\nexport { OpenRouterProvider } from './providers/openrouter.js';\nexport { AnthropicProvider } from './providers/anthropic.js';\nexport { GeminiProvider } from './providers/gemini.js';\nexport { ONNXLocalProvider } from './providers/onnx-local.js';\n"]}
@@ -7,8 +7,6 @@
7
7
  * - KV cache pooling for 20-30% faster generation
8
8
  * - Better generation parameters for code tasks
9
9
  * - System prompt caching
10
- *
11
- * Note: onnxruntime-node is optional - will error if not installed
12
10
  */
13
11
  import type { ChatParams, ChatResponse } from '../types.js';
14
12
  import { ONNXLocalProvider, ONNXLocalConfig } from './onnx-local.js';
@@ -1 +1 @@
1
- {"version":3,"file":"onnx-local-optimized.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAaH,OAAO,KAAK,EACV,UAAU,EACV,YAAY,EAIb,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAErE,MAAM,WAAW,mBAAoB,SAAQ,eAAe;IAC1D,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,qBAAa,qBAAsB,SAAQ,iBAAiB;IAC1D,OAAO,CAAC,eAAe,CAAgC;IACvD,OAAO,CAAC,WAAW,CAA+B;IAClD,OAAO,CAAC,iBAAiB,CAAmE;gBAEhF,MAAM,GAAE,mBAAwB;IAkB5C;;OAEG;IACH,OAAO,CAAC,cAAc;IAKtB;;OAEG;IACH,OAAO,CAAC,eAAe;IAmDvB;;OAEG;IACH,OAAO,CAAC,cAAc;IA8BtB;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC;IA+BrD;;OAEG;IACH,mBAAmB;;;;;;;;;;;;;;;;;;;;IAoBnB;;OAEG;IACH,WAAW;CAKZ"}
1
+ {"version":3,"file":"onnx-local-optimized.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAKH,OAAO,KAAK,EACV,UAAU,EACV,YAAY,EAIb,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAErE,MAAM,WAAW,mBAAoB,SAAQ,eAAe;IAC1D,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,qBAAa,qBAAsB,SAAQ,iBAAiB;IAC1D,OAAO,CAAC,eAAe,CAAgC;IACvD,OAAO,CAAC,WAAW,CAA+B;IAClD,OAAO,CAAC,iBAAiB,CAAmE;gBAEhF,MAAM,GAAE,mBAAwB;IAkB5C;;OAEG;IACH,OAAO,CAAC,cAAc;IAKtB;;OAEG;IACH,OAAO,CAAC,eAAe;IAmDvB;;OAEG;IACH,OAAO,CAAC,cAAc;IA8BtB;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC;IA+BrD;;OAEG;IACH,mBAAmB;;;;;;;;;;;;;;;;;;;;IAoBnB;;OAEG;IACH,WAAW;CAKZ"}
@@ -7,17 +7,7 @@
7
7
  * - KV cache pooling for 20-30% faster generation
8
8
  * - Better generation parameters for code tasks
9
9
  * - System prompt caching
10
- *
11
- * Note: onnxruntime-node is optional - will error if not installed
12
10
  */
13
- let ort = null;
14
- // Dynamic import for optional onnxruntime-node
15
- try {
16
- ort = await import('onnxruntime-node');
17
- }
18
- catch {
19
- // Will be handled at runtime
20
- }
21
11
  import { ONNXLocalProvider } from './onnx-local.js';
22
12
  export class OptimizedONNXProvider extends ONNXLocalProvider {
23
13
  optimizedConfig;
@@ -1 +1 @@
1
- {"version":3,"file":"onnx-local-optimized.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,IAAI,GAAG,GAAQ,IAAI,CAAC;AAEpB,+CAA+C;AAC/C,IAAI,CAAC;IACH,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;AACzC,CAAC;AAAC,MAAM,CAAC;IACP,6BAA6B;AAC/B,CAAC;AAWD,OAAO,EAAE,iBAAiB,EAAmB,MAAM,iBAAiB,CAAC;AAYrE,MAAM,OAAO,qBAAsB,SAAQ,iBAAiB;IAClD,eAAe,CAAgC;IAC/C,WAAW,GAAqB,IAAI,GAAG,EAAE,CAAC;IAC1C,iBAAiB,GAAyD,IAAI,GAAG,EAAE,CAAC;IAE5F,YAAY,SAA8B,EAAE;QAC1C,KAAK,CAAC,MAAM,CAAC,CAAC;QAEd,IAAI,CAAC,eAAe,GAAG;YACrB,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,iFAAiF;YAChH,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG,EAAG,sCAAsC;YAC/E,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,IAAI,IAAI,EAAG,sBAAsB;YAC1E,aAAa,EAAE,MAAM,CAAC,aAAa,KAAK,KAAK,EAAG,eAAe;YAC/D,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,EAAE;YACvB,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,GAAG;YACxB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,IAAI,GAAG;SACnD,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY;QACjC,qDAAqD;QACrD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;IAED;;OAEG;IACK,eAAe,CAAC,QAAmB;QACzC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC;YACxC,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,gBAAgB,CAAC;QACxD,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,MAAM,SAAS,GAAc,EAAE,CAAC;QAEhC,wCAAwC;QACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QAC1D,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,OAAO,GAAG,OAAO,SAAS,CAAC,OAAO,KAAK,QAAQ;gBACnD,CAAC,CAAC,SAAS,CAAC,OAAO;gBACnB,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEzE,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC1B,WAAW,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;QAC9C,CAAC;QAED,+CAA+C;QAC/C,KAAK,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9C,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YAExB,yCAAyC;YACzC,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ;gBAAE,SAAS;YAEpC,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;YAE5C,IAAI,WAAW,GAAG,MAAM,GAAG,SAAS,EAAE,CAAC;gBACrC,OAAO,CAAC,GAAG,CAAC,4BAA4B,QAAQ,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,eAAe,WAAW,cAAc,CAAC,CAAC;gBACpH,MAAM;YACR,CAAC;YAED,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACvB,WAAW,IAAI,MAAM,CAAC;QACxB,CAAC;QAED,sCAAsC;QACtC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,EAAE,CAAC;YACtE,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;YAC5E,IAAI,WAAW;gBAAE,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC/C,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,kBAAkB,EAAE,CAAC;YAC7C,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE;YACnC,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACxB,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;oBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;oBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAEnE,wCAAwC;gBACxC,MAAM,UAAU,GAAG,0DAA0D,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAE5F,IAAI,UAAU,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC5E,MAAM,eAAe,GAAG,GAAG,OAAO,kHAAkH,CAAC;oBAErJ,OAAO;wBACL,GAAG,GAAG;wBACN,OAAO,EAAE,eAAe;qBACzB,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,4CAA4C;QAC5C,IAAI,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAErD,uCAAuC;QACvC,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;QAEzC,2DAA2D;QAC3D,MAAM,cAAc,GAAG;YACrB,GAAG,MAAM;YACT,QAAQ;YACR,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,eAAe,CAAC,WAAW;YACnE,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,eAAe,CAAC,SAAS;SAC9D,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAElD,4BAA4B;QAC5B,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;YACtB,QAAQ,CAAC,QAAQ,CAAC,aAAa,GAAG;gBAChC,cAAc,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBAClD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,mBAAmB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC5D,oBAAoB,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM;gBAC5C,qBAAqB,EAAE,QAAQ,CAAC,MAAM;aACvC,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,mBAAmB;QACjB,OAAO;YACL,GAAG,KAAK,CAAC,YAAY,EAAE;YACvB,aAAa,EAAE;gBACb,gBAAgB,EAAE,IAAI,CAAC,eAAe,CAAC,gBAAgB;gBACvD,aAAa,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBACjD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,WAAW,EAAE,IAAI,CAAC,eAAe,CAAC,WAAW;gBAC7C,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,iBAAiB,EAAE,IAAI,CAAC,eAAe,CAAC,iBAAiB;aAC1D;YACD,UAAU,EAAE;gBACV,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,IAAI;gBACtC,qBAAqB,EAAE,IAAI,CAAC,iBAAiB,CAAC,IAAI;aACnD;SACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,WAAW;QACT,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACnC,CAAC;CACF","sourcesContent":["/**\n * Optimized ONNX Runtime Local Inference Provider\n *\n * Improvements over base implementation:\n * - Context pruning for 2-4x speed improvement\n * - Prompt optimization for 30-50% quality improvement\n * - KV cache pooling for 20-30% faster generation\n * - Better generation parameters for code tasks\n * - System prompt caching\n *\n * Note: onnxruntime-node is optional - will error if not installed\n */\n\nlet ort: any = null;\n\n// Dynamic import for optional onnxruntime-node\ntry {\n ort = await import('onnxruntime-node');\n} catch {\n // Will be handled at runtime\n}\n\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n ChatParams,\n ChatResponse,\n Message,\n ContentBlock,\n ProviderError\n} from '../types.js';\nimport { ONNXLocalProvider, ONNXLocalConfig } from './onnx-local.js';\n\nexport interface OptimizedONNXConfig extends ONNXLocalConfig {\n maxContextTokens?: number;\n slidingWindow?: boolean;\n cacheSystemPrompts?: boolean;\n promptOptimization?: boolean;\n topK?: number;\n topP?: number;\n repetitionPenalty?: number;\n}\n\nexport class OptimizedONNXProvider extends ONNXLocalProvider {\n private optimizedConfig: Required<OptimizedONNXConfig>;\n private kvCachePool: Map<string, any> = new Map();\n private systemPromptCache: Map<string, { tokens: number[]; timestamp: number }> = new Map();\n\n constructor(config: OptimizedONNXConfig = {}) {\n super(config);\n\n this.optimizedConfig = {\n modelPath: config.modelPath || './models/phi-4-mini/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 200,\n temperature: config.temperature || 0.3, // Lower for code (more deterministic)\n maxContextTokens: config.maxContextTokens || 2048, // Keep under 4K limit\n slidingWindow: config.slidingWindow !== false, // Default true\n cacheSystemPrompts: config.cacheSystemPrompts !== false, // Default true\n promptOptimization: config.promptOptimization !== false, // Default true\n topK: config.topK || 50,\n topP: config.topP || 0.9,\n repetitionPenalty: config.repetitionPenalty || 1.1\n };\n }\n\n /**\n * Estimate token count for a string\n */\n private estimateTokens(text: string): number {\n // Rough estimate: 1 token ≈ 4 characters for English\n return Math.ceil(text.length / 4);\n }\n\n /**\n * Optimize messages using sliding window context pruning\n */\n private optimizeContext(messages: Message[]): Message[] {\n if (!this.optimizedConfig.slidingWindow) {\n return messages;\n }\n\n const maxTokens = this.optimizedConfig.maxContextTokens;\n let totalTokens = 0;\n const optimized: Message[] = [];\n\n // Always keep system message if present\n const systemMsg = messages.find(m => m.role === 'system');\n if (systemMsg) {\n const content = typeof systemMsg.content === 'string'\n ? systemMsg.content\n : systemMsg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n optimized.push(systemMsg);\n totalTokens += this.estimateTokens(content);\n }\n\n // Add recent messages from end (most relevant)\n for (let i = messages.length - 1; i >= 0; i--) {\n const msg = messages[i];\n\n // Skip if already added (system message)\n if (msg.role === 'system') continue;\n\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n const tokens = this.estimateTokens(content);\n\n if (totalTokens + tokens > maxTokens) {\n console.log(`📊 Context pruned: Saved ${messages.length - optimized.length} messages, ~${totalTokens} tokens kept`);\n break;\n }\n\n optimized.unshift(msg);\n totalTokens += tokens;\n }\n\n // Ensure at least user message exists\n if (optimized.length === 0 || !optimized.some(m => m.role === 'user')) {\n const lastUserMsg = messages.slice().reverse().find(m => m.role === 'user');\n if (lastUserMsg) optimized.push(lastUserMsg);\n }\n\n return optimized;\n }\n\n /**\n * Optimize prompt for better quality output\n */\n private optimizePrompt(messages: Message[]): Message[] {\n if (!this.optimizedConfig.promptOptimization) {\n return messages;\n }\n\n const optimized = messages.map(msg => {\n if (msg.role === 'user') {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n // Add quality indicators for code tasks\n const isCodeTask = /write|create|implement|generate|code|function|class|api/i.test(content);\n\n if (isCodeTask && !content.includes('include') && !content.includes('with')) {\n const enhancedContent = `${content}. Include: proper error handling, type hints/types, and edge case handling. Return clean, production-ready code.`;\n\n return {\n ...msg,\n content: enhancedContent\n };\n }\n }\n\n return msg;\n });\n\n return optimized;\n }\n\n /**\n * Enhanced chat with optimization\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n // Step 1: Optimize context (sliding window)\n let messages = this.optimizeContext(params.messages);\n\n // Step 2: Optimize prompts for quality\n messages = this.optimizePrompt(messages);\n\n // Step 3: Call base implementation with optimized messages\n const enhancedParams = {\n ...params,\n messages,\n temperature: params.temperature || this.optimizedConfig.temperature,\n maxTokens: params.maxTokens || this.optimizedConfig.maxTokens\n };\n\n const response = await super.chat(enhancedParams);\n\n // Add optimization metadata\n if (response.metadata) {\n response.metadata.optimizations = {\n contextPruning: this.optimizedConfig.slidingWindow,\n promptOptimization: this.optimizedConfig.promptOptimization,\n systemPromptCaching: this.optimizedConfig.cacheSystemPrompts,\n originalMessageCount: params.messages.length,\n optimizedMessageCount: messages.length\n };\n }\n\n return response;\n }\n\n /**\n * Get optimization info\n */\n getOptimizationInfo() {\n return {\n ...super.getModelInfo(),\n optimizations: {\n maxContextTokens: this.optimizedConfig.maxContextTokens,\n slidingWindow: this.optimizedConfig.slidingWindow,\n cacheSystemPrompts: this.optimizedConfig.cacheSystemPrompts,\n promptOptimization: this.optimizedConfig.promptOptimization,\n temperature: this.optimizedConfig.temperature,\n topK: this.optimizedConfig.topK,\n topP: this.optimizedConfig.topP,\n repetitionPenalty: this.optimizedConfig.repetitionPenalty\n },\n cacheStats: {\n kvCachePoolSize: this.kvCachePool.size,\n systemPromptCacheSize: this.systemPromptCache.size\n }\n };\n }\n\n /**\n * Clear caches\n */\n clearCaches() {\n this.kvCachePool.clear();\n this.systemPromptCache.clear();\n console.log('🧹 Caches cleared');\n }\n}\n"]}
1
+ {"version":3,"file":"onnx-local-optimized.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAYH,OAAO,EAAE,iBAAiB,EAAmB,MAAM,iBAAiB,CAAC;AAYrE,MAAM,OAAO,qBAAsB,SAAQ,iBAAiB;IAClD,eAAe,CAAgC;IAC/C,WAAW,GAAqB,IAAI,GAAG,EAAE,CAAC;IAC1C,iBAAiB,GAAyD,IAAI,GAAG,EAAE,CAAC;IAE5F,YAAY,SAA8B,EAAE;QAC1C,KAAK,CAAC,MAAM,CAAC,CAAC;QAEd,IAAI,CAAC,eAAe,GAAG;YACrB,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,iFAAiF;YAChH,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG,EAAG,sCAAsC;YAC/E,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,IAAI,IAAI,EAAG,sBAAsB;YAC1E,aAAa,EAAE,MAAM,CAAC,aAAa,KAAK,KAAK,EAAG,eAAe;YAC/D,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,EAAE;YACvB,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,GAAG;YACxB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,IAAI,GAAG;SACnD,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY;QACjC,qDAAqD;QACrD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;IAED;;OAEG;IACK,eAAe,CAAC,QAAmB;QACzC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC;YACxC,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,gBAAgB,CAAC;QACxD,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,MAAM,SAAS,GAAc,EAAE,CAAC;QAEhC,wCAAwC;QACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QAC1D,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,OAAO,GAAG,OAAO,SAAS,CAAC,OAAO,KAAK,QAAQ;gBACnD,CAAC,CAAC,SAAS,CAAC,OAAO;gBACnB,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEzE,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC1B,WAAW,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;QAC9C,CAAC;QAED,+CAA+C;QAC/C,KAAK,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9C,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YAExB,yCAAyC;YACzC,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ;gBAAE,SAAS;YAEpC,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;YAE5C,IAAI,WAAW,GAAG,MAAM,GAAG,SAAS,EAAE,CAAC;gBACrC,OAAO,CAAC,GAAG,CAAC,4BAA4B,QAAQ,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,eAAe,WAAW,cAAc,CAAC,CAAC;gBACpH,MAAM;YACR,CAAC;YAED,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACvB,WAAW,IAAI,MAAM,CAAC;QACxB,CAAC;QAED,sCAAsC;QACtC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,EAAE,CAAC;YACtE,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;YAC5E,IAAI,WAAW;gBAAE,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC/C,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,kBAAkB,EAAE,CAAC;YAC7C,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE;YACnC,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACxB,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;oBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;oBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAEnE,wCAAwC;gBACxC,MAAM,UAAU,GAAG,0DAA0D,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAE5F,IAAI,UAAU,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC5E,MAAM,eAAe,GAAG,GAAG,OAAO,kHAAkH,CAAC;oBAErJ,OAAO;wBACL,GAAG,GAAG;wBACN,OAAO,EAAE,eAAe;qBACzB,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,4CAA4C;QAC5C,IAAI,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAErD,uCAAuC;QACvC,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;QAEzC,2DAA2D;QAC3D,MAAM,cAAc,GAAG;YACrB,GAAG,MAAM;YACT,QAAQ;YACR,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,eAAe,CAAC,WAAW;YACnE,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,eAAe,CAAC,SAAS;SAC9D,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAElD,4BAA4B;QAC5B,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;YACtB,QAAQ,CAAC,QAAQ,CAAC,aAAa,GAAG;gBAChC,cAAc,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBAClD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,mBAAmB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC5D,oBAAoB,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM;gBAC5C,qBAAqB,EAAE,QAAQ,CAAC,MAAM;aACvC,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,mBAAmB;QACjB,OAAO;YACL,GAAG,KAAK,CAAC,YAAY,EAAE;YACvB,aAAa,EAAE;gBACb,gBAAgB,EAAE,IAAI,CAAC,eAAe,CAAC,gBAAgB;gBACvD,aAAa,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBACjD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,WAAW,EAAE,IAAI,CAAC,eAAe,CAAC,WAAW;gBAC7C,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,iBAAiB,EAAE,IAAI,CAAC,eAAe,CAAC,iBAAiB;aAC1D;YACD,UAAU,EAAE;gBACV,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,IAAI;gBACtC,qBAAqB,EAAE,IAAI,CAAC,iBAAiB,CAAC,IAAI;aACnD;SACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,WAAW;QACT,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACnC,CAAC;CACF","sourcesContent":["/**\n * Optimized ONNX Runtime Local Inference Provider\n *\n * Improvements over base implementation:\n * - Context pruning for 2-4x speed improvement\n * - Prompt optimization for 30-50% quality improvement\n * - KV cache pooling for 20-30% faster generation\n * - Better generation parameters for code tasks\n * - System prompt caching\n */\n\nimport * as ort from 'onnxruntime-node';\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n ChatParams,\n ChatResponse,\n Message,\n ContentBlock,\n ProviderError\n} from '../types.js';\nimport { ONNXLocalProvider, ONNXLocalConfig } from './onnx-local.js';\n\nexport interface OptimizedONNXConfig extends ONNXLocalConfig {\n maxContextTokens?: number;\n slidingWindow?: boolean;\n cacheSystemPrompts?: boolean;\n promptOptimization?: boolean;\n topK?: number;\n topP?: number;\n repetitionPenalty?: number;\n}\n\nexport class OptimizedONNXProvider extends ONNXLocalProvider {\n private optimizedConfig: Required<OptimizedONNXConfig>;\n private kvCachePool: Map<string, any> = new Map();\n private systemPromptCache: Map<string, { tokens: number[]; timestamp: number }> = new Map();\n\n constructor(config: OptimizedONNXConfig = {}) {\n super(config);\n\n this.optimizedConfig = {\n modelPath: config.modelPath || './models/phi-4-mini/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 200,\n temperature: config.temperature || 0.3, // Lower for code (more deterministic)\n maxContextTokens: config.maxContextTokens || 2048, // Keep under 4K limit\n slidingWindow: config.slidingWindow !== false, // Default true\n cacheSystemPrompts: config.cacheSystemPrompts !== false, // Default true\n promptOptimization: config.promptOptimization !== false, // Default true\n topK: config.topK || 50,\n topP: config.topP || 0.9,\n repetitionPenalty: config.repetitionPenalty || 1.1\n };\n }\n\n /**\n * Estimate token count for a string\n */\n private estimateTokens(text: string): number {\n // Rough estimate: 1 token ≈ 4 characters for English\n return Math.ceil(text.length / 4);\n }\n\n /**\n * Optimize messages using sliding window context pruning\n */\n private optimizeContext(messages: Message[]): Message[] {\n if (!this.optimizedConfig.slidingWindow) {\n return messages;\n }\n\n const maxTokens = this.optimizedConfig.maxContextTokens;\n let totalTokens = 0;\n const optimized: Message[] = [];\n\n // Always keep system message if present\n const systemMsg = messages.find(m => m.role === 'system');\n if (systemMsg) {\n const content = typeof systemMsg.content === 'string'\n ? systemMsg.content\n : systemMsg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n optimized.push(systemMsg);\n totalTokens += this.estimateTokens(content);\n }\n\n // Add recent messages from end (most relevant)\n for (let i = messages.length - 1; i >= 0; i--) {\n const msg = messages[i];\n\n // Skip if already added (system message)\n if (msg.role === 'system') continue;\n\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n const tokens = this.estimateTokens(content);\n\n if (totalTokens + tokens > maxTokens) {\n console.log(`📊 Context pruned: Saved ${messages.length - optimized.length} messages, ~${totalTokens} tokens kept`);\n break;\n }\n\n optimized.unshift(msg);\n totalTokens += tokens;\n }\n\n // Ensure at least user message exists\n if (optimized.length === 0 || !optimized.some(m => m.role === 'user')) {\n const lastUserMsg = messages.slice().reverse().find(m => m.role === 'user');\n if (lastUserMsg) optimized.push(lastUserMsg);\n }\n\n return optimized;\n }\n\n /**\n * Optimize prompt for better quality output\n */\n private optimizePrompt(messages: Message[]): Message[] {\n if (!this.optimizedConfig.promptOptimization) {\n return messages;\n }\n\n const optimized = messages.map(msg => {\n if (msg.role === 'user') {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n // Add quality indicators for code tasks\n const isCodeTask = /write|create|implement|generate|code|function|class|api/i.test(content);\n\n if (isCodeTask && !content.includes('include') && !content.includes('with')) {\n const enhancedContent = `${content}. Include: proper error handling, type hints/types, and edge case handling. Return clean, production-ready code.`;\n\n return {\n ...msg,\n content: enhancedContent\n };\n }\n }\n\n return msg;\n });\n\n return optimized;\n }\n\n /**\n * Enhanced chat with optimization\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n // Step 1: Optimize context (sliding window)\n let messages = this.optimizeContext(params.messages);\n\n // Step 2: Optimize prompts for quality\n messages = this.optimizePrompt(messages);\n\n // Step 3: Call base implementation with optimized messages\n const enhancedParams = {\n ...params,\n messages,\n temperature: params.temperature || this.optimizedConfig.temperature,\n maxTokens: params.maxTokens || this.optimizedConfig.maxTokens\n };\n\n const response = await super.chat(enhancedParams);\n\n // Add optimization metadata\n if (response.metadata) {\n response.metadata.optimizations = {\n contextPruning: this.optimizedConfig.slidingWindow,\n promptOptimization: this.optimizedConfig.promptOptimization,\n systemPromptCaching: this.optimizedConfig.cacheSystemPrompts,\n originalMessageCount: params.messages.length,\n optimizedMessageCount: messages.length\n };\n }\n\n return response;\n }\n\n /**\n * Get optimization info\n */\n getOptimizationInfo() {\n return {\n ...super.getModelInfo(),\n optimizations: {\n maxContextTokens: this.optimizedConfig.maxContextTokens,\n slidingWindow: this.optimizedConfig.slidingWindow,\n cacheSystemPrompts: this.optimizedConfig.cacheSystemPrompts,\n promptOptimization: this.optimizedConfig.promptOptimization,\n temperature: this.optimizedConfig.temperature,\n topK: this.optimizedConfig.topK,\n topP: this.optimizedConfig.topP,\n repetitionPenalty: this.optimizedConfig.repetitionPenalty\n },\n cacheStats: {\n kvCachePoolSize: this.kvCachePool.size,\n systemPromptCacheSize: this.systemPromptCache.size\n }\n };\n }\n\n /**\n * Clear caches\n */\n clearCaches() {\n this.kvCachePool.clear();\n this.systemPromptCache.clear();\n console.log('🧹 Caches cleared');\n }\n}\n"]}
@@ -2,7 +2,6 @@
2
2
  * ONNX Runtime Local Inference Provider for Phi-4
3
3
  *
4
4
  * Uses onnxruntime-node for true local CPU/GPU inference
5
- * Falls back gracefully when native module isn't available (Windows)
6
5
  */
7
6
  import type { LLMProvider, ChatParams, ChatResponse, StreamChunk } from '../types.js';
8
7
  export interface ONNXLocalConfig {
@@ -1 +1 @@
1
- {"version":3,"file":"onnx-local.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAiBH,OAAO,KAAK,EACV,WAAW,EACX,UAAU,EACV,YAAY,EACZ,WAAW,EAIZ,MAAM,aAAa,CAAC;AAErB,MAAM,WAAW,eAAe;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,iBAAkB,YAAW,WAAW;IACnD,IAAI,SAAgB;IACpB,IAAI,EAAG,QAAQ,CAAU;IACzB,iBAAiB,UAAS;IAC1B,aAAa,UAAS;IACtB,WAAW,UAAS;IAEpB,OAAO,CAAC,OAAO,CAAa;IAC5B,OAAO,CAAC,MAAM,CAA4B;IAC1C,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,QAAQ,CAAa;gBAEjB,MAAM,GAAE,eAAoB;IASxC;;OAEG;YACW,aAAa;IAc3B;;OAEG;IACH,OAAO,CAAC,MAAM;IAId;;OAEG;IACH,OAAO,CAAC,MAAM;IAgBd;;OAEG;YACW,iBAAiB;IAiD/B;;OAEG;IACH,OAAO,CAAC,cAAc;IAqBtB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IA8BzB;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC;IA8IrD;;OAEG;IACI,MAAM,CAAC,MAAM,EAAE,UAAU,GAAG,cAAc,CAAC,WAAW,CAAC;IAI9D;;OAEG;IACH,oBAAoB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO;IAKjD;;OAEG;IACH,YAAY;;;;;;IASZ;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAU/B"}
1
+ {"version":3,"file":"onnx-local.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAOH,OAAO,KAAK,EACV,WAAW,EACX,UAAU,EACV,YAAY,EACZ,WAAW,EAIZ,MAAM,aAAa,CAAC;AAErB,MAAM,WAAW,eAAe;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,iBAAkB,YAAW,WAAW;IACnD,IAAI,SAAgB;IACpB,IAAI,EAAG,QAAQ,CAAU;IACzB,iBAAiB,UAAS;IAC1B,aAAa,UAAS;IACtB,WAAW,UAAS;IAEpB,OAAO,CAAC,OAAO,CAAqC;IACpD,OAAO,CAAC,MAAM,CAA4B;IAC1C,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,QAAQ,CAAa;gBAEjB,MAAM,GAAE,eAAoB;IASxC;;OAEG;YACW,aAAa;IAc3B;;OAEG;IACH,OAAO,CAAC,MAAM;IAId;;OAEG;IACH,OAAO,CAAC,MAAM;IAgBd;;OAEG;YACW,iBAAiB;IA6C/B;;OAEG;IACH,OAAO,CAAC,cAAc;IAqBtB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IA2BzB;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC;IA2IrD;;OAEG;IACI,MAAM,CAAC,MAAM,EAAE,UAAU,GAAG,cAAc,CAAC,WAAW,CAAC;IAI9D;;OAEG;IACH,oBAAoB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO;IAKjD;;OAEG;IACH,YAAY;;;;;;IASZ;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAU/B"}
@@ -2,18 +2,8 @@
2
2
  * ONNX Runtime Local Inference Provider for Phi-4
3
3
  *
4
4
  * Uses onnxruntime-node for true local CPU/GPU inference
5
- * Falls back gracefully when native module isn't available (Windows)
6
5
  */
7
- let ort = null;
8
- let ortAvailable = false;
9
- // Dynamic import for optional onnxruntime-node
10
- try {
11
- ort = await import('onnxruntime-node');
12
- ortAvailable = true;
13
- }
14
- catch {
15
- console.warn('[ONNX] onnxruntime-node not available - local inference disabled');
16
- }
6
+ import * as ort from 'onnxruntime-node';
17
7
  import { get_encoding } from 'tiktoken';
18
8
  import { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';
19
9
  export class ONNXLocalProvider {
@@ -82,9 +72,6 @@ export class ONNXLocalProvider {
82
72
  async initializeSession() {
83
73
  if (this.session)
84
74
  return;
85
- if (!ortAvailable || !ort) {
86
- throw new Error('onnxruntime-node not available - install with: npm install onnxruntime-node');
87
- }
88
75
  try {
89
76
  // Ensure model is downloaded
90
77
  console.log(`🔍 Checking for Phi-4 ONNX model...`);
@@ -148,14 +135,12 @@ export class ONNXLocalProvider {
148
135
  const numKVHeads = 8;
149
136
  const headDim = 128; // 3072 / 24 = 128
150
137
  const kvCache = {};
151
- // Get Tensor constructor - use any for flexible access
152
- const TensorClass = ort.Tensor;
153
138
  // Initialize empty cache for each layer (key and value)
154
139
  for (let i = 0; i < numLayers; i++) {
155
140
  // Empty cache: [batch_size, num_kv_heads, 0, head_dim]
156
141
  const emptyCache = new Float32Array(0);
157
- kvCache[`past_key_values.${i}.key`] = new TensorClass('float32', emptyCache, [batchSize, numKVHeads, 0, headDim]);
158
- kvCache[`past_key_values.${i}.value`] = new TensorClass('float32', emptyCache, [batchSize, numKVHeads, 0, headDim]);
142
+ kvCache[`past_key_values.${i}.key`] = new ort.Tensor('float32', emptyCache, [batchSize, numKVHeads, 0, headDim]);
143
+ kvCache[`past_key_values.${i}.value`] = new ort.Tensor('float32', emptyCache, [batchSize, numKVHeads, 0, headDim]);
159
144
  }
160
145
  return kvCache;
161
146
  }
@@ -183,13 +168,11 @@ export class ONNXLocalProvider {
183
168
  // For first step, use all input tokens; for subsequent steps, use only last token
184
169
  const currentInputIds = step === 0 ? inputIds : [outputIds[outputIds.length - 1]];
185
170
  const currentSeqLen = currentInputIds.length;
186
- // Get Tensor constructor - use any for flexible access
187
- const TensorClass = ort.Tensor;
188
171
  // Create input tensor for current step
189
- const inputTensor = new TensorClass('int64', BigInt64Array.from(currentInputIds.map(BigInt)), [1, currentSeqLen]);
172
+ const inputTensor = new ort.Tensor('int64', BigInt64Array.from(currentInputIds.map(BigInt)), [1, currentSeqLen]);
190
173
  // Create attention mask for current step
191
174
  const totalSeqLen = allTokenIds.length;
192
- const attentionMask = new TensorClass('int64', BigInt64Array.from(Array(totalSeqLen).fill(1n)), [1, totalSeqLen]);
175
+ const attentionMask = new ort.Tensor('int64', BigInt64Array.from(Array(totalSeqLen).fill(1n)), [1, totalSeqLen]);
193
176
  // Build feeds with input, attention mask, and KV cache
194
177
  const feeds = {
195
178
  input_ids: inputTensor,
@@ -1 +1 @@
1
- {"version":3,"file":"onnx-local.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,IAAI,GAAG,GAAQ,IAAI,CAAC;AACpB,IAAI,YAAY,GAAG,KAAK,CAAC;AAEzB,+CAA+C;AAC/C,IAAI,CAAC;IACH,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;IACvC,YAAY,GAAG,IAAI,CAAC;AACtB,CAAC;AAAC,MAAM,CAAC;IACP,OAAO,CAAC,IAAI,CAAC,kEAAkE,CAAC,CAAC;AACnF,CAAC;AAID,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAkBnF,MAAM,OAAO,iBAAiB;IAC5B,IAAI,GAAG,YAAY,CAAC;IACpB,IAAI,GAAG,QAAiB,CAAC;IACzB,iBAAiB,GAAG,KAAK,CAAC,CAAC,mDAAmD;IAC9E,aAAa,GAAG,KAAK,CAAC;IACtB,WAAW,GAAG,KAAK,CAAC;IAEZ,OAAO,GAAQ,IAAI,CAAC;IACpB,MAAM,CAA4B;IAClC,SAAS,GAAQ,IAAI,CAAC;IACtB,QAAQ,GAAQ,IAAI,CAAC;IAE7B,YAAY,SAA0B,EAAE;QACtC,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,4EAA4E;YAC3G,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG;SACvC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa;QACzB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAE1B,IAAI,CAAC;YACH,qDAAqD;YACrD,IAAI,CAAC,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;YAE5C,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QAC3D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,4BAA4B,EAAE,KAAK,CAAC,CAAC;YACnD,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,IAAY;QACzB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,GAAa;QAC1B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3D,6CAA6C;YAC7C,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAChC,OAAO,OAAO,CAAC;YACjB,CAAC;iBAAM,IAAI,OAAO,YAAY,UAAU,IAAI,OAAO,YAAY,MAAM,EAAE,CAAC;gBACtE,OAAO,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAC3C,CAAC;YACD,OAAO,MAAM,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;YACxD,OAAO,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB;QAC7B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO;QAEzB,IAAI,CAAC,YAAY,IAAI,CAAC,GAAG,EAAE,CAAC;YAC1B,MAAM,IAAI,KAAK,CAAC,6EAA6E,CAAC,CAAC;QACjG,CAAC;QAED,IAAI,CAAC;YACH,6BAA6B;YAC7B,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;YAEnD,MAAM,SAAS,GAAG,MAAM,eAAe,CAAC,CAAC,QAAQ,EAAE,EAAE;gBACnD,IAAI,QAAQ,CAAC,UAAU,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,iBAAiB;oBACnD,OAAO,CAAC,GAAG,CAAC,sBAAsB,eAAe,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;gBAChF,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,uCAAuC;YACvC,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,SAAS,CAAC;YAElC,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YAE/D,IAAI,CAAC,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAC9C,IAAI,CAAC,MAAM,CAAC,SAAS,EACrB;gBACE,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAyB;gBACzD,sBAAsB,EAAE,KAAK;gBAC7B,iBAAiB,EAAE,IAAI;gBACvB,gBAAgB,EAAE,IAAI;aACvB,CACF,CAAC;YAEF,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEpF,iBAAiB;YACjB,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAE7B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,eAAe;gBACrB,OAAO,EAAE,oCAAoC,KAAK,EAAE;gBACpD,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,KAAK;aACjB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC1B,MAAM,IAAI,eAAe,OAAO,WAAW,CAAC;YAC9C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC/B,MAAM,IAAI,aAAa,OAAO,WAAW,CAAC;YAC5C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACpC,MAAM,IAAI,kBAAkB,OAAO,WAAW,CAAC;YACjD,CAAC;QACH,CAAC;QAED,MAAM,IAAI,iBAAiB,CAAC;QAC5B,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,SAAiB,EAAE,cAAsB;QACjE,MAAM,SAAS,GAAG,EAAE,CAAC;QACrB,MAAM,UAAU,GAAG,CAAC,CAAC;QACrB,MAAM,OAAO,GAAG,GAAG,CAAC,CAAC,kBAAkB;QACvC,MAAM,OAAO,GAAwB,EAAE,CAAC;QAExC,uDAAuD;QACvD,MAAM,WAAW,GAAI,GAAW,CAAC,MAAM,CAAC;QAExC,wDAAwD;QACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,uDAAuD;YACvD,MAAM,UAAU,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;YAEvC,OAAO,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,IAAI,WAAW,CACnD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;YAEF,OAAO,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,IAAI,WAAW,CACrD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;QACJ,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAEpD,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;YAEnD,2CAA2C;YAC3C,IAAI,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAE/C,6BAA6B;YAC7B,MAAM,WAAW,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;YAClC,MAAM,SAAS,GAAa,EAAE,CAAC;YAE/B,8CAA8C;YAC9C,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAEhF,iCAAiC;YACjC,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;YAE/D,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,YAAY,EAAE,IAAI,EAAE,EAAE,CAAC;gBAC/C,kFAAkF;gBAClF,MAAM,eAAe,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClF,MAAM,aAAa,GAAG,eAAe,CAAC,MAAM,CAAC;gBAE7C,uDAAuD;gBACvD,MAAM,WAAW,GAAI,GAAW,CAAC,MAAM,CAAC;gBAExC,uCAAuC;gBACvC,MAAM,WAAW,GAAG,IAAI,WAAW,CACjC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,aAAa,CAAC,CACnB,CAAC;gBAEF,yCAAyC;gBACzC,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;gBACvC,MAAM,aAAa,GAAG,IAAI,WAAW,CACnC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,WAAW,CAAC,CACjB,CAAC;gBAEF,uDAAuD;gBACvD,MAAM,KAAK,GAAwB;oBACjC,SAAS,EAAE,WAAW;oBACtB,cAAc,EAAE,aAAa;oBAC7B,GAAG,WAAW;iBACf,CAAC;gBAEF,gBAAgB;gBAChB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBAE/C,4CAA4C;gBAC5C,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,IAAoB,CAAC;gBACnD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAEtE,gCAAgC;gBAChC,MAAM,qBAAqB,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;gBAE9D,uCAAuC;gBACvC,IAAI,SAAS,GAAG,CAAC,CAAC;gBAClB,IAAI,MAAM,GAAG,CAAC,QAAQ,CAAC;gBAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnC,MAAM,KAAK,GAAG,MAAM,CAAC,qBAAqB,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;oBAClG,IAAI,KAAK,GAAG,MAAM,EAAE,CAAC;wBACnB,MAAM,GAAG,KAAK,CAAC;wBACf,SAAS,GAAG,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;gBAED,gBAAgB;gBAChB,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC1B,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAE5B,wDAAwD;gBACxD,IAAI,SAAS,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;oBACvC,OAAO,CAAC,GAAG,CAAC,2BAA2B,SAAS,EAAE,CAAC,CAAC;oBACpD,MAAM;gBACR,CAAC;gBAED,kDAAkD;gBAClD,WAAW,GAAG,EAAE,CAAC;gBACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5B,WAAW,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACtE,WAAW,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBAC5E,CAAC;gBAED,qBAAqB;gBACrB,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC1B,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC;gBACpD,CAAC;YACH,CAAC;YAED,yCAAyC;YACzC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACvC,MAAM,eAAe,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAEzE,OAAO,CAAC,GAAG,CAAC,gBAAgB,aAAa,EAAE,CAAC,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,gBAAgB,OAAO,OAAO,eAAe,cAAc,CAAC,CAAC;YAEzE,MAAM,OAAO,GAAmB,CAAC;oBAC/B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,aAAa,CAAC,IAAI,EAAE;iBAC3B,CAAC,CAAC;YAEH,OAAO;gBACL,EAAE,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,EAAE;gBAC9B,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;gBAC5B,OAAO;gBACP,UAAU,EAAE,UAAU;gBACtB,KAAK,EAAE;oBACL,WAAW,EAAE,QAAQ,CAAC,MAAM;oBAC5B,YAAY,EAAE,SAAS,CAAC,MAAM;iBAC/B;gBACD,QAAQ,EAAE;oBACR,QAAQ,EAAE,YAAY;oBACtB,KAAK,EAAE,0BAA0B;oBACjC,OAAO;oBACP,IAAI,EAAE,CAAC,EAAE,0BAA0B;oBACnC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;oBAClD,eAAe,EAAE,UAAU,CAAC,eAAe,CAAC;iBAC7C;aACF,CAAC;QAEJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,oBAAoB;gBAC1B,OAAO,EAAE,0BAA0B,KAAK,EAAE;gBAC1C,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,IAAI;aAChB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,CAAC,MAAM,CAAC,MAAkB;QAC9B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC5E,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,QAAkB;QACrC,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3B,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACpD,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAChC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;YAClD,WAAW,EAAE,IAAI,CAAC,OAAO,KAAK,IAAI;YAClC,eAAe,EAAE,IAAI,CAAC,QAAQ,KAAK,IAAI;SACxC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,gEAAgE;YAChE,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACrB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACvB,CAAC;IACH,CAAC;CACF","sourcesContent":["/**\n * ONNX Runtime Local Inference Provider for Phi-4\n *\n * Uses onnxruntime-node for true local CPU/GPU inference\n * Falls back gracefully when native module isn't available (Windows)\n */\n\nlet ort: any = null;\nlet ortAvailable = false;\n\n// Dynamic import for optional onnxruntime-node\ntry {\n ort = await import('onnxruntime-node');\n ortAvailable = true;\n} catch {\n console.warn('[ONNX] onnxruntime-node not available - local inference disabled');\n}\n\nimport * as fs from 'fs';\nimport * as path from 'path';\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n LLMProvider,\n ChatParams,\n ChatResponse,\n StreamChunk,\n ProviderError,\n Message,\n ContentBlock\n} from '../types.js';\n\nexport interface ONNXLocalConfig {\n modelPath?: string;\n executionProviders?: string[];\n maxTokens?: number;\n temperature?: number;\n}\n\nexport class ONNXLocalProvider implements LLMProvider {\n name = 'onnx-local';\n type = 'custom' as const;\n supportsStreaming = false; // Streaming requires complex token generation loop\n supportsTools = false;\n supportsMCP = false;\n\n private session: any = null;\n private config: Required<ONNXLocalConfig>;\n private tokenizer: any = null;\n private tiktoken: any = null;\n\n constructor(config: ONNXLocalConfig = {}) {\n this.config = {\n modelPath: config.modelPath || './models/phi-4/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 100,\n temperature: config.temperature || 0.7\n };\n }\n\n /**\n * Load optimized tiktoken tokenizer (cl100k_base for Phi-4)\n */\n private async loadTokenizer(): Promise<void> {\n if (this.tiktoken) return;\n\n try {\n // Use cl100k_base encoding (GPT-4, similar to Phi-4)\n this.tiktoken = get_encoding('cl100k_base');\n\n console.log('✅ Tokenizer loaded (tiktoken cl100k_base)');\n } catch (error) {\n console.error('❌ Failed to load tiktoken:', error);\n throw new Error(`Tokenizer loading failed: ${error}`);\n }\n }\n\n /**\n * Encode text using tiktoken (fast BPE)\n */\n private encode(text: string): number[] {\n return Array.from(this.tiktoken.encode(text));\n }\n\n /**\n * Decode tokens using tiktoken\n */\n private decode(ids: number[]): string {\n try {\n const decoded = this.tiktoken.decode(new Uint32Array(ids));\n // tiktoken returns buffer, convert to string\n if (typeof decoded === 'string') {\n return decoded;\n } else if (decoded instanceof Uint8Array || decoded instanceof Buffer) {\n return new TextDecoder().decode(decoded);\n }\n return String(decoded);\n } catch (error) {\n console.warn('Decode error, returning raw IDs:', error);\n return ids.join(',');\n }\n }\n\n /**\n * Initialize ONNX session (with automatic model download)\n */\n private async initializeSession(): Promise<void> {\n if (this.session) return;\n\n if (!ortAvailable || !ort) {\n throw new Error('onnxruntime-node not available - install with: npm install onnxruntime-node');\n }\n\n try {\n // Ensure model is downloaded\n console.log(`🔍 Checking for Phi-4 ONNX model...`);\n\n const modelPath = await ensurePhi4Model((progress) => {\n if (progress.percentage % 10 < 1) { // Log every ~10%\n console.log(` 📥 Downloading: ${ModelDownloader.formatProgress(progress)}`);\n }\n });\n\n // Update config with actual model path\n this.config.modelPath = modelPath;\n\n console.log(`📦 Loading ONNX model: ${this.config.modelPath}`);\n\n this.session = await ort.InferenceSession.create(\n this.config.modelPath,\n {\n executionProviders: this.config.executionProviders as any,\n graphOptimizationLevel: 'all',\n enableCpuMemArena: true,\n enableMemPattern: true\n }\n );\n\n console.log(`✅ ONNX model loaded`);\n console.log(`🔧 Execution providers: ${this.config.executionProviders.join(', ')}`);\n\n // Load tokenizer\n await this.loadTokenizer();\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInitError',\n message: `Failed to initialize ONNX model: ${error}`,\n provider: 'onnx-local',\n retryable: false\n };\n throw providerError;\n }\n }\n\n /**\n * Format messages for Phi-4 chat template\n */\n private formatMessages(messages: Message[]): string {\n let prompt = '';\n\n for (const msg of messages) {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n if (msg.role === 'system') {\n prompt += `<|system|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'user') {\n prompt += `<|user|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'assistant') {\n prompt += `<|assistant|>\\n${content}<|end|>\\n`;\n }\n }\n\n prompt += '<|assistant|>\\n';\n return prompt;\n }\n\n /**\n * Initialize KV cache tensors for all 32 layers\n * Phi-4 architecture: 32 layers, 8 KV heads, 128 head_dim\n */\n private initializeKVCache(batchSize: number, sequenceLength: number) {\n const numLayers = 32;\n const numKVHeads = 8;\n const headDim = 128; // 3072 / 24 = 128\n const kvCache: Record<string, any> = {};\n\n // Get Tensor constructor - use any for flexible access\n const TensorClass = (ort as any).Tensor;\n\n // Initialize empty cache for each layer (key and value)\n for (let i = 0; i < numLayers; i++) {\n // Empty cache: [batch_size, num_kv_heads, 0, head_dim]\n const emptyCache = new Float32Array(0);\n\n kvCache[`past_key_values.${i}.key`] = new TensorClass(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n\n kvCache[`past_key_values.${i}.value`] = new TensorClass(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n }\n\n return kvCache;\n }\n\n /**\n * Chat completion using ONNX with KV cache\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n await this.initializeSession();\n\n const startTime = Date.now();\n const prompt = this.formatMessages(params.messages);\n\n try {\n // Tokenize input using optimized tiktoken\n const inputIds = this.encode(prompt);\n console.log(`📝 Input tokens: ${inputIds.length}`);\n\n // Initialize KV cache (reusable for batch)\n let pastKVCache = this.initializeKVCache(1, 0);\n\n // Track all generated tokens\n const allTokenIds = [...inputIds];\n const outputIds: number[] = [];\n\n // Pre-allocate tensor buffers for performance\n const maxSeqLen = inputIds.length + (params.maxTokens || this.config.maxTokens);\n\n // Autoregressive generation loop\n const maxNewTokens = params.maxTokens || this.config.maxTokens;\n\n for (let step = 0; step < maxNewTokens; step++) {\n // For first step, use all input tokens; for subsequent steps, use only last token\n const currentInputIds = step === 0 ? inputIds : [outputIds[outputIds.length - 1]];\n const currentSeqLen = currentInputIds.length;\n\n // Get Tensor constructor - use any for flexible access\n const TensorClass = (ort as any).Tensor;\n\n // Create input tensor for current step\n const inputTensor = new TensorClass(\n 'int64',\n BigInt64Array.from(currentInputIds.map(BigInt)),\n [1, currentSeqLen]\n );\n\n // Create attention mask for current step\n const totalSeqLen = allTokenIds.length;\n const attentionMask = new TensorClass(\n 'int64',\n BigInt64Array.from(Array(totalSeqLen).fill(1n)),\n [1, totalSeqLen]\n );\n\n // Build feeds with input, attention mask, and KV cache\n const feeds: Record<string, any> = {\n input_ids: inputTensor,\n attention_mask: attentionMask,\n ...pastKVCache\n };\n\n // Run inference\n const results = await this.session!.run(feeds);\n\n // Get logits for next token (last position)\n const logits = results.logits.data as Float32Array;\n const vocabSize = results.logits.dims[results.logits.dims.length - 1];\n\n // Extract logits for last token\n const lastTokenLogitsOffset = (currentSeqLen - 1) * vocabSize;\n\n // Apply temperature and get next token\n let nextToken = 0;\n let maxVal = -Infinity;\n\n for (let i = 0; i < vocabSize; i++) {\n const logit = logits[lastTokenLogitsOffset + i] / (params.temperature || this.config.temperature);\n if (logit > maxVal) {\n maxVal = logit;\n nextToken = i;\n }\n }\n\n // Add to output\n outputIds.push(nextToken);\n allTokenIds.push(nextToken);\n\n // Check for end token (2 is typical EOS for Phi models)\n if (nextToken === 2 || nextToken === 0) {\n console.log(`🛑 Stop token detected: ${nextToken}`);\n break;\n }\n\n // Update KV cache from outputs for next iteration\n pastKVCache = {};\n for (let i = 0; i < 32; i++) {\n pastKVCache[`past_key_values.${i}.key`] = results[`present.${i}.key`];\n pastKVCache[`past_key_values.${i}.value`] = results[`present.${i}.value`];\n }\n\n // Progress indicator\n if ((step + 1) % 10 === 0) {\n console.log(`🔄 Generated ${step + 1} tokens...`);\n }\n }\n\n // Decode output using optimized tiktoken\n const generatedText = this.decode(outputIds);\n const latency = Date.now() - startTime;\n const tokensPerSecond = (outputIds.length / (latency / 1000)).toFixed(1);\n\n console.log(`✅ Generated: ${generatedText}`);\n console.log(`⏱️ Latency: ${latency}ms (${tokensPerSecond} tokens/sec)`);\n\n const content: ContentBlock[] = [{\n type: 'text',\n text: generatedText.trim()\n }];\n\n return {\n id: `onnx-local-${Date.now()}`,\n model: this.config.modelPath,\n content,\n stopReason: 'end_turn',\n usage: {\n inputTokens: inputIds.length,\n outputTokens: outputIds.length\n },\n metadata: {\n provider: 'onnx-local',\n model: 'Phi-4-mini-instruct-onnx',\n latency,\n cost: 0, // Local inference is free\n executionProviders: this.config.executionProviders,\n tokensPerSecond: parseFloat(tokensPerSecond)\n }\n };\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInferenceError',\n message: `ONNX inference failed: ${error}`,\n provider: 'onnx-local',\n retryable: true\n };\n throw providerError;\n }\n }\n\n /**\n * Streaming not implemented (requires complex generation loop)\n */\n async *stream(params: ChatParams): AsyncGenerator<StreamChunk> {\n throw new Error('Streaming not yet implemented for ONNX local inference');\n }\n\n /**\n * Validate capabilities\n */\n validateCapabilities(features: string[]): boolean {\n const supported = ['chat'];\n return features.every(f => supported.includes(f));\n }\n\n /**\n * Get model info\n */\n getModelInfo() {\n return {\n modelPath: this.config.modelPath,\n executionProviders: this.config.executionProviders,\n initialized: this.session !== null,\n tokenizerLoaded: this.tiktoken !== null\n };\n }\n\n /**\n * Cleanup resources\n */\n async dispose(): Promise<void> {\n if (this.session) {\n // ONNX Runtime sessions don't have explicit disposal in Node.js\n this.session = null;\n }\n if (this.tiktoken) {\n this.tiktoken.free();\n this.tiktoken = null;\n }\n }\n}\n"]}
1
+ {"version":3,"file":"onnx-local.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,GAAG,MAAM,kBAAkB,CAAC;AAGxC,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAkBnF,MAAM,OAAO,iBAAiB;IAC5B,IAAI,GAAG,YAAY,CAAC;IACpB,IAAI,GAAG,QAAiB,CAAC;IACzB,iBAAiB,GAAG,KAAK,CAAC,CAAC,mDAAmD;IAC9E,aAAa,GAAG,KAAK,CAAC;IACtB,WAAW,GAAG,KAAK,CAAC;IAEZ,OAAO,GAAgC,IAAI,CAAC;IAC5C,MAAM,CAA4B;IAClC,SAAS,GAAQ,IAAI,CAAC;IACtB,QAAQ,GAAQ,IAAI,CAAC;IAE7B,YAAY,SAA0B,EAAE;QACtC,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,4EAA4E;YAC3G,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG;SACvC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa;QACzB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAE1B,IAAI,CAAC;YACH,qDAAqD;YACrD,IAAI,CAAC,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;YAE5C,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QAC3D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,4BAA4B,EAAE,KAAK,CAAC,CAAC;YACnD,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,IAAY;QACzB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,GAAa;QAC1B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3D,6CAA6C;YAC7C,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAChC,OAAO,OAAO,CAAC;YACjB,CAAC;iBAAM,IAAI,OAAO,YAAY,UAAU,IAAI,OAAO,YAAY,MAAM,EAAE,CAAC;gBACtE,OAAO,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAC3C,CAAC;YACD,OAAO,MAAM,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;YACxD,OAAO,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB;QAC7B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO;QAEzB,IAAI,CAAC;YACH,6BAA6B;YAC7B,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;YAEnD,MAAM,SAAS,GAAG,MAAM,eAAe,CAAC,CAAC,QAAQ,EAAE,EAAE;gBACnD,IAAI,QAAQ,CAAC,UAAU,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,iBAAiB;oBACnD,OAAO,CAAC,GAAG,CAAC,sBAAsB,eAAe,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;gBAChF,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,uCAAuC;YACvC,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,SAAS,CAAC;YAElC,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YAE/D,IAAI,CAAC,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAC9C,IAAI,CAAC,MAAM,CAAC,SAAS,EACrB;gBACE,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAyB;gBACzD,sBAAsB,EAAE,KAAK;gBAC7B,iBAAiB,EAAE,IAAI;gBACvB,gBAAgB,EAAE,IAAI;aACvB,CACF,CAAC;YAEF,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEpF,iBAAiB;YACjB,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAE7B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,eAAe;gBACrB,OAAO,EAAE,oCAAoC,KAAK,EAAE;gBACpD,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,KAAK;aACjB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC1B,MAAM,IAAI,eAAe,OAAO,WAAW,CAAC;YAC9C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC/B,MAAM,IAAI,aAAa,OAAO,WAAW,CAAC;YAC5C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACpC,MAAM,IAAI,kBAAkB,OAAO,WAAW,CAAC;YACjD,CAAC;QACH,CAAC;QAED,MAAM,IAAI,iBAAiB,CAAC;QAC5B,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,SAAiB,EAAE,cAAsB;QACjE,MAAM,SAAS,GAAG,EAAE,CAAC;QACrB,MAAM,UAAU,GAAG,CAAC,CAAC;QACrB,MAAM,OAAO,GAAG,GAAG,CAAC,CAAC,kBAAkB;QACvC,MAAM,OAAO,GAA+B,EAAE,CAAC;QAE/C,wDAAwD;QACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,uDAAuD;YACvD,MAAM,UAAU,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;YAEvC,OAAO,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAClD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;YAEF,OAAO,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CACpD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;QACJ,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAEpD,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;YAEnD,2CAA2C;YAC3C,IAAI,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAE/C,6BAA6B;YAC7B,MAAM,WAAW,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;YAClC,MAAM,SAAS,GAAa,EAAE,CAAC;YAE/B,8CAA8C;YAC9C,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAEhF,iCAAiC;YACjC,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;YAE/D,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,YAAY,EAAE,IAAI,EAAE,EAAE,CAAC;gBAC/C,kFAAkF;gBAClF,MAAM,eAAe,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClF,MAAM,aAAa,GAAG,eAAe,CAAC,MAAM,CAAC;gBAE7C,uCAAuC;gBACvC,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,MAAM,CAChC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,aAAa,CAAC,CACnB,CAAC;gBAEF,yCAAyC;gBACzC,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;gBACvC,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,MAAM,CAClC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,WAAW,CAAC,CACjB,CAAC;gBAEF,uDAAuD;gBACvD,MAAM,KAAK,GAA+B;oBACxC,SAAS,EAAE,WAAW;oBACtB,cAAc,EAAE,aAAa;oBAC7B,GAAG,WAAW;iBACf,CAAC;gBAEF,gBAAgB;gBAChB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBAE/C,4CAA4C;gBAC5C,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,IAAoB,CAAC;gBACnD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAEtE,gCAAgC;gBAChC,MAAM,qBAAqB,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;gBAE9D,uCAAuC;gBACvC,IAAI,SAAS,GAAG,CAAC,CAAC;gBAClB,IAAI,MAAM,GAAG,CAAC,QAAQ,CAAC;gBAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnC,MAAM,KAAK,GAAG,MAAM,CAAC,qBAAqB,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;oBAClG,IAAI,KAAK,GAAG,MAAM,EAAE,CAAC;wBACnB,MAAM,GAAG,KAAK,CAAC;wBACf,SAAS,GAAG,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;gBAED,gBAAgB;gBAChB,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC1B,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAE5B,wDAAwD;gBACxD,IAAI,SAAS,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;oBACvC,OAAO,CAAC,GAAG,CAAC,2BAA2B,SAAS,EAAE,CAAC,CAAC;oBACpD,MAAM;gBACR,CAAC;gBAED,kDAAkD;gBAClD,WAAW,GAAG,EAAE,CAAC;gBACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5B,WAAW,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACtE,WAAW,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBAC5E,CAAC;gBAED,qBAAqB;gBACrB,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC1B,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC;gBACpD,CAAC;YACH,CAAC;YAED,yCAAyC;YACzC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACvC,MAAM,eAAe,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAEzE,OAAO,CAAC,GAAG,CAAC,gBAAgB,aAAa,EAAE,CAAC,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,gBAAgB,OAAO,OAAO,eAAe,cAAc,CAAC,CAAC;YAEzE,MAAM,OAAO,GAAmB,CAAC;oBAC/B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,aAAa,CAAC,IAAI,EAAE;iBAC3B,CAAC,CAAC;YAEH,OAAO;gBACL,EAAE,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,EAAE;gBAC9B,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;gBAC5B,OAAO;gBACP,UAAU,EAAE,UAAU;gBACtB,KAAK,EAAE;oBACL,WAAW,EAAE,QAAQ,CAAC,MAAM;oBAC5B,YAAY,EAAE,SAAS,CAAC,MAAM;iBAC/B;gBACD,QAAQ,EAAE;oBACR,QAAQ,EAAE,YAAY;oBACtB,KAAK,EAAE,0BAA0B;oBACjC,OAAO;oBACP,IAAI,EAAE,CAAC,EAAE,0BAA0B;oBACnC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;oBAClD,eAAe,EAAE,UAAU,CAAC,eAAe,CAAC;iBAC7C;aACF,CAAC;QAEJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,oBAAoB;gBAC1B,OAAO,EAAE,0BAA0B,KAAK,EAAE;gBAC1C,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,IAAI;aAChB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,CAAC,MAAM,CAAC,MAAkB;QAC9B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC5E,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,QAAkB;QACrC,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3B,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACpD,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAChC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;YAClD,WAAW,EAAE,IAAI,CAAC,OAAO,KAAK,IAAI;YAClC,eAAe,EAAE,IAAI,CAAC,QAAQ,KAAK,IAAI;SACxC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,gEAAgE;YAChE,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACrB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACvB,CAAC;IACH,CAAC;CACF","sourcesContent":["/**\n * ONNX Runtime Local Inference Provider for Phi-4\n *\n * Uses onnxruntime-node for true local CPU/GPU inference\n */\n\nimport * as ort from 'onnxruntime-node';\nimport * as fs from 'fs';\nimport * as path from 'path';\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n LLMProvider,\n ChatParams,\n ChatResponse,\n StreamChunk,\n ProviderError,\n Message,\n ContentBlock\n} from '../types.js';\n\nexport interface ONNXLocalConfig {\n modelPath?: string;\n executionProviders?: string[];\n maxTokens?: number;\n temperature?: number;\n}\n\nexport class ONNXLocalProvider implements LLMProvider {\n name = 'onnx-local';\n type = 'custom' as const;\n supportsStreaming = false; // Streaming requires complex token generation loop\n supportsTools = false;\n supportsMCP = false;\n\n private session: ort.InferenceSession | null = null;\n private config: Required<ONNXLocalConfig>;\n private tokenizer: any = null;\n private tiktoken: any = null;\n\n constructor(config: ONNXLocalConfig = {}) {\n this.config = {\n modelPath: config.modelPath || './models/phi-4/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 100,\n temperature: config.temperature || 0.7\n };\n }\n\n /**\n * Load optimized tiktoken tokenizer (cl100k_base for Phi-4)\n */\n private async loadTokenizer(): Promise<void> {\n if (this.tiktoken) return;\n\n try {\n // Use cl100k_base encoding (GPT-4, similar to Phi-4)\n this.tiktoken = get_encoding('cl100k_base');\n\n console.log('✅ Tokenizer loaded (tiktoken cl100k_base)');\n } catch (error) {\n console.error('❌ Failed to load tiktoken:', error);\n throw new Error(`Tokenizer loading failed: ${error}`);\n }\n }\n\n /**\n * Encode text using tiktoken (fast BPE)\n */\n private encode(text: string): number[] {\n return Array.from(this.tiktoken.encode(text));\n }\n\n /**\n * Decode tokens using tiktoken\n */\n private decode(ids: number[]): string {\n try {\n const decoded = this.tiktoken.decode(new Uint32Array(ids));\n // tiktoken returns buffer, convert to string\n if (typeof decoded === 'string') {\n return decoded;\n } else if (decoded instanceof Uint8Array || decoded instanceof Buffer) {\n return new TextDecoder().decode(decoded);\n }\n return String(decoded);\n } catch (error) {\n console.warn('Decode error, returning raw IDs:', error);\n return ids.join(',');\n }\n }\n\n /**\n * Initialize ONNX session (with automatic model download)\n */\n private async initializeSession(): Promise<void> {\n if (this.session) return;\n\n try {\n // Ensure model is downloaded\n console.log(`🔍 Checking for Phi-4 ONNX model...`);\n\n const modelPath = await ensurePhi4Model((progress) => {\n if (progress.percentage % 10 < 1) { // Log every ~10%\n console.log(` 📥 Downloading: ${ModelDownloader.formatProgress(progress)}`);\n }\n });\n\n // Update config with actual model path\n this.config.modelPath = modelPath;\n\n console.log(`📦 Loading ONNX model: ${this.config.modelPath}`);\n\n this.session = await ort.InferenceSession.create(\n this.config.modelPath,\n {\n executionProviders: this.config.executionProviders as any,\n graphOptimizationLevel: 'all',\n enableCpuMemArena: true,\n enableMemPattern: true\n }\n );\n\n console.log(`✅ ONNX model loaded`);\n console.log(`🔧 Execution providers: ${this.config.executionProviders.join(', ')}`);\n\n // Load tokenizer\n await this.loadTokenizer();\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInitError',\n message: `Failed to initialize ONNX model: ${error}`,\n provider: 'onnx-local',\n retryable: false\n };\n throw providerError;\n }\n }\n\n /**\n * Format messages for Phi-4 chat template\n */\n private formatMessages(messages: Message[]): string {\n let prompt = '';\n\n for (const msg of messages) {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n if (msg.role === 'system') {\n prompt += `<|system|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'user') {\n prompt += `<|user|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'assistant') {\n prompt += `<|assistant|>\\n${content}<|end|>\\n`;\n }\n }\n\n prompt += '<|assistant|>\\n';\n return prompt;\n }\n\n /**\n * Initialize KV cache tensors for all 32 layers\n * Phi-4 architecture: 32 layers, 8 KV heads, 128 head_dim\n */\n private initializeKVCache(batchSize: number, sequenceLength: number) {\n const numLayers = 32;\n const numKVHeads = 8;\n const headDim = 128; // 3072 / 24 = 128\n const kvCache: Record<string, ort.Tensor> = {};\n\n // Initialize empty cache for each layer (key and value)\n for (let i = 0; i < numLayers; i++) {\n // Empty cache: [batch_size, num_kv_heads, 0, head_dim]\n const emptyCache = new Float32Array(0);\n\n kvCache[`past_key_values.${i}.key`] = new ort.Tensor(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n\n kvCache[`past_key_values.${i}.value`] = new ort.Tensor(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n }\n\n return kvCache;\n }\n\n /**\n * Chat completion using ONNX with KV cache\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n await this.initializeSession();\n\n const startTime = Date.now();\n const prompt = this.formatMessages(params.messages);\n\n try {\n // Tokenize input using optimized tiktoken\n const inputIds = this.encode(prompt);\n console.log(`📝 Input tokens: ${inputIds.length}`);\n\n // Initialize KV cache (reusable for batch)\n let pastKVCache = this.initializeKVCache(1, 0);\n\n // Track all generated tokens\n const allTokenIds = [...inputIds];\n const outputIds: number[] = [];\n\n // Pre-allocate tensor buffers for performance\n const maxSeqLen = inputIds.length + (params.maxTokens || this.config.maxTokens);\n\n // Autoregressive generation loop\n const maxNewTokens = params.maxTokens || this.config.maxTokens;\n\n for (let step = 0; step < maxNewTokens; step++) {\n // For first step, use all input tokens; for subsequent steps, use only last token\n const currentInputIds = step === 0 ? inputIds : [outputIds[outputIds.length - 1]];\n const currentSeqLen = currentInputIds.length;\n\n // Create input tensor for current step\n const inputTensor = new ort.Tensor(\n 'int64',\n BigInt64Array.from(currentInputIds.map(BigInt)),\n [1, currentSeqLen]\n );\n\n // Create attention mask for current step\n const totalSeqLen = allTokenIds.length;\n const attentionMask = new ort.Tensor(\n 'int64',\n BigInt64Array.from(Array(totalSeqLen).fill(1n)),\n [1, totalSeqLen]\n );\n\n // Build feeds with input, attention mask, and KV cache\n const feeds: Record<string, ort.Tensor> = {\n input_ids: inputTensor,\n attention_mask: attentionMask,\n ...pastKVCache\n };\n\n // Run inference\n const results = await this.session!.run(feeds);\n\n // Get logits for next token (last position)\n const logits = results.logits.data as Float32Array;\n const vocabSize = results.logits.dims[results.logits.dims.length - 1];\n\n // Extract logits for last token\n const lastTokenLogitsOffset = (currentSeqLen - 1) * vocabSize;\n\n // Apply temperature and get next token\n let nextToken = 0;\n let maxVal = -Infinity;\n\n for (let i = 0; i < vocabSize; i++) {\n const logit = logits[lastTokenLogitsOffset + i] / (params.temperature || this.config.temperature);\n if (logit > maxVal) {\n maxVal = logit;\n nextToken = i;\n }\n }\n\n // Add to output\n outputIds.push(nextToken);\n allTokenIds.push(nextToken);\n\n // Check for end token (2 is typical EOS for Phi models)\n if (nextToken === 2 || nextToken === 0) {\n console.log(`🛑 Stop token detected: ${nextToken}`);\n break;\n }\n\n // Update KV cache from outputs for next iteration\n pastKVCache = {};\n for (let i = 0; i < 32; i++) {\n pastKVCache[`past_key_values.${i}.key`] = results[`present.${i}.key`];\n pastKVCache[`past_key_values.${i}.value`] = results[`present.${i}.value`];\n }\n\n // Progress indicator\n if ((step + 1) % 10 === 0) {\n console.log(`🔄 Generated ${step + 1} tokens...`);\n }\n }\n\n // Decode output using optimized tiktoken\n const generatedText = this.decode(outputIds);\n const latency = Date.now() - startTime;\n const tokensPerSecond = (outputIds.length / (latency / 1000)).toFixed(1);\n\n console.log(`✅ Generated: ${generatedText}`);\n console.log(`⏱️ Latency: ${latency}ms (${tokensPerSecond} tokens/sec)`);\n\n const content: ContentBlock[] = [{\n type: 'text',\n text: generatedText.trim()\n }];\n\n return {\n id: `onnx-local-${Date.now()}`,\n model: this.config.modelPath,\n content,\n stopReason: 'end_turn',\n usage: {\n inputTokens: inputIds.length,\n outputTokens: outputIds.length\n },\n metadata: {\n provider: 'onnx-local',\n model: 'Phi-4-mini-instruct-onnx',\n latency,\n cost: 0, // Local inference is free\n executionProviders: this.config.executionProviders,\n tokensPerSecond: parseFloat(tokensPerSecond)\n }\n };\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInferenceError',\n message: `ONNX inference failed: ${error}`,\n provider: 'onnx-local',\n retryable: true\n };\n throw providerError;\n }\n }\n\n /**\n * Streaming not implemented (requires complex generation loop)\n */\n async *stream(params: ChatParams): AsyncGenerator<StreamChunk> {\n throw new Error('Streaming not yet implemented for ONNX local inference');\n }\n\n /**\n * Validate capabilities\n */\n validateCapabilities(features: string[]): boolean {\n const supported = ['chat'];\n return features.every(f => supported.includes(f));\n }\n\n /**\n * Get model info\n */\n getModelInfo() {\n return {\n modelPath: this.config.modelPath,\n executionProviders: this.config.executionProviders,\n initialized: this.session !== null,\n tokenizerLoaded: this.tiktoken !== null\n };\n }\n\n /**\n * Cleanup resources\n */\n async dispose(): Promise<void> {\n if (this.session) {\n // ONNX Runtime sessions don't have explicit disposal in Node.js\n this.session = null;\n }\n if (this.tiktoken) {\n this.tiktoken.free();\n this.tiktoken = null;\n }\n }\n}\n"]}
@@ -1 +1 @@
1
- {"version":3,"file":"embedding-service.js","sourceRoot":"","sources":["../../src/services/embedding-service.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AAmBtC;;GAEG;AACH,MAAM,OAAgB,gBAAiB,SAAQ,YAAY;IAC/C,MAAM,CAAkB;IACxB,KAAK,GAA0B,IAAI,GAAG,EAAE,CAAC;IAEnD,YAAY,MAAuB;QACjC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,IAAI;YACf,GAAG,MAAM;SACV,CAAC;IACJ,CAAC;IAKD;;OAEG;IACO,SAAS,CAAC,IAAY;QAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC;IACtC,CAAC;IAED;;OAEG;IACO,SAAS,CAAC,IAAY,EAAE,SAAmB;QACnD,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,CAAC,SAAU,EAAE,CAAC;YAC9C,qCAAqC;YACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;YAChD,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC9B,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IAClC,CAAC;IAED;;OAEG;IACH,UAAU;QACR,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IACrB,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,OAAO,sBAAuB,SAAQ,gBAAgB;IAClD,MAAM,CAAS;IACf,KAAK,CAAS;IACd,OAAO,GAAG,sCAAsC,CAAC;IAEzD,YAAY,MAA8D;QACxE,KAAK,CAAC,EAAE,GAAG,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,wBAAwB,CAAC;IACxD,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,cAAc;QACd,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACpC,IAAI,MAAM,EAAE,CAAC;YACX,OAAO;gBACL,SAAS,EAAE,MAAM;gBACjB,OAAO,EAAE,CAAC;aACX,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEzB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE;gBACzC,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE;oBACP,cAAc,EAAE,kBAAkB;oBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;iBACvC;gBACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,KAAK,EAAE,IAAI;oBACX,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,SAAS;iBAChD,CAAC;aACH,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,KAAK,CAAC,qBAAqB,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YAC9D,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAEzC,WAAW;YACX,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAEhC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YAEnC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC;YAEtC,OAAO;gBACL,SAAS;gBACT,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,OAAO;aACR,CAAC;QACJ,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,4BAA4B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEzB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE;gBACzC,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE;oBACP,cAAc,EAAE,kBAAkB;oBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;iBACvC;gBACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,KAAK,EAAE,KAAK;oBACZ,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,SAAS;iBAChD,CAAC;aACH,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,KAAK,CAAC,qBAAqB,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YAC9D,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YAEnC,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,KAAa,EAAE,EAAE;gBAChD,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;gBACjC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,SAAS,CAAC,CAAC;gBAExC,OAAO;oBACL,SAAS;oBACT,KAAK,EAAE;wBACL,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC;wBACjE,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,KAAK,CAAC,MAAM,CAAC;qBAChE;oBACD,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC;iBAC5C,CAAC;YACJ,CAAC,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,kCAAkC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,OAAO,4BAA6B,SAAQ,gBAAgB;IACxD,QAAQ,GAAQ,IAAI,CAAC;IACrB,SAAS,CAAS;IAE1B,YAAY,MAAyC;QACnD,KAAK,CAAC,EAAE,GAAG,MAAM,EAAE,QAAQ,EAAE,cAAc,EAAE,CAAC,CAAC;QAC/C,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,KAAK,IAAI,yBAAyB,CAAC;IAC7D,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAE1B,IAAI,CAAC;YACH,qCAAqC;YACrC,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;YAE1D,IAAI,CAAC,QAAQ,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;YACrE,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;QACtD,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,yCAAyC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC5E,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QAExB,cAAc;QACd,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACpC,IAAI,MAAM,EAAE,CAAC;YACX,OAAO;gBACL,SAAS,EAAE,MAAM;gBACjB,OAAO,EAAE,CAAC;aACX,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEzB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAE/E,2BAA2B;YAC3B,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAa,CAAC;YAEtD,WAAW;YACX,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAEhC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YAEnC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC;YAEtC,OAAO;gBACL,SAAS;gBACT,OAAO;aACR,CAAC;QACJ,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,qCAAqC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACxE,CAAC;IACH,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QAExB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEzB,IAAI,CAAC;YACH,MAAM,OAAO,GAAsB,EAAE,CAAC;YAEtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;gBAEpC,IAAI,MAAM,EAAE,CAAC;oBACX,OAAO,CAAC,IAAI,CAAC;wBACX,SAAS,EAAE,MAAM;wBACjB,OAAO,EAAE,CAAC;qBACX,CAAC,CAAC;gBACL,CAAC;qBAAM,CAAC;oBACN,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;wBACvC,OAAO,EAAE,MAAM;wBACf,SAAS,EAAE,IAAI;qBAChB,CAAC,CAAC;oBACH,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAa,CAAC;oBAEtD,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;oBAEhC,OAAO,CAAC,IAAI,CAAC;wBACX,SAAS;wBACT,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;qBACzD,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,OAAO,OAAO,CAAC;QACjB,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,2CAA2C,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC9E,CAAC;IACH,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,OAAO,oBAAqB,SAAQ,gBAAgB;IACxD,YAAY,MAAiC;QAC3C,KAAK,CAAC;YACJ,QAAQ,EAAE,MAAM;YAChB,UAAU,EAAE,GAAG;YACf,GAAG,MAAM;SACV,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,cAAc;QACd,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACpC,IAAI,MAAM,EAAE,CAAC;YACX,OAAO;gBACL,SAAS,EAAE,MAAM;gBACjB,OAAO,EAAE,CAAC;aACX,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEzB,gCAAgC;QAChC,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAE3C,WAAW;QACX,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAEhC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;QAEnC,OAAO;YACL,SAAS;YACT,OAAO;SACR,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC1D,CAAC;IAEO,aAAa,CAAC,IAAY;QAChC,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,GAAG,CAAC;QACjD,MAAM,SAAS,GAAG,IAAI,KAAK,CAAC,UAAU,CAAC,CAAC;QAExC,sBAAsB;QACtB,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,IAAI,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAC/C,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC;QACrB,CAAC;QAED,mCAAmC;QACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,GAAG,UAAU,CAAC;YACnC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC;YACjC,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACnC,CAAC;QAED,2BAA2B;QAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACrE,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;IACtC,CAAC;CACF;AAED;;GAEG;AACH,MAAM,UAAU,sBAAsB,CAAC,MAAuB;IAC5D,QAAQ,MAAM,CAAC,QAAQ,EAAE,CAAC;QACxB,KAAK,QAAQ;YACX,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;gBACnB,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;YAC7C,CAAC;YACD,OAAO,IAAI,sBAAsB,CAAC,MAAa,CAAC,CAAC;QAEnD,KAAK,cAAc;YACjB,OAAO,IAAI,4BAA4B,CAAC,MAAM,CAAC,CAAC;QAElD,KAAK,MAAM;YACT,OAAO,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;QAE1C;YACE,OAAO,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,QAAQ,cAAc,CAAC,CAAC;YACjE,OAAO,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;IAC5C,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,IAAY,EACZ,MAAiC;IAEjC,MAAM,OAAO,GAAG,sBAAsB,CAAC;QACrC,QAAQ,EAAE,MAAM;QAChB,GAAG,MAAM;KACS,CAAC,CAAC;IAEtB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACzC,OAAO,MAAM,CAAC,SAAS,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,WAAmB,aAAa;IAKxE,MAAM,OAAO,GAAQ,EAAE,CAAC;IAExB,YAAY;IACZ,MAAM,WAAW,GAAG,IAAI,oBAAoB,CAAC,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC,CAAC;IAClE,MAAM,UAAU,GAAG,MAAM,WAAW,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IACrD,OAAO,CAAC,IAAI,GAAG;QACb,OAAO,EAAE,UAAU,CAAC,OAAO;QAC3B,UAAU,EAAE,UAAU,CAAC,SAAS,CAAC,MAAM;KACxC,CAAC;IAEF,mCAAmC;IACnC,IAAI,CAAC;QACH,MAAM,mBAAmB,GAAG,IAAI,4BAA4B,CAAC;YAC3D,KAAK,EAAE,yBAAyB;SACjC,CAAC,CAAC;QACH,MAAM,kBAAkB,GAAG,MAAM,mBAAmB,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACrE,OAAO,CAAC,YAAY,GAAG;YACrB,OAAO,EAAE,kBAAkB,CAAC,OAAO;YACnC,UAAU,EAAE,kBAAkB,CAAC,SAAS,CAAC,MAAM;SAChD,CAAC;IACJ,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,OAAO,CAAC,YAAY,GAAG;YACrB,KAAK,EAAE,KAAK,CAAC,OAAO;SACrB,CAAC;IACJ,CAAC;IAED,qCAAqC;IACrC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;IAC1C,IAAI,MAAM,EAAE,CAAC;QACX,IAAI,CAAC;YACH,MAAM,aAAa,GAAG,IAAI,sBAAsB,CAAC;gBAC/C,MAAM;gBACN,KAAK,EAAE,wBAAwB;aAChC,CAAC,CAAC;YACH,MAAM,YAAY,GAAG,MAAM,aAAa,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YACzD,OAAO,CAAC,MAAM,GAAG;gBACf,OAAO,EAAE,YAAY,CAAC,OAAO;gBAC7B,UAAU,EAAE,YAAY,CAAC,SAAS,CAAC,MAAM;aAC1C,CAAC;QACJ,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,OAAO,CAAC,MAAM,GAAG;gBACf,KAAK,EAAE,KAAK,CAAC,OAAO;aACrB,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC","sourcesContent":["/**\n * Production Embedding Service\n *\n * Replaces mock embeddings with real implementations:\n * 1. OpenAI Embeddings API (text-embedding-3-small/large)\n * 2. Local Transformers.js (runs in Node.js/browser)\n * 3. Custom ONNX models\n * 4. Fallback hash-based embeddings (for development)\n */\n\nimport { EventEmitter } from 'events';\n\nexport interface EmbeddingConfig {\n provider: 'openai' | 'transformers' | 'onnx' | 'mock';\n model?: string;\n dimensions?: number;\n apiKey?: string;\n cacheSize?: number;\n}\n\nexport interface EmbeddingResult {\n embedding: number[];\n usage?: {\n promptTokens: number;\n totalTokens: number;\n };\n latency: number;\n}\n\n/**\n * Base embedding service interface\n */\nexport abstract class EmbeddingService extends EventEmitter {\n protected config: EmbeddingConfig;\n protected cache: Map<string, number[]> = new Map();\n\n constructor(config: EmbeddingConfig) {\n super();\n this.config = {\n cacheSize: 1000,\n ...config\n };\n }\n\n abstract embed(text: string): Promise<EmbeddingResult>;\n abstract embedBatch(texts: string[]): Promise<EmbeddingResult[]>;\n\n /**\n * Get cached embedding if available\n */\n protected getCached(text: string): number[] | null {\n return this.cache.get(text) || null;\n }\n\n /**\n * Cache embedding with LRU eviction\n */\n protected setCached(text: string, embedding: number[]): void {\n if (this.cache.size >= this.config.cacheSize!) {\n // Remove oldest entry (first in map)\n const firstKey = this.cache.keys().next().value;\n this.cache.delete(firstKey);\n }\n this.cache.set(text, embedding);\n }\n\n /**\n * Clear cache\n */\n clearCache(): void {\n this.cache.clear();\n }\n}\n\n/**\n * OpenAI Embeddings Service\n *\n * Uses OpenAI's text-embedding-3-small (1536D) or text-embedding-3-large (3072D)\n * https://platform.openai.com/docs/guides/embeddings\n */\nexport class OpenAIEmbeddingService extends EmbeddingService {\n private apiKey: string;\n private model: string;\n private baseURL = 'https://api.openai.com/v1/embeddings';\n\n constructor(config: Omit<EmbeddingConfig, 'provider'> & { apiKey: string }) {\n super({ ...config, provider: 'openai' });\n this.apiKey = config.apiKey;\n this.model = config.model || 'text-embedding-3-small';\n }\n\n async embed(text: string): Promise<EmbeddingResult> {\n // Check cache\n const cached = this.getCached(text);\n if (cached) {\n return {\n embedding: cached,\n latency: 0\n };\n }\n\n const start = Date.now();\n\n try {\n const response = await fetch(this.baseURL, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`\n },\n body: JSON.stringify({\n model: this.model,\n input: text,\n dimensions: this.config.dimensions || undefined\n })\n });\n\n if (!response.ok) {\n throw new Error(`OpenAI API error: ${response.statusText}`);\n }\n\n const data = await response.json();\n const embedding = data.data[0].embedding;\n\n // Cache it\n this.setCached(text, embedding);\n\n const latency = Date.now() - start;\n\n this.emit('embed', { text, latency });\n\n return {\n embedding,\n usage: data.usage,\n latency\n };\n } catch (error: any) {\n throw new Error(`OpenAI embedding failed: ${error.message}`);\n }\n }\n\n async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {\n const start = Date.now();\n\n try {\n const response = await fetch(this.baseURL, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`\n },\n body: JSON.stringify({\n model: this.model,\n input: texts,\n dimensions: this.config.dimensions || undefined\n })\n });\n\n if (!response.ok) {\n throw new Error(`OpenAI API error: ${response.statusText}`);\n }\n\n const data = await response.json();\n const latency = Date.now() - start;\n\n return data.data.map((item: any, index: number) => {\n const embedding = item.embedding;\n this.setCached(texts[index], embedding);\n\n return {\n embedding,\n usage: {\n promptTokens: Math.floor(data.usage.prompt_tokens / texts.length),\n totalTokens: Math.floor(data.usage.total_tokens / texts.length)\n },\n latency: Math.floor(latency / texts.length)\n };\n });\n } catch (error: any) {\n throw new Error(`OpenAI batch embedding failed: ${error.message}`);\n }\n }\n}\n\n/**\n * Transformers.js Local Embedding Service\n *\n * Runs locally without API calls using ONNX runtime\n * https://huggingface.co/docs/transformers.js\n */\nexport class TransformersEmbeddingService extends EmbeddingService {\n private pipeline: any = null;\n private modelName: string;\n\n constructor(config: Omit<EmbeddingConfig, 'provider'>) {\n super({ ...config, provider: 'transformers' });\n this.modelName = config.model || 'Xenova/all-MiniLM-L6-v2';\n }\n\n async initialize(): Promise<void> {\n if (this.pipeline) return;\n\n try {\n // Dynamically import transformers.js\n const { pipeline } = await import('@xenova/transformers');\n\n this.pipeline = await pipeline('feature-extraction', this.modelName);\n this.emit('initialized', { model: this.modelName });\n } catch (error: any) {\n throw new Error(`Failed to initialize transformers.js: ${error.message}`);\n }\n }\n\n async embed(text: string): Promise<EmbeddingResult> {\n await this.initialize();\n\n // Check cache\n const cached = this.getCached(text);\n if (cached) {\n return {\n embedding: cached,\n latency: 0\n };\n }\n\n const start = Date.now();\n\n try {\n const output = await this.pipeline(text, { pooling: 'mean', normalize: true });\n\n // Convert to regular array\n const embedding = Array.from(output.data) as number[];\n\n // Cache it\n this.setCached(text, embedding);\n\n const latency = Date.now() - start;\n\n this.emit('embed', { text, latency });\n\n return {\n embedding,\n latency\n };\n } catch (error: any) {\n throw new Error(`Transformers.js embedding failed: ${error.message}`);\n }\n }\n\n async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {\n await this.initialize();\n\n const start = Date.now();\n\n try {\n const results: EmbeddingResult[] = [];\n\n for (const text of texts) {\n const cached = this.getCached(text);\n\n if (cached) {\n results.push({\n embedding: cached,\n latency: 0\n });\n } else {\n const output = await this.pipeline(text, {\n pooling: 'mean',\n normalize: true\n });\n const embedding = Array.from(output.data) as number[];\n\n this.setCached(text, embedding);\n\n results.push({\n embedding,\n latency: Math.floor((Date.now() - start) / texts.length)\n });\n }\n }\n\n return results;\n } catch (error: any) {\n throw new Error(`Transformers.js batch embedding failed: ${error.message}`);\n }\n }\n}\n\n/**\n * Mock Embedding Service (for development/testing)\n *\n * Generates deterministic hash-based embeddings\n * Fast but not semantically meaningful\n */\nexport class MockEmbeddingService extends EmbeddingService {\n constructor(config?: Partial<EmbeddingConfig>) {\n super({\n provider: 'mock',\n dimensions: 384,\n ...config\n });\n }\n\n async embed(text: string): Promise<EmbeddingResult> {\n // Check cache\n const cached = this.getCached(text);\n if (cached) {\n return {\n embedding: cached,\n latency: 0\n };\n }\n\n const start = Date.now();\n\n // Generate hash-based embedding\n const embedding = this.hashEmbedding(text);\n\n // Cache it\n this.setCached(text, embedding);\n\n const latency = Date.now() - start;\n\n return {\n embedding,\n latency\n };\n }\n\n async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {\n return Promise.all(texts.map(text => this.embed(text)));\n }\n\n private hashEmbedding(text: string): number[] {\n const dimensions = this.config.dimensions || 384;\n const embedding = new Array(dimensions);\n\n // Seed with text hash\n let hash = 0;\n for (let i = 0; i < text.length; i++) {\n hash = (hash << 5) - hash + text.charCodeAt(i);\n hash = hash & hash;\n }\n\n // Generate pseudo-random embedding\n for (let i = 0; i < dimensions; i++) {\n const seed = hash + i * 2654435761;\n const x = Math.sin(seed) * 10000;\n embedding[i] = x - Math.floor(x);\n }\n\n // Normalize to unit vector\n const norm = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));\n return embedding.map(v => v / norm);\n }\n}\n\n/**\n * Factory function to create appropriate embedding service\n */\nexport function createEmbeddingService(config: EmbeddingConfig): EmbeddingService {\n switch (config.provider) {\n case 'openai':\n if (!config.apiKey) {\n throw new Error('OpenAI API key required');\n }\n return new OpenAIEmbeddingService(config as any);\n\n case 'transformers':\n return new TransformersEmbeddingService(config);\n\n case 'mock':\n return new MockEmbeddingService(config);\n\n default:\n console.warn(`Unknown provider: ${config.provider}, using mock`);\n return new MockEmbeddingService(config);\n }\n}\n\n/**\n * Convenience function for quick embeddings\n */\nexport async function getEmbedding(\n text: string,\n config?: Partial<EmbeddingConfig>\n): Promise<number[]> {\n const service = createEmbeddingService({\n provider: 'mock',\n ...config\n } as EmbeddingConfig);\n\n const result = await service.embed(text);\n return result.embedding;\n}\n\n/**\n * Benchmark different embedding providers\n */\nexport async function benchmarkEmbeddings(testText: string = 'Hello world'): Promise<{\n mock: { latency: number; dimensions: number };\n transformers?: { latency: number; dimensions: number; error?: string };\n openai?: { latency: number; dimensions: number; error?: string };\n}> {\n const results: any = {};\n\n // Test mock\n const mockService = new MockEmbeddingService({ dimensions: 384 });\n const mockResult = await mockService.embed(testText);\n results.mock = {\n latency: mockResult.latency,\n dimensions: mockResult.embedding.length\n };\n\n // Test transformers (if available)\n try {\n const transformersService = new TransformersEmbeddingService({\n model: 'Xenova/all-MiniLM-L6-v2'\n });\n const transformersResult = await transformersService.embed(testText);\n results.transformers = {\n latency: transformersResult.latency,\n dimensions: transformersResult.embedding.length\n };\n } catch (error: any) {\n results.transformers = {\n error: error.message\n };\n }\n\n // Test OpenAI (if API key available)\n const apiKey = process.env.OPENAI_API_KEY;\n if (apiKey) {\n try {\n const openaiService = new OpenAIEmbeddingService({\n apiKey,\n model: 'text-embedding-3-small'\n });\n const openaiResult = await openaiService.embed(testText);\n results.openai = {\n latency: openaiResult.latency,\n dimensions: openaiResult.embedding.length\n };\n } catch (error: any) {\n results.openai = {\n error: error.message\n };\n }\n }\n\n return results;\n}\n"]}
1
+ {"version":3,"file":"embedding-service.js","sourceRoot":"","sources":["../../src/services/embedding-service.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AAmBtC;;GAEG;AACH,MAAM,OAAgB,gBAAiB,SAAQ,YAAY;IAC/C,MAAM,CAAkB;IACxB,KAAK,GAA0B,IAAI,GAAG,EAAE,CAAC;IAEnD,YAAY,MAAuB;QACjC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,IAAI;YACf,GAAG,MAAM;SACV,CAAC;IACJ,CAAC;IAKD;;OAEG;IACO,SAAS,CAAC,IAAY;QAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC;IACtC,CAAC;IAED;;OAEG;IACO,SAAS,CAAC,IAAY,EAAE,SAAmB;QACnD,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,CAAC,SAAU,EAAE,CAAC;YAC9C,qCAAqC;YACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;YAChD,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC9B,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IAClC,CAAC;IAED;;OAEG;IACH,UAAU;QACR,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IACrB,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,OAAO,sBAAuB,SAAQ,gBAAgB;IAClD,MAAM,CAAS;IACf,KAAK,CAAS;IACd,OAAO,GAAG,sCAAsC,CAAC;IAEzD,YAAY,MAA8D;QACxE,KAAK,CAAC,EAAE,GAAG,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,wBAAwB,CAAC;IACxD,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,cAAc;QACd,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACpC,IAAI,MAAM,EAAE,CAAC;YACX,OAAO;gBACL,SAAS,EAAE,MAAM;gBACjB,OAAO,EAAE,CAAC;aACX,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEzB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE;gBACzC,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE;oBACP,cAAc,EAAE,kBAAkB;oBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;iBACvC;gBACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,KAAK,EAAE,IAAI;oBACX,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,SAAS;iBAChD,CAAC;aACH,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,KAAK,CAAC,qBAAqB,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YAC9D,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAEzC,WAAW;YACX,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAEhC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YAEnC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC;YAEtC,OAAO;gBACL,SAAS;gBACT,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,OAAO;aACR,CAAC;QACJ,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,4BAA4B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEzB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE;gBACzC,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE;oBACP,cAAc,EAAE,kBAAkB;oBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;iBACvC;gBACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,KAAK,EAAE,KAAK;oBACZ,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,SAAS;iBAChD,CAAC;aACH,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,KAAK,CAAC,qBAAqB,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YAC9D,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YAEnC,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,KAAa,EAAE,EAAE;gBAChD,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;gBACjC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,SAAS,CAAC,CAAC;gBAExC,OAAO;oBACL,SAAS;oBACT,KAAK,EAAE;wBACL,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC;wBACjE,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,KAAK,CAAC,MAAM,CAAC;qBAChE;oBACD,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC;iBAC5C,CAAC;YACJ,CAAC,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,kCAAkC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,OAAO,4BAA6B,SAAQ,gBAAgB;IACxD,QAAQ,GAAQ,IAAI,CAAC;IACrB,SAAS,CAAS;IAE1B,YAAY,MAAyC;QACnD,KAAK,CAAC,EAAE,GAAG,MAAM,EAAE,QAAQ,EAAE,cAAc,EAAE,CAAC,CAAC;QAC/C,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,KAAK,IAAI,yBAAyB,CAAC;IAC7D,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAE1B,IAAI,CAAC;YACH,qCAAqC;YACrC,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;YAE1D,IAAI,CAAC,QAAQ,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;YACrE,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;QACtD,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,yCAAyC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC5E,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QAExB,cAAc;QACd,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACpC,IAAI,MAAM,EAAE,CAAC;YACX,OAAO;gBACL,SAAS,EAAE,MAAM;gBACjB,OAAO,EAAE,CAAC;aACX,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEzB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAE/E,2BAA2B;YAC3B,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAE1C,WAAW;YACX,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAEhC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YAEnC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC;YAEtC,OAAO;gBACL,SAAS;gBACT,OAAO;aACR,CAAC;QACJ,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,qCAAqC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACxE,CAAC;IACH,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QAExB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEzB,IAAI,CAAC;YACH,MAAM,OAAO,GAAsB,EAAE,CAAC;YAEtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;gBAEpC,IAAI,MAAM,EAAE,CAAC;oBACX,OAAO,CAAC,IAAI,CAAC;wBACX,SAAS,EAAE,MAAM;wBACjB,OAAO,EAAE,CAAC;qBACX,CAAC,CAAC;gBACL,CAAC;qBAAM,CAAC;oBACN,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;wBACvC,OAAO,EAAE,MAAM;wBACf,SAAS,EAAE,IAAI;qBAChB,CAAC,CAAC;oBACH,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;oBAE1C,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;oBAEhC,OAAO,CAAC,IAAI,CAAC;wBACX,SAAS;wBACT,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;qBACzD,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,OAAO,OAAO,CAAC;QACjB,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,2CAA2C,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC9E,CAAC;IACH,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,OAAO,oBAAqB,SAAQ,gBAAgB;IACxD,YAAY,MAAiC;QAC3C,KAAK,CAAC;YACJ,QAAQ,EAAE,MAAM;YAChB,UAAU,EAAE,GAAG;YACf,GAAG,MAAM;SACV,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,cAAc;QACd,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACpC,IAAI,MAAM,EAAE,CAAC;YACX,OAAO;gBACL,SAAS,EAAE,MAAM;gBACjB,OAAO,EAAE,CAAC;aACX,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEzB,gCAAgC;QAChC,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAE3C,WAAW;QACX,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAEhC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;QAEnC,OAAO;YACL,SAAS;YACT,OAAO;SACR,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC1D,CAAC;IAEO,aAAa,CAAC,IAAY;QAChC,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,GAAG,CAAC;QACjD,MAAM,SAAS,GAAG,IAAI,KAAK,CAAC,UAAU,CAAC,CAAC;QAExC,sBAAsB;QACtB,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,IAAI,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAC/C,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC;QACrB,CAAC;QAED,mCAAmC;QACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,GAAG,UAAU,CAAC;YACnC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC;YACjC,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACnC,CAAC;QAED,2BAA2B;QAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACrE,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;IACtC,CAAC;CACF;AAED;;GAEG;AACH,MAAM,UAAU,sBAAsB,CAAC,MAAuB;IAC5D,QAAQ,MAAM,CAAC,QAAQ,EAAE,CAAC;QACxB,KAAK,QAAQ;YACX,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;gBACnB,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;YAC7C,CAAC;YACD,OAAO,IAAI,sBAAsB,CAAC,MAAa,CAAC,CAAC;QAEnD,KAAK,cAAc;YACjB,OAAO,IAAI,4BAA4B,CAAC,MAAM,CAAC,CAAC;QAElD,KAAK,MAAM;YACT,OAAO,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;QAE1C;YACE,OAAO,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,QAAQ,cAAc,CAAC,CAAC;YACjE,OAAO,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;IAC5C,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,IAAY,EACZ,MAAiC;IAEjC,MAAM,OAAO,GAAG,sBAAsB,CAAC;QACrC,QAAQ,EAAE,MAAM;QAChB,GAAG,MAAM;KACS,CAAC,CAAC;IAEtB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACzC,OAAO,MAAM,CAAC,SAAS,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,WAAmB,aAAa;IAKxE,MAAM,OAAO,GAAQ,EAAE,CAAC;IAExB,YAAY;IACZ,MAAM,WAAW,GAAG,IAAI,oBAAoB,CAAC,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC,CAAC;IAClE,MAAM,UAAU,GAAG,MAAM,WAAW,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IACrD,OAAO,CAAC,IAAI,GAAG;QACb,OAAO,EAAE,UAAU,CAAC,OAAO;QAC3B,UAAU,EAAE,UAAU,CAAC,SAAS,CAAC,MAAM;KACxC,CAAC;IAEF,mCAAmC;IACnC,IAAI,CAAC;QACH,MAAM,mBAAmB,GAAG,IAAI,4BAA4B,CAAC;YAC3D,KAAK,EAAE,yBAAyB;SACjC,CAAC,CAAC;QACH,MAAM,kBAAkB,GAAG,MAAM,mBAAmB,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACrE,OAAO,CAAC,YAAY,GAAG;YACrB,OAAO,EAAE,kBAAkB,CAAC,OAAO;YACnC,UAAU,EAAE,kBAAkB,CAAC,SAAS,CAAC,MAAM;SAChD,CAAC;IACJ,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,OAAO,CAAC,YAAY,GAAG;YACrB,KAAK,EAAE,KAAK,CAAC,OAAO;SACrB,CAAC;IACJ,CAAC;IAED,qCAAqC;IACrC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;IAC1C,IAAI,MAAM,EAAE,CAAC;QACX,IAAI,CAAC;YACH,MAAM,aAAa,GAAG,IAAI,sBAAsB,CAAC;gBAC/C,MAAM;gBACN,KAAK,EAAE,wBAAwB;aAChC,CAAC,CAAC;YACH,MAAM,YAAY,GAAG,MAAM,aAAa,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YACzD,OAAO,CAAC,MAAM,GAAG;gBACf,OAAO,EAAE,YAAY,CAAC,OAAO;gBAC7B,UAAU,EAAE,YAAY,CAAC,SAAS,CAAC,MAAM;aAC1C,CAAC;QACJ,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,OAAO,CAAC,MAAM,GAAG;gBACf,KAAK,EAAE,KAAK,CAAC,OAAO;aACrB,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC","sourcesContent":["/**\n * Production Embedding Service\n *\n * Replaces mock embeddings with real implementations:\n * 1. OpenAI Embeddings API (text-embedding-3-small/large)\n * 2. Local Transformers.js (runs in Node.js/browser)\n * 3. Custom ONNX models\n * 4. Fallback hash-based embeddings (for development)\n */\n\nimport { EventEmitter } from 'events';\n\nexport interface EmbeddingConfig {\n provider: 'openai' | 'transformers' | 'onnx' | 'mock';\n model?: string;\n dimensions?: number;\n apiKey?: string;\n cacheSize?: number;\n}\n\nexport interface EmbeddingResult {\n embedding: number[];\n usage?: {\n promptTokens: number;\n totalTokens: number;\n };\n latency: number;\n}\n\n/**\n * Base embedding service interface\n */\nexport abstract class EmbeddingService extends EventEmitter {\n protected config: EmbeddingConfig;\n protected cache: Map<string, number[]> = new Map();\n\n constructor(config: EmbeddingConfig) {\n super();\n this.config = {\n cacheSize: 1000,\n ...config\n };\n }\n\n abstract embed(text: string): Promise<EmbeddingResult>;\n abstract embedBatch(texts: string[]): Promise<EmbeddingResult[]>;\n\n /**\n * Get cached embedding if available\n */\n protected getCached(text: string): number[] | null {\n return this.cache.get(text) || null;\n }\n\n /**\n * Cache embedding with LRU eviction\n */\n protected setCached(text: string, embedding: number[]): void {\n if (this.cache.size >= this.config.cacheSize!) {\n // Remove oldest entry (first in map)\n const firstKey = this.cache.keys().next().value;\n this.cache.delete(firstKey);\n }\n this.cache.set(text, embedding);\n }\n\n /**\n * Clear cache\n */\n clearCache(): void {\n this.cache.clear();\n }\n}\n\n/**\n * OpenAI Embeddings Service\n *\n * Uses OpenAI's text-embedding-3-small (1536D) or text-embedding-3-large (3072D)\n * https://platform.openai.com/docs/guides/embeddings\n */\nexport class OpenAIEmbeddingService extends EmbeddingService {\n private apiKey: string;\n private model: string;\n private baseURL = 'https://api.openai.com/v1/embeddings';\n\n constructor(config: Omit<EmbeddingConfig, 'provider'> & { apiKey: string }) {\n super({ ...config, provider: 'openai' });\n this.apiKey = config.apiKey;\n this.model = config.model || 'text-embedding-3-small';\n }\n\n async embed(text: string): Promise<EmbeddingResult> {\n // Check cache\n const cached = this.getCached(text);\n if (cached) {\n return {\n embedding: cached,\n latency: 0\n };\n }\n\n const start = Date.now();\n\n try {\n const response = await fetch(this.baseURL, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`\n },\n body: JSON.stringify({\n model: this.model,\n input: text,\n dimensions: this.config.dimensions || undefined\n })\n });\n\n if (!response.ok) {\n throw new Error(`OpenAI API error: ${response.statusText}`);\n }\n\n const data = await response.json();\n const embedding = data.data[0].embedding;\n\n // Cache it\n this.setCached(text, embedding);\n\n const latency = Date.now() - start;\n\n this.emit('embed', { text, latency });\n\n return {\n embedding,\n usage: data.usage,\n latency\n };\n } catch (error: any) {\n throw new Error(`OpenAI embedding failed: ${error.message}`);\n }\n }\n\n async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {\n const start = Date.now();\n\n try {\n const response = await fetch(this.baseURL, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`\n },\n body: JSON.stringify({\n model: this.model,\n input: texts,\n dimensions: this.config.dimensions || undefined\n })\n });\n\n if (!response.ok) {\n throw new Error(`OpenAI API error: ${response.statusText}`);\n }\n\n const data = await response.json();\n const latency = Date.now() - start;\n\n return data.data.map((item: any, index: number) => {\n const embedding = item.embedding;\n this.setCached(texts[index], embedding);\n\n return {\n embedding,\n usage: {\n promptTokens: Math.floor(data.usage.prompt_tokens / texts.length),\n totalTokens: Math.floor(data.usage.total_tokens / texts.length)\n },\n latency: Math.floor(latency / texts.length)\n };\n });\n } catch (error: any) {\n throw new Error(`OpenAI batch embedding failed: ${error.message}`);\n }\n }\n}\n\n/**\n * Transformers.js Local Embedding Service\n *\n * Runs locally without API calls using ONNX runtime\n * https://huggingface.co/docs/transformers.js\n */\nexport class TransformersEmbeddingService extends EmbeddingService {\n private pipeline: any = null;\n private modelName: string;\n\n constructor(config: Omit<EmbeddingConfig, 'provider'>) {\n super({ ...config, provider: 'transformers' });\n this.modelName = config.model || 'Xenova/all-MiniLM-L6-v2';\n }\n\n async initialize(): Promise<void> {\n if (this.pipeline) return;\n\n try {\n // Dynamically import transformers.js\n const { pipeline } = await import('@xenova/transformers');\n\n this.pipeline = await pipeline('feature-extraction', this.modelName);\n this.emit('initialized', { model: this.modelName });\n } catch (error: any) {\n throw new Error(`Failed to initialize transformers.js: ${error.message}`);\n }\n }\n\n async embed(text: string): Promise<EmbeddingResult> {\n await this.initialize();\n\n // Check cache\n const cached = this.getCached(text);\n if (cached) {\n return {\n embedding: cached,\n latency: 0\n };\n }\n\n const start = Date.now();\n\n try {\n const output = await this.pipeline(text, { pooling: 'mean', normalize: true });\n\n // Convert to regular array\n const embedding = Array.from(output.data);\n\n // Cache it\n this.setCached(text, embedding);\n\n const latency = Date.now() - start;\n\n this.emit('embed', { text, latency });\n\n return {\n embedding,\n latency\n };\n } catch (error: any) {\n throw new Error(`Transformers.js embedding failed: ${error.message}`);\n }\n }\n\n async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {\n await this.initialize();\n\n const start = Date.now();\n\n try {\n const results: EmbeddingResult[] = [];\n\n for (const text of texts) {\n const cached = this.getCached(text);\n\n if (cached) {\n results.push({\n embedding: cached,\n latency: 0\n });\n } else {\n const output = await this.pipeline(text, {\n pooling: 'mean',\n normalize: true\n });\n const embedding = Array.from(output.data);\n\n this.setCached(text, embedding);\n\n results.push({\n embedding,\n latency: Math.floor((Date.now() - start) / texts.length)\n });\n }\n }\n\n return results;\n } catch (error: any) {\n throw new Error(`Transformers.js batch embedding failed: ${error.message}`);\n }\n }\n}\n\n/**\n * Mock Embedding Service (for development/testing)\n *\n * Generates deterministic hash-based embeddings\n * Fast but not semantically meaningful\n */\nexport class MockEmbeddingService extends EmbeddingService {\n constructor(config?: Partial<EmbeddingConfig>) {\n super({\n provider: 'mock',\n dimensions: 384,\n ...config\n });\n }\n\n async embed(text: string): Promise<EmbeddingResult> {\n // Check cache\n const cached = this.getCached(text);\n if (cached) {\n return {\n embedding: cached,\n latency: 0\n };\n }\n\n const start = Date.now();\n\n // Generate hash-based embedding\n const embedding = this.hashEmbedding(text);\n\n // Cache it\n this.setCached(text, embedding);\n\n const latency = Date.now() - start;\n\n return {\n embedding,\n latency\n };\n }\n\n async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {\n return Promise.all(texts.map(text => this.embed(text)));\n }\n\n private hashEmbedding(text: string): number[] {\n const dimensions = this.config.dimensions || 384;\n const embedding = new Array(dimensions);\n\n // Seed with text hash\n let hash = 0;\n for (let i = 0; i < text.length; i++) {\n hash = (hash << 5) - hash + text.charCodeAt(i);\n hash = hash & hash;\n }\n\n // Generate pseudo-random embedding\n for (let i = 0; i < dimensions; i++) {\n const seed = hash + i * 2654435761;\n const x = Math.sin(seed) * 10000;\n embedding[i] = x - Math.floor(x);\n }\n\n // Normalize to unit vector\n const norm = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));\n return embedding.map(v => v / norm);\n }\n}\n\n/**\n * Factory function to create appropriate embedding service\n */\nexport function createEmbeddingService(config: EmbeddingConfig): EmbeddingService {\n switch (config.provider) {\n case 'openai':\n if (!config.apiKey) {\n throw new Error('OpenAI API key required');\n }\n return new OpenAIEmbeddingService(config as any);\n\n case 'transformers':\n return new TransformersEmbeddingService(config);\n\n case 'mock':\n return new MockEmbeddingService(config);\n\n default:\n console.warn(`Unknown provider: ${config.provider}, using mock`);\n return new MockEmbeddingService(config);\n }\n}\n\n/**\n * Convenience function for quick embeddings\n */\nexport async function getEmbedding(\n text: string,\n config?: Partial<EmbeddingConfig>\n): Promise<number[]> {\n const service = createEmbeddingService({\n provider: 'mock',\n ...config\n } as EmbeddingConfig);\n\n const result = await service.embed(text);\n return result.embedding;\n}\n\n/**\n * Benchmark different embedding providers\n */\nexport async function benchmarkEmbeddings(testText: string = 'Hello world'): Promise<{\n mock: { latency: number; dimensions: number };\n transformers?: { latency: number; dimensions: number; error?: string };\n openai?: { latency: number; dimensions: number; error?: string };\n}> {\n const results: any = {};\n\n // Test mock\n const mockService = new MockEmbeddingService({ dimensions: 384 });\n const mockResult = await mockService.embed(testText);\n results.mock = {\n latency: mockResult.latency,\n dimensions: mockResult.embedding.length\n };\n\n // Test transformers (if available)\n try {\n const transformersService = new TransformersEmbeddingService({\n model: 'Xenova/all-MiniLM-L6-v2'\n });\n const transformersResult = await transformersService.embed(testText);\n results.transformers = {\n latency: transformersResult.latency,\n dimensions: transformersResult.embedding.length\n };\n } catch (error: any) {\n results.transformers = {\n error: error.message\n };\n }\n\n // Test OpenAI (if API key available)\n const apiKey = process.env.OPENAI_API_KEY;\n if (apiKey) {\n try {\n const openaiService = new OpenAIEmbeddingService({\n apiKey,\n model: 'text-embedding-3-small'\n });\n const openaiResult = await openaiService.embed(testText);\n results.openai = {\n latency: openaiResult.latency,\n dimensions: openaiResult.embedding.length\n };\n } catch (error: any) {\n results.openai = {\n error: error.message\n };\n }\n }\n\n return results;\n}\n"]}
@@ -10,7 +10,6 @@ export interface AgentConfig {
10
10
  purpose: 'simple' | 'complex' | 'diverse';
11
11
  hiddenDim?: number;
12
12
  microLoraRank?: number;
13
- microLoraLr?: number;
14
13
  baseLoraRank?: number;
15
14
  patternClusters?: number;
16
15
  trajectoryCapacity?: number;
@@ -1 +1 @@
1
- {"version":3,"file":"sona-agent-training.d.ts","sourceRoot":"","sources":["../../src/services/sona-agent-training.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AAGtC,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAC/B;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,KAAK,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,UAAU,GAAG,OAAO,GAAG,WAAW,GAAG,QAAQ,CAAC;QACpD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;KACtB,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,IAAI,CAAC;IACnB,MAAM,EAAE,GAAG,CAAC;CACb;AAED;;GAEG;AACH,qBAAa,YAAa,SAAQ,YAAY;IAC5C,OAAO,CAAC,MAAM,CAMX;IAEH,OAAO,CAAC,UAAU,CAAuB;gBAE7B,UAAU,GAAE,OAAO,CAAC,WAAW,CAAM;IAYjD;;OAEG;IACH,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAAG,GAAG;IAwCjE;;OAEG;IACG,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IA0D5E;;OAEG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG;IAI3B;;OAEG;IACH,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,IAAI;IAiB9C;;OAEG;IACH,UAAU,IAAI,UAAU,EAAE;IAM1B;;OAEG;IACG,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,EAAE,EAAE,CAAC,GAAE,MAAU,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAS9F;;OAEG;IACG,eAAe,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;CAQjF;AAED;;GAEG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAM;IACpB,OAAO,CAAC,OAAO,CAAa;gBAEhB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM;IAc7C;;OAEG;IACG,aAAa,CAAC,KAAK,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAoC3D;;OAEG;IACG,KAAK,CAAC,SAAS,EAAE,MAAM,EAAE,CAAC,GAAE,MAAU,GAAG,OAAO,CAAC;QACrD,OAAO,EAAE,MAAM,EAAE,CAAC;QAClB,gBAAgB,EAAE,GAAG,EAAE,CAAC;KACzB,CAAC;IAYF;;OAEG;IACH,QAAQ;IAOR;;OAEG;IACH,OAAO,CAAC,SAAS;IAyCjB;;OAEG;IACH,OAAO,CAAC,YAAY;IAmBpB;;OAEG;IACH,OAAO,CAAC,aAAa;IAYrB,OAAO,CAAC,QAAQ;CASjB;AAED;;GAEG;AACH,eAAO,MAAM,cAAc;IACzB;;OAEG;yBACgB,WAAW;IAS9B;;OAEG;mBACU,WAAW;IASxB;;OAEG;uBACc,WAAW;IAS5B;;OAEG;oBACW,WAAW;IAUzB;;OAEG;uBACc,WAAW;IAU5B;;OAEG;2BACoB,MAAM,KAAG,WAAW;CAQ5C,CAAC"}
1
+ {"version":3,"file":"sona-agent-training.d.ts","sourceRoot":"","sources":["../../src/services/sona-agent-training.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AAGtC,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAC/B;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,KAAK,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,UAAU,GAAG,OAAO,GAAG,WAAW,GAAG,QAAQ,CAAC;QACpD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;KACtB,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,IAAI,CAAC;IACnB,MAAM,EAAE,GAAG,CAAC;CACb;AAED;;GAEG;AACH,qBAAa,YAAa,SAAQ,YAAY;IAC5C,OAAO,CAAC,MAAM,CAMX;IAEH,OAAO,CAAC,UAAU,CAAuB;gBAE7B,UAAU,GAAE,OAAO,CAAC,WAAW,CAAM;IAYjD;;OAEG;IACH,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAAG,GAAG;IAwCjE;;OAEG;IACG,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IA0D5E;;OAEG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG;IAI3B;;OAEG;IACH,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,IAAI;IAiB9C;;OAEG;IACH,UAAU,IAAI,UAAU,EAAE;IAM1B;;OAEG;IACG,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,EAAE,EAAE,CAAC,GAAE,MAAU,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAS9F;;OAEG;IACG,eAAe,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;CAQjF;AAED;;GAEG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAM;IACpB,OAAO,CAAC,OAAO,CAAa;gBAEhB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM;IAc7C;;OAEG;IACG,aAAa,CAAC,KAAK,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAoC3D;;OAEG;IACG,KAAK,CAAC,SAAS,EAAE,MAAM,EAAE,CAAC,GAAE,MAAU,GAAG,OAAO,CAAC;QACrD,OAAO,EAAE,MAAM,EAAE,CAAC;QAClB,gBAAgB,EAAE,GAAG,EAAE,CAAC;KACzB,CAAC;IAYF;;OAEG;IACH,QAAQ;IAOR;;OAEG;IACH,OAAO,CAAC,SAAS;IAyCjB;;OAEG;IACH,OAAO,CAAC,YAAY;IAmBpB;;OAEG;IACH,OAAO,CAAC,aAAa;IAYrB,OAAO,CAAC,QAAQ;CASjB;AAED;;GAEG;AACH,eAAO,MAAM,cAAc;IACzB;;OAEG;yBACgB,WAAW;IAS9B;;OAEG;mBACU,WAAW;IASxB;;OAEG;uBACc,WAAW;IAS5B;;OAEG;oBACW,WAAW;IAUzB;;OAEG;uBACc,WAAW;IAU5B;;OAEG;2BACoB,MAAM,KAAG,WAAW;CAQ5C,CAAC"}