agent-orcha 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259)
  1. package/README.md +86 -28
  2. package/dist/lib/agents/agent-executor.d.ts.map +1 -1
  3. package/dist/lib/agents/agent-executor.js +23 -7
  4. package/dist/lib/agents/agent-executor.js.map +1 -1
  5. package/dist/lib/agents/react-loop.d.ts.map +1 -1
  6. package/dist/lib/agents/react-loop.js +27 -0
  7. package/dist/lib/agents/react-loop.js.map +1 -1
  8. package/dist/lib/functions/simple-function-wrapper.js +3 -3
  9. package/dist/lib/functions/simple-function-wrapper.js.map +1 -1
  10. package/dist/lib/knowledge/knowledge-store.d.ts +1 -1
  11. package/dist/lib/knowledge/knowledge-store.d.ts.map +1 -1
  12. package/dist/lib/knowledge/knowledge-store.js +25 -4
  13. package/dist/lib/knowledge/knowledge-store.js.map +1 -1
  14. package/dist/lib/knowledge/loaders/file-loaders.d.ts +0 -1
  15. package/dist/lib/knowledge/loaders/file-loaders.d.ts.map +1 -1
  16. package/dist/lib/knowledge/loaders/file-loaders.js +7 -15
  17. package/dist/lib/knowledge/loaders/file-loaders.js.map +1 -1
  18. package/dist/lib/knowledge/sqlite-store.d.ts.map +1 -1
  19. package/dist/lib/knowledge/sqlite-store.js +19 -10
  20. package/dist/lib/knowledge/sqlite-store.js.map +1 -1
  21. package/dist/lib/knowledge/types.d.ts +13 -13
  22. package/dist/lib/llm/index.d.ts +1 -1
  23. package/dist/lib/llm/index.d.ts.map +1 -1
  24. package/dist/lib/llm/index.js +1 -1
  25. package/dist/lib/llm/index.js.map +1 -1
  26. package/dist/lib/llm/llm-config.d.ts +51 -8
  27. package/dist/lib/llm/llm-config.d.ts.map +1 -1
  28. package/dist/lib/llm/llm-config.js +161 -17
  29. package/dist/lib/llm/llm-config.js.map +1 -1
  30. package/dist/lib/llm/llm-factory.d.ts +1 -2
  31. package/dist/lib/llm/llm-factory.d.ts.map +1 -1
  32. package/dist/lib/llm/llm-factory.js +41 -8
  33. package/dist/lib/llm/llm-factory.js.map +1 -1
  34. package/dist/lib/llm/providers/openai-chat-model.d.ts +10 -0
  35. package/dist/lib/llm/providers/openai-chat-model.d.ts.map +1 -1
  36. package/dist/lib/llm/providers/openai-chat-model.js +37 -5
  37. package/dist/lib/llm/providers/openai-chat-model.js.map +1 -1
  38. package/dist/lib/llm/providers/openai-embeddings.d.ts.map +1 -1
  39. package/dist/lib/llm/providers/openai-embeddings.js +41 -10
  40. package/dist/lib/llm/providers/openai-embeddings.js.map +1 -1
  41. package/dist/lib/local-llm/binary-manager.d.ts +66 -0
  42. package/dist/lib/local-llm/binary-manager.d.ts.map +1 -0
  43. package/dist/lib/local-llm/binary-manager.js +441 -0
  44. package/dist/lib/local-llm/binary-manager.js.map +1 -0
  45. package/dist/lib/local-llm/engine-interface.d.ts +47 -0
  46. package/dist/lib/local-llm/engine-interface.d.ts.map +1 -0
  47. package/dist/lib/local-llm/engine-interface.js +2 -0
  48. package/dist/lib/local-llm/engine-interface.js.map +1 -0
  49. package/dist/lib/local-llm/engine-registry.d.ts +20 -0
  50. package/dist/lib/local-llm/engine-registry.d.ts.map +1 -0
  51. package/dist/lib/local-llm/engine-registry.js +56 -0
  52. package/dist/lib/local-llm/engine-registry.js.map +1 -0
  53. package/dist/lib/local-llm/engines/llama-cpp-engine.d.ts +31 -0
  54. package/dist/lib/local-llm/engines/llama-cpp-engine.d.ts.map +1 -0
  55. package/dist/lib/local-llm/engines/llama-cpp-engine.js +164 -0
  56. package/dist/lib/local-llm/engines/llama-cpp-engine.js.map +1 -0
  57. package/dist/lib/local-llm/engines/mlx-serve-engine.d.ts +31 -0
  58. package/dist/lib/local-llm/engines/mlx-serve-engine.d.ts.map +1 -0
  59. package/dist/lib/local-llm/engines/mlx-serve-engine.js +161 -0
  60. package/dist/lib/local-llm/engines/mlx-serve-engine.js.map +1 -0
  61. package/dist/lib/local-llm/gguf-reader.d.ts +20 -0
  62. package/dist/lib/local-llm/gguf-reader.d.ts.map +1 -0
  63. package/dist/lib/local-llm/gguf-reader.js +190 -0
  64. package/dist/lib/local-llm/gguf-reader.js.map +1 -0
  65. package/dist/lib/local-llm/index.d.ts +9 -0
  66. package/dist/lib/local-llm/index.d.ts.map +1 -0
  67. package/dist/lib/local-llm/index.js +6 -0
  68. package/dist/lib/local-llm/index.js.map +1 -0
  69. package/dist/lib/local-llm/llama-server-process.d.ts +42 -0
  70. package/dist/lib/local-llm/llama-server-process.d.ts.map +1 -0
  71. package/dist/lib/local-llm/llama-server-process.js +237 -0
  72. package/dist/lib/local-llm/llama-server-process.js.map +1 -0
  73. package/dist/lib/local-llm/mlx-binary-manager.d.ts +33 -0
  74. package/dist/lib/local-llm/mlx-binary-manager.d.ts.map +1 -0
  75. package/dist/lib/local-llm/mlx-binary-manager.js +211 -0
  76. package/dist/lib/local-llm/mlx-binary-manager.js.map +1 -0
  77. package/dist/lib/local-llm/mlx-server-process.d.ts +26 -0
  78. package/dist/lib/local-llm/mlx-server-process.d.ts.map +1 -0
  79. package/dist/lib/local-llm/mlx-server-process.js +210 -0
  80. package/dist/lib/local-llm/mlx-server-process.js.map +1 -0
  81. package/dist/lib/local-llm/model-manager.d.ts +33 -0
  82. package/dist/lib/local-llm/model-manager.d.ts.map +1 -0
  83. package/dist/lib/local-llm/model-manager.js +591 -0
  84. package/dist/lib/local-llm/model-manager.js.map +1 -0
  85. package/dist/lib/local-llm/types.d.ts +51 -0
  86. package/dist/lib/local-llm/types.d.ts.map +1 -0
  87. package/dist/lib/local-llm/types.js +2 -0
  88. package/dist/lib/local-llm/types.js.map +1 -0
  89. package/dist/lib/logger.d.ts +2 -0
  90. package/dist/lib/logger.d.ts.map +1 -1
  91. package/dist/lib/logger.js +68 -5
  92. package/dist/lib/logger.js.map +1 -1
  93. package/dist/lib/orchestrator.d.ts +9 -0
  94. package/dist/lib/orchestrator.d.ts.map +1 -1
  95. package/dist/lib/orchestrator.js +151 -3
  96. package/dist/lib/orchestrator.js.map +1 -1
  97. package/dist/lib/sandbox/cdp-client.d.ts +2 -1
  98. package/dist/lib/sandbox/cdp-client.d.ts.map +1 -1
  99. package/dist/lib/sandbox/cdp-client.js +33 -7
  100. package/dist/lib/sandbox/cdp-client.js.map +1 -1
  101. package/dist/lib/sandbox/index.d.ts +1 -0
  102. package/dist/lib/sandbox/index.d.ts.map +1 -1
  103. package/dist/lib/sandbox/index.js +1 -0
  104. package/dist/lib/sandbox/index.js.map +1 -1
  105. package/dist/lib/sandbox/page-readiness.d.ts.map +1 -1
  106. package/dist/lib/sandbox/page-readiness.js +33 -0
  107. package/dist/lib/sandbox/page-readiness.js.map +1 -1
  108. package/dist/lib/sandbox/sandbox-browser.d.ts.map +1 -1
  109. package/dist/lib/sandbox/sandbox-browser.js +14 -1
  110. package/dist/lib/sandbox/sandbox-browser.js.map +1 -1
  111. package/dist/lib/sandbox/sandbox-container.d.ts +39 -0
  112. package/dist/lib/sandbox/sandbox-container.d.ts.map +1 -0
  113. package/dist/lib/sandbox/sandbox-container.js +176 -0
  114. package/dist/lib/sandbox/sandbox-container.js.map +1 -0
  115. package/dist/lib/sandbox/sandbox-file.d.ts.map +1 -1
  116. package/dist/lib/sandbox/sandbox-file.js +5 -4
  117. package/dist/lib/sandbox/sandbox-file.js.map +1 -1
  118. package/dist/lib/sandbox/sandbox-shell.d.ts +2 -1
  119. package/dist/lib/sandbox/sandbox-shell.d.ts.map +1 -1
  120. package/dist/lib/sandbox/sandbox-shell.js +42 -24
  121. package/dist/lib/sandbox/sandbox-shell.js.map +1 -1
  122. package/dist/lib/sandbox/sandbox-web.d.ts.map +1 -1
  123. package/dist/lib/sandbox/sandbox-web.js +27 -2
  124. package/dist/lib/sandbox/sandbox-web.js.map +1 -1
  125. package/dist/lib/sandbox/vision-browser.d.ts.map +1 -1
  126. package/dist/lib/sandbox/vision-browser.js +9 -0
  127. package/dist/lib/sandbox/vision-browser.js.map +1 -1
  128. package/dist/lib/sea/app-window.d.ts +7 -0
  129. package/dist/lib/sea/app-window.d.ts.map +1 -0
  130. package/dist/lib/sea/app-window.js +95 -0
  131. package/dist/lib/sea/app-window.js.map +1 -0
  132. package/dist/lib/sea/bootstrap.d.ts +18 -0
  133. package/dist/lib/sea/bootstrap.d.ts.map +1 -0
  134. package/dist/lib/sea/bootstrap.js +103 -0
  135. package/dist/lib/sea/bootstrap.js.map +1 -0
  136. package/dist/lib/sea/sqlite-vec-shim.d.ts +3 -0
  137. package/dist/lib/sea/sqlite-vec-shim.d.ts.map +1 -0
  138. package/dist/lib/sea/sqlite-vec-shim.js +10 -0
  139. package/dist/lib/sea/sqlite-vec-shim.js.map +1 -0
  140. package/dist/lib/tools/built-in/knowledge-entity-lookup.tool.d.ts +1 -2
  141. package/dist/lib/tools/built-in/knowledge-entity-lookup.tool.d.ts.map +1 -1
  142. package/dist/lib/tools/built-in/knowledge-entity-lookup.tool.js +7 -13
  143. package/dist/lib/tools/built-in/knowledge-entity-lookup.tool.js.map +1 -1
  144. package/dist/lib/tools/built-in/knowledge-graph-schema.tool.d.ts.map +1 -1
  145. package/dist/lib/tools/built-in/knowledge-graph-schema.tool.js +2 -4
  146. package/dist/lib/tools/built-in/knowledge-graph-schema.tool.js.map +1 -1
  147. package/dist/lib/tools/built-in/knowledge-search.tool.js +4 -4
  148. package/dist/lib/tools/built-in/knowledge-search.tool.js.map +1 -1
  149. package/dist/lib/tools/built-in/knowledge-sql.tool.d.ts.map +1 -1
  150. package/dist/lib/tools/built-in/knowledge-sql.tool.js +70 -37
  151. package/dist/lib/tools/built-in/knowledge-sql.tool.js.map +1 -1
  152. package/dist/lib/tools/built-in/knowledge-tools-factory.js +2 -2
  153. package/dist/lib/tools/built-in/knowledge-tools-factory.js.map +1 -1
  154. package/dist/lib/tools/built-in/knowledge-traverse.tool.d.ts +1 -2
  155. package/dist/lib/tools/built-in/knowledge-traverse.tool.d.ts.map +1 -1
  156. package/dist/lib/tools/built-in/knowledge-traverse.tool.js +5 -11
  157. package/dist/lib/tools/built-in/knowledge-traverse.tool.js.map +1 -1
  158. package/dist/lib/tools/workspace/workspace-tools.d.ts.map +1 -1
  159. package/dist/lib/tools/workspace/workspace-tools.js +5 -4
  160. package/dist/lib/tools/workspace/workspace-tools.js.map +1 -1
  161. package/dist/lib/types/tool-factory.d.ts.map +1 -1
  162. package/dist/lib/types/tool-factory.js +9 -2
  163. package/dist/lib/types/tool-factory.js.map +1 -1
  164. package/dist/lib/utils/document-extract.d.ts +10 -0
  165. package/dist/lib/utils/document-extract.d.ts.map +1 -0
  166. package/dist/lib/utils/document-extract.js +149 -0
  167. package/dist/lib/utils/document-extract.js.map +1 -0
  168. package/dist/lib/workflows/react-workflow-executor.d.ts.map +1 -1
  169. package/dist/lib/workflows/react-workflow-executor.js +20 -14
  170. package/dist/lib/workflows/react-workflow-executor.js.map +1 -1
  171. package/dist/lib/workflows/types.d.ts +71 -45
  172. package/dist/lib/workflows/types.d.ts.map +1 -1
  173. package/dist/lib/workflows/types.js +10 -0
  174. package/dist/lib/workflows/types.js.map +1 -1
  175. package/dist/public/assets/logo.png +0 -0
  176. package/dist/public/chat.html +3 -78
  177. package/dist/public/index.html +3 -330
  178. package/dist/public/src/components/AgentComposer.js +132 -132
  179. package/dist/public/src/components/AgentsView.js +1231 -350
  180. package/dist/public/src/components/AppRoot.js +101 -39
  181. package/dist/public/src/components/GraphView.js +11 -13
  182. package/dist/public/src/components/IdeView.js +133 -98
  183. package/dist/public/src/components/KnowledgeView.js +94 -130
  184. package/dist/public/src/components/LlmView.js +15 -19
  185. package/dist/public/src/components/LocalLlmView.js +2440 -0
  186. package/dist/public/src/components/LogViewer.js +155 -0
  187. package/dist/public/src/components/McpView.js +41 -49
  188. package/dist/public/src/components/MonitorView.js +79 -126
  189. package/dist/public/src/components/NavBar.js +16 -26
  190. package/dist/public/src/components/StandaloneChat.js +136 -150
  191. package/dist/public/src/services/ApiService.js +196 -2
  192. package/dist/public/src/services/SessionStore.js +6 -3
  193. package/dist/public/src/services/StreamManager.js +183 -0
  194. package/dist/public/src/store.js +1 -1
  195. package/dist/public/src/utils/card.js +21 -0
  196. package/dist/public/src/utils/markdown.js +1 -7
  197. package/dist/public/styles.css +2777 -0
  198. package/dist/src/cli/commands/init.d.ts.map +1 -1
  199. package/dist/src/cli/commands/init.js +7 -1
  200. package/dist/src/cli/commands/init.js.map +1 -1
  201. package/dist/src/cli/commands/start.d.ts.map +1 -1
  202. package/dist/src/cli/commands/start.js +28 -5
  203. package/dist/src/cli/commands/start.js.map +1 -1
  204. package/dist/src/cli/index.js +13 -2
  205. package/dist/src/cli/index.js.map +1 -1
  206. package/dist/src/index.js +7 -1
  207. package/dist/src/index.js.map +1 -1
  208. package/dist/src/routes/agents.route.d.ts.map +1 -1
  209. package/dist/src/routes/agents.route.js +2 -0
  210. package/dist/src/routes/agents.route.js.map +1 -1
  211. package/dist/src/routes/chat.route.d.ts.map +1 -1
  212. package/dist/src/routes/chat.route.js +3 -2
  213. package/dist/src/routes/chat.route.js.map +1 -1
  214. package/dist/src/routes/llm.route.d.ts.map +1 -1
  215. package/dist/src/routes/llm.route.js +227 -7
  216. package/dist/src/routes/llm.route.js.map +1 -1
  217. package/dist/src/routes/local-llm.route.d.ts +3 -0
  218. package/dist/src/routes/local-llm.route.d.ts.map +1 -0
  219. package/dist/src/routes/local-llm.route.js +688 -0
  220. package/dist/src/routes/local-llm.route.js.map +1 -0
  221. package/dist/src/routes/logs.route.d.ts +3 -0
  222. package/dist/src/routes/logs.route.d.ts.map +1 -0
  223. package/dist/src/routes/logs.route.js +24 -0
  224. package/dist/src/routes/logs.route.js.map +1 -0
  225. package/dist/src/routes/vnc.route.d.ts +10 -1
  226. package/dist/src/routes/vnc.route.d.ts.map +1 -1
  227. package/dist/src/routes/vnc.route.js +37 -12
  228. package/dist/src/routes/vnc.route.js.map +1 -1
  229. package/dist/src/routes/workflows.route.d.ts.map +1 -1
  230. package/dist/src/routes/workflows.route.js +24 -0
  231. package/dist/src/routes/workflows.route.js.map +1 -1
  232. package/dist/src/server.d.ts.map +1 -1
  233. package/dist/src/server.js +24 -2
  234. package/dist/src/server.js.map +1 -1
  235. package/dist/templates/agents/actor.agent.yaml +34 -0
  236. package/dist/templates/agents/architect.agent.yaml +0 -1
  237. package/dist/templates/agents/chatbot.agent.yaml +0 -1
  238. package/dist/templates/agents/corporate.agent.yaml +0 -1
  239. package/dist/templates/agents/functions.agent.yaml +29 -0
  240. package/dist/templates/agents/investment-analyst.agent.yaml +0 -1
  241. package/dist/templates/agents/music-librarian.agent.yaml +3 -27
  242. package/dist/templates/agents/network-security.agent.yaml +0 -1
  243. package/dist/templates/agents/transport-security.agent.yaml +0 -1
  244. package/dist/templates/agents/web-engineer.agent.yaml +3 -4
  245. package/dist/templates/agents/web-pilot.agent.yaml +0 -1
  246. package/dist/templates/knowledge/patient-records.knowledge.yaml +20 -0
  247. package/dist/templates/knowledge/pdf-patients/PDF_Deid_Deidentification_0.pdf +0 -0
  248. package/dist/templates/knowledge/pdf-patients/PDF_Deid_Deidentification_1.pdf +0 -0
  249. package/dist/templates/knowledge/pdf-patients/PDF_Deid_Deidentification_10.pdf +0 -0
  250. package/dist/templates/knowledge/pdf-patients/PDF_Deid_Deidentification_11.pdf +0 -0
  251. package/dist/templates/knowledge/web-docs.knowledge.yaml +1 -1
  252. package/dist/templates/llm.json +73 -10
  253. package/dist/templates/skills/orcha-builder/SKILL.md +56 -3
  254. package/dist/templates/workflows/example.workflow.yaml +27 -35
  255. package/dist/templates/workflows/react-example.workflow.yaml +14 -19
  256. package/dist/templates/workflows/team-chat.workflow.yaml +47 -0
  257. package/package.json +14 -6
  258. package/dist/public/src/components/SkillsView.js +0 -137
  259. package/dist/public/src/components/WorkflowsView.js +0 -568
@@ -0,0 +1,164 @@
1
+ import * as path from 'path';
2
+ import { LlamaServerProcess } from "../llama-server-process.js";
3
+ import { killOrphanedServers } from "../llama-server-process.js";
4
+ import { ModelManager } from "../model-manager.js";
5
+ import { readGGUFModelInfo, calculateOptimalContextSize, kvCacheBytesPerToken } from "../gguf-reader.js";
6
+ import { detectGpu, getBinaryVersion, isSystemBinary, updateBinary, checkForUpdate } from "../binary-manager.js";
7
+ import { logger } from "../../logger.js";
8
/**
 * Local-LLM engine that drives llama.cpp server processes.
 *
 * Two independent `LlamaServerProcess` instances are managed: one for chat
 * and one for embeddings. Both are created lazily on first load. All binary,
 * model and process helpers receive the directory set via `setBaseDir`.
 */
export class LlamaCppEngine {
    /** Engine identifier reported by `getStatus()`. */
    engineName = 'llama-cpp';
    /** Directory passed to the binary/model/server helpers. */
    _baseDir = '';
    /** Chat server wrapper; `null` until `loadChat` runs for the first time. */
    chatServer = null;
    /** Embedding server wrapper; `null` until `loadEmbedding` runs for the first time. */
    embeddingServer = null;
    /** Context size used for the currently loaded chat model, or `null`. */
    _detectedContextSize = null;
    /** `{ modelBytes, kvCacheBytes, totalBytes }` for the loaded chat model, or `null`. */
    _memoryEstimate = null;
    /** Whether an mmproj file was found for the loaded chat model. */
    _supportsVision = false;

    /**
     * Sets the directory used by all subsequent helper calls.
     * @param {string} dir
     */
    setBaseDir(dir) {
        this._baseDir = dir;
    }

    /**
     * @returns {boolean} `true` when `getBinaryVersion` reports a version for the base directory.
     */
    isAvailable() {
        return getBinaryVersion(this._baseDir) !== null;
    }

    // ─── Chat ───────────────────────────────────────────────────────────────────

    /**
     * Starts the chat server for `modelPath`.
     *
     * Does nothing when the chat server is already running that exact model.
     * Otherwise it derives the context size, records a memory estimate, looks
     * for a multimodal projector and starts the server with options chosen
     * from the detected GPU acceleration.
     *
     * @param {string} modelPath - Path to the model file.
     * @param {{ contextSize?: number, reasoningBudget?: number }} [opts]
     * @returns {Promise<void>}
     */
    async loadChat(modelPath, opts) {
        if (!this.chatServer) {
            this.chatServer = new LlamaServerProcess(this._baseDir);
        }
        if (this.chatServer.running && this.chatServer.modelPath === modelPath) {
            return;
        }

        let contextSize = opts?.contextSize;
        const modelInfo = await readGGUFModelInfo(modelPath);
        if (!contextSize && modelInfo) {
            contextSize = calculateOptimalContextSize(modelInfo);
        }

        // Clear the previous model's estimate first: when the model info cannot
        // be read there is nothing to compute, and the old numbers must not be
        // reported for the new model.
        this._memoryEstimate = null;
        if (modelInfo && contextSize) {
            const kvBytes = contextSize * kvCacheBytesPerToken(modelInfo);
            this._memoryEstimate = {
                modelBytes: modelInfo.fileSizeBytes,
                kvCacheBytes: kvBytes,
                totalBytes: modelInfo.fileSizeBytes + kvBytes,
            };
        }

        // Auto-detect multimodal projector (mmproj) for vision support
        const modelFileName = path.basename(modelPath);
        const manager = new ModelManager(this._baseDir);
        const mmproj = await manager.findMmprojForModel(modelFileName);
        this._supportsVision = !!mmproj;
        if (mmproj) {
            logger.info(`[LlamaCppEngine] Vision enabled with mmproj: ${path.basename(mmproj)}`);
        }

        this._detectedContextSize = contextSize ?? null;

        const gpu = detectGpu();
        const isGpu = gpu.accel !== 'none';
        const isMetal = gpu.accel === 'metal';
        await this.chatServer.start({
            modelPath,
            contextSize,
            mmproj: mmproj ?? undefined,
            // NOTE(review): -1 presumably means "offload all layers" — confirm in LlamaServerProcess.
            gpuLayers: isGpu ? -1 : 0,
            flashAttn: isGpu,
            ...(isGpu ? { batchSize: 4096, ubatchSize: 1024 } : {}),
            ...(isMetal ? { cacheTypeK: 'q8_0', cacheTypeV: 'q8_0', mlock: true } : {}),
            // Compare against undefined so an explicit budget of 0 is still forwarded.
            ...(opts?.reasoningBudget !== undefined ? { reasoningBudget: opts.reasoningBudget } : {}),
        });
    }

    /**
     * Stops the chat server (if one was ever created) and clears the per-model
     * state so `getChatStatus()` does not report values from a stopped model.
     * @returns {Promise<void>}
     */
    async unloadChat() {
        if (!this.chatServer) {
            return;
        }
        await this.chatServer.stop();
        this._detectedContextSize = null;
        this._memoryEstimate = null;
        this._supportsVision = false;
    }

    /**
     * Unloads the current chat model, then loads `modelPath`.
     * @param {string} modelPath
     * @param {{ contextSize?: number, reasoningBudget?: number }} [opts]
     * @returns {Promise<void>}
     */
    async swapChat(modelPath, opts) {
        await this.unloadChat();
        await this.loadChat(modelPath, opts);
    }

    /**
     * Starts the chat model named `modelName` unless a chat server is already
     * running (whichever model it is serving).
     * @param {string} modelName
     * @param {{ contextSize?: number, reasoningBudget?: number }} [opts]
     * @returns {Promise<void>}
     * @throws {Error} When the model file cannot be resolved.
     */
    async ensureRunningChat(modelName, opts) {
        if (this.chatServer?.running) {
            return;
        }
        logger.info(`[LlamaCppEngine] Auto-starting chat model: ${modelName}`);
        const { filePath } = await this.resolveModelPath(modelName);
        await this.loadChat(filePath, opts);
    }

    /**
     * @returns {{ running: boolean, activeModel: string|null, port: number|null,
     *   contextSize: number|null, memoryEstimate: object|null, supportsVision: boolean }}
     *   Snapshot of the chat server; `activeModel` is only set while running.
     */
    getChatStatus() {
        const running = this.chatServer?.running ?? false;
        return {
            running,
            activeModel: running ? (this.chatServer?.modelPath ?? null) : null,
            port: this.chatServer?.port ?? null,
            contextSize: this._detectedContextSize,
            memoryEstimate: this._memoryEstimate,
            supportsVision: this._supportsVision,
        };
    }

    /**
     * @returns {string|null} The chat server base URL once the server reports `ready`, else `null`.
     */
    getChatBaseUrl() {
        return this.chatServer?.ready ? this.chatServer.getBaseUrl() : null;
    }

    // ─── Embedding ──────────────────────────────────────────────────────────────

    /**
     * Starts the embedding server for `modelPath`; no-op when it already runs that model.
     * @param {string} modelPath
     * @returns {Promise<void>}
     */
    async loadEmbedding(modelPath) {
        if (!this.embeddingServer) {
            // NOTE(review): the second argument presumably marks this process as the embedding server — confirm.
            this.embeddingServer = new LlamaServerProcess(this._baseDir, true);
        }
        if (this.embeddingServer.running && this.embeddingServer.modelPath === modelPath) {
            return;
        }
        await this.embeddingServer.start({ modelPath, embedding: true });
    }

    /**
     * Stops the embedding server if one was ever created.
     * @returns {Promise<void>}
     */
    async unloadEmbedding() {
        if (this.embeddingServer) {
            await this.embeddingServer.stop();
        }
    }

    /**
     * Starts the embedding model named `modelName` unless an embedding server is already running.
     * @param {string} modelName
     * @returns {Promise<void>}
     * @throws {Error} When the model file cannot be resolved.
     */
    async ensureRunningEmbedding(modelName) {
        if (this.embeddingServer?.running) {
            return;
        }
        logger.info(`[LlamaCppEngine] Auto-starting embedding model: ${modelName}`);
        const { filePath } = await this.resolveModelPath(modelName);
        await this.loadEmbedding(filePath);
    }

    /**
     * @returns {{ running: boolean, activeModel: string|null, port: number|null,
     *   contextSize: null, memoryEstimate: null }}
     *   Snapshot of the embedding server; context size and memory are not tracked for it.
     */
    getEmbeddingStatus() {
        const running = this.embeddingServer?.running ?? false;
        return {
            running,
            activeModel: running ? (this.embeddingServer?.modelPath ?? null) : null,
            port: this.embeddingServer?.port ?? null,
            contextSize: null,
            memoryEstimate: null,
        };
    }

    /**
     * @returns {string|null} The embedding server base URL once the server reports `ready`, else `null`.
     */
    getEmbeddingBaseUrl() {
        return this.embeddingServer?.ready ? this.embeddingServer.getBaseUrl() : null;
    }

    // ─── Combined ───────────────────────────────────────────────────────────────

    /**
     * @returns {{ engineName: string, available: boolean, chat: object, embedding: object }}
     *   Engine name, availability, and the chat and embedding status snapshots.
     */
    getStatus() {
        return {
            engineName: this.engineName,
            available: this.isAvailable(),
            chat: this.getChatStatus(),
            embedding: this.getEmbeddingStatus(),
        };
    }

    /** Delegates to `killOrphanedServers` for the base directory. */
    killOrphans() {
        killOrphanedServers(this._baseDir);
    }

    // ─── Binary management ─────────────────────────────────────────────────────

    /**
     * @returns {string|null} Whatever `getBinaryVersion` reports for the base directory.
     */
    getBinaryVersion() {
        return getBinaryVersion(this._baseDir);
    }

    /**
     * @returns {'system'|'managed'|null} `null` when no binary version is found,
     *   otherwise `'system'` or `'managed'` according to `isSystemBinary()`.
     */
    getBinarySource() {
        const version = getBinaryVersion(this._baseDir);
        if (!version) {
            return null;
        }
        return isSystemBinary() ? 'system' : 'managed';
    }

    /** @returns {Promise<*>} Result of `checkForUpdate` for the base directory. */
    async checkForUpdate() {
        return checkForUpdate(this._baseDir);
    }

    /** @returns {Promise<*>} Result of `updateBinary` for the base directory. */
    async updateBinary() {
        return updateBinary(this._baseDir);
    }

    // ─── Private ────────────────────────────────────────────────────────────────

    /**
     * Resolves a model name to the entry returned by `ModelManager.findModelFile`.
     * @param {string} modelName
     * @returns {Promise<{ filePath: string }>} Callers read `filePath` from the result.
     * @throws {Error} When no matching model file exists.
     */
    async resolveModelPath(modelName) {
        const manager = new ModelManager(this._baseDir);
        const result = await manager.findModelFile(modelName);
        if (!result) {
            throw new Error(`Local model "${modelName}" not found. Download it first.`);
        }
        return result;
    }
}
164
+ //# sourceMappingURL=llama-cpp-engine.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llama-cpp-engine.js","sourceRoot":"","sources":["../../../../lib/local-llm/engines/llama-cpp-engine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAC;AAChE,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AACjE,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,iBAAiB,EAAE,2BAA2B,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACzG,OAAO,EAAE,SAAS,EAAE,gBAAgB,EAAE,cAAc,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACjH,OAAO,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAGzC,MAAM,OAAO,cAAc;IAChB,UAAU,GAAG,WAAW,CAAC;IAE1B,QAAQ,GAAG,EAAE,CAAC;IACd,UAAU,GAA8B,IAAI,CAAC;IAC7C,eAAe,GAA8B,IAAI,CAAC;IAClD,oBAAoB,GAAkB,IAAI,CAAC;IAC3C,eAAe,GAA4E,IAAI,CAAC;IAChG,eAAe,GAAG,KAAK,CAAC;IAEhC,UAAU,CAAC,GAAW;QACpB,IAAI,CAAC,QAAQ,GAAG,GAAG,CAAC;IACtB,CAAC;IAED,WAAW;QACT,OAAO,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,IAAI,CAAC;IAClD,CAAC;IAED,+EAA+E;IAE/E,KAAK,CAAC,QAAQ,CAAC,SAAiB,EAAE,IAAkB;QAClD,IAAI,CAAC,IAAI,CAAC,UAAU;YAAE,IAAI,CAAC,UAAU,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC9E,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,IAAI,IAAI,CAAC,UAAU,CAAC,SAAS,KAAK,SAAS;YAAE,OAAO;QAE/E,IAAI,WAAW,GAAG,IAAI,EAAE,WAAW,CAAC;QAEpC,MAAM,SAAS,GAAG,MAAM,iBAAiB,CAAC,SAAS,CAAC,CAAC;QACrD,IAAI,CAAC,WAAW,IAAI,SAAS,EAAE,CAAC;YAC9B,WAAW,GAAG,2BAA2B,CAAC,SAAS,CAAC,CAAC;QACvD,CAAC;QAED,IAAI,SAAS,IAAI,WAAW,EAAE,CAAC;YAC7B,MAAM,OAAO,GAAG,WAAW,GAAG,oBAAoB,CAAC,SAAS,CAAC,CAAC;YAC9D,IAAI,CAAC,eAAe,GAAG;gBACrB,UAAU,EAAE,SAAS,CAAC,aAAa;gBACnC,YAAY,EAAE,OAAO;gBACrB,UAAU,EAAE,SAAS,CAAC,aAAa,GAAG,OAAO;aAC9C,CAAC;QACJ,CAAC;QAED,+DAA+D;QAC/D,MAAM,aAAa,GAAG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;QAC/C,MAAM,OAAO,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,kBAAkB,CAAC,aAAa,CAAC,CAAC;QAC/D,IAAI,CAAC,eAAe,GAAG,CAAC,CAAC,MAAM,CAAC;QAEhC,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,CAAC,IAAI,CAAC,gDAAgD,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACvF,CAAC;QAED,IAAI,CAAC,oBAAoB,GAAG,WAAW,IAAI,IAAI,CAAC;QAChD,MAAM,GAAG,GAAG,SAAS,EAAE,CAAC;QACxB,MAAM,KAAK,GAAG,GAAG,CAAC,
KAAK,KAAK,MAAM,CAAC;QACnC,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,KAAK,OAAO,CAAC;QACtC,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC;YAC1B,SAAS;YACT,WAAW;YACX,MAAM,EAAE,MAAM,IAAI,SAAS;YAC3B,SAAS,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACzB,SAAS,EAAE,KAAK;YAChB,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACvD,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3E,GAAG,CAAC,IAAI,EAAE,eAAe,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,eAAe,EAAE,IAAI,CAAC,eAAe,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAC1F,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YACpB,MAAM,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,SAAiB,EAAE,IAAkB;QAClD,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QACxB,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IACvC,CAAC;IAED,KAAK,CAAC,iBAAiB,CAAC,SAAiB,EAAE,IAAkB;QAC3D,IAAI,IAAI,CAAC,UAAU,EAAE,OAAO;YAAE,OAAO;QACrC,MAAM,CAAC,IAAI,CAAC,8CAA8C,SAAS,EAAE,CAAC,CAAC;QACvE,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;QAC5D,MAAM,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IACtC,CAAC;IAED,aAAa;QACX,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,EAAE,OAAO,IAAI,KAAK,CAAC;QAClD,OAAO;YACL,OAAO;YACP,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,EAAE,SAAS,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;YAClE,IAAI,EAAE,IAAI,CAAC,UAAU,EAAE,IAAI,IAAI,IAAI;YACnC,WAAW,EAAE,IAAI,CAAC,oBAAoB;YACtC,cAAc,EAAE,IAAI,CAAC,eAAe;YACpC,cAAc,EAAE,IAAI,CAAC,eAAe;SACrC,CAAC;IACJ,CAAC;IAED,cAAc;QACZ,OAAO,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IACtE,CAAC;IAED,+EAA+E;IAE/E,KAAK,CAAC,aAAa,CAAC,SAAiB;QACnC,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,IAAI,CAAC,eAAe,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;QAC9F,IAAI,IAAI,CAAC,eAAe,CAAC,OAAO,IAAI,IAAI,CAAC,eAAe,CAAC,SAAS,KAAK,SAAS;YAAE,OAAO;QACzF,MAAM,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACnE,CAAC;IAED,KA
AK,CAAC,eAAe;QACnB,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QACpC,CAAC;IACH,CAAC;IAED,KAAK,CAAC,sBAAsB,CAAC,SAAiB;QAC5C,IAAI,IAAI,CAAC,eAAe,EAAE,OAAO;YAAE,OAAO;QAC1C,MAAM,CAAC,IAAI,CAAC,mDAAmD,SAAS,EAAE,CAAC,CAAC;QAC5E,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;QAC5D,MAAM,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC;IAED,kBAAkB;QAChB,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,EAAE,OAAO,IAAI,KAAK,CAAC;QACvD,OAAO;YACL,OAAO;YACP,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,eAAe,EAAE,SAAS,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;YACvE,IAAI,EAAE,IAAI,CAAC,eAAe,EAAE,IAAI,IAAI,IAAI;YACxC,WAAW,EAAE,IAAI;YACjB,cAAc,EAAE,IAAI;SACrB,CAAC;IACJ,CAAC;IAED,mBAAmB;QACjB,OAAO,IAAI,CAAC,eAAe,EAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IAChF,CAAC;IAED,+EAA+E;IAE/E,SAAS;QACP,OAAO;YACL,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,SAAS,EAAE,IAAI,CAAC,WAAW,EAAE;YAC7B,IAAI,EAAE,IAAI,CAAC,aAAa,EAAE;YAC1B,SAAS,EAAE,IAAI,CAAC,kBAAkB,EAAE;SACrC,CAAC;IACJ,CAAC;IAED,WAAW;QACT,mBAAmB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC;IAED,8EAA8E;IAE9E,gBAAgB;QACd,OAAO,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACzC,CAAC;IAED,eAAe;QACb,MAAM,OAAO,GAAG,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChD,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAC1B,OAAO,cAAc,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;IACjD,CAAC;IAED,KAAK,CAAC,cAAc;QAClB,OAAO,cAAc,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC;IAED,KAAK,CAAC,YAAY;QAChB,OAAO,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC;IAED,+EAA+E;IAEvE,KAAK,CAAC,gBAAgB,CAAC,SAAiB;QAC9C,MAAM,OAAO,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QACtD,IAAI,CAAC,MAAM;YAAE,MAAM,IAAI,KAAK,CAAC,gBAAgB,SAAS,iCAAiC,CAAC,CAAC;QACzF,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
@@ -0,0 +1,31 @@
1
+ import type { LocalEngine, EngineChatStatus, EngineServerStatus, EngineStatus, LoadOptions } from '../engine-interface.ts';
2
/**
 * Type declaration for the `mlx-serve` implementation of `LocalEngine`.
 *
 * The surface mirrors the engine contract: chat lifecycle, embedding
 * lifecycle, combined status, orphan cleanup and binary management.
 */
export declare class MlxServeEngine implements LocalEngine {
    /** Fixed engine identifier. */
    readonly engineName = "mlx-serve";

    // Internal state (not part of the public contract).
    private _baseDir;
    private chatServer;
    private embeddingServer;
    private _detectedContextSize;
    private _memoryEstimate;
    private _supportsVision;

    /** Sets the base directory used by the engine's helpers. */
    setBaseDir(dir: string): void;
    /** Reports whether this engine can be used in the current environment. */
    isAvailable(): boolean;

    // Chat
    /** Starts the chat server for the given model path. */
    loadChat(modelPath: string, opts?: LoadOptions): Promise<void>;
    /** Stops the chat server. */
    unloadChat(): Promise<void>;
    /** Replaces the loaded chat model with the one at `modelPath`. */
    swapChat(modelPath: string, opts?: LoadOptions): Promise<void>;
    /** Makes sure a chat model is running, resolving `modelName` when it has to start one. */
    ensureRunningChat(modelName: string, opts?: LoadOptions): Promise<void>;
    /** Current chat server status. */
    getChatStatus(): EngineChatStatus;
    /** Chat server base URL, or `null` when none is available. */
    getChatBaseUrl(): string | null;

    // Embedding
    /** Starts the embedding server for the given model path. */
    loadEmbedding(modelPath: string): Promise<void>;
    /** Stops the embedding server. */
    unloadEmbedding(): Promise<void>;
    /** Makes sure an embedding model is running, resolving `modelName` when it has to start one. */
    ensureRunningEmbedding(modelName: string): Promise<void>;
    /** Current embedding server status. */
    getEmbeddingStatus(): EngineServerStatus;
    /** Embedding server base URL, or `null` when none is available. */
    getEmbeddingBaseUrl(): string | null;

    // Combined
    /** Combined engine status. */
    getStatus(): EngineStatus;
    /** Cleans up orphaned server processes. */
    killOrphans(): void;

    // Binary management
    /** Installed binary version, or `null` when none is found. */
    getBinaryVersion(): string | null;
    /** Where the binary comes from, or `null` when none is found. */
    getBinarySource(): 'managed' | 'system' | null;
    /** Checks whether a newer binary is available. */
    checkForUpdate(): Promise<any>;
    /** Updates the binary. */
    updateBinary(): Promise<void>;

    private resolveModelPath;
}
31
+ //# sourceMappingURL=mlx-serve-engine.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mlx-serve-engine.d.ts","sourceRoot":"","sources":["../../../../lib/local-llm/engines/mlx-serve-engine.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,WAAW,EAAE,gBAAgB,EAAE,kBAAkB,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAE3H,qBAAa,cAAe,YAAW,WAAW;IAChD,QAAQ,CAAC,UAAU,eAAe;IAElC,OAAO,CAAC,QAAQ,CAAM;IACtB,OAAO,CAAC,UAAU,CAAiC;IACnD,OAAO,CAAC,eAAe,CAAiC;IACxD,OAAO,CAAC,oBAAoB,CAAuB;IACnD,OAAO,CAAC,eAAe,CAAiF;IACxG,OAAO,CAAC,eAAe,CAAS;IAEhC,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAI7B,WAAW,IAAI,OAAO;IAMhB,QAAQ,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAwC9D,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAM3B,QAAQ,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAK9D,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAO7E,aAAa,IAAI,gBAAgB;IAYjC,cAAc,IAAI,MAAM,GAAG,IAAI;IAMzB,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAQ/C,eAAe,IAAI,OAAO,CAAC,IAAI,CAAC;IAMhC,sBAAsB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAO9D,kBAAkB,IAAI,kBAAkB;IAWxC,mBAAmB,IAAI,MAAM,GAAG,IAAI;IAMpC,SAAS,IAAI,YAAY;IASzB,WAAW,IAAI,IAAI;IAMnB,gBAAgB,IAAI,MAAM,GAAG,IAAI;IAIjC,eAAe,IAAI,SAAS,GAAG,QAAQ,GAAG,IAAI;IAMxC,cAAc,IAAI,OAAO,CAAC,GAAG,CAAC;IAI9B,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;YAMrB,gBAAgB;CAM/B"}
@@ -0,0 +1,161 @@
1
+ import { MlxServerProcess } from "../mlx-server-process.js";
2
+ import { killOrphanedMlxServers } from "../mlx-server-process.js";
3
+ import { ModelManager } from "../model-manager.js";
4
+ import { getMlxBinaryVersion, isMlxSystemBinary, updateMlxBinary, checkForMlxUpdate } from "../mlx-binary-manager.js";
5
+ import { logger } from "../../logger.js";
6
/**
 * Local engine that drives `mlx-serve` through MlxServerProcess instances.
 *
 * Holds at most one chat server and one embedding server, both created
 * lazily on first load, and exposes their status, base URLs and the
 * management helpers for the underlying binary.
 */
export class MlxServeEngine {
    engineName = 'mlx-serve';
    _baseDir = '';
    chatServer = null;
    embeddingServer = null;
    _detectedContextSize = null;
    _memoryEstimate = null;
    _supportsVision = false;

    /**
     * Sets the base directory handed to the server processes, the model
     * manager and the binary helpers.
     * @param {string} dir
     */
    setBaseDir(dir) {
        this._baseDir = dir;
    }

    /**
     * @returns {boolean} true only on darwin/arm64 when a binary version can be resolved.
     */
    isAvailable() {
        // Platform checks come first so the binary lookup is skipped
        // entirely on hosts that can never run this engine.
        return process.platform === 'darwin'
            && process.arch === 'arm64'
            && getMlxBinaryVersion(this._baseDir) !== null;
    }

    // ─── Chat ───────────────────────────────────────────────────────────────────

    /**
     * Starts the chat server for `modelPath` and then queries its `/props`
     * endpoint to record the context size and a memory estimate.
     * No-op when the server is already running that exact model path.
     * A failing or slow `/props` request is logged and otherwise ignored.
     * @param {string} modelPath
     * @param {{ contextSize?: number, reasoningBudget?: number }} [opts]
     * @returns {Promise<void>}
     */
    async loadChat(modelPath, opts) {
        if (this.chatServer?.running && this.chatServer.modelPath === modelPath)
            return;
        if (!this.chatServer)
            this.chatServer = new MlxServerProcess(this._baseDir);

        // Drop everything learned about the previously loaded model.
        this._memoryEstimate = null;
        this._supportsVision = false;
        this._detectedContextSize = opts?.contextSize ?? null;

        await this.chatServer.start({
            modelPath,
            contextSize: opts?.contextSize,
            reasoningBudget: opts?.reasoningBudget,
        });

        // Fetch /props from mlx-serve to get memory usage and context size
        try {
            const baseUrl = this.chatServer.getBaseUrl();
            // Bounded wait: status enrichment is best-effort and must not
            // keep loadChat() pending forever if the server never answers.
            const res = await fetch(`${baseUrl}/props`, { signal: AbortSignal.timeout(5000) });
            if (res.ok) {
                const props = await res.json();
                const nCtx = props.default_generation_settings?.n_ctx;
                // An explicitly requested context size wins over the reported one.
                if (nCtx && !this._detectedContextSize) {
                    this._detectedContextSize = nCtx;
                }
                const ctxSize = this._detectedContextSize ?? nCtx ?? 0;
                const info = props.model_info;
                // KV cache: layers × 2(K+V) × kv_heads × head_dim × 2(float16) × ctx
                const rawKvBytes = info
                    ? info.num_hidden_layers * 2 * info.num_key_value_heads * info.head_dim * 2 * ctxSize
                    : 0;
                // A missing model_info field turns the product into NaN;
                // report 0 instead of leaking NaN into totalBytes.
                const kvCacheBytes = Number.isFinite(rawKvBytes) ? rawKvBytes : 0;
                const modelBytes = props.memory?.active_bytes ?? 0;
                if (modelBytes || kvCacheBytes) {
                    this._memoryEstimate = {
                        modelBytes,
                        kvCacheBytes,
                        totalBytes: modelBytes + kvCacheBytes,
                    };
                }
            }
        }
        catch (err) {
            logger.warn('[MlxServeEngine] Failed to fetch /props:', err);
        }
    }

    /**
     * Stops the chat server if one was ever created.
     * @returns {Promise<void>}
     */
    async unloadChat() {
        if (this.chatServer) {
            await this.chatServer.stop();
        }
    }

    /**
     * Stops the current chat server, then loads `modelPath`.
     * @param {string} modelPath
     * @param {object} [opts] forwarded to loadChat()
     * @returns {Promise<void>}
     */
    async swapChat(modelPath, opts) {
        await this.unloadChat();
        await this.loadChat(modelPath, opts);
    }

    /**
     * Resolves `modelName` to a file and loads it, unless a chat server is
     * already running (whichever model it serves).
     * @param {string} modelName
     * @param {object} [opts] forwarded to loadChat()
     * @returns {Promise<void>}
     * @throws {Error} when the model cannot be found locally.
     */
    async ensureRunningChat(modelName, opts) {
        if (this.chatServer?.running)
            return;
        logger.info(`[MlxServeEngine] Auto-starting chat model: ${modelName}`);
        const { filePath } = await this.resolveModelPath(modelName);
        await this.loadChat(filePath, opts);
    }

    /**
     * @returns {object} snapshot of the chat server state; `activeModel` is
     * null whenever the server is not running.
     */
    getChatStatus() {
        const running = this.chatServer?.running ?? false;
        return {
            running,
            activeModel: running ? (this.chatServer?.modelPath ?? null) : null,
            port: this.chatServer?.port ?? null,
            contextSize: this._detectedContextSize,
            memoryEstimate: this._memoryEstimate,
            supportsVision: this._supportsVision,
        };
    }

    /**
     * @returns {string|null} the chat server base URL, or null until the server reports ready.
     */
    getChatBaseUrl() {
        return this.chatServer?.ready ? this.chatServer.getBaseUrl() : null;
    }

    // ─── Embedding ──────────────────────────────────────────────────────────────

    /**
     * Starts the embedding server for `modelPath`.
     * No-op when it is already running that exact model path.
     * @param {string} modelPath
     * @returns {Promise<void>}
     */
    async loadEmbedding(modelPath) {
        if (this.embeddingServer?.running && this.embeddingServer.modelPath === modelPath)
            return;
        if (!this.embeddingServer)
            this.embeddingServer = new MlxServerProcess(this._baseDir, 'embedding');
        await this.embeddingServer.start({ modelPath });
        logger.info(`[MlxServeEngine] Embedding server ready on port ${this.embeddingServer.port}`);
    }

    /**
     * Stops the embedding server if one was ever created.
     * @returns {Promise<void>}
     */
    async unloadEmbedding() {
        if (this.embeddingServer) {
            await this.embeddingServer.stop();
        }
    }

    /**
     * Resolves `modelName` to a file and loads it, unless an embedding
     * server is already running (whichever model it serves).
     * @param {string} modelName
     * @returns {Promise<void>}
     * @throws {Error} when the model cannot be found locally.
     */
    async ensureRunningEmbedding(modelName) {
        if (this.embeddingServer?.running)
            return;
        logger.info(`[MlxServeEngine] Auto-starting embedding model: ${modelName}`);
        const { filePath } = await this.resolveModelPath(modelName);
        await this.loadEmbedding(filePath);
    }

    /**
     * @returns {object} snapshot of the embedding server state; context size
     * and memory estimate are always reported as null.
     */
    getEmbeddingStatus() {
        const running = this.embeddingServer?.running ?? false;
        return {
            running,
            activeModel: running ? (this.embeddingServer?.modelPath ?? null) : null,
            port: this.embeddingServer?.port ?? null,
            contextSize: null,
            memoryEstimate: null,
        };
    }

    /**
     * @returns {string|null} the embedding server base URL, or null until the server reports ready.
     */
    getEmbeddingBaseUrl() {
        return this.embeddingServer?.ready ? this.embeddingServer.getBaseUrl() : null;
    }

    // ─── Combined ───────────────────────────────────────────────────────────────

    /**
     * @returns {object} engine name, availability and both server snapshots.
     */
    getStatus() {
        return {
            engineName: this.engineName,
            available: this.isAvailable(),
            chat: this.getChatStatus(),
            embedding: this.getEmbeddingStatus(),
        };
    }

    /** Delegates to killOrphanedMlxServers() for the configured base directory. */
    killOrphans() {
        killOrphanedMlxServers(this._baseDir);
    }

    // ─── Binary management ─────────────────────────────────────────────────────

    /**
     * @returns {string|null} whatever getMlxBinaryVersion() reports for the base directory.
     */
    getBinaryVersion() {
        return getMlxBinaryVersion(this._baseDir);
    }

    /**
     * @returns {'managed'|'system'|null} null when no binary version can be resolved.
     */
    getBinarySource() {
        const version = getMlxBinaryVersion(this._baseDir);
        if (!version)
            return null;
        return isMlxSystemBinary() ? 'system' : 'managed';
    }

    /** @returns {Promise<any>} result of checkForMlxUpdate() for the base directory. */
    async checkForUpdate() {
        return checkForMlxUpdate(this._baseDir);
    }

    /** @returns {Promise<void>} resolves once updateMlxBinary() has finished. */
    async updateBinary() {
        return updateMlxBinary(this._baseDir);
    }

    // ─── Private ────────────────────────────────────────────────────────────────

    /**
     * Looks the model up through a ModelManager rooted at the base directory.
     * @param {string} modelName
     * @returns {Promise<object>} the lookup result; callers read `filePath` from it.
     * @throws {Error} when the lookup yields nothing.
     */
    async resolveModelPath(modelName) {
        const manager = new ModelManager(this._baseDir);
        const result = await manager.findModelFile(modelName);
        if (!result)
            throw new Error(`Local model "${modelName}" not found. Download it first.`);
        return result;
    }
}
161
+ //# sourceMappingURL=mlx-serve-engine.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mlx-serve-engine.js","sourceRoot":"","sources":["../../../../lib/local-llm/engines/mlx-serve-engine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAC5D,OAAO,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAClE,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,eAAe,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AACtH,OAAO,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAGzC,MAAM,OAAO,cAAc;IAChB,UAAU,GAAG,WAAW,CAAC;IAE1B,QAAQ,GAAG,EAAE,CAAC;IACd,UAAU,GAA4B,IAAI,CAAC;IAC3C,eAAe,GAA4B,IAAI,CAAC;IAChD,oBAAoB,GAAkB,IAAI,CAAC;IAC3C,eAAe,GAA4E,IAAI,CAAC;IAChG,eAAe,GAAG,KAAK,CAAC;IAEhC,UAAU,CAAC,GAAW;QACpB,IAAI,CAAC,QAAQ,GAAG,GAAG,CAAC;IACtB,CAAC;IAED,WAAW;QACT,OAAO,mBAAmB,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,IAAI,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ,IAAI,OAAO,CAAC,IAAI,KAAK,OAAO,CAAC;IAClH,CAAC;IAED,+EAA+E;IAE/E,KAAK,CAAC,QAAQ,CAAC,SAAiB,EAAE,IAAkB;QAClD,IAAI,IAAI,CAAC,UAAU,EAAE,OAAO,IAAI,IAAI,CAAC,UAAU,CAAC,SAAS,KAAK,SAAS;YAAE,OAAO;QAChF,IAAI,CAAC,IAAI,CAAC,UAAU;YAAE,IAAI,CAAC,UAAU,GAAG,IAAI,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAE5E,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC5B,IAAI,CAAC,eAAe,GAAG,KAAK,CAAC;QAC7B,IAAI,CAAC,oBAAoB,GAAG,IAAI,EAAE,WAAW,IAAI,IAAI,CAAC;QAEtD,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,SAAS,EAAE,WAAW,EAAE,IAAI,EAAE,WAAW,EAAE,eAAe,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC,CAAC;QAEnH,mEAAmE;QACnE,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE,CAAC;YAC7C,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,OAAO,QAAQ,CAAC,CAAC;YAC5C,IAAI,GAAG,CAAC,EAAE,EAAE,CAAC;gBACX,MAAM,KAAK,GAAG,MAAM,GAAG,CAAC,IAAI,EAAS,CAAC;gBACtC,MAAM,IAAI,GAAG,KAAK,CAAC,2BAA2B,EAAE,KAAK,CAAC;gBACtD,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,oBAAoB,EAAE,CAAC;oBACvC,IAAI,CAAC,oBAAoB,GAAG,IAAI,CAAC;gBACnC,CAAC;gBACD,MAAM,OAAO,GAAG,IAAI,CAAC,oBAAoB,IAAI,IAAI,IAAI,CAAC,CAAC;gBACvD,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC;gBAC9B,qEAAqE;gBACrE,MAAM,YAAY,GAAG,IAAI;oBACvB,CAAC,CAAC,IAAI,CAAC,iBAAiB,GAAG,CAAC,GAAG,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,GAAG,OAAO;oBACrF,CAAC,CAAC,CAAC,CAAC;gBACN,MAAM,UAAU,
GAAG,KAAK,CAAC,MAAM,EAAE,YAAY,IAAI,CAAC,CAAC;gBACnD,IAAI,UAAU,IAAI,YAAY,EAAE,CAAC;oBAC/B,IAAI,CAAC,eAAe,GAAG;wBACrB,UAAU;wBACV,YAAY;wBACZ,UAAU,EAAE,UAAU,GAAG,YAAY;qBACtC,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,IAAI,CAAC,0CAA0C,EAAE,GAAG,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YACpB,MAAM,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,SAAiB,EAAE,IAAkB;QAClD,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QACxB,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IACvC,CAAC;IAED,KAAK,CAAC,iBAAiB,CAAC,SAAiB,EAAE,IAAkB;QAC3D,IAAI,IAAI,CAAC,UAAU,EAAE,OAAO;YAAE,OAAO;QACrC,MAAM,CAAC,IAAI,CAAC,8CAA8C,SAAS,EAAE,CAAC,CAAC;QACvE,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;QAC5D,MAAM,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IACtC,CAAC;IAED,aAAa;QACX,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,EAAE,OAAO,IAAI,KAAK,CAAC;QAClD,OAAO;YACL,OAAO;YACP,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,EAAE,SAAS,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;YAClE,IAAI,EAAE,IAAI,CAAC,UAAU,EAAE,IAAI,IAAI,IAAI;YACnC,WAAW,EAAE,IAAI,CAAC,oBAAoB;YACtC,cAAc,EAAE,IAAI,CAAC,eAAe;YACpC,cAAc,EAAE,IAAI,CAAC,eAAe;SACrC,CAAC;IACJ,CAAC;IAED,cAAc;QACZ,OAAO,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IACtE,CAAC;IAED,+EAA+E;IAE/E,KAAK,CAAC,aAAa,CAAC,SAAiB;QACnC,IAAI,IAAI,CAAC,eAAe,EAAE,OAAO,IAAI,IAAI,CAAC,eAAe,CAAC,SAAS,KAAK,SAAS;YAAE,OAAO;QAC1F,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,IAAI,CAAC,eAAe,GAAG,IAAI,gBAAgB,CAAC,IAAI,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;QAEnG,MAAM,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC;QAChD,MAAM,CAAC,IAAI,CAAC,mDAAmD,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC;IAC9F,CAAC;IAED,KAAK,CAAC,eAAe;QACnB,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QACpC,CAAC;IACH,CAAC;IAED,KAAK,CAAC,sBAAsB,CAAC,SAAiB;QAC5C,IAAI,IAAI,CAAC,eAAe,EAAE,OAAO;YAAE,OAAO;QAC1C,MAAM,CAAC,IAAI,CAAC,mDAAmD,SAAS,EAAE,CAAC,CAAC;QAC5E,MAAM,EAAE,QAA
Q,EAAE,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;QAC5D,MAAM,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC;IAED,kBAAkB;QAChB,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,EAAE,OAAO,IAAI,KAAK,CAAC;QACvD,OAAO;YACL,OAAO;YACP,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,eAAe,EAAE,SAAS,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;YACvE,IAAI,EAAE,IAAI,CAAC,eAAe,EAAE,IAAI,IAAI,IAAI;YACxC,WAAW,EAAE,IAAI;YACjB,cAAc,EAAE,IAAI;SACrB,CAAC;IACJ,CAAC;IAED,mBAAmB;QACjB,OAAO,IAAI,CAAC,eAAe,EAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IAChF,CAAC;IAED,+EAA+E;IAE/E,SAAS;QACP,OAAO;YACL,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,SAAS,EAAE,IAAI,CAAC,WAAW,EAAE;YAC7B,IAAI,EAAE,IAAI,CAAC,aAAa,EAAE;YAC1B,SAAS,EAAE,IAAI,CAAC,kBAAkB,EAAE;SACrC,CAAC;IACJ,CAAC;IAED,WAAW;QACT,sBAAsB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACxC,CAAC;IAED,8EAA8E;IAE9E,gBAAgB;QACd,OAAO,mBAAmB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC5C,CAAC;IAED,eAAe;QACb,MAAM,OAAO,GAAG,mBAAmB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACnD,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAC1B,OAAO,iBAAiB,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;IACpD,CAAC;IAED,KAAK,CAAC,cAAc;QAClB,OAAO,iBAAiB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC1C,CAAC;IAED,KAAK,CAAC,YAAY;QAChB,OAAO,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACxC,CAAC;IAED,+EAA+E;IAEvE,KAAK,CAAC,gBAAgB,CAAC,SAAiB;QAC9C,MAAM,OAAO,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QACtD,IAAI,CAAC,MAAM;YAAE,MAAM,IAAI,KAAK,CAAC,gBAAgB,SAAS,iCAAiC,CAAC,CAAC;QACzF,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
@@ -0,0 +1,20 @@
1
+ export interface GGUFModelInfo {
2
+ contextLength: number;
3
+ blockCount: number;
4
+ embeddingLength: number;
5
+ headCount: number;
6
+ headCountKv: number;
7
+ fileSizeBytes: number;
8
+ }
9
+ /**
10
+ * Reads model architecture info from a GGUF file's metadata header.
11
+ * Only reads the first 1MB — no model loading required.
12
+ */
13
+ export declare function readGGUFModelInfo(modelPath: string): Promise<GGUFModelInfo | null>;
14
+ /**
15
+ * Estimates KV cache bytes per token for a model.
16
+ * KV cache = 2 (K+V) * n_layers * n_kv_heads * head_dim * 2 bytes (f16)
17
+ */
18
+ export declare function kvCacheBytesPerToken(info: GGUFModelInfo): number;
19
+ export declare function calculateOptimalContextSize(info: GGUFModelInfo): number;
20
+ //# sourceMappingURL=gguf-reader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gguf-reader.d.ts","sourceRoot":"","sources":["../../../lib/local-llm/gguf-reader.ts"],"names":[],"mappings":"AASA,MAAM,WAAW,aAAa;IAC5B,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,MAAM,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED;;;GAGG;AACH,wBAAsB,iBAAiB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,IAAI,CAAC,CAkExF;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,aAAa,GAAG,MAAM,CAGhE;AA2BD,wBAAgB,2BAA2B,CAAC,IAAI,EAAE,aAAa,GAAG,MAAM,CAuBvE"}
@@ -0,0 +1,190 @@
1
+ import * as fs from 'fs/promises';
2
+ import { readFileSync } from 'fs';
3
+ import * as os from 'os';
4
+ import { logger } from "../logger.js";
5
+ const GGUF_MAGIC = 0x46554747; // "GGUF" in little-endian
6
+ const METADATA_BUFFER_SIZE = 1024 * 1024; // 1MB covers metadata for all models
7
+ const OS_RESERVED_BYTES = 4 * 1024 * 1024 * 1024; // Reserve 4GB for OS + apps
8
/**
 * Reads model architecture info from a GGUF file's metadata header.
 * Only reads the first 1MB — no model loading required.
 *
 * Every numeric field of the result is always a number: fields that were
 * not found in the scanned region are 0, except `headCountKv`, which falls
 * back to `headCount` when absent or not stored as a scalar.
 *
 * @param {string} modelPath path of the GGUF file to inspect
 * @returns {Promise<object|null>} the parsed info, or null when the file is
 *   not a GGUF v2/v3 file, has no context length, or cannot be read.
 */
export async function readGGUFModelInfo(modelPath) {
    let handle = null;
    try {
        handle = await fs.open(modelPath, 'r');
        const stat = await handle.stat();
        const buf = Buffer.alloc(METADATA_BUFFER_SIZE);
        const { bytesRead } = await handle.read(buf, 0, buf.length, 0);
        // Header: magic(4) + version(4) + tensor count(8) + kv count(8)
        if (bytesRead < 24)
            return null;
        if (buf.readUInt32LE(0) !== GGUF_MAGIC)
            return null;
        const version = buf.readUInt32LE(4);
        if (version < 2 || version > 3)
            return null;
        const kvCount = Number(buf.readBigUInt64LE(16));
        let pos = 24;
        // Start from explicit zeros so a key that lies beyond the scanned
        // region never surfaces as `undefined` in the returned object.
        const info = {
            fileSizeBytes: stat.size,
            contextLength: 0,
            blockCount: 0,
            embeddingLength: 0,
            headCount: 0,
            headCountKv: 0,
        };
        const needed = new Set(['context_length', 'block_count', 'embedding_length', 'attention.head_count', 'attention.head_count_kv']);
        for (let i = 0; i < kvCount && pos < bytesRead - 12 && needed.size > 0; i++) {
            // Each entry: key length (u64), key bytes, value type (u32), value
            if (pos + 8 > bytesRead)
                break;
            const keyLen = Number(buf.readBigUInt64LE(pos));
            pos += 8;
            if (pos + keyLen > bytesRead)
                break;
            const key = buf.toString('utf-8', pos, pos + keyLen);
            pos += keyLen;
            if (pos + 4 > bytesRead)
                break;
            const vtype = buf.readUInt32LE(pos);
            pos += 4;
            // Keys carry an architecture prefix, hence the suffix match.
            if (key.endsWith('.context_length')) {
                info.contextLength = readScalar(buf, pos, vtype) ?? 0;
                needed.delete('context_length');
            }
            else if (key.endsWith('.block_count')) {
                info.blockCount = readScalar(buf, pos, vtype) ?? 0;
                needed.delete('block_count');
            }
            else if (key.endsWith('.embedding_length')) {
                info.embeddingLength = readScalar(buf, pos, vtype) ?? 0;
                needed.delete('embedding_length');
            }
            else if (key.endsWith('.attention.head_count_kv')) {
                // Must be tested before '.attention.head_count'.
                info.headCountKv = readScalar(buf, pos, vtype) ?? 0;
                needed.delete('attention.head_count_kv');
            }
            else if (key.endsWith('.attention.head_count')) {
                info.headCount = readScalar(buf, pos, vtype) ?? 0;
                needed.delete('attention.head_count');
            }
            pos = skipValue(buf, pos, vtype, bytesRead);
            if (pos < 0)
                break;
        }
        if (!info.contextLength)
            return null;
        // No usable KV head count: assume one KV head per attention head,
        // which can only over-estimate the KV cache, never under-estimate it.
        if (!info.headCountKv)
            info.headCountKv = info.headCount;
        const result = info;
        // Accept both separators so the log shows a bare file name on Windows too.
        const fileName = modelPath.split(/[\\/]/).pop();
        logger.info(`[GGUFReader] ${fileName}: ctx=${result.contextLength} layers=${result.blockCount} embd=${result.embeddingLength} heads=${result.headCount} kv_heads=${result.headCountKv} size=${(result.fileSizeBytes / 1024 / 1024 / 1024).toFixed(1)}GB`);
        return result;
    }
    catch (err) {
        logger.warn(`[GGUFReader] Failed to read GGUF metadata: ${err}`);
        return null;
    }
    finally {
        await handle?.close();
    }
}
82
/**
 * Estimates KV cache bytes per token for a model.
 * KV cache = 2 (K+V) * n_layers * n_kv_heads * head_dim * 2 bytes (f16)
 *
 * `head_dim` is derived as `embeddingLength / headCount`. When `headCountKv`
 * is missing or not positive, `headCount` is used in its place.
 *
 * @param {object} info model info with `blockCount`, `embeddingLength`,
 *   `headCount` and `headCountKv`
 * @returns {number} bytes per token, or 0 when the layer count, embedding
 *   length or head count is missing or not positive (estimate unavailable).
 */
export function kvCacheBytesPerToken(info) {
    const { blockCount, embeddingLength, headCount, headCountKv } = info;
    // Written as !(x > 0) so undefined and NaN are rejected too; this also
    // rules out the division by zero below.
    if (!(blockCount > 0) || !(embeddingLength > 0) || !(headCount > 0))
        return 0;
    const kvHeads = headCountKv > 0 ? headCountKv : headCount;
    const headDim = embeddingLength / headCount;
    return 2 * blockCount * kvHeads * headDim * 2;
}
90
+ /**
91
+ * Calculates optimal context size based on available system RAM.
92
+ * Accounts for model weights, KV cache, and OS overhead.
93
+ */
94
/**
 * Returns effective total memory, respecting container cgroup limits.
 * os.totalmem() returns host RAM even inside Docker, so we check cgroup first.
 *
 * Only consulted on Linux. A cgroup value that is unreadable, non-numeric or
 * not positive is ignored rather than returned.
 *
 * @returns {number} usable memory in bytes, never more than os.totalmem().
 */
function getEffectiveMemory() {
    const hostRam = os.totalmem();
    if (process.platform !== 'linux')
        return hostRam;
    try {
        // cgroup v2
        const raw = readFileSync('/sys/fs/cgroup/memory.max', 'utf-8').trim();
        if (raw !== 'max') {
            const limit = Number(raw);
            // Number('') is 0 and Number('garbage') is NaN; either one would
            // otherwise be handed back as the machine's total memory.
            if (Number.isFinite(limit) && limit > 0)
                return Math.min(limit, hostRam);
        }
    }
    catch { /* not cgroup v2 */ }
    try {
        // cgroup v1
        const raw = readFileSync('/sys/fs/cgroup/memory/memory.limit_in_bytes', 'utf-8').trim();
        const limit = Number(raw);
        // Anything at or above host RAM is treated as "no limit".
        if (Number.isFinite(limit) && limit > 0 && limit < hostRam)
            return limit;
    }
    catch { /* not cgroup v1 */ }
    return hostRam;
}
119
/**
 * Picks a context size for a model from the memory left after reserving
 * OS_RESERVED_BYTES and the model file size.
 *
 * The result is the smallest of: the token count whose KV cache fits in half
 * of the remaining memory, the model's native context length, and a 32K cap.
 * It is floored to a multiple of 1024 and never below 2048. When the
 * per-token KV cost cannot be determined, only the native length and the cap
 * apply.
 *
 * @param {object} info model info with `fileSizeBytes` and `contextLength`,
 *   plus whatever kvCacheBytesPerToken() reads
 * @returns {number} context size in tokens; always a finite number >= 2048.
 */
export function calculateOptimalContextSize(info) {
    const totalRam = getEffectiveMemory();
    const availableForModel = totalRam - OS_RESERVED_BYTES;
    const memAfterWeights = availableForModel - info.fileSizeBytes;
    if (memAfterWeights <= 0) {
        logger.warn(`[GGUFReader] Model file (${(info.fileSizeBytes / 1024 / 1024 / 1024).toFixed(1)}GB) exceeds available memory, using minimum context`);
        return 2048;
    }
    const bytesPerToken = kvCacheBytesPerToken(info);
    // Use at most 50% of available-after-weights for KV cache — leave headroom for runtime allocations.
    // An unusable per-token cost (0, NaN, Infinity) means the RAM bound cannot be
    // computed; treat it as "no RAM bound" instead of letting NaN reach the result.
    const maxCtxByRam = Number.isFinite(bytesPerToken) && bytesPerToken > 0
        ? Math.floor((memAfterWeights * 0.5) / bytesPerToken)
        : Infinity;
    // Hard cap: 32K tokens is practical for local models, avoids memory pressure on small machines
    const MAX_CONTEXT_CAP = 32768;
    const nativeCtx = info.contextLength;
    const optimal = Math.min(maxCtxByRam, nativeCtx, MAX_CONTEXT_CAP);
    // Floor to nearest 1024 for cleanliness, minimum 2048
    const floored = Math.floor(optimal / 1024) * 1024;
    // Math.max(2048, NaN) is NaN, so guard explicitly.
    const result = Number.isFinite(floored) ? Math.max(2048, floored) : 2048;
    logger.info(`[GGUFReader] RAM: ${(totalRam / 1024 / 1024 / 1024).toFixed(0)}GB total, ${(memAfterWeights / 1024 / 1024 / 1024).toFixed(1)}GB available for KV | KV/token: ${bytesPerToken} bytes | max by RAM: ${maxCtxByRam} | native: ${nativeCtx} | cap: ${MAX_CONTEXT_CAP} | optimal: ${result}`);
    return result;
}
139
/**
 * Reads one scalar metadata value of GGUF value type `vtype` at `pos`.
 *
 * Type ids handled: 0/1 (8-bit unsigned/signed), 2/3 (16-bit), 4/5 (32-bit),
 * 6 (32-bit float), 7 (single byte, read unsigned), 10/11 (64-bit, converted
 * with Number()), 12 (64-bit float). All multi-byte reads are little-endian.
 *
 * @param {Buffer} buf buffer holding the metadata
 * @param {number} pos byte offset of the value
 * @param {number} vtype GGUF value type id
 * @returns {number|null} the value, or null when the type is not one of the
 *   ids above or the value does not lie fully inside `buf`.
 */
function readScalar(buf, pos, vtype) {
    let width;
    switch (vtype) {
        case 0:
        case 1:
        case 7:
            width = 1;
            break;
        case 2:
        case 3:
            width = 2;
            break;
        case 4:
        case 5:
        case 6:
            width = 4;
            break;
        case 10:
        case 11:
        case 12:
            width = 8;
            break;
        default:
            return null;
    }
    // A value cut off by the end of the buffer is reported as unreadable
    // instead of letting the Buffer accessor throw a RangeError.
    if (!Number.isInteger(pos) || pos < 0 || pos + width > buf.length)
        return null;
    switch (vtype) {
        case 0: return buf.readUInt8(pos);
        case 1: return buf.readInt8(pos);
        case 2: return buf.readUInt16LE(pos);
        case 3: return buf.readInt16LE(pos);
        case 4: return buf.readUInt32LE(pos);
        case 5: return buf.readInt32LE(pos);
        case 6: return buf.readFloatLE(pos);
        case 7: return buf.readUInt8(pos);
        case 10: return Number(buf.readBigUInt64LE(pos));
        case 11: return Number(buf.readBigInt64LE(pos));
        case 12: return buf.readDoubleLE(pos);
        default: return null;
    }
}
155
/**
 * Byte width of a fixed-size GGUF value type.
 * @param {number} vtype GGUF value type id
 * @returns {number} 1, 2, 4 or 8; 0 for strings, arrays and unknown ids.
 */
function ggufFixedWidth(vtype) {
    switch (vtype) {
        case 0:
        case 1:
        case 7: return 1;
        case 2:
        case 3: return 2;
        case 4:
        case 5:
        case 6: return 4;
        case 10:
        case 11:
        case 12: return 8;
        default: return 0;
    }
}
/**
 * Returns the offset just past the metadata value of type `vtype` that
 * starts at `pos`.
 *
 * Strings (type 8) are a u64 length followed by that many bytes. Arrays
 * (type 9) are a u32 element type and a u64 count followed by the elements.
 * The returned offset may lie at or beyond `limit`; callers must treat that
 * as "value not fully available".
 *
 * @param {Buffer} buf buffer holding the metadata
 * @param {number} pos byte offset of the value
 * @param {number} vtype GGUF value type id
 * @param {number} limit number of valid bytes in `buf`
 * @returns {number} the next offset, or -1 when the type is unknown or a
 *   string/array header does not fit below `limit`.
 */
function skipValue(buf, pos, vtype, limit) {
    const width = ggufFixedWidth(vtype);
    if (width > 0)
        return pos + width;
    if (vtype === 8) {
        if (pos + 8 > limit)
            return -1;
        const len = Number(buf.readBigUInt64LE(pos));
        return pos + 8 + len;
    }
    if (vtype === 9) {
        if (pos + 12 > limit)
            return -1;
        const elemType = buf.readUInt32LE(pos);
        const count = Number(buf.readBigUInt64LE(pos + 4));
        let cursor = pos + 12;
        // Fixed-width elements: jump over the whole array at once instead of
        // stepping through every element.
        const elemWidth = ggufFixedWidth(elemType);
        if (elemWidth > 0)
            return cursor + count * elemWidth;
        // Variable-width elements (strings, nested arrays) have to be walked;
        // stop as soon as the cursor leaves the valid region.
        for (let i = 0; i < count && cursor < limit; i++) {
            cursor = skipValue(buf, cursor, elemType, limit);
            if (cursor < 0)
                return -1;
        }
        return cursor;
    }
    return -1;
}
190
+ //# sourceMappingURL=gguf-reader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gguf-reader.js","sourceRoot":"","sources":["../../../lib/local-llm/gguf-reader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,aAAa,CAAC;AAClC,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAEtC,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,0BAA0B;AACzD,MAAM,oBAAoB,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,qCAAqC;AAC/E,MAAM,iBAAiB,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,4BAA4B;AAW9E;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,SAAiB;IACvD,IAAI,MAAM,GAAyB,IAAI,CAAC;IACxC,IAAI,CAAC;QACH,MAAM,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QACvC,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;QACjC,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;QAC/C,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC/D,IAAI,SAAS,GAAG,EAAE;YAAE,OAAO,IAAI,CAAC;QAEhC,MAAM,KAAK,GAAG,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAClC,IAAI,KAAK,KAAK,UAAU;YAAE,OAAO,IAAI,CAAC;QAEtC,MAAM,OAAO,GAAG,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACpC,IAAI,OAAO,GAAG,CAAC,IAAI,OAAO,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QAE5C,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC,CAAC;QAChD,IAAI,GAAG,GAAG,EAAE,CAAC;QAEb,MAAM,IAAI,GAA2B,EAAE,aAAa,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC;QAClE,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,CAAC,gBAAgB,EAAE,aAAa,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,yBAAyB,CAAC,CAAC,CAAC;QAEjI,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,IAAI,GAAG,GAAG,SAAS,GAAG,EAAE,IAAI,MAAM,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5E,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS;gBAAE,MAAM;YAC/B,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC;YAChD,GAAG,IAAI,CAAC,CAAC;YAET,IAAI,GAAG,GAAG,MAAM,GAAG,SAAS;gBAAE,MAAM;YACpC,MAAM,GAAG,GAAG,GAAG,CAAC,QAAQ,CAAC,OAAO,EAAE,GAAG,EAAE,GAAG,GAAG,MAAM,CAAC,CAAC;YACrD,GAAG,IAAI,MAAM,CAAC;YAEd,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS;gBAAE,MAAM;YAC/B,MAAM,KAAK,GAAG,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;YACpC,GAAG,IAAI,CAAC,CAAC;YAET,IAAI,GAAG,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,
CAAC;gBACpC,IAAI,CAAC,aAAa,GAAG,UAAU,CAAC,GAAG,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;gBACtD,MAAM,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;YAClC,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,cAAc,CAAC,EAAE,CAAC;gBACxC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC,GAAG,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;gBACnD,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;YAC/B,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,mBAAmB,CAAC,EAAE,CAAC;gBAC7C,IAAI,CAAC,eAAe,GAAG,UAAU,CAAC,GAAG,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;gBACxD,MAAM,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC;YACpC,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,0BAA0B,CAAC,EAAE,CAAC;gBACpD,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC,GAAG,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;gBACpD,MAAM,CAAC,MAAM,CAAC,yBAAyB,CAAC,CAAC;YAC3C,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,uBAAuB,CAAC,EAAE,CAAC;gBACjD,IAAI,CAAC,SAAS,GAAG,UAAU,CAAC,GAAG,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;gBAClD,MAAM,CAAC,MAAM,CAAC,sBAAsB,CAAC,CAAC;YACxC,CAAC;YAED,GAAG,GAAG,SAAS,CAAC,GAAG,EAAE,GAAG,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;YAC5C,IAAI,GAAG,GAAG,CAAC;gBAAE,MAAM;QACrB,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,aAAa;YAAE,OAAO,IAAI,CAAC;QAErC,MAAM,MAAM,GAAG,IAAqB,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,gBAAgB,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,SAAS,MAAM,CAAC,aAAa,WAAW,MAAM,CAAC,UAAU,SAAS,MAAM,CAAC,eAAe,UAAU,MAAM,CAAC,SAAS,aAAa,MAAM,CAAC,WAAW,SAAS,CAAC,MAAM,CAAC,aAAa,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC5Q,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,CAAC,IAAI,CAAC,8CAA8C,GAAG,EAAE,CAAC,CAAC;QACjE,OAAO,IAAI,CAAC;IACd,CAAC;YAAS,CAAC;QACT,MAAM,MAAM,EAAE,KAAK,EAAE,CAAC;IACxB,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAAC,IAAmB;IACtD,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,SAAS,CAAC;IACtD,OAAO,CAAC,GAAG,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,WAAW,GAAG,OAAO,GAAG,CAAC,CAAC;AAC9D,CAAC;AAED;;;GAGG;AACH;;;GAGG;AACH,SAAS,kBAAkB;IACzB,MAAM,OAAO,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC;IAC9B,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO;QAAE,OAAO,OAAO,CAAC;IACjD,IAAI,CAAC;QACH,YAAY;QACZ,MAAM,GAAG,GAAG,YAAY,CAAC,2BAA2B,EAAE,OAAO,CAAC
,CAAC,IAAI,EAAE,CAAC;QACtE,IAAI,GAAG,KAAK,KAAK;YAAE,OAAO,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,OAAO,CAAC,CAAC;IAC3D,CAAC;IAAC,MAAM,CAAC,CAAC,mBAAmB,CAAC,CAAC;IAC/B,IAAI,CAAC;QACH,YAAY;QACZ,MAAM,GAAG,GAAG,YAAY,CAAC,6CAA6C,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;QACxF,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;QAC1B,IAAI,KAAK,GAAG,CAAC,IAAI,KAAK,GAAG,OAAO;YAAE,OAAO,KAAK,CAAC;IACjD,CAAC;IAAC,MAAM,CAAC,CAAC,mBAAmB,CAAC,CAAC;IAC/B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,2BAA2B,CAAC,IAAmB;IAC7D,MAAM,QAAQ,GAAG,kBAAkB,EAAE,CAAC;IACtC,MAAM,iBAAiB,GAAG,QAAQ,GAAG,iBAAiB,CAAC;IACvD,MAAM,eAAe,GAAG,iBAAiB,GAAG,IAAI,CAAC,aAAa,CAAC;IAE/D,IAAI,eAAe,IAAI,CAAC,EAAE,CAAC;QACzB,MAAM,CAAC,IAAI,CAAC,4BAA4B,CAAC,IAAI,CAAC,aAAa,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,qDAAqD,CAAC,CAAC;QACnJ,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,aAAa,GAAG,oBAAoB,CAAC,IAAI,CAAC,CAAC;IACjD,mGAAmG;IACnG,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,eAAe,GAAG,GAAG,CAAC,GAAG,aAAa,CAAC,CAAC;IACxE,+FAA+F;IAC/F,MAAM,eAAe,GAAG,KAAK,CAAC;IAC9B,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC;IAErC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,SAAS,EAAE,eAAe,CAAC,CAAC;IAClE,sDAAsD;IACtD,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;IAEjE,MAAM,CAAC,IAAI,CAAC,qBAAqB,CAAC,QAAQ,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,aAAa,CAAC,eAAe,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,mCAAmC,aAAa,wBAAwB,WAAW,cAAc,SAAS,WAAW,eAAe,eAAe,MAAM,EAAE,CAAC,CAAC;IACtS,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,UAAU,CAAC,GAAW,EAAE,GAAW,EAAE,KAAa;IACzD,QAAQ,KAAK,EAAE,CAAC;QACd,KAAK,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QAClC,KAAK,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QACjC,KAAK,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QACrC,KAAK,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QACpC,KAAK,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QACrC,KAAK,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QAC
pC,KAAK,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QACpC,KAAK,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QAClC,KAAK,EAAE,CAAC,CAAC,OAAO,MAAM,CAAC,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC;QACjD,KAAK,EAAE,CAAC,CAAC,OAAO,MAAM,CAAC,GAAG,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC;QAChD,KAAK,EAAE,CAAC,CAAC,OAAO,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QACtC,OAAO,CAAC,CAAC,OAAO,IAAI,CAAC;IACvB,CAAC;AACH,CAAC;AAED,SAAS,SAAS,CAAC,GAAW,EAAE,GAAW,EAAE,KAAa,EAAE,KAAa;IACvE,QAAQ,KAAK,EAAE,CAAC;QACd,KAAK,CAAC,CAAC;QAAC,KAAK,CAAC,CAAC;QAAC,KAAK,CAAC,CAAC,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC;QACvC,KAAK,CAAC,CAAC;QAAC,KAAK,CAAC,CAAC,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC;QAC/B,KAAK,CAAC,CAAC;QAAC,KAAK,CAAC,CAAC;QAAC,KAAK,CAAC,CAAC,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC;QACvC,KAAK,EAAE,CAAC;QAAC,KAAK,EAAE,CAAC;QAAC,KAAK,EAAE,CAAC,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC;QAC1C,KAAK,CAAC,CAAC,CAAC,CAAC;YACP,IAAI,GAAG,GAAG,CAAC,GAAG,KAAK;gBAAE,OAAO,CAAC,CAAC,CAAC;YAC/B,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC;YAC7C,OAAO,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;QACvB,CAAC;QACD,KAAK,CAAC,CAAC,CAAC,CAAC;YACP,IAAI,GAAG,GAAG,EAAE,GAAG,KAAK;gBAAE,OAAO,CAAC,CAAC,CAAC;YAChC,MAAM,QAAQ,GAAG,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;YACvC,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,eAAe,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;YACnD,GAAG,IAAI,EAAE,CAAC;YACV,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,IAAI,GAAG,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC9C,GAAG,GAAG,SAAS,CAAC,GAAG,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC;gBAC3C,IAAI,GAAG,GAAG,CAAC;oBAAE,OAAO,CAAC,CAAC,CAAC;YACzB,CAAC;YACD,OAAO,GAAG,CAAC;QACb,CAAC;QACD,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;IACrB,CAAC;AACH,CAAC"}
@@ -0,0 +1,9 @@
1
+ export { ModelManager } from './model-manager.ts';
2
+ export { engineRegistry } from './engine-registry.ts';
3
+ export type { LocalEngine, EngineStatus, EngineChatStatus, EngineServerStatus, LoadOptions } from './engine-interface.ts';
4
+ export { getBinaryPath, detectGpu } from './binary-manager.ts';
5
+ export type { GpuInfo } from './binary-manager.ts';
6
+ export { LlamaServerProcess } from './llama-server-process.ts';
7
+ export { MlxServerProcess, killOrphanedMlxServers } from './mlx-server-process.ts';
8
+ export type { LocalModel, LocalLlmStatus, HuggingFaceModelResult, HuggingFaceGgufFile, DownloadProgress, ActiveDownload, InterruptedDownload, LocalLlmState, } from './types.ts';
9
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../lib/local-llm/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,YAAY,EAAE,WAAW,EAAE,YAAY,EAAE,gBAAgB,EAAE,kBAAkB,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAC1H,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAC/D,YAAY,EAAE,OAAO,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC/D,OAAO,EAAE,gBAAgB,EAAE,sBAAsB,EAAE,MAAM,yBAAyB,CAAC;AACnF,YAAY,EACV,UAAU,EACV,cAAc,EACd,sBAAsB,EACtB,mBAAmB,EACnB,gBAAgB,EAChB,cAAc,EACd,mBAAmB,EACnB,aAAa,GACd,MAAM,YAAY,CAAC"}
@@ -0,0 +1,6 @@
1
+ export { ModelManager } from "./model-manager.js";
2
+ export { engineRegistry } from "./engine-registry.js";
3
+ export { getBinaryPath, detectGpu } from "./binary-manager.js";
4
+ export { LlamaServerProcess } from "./llama-server-process.js";
5
+ export { MlxServerProcess, killOrphanedMlxServers } from "./mlx-server-process.js";
6
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../lib/local-llm/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAEtD,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAE/D,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC/D,OAAO,EAAE,gBAAgB,EAAE,sBAAsB,EAAE,MAAM,yBAAyB,CAAC"}