web-agent-bridge 3.4.0 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. package/LICENSE +84 -84
  2. package/README.ar.md +1565 -1304
  3. package/README.md +171 -298
  4. package/bin/agent-runner.js +474 -474
  5. package/bin/cli.js +237 -237
  6. package/bin/wab-init.js +244 -223
  7. package/bin/wab.js +80 -80
  8. package/examples/azure-dns-wab.js +83 -83
  9. package/examples/bidi-agent.js +119 -119
  10. package/examples/cloudflare-wab-dns.js +121 -121
  11. package/examples/cpanel-wab-dns.js +114 -114
  12. package/examples/cross-site-agent.js +91 -91
  13. package/examples/dns-discovery-agent.js +166 -166
  14. package/examples/gcp-dns-wab.js +76 -76
  15. package/examples/governance-agent.js +169 -169
  16. package/examples/mcp-agent.js +94 -94
  17. package/examples/next-app-router/README.md +44 -44
  18. package/examples/plesk-wab-dns.js +103 -103
  19. package/examples/puppeteer-agent.js +108 -108
  20. package/examples/route53-wab-dns.js +144 -144
  21. package/examples/saas-dashboard/README.md +55 -55
  22. package/examples/safe-mode-agent.js +96 -96
  23. package/examples/self-discovery.js +106 -0
  24. package/examples/shopify-hydrogen/README.md +74 -74
  25. package/examples/vision-agent.js +171 -171
  26. package/examples/wab-sign.js +74 -74
  27. package/examples/wab-verify.js +60 -60
  28. package/examples/wordpress-elementor/README.md +77 -77
  29. package/package.json +93 -93
  30. package/public/.well-known/agent-tools.json +180 -180
  31. package/public/.well-known/ai-assets.json +59 -59
  32. package/public/.well-known/security.txt +8 -8
  33. package/public/.well-known/wab.json +28 -28
  34. package/public/activate.html +448 -368
  35. package/public/adopt.html +236 -0
  36. package/public/adoption-metrics.html +188 -188
  37. package/public/agent-workspace.html +359 -349
  38. package/public/ai.html +198 -198
  39. package/public/api.html +397 -413
  40. package/public/atp.html +171 -0
  41. package/public/azure-dns-integration.html +289 -289
  42. package/public/browser.html +486 -486
  43. package/public/cloudflare-integration.html +380 -380
  44. package/public/commander-dashboard.html +243 -243
  45. package/public/cookies.html +210 -210
  46. package/public/cpanel-integration.html +398 -398
  47. package/public/css/agent-workspace.css +1713 -1713
  48. package/public/css/premium.css +317 -317
  49. package/public/css/styles.css +1401 -1263
  50. package/public/dashboard-shieldlink.html +295 -0
  51. package/public/dashboard.html +711 -707
  52. package/public/dns.html +436 -436
  53. package/public/docs.html +588 -588
  54. package/public/enterprise-mesh.ar.html +80 -0
  55. package/public/enterprise-mesh.html +81 -0
  56. package/public/feed.xml +89 -89
  57. package/public/gcp-dns-integration.html +318 -318
  58. package/public/governance.ar.html +70 -0
  59. package/public/governance.html +69 -0
  60. package/public/growth.html +465 -465
  61. package/public/index.html +1372 -1266
  62. package/public/integrations.html +556 -556
  63. package/public/js/activate.js +449 -145
  64. package/public/js/agent-workspace.js +1740 -1740
  65. package/public/js/auth-nav.js +117 -65
  66. package/public/js/auth-redirect.js +12 -12
  67. package/public/js/cookie-consent.js +56 -56
  68. package/public/js/dns.js +438 -438
  69. package/public/js/wab-demo-page.js +721 -721
  70. package/public/js/ws-client.js +74 -74
  71. package/public/l-preview.html +242 -0
  72. package/public/llms-full.txt +360 -360
  73. package/public/llms.txt +125 -125
  74. package/public/login.html +85 -85
  75. package/public/mesh-dashboard.html +328 -328
  76. package/public/milestones.html +346 -0
  77. package/public/one-click.html +779 -0
  78. package/public/openapi.json +669 -669
  79. package/public/partners.ar.html +145 -0
  80. package/public/partners.html +143 -0
  81. package/public/phone-shield.html +281 -281
  82. package/public/plesk-integration.html +375 -375
  83. package/public/premium-dashboard.html +2489 -2489
  84. package/public/premium.html +793 -793
  85. package/public/privacy.html +297 -297
  86. package/public/provider-onboarding.html +172 -172
  87. package/public/provider-sandbox.html +134 -134
  88. package/public/providers.html +359 -359
  89. package/public/refusals.html +172 -0
  90. package/public/register.html +105 -105
  91. package/public/registrar-integrations.html +141 -141
  92. package/public/ring4.html +292 -0
  93. package/public/robots.txt +99 -99
  94. package/public/route53-integration.html +531 -531
  95. package/public/score.html +263 -0
  96. package/public/script/wab-consent.d.ts +36 -36
  97. package/public/script/wab-consent.js +104 -104
  98. package/public/script/wab-schema.js +131 -131
  99. package/public/script/wab.d.ts +108 -108
  100. package/public/script/wab.min.js +580 -580
  101. package/public/security.txt +8 -8
  102. package/public/shieldlink.html +244 -0
  103. package/public/shieldqr.html +231 -231
  104. package/public/sitemap.xml +13 -1
  105. package/public/terms.html +256 -256
  106. package/public/trust-graph-api.ar.html +92 -0
  107. package/public/trust-graph-api.html +91 -0
  108. package/public/wab-features.html +560 -0
  109. package/public/wab-trust.html +200 -200
  110. package/public/wab-truth.html +375 -0
  111. package/public/wab-vs-protocols.html +210 -210
  112. package/public/whitepaper.html +449 -449
  113. package/script/ai-agent-bridge.js +1754 -1754
  114. package/sdk/README.md +99 -99
  115. package/sdk/agent-mesh.js +449 -449
  116. package/sdk/atp.js +103 -0
  117. package/sdk/auto-discovery.js +301 -288
  118. package/sdk/commander.js +262 -262
  119. package/sdk/governance.js +262 -262
  120. package/sdk/index.d.ts +464 -464
  121. package/sdk/index.js +653 -649
  122. package/sdk/multi-agent.js +318 -318
  123. package/sdk/safe-mode.js +221 -221
  124. package/sdk/safety-shield.js +219 -219
  125. package/sdk/schema-discovery.js +83 -83
  126. package/server/adapters/index.js +520 -520
  127. package/server/config/plans.js +412 -367
  128. package/server/config/secrets.js +102 -102
  129. package/server/control-plane/index.js +301 -301
  130. package/server/data-plane/index.js +354 -354
  131. package/server/index.js +793 -670
  132. package/server/llm/index.js +404 -404
  133. package/server/middleware/adminAuth.js +35 -35
  134. package/server/middleware/api-tier.js +170 -0
  135. package/server/middleware/auth.js +50 -50
  136. package/server/middleware/featureGate.js +88 -88
  137. package/server/middleware/rateLimits.js +100 -100
  138. package/server/middleware/sensitiveAction.js +157 -157
  139. package/server/middleware/wab-trust.js +141 -0
  140. package/server/migrations/001_add_analytics_indexes.sql +7 -7
  141. package/server/migrations/002_premium_features.sql +418 -418
  142. package/server/migrations/003_ads_integer_cents.sql +33 -33
  143. package/server/migrations/004_agent_os.sql +158 -158
  144. package/server/migrations/005_marketplace_metering.sql +126 -126
  145. package/server/migrations/006_growth_suite.sql +138 -0
  146. package/server/migrations/007_governance.sql +106 -106
  147. package/server/migrations/008_plans.sql +144 -144
  148. package/server/migrations/009_shieldqr.sql +30 -30
  149. package/server/migrations/010_extended_trust.sql +33 -33
  150. package/server/migrations/011_outreach.sql +47 -0
  151. package/server/migrations/012_shieldlink.sql +116 -0
  152. package/server/migrations/013_ct_monitor.sql +13 -0
  153. package/server/migrations/014_wab_advanced_features.sql +128 -0
  154. package/server/migrations/015_wab_truth_layer.sql +101 -0
  155. package/server/migrations/016_ring4_external_trust.sql +84 -0
  156. package/server/migrations/017_ring4_extensions.sql +69 -0
  157. package/server/migrations/018_commercial_foundations.sql +167 -0
  158. package/server/migrations/019_unify_tier_constraints.sql +133 -0
  159. package/server/migrations/020_agent_transaction_primitive.sql +119 -0
  160. package/server/models/adapters/index.js +33 -33
  161. package/server/models/adapters/mysql.js +183 -183
  162. package/server/models/adapters/postgresql.js +172 -172
  163. package/server/models/adapters/sqlite.js +7 -7
  164. package/server/models/db.js +740 -740
  165. package/server/observability/failure-analysis.js +337 -337
  166. package/server/observability/index.js +394 -394
  167. package/server/protocol/capabilities.js +223 -223
  168. package/server/protocol/index.js +243 -243
  169. package/server/protocol/schema.js +584 -584
  170. package/server/registry/certification.js +271 -271
  171. package/server/registry/index.js +326 -326
  172. package/server/routes/activate.js +478 -0
  173. package/server/routes/admin-outreach.js +239 -0
  174. package/server/routes/admin-plans.js +76 -76
  175. package/server/routes/admin-premium.js +674 -673
  176. package/server/routes/admin-shieldlink.js +137 -0
  177. package/server/routes/admin-shieldqr.js +90 -90
  178. package/server/routes/admin-trust-monitor.js +139 -83
  179. package/server/routes/admin.js +550 -549
  180. package/server/routes/adopt.js +61 -0
  181. package/server/routes/ads.js +130 -130
  182. package/server/routes/agent-workspace.js +540 -540
  183. package/server/routes/api-keys.js +127 -0
  184. package/server/routes/api.js +150 -150
  185. package/server/routes/auth.js +71 -71
  186. package/server/routes/billing.js +57 -57
  187. package/server/routes/commander.js +316 -316
  188. package/server/routes/customer-shieldlink.js +133 -0
  189. package/server/routes/demo-showcase.js +332 -332
  190. package/server/routes/demo-store.js +154 -154
  191. package/server/routes/diagnose.js +373 -0
  192. package/server/routes/discovery.js +2348 -2348
  193. package/server/routes/enterprise-mesh.js +170 -0
  194. package/server/routes/gateway.js +173 -173
  195. package/server/routes/governance-saas.js +203 -0
  196. package/server/routes/governance.js +208 -208
  197. package/server/routes/growth.js +1048 -0
  198. package/server/routes/intent.js +328 -0
  199. package/server/routes/license.js +251 -251
  200. package/server/routes/mesh.js +469 -469
  201. package/server/routes/noscript.js +543 -543
  202. package/server/routes/partners.js +201 -0
  203. package/server/routes/plans.js +33 -33
  204. package/server/routes/premium-v2.js +686 -686
  205. package/server/routes/premium.js +724 -724
  206. package/server/routes/providers.js +650 -650
  207. package/server/routes/reputation.js +411 -0
  208. package/server/routes/ring4.js +885 -0
  209. package/server/routes/runtime.js +2148 -2148
  210. package/server/routes/shieldlink.js +70 -0
  211. package/server/routes/shieldqr.js +88 -88
  212. package/server/routes/sovereign.js +465 -465
  213. package/server/routes/transactions.js +233 -0
  214. package/server/routes/truth-layer.js +670 -0
  215. package/server/routes/universal.js +200 -200
  216. package/server/routes/unsubscribe.js +51 -0
  217. package/server/routes/wab-api.js +850 -850
  218. package/server/routes/wab-cache.js +282 -0
  219. package/server/runtime/container-worker.js +111 -111
  220. package/server/runtime/container.js +448 -448
  221. package/server/runtime/distributed-worker.js +362 -362
  222. package/server/runtime/event-bus.js +210 -210
  223. package/server/runtime/index.js +253 -253
  224. package/server/runtime/queue.js +599 -599
  225. package/server/runtime/replay.js +666 -666
  226. package/server/runtime/sandbox.js +266 -266
  227. package/server/runtime/scheduler.js +534 -534
  228. package/server/runtime/session-engine.js +293 -293
  229. package/server/runtime/state-manager.js +188 -188
  230. package/server/secrets/wab-signing-key.pem +3 -0
  231. package/server/secrets/wab-signing-pub.pem +3 -0
  232. package/server/security/cross-site-redactor.js +196 -196
  233. package/server/security/dry-run.js +180 -180
  234. package/server/security/human-gate-rate-limit.js +147 -147
  235. package/server/security/human-gate-transports.js +178 -178
  236. package/server/security/human-gate.js +281 -281
  237. package/server/security/index.js +368 -368
  238. package/server/security/intent-engine.js +245 -245
  239. package/server/security/reward-guard.js +171 -171
  240. package/server/security/rollback-store.js +239 -239
  241. package/server/security/token-scope.js +404 -404
  242. package/server/security/url-policy.js +139 -139
  243. package/server/services/adoption-agent.js +182 -0
  244. package/server/services/agent-chat.js +506 -506
  245. package/server/services/agent-learning.js +601 -601
  246. package/server/services/agent-memory.js +625 -625
  247. package/server/services/agent-mesh.js +555 -555
  248. package/server/services/agent-symphony.js +717 -717
  249. package/server/services/agent-tasks.js +1807 -1807
  250. package/server/services/api-key-engine.js +292 -292
  251. package/server/services/cluster.js +894 -894
  252. package/server/services/commander.js +738 -738
  253. package/server/services/edge-compute.js +440 -440
  254. package/server/services/email.js +233 -233
  255. package/server/services/fairness-engine.js +409 -0
  256. package/server/services/fairness.js +420 -0
  257. package/server/services/governance.js +466 -466
  258. package/server/services/hosted-runtime.js +205 -205
  259. package/server/services/lfd.js +635 -635
  260. package/server/services/local-ai.js +389 -389
  261. package/server/services/marketplace.js +270 -270
  262. package/server/services/metering.js +182 -182
  263. package/server/services/modules/affiliate-intelligence.js +93 -93
  264. package/server/services/modules/agent-firewall.js +90 -90
  265. package/server/services/modules/bounty.js +89 -89
  266. package/server/services/modules/collective-bargaining.js +92 -92
  267. package/server/services/modules/dark-pattern.js +66 -66
  268. package/server/services/modules/gov-intelligence.js +45 -45
  269. package/server/services/modules/neural.js +55 -55
  270. package/server/services/modules/notary.js +49 -49
  271. package/server/services/modules/price-time-machine.js +86 -86
  272. package/server/services/modules/protocol.js +104 -104
  273. package/server/services/negotiation.js +439 -439
  274. package/server/services/outreach-agent.js +312 -0
  275. package/server/services/plans.js +214 -214
  276. package/server/services/plugins.js +771 -771
  277. package/server/services/price-intelligence.js +566 -566
  278. package/server/services/price-shield.js +1137 -1137
  279. package/server/services/provider-clients.js +740 -740
  280. package/server/services/reputation.js +465 -465
  281. package/server/services/search-engine.js +357 -357
  282. package/server/services/security.js +513 -513
  283. package/server/services/self-healing.js +843 -843
  284. package/server/services/shieldlink.js +492 -0
  285. package/server/services/shieldqr.js +322 -322
  286. package/server/services/sovereign-shield.js +542 -542
  287. package/server/services/ssl-ct-monitor.js +224 -0
  288. package/server/services/ssl-inspector.js +42 -42
  289. package/server/services/ssl-monitor.js +167 -167
  290. package/server/services/stripe.js +206 -205
  291. package/server/services/swarm.js +788 -788
  292. package/server/services/transactions.js +525 -0
  293. package/server/services/universal-scraper.js +662 -662
  294. package/server/services/verification.js +481 -481
  295. package/server/services/vision.js +1163 -1163
  296. package/server/services/wab-crypto.js +178 -178
  297. package/server/utils/cache.js +125 -125
  298. package/server/utils/migrate.js +81 -81
  299. package/server/utils/safe-fetch.js +228 -228
  300. package/server/utils/secureFields.js +50 -50
  301. package/server/ws.js +161 -161
  302. package/templates/artisan-marketplace.yaml +104 -104
  303. package/templates/book-price-scout.yaml +98 -98
  304. package/templates/electronics-price-tracker.yaml +108 -108
  305. package/templates/flight-deal-hunter.yaml +113 -113
  306. package/templates/freelancer-direct.yaml +116 -116
  307. package/templates/grocery-price-compare.yaml +93 -93
  308. package/templates/hotel-direct-booking.yaml +113 -113
  309. package/templates/local-services.yaml +98 -98
  310. package/templates/olive-oil-tunisia.yaml +88 -88
  311. package/templates/organic-farm-fresh.yaml +101 -101
  312. package/templates/restaurant-direct.yaml +97 -97
  313. package/templates/ring4/banking-sovereign.yaml +55 -0
  314. package/templates/ring4/ecommerce-sovereign.yaml +58 -0
  315. package/templates/ring4/healthcare-sovereign.yaml +60 -0
@@ -1,389 +1,389 @@
1
- /**
2
- * Local AI — Sovereign Intelligence Runtime
3
- *
4
- * Manages local AI models running on the user's own hardware.
5
- * Auto-detects Ollama, llama.cpp, and any OpenAI-compatible local endpoint.
6
- * Routes inference requests to the best available model based on capability,
7
- * context window, and current load.
8
- *
9
- * Supported Providers:
10
- * - Ollama (http://localhost:11434)
11
- * - llama.cpp server (http://localhost:8080)
12
- * - Custom OpenAI-compatible endpoints
13
- *
14
- * All inference happens locally. No data leaves the device.
15
- */
16
-
17
- const crypto = require('crypto');
18
- const { db } = require('../models/db');
19
-
20
- // ─── Schema ──────────────────────────────────────────────────────────
21
-
22
- db.exec(`
23
- CREATE TABLE IF NOT EXISTS local_models (
24
- id TEXT PRIMARY KEY,
25
- site_id TEXT NOT NULL,
26
- provider TEXT NOT NULL,
27
- model_name TEXT NOT NULL,
28
- endpoint TEXT NOT NULL,
29
- capabilities TEXT DEFAULT '["text"]',
30
- context_window INTEGER DEFAULT 4096,
31
- parameters TEXT DEFAULT '{}',
32
- status TEXT DEFAULT 'available',
33
- total_requests INTEGER DEFAULT 0,
34
- total_tokens INTEGER DEFAULT 0,
35
- avg_latency_ms REAL DEFAULT 0,
36
- last_used TEXT,
37
- last_probe TEXT,
38
- created_at TEXT DEFAULT (datetime('now')),
39
- UNIQUE(site_id, provider, model_name)
40
- );
41
-
42
- CREATE TABLE IF NOT EXISTS local_inference_log (
43
- id TEXT PRIMARY KEY,
44
- model_id TEXT NOT NULL,
45
- task_type TEXT,
46
- prompt_tokens INTEGER DEFAULT 0,
47
- completion_tokens INTEGER DEFAULT 0,
48
- latency_ms INTEGER DEFAULT 0,
49
- success INTEGER DEFAULT 1,
50
- created_at TEXT DEFAULT (datetime('now'))
51
- );
52
-
53
- CREATE INDEX IF NOT EXISTS idx_local_models_site ON local_models(site_id);
54
- CREATE INDEX IF NOT EXISTS idx_local_models_status ON local_models(status);
55
- CREATE INDEX IF NOT EXISTS idx_local_inference_model ON local_inference_log(model_id);
56
- `);
57
-
58
- // ─── Prepared Statements ─────────────────────────────────────────────
59
-
60
- const stmts = {
61
- upsertModel: db.prepare("INSERT INTO local_models (id, site_id, provider, model_name, endpoint, capabilities, context_window, parameters) VALUES (?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, provider, model_name) DO UPDATE SET endpoint = ?, capabilities = ?, context_window = ?, parameters = ?, status = 'available', last_probe = datetime('now')"),
62
- getModel: db.prepare('SELECT * FROM local_models WHERE id = ?'),
63
- getModels: db.prepare('SELECT * FROM local_models WHERE site_id = ? ORDER BY provider, model_name'),
64
- getAvailableModels: db.prepare("SELECT * FROM local_models WHERE site_id = ? AND status = 'available' ORDER BY avg_latency_ms ASC"),
65
- getModelsByCapability: db.prepare("SELECT * FROM local_models WHERE site_id = ? AND status = 'available' AND capabilities LIKE ? ORDER BY avg_latency_ms ASC"),
66
- updateModelStatus: db.prepare('UPDATE local_models SET status = ?, last_probe = datetime(\'now\') WHERE id = ?'),
67
- updateModelStats: db.prepare("UPDATE local_models SET total_requests = total_requests + 1, total_tokens = total_tokens + ?, avg_latency_ms = (avg_latency_ms * total_requests + ?) / (total_requests + 1), last_used = datetime('now') WHERE id = ?"),
68
- insertLog: db.prepare('INSERT INTO local_inference_log (id, model_id, task_type, prompt_tokens, completion_tokens, latency_ms, success) VALUES (?, ?, ?, ?, ?, ?, ?)'),
69
- getStats: db.prepare(`SELECT
70
- (SELECT COUNT(*) FROM local_models WHERE site_id = ? AND status = 'available') as available_models,
71
- (SELECT COUNT(*) FROM local_models WHERE site_id = ?) as total_models,
72
- (SELECT SUM(total_requests) FROM local_models WHERE site_id = ?) as total_requests,
73
- (SELECT SUM(total_tokens) FROM local_models WHERE site_id = ?) as total_tokens,
74
- (SELECT AVG(avg_latency_ms) FROM local_models WHERE site_id = ? AND status = 'available') as avg_latency`),
75
- };
76
-
77
- // ─── Default Provider Endpoints ──────────────────────────────────────
78
-
79
- const PROVIDERS = {
80
- ollama: { name: 'ollama', baseUrl: 'http://localhost:11434', tagsPath: '/api/tags', chatPath: '/api/chat', generatePath: '/api/generate' },
81
- llamacpp: { name: 'llamacpp', baseUrl: 'http://localhost:8080', chatPath: '/v1/chat/completions', modelsPath: '/v1/models' },
82
- };
83
-
84
- // ─── Model Discovery ─────────────────────────────────────────────────
85
-
86
- /**
87
- * Probe local endpoints and register discovered models.
88
- */
89
- async function discoverModels(siteId, customEndpoints = []) {
90
- const discovered = [];
91
-
92
- // Probe Ollama
93
- try {
94
- const ollamaModels = await _probeOllama(PROVIDERS.ollama.baseUrl);
95
- for (const m of ollamaModels) {
96
- const result = _registerModel(siteId, 'ollama', m.name, PROVIDERS.ollama.baseUrl, m.capabilities, m.contextWindow, m.parameters);
97
- discovered.push(result);
98
- }
99
- } catch (_) { /* Ollama not running */ }
100
-
101
- // Probe llama.cpp
102
- try {
103
- const lcModels = await _probeLlamaCpp(PROVIDERS.llamacpp.baseUrl);
104
- for (const m of lcModels) {
105
- const result = _registerModel(siteId, 'llamacpp', m.name, PROVIDERS.llamacpp.baseUrl, m.capabilities, m.contextWindow, m.parameters);
106
- discovered.push(result);
107
- }
108
- } catch (_) { /* llama.cpp not running */ }
109
-
110
- // Probe custom endpoints
111
- for (const ep of customEndpoints) {
112
- try {
113
- const models = await _probeOpenAICompatible(ep.url);
114
- for (const m of models) {
115
- const result = _registerModel(siteId, ep.name || 'custom', m.name, ep.url, m.capabilities, m.contextWindow, m.parameters);
116
- discovered.push(result);
117
- }
118
- } catch (_) { /* endpoint not available */ }
119
- }
120
-
121
- return { discovered: discovered.length, models: discovered };
122
- }
123
-
124
- /**
125
- * Register a model manually.
126
- */
127
- function registerModel(siteId, provider, modelName, endpoint, capabilities = ['text'], contextWindow = 4096) {
128
- return _registerModel(siteId, provider, modelName, endpoint, capabilities, contextWindow, {});
129
- }
130
-
131
- function _registerModel(siteId, provider, modelName, endpoint, capabilities, contextWindow, parameters) {
132
- const id = crypto.randomUUID();
133
- const caps = JSON.stringify(capabilities);
134
- const params = JSON.stringify(parameters);
135
-
136
- stmts.upsertModel.run(id, siteId, provider, modelName, endpoint, caps, contextWindow, params, endpoint, caps, contextWindow, params);
137
- return { id, provider, modelName, endpoint, capabilities, contextWindow };
138
- }
139
-
140
- // ─── Inference ───────────────────────────────────────────────────────
141
-
142
- /**
143
- * Run inference on the best available local model.
144
- * @param {string} siteId
145
- * @param {string} prompt - The user prompt
146
- * @param {object} options - { capability, model, systemPrompt, temperature, maxTokens, stream }
147
- */
148
- async function infer(siteId, prompt, options = {}) {
149
- const capability = options.capability || 'text';
150
-
151
- // Select model
152
- let model;
153
- if (options.modelId) {
154
- model = stmts.getModel.get(options.modelId);
155
- if (!model || model.status !== 'available') throw new Error('Selected model unavailable');
156
- } else {
157
- const candidates = stmts.getModelsByCapability.all(siteId, `%${capability}%`);
158
- if (candidates.length === 0) throw new Error(`No local model available for capability: ${capability}`);
159
- model = candidates[0]; // Fastest by avg latency
160
- }
161
-
162
- const start = Date.now();
163
- let result;
164
-
165
- try {
166
- const parsed = JSON.parse(model.parameters || '{}');
167
- if (model.provider === 'ollama') {
168
- result = await _inferOllama(model, prompt, options);
169
- } else if (model.provider === 'llamacpp') {
170
- result = await _inferLlamaCpp(model, prompt, options);
171
- } else {
172
- result = await _inferOpenAICompatible(model, prompt, options);
173
- }
174
- } catch (err) {
175
- const latency = Date.now() - start;
176
- stmts.insertLog.run(crypto.randomUUID(), model.id, capability, 0, 0, latency, 0);
177
- throw err;
178
- }
179
-
180
- const latency = Date.now() - start;
181
- const totalTokens = (result.promptTokens || 0) + (result.completionTokens || 0);
182
-
183
- stmts.updateModelStats.run(totalTokens, latency, model.id);
184
- stmts.insertLog.run(crypto.randomUUID(), model.id, capability, result.promptTokens || 0, result.completionTokens || 0, latency, 1);
185
-
186
- return {
187
- modelId: model.id,
188
- provider: model.provider,
189
- model: model.model_name,
190
- response: result.text,
191
- promptTokens: result.promptTokens || 0,
192
- completionTokens: result.completionTokens || 0,
193
- latencyMs: latency,
194
- };
195
- }
196
-
197
- // ─── Model Management ────────────────────────────────────────────────
198
-
199
- function getModels(siteId) {
200
- return stmts.getModels.all(siteId).map(_deserializeModel);
201
- }
202
-
203
- function getAvailableModels(siteId) {
204
- return stmts.getAvailableModels.all(siteId).map(_deserializeModel);
205
- }
206
-
207
- function getModel(modelId) {
208
- const row = stmts.getModel.get(modelId);
209
- return row ? _deserializeModel(row) : null;
210
- }
211
-
212
- function updateModelStatus(modelId, status) {
213
- stmts.updateModelStatus.run(status, modelId);
214
- }
215
-
216
- function getStats(siteId) {
217
- const row = stmts.getStats.get(siteId, siteId, siteId, siteId, siteId);
218
- return {
219
- availableModels: row.available_models || 0,
220
- totalModels: row.total_models || 0,
221
- totalRequests: row.total_requests || 0,
222
- totalTokens: row.total_tokens || 0,
223
- avgLatency: row.avg_latency ? Math.round(row.avg_latency) : 0,
224
- };
225
- }
226
-
227
- // ─── Provider-Specific Inference ─────────────────────────────────────
228
-
229
- async function _inferOllama(model, prompt, options) {
230
- const body = {
231
- model: model.model_name,
232
- messages: [],
233
- stream: false,
234
- options: {},
235
- };
236
-
237
- if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
238
- body.messages.push({ role: 'user', content: prompt });
239
- if (options.temperature != null) body.options.temperature = options.temperature;
240
-
241
- const res = await fetch(`${model.endpoint}/api/chat`, {
242
- method: 'POST',
243
- headers: { 'Content-Type': 'application/json' },
244
- body: JSON.stringify(body),
245
- signal: AbortSignal.timeout(options.timeout || 120000),
246
- });
247
-
248
- if (!res.ok) throw new Error(`Ollama error: ${res.status}`);
249
- const data = await res.json();
250
-
251
- return {
252
- text: data.message?.content || '',
253
- promptTokens: data.prompt_eval_count || 0,
254
- completionTokens: data.eval_count || 0,
255
- };
256
- }
257
-
258
- async function _inferLlamaCpp(model, prompt, options) {
259
- const body = {
260
- model: model.model_name,
261
- messages: [],
262
- max_tokens: options.maxTokens || 2048,
263
- stream: false,
264
- };
265
-
266
- if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
267
- body.messages.push({ role: 'user', content: prompt });
268
- if (options.temperature != null) body.temperature = options.temperature;
269
-
270
- const res = await fetch(`${model.endpoint}/v1/chat/completions`, {
271
- method: 'POST',
272
- headers: { 'Content-Type': 'application/json' },
273
- body: JSON.stringify(body),
274
- signal: AbortSignal.timeout(options.timeout || 120000),
275
- });
276
-
277
- if (!res.ok) throw new Error(`llama.cpp error: ${res.status}`);
278
- const data = await res.json();
279
-
280
- return {
281
- text: data.choices?.[0]?.message?.content || '',
282
- promptTokens: data.usage?.prompt_tokens || 0,
283
- completionTokens: data.usage?.completion_tokens || 0,
284
- };
285
- }
286
-
287
- async function _inferOpenAICompatible(model, prompt, options) {
288
- const body = {
289
- model: model.model_name,
290
- messages: [],
291
- max_tokens: options.maxTokens || 2048,
292
- stream: false,
293
- };
294
-
295
- if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
296
- body.messages.push({ role: 'user', content: prompt });
297
- if (options.temperature != null) body.temperature = options.temperature;
298
-
299
- const res = await fetch(`${model.endpoint}/v1/chat/completions`, {
300
- method: 'POST',
301
- headers: { 'Content-Type': 'application/json' },
302
- body: JSON.stringify(body),
303
- signal: AbortSignal.timeout(options.timeout || 120000),
304
- });
305
-
306
- if (!res.ok) throw new Error(`Inference error: ${res.status}`);
307
- const data = await res.json();
308
-
309
- return {
310
- text: data.choices?.[0]?.message?.content || '',
311
- promptTokens: data.usage?.prompt_tokens || 0,
312
- completionTokens: data.usage?.completion_tokens || 0,
313
- };
314
- }
315
-
316
- // ─── Provider Probing ────────────────────────────────────────────────
317
-
318
- async function _probeOllama(baseUrl) {
319
- const res = await fetch(`${baseUrl}/api/tags`, { signal: AbortSignal.timeout(5000) });
320
- if (!res.ok) return [];
321
- const data = await res.json();
322
- return (data.models || []).map(m => ({
323
- name: m.name,
324
- capabilities: _detectCapabilities(m.name),
325
- contextWindow: m.details?.parameter_size ? _estimateContext(m.details.parameter_size) : 4096,
326
- parameters: { size: m.size, family: m.details?.family },
327
- }));
328
- }
329
-
330
- async function _probeLlamaCpp(baseUrl) {
331
- const res = await fetch(`${baseUrl}/v1/models`, { signal: AbortSignal.timeout(5000) });
332
- if (!res.ok) return [];
333
- const data = await res.json();
334
- return (data.data || []).map(m => ({
335
- name: m.id,
336
- capabilities: _detectCapabilities(m.id),
337
- contextWindow: 4096,
338
- parameters: {},
339
- }));
340
- }
341
-
342
- async function _probeOpenAICompatible(baseUrl) {
343
- const res = await fetch(`${baseUrl}/v1/models`, { signal: AbortSignal.timeout(5000) });
344
- if (!res.ok) return [];
345
- const data = await res.json();
346
- return (data.data || []).map(m => ({
347
- name: m.id,
348
- capabilities: _detectCapabilities(m.id),
349
- contextWindow: 4096,
350
- parameters: {},
351
- }));
352
- }
353
-
354
- // ─── Helpers ─────────────────────────────────────────────────────────
355
-
356
- function _detectCapabilities(modelName) {
357
- const n = modelName.toLowerCase();
358
- const caps = ['text'];
359
- if (n.includes('vision') || n.includes('llava') || n.includes('bakllava')) caps.push('vision');
360
- if (n.includes('code') || n.includes('codellama') || n.includes('deepseek-coder') || n.includes('starcoder')) caps.push('code');
361
- if (n.includes('embed') || n.includes('nomic')) caps.push('embedding');
362
- if (n.includes('mistral') || n.includes('mixtral')) caps.push('reasoning');
363
- return caps;
364
- }
365
-
366
/**
 * Roughly estimate a context window from a parameter-count label.
 *
 * The label is a number optionally followed by a magnitude letter
 * (K, M, B or T, case-insensitive), e.g. "7B", "70.6B" or "137M". The value
 * is normalised to billions before the thresholds are applied, so a
 * 137-million-parameter model is no longer mistaken for a 137-billion one.
 * A label without a recognised unit is read as billions.
 *
 * @param {string} paramSize - Parameter-count label.
 * @returns {number} 32768 for >= 70B, 8192 for >= 13B, otherwise 4096
 *   (also for non-string or unparsable input).
 */
function _estimateContext(paramSize) {
  if (typeof paramSize !== 'string') return 4096;

  const value = Number.parseFloat(paramSize);
  if (Number.isNaN(value)) return 4096;

  // First letter following the numeric prefix, if any, is the magnitude unit.
  const unitMatch = /^[\s\d.+-]*([a-z])/i.exec(paramSize);
  const unit = unitMatch ? unitMatch[1].toLowerCase() : 'b';
  const toBillions = { k: 1e-6, m: 1e-3, b: 1, t: 1e3 };
  const billions = value * (toBillions[unit] ?? 1);

  if (billions >= 70) return 32768;
  if (billions >= 13) return 8192;
  return 4096;
}
376
-
377
/**
 * Turn a raw `local_models` row into its API shape by decoding the two
 * JSON-encoded columns. Empty or missing values fall back to `["text"]`
 * for capabilities and `{}` for parameters; all other columns are copied
 * through unchanged.
 *
 * @param {object} row - Row as returned by the database driver.
 * @returns {object} A new object; the input row is not modified.
 */
function _deserializeModel(row) {
  const capabilities = JSON.parse(row.capabilities || '["text"]');
  const parameters = JSON.parse(row.parameters || '{}');
  return Object.assign({}, row, { capabilities, parameters });
}
384
-
385
- module.exports = {
386
- discoverModels, registerModel, infer,
387
- getModels, getAvailableModels, getModel, updateModelStatus,
388
- getStats,
389
- };
1
+ /**
2
+ * Local AI — Sovereign Intelligence Runtime
3
+ *
4
+ * Manages local AI models running on the user's own hardware.
5
+ * Auto-detects Ollama, llama.cpp, and any OpenAI-compatible local endpoint.
6
+ * Routes inference requests to the best available model based on capability,
7
+ * context window, and current load.
8
+ *
9
+ * Supported Providers:
10
+ * - Ollama (http://localhost:11434)
11
+ * - llama.cpp server (http://localhost:8080)
12
+ * - Custom OpenAI-compatible endpoints
13
+ *
14
+ * All inference happens locally. No data leaves the device.
15
+ */
16
+
17
+ const crypto = require('crypto');
18
+ const { db } = require('../models/db');
19
+
20
+ // ─── Schema ──────────────────────────────────────────────────────────
21
+
22
+ db.exec(`
23
+ CREATE TABLE IF NOT EXISTS local_models (
24
+ id TEXT PRIMARY KEY,
25
+ site_id TEXT NOT NULL,
26
+ provider TEXT NOT NULL,
27
+ model_name TEXT NOT NULL,
28
+ endpoint TEXT NOT NULL,
29
+ capabilities TEXT DEFAULT '["text"]',
30
+ context_window INTEGER DEFAULT 4096,
31
+ parameters TEXT DEFAULT '{}',
32
+ status TEXT DEFAULT 'available',
33
+ total_requests INTEGER DEFAULT 0,
34
+ total_tokens INTEGER DEFAULT 0,
35
+ avg_latency_ms REAL DEFAULT 0,
36
+ last_used TEXT,
37
+ last_probe TEXT,
38
+ created_at TEXT DEFAULT (datetime('now')),
39
+ UNIQUE(site_id, provider, model_name)
40
+ );
41
+
42
+ CREATE TABLE IF NOT EXISTS local_inference_log (
43
+ id TEXT PRIMARY KEY,
44
+ model_id TEXT NOT NULL,
45
+ task_type TEXT,
46
+ prompt_tokens INTEGER DEFAULT 0,
47
+ completion_tokens INTEGER DEFAULT 0,
48
+ latency_ms INTEGER DEFAULT 0,
49
+ success INTEGER DEFAULT 1,
50
+ created_at TEXT DEFAULT (datetime('now'))
51
+ );
52
+
53
+ CREATE INDEX IF NOT EXISTS idx_local_models_site ON local_models(site_id);
54
+ CREATE INDEX IF NOT EXISTS idx_local_models_status ON local_models(status);
55
+ CREATE INDEX IF NOT EXISTS idx_local_inference_model ON local_inference_log(model_id);
56
+ `);
57
+
58
+ // ─── Prepared Statements ─────────────────────────────────────────────
59
+
60
// Statements are prepared once at module load and reused by the functions
// below. All values are bound positionally, in the order of the `?` markers.
const stmts = {
  // Insert a model, or refresh the existing (site_id, provider, model_name)
  // row. Takes 12 values: the 8 insert columns followed by the 4 update
  // values. On conflict the existing row's id is left as it is.
  upsertModel: db.prepare("INSERT INTO local_models (id, site_id, provider, model_name, endpoint, capabilities, context_window, parameters) VALUES (?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, provider, model_name) DO UPDATE SET endpoint = ?, capabilities = ?, context_window = ?, parameters = ?, status = 'available', last_probe = datetime('now')"),
  // Look up one model by id (not restricted to a site).
  getModel: db.prepare('SELECT * FROM local_models WHERE id = ?'),
  // All models of a site, whatever their status.
  getModels: db.prepare('SELECT * FROM local_models WHERE site_id = ? ORDER BY provider, model_name'),
  // Available models of a site, lowest average latency first.
  getAvailableModels: db.prepare("SELECT * FROM local_models WHERE site_id = ? AND status = 'available' ORDER BY avg_latency_ms ASC"),
  // Same, filtered by a LIKE pattern applied to the JSON-encoded capabilities text.
  getModelsByCapability: db.prepare("SELECT * FROM local_models WHERE site_id = ? AND status = 'available' AND capabilities LIKE ? ORDER BY avg_latency_ms ASC"),
  // Set a model's status and stamp last_probe. Binds (status, id).
  updateModelStatus: db.prepare('UPDATE local_models SET status = ?, last_probe = datetime(\'now\') WHERE id = ?'),
  // Count one more request, add its tokens and fold its latency into the
  // running average. Binds (tokens, latencyMs, id).
  updateModelStats: db.prepare("UPDATE local_models SET total_requests = total_requests + 1, total_tokens = total_tokens + ?, avg_latency_ms = (avg_latency_ms * total_requests + ?) / (total_requests + 1), last_used = datetime('now') WHERE id = ?"),
  // Append one inference attempt to the log.
  insertLog: db.prepare('INSERT INTO local_inference_log (id, model_id, task_type, prompt_tokens, completion_tokens, latency_ms, success) VALUES (?, ?, ?, ?, ?, ?, ?)'),
  // Per-site aggregates. The site id must be bound five times, once per sub-query.
  getStats: db.prepare(`SELECT
    (SELECT COUNT(*) FROM local_models WHERE site_id = ? AND status = 'available') as available_models,
    (SELECT COUNT(*) FROM local_models WHERE site_id = ?) as total_models,
    (SELECT SUM(total_requests) FROM local_models WHERE site_id = ?) as total_requests,
    (SELECT SUM(total_tokens) FROM local_models WHERE site_id = ?) as total_tokens,
    (SELECT AVG(avg_latency_ms) FROM local_models WHERE site_id = ? AND status = 'available') as avg_latency`),
};
76
+
77
+ // ─── Default Provider Endpoints ──────────────────────────────────────
78
+
79
// Built-in providers probed during discovery, with their default local base
// URLs. NOTE(review): only `baseUrl` is read in this file; the probe and
// inference helpers spell out their request paths themselves, so the
// `*Path` fields look informational — confirm before relying on them.
const PROVIDERS = {
  ollama: { name: 'ollama', baseUrl: 'http://localhost:11434', tagsPath: '/api/tags', chatPath: '/api/chat', generatePath: '/api/generate' },
  llamacpp: { name: 'llamacpp', baseUrl: 'http://localhost:8080', chatPath: '/v1/chat/completions', modelsPath: '/v1/models' },
};
83
+
84
+ // ─── Model Discovery ─────────────────────────────────────────────────
85
+
86
/**
 * Probe the known local endpoints and register every model they report.
 *
 * Endpoints are scanned one after another: the default Ollama server, the
 * default llama.cpp server, then each custom endpoint in the order given.
 * A scan that fails for any reason is abandoned silently (models registered
 * before the failure are kept) and the next endpoint is tried.
 *
 * @param {string} siteId - Site the discovered models are registered under.
 * @param {Array<{url: string, name?: string}>} [customEndpoints] - Extra
 *   OpenAI-compatible endpoints; `name` is used as the provider label and
 *   defaults to 'custom'.
 * @returns {Promise<{discovered: number, models: object[]}>}
 */
async function discoverModels(siteId, customEndpoints = []) {
  const registered = [];

  // Scans a single endpoint. The target is resolved inside the try block so
  // that a malformed endpoint entry is skipped like an unreachable one.
  const scan = async (resolveTarget, probe) => {
    try {
      const { provider, url } = resolveTarget();
      const found = await probe(url);
      for (const m of found) {
        registered.push(_registerModel(siteId, provider, m.name, url, m.capabilities, m.contextWindow, m.parameters));
      }
    } catch (_) { /* endpoint not running or not available */ }
  };

  await scan(() => ({ provider: 'ollama', url: PROVIDERS.ollama.baseUrl }), (url) => _probeOllama(url));
  await scan(() => ({ provider: 'llamacpp', url: PROVIDERS.llamacpp.baseUrl }), (url) => _probeLlamaCpp(url));

  for (const ep of customEndpoints) {
    await scan(() => ({ provider: ep.name || 'custom', url: ep.url }), (url) => _probeOpenAICompatible(url));
  }

  return { discovered: registered.length, models: registered };
}
123
+
124
/**
 * Register a model by hand, without probing its endpoint.
 *
 * Delegates to the shared registration routine with an empty parameters
 * object.
 *
 * @param {string} siteId
 * @param {string} provider - Provider label, e.g. 'ollama', 'llamacpp' or a custom name.
 * @param {string} modelName
 * @param {string} endpoint - Base URL of the server hosting the model.
 * @param {string[]} [capabilities=['text']]
 * @param {number} [contextWindow=4096]
 * @returns {object} The registration result.
 */
function registerModel(siteId, provider, modelName, endpoint, capabilities = ['text'], contextWindow = 4096) {
  const noParameters = {};
  return _registerModel(siteId, provider, modelName, endpoint, capabilities, contextWindow, noParameters);
}
130
+
131
// Lookup of a model's stored id by its unique key; prepared on first use.
let _findModelIdStmt = null;

/**
 * Insert a model, or refresh the existing row for the same
 * (site, provider, model name), and describe the stored model.
 *
 * When the row already exists the upsert keeps its original id, so the
 * freshly generated UUID would not match any stored row. The id is therefore
 * read back after the upsert so that the returned value can be used for
 * later lookups.
 *
 * @param {string} siteId
 * @param {string} provider
 * @param {string} modelName
 * @param {string} endpoint - Base URL of the server hosting the model.
 * @param {string[]} capabilities - Stored JSON-encoded.
 * @param {number} contextWindow
 * @param {object} parameters - Stored JSON-encoded.
 * @returns {{id: string, provider: string, modelName: string, endpoint: string, capabilities: string[], contextWindow: number}}
 */
function _registerModel(siteId, provider, modelName, endpoint, capabilities, contextWindow, parameters) {
  const candidateId = crypto.randomUUID();
  const caps = JSON.stringify(capabilities);
  const params = JSON.stringify(parameters);

  // Twelve bindings: eight for the INSERT, four for the DO UPDATE branch.
  stmts.upsertModel.run(candidateId, siteId, provider, modelName, endpoint, caps, contextWindow, params, endpoint, caps, contextWindow, params);

  if (!_findModelIdStmt) {
    _findModelIdStmt = db.prepare('SELECT id FROM local_models WHERE site_id = ? AND provider = ? AND model_name = ?');
  }
  const stored = _findModelIdStmt.get(siteId, provider, modelName);
  const id = stored ? stored.id : candidateId;

  return { id, provider, modelName, endpoint, capabilities, contextWindow };
}
139
+
140
+ // ─── Inference ───────────────────────────────────────────────────────
141
+
142
/**
 * Run inference on a local model and record the outcome.
 *
 * With `options.modelId` that model is used, provided it belongs to `siteId`
 * and is available. Otherwise the available model of the site that
 * advertises the requested capability and has the lowest average latency is
 * chosen.
 *
 * @param {string} siteId
 * @param {string} prompt - The user prompt
 * @param {object} [options] - { capability, modelId, systemPrompt, temperature, maxTokens, timeout }.
 *   `capability` defaults to 'text'. Responses are never streamed: every
 *   provider request is sent with `stream: false`.
 * @returns {Promise<{modelId: string, provider: string, model: string, response: string, promptTokens: number, completionTokens: number, latencyMs: number}>}
 * @throws {Error} When no suitable model exists, or when the provider call
 *   fails (the failure is logged with success = 0 and then rethrown).
 */
async function infer(siteId, prompt, options = {}) {
  const capability = options.capability || 'text';

  // Select model
  let model;
  if (options.modelId) {
    model = stmts.getModel.get(options.modelId);
    // The id lookup is not scoped to a site, so ownership is checked here.
    const usable = Boolean(model) && String(model.site_id) === String(siteId) && model.status === 'available';
    if (!usable) throw new Error('Selected model unavailable');
  } else {
    // LIKE is only a coarse pre-filter (substring match, wildcards in the
    // requested value); the exact tag comparison happens in JavaScript.
    const candidates = stmts.getModelsByCapability
      .all(siteId, `%${capability}%`)
      .filter((row) => _advertisesCapability(row, capability));
    if (candidates.length === 0) throw new Error(`No local model available for capability: ${capability}`);
    model = candidates[0]; // Fastest by avg latency
  }

  const start = Date.now();
  let result;

  try {
    if (model.provider === 'ollama') {
      result = await _inferOllama(model, prompt, options);
    } else if (model.provider === 'llamacpp') {
      result = await _inferLlamaCpp(model, prompt, options);
    } else {
      // Any other provider label is treated as an OpenAI-compatible server.
      result = await _inferOpenAICompatible(model, prompt, options);
    }
  } catch (err) {
    const failedAfter = Date.now() - start;
    stmts.insertLog.run(crypto.randomUUID(), model.id, capability, 0, 0, failedAfter, 0);
    throw err;
  }

  const latency = Date.now() - start;
  const promptTokens = result.promptTokens || 0;
  const completionTokens = result.completionTokens || 0;

  stmts.updateModelStats.run(promptTokens + completionTokens, latency, model.id);
  stmts.insertLog.run(crypto.randomUUID(), model.id, capability, promptTokens, completionTokens, latency, 1);

  return {
    modelId: model.id,
    provider: model.provider,
    model: model.model_name,
    response: result.text,
    promptTokens,
    completionTokens,
    latencyMs: latency,
  };
}

// True when the row's JSON-encoded capability list contains the requested tag
// (compared case-insensitively); rows with unreadable capabilities never match.
function _advertisesCapability(row, capability) {
  const wanted = String(capability).toLowerCase();
  try {
    const tags = JSON.parse(row.capabilities || '["text"]');
    return Array.isArray(tags) && tags.some((tag) => String(tag).toLowerCase() === wanted);
  } catch (_) {
    return false;
  }
}
196
+
197
+ // ─── Model Management ────────────────────────────────────────────────
198
+
199
/**
 * List every model registered for a site, ordered by provider then name,
 * with the JSON columns decoded.
 *
 * @param {string} siteId
 * @returns {object[]}
 */
function getModels(siteId) {
  const rows = stmts.getModels.all(siteId);
  return rows.map((row) => _deserializeModel(row));
}
202
+
203
/**
 * List the models of a site whose status is 'available', lowest average
 * latency first, with the JSON columns decoded.
 *
 * @param {string} siteId
 * @returns {object[]}
 */
function getAvailableModels(siteId) {
  const available = stmts.getAvailableModels.all(siteId);
  return available.map((row) => _deserializeModel(row));
}
206
+
207
/**
 * Fetch a single model by id.
 *
 * @param {string} modelId
 * @returns {object|null} The decoded model, or null when no row has that id.
 */
function getModel(modelId) {
  const found = stmts.getModel.get(modelId);
  if (!found) {
    return null;
  }
  return _deserializeModel(found);
}
211
+
212
/**
 * Set a model's status; the statement also stamps its last-probe time.
 *
 * @param {string} modelId
 * @param {string} status - New status value, e.g. 'available'.
 * @returns {void}
 */
function updateModelStatus(modelId, status) {
  // The statement binds the status first and the id second.
  const bindings = [status, modelId];
  stmts.updateModelStatus.run(...bindings);
}
215
+
216
/**
 * Aggregate usage figures for a site.
 *
 * @param {string} siteId
 * @returns {{availableModels: number, totalModels: number, totalRequests: number, totalTokens: number, avgLatency: number}}
 *   Missing aggregates are reported as 0; the average latency is rounded
 *   to the nearest integer.
 */
function getStats(siteId) {
  // The query has five sub-selects, each with its own site placeholder.
  const bindings = new Array(5).fill(siteId);
  const aggregates = stmts.getStats.get(...bindings);

  let avgLatency = 0;
  if (aggregates.avg_latency) {
    avgLatency = Math.round(aggregates.avg_latency);
  }

  return {
    availableModels: aggregates.available_models || 0,
    totalModels: aggregates.total_models || 0,
    totalRequests: aggregates.total_requests || 0,
    totalTokens: aggregates.total_tokens || 0,
    avgLatency,
  };
}
226
+
227
+ // ─── Provider-Specific Inference ─────────────────────────────────────
228
+
229
/**
 * Send one non-streaming chat request to an Ollama server.
 *
 * @param {object} model - Model row; `model_name` and `endpoint` are read.
 * @param {string} prompt - User message.
 * @param {object} options - { systemPrompt, temperature, maxTokens, timeout }.
 *   `maxTokens` is forwarded as Ollama's `num_predict` option; when it is
 *   omitted no limit is sent and the server's own default applies.
 *   `timeout` is in milliseconds and defaults to 120000.
 * @returns {Promise<{text: string, promptTokens: number, completionTokens: number}>}
 * @throws {Error} `Ollama error: <status>` on a non-OK response; fetch
 *   failures and timeouts propagate unchanged.
 */
async function _inferOllama(model, prompt, options) {
  const body = {
    model: model.model_name,
    messages: [],
    stream: false,
    options: {},
  };

  if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
  body.messages.push({ role: 'user', content: prompt });
  if (options.temperature != null) body.options.temperature = options.temperature;
  // Ollama calls its completion-length cap `num_predict`; without this the
  // caller's maxTokens would be ignored for this provider only.
  if (options.maxTokens != null) body.options.num_predict = options.maxTokens;

  const res = await fetch(`${model.endpoint}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(options.timeout || 120000),
  });

  if (!res.ok) throw new Error(`Ollama error: ${res.status}`);
  const data = await res.json();

  return {
    text: data.message?.content || '',
    promptTokens: data.prompt_eval_count || 0,
    completionTokens: data.eval_count || 0,
  };
}
257
+
258
/**
 * Send one non-streaming chat completion request to a llama.cpp server.
 *
 * @param {object} model - Model row; `model_name` and `endpoint` are read.
 * @param {string} prompt - User message.
 * @param {object} options - { systemPrompt, temperature, maxTokens, timeout }.
 *   `maxTokens` falls back to 2048 and `timeout` (ms) to 120000.
 * @returns {Promise<{text: string, promptTokens: number, completionTokens: number}>}
 * @throws {Error} `llama.cpp error: <status>` on a non-OK response; fetch
 *   failures and timeouts propagate unchanged.
 */
async function _inferLlamaCpp(model, prompt, options) {
  const conversation = [];
  if (options.systemPrompt) {
    conversation.push({ role: 'system', content: options.systemPrompt });
  }
  conversation.push({ role: 'user', content: prompt });

  // Key order is kept stable so the serialized request body does not change.
  const payload = {
    model: model.model_name,
    messages: conversation,
    max_tokens: options.maxTokens || 2048,
    stream: false,
  };
  if (options.temperature != null) {
    payload.temperature = options.temperature;
  }

  const timeoutMs = options.timeout || 120000;
  const response = await fetch(`${model.endpoint}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(timeoutMs),
  });
  if (!response.ok) {
    throw new Error(`llama.cpp error: ${response.status}`);
  }

  const completion = await response.json();
  const text = completion.choices?.[0]?.message?.content || '';
  const promptTokens = completion.usage?.prompt_tokens || 0;
  const completionTokens = completion.usage?.completion_tokens || 0;
  return { text, promptTokens, completionTokens };
}
286
+
287
/**
 * Send one non-streaming request to an OpenAI-compatible
 * `/v1/chat/completions` endpoint.
 *
 * @param {object} model - Model row; `model_name` and `endpoint` are read.
 * @param {string} prompt - User message.
 * @param {object} options - { systemPrompt, temperature, maxTokens, timeout }.
 *   `maxTokens` falls back to 2048 and `timeout` (ms) to 120000.
 * @returns {Promise<{text: string, promptTokens: number, completionTokens: number}>}
 * @throws {Error} `Inference error: <status>` on a non-OK response; fetch
 *   failures and timeouts propagate unchanged.
 */
async function _inferOpenAICompatible(model, prompt, options) {
  const userTurn = { role: 'user', content: prompt };
  const messages = options.systemPrompt
    ? [{ role: 'system', content: options.systemPrompt }, userTurn]
    : [userTurn];

  // Temperature is appended last, and only when the caller supplied one.
  const request = {
    model: model.model_name,
    messages,
    max_tokens: options.maxTokens || 2048,
    stream: false,
    ...(options.temperature != null ? { temperature: options.temperature } : {}),
  };

  const reply = await fetch(`${model.endpoint}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(request),
    signal: AbortSignal.timeout(options.timeout || 120000),
  });

  if (!reply.ok) throw new Error(`Inference error: ${reply.status}`);

  const parsed = await reply.json();
  const usage = parsed.usage;
  return {
    text: parsed.choices?.[0]?.message?.content || '',
    promptTokens: usage?.prompt_tokens || 0,
    completionTokens: usage?.completion_tokens || 0,
  };
}
315
+
316
+ // ─── Provider Probing ────────────────────────────────────────────────
317
+
318
/**
 * Ask an Ollama server for its installed models.
 *
 * Issues GET `${baseUrl}/api/tags` with a 5 second timeout. A non-OK response
 * yields an empty list; network errors and timeouts propagate to the caller.
 * The context window is estimated from `details.parameter_size` when the
 * listing provides it and is 4096 otherwise.
 *
 * @param {string} baseUrl - Server root, without a trailing slash.
 * @returns {Promise<Array<{name: string, capabilities: string[], contextWindow: number, parameters: {size: *, family: *}}>>}
 */
async function _probeOllama(baseUrl) {
  const response = await fetch(`${baseUrl}/api/tags`, { signal: AbortSignal.timeout(5000) });
  if (!response.ok) {
    return [];
  }

  const payload = await response.json();
  const installed = payload.models || [];

  return installed.map((entry) => {
    const capabilities = _detectCapabilities(entry.name);
    const paramSize = entry.details?.parameter_size;
    const contextWindow = paramSize ? _estimateContext(paramSize) : 4096;
    return {
      name: entry.name,
      capabilities,
      contextWindow,
      parameters: { size: entry.size, family: entry.details?.family },
    };
  });
}
329
+
330
/**
 * Ask a llama.cpp server which models it serves.
 *
 * Issues GET `${baseUrl}/v1/models` with a 5 second timeout. A non-OK response
 * yields an empty list; network errors and timeouts propagate to the caller.
 *
 * @param {string} baseUrl - Server root, without a trailing slash.
 * @returns {Promise<Array<{name: string, capabilities: string[], contextWindow: number, parameters: object}>>}
 */
async function _probeLlamaCpp(baseUrl) {
  const response = await fetch(`${baseUrl}/v1/models`, { signal: AbortSignal.timeout(5000) });
  if (!response.ok) {
    return [];
  }

  const payload = await response.json();
  const entries = payload.data || [];

  // The listing exposes no context size, so every model gets the 4096 default.
  return entries.map((entry) => {
    const name = entry.id;
    return {
      name,
      capabilities: _detectCapabilities(name),
      contextWindow: 4096,
      parameters: {},
    };
  });
}
341
+
342
/**
 * List the models exposed by an OpenAI-compatible endpoint.
 *
 * Fetches `${baseUrl}/v1/models` (5 second timeout) and converts each entry of
 * the response's `data` array into the shape used for registration. Returns an
 * empty array when the response status is not OK; fetch failures are thrown.
 *
 * @param {string} baseUrl - Endpoint root, without a trailing slash.
 * @returns {Promise<Array<{name: string, capabilities: string[], contextWindow: number, parameters: object}>>}
 */
async function _probeOpenAICompatible(baseUrl) {
  const modelsUrl = `${baseUrl}/v1/models`;
  const res = await fetch(modelsUrl, { signal: AbortSignal.timeout(5000) });
  if (!res.ok) return [];

  const listing = await res.json();
  const toDescriptor = (item) => ({
    name: item.id,
    capabilities: _detectCapabilities(item.id),
    contextWindow: 4096,
    parameters: {},
  });
  return (listing.data || []).map((item) => toDescriptor(item));
}
353
+
354
+ // ─── Helpers ─────────────────────────────────────────────────────────
355
+
356
/**
 * Guess a model's capabilities from keywords in its name.
 *
 * Every model is assumed to handle 'text'. Further capabilities are appended,
 * in a fixed order, when the lower-cased name contains a known keyword.
 * A missing or non-string name is treated as an empty name instead of
 * throwing, so one malformed entry in a provider listing cannot abort the
 * whole probe.
 *
 * @param {string} modelName - Model identifier as reported by the provider.
 * @returns {string[]} Capability tags, always starting with 'text'.
 */
function _detectCapabilities(modelName) {
  const n = String(modelName ?? '').toLowerCase();

  // Keywords already implied by a shorter one are omitted: 'bakllava'
  // contains 'llava'; 'codellama', 'deepseek-coder' and 'starcoder' all
  // contain 'code'.
  const rules = [
    ['vision', ['vision', 'llava']],
    ['code', ['code']],
    ['embedding', ['embed', 'nomic']],
    ['reasoning', ['mistral', 'mixtral']],
  ];

  const caps = ['text'];
  for (const [capability, keywords] of rules) {
    if (keywords.some((keyword) => n.includes(keyword))) caps.push(capability);
  }
  return caps;
}
365
+
366
/**
 * Roughly estimate a context window from a parameter-count label.
 *
 * The label is a number optionally followed by a magnitude letter
 * (K, M, B or T, case-insensitive), e.g. "7B", "70.6B" or "137M". The value
 * is normalised to billions before the thresholds are applied, so a
 * 137-million-parameter model is no longer mistaken for a 137-billion one.
 * A label without a recognised unit is read as billions.
 *
 * @param {string} paramSize - Parameter-count label.
 * @returns {number} 32768 for >= 70B, 8192 for >= 13B, otherwise 4096
 *   (also for non-string or unparsable input).
 */
function _estimateContext(paramSize) {
  if (typeof paramSize !== 'string') return 4096;

  const value = Number.parseFloat(paramSize);
  if (Number.isNaN(value)) return 4096;

  // First letter following the numeric prefix, if any, is the magnitude unit.
  const unitMatch = /^[\s\d.+-]*([a-z])/i.exec(paramSize);
  const unit = unitMatch ? unitMatch[1].toLowerCase() : 'b';
  const toBillions = { k: 1e-6, m: 1e-3, b: 1, t: 1e3 };
  const billions = value * (toBillions[unit] ?? 1);

  if (billions >= 70) return 32768;
  if (billions >= 13) return 8192;
  return 4096;
}
376
+
377
/**
 * Turn a raw `local_models` row into its API shape by decoding the two
 * JSON-encoded columns. Empty or missing values fall back to `["text"]`
 * for capabilities and `{}` for parameters; all other columns are copied
 * through unchanged.
 *
 * @param {object} row - Row as returned by the database driver.
 * @returns {object} A new object; the input row is not modified.
 */
function _deserializeModel(row) {
  const capabilities = JSON.parse(row.capabilities || '["text"]');
  const parameters = JSON.parse(row.parameters || '{}');
  return Object.assign({}, row, { capabilities, parameters });
}
384
+
385
+ module.exports = {
386
+ discoverModels, registerModel, infer,
387
+ getModels, getAvailableModels, getModel, updateModelStatus,
388
+ getStats,
389
+ };