web-agent-bridge 3.3.0 → 3.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312) hide show
  1. package/LICENSE +84 -72
  2. package/README.ar.md +1563 -1286
  3. package/README.md +137 -1764
  4. package/bin/agent-runner.js +474 -474
  5. package/bin/cli.js +237 -237
  6. package/bin/wab-init.js +244 -0
  7. package/bin/wab.js +80 -80
  8. package/examples/azure-dns-wab.js +83 -0
  9. package/examples/bidi-agent.js +119 -119
  10. package/examples/cloudflare-wab-dns.js +121 -0
  11. package/examples/cpanel-wab-dns.js +114 -0
  12. package/examples/cross-site-agent.js +91 -91
  13. package/examples/dns-discovery-agent.js +166 -0
  14. package/examples/gcp-dns-wab.js +76 -0
  15. package/examples/governance-agent.js +169 -0
  16. package/examples/mcp-agent.js +94 -94
  17. package/examples/next-app-router/README.md +44 -44
  18. package/examples/plesk-wab-dns.js +103 -0
  19. package/examples/puppeteer-agent.js +108 -108
  20. package/examples/route53-wab-dns.js +144 -0
  21. package/examples/saas-dashboard/README.md +55 -55
  22. package/examples/safe-mode-agent.js +96 -0
  23. package/examples/self-discovery.js +106 -0
  24. package/examples/shopify-hydrogen/README.md +74 -74
  25. package/examples/vision-agent.js +171 -171
  26. package/examples/wab-sign.js +74 -0
  27. package/examples/wab-verify.js +60 -0
  28. package/examples/wordpress-elementor/README.md +77 -77
  29. package/package.json +93 -93
  30. package/public/.well-known/agent-tools.json +180 -180
  31. package/public/.well-known/ai-assets.json +59 -59
  32. package/public/.well-known/security.txt +8 -8
  33. package/public/.well-known/wab.json +28 -0
  34. package/public/activate.html +448 -0
  35. package/public/adopt.html +236 -0
  36. package/public/adoption-metrics.html +188 -0
  37. package/public/agent-workspace.html +359 -349
  38. package/public/ai.html +198 -198
  39. package/public/api.html +397 -413
  40. package/public/azure-dns-integration.html +289 -0
  41. package/public/browser.html +486 -486
  42. package/public/cloudflare-integration.html +380 -0
  43. package/public/commander-dashboard.html +243 -243
  44. package/public/cookies.html +210 -210
  45. package/public/cpanel-integration.html +398 -0
  46. package/public/css/agent-workspace.css +1713 -1713
  47. package/public/css/premium.css +317 -317
  48. package/public/css/styles.css +1401 -1235
  49. package/public/dashboard-shieldlink.html +295 -0
  50. package/public/dashboard.html +711 -706
  51. package/public/dns.html +436 -507
  52. package/public/docs.html +588 -587
  53. package/public/enterprise-mesh.ar.html +80 -0
  54. package/public/enterprise-mesh.html +81 -0
  55. package/public/feed.xml +89 -89
  56. package/public/gcp-dns-integration.html +318 -0
  57. package/public/governance.ar.html +70 -0
  58. package/public/governance.html +69 -0
  59. package/public/growth.html +465 -463
  60. package/public/index.html +1372 -1070
  61. package/public/integrations.html +556 -556
  62. package/public/js/activate.js +449 -0
  63. package/public/js/agent-workspace.js +1740 -1740
  64. package/public/js/auth-nav.js +117 -31
  65. package/public/js/auth-redirect.js +12 -12
  66. package/public/js/cookie-consent.js +56 -56
  67. package/public/js/dns.js +438 -0
  68. package/public/js/wab-demo-page.js +721 -721
  69. package/public/js/ws-client.js +74 -74
  70. package/public/l-preview.html +242 -0
  71. package/public/llms-full.txt +360 -360
  72. package/public/llms.txt +125 -125
  73. package/public/login.html +85 -85
  74. package/public/mesh-dashboard.html +328 -328
  75. package/public/milestones.html +346 -0
  76. package/public/one-click.html +779 -0
  77. package/public/openapi.json +669 -580
  78. package/public/partners.ar.html +145 -0
  79. package/public/partners.html +143 -0
  80. package/public/phone-shield.html +281 -281
  81. package/public/plesk-integration.html +375 -0
  82. package/public/premium-dashboard.html +2489 -2489
  83. package/public/premium.html +793 -793
  84. package/public/privacy.html +297 -297
  85. package/public/provider-onboarding.html +172 -0
  86. package/public/provider-sandbox.html +134 -0
  87. package/public/providers.html +359 -0
  88. package/public/refusals.html +172 -0
  89. package/public/register.html +105 -105
  90. package/public/registrar-integrations.html +141 -0
  91. package/public/ring4.html +292 -0
  92. package/public/robots.txt +99 -87
  93. package/public/route53-integration.html +531 -0
  94. package/public/score.html +263 -0
  95. package/public/script/wab-consent.d.ts +36 -36
  96. package/public/script/wab-consent.js +104 -104
  97. package/public/script/wab-schema.js +131 -131
  98. package/public/script/wab.d.ts +108 -108
  99. package/public/script/wab.min.js +580 -580
  100. package/public/security.txt +8 -8
  101. package/public/shieldlink.html +244 -0
  102. package/public/shieldqr.html +231 -0
  103. package/public/sitemap.xml +19 -1
  104. package/public/terms.html +256 -256
  105. package/public/trust-graph-api.ar.html +92 -0
  106. package/public/trust-graph-api.html +91 -0
  107. package/public/wab-features.html +560 -0
  108. package/public/wab-trust.html +200 -0
  109. package/public/wab-truth.html +375 -0
  110. package/public/wab-vs-protocols.html +210 -0
  111. package/public/whitepaper.html +449 -0
  112. package/script/ai-agent-bridge.js +1754 -1754
  113. package/sdk/README.md +99 -99
  114. package/sdk/agent-mesh.js +449 -449
  115. package/sdk/auto-discovery.js +301 -0
  116. package/sdk/commander.js +262 -262
  117. package/sdk/governance.js +262 -0
  118. package/sdk/index.d.ts +464 -464
  119. package/sdk/index.js +649 -636
  120. package/sdk/multi-agent.js +318 -318
  121. package/sdk/package.json +2 -2
  122. package/sdk/safe-mode.js +221 -0
  123. package/sdk/safety-shield.js +219 -219
  124. package/sdk/schema-discovery.js +83 -83
  125. package/server/adapters/index.js +520 -520
  126. package/server/config/plans.js +412 -367
  127. package/server/config/secrets.js +102 -102
  128. package/server/control-plane/index.js +301 -301
  129. package/server/data-plane/index.js +354 -354
  130. package/server/index.js +790 -531
  131. package/server/llm/index.js +404 -404
  132. package/server/middleware/adminAuth.js +35 -35
  133. package/server/middleware/api-tier.js +170 -0
  134. package/server/middleware/auth.js +50 -50
  135. package/server/middleware/featureGate.js +88 -88
  136. package/server/middleware/rateLimits.js +100 -100
  137. package/server/middleware/sensitiveAction.js +157 -157
  138. package/server/middleware/wab-trust.js +141 -0
  139. package/server/migrations/001_add_analytics_indexes.sql +7 -7
  140. package/server/migrations/002_premium_features.sql +418 -418
  141. package/server/migrations/003_ads_integer_cents.sql +33 -33
  142. package/server/migrations/004_agent_os.sql +158 -158
  143. package/server/migrations/005_marketplace_metering.sql +126 -126
  144. package/server/migrations/006_growth_suite.sql +138 -0
  145. package/server/migrations/007_governance.sql +106 -0
  146. package/server/migrations/008_plans.sql +144 -0
  147. package/server/migrations/009_shieldqr.sql +30 -0
  148. package/server/migrations/010_extended_trust.sql +33 -0
  149. package/server/migrations/011_outreach.sql +47 -0
  150. package/server/migrations/012_shieldlink.sql +116 -0
  151. package/server/migrations/013_ct_monitor.sql +13 -0
  152. package/server/migrations/014_wab_advanced_features.sql +128 -0
  153. package/server/migrations/015_wab_truth_layer.sql +101 -0
  154. package/server/migrations/016_ring4_external_trust.sql +84 -0
  155. package/server/migrations/017_ring4_extensions.sql +69 -0
  156. package/server/migrations/018_commercial_foundations.sql +167 -0
  157. package/server/migrations/019_unify_tier_constraints.sql +133 -0
  158. package/server/models/adapters/index.js +33 -33
  159. package/server/models/adapters/mysql.js +183 -183
  160. package/server/models/adapters/postgresql.js +172 -172
  161. package/server/models/adapters/sqlite.js +7 -7
  162. package/server/models/db.js +740 -681
  163. package/server/observability/failure-analysis.js +337 -337
  164. package/server/observability/index.js +394 -394
  165. package/server/protocol/capabilities.js +223 -223
  166. package/server/protocol/index.js +243 -243
  167. package/server/protocol/schema.js +584 -584
  168. package/server/registry/certification.js +271 -271
  169. package/server/registry/index.js +326 -326
  170. package/server/routes/activate.js +478 -0
  171. package/server/routes/admin-outreach.js +239 -0
  172. package/server/routes/admin-plans.js +76 -0
  173. package/server/routes/admin-premium.js +674 -671
  174. package/server/routes/admin-shieldlink.js +137 -0
  175. package/server/routes/admin-shieldqr.js +90 -0
  176. package/server/routes/admin-trust-monitor.js +139 -0
  177. package/server/routes/admin.js +550 -261
  178. package/server/routes/adopt.js +61 -0
  179. package/server/routes/ads.js +130 -130
  180. package/server/routes/agent-workspace.js +540 -540
  181. package/server/routes/api-keys.js +127 -0
  182. package/server/routes/api.js +150 -150
  183. package/server/routes/auth.js +71 -71
  184. package/server/routes/billing.js +57 -45
  185. package/server/routes/commander.js +316 -316
  186. package/server/routes/customer-shieldlink.js +133 -0
  187. package/server/routes/demo-showcase.js +332 -332
  188. package/server/routes/demo-store.js +154 -154
  189. package/server/routes/diagnose.js +373 -0
  190. package/server/routes/discovery.js +2348 -417
  191. package/server/routes/enterprise-mesh.js +170 -0
  192. package/server/routes/gateway.js +173 -173
  193. package/server/routes/governance-saas.js +203 -0
  194. package/server/routes/governance.js +208 -0
  195. package/server/routes/growth.js +1048 -0
  196. package/server/routes/intent.js +328 -0
  197. package/server/routes/license.js +251 -251
  198. package/server/routes/mesh.js +469 -469
  199. package/server/routes/noscript.js +543 -543
  200. package/server/routes/partners.js +201 -0
  201. package/server/routes/plans.js +33 -0
  202. package/server/routes/premium-v2.js +686 -686
  203. package/server/routes/premium.js +724 -724
  204. package/server/routes/providers.js +650 -0
  205. package/server/routes/reputation.js +411 -0
  206. package/server/routes/ring4.js +885 -0
  207. package/server/routes/runtime.js +2148 -2148
  208. package/server/routes/shieldlink.js +70 -0
  209. package/server/routes/shieldqr.js +88 -0
  210. package/server/routes/sovereign.js +465 -465
  211. package/server/routes/truth-layer.js +670 -0
  212. package/server/routes/universal.js +200 -200
  213. package/server/routes/unsubscribe.js +51 -0
  214. package/server/routes/wab-api.js +850 -850
  215. package/server/routes/wab-cache.js +282 -0
  216. package/server/runtime/container-worker.js +111 -111
  217. package/server/runtime/container.js +448 -448
  218. package/server/runtime/distributed-worker.js +362 -362
  219. package/server/runtime/event-bus.js +210 -210
  220. package/server/runtime/index.js +253 -253
  221. package/server/runtime/queue.js +599 -599
  222. package/server/runtime/replay.js +666 -666
  223. package/server/runtime/sandbox.js +266 -266
  224. package/server/runtime/scheduler.js +534 -534
  225. package/server/runtime/session-engine.js +293 -293
  226. package/server/runtime/state-manager.js +188 -188
  227. package/server/secrets/wab-signing-key.pem +3 -0
  228. package/server/secrets/wab-signing-pub.pem +3 -0
  229. package/server/security/cross-site-redactor.js +196 -196
  230. package/server/security/dry-run.js +180 -180
  231. package/server/security/human-gate-rate-limit.js +147 -147
  232. package/server/security/human-gate-transports.js +178 -178
  233. package/server/security/human-gate.js +281 -281
  234. package/server/security/index.js +368 -368
  235. package/server/security/intent-engine.js +245 -245
  236. package/server/security/reward-guard.js +171 -171
  237. package/server/security/rollback-store.js +239 -239
  238. package/server/security/token-scope.js +404 -404
  239. package/server/security/url-policy.js +139 -139
  240. package/server/services/adoption-agent.js +182 -0
  241. package/server/services/agent-chat.js +506 -506
  242. package/server/services/agent-learning.js +601 -601
  243. package/server/services/agent-memory.js +625 -625
  244. package/server/services/agent-mesh.js +555 -555
  245. package/server/services/agent-symphony.js +717 -717
  246. package/server/services/agent-tasks.js +1807 -1807
  247. package/server/services/api-key-engine.js +292 -292
  248. package/server/services/cluster.js +894 -894
  249. package/server/services/commander.js +738 -738
  250. package/server/services/edge-compute.js +440 -440
  251. package/server/services/email.js +233 -204
  252. package/server/services/fairness-engine.js +409 -0
  253. package/server/services/fairness.js +420 -0
  254. package/server/services/governance.js +466 -0
  255. package/server/services/hosted-runtime.js +205 -205
  256. package/server/services/lfd.js +635 -635
  257. package/server/services/local-ai.js +389 -389
  258. package/server/services/marketplace.js +270 -270
  259. package/server/services/metering.js +182 -182
  260. package/server/services/modules/affiliate-intelligence.js +93 -93
  261. package/server/services/modules/agent-firewall.js +90 -90
  262. package/server/services/modules/bounty.js +89 -89
  263. package/server/services/modules/collective-bargaining.js +92 -92
  264. package/server/services/modules/dark-pattern.js +66 -66
  265. package/server/services/modules/gov-intelligence.js +45 -45
  266. package/server/services/modules/neural.js +55 -55
  267. package/server/services/modules/notary.js +49 -49
  268. package/server/services/modules/price-time-machine.js +86 -86
  269. package/server/services/modules/protocol.js +104 -104
  270. package/server/services/negotiation.js +439 -439
  271. package/server/services/outreach-agent.js +312 -0
  272. package/server/services/plans.js +214 -0
  273. package/server/services/plugins.js +771 -771
  274. package/server/services/premium.js +1 -1
  275. package/server/services/price-intelligence.js +566 -566
  276. package/server/services/price-shield.js +1137 -1137
  277. package/server/services/provider-clients.js +740 -0
  278. package/server/services/reputation.js +465 -465
  279. package/server/services/search-engine.js +357 -357
  280. package/server/services/security.js +513 -513
  281. package/server/services/self-healing.js +843 -843
  282. package/server/services/shieldlink.js +492 -0
  283. package/server/services/shieldqr.js +322 -0
  284. package/server/services/sovereign-shield.js +542 -542
  285. package/server/services/ssl-ct-monitor.js +224 -0
  286. package/server/services/ssl-inspector.js +42 -0
  287. package/server/services/ssl-monitor.js +167 -0
  288. package/server/services/stripe.js +206 -192
  289. package/server/services/swarm.js +788 -788
  290. package/server/services/universal-scraper.js +662 -662
  291. package/server/services/verification.js +481 -481
  292. package/server/services/vision.js +1163 -1163
  293. package/server/services/wab-crypto.js +178 -0
  294. package/server/utils/cache.js +125 -125
  295. package/server/utils/migrate.js +81 -81
  296. package/server/utils/safe-fetch.js +228 -228
  297. package/server/utils/secureFields.js +50 -50
  298. package/server/ws.js +161 -161
  299. package/templates/artisan-marketplace.yaml +104 -104
  300. package/templates/book-price-scout.yaml +98 -98
  301. package/templates/electronics-price-tracker.yaml +108 -108
  302. package/templates/flight-deal-hunter.yaml +113 -113
  303. package/templates/freelancer-direct.yaml +116 -116
  304. package/templates/grocery-price-compare.yaml +93 -93
  305. package/templates/hotel-direct-booking.yaml +113 -113
  306. package/templates/local-services.yaml +98 -98
  307. package/templates/olive-oil-tunisia.yaml +88 -88
  308. package/templates/organic-farm-fresh.yaml +101 -101
  309. package/templates/restaurant-direct.yaml +97 -97
  310. package/templates/ring4/banking-sovereign.yaml +55 -0
  311. package/templates/ring4/ecommerce-sovereign.yaml +58 -0
  312. package/templates/ring4/healthcare-sovereign.yaml +60 -0
@@ -1,389 +1,389 @@
1
- /**
2
- * Local AI — Sovereign Intelligence Runtime
3
- *
4
- * Manages local AI models running on the user's own hardware.
5
- * Auto-detects Ollama, llama.cpp, and any OpenAI-compatible local endpoint.
6
- * Routes inference requests to the best available model based on capability,
7
- * context window, and current load.
8
- *
9
- * Supported Providers:
10
- * - Ollama (http://localhost:11434)
11
- * - llama.cpp server (http://localhost:8080)
12
- * - Custom OpenAI-compatible endpoints
13
- *
14
- * All inference happens locally. No data leaves the device.
15
- */
16
-
17
- const crypto = require('crypto');
18
- const { db } = require('../models/db');
19
-
20
- // ─── Schema ──────────────────────────────────────────────────────────
21
-
22
- db.exec(`
23
- CREATE TABLE IF NOT EXISTS local_models (
24
- id TEXT PRIMARY KEY,
25
- site_id TEXT NOT NULL,
26
- provider TEXT NOT NULL,
27
- model_name TEXT NOT NULL,
28
- endpoint TEXT NOT NULL,
29
- capabilities TEXT DEFAULT '["text"]',
30
- context_window INTEGER DEFAULT 4096,
31
- parameters TEXT DEFAULT '{}',
32
- status TEXT DEFAULT 'available',
33
- total_requests INTEGER DEFAULT 0,
34
- total_tokens INTEGER DEFAULT 0,
35
- avg_latency_ms REAL DEFAULT 0,
36
- last_used TEXT,
37
- last_probe TEXT,
38
- created_at TEXT DEFAULT (datetime('now')),
39
- UNIQUE(site_id, provider, model_name)
40
- );
41
-
42
- CREATE TABLE IF NOT EXISTS local_inference_log (
43
- id TEXT PRIMARY KEY,
44
- model_id TEXT NOT NULL,
45
- task_type TEXT,
46
- prompt_tokens INTEGER DEFAULT 0,
47
- completion_tokens INTEGER DEFAULT 0,
48
- latency_ms INTEGER DEFAULT 0,
49
- success INTEGER DEFAULT 1,
50
- created_at TEXT DEFAULT (datetime('now'))
51
- );
52
-
53
- CREATE INDEX IF NOT EXISTS idx_local_models_site ON local_models(site_id);
54
- CREATE INDEX IF NOT EXISTS idx_local_models_status ON local_models(status);
55
- CREATE INDEX IF NOT EXISTS idx_local_inference_model ON local_inference_log(model_id);
56
- `);
57
-
58
- // ─── Prepared Statements ─────────────────────────────────────────────
59
-
60
- const stmts = {
61
- upsertModel: db.prepare("INSERT INTO local_models (id, site_id, provider, model_name, endpoint, capabilities, context_window, parameters) VALUES (?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, provider, model_name) DO UPDATE SET endpoint = ?, capabilities = ?, context_window = ?, parameters = ?, status = 'available', last_probe = datetime('now')"),
62
- getModel: db.prepare('SELECT * FROM local_models WHERE id = ?'),
63
- getModels: db.prepare('SELECT * FROM local_models WHERE site_id = ? ORDER BY provider, model_name'),
64
- getAvailableModels: db.prepare("SELECT * FROM local_models WHERE site_id = ? AND status = 'available' ORDER BY avg_latency_ms ASC"),
65
- getModelsByCapability: db.prepare("SELECT * FROM local_models WHERE site_id = ? AND status = 'available' AND capabilities LIKE ? ORDER BY avg_latency_ms ASC"),
66
- updateModelStatus: db.prepare('UPDATE local_models SET status = ?, last_probe = datetime(\'now\') WHERE id = ?'),
67
- updateModelStats: db.prepare("UPDATE local_models SET total_requests = total_requests + 1, total_tokens = total_tokens + ?, avg_latency_ms = (avg_latency_ms * total_requests + ?) / (total_requests + 1), last_used = datetime('now') WHERE id = ?"),
68
- insertLog: db.prepare('INSERT INTO local_inference_log (id, model_id, task_type, prompt_tokens, completion_tokens, latency_ms, success) VALUES (?, ?, ?, ?, ?, ?, ?)'),
69
- getStats: db.prepare(`SELECT
70
- (SELECT COUNT(*) FROM local_models WHERE site_id = ? AND status = 'available') as available_models,
71
- (SELECT COUNT(*) FROM local_models WHERE site_id = ?) as total_models,
72
- (SELECT SUM(total_requests) FROM local_models WHERE site_id = ?) as total_requests,
73
- (SELECT SUM(total_tokens) FROM local_models WHERE site_id = ?) as total_tokens,
74
- (SELECT AVG(avg_latency_ms) FROM local_models WHERE site_id = ? AND status = 'available') as avg_latency`),
75
- };
76
-
77
- // ─── Default Provider Endpoints ──────────────────────────────────────
78
-
79
- const PROVIDERS = {
80
- ollama: { name: 'ollama', baseUrl: 'http://localhost:11434', tagsPath: '/api/tags', chatPath: '/api/chat', generatePath: '/api/generate' },
81
- llamacpp: { name: 'llamacpp', baseUrl: 'http://localhost:8080', chatPath: '/v1/chat/completions', modelsPath: '/v1/models' },
82
- };
83
-
84
- // ─── Model Discovery ─────────────────────────────────────────────────
85
-
86
- /**
87
- * Probe local endpoints and register discovered models.
88
- */
89
- async function discoverModels(siteId, customEndpoints = []) {
90
- const discovered = [];
91
-
92
- // Probe Ollama
93
- try {
94
- const ollamaModels = await _probeOllama(PROVIDERS.ollama.baseUrl);
95
- for (const m of ollamaModels) {
96
- const result = _registerModel(siteId, 'ollama', m.name, PROVIDERS.ollama.baseUrl, m.capabilities, m.contextWindow, m.parameters);
97
- discovered.push(result);
98
- }
99
- } catch (_) { /* Ollama not running */ }
100
-
101
- // Probe llama.cpp
102
- try {
103
- const lcModels = await _probeLlamaCpp(PROVIDERS.llamacpp.baseUrl);
104
- for (const m of lcModels) {
105
- const result = _registerModel(siteId, 'llamacpp', m.name, PROVIDERS.llamacpp.baseUrl, m.capabilities, m.contextWindow, m.parameters);
106
- discovered.push(result);
107
- }
108
- } catch (_) { /* llama.cpp not running */ }
109
-
110
- // Probe custom endpoints
111
- for (const ep of customEndpoints) {
112
- try {
113
- const models = await _probeOpenAICompatible(ep.url);
114
- for (const m of models) {
115
- const result = _registerModel(siteId, ep.name || 'custom', m.name, ep.url, m.capabilities, m.contextWindow, m.parameters);
116
- discovered.push(result);
117
- }
118
- } catch (_) { /* endpoint not available */ }
119
- }
120
-
121
- return { discovered: discovered.length, models: discovered };
122
- }
123
-
124
- /**
125
- * Register a model manually.
126
- */
127
- function registerModel(siteId, provider, modelName, endpoint, capabilities = ['text'], contextWindow = 4096) {
128
- return _registerModel(siteId, provider, modelName, endpoint, capabilities, contextWindow, {});
129
- }
130
-
131
- function _registerModel(siteId, provider, modelName, endpoint, capabilities, contextWindow, parameters) {
132
- const id = crypto.randomUUID();
133
- const caps = JSON.stringify(capabilities);
134
- const params = JSON.stringify(parameters);
135
-
136
- stmts.upsertModel.run(id, siteId, provider, modelName, endpoint, caps, contextWindow, params, endpoint, caps, contextWindow, params);
137
- return { id, provider, modelName, endpoint, capabilities, contextWindow };
138
- }
139
-
140
- // ─── Inference ───────────────────────────────────────────────────────
141
-
142
- /**
143
- * Run inference on the best available local model.
144
- * @param {string} siteId
145
- * @param {string} prompt - The user prompt
146
- * @param {object} options - { capability, model, systemPrompt, temperature, maxTokens, stream }
147
- */
148
- async function infer(siteId, prompt, options = {}) {
149
- const capability = options.capability || 'text';
150
-
151
- // Select model
152
- let model;
153
- if (options.modelId) {
154
- model = stmts.getModel.get(options.modelId);
155
- if (!model || model.status !== 'available') throw new Error('Selected model unavailable');
156
- } else {
157
- const candidates = stmts.getModelsByCapability.all(siteId, `%${capability}%`);
158
- if (candidates.length === 0) throw new Error(`No local model available for capability: ${capability}`);
159
- model = candidates[0]; // Fastest by avg latency
160
- }
161
-
162
- const start = Date.now();
163
- let result;
164
-
165
- try {
166
- const parsed = JSON.parse(model.parameters || '{}');
167
- if (model.provider === 'ollama') {
168
- result = await _inferOllama(model, prompt, options);
169
- } else if (model.provider === 'llamacpp') {
170
- result = await _inferLlamaCpp(model, prompt, options);
171
- } else {
172
- result = await _inferOpenAICompatible(model, prompt, options);
173
- }
174
- } catch (err) {
175
- const latency = Date.now() - start;
176
- stmts.insertLog.run(crypto.randomUUID(), model.id, capability, 0, 0, latency, 0);
177
- throw err;
178
- }
179
-
180
- const latency = Date.now() - start;
181
- const totalTokens = (result.promptTokens || 0) + (result.completionTokens || 0);
182
-
183
- stmts.updateModelStats.run(totalTokens, latency, model.id);
184
- stmts.insertLog.run(crypto.randomUUID(), model.id, capability, result.promptTokens || 0, result.completionTokens || 0, latency, 1);
185
-
186
- return {
187
- modelId: model.id,
188
- provider: model.provider,
189
- model: model.model_name,
190
- response: result.text,
191
- promptTokens: result.promptTokens || 0,
192
- completionTokens: result.completionTokens || 0,
193
- latencyMs: latency,
194
- };
195
- }
196
-
197
- // ─── Model Management ────────────────────────────────────────────────
198
-
199
- function getModels(siteId) {
200
- return stmts.getModels.all(siteId).map(_deserializeModel);
201
- }
202
-
203
- function getAvailableModels(siteId) {
204
- return stmts.getAvailableModels.all(siteId).map(_deserializeModel);
205
- }
206
-
207
- function getModel(modelId) {
208
- const row = stmts.getModel.get(modelId);
209
- return row ? _deserializeModel(row) : null;
210
- }
211
-
212
- function updateModelStatus(modelId, status) {
213
- stmts.updateModelStatus.run(status, modelId);
214
- }
215
-
216
- function getStats(siteId) {
217
- const row = stmts.getStats.get(siteId, siteId, siteId, siteId, siteId);
218
- return {
219
- availableModels: row.available_models || 0,
220
- totalModels: row.total_models || 0,
221
- totalRequests: row.total_requests || 0,
222
- totalTokens: row.total_tokens || 0,
223
- avgLatency: row.avg_latency ? Math.round(row.avg_latency) : 0,
224
- };
225
- }
226
-
227
- // ─── Provider-Specific Inference ─────────────────────────────────────
228
-
229
- async function _inferOllama(model, prompt, options) {
230
- const body = {
231
- model: model.model_name,
232
- messages: [],
233
- stream: false,
234
- options: {},
235
- };
236
-
237
- if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
238
- body.messages.push({ role: 'user', content: prompt });
239
- if (options.temperature != null) body.options.temperature = options.temperature;
240
-
241
- const res = await fetch(`${model.endpoint}/api/chat`, {
242
- method: 'POST',
243
- headers: { 'Content-Type': 'application/json' },
244
- body: JSON.stringify(body),
245
- signal: AbortSignal.timeout(options.timeout || 120000),
246
- });
247
-
248
- if (!res.ok) throw new Error(`Ollama error: ${res.status}`);
249
- const data = await res.json();
250
-
251
- return {
252
- text: data.message?.content || '',
253
- promptTokens: data.prompt_eval_count || 0,
254
- completionTokens: data.eval_count || 0,
255
- };
256
- }
257
-
258
- async function _inferLlamaCpp(model, prompt, options) {
259
- const body = {
260
- model: model.model_name,
261
- messages: [],
262
- max_tokens: options.maxTokens || 2048,
263
- stream: false,
264
- };
265
-
266
- if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
267
- body.messages.push({ role: 'user', content: prompt });
268
- if (options.temperature != null) body.temperature = options.temperature;
269
-
270
- const res = await fetch(`${model.endpoint}/v1/chat/completions`, {
271
- method: 'POST',
272
- headers: { 'Content-Type': 'application/json' },
273
- body: JSON.stringify(body),
274
- signal: AbortSignal.timeout(options.timeout || 120000),
275
- });
276
-
277
- if (!res.ok) throw new Error(`llama.cpp error: ${res.status}`);
278
- const data = await res.json();
279
-
280
- return {
281
- text: data.choices?.[0]?.message?.content || '',
282
- promptTokens: data.usage?.prompt_tokens || 0,
283
- completionTokens: data.usage?.completion_tokens || 0,
284
- };
285
- }
286
-
287
- async function _inferOpenAICompatible(model, prompt, options) {
288
- const body = {
289
- model: model.model_name,
290
- messages: [],
291
- max_tokens: options.maxTokens || 2048,
292
- stream: false,
293
- };
294
-
295
- if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
296
- body.messages.push({ role: 'user', content: prompt });
297
- if (options.temperature != null) body.temperature = options.temperature;
298
-
299
- const res = await fetch(`${model.endpoint}/v1/chat/completions`, {
300
- method: 'POST',
301
- headers: { 'Content-Type': 'application/json' },
302
- body: JSON.stringify(body),
303
- signal: AbortSignal.timeout(options.timeout || 120000),
304
- });
305
-
306
- if (!res.ok) throw new Error(`Inference error: ${res.status}`);
307
- const data = await res.json();
308
-
309
- return {
310
- text: data.choices?.[0]?.message?.content || '',
311
- promptTokens: data.usage?.prompt_tokens || 0,
312
- completionTokens: data.usage?.completion_tokens || 0,
313
- };
314
- }
315
-
316
- // ─── Provider Probing ────────────────────────────────────────────────
317
-
318
- async function _probeOllama(baseUrl) {
319
- const res = await fetch(`${baseUrl}/api/tags`, { signal: AbortSignal.timeout(5000) });
320
- if (!res.ok) return [];
321
- const data = await res.json();
322
- return (data.models || []).map(m => ({
323
- name: m.name,
324
- capabilities: _detectCapabilities(m.name),
325
- contextWindow: m.details?.parameter_size ? _estimateContext(m.details.parameter_size) : 4096,
326
- parameters: { size: m.size, family: m.details?.family },
327
- }));
328
- }
329
-
330
- async function _probeLlamaCpp(baseUrl) {
331
- const res = await fetch(`${baseUrl}/v1/models`, { signal: AbortSignal.timeout(5000) });
332
- if (!res.ok) return [];
333
- const data = await res.json();
334
- return (data.data || []).map(m => ({
335
- name: m.id,
336
- capabilities: _detectCapabilities(m.id),
337
- contextWindow: 4096,
338
- parameters: {},
339
- }));
340
- }
341
-
342
- async function _probeOpenAICompatible(baseUrl) {
343
- const res = await fetch(`${baseUrl}/v1/models`, { signal: AbortSignal.timeout(5000) });
344
- if (!res.ok) return [];
345
- const data = await res.json();
346
- return (data.data || []).map(m => ({
347
- name: m.id,
348
- capabilities: _detectCapabilities(m.id),
349
- contextWindow: 4096,
350
- parameters: {},
351
- }));
352
- }
353
-
354
- // ─── Helpers ─────────────────────────────────────────────────────────
355
-
356
- function _detectCapabilities(modelName) {
357
- const n = modelName.toLowerCase();
358
- const caps = ['text'];
359
- if (n.includes('vision') || n.includes('llava') || n.includes('bakllava')) caps.push('vision');
360
- if (n.includes('code') || n.includes('codellama') || n.includes('deepseek-coder') || n.includes('starcoder')) caps.push('code');
361
- if (n.includes('embed') || n.includes('nomic')) caps.push('embedding');
362
- if (n.includes('mistral') || n.includes('mixtral')) caps.push('reasoning');
363
- return caps;
364
- }
365
-
366
- function _estimateContext(paramSize) {
367
- // Rough estimate: smaller models typically have smaller context
368
- if (typeof paramSize === 'string') {
369
- const num = parseFloat(paramSize);
370
- if (num >= 70) return 32768;
371
- if (num >= 13) return 8192;
372
- return 4096;
373
- }
374
- return 4096;
375
- }
376
-
377
- function _deserializeModel(row) {
378
- return {
379
- ...row,
380
- capabilities: JSON.parse(row.capabilities || '["text"]'),
381
- parameters: JSON.parse(row.parameters || '{}'),
382
- };
383
- }
384
-
385
- module.exports = {
386
- discoverModels, registerModel, infer,
387
- getModels, getAvailableModels, getModel, updateModelStatus,
388
- getStats,
389
- };
1
+ /**
2
+ * Local AI — Sovereign Intelligence Runtime
3
+ *
4
+ * Manages local AI models running on the user's own hardware.
5
+ * Auto-detects Ollama, llama.cpp, and any OpenAI-compatible local endpoint.
6
+ * Routes inference requests to the best available model based on capability,
7
+ * context window, and current load.
8
+ *
9
+ * Supported Providers:
10
+ * - Ollama (http://localhost:11434)
11
+ * - llama.cpp server (http://localhost:8080)
12
+ * - Custom OpenAI-compatible endpoints
13
+ *
14
+ * All inference happens locally. No data leaves the device.
15
+ */
16
+
17
+ const crypto = require('crypto');
18
+ const { db } = require('../models/db');
19
+
20
+ // ─── Schema ──────────────────────────────────────────────────────────
21
+
22
// DDL for the local model registry and the per-request inference log.
// Every statement uses IF NOT EXISTS, so running it again is harmless.
const LOCAL_AI_SCHEMA_SQL = `
  CREATE TABLE IF NOT EXISTS local_models (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL,
    provider TEXT NOT NULL,
    model_name TEXT NOT NULL,
    endpoint TEXT NOT NULL,
    capabilities TEXT DEFAULT '["text"]',
    context_window INTEGER DEFAULT 4096,
    parameters TEXT DEFAULT '{}',
    status TEXT DEFAULT 'available',
    total_requests INTEGER DEFAULT 0,
    total_tokens INTEGER DEFAULT 0,
    avg_latency_ms REAL DEFAULT 0,
    last_used TEXT,
    last_probe TEXT,
    created_at TEXT DEFAULT (datetime('now')),
    UNIQUE(site_id, provider, model_name)
  );

  CREATE TABLE IF NOT EXISTS local_inference_log (
    id TEXT PRIMARY KEY,
    model_id TEXT NOT NULL,
    task_type TEXT,
    prompt_tokens INTEGER DEFAULT 0,
    completion_tokens INTEGER DEFAULT 0,
    latency_ms INTEGER DEFAULT 0,
    success INTEGER DEFAULT 1,
    created_at TEXT DEFAULT (datetime('now'))
  );

  CREATE INDEX IF NOT EXISTS idx_local_models_site ON local_models(site_id);
  CREATE INDEX IF NOT EXISTS idx_local_models_status ON local_models(status);
  CREATE INDEX IF NOT EXISTS idx_local_inference_model ON local_inference_log(model_id);
`;

db.exec(LOCAL_AI_SCHEMA_SQL);
57
+
58
+ // ─── Prepared Statements ─────────────────────────────────────────────
59
+
60
// Statements are prepared once at module load and reused by every call below.
const stmts = {
  // Insert a model, or refresh the existing (site_id, provider, model_name) row.
  // The four update values are bound a second time after the eight insert values.
  upsertModel: db.prepare(`INSERT INTO local_models (id, site_id, provider, model_name, endpoint, capabilities, context_window, parameters) VALUES (?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, provider, model_name) DO UPDATE SET endpoint = ?, capabilities = ?, context_window = ?, parameters = ?, status = 'available', last_probe = datetime('now')`),

  // Lookups.
  getModel: db.prepare("SELECT * FROM local_models WHERE id = ?"),
  getModels: db.prepare("SELECT * FROM local_models WHERE site_id = ? ORDER BY provider, model_name"),
  getAvailableModels: db.prepare(`SELECT * FROM local_models WHERE site_id = ? AND status = 'available' ORDER BY avg_latency_ms ASC`),
  // The capability filter is a LIKE pattern applied to the JSON text column.
  getModelsByCapability: db.prepare(`SELECT * FROM local_models WHERE site_id = ? AND status = 'available' AND capabilities LIKE ? ORDER BY avg_latency_ms ASC`),

  // Mutations.
  updateModelStatus: db.prepare("UPDATE local_models SET status = ?, last_probe = datetime('now') WHERE id = ?"),
  // Folds one more sample into the running average latency and bumps the counters.
  updateModelStats: db.prepare(`UPDATE local_models SET total_requests = total_requests + 1, total_tokens = total_tokens + ?, avg_latency_ms = (avg_latency_ms * total_requests + ?) / (total_requests + 1), last_used = datetime('now') WHERE id = ?`),
  insertLog: db.prepare("INSERT INTO local_inference_log (id, model_id, task_type, prompt_tokens, completion_tokens, latency_ms, success) VALUES (?, ?, ?, ?, ?, ?, ?)"),

  // Per-site aggregates; the site id is bound once per sub-select (five times).
  getStats: db.prepare(`SELECT
    (SELECT COUNT(*) FROM local_models WHERE site_id = ? AND status = 'available') as available_models,
    (SELECT COUNT(*) FROM local_models WHERE site_id = ?) as total_models,
    (SELECT SUM(total_requests) FROM local_models WHERE site_id = ?) as total_requests,
    (SELECT SUM(total_tokens) FROM local_models WHERE site_id = ?) as total_tokens,
    (SELECT AVG(avg_latency_ms) FROM local_models WHERE site_id = ? AND status = 'available') as avg_latency`),
};
76
+
77
+ // ─── Default Provider Endpoints ──────────────────────────────────────
78
+
79
/**
 * Built-in provider defaults: base URL plus the API paths each provider exposes.
 * Frozen (including the nested entries) so no caller can accidentally rewrite
 * a default endpoint that the rest of the module relies on.
 */
const PROVIDERS = Object.freeze({
  ollama: Object.freeze({
    name: 'ollama',
    baseUrl: 'http://localhost:11434',
    tagsPath: '/api/tags',
    chatPath: '/api/chat',
    generatePath: '/api/generate',
  }),
  llamacpp: Object.freeze({
    name: 'llamacpp',
    baseUrl: 'http://localhost:8080',
    chatPath: '/v1/chat/completions',
    modelsPath: '/v1/models',
  }),
});
83
+
84
+ // ─── Model Discovery ─────────────────────────────────────────────────
85
+
86
/**
 * Probe local endpoints and register discovered models.
 *
 * Ollama and llama.cpp are probed at their default base URLs, followed by any
 * caller-supplied OpenAI-compatible endpoints. All probes are started
 * concurrently, so unreachable providers wait on their timeouts in parallel
 * instead of one after another. Discovery is best-effort: a provider whose
 * probe or registration fails is skipped without failing the whole call.
 *
 * @param {string} siteId - Site the discovered models are registered under.
 * @param {Array<{url: string, name?: string}>} [customEndpoints] - Extra
 *   endpoints to probe; `name` becomes the provider label (default 'custom').
 *   Entries without a non-empty string `url` are ignored.
 * @returns {Promise<{discovered: number, models: object[]}>} Count and list of
 *   the registration results, in provider order (ollama, llamacpp, custom).
 */
async function discoverModels(siteId, customEndpoints = []) {
  const targets = [
    { provider: 'ollama', endpoint: PROVIDERS.ollama.baseUrl, probe: _probeOllama },
    { provider: 'llamacpp', endpoint: PROVIDERS.llamacpp.baseUrl, probe: _probeLlamaCpp },
  ];

  const extras = Array.isArray(customEndpoints) ? customEndpoints : [];
  for (const ep of extras) {
    if (!ep || typeof ep.url !== 'string' || ep.url === '') continue;
    targets.push({
      provider: ep.name || 'custom',
      // Paths are appended with a leading '/', so drop trailing slashes to avoid '//'.
      endpoint: ep.url.replace(/\/+$/, ''),
      probe: _probeOpenAICompatible,
    });
  }

  // The async wrapper turns a synchronous throw from a probe into a rejection.
  const outcomes = await Promise.allSettled(
    targets.map(async (target) => target.probe(target.endpoint)),
  );

  const discovered = [];
  for (const [index, outcome] of outcomes.entries()) {
    if (outcome.status !== 'fulfilled') continue; // provider not running / unreachable

    const { provider, endpoint } = targets[index];
    try {
      for (const m of outcome.value) {
        discovered.push(
          _registerModel(siteId, provider, m.name, endpoint, m.capabilities, m.contextWindow, m.parameters),
        );
      }
    } catch (_) {
      /* registration failed for this provider; keep what was registered so far */
    }
  }

  return { discovered: discovered.length, models: discovered };
}
123
+
124
/**
 * Register a model manually.
 *
 * @param {string} siteId - Site the model belongs to.
 * @param {string} provider - Provider label stored with the model.
 * @param {string} modelName - Model name as the endpoint knows it.
 * @param {string} endpoint - Base URL of the serving endpoint.
 * @param {string[]} [capabilities=['text']] - Capability tags.
 * @param {number} [contextWindow=4096] - Context window size in tokens.
 * @param {object} [parameters={}] - Optional metadata stored as JSON with the
 *   model (previously always recorded as an empty object).
 * @returns {object} Whatever `_registerModel` returns for the registration.
 */
function registerModel(siteId, provider, modelName, endpoint, capabilities = ['text'], contextWindow = 4096, parameters = {}) {
  return _registerModel(siteId, provider, modelName, endpoint, capabilities, contextWindow, parameters);
}
130
+
131
/**
 * Insert or refresh a model row and return a summary of it.
 *
 * The upsert only uses the freshly generated id when the row is new; on a
 * (site_id, provider, model_name) conflict the stored row keeps its original
 * id. The id is therefore read back after the upsert so callers always get
 * the id that actually exists in `local_models`.
 *
 * @param {string} siteId
 * @param {string} provider
 * @param {string} modelName
 * @param {string} endpoint
 * @param {string[]} [capabilities] - Defaults to ['text'] when null/undefined.
 * @param {number} [contextWindow] - Defaults to 4096 when null/undefined.
 * @param {object} [parameters] - Defaults to {} when null/undefined.
 * @returns {{id: string, provider: string, modelName: string, endpoint: string,
 *   capabilities: string[], contextWindow: number}}
 */
function _registerModel(siteId, provider, modelName, endpoint, capabilities, contextWindow, parameters) {
  // Same fallbacks the table schema declares for these columns.
  const effectiveCaps = capabilities ?? ['text'];
  const effectiveWindow = contextWindow ?? 4096;
  const capsJson = JSON.stringify(effectiveCaps);
  const paramsJson = JSON.stringify(parameters ?? {});

  const candidateId = crypto.randomUUID();
  // Insert values first, then the same four values again for the ON CONFLICT update.
  stmts.upsertModel.run(
    candidateId, siteId, provider, modelName, endpoint, capsJson, effectiveWindow, paramsJson,
    endpoint, capsJson, effectiveWindow, paramsJson,
  );

  // Resolve the persisted id: it differs from candidateId when the row already existed.
  const stored = stmts.getModels
    .all(siteId)
    .find((row) => row.provider === provider && row.model_name === modelName);
  const id = stored ? stored.id : candidateId;

  return { id, provider, modelName, endpoint, capabilities: effectiveCaps, contextWindow: effectiveWindow };
}
139
+
140
+ // ─── Inference ───────────────────────────────────────────────────────
141
+
142
/**
 * Run inference on a local model and record the outcome.
 *
 * When `options.modelId` is given that model is used (it must exist and be
 * 'available'). Otherwise the first available model of the site that lists
 * the requested capability is used; candidates arrive ordered by average
 * latency, so this is the fastest one on record.
 *
 * Every call writes a row to the inference log; successful calls also update
 * the model's request/token/latency statistics.
 *
 * @param {string} siteId
 * @param {string} prompt - The user prompt
 * @param {object} [options] - { capability, modelId, systemPrompt, temperature,
 *   maxTokens, timeout }. `capability` defaults to 'text'; the remaining
 *   fields are passed through to the provider-specific helper.
 * @returns {Promise<{modelId: string, provider: string, model: string,
 *   response: string, promptTokens: number, completionTokens: number,
 *   latencyMs: number}>}
 * @throws {Error} 'Selected model unavailable', 'No local model available for
 *   capability: …', or whatever the provider helper throws (rethrown after
 *   the failure is logged).
 */
async function infer(siteId, prompt, options = {}) {
  const capability = options.capability || 'text';

  // Select model
  let model;
  if (options.modelId) {
    // NOTE(review): this lookup is by id only and does not check that the row
    // belongs to siteId — confirm whether cross-site selection is intended.
    model = stmts.getModel.get(options.modelId);
    if (!model || model.status !== 'available') throw new Error('Selected model unavailable');
  } else {
    // The SQL LIKE is only a substring pre-filter over the JSON text; confirm
    // the capability is really an element of the stored array.
    const candidates = stmts.getModelsByCapability
      .all(siteId, `%${capability}%`)
      .filter((row) => _modelHasCapability(row.capabilities, capability));
    if (candidates.length === 0) throw new Error(`No local model available for capability: ${capability}`);
    model = candidates[0]; // Fastest by avg latency
  }

  const start = Date.now();
  let result;

  try {
    if (model.provider === 'ollama') {
      result = await _inferOllama(model, prompt, options);
    } else if (model.provider === 'llamacpp') {
      result = await _inferLlamaCpp(model, prompt, options);
    } else {
      // Any other provider label is treated as an OpenAI-compatible endpoint.
      result = await _inferOpenAICompatible(model, prompt, options);
    }
  } catch (err) {
    // Log the failed attempt (zero tokens, success = 0) before propagating.
    stmts.insertLog.run(crypto.randomUUID(), model.id, capability, 0, 0, Date.now() - start, 0);
    throw err;
  }

  const latency = Date.now() - start;
  const promptTokens = result.promptTokens || 0;
  const completionTokens = result.completionTokens || 0;

  stmts.updateModelStats.run(promptTokens + completionTokens, latency, model.id);
  stmts.insertLog.run(crypto.randomUUID(), model.id, capability, promptTokens, completionTokens, latency, 1);

  return {
    modelId: model.id,
    provider: model.provider,
    model: model.model_name,
    response: result.text,
    promptTokens,
    completionTokens,
    latencyMs: latency,
  };
}

// True when the JSON-encoded capability list contains `capability` exactly.
function _modelHasCapability(rawCapabilities, capability) {
  try {
    const caps = JSON.parse(rawCapabilities || '["text"]');
    return Array.isArray(caps) && caps.includes(capability);
  } catch (_) {
    // Unparseable column: the row already passed the LIKE filter, so keep it.
    return true;
  }
}
196
+
197
+ // ─── Model Management ────────────────────────────────────────────────
198
+
199
/**
 * List every model registered for a site, with JSON columns decoded.
 * @param {string} siteId
 * @returns {object[]}
 */
function getModels(siteId) {
  const rows = stmts.getModels.all(siteId);
  return rows.map((row) => _deserializeModel(row));
}
202
+
203
/**
 * List the site's models whose status is 'available', with JSON columns decoded.
 * @param {string} siteId
 * @returns {object[]}
 */
function getAvailableModels(siteId) {
  const rows = stmts.getAvailableModels.all(siteId);
  return rows.map((row) => _deserializeModel(row));
}
206
+
207
/**
 * Fetch a single model by id.
 * @param {string} modelId
 * @returns {object|null} The decoded model, or null when no row matches.
 */
function getModel(modelId) {
  const row = stmts.getModel.get(modelId);
  if (!row) return null;
  return _deserializeModel(row);
}
211
+
212
/**
 * Set a model's status; the statement also stamps `last_probe` with the current time.
 * @param {string} modelId
 * @param {string} status
 * @returns {void}
 */
function updateModelStatus(modelId, status) {
  const statement = stmts.updateModelStatus;
  statement.run(status, modelId);
}
215
+
216
/**
 * Aggregate usage statistics for a site.
 * @param {string} siteId
 * @returns {{availableModels: number, totalModels: number, totalRequests: number,
 *   totalTokens: number, avgLatency: number}} Missing aggregates are reported
 *   as 0; the average latency is rounded to an integer.
 */
function getStats(siteId) {
  // The query has five placeholders, one per sub-select, all bound to the site id.
  const {
    available_models: availableModels,
    total_models: totalModels,
    total_requests: totalRequests,
    total_tokens: totalTokens,
    avg_latency: avgLatency,
  } = stmts.getStats.get(siteId, siteId, siteId, siteId, siteId);

  let roundedLatency = 0;
  if (avgLatency) {
    roundedLatency = Math.round(avgLatency);
  }

  return {
    availableModels: availableModels || 0,
    totalModels: totalModels || 0,
    totalRequests: totalRequests || 0,
    totalTokens: totalTokens || 0,
    avgLatency: roundedLatency,
  };
}
226
+
227
+ // ─── Provider-Specific Inference ─────────────────────────────────────
228
+
229
/**
 * Send a non-streaming chat request to an Ollama endpoint.
 *
 * @param {{model_name: string, endpoint: string}} model - Stored model row.
 * @param {string} prompt - User message.
 * @param {object} options - Reads systemPrompt, temperature, maxTokens, timeout.
 *   `maxTokens` is forwarded as Ollama's `options.num_predict`; it was
 *   previously ignored for this provider. Nothing is sent when it is unset, so
 *   the server default applies.
 * @returns {Promise<{text: string, promptTokens: number, completionTokens: number}>}
 * @throws {Error} `Ollama error: <status>` on a non-2xx response; fetch/abort
 *   errors propagate unchanged.
 */
async function _inferOllama(model, prompt, options) {
  const messages = [];
  if (options.systemPrompt) messages.push({ role: 'system', content: options.systemPrompt });
  messages.push({ role: 'user', content: prompt });

  const runtimeOptions = {};
  if (options.temperature != null) runtimeOptions.temperature = options.temperature;
  if (options.maxTokens != null) runtimeOptions.num_predict = options.maxTokens;

  const body = {
    model: model.model_name,
    messages,
    stream: false,
    options: runtimeOptions,
  };

  // Drop trailing slashes so the joined URL never contains '//api/chat'.
  const baseUrl = String(model.endpoint).replace(/\/+$/, '');
  const res = await fetch(`${baseUrl}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(options.timeout || 120000),
  });

  if (!res.ok) throw new Error(`Ollama error: ${res.status}`);
  const data = await res.json();

  return {
    text: data.message?.content || '',
    promptTokens: data.prompt_eval_count || 0,
    completionTokens: data.eval_count || 0,
  };
}
257
+
258
/**
 * Send a non-streaming chat completion request to a llama.cpp server.
 *
 * @param {{model_name: string, endpoint: string}} model - Stored model row.
 * @param {string} prompt - User message.
 * @param {object} options - Reads systemPrompt, temperature, maxTokens
 *   (default 2048), timeout (default 120000 ms).
 * @returns {Promise<{text: string, promptTokens: number, completionTokens: number}>}
 * @throws {Error} `llama.cpp error: <status>` on a non-2xx response; fetch/abort
 *   errors propagate unchanged.
 */
async function _inferLlamaCpp(model, prompt, options) {
  const messages = [];
  if (options.systemPrompt) messages.push({ role: 'system', content: options.systemPrompt });
  messages.push({ role: 'user', content: prompt });

  const body = {
    model: model.model_name,
    messages,
    max_tokens: options.maxTokens || 2048,
    stream: false,
  };
  if (options.temperature != null) body.temperature = options.temperature;

  // Drop trailing slashes so the joined URL never contains '//v1/...'.
  const baseUrl = String(model.endpoint).replace(/\/+$/, '');
  const res = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(options.timeout || 120000),
  });

  if (!res.ok) throw new Error(`llama.cpp error: ${res.status}`);
  const data = await res.json();

  return {
    text: data.choices?.[0]?.message?.content || '',
    promptTokens: data.usage?.prompt_tokens || 0,
    completionTokens: data.usage?.completion_tokens || 0,
  };
}
286
+
287
/**
 * Send a non-streaming chat completion request to an OpenAI-compatible endpoint.
 *
 * The stored endpoint may be written with a trailing slash or may already end
 * in `/v1` (the usual OpenAI "base URL" form); both are normalized so the
 * request always goes to `<base>/v1/chat/completions`.
 *
 * @param {{model_name: string, endpoint: string}} model - Stored model row.
 * @param {string} prompt - User message.
 * @param {object} options - Reads systemPrompt, temperature, maxTokens
 *   (default 2048), timeout (default 120000 ms).
 * @returns {Promise<{text: string, promptTokens: number, completionTokens: number}>}
 * @throws {Error} `Inference error: <status>` on a non-2xx response; fetch/abort
 *   errors propagate unchanged.
 */
async function _inferOpenAICompatible(model, prompt, options) {
  const messages = [];
  if (options.systemPrompt) messages.push({ role: 'system', content: options.systemPrompt });
  messages.push({ role: 'user', content: prompt });

  const body = {
    model: model.model_name,
    messages,
    max_tokens: options.maxTokens || 2048,
    stream: false,
  };
  if (options.temperature != null) body.temperature = options.temperature;

  // Strip trailing slashes, then a trailing '/v1', before re-appending the path.
  const baseUrl = String(model.endpoint).replace(/\/+$/, '').replace(/\/v1$/, '');
  const res = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(options.timeout || 120000),
  });

  if (!res.ok) throw new Error(`Inference error: ${res.status}`);
  const data = await res.json();

  return {
    text: data.choices?.[0]?.message?.content || '',
    promptTokens: data.usage?.prompt_tokens || 0,
    completionTokens: data.usage?.completion_tokens || 0,
  };
}
315
+
316
+ // ─── Provider Probing ────────────────────────────────────────────────
317
+
318
/**
 * List the models an Ollama server reports at `<baseUrl>/api/tags`.
 *
 * @param {string} baseUrl - Ollama base URL.
 * @returns {Promise<Array<{name: string, capabilities: string[],
 *   contextWindow: number, parameters: {size: *, family: *}}>>} Empty when the
 *   response is not OK or carries no model list. Entries without a string
 *   `name` are skipped so one malformed entry cannot fail the whole probe.
 */
async function _probeOllama(baseUrl) {
  const res = await fetch(`${baseUrl}/api/tags`, { signal: AbortSignal.timeout(5000) });
  if (!res.ok) return [];

  const data = await res.json();
  const listed = Array.isArray(data?.models) ? data.models : [];

  return listed
    .filter((m) => m && typeof m.name === 'string')
    .map((m) => ({
      name: m.name,
      capabilities: _detectCapabilities(m.name),
      // Context size is only estimated when the server reports a parameter size.
      contextWindow: m.details?.parameter_size ? _estimateContext(m.details.parameter_size) : 4096,
      parameters: { size: m.size, family: m.details?.family },
    }));
}
329
+
330
/**
 * List the models a llama.cpp server reports at `<baseUrl>/v1/models`.
 *
 * @param {string} baseUrl - llama.cpp server base URL.
 * @returns {Promise<Array<{name: string, capabilities: string[],
 *   contextWindow: number, parameters: object}>>} Empty when the response is
 *   not OK or carries no model list. Entries without a string `id` are skipped
 *   so one malformed entry cannot fail the whole probe. The context window is
 *   always reported as 4096 and `parameters` as an empty object.
 */
async function _probeLlamaCpp(baseUrl) {
  const res = await fetch(`${baseUrl}/v1/models`, { signal: AbortSignal.timeout(5000) });
  if (!res.ok) return [];

  const payload = await res.json();
  const listed = Array.isArray(payload?.data) ? payload.data : [];

  return listed
    .filter((m) => m && typeof m.id === 'string')
    .map((m) => ({
      name: m.id,
      capabilities: _detectCapabilities(m.id),
      contextWindow: 4096,
      parameters: {},
    }));
}
341
+
342
/**
 * List the models an OpenAI-compatible endpoint reports at `<baseUrl>/v1/models`.
 *
 * @param {string} baseUrl - Endpoint base URL (without the `/v1` suffix).
 * @returns {Promise<Array<{name: string, capabilities: string[],
 *   contextWindow: number, parameters: object}>>} Empty when the response is
 *   not OK or carries no model list. Entries without a string `id` are skipped
 *   so one malformed entry cannot fail the whole probe. The context window is
 *   always reported as 4096 and `parameters` as an empty object.
 */
async function _probeOpenAICompatible(baseUrl) {
  const res = await fetch(`${baseUrl}/v1/models`, { signal: AbortSignal.timeout(5000) });
  if (!res.ok) return [];

  const payload = await res.json();
  const listed = Array.isArray(payload?.data) ? payload.data : [];

  return listed
    .filter((m) => m && typeof m.id === 'string')
    .map((m) => ({
      name: m.id,
      capabilities: _detectCapabilities(m.id),
      contextWindow: 4096,
      parameters: {},
    }));
}
353
+
354
+ // ─── Helpers ─────────────────────────────────────────────────────────
355
+
356
/**
 * Guess a model's capabilities from keywords in its name.
 *
 * Every model gets 'text'; further tags are appended in a fixed order
 * (vision, code, embedding, reasoning) when a keyword appears anywhere in the
 * lower-cased name. A missing or non-string name yields just ['text'] instead
 * of throwing.
 *
 * @param {string} modelName
 * @returns {string[]}
 */
function _detectCapabilities(modelName) {
  const name = typeof modelName === 'string' ? modelName.toLowerCase() : '';

  // Substring matching: 'llava' also covers 'bakllava', and 'code' also covers
  // 'codellama', 'deepseek-coder' and 'starcoder'.
  const keywordTable = [
    ['vision', ['vision', 'llava']],
    ['code', ['code']],
    ['embedding', ['embed', 'nomic']],
    ['reasoning', ['mistral', 'mixtral']],
  ];

  const caps = ['text'];
  for (const [capability, keywords] of keywordTable) {
    if (keywords.some((keyword) => name.includes(keyword))) caps.push(capability);
  }
  return caps;
}
365
+
366
/**
 * Roughly estimate a context window from a parameter-size label such as
 * "7B", "70.6B" or "137M".
 *
 * The unit suffix is honoured (K, M, B, T; no suffix is read as billions), so
 * a "137M" model is no longer mistaken for a 137-billion-parameter one.
 *
 * @param {string} paramSize - Parameter-size label.
 * @returns {number} 32768 for >= 70B, 8192 for >= 13B, otherwise 4096. Any
 *   non-string or unparseable value also yields 4096.
 */
function _estimateContext(paramSize) {
  const DEFAULT_CONTEXT = 4096;
  if (typeof paramSize !== 'string') return DEFAULT_CONTEXT;

  const match = /^\s*(\d*\.?\d+)\s*([kmbt])?/i.exec(paramSize);
  if (!match) return DEFAULT_CONTEXT;

  const value = Number.parseFloat(match[1]);
  const unit = (match[2] || 'b').toLowerCase();

  // Normalize to billions of parameters; divide rather than multiply by a
  // fraction so round values such as "13000M" stay exact.
  let billions;
  switch (unit) {
    case 'k': billions = value / 1e6; break;
    case 'm': billions = value / 1e3; break;
    case 't': billions = value * 1e3; break;
    default: billions = value;
  }

  // Rough estimate: smaller models typically have smaller context
  if (billions >= 70) return 32768;
  if (billions >= 13) return 8192;
  return DEFAULT_CONTEXT;
}
376
+
377
/**
 * Convert a `local_models` row into its API shape by decoding the JSON text
 * columns. All other columns are copied through unchanged.
 *
 * A missing, empty or malformed column falls back to its default (['text']
 * for capabilities, {} for parameters) so a single bad row cannot make a
 * whole listing call throw.
 *
 * @param {object} row - Raw database row.
 * @returns {object} Row with `capabilities` and `parameters` parsed.
 */
function _deserializeModel(row) {
  return {
    ...row,
    capabilities: _parseJsonColumn(row.capabilities, ['text']),
    parameters: _parseJsonColumn(row.parameters, {}),
  };
}

// Parse a JSON text column, returning `fallback` when it is empty, null or invalid.
function _parseJsonColumn(raw, fallback) {
  if (!raw) return fallback;
  try {
    return JSON.parse(raw) ?? fallback;
  } catch (_) {
    return fallback;
  }
}
384
+
385
+ module.exports = {
386
+ discoverModels, registerModel, infer,
387
+ getModels, getAvailableModels, getModel, updateModelStatus,
388
+ getStats,
389
+ };