web-agent-bridge 3.2.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256)
  1. package/LICENSE +84 -72
  2. package/README.ar.md +1304 -1152
  3. package/README.md +298 -1635
  4. package/bin/agent-runner.js +474 -474
  5. package/bin/cli.js +237 -138
  6. package/bin/wab-init.js +223 -0
  7. package/bin/wab.js +80 -80
  8. package/examples/azure-dns-wab.js +83 -0
  9. package/examples/bidi-agent.js +119 -119
  10. package/examples/cloudflare-wab-dns.js +121 -0
  11. package/examples/cpanel-wab-dns.js +114 -0
  12. package/examples/cross-site-agent.js +91 -91
  13. package/examples/dns-discovery-agent.js +166 -0
  14. package/examples/gcp-dns-wab.js +76 -0
  15. package/examples/governance-agent.js +169 -0
  16. package/examples/mcp-agent.js +94 -94
  17. package/examples/next-app-router/README.md +44 -44
  18. package/examples/plesk-wab-dns.js +103 -0
  19. package/examples/puppeteer-agent.js +108 -108
  20. package/examples/route53-wab-dns.js +144 -0
  21. package/examples/saas-dashboard/README.md +55 -55
  22. package/examples/safe-mode-agent.js +96 -0
  23. package/examples/shopify-hydrogen/README.md +74 -74
  24. package/examples/vision-agent.js +171 -171
  25. package/examples/wab-sign.js +74 -0
  26. package/examples/wab-verify.js +60 -0
  27. package/examples/wordpress-elementor/README.md +77 -77
  28. package/package.json +19 -6
  29. package/public/.well-known/agent-tools.json +180 -180
  30. package/public/.well-known/ai-assets.json +59 -59
  31. package/public/.well-known/security.txt +8 -0
  32. package/public/.well-known/wab.json +28 -0
  33. package/public/activate.html +368 -0
  34. package/public/adoption-metrics.html +188 -0
  35. package/public/agent-workspace.html +349 -349
  36. package/public/ai.html +198 -198
  37. package/public/api.html +413 -412
  38. package/public/azure-dns-integration.html +289 -0
  39. package/public/browser.html +486 -486
  40. package/public/cloudflare-integration.html +380 -0
  41. package/public/commander-dashboard.html +243 -243
  42. package/public/cookies.html +210 -210
  43. package/public/cpanel-integration.html +398 -0
  44. package/public/css/agent-workspace.css +1713 -1713
  45. package/public/css/premium.css +317 -317
  46. package/public/css/styles.css +1263 -1235
  47. package/public/dashboard.html +707 -706
  48. package/public/dns.html +436 -0
  49. package/public/docs.html +588 -587
  50. package/public/feed.xml +89 -89
  51. package/public/gcp-dns-integration.html +318 -0
  52. package/public/growth.html +465 -463
  53. package/public/index.html +1266 -982
  54. package/public/integrations.html +556 -0
  55. package/public/js/activate.js +145 -0
  56. package/public/js/agent-workspace.js +1740 -1740
  57. package/public/js/auth-nav.js +65 -31
  58. package/public/js/auth-redirect.js +12 -12
  59. package/public/js/cookie-consent.js +56 -56
  60. package/public/js/dns.js +438 -0
  61. package/public/js/wab-demo-page.js +721 -721
  62. package/public/js/ws-client.js +74 -74
  63. package/public/llms-full.txt +360 -360
  64. package/public/llms.txt +125 -125
  65. package/public/login.html +85 -85
  66. package/public/mesh-dashboard.html +328 -328
  67. package/public/openapi.json +669 -580
  68. package/public/phone-shield.html +281 -0
  69. package/public/plesk-integration.html +375 -0
  70. package/public/premium-dashboard.html +2489 -2489
  71. package/public/premium.html +793 -793
  72. package/public/privacy.html +297 -297
  73. package/public/provider-onboarding.html +172 -0
  74. package/public/provider-sandbox.html +134 -0
  75. package/public/providers.html +359 -0
  76. package/public/register.html +105 -105
  77. package/public/registrar-integrations.html +141 -0
  78. package/public/robots.txt +99 -87
  79. package/public/route53-integration.html +531 -0
  80. package/public/script/wab-consent.d.ts +36 -36
  81. package/public/script/wab-consent.js +104 -104
  82. package/public/script/wab-schema.js +131 -131
  83. package/public/script/wab.d.ts +108 -108
  84. package/public/script/wab.min.js +580 -580
  85. package/public/security.txt +8 -0
  86. package/public/shieldqr.html +231 -0
  87. package/public/sitemap.xml +6 -0
  88. package/public/terms.html +256 -256
  89. package/public/wab-trust.html +200 -0
  90. package/public/wab-vs-protocols.html +210 -0
  91. package/public/whitepaper.html +449 -0
  92. package/script/ai-agent-bridge.js +1754 -1754
  93. package/sdk/README.md +99 -99
  94. package/sdk/agent-mesh.js +449 -449
  95. package/sdk/auto-discovery.js +288 -0
  96. package/sdk/commander.js +262 -262
  97. package/sdk/governance.js +262 -0
  98. package/sdk/index.d.ts +464 -464
  99. package/sdk/index.js +25 -1
  100. package/sdk/multi-agent.js +318 -318
  101. package/sdk/package.json +2 -2
  102. package/sdk/safe-mode.js +221 -0
  103. package/sdk/safety-shield.js +219 -0
  104. package/sdk/schema-discovery.js +83 -83
  105. package/server/adapters/index.js +520 -520
  106. package/server/config/plans.js +367 -367
  107. package/server/config/secrets.js +102 -102
  108. package/server/control-plane/index.js +301 -301
  109. package/server/data-plane/index.js +354 -354
  110. package/server/index.js +670 -427
  111. package/server/llm/index.js +404 -404
  112. package/server/middleware/adminAuth.js +35 -35
  113. package/server/middleware/auth.js +50 -50
  114. package/server/middleware/featureGate.js +88 -88
  115. package/server/middleware/rateLimits.js +100 -100
  116. package/server/middleware/sensitiveAction.js +157 -0
  117. package/server/migrations/001_add_analytics_indexes.sql +7 -7
  118. package/server/migrations/002_premium_features.sql +418 -418
  119. package/server/migrations/003_ads_integer_cents.sql +33 -33
  120. package/server/migrations/004_agent_os.sql +158 -158
  121. package/server/migrations/005_marketplace_metering.sql +126 -126
  122. package/server/migrations/007_governance.sql +106 -0
  123. package/server/migrations/008_plans.sql +144 -0
  124. package/server/migrations/009_shieldqr.sql +30 -0
  125. package/server/migrations/010_extended_trust.sql +33 -0
  126. package/server/models/adapters/index.js +33 -33
  127. package/server/models/adapters/mysql.js +183 -183
  128. package/server/models/adapters/postgresql.js +172 -172
  129. package/server/models/adapters/sqlite.js +7 -7
  130. package/server/models/db.js +740 -681
  131. package/server/observability/failure-analysis.js +337 -337
  132. package/server/observability/index.js +394 -394
  133. package/server/protocol/capabilities.js +223 -223
  134. package/server/protocol/index.js +243 -243
  135. package/server/protocol/schema.js +584 -584
  136. package/server/registry/certification.js +271 -271
  137. package/server/registry/index.js +326 -326
  138. package/server/routes/admin-plans.js +76 -0
  139. package/server/routes/admin-premium.js +673 -671
  140. package/server/routes/admin-shieldqr.js +90 -0
  141. package/server/routes/admin-trust-monitor.js +83 -0
  142. package/server/routes/admin.js +549 -261
  143. package/server/routes/ads.js +130 -130
  144. package/server/routes/agent-workspace.js +540 -540
  145. package/server/routes/api.js +150 -150
  146. package/server/routes/auth.js +71 -71
  147. package/server/routes/billing.js +57 -45
  148. package/server/routes/commander.js +316 -316
  149. package/server/routes/demo-showcase.js +332 -332
  150. package/server/routes/demo-store.js +154 -0
  151. package/server/routes/discovery.js +2348 -417
  152. package/server/routes/gateway.js +173 -157
  153. package/server/routes/governance.js +208 -0
  154. package/server/routes/license.js +251 -240
  155. package/server/routes/mesh.js +469 -469
  156. package/server/routes/noscript.js +543 -543
  157. package/server/routes/plans.js +33 -0
  158. package/server/routes/premium-v2.js +686 -686
  159. package/server/routes/premium.js +724 -724
  160. package/server/routes/providers.js +650 -0
  161. package/server/routes/runtime.js +2148 -2147
  162. package/server/routes/shieldqr.js +88 -0
  163. package/server/routes/sovereign.js +465 -385
  164. package/server/routes/universal.js +200 -185
  165. package/server/routes/wab-api.js +850 -501
  166. package/server/runtime/container-worker.js +111 -111
  167. package/server/runtime/container.js +448 -448
  168. package/server/runtime/distributed-worker.js +362 -362
  169. package/server/runtime/event-bus.js +210 -210
  170. package/server/runtime/index.js +253 -253
  171. package/server/runtime/queue.js +599 -599
  172. package/server/runtime/replay.js +666 -666
  173. package/server/runtime/sandbox.js +266 -266
  174. package/server/runtime/scheduler.js +534 -534
  175. package/server/runtime/session-engine.js +293 -293
  176. package/server/runtime/state-manager.js +188 -188
  177. package/server/security/cross-site-redactor.js +196 -0
  178. package/server/security/dry-run.js +180 -0
  179. package/server/security/human-gate-rate-limit.js +147 -0
  180. package/server/security/human-gate-transports.js +178 -0
  181. package/server/security/human-gate.js +281 -0
  182. package/server/security/index.js +368 -368
  183. package/server/security/intent-engine.js +245 -0
  184. package/server/security/reward-guard.js +171 -0
  185. package/server/security/rollback-store.js +239 -0
  186. package/server/security/token-scope.js +404 -0
  187. package/server/security/url-policy.js +139 -0
  188. package/server/services/agent-chat.js +506 -506
  189. package/server/services/agent-learning.js +601 -575
  190. package/server/services/agent-memory.js +625 -625
  191. package/server/services/agent-mesh.js +555 -539
  192. package/server/services/agent-symphony.js +717 -717
  193. package/server/services/agent-tasks.js +1807 -1807
  194. package/server/services/api-key-engine.js +292 -261
  195. package/server/services/cluster.js +894 -894
  196. package/server/services/commander.js +738 -738
  197. package/server/services/edge-compute.js +440 -440
  198. package/server/services/email.js +233 -204
  199. package/server/services/governance.js +466 -0
  200. package/server/services/hosted-runtime.js +205 -205
  201. package/server/services/lfd.js +635 -635
  202. package/server/services/local-ai.js +389 -389
  203. package/server/services/marketplace.js +270 -270
  204. package/server/services/metering.js +182 -182
  205. package/server/services/modules/affiliate-intelligence.js +93 -93
  206. package/server/services/modules/agent-firewall.js +90 -90
  207. package/server/services/modules/bounty.js +89 -89
  208. package/server/services/modules/collective-bargaining.js +92 -92
  209. package/server/services/modules/dark-pattern.js +66 -66
  210. package/server/services/modules/gov-intelligence.js +45 -45
  211. package/server/services/modules/neural.js +55 -55
  212. package/server/services/modules/notary.js +49 -49
  213. package/server/services/modules/price-time-machine.js +86 -86
  214. package/server/services/modules/protocol.js +104 -104
  215. package/server/services/negotiation.js +439 -439
  216. package/server/services/plans.js +214 -0
  217. package/server/services/plugins.js +771 -771
  218. package/server/services/premium.js +1 -1
  219. package/server/services/price-intelligence.js +566 -566
  220. package/server/services/price-shield.js +1137 -1137
  221. package/server/services/provider-clients.js +740 -0
  222. package/server/services/reputation.js +465 -465
  223. package/server/services/search-engine.js +357 -357
  224. package/server/services/security.js +513 -513
  225. package/server/services/self-healing.js +843 -843
  226. package/server/services/shieldqr.js +322 -0
  227. package/server/services/sovereign-shield.js +542 -0
  228. package/server/services/ssl-inspector.js +42 -0
  229. package/server/services/ssl-monitor.js +167 -0
  230. package/server/services/stripe.js +205 -192
  231. package/server/services/swarm.js +788 -788
  232. package/server/services/universal-scraper.js +662 -661
  233. package/server/services/verification.js +481 -481
  234. package/server/services/vision.js +1163 -1163
  235. package/server/services/wab-crypto.js +178 -0
  236. package/server/utils/cache.js +125 -125
  237. package/server/utils/migrate.js +81 -81
  238. package/server/utils/safe-fetch.js +228 -0
  239. package/server/utils/secureFields.js +50 -50
  240. package/server/ws.js +161 -161
  241. package/templates/artisan-marketplace.yaml +104 -104
  242. package/templates/book-price-scout.yaml +98 -98
  243. package/templates/electronics-price-tracker.yaml +108 -108
  244. package/templates/flight-deal-hunter.yaml +113 -113
  245. package/templates/freelancer-direct.yaml +116 -116
  246. package/templates/grocery-price-compare.yaml +93 -93
  247. package/templates/hotel-direct-booking.yaml +113 -113
  248. package/templates/local-services.yaml +98 -98
  249. package/templates/olive-oil-tunisia.yaml +88 -88
  250. package/templates/organic-farm-fresh.yaml +101 -101
  251. package/templates/restaurant-direct.yaml +97 -97
  252. package/public/score.html +0 -263
  253. package/server/migrations/006_growth_suite.sql +0 -138
  254. package/server/routes/growth.js +0 -962
  255. package/server/services/fairness-engine.js +0 -409
  256. package/server/services/fairness.js +0 -420
package/server/services/local-ai.js
@@ -1,389 +1,389 @@
All 389 lines were rewritten, but the removed and added content render identically here, so the file is shown once:
/**
 * Local AI — Sovereign Intelligence Runtime
 *
 * Manages local AI models running on the user's own hardware.
 * Auto-detects Ollama, llama.cpp, and any OpenAI-compatible local endpoint.
 * Routes inference requests to the best available model based on capability
 * and observed average latency.
 *
 * Supported Providers:
 * - Ollama (http://localhost:11434)
 * - llama.cpp server (http://localhost:8080)
 * - Custom OpenAI-compatible endpoints
 *
 * All inference happens locally. No data leaves the device.
 */
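// Usage sketch (illustrative; the require path is an assumption and a local
// runtime such as Ollama must already be listening):
//
//   const localAI = require('./server/services/local-ai');
//   await localAI.discoverModels('site_1');                 // probe localhost providers
//   const out = await localAI.infer('site_1', 'Summarize this page.');
//   console.log(out.model, out.latencyMs, out.response);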

const crypto = require('crypto');
const { db } = require('../models/db');

// ─── Schema ──────────────────────────────────────────────────────────

db.exec(`
  CREATE TABLE IF NOT EXISTS local_models (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL,
    provider TEXT NOT NULL,
    model_name TEXT NOT NULL,
    endpoint TEXT NOT NULL,
    capabilities TEXT DEFAULT '["text"]',
    context_window INTEGER DEFAULT 4096,
    parameters TEXT DEFAULT '{}',
    status TEXT DEFAULT 'available',
    total_requests INTEGER DEFAULT 0,
    total_tokens INTEGER DEFAULT 0,
    avg_latency_ms REAL DEFAULT 0,
    last_used TEXT,
    last_probe TEXT,
    created_at TEXT DEFAULT (datetime('now')),
    UNIQUE(site_id, provider, model_name)
  );

  CREATE TABLE IF NOT EXISTS local_inference_log (
    id TEXT PRIMARY KEY,
    model_id TEXT NOT NULL,
    task_type TEXT,
    prompt_tokens INTEGER DEFAULT 0,
    completion_tokens INTEGER DEFAULT 0,
    latency_ms INTEGER DEFAULT 0,
    success INTEGER DEFAULT 1,
    created_at TEXT DEFAULT (datetime('now'))
  );

  CREATE INDEX IF NOT EXISTS idx_local_models_site ON local_models(site_id);
  CREATE INDEX IF NOT EXISTS idx_local_models_status ON local_models(status);
  CREATE INDEX IF NOT EXISTS idx_local_inference_model ON local_inference_log(model_id);
`);

// ─── Prepared Statements ─────────────────────────────────────────────

const stmts = {
  upsertModel: db.prepare("INSERT INTO local_models (id, site_id, provider, model_name, endpoint, capabilities, context_window, parameters) VALUES (?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, provider, model_name) DO UPDATE SET endpoint = ?, capabilities = ?, context_window = ?, parameters = ?, status = 'available', last_probe = datetime('now')"),
  getModel: db.prepare('SELECT * FROM local_models WHERE id = ?'),
  getModels: db.prepare('SELECT * FROM local_models WHERE site_id = ? ORDER BY provider, model_name'),
  getAvailableModels: db.prepare("SELECT * FROM local_models WHERE site_id = ? AND status = 'available' ORDER BY avg_latency_ms ASC"),
  getModelsByCapability: db.prepare("SELECT * FROM local_models WHERE site_id = ? AND status = 'available' AND capabilities LIKE ? ORDER BY avg_latency_ms ASC"),
  updateModelStatus: db.prepare("UPDATE local_models SET status = ?, last_probe = datetime('now') WHERE id = ?"),
  updateModelStats: db.prepare("UPDATE local_models SET total_requests = total_requests + 1, total_tokens = total_tokens + ?, avg_latency_ms = (avg_latency_ms * total_requests + ?) / (total_requests + 1), last_used = datetime('now') WHERE id = ?"),
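  // Incremental mean: new_avg = (old_avg * n + latency_ms) / (n + 1). SQLite evaluates
  // SET expressions against the pre-update row, so total_requests above is still n.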
  insertLog: db.prepare('INSERT INTO local_inference_log (id, model_id, task_type, prompt_tokens, completion_tokens, latency_ms, success) VALUES (?, ?, ?, ?, ?, ?, ?)'),
  getStats: db.prepare(`SELECT
    (SELECT COUNT(*) FROM local_models WHERE site_id = ? AND status = 'available') as available_models,
    (SELECT COUNT(*) FROM local_models WHERE site_id = ?) as total_models,
    (SELECT SUM(total_requests) FROM local_models WHERE site_id = ?) as total_requests,
    (SELECT SUM(total_tokens) FROM local_models WHERE site_id = ?) as total_tokens,
    (SELECT AVG(avg_latency_ms) FROM local_models WHERE site_id = ? AND status = 'available') as avg_latency`),
};

// ─── Default Provider Endpoints ──────────────────────────────────────

const PROVIDERS = {
  ollama: { name: 'ollama', baseUrl: 'http://localhost:11434', tagsPath: '/api/tags', chatPath: '/api/chat', generatePath: '/api/generate' },
  llamacpp: { name: 'llamacpp', baseUrl: 'http://localhost:8080', chatPath: '/v1/chat/completions', modelsPath: '/v1/models' },
};

// ─── Model Discovery ─────────────────────────────────────────────────

/**
 * Probe local endpoints and register discovered models.
 */
async function discoverModels(siteId, customEndpoints = []) {
  const discovered = [];

  // Probe Ollama
  try {
    const ollamaModels = await _probeOllama(PROVIDERS.ollama.baseUrl);
    for (const m of ollamaModels) {
      const result = _registerModel(siteId, 'ollama', m.name, PROVIDERS.ollama.baseUrl, m.capabilities, m.contextWindow, m.parameters);
      discovered.push(result);
    }
  } catch (_) { /* Ollama not running */ }

  // Probe llama.cpp
  try {
    const lcModels = await _probeLlamaCpp(PROVIDERS.llamacpp.baseUrl);
    for (const m of lcModels) {
      const result = _registerModel(siteId, 'llamacpp', m.name, PROVIDERS.llamacpp.baseUrl, m.capabilities, m.contextWindow, m.parameters);
      discovered.push(result);
    }
  } catch (_) { /* llama.cpp not running */ }

  // Probe custom endpoints
  for (const ep of customEndpoints) {
    try {
      const models = await _probeOpenAICompatible(ep.url);
      for (const m of models) {
        const result = _registerModel(siteId, ep.name || 'custom', m.name, ep.url, m.capabilities, m.contextWindow, m.parameters);
        discovered.push(result);
      }
    } catch (_) { /* endpoint not available */ }
  }

  return { discovered: discovered.length, models: discovered };
}
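// Each custom endpoint is { name, url }. Illustrative call (the vLLM URL is hypothetical):
//   await discoverModels('site_1', [{ name: 'vllm', url: 'http://localhost:8000' }]);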

/**
 * Register a model manually.
 */
function registerModel(siteId, provider, modelName, endpoint, capabilities = ['text'], contextWindow = 4096) {
  return _registerModel(siteId, provider, modelName, endpoint, capabilities, contextWindow, {});
}

function _registerModel(siteId, provider, modelName, endpoint, capabilities, contextWindow, parameters) {
  const id = crypto.randomUUID();
  const caps = JSON.stringify(capabilities);
  const params = JSON.stringify(parameters);

  stmts.upsertModel.run(id, siteId, provider, modelName, endpoint, caps, contextWindow, params, endpoint, caps, contextWindow, params);
  return { id, provider, modelName, endpoint, capabilities, contextWindow };
}

// ─── Inference ───────────────────────────────────────────────────────

/**
 * Run inference on the best available local model.
 * @param {string} siteId
 * @param {string} prompt - The user prompt
 * @param {object} options - { capability, modelId, systemPrompt, temperature, maxTokens, timeout }
 */
async function infer(siteId, prompt, options = {}) {
  const capability = options.capability || 'text';

  // Select model
  let model;
  if (options.modelId) {
    model = stmts.getModel.get(options.modelId);
    if (!model || model.status !== 'available') throw new Error('Selected model unavailable');
  } else {
    const candidates = stmts.getModelsByCapability.all(siteId, `%${capability}%`);
    if (candidates.length === 0) throw new Error(`No local model available for capability: ${capability}`);
    model = candidates[0]; // Fastest by avg latency
  }

  const start = Date.now();
  let result;

  try {
    if (model.provider === 'ollama') {
      result = await _inferOllama(model, prompt, options);
    } else if (model.provider === 'llamacpp') {
      result = await _inferLlamaCpp(model, prompt, options);
    } else {
      result = await _inferOpenAICompatible(model, prompt, options);
    }
  } catch (err) {
    const latency = Date.now() - start;
    stmts.insertLog.run(crypto.randomUUID(), model.id, capability, 0, 0, latency, 0);
    throw err;
  }

  const latency = Date.now() - start;
  const totalTokens = (result.promptTokens || 0) + (result.completionTokens || 0);

  stmts.updateModelStats.run(totalTokens, latency, model.id);
  stmts.insertLog.run(crypto.randomUUID(), model.id, capability, result.promptTokens || 0, result.completionTokens || 0, latency, 1);

  return {
    modelId: model.id,
    provider: model.provider,
    model: model.model_name,
    response: result.text,
    promptTokens: result.promptTokens || 0,
    completionTokens: result.completionTokens || 0,
    latencyMs: latency,
  };
}

// ─── Model Management ────────────────────────────────────────────────

function getModels(siteId) {
  return stmts.getModels.all(siteId).map(_deserializeModel);
}

function getAvailableModels(siteId) {
  return stmts.getAvailableModels.all(siteId).map(_deserializeModel);
}

function getModel(modelId) {
  const row = stmts.getModel.get(modelId);
  return row ? _deserializeModel(row) : null;
}

function updateModelStatus(modelId, status) {
  stmts.updateModelStatus.run(status, modelId);
}

function getStats(siteId) {
  const row = stmts.getStats.get(siteId, siteId, siteId, siteId, siteId);
  return {
    availableModels: row.available_models || 0,
    totalModels: row.total_models || 0,
    totalRequests: row.total_requests || 0,
    totalTokens: row.total_tokens || 0,
    avgLatency: row.avg_latency ? Math.round(row.avg_latency) : 0,
  };
}

// ─── Provider-Specific Inference ─────────────────────────────────────

async function _inferOllama(model, prompt, options) {
  const body = {
    model: model.model_name,
    messages: [],
    stream: false,
    options: {},
  };

  if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
  body.messages.push({ role: 'user', content: prompt });
  if (options.temperature != null) body.options.temperature = options.temperature;
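  // Note (observation): options.maxTokens is not forwarded on this path; Ollama's
  // chat API equivalent would be body.options.num_predict.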

  const res = await fetch(`${model.endpoint}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(options.timeout || 120000),
  });

  if (!res.ok) throw new Error(`Ollama error: ${res.status}`);
  const data = await res.json();

  return {
    text: data.message?.content || '',
    promptTokens: data.prompt_eval_count || 0,
    completionTokens: data.eval_count || 0,
  };
}

async function _inferLlamaCpp(model, prompt, options) {
  const body = {
    model: model.model_name,
    messages: [],
    max_tokens: options.maxTokens || 2048,
    stream: false,
  };

  if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
  body.messages.push({ role: 'user', content: prompt });
  if (options.temperature != null) body.temperature = options.temperature;

  const res = await fetch(`${model.endpoint}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(options.timeout || 120000),
  });

  if (!res.ok) throw new Error(`llama.cpp error: ${res.status}`);
  const data = await res.json();

  return {
    text: data.choices?.[0]?.message?.content || '',
    promptTokens: data.usage?.prompt_tokens || 0,
    completionTokens: data.usage?.completion_tokens || 0,
  };
}

async function _inferOpenAICompatible(model, prompt, options) {
  const body = {
    model: model.model_name,
    messages: [],
    max_tokens: options.maxTokens || 2048,
    stream: false,
  };

  if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
  body.messages.push({ role: 'user', content: prompt });
  if (options.temperature != null) body.temperature = options.temperature;

  const res = await fetch(`${model.endpoint}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(options.timeout || 120000),
  });

  if (!res.ok) throw new Error(`Inference error: ${res.status}`);
  const data = await res.json();

  return {
    text: data.choices?.[0]?.message?.content || '',
    promptTokens: data.usage?.prompt_tokens || 0,
    completionTokens: data.usage?.completion_tokens || 0,
  };
}

// ─── Provider Probing ────────────────────────────────────────────────

async function _probeOllama(baseUrl) {
  const res = await fetch(`${baseUrl}/api/tags`, { signal: AbortSignal.timeout(5000) });
  if (!res.ok) return [];
  const data = await res.json();
  return (data.models || []).map(m => ({
    name: m.name,
    capabilities: _detectCapabilities(m.name),
    contextWindow: m.details?.parameter_size ? _estimateContext(m.details.parameter_size) : 4096,
    parameters: { size: m.size, family: m.details?.family },
  }));
}

async function _probeLlamaCpp(baseUrl) {
  const res = await fetch(`${baseUrl}/v1/models`, { signal: AbortSignal.timeout(5000) });
  if (!res.ok) return [];
  const data = await res.json();
  return (data.data || []).map(m => ({
    name: m.id,
    capabilities: _detectCapabilities(m.id),
    contextWindow: 4096,
    parameters: {},
  }));
}

async function _probeOpenAICompatible(baseUrl) {
  const res = await fetch(`${baseUrl}/v1/models`, { signal: AbortSignal.timeout(5000) });
  if (!res.ok) return [];
  const data = await res.json();
  return (data.data || []).map(m => ({
    name: m.id,
    capabilities: _detectCapabilities(m.id),
    contextWindow: 4096,
    parameters: {},
  }));
}

// ─── Helpers ─────────────────────────────────────────────────────────

function _detectCapabilities(modelName) {
  const n = modelName.toLowerCase();
  const caps = ['text'];
  if (n.includes('vision') || n.includes('llava') || n.includes('bakllava')) caps.push('vision');
  if (n.includes('code') || n.includes('codellama') || n.includes('deepseek-coder') || n.includes('starcoder')) caps.push('code');
  if (n.includes('embed') || n.includes('nomic')) caps.push('embedding');
  if (n.includes('mistral') || n.includes('mixtral')) caps.push('reasoning');
  return caps;
}
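// e.g. _detectCapabilities('llava:13b') → ['text', 'vision'];
//      _detectCapabilities('deepseek-coder-6.7b') → ['text', 'code']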

function _estimateContext(paramSize) {
  // Rough estimate: smaller models typically have smaller context
  if (typeof paramSize === 'string') {
    const num = parseFloat(paramSize);
    if (num >= 70) return 32768;
    if (num >= 13) return 8192;
    return 4096;
  }
  return 4096;
}
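// e.g. _estimateContext('70B') → 32768, _estimateContext('7B') → 4096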

function _deserializeModel(row) {
  return {
    ...row,
    capabilities: JSON.parse(row.capabilities || '["text"]'),
    parameters: JSON.parse(row.parameters || '{}'),
  };
}

module.exports = {
  discoverModels, registerModel, infer,
  getModels, getAvailableModels, getModel, updateModelStatus,
  getStats,
};