web-agent-bridge 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. package/LICENSE +72 -72
  2. package/README.ar.md +1286 -1152
  3. package/README.md +1764 -1635
  4. package/bin/agent-runner.js +474 -474
  5. package/bin/cli.js +237 -138
  6. package/bin/wab.js +80 -80
  7. package/examples/bidi-agent.js +119 -119
  8. package/examples/cross-site-agent.js +91 -91
  9. package/examples/mcp-agent.js +94 -94
  10. package/examples/next-app-router/README.md +44 -44
  11. package/examples/puppeteer-agent.js +108 -108
  12. package/examples/saas-dashboard/README.md +55 -55
  13. package/examples/shopify-hydrogen/README.md +74 -74
  14. package/examples/vision-agent.js +171 -171
  15. package/examples/wordpress-elementor/README.md +77 -77
  16. package/package.json +16 -3
  17. package/public/.well-known/agent-tools.json +180 -180
  18. package/public/.well-known/ai-assets.json +59 -59
  19. package/public/.well-known/security.txt +8 -0
  20. package/public/agent-workspace.html +349 -349
  21. package/public/ai.html +198 -198
  22. package/public/api.html +413 -412
  23. package/public/browser.html +486 -486
  24. package/public/commander-dashboard.html +243 -243
  25. package/public/cookies.html +210 -210
  26. package/public/css/agent-workspace.css +1713 -1713
  27. package/public/css/premium.css +317 -317
  28. package/public/css/styles.css +1235 -1235
  29. package/public/dashboard.html +706 -706
  30. package/public/dns.html +507 -0
  31. package/public/docs.html +587 -587
  32. package/public/feed.xml +89 -89
  33. package/public/growth.html +463 -463
  34. package/public/index.html +1070 -982
  35. package/public/integrations.html +556 -0
  36. package/public/js/agent-workspace.js +1740 -1740
  37. package/public/js/auth-nav.js +31 -31
  38. package/public/js/auth-redirect.js +12 -12
  39. package/public/js/cookie-consent.js +56 -56
  40. package/public/js/wab-demo-page.js +721 -721
  41. package/public/js/ws-client.js +74 -74
  42. package/public/llms-full.txt +360 -360
  43. package/public/llms.txt +125 -125
  44. package/public/login.html +85 -85
  45. package/public/mesh-dashboard.html +328 -328
  46. package/public/openapi.json +580 -580
  47. package/public/phone-shield.html +281 -0
  48. package/public/premium-dashboard.html +2489 -2489
  49. package/public/premium.html +793 -793
  50. package/public/privacy.html +297 -297
  51. package/public/register.html +105 -105
  52. package/public/robots.txt +87 -87
  53. package/public/script/wab-consent.d.ts +36 -36
  54. package/public/script/wab-consent.js +104 -104
  55. package/public/script/wab-schema.js +131 -131
  56. package/public/script/wab.d.ts +108 -108
  57. package/public/script/wab.min.js +580 -580
  58. package/public/security.txt +8 -0
  59. package/public/terms.html +256 -256
  60. package/script/ai-agent-bridge.js +1754 -1754
  61. package/sdk/README.md +99 -99
  62. package/sdk/agent-mesh.js +449 -449
  63. package/sdk/commander.js +262 -262
  64. package/sdk/index.d.ts +464 -464
  65. package/sdk/index.js +12 -1
  66. package/sdk/multi-agent.js +318 -318
  67. package/sdk/package.json +1 -1
  68. package/sdk/safety-shield.js +219 -0
  69. package/sdk/schema-discovery.js +83 -83
  70. package/server/adapters/index.js +520 -520
  71. package/server/config/plans.js +367 -367
  72. package/server/config/secrets.js +102 -102
  73. package/server/control-plane/index.js +301 -301
  74. package/server/data-plane/index.js +354 -354
  75. package/server/index.js +531 -427
  76. package/server/llm/index.js +404 -404
  77. package/server/middleware/adminAuth.js +35 -35
  78. package/server/middleware/auth.js +50 -50
  79. package/server/middleware/featureGate.js +88 -88
  80. package/server/middleware/rateLimits.js +100 -100
  81. package/server/middleware/sensitiveAction.js +157 -0
  82. package/server/migrations/001_add_analytics_indexes.sql +7 -7
  83. package/server/migrations/002_premium_features.sql +418 -418
  84. package/server/migrations/003_ads_integer_cents.sql +33 -33
  85. package/server/migrations/004_agent_os.sql +158 -158
  86. package/server/migrations/005_marketplace_metering.sql +126 -126
  87. package/server/models/adapters/index.js +33 -33
  88. package/server/models/adapters/mysql.js +183 -183
  89. package/server/models/adapters/postgresql.js +172 -172
  90. package/server/models/adapters/sqlite.js +7 -7
  91. package/server/models/db.js +681 -681
  92. package/server/observability/failure-analysis.js +337 -337
  93. package/server/observability/index.js +394 -394
  94. package/server/protocol/capabilities.js +223 -223
  95. package/server/protocol/index.js +243 -243
  96. package/server/protocol/schema.js +584 -584
  97. package/server/registry/certification.js +271 -271
  98. package/server/registry/index.js +326 -326
  99. package/server/routes/admin-premium.js +671 -671
  100. package/server/routes/admin.js +261 -261
  101. package/server/routes/ads.js +130 -130
  102. package/server/routes/agent-workspace.js +540 -540
  103. package/server/routes/api.js +150 -150
  104. package/server/routes/auth.js +71 -71
  105. package/server/routes/billing.js +45 -45
  106. package/server/routes/commander.js +316 -316
  107. package/server/routes/demo-showcase.js +332 -332
  108. package/server/routes/demo-store.js +154 -0
  109. package/server/routes/discovery.js +417 -417
  110. package/server/routes/gateway.js +173 -157
  111. package/server/routes/license.js +251 -240
  112. package/server/routes/mesh.js +469 -469
  113. package/server/routes/noscript.js +543 -543
  114. package/server/routes/premium-v2.js +686 -686
  115. package/server/routes/premium.js +724 -724
  116. package/server/routes/runtime.js +2148 -2147
  117. package/server/routes/sovereign.js +465 -385
  118. package/server/routes/universal.js +200 -185
  119. package/server/routes/wab-api.js +850 -501
  120. package/server/runtime/container-worker.js +111 -111
  121. package/server/runtime/container.js +448 -448
  122. package/server/runtime/distributed-worker.js +362 -362
  123. package/server/runtime/event-bus.js +210 -210
  124. package/server/runtime/index.js +253 -253
  125. package/server/runtime/queue.js +599 -599
  126. package/server/runtime/replay.js +666 -666
  127. package/server/runtime/sandbox.js +266 -266
  128. package/server/runtime/scheduler.js +534 -534
  129. package/server/runtime/session-engine.js +293 -293
  130. package/server/runtime/state-manager.js +188 -188
  131. package/server/security/cross-site-redactor.js +196 -0
  132. package/server/security/dry-run.js +180 -0
  133. package/server/security/human-gate-rate-limit.js +147 -0
  134. package/server/security/human-gate-transports.js +178 -0
  135. package/server/security/human-gate.js +281 -0
  136. package/server/security/index.js +368 -368
  137. package/server/security/intent-engine.js +245 -0
  138. package/server/security/reward-guard.js +171 -0
  139. package/server/security/rollback-store.js +239 -0
  140. package/server/security/token-scope.js +404 -0
  141. package/server/security/url-policy.js +139 -0
  142. package/server/services/agent-chat.js +506 -506
  143. package/server/services/agent-learning.js +601 -575
  144. package/server/services/agent-memory.js +625 -625
  145. package/server/services/agent-mesh.js +555 -539
  146. package/server/services/agent-symphony.js +717 -717
  147. package/server/services/agent-tasks.js +1807 -1807
  148. package/server/services/api-key-engine.js +292 -261
  149. package/server/services/cluster.js +894 -894
  150. package/server/services/commander.js +738 -738
  151. package/server/services/edge-compute.js +440 -440
  152. package/server/services/email.js +204 -204
  153. package/server/services/hosted-runtime.js +205 -205
  154. package/server/services/lfd.js +635 -635
  155. package/server/services/local-ai.js +389 -389
  156. package/server/services/marketplace.js +270 -270
  157. package/server/services/metering.js +182 -182
  158. package/server/services/modules/affiliate-intelligence.js +93 -93
  159. package/server/services/modules/agent-firewall.js +90 -90
  160. package/server/services/modules/bounty.js +89 -89
  161. package/server/services/modules/collective-bargaining.js +92 -92
  162. package/server/services/modules/dark-pattern.js +66 -66
  163. package/server/services/modules/gov-intelligence.js +45 -45
  164. package/server/services/modules/neural.js +55 -55
  165. package/server/services/modules/notary.js +49 -49
  166. package/server/services/modules/price-time-machine.js +86 -86
  167. package/server/services/modules/protocol.js +104 -104
  168. package/server/services/negotiation.js +439 -439
  169. package/server/services/plugins.js +771 -771
  170. package/server/services/price-intelligence.js +566 -566
  171. package/server/services/price-shield.js +1137 -1137
  172. package/server/services/reputation.js +465 -465
  173. package/server/services/search-engine.js +357 -357
  174. package/server/services/security.js +513 -513
  175. package/server/services/self-healing.js +843 -843
  176. package/server/services/sovereign-shield.js +542 -0
  177. package/server/services/stripe.js +192 -192
  178. package/server/services/swarm.js +788 -788
  179. package/server/services/universal-scraper.js +662 -661
  180. package/server/services/verification.js +481 -481
  181. package/server/services/vision.js +1163 -1163
  182. package/server/utils/cache.js +125 -125
  183. package/server/utils/migrate.js +81 -81
  184. package/server/utils/safe-fetch.js +228 -0
  185. package/server/utils/secureFields.js +50 -50
  186. package/server/ws.js +161 -161
  187. package/templates/artisan-marketplace.yaml +104 -104
  188. package/templates/book-price-scout.yaml +98 -98
  189. package/templates/electronics-price-tracker.yaml +108 -108
  190. package/templates/flight-deal-hunter.yaml +113 -113
  191. package/templates/freelancer-direct.yaml +116 -116
  192. package/templates/grocery-price-compare.yaml +93 -93
  193. package/templates/hotel-direct-booking.yaml +113 -113
  194. package/templates/local-services.yaml +98 -98
  195. package/templates/olive-oil-tunisia.yaml +88 -88
  196. package/templates/organic-farm-fresh.yaml +101 -101
  197. package/templates/restaurant-direct.yaml +97 -97
  198. package/public/score.html +0 -263
  199. package/server/migrations/006_growth_suite.sql +0 -138
  200. package/server/routes/growth.js +0 -962
  201. package/server/services/fairness-engine.js +0 -409
  202. package/server/services/fairness.js +0 -420
package/server/services/local-ai.js
The two sides of this hunk are textually identical (389 lines removed and 389 re-added), so the file content is shown once below.
@@ -1,389 +1,389 @@
/**
 * Local AI — Sovereign Intelligence Runtime
 *
 * Manages local AI models running on the user's own hardware.
 * Auto-detects Ollama, llama.cpp, and any OpenAI-compatible local endpoint.
 * Routes inference requests to the best available model based on capability
 * and measured average latency.
 *
 * Supported Providers:
 * - Ollama (http://localhost:11434)
 * - llama.cpp server (http://localhost:8080)
 * - Custom OpenAI-compatible endpoints
 *
 * All inference happens locally. No data leaves the device.
 */

const crypto = require('crypto');
const { db } = require('../models/db');

// ─── Schema ──────────────────────────────────────────────────────────

db.exec(`
  CREATE TABLE IF NOT EXISTS local_models (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL,
    provider TEXT NOT NULL,
    model_name TEXT NOT NULL,
    endpoint TEXT NOT NULL,
    capabilities TEXT DEFAULT '["text"]',
    context_window INTEGER DEFAULT 4096,
    parameters TEXT DEFAULT '{}',
    status TEXT DEFAULT 'available',
    total_requests INTEGER DEFAULT 0,
    total_tokens INTEGER DEFAULT 0,
    avg_latency_ms REAL DEFAULT 0,
    last_used TEXT,
    last_probe TEXT,
    created_at TEXT DEFAULT (datetime('now')),
    UNIQUE(site_id, provider, model_name)
  );

  CREATE TABLE IF NOT EXISTS local_inference_log (
    id TEXT PRIMARY KEY,
    model_id TEXT NOT NULL,
    task_type TEXT,
    prompt_tokens INTEGER DEFAULT 0,
    completion_tokens INTEGER DEFAULT 0,
    latency_ms INTEGER DEFAULT 0,
    success INTEGER DEFAULT 1,
    created_at TEXT DEFAULT (datetime('now'))
  );

  CREATE INDEX IF NOT EXISTS idx_local_models_site ON local_models(site_id);
  CREATE INDEX IF NOT EXISTS idx_local_models_status ON local_models(status);
  CREATE INDEX IF NOT EXISTS idx_local_inference_model ON local_inference_log(model_id);
`);

// ─── Prepared Statements ─────────────────────────────────────────────

const stmts = {
  upsertModel: db.prepare("INSERT INTO local_models (id, site_id, provider, model_name, endpoint, capabilities, context_window, parameters) VALUES (?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, provider, model_name) DO UPDATE SET endpoint = ?, capabilities = ?, context_window = ?, parameters = ?, status = 'available', last_probe = datetime('now')"),
  getModel: db.prepare('SELECT * FROM local_models WHERE id = ?'),
  getModels: db.prepare('SELECT * FROM local_models WHERE site_id = ? ORDER BY provider, model_name'),
  getAvailableModels: db.prepare("SELECT * FROM local_models WHERE site_id = ? AND status = 'available' ORDER BY avg_latency_ms ASC"),
  getModelsByCapability: db.prepare("SELECT * FROM local_models WHERE site_id = ? AND status = 'available' AND capabilities LIKE ? ORDER BY avg_latency_ms ASC"),
  updateModelStatus: db.prepare('UPDATE local_models SET status = ?, last_probe = datetime(\'now\') WHERE id = ?'),
  updateModelStats: db.prepare("UPDATE local_models SET total_requests = total_requests + 1, total_tokens = total_tokens + ?, avg_latency_ms = (avg_latency_ms * total_requests + ?) / (total_requests + 1), last_used = datetime('now') WHERE id = ?"),
  insertLog: db.prepare('INSERT INTO local_inference_log (id, model_id, task_type, prompt_tokens, completion_tokens, latency_ms, success) VALUES (?, ?, ?, ?, ?, ?, ?)'),
  getStats: db.prepare(`SELECT
    (SELECT COUNT(*) FROM local_models WHERE site_id = ? AND status = 'available') as available_models,
    (SELECT COUNT(*) FROM local_models WHERE site_id = ?) as total_models,
    (SELECT SUM(total_requests) FROM local_models WHERE site_id = ?) as total_requests,
    (SELECT SUM(total_tokens) FROM local_models WHERE site_id = ?) as total_tokens,
    (SELECT AVG(avg_latency_ms) FROM local_models WHERE site_id = ? AND status = 'available') as avg_latency`),
};
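
// Note: `capabilities` is stored as a JSON array string (e.g. '["text","code"]'),
// so the LIKE '%cap%' filter in getModelsByCapability is a plain substring match
// rather than JSON containment; capability names that are substrings of one
// another would cross-match.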

// ─── Default Provider Endpoints ──────────────────────────────────────

const PROVIDERS = {
  ollama: { name: 'ollama', baseUrl: 'http://localhost:11434', tagsPath: '/api/tags', chatPath: '/api/chat', generatePath: '/api/generate' },
  llamacpp: { name: 'llamacpp', baseUrl: 'http://localhost:8080', chatPath: '/v1/chat/completions', modelsPath: '/v1/models' },
};

// ─── Model Discovery ─────────────────────────────────────────────────

/**
 * Probe local endpoints and register discovered models.
 */
async function discoverModels(siteId, customEndpoints = []) {
  const discovered = [];

  // Probe Ollama
  try {
    const ollamaModels = await _probeOllama(PROVIDERS.ollama.baseUrl);
    for (const m of ollamaModels) {
      const result = _registerModel(siteId, 'ollama', m.name, PROVIDERS.ollama.baseUrl, m.capabilities, m.contextWindow, m.parameters);
      discovered.push(result);
    }
  } catch (_) { /* Ollama not running */ }

  // Probe llama.cpp
  try {
    const lcModels = await _probeLlamaCpp(PROVIDERS.llamacpp.baseUrl);
    for (const m of lcModels) {
      const result = _registerModel(siteId, 'llamacpp', m.name, PROVIDERS.llamacpp.baseUrl, m.capabilities, m.contextWindow, m.parameters);
      discovered.push(result);
    }
  } catch (_) { /* llama.cpp not running */ }

  // Probe custom endpoints
  for (const ep of customEndpoints) {
    try {
      const models = await _probeOpenAICompatible(ep.url);
      for (const m of models) {
        const result = _registerModel(siteId, ep.name || 'custom', m.name, ep.url, m.capabilities, m.contextWindow, m.parameters);
        discovered.push(result);
      }
    } catch (_) { /* endpoint not available */ }
  }

  return { discovered: discovered.length, models: discovered };
}

/**
 * Register a model manually.
 */
function registerModel(siteId, provider, modelName, endpoint, capabilities = ['text'], contextWindow = 4096) {
  return _registerModel(siteId, provider, modelName, endpoint, capabilities, contextWindow, {});
}

function _registerModel(siteId, provider, modelName, endpoint, capabilities, contextWindow, parameters) {
  const id = crypto.randomUUID();
  const caps = JSON.stringify(capabilities);
  const params = JSON.stringify(parameters);

  stmts.upsertModel.run(id, siteId, provider, modelName, endpoint, caps, contextWindow, params, endpoint, caps, contextWindow, params);
  return { id, provider, modelName, endpoint, capabilities, contextWindow };
}

// ─── Inference ───────────────────────────────────────────────────────

/**
 * Run inference on the best available local model.
 * @param {string} siteId
 * @param {string} prompt - The user prompt
 * @param {object} options - { capability, modelId, systemPrompt, temperature, maxTokens, timeout }
 */
async function infer(siteId, prompt, options = {}) {
  const capability = options.capability || 'text';

  // Select model
  let model;
  if (options.modelId) {
    model = stmts.getModel.get(options.modelId);
    if (!model || model.status !== 'available') throw new Error('Selected model unavailable');
  } else {
    const candidates = stmts.getModelsByCapability.all(siteId, `%${capability}%`);
    if (candidates.length === 0) throw new Error(`No local model available for capability: ${capability}`);
    model = candidates[0]; // Fastest by avg latency
  }

  const start = Date.now();
  let result;

  try {
    if (model.provider === 'ollama') {
      result = await _inferOllama(model, prompt, options);
    } else if (model.provider === 'llamacpp') {
      result = await _inferLlamaCpp(model, prompt, options);
    } else {
      result = await _inferOpenAICompatible(model, prompt, options);
    }
  } catch (err) {
    const latency = Date.now() - start;
    stmts.insertLog.run(crypto.randomUUID(), model.id, capability, 0, 0, latency, 0);
    throw err;
  }

  const latency = Date.now() - start;
  const totalTokens = (result.promptTokens || 0) + (result.completionTokens || 0);

  stmts.updateModelStats.run(totalTokens, latency, model.id);
  stmts.insertLog.run(crypto.randomUUID(), model.id, capability, result.promptTokens || 0, result.completionTokens || 0, latency, 1);

  return {
    modelId: model.id,
    provider: model.provider,
    model: model.model_name,
    response: result.text,
    promptTokens: result.promptTokens || 0,
    completionTokens: result.completionTokens || 0,
    latencyMs: latency,
  };
}

// ─── Model Management ────────────────────────────────────────────────

function getModels(siteId) {
  return stmts.getModels.all(siteId).map(_deserializeModel);
}

function getAvailableModels(siteId) {
  return stmts.getAvailableModels.all(siteId).map(_deserializeModel);
}

function getModel(modelId) {
  const row = stmts.getModel.get(modelId);
  return row ? _deserializeModel(row) : null;
}

function updateModelStatus(modelId, status) {
  stmts.updateModelStatus.run(status, modelId);
}

function getStats(siteId) {
  const row = stmts.getStats.get(siteId, siteId, siteId, siteId, siteId);
  return {
    availableModels: row.available_models || 0,
    totalModels: row.total_models || 0,
    totalRequests: row.total_requests || 0,
    totalTokens: row.total_tokens || 0,
    avgLatency: row.avg_latency ? Math.round(row.avg_latency) : 0,
  };
}

// ─── Provider-Specific Inference ─────────────────────────────────────

async function _inferOllama(model, prompt, options) {
  const body = {
    model: model.model_name,
    messages: [],
    stream: false,
    options: {},
  };

  if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
  body.messages.push({ role: 'user', content: prompt });
  if (options.temperature != null) body.options.temperature = options.temperature;

  const res = await fetch(`${model.endpoint}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(options.timeout || 120000),
  });

  if (!res.ok) throw new Error(`Ollama error: ${res.status}`);
  const data = await res.json();

  return {
    text: data.message?.content || '',
    promptTokens: data.prompt_eval_count || 0,
    completionTokens: data.eval_count || 0,
  };
}

async function _inferLlamaCpp(model, prompt, options) {
  const body = {
    model: model.model_name,
    messages: [],
    max_tokens: options.maxTokens || 2048,
    stream: false,
  };

  if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
  body.messages.push({ role: 'user', content: prompt });
  if (options.temperature != null) body.temperature = options.temperature;

  const res = await fetch(`${model.endpoint}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(options.timeout || 120000),
  });

  if (!res.ok) throw new Error(`llama.cpp error: ${res.status}`);
  const data = await res.json();

  return {
    text: data.choices?.[0]?.message?.content || '',
    promptTokens: data.usage?.prompt_tokens || 0,
    completionTokens: data.usage?.completion_tokens || 0,
  };
}

async function _inferOpenAICompatible(model, prompt, options) {
  const body = {
    model: model.model_name,
    messages: [],
    max_tokens: options.maxTokens || 2048,
    stream: false,
  };

  if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
  body.messages.push({ role: 'user', content: prompt });
  if (options.temperature != null) body.temperature = options.temperature;

  const res = await fetch(`${model.endpoint}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(options.timeout || 120000),
  });

  if (!res.ok) throw new Error(`Inference error: ${res.status}`);
  const data = await res.json();

  return {
    text: data.choices?.[0]?.message?.content || '',
    promptTokens: data.usage?.prompt_tokens || 0,
    completionTokens: data.usage?.completion_tokens || 0,
  };
}

// ─── Provider Probing ────────────────────────────────────────────────

async function _probeOllama(baseUrl) {
  const res = await fetch(`${baseUrl}/api/tags`, { signal: AbortSignal.timeout(5000) });
  if (!res.ok) return [];
  const data = await res.json();
  return (data.models || []).map(m => ({
    name: m.name,
    capabilities: _detectCapabilities(m.name),
    contextWindow: m.details?.parameter_size ? _estimateContext(m.details.parameter_size) : 4096,
    parameters: { size: m.size, family: m.details?.family },
  }));
}

async function _probeLlamaCpp(baseUrl) {
  const res = await fetch(`${baseUrl}/v1/models`, { signal: AbortSignal.timeout(5000) });
  if (!res.ok) return [];
  const data = await res.json();
  return (data.data || []).map(m => ({
    name: m.id,
    capabilities: _detectCapabilities(m.id),
    contextWindow: 4096,
    parameters: {},
  }));
}

async function _probeOpenAICompatible(baseUrl) {
  const res = await fetch(`${baseUrl}/v1/models`, { signal: AbortSignal.timeout(5000) });
  if (!res.ok) return [];
  const data = await res.json();
  return (data.data || []).map(m => ({
    name: m.id,
    capabilities: _detectCapabilities(m.id),
    contextWindow: 4096,
    parameters: {},
  }));
}

// ─── Helpers ─────────────────────────────────────────────────────────

function _detectCapabilities(modelName) {
  const n = modelName.toLowerCase();
  const caps = ['text'];
  if (n.includes('vision') || n.includes('llava') || n.includes('bakllava')) caps.push('vision');
  if (n.includes('code') || n.includes('codellama') || n.includes('deepseek-coder') || n.includes('starcoder')) caps.push('code');
  if (n.includes('embed') || n.includes('nomic')) caps.push('embedding');
  if (n.includes('mistral') || n.includes('mixtral')) caps.push('reasoning');
  return caps;
}

function _estimateContext(paramSize) {
  // Rough estimate: smaller models typically have smaller context
  if (typeof paramSize === 'string') {
    const num = parseFloat(paramSize);
    if (num >= 70) return 32768;
    if (num >= 13) return 8192;
    return 4096;
  }
  return 4096;
}

function _deserializeModel(row) {
  return {
    ...row,
    capabilities: JSON.parse(row.capabilities || '["text"]'),
    parameters: JSON.parse(row.parameters || '{}'),
  };
}

module.exports = {
  discoverModels, registerModel, infer,
  getModels, getAvailableModels, getModel, updateModelStatus,
  getStats,
};
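
For orientation, here is a minimal usage sketch of the module exported above. The require path, the 'site-1' id, and the prompt are illustrative assumptions rather than documented API; it also assumes a local Ollama or llama.cpp server is reachable on its default port so discovery registers at least one model.

// Hypothetical usage sketch; the require path and site id are assumptions.
const localAI = require('./server/services/local-ai');

async function main() {
  // Probe the default localhost endpoints and register whatever responds.
  const { discovered } = await localAI.discoverModels('site-1');
  console.log(`discovered ${discovered} local model(s)`);
  if (discovered === 0) return;

  // Route to the lowest-latency available model advertising the 'text' capability.
  const out = await localAI.infer('site-1', 'Summarize this page in one sentence.', {
    capability: 'text',
    temperature: 0.2,
    maxTokens: 256,
  });
  console.log(`${out.provider}/${out.model} in ${out.latencyMs}ms`);
  console.log(out.response);
}

main().catch(console.error);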