web-agent-bridge 3.0.0 → 3.3.0

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between the two versions.
Files changed (202)
  1. package/LICENSE +72 -21
  2. package/README.ar.md +1286 -1073
  3. package/README.md +1764 -1535
  4. package/bin/agent-runner.js +474 -474
  5. package/bin/cli.js +237 -138
  6. package/bin/wab.js +80 -80
  7. package/examples/bidi-agent.js +119 -119
  8. package/examples/cross-site-agent.js +91 -91
  9. package/examples/mcp-agent.js +94 -94
  10. package/examples/next-app-router/README.md +44 -44
  11. package/examples/puppeteer-agent.js +108 -108
  12. package/examples/saas-dashboard/README.md +55 -55
  13. package/examples/shopify-hydrogen/README.md +74 -74
  14. package/examples/vision-agent.js +171 -171
  15. package/examples/wordpress-elementor/README.md +77 -77
  16. package/package.json +17 -3
  17. package/public/.well-known/agent-tools.json +180 -180
  18. package/public/.well-known/ai-assets.json +59 -59
  19. package/public/.well-known/ai-plugin.json +28 -0
  20. package/public/.well-known/security.txt +8 -0
  21. package/public/agent-workspace.html +349 -347
  22. package/public/ai.html +198 -196
  23. package/public/api.html +413 -0
  24. package/public/browser.html +486 -484
  25. package/public/commander-dashboard.html +243 -243
  26. package/public/cookies.html +210 -208
  27. package/public/css/agent-workspace.css +1713 -1713
  28. package/public/css/premium.css +317 -317
  29. package/public/css/styles.css +1235 -1235
  30. package/public/dashboard.html +706 -704
  31. package/public/demo.html +1770 -1
  32. package/public/dns.html +507 -0
  33. package/public/docs.html +587 -585
  34. package/public/feed.xml +89 -89
  35. package/public/growth.html +463 -0
  36. package/public/index.html +341 -9
  37. package/public/integrations.html +556 -0
  38. package/public/js/agent-workspace.js +1740 -1740
  39. package/public/js/auth-nav.js +31 -31
  40. package/public/js/auth-redirect.js +12 -12
  41. package/public/js/cookie-consent.js +56 -56
  42. package/public/js/wab-demo-page.js +721 -721
  43. package/public/js/ws-client.js +74 -74
  44. package/public/llms-full.txt +360 -309
  45. package/public/llms.txt +125 -86
  46. package/public/login.html +85 -83
  47. package/public/mesh-dashboard.html +328 -328
  48. package/public/openapi.json +580 -580
  49. package/public/phone-shield.html +281 -0
  50. package/public/premium-dashboard.html +2489 -2487
  51. package/public/premium.html +793 -791
  52. package/public/privacy.html +297 -295
  53. package/public/register.html +105 -103
  54. package/public/robots.txt +87 -87
  55. package/public/script/wab-consent.d.ts +36 -36
  56. package/public/script/wab-consent.js +104 -104
  57. package/public/script/wab-schema.js +131 -131
  58. package/public/script/wab.d.ts +108 -108
  59. package/public/script/wab.min.js +580 -580
  60. package/public/security.txt +8 -0
  61. package/public/terms.html +256 -254
  62. package/script/ai-agent-bridge.js +1754 -1754
  63. package/sdk/README.md +99 -99
  64. package/sdk/agent-mesh.js +449 -449
  65. package/sdk/commander.js +262 -262
  66. package/sdk/index.d.ts +464 -464
  67. package/sdk/index.js +18 -1
  68. package/sdk/multi-agent.js +318 -318
  69. package/sdk/package.json +12 -1
  70. package/sdk/safety-shield.js +219 -0
  71. package/sdk/schema-discovery.js +83 -83
  72. package/server/adapters/index.js +520 -520
  73. package/server/config/plans.js +367 -367
  74. package/server/config/secrets.js +102 -102
  75. package/server/control-plane/index.js +301 -301
  76. package/server/data-plane/index.js +354 -354
  77. package/server/index.js +175 -19
  78. package/server/llm/index.js +404 -404
  79. package/server/middleware/adminAuth.js +35 -35
  80. package/server/middleware/auth.js +50 -50
  81. package/server/middleware/featureGate.js +88 -88
  82. package/server/middleware/rateLimits.js +100 -100
  83. package/server/middleware/sensitiveAction.js +157 -0
  84. package/server/migrations/001_add_analytics_indexes.sql +7 -7
  85. package/server/migrations/002_premium_features.sql +418 -418
  86. package/server/migrations/003_ads_integer_cents.sql +33 -33
  87. package/server/migrations/004_agent_os.sql +158 -158
  88. package/server/migrations/005_marketplace_metering.sql +126 -126
  89. package/server/models/adapters/index.js +33 -33
  90. package/server/models/adapters/mysql.js +183 -183
  91. package/server/models/adapters/postgresql.js +172 -172
  92. package/server/models/adapters/sqlite.js +7 -7
  93. package/server/models/db.js +681 -681
  94. package/server/observability/failure-analysis.js +337 -337
  95. package/server/observability/index.js +394 -394
  96. package/server/protocol/capabilities.js +223 -223
  97. package/server/protocol/index.js +243 -243
  98. package/server/protocol/schema.js +584 -584
  99. package/server/registry/certification.js +271 -271
  100. package/server/registry/index.js +326 -326
  101. package/server/routes/admin-premium.js +671 -671
  102. package/server/routes/admin.js +261 -261
  103. package/server/routes/ads.js +130 -130
  104. package/server/routes/agent-workspace.js +540 -378
  105. package/server/routes/api.js +150 -150
  106. package/server/routes/auth.js +71 -71
  107. package/server/routes/billing.js +45 -45
  108. package/server/routes/commander.js +316 -316
  109. package/server/routes/demo-showcase.js +332 -0
  110. package/server/routes/demo-store.js +154 -0
  111. package/server/routes/discovery.js +417 -406
  112. package/server/routes/gateway.js +173 -0
  113. package/server/routes/license.js +251 -240
  114. package/server/routes/mesh.js +469 -469
  115. package/server/routes/noscript.js +543 -543
  116. package/server/routes/premium-v2.js +686 -686
  117. package/server/routes/premium.js +724 -724
  118. package/server/routes/runtime.js +2148 -2147
  119. package/server/routes/sovereign.js +465 -385
  120. package/server/routes/universal.js +200 -177
  121. package/server/routes/wab-api.js +850 -491
  122. package/server/runtime/container-worker.js +111 -111
  123. package/server/runtime/container.js +448 -448
  124. package/server/runtime/distributed-worker.js +362 -362
  125. package/server/runtime/event-bus.js +210 -210
  126. package/server/runtime/index.js +253 -253
  127. package/server/runtime/queue.js +599 -599
  128. package/server/runtime/replay.js +666 -666
  129. package/server/runtime/sandbox.js +266 -266
  130. package/server/runtime/scheduler.js +534 -534
  131. package/server/runtime/session-engine.js +293 -293
  132. package/server/runtime/state-manager.js +188 -188
  133. package/server/security/cross-site-redactor.js +196 -0
  134. package/server/security/dry-run.js +180 -0
  135. package/server/security/human-gate-rate-limit.js +147 -0
  136. package/server/security/human-gate-transports.js +178 -0
  137. package/server/security/human-gate.js +281 -0
  138. package/server/security/index.js +368 -368
  139. package/server/security/intent-engine.js +245 -0
  140. package/server/security/reward-guard.js +171 -0
  141. package/server/security/rollback-store.js +239 -0
  142. package/server/security/token-scope.js +404 -0
  143. package/server/security/url-policy.js +139 -0
  144. package/server/services/agent-chat.js +506 -506
  145. package/server/services/agent-learning.js +601 -575
  146. package/server/services/agent-memory.js +625 -625
  147. package/server/services/agent-mesh.js +555 -539
  148. package/server/services/agent-symphony.js +717 -717
  149. package/server/services/agent-tasks.js +1807 -1807
  150. package/server/services/api-key-engine.js +292 -0
  151. package/server/services/cluster.js +894 -894
  152. package/server/services/commander.js +738 -738
  153. package/server/services/edge-compute.js +440 -440
  154. package/server/services/email.js +204 -204
  155. package/server/services/hosted-runtime.js +205 -205
  156. package/server/services/lfd.js +635 -616
  157. package/server/services/local-ai.js +389 -389
  158. package/server/services/marketplace.js +270 -270
  159. package/server/services/metering.js +182 -182
  160. package/server/services/modules/affiliate-intelligence.js +93 -0
  161. package/server/services/modules/agent-firewall.js +90 -0
  162. package/server/services/modules/bounty.js +89 -0
  163. package/server/services/modules/collective-bargaining.js +92 -0
  164. package/server/services/modules/dark-pattern.js +66 -0
  165. package/server/services/modules/gov-intelligence.js +45 -0
  166. package/server/services/modules/neural.js +55 -0
  167. package/server/services/modules/notary.js +49 -0
  168. package/server/services/modules/price-time-machine.js +86 -0
  169. package/server/services/modules/protocol.js +104 -0
  170. package/server/services/negotiation.js +439 -439
  171. package/server/services/plugins.js +771 -771
  172. package/server/services/premium.js +1 -1
  173. package/server/services/price-intelligence.js +566 -565
  174. package/server/services/price-shield.js +1137 -1137
  175. package/server/services/reputation.js +465 -465
  176. package/server/services/search-engine.js +357 -357
  177. package/server/services/security.js +513 -513
  178. package/server/services/self-healing.js +843 -843
  179. package/server/services/sovereign-shield.js +542 -0
  180. package/server/services/stripe.js +192 -192
  181. package/server/services/swarm.js +788 -788
  182. package/server/services/universal-scraper.js +662 -661
  183. package/server/services/verification.js +481 -481
  184. package/server/services/vision.js +1163 -1163
  185. package/server/utils/cache.js +125 -125
  186. package/server/utils/migrate.js +81 -81
  187. package/server/utils/safe-fetch.js +228 -0
  188. package/server/utils/secureFields.js +50 -50
  189. package/server/ws.js +161 -161
  190. package/templates/artisan-marketplace.yaml +104 -104
  191. package/templates/book-price-scout.yaml +98 -98
  192. package/templates/electronics-price-tracker.yaml +108 -108
  193. package/templates/flight-deal-hunter.yaml +113 -113
  194. package/templates/freelancer-direct.yaml +116 -116
  195. package/templates/grocery-price-compare.yaml +93 -93
  196. package/templates/hotel-direct-booking.yaml +113 -113
  197. package/templates/local-services.yaml +98 -98
  198. package/templates/olive-oil-tunisia.yaml +88 -88
  199. package/templates/organic-farm-fresh.yaml +101 -101
  200. package/templates/restaurant-direct.yaml +97 -97
  201. package/server/services/fairness-engine.js +0 -409
  202. package/server/services/fairness.js +0 -420
package/server/services/local-ai.js
@@ -1,389 +1,389 @@
/**
 * Local AI — Sovereign Intelligence Runtime
 *
 * Manages local AI models running on the user's own hardware.
 * Auto-detects Ollama, llama.cpp, and any OpenAI-compatible local endpoint.
 * Routes inference requests to the best available model based on capability,
 * context window, and current load.
 *
 * Supported Providers:
 * - Ollama (http://localhost:11434)
 * - llama.cpp server (http://localhost:8080)
 * - Custom OpenAI-compatible endpoints
 *
 * All inference happens locally. No data leaves the device.
 */

const crypto = require('crypto');
const { db } = require('../models/db');

// ─── Schema ──────────────────────────────────────────────────────────

db.exec(`
  CREATE TABLE IF NOT EXISTS local_models (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL,
    provider TEXT NOT NULL,
    model_name TEXT NOT NULL,
    endpoint TEXT NOT NULL,
    capabilities TEXT DEFAULT '["text"]',
    context_window INTEGER DEFAULT 4096,
    parameters TEXT DEFAULT '{}',
    status TEXT DEFAULT 'available',
    total_requests INTEGER DEFAULT 0,
    total_tokens INTEGER DEFAULT 0,
    avg_latency_ms REAL DEFAULT 0,
    last_used TEXT,
    last_probe TEXT,
    created_at TEXT DEFAULT (datetime('now')),
    UNIQUE(site_id, provider, model_name)
  );

  CREATE TABLE IF NOT EXISTS local_inference_log (
    id TEXT PRIMARY KEY,
    model_id TEXT NOT NULL,
    task_type TEXT,
    prompt_tokens INTEGER DEFAULT 0,
    completion_tokens INTEGER DEFAULT 0,
    latency_ms INTEGER DEFAULT 0,
    success INTEGER DEFAULT 1,
    created_at TEXT DEFAULT (datetime('now'))
  );

  CREATE INDEX IF NOT EXISTS idx_local_models_site ON local_models(site_id);
  CREATE INDEX IF NOT EXISTS idx_local_models_status ON local_models(status);
  CREATE INDEX IF NOT EXISTS idx_local_inference_model ON local_inference_log(model_id);
`);

// ─── Prepared Statements ─────────────────────────────────────────────

const stmts = {
  upsertModel: db.prepare("INSERT INTO local_models (id, site_id, provider, model_name, endpoint, capabilities, context_window, parameters) VALUES (?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, provider, model_name) DO UPDATE SET endpoint = ?, capabilities = ?, context_window = ?, parameters = ?, status = 'available', last_probe = datetime('now')"),
  getModel: db.prepare('SELECT * FROM local_models WHERE id = ?'),
  getModels: db.prepare('SELECT * FROM local_models WHERE site_id = ? ORDER BY provider, model_name'),
  getAvailableModels: db.prepare("SELECT * FROM local_models WHERE site_id = ? AND status = 'available' ORDER BY avg_latency_ms ASC"),
  getModelsByCapability: db.prepare("SELECT * FROM local_models WHERE site_id = ? AND status = 'available' AND capabilities LIKE ? ORDER BY avg_latency_ms ASC"),
  updateModelStatus: db.prepare('UPDATE local_models SET status = ?, last_probe = datetime(\'now\') WHERE id = ?'),
  updateModelStats: db.prepare("UPDATE local_models SET total_requests = total_requests + 1, total_tokens = total_tokens + ?, avg_latency_ms = (avg_latency_ms * total_requests + ?) / (total_requests + 1), last_used = datetime('now') WHERE id = ?"),
  insertLog: db.prepare('INSERT INTO local_inference_log (id, model_id, task_type, prompt_tokens, completion_tokens, latency_ms, success) VALUES (?, ?, ?, ?, ?, ?, ?)'),
  getStats: db.prepare(`SELECT
    (SELECT COUNT(*) FROM local_models WHERE site_id = ? AND status = 'available') as available_models,
    (SELECT COUNT(*) FROM local_models WHERE site_id = ?) as total_models,
    (SELECT SUM(total_requests) FROM local_models WHERE site_id = ?) as total_requests,
    (SELECT SUM(total_tokens) FROM local_models WHERE site_id = ?) as total_tokens,
    (SELECT AVG(avg_latency_ms) FROM local_models WHERE site_id = ? AND status = 'available') as avg_latency`),
};

// ─── Default Provider Endpoints ──────────────────────────────────────

const PROVIDERS = {
  ollama: { name: 'ollama', baseUrl: 'http://localhost:11434', tagsPath: '/api/tags', chatPath: '/api/chat', generatePath: '/api/generate' },
  llamacpp: { name: 'llamacpp', baseUrl: 'http://localhost:8080', chatPath: '/v1/chat/completions', modelsPath: '/v1/models' },
};

// ─── Model Discovery ─────────────────────────────────────────────────

/**
 * Probe local endpoints and register discovered models.
 */
async function discoverModels(siteId, customEndpoints = []) {
  const discovered = [];

  // Probe Ollama
  try {
    const ollamaModels = await _probeOllama(PROVIDERS.ollama.baseUrl);
    for (const m of ollamaModels) {
      const result = _registerModel(siteId, 'ollama', m.name, PROVIDERS.ollama.baseUrl, m.capabilities, m.contextWindow, m.parameters);
      discovered.push(result);
    }
  } catch (_) { /* Ollama not running */ }

  // Probe llama.cpp
  try {
    const lcModels = await _probeLlamaCpp(PROVIDERS.llamacpp.baseUrl);
    for (const m of lcModels) {
      const result = _registerModel(siteId, 'llamacpp', m.name, PROVIDERS.llamacpp.baseUrl, m.capabilities, m.contextWindow, m.parameters);
      discovered.push(result);
    }
  } catch (_) { /* llama.cpp not running */ }

  // Probe custom endpoints
  for (const ep of customEndpoints) {
    try {
      const models = await _probeOpenAICompatible(ep.url);
      for (const m of models) {
        const result = _registerModel(siteId, ep.name || 'custom', m.name, ep.url, m.capabilities, m.contextWindow, m.parameters);
        discovered.push(result);
      }
    } catch (_) { /* endpoint not available */ }
  }

  return { discovered: discovered.length, models: discovered };
}

/**
 * Register a model manually.
 */
function registerModel(siteId, provider, modelName, endpoint, capabilities = ['text'], contextWindow = 4096) {
  return _registerModel(siteId, provider, modelName, endpoint, capabilities, contextWindow, {});
}

function _registerModel(siteId, provider, modelName, endpoint, capabilities, contextWindow, parameters) {
  const id = crypto.randomUUID();
  const caps = JSON.stringify(capabilities);
  const params = JSON.stringify(parameters);

  stmts.upsertModel.run(id, siteId, provider, modelName, endpoint, caps, contextWindow, params, endpoint, caps, contextWindow, params);
  return { id, provider, modelName, endpoint, capabilities, contextWindow };
}

// ─── Inference ───────────────────────────────────────────────────────

/**
 * Run inference on the best available local model.
 * @param {string} siteId
 * @param {string} prompt - The user prompt
 * @param {object} options - { capability, model, systemPrompt, temperature, maxTokens, stream }
 */
async function infer(siteId, prompt, options = {}) {
  const capability = options.capability || 'text';

  // Select model
  let model;
  if (options.modelId) {
    model = stmts.getModel.get(options.modelId);
    if (!model || model.status !== 'available') throw new Error('Selected model unavailable');
  } else {
    const candidates = stmts.getModelsByCapability.all(siteId, `%${capability}%`);
    if (candidates.length === 0) throw new Error(`No local model available for capability: ${capability}`);
    model = candidates[0]; // Fastest by avg latency
  }

  const start = Date.now();
  let result;

  try {
    const parsed = JSON.parse(model.parameters || '{}');
    if (model.provider === 'ollama') {
      result = await _inferOllama(model, prompt, options);
    } else if (model.provider === 'llamacpp') {
      result = await _inferLlamaCpp(model, prompt, options);
    } else {
      result = await _inferOpenAICompatible(model, prompt, options);
    }
  } catch (err) {
    const latency = Date.now() - start;
    stmts.insertLog.run(crypto.randomUUID(), model.id, capability, 0, 0, latency, 0);
    throw err;
  }

  const latency = Date.now() - start;
  const totalTokens = (result.promptTokens || 0) + (result.completionTokens || 0);

  stmts.updateModelStats.run(totalTokens, latency, model.id);
  stmts.insertLog.run(crypto.randomUUID(), model.id, capability, result.promptTokens || 0, result.completionTokens || 0, latency, 1);

  return {
    modelId: model.id,
    provider: model.provider,
    model: model.model_name,
    response: result.text,
    promptTokens: result.promptTokens || 0,
    completionTokens: result.completionTokens || 0,
    latencyMs: latency,
  };
}

// ─── Model Management ────────────────────────────────────────────────

function getModels(siteId) {
  return stmts.getModels.all(siteId).map(_deserializeModel);
}

function getAvailableModels(siteId) {
  return stmts.getAvailableModels.all(siteId).map(_deserializeModel);
}

function getModel(modelId) {
  const row = stmts.getModel.get(modelId);
  return row ? _deserializeModel(row) : null;
}

function updateModelStatus(modelId, status) {
  stmts.updateModelStatus.run(status, modelId);
}

function getStats(siteId) {
  const row = stmts.getStats.get(siteId, siteId, siteId, siteId, siteId);
  return {
    availableModels: row.available_models || 0,
    totalModels: row.total_models || 0,
    totalRequests: row.total_requests || 0,
    totalTokens: row.total_tokens || 0,
    avgLatency: row.avg_latency ? Math.round(row.avg_latency) : 0,
  };
}

// ─── Provider-Specific Inference ─────────────────────────────────────

async function _inferOllama(model, prompt, options) {
  const body = {
    model: model.model_name,
    messages: [],
    stream: false,
    options: {},
  };

  if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
  body.messages.push({ role: 'user', content: prompt });
  if (options.temperature != null) body.options.temperature = options.temperature;

  const res = await fetch(`${model.endpoint}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(options.timeout || 120000),
  });

  if (!res.ok) throw new Error(`Ollama error: ${res.status}`);
  const data = await res.json();

  return {
    text: data.message?.content || '',
    promptTokens: data.prompt_eval_count || 0,
    completionTokens: data.eval_count || 0,
  };
}

async function _inferLlamaCpp(model, prompt, options) {
  const body = {
    model: model.model_name,
    messages: [],
    max_tokens: options.maxTokens || 2048,
    stream: false,
  };

  if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
  body.messages.push({ role: 'user', content: prompt });
  if (options.temperature != null) body.temperature = options.temperature;

  const res = await fetch(`${model.endpoint}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(options.timeout || 120000),
  });

  if (!res.ok) throw new Error(`llama.cpp error: ${res.status}`);
  const data = await res.json();

  return {
    text: data.choices?.[0]?.message?.content || '',
    promptTokens: data.usage?.prompt_tokens || 0,
    completionTokens: data.usage?.completion_tokens || 0,
  };
}

async function _inferOpenAICompatible(model, prompt, options) {
  const body = {
    model: model.model_name,
    messages: [],
    max_tokens: options.maxTokens || 2048,
    stream: false,
  };

  if (options.systemPrompt) body.messages.push({ role: 'system', content: options.systemPrompt });
  body.messages.push({ role: 'user', content: prompt });
  if (options.temperature != null) body.temperature = options.temperature;

  const res = await fetch(`${model.endpoint}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(options.timeout || 120000),
  });

  if (!res.ok) throw new Error(`Inference error: ${res.status}`);
  const data = await res.json();

  return {
    text: data.choices?.[0]?.message?.content || '',
    promptTokens: data.usage?.prompt_tokens || 0,
    completionTokens: data.usage?.completion_tokens || 0,
  };
}

// ─── Provider Probing ────────────────────────────────────────────────

async function _probeOllama(baseUrl) {
  const res = await fetch(`${baseUrl}/api/tags`, { signal: AbortSignal.timeout(5000) });
  if (!res.ok) return [];
  const data = await res.json();
  return (data.models || []).map(m => ({
    name: m.name,
    capabilities: _detectCapabilities(m.name),
    contextWindow: m.details?.parameter_size ? _estimateContext(m.details.parameter_size) : 4096,
    parameters: { size: m.size, family: m.details?.family },
  }));
}

async function _probeLlamaCpp(baseUrl) {
  const res = await fetch(`${baseUrl}/v1/models`, { signal: AbortSignal.timeout(5000) });
  if (!res.ok) return [];
  const data = await res.json();
  return (data.data || []).map(m => ({
    name: m.id,
    capabilities: _detectCapabilities(m.id),
    contextWindow: 4096,
    parameters: {},
  }));
}

async function _probeOpenAICompatible(baseUrl) {
  const res = await fetch(`${baseUrl}/v1/models`, { signal: AbortSignal.timeout(5000) });
  if (!res.ok) return [];
  const data = await res.json();
  return (data.data || []).map(m => ({
    name: m.id,
    capabilities: _detectCapabilities(m.id),
    contextWindow: 4096,
    parameters: {},
  }));
}

// ─── Helpers ─────────────────────────────────────────────────────────

function _detectCapabilities(modelName) {
  const n = modelName.toLowerCase();
  const caps = ['text'];
  if (n.includes('vision') || n.includes('llava') || n.includes('bakllava')) caps.push('vision');
  if (n.includes('code') || n.includes('codellama') || n.includes('deepseek-coder') || n.includes('starcoder')) caps.push('code');
  if (n.includes('embed') || n.includes('nomic')) caps.push('embedding');
  if (n.includes('mistral') || n.includes('mixtral')) caps.push('reasoning');
  return caps;
}

function _estimateContext(paramSize) {
  // Rough estimate: smaller models typically have smaller context
  if (typeof paramSize === 'string') {
    const num = parseFloat(paramSize);
    if (num >= 70) return 32768;
    if (num >= 13) return 8192;
    return 4096;
  }
  return 4096;
}

function _deserializeModel(row) {
  return {
    ...row,
    capabilities: JSON.parse(row.capabilities || '["text"]'),
    parameters: JSON.parse(row.parameters || '{}'),
  };
}

module.exports = {
  discoverModels, registerModel, infer,
  getModels, getAvailableModels, getModel, updateModelStatus,
  getStats,
};
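
For orientation, a minimal usage sketch of the module above. The require path, the 'site-123' site ID, and the prompt are illustrative placeholders rather than values documented by the package; the sketch assumes a local provider (Ollama or llama.cpp) is actually listening on its default port so that discovery registers at least one model.

// Hypothetical path relative to the package root; adjust to where the service lives in your install.
const localAI = require('./server/services/local-ai');

async function demo() {
  // Probe localhost for Ollama / llama.cpp and register whatever responds.
  const { discovered } = await localAI.discoverModels('site-123');
  console.log(`registered ${discovered} local models`);

  // Route the prompt to the fastest registered model that advertises the 'text' capability.
  const result = await localAI.infer('site-123', 'Summarize this order in one sentence.', {
    capability: 'text',
    temperature: 0.2,
  });
  console.log(`${result.provider}/${result.model} (${result.latencyMs} ms):`, result.response);
}

demo().catch(console.error);

If nothing is listening on the default endpoints, infer() throws "No local model available for capability: text", which is the expected failure mode rather than a silent fallback to a remote provider.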