twinclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. package/README.md +66 -0
  2. package/bin/npm-twinclaw.js +17 -0
  3. package/bin/run-twinbot-cli.js +36 -0
  4. package/bin/twinbot.js +4 -0
  5. package/bin/twinclaw.js +4 -0
  6. package/dist/api/handlers/browser.js +160 -0
  7. package/dist/api/handlers/callback.js +80 -0
  8. package/dist/api/handlers/config-validate.js +19 -0
  9. package/dist/api/handlers/health.js +117 -0
  10. package/dist/api/handlers/local-state-backup.js +118 -0
  11. package/dist/api/handlers/persona-state.js +59 -0
  12. package/dist/api/handlers/skill-packages.js +94 -0
  13. package/dist/api/router.js +278 -0
  14. package/dist/api/runtime-event-producer.js +99 -0
  15. package/dist/api/shared.js +82 -0
  16. package/dist/api/websocket-hub.js +305 -0
  17. package/dist/config/config-loader.js +2 -0
  18. package/dist/config/env-schema.js +202 -0
  19. package/dist/config/env-validator.js +223 -0
  20. package/dist/config/identity-bootstrap.js +115 -0
  21. package/dist/config/json-config.js +344 -0
  22. package/dist/config/workspace.js +186 -0
  23. package/dist/core/channels-cli.js +77 -0
  24. package/dist/core/cli.js +119 -0
  25. package/dist/core/context-assembly.js +33 -0
  26. package/dist/core/doctor.js +365 -0
  27. package/dist/core/gateway-cli.js +323 -0
  28. package/dist/core/gateway.js +416 -0
  29. package/dist/core/heartbeat.js +54 -0
  30. package/dist/core/install-cli.js +320 -0
  31. package/dist/core/lane-executor.js +134 -0
  32. package/dist/core/logs-cli.js +70 -0
  33. package/dist/core/onboarding.js +760 -0
  34. package/dist/core/pairing-cli.js +78 -0
  35. package/dist/core/secret-vault-cli.js +204 -0
  36. package/dist/core/types.js +1 -0
  37. package/dist/index.js +404 -0
  38. package/dist/interfaces/dispatcher.js +214 -0
  39. package/dist/interfaces/telegram_handler.js +82 -0
  40. package/dist/interfaces/tui-dashboard.js +53 -0
  41. package/dist/interfaces/whatsapp_handler.js +94 -0
  42. package/dist/release/cli.js +97 -0
  43. package/dist/release/mvp-gate-cli.js +118 -0
  44. package/dist/release/twinbot-config-schema.js +162 -0
  45. package/dist/release/twinclaw-config-schema.js +162 -0
  46. package/dist/services/block-chunker.js +174 -0
  47. package/dist/services/browser-service.js +334 -0
  48. package/dist/services/context-lifecycle.js +314 -0
  49. package/dist/services/db.js +1055 -0
  50. package/dist/services/delivery-tracker.js +110 -0
  51. package/dist/services/dm-pairing.js +245 -0
  52. package/dist/services/embedding-service.js +125 -0
  53. package/dist/services/file-watcher.js +125 -0
  54. package/dist/services/inbound-debounce.js +92 -0
  55. package/dist/services/incident-manager.js +516 -0
  56. package/dist/services/job-scheduler.js +176 -0
  57. package/dist/services/local-state-backup.js +682 -0
  58. package/dist/services/mcp-client-adapter.js +291 -0
  59. package/dist/services/mcp-server-manager.js +143 -0
  60. package/dist/services/model-router.js +927 -0
  61. package/dist/services/mvp-gate.js +845 -0
  62. package/dist/services/orchestration-service.js +422 -0
  63. package/dist/services/persona-state.js +256 -0
  64. package/dist/services/policy-engine.js +92 -0
  65. package/dist/services/proactive-notifier.js +94 -0
  66. package/dist/services/queue-service.js +146 -0
  67. package/dist/services/release-pipeline.js +652 -0
  68. package/dist/services/runtime-budget-governor.js +415 -0
  69. package/dist/services/secret-vault.js +704 -0
  70. package/dist/services/semantic-memory.js +249 -0
  71. package/dist/services/skill-package-manager.js +806 -0
  72. package/dist/services/skill-registry.js +122 -0
  73. package/dist/services/streaming-output.js +75 -0
  74. package/dist/services/stt-service.js +39 -0
  75. package/dist/services/tts-service.js +44 -0
  76. package/dist/skills/builtin.js +250 -0
  77. package/dist/skills/shell.js +87 -0
  78. package/dist/skills/types.js +1 -0
  79. package/dist/types/api.js +1 -0
  80. package/dist/types/context-budget.js +1 -0
  81. package/dist/types/doctor.js +1 -0
  82. package/dist/types/file-watcher.js +1 -0
  83. package/dist/types/incident.js +1 -0
  84. package/dist/types/local-state-backup.js +1 -0
  85. package/dist/types/mcp.js +1 -0
  86. package/dist/types/messaging.js +1 -0
  87. package/dist/types/model-routing.js +1 -0
  88. package/dist/types/mvp-gate.js +2 -0
  89. package/dist/types/orchestration.js +1 -0
  90. package/dist/types/persona-state.js +22 -0
  91. package/dist/types/policy.js +1 -0
  92. package/dist/types/reasoning-graph.js +1 -0
  93. package/dist/types/release.js +1 -0
  94. package/dist/types/reliability.js +1 -0
  95. package/dist/types/runtime-budget.js +1 -0
  96. package/dist/types/scheduler.js +1 -0
  97. package/dist/types/secret-vault.js +1 -0
  98. package/dist/types/skill-packages.js +1 -0
  99. package/dist/types/websocket.js +14 -0
  100. package/dist/utils/logger.js +57 -0
  101. package/dist/utils/retry.js +61 -0
  102. package/dist/utils/secret-scan.js +208 -0
  103. package/mcp-servers.json +179 -0
  104. package/package.json +81 -0
  105. package/skill-packages.json +92 -0
  106. package/skill-packages.lock.json +5 -0
  107. package/src/skills/builtin.ts +275 -0
  108. package/src/skills/shell.ts +118 -0
  109. package/src/skills/types.ts +30 -0
  110. package/src/types/api.ts +252 -0
  111. package/src/types/blessed-contrib.d.ts +4 -0
  112. package/src/types/context-budget.ts +76 -0
  113. package/src/types/doctor.ts +29 -0
  114. package/src/types/file-watcher.ts +26 -0
  115. package/src/types/incident.ts +57 -0
  116. package/src/types/local-state-backup.ts +121 -0
  117. package/src/types/mcp.ts +106 -0
  118. package/src/types/messaging.ts +35 -0
  119. package/src/types/model-routing.ts +61 -0
  120. package/src/types/mvp-gate.ts +99 -0
  121. package/src/types/orchestration.ts +65 -0
  122. package/src/types/persona-state.ts +61 -0
  123. package/src/types/policy.ts +27 -0
  124. package/src/types/reasoning-graph.ts +58 -0
  125. package/src/types/release.ts +115 -0
  126. package/src/types/reliability.ts +43 -0
  127. package/src/types/runtime-budget.ts +85 -0
  128. package/src/types/scheduler.ts +47 -0
  129. package/src/types/secret-vault.ts +62 -0
  130. package/src/types/skill-packages.ts +81 -0
  131. package/src/types/sqlite-vec.d.ts +5 -0
  132. package/src/types/websocket.ts +122 -0
@@ -0,0 +1,927 @@
1
+ import { randomUUID } from 'node:crypto';
2
+ import { scrubSensitiveText } from '../utils/logger.js';
3
+ import { getModelRoutingSetting, listModelRoutingEvents, saveModelRoutingEvent, saveModelRoutingSetting, } from './db.js';
4
+ import { getSecretVaultService } from './secret-vault.js';
5
+ import { RuntimeBudgetGovernor } from './runtime-budget-governor.js';
6
+ import { getConfigValue } from '../config/config-loader.js';
7
+ const CHARS_PER_TOKEN = 4;
8
+ const DEFAULT_SESSION_ID = 'global';
9
+ const FALLBACK_MODE_SETTING_KEY = 'fallback_mode';
10
+ const DEFAULT_FALLBACK_MODE = 'aggressive_fallback';
11
+ const DEFAULT_RATE_LIMIT_COOLDOWN_MS = 30_000;
12
+ const DEFAULT_INTELLIGENT_PACING_MAX_WAIT_MS = 5_000;
13
+ const DEFAULT_MAX_RUNTIME_EVENTS = 120;
14
+ const DEFAULT_MAX_PERSISTED_EVENTS = 500;
15
+ const DEFAULT_BOOTSTRAP_EVENT_COUNT = 30;
16
/**
 * Checks whether a value is one of the two recognised fallback-mode identifiers.
 *
 * @param {unknown} value - Candidate value (for example a persisted setting or a config string).
 * @returns {boolean} `true` only for `'intelligent_pacing'` or `'aggressive_fallback'`.
 */
function isFallbackMode(value) {
    switch (value) {
        case 'intelligent_pacing':
        case 'aggressive_fallback':
            return true;
        default:
            return false;
    }
}
19
/**
 * Converts a `Retry-After` header value into a delay in milliseconds.
 *
 * Two forms are accepted: a non-negative number of seconds, or a date string
 * understood by `Date.parse`. Dates in the past yield `0`.
 *
 * @param {string | null | undefined} rawHeader - Raw header value, if any.
 * @returns {number | null} Delay in milliseconds, or `null` when the header is
 *   missing, blank, negative, or not parseable.
 */
function parseRetryAfterMs(rawHeader) {
    if (!rawHeader) {
        return null;
    }
    // `Number('   ')` is 0, so a blank header must be rejected before numeric
    // parsing; otherwise it would be read as "retry immediately".
    const text = String(rawHeader).trim();
    if (text === '') {
        return null;
    }
    const asSeconds = Number(text);
    if (Number.isFinite(asSeconds)) {
        // A numeric value is never a date: reject negatives instead of letting
        // them fall through to the lenient date parser.
        return asSeconds >= 0 ? Math.floor(asSeconds * 1_000) : null;
    }
    const asDateMs = Date.parse(text);
    if (!Number.isFinite(asDateMs)) {
        return null;
    }
    return Math.max(0, asDateMs - Date.now());
}
33
/**
 * Extracts the human-readable detail from a stored event payload.
 *
 * @param {string} raw - Stored payload, expected to be JSON text with a string `detail` field.
 * @returns {string} The `detail` string when present; otherwise the input unchanged
 *   (including when it is not valid JSON).
 */
function parseEventDetail(raw) {
    let decoded;
    try {
        decoded = JSON.parse(raw);
    }
    catch {
        // Not JSON: the raw text itself is the detail.
        return raw;
    }
    const detail = decoded?.detail;
    return typeof detail === 'string' ? detail : raw;
}
45
+ export class ModelRouter {
46
+ models;
47
+ preferredModelIndex = 0;
48
+ budgetGovernor;
49
+ usageByModel = new Map();
50
+ runtimeEvents = [];
51
+ nowFn;
52
+ sleepFn;
53
+ defaultRateLimitCooldownMs;
54
+ intelligentPacingMaxWaitMs;
55
+ maxRuntimeEvents;
56
+ maxPersistedEvents;
57
+ fallbackMode;
58
+ currentModelId = null;
59
+ metrics = {
60
+ totalRequests: 0,
61
+ totalFailures: 0,
62
+ consecutiveFailures: 0,
63
+ failoverCount: 0,
64
+ lastError: null,
65
+ lastFailureAt: null,
66
+ };
67
+ constructor(options = {}) {
68
+ this.models = [
69
+ {
70
+ id: 'primary',
71
+ model: 'zai-org/GLM-5-FP8',
72
+ baseURL: 'https://api.us-west-2.modal.direct/v1/chat/completions',
73
+ apiKeyEnvName: 'MODAL_API_KEY',
74
+ },
75
+ {
76
+ id: 'fallback_1',
77
+ model: 'stepfun/step-3.5-flash:free',
78
+ baseURL: 'https://openrouter.ai/api/v1/chat/completions',
79
+ apiKeyEnvName: 'OPENROUTER_API_KEY',
80
+ },
81
+ {
82
+ id: 'fallback_2',
83
+ model: 'gemini-flash-lite-latest',
84
+ baseURL: 'https://generativelanguage.googleapis.com/v1beta/openai/chat/completions',
85
+ apiKeyEnvName: 'GEMINI_API_KEY',
86
+ },
87
+ ];
88
+ this.budgetGovernor = options.budgetGovernor ?? new RuntimeBudgetGovernor();
89
+ this.nowFn = options.now ?? (() => Date.now());
90
+ this.sleepFn = options.sleep ?? sleep;
91
+ this.defaultRateLimitCooldownMs = Math.max(1_000, Math.floor(options.defaultRateLimitCooldownMs ?? DEFAULT_RATE_LIMIT_COOLDOWN_MS));
92
+ this.intelligentPacingMaxWaitMs = Math.max(0, Math.floor(options.intelligentPacingMaxWaitMs ?? DEFAULT_INTELLIGENT_PACING_MAX_WAIT_MS));
93
+ this.maxRuntimeEvents = Math.max(10, Math.floor(options.maxRuntimeEvents ?? DEFAULT_MAX_RUNTIME_EVENTS));
94
+ this.maxPersistedEvents = Math.max(50, Math.floor(options.maxPersistedEvents ?? DEFAULT_MAX_PERSISTED_EVENTS));
95
+ for (const model of this.models) {
96
+ this.usageByModel.set(model.id, {
97
+ modelId: model.id,
98
+ modelName: model.model,
99
+ provider: this.resolveProviderId(model),
100
+ attempts: 0,
101
+ successes: 0,
102
+ failures: 0,
103
+ rateLimits: 0,
104
+ lastUsedAt: null,
105
+ lastError: null,
106
+ cooldownUntilMs: null,
107
+ cooldownReason: null,
108
+ });
109
+ }
110
+ this.fallbackMode = this.resolveInitialFallbackMode(options.fallbackMode);
111
+ this.hydrateRecentEvents();
112
+ }
113
+ getFallbackMode() {
114
+ return this.fallbackMode;
115
+ }
116
+ setFallbackMode(mode) {
117
+ if (mode === this.fallbackMode) {
118
+ return this.getHealthSnapshot();
119
+ }
120
+ const previousMode = this.fallbackMode;
121
+ this.fallbackMode = mode;
122
+ this.persistFallbackMode(mode);
123
+ this.recordEvent('mode_change', null, `Fallback mode changed ${previousMode} -> ${mode}.`);
124
+ return this.getHealthSnapshot();
125
+ }
126
+ getHealthSnapshot() {
127
+ const activeCooldowns = this.getActiveCooldowns();
128
+ const usage = this.models
129
+ .map((model) => this.usageByModel.get(model.id))
130
+ .filter((entry) => Boolean(entry))
131
+ .map((entry) => ({
132
+ modelId: entry.modelId,
133
+ modelName: entry.modelName,
134
+ provider: entry.provider,
135
+ attempts: entry.attempts,
136
+ successes: entry.successes,
137
+ failures: entry.failures,
138
+ rateLimits: entry.rateLimits,
139
+ lastUsedAt: entry.lastUsedAt,
140
+ lastError: entry.lastError,
141
+ }));
142
+ return {
143
+ fallbackMode: this.fallbackMode,
144
+ preferredModelId: this.models[this.preferredModelIndex]?.id ?? null,
145
+ currentModelId: this.currentModelId,
146
+ currentModelName: this.currentModelId
147
+ ? this.models.find((model) => model.id === this.currentModelId)?.model ?? null
148
+ : null,
149
+ totalRequests: this.metrics.totalRequests,
150
+ totalFailures: this.metrics.totalFailures,
151
+ consecutiveFailures: this.metrics.consecutiveFailures,
152
+ failoverCount: this.metrics.failoverCount,
153
+ lastError: this.metrics.lastError,
154
+ lastFailureAt: this.metrics.lastFailureAt,
155
+ activeCooldowns,
156
+ usage,
157
+ recentEvents: [...this.runtimeEvents],
158
+ operatorGuidance: this.buildOperatorGuidance(activeCooldowns),
159
+ };
160
+ }
161
+ getBudgetSnapshot(sessionId) {
162
+ return this.budgetGovernor.getSnapshot(sessionId ?? DEFAULT_SESSION_ID, 80);
163
+ }
164
+ setBudgetProfile(profile, sessionId) {
165
+ this.budgetGovernor.setManualProfile(profile, sessionId ?? DEFAULT_SESSION_ID);
166
+ }
167
+ resetBudgetPolicyState(sessionId) {
168
+ this.budgetGovernor.resetPolicyState(sessionId ?? DEFAULT_SESSION_ID);
169
+ }
170
+ forceFailover() {
171
+ const previousModelId = this.models[this.preferredModelIndex]?.id ?? null;
172
+ if (!this.models.length) {
173
+ return { previousModelId, nextModelId: null };
174
+ }
175
+ this.preferredModelIndex = (this.preferredModelIndex + 1) % this.models.length;
176
+ this.metrics.failoverCount += 1;
177
+ const nextModelId = this.models[this.preferredModelIndex]?.id ?? null;
178
+ this.recordEvent('failover', this.models[this.preferredModelIndex] ?? null, `Forced failover applied: ${previousModelId ?? 'none'} -> ${nextModelId ?? 'none'}.`);
179
+ console.log(`[Router] Forced failover applied: ${previousModelId} -> ${nextModelId}`);
180
+ return { previousModelId, nextModelId };
181
+ }
182
+ resetPreferredModel() {
183
+ this.preferredModelIndex = 0;
184
+ }
185
+ async createChatCompletion(messages, tools, context = {}) {
186
+ let lastError = null;
187
+ let lastTriedModelId = null;
188
+ const sessionId = context.sessionId ?? DEFAULT_SESSION_ID;
189
+ const directive = this.budgetGovernor.getRoutingDirective(sessionId);
190
+ if (directive.pacingDelayMs > 0) {
191
+ await this.sleepFn(directive.pacingDelayMs);
192
+ }
193
+ const orderedModels = this.getOrderedModels(directive.profile);
194
+ const formattedTools = tools?.length
195
+ ? tools.map((tool) => ({
196
+ type: 'function',
197
+ function: {
198
+ name: tool.name,
199
+ description: tool.description,
200
+ parameters: tool.parameters,
201
+ },
202
+ }))
203
+ : undefined;
204
+ const estimatedRequestTokens = estimateRequestTokens(messages, formattedTools);
205
+ for (const config of orderedModels) {
206
+ const providerId = this.resolveProviderId(config);
207
+ if (directive.blockedProviders.includes(providerId) || directive.blockedModelIds.includes(config.id)) {
208
+ this.budgetGovernor.recordUsage({
209
+ sessionId,
210
+ modelId: config.id,
211
+ providerId,
212
+ profile: directive.profile,
213
+ stage: 'skipped',
214
+ requestTokens: estimatedRequestTokens,
215
+ responseTokens: 0,
216
+ latencyMs: 0,
217
+ error: `Skipped by runtime budget policy (${directive.severity}).`,
218
+ });
219
+ this.recordEvent('cooldown_skip', config, `Skipped by runtime budget policy (${directive.severity}).`);
220
+ continue;
221
+ }
222
+ const preflightCooldown = await this.resolveCooldownPreflight(config);
223
+ if (!preflightCooldown) {
224
+ lastTriedModelId = config.id;
225
+ continue;
226
+ }
227
+ const apiKey = this.getApiKey(config.apiKeyEnvName);
228
+ if (!apiKey) {
229
+ continue;
230
+ }
231
+ if (lastTriedModelId && lastTriedModelId !== config.id) {
232
+ this.metrics.failoverCount += 1;
233
+ this.recordEvent('failover', config, `Automatic fallback ${lastTriedModelId} -> ${config.id}.`);
234
+ }
235
+ const payload = {
236
+ model: config.model,
237
+ messages,
238
+ };
239
+ if (formattedTools) {
240
+ payload.tools = formattedTools;
241
+ payload.tool_choice = 'auto';
242
+ }
243
+ const firstAttempt = await this.executeHttpAttempt({
244
+ config,
245
+ apiKey,
246
+ payload,
247
+ providerId,
248
+ sessionId,
249
+ directive,
250
+ estimatedRequestTokens,
251
+ });
252
+ if (firstAttempt.ok && firstAttempt.message) {
253
+ return firstAttempt.message;
254
+ }
255
+ if (firstAttempt.statusCode === 429 &&
256
+ this.fallbackMode === 'intelligent_pacing' &&
257
+ typeof firstAttempt.rateLimitCooldownMs === 'number') {
258
+ const retryWaitMs = Math.min(firstAttempt.rateLimitCooldownMs, this.intelligentPacingMaxWaitMs);
259
+ if (retryWaitMs > 0) {
260
+ this.recordEvent('cooldown_wait', config, `Intelligent pacing wait ${retryWaitMs}ms before retrying ${config.id}.`);
261
+ await this.sleepFn(retryWaitMs);
262
+ }
263
+ const retryCooldown = this.getModelCooldownState(config.id);
264
+ if (retryCooldown.remainingMs <= 0) {
265
+ const retryAttempt = await this.executeHttpAttempt({
266
+ config,
267
+ apiKey,
268
+ payload,
269
+ providerId,
270
+ sessionId,
271
+ directive,
272
+ estimatedRequestTokens,
273
+ });
274
+ if (retryAttempt.ok && retryAttempt.message) {
275
+ return retryAttempt.message;
276
+ }
277
+ if (retryAttempt.errorMessage) {
278
+ lastError = new Error(retryAttempt.errorMessage);
279
+ }
280
+ }
281
+ else {
282
+ this.recordEvent('cooldown_skip', config, `Retry skipped; cooldown still active for ${retryCooldown.remainingMs}ms.`);
283
+ }
284
+ }
285
+ if (firstAttempt.errorMessage) {
286
+ lastError = new Error(firstAttempt.errorMessage);
287
+ }
288
+ lastTriedModelId = config.id;
289
+ }
290
+ throw new Error(`All configured models exhausted or failed. Last error: ${scrubSensitiveText(lastError?.message ?? 'unknown')}`);
291
+ }
292
+ async createStreamingChatCompletion(messages, onDelta, tools, context = {}) {
293
+ let lastError = null;
294
+ let lastTriedModelId = null;
295
+ const sessionId = context.sessionId ?? DEFAULT_SESSION_ID;
296
+ const directive = this.budgetGovernor.getRoutingDirective(sessionId);
297
+ if (directive.pacingDelayMs > 0) {
298
+ await this.sleepFn(directive.pacingDelayMs);
299
+ }
300
+ const orderedModels = this.getOrderedModels(directive.profile);
301
+ const formattedTools = tools?.length
302
+ ? tools.map((tool) => ({
303
+ type: 'function',
304
+ function: {
305
+ name: tool.name,
306
+ description: tool.description,
307
+ parameters: tool.parameters,
308
+ },
309
+ }))
310
+ : undefined;
311
+ const estimatedRequestTokens = estimateRequestTokens(messages, formattedTools);
312
+ for (const config of orderedModels) {
313
+ const providerId = this.resolveProviderId(config);
314
+ if (directive.blockedProviders.includes(providerId) || directive.blockedModelIds.includes(config.id)) {
315
+ this.budgetGovernor.recordUsage({
316
+ sessionId,
317
+ modelId: config.id,
318
+ providerId,
319
+ profile: directive.profile,
320
+ stage: 'skipped',
321
+ requestTokens: estimatedRequestTokens,
322
+ responseTokens: 0,
323
+ latencyMs: 0,
324
+ error: `Skipped by runtime budget policy (${directive.severity}).`,
325
+ });
326
+ this.recordEvent('cooldown_skip', config, `Skipped by runtime budget policy (${directive.severity}).`);
327
+ continue;
328
+ }
329
+ const preflightCooldown = await this.resolveCooldownPreflight(config);
330
+ if (!preflightCooldown) {
331
+ lastTriedModelId = config.id;
332
+ continue;
333
+ }
334
+ const apiKey = this.getApiKey(config.apiKeyEnvName);
335
+ if (!apiKey) {
336
+ continue;
337
+ }
338
+ if (lastTriedModelId && lastTriedModelId !== config.id) {
339
+ this.metrics.failoverCount += 1;
340
+ this.recordEvent('failover', config, `Automatic fallback ${lastTriedModelId} -> ${config.id}.`);
341
+ }
342
+ const payload = {
343
+ model: config.model,
344
+ messages,
345
+ stream: true,
346
+ };
347
+ if (formattedTools) {
348
+ payload.tools = formattedTools;
349
+ payload.tool_choice = 'auto';
350
+ }
351
+ const streamAttempt = await this.executeStreamingAttempt({
352
+ config,
353
+ apiKey,
354
+ payload,
355
+ providerId,
356
+ sessionId,
357
+ directive,
358
+ estimatedRequestTokens,
359
+ onDelta,
360
+ });
361
+ if (streamAttempt.ok && streamAttempt.message) {
362
+ return streamAttempt.message;
363
+ }
364
+ if (streamAttempt.errorMessage) {
365
+ lastError = new Error(streamAttempt.errorMessage);
366
+ }
367
+ lastTriedModelId = config.id;
368
+ }
369
+ const errorMessage = `All configured models exhausted or failed. Last error: ${scrubSensitiveText(lastError?.message ?? 'unknown')}`;
370
+ onDelta({ type: 'error', error: errorMessage });
371
+ throw new Error(errorMessage);
372
+ }
373
+ getApiKey(envName) {
374
+ const key = getSecretVaultService().readSecret(envName);
375
+ if (!key) {
376
+ console.warn(`Warning: API key ${envName} is not set in environment.`);
377
+ return '';
378
+ }
379
+ return key;
380
+ }
381
+ resolveInitialFallbackMode(override) {
382
+ if (override) {
383
+ this.persistFallbackMode(override);
384
+ return override;
385
+ }
386
+ const persisted = getModelRoutingSetting(FALLBACK_MODE_SETTING_KEY);
387
+ if (isFallbackMode(persisted)) {
388
+ return persisted;
389
+ }
390
+ const fromEnv = getConfigValue('MODEL_ROUTING_FALLBACK_MODE');
391
+ const resolved = isFallbackMode(fromEnv) ? fromEnv : DEFAULT_FALLBACK_MODE;
392
+ this.persistFallbackMode(resolved);
393
+ return resolved;
394
+ }
395
+ persistFallbackMode(mode) {
396
+ try {
397
+ saveModelRoutingSetting(FALLBACK_MODE_SETTING_KEY, mode);
398
+ }
399
+ catch (error) {
400
+ const message = scrubSensitiveText(error instanceof Error ? error.message : String(error));
401
+ console.warn(`[Router] Failed to persist fallback mode: ${message}`);
402
+ }
403
+ }
404
+ hydrateRecentEvents() {
405
+ try {
406
+ const rows = listModelRoutingEvents(Math.min(this.maxRuntimeEvents, DEFAULT_BOOTSTRAP_EVENT_COUNT));
407
+ this.runtimeEvents.push(...rows.map((row) => ({
408
+ id: row.id,
409
+ type: row.event_type,
410
+ modelId: row.model_id,
411
+ modelName: row.model_name,
412
+ provider: row.provider,
413
+ fallbackMode: row.fallback_mode,
414
+ detail: scrubSensitiveText(parseEventDetail(row.detail_json)),
415
+ createdAt: row.created_at,
416
+ })));
417
+ }
418
+ catch (error) {
419
+ const message = scrubSensitiveText(error instanceof Error ? error.message : String(error));
420
+ console.warn(`[Router] Failed to hydrate model routing events: ${message}`);
421
+ }
422
+ }
423
+ async resolveCooldownPreflight(config) {
424
+ const cooldown = this.getModelCooldownState(config.id);
425
+ if (cooldown.remainingMs <= 0) {
426
+ return true;
427
+ }
428
+ if (this.fallbackMode === 'aggressive_fallback') {
429
+ this.recordEvent('cooldown_skip', config, `Skipped ${config.id}; cooldown active for ${cooldown.remainingMs}ms.`);
430
+ return false;
431
+ }
432
+ const waitMs = Math.min(cooldown.remainingMs, this.intelligentPacingMaxWaitMs);
433
+ if (waitMs > 0) {
434
+ this.recordEvent('cooldown_wait', config, `Waiting ${waitMs}ms for ${config.id} cooldown.`);
435
+ await this.sleepFn(waitMs);
436
+ }
437
+ const postWaitCooldown = this.getModelCooldownState(config.id);
438
+ if (postWaitCooldown.remainingMs > 0) {
439
+ this.recordEvent('cooldown_skip', config, `Cooldown for ${config.id} remains active (${postWaitCooldown.remainingMs}ms).`);
440
+ return false;
441
+ }
442
+ return true;
443
+ }
444
+ async executeHttpAttempt(input) {
445
+ this.metrics.totalRequests += 1;
446
+ this.trackUsageAttempt(input.config.id);
447
+ this.recordEvent('attempt', input.config, `Attempting ${input.config.model} (profile=${input.directive.profile}, severity=${input.directive.severity}).`);
448
+ const startedAt = this.nowFn();
449
+ try {
450
+ const response = await fetch(input.config.baseURL, {
451
+ method: 'POST',
452
+ headers: {
453
+ 'Content-Type': 'application/json',
454
+ Authorization: `Bearer ${input.apiKey}`,
455
+ ...(input.config.id === 'fallback_1'
456
+ ? { 'HTTP-Referer': 'https://twinbot.ai', 'X-Title': 'TwinBot' }
457
+ : {}),
458
+ },
459
+ body: JSON.stringify(input.payload),
460
+ });
461
+ const latencyMs = this.nowFn() - startedAt;
462
+ if (response.status === 429) {
463
+ const cooldownMs = parseRetryAfterMs(response.headers.get('retry-after')) ?? this.defaultRateLimitCooldownMs;
464
+ this.#recordFailure(`429 Too Many Requests: ${input.config.model}`);
465
+ this.trackUsageFailure(input.config.id, `429 Too Many Requests: ${input.config.model}`, true);
466
+ this.budgetGovernor.recordUsage({
467
+ sessionId: input.sessionId,
468
+ modelId: input.config.id,
469
+ providerId: input.providerId,
470
+ profile: input.directive.profile,
471
+ stage: 'failure',
472
+ requestTokens: input.estimatedRequestTokens,
473
+ responseTokens: 0,
474
+ latencyMs,
475
+ statusCode: 429,
476
+ error: `429 Too Many Requests: ${input.config.model}`,
477
+ });
478
+ this.budgetGovernor.applyProviderCooldown(input.providerId, input.sessionId, 'Provider returned 429; cooldown activated.');
479
+ this.setModelCooldown(input.config.id, cooldownMs, '429 rate-limit');
480
+ this.recordEvent('rate_limit', input.config, `Rate limit on ${input.config.id}; cooldown=${cooldownMs}ms; mode=${this.fallbackMode}.`);
481
+ return {
482
+ ok: false,
483
+ errorMessage: `429 Too Many Requests: ${input.config.model}`,
484
+ statusCode: 429,
485
+ rateLimitCooldownMs: cooldownMs,
486
+ };
487
+ }
488
+ if (!response.ok) {
489
+ const errText = scrubSensitiveText(await response.text());
490
+ const errorMessage = `HTTP ${response.status}: ${errText}`;
491
+ this.#recordFailure(errorMessage);
492
+ this.trackUsageFailure(input.config.id, errorMessage);
493
+ this.budgetGovernor.recordUsage({
494
+ sessionId: input.sessionId,
495
+ modelId: input.config.id,
496
+ providerId: input.providerId,
497
+ profile: input.directive.profile,
498
+ stage: 'failure',
499
+ requestTokens: input.estimatedRequestTokens,
500
+ responseTokens: 0,
501
+ latencyMs,
502
+ statusCode: response.status,
503
+ error: errorMessage,
504
+ });
505
+ this.recordEvent('failure', input.config, `HTTP error (${response.status}) for ${input.config.id}.`);
506
+ return { ok: false, errorMessage, statusCode: response.status };
507
+ }
508
+ const data = await response.json();
509
+ const message = data?.choices?.[0]?.message;
510
+ if (!message) {
511
+ const errorMessage = `Model ${input.config.id} returned empty choices payload.`;
512
+ this.#recordFailure(errorMessage);
513
+ this.trackUsageFailure(input.config.id, errorMessage);
514
+ this.budgetGovernor.recordUsage({
515
+ sessionId: input.sessionId,
516
+ modelId: input.config.id,
517
+ providerId: input.providerId,
518
+ profile: input.directive.profile,
519
+ stage: 'failure',
520
+ requestTokens: input.estimatedRequestTokens,
521
+ responseTokens: 0,
522
+ latencyMs,
523
+ statusCode: response.status,
524
+ error: errorMessage,
525
+ });
526
+ this.recordEvent('failure', input.config, errorMessage);
527
+ return { ok: false, errorMessage, statusCode: response.status };
528
+ }
529
+ const responseContent = message.content ? String(message.content) : '';
530
+ this.metrics.consecutiveFailures = 0;
531
+ this.metrics.lastError = null;
532
+ this.currentModelId = input.config.id;
533
+ this.clearModelCooldown(input.config.id);
534
+ this.trackUsageSuccess(input.config.id);
535
+ this.budgetGovernor.recordUsage({
536
+ sessionId: input.sessionId,
537
+ modelId: input.config.id,
538
+ providerId: input.providerId,
539
+ profile: input.directive.profile,
540
+ stage: 'success',
541
+ requestTokens: input.estimatedRequestTokens,
542
+ responseTokens: estimateTokenCount(responseContent),
543
+ latencyMs,
544
+ statusCode: response.status,
545
+ });
546
+ this.recordEvent('success', input.config, `Response succeeded for ${input.config.id}.`);
547
+ return { ok: true, message, statusCode: response.status };
548
+ }
549
+ catch (error) {
550
+ const latencyMs = this.nowFn() - startedAt;
551
+ const message = scrubSensitiveText(error instanceof Error ? error.message : String(error));
552
+ this.#recordFailure(message);
553
+ this.trackUsageFailure(input.config.id, message);
554
+ this.budgetGovernor.recordUsage({
555
+ sessionId: input.sessionId,
556
+ modelId: input.config.id,
557
+ providerId: input.providerId,
558
+ profile: input.directive.profile,
559
+ stage: 'failure',
560
+ requestTokens: input.estimatedRequestTokens,
561
+ responseTokens: 0,
562
+ latencyMs,
563
+ error: message,
564
+ });
565
+ this.recordEvent('failure', input.config, `Transport error on ${input.config.id}: ${message}`);
566
+ return { ok: false, errorMessage: message };
567
+ }
568
+ }
569
+ async executeStreamingAttempt(input) {
570
+ this.metrics.totalRequests += 1;
571
+ this.trackUsageAttempt(input.config.id);
572
+ this.recordEvent('attempt', input.config, `Attempting streaming ${input.config.model} (profile=${input.directive.profile}).`);
573
+ const startedAt = this.nowFn();
574
+ let responseContent = '';
575
+ const toolCalls = [];
576
+ try {
577
+ const response = await fetch(input.config.baseURL, {
578
+ method: 'POST',
579
+ headers: {
580
+ 'Content-Type': 'application/json',
581
+ Authorization: `Bearer ${input.apiKey}`,
582
+ ...(input.config.id === 'fallback_1'
583
+ ? { 'HTTP-Referer': 'https://twinbot.ai', 'X-Title': 'TwinBot' }
584
+ : {}),
585
+ },
586
+ body: JSON.stringify(input.payload),
587
+ });
588
+ if (response.status === 429) {
589
+ const cooldownMs = parseRetryAfterMs(response.headers.get('retry-after')) ?? this.defaultRateLimitCooldownMs;
590
+ this.#recordFailure(`429 Too Many Requests: ${input.config.model}`);
591
+ this.trackUsageFailure(input.config.id, `429 Too Many Requests: ${input.config.model}`, true);
592
+ this.setModelCooldown(input.config.id, cooldownMs, '429 rate-limit');
593
+ this.recordEvent('rate_limit', input.config, `Rate limit on ${input.config.id}; cooldown=${cooldownMs}ms.`);
594
+ return {
595
+ ok: false,
596
+ errorMessage: `429 Too Many Requests: ${input.config.model}`,
597
+ statusCode: 429,
598
+ rateLimitCooldownMs: cooldownMs,
599
+ };
600
+ }
601
+ if (!response.ok) {
602
+ const errText = scrubSensitiveText(await response.text());
603
+ const errorMessage = `HTTP ${response.status}: ${errText}`;
604
+ this.#recordFailure(errorMessage);
605
+ this.trackUsageFailure(input.config.id, errorMessage);
606
+ this.recordEvent('failure', input.config, `HTTP error (${response.status}) for ${input.config.id}.`);
607
+ return { ok: false, errorMessage, statusCode: response.status };
608
+ }
609
+ if (!response.body) {
610
+ return { ok: false, errorMessage: 'No response body for streaming request' };
611
+ }
612
+ const reader = response.body.getReader();
613
+ const decoder = new TextDecoder();
614
+ let buffer = '';
615
+ while (true) {
616
+ const { done, value } = await reader.read();
617
+ if (done)
618
+ break;
619
+ buffer += decoder.decode(value, { stream: true });
620
+ const lines = buffer.split('\n');
621
+ buffer = lines.pop() ?? '';
622
+ for (const line of lines) {
623
+ const trimmed = line.trim();
624
+ if (!trimmed || !trimmed.startsWith('data:'))
625
+ continue;
626
+ const data = trimmed.slice(5).trim();
627
+ if (data === '[DONE]')
628
+ continue;
629
+ try {
630
+ const parsed = JSON.parse(data);
631
+ const delta = parsed.choices?.[0]?.delta;
632
+ if (delta?.content) {
633
+ responseContent += delta.content;
634
+ input.onDelta({ type: 'text_delta', content: delta.content });
635
+ }
636
+ if (delta?.tool_calls) {
637
+ for (const tc of delta.tool_calls) {
638
+ if (tc.id) {
639
+ toolCalls.push({ id: tc.id, name: '', arguments: '' });
640
+ input.onDelta({ type: 'tool_call_start', toolCallId: tc.id });
641
+ }
642
+ if (tc.function?.name) {
643
+ const lastTc = toolCalls[toolCalls.length - 1];
644
+ if (lastTc)
645
+ lastTc.name = tc.function.name;
646
+ input.onDelta({
647
+ type: 'tool_call_delta',
648
+ toolCallId: lastTc?.id,
649
+ toolCallName: tc.function.name,
650
+ });
651
+ }
652
+ if (tc.function?.arguments) {
653
+ const lastTc = toolCalls[toolCalls.length - 1];
654
+ if (lastTc)
655
+ lastTc.arguments += tc.function.arguments;
656
+ input.onDelta({
657
+ type: 'tool_call_delta',
658
+ toolCallId: lastTc?.id,
659
+ toolCallArguments: tc.function.arguments,
660
+ });
661
+ }
662
+ }
663
+ }
664
+ }
665
+ catch {
666
+ // Skip unparseable SSE lines
667
+ }
668
+ }
669
+ }
670
+ const latencyMs = this.nowFn() - startedAt;
671
+ this.metrics.consecutiveFailures = 0;
672
+ this.metrics.lastError = null;
673
+ this.currentModelId = input.config.id;
674
+ this.clearModelCooldown(input.config.id);
675
+ this.trackUsageSuccess(input.config.id);
676
+ this.budgetGovernor.recordUsage({
677
+ sessionId: input.sessionId,
678
+ modelId: input.config.id,
679
+ providerId: input.providerId,
680
+ profile: input.directive.profile,
681
+ stage: 'success',
682
+ requestTokens: input.estimatedRequestTokens,
683
+ responseTokens: estimateTokenCount(responseContent),
684
+ latencyMs,
685
+ });
686
+ this.recordEvent('success', input.config, `Streaming response succeeded for ${input.config.id}.`);
687
+ input.onDelta({ type: 'done' });
688
+ const message = {
689
+ role: 'assistant',
690
+ content: responseContent || null,
691
+ tool_calls: toolCalls.length > 0
692
+ ? toolCalls.map((tc) => ({
693
+ id: tc.id,
694
+ type: 'function',
695
+ function: {
696
+ name: tc.name,
697
+ arguments: tc.arguments,
698
+ },
699
+ }))
700
+ : undefined,
701
+ };
702
+ return { ok: true, message };
703
+ }
704
+ catch (error) {
705
+ const latencyMs = this.nowFn() - startedAt;
706
+ const message = scrubSensitiveText(error instanceof Error ? error.message : String(error));
707
+ this.#recordFailure(message);
708
+ this.trackUsageFailure(input.config.id, message);
709
+ this.budgetGovernor.recordUsage({
710
+ sessionId: input.sessionId,
711
+ modelId: input.config.id,
712
+ providerId: input.providerId,
713
+ profile: input.directive.profile,
714
+ stage: 'failure',
715
+ requestTokens: input.estimatedRequestTokens,
716
+ responseTokens: 0,
717
+ latencyMs,
718
+ error: message,
719
+ });
720
+ this.recordEvent('failure', input.config, `Streaming transport error on ${input.config.id}: ${message}`);
721
+ input.onDelta({ type: 'error', error: message });
722
+ return { ok: false, errorMessage: message };
723
+ }
724
+ }
725
+ getOrderedModels(profile) {
726
+ if (!this.models.length) {
727
+ return [];
728
+ }
729
+ const preferred = this.getPreferredOrdering();
730
+ if (profile === 'economy') {
731
+ return [...preferred].sort((a, b) => this.getCostRank(a.id) - this.getCostRank(b.id));
732
+ }
733
+ if (profile === 'balanced') {
734
+ return [...preferred].sort((a, b) => this.getBalancedRank(a.id) - this.getBalancedRank(b.id));
735
+ }
736
+ return preferred;
737
+ }
738
+ getPreferredOrdering() {
739
+ if (this.preferredModelIndex <= 0) {
740
+ return [...this.models];
741
+ }
742
+ return [
743
+ ...this.models.slice(this.preferredModelIndex),
744
+ ...this.models.slice(0, this.preferredModelIndex),
745
+ ];
746
+ }
747
+ getCostRank(modelId) {
748
+ if (modelId === 'fallback_2')
749
+ return 1;
750
+ if (modelId === 'fallback_1')
751
+ return 2;
752
+ if (modelId === 'primary')
753
+ return 3;
754
+ return 4;
755
+ }
756
+ getBalancedRank(modelId) {
757
+ if (modelId === 'fallback_1')
758
+ return 1;
759
+ if (modelId === 'primary')
760
+ return 2;
761
+ if (modelId === 'fallback_2')
762
+ return 3;
763
+ return 4;
764
+ }
765
+ resolveProviderId(config) {
766
+ const url = config.baseURL.toLowerCase();
767
+ if (url.includes('openrouter.ai'))
768
+ return 'openrouter';
769
+ if (url.includes('generativelanguage.googleapis.com'))
770
+ return 'google';
771
+ if (url.includes('modal.direct'))
772
+ return 'modal';
773
+ return 'unknown';
774
+ }
775
+ getModelCooldownState(modelId) {
776
+ const usage = this.usageByModel.get(modelId);
777
+ if (!usage?.cooldownUntilMs) {
778
+ return { remainingMs: 0, reason: usage?.cooldownReason ?? null };
779
+ }
780
+ const remainingMs = Math.max(0, usage.cooldownUntilMs - this.nowFn());
781
+ if (remainingMs === 0) {
782
+ usage.cooldownUntilMs = null;
783
+ usage.cooldownReason = null;
784
+ }
785
+ return { remainingMs, reason: usage.cooldownReason };
786
+ }
787
+ setModelCooldown(modelId, cooldownMs, reason) {
788
+ const usage = this.usageByModel.get(modelId);
789
+ if (!usage) {
790
+ return;
791
+ }
792
+ usage.cooldownUntilMs = this.nowFn() + Math.max(0, cooldownMs);
793
+ usage.cooldownReason = reason;
794
+ this.recordEvent('cooldown_set', this.models.find((model) => model.id === modelId) ?? null, `Cooldown set for ${modelId}: ${cooldownMs}ms (${reason}).`);
795
+ }
796
+ clearModelCooldown(modelId) {
797
+ const usage = this.usageByModel.get(modelId);
798
+ if (!usage) {
799
+ return;
800
+ }
801
+ usage.cooldownUntilMs = null;
802
+ usage.cooldownReason = null;
803
+ }
804
+ trackUsageAttempt(modelId) {
805
+ const usage = this.usageByModel.get(modelId);
806
+ if (!usage) {
807
+ return;
808
+ }
809
+ usage.attempts += 1;
810
+ usage.lastUsedAt = new Date(this.nowFn()).toISOString();
811
+ }
812
+ trackUsageSuccess(modelId) {
813
+ const usage = this.usageByModel.get(modelId);
814
+ if (!usage) {
815
+ return;
816
+ }
817
+ usage.successes += 1;
818
+ usage.lastError = null;
819
+ }
820
+ trackUsageFailure(modelId, message, rateLimited = false) {
821
+ const usage = this.usageByModel.get(modelId);
822
+ if (!usage) {
823
+ return;
824
+ }
825
+ usage.failures += 1;
826
+ usage.lastError = scrubSensitiveText(message);
827
+ if (rateLimited) {
828
+ usage.rateLimits += 1;
829
+ }
830
+ }
831
+ getActiveCooldowns() {
832
+ const now = this.nowFn();
833
+ const result = [];
834
+ for (const model of this.models) {
835
+ const usage = this.usageByModel.get(model.id);
836
+ if (!usage?.cooldownUntilMs || usage.cooldownUntilMs <= now) {
837
+ continue;
838
+ }
839
+ result.push({
840
+ modelId: model.id,
841
+ modelName: model.model,
842
+ provider: usage.provider,
843
+ reason: usage.cooldownReason ?? 'cooldown',
844
+ remainingMs: usage.cooldownUntilMs - now,
845
+ until: new Date(usage.cooldownUntilMs).toISOString(),
846
+ });
847
+ }
848
+ return result.sort((a, b) => b.remainingMs - a.remainingMs);
849
+ }
850
+ buildOperatorGuidance(cooldowns) {
851
+ const guidance = [];
852
+ if (cooldowns.length >= this.models.length) {
853
+ const nextReadyMs = Math.min(...cooldowns.map((cooldown) => cooldown.remainingMs));
854
+ guidance.push(`All providers cooling down. Next model availability in ~${Math.ceil(nextReadyMs / 1000)}s.`);
855
+ }
856
+ if (this.metrics.consecutiveFailures >= 3) {
857
+ guidance.push(`Routing instability detected (${this.metrics.consecutiveFailures} consecutive failures). Validate quotas and provider credentials.`);
858
+ }
859
+ if (this.fallbackMode === 'intelligent_pacing' && cooldowns.length > 0) {
860
+ guidance.push('Fallback mode intelligent_pacing is active: waiting briefly before provider switching.');
861
+ }
862
+ if (this.fallbackMode === 'aggressive_fallback' && this.metrics.failoverCount > 0) {
863
+ guidance.push('Fallback mode aggressive_fallback is active: immediate provider switching enabled.');
864
+ }
865
+ if (guidance.length === 0) {
866
+ guidance.push('Routing stable. No active model cooldown pressure detected.');
867
+ }
868
+ return guidance.map((item) => scrubSensitiveText(item));
869
+ }
870
+ recordEvent(type, model, detail) {
871
+ const event = {
872
+ id: randomUUID(),
873
+ type,
874
+ modelId: model?.id ?? null,
875
+ modelName: model?.model ?? null,
876
+ provider: model ? this.resolveProviderId(model) : null,
877
+ fallbackMode: this.fallbackMode,
878
+ detail: scrubSensitiveText(detail),
879
+ createdAt: new Date(this.nowFn()).toISOString(),
880
+ };
881
+ this.runtimeEvents.unshift(event);
882
+ if (this.runtimeEvents.length > this.maxRuntimeEvents) {
883
+ this.runtimeEvents.splice(this.maxRuntimeEvents);
884
+ }
885
+ try {
886
+ saveModelRoutingEvent({
887
+ id: event.id,
888
+ eventType: event.type,
889
+ modelId: event.modelId,
890
+ modelName: event.modelName,
891
+ provider: event.provider,
892
+ fallbackMode: event.fallbackMode,
893
+ detailJson: JSON.stringify({ detail: event.detail }),
894
+ createdAt: event.createdAt,
895
+ }, this.maxPersistedEvents);
896
+ }
897
+ catch (error) {
898
+ const message = scrubSensitiveText(error instanceof Error ? error.message : String(error));
899
+ console.warn(`[Router] Failed to persist model routing telemetry: ${message}`);
900
+ }
901
+ }
902
+ #recordFailure(message) {
903
+ this.metrics.totalFailures += 1;
904
+ this.metrics.consecutiveFailures += 1;
905
+ this.metrics.lastError = scrubSensitiveText(message);
906
+ this.metrics.lastFailureAt = new Date(this.nowFn()).toISOString();
907
+ }
908
+ }
909
/**
 * Estimates the token count of a request from its messages and tools.
 *
 * Each message contributes the estimate for its `content` (a missing
 * content counts as empty); the tools, when provided, contribute the
 * estimate for their JSON serialisation.
 *
 * @param {Array<{content?: (string|null)}>} messages - Chat messages.
 * @param {*} [tools] - Tool definitions; skipped when falsy.
 * @returns {number} Estimated total tokens.
 */
function estimateRequestTokens(messages, tools) {
    let total = 0;
    messages.forEach((entry) => {
        total += estimateTokenCount(entry.content ?? '');
    });
    if (tools) {
        total += estimateTokenCount(JSON.stringify(tools));
    }
    return total;
}
914
/**
 * Estimates a token count from text length using CHARS_PER_TOKEN.
 *
 * @param {?string} content - Text to measure.
 * @returns {number} 0 for empty or missing content, otherwise the length
 *   divided by CHARS_PER_TOKEN, rounded up, with a minimum of 1.
 */
function estimateTokenCount(content) {
    return content
        ? Math.max(1, Math.ceil(content.length / CHARS_PER_TOKEN))
        : 0;
}
920
/**
 * Waits for the given number of milliseconds.
 *
 * Resolves immediately, without scheduling a timer, when `ms` is not a
 * finite number or is zero or negative.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves once the delay has elapsed.
 */
async function sleep(ms) {
    const shouldWait = Number.isFinite(ms) && ms > 0;
    if (shouldWait) {
        await new Promise((wake) => {
            setTimeout(wake, ms);
        });
    }
}