aiden-runtime 3.16.2 → 3.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +185 -7
- package/config/devos.config.json +29 -19
- package/config/hardware.json +2 -2
- package/dist/api/dashboard.js +480 -0
- package/dist/api/server.js +150 -142
- package/dist/core/agentLoop.js +94 -13
- package/dist/core/channels/email.js +1 -1
- package/dist/core/modelRegistry.js +261 -0
- package/dist/core/permissionSystem.js +239 -0
- package/dist/core/pluginLoader.js +161 -0
- package/dist/core/skillLoader.js +6 -24
- package/dist/core/toolRegistry.js +316 -31
- package/dist/core/version.js +1 -1
- package/dist/providers/router.js +2 -1
- package/dist-bundle/cli.js +50946 -29225
- package/dist-bundle/index.js +6462 -5274
- package/package.json +3 -2
package/dist/core/agentLoop.js
CHANGED
|
@@ -66,6 +66,7 @@ const entityGraph_1 = require("./entityGraph");
|
|
|
66
66
|
const learningMemory_1 = require("./learningMemory");
|
|
67
67
|
const conversationMemory_1 = require("./conversationMemory");
|
|
68
68
|
const router_1 = require("../providers/router");
|
|
69
|
+
const modelRegistry_1 = require("./modelRegistry");
|
|
69
70
|
const index_1 = require("../providers/index");
|
|
70
71
|
const knowledgeBase_1 = require("./knowledgeBase");
|
|
71
72
|
const skillTeacher_1 = require("./skillTeacher");
|
|
@@ -80,6 +81,7 @@ const semanticMemory_1 = require("./semanticMemory");
|
|
|
80
81
|
const sessionMemory_1 = require("./sessionMemory");
|
|
81
82
|
const goalTracker_1 = require("./goalTracker");
|
|
82
83
|
const hooks_1 = require("./hooks");
|
|
84
|
+
const pluginLoader_1 = require("./pluginLoader");
|
|
83
85
|
const instinctSystem_1 = require("./instinctSystem");
|
|
84
86
|
const workflowTracker_1 = require("./workflowTracker");
|
|
85
87
|
const parallelExecutor_1 = require("./parallelExecutor");
|
|
@@ -427,6 +429,7 @@ function inferPhasesFromSteps(steps) {
|
|
|
427
429
|
clipboard_read: 'execution', clipboard_write: 'execution',
|
|
428
430
|
window_list: 'execution', window_focus: 'execution',
|
|
429
431
|
app_launch: 'execution', app_close: 'execution',
|
|
432
|
+
system_volume: 'execution',
|
|
430
433
|
watch_folder: 'execution', watch_folder_list: 'execution',
|
|
431
434
|
};
|
|
432
435
|
const phaseNames = {
|
|
@@ -717,7 +720,7 @@ async function planWithLLM(message, history, apiKey, model, provider, memoryCont
|
|
|
717
720
|
'screenshot', 'screen_read', 'vision_loop', 'wait',
|
|
718
721
|
'code_interpreter_python', 'code_interpreter_node',
|
|
719
722
|
'clipboard_read', 'clipboard_write', 'window_list', 'window_focus',
|
|
720
|
-
'app_launch', 'app_close',
|
|
723
|
+
'app_launch', 'app_close', 'system_volume',
|
|
721
724
|
'watch_folder', 'watch_folder_list',
|
|
722
725
|
'send_file_local', 'receive_file_local',
|
|
723
726
|
'get_briefing',
|
|
@@ -953,11 +956,13 @@ If requires_execution is false:
|
|
|
953
956
|
|
|
954
957
|
NOTE: "goals" is only required when 2+ distinct intents are present. Single-goal messages may omit it.
|
|
955
958
|
|
|
956
|
-
THE 'respond' TOOL — use this for
|
|
957
|
-
- 'respond' is ALWAYS a valid plan. When no external tool is needed, plan a single respond step.
|
|
959
|
+
THE 'respond' TOOL — use this for conversational messages ONLY:
|
|
958
960
|
- respond: { "message": "your answer text here" }
|
|
959
961
|
- Use respond for: greetings, capability questions, simple facts from training data, clarifying questions, short answers.
|
|
960
962
|
- Example: user says "hi" → { "goal": "hi", "requires_execution": true, "plan": [{ "step": 1, "tool": "respond", "input": { "message": "Hi! What can I help you with today?" } }] }
|
|
963
|
+
- NEVER use respond as the only step when the user expects a physical action on their machine.
|
|
964
|
+
"open chrome", "close spotify", "increase volume" -> these REQUIRE the actual tool (see SYSTEM ACTION GATE).
|
|
965
|
+
Responding with "Done. I've opened Chrome." without calling app_launch is lying — never do it.
|
|
961
966
|
|
|
962
967
|
ACTION GATE RULES — apply BEFORE creating any plan:
|
|
963
968
|
1. CAPABILITY GATE: If message is "Can you do X?" / "Can you X?" / "Are you able to X?" → plan respond with answer
|
|
@@ -970,6 +975,28 @@ ACTION GATE RULES — apply BEFORE creating any plan:
|
|
|
970
975
|
4. NEVER create comparison tables, reports, or verdicts unless user explicitly asked for them
|
|
971
976
|
5. NEVER mention Pega, BlueWinston, Gaude Digital, or any third-party product by name
|
|
972
977
|
|
|
978
|
+
SYSTEM ACTION GATE — CRITICAL, apply before any system-action request:
|
|
979
|
+
ANY request to open, launch, start, close, kill, quit an app OR change system volume
|
|
980
|
+
MUST use the corresponding tool. Using 'respond' alone is WRONG for these.
|
|
981
|
+
|
|
982
|
+
Exact mappings (use these, do not improvise):
|
|
983
|
+
- "open chrome" / "launch chrome" / "open Google Chrome" → app_launch { app_name: "chrome" }
|
|
984
|
+
- "close chrome" / "kill chrome" / "quit chrome" → app_close { app_name: "chrome" }
|
|
985
|
+
- "open spotify" / "launch spotify" → app_launch { app_name: "spotify" }
|
|
986
|
+
- "close spotify" → app_close { app_name: "spotify" }
|
|
987
|
+
- "open discord" / "open VS Code" / "open notepad" → app_launch { app_name: "<name>" }
|
|
988
|
+
- "increase volume" / "volume up 20" / "turn up volume" → system_volume { volume: 20 }
|
|
989
|
+
- "decrease volume" / "volume down 10" → system_volume { volume: 10, direction: "down" }
|
|
990
|
+
- "mute" / "mute sound" → system_volume { mute: true }
|
|
991
|
+
- "unmute" → system_volume { unmute: true }
|
|
992
|
+
- "open file explorer" → app_launch { app_name: "explorer" }
|
|
993
|
+
|
|
994
|
+
WRONG (never do this for the above requests):
|
|
995
|
+
{ "tool": "respond", "input": { "message": "Done. I've opened Chrome." } } <- FAKE, LYING
|
|
996
|
+
|
|
997
|
+
CORRECT:
|
|
998
|
+
{ "tool": "app_launch", "input": { "app_name": "chrome" } } <- actually opens Chrome
|
|
999
|
+
|
|
973
1000
|
## SKILL DISCOVERY
|
|
974
1001
|
|
|
975
1002
|
Before planning any multi-step task (>=2 tools), call lookup_skill with the user's message as the query.
|
|
@@ -986,7 +1013,14 @@ TIER 1 (USE FIRST): lookup_skill, respond, web_search, fetch_page, fetch_url, de
|
|
|
986
1013
|
TIER 2 (USE SECOND): file_write, file_read, file_list, shell_exec, run_powershell, run_python, run_node, code_interpreter_python, code_interpreter_node, git_status, git_commit, git_push, clipboard_read, clipboard_write, spawn_subagent, swarm
|
|
987
1014
|
→ Use when you need to read/write files, run scripts, or run git commands
|
|
988
1015
|
|
|
989
|
-
TIER
|
|
1016
|
+
TIER 3a — SYSTEM ACTIONS (use whenever user asks for OS-level actions):
|
|
1017
|
+
app_launch, app_close, system_volume, window_focus, window_list
|
|
1018
|
+
→ USE IMMEDIATELY when user asks to open/close/launch/kill an app, change volume, or focus a window
|
|
1019
|
+
→ Do NOT substitute with respond — the user wants the ACTION to happen, not acknowledgment
|
|
1020
|
+
→ Do NOT use shell_exec as a substitute; app_launch/app_close are the correct tools
|
|
1021
|
+
|
|
1022
|
+
TIER 3b — BROWSER UI (use when task requires interacting with a website UI):
|
|
1023
|
+
open_browser, browser_click, browser_type, browser_extract, browser_screenshot
|
|
990
1024
|
→ ONLY when task requires interacting with a website UI
|
|
991
1025
|
→ NEVER use browser when an API tool can do the same job
|
|
992
1026
|
→ For other selectors always pass selector: "<css selector>", never guess at element text.
|
|
@@ -1168,11 +1202,19 @@ Output ONLY valid JSON, nothing else:`;
|
|
|
1168
1202
|
e.message?.includes('429') ||
|
|
1169
1203
|
e.message?.includes('rate') ||
|
|
1170
1204
|
e.message?.includes('aborted')) {
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1205
|
+
// Try next model within the same provider before marking whole entry rate-limited
|
|
1206
|
+
const nextModel = (0, modelRegistry_1.getNextModelOnFailure)(curProvider, curModel);
|
|
1207
|
+
if (nextModel) {
|
|
1208
|
+
console.log(`[Planner] Model ${curModel} failed — trying next model ${nextModel} on same provider (${curApiName})`);
|
|
1209
|
+
curModel = nextModel;
|
|
1210
|
+
}
|
|
1211
|
+
else {
|
|
1212
|
+
try {
|
|
1213
|
+
(0, router_1.markRateLimited)(curApiName);
|
|
1214
|
+
console.log(`[Planner] Marked ${curApiName} as rate limited — will rotate away`);
|
|
1215
|
+
}
|
|
1216
|
+
catch { }
|
|
1174
1217
|
}
|
|
1175
|
-
catch { }
|
|
1176
1218
|
}
|
|
1177
1219
|
}
|
|
1178
1220
|
// Wait before next attempt — helps with rate-limit recovery
|
|
@@ -1366,7 +1408,7 @@ const VALID_TOOLS = [
|
|
|
1366
1408
|
'screenshot', 'screen_read', 'vision_loop', 'wait',
|
|
1367
1409
|
'code_interpreter_python', 'code_interpreter_node',
|
|
1368
1410
|
'clipboard_read', 'clipboard_write', 'window_list', 'window_focus',
|
|
1369
|
-
'app_launch', 'app_close',
|
|
1411
|
+
'app_launch', 'app_close', 'system_volume',
|
|
1370
1412
|
'watch_folder', 'watch_folder_list',
|
|
1371
1413
|
'send_file_local', 'receive_file_local',
|
|
1372
1414
|
'clarify', 'todo', 'cronjob', 'vision_analyze',
|
|
@@ -1667,11 +1709,25 @@ const NO_RETRY_TOOLS = new Set([
|
|
|
1667
1709
|
async function executeToolWithRetry(tool, input, maxRetries = 2) {
|
|
1668
1710
|
const retryable = !NO_RETRY_TOOLS.has(tool);
|
|
1669
1711
|
const effectiveMax = retryable ? maxRetries : 0;
|
|
1712
|
+
// ── Plugin preTool hooks ──────────────────────────────────────
|
|
1713
|
+
let effectiveInput = input;
|
|
1714
|
+
for (const hook of pluginLoader_1.pluginHooks.preTool) {
|
|
1715
|
+
try {
|
|
1716
|
+
const r = await hook(tool, effectiveInput);
|
|
1717
|
+
if (r.skip)
|
|
1718
|
+
return { success: true, output: '[skipped by plugin]', skippedByPlugin: true };
|
|
1719
|
+
if (r.input)
|
|
1720
|
+
effectiveInput = r.input;
|
|
1721
|
+
}
|
|
1722
|
+
catch (e) {
|
|
1723
|
+
console.warn(`[PluginHook] preTool error for ${tool}:`, e.message);
|
|
1724
|
+
}
|
|
1725
|
+
}
|
|
1670
1726
|
for (let attempt = 0; attempt <= effectiveMax; attempt++) {
|
|
1671
1727
|
try {
|
|
1672
|
-
const result = await (0, toolRegistry_1.executeTool)(tool,
|
|
1728
|
+
const result = await (0, toolRegistry_1.executeTool)(tool, effectiveInput);
|
|
1673
1729
|
if (result.success) {
|
|
1674
|
-
const quality = validateResultQuality(tool,
|
|
1730
|
+
const quality = validateResultQuality(tool, effectiveInput, result.output || result);
|
|
1675
1731
|
if (!quality.valid) {
|
|
1676
1732
|
console.log(`[Quality] ${tool} returned but quality check failed: ${quality.reason}`);
|
|
1677
1733
|
if (attempt < effectiveMax) {
|
|
@@ -1683,7 +1739,19 @@ async function executeToolWithRetry(tool, input, maxRetries = 2) {
|
|
|
1683
1739
|
console.log(`[Quality] ${tool} — accepting low-quality result after ${effectiveMax} retries`);
|
|
1684
1740
|
appendLesson(`${tool} produced low-quality output (${quality.reason}) after ${effectiveMax} retries — consider alternative approach for this tool.`);
|
|
1685
1741
|
}
|
|
1686
|
-
|
|
1742
|
+
// ── Plugin postTool hooks ─────────────────────────────
|
|
1743
|
+
let finalResult = result;
|
|
1744
|
+
for (const hook of pluginLoader_1.pluginHooks.postTool) {
|
|
1745
|
+
try {
|
|
1746
|
+
const r = await hook(tool, effectiveInput, finalResult);
|
|
1747
|
+
if (r.result)
|
|
1748
|
+
finalResult = r.result;
|
|
1749
|
+
}
|
|
1750
|
+
catch (e) {
|
|
1751
|
+
console.warn(`[PluginHook] postTool error for ${tool}:`, e.message);
|
|
1752
|
+
}
|
|
1753
|
+
}
|
|
1754
|
+
return finalResult;
|
|
1687
1755
|
}
|
|
1688
1756
|
if (attempt < effectiveMax) {
|
|
1689
1757
|
const delay = Math.min(1000 * Math.pow(2, attempt), 5000);
|
|
@@ -1720,7 +1788,7 @@ const SEQUENTIAL_ONLY = new Set([
|
|
|
1720
1788
|
'open_browser', 'browser_click', 'browser_type', 'browser_extract',
|
|
1721
1789
|
'mouse_move', 'mouse_click', 'keyboard_type', 'keyboard_press',
|
|
1722
1790
|
'screenshot', 'screen_read', 'vision_loop', 'notify', 'wait',
|
|
1723
|
-
'clipboard_write', 'window_focus', 'app_launch', 'app_close',
|
|
1791
|
+
'clipboard_write', 'window_focus', 'app_launch', 'app_close', 'system_volume',
|
|
1724
1792
|
'watch_folder',
|
|
1725
1793
|
]);
|
|
1726
1794
|
function buildDependencyGroups(steps) {
|
|
@@ -1791,6 +1859,7 @@ async function executePlan(plan, onStep, onPhaseChange, existingState, replanApi
|
|
|
1791
1859
|
clipboard_read: 'execution', clipboard_write: 'execution',
|
|
1792
1860
|
window_list: 'execution', window_focus: 'execution',
|
|
1793
1861
|
app_launch: 'execution', app_close: 'execution',
|
|
1862
|
+
system_volume: 'execution',
|
|
1794
1863
|
watch_folder: 'execution', watch_folder_list: 'execution',
|
|
1795
1864
|
};
|
|
1796
1865
|
let lastCapability = '';
|
|
@@ -2198,6 +2267,18 @@ function responderSystem(userName, date) {
|
|
|
2198
2267
|
return (0, aidenPersonality_1.AIDEN_RESPONDER_SYSTEM)(userName, date);
|
|
2199
2268
|
}
|
|
2200
2269
|
async function respondWithResults(originalMessage, plan, results, history, userName, apiKey, model, providerName, onToken, sessionId, goals) {
|
|
2270
|
+
// ── CommandGate / PermissionGate short-circuit ───────────────
|
|
2271
|
+
// If ANY tool was blocked with an approval gate, stream the
|
|
2272
|
+
// approval question directly — never let the LLM hallucinate "Done".
|
|
2273
|
+
const gatedResult = results.find(r => !r.success && r.error &&
|
|
2274
|
+
(r.error.startsWith('CommandGate:') || r.error.startsWith('PermissionGate:')));
|
|
2275
|
+
if (gatedResult) {
|
|
2276
|
+
const blocked = gatedResult.error
|
|
2277
|
+
.replace(/^(CommandGate|PermissionGate):\s*/i, '')
|
|
2278
|
+
.replace(/:\s*$/, '');
|
|
2279
|
+
onToken(`I need your approval before I can do that.\n\n**Blocked action:** ${blocked}\n\nReply **yes** to confirm, or tell me what you'd like instead.`);
|
|
2280
|
+
return;
|
|
2281
|
+
}
|
|
2201
2282
|
const date = new Date().toLocaleDateString('en-US', {
|
|
2202
2283
|
weekday: 'long', month: 'long', day: 'numeric', year: 'numeric',
|
|
2203
2284
|
});
|
|
@@ -134,7 +134,7 @@ class EmailAdapter {
|
|
|
134
134
|
host: this.imapHost,
|
|
135
135
|
port: this.imapPort,
|
|
136
136
|
tls: true,
|
|
137
|
-
tlsOptions: { rejectUnauthorized: false },
|
|
137
|
+
tlsOptions: { rejectUnauthorized: false }, // user-configured IMAP server may use self-signed cert
|
|
138
138
|
user: this.imapUser,
|
|
139
139
|
password: this.imapPassword,
|
|
140
140
|
authTimeout: 5000,
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* core/modelRegistry.ts
|
|
4
|
+
* Curated list of best free/cheap models per provider.
|
|
5
|
+
* Updated manually — not auto-discovered (keeps things simple and predictable).
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* getDefaultModel('groq') → 'llama-3.3-70b-versatile'
|
|
9
|
+
* getNextModelOnFailure('groq', 'llama-3.3-70b-versatile') → 'llama-3.1-70b-versatile'
|
|
10
|
+
* getRegistryEntry('groq', 'llama-3.3-70b-versatile') → ModelConfig | undefined
|
|
11
|
+
*/
|
|
12
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
13
|
+
exports.MODEL_REGISTRY = void 0;
|
|
14
|
+
exports.getDefaultModel = getDefaultModel;
|
|
15
|
+
exports.getNextModelOnFailure = getNextModelOnFailure;
|
|
16
|
+
exports.getRegistryEntry = getRegistryEntry;
|
|
17
|
+
exports.getModelsForProvider = getModelsForProvider;
|
|
18
|
+
/**
|
|
19
|
+
* Ordered by preference — first entry is the default.
|
|
20
|
+
* Free models come before paid unless quality difference is large.
|
|
21
|
+
* Env var override: set ${PROVIDER_UPPER}_MODEL to force a specific model.
|
|
22
|
+
* e.g. GROQ_MODEL=mixtral-8x7b-32768 overrides groq default
|
|
23
|
+
*/
|
|
24
|
+
exports.MODEL_REGISTRY = {
|
|
25
|
+
groq: [
|
|
26
|
+
{
|
|
27
|
+
id: 'llama-3.3-70b-versatile',
|
|
28
|
+
contextWindow: 128000,
|
|
29
|
+
pricing: 'free',
|
|
30
|
+
quality: 'high',
|
|
31
|
+
speed: 'fast',
|
|
32
|
+
notes: 'Primary — fastest + highest quality free tier',
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
id: 'llama-3.1-70b-versatile',
|
|
36
|
+
contextWindow: 128000,
|
|
37
|
+
pricing: 'free',
|
|
38
|
+
quality: 'high',
|
|
39
|
+
speed: 'fast',
|
|
40
|
+
notes: 'Fallback when 3.3 is rate-limited',
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
id: 'llama3-70b-8192',
|
|
44
|
+
contextWindow: 8192,
|
|
45
|
+
pricing: 'free',
|
|
46
|
+
quality: 'high',
|
|
47
|
+
speed: 'fast',
|
|
48
|
+
notes: 'Smaller context but very reliable',
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
id: 'mixtral-8x7b-32768',
|
|
52
|
+
contextWindow: 32768,
|
|
53
|
+
pricing: 'free',
|
|
54
|
+
quality: 'medium',
|
|
55
|
+
speed: 'fast',
|
|
56
|
+
notes: 'Good for structured JSON tasks',
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
id: 'gemma2-9b-it',
|
|
60
|
+
contextWindow: 8192,
|
|
61
|
+
pricing: 'free',
|
|
62
|
+
quality: 'medium',
|
|
63
|
+
speed: 'fast',
|
|
64
|
+
notes: 'Light fallback',
|
|
65
|
+
},
|
|
66
|
+
],
|
|
67
|
+
openrouter: [
|
|
68
|
+
{
|
|
69
|
+
id: 'meta-llama/llama-3.3-70b-instruct:free',
|
|
70
|
+
contextWindow: 131072,
|
|
71
|
+
pricing: 'free',
|
|
72
|
+
quality: 'high',
|
|
73
|
+
speed: 'medium',
|
|
74
|
+
notes: 'Best free model on OpenRouter',
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
id: 'meta-llama/llama-3.1-70b-instruct:free',
|
|
78
|
+
contextWindow: 131072,
|
|
79
|
+
pricing: 'free',
|
|
80
|
+
quality: 'high',
|
|
81
|
+
speed: 'medium',
|
|
82
|
+
notes: 'Reliable free fallback',
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
id: 'mistralai/mistral-7b-instruct:free',
|
|
86
|
+
contextWindow: 32768,
|
|
87
|
+
pricing: 'free',
|
|
88
|
+
quality: 'medium',
|
|
89
|
+
speed: 'fast',
|
|
90
|
+
notes: 'Fast small model for simple tasks',
|
|
91
|
+
},
|
|
92
|
+
{
|
|
93
|
+
id: 'google/gemma-2-9b-it:free',
|
|
94
|
+
contextWindow: 8192,
|
|
95
|
+
pricing: 'free',
|
|
96
|
+
quality: 'medium',
|
|
97
|
+
speed: 'fast',
|
|
98
|
+
notes: 'Emergency fallback',
|
|
99
|
+
},
|
|
100
|
+
],
|
|
101
|
+
together: [
|
|
102
|
+
{
|
|
103
|
+
id: 'meta-llama/llama-3.1-405b-instruct',
|
|
104
|
+
contextWindow: 130000,
|
|
105
|
+
pricing: 'paid',
|
|
106
|
+
quality: 'high',
|
|
107
|
+
speed: 'medium',
|
|
108
|
+
notes: '405B — highest quality, use sparingly ($5 credit)',
|
|
109
|
+
},
|
|
110
|
+
{
|
|
111
|
+
id: 'meta-llama/llama-3.3-70b-instruct-turbo',
|
|
112
|
+
contextWindow: 131072,
|
|
113
|
+
pricing: 'paid',
|
|
114
|
+
quality: 'high',
|
|
115
|
+
speed: 'fast',
|
|
116
|
+
notes: 'Faster cheaper Together option',
|
|
117
|
+
},
|
|
118
|
+
{
|
|
119
|
+
id: 'meta-llama/llama-3.1-70b-instruct-turbo',
|
|
120
|
+
contextWindow: 131072,
|
|
121
|
+
pricing: 'paid',
|
|
122
|
+
quality: 'high',
|
|
123
|
+
speed: 'fast',
|
|
124
|
+
notes: 'Fallback paid',
|
|
125
|
+
},
|
|
126
|
+
],
|
|
127
|
+
nvidia: [
|
|
128
|
+
{
|
|
129
|
+
id: 'nvidia/llama-3.3-nemotron-super-49b-v1',
|
|
130
|
+
contextWindow: 131072,
|
|
131
|
+
pricing: 'free',
|
|
132
|
+
quality: 'high',
|
|
133
|
+
speed: 'medium',
|
|
134
|
+
notes: 'NVIDIA NIM — high quality free inference',
|
|
135
|
+
},
|
|
136
|
+
{
|
|
137
|
+
id: 'meta/llama-3.3-70b-instruct',
|
|
138
|
+
contextWindow: 131072,
|
|
139
|
+
pricing: 'free',
|
|
140
|
+
quality: 'high',
|
|
141
|
+
speed: 'medium',
|
|
142
|
+
notes: 'NVIDIA-hosted Llama fallback',
|
|
143
|
+
},
|
|
144
|
+
{
|
|
145
|
+
id: 'mistralai/mixtral-8x7b-instruct-v0.1',
|
|
146
|
+
contextWindow: 32768,
|
|
147
|
+
pricing: 'free',
|
|
148
|
+
quality: 'medium',
|
|
149
|
+
speed: 'fast',
|
|
150
|
+
notes: 'Lightweight NVIDIA fallback',
|
|
151
|
+
},
|
|
152
|
+
],
|
|
153
|
+
gemini: [
|
|
154
|
+
{
|
|
155
|
+
id: 'gemini-2.5-flash',
|
|
156
|
+
contextWindow: 1000000,
|
|
157
|
+
pricing: 'free',
|
|
158
|
+
quality: 'high',
|
|
159
|
+
speed: 'fast',
|
|
160
|
+
notes: '1M context, thinking model, best free Gemini',
|
|
161
|
+
},
|
|
162
|
+
{
|
|
163
|
+
id: 'gemini-2.0-flash',
|
|
164
|
+
contextWindow: 1000000,
|
|
165
|
+
pricing: 'free',
|
|
166
|
+
quality: 'high',
|
|
167
|
+
speed: 'fast',
|
|
168
|
+
notes: 'Stable previous gen, good fallback',
|
|
169
|
+
},
|
|
170
|
+
{
|
|
171
|
+
id: 'gemini-1.5-flash',
|
|
172
|
+
contextWindow: 1000000,
|
|
173
|
+
pricing: 'free',
|
|
174
|
+
quality: 'medium',
|
|
175
|
+
speed: 'fast',
|
|
176
|
+
notes: 'Conservative fallback if 2.x rate-limited',
|
|
177
|
+
},
|
|
178
|
+
{
|
|
179
|
+
id: 'gemini-1.5-flash-8b',
|
|
180
|
+
contextWindow: 1000000,
|
|
181
|
+
pricing: 'free',
|
|
182
|
+
quality: 'low',
|
|
183
|
+
speed: 'fast',
|
|
184
|
+
notes: 'Emergency fallback — smallest Gemini',
|
|
185
|
+
},
|
|
186
|
+
],
|
|
187
|
+
ollama: [
|
|
188
|
+
{
|
|
189
|
+
id: 'gemma4:e4b',
|
|
190
|
+
contextWindow: 8192,
|
|
191
|
+
pricing: 'free',
|
|
192
|
+
quality: 'medium',
|
|
193
|
+
speed: 'medium',
|
|
194
|
+
notes: 'Local default — requires GTX 1060 VRAM',
|
|
195
|
+
},
|
|
196
|
+
{
|
|
197
|
+
id: 'qwen2.5-coder:7b',
|
|
198
|
+
contextWindow: 32768,
|
|
199
|
+
pricing: 'free',
|
|
200
|
+
quality: 'medium',
|
|
201
|
+
speed: 'medium',
|
|
202
|
+
notes: 'Local coder model',
|
|
203
|
+
},
|
|
204
|
+
{
|
|
205
|
+
id: 'llama3.2:latest',
|
|
206
|
+
contextWindow: 128000,
|
|
207
|
+
pricing: 'free',
|
|
208
|
+
quality: 'medium',
|
|
209
|
+
speed: 'fast',
|
|
210
|
+
notes: 'Local fast model',
|
|
211
|
+
},
|
|
212
|
+
],
|
|
213
|
+
};
|
|
214
|
+
/**
 * Returns the default model ID for a provider.
 *
 * Resolution order:
 *   1. Env var ${PROVIDER_UPPER}_MODEL, when set to a non-empty value
 *      (e.g. GROQ_MODEL=mixtral-8x7b-32768 → uses that instead).
 *   2. The first entry of the provider's list in MODEL_REGISTRY
 *      (looked up by lower-cased provider name).
 *
 * @param {string} provider - Provider name; matched case-insensitively.
 * @returns {string} The model id, or '' when the provider is not a string,
 *   is not in the registry, or has an empty model list.
 */
function getDefaultModel(provider) {
    // A missing/non-string provider has no env key and no registry entry.
    if (typeof provider !== 'string')
        return '';
    const envOverride = process.env[`${provider.toUpperCase()}_MODEL`];
    if (envOverride)
        return envOverride;
    const models = exports.MODEL_REGISTRY[provider.toLowerCase()];
    // Array.isArray (rather than a truthiness check) rejects inherited
    // Object.prototype members such as "constructor" or "__proto__", which
    // would otherwise pass the guard and crash on models[0].id.
    if (!Array.isArray(models) || models.length === 0)
        return '';
    return models[0].id;
}
|
|
230
|
+
/**
 * Returns the next model to try after currentModel fails (rate-limited / error).
 *
 * Models are taken in registry order for the lower-cased provider name.
 *
 * @param {string} provider - Provider name; matched case-insensitively.
 * @param {string} currentModel - Id of the model that just failed.
 * @returns {string|null} The id of the entry following currentModel, or null
 *   when the provider is unknown, currentModel is not listed for it, or
 *   currentModel is already the last entry — the caller should then mark the
 *   whole provider entry rate-limited and rotate to the next provider.
 */
function getNextModelOnFailure(provider, currentModel) {
    // This runs inside error-recovery paths, so it must never throw itself.
    if (typeof provider !== 'string')
        return null;
    const models = exports.MODEL_REGISTRY[provider.toLowerCase()];
    // Array.isArray rejects inherited Object.prototype members (e.g. the key
    // "constructor"), which are truthy but have no findIndex.
    if (!Array.isArray(models) || models.length === 0)
        return null;
    const idx = models.findIndex((m) => m.id === currentModel);
    if (idx === -1 || idx >= models.length - 1)
        return null;
    return models[idx + 1].id;
}
|
|
244
|
+
/**
 * Returns the ModelConfig for a specific provider + model id.
 *
 * @param {string} provider - Provider name; matched case-insensitively.
 * @param {string} modelId - Exact model id to look up.
 * @returns {object|undefined} The matching registry entry, or undefined when
 *   the provider is not a string, is not in the registry, or does not list
 *   modelId.
 */
function getRegistryEntry(provider, modelId) {
    if (typeof provider !== 'string')
        return undefined;
    const models = exports.MODEL_REGISTRY[provider.toLowerCase()];
    // Array.isArray rejects inherited Object.prototype members (e.g. the key
    // "constructor"), which are truthy but have no find().
    if (!Array.isArray(models))
        return undefined;
    return models.find((m) => m.id === modelId);
}
|
|
253
|
+
/**
 * Returns all models for a provider, optionally filtered by pricing tier.
 *
 * @param {string} provider - Provider name; matched case-insensitively.
 * @param {{ pricing?: string }} [filter] - When given with a `pricing` value,
 *   only entries whose `pricing` equals it are returned; a filter without
 *   `pricing` keeps every entry.
 * @returns {object[]} Without a filter, the registry's own array for the
 *   provider (not a copy); with a filter, a new filtered array. An empty
 *   array when the provider is not a string or is not in the registry.
 */
function getModelsForProvider(provider, filter) {
    const found = typeof provider === 'string'
        ? exports.MODEL_REGISTRY[provider.toLowerCase()]
        : undefined;
    // `?? []` alone would let inherited Object.prototype members through
    // (e.g. the key "constructor"); only a real array is a model list.
    const models = Array.isArray(found) ? found : [];
    if (!filter)
        return models;
    return models.filter((m) => !filter.pricing || m.pricing === filter.pricing);
}
|