aiden-runtime 3.16.2 → 3.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -66,6 +66,7 @@ const entityGraph_1 = require("./entityGraph");
66
66
  const learningMemory_1 = require("./learningMemory");
67
67
  const conversationMemory_1 = require("./conversationMemory");
68
68
  const router_1 = require("../providers/router");
69
+ const modelRegistry_1 = require("./modelRegistry");
69
70
  const index_1 = require("../providers/index");
70
71
  const knowledgeBase_1 = require("./knowledgeBase");
71
72
  const skillTeacher_1 = require("./skillTeacher");
@@ -80,6 +81,7 @@ const semanticMemory_1 = require("./semanticMemory");
80
81
  const sessionMemory_1 = require("./sessionMemory");
81
82
  const goalTracker_1 = require("./goalTracker");
82
83
  const hooks_1 = require("./hooks");
84
+ const pluginLoader_1 = require("./pluginLoader");
83
85
  const instinctSystem_1 = require("./instinctSystem");
84
86
  const workflowTracker_1 = require("./workflowTracker");
85
87
  const parallelExecutor_1 = require("./parallelExecutor");
@@ -427,6 +429,7 @@ function inferPhasesFromSteps(steps) {
427
429
  clipboard_read: 'execution', clipboard_write: 'execution',
428
430
  window_list: 'execution', window_focus: 'execution',
429
431
  app_launch: 'execution', app_close: 'execution',
432
+ system_volume: 'execution',
430
433
  watch_folder: 'execution', watch_folder_list: 'execution',
431
434
  };
432
435
  const phaseNames = {
@@ -717,7 +720,7 @@ async function planWithLLM(message, history, apiKey, model, provider, memoryCont
717
720
  'screenshot', 'screen_read', 'vision_loop', 'wait',
718
721
  'code_interpreter_python', 'code_interpreter_node',
719
722
  'clipboard_read', 'clipboard_write', 'window_list', 'window_focus',
720
- 'app_launch', 'app_close',
723
+ 'app_launch', 'app_close', 'system_volume',
721
724
  'watch_folder', 'watch_folder_list',
722
725
  'send_file_local', 'receive_file_local',
723
726
  'get_briefing',
@@ -953,11 +956,13 @@ If requires_execution is false:
953
956
 
954
957
  NOTE: "goals" is only required when 2+ distinct intents are present. Single-goal messages may omit it.
955
958
 
956
- THE 'respond' TOOL — use this for ALL conversational messages:
957
- - 'respond' is ALWAYS a valid plan. When no external tool is needed, plan a single respond step.
959
+ THE 'respond' TOOL — use this for conversational messages ONLY:
958
960
  - respond: { "message": "your answer text here" }
959
961
  - Use respond for: greetings, capability questions, simple facts from training data, clarifying questions, short answers.
960
962
  - Example: user says "hi" → { "goal": "hi", "requires_execution": true, "plan": [{ "step": 1, "tool": "respond", "input": { "message": "Hi! What can I help you with today?" } }] }
963
+ - NEVER use respond as the only step when the user expects a physical action on their machine.
964
+ "open chrome", "close spotify", "increase volume" -> these REQUIRE the actual tool (see SYSTEM ACTION GATE).
965
+ Responding with "Done. I've opened Chrome." without calling app_launch is lying — never do it.
961
966
 
962
967
  ACTION GATE RULES — apply BEFORE creating any plan:
963
968
  1. CAPABILITY GATE: If message is "Can you do X?" / "Can you X?" / "Are you able to X?" → plan respond with answer
@@ -970,6 +975,28 @@ ACTION GATE RULES — apply BEFORE creating any plan:
970
975
  4. NEVER create comparison tables, reports, or verdicts unless user explicitly asked for them
971
976
  5. NEVER mention Pega, BlueWinston, Gaude Digital, or any third-party product by name
972
977
 
978
+ SYSTEM ACTION GATE — CRITICAL, apply before any system-action request:
979
+ ANY request to open, launch, start, close, kill, quit an app OR change system volume
980
+ MUST use the corresponding tool. Using 'respond' alone is WRONG for these.
981
+
982
+ Exact mappings (use these, do not improvise):
983
+ - "open chrome" / "launch chrome" / "open Google Chrome" → app_launch { app_name: "chrome" }
984
+ - "close chrome" / "kill chrome" / "quit chrome" → app_close { app_name: "chrome" }
985
+ - "open spotify" / "launch spotify" → app_launch { app_name: "spotify" }
986
+ - "close spotify" → app_close { app_name: "spotify" }
987
+ - "open discord" / "open VS Code" / "open notepad" → app_launch { app_name: "<name>" }
988
+ - "increase volume" / "volume up 20" / "turn up volume" → system_volume { volume: 20 }
989
+ - "decrease volume" / "volume down 10" → system_volume { volume: 10, direction: "down" }
990
+ - "mute" / "mute sound" → system_volume { mute: true }
991
+ - "unmute" → system_volume { unmute: true }
992
+ - "open file explorer" → app_launch { app_name: "explorer" }
993
+
994
+ WRONG (never do this for the above requests):
995
+ { "tool": "respond", "input": { "message": "Done. I've opened Chrome." } } <- FAKE, LYING
996
+
997
+ CORRECT:
998
+ { "tool": "app_launch", "input": { "app_name": "chrome" } } <- actually opens Chrome
999
+
973
1000
  ## SKILL DISCOVERY
974
1001
 
975
1002
  Before planning any multi-step task (>=2 tools), call lookup_skill with the user's message as the query.
@@ -986,7 +1013,14 @@ TIER 1 (USE FIRST): lookup_skill, respond, web_search, fetch_page, fetch_url, de
986
1013
  TIER 2 (USE SECOND): file_write, file_read, file_list, shell_exec, run_powershell, run_python, run_node, code_interpreter_python, code_interpreter_node, git_status, git_commit, git_push, clipboard_read, clipboard_write, spawn_subagent, swarm
987
1014
  → Use when you need to read/write files, run scripts, or run git commands
988
1015
 
989
- TIER 3 (USE THIRD): open_browser, browser_click, browser_type, browser_extract, browser_screenshot, window_list, window_focus, app_launch, app_close
1016
+ TIER 3a SYSTEM ACTIONS (use whenever user asks for OS-level actions):
1017
+ app_launch, app_close, system_volume, window_focus, window_list
1018
+ → USE IMMEDIATELY when user asks to open/close/launch/kill an app, change volume, or focus a window
1019
+ → Do NOT substitute with respond — the user wants the ACTION to happen, not acknowledgment
1020
+ → Do NOT use shell_exec as a substitute; app_launch/app_close are the correct tools
1021
+
1022
+ TIER 3b — BROWSER UI (use when task requires interacting with a website UI):
1023
+ open_browser, browser_click, browser_type, browser_extract, browser_screenshot
990
1024
  → ONLY when task requires interacting with a website UI
991
1025
  → NEVER use browser when an API tool can do the same job
992
1026
  → For other selectors always pass selector: "<css selector>", never guess at element text.
@@ -1168,11 +1202,19 @@ Output ONLY valid JSON, nothing else:`;
1168
1202
  e.message?.includes('429') ||
1169
1203
  e.message?.includes('rate') ||
1170
1204
  e.message?.includes('aborted')) {
1171
- try {
1172
- (0, router_1.markRateLimited)(curApiName);
1173
- console.log(`[Planner] Marked ${curApiName} as rate limited — will rotate away`);
1205
+ // Try next model within the same provider before marking whole entry rate-limited
1206
+ const nextModel = (0, modelRegistry_1.getNextModelOnFailure)(curProvider, curModel);
1207
+ if (nextModel) {
1208
+ console.log(`[Planner] Model ${curModel} failed — trying next model ${nextModel} on same provider (${curApiName})`);
1209
+ curModel = nextModel;
1210
+ }
1211
+ else {
1212
+ try {
1213
+ (0, router_1.markRateLimited)(curApiName);
1214
+ console.log(`[Planner] Marked ${curApiName} as rate limited — will rotate away`);
1215
+ }
1216
+ catch { }
1174
1217
  }
1175
- catch { }
1176
1218
  }
1177
1219
  }
1178
1220
  // Wait before next attempt — helps with rate-limit recovery
@@ -1366,7 +1408,7 @@ const VALID_TOOLS = [
1366
1408
  'screenshot', 'screen_read', 'vision_loop', 'wait',
1367
1409
  'code_interpreter_python', 'code_interpreter_node',
1368
1410
  'clipboard_read', 'clipboard_write', 'window_list', 'window_focus',
1369
- 'app_launch', 'app_close',
1411
+ 'app_launch', 'app_close', 'system_volume',
1370
1412
  'watch_folder', 'watch_folder_list',
1371
1413
  'send_file_local', 'receive_file_local',
1372
1414
  'clarify', 'todo', 'cronjob', 'vision_analyze',
@@ -1667,11 +1709,25 @@ const NO_RETRY_TOOLS = new Set([
1667
1709
  async function executeToolWithRetry(tool, input, maxRetries = 2) {
1668
1710
  const retryable = !NO_RETRY_TOOLS.has(tool);
1669
1711
  const effectiveMax = retryable ? maxRetries : 0;
1712
+ // ── Plugin preTool hooks ──────────────────────────────────────
1713
+ let effectiveInput = input;
1714
+ for (const hook of pluginLoader_1.pluginHooks.preTool) {
1715
+ try {
1716
+ const r = await hook(tool, effectiveInput);
1717
+ if (r.skip)
1718
+ return { success: true, output: '[skipped by plugin]', skippedByPlugin: true };
1719
+ if (r.input)
1720
+ effectiveInput = r.input;
1721
+ }
1722
+ catch (e) {
1723
+ console.warn(`[PluginHook] preTool error for ${tool}:`, e.message);
1724
+ }
1725
+ }
1670
1726
  for (let attempt = 0; attempt <= effectiveMax; attempt++) {
1671
1727
  try {
1672
- const result = await (0, toolRegistry_1.executeTool)(tool, input);
1728
+ const result = await (0, toolRegistry_1.executeTool)(tool, effectiveInput);
1673
1729
  if (result.success) {
1674
- const quality = validateResultQuality(tool, input, result.output || result);
1730
+ const quality = validateResultQuality(tool, effectiveInput, result.output || result);
1675
1731
  if (!quality.valid) {
1676
1732
  console.log(`[Quality] ${tool} returned but quality check failed: ${quality.reason}`);
1677
1733
  if (attempt < effectiveMax) {
@@ -1683,7 +1739,19 @@ async function executeToolWithRetry(tool, input, maxRetries = 2) {
1683
1739
  console.log(`[Quality] ${tool} — accepting low-quality result after ${effectiveMax} retries`);
1684
1740
  appendLesson(`${tool} produced low-quality output (${quality.reason}) after ${effectiveMax} retries — consider alternative approach for this tool.`);
1685
1741
  }
1686
- return result;
1742
+ // ── Plugin postTool hooks ─────────────────────────────
1743
+ let finalResult = result;
1744
+ for (const hook of pluginLoader_1.pluginHooks.postTool) {
1745
+ try {
1746
+ const r = await hook(tool, effectiveInput, finalResult);
1747
+ if (r.result)
1748
+ finalResult = r.result;
1749
+ }
1750
+ catch (e) {
1751
+ console.warn(`[PluginHook] postTool error for ${tool}:`, e.message);
1752
+ }
1753
+ }
1754
+ return finalResult;
1687
1755
  }
1688
1756
  if (attempt < effectiveMax) {
1689
1757
  const delay = Math.min(1000 * Math.pow(2, attempt), 5000);
@@ -1720,7 +1788,7 @@ const SEQUENTIAL_ONLY = new Set([
1720
1788
  'open_browser', 'browser_click', 'browser_type', 'browser_extract',
1721
1789
  'mouse_move', 'mouse_click', 'keyboard_type', 'keyboard_press',
1722
1790
  'screenshot', 'screen_read', 'vision_loop', 'notify', 'wait',
1723
- 'clipboard_write', 'window_focus', 'app_launch', 'app_close',
1791
+ 'clipboard_write', 'window_focus', 'app_launch', 'app_close', 'system_volume',
1724
1792
  'watch_folder',
1725
1793
  ]);
1726
1794
  function buildDependencyGroups(steps) {
@@ -1791,6 +1859,7 @@ async function executePlan(plan, onStep, onPhaseChange, existingState, replanApi
1791
1859
  clipboard_read: 'execution', clipboard_write: 'execution',
1792
1860
  window_list: 'execution', window_focus: 'execution',
1793
1861
  app_launch: 'execution', app_close: 'execution',
1862
+ system_volume: 'execution',
1794
1863
  watch_folder: 'execution', watch_folder_list: 'execution',
1795
1864
  };
1796
1865
  let lastCapability = '';
@@ -2198,6 +2267,18 @@ function responderSystem(userName, date) {
2198
2267
  return (0, aidenPersonality_1.AIDEN_RESPONDER_SYSTEM)(userName, date);
2199
2268
  }
2200
2269
  async function respondWithResults(originalMessage, plan, results, history, userName, apiKey, model, providerName, onToken, sessionId, goals) {
2270
+ // ── CommandGate / PermissionGate short-circuit ───────────────
2271
+ // If ANY tool was blocked with an approval gate, stream the
2272
+ // approval question directly — never let the LLM hallucinate "Done".
2273
+ const gatedResult = results.find(r => !r.success && r.error &&
2274
+ (r.error.startsWith('CommandGate:') || r.error.startsWith('PermissionGate:')));
2275
+ if (gatedResult) {
2276
+ const blocked = gatedResult.error
2277
+ .replace(/^(CommandGate|PermissionGate):\s*/i, '')
2278
+ .replace(/:\s*$/, '');
2279
+ onToken(`I need your approval before I can do that.\n\n**Blocked action:** ${blocked}\n\nReply **yes** to confirm, or tell me what you'd like instead.`);
2280
+ return;
2281
+ }
2201
2282
  const date = new Date().toLocaleDateString('en-US', {
2202
2283
  weekday: 'long', month: 'long', day: 'numeric', year: 'numeric',
2203
2284
  });
@@ -134,7 +134,7 @@ class EmailAdapter {
134
134
  host: this.imapHost,
135
135
  port: this.imapPort,
136
136
  tls: true,
137
- tlsOptions: { rejectUnauthorized: false },
137
+ tlsOptions: { rejectUnauthorized: false }, // user-configured IMAP server may use self-signed cert
138
138
  user: this.imapUser,
139
139
  password: this.imapPassword,
140
140
  authTimeout: 5000,
@@ -0,0 +1,261 @@
1
+ "use strict";
2
+ /**
3
+ * core/modelRegistry.ts
4
+ * Curated list of best free/cheap models per provider.
5
+ * Updated manually — not auto-discovered (keeps things simple and predictable).
6
+ *
7
+ * Usage:
8
+ * getDefaultModel('groq') → 'llama-3.3-70b-versatile'
9
+ * getNextModelOnFailure('groq', 'llama-3.3-70b-versatile') → 'llama-3.1-70b-versatile'
10
+ * getRegistryEntry('groq', 'llama-3.3-70b-versatile') → ModelConfig | undefined
11
+ */
12
+ Object.defineProperty(exports, "__esModule", { value: true });
13
+ exports.MODEL_REGISTRY = void 0;
14
+ exports.getDefaultModel = getDefaultModel;
15
+ exports.getNextModelOnFailure = getNextModelOnFailure;
16
+ exports.getRegistryEntry = getRegistryEntry;
17
+ exports.getModelsForProvider = getModelsForProvider;
18
+ /**
19
+ * Curated model lists keyed by lowercase provider name. Each list is ordered by preference — the first entry is the provider default returned by getDefaultModel, and getNextModelOnFailure steps through the list in order.
20
+ * Free models come before paid unless the quality difference is large.
21
+ * Env var override: set ${PROVIDER_UPPER}_MODEL to force a specific default model. The override is read by getDefaultModel; it does not alter this table or the fallback order.
22
+ * e.g. GROQ_MODEL=mixtral-8x7b-32768 overrides the groq default. Entry fields: id, contextWindow (presumably tokens — confirm), pricing ('free' | 'paid'), quality, speed, notes.
23
+ */
24
+ exports.MODEL_REGISTRY = {
25
+ groq: [
26
+ {
27
+ id: 'llama-3.3-70b-versatile',
28
+ contextWindow: 128000,
29
+ pricing: 'free',
30
+ quality: 'high',
31
+ speed: 'fast',
32
+ notes: 'Primary — fastest + highest quality free tier',
33
+ },
34
+ {
35
+ id: 'llama-3.1-70b-versatile',
36
+ contextWindow: 128000,
37
+ pricing: 'free',
38
+ quality: 'high',
39
+ speed: 'fast',
40
+ notes: 'Fallback when 3.3 is rate-limited',
41
+ },
42
+ {
43
+ id: 'llama3-70b-8192',
44
+ contextWindow: 8192,
45
+ pricing: 'free',
46
+ quality: 'high',
47
+ speed: 'fast',
48
+ notes: 'Smaller context but very reliable',
49
+ },
50
+ {
51
+ id: 'mixtral-8x7b-32768',
52
+ contextWindow: 32768,
53
+ pricing: 'free',
54
+ quality: 'medium',
55
+ speed: 'fast',
56
+ notes: 'Good for structured JSON tasks',
57
+ },
58
+ {
59
+ id: 'gemma2-9b-it',
60
+ contextWindow: 8192,
61
+ pricing: 'free',
62
+ quality: 'medium',
63
+ speed: 'fast',
64
+ notes: 'Light fallback',
65
+ },
66
+ ],
67
+ openrouter: [
68
+ {
69
+ id: 'meta-llama/llama-3.3-70b-instruct:free',
70
+ contextWindow: 131072,
71
+ pricing: 'free',
72
+ quality: 'high',
73
+ speed: 'medium',
74
+ notes: 'Best free model on OpenRouter',
75
+ },
76
+ {
77
+ id: 'meta-llama/llama-3.1-70b-instruct:free',
78
+ contextWindow: 131072,
79
+ pricing: 'free',
80
+ quality: 'high',
81
+ speed: 'medium',
82
+ notes: 'Reliable free fallback',
83
+ },
84
+ {
85
+ id: 'mistralai/mistral-7b-instruct:free',
86
+ contextWindow: 32768,
87
+ pricing: 'free',
88
+ quality: 'medium',
89
+ speed: 'fast',
90
+ notes: 'Fast small model for simple tasks',
91
+ },
92
+ {
93
+ id: 'google/gemma-2-9b-it:free',
94
+ contextWindow: 8192,
95
+ pricing: 'free',
96
+ quality: 'medium',
97
+ speed: 'fast',
98
+ notes: 'Emergency fallback',
99
+ },
100
+ ],
101
+ together: [ // every entry in this list is pricing: 'paid'
102
+ {
103
+ id: 'meta-llama/llama-3.1-405b-instruct',
104
+ contextWindow: 130000,
105
+ pricing: 'paid',
106
+ quality: 'high',
107
+ speed: 'medium',
108
+ notes: '405B — highest quality, use sparingly ($5 credit)',
109
+ },
110
+ {
111
+ id: 'meta-llama/llama-3.3-70b-instruct-turbo',
112
+ contextWindow: 131072,
113
+ pricing: 'paid',
114
+ quality: 'high',
115
+ speed: 'fast',
116
+ notes: 'Faster cheaper Together option',
117
+ },
118
+ {
119
+ id: 'meta-llama/llama-3.1-70b-instruct-turbo',
120
+ contextWindow: 131072,
121
+ pricing: 'paid',
122
+ quality: 'high',
123
+ speed: 'fast',
124
+ notes: 'Fallback paid',
125
+ },
126
+ ],
127
+ nvidia: [
128
+ {
129
+ id: 'nvidia/llama-3.3-nemotron-super-49b-v1',
130
+ contextWindow: 131072,
131
+ pricing: 'free',
132
+ quality: 'high',
133
+ speed: 'medium',
134
+ notes: 'NVIDIA NIM — high quality free inference',
135
+ },
136
+ {
137
+ id: 'meta/llama-3.3-70b-instruct',
138
+ contextWindow: 131072,
139
+ pricing: 'free',
140
+ quality: 'high',
141
+ speed: 'medium',
142
+ notes: 'NVIDIA-hosted Llama fallback',
143
+ },
144
+ {
145
+ id: 'mistralai/mixtral-8x7b-instruct-v0.1',
146
+ contextWindow: 32768,
147
+ pricing: 'free',
148
+ quality: 'medium',
149
+ speed: 'fast',
150
+ notes: 'Lightweight NVIDIA fallback',
151
+ },
152
+ ],
153
+ gemini: [
154
+ {
155
+ id: 'gemini-2.5-flash',
156
+ contextWindow: 1000000,
157
+ pricing: 'free',
158
+ quality: 'high',
159
+ speed: 'fast',
160
+ notes: '1M context, thinking model, best free Gemini',
161
+ },
162
+ {
163
+ id: 'gemini-2.0-flash',
164
+ contextWindow: 1000000,
165
+ pricing: 'free',
166
+ quality: 'high',
167
+ speed: 'fast',
168
+ notes: 'Stable previous gen, good fallback',
169
+ },
170
+ {
171
+ id: 'gemini-1.5-flash',
172
+ contextWindow: 1000000,
173
+ pricing: 'free',
174
+ quality: 'medium',
175
+ speed: 'fast',
176
+ notes: 'Conservative fallback if 2.x rate-limited',
177
+ },
178
+ {
179
+ id: 'gemini-1.5-flash-8b',
180
+ contextWindow: 1000000,
181
+ pricing: 'free',
182
+ quality: 'low',
183
+ speed: 'fast',
184
+ notes: 'Emergency fallback — smallest Gemini',
185
+ },
186
+ ],
187
+ ollama: [
188
+ {
189
+ id: 'gemma4:e4b',
190
+ contextWindow: 8192,
191
+ pricing: 'free',
192
+ quality: 'medium',
193
+ speed: 'medium',
194
+ notes: 'Local default — requires GTX 1060 VRAM',
195
+ },
196
+ {
197
+ id: 'qwen2.5-coder:7b',
198
+ contextWindow: 32768,
199
+ pricing: 'free',
200
+ quality: 'medium',
201
+ speed: 'medium',
202
+ notes: 'Local coder model',
203
+ },
204
+ {
205
+ id: 'llama3.2:latest',
206
+ contextWindow: 128000,
207
+ pricing: 'free',
208
+ quality: 'medium',
209
+ speed: 'fast',
210
+ notes: 'Local fast model',
211
+ },
212
+ ],
213
+ };
/**
 * Returns the default model ID for a provider.
 *
 * Resolution order:
 *   1. Env var `${PROVIDER_UPPER}_MODEL`, when set to a non-empty value
 *      (e.g. GROQ_MODEL=mixtral-8x7b-32768). The value is returned as-is and
 *      is not checked against the registry.
 *   2. The first entry of the provider's list in MODEL_REGISTRY.
 *
 * @param {string} provider Provider name; matched case-insensitively.
 * @returns {string} The model ID, or '' when the provider is not a string,
 *   is not a key of the registry, or has an empty model list.
 */
function getDefaultModel(provider) {
    // A missing/non-string provider would throw on .toUpperCase(); treat it as unknown.
    if (typeof provider !== 'string')
        return '';
    const envOverride = process.env[`${provider.toUpperCase()}_MODEL`];
    if (envOverride)
        return envOverride;
    const key = provider.toLowerCase();
    // Own-property check: a name such as 'constructor' would otherwise resolve
    // to a member inherited from Object.prototype instead of a model list.
    if (!Object.prototype.hasOwnProperty.call(exports.MODEL_REGISTRY, key))
        return '';
    const models = exports.MODEL_REGISTRY[key];
    if (!Array.isArray(models) || models.length === 0)
        return '';
    return models[0].id;
}
/**
 * Returns the next model to try after currentModel fails (rate-limited / error).
 *
 * Models are tried in registry order, so the result is the entry that follows
 * currentModel in the provider's list.
 *
 * @param {string} provider Provider name; matched case-insensitively.
 * @param {string} currentModel ID of the model that just failed.
 * @returns {string|null} The next model ID, or null when there is nothing to
 *   fall back to: currentModel is the last entry, currentModel is not in the
 *   list (e.g. it came from an env override), or the provider is not a string
 *   or not a key of the registry. On null the caller should mark the whole
 *   provider entry rate-limited and rotate to the next provider.
 */
function getNextModelOnFailure(provider, currentModel) {
    // This runs on a failure-handling path, so it must never throw itself:
    // a non-string provider is reported as "no fallback" instead.
    if (typeof provider !== 'string')
        return null;
    const key = provider.toLowerCase();
    // Own-property check keeps inherited names ('constructor', 'toString', ...)
    // from being mistaken for a model list.
    if (!Object.prototype.hasOwnProperty.call(exports.MODEL_REGISTRY, key))
        return null;
    const models = exports.MODEL_REGISTRY[key];
    if (!Array.isArray(models) || models.length === 0)
        return null;
    const idx = models.findIndex(m => m.id === currentModel);
    if (idx === -1 || idx >= models.length - 1)
        return null;
    return models[idx + 1].id;
}
/**
 * Returns the registry entry for a specific provider + model id.
 *
 * @param {string} provider Provider name; matched case-insensitively.
 * @param {string} modelId Exact model ID to look up.
 * @returns {object|undefined} The matching entry object from MODEL_REGISTRY
 *   (the stored object itself, not a copy), or undefined when the provider is
 *   not a string, is not a key of the registry, or has no entry with that ID.
 */
function getRegistryEntry(provider, modelId) {
    if (typeof provider !== 'string')
        return undefined;
    const key = provider.toLowerCase();
    // Own-property check: inherited names such as 'constructor' are not
    // providers and have no .find(), so they must not reach the lookup below.
    if (!Object.prototype.hasOwnProperty.call(exports.MODEL_REGISTRY, key))
        return undefined;
    const models = exports.MODEL_REGISTRY[key];
    if (!Array.isArray(models))
        return undefined;
    return models.find(m => m.id === modelId);
}
/**
 * Returns all models for a provider, optionally filtered by pricing tier.
 *
 * @param {string} provider Provider name; matched case-insensitively.
 * @param {{ pricing?: string }} [filter] When given with a `pricing` value,
 *   only entries whose `pricing` equals it are returned.
 * @returns {Array<object>} The provider's entries in registry order. Without a
 *   filter this is the registry's own array (not a copy), so callers should
 *   not mutate it; with a filter it is a new array. An empty array is returned
 *   when the provider is not a string or is not a key of the registry.
 */
function getModelsForProvider(provider, filter) {
    let models = [];
    if (typeof provider === 'string') {
        const key = provider.toLowerCase();
        // Own-property check: without it an inherited name such as
        // 'constructor' yields a non-array value that `?? []` does not replace.
        if (Object.prototype.hasOwnProperty.call(exports.MODEL_REGISTRY, key)) {
            const listed = exports.MODEL_REGISTRY[key];
            if (Array.isArray(listed))
                models = listed;
        }
    }
    if (!filter)
        return models;
    const { pricing } = filter;
    return models.filter(m => !pricing || m.pricing === pricing);
}