@auxiora/runtime 1.10.15 → 1.10.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -9,7 +9,7 @@ import { Vault, vaultExists } from '@auxiora/vault';
9
9
  import { audit } from '@auxiora/audit';
10
10
  import { getWorkspacePath, getSoulPath, getAgentsPath, getIdentityPath, getUserPath, getBehaviorsPath, getWebhooksPath, getScreenshotsDir, } from '@auxiora/core';
11
11
  import { createArchitect, ARCHITECT_BASE_PROMPT, VaultStorageAdapter } from '@auxiora/personality/architect';
12
- import { toolRegistry, toolExecutor, initializeToolExecutor, ToolPermission, setBrowserManager, setWebhookManager, setBehaviorManager, setProviderFactory, setOrchestrationEngine, setResearchEngine, setClipboardMonitor, setAppController, setSystemStateMonitor, setEmailIntelligence, setCalendarIntelligence, setContactGraph, setContextRecall, setComposeEngine, setGrammarChecker, setLanguageDetector, } from '@auxiora/tools';
12
+ import { toolRegistry, toolExecutor, initializeToolExecutor, ToolPermission, setBrowserManager, setWebBrowserManager, setWebhookManager, setBehaviorManager, setProviderFactory, setOrchestrationEngine, setResearchEngine, setClipboardMonitor, setAppController, setSystemStateMonitor, setEmailIntelligence, setCalendarIntelligence, setContactGraph, setContextRecall, setComposeEngine, setGrammarChecker, setLanguageDetector, } from '@auxiora/tools';
13
13
  import { ResearchEngine, ResearchIntentDetector, DeepResearchOrchestrator, ReportGenerator } from '@auxiora/research';
14
14
  import { OrchestrationEngine } from '@auxiora/orchestrator';
15
15
  import * as crypto from 'node:crypto';
@@ -250,6 +250,8 @@ export class Auxiora {
250
250
  activeAgents = new Map();
251
251
  channelTargetsPath = path.join(path.dirname(getBehaviorsPath()), 'channel-targets.json');
252
252
  orchestrationHistory = [];
253
+ /** Per-session run state for message queueing. */
254
+ sessionRunStates = new Map();
253
255
  async initialize(options = {}) {
254
256
  // Read version from package.json
255
257
  try {
@@ -700,6 +702,7 @@ export class Auxiora {
700
702
  },
701
703
  });
702
704
  setBrowserManager(this.browserManager);
705
+ setWebBrowserManager(this.browserManager);
703
706
  // Initialize OS bridge
704
707
  const clipboardMonitor = new ClipboardMonitor();
705
708
  const appController = new AppController(process.platform);
@@ -2571,6 +2574,8 @@ export class Auxiora {
2571
2574
  }
2572
2575
  // Append tool usage guidance
2573
2576
  this.standardPrompt += '\n\n---\n\n## Tool Usage\n'
2577
+ + '- IMPORTANT: When the user asks you to create files, generate projects, write code, or perform actions on the filesystem, you MUST use the `bash` and `file_write` tools to actually do the work. Do not just describe what you would do — execute it.\n'
2578
+ + '- Use `bash` to run shell commands (mkdir, npm init, git init, etc.) and `file_write` to create files with content.\n'
2574
2579
  + '- For reading web pages, searching, fetching articles, or looking up information, use the `web_browser` tool. It is fast, lightweight, and always available.\n'
2575
2580
  + '- Only use `browser_navigate` and other browser_* tools when you need JavaScript rendering or interactive features (clicking buttons, filling forms, taking screenshots).\n'
2576
2581
  + '- Never expose raw tool errors to the user. If a tool fails, explain the situation naturally.';
@@ -2870,269 +2875,313 @@ export class Auxiora {
2870
2875
  senderId: client.senderId,
2871
2876
  });
2872
2877
  }
2873
- // Apply redaction if guardrails flagged PII
2874
- let processedContent = content;
2875
- if (inputScan?.action === 'redact' && inputScan.redactedContent) {
2876
- processedContent = inputScan.redactedContent;
2877
- audit('guardrail.triggered', {
2878
- action: 'redact',
2879
- direction: 'input',
2880
- threatCount: inputScan.threats.length,
2881
- channelType: 'webchat',
2882
- sessionId: session.id,
2883
- });
2884
- }
2885
- else if (inputScan?.action === 'warn') {
2886
- audit('guardrail.triggered', {
2887
- action: 'warn',
2888
- direction: 'input',
2889
- threatCount: inputScan.threats.length,
2890
- channelType: 'webchat',
2891
- sessionId: session.id,
2878
+ // ── Message queue gate ─────────────────────────────────────────
2879
+ if (!this.acquireSessionRun(session.id)) {
2880
+ this.enqueueMessage(session.id, {
2881
+ content: payload?.content ?? '',
2882
+ enqueuedAt: Date.now(),
2883
+ client,
2884
+ requestId,
2885
+ chatId: payload?.chatId,
2886
+ modelOverride: payload?.model,
2887
+ providerOverride: payload?.provider,
2892
2888
  });
2893
- }
2894
- // Add user message
2895
- await this.sessions.addMessage(session.id, 'user', processedContent);
2896
- // Check if providers are available
2897
- if (!this.providers) {
2898
2889
  this.sendToClient(client, {
2899
- type: 'message',
2900
- id: requestId,
2901
- payload: {
2902
- role: 'assistant',
2903
- content: 'I need API keys to respond. Please add them:\n\n```\nauxiora vault add ANTHROPIC_API_KEY\n```',
2904
- },
2890
+ type: 'queued',
2891
+ requestId,
2892
+ position: this.getSessionRunState(session.id).queue.length,
2905
2893
  });
2906
2894
  return;
2907
2895
  }
2908
- // Get context messages
2909
- const contextMessages = this.sessions.getContextMessages(session.id, this.getProviderMaxTokens(this.providers.getPrimaryProvider()), 4096);
2910
- const chatMessages = sanitizeTranscript(contextMessages).map((m) => ({
2911
- role: m.role,
2912
- content: m.content,
2913
- }));
2914
2896
  try {
2915
- // Get tool definitions from registry
2916
- const tools = toolRegistry.toProviderFormat();
2917
- // Resolve per-chat personality (metadata overrides global default)
2918
- const chatRecord = chatId ? this.sessions.getChat(chatId) : undefined;
2919
- const chatPersonality = chatRecord?.metadata?.personality;
2920
- const useArchitect = chatPersonality
2921
- ? chatPersonality === 'the-architect'
2922
- : this.config.agent.personality === 'the-architect';
2923
- const basePrompt = useArchitect ? this.architectPrompt : this.standardPrompt;
2924
- // Build enriched prompt through pipeline
2925
- let enrichedPrompt = basePrompt;
2926
- let architectResult = { prompt: basePrompt };
2927
- // Reset Architect conversation state for new chats
2928
- if (useArchitect && this.architect && chatId && !this.architectResetChats.has(chatId)) {
2929
- this.architectResetChats.add(chatId);
2930
- this.architect.resetConversation();
2931
- audit('personality.reset', { sessionId: session.id, chatId });
2932
- }
2933
- if (this.enrichmentPipeline) {
2934
- const enrichCtx = {
2935
- basePrompt,
2936
- userMessage: processedContent,
2937
- history: contextMessages,
2897
+ // Apply redaction if guardrails flagged PII
2898
+ let processedContent = content;
2899
+ if (inputScan?.action === 'redact' && inputScan.redactedContent) {
2900
+ processedContent = inputScan.redactedContent;
2901
+ audit('guardrail.triggered', {
2902
+ action: 'redact',
2903
+ direction: 'input',
2904
+ threatCount: inputScan.threats.length,
2938
2905
  channelType: 'webchat',
2939
- chatId: chatId ?? session.id,
2940
2906
  sessionId: session.id,
2941
- userId: client.senderId ?? 'anonymous',
2942
- toolsUsed: this.lastToolsUsed.get(session.id) ?? [],
2943
- config: this.config,
2944
- };
2945
- const result = await this.enrichmentPipeline.run(enrichCtx);
2946
- enrichedPrompt = result.prompt;
2947
- architectResult = { prompt: enrichedPrompt, architectMeta: result.metadata.architect };
2948
- }
2949
- // Route to best model for this message
2950
- let provider;
2951
- let routingResult;
2952
- if (providerOverride || modelOverride) {
2953
- // Manual override — skip router
2954
- provider = this.providers.getProvider(providerOverride || this.config.provider.primary);
2955
- }
2956
- else if (this.modelRouter && this.config.routing?.enabled !== false) {
2957
- try {
2958
- routingResult = this.modelRouter.route(processedContent, { hasImages: false });
2959
- provider = this.providers.getProvider(routingResult.selection.provider);
2960
- }
2961
- catch {
2962
- provider = this.providers.getPrimaryProvider();
2963
- }
2907
+ });
2964
2908
  }
2965
- else {
2966
- provider = this.providers.getPrimaryProvider();
2967
- }
2968
- // Inject model identity so the AI knows what it's running on
2969
- enrichedPrompt += this.buildModelIdentityFragment(provider, routingResult?.selection.model ?? modelOverride);
2970
- // Execute streaming AI call with tool follow-up loop
2971
- const processingStartTime = Date.now();
2972
- const fallbackCandidates = this.providers.resolveFallbackCandidates();
2973
- const toolsUsed = [];
2974
- let streamChunkCount = 0;
2975
- const { response: fullResponse, usage } = await this.executeWithTools(session.id, chatMessages, enrichedPrompt, provider, (type, data) => {
2976
- if (type === 'text') {
2977
- streamChunkCount++;
2978
- this.sendToClient(client, { type: 'chunk', id: requestId, payload: { content: data } });
2979
- }
2980
- else if (type === 'thinking') {
2981
- this.sendToClient(client, { type: 'thinking', id: requestId, payload: { content: data } });
2982
- }
2983
- else if (type === 'tool_use') {
2984
- toolsUsed.push({ name: data?.name ?? 'unknown', success: true });
2985
- this.sendToClient(client, { type: 'tool_use', id: requestId, payload: data });
2986
- }
2987
- else if (type === 'tool_result') {
2988
- // Update last tool's success based on result
2989
- if (toolsUsed.length > 0 && data?.error) {
2990
- toolsUsed[toolsUsed.length - 1].success = false;
2991
- }
2992
- this.sendToClient(client, { type: 'tool_result', id: requestId, payload: data });
2993
- }
2994
- else if (type === 'status') {
2995
- this.sendToClient(client, { type: 'status', id: requestId, payload: data });
2996
- }
2997
- }, { tools, fallbackCandidates });
2998
- // Feed tool usage to awareness collector
2999
- if (this.architectAwarenessCollector && toolsUsed.length > 0) {
3000
- this.architectAwarenessCollector.updateToolContext(toolsUsed);
3001
- }
3002
- // Store tools for next turn's enrichment context
3003
- this.lastToolsUsed.set(session.id, toolsUsed);
3004
- // ── Guardrail output scan ─────────────────────────────────────
3005
- const outputScan = this.checkOutputGuardrails(fullResponse);
3006
- const finalResponse = outputScan.response;
3007
- if (outputScan.wasModified) {
2909
+ else if (inputScan?.action === 'warn') {
3008
2910
  audit('guardrail.triggered', {
3009
- action: outputScan.action,
3010
- direction: 'output',
2911
+ action: 'warn',
2912
+ direction: 'input',
2913
+ threatCount: inputScan.threats.length,
3011
2914
  channelType: 'webchat',
3012
2915
  sessionId: session.id,
3013
2916
  });
3014
- // Send correction since chunks were already streamed
2917
+ }
2918
+ // Add user message
2919
+ await this.sessions.addMessage(session.id, 'user', processedContent);
2920
+ // Check if providers are available
2921
+ if (!this.providers) {
3015
2922
  this.sendToClient(client, {
3016
- type: 'guardrail_correction',
2923
+ type: 'message',
3017
2924
  id: requestId,
3018
- payload: { content: finalResponse },
2925
+ payload: {
2926
+ role: 'assistant',
2927
+ content: 'I need API keys to respond. Please add them:\n\n```\nauxiora vault add ANTHROPIC_API_KEY\n```',
2928
+ },
3019
2929
  });
2930
+ return;
3020
2931
  }
3021
- // Collect transparency metadata (best-effort)
3022
- let transparencyMeta;
2932
+ // Get context messages
2933
+ const contextMessages = this.sessions.getContextMessages(session.id, this.getProviderMaxTokens(this.providers.getPrimaryProvider()), 4096);
2934
+ const chatMessages = sanitizeTranscript(contextMessages).map((m) => ({
2935
+ role: m.role,
2936
+ content: m.content,
2937
+ }));
2938
+ // Snapshot message count before agentic loop so we can rollback on failure
2939
+ let messageCountSnapshot;
3023
2940
  try {
3024
- const modelId = routingResult?.selection.model ?? modelOverride ?? provider.defaultModel;
3025
- const caps = provider.metadata.models[modelId];
3026
- if (caps) {
3027
- transparencyMeta = collectTransparencyMeta({
3028
- enrichment: this.enrichmentPipeline
3029
- ? { prompt: enrichedPrompt, metadata: { architect: architectResult.architectMeta, stages: architectResult.stages ?? [] } }
3030
- : { prompt: enrichedPrompt, metadata: { stages: [] } },
3031
- completion: { content: finalResponse, usage, model: modelId, finishReason: 'stop', toolUse: toolsUsed.map(t => ({ name: t.name })) },
3032
- capabilities: { costPer1kInput: caps.costPer1kInput, costPer1kOutput: caps.costPer1kOutput },
3033
- providerName: provider.name,
3034
- awarenessSignals: [],
3035
- responseText: finalResponse,
3036
- processingStartTime,
2941
+ // Get tool definitions from registry
2942
+ const tools = toolRegistry.toProviderFormat();
2943
+ // Resolve per-chat personality (metadata overrides global default)
2944
+ const chatRecord = chatId ? this.sessions.getChat(chatId) : undefined;
2945
+ const chatPersonality = chatRecord?.metadata?.personality;
2946
+ const useArchitect = chatPersonality
2947
+ ? chatPersonality === 'the-architect'
2948
+ : this.config.agent.personality === 'the-architect';
2949
+ const basePrompt = useArchitect ? this.architectPrompt : this.standardPrompt;
2950
+ // Build enriched prompt through pipeline
2951
+ let enrichedPrompt = basePrompt;
2952
+ let architectResult = { prompt: basePrompt };
2953
+ // Reset Architect conversation state for new chats
2954
+ if (useArchitect && this.architect && chatId && !this.architectResetChats.has(chatId)) {
2955
+ this.architectResetChats.add(chatId);
2956
+ this.architect.resetConversation();
2957
+ audit('personality.reset', { sessionId: session.id, chatId });
2958
+ }
2959
+ if (this.enrichmentPipeline) {
2960
+ const enrichCtx = {
2961
+ basePrompt,
2962
+ userMessage: processedContent,
2963
+ history: contextMessages,
2964
+ channelType: 'webchat',
2965
+ chatId: chatId ?? session.id,
2966
+ sessionId: session.id,
2967
+ userId: client.senderId ?? 'anonymous',
2968
+ toolsUsed: this.lastToolsUsed.get(session.id) ?? [],
2969
+ config: this.config,
2970
+ };
2971
+ const result = await this.enrichmentPipeline.run(enrichCtx);
2972
+ enrichedPrompt = result.prompt;
2973
+ architectResult = { prompt: enrichedPrompt, architectMeta: result.metadata.architect };
2974
+ }
2975
+ // Route to best model for this message
2976
+ let provider;
2977
+ let routingResult;
2978
+ if (providerOverride || modelOverride) {
2979
+ // Manual override — skip router
2980
+ provider = this.providers.getProvider(providerOverride || this.config.provider.primary);
2981
+ }
2982
+ else if (this.modelRouter && this.config.routing?.enabled !== false) {
2983
+ try {
2984
+ routingResult = this.modelRouter.route(processedContent, { hasImages: false });
2985
+ provider = this.providers.getProvider(routingResult.selection.provider);
2986
+ }
2987
+ catch {
2988
+ provider = this.providers.getPrimaryProvider();
2989
+ }
2990
+ }
2991
+ else {
2992
+ provider = this.providers.getPrimaryProvider();
2993
+ }
2994
+ // Inject model identity so the AI knows what it's running on
2995
+ enrichedPrompt += this.buildModelIdentityFragment(provider, routingResult?.selection.model ?? modelOverride);
2996
+ // Execute streaming AI call with tool follow-up loop
2997
+ const processingStartTime = Date.now();
2998
+ const fallbackCandidates = this.providers.resolveFallbackCandidates();
2999
+ const toolsUsed = [];
3000
+ let streamChunkCount = 0;
3001
+ // Snapshot message count so we can rollback orphaned messages if the loop fails
3002
+ messageCountSnapshot = this.sessions.getMessageCount(session.id);
3003
+ const { response: fullResponse, usage } = await this.executeWithTools(session.id, chatMessages, enrichedPrompt, provider, (type, data) => {
3004
+ if (type === 'text') {
3005
+ streamChunkCount++;
3006
+ this.sendToClient(client, { type: 'chunk', id: requestId, payload: { content: data } });
3007
+ }
3008
+ else if (type === 'thinking') {
3009
+ this.sendToClient(client, { type: 'thinking', id: requestId, payload: { content: data } });
3010
+ }
3011
+ else if (type === 'tool_use') {
3012
+ toolsUsed.push({ name: data?.name ?? 'unknown', success: true });
3013
+ this.sendToClient(client, { type: 'tool_use', id: requestId, payload: data });
3014
+ }
3015
+ else if (type === 'tool_result') {
3016
+ // Update last tool's success based on result
3017
+ if (toolsUsed.length > 0 && data?.error) {
3018
+ toolsUsed[toolsUsed.length - 1].success = false;
3019
+ }
3020
+ this.sendToClient(client, { type: 'tool_result', id: requestId, payload: data });
3021
+ }
3022
+ else if (type === 'status') {
3023
+ this.sendToClient(client, { type: 'status', id: requestId, payload: data });
3024
+ }
3025
+ }, { tools, fallbackCandidates });
3026
+ // Feed tool usage to awareness collector
3027
+ if (this.architectAwarenessCollector && toolsUsed.length > 0) {
3028
+ this.architectAwarenessCollector.updateToolContext(toolsUsed);
3029
+ }
3030
+ // Store tools for next turn's enrichment context
3031
+ this.lastToolsUsed.set(session.id, toolsUsed);
3032
+ // ── Guardrail output scan ─────────────────────────────────────
3033
+ const outputScan = this.checkOutputGuardrails(fullResponse);
3034
+ const finalResponse = outputScan.response;
3035
+ if (outputScan.wasModified) {
3036
+ audit('guardrail.triggered', {
3037
+ action: outputScan.action,
3038
+ direction: 'output',
3039
+ channelType: 'webchat',
3040
+ sessionId: session.id,
3041
+ });
3042
+ // Send correction since chunks were already streamed
3043
+ this.sendToClient(client, {
3044
+ type: 'guardrail_correction',
3045
+ id: requestId,
3046
+ payload: { content: finalResponse },
3037
3047
  });
3038
3048
  }
3039
- }
3040
- catch {
3041
- // Transparency is best-effort — never block message delivery
3042
- }
3043
- // Save assistant message (skip if empty — happens when response is tool-only)
3044
- if (finalResponse) {
3045
- await this.sessions.addMessage(session.id, 'assistant', finalResponse, {
3046
- input: usage.inputTokens,
3047
- output: usage.outputTokens,
3048
- }, {
3049
- ...(architectResult.architectMeta ? { architectDomain: architectResult.architectMeta.detectedContext.domain } : {}),
3050
- ...(transparencyMeta ? { transparency: transparencyMeta } : {}),
3049
+ // Collect transparency metadata (best-effort)
3050
+ let transparencyMeta;
3051
+ try {
3052
+ const modelId = routingResult?.selection.model ?? modelOverride ?? provider.defaultModel;
3053
+ const caps = provider.metadata.models[modelId];
3054
+ if (caps) {
3055
+ transparencyMeta = collectTransparencyMeta({
3056
+ enrichment: this.enrichmentPipeline
3057
+ ? { prompt: enrichedPrompt, metadata: { architect: architectResult.architectMeta, stages: architectResult.stages ?? [] } }
3058
+ : { prompt: enrichedPrompt, metadata: { stages: [] } },
3059
+ completion: { content: finalResponse, usage, model: modelId, finishReason: 'stop', toolUse: toolsUsed.map(t => ({ name: t.name })) },
3060
+ capabilities: { costPer1kInput: caps.costPer1kInput, costPer1kOutput: caps.costPer1kOutput },
3061
+ providerName: provider.name,
3062
+ awarenessSignals: [],
3063
+ responseText: finalResponse,
3064
+ processingStartTime,
3065
+ });
3066
+ }
3067
+ }
3068
+ catch {
3069
+ // Transparency is best-effort — never block message delivery
3070
+ }
3071
+ // Save assistant message (skip if empty — happens when response is tool-only)
3072
+ if (finalResponse) {
3073
+ await this.sessions.addMessage(session.id, 'assistant', finalResponse, {
3074
+ input: usage.inputTokens,
3075
+ output: usage.outputTokens,
3076
+ }, {
3077
+ ...(architectResult.architectMeta ? { architectDomain: architectResult.architectMeta.detectedContext.domain } : {}),
3078
+ ...(transparencyMeta ? { transparency: transparencyMeta } : {}),
3079
+ });
3080
+ }
3081
+ // Record usage for cost tracking
3082
+ if (this.modelRouter && routingResult) {
3083
+ this.modelRouter.recordUsage(routingResult.selection.provider, routingResult.selection.model, usage.inputTokens, usage.outputTokens);
3084
+ }
3085
+ // Extract memories and learn from conversation (if auto-extract enabled)
3086
+ if (this.config.memory?.autoExtract !== false && this.memoryStore && finalResponse && processedContent.length > 20) {
3087
+ void this.extractAndLearn(processedContent, finalResponse, session.id);
3088
+ }
3089
+ // Auto-title webchat chats after first exchange
3090
+ if (finalResponse &&
3091
+ session.metadata.channelType === 'webchat' &&
3092
+ session.messages.length <= 3) {
3093
+ void this.generateChatTitle(session.id, processedContent, finalResponse, client);
3094
+ }
3095
+ // Send done signal
3096
+ this.sendToClient(client, {
3097
+ type: 'done',
3098
+ id: requestId,
3099
+ payload: {
3100
+ usage,
3101
+ routing: routingResult ? {
3102
+ model: routingResult.selection.model,
3103
+ provider: routingResult.selection.provider,
3104
+ isLocal: routingResult.selection.isLocal,
3105
+ taskType: routingResult.classification.type,
3106
+ } : (providerOverride || modelOverride) ? {
3107
+ model: modelOverride,
3108
+ provider: providerOverride || this.config.provider.primary,
3109
+ override: true,
3110
+ } : undefined,
3111
+ architect: architectResult.architectMeta,
3112
+ transparency: transparencyMeta,
3113
+ },
3114
+ });
3115
+ // Background self-awareness analysis
3116
+ if (this.selfAwarenessAssembler) {
3117
+ this.selfAwarenessAssembler.afterResponse({
3118
+ userId: client.senderId ?? 'anonymous',
3119
+ sessionId: session.id,
3120
+ chatId: chatId ?? session.id,
3121
+ currentMessage: processedContent,
3122
+ recentMessages: contextMessages,
3123
+ response: finalResponse,
3124
+ responseTime: Date.now() - (session.metadata.lastActiveAt ?? Date.now()),
3125
+ tokensUsed: { input: usage?.inputTokens ?? 0, output: usage?.outputTokens ?? 0 },
3126
+ streamChunks: streamChunkCount,
3127
+ }).catch(() => { });
3128
+ }
3129
+ // Record conversation in consciousness journal
3130
+ if (this.consciousness) {
3131
+ const journalBase = {
3132
+ sessionId: session.id,
3133
+ type: 'message',
3134
+ context: {
3135
+ domains: architectResult.architectMeta
3136
+ ? [architectResult.architectMeta.detectedContext.domain]
3137
+ : ['general'],
3138
+ },
3139
+ selfState: {
3140
+ health: (this.healthMonitor?.getHealthState().overall === 'unhealthy' ? 'degraded' : this.healthMonitor?.getHealthState().overall ?? 'healthy'),
3141
+ activeProviders: [this.config.provider.primary],
3142
+ uptime: Math.round(process.uptime()),
3143
+ },
3144
+ };
3145
+ this.consciousness.journal.record({ ...journalBase, message: { role: 'user', content: processedContent } }).catch(() => { });
3146
+ this.consciousness.journal.record({ ...journalBase, message: { role: 'assistant', content: finalResponse } }).catch(() => { });
3147
+ }
3148
+ audit('message.sent', {
3149
+ sessionId: session.id,
3150
+ inputTokens: usage.inputTokens,
3151
+ outputTokens: usage.outputTokens,
3152
+ model: routingResult?.selection.model,
3153
+ provider: routingResult?.selection.provider,
3051
3154
  });
3052
3155
  }
3053
- // Record usage for cost tracking
3054
- if (this.modelRouter && routingResult) {
3055
- this.modelRouter.recordUsage(routingResult.selection.provider, routingResult.selection.model, usage.inputTokens, usage.outputTokens);
3156
+ catch (error) {
3157
+ const errorMessage = error instanceof Error ? error.message : 'Unknown error';
3158
+ audit('channel.error', { sessionId: session.id, error: errorMessage });
3159
+ // Rollback orphaned messages from interrupted agentic tool loops.
3160
+ // executeWithTools saves intermediate messages (tool announces + tool results)
3161
+ // incrementally — if it throws, those partial messages pollute the next request.
3162
+ if (typeof messageCountSnapshot === 'number') {
3163
+ const rolled = this.sessions.rollbackMessages(session.id, messageCountSnapshot);
3164
+ if (rolled > 0) {
3165
+ this.logger.info('Rolled back orphaned messages from interrupted tool loop', {
3166
+ sessionId: session.id,
3167
+ rolledBack: rolled,
3168
+ });
3169
+ }
3170
+ }
3171
+ this.sendToClient(client, {
3172
+ type: 'error',
3173
+ id: requestId,
3174
+ payload: { message: `Error: ${errorMessage}` },
3175
+ });
3056
3176
  }
3057
- // Extract memories and learn from conversation (if auto-extract enabled)
3058
- if (this.config.memory?.autoExtract !== false && this.memoryStore && finalResponse && processedContent.length > 20) {
3059
- void this.extractAndLearn(processedContent, finalResponse, session.id);
3177
+ }
3178
+ finally {
3179
+ try {
3180
+ await this.drainSessionQueue(session.id);
3060
3181
  }
3061
- // Auto-title webchat chats after first exchange
3062
- if (finalResponse &&
3063
- session.metadata.channelType === 'webchat' &&
3064
- session.messages.length <= 3) {
3065
- void this.generateChatTitle(session.id, processedContent, finalResponse, client);
3182
+ finally {
3183
+ this.releaseSessionRun(session.id);
3066
3184
  }
3067
- // Send done signal
3068
- this.sendToClient(client, {
3069
- type: 'done',
3070
- id: requestId,
3071
- payload: {
3072
- usage,
3073
- routing: routingResult ? {
3074
- model: routingResult.selection.model,
3075
- provider: routingResult.selection.provider,
3076
- isLocal: routingResult.selection.isLocal,
3077
- taskType: routingResult.classification.type,
3078
- } : (providerOverride || modelOverride) ? {
3079
- model: modelOverride,
3080
- provider: providerOverride || this.config.provider.primary,
3081
- override: true,
3082
- } : undefined,
3083
- architect: architectResult.architectMeta,
3084
- transparency: transparencyMeta,
3085
- },
3086
- });
3087
- // Background self-awareness analysis
3088
- if (this.selfAwarenessAssembler) {
3089
- this.selfAwarenessAssembler.afterResponse({
3090
- userId: client.senderId ?? 'anonymous',
3091
- sessionId: session.id,
3092
- chatId: chatId ?? session.id,
3093
- currentMessage: processedContent,
3094
- recentMessages: contextMessages,
3095
- response: finalResponse,
3096
- responseTime: Date.now() - (session.metadata.lastActiveAt ?? Date.now()),
3097
- tokensUsed: { input: usage?.inputTokens ?? 0, output: usage?.outputTokens ?? 0 },
3098
- streamChunks: streamChunkCount,
3099
- }).catch(() => { });
3100
- }
3101
- // Record conversation in consciousness journal
3102
- if (this.consciousness) {
3103
- const journalBase = {
3104
- sessionId: session.id,
3105
- type: 'message',
3106
- context: {
3107
- domains: architectResult.architectMeta
3108
- ? [architectResult.architectMeta.detectedContext.domain]
3109
- : ['general'],
3110
- },
3111
- selfState: {
3112
- health: (this.healthMonitor?.getHealthState().overall === 'unhealthy' ? 'degraded' : this.healthMonitor?.getHealthState().overall ?? 'healthy'),
3113
- activeProviders: [this.config.provider.primary],
3114
- uptime: Math.round(process.uptime()),
3115
- },
3116
- };
3117
- this.consciousness.journal.record({ ...journalBase, message: { role: 'user', content: processedContent } }).catch(() => { });
3118
- this.consciousness.journal.record({ ...journalBase, message: { role: 'assistant', content: finalResponse } }).catch(() => { });
3119
- }
3120
- audit('message.sent', {
3121
- sessionId: session.id,
3122
- inputTokens: usage.inputTokens,
3123
- outputTokens: usage.outputTokens,
3124
- model: routingResult?.selection.model,
3125
- provider: routingResult?.selection.provider,
3126
- });
3127
- }
3128
- catch (error) {
3129
- const errorMessage = error instanceof Error ? error.message : 'Unknown error';
3130
- audit('channel.error', { sessionId: session.id, error: errorMessage });
3131
- this.sendToClient(client, {
3132
- type: 'error',
3133
- id: requestId,
3134
- payload: { message: `Error: ${errorMessage}` },
3135
- });
3136
3185
  }
3137
3186
  }
3138
3187
  async generateChatTitle(chatId, userMessage, assistantResponse, client) {
@@ -3293,22 +3342,35 @@ export class Auxiora {
3293
3342
  * for synthesis, looping up to maxToolRounds times.
3294
3343
  */
3295
3344
  async executeWithTools(sessionId, messages, enrichedPrompt, provider, onChunk, options) {
3296
- const maxRounds = options?.maxToolRounds ?? 10;
3345
+ const maxRounds = options?.maxToolRounds ?? 20;
3297
3346
  const maxContinuations = 3; // Safety cap for auto-continue on truncation
3298
3347
  const tools = options?.tools ?? toolRegistry.toProviderFormat();
3299
3348
  let currentMessages = [...messages];
3300
3349
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
3301
3350
  let fullResponse = '';
3302
3351
  let lastRoundHadTools = false;
3303
- const loopState = createLoopDetectionState();
3352
+ const loopState = createLoopDetectionState({
3353
+ genericRepeatWarn: 3, // Catch loops faster (default 5)
3354
+ genericRepeatCritical: 8, // Stop after blocked attempts too (default 10)
3355
+ noProgressWarn: 4, // Detect identical results sooner (default 8)
3356
+ noProgressCritical: 8, // Hard stop (default 15)
3357
+ });
3358
+ // Track tools to temporarily exclude when loop is detected
3359
+ const excludedToolNames = new Set();
3360
+ // Track file paths written to detect same-file rewrites
3361
+ const writtenFiles = new Map(); // path → count
3304
3362
  for (let round = 0; round < maxRounds; round++) {
3305
3363
  let roundResponse = '';
3306
3364
  let roundUsage = { inputTokens: 0, outputTokens: 0 };
3307
3365
  let roundFinishReason = '';
3308
3366
  const toolUses = [];
3367
+ // Filter out excluded tools (loop breaker)
3368
+ const roundTools = excludedToolNames.size > 0
3369
+ ? tools.filter(t => !excludedToolNames.has(t.name))
3370
+ : tools;
3309
3371
  const streamOptions = {
3310
3372
  systemPrompt: enrichedPrompt,
3311
- tools: tools.length > 0 ? tools : undefined,
3373
+ tools: roundTools.length > 0 ? roundTools : undefined,
3312
3374
  passThroughAllTools: true,
3313
3375
  };
3314
3376
  const candidates = options?.fallbackCandidates ?? [
@@ -3336,9 +3398,25 @@ export class Auxiora {
3336
3398
  }
3337
3399
  totalUsage.inputTokens += roundUsage.inputTokens;
3338
3400
  totalUsage.outputTokens += roundUsage.outputTokens;
3339
- // No tool calls — check if response was truncated
3401
+ // No tool calls — check if we should nudge the model to continue
3340
3402
  if (toolUses.length === 0) {
3341
3403
  fullResponse += roundResponse;
3404
+ // Nudge the model to use tools if it only output text.
3405
+ // Case 1 (round 0): Model described a plan but never called tools.
3406
+ // The user likely asked for an action, so give the model one more
3407
+ // chance by injecting a "please use tools" follow-up.
3408
+ // Case 2 (round > 0, lastRoundHadTools): Model used tools last round
3409
+ // but emitted a short text-only response this round (stalled).
3410
+ const shouldNudge = round < maxRounds - 1 && ((round === 0) ||
3411
+ (lastRoundHadTools && roundResponse.length < 200));
3412
+ if (shouldNudge) {
3413
+ this.logger.info('Model stated intent without tool calls, nudging to continue', { round, responseLength: roundResponse.length });
3414
+ currentMessages.push({ role: 'assistant', content: roundResponse });
3415
+ currentMessages.push({ role: 'user', content: 'Please proceed — use the bash and file_write tools to do the work now. Do not describe what you will do — actually call the tools.' });
3416
+ onChunk('status', { message: 'Continuing...' });
3417
+ // Don't break — let the loop continue so the model can make tool calls
3418
+ continue;
3419
+ }
3342
3420
  // Auto-continue if response was cut off by token limit
3343
3421
  const wasTruncated = roundFinishReason === 'max_tokens' || roundFinishReason === 'length';
3344
3422
  if (wasTruncated && fullResponse.length > 0) {
@@ -3388,6 +3466,7 @@ export class Auxiora {
3388
3466
  sessionId,
3389
3467
  workingDirectory: getWorkspacePath(),
3390
3468
  timeout: 30000,
3469
+ environment: { ALLOW_OUTSIDE_WORKSPACE: 'true' },
3391
3470
  };
3392
3471
  const toolResultParts = [];
3393
3472
  for (const toolUse of toolUses) {
@@ -3401,6 +3480,15 @@ export class Auxiora {
3401
3480
  recordToolOutcome(loopState, toolUse.id, mapped.skip);
3402
3481
  continue;
3403
3482
  }
3483
+ // Block excluded tools (loop breaker — tool was disabled due to repetition)
3484
+ if (excludedToolNames.has(mapped.name) || excludedToolNames.has(toolUse.name)) {
3485
+ const blockMsg = `Tool "${toolUse.name}" is temporarily disabled because you were repeating the same call. Use a DIFFERENT tool. For creating files, use file_write (or Write). Do NOT call ${toolUse.name} again.`;
3486
+ onChunk('tool_result', { tool: toolUse.name, success: false, error: blockMsg });
3487
+ toolResultParts.push(`[${toolUse.name}]: Error: ${blockMsg}`);
3488
+ recordToolCall(loopState, toolUse.id, mapped.name, mapped.input);
3489
+ recordToolOutcome(loopState, toolUse.id, blockMsg);
3490
+ continue;
3491
+ }
3404
3492
  recordToolCall(loopState, toolUse.id, mapped.name, mapped.input);
3405
3493
  try {
3406
3494
  const result = await toolExecutor.execute(mapped.name, mapped.input, context);
@@ -3417,6 +3505,15 @@ export class Auxiora {
3417
3505
  }
3418
3506
  toolResultParts.push(`[${toolUse.name}]: ${output}`);
3419
3507
  recordToolOutcome(loopState, toolUse.id, output);
3508
+ // Track file writes to detect same-file rewrites
3509
+ if ((mapped.name === 'file_write' || toolUse.name === 'Write') && mapped.input?.path) {
3510
+ const filePath = mapped.input.path;
3511
+ const count = (writtenFiles.get(filePath) || 0) + 1;
3512
+ writtenFiles.set(filePath, count);
3513
+ if (count > 1) {
3514
+ this.logger.info('Same file rewritten multiple times', { filePath, count });
3515
+ }
3516
+ }
3420
3517
  }
3421
3518
  catch (error) {
3422
3519
  const errorMessage = error instanceof Error ? error.message : 'Unknown error';
@@ -3427,7 +3524,14 @@ export class Auxiora {
3427
3524
  }
3428
3525
  // Append tool results directly to conversation (don't rebuild from getContextMessages
3429
3526
  // which can drop messages due to token windowing)
3430
- const toolResultsMessage = `[Tool Results]\n${toolResultParts.join('\n')}`;
3527
+ let toolResultsMessage = `[Tool Results]\n${toolResultParts.join('\n')}`;
3528
+ // Detect same-file rewrites and nudge to move on
3529
+ const rewrittenFiles = [...writtenFiles.entries()].filter(([, count]) => count > 1);
3530
+ if (rewrittenFiles.length > 0) {
3531
+ const fileList = rewrittenFiles.map(([f, c]) => `${f} (${c}x)`).join(', ');
3532
+ toolResultsMessage += `\n\n⚠️ You have rewritten the same file(s) multiple times: ${fileList}. Each file only needs to be written ONCE. Move on to creating the NEXT file in the project. Do NOT rewrite files you've already created.`;
3533
+ this.logger.info('Same-file rewrite nudge injected', { rewrittenFiles: rewrittenFiles.map(([f, c]) => ({ file: f, count: c })) });
3534
+ }
3431
3535
  currentMessages.push({ role: 'user', content: toolResultsMessage });
3432
3536
  await this.sessions.addMessage(sessionId, 'user', toolResultsMessage);
3433
3537
  // Check for tool loop patterns
@@ -3443,11 +3547,24 @@ export class Auxiora {
3443
3547
  break;
3444
3548
  }
3445
3549
  if (detection.severity === 'warning') {
3446
- this.logger.info('Tool loop warning', {
3550
+ // Temporarily exclude the looping tool to force the model to use alternatives
3551
+ const loopingTool = detection.details?.toolName;
3552
+ if (loopingTool) {
3553
+ excludedToolNames.add(loopingTool);
3554
+ // Also exclude CC-equivalent names
3555
+ const ccEquivalents = { bash: 'Bash', file_read: 'Read', file_write: 'Write', file_list: 'Glob' };
3556
+ const auxEquivalents = { Bash: 'bash', Read: 'file_read', Write: 'file_write', Glob: 'file_list' };
3557
+ if (ccEquivalents[loopingTool])
3558
+ excludedToolNames.add(ccEquivalents[loopingTool]);
3559
+ if (auxEquivalents[loopingTool])
3560
+ excludedToolNames.add(auxEquivalents[loopingTool]);
3561
+ }
3562
+ this.logger.info('Tool loop warning — excluding tool from next round', {
3447
3563
  detector: detection.detector,
3448
3564
  message: detection.message,
3565
+ excludedTools: Array.from(excludedToolNames),
3449
3566
  });
3450
- currentMessages.push({ role: 'user', content: `⚠️ Loop detection warning: ${detection.message}\nPlease try a different approach or different parameters.` });
3567
+ currentMessages.push({ role: 'user', content: `⚠️ You are repeating the same tool call. ${detection.message}\nThe previous calls already succeeded — the ${loopingTool} tool is now temporarily disabled. Move on to the NEXT step: create the actual files using the file_write tool (or Write tool). Do NOT try to create directories again.` });
3451
3568
  }
3452
3569
  // Notify the client that tool processing is done and AI is thinking about results
3453
3570
  onChunk('status', { message: 'Analyzing results...' });
@@ -3588,6 +3705,93 @@ export class Auxiora {
3588
3705
  client.ws.send(JSON.stringify(message));
3589
3706
  }
3590
3707
  }
3708
+ acquireSessionRun(sessionId) {
3709
+ let state = this.sessionRunStates.get(sessionId);
3710
+ if (!state) {
3711
+ state = { running: false, queue: [], lastRunStartedAt: 0 };
3712
+ this.sessionRunStates.set(sessionId, state);
3713
+ }
3714
+ if (state.running)
3715
+ return false;
3716
+ state.running = true;
3717
+ state.lastRunStartedAt = Date.now();
3718
+ return true;
3719
+ }
3720
+ releaseSessionRun(sessionId) {
3721
+ const state = this.sessionRunStates.get(sessionId);
3722
+ if (state) {
3723
+ state.running = false;
3724
+ }
3725
+ }
3726
+ getSessionRunState(sessionId) {
3727
+ let state = this.sessionRunStates.get(sessionId);
3728
+ if (!state) {
3729
+ state = { running: false, queue: [], lastRunStartedAt: 0 };
3730
+ this.sessionRunStates.set(sessionId, state);
3731
+ }
3732
+ return state;
3733
+ }
3734
+ enqueueMessage(sessionId, pending) {
3735
+ const state = this.getSessionRunState(sessionId);
3736
+ const cap = this.config.queue?.cap ?? 20;
3737
+ state.queue.push(pending);
3738
+ if (state.queue.length > cap) {
3739
+ const dropped = state.queue.shift();
3740
+ this.logger.warn('Message queue overflow — dropped oldest message', {
3741
+ sessionId,
3742
+ droppedContent: dropped?.content.slice(0, 80),
3743
+ queueLength: state.queue.length,
3744
+ });
3745
+ }
3746
+ }
3747
+ async drainSessionQueue(sessionId) {
3748
+ const state = this.sessionRunStates.get(sessionId);
3749
+ if (!state)
3750
+ return;
3751
+ while (state.queue.length > 0) {
3752
+ const pending = state.queue.shift();
3753
+ // Skip webchat messages if the client disconnected
3754
+ if (pending.client && !pending.inbound && pending.client.ws.readyState !== 1) {
3755
+ this.logger.info('Skipping queued webchat message — client disconnected', { sessionId });
3756
+ continue;
3757
+ }
3758
+ // Skip if session was destroyed
3759
+ const session = await this.sessions.get(sessionId);
3760
+ if (!session) {
3761
+ this.logger.info('Skipping queued messages — session destroyed', { sessionId });
3762
+ state.queue.length = 0;
3763
+ break;
3764
+ }
3765
+ try {
3766
+ // Release the lock so the re-entrant call can acquire it
3767
+ state.running = false;
3768
+ if (pending.inbound) {
3769
+ await this.handleChannelMessage(pending.inbound);
3770
+ }
3771
+ else if (pending.client) {
3772
+ const wsMessage = {
3773
+ id: pending.requestId ?? `queued-${Date.now()}`,
3774
+ type: 'message',
3775
+ payload: {
3776
+ content: pending.content,
3777
+ sessionId,
3778
+ chatId: pending.chatId,
3779
+ model: pending.modelOverride,
3780
+ provider: pending.providerOverride,
3781
+ },
3782
+ };
3783
+ await this.handleMessage(pending.client, wsMessage);
3784
+ }
3785
+ }
3786
+ catch (err) {
3787
+ this.logger.error('Error processing queued message', {
3788
+ sessionId,
3789
+ error: err instanceof Error ? err : new Error(String(err)),
3790
+ });
3791
+ }
3792
+ }
3793
+ state.running = false;
3794
+ }
3591
3795
  /** Load persisted channel targets from disk so behavior delivery survives restarts. */
3592
3796
  async loadChannelTargets() {
3593
3797
  try {
@@ -3695,260 +3899,300 @@ export class Auxiora {
3695
3899
  }
3696
3900
  return;
3697
3901
  }
3698
- // Process media attachments and add user message
3699
- let messageContent = inbound.content;
3700
- if (inbound.attachments && inbound.attachments.length > 0 && this.mediaProcessor) {
3701
- messageContent = await this.mediaProcessor.process(inbound.attachments, inbound.content);
3702
- }
3703
- // ── Guardrail input scan ──────────────────────────────────────
3704
- const inputScan = this.checkInputGuardrails(messageContent);
3705
- if (inputScan && inputScan.action === 'block') {
3706
- audit('guardrail.triggered', {
3707
- action: 'block',
3708
- direction: 'input',
3709
- threatCount: inputScan.threats.length,
3710
- channelType: inbound.channelType,
3711
- sessionId: session.id,
3902
+ // ── Message queue gate ─────────────────────────────────────────
3903
+ if (!this.acquireSessionRun(session.id)) {
3904
+ this.enqueueMessage(session.id, {
3905
+ content: inbound.content,
3906
+ enqueuedAt: Date.now(),
3907
+ inbound,
3712
3908
  });
3713
3909
  if (this.channels) {
3714
3910
  await this.channels.send(inbound.channelType, inbound.channelId, {
3715
- content: this.GUARDRAIL_BLOCK_MESSAGE,
3911
+ content: "Got it — I'll get to that after I finish the current task.",
3716
3912
  replyToId: inbound.id,
3717
3913
  });
3718
3914
  }
3719
3915
  return;
3720
3916
  }
3721
- // Apply redaction if guardrails flagged PII
3722
- if (inputScan?.action === 'redact' && inputScan.redactedContent) {
3723
- messageContent = inputScan.redactedContent;
3724
- audit('guardrail.triggered', {
3725
- action: 'redact',
3726
- direction: 'input',
3727
- threatCount: inputScan.threats.length,
3728
- channelType: inbound.channelType,
3729
- });
3730
- }
3731
- else if (inputScan?.action === 'warn') {
3732
- audit('guardrail.triggered', {
3733
- action: 'warn',
3734
- direction: 'input',
3735
- threatCount: inputScan.threats.length,
3736
- channelType: inbound.channelType,
3737
- });
3738
- }
3739
- await this.sessions.addMessage(session.id, 'user', messageContent);
3740
- // Check if providers are available
3741
- if (!this.providers) {
3742
- if (this.channels) {
3743
- await this.channels.send(inbound.channelType, inbound.channelId, {
3744
- content: 'I need API keys to respond. Please configure them in the vault.',
3745
- replyToId: inbound.id,
3746
- });
3917
+ try {
3918
+ // Process media attachments and add user message
3919
+ let messageContent = inbound.content;
3920
+ if (inbound.attachments && inbound.attachments.length > 0 && this.mediaProcessor) {
3921
+ messageContent = await this.mediaProcessor.process(inbound.attachments, inbound.content);
3747
3922
  }
3748
- return;
3749
- }
3750
- // Get context messages channel sessions use a capped token budget and turn limit
3751
- // to prevent excessively long API calls from models with huge context windows.
3752
- const contextMessages = this.sessions.getContextMessages(session.id, this.getProviderMaxTokens(this.providers.getPrimaryProvider()), 4096, { isChannel: true });
3753
- const chatMessages = sanitizeTranscript(contextMessages).map((m) => ({
3754
- role: m.role,
3755
- content: m.content,
3756
- }));
3757
- // Show typing indicator while generating response
3758
- const stopTyping = this.channels
3759
- ? await this.channels.startTyping(inbound.channelType, inbound.channelId)
3760
- : () => { };
3761
- const channelAgentId = `channel:${inbound.channelType}:${inbound.channelId}:${Date.now()}`;
3762
- // 4-minute timeout for the entire LLM response cycle.
3763
- // Increased from 2min to accommodate auto-continuations (max_tokens → "Continue")
3764
- // and tool round-trips. If the provider stream hangs (network issue, overloaded API),
3765
- // this ensures the user gets an error message instead of infinite "typing…".
3766
- const CHANNEL_RESPONSE_TIMEOUT_MS = 240_000;
3767
- let draftLoop = null;
3768
- let draftMessageId = null;
3769
- try { // outer try — finally block guarantees stopTyping() runs
3770
- try {
3771
- // Get tool definitions from registry
3772
- const tools = toolRegistry.toProviderFormat();
3773
- // Build enriched prompt through pipeline
3774
- let enrichedPrompt = this.systemPrompt;
3775
- const channelChatId = `${inbound.channelType}:${inbound.channelId}`;
3776
- let channelArchitectResult = { prompt: this.systemPrompt };
3777
- // Reset Architect conversation state for new channel chats
3778
- const useChannelArchitect = this.config.agent.personality === 'the-architect';
3779
- if (useChannelArchitect && this.architect && !this.architectResetChats.has(channelChatId)) {
3780
- this.architectResetChats.add(channelChatId);
3781
- this.architect.resetConversation();
3782
- audit('personality.reset', { sessionId: session.id, chatId: channelChatId });
3923
+ // ── Guardrail input scan ──────────────────────────────────────
3924
+ const inputScan = this.checkInputGuardrails(messageContent);
3925
+ if (inputScan && inputScan.action === 'block') {
3926
+ audit('guardrail.triggered', {
3927
+ action: 'block',
3928
+ direction: 'input',
3929
+ threatCount: inputScan.threats.length,
3930
+ channelType: inbound.channelType,
3931
+ sessionId: session.id,
3932
+ });
3933
+ if (this.channels) {
3934
+ await this.channels.send(inbound.channelType, inbound.channelId, {
3935
+ content: this.GUARDRAIL_BLOCK_MESSAGE,
3936
+ replyToId: inbound.id,
3937
+ });
3783
3938
  }
3784
- if (this.enrichmentPipeline) {
3785
- const enrichCtx = {
3786
- basePrompt: this.systemPrompt,
3787
- userMessage: messageContent,
3788
- history: contextMessages,
3789
- channelType: inbound.channelType,
3790
- chatId: channelChatId,
3791
- sessionId: session.id,
3792
- userId: inbound.senderId ?? 'anonymous',
3793
- toolsUsed: this.lastToolsUsed.get(session.id) ?? [],
3794
- config: this.config,
3795
- senderName: inbound.senderName,
3796
- groupContext: inbound.groupContext,
3797
- };
3798
- const result = await this.enrichmentPipeline.run(enrichCtx);
3799
- enrichedPrompt = result.prompt;
3800
- channelArchitectResult = { prompt: enrichedPrompt, architectMeta: result.metadata.architect };
3939
+ return;
3940
+ }
3941
+ // Apply redaction if guardrails flagged PII
3942
+ if (inputScan?.action === 'redact' && inputScan.redactedContent) {
3943
+ messageContent = inputScan.redactedContent;
3944
+ audit('guardrail.triggered', {
3945
+ action: 'redact',
3946
+ direction: 'input',
3947
+ threatCount: inputScan.threats.length,
3948
+ channelType: inbound.channelType,
3949
+ });
3950
+ }
3951
+ else if (inputScan?.action === 'warn') {
3952
+ audit('guardrail.triggered', {
3953
+ action: 'warn',
3954
+ direction: 'input',
3955
+ threatCount: inputScan.threats.length,
3956
+ channelType: inbound.channelType,
3957
+ });
3958
+ }
3959
+ await this.sessions.addMessage(session.id, 'user', messageContent);
3960
+ // Check if providers are available
3961
+ if (!this.providers) {
3962
+ if (this.channels) {
3963
+ await this.channels.send(inbound.channelType, inbound.channelId, {
3964
+ content: 'I need API keys to respond. Please configure them in the vault.',
3965
+ replyToId: inbound.id,
3966
+ });
3801
3967
  }
3802
- // Use executeWithTools for channels — collect final text for channel reply
3803
- const provider = this.providers.getPrimaryProvider();
3804
- // Inject model identity so the AI knows what it's running on
3805
- enrichedPrompt += this.buildModelIdentityFragment(provider);
3806
- this.agentStart(channelAgentId, 'channel', `Processing message on ${inbound.channelType}`, inbound.channelType);
3807
- // Draft streaming: edit message in place if adapter supports it
3808
- const adapter = this.channels?.getAdapter(inbound.channelType);
3809
- const supportsDraft = !!adapter?.editMessage;
3810
- let accumulatedText = '';
3811
- if (supportsDraft && this.channels) {
3812
- const channels = this.channels;
3813
- draftLoop = new DraftStreamLoop(async (text) => {
3814
- try {
3815
- if (!draftMessageId) {
3816
- const result = await channels.send(inbound.channelType, inbound.channelId, {
3817
- content: text,
3818
- replyToId: inbound.id,
3819
- });
3820
- if (result.success && result.messageId) {
3821
- draftMessageId = result.messageId;
3968
+ return;
3969
+ }
3970
+ // Get context messages channel sessions use a capped token budget and turn limit
3971
+ // to prevent excessively long API calls from models with huge context windows.
3972
+ const contextMessages = this.sessions.getContextMessages(session.id, this.getProviderMaxTokens(this.providers.getPrimaryProvider()), 4096, { isChannel: true });
3973
+ const chatMessages = sanitizeTranscript(contextMessages).map((m) => ({
3974
+ role: m.role,
3975
+ content: m.content,
3976
+ }));
3977
+ // Show typing indicator while generating response
3978
+ const stopTyping = this.channels
3979
+ ? await this.channels.startTyping(inbound.channelType, inbound.channelId)
3980
+ : () => { };
3981
+ const channelAgentId = `channel:${inbound.channelType}:${inbound.channelId}:${Date.now()}`;
3982
+ // 30-minute timeout for the entire LLM response cycle.
3983
+ // Agentic tool loops can take many rounds (up to 20), each requiring a full LLM
3984
+ // call (30-90s) + tool execution. A multi-file generation task easily takes 10-20 minutes.
3985
+ const CHANNEL_RESPONSE_TIMEOUT_MS = 1_800_000;
3986
+ let draftLoop = null;
3987
+ let draftMessageId = null;
3988
+ // Snapshot message count before agentic loop so we can rollback on failure
3989
+ let channelMessageSnapshot;
3990
+ try { // outer try — finally block guarantees stopTyping() runs
3991
+ try {
3992
+ // Get tool definitions from registry
3993
+ const tools = toolRegistry.toProviderFormat();
3994
+ // Build enriched prompt through pipeline
3995
+ let enrichedPrompt = this.systemPrompt;
3996
+ const channelChatId = `${inbound.channelType}:${inbound.channelId}`;
3997
+ let channelArchitectResult = { prompt: this.systemPrompt };
3998
+ // Reset Architect conversation state for new channel chats
3999
+ const useChannelArchitect = this.config.agent.personality === 'the-architect';
4000
+ if (useChannelArchitect && this.architect && !this.architectResetChats.has(channelChatId)) {
4001
+ this.architectResetChats.add(channelChatId);
4002
+ this.architect.resetConversation();
4003
+ audit('personality.reset', { sessionId: session.id, chatId: channelChatId });
4004
+ }
4005
+ if (this.enrichmentPipeline) {
4006
+ const enrichCtx = {
4007
+ basePrompt: this.systemPrompt,
4008
+ userMessage: messageContent,
4009
+ history: contextMessages,
4010
+ channelType: inbound.channelType,
4011
+ chatId: channelChatId,
4012
+ sessionId: session.id,
4013
+ userId: inbound.senderId ?? 'anonymous',
4014
+ toolsUsed: this.lastToolsUsed.get(session.id) ?? [],
4015
+ config: this.config,
4016
+ senderName: inbound.senderName,
4017
+ groupContext: inbound.groupContext,
4018
+ };
4019
+ const result = await this.enrichmentPipeline.run(enrichCtx);
4020
+ enrichedPrompt = result.prompt;
4021
+ channelArchitectResult = { prompt: enrichedPrompt, architectMeta: result.metadata.architect };
4022
+ }
4023
+ // Use executeWithTools for channels — collect final text for channel reply
4024
+ const provider = this.providers.getPrimaryProvider();
4025
+ // Inject model identity so the AI knows what it's running on
4026
+ enrichedPrompt += this.buildModelIdentityFragment(provider);
4027
+ this.agentStart(channelAgentId, 'channel', `Processing message on ${inbound.channelType}`, inbound.channelType);
4028
+ // Draft streaming: edit message in place if adapter supports it
4029
+ const adapter = this.channels?.getAdapter(inbound.channelType);
4030
+ const supportsDraft = !!adapter?.editMessage;
4031
+ let accumulatedText = '';
4032
+ if (supportsDraft && this.channels) {
4033
+ const channels = this.channels;
4034
+ draftLoop = new DraftStreamLoop(async (text) => {
4035
+ try {
4036
+ if (!draftMessageId) {
4037
+ const result = await channels.send(inbound.channelType, inbound.channelId, {
4038
+ content: text,
4039
+ replyToId: inbound.id,
4040
+ });
4041
+ if (result.success && result.messageId) {
4042
+ draftMessageId = result.messageId;
4043
+ }
4044
+ return result.success;
4045
+ }
4046
+ else {
4047
+ const result = await channels.editMessage(inbound.channelType, inbound.channelId, draftMessageId, { content: text });
4048
+ return result.success;
3822
4049
  }
3823
- return result.success;
3824
4050
  }
3825
- else {
3826
- const result = await channels.editMessage(inbound.channelType, inbound.channelId, draftMessageId, { content: text });
3827
- return result.success;
4051
+ catch {
4052
+ return false;
3828
4053
  }
4054
+ }, 1000);
4055
+ }
4056
+ const fallbackCandidates = this.providers.resolveFallbackCandidates();
4057
+ const channelToolsUsed = [];
4058
+ // Snapshot message count so we can rollback orphaned messages on timeout/error
4059
+ channelMessageSnapshot = this.sessions.getMessageCount(session.id);
4060
+ const { response: channelResponse, usage: channelUsage } = await Promise.race([
4061
+ this.executeWithTools(session.id, chatMessages, enrichedPrompt, provider, (type, data) => {
4062
+ if (type === 'text' && data && draftLoop) {
4063
+ accumulatedText += data;
4064
+ draftLoop.update(accumulatedText);
4065
+ }
4066
+ else if (type === 'tool_use') {
4067
+ channelToolsUsed.push({ name: data?.name ?? 'unknown', success: true });
4068
+ }
4069
+ else if (type === 'tool_result') {
4070
+ if (channelToolsUsed.length > 0 && data?.error) {
4071
+ channelToolsUsed[channelToolsUsed.length - 1].success = false;
4072
+ }
4073
+ }
4074
+ }, { tools, fallbackCandidates }),
4075
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Response timed out — the task did not complete within 30 minutes. Try breaking it into smaller steps.')), CHANNEL_RESPONSE_TIMEOUT_MS)),
4076
+ ]);
4077
+ // Feed tool usage to awareness collector
4078
+ if (this.architectAwarenessCollector && channelToolsUsed.length > 0) {
4079
+ this.architectAwarenessCollector.updateToolContext(channelToolsUsed);
4080
+ }
4081
+ this.lastToolsUsed.set(session.id, channelToolsUsed);
4082
+ // Flush final draft text
4083
+ if (draftLoop) {
4084
+ if (channelResponse && channelResponse !== accumulatedText) {
4085
+ draftLoop.update(channelResponse);
3829
4086
  }
3830
- catch {
3831
- return false;
3832
- }
3833
- }, 1000);
3834
- }
3835
- const fallbackCandidates = this.providers.resolveFallbackCandidates();
3836
- const channelToolsUsed = [];
3837
- const { response: channelResponse, usage: channelUsage } = await Promise.race([
3838
- this.executeWithTools(session.id, chatMessages, enrichedPrompt, provider, (type, data) => {
3839
- if (type === 'text' && data && draftLoop) {
3840
- accumulatedText += data;
3841
- draftLoop.update(accumulatedText);
3842
- }
3843
- else if (type === 'tool_use') {
3844
- channelToolsUsed.push({ name: data?.name ?? 'unknown', success: true });
4087
+ await draftLoop.flush();
4088
+ draftLoop.stop();
4089
+ }
4090
+ // ── Guardrail output scan ─────────────────────────────────────
4091
+ const channelOutputScan = this.checkOutputGuardrails(channelResponse);
4092
+ const finalChannelResponse = channelOutputScan.response;
4093
+ if (channelOutputScan.wasModified) {
4094
+ audit('guardrail.triggered', {
4095
+ action: channelOutputScan.action,
4096
+ direction: 'output',
4097
+ channelType: inbound.channelType,
4098
+ sessionId: session.id,
4099
+ });
4100
+ // If draft streaming already sent partial text, do a final edit with clean version
4101
+ if (draftMessageId && adapter?.editMessage) {
4102
+ await adapter.editMessage(inbound.channelId, draftMessageId, { content: finalChannelResponse });
3845
4103
  }
3846
- else if (type === 'tool_result') {
3847
- if (channelToolsUsed.length > 0 && data?.error) {
3848
- channelToolsUsed[channelToolsUsed.length - 1].success = false;
3849
- }
4104
+ }
4105
+ // Save assistant message
4106
+ await this.sessions.addMessage(session.id, 'assistant', finalChannelResponse, {
4107
+ input: channelUsage.inputTokens,
4108
+ output: channelUsage.outputTokens,
4109
+ }, channelArchitectResult.architectMeta ? { architectDomain: channelArchitectResult.architectMeta.detectedContext.domain } : undefined);
4110
+ // Extract memories and learn from conversation (if auto-extract enabled)
4111
+ if (this.config.memory?.autoExtract !== false && this.memoryStore && finalChannelResponse && messageContent.length > 20) {
4112
+ void this.extractAndLearn(messageContent, finalChannelResponse, session.id);
4113
+ }
4114
+ // Send final response. The draft stream loop edits a single message,
4115
+ // but Discord silently truncates edits at 2000 chars. For long responses,
4116
+ // replace the draft with a chunked send so nothing is lost.
4117
+ const DRAFT_SAFE_LENGTH = 1900; // leave margin below Discord's 2000 char limit
4118
+ if (draftMessageId && this.channels && finalChannelResponse.length > DRAFT_SAFE_LENGTH) {
4119
+ // Draft only showed partial content — replace it with a pointer and send full chunked response
4120
+ if (adapter?.editMessage) {
4121
+ await adapter.editMessage(inbound.channelId, draftMessageId, {
4122
+ content: '*\u2026 (full response below)*',
4123
+ });
3850
4124
  }
3851
- }, { tools, fallbackCandidates }),
3852
- new Promise((_, reject) => setTimeout(() => reject(new Error('Response timed out — the AI provider did not respond within 4 minutes. Please try again.')), CHANNEL_RESPONSE_TIMEOUT_MS)),
3853
- ]);
3854
- // Feed tool usage to awareness collector
3855
- if (this.architectAwarenessCollector && channelToolsUsed.length > 0) {
3856
- this.architectAwarenessCollector.updateToolContext(channelToolsUsed);
3857
- }
3858
- this.lastToolsUsed.set(session.id, channelToolsUsed);
3859
- // Flush final draft text
3860
- if (draftLoop) {
3861
- if (channelResponse && channelResponse !== accumulatedText) {
3862
- draftLoop.update(channelResponse);
4125
+ await this.channels.send(inbound.channelType, inbound.channelId, {
4126
+ content: finalChannelResponse,
4127
+ });
3863
4128
  }
3864
- await draftLoop.flush();
3865
- draftLoop.stop();
3866
- }
3867
- // ── Guardrail output scan ─────────────────────────────────────
3868
- const channelOutputScan = this.checkOutputGuardrails(channelResponse);
3869
- const finalChannelResponse = channelOutputScan.response;
3870
- if (channelOutputScan.wasModified) {
3871
- audit('guardrail.triggered', {
3872
- action: channelOutputScan.action,
3873
- direction: 'output',
4129
+ else if (!draftMessageId && this.channels) {
4130
+ await this.channels.send(inbound.channelType, inbound.channelId, {
4131
+ content: finalChannelResponse,
4132
+ replyToId: inbound.id,
4133
+ });
4134
+ }
4135
+ audit('message.sent', {
3874
4136
  channelType: inbound.channelType,
3875
4137
  sessionId: session.id,
4138
+ inputTokens: channelUsage.inputTokens,
4139
+ outputTokens: channelUsage.outputTokens,
3876
4140
  });
3877
- // If draft streaming already sent partial text, do a final edit with clean version
3878
- if (draftMessageId && adapter?.editMessage) {
3879
- await adapter.editMessage(inbound.channelId, draftMessageId, { content: finalChannelResponse });
3880
- }
3881
- }
3882
- // Save assistant message
3883
- await this.sessions.addMessage(session.id, 'assistant', finalChannelResponse, {
3884
- input: channelUsage.inputTokens,
3885
- output: channelUsage.outputTokens,
3886
- }, channelArchitectResult.architectMeta ? { architectDomain: channelArchitectResult.architectMeta.detectedContext.domain } : undefined);
3887
- // Extract memories and learn from conversation (if auto-extract enabled)
3888
- if (this.config.memory?.autoExtract !== false && this.memoryStore && finalChannelResponse && messageContent.length > 20) {
3889
- void this.extractAndLearn(messageContent, finalChannelResponse, session.id);
4141
+ this.agentEnd(channelAgentId, true);
3890
4142
  }
3891
- // Send final response. The draft stream loop edits a single message,
3892
- // but Discord silently truncates edits at 2000 chars. For long responses,
3893
- // replace the draft with a chunked send so nothing is lost.
3894
- const DRAFT_SAFE_LENGTH = 1900; // leave margin below Discord's 2000 char limit
3895
- if (draftMessageId && this.channels && finalChannelResponse.length > DRAFT_SAFE_LENGTH) {
3896
- // Draft only showed partial content — replace it with a pointer and send full chunked response
3897
- if (adapter?.editMessage) {
3898
- await adapter.editMessage(inbound.channelId, draftMessageId, {
3899
- content: '*\u2026 (full response below)*',
3900
- });
4143
+ catch (error) {
4144
+ if (draftLoop)
4145
+ draftLoop.stop();
4146
+ this.agentEnd(channelAgentId, false);
4147
+ const errorMessage = error instanceof Error ? error.message : 'Unknown error';
4148
+ audit('channel.error', { sessionId: session.id, error: errorMessage });
4149
+ // Rollback orphaned messages from interrupted agentic tool loops.
4150
+ // This is critical for channel messages where timeouts are common (30-min limit).
4151
+ if (typeof channelMessageSnapshot === 'number') {
4152
+ const rolled = this.sessions.rollbackMessages(session.id, channelMessageSnapshot);
4153
+ if (rolled > 0) {
4154
+ this.logger.info('Rolled back orphaned channel messages from interrupted tool loop', {
4155
+ sessionId: session.id,
4156
+ channelType: inbound.channelType,
4157
+ rolledBack: rolled,
4158
+ });
4159
+ }
3901
4160
  }
3902
- await this.channels.send(inbound.channelType, inbound.channelId, {
3903
- content: finalChannelResponse,
3904
- });
3905
- }
3906
- else if (!draftMessageId && this.channels) {
3907
- await this.channels.send(inbound.channelType, inbound.channelId, {
3908
- content: finalChannelResponse,
3909
- replyToId: inbound.id,
3910
- });
3911
- }
3912
- audit('message.sent', {
3913
- channelType: inbound.channelType,
3914
- sessionId: session.id,
3915
- inputTokens: channelUsage.inputTokens,
3916
- outputTokens: channelUsage.outputTokens,
3917
- });
3918
- this.agentEnd(channelAgentId, true);
3919
- }
3920
- catch (error) {
3921
- if (draftLoop)
3922
- draftLoop.stop();
3923
- this.agentEnd(channelAgentId, false);
3924
- const errorMessage = error instanceof Error ? error.message : 'Unknown error';
3925
- audit('channel.error', { sessionId: session.id, error: errorMessage });
3926
- if (this.channels) {
3927
- const errorContent = `Error: ${errorMessage}`;
3928
- // If a draft message exists, edit it with the error instead of sending a new one
3929
- if (draftMessageId) {
3930
- try {
3931
- await this.channels.editMessage(inbound.channelType, inbound.channelId, draftMessageId, { content: errorContent });
4161
+ if (this.channels) {
4162
+ const errorContent = `Error: ${errorMessage}`;
4163
+ // If a draft message exists, edit it with the error instead of sending a new one
4164
+ if (draftMessageId) {
4165
+ try {
4166
+ await this.channels.editMessage(inbound.channelType, inbound.channelId, draftMessageId, { content: errorContent });
4167
+ }
4168
+ catch {
4169
+ // Edit failed — fall back to new message
4170
+ await this.channels.send(inbound.channelType, inbound.channelId, {
4171
+ content: errorContent,
4172
+ replyToId: inbound.id,
4173
+ });
4174
+ }
3932
4175
  }
3933
- catch {
3934
- // Edit failed — fall back to new message
4176
+ else {
3935
4177
  await this.channels.send(inbound.channelType, inbound.channelId, {
3936
4178
  content: errorContent,
3937
4179
  replyToId: inbound.id,
3938
4180
  });
3939
4181
  }
3940
4182
  }
3941
- else {
3942
- await this.channels.send(inbound.channelType, inbound.channelId, {
3943
- content: errorContent,
3944
- replyToId: inbound.id,
3945
- });
3946
- }
3947
4183
  }
3948
4184
  }
4185
+ finally {
4186
+ stopTyping();
4187
+ }
3949
4188
  }
3950
4189
  finally {
3951
- stopTyping();
4190
+ try {
4191
+ await this.drainSessionQueue(session.id);
4192
+ }
4193
+ finally {
4194
+ this.releaseSessionRun(session.id);
4195
+ }
3952
4196
  }
3953
4197
  }); // end runWithRequestId
3954
4198
  }
@@ -4256,6 +4500,7 @@ export class Auxiora {
4256
4500
  catch { /* best-effort — don't block shutdown */ }
4257
4501
  }
4258
4502
  this.consciousness?.shutdown();
4503
+ this.sessionRunStates.clear();
4259
4504
  this.sessions.destroy();
4260
4505
  this.vault.lock();
4261
4506
  this.running = false;