@auxiora/runtime 1.10.16 → 1.10.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -250,6 +250,8 @@ export class Auxiora {
250
250
  activeAgents = new Map();
251
251
  channelTargetsPath = path.join(path.dirname(getBehaviorsPath()), 'channel-targets.json');
252
252
  orchestrationHistory = [];
253
+ /** Per-session run state for message queueing. */
254
+ sessionRunStates = new Map();
253
255
  async initialize(options = {}) {
254
256
  // Read version from package.json
255
257
  try {
@@ -2572,6 +2574,8 @@ export class Auxiora {
2572
2574
  }
2573
2575
  // Append tool usage guidance
2574
2576
  this.standardPrompt += '\n\n---\n\n## Tool Usage\n'
2577
+ + '- IMPORTANT: When the user asks you to create files, generate projects, write code, or perform actions on the filesystem, you MUST use the `bash` and `file_write` tools to actually do the work. Do not just describe what you would do — execute it.\n'
2578
+ + '- Use `bash` to run shell commands (mkdir, npm init, git init, etc.) and `file_write` to create files with content.\n'
2575
2579
  + '- For reading web pages, searching, fetching articles, or looking up information, use the `web_browser` tool. It is fast, lightweight, and always available.\n'
2576
2580
  + '- Only use `browser_navigate` and other browser_* tools when you need JavaScript rendering or interactive features (clicking buttons, filling forms, taking screenshots).\n'
2577
2581
  + '- Never expose raw tool errors to the user. If a tool fails, explain the situation naturally.';
@@ -2871,269 +2875,313 @@ export class Auxiora {
2871
2875
  senderId: client.senderId,
2872
2876
  });
2873
2877
  }
2874
- // Apply redaction if guardrails flagged PII
2875
- let processedContent = content;
2876
- if (inputScan?.action === 'redact' && inputScan.redactedContent) {
2877
- processedContent = inputScan.redactedContent;
2878
- audit('guardrail.triggered', {
2879
- action: 'redact',
2880
- direction: 'input',
2881
- threatCount: inputScan.threats.length,
2882
- channelType: 'webchat',
2883
- sessionId: session.id,
2884
- });
2885
- }
2886
- else if (inputScan?.action === 'warn') {
2887
- audit('guardrail.triggered', {
2888
- action: 'warn',
2889
- direction: 'input',
2890
- threatCount: inputScan.threats.length,
2891
- channelType: 'webchat',
2892
- sessionId: session.id,
2878
+ // ── Message queue gate ─────────────────────────────────────────
2879
+ if (!this.acquireSessionRun(session.id)) {
2880
+ this.enqueueMessage(session.id, {
2881
+ content: payload?.content ?? '',
2882
+ enqueuedAt: Date.now(),
2883
+ client,
2884
+ requestId,
2885
+ chatId: payload?.chatId,
2886
+ modelOverride: payload?.model,
2887
+ providerOverride: payload?.provider,
2893
2888
  });
2894
- }
2895
- // Add user message
2896
- await this.sessions.addMessage(session.id, 'user', processedContent);
2897
- // Check if providers are available
2898
- if (!this.providers) {
2899
2889
  this.sendToClient(client, {
2900
- type: 'message',
2901
- id: requestId,
2902
- payload: {
2903
- role: 'assistant',
2904
- content: 'I need API keys to respond. Please add them:\n\n```\nauxiora vault add ANTHROPIC_API_KEY\n```',
2905
- },
2890
+ type: 'queued',
2891
+ requestId,
2892
+ position: this.getSessionRunState(session.id).queue.length,
2906
2893
  });
2907
2894
  return;
2908
2895
  }
2909
- // Get context messages
2910
- const contextMessages = this.sessions.getContextMessages(session.id, this.getProviderMaxTokens(this.providers.getPrimaryProvider()), 4096);
2911
- const chatMessages = sanitizeTranscript(contextMessages).map((m) => ({
2912
- role: m.role,
2913
- content: m.content,
2914
- }));
2915
2896
  try {
2916
- // Get tool definitions from registry
2917
- const tools = toolRegistry.toProviderFormat();
2918
- // Resolve per-chat personality (metadata overrides global default)
2919
- const chatRecord = chatId ? this.sessions.getChat(chatId) : undefined;
2920
- const chatPersonality = chatRecord?.metadata?.personality;
2921
- const useArchitect = chatPersonality
2922
- ? chatPersonality === 'the-architect'
2923
- : this.config.agent.personality === 'the-architect';
2924
- const basePrompt = useArchitect ? this.architectPrompt : this.standardPrompt;
2925
- // Build enriched prompt through pipeline
2926
- let enrichedPrompt = basePrompt;
2927
- let architectResult = { prompt: basePrompt };
2928
- // Reset Architect conversation state for new chats
2929
- if (useArchitect && this.architect && chatId && !this.architectResetChats.has(chatId)) {
2930
- this.architectResetChats.add(chatId);
2931
- this.architect.resetConversation();
2932
- audit('personality.reset', { sessionId: session.id, chatId });
2933
- }
2934
- if (this.enrichmentPipeline) {
2935
- const enrichCtx = {
2936
- basePrompt,
2937
- userMessage: processedContent,
2938
- history: contextMessages,
2897
+ // Apply redaction if guardrails flagged PII
2898
+ let processedContent = content;
2899
+ if (inputScan?.action === 'redact' && inputScan.redactedContent) {
2900
+ processedContent = inputScan.redactedContent;
2901
+ audit('guardrail.triggered', {
2902
+ action: 'redact',
2903
+ direction: 'input',
2904
+ threatCount: inputScan.threats.length,
2939
2905
  channelType: 'webchat',
2940
- chatId: chatId ?? session.id,
2941
2906
  sessionId: session.id,
2942
- userId: client.senderId ?? 'anonymous',
2943
- toolsUsed: this.lastToolsUsed.get(session.id) ?? [],
2944
- config: this.config,
2945
- };
2946
- const result = await this.enrichmentPipeline.run(enrichCtx);
2947
- enrichedPrompt = result.prompt;
2948
- architectResult = { prompt: enrichedPrompt, architectMeta: result.metadata.architect };
2949
- }
2950
- // Route to best model for this message
2951
- let provider;
2952
- let routingResult;
2953
- if (providerOverride || modelOverride) {
2954
- // Manual override — skip router
2955
- provider = this.providers.getProvider(providerOverride || this.config.provider.primary);
2956
- }
2957
- else if (this.modelRouter && this.config.routing?.enabled !== false) {
2958
- try {
2959
- routingResult = this.modelRouter.route(processedContent, { hasImages: false });
2960
- provider = this.providers.getProvider(routingResult.selection.provider);
2961
- }
2962
- catch {
2963
- provider = this.providers.getPrimaryProvider();
2964
- }
2907
+ });
2965
2908
  }
2966
- else {
2967
- provider = this.providers.getPrimaryProvider();
2968
- }
2969
- // Inject model identity so the AI knows what it's running on
2970
- enrichedPrompt += this.buildModelIdentityFragment(provider, routingResult?.selection.model ?? modelOverride);
2971
- // Execute streaming AI call with tool follow-up loop
2972
- const processingStartTime = Date.now();
2973
- const fallbackCandidates = this.providers.resolveFallbackCandidates();
2974
- const toolsUsed = [];
2975
- let streamChunkCount = 0;
2976
- const { response: fullResponse, usage } = await this.executeWithTools(session.id, chatMessages, enrichedPrompt, provider, (type, data) => {
2977
- if (type === 'text') {
2978
- streamChunkCount++;
2979
- this.sendToClient(client, { type: 'chunk', id: requestId, payload: { content: data } });
2980
- }
2981
- else if (type === 'thinking') {
2982
- this.sendToClient(client, { type: 'thinking', id: requestId, payload: { content: data } });
2983
- }
2984
- else if (type === 'tool_use') {
2985
- toolsUsed.push({ name: data?.name ?? 'unknown', success: true });
2986
- this.sendToClient(client, { type: 'tool_use', id: requestId, payload: data });
2987
- }
2988
- else if (type === 'tool_result') {
2989
- // Update last tool's success based on result
2990
- if (toolsUsed.length > 0 && data?.error) {
2991
- toolsUsed[toolsUsed.length - 1].success = false;
2992
- }
2993
- this.sendToClient(client, { type: 'tool_result', id: requestId, payload: data });
2994
- }
2995
- else if (type === 'status') {
2996
- this.sendToClient(client, { type: 'status', id: requestId, payload: data });
2997
- }
2998
- }, { tools, fallbackCandidates });
2999
- // Feed tool usage to awareness collector
3000
- if (this.architectAwarenessCollector && toolsUsed.length > 0) {
3001
- this.architectAwarenessCollector.updateToolContext(toolsUsed);
3002
- }
3003
- // Store tools for next turn's enrichment context
3004
- this.lastToolsUsed.set(session.id, toolsUsed);
3005
- // ── Guardrail output scan ─────────────────────────────────────
3006
- const outputScan = this.checkOutputGuardrails(fullResponse);
3007
- const finalResponse = outputScan.response;
3008
- if (outputScan.wasModified) {
2909
+ else if (inputScan?.action === 'warn') {
3009
2910
  audit('guardrail.triggered', {
3010
- action: outputScan.action,
3011
- direction: 'output',
2911
+ action: 'warn',
2912
+ direction: 'input',
2913
+ threatCount: inputScan.threats.length,
3012
2914
  channelType: 'webchat',
3013
2915
  sessionId: session.id,
3014
2916
  });
3015
- // Send correction since chunks were already streamed
2917
+ }
2918
+ // Add user message
2919
+ await this.sessions.addMessage(session.id, 'user', processedContent);
2920
+ // Check if providers are available
2921
+ if (!this.providers) {
3016
2922
  this.sendToClient(client, {
3017
- type: 'guardrail_correction',
2923
+ type: 'message',
3018
2924
  id: requestId,
3019
- payload: { content: finalResponse },
2925
+ payload: {
2926
+ role: 'assistant',
2927
+ content: 'I need API keys to respond. Please add them:\n\n```\nauxiora vault add ANTHROPIC_API_KEY\n```',
2928
+ },
3020
2929
  });
2930
+ return;
3021
2931
  }
3022
- // Collect transparency metadata (best-effort)
3023
- let transparencyMeta;
2932
+ // Get context messages
2933
+ const contextMessages = this.sessions.getContextMessages(session.id, this.getProviderMaxTokens(this.providers.getPrimaryProvider()), 4096);
2934
+ const chatMessages = sanitizeTranscript(contextMessages).map((m) => ({
2935
+ role: m.role,
2936
+ content: m.content,
2937
+ }));
2938
+ // Snapshot message count before agentic loop so we can rollback on failure
2939
+ let messageCountSnapshot;
3024
2940
  try {
3025
- const modelId = routingResult?.selection.model ?? modelOverride ?? provider.defaultModel;
3026
- const caps = provider.metadata.models[modelId];
3027
- if (caps) {
3028
- transparencyMeta = collectTransparencyMeta({
3029
- enrichment: this.enrichmentPipeline
3030
- ? { prompt: enrichedPrompt, metadata: { architect: architectResult.architectMeta, stages: architectResult.stages ?? [] } }
3031
- : { prompt: enrichedPrompt, metadata: { stages: [] } },
3032
- completion: { content: finalResponse, usage, model: modelId, finishReason: 'stop', toolUse: toolsUsed.map(t => ({ name: t.name })) },
3033
- capabilities: { costPer1kInput: caps.costPer1kInput, costPer1kOutput: caps.costPer1kOutput },
3034
- providerName: provider.name,
3035
- awarenessSignals: [],
3036
- responseText: finalResponse,
3037
- processingStartTime,
2941
+ // Get tool definitions from registry
2942
+ const tools = toolRegistry.toProviderFormat();
2943
+ // Resolve per-chat personality (metadata overrides global default)
2944
+ const chatRecord = chatId ? this.sessions.getChat(chatId) : undefined;
2945
+ const chatPersonality = chatRecord?.metadata?.personality;
2946
+ const useArchitect = chatPersonality
2947
+ ? chatPersonality === 'the-architect'
2948
+ : this.config.agent.personality === 'the-architect';
2949
+ const basePrompt = useArchitect ? this.architectPrompt : this.standardPrompt;
2950
+ // Build enriched prompt through pipeline
2951
+ let enrichedPrompt = basePrompt;
2952
+ let architectResult = { prompt: basePrompt };
2953
+ // Reset Architect conversation state for new chats
2954
+ if (useArchitect && this.architect && chatId && !this.architectResetChats.has(chatId)) {
2955
+ this.architectResetChats.add(chatId);
2956
+ this.architect.resetConversation();
2957
+ audit('personality.reset', { sessionId: session.id, chatId });
2958
+ }
2959
+ if (this.enrichmentPipeline) {
2960
+ const enrichCtx = {
2961
+ basePrompt,
2962
+ userMessage: processedContent,
2963
+ history: contextMessages,
2964
+ channelType: 'webchat',
2965
+ chatId: chatId ?? session.id,
2966
+ sessionId: session.id,
2967
+ userId: client.senderId ?? 'anonymous',
2968
+ toolsUsed: this.lastToolsUsed.get(session.id) ?? [],
2969
+ config: this.config,
2970
+ };
2971
+ const result = await this.enrichmentPipeline.run(enrichCtx);
2972
+ enrichedPrompt = result.prompt;
2973
+ architectResult = { prompt: enrichedPrompt, architectMeta: result.metadata.architect };
2974
+ }
2975
+ // Route to best model for this message
2976
+ let provider;
2977
+ let routingResult;
2978
+ if (providerOverride || modelOverride) {
2979
+ // Manual override — skip router
2980
+ provider = this.providers.getProvider(providerOverride || this.config.provider.primary);
2981
+ }
2982
+ else if (this.modelRouter && this.config.routing?.enabled !== false) {
2983
+ try {
2984
+ routingResult = this.modelRouter.route(processedContent, { hasImages: false });
2985
+ provider = this.providers.getProvider(routingResult.selection.provider);
2986
+ }
2987
+ catch {
2988
+ provider = this.providers.getPrimaryProvider();
2989
+ }
2990
+ }
2991
+ else {
2992
+ provider = this.providers.getPrimaryProvider();
2993
+ }
2994
+ // Inject model identity so the AI knows what it's running on
2995
+ enrichedPrompt += this.buildModelIdentityFragment(provider, routingResult?.selection.model ?? modelOverride);
2996
+ // Execute streaming AI call with tool follow-up loop
2997
+ const processingStartTime = Date.now();
2998
+ const fallbackCandidates = this.providers.resolveFallbackCandidates();
2999
+ const toolsUsed = [];
3000
+ let streamChunkCount = 0;
3001
+ // Snapshot message count so we can rollback orphaned messages if the loop fails
3002
+ messageCountSnapshot = this.sessions.getMessageCount(session.id);
3003
+ const { response: fullResponse, usage } = await this.executeWithTools(session.id, chatMessages, enrichedPrompt, provider, (type, data) => {
3004
+ if (type === 'text') {
3005
+ streamChunkCount++;
3006
+ this.sendToClient(client, { type: 'chunk', id: requestId, payload: { content: data } });
3007
+ }
3008
+ else if (type === 'thinking') {
3009
+ this.sendToClient(client, { type: 'thinking', id: requestId, payload: { content: data } });
3010
+ }
3011
+ else if (type === 'tool_use') {
3012
+ toolsUsed.push({ name: data?.name ?? 'unknown', success: true });
3013
+ this.sendToClient(client, { type: 'tool_use', id: requestId, payload: data });
3014
+ }
3015
+ else if (type === 'tool_result') {
3016
+ // Update last tool's success based on result
3017
+ if (toolsUsed.length > 0 && data?.error) {
3018
+ toolsUsed[toolsUsed.length - 1].success = false;
3019
+ }
3020
+ this.sendToClient(client, { type: 'tool_result', id: requestId, payload: data });
3021
+ }
3022
+ else if (type === 'status') {
3023
+ this.sendToClient(client, { type: 'status', id: requestId, payload: data });
3024
+ }
3025
+ }, { tools, fallbackCandidates });
3026
+ // Feed tool usage to awareness collector
3027
+ if (this.architectAwarenessCollector && toolsUsed.length > 0) {
3028
+ this.architectAwarenessCollector.updateToolContext(toolsUsed);
3029
+ }
3030
+ // Store tools for next turn's enrichment context
3031
+ this.lastToolsUsed.set(session.id, toolsUsed);
3032
+ // ── Guardrail output scan ─────────────────────────────────────
3033
+ const outputScan = this.checkOutputGuardrails(fullResponse);
3034
+ const finalResponse = outputScan.response;
3035
+ if (outputScan.wasModified) {
3036
+ audit('guardrail.triggered', {
3037
+ action: outputScan.action,
3038
+ direction: 'output',
3039
+ channelType: 'webchat',
3040
+ sessionId: session.id,
3041
+ });
3042
+ // Send correction since chunks were already streamed
3043
+ this.sendToClient(client, {
3044
+ type: 'guardrail_correction',
3045
+ id: requestId,
3046
+ payload: { content: finalResponse },
3038
3047
  });
3039
3048
  }
3040
- }
3041
- catch {
3042
- // Transparency is best-effort — never block message delivery
3043
- }
3044
- // Save assistant message (skip if empty — happens when response is tool-only)
3045
- if (finalResponse) {
3046
- await this.sessions.addMessage(session.id, 'assistant', finalResponse, {
3047
- input: usage.inputTokens,
3048
- output: usage.outputTokens,
3049
- }, {
3050
- ...(architectResult.architectMeta ? { architectDomain: architectResult.architectMeta.detectedContext.domain } : {}),
3051
- ...(transparencyMeta ? { transparency: transparencyMeta } : {}),
3049
+ // Collect transparency metadata (best-effort)
3050
+ let transparencyMeta;
3051
+ try {
3052
+ const modelId = routingResult?.selection.model ?? modelOverride ?? provider.defaultModel;
3053
+ const caps = provider.metadata.models[modelId];
3054
+ if (caps) {
3055
+ transparencyMeta = collectTransparencyMeta({
3056
+ enrichment: this.enrichmentPipeline
3057
+ ? { prompt: enrichedPrompt, metadata: { architect: architectResult.architectMeta, stages: architectResult.stages ?? [] } }
3058
+ : { prompt: enrichedPrompt, metadata: { stages: [] } },
3059
+ completion: { content: finalResponse, usage, model: modelId, finishReason: 'stop', toolUse: toolsUsed.map(t => ({ name: t.name })) },
3060
+ capabilities: { costPer1kInput: caps.costPer1kInput, costPer1kOutput: caps.costPer1kOutput },
3061
+ providerName: provider.name,
3062
+ awarenessSignals: [],
3063
+ responseText: finalResponse,
3064
+ processingStartTime,
3065
+ });
3066
+ }
3067
+ }
3068
+ catch {
3069
+ // Transparency is best-effort — never block message delivery
3070
+ }
3071
+ // Save assistant message (skip if empty — happens when response is tool-only)
3072
+ if (finalResponse) {
3073
+ await this.sessions.addMessage(session.id, 'assistant', finalResponse, {
3074
+ input: usage.inputTokens,
3075
+ output: usage.outputTokens,
3076
+ }, {
3077
+ ...(architectResult.architectMeta ? { architectDomain: architectResult.architectMeta.detectedContext.domain } : {}),
3078
+ ...(transparencyMeta ? { transparency: transparencyMeta } : {}),
3079
+ });
3080
+ }
3081
+ // Record usage for cost tracking
3082
+ if (this.modelRouter && routingResult) {
3083
+ this.modelRouter.recordUsage(routingResult.selection.provider, routingResult.selection.model, usage.inputTokens, usage.outputTokens);
3084
+ }
3085
+ // Extract memories and learn from conversation (if auto-extract enabled)
3086
+ if (this.config.memory?.autoExtract !== false && this.memoryStore && finalResponse && processedContent.length > 20) {
3087
+ void this.extractAndLearn(processedContent, finalResponse, session.id);
3088
+ }
3089
+ // Auto-title webchat chats after first exchange
3090
+ if (finalResponse &&
3091
+ session.metadata.channelType === 'webchat' &&
3092
+ session.messages.length <= 3) {
3093
+ void this.generateChatTitle(session.id, processedContent, finalResponse, client);
3094
+ }
3095
+ // Send done signal
3096
+ this.sendToClient(client, {
3097
+ type: 'done',
3098
+ id: requestId,
3099
+ payload: {
3100
+ usage,
3101
+ routing: routingResult ? {
3102
+ model: routingResult.selection.model,
3103
+ provider: routingResult.selection.provider,
3104
+ isLocal: routingResult.selection.isLocal,
3105
+ taskType: routingResult.classification.type,
3106
+ } : (providerOverride || modelOverride) ? {
3107
+ model: modelOverride,
3108
+ provider: providerOverride || this.config.provider.primary,
3109
+ override: true,
3110
+ } : undefined,
3111
+ architect: architectResult.architectMeta,
3112
+ transparency: transparencyMeta,
3113
+ },
3114
+ });
3115
+ // Background self-awareness analysis
3116
+ if (this.selfAwarenessAssembler) {
3117
+ this.selfAwarenessAssembler.afterResponse({
3118
+ userId: client.senderId ?? 'anonymous',
3119
+ sessionId: session.id,
3120
+ chatId: chatId ?? session.id,
3121
+ currentMessage: processedContent,
3122
+ recentMessages: contextMessages,
3123
+ response: finalResponse,
3124
+ responseTime: Date.now() - (session.metadata.lastActiveAt ?? Date.now()),
3125
+ tokensUsed: { input: usage?.inputTokens ?? 0, output: usage?.outputTokens ?? 0 },
3126
+ streamChunks: streamChunkCount,
3127
+ }).catch(() => { });
3128
+ }
3129
+ // Record conversation in consciousness journal
3130
+ if (this.consciousness) {
3131
+ const journalBase = {
3132
+ sessionId: session.id,
3133
+ type: 'message',
3134
+ context: {
3135
+ domains: architectResult.architectMeta
3136
+ ? [architectResult.architectMeta.detectedContext.domain]
3137
+ : ['general'],
3138
+ },
3139
+ selfState: {
3140
+ health: (this.healthMonitor?.getHealthState().overall === 'unhealthy' ? 'degraded' : this.healthMonitor?.getHealthState().overall ?? 'healthy'),
3141
+ activeProviders: [this.config.provider.primary],
3142
+ uptime: Math.round(process.uptime()),
3143
+ },
3144
+ };
3145
+ this.consciousness.journal.record({ ...journalBase, message: { role: 'user', content: processedContent } }).catch(() => { });
3146
+ this.consciousness.journal.record({ ...journalBase, message: { role: 'assistant', content: finalResponse } }).catch(() => { });
3147
+ }
3148
+ audit('message.sent', {
3149
+ sessionId: session.id,
3150
+ inputTokens: usage.inputTokens,
3151
+ outputTokens: usage.outputTokens,
3152
+ model: routingResult?.selection.model,
3153
+ provider: routingResult?.selection.provider,
3052
3154
  });
3053
3155
  }
3054
- // Record usage for cost tracking
3055
- if (this.modelRouter && routingResult) {
3056
- this.modelRouter.recordUsage(routingResult.selection.provider, routingResult.selection.model, usage.inputTokens, usage.outputTokens);
3156
+ catch (error) {
3157
+ const errorMessage = error instanceof Error ? error.message : 'Unknown error';
3158
+ audit('channel.error', { sessionId: session.id, error: errorMessage });
3159
+ // Rollback orphaned messages from interrupted agentic tool loops.
3160
+ // executeWithTools saves intermediate messages (tool announces + tool results)
3161
+ // incrementally — if it throws, those partial messages pollute the next request.
3162
+ if (typeof messageCountSnapshot === 'number') {
3163
+ const rolled = this.sessions.rollbackMessages(session.id, messageCountSnapshot);
3164
+ if (rolled > 0) {
3165
+ this.logger.info('Rolled back orphaned messages from interrupted tool loop', {
3166
+ sessionId: session.id,
3167
+ rolledBack: rolled,
3168
+ });
3169
+ }
3170
+ }
3171
+ this.sendToClient(client, {
3172
+ type: 'error',
3173
+ id: requestId,
3174
+ payload: { message: `Error: ${errorMessage}` },
3175
+ });
3057
3176
  }
3058
- // Extract memories and learn from conversation (if auto-extract enabled)
3059
- if (this.config.memory?.autoExtract !== false && this.memoryStore && finalResponse && processedContent.length > 20) {
3060
- void this.extractAndLearn(processedContent, finalResponse, session.id);
3177
+ }
3178
+ finally {
3179
+ try {
3180
+ await this.drainSessionQueue(session.id);
3061
3181
  }
3062
- // Auto-title webchat chats after first exchange
3063
- if (finalResponse &&
3064
- session.metadata.channelType === 'webchat' &&
3065
- session.messages.length <= 3) {
3066
- void this.generateChatTitle(session.id, processedContent, finalResponse, client);
3182
+ finally {
3183
+ this.releaseSessionRun(session.id);
3067
3184
  }
3068
- // Send done signal
3069
- this.sendToClient(client, {
3070
- type: 'done',
3071
- id: requestId,
3072
- payload: {
3073
- usage,
3074
- routing: routingResult ? {
3075
- model: routingResult.selection.model,
3076
- provider: routingResult.selection.provider,
3077
- isLocal: routingResult.selection.isLocal,
3078
- taskType: routingResult.classification.type,
3079
- } : (providerOverride || modelOverride) ? {
3080
- model: modelOverride,
3081
- provider: providerOverride || this.config.provider.primary,
3082
- override: true,
3083
- } : undefined,
3084
- architect: architectResult.architectMeta,
3085
- transparency: transparencyMeta,
3086
- },
3087
- });
3088
- // Background self-awareness analysis
3089
- if (this.selfAwarenessAssembler) {
3090
- this.selfAwarenessAssembler.afterResponse({
3091
- userId: client.senderId ?? 'anonymous',
3092
- sessionId: session.id,
3093
- chatId: chatId ?? session.id,
3094
- currentMessage: processedContent,
3095
- recentMessages: contextMessages,
3096
- response: finalResponse,
3097
- responseTime: Date.now() - (session.metadata.lastActiveAt ?? Date.now()),
3098
- tokensUsed: { input: usage?.inputTokens ?? 0, output: usage?.outputTokens ?? 0 },
3099
- streamChunks: streamChunkCount,
3100
- }).catch(() => { });
3101
- }
3102
- // Record conversation in consciousness journal
3103
- if (this.consciousness) {
3104
- const journalBase = {
3105
- sessionId: session.id,
3106
- type: 'message',
3107
- context: {
3108
- domains: architectResult.architectMeta
3109
- ? [architectResult.architectMeta.detectedContext.domain]
3110
- : ['general'],
3111
- },
3112
- selfState: {
3113
- health: (this.healthMonitor?.getHealthState().overall === 'unhealthy' ? 'degraded' : this.healthMonitor?.getHealthState().overall ?? 'healthy'),
3114
- activeProviders: [this.config.provider.primary],
3115
- uptime: Math.round(process.uptime()),
3116
- },
3117
- };
3118
- this.consciousness.journal.record({ ...journalBase, message: { role: 'user', content: processedContent } }).catch(() => { });
3119
- this.consciousness.journal.record({ ...journalBase, message: { role: 'assistant', content: finalResponse } }).catch(() => { });
3120
- }
3121
- audit('message.sent', {
3122
- sessionId: session.id,
3123
- inputTokens: usage.inputTokens,
3124
- outputTokens: usage.outputTokens,
3125
- model: routingResult?.selection.model,
3126
- provider: routingResult?.selection.provider,
3127
- });
3128
- }
3129
- catch (error) {
3130
- const errorMessage = error instanceof Error ? error.message : 'Unknown error';
3131
- audit('channel.error', { sessionId: session.id, error: errorMessage });
3132
- this.sendToClient(client, {
3133
- type: 'error',
3134
- id: requestId,
3135
- payload: { message: `Error: ${errorMessage}` },
3136
- });
3137
3185
  }
3138
3186
  }
3139
3187
  async generateChatTitle(chatId, userMessage, assistantResponse, client) {
@@ -3294,22 +3342,35 @@ export class Auxiora {
3294
3342
  * for synthesis, looping up to maxToolRounds times.
3295
3343
  */
3296
3344
  async executeWithTools(sessionId, messages, enrichedPrompt, provider, onChunk, options) {
3297
- const maxRounds = options?.maxToolRounds ?? 10;
3345
+ const maxRounds = options?.maxToolRounds ?? 20;
3298
3346
  const maxContinuations = 3; // Safety cap for auto-continue on truncation
3299
3347
  const tools = options?.tools ?? toolRegistry.toProviderFormat();
3300
3348
  let currentMessages = [...messages];
3301
3349
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
3302
3350
  let fullResponse = '';
3303
3351
  let lastRoundHadTools = false;
3304
- const loopState = createLoopDetectionState();
3352
+ const loopState = createLoopDetectionState({
3353
+ genericRepeatWarn: 3, // Catch loops faster (default 5)
3354
+ genericRepeatCritical: 8, // Stop after blocked attempts too (default 10)
3355
+ noProgressWarn: 4, // Detect identical results sooner (default 8)
3356
+ noProgressCritical: 8, // Hard stop (default 15)
3357
+ });
3358
+ // Track tools to temporarily exclude when loop is detected
3359
+ const excludedToolNames = new Set();
3360
+ // Track file paths written to detect same-file rewrites
3361
+ const writtenFiles = new Map(); // path → count
3305
3362
  for (let round = 0; round < maxRounds; round++) {
3306
3363
  let roundResponse = '';
3307
3364
  let roundUsage = { inputTokens: 0, outputTokens: 0 };
3308
3365
  let roundFinishReason = '';
3309
3366
  const toolUses = [];
3367
+ // Filter out excluded tools (loop breaker)
3368
+ const roundTools = excludedToolNames.size > 0
3369
+ ? tools.filter(t => !excludedToolNames.has(t.name))
3370
+ : tools;
3310
3371
  const streamOptions = {
3311
3372
  systemPrompt: enrichedPrompt,
3312
- tools: tools.length > 0 ? tools : undefined,
3373
+ tools: roundTools.length > 0 ? roundTools : undefined,
3313
3374
  passThroughAllTools: true,
3314
3375
  };
3315
3376
  const candidates = options?.fallbackCandidates ?? [
@@ -3337,9 +3398,25 @@ export class Auxiora {
3337
3398
  }
3338
3399
  totalUsage.inputTokens += roundUsage.inputTokens;
3339
3400
  totalUsage.outputTokens += roundUsage.outputTokens;
3340
- // No tool calls — check if response was truncated
3401
+ // No tool calls — check if we should nudge the model to continue
3341
3402
  if (toolUses.length === 0) {
3342
3403
  fullResponse += roundResponse;
3404
+ // Nudge the model to use tools if it only output text.
3405
+ // Case 1 (round 0): Model described a plan but never called tools.
3406
+ // The user likely asked for an action, so give the model one more
3407
+ // chance by injecting a "please use tools" follow-up.
3408
+ // Case 2 (round > 0, lastRoundHadTools): Model used tools last round
3409
+ // but emitted a short text-only response this round (stalled).
3410
+ const shouldNudge = round < maxRounds - 1 && ((round === 0) ||
3411
+ (lastRoundHadTools && roundResponse.length < 200));
3412
+ if (shouldNudge) {
3413
+ this.logger.info('Model stated intent without tool calls, nudging to continue', { round, responseLength: roundResponse.length });
3414
+ currentMessages.push({ role: 'assistant', content: roundResponse });
3415
+ currentMessages.push({ role: 'user', content: 'Please proceed — use the bash and file_write tools to do the work now. Do not describe what you will do — actually call the tools.' });
3416
+ onChunk('status', { message: 'Continuing...' });
3417
+ // Don't break — let the loop continue so the model can make tool calls
3418
+ continue;
3419
+ }
3343
3420
  // Auto-continue if response was cut off by token limit
3344
3421
  const wasTruncated = roundFinishReason === 'max_tokens' || roundFinishReason === 'length';
3345
3422
  if (wasTruncated && fullResponse.length > 0) {
@@ -3389,6 +3466,7 @@ export class Auxiora {
3389
3466
  sessionId,
3390
3467
  workingDirectory: getWorkspacePath(),
3391
3468
  timeout: 30000,
3469
+ environment: { ALLOW_OUTSIDE_WORKSPACE: 'true' },
3392
3470
  };
3393
3471
  const toolResultParts = [];
3394
3472
  for (const toolUse of toolUses) {
@@ -3402,6 +3480,15 @@ export class Auxiora {
3402
3480
  recordToolOutcome(loopState, toolUse.id, mapped.skip);
3403
3481
  continue;
3404
3482
  }
3483
+ // Block excluded tools (loop breaker — tool was disabled due to repetition)
3484
+ if (excludedToolNames.has(mapped.name) || excludedToolNames.has(toolUse.name)) {
3485
+ const blockMsg = `Tool "${toolUse.name}" is temporarily disabled because you were repeating the same call. Use a DIFFERENT tool. For creating files, use file_write (or Write). Do NOT call ${toolUse.name} again.`;
3486
+ onChunk('tool_result', { tool: toolUse.name, success: false, error: blockMsg });
3487
+ toolResultParts.push(`[${toolUse.name}]: Error: ${blockMsg}`);
3488
+ recordToolCall(loopState, toolUse.id, mapped.name, mapped.input);
3489
+ recordToolOutcome(loopState, toolUse.id, blockMsg);
3490
+ continue;
3491
+ }
3405
3492
  recordToolCall(loopState, toolUse.id, mapped.name, mapped.input);
3406
3493
  try {
3407
3494
  const result = await toolExecutor.execute(mapped.name, mapped.input, context);
@@ -3418,6 +3505,15 @@ export class Auxiora {
3418
3505
  }
3419
3506
  toolResultParts.push(`[${toolUse.name}]: ${output}`);
3420
3507
  recordToolOutcome(loopState, toolUse.id, output);
3508
+ // Track file writes to detect same-file rewrites
3509
+ if ((mapped.name === 'file_write' || toolUse.name === 'Write') && mapped.input?.path) {
3510
+ const filePath = mapped.input.path;
3511
+ const count = (writtenFiles.get(filePath) || 0) + 1;
3512
+ writtenFiles.set(filePath, count);
3513
+ if (count > 1) {
3514
+ this.logger.info('Same file rewritten multiple times', { filePath, count });
3515
+ }
3516
+ }
3421
3517
  }
3422
3518
  catch (error) {
3423
3519
  const errorMessage = error instanceof Error ? error.message : 'Unknown error';
@@ -3428,7 +3524,14 @@ export class Auxiora {
3428
3524
  }
3429
3525
  // Append tool results directly to conversation (don't rebuild from getContextMessages
3430
3526
  // which can drop messages due to token windowing)
3431
- const toolResultsMessage = `[Tool Results]\n${toolResultParts.join('\n')}`;
3527
+ let toolResultsMessage = `[Tool Results]\n${toolResultParts.join('\n')}`;
3528
+ // Detect same-file rewrites and nudge to move on
3529
+ const rewrittenFiles = [...writtenFiles.entries()].filter(([, count]) => count > 1);
3530
+ if (rewrittenFiles.length > 0) {
3531
+ const fileList = rewrittenFiles.map(([f, c]) => `${f} (${c}x)`).join(', ');
3532
+ toolResultsMessage += `\n\n⚠️ You have rewritten the same file(s) multiple times: ${fileList}. Each file only needs to be written ONCE. Move on to creating the NEXT file in the project. Do NOT rewrite files you've already created.`;
3533
+ this.logger.info('Same-file rewrite nudge injected', { rewrittenFiles: rewrittenFiles.map(([f, c]) => ({ file: f, count: c })) });
3534
+ }
3432
3535
  currentMessages.push({ role: 'user', content: toolResultsMessage });
3433
3536
  await this.sessions.addMessage(sessionId, 'user', toolResultsMessage);
3434
3537
  // Check for tool loop patterns
@@ -3444,11 +3547,24 @@ export class Auxiora {
3444
3547
  break;
3445
3548
  }
3446
3549
  if (detection.severity === 'warning') {
3447
- this.logger.info('Tool loop warning', {
3550
+ // Temporarily exclude the looping tool to force the model to use alternatives
3551
+ const loopingTool = detection.details?.toolName;
3552
+ if (loopingTool) {
3553
+ excludedToolNames.add(loopingTool);
3554
+ // Also exclude CC-equivalent names
3555
+ const ccEquivalents = { bash: 'Bash', file_read: 'Read', file_write: 'Write', file_list: 'Glob' };
3556
+ const auxEquivalents = { Bash: 'bash', Read: 'file_read', Write: 'file_write', Glob: 'file_list' };
3557
+ if (ccEquivalents[loopingTool])
3558
+ excludedToolNames.add(ccEquivalents[loopingTool]);
3559
+ if (auxEquivalents[loopingTool])
3560
+ excludedToolNames.add(auxEquivalents[loopingTool]);
3561
+ }
3562
+ this.logger.info('Tool loop warning — excluding tool from next round', {
3448
3563
  detector: detection.detector,
3449
3564
  message: detection.message,
3565
+ excludedTools: Array.from(excludedToolNames),
3450
3566
  });
3451
- currentMessages.push({ role: 'user', content: `⚠️ Loop detection warning: ${detection.message}\nPlease try a different approach or different parameters.` });
3567
+ currentMessages.push({ role: 'user', content: `⚠️ You are repeating the same tool call. ${detection.message}\nThe previous calls already succeeded — the ${loopingTool} tool is now temporarily disabled. Move on to the NEXT step: create the actual files using the file_write tool (or Write tool). Do NOT try to create directories again.` });
3452
3568
  }
3453
3569
  // Notify the client that tool processing is done and AI is thinking about results
3454
3570
  onChunk('status', { message: 'Analyzing results...' });
@@ -3589,6 +3705,93 @@ export class Auxiora {
3589
3705
  client.ws.send(JSON.stringify(message));
3590
3706
  }
3591
3707
  }
3708
+ acquireSessionRun(sessionId) {
3709
+ let state = this.sessionRunStates.get(sessionId);
3710
+ if (!state) {
3711
+ state = { running: false, queue: [], lastRunStartedAt: 0 };
3712
+ this.sessionRunStates.set(sessionId, state);
3713
+ }
3714
+ if (state.running)
3715
+ return false;
3716
+ state.running = true;
3717
+ state.lastRunStartedAt = Date.now();
3718
+ return true;
3719
+ }
3720
+ releaseSessionRun(sessionId) {
3721
+ const state = this.sessionRunStates.get(sessionId);
3722
+ if (state) {
3723
+ state.running = false;
3724
+ }
3725
+ }
3726
+ getSessionRunState(sessionId) {
3727
+ let state = this.sessionRunStates.get(sessionId);
3728
+ if (!state) {
3729
+ state = { running: false, queue: [], lastRunStartedAt: 0 };
3730
+ this.sessionRunStates.set(sessionId, state);
3731
+ }
3732
+ return state;
3733
+ }
3734
+ enqueueMessage(sessionId, pending) {
3735
+ const state = this.getSessionRunState(sessionId);
3736
+ const cap = this.config.queue?.cap ?? 20;
3737
+ state.queue.push(pending);
3738
+ if (state.queue.length > cap) {
3739
+ const dropped = state.queue.shift();
3740
+ this.logger.warn('Message queue overflow — dropped oldest message', {
3741
+ sessionId,
3742
+ droppedContent: dropped?.content.slice(0, 80),
3743
+ queueLength: state.queue.length,
3744
+ });
3745
+ }
3746
+ }
3747
+ async drainSessionQueue(sessionId) {
3748
+ const state = this.sessionRunStates.get(sessionId);
3749
+ if (!state)
3750
+ return;
3751
+ while (state.queue.length > 0) {
3752
+ const pending = state.queue.shift();
3753
+ // Skip webchat messages if the client disconnected
3754
+ if (pending.client && !pending.inbound && pending.client.ws.readyState !== 1) {
3755
+ this.logger.info('Skipping queued webchat message — client disconnected', { sessionId });
3756
+ continue;
3757
+ }
3758
+ // Skip if session was destroyed
3759
+ const session = await this.sessions.get(sessionId);
3760
+ if (!session) {
3761
+ this.logger.info('Skipping queued messages — session destroyed', { sessionId });
3762
+ state.queue.length = 0;
3763
+ break;
3764
+ }
3765
+ try {
3766
+ // Release the lock so the re-entrant call can acquire it
3767
+ state.running = false;
3768
+ if (pending.inbound) {
3769
+ await this.handleChannelMessage(pending.inbound);
3770
+ }
3771
+ else if (pending.client) {
3772
+ const wsMessage = {
3773
+ id: pending.requestId ?? `queued-${Date.now()}`,
3774
+ type: 'message',
3775
+ payload: {
3776
+ content: pending.content,
3777
+ sessionId,
3778
+ chatId: pending.chatId,
3779
+ model: pending.modelOverride,
3780
+ provider: pending.providerOverride,
3781
+ },
3782
+ };
3783
+ await this.handleMessage(pending.client, wsMessage);
3784
+ }
3785
+ }
3786
+ catch (err) {
3787
+ this.logger.error('Error processing queued message', {
3788
+ sessionId,
3789
+ error: err instanceof Error ? err : new Error(String(err)),
3790
+ });
3791
+ }
3792
+ }
3793
+ state.running = false;
3794
+ }
3592
3795
  /** Load persisted channel targets from disk so behavior delivery survives restarts. */
3593
3796
  async loadChannelTargets() {
3594
3797
  try {
@@ -3696,260 +3899,300 @@ export class Auxiora {
3696
3899
  }
3697
3900
  return;
3698
3901
  }
3699
- // Process media attachments and add user message
3700
- let messageContent = inbound.content;
3701
- if (inbound.attachments && inbound.attachments.length > 0 && this.mediaProcessor) {
3702
- messageContent = await this.mediaProcessor.process(inbound.attachments, inbound.content);
3703
- }
3704
- // ── Guardrail input scan ──────────────────────────────────────
3705
- const inputScan = this.checkInputGuardrails(messageContent);
3706
- if (inputScan && inputScan.action === 'block') {
3707
- audit('guardrail.triggered', {
3708
- action: 'block',
3709
- direction: 'input',
3710
- threatCount: inputScan.threats.length,
3711
- channelType: inbound.channelType,
3712
- sessionId: session.id,
3902
+ // ── Message queue gate ─────────────────────────────────────────
3903
+ if (!this.acquireSessionRun(session.id)) {
3904
+ this.enqueueMessage(session.id, {
3905
+ content: inbound.content,
3906
+ enqueuedAt: Date.now(),
3907
+ inbound,
3713
3908
  });
3714
3909
  if (this.channels) {
3715
3910
  await this.channels.send(inbound.channelType, inbound.channelId, {
3716
- content: this.GUARDRAIL_BLOCK_MESSAGE,
3911
+ content: "Got it — I'll get to that after I finish the current task.",
3717
3912
  replyToId: inbound.id,
3718
3913
  });
3719
3914
  }
3720
3915
  return;
3721
3916
  }
3722
- // Apply redaction if guardrails flagged PII
3723
- if (inputScan?.action === 'redact' && inputScan.redactedContent) {
3724
- messageContent = inputScan.redactedContent;
3725
- audit('guardrail.triggered', {
3726
- action: 'redact',
3727
- direction: 'input',
3728
- threatCount: inputScan.threats.length,
3729
- channelType: inbound.channelType,
3730
- });
3731
- }
3732
- else if (inputScan?.action === 'warn') {
3733
- audit('guardrail.triggered', {
3734
- action: 'warn',
3735
- direction: 'input',
3736
- threatCount: inputScan.threats.length,
3737
- channelType: inbound.channelType,
3738
- });
3739
- }
3740
- await this.sessions.addMessage(session.id, 'user', messageContent);
3741
- // Check if providers are available
3742
- if (!this.providers) {
3743
- if (this.channels) {
3744
- await this.channels.send(inbound.channelType, inbound.channelId, {
3745
- content: 'I need API keys to respond. Please configure them in the vault.',
3746
- replyToId: inbound.id,
3747
- });
3917
+ try {
3918
+ // Process media attachments and add user message
3919
+ let messageContent = inbound.content;
3920
+ if (inbound.attachments && inbound.attachments.length > 0 && this.mediaProcessor) {
3921
+ messageContent = await this.mediaProcessor.process(inbound.attachments, inbound.content);
3748
3922
  }
3749
- return;
3750
- }
3751
- // Get context messages channel sessions use a capped token budget and turn limit
3752
- // to prevent excessively long API calls from models with huge context windows.
3753
- const contextMessages = this.sessions.getContextMessages(session.id, this.getProviderMaxTokens(this.providers.getPrimaryProvider()), 4096, { isChannel: true });
3754
- const chatMessages = sanitizeTranscript(contextMessages).map((m) => ({
3755
- role: m.role,
3756
- content: m.content,
3757
- }));
3758
- // Show typing indicator while generating response
3759
- const stopTyping = this.channels
3760
- ? await this.channels.startTyping(inbound.channelType, inbound.channelId)
3761
- : () => { };
3762
- const channelAgentId = `channel:${inbound.channelType}:${inbound.channelId}:${Date.now()}`;
3763
- // 4-minute timeout for the entire LLM response cycle.
3764
- // Increased from 2min to accommodate auto-continuations (max_tokens → "Continue")
3765
- // and tool round-trips. If the provider stream hangs (network issue, overloaded API),
3766
- // this ensures the user gets an error message instead of infinite "typing…".
3767
- const CHANNEL_RESPONSE_TIMEOUT_MS = 240_000;
3768
- let draftLoop = null;
3769
- let draftMessageId = null;
3770
- try { // outer try — finally block guarantees stopTyping() runs
3771
- try {
3772
- // Get tool definitions from registry
3773
- const tools = toolRegistry.toProviderFormat();
3774
- // Build enriched prompt through pipeline
3775
- let enrichedPrompt = this.systemPrompt;
3776
- const channelChatId = `${inbound.channelType}:${inbound.channelId}`;
3777
- let channelArchitectResult = { prompt: this.systemPrompt };
3778
- // Reset Architect conversation state for new channel chats
3779
- const useChannelArchitect = this.config.agent.personality === 'the-architect';
3780
- if (useChannelArchitect && this.architect && !this.architectResetChats.has(channelChatId)) {
3781
- this.architectResetChats.add(channelChatId);
3782
- this.architect.resetConversation();
3783
- audit('personality.reset', { sessionId: session.id, chatId: channelChatId });
3923
+ // ── Guardrail input scan ──────────────────────────────────────
3924
+ const inputScan = this.checkInputGuardrails(messageContent);
3925
+ if (inputScan && inputScan.action === 'block') {
3926
+ audit('guardrail.triggered', {
3927
+ action: 'block',
3928
+ direction: 'input',
3929
+ threatCount: inputScan.threats.length,
3930
+ channelType: inbound.channelType,
3931
+ sessionId: session.id,
3932
+ });
3933
+ if (this.channels) {
3934
+ await this.channels.send(inbound.channelType, inbound.channelId, {
3935
+ content: this.GUARDRAIL_BLOCK_MESSAGE,
3936
+ replyToId: inbound.id,
3937
+ });
3784
3938
  }
3785
- if (this.enrichmentPipeline) {
3786
- const enrichCtx = {
3787
- basePrompt: this.systemPrompt,
3788
- userMessage: messageContent,
3789
- history: contextMessages,
3790
- channelType: inbound.channelType,
3791
- chatId: channelChatId,
3792
- sessionId: session.id,
3793
- userId: inbound.senderId ?? 'anonymous',
3794
- toolsUsed: this.lastToolsUsed.get(session.id) ?? [],
3795
- config: this.config,
3796
- senderName: inbound.senderName,
3797
- groupContext: inbound.groupContext,
3798
- };
3799
- const result = await this.enrichmentPipeline.run(enrichCtx);
3800
- enrichedPrompt = result.prompt;
3801
- channelArchitectResult = { prompt: enrichedPrompt, architectMeta: result.metadata.architect };
3939
+ return;
3940
+ }
3941
+ // Apply redaction if guardrails flagged PII
3942
+ if (inputScan?.action === 'redact' && inputScan.redactedContent) {
3943
+ messageContent = inputScan.redactedContent;
3944
+ audit('guardrail.triggered', {
3945
+ action: 'redact',
3946
+ direction: 'input',
3947
+ threatCount: inputScan.threats.length,
3948
+ channelType: inbound.channelType,
3949
+ });
3950
+ }
3951
+ else if (inputScan?.action === 'warn') {
3952
+ audit('guardrail.triggered', {
3953
+ action: 'warn',
3954
+ direction: 'input',
3955
+ threatCount: inputScan.threats.length,
3956
+ channelType: inbound.channelType,
3957
+ });
3958
+ }
3959
+ await this.sessions.addMessage(session.id, 'user', messageContent);
3960
+ // Check if providers are available
3961
+ if (!this.providers) {
3962
+ if (this.channels) {
3963
+ await this.channels.send(inbound.channelType, inbound.channelId, {
3964
+ content: 'I need API keys to respond. Please configure them in the vault.',
3965
+ replyToId: inbound.id,
3966
+ });
3802
3967
  }
3803
- // Use executeWithTools for channels — collect final text for channel reply
3804
- const provider = this.providers.getPrimaryProvider();
3805
- // Inject model identity so the AI knows what it's running on
3806
- enrichedPrompt += this.buildModelIdentityFragment(provider);
3807
- this.agentStart(channelAgentId, 'channel', `Processing message on ${inbound.channelType}`, inbound.channelType);
3808
- // Draft streaming: edit message in place if adapter supports it
3809
- const adapter = this.channels?.getAdapter(inbound.channelType);
3810
- const supportsDraft = !!adapter?.editMessage;
3811
- let accumulatedText = '';
3812
- if (supportsDraft && this.channels) {
3813
- const channels = this.channels;
3814
- draftLoop = new DraftStreamLoop(async (text) => {
3815
- try {
3816
- if (!draftMessageId) {
3817
- const result = await channels.send(inbound.channelType, inbound.channelId, {
3818
- content: text,
3819
- replyToId: inbound.id,
3820
- });
3821
- if (result.success && result.messageId) {
3822
- draftMessageId = result.messageId;
3968
+ return;
3969
+ }
3970
+ // Get context messages channel sessions use a capped token budget and turn limit
3971
+ // to prevent excessively long API calls from models with huge context windows.
3972
+ const contextMessages = this.sessions.getContextMessages(session.id, this.getProviderMaxTokens(this.providers.getPrimaryProvider()), 4096, { isChannel: true });
3973
+ const chatMessages = sanitizeTranscript(contextMessages).map((m) => ({
3974
+ role: m.role,
3975
+ content: m.content,
3976
+ }));
3977
+ // Show typing indicator while generating response
3978
+ const stopTyping = this.channels
3979
+ ? await this.channels.startTyping(inbound.channelType, inbound.channelId)
3980
+ : () => { };
3981
+ const channelAgentId = `channel:${inbound.channelType}:${inbound.channelId}:${Date.now()}`;
3982
+ // 30-minute timeout for the entire LLM response cycle.
3983
+ // Agentic tool loops can take many rounds (up to 20), each requiring a full LLM
3984
+ // call (30-90s) + tool execution. A multi-file generation task easily takes 10-20 minutes.
3985
+ const CHANNEL_RESPONSE_TIMEOUT_MS = 1_800_000;
3986
+ let draftLoop = null;
3987
+ let draftMessageId = null;
3988
+ // Snapshot message count before agentic loop so we can rollback on failure
3989
+ let channelMessageSnapshot;
3990
+ try { // outer try — finally block guarantees stopTyping() runs
3991
+ try {
3992
+ // Get tool definitions from registry
3993
+ const tools = toolRegistry.toProviderFormat();
3994
+ // Build enriched prompt through pipeline
3995
+ let enrichedPrompt = this.systemPrompt;
3996
+ const channelChatId = `${inbound.channelType}:${inbound.channelId}`;
3997
+ let channelArchitectResult = { prompt: this.systemPrompt };
3998
+ // Reset Architect conversation state for new channel chats
3999
+ const useChannelArchitect = this.config.agent.personality === 'the-architect';
4000
+ if (useChannelArchitect && this.architect && !this.architectResetChats.has(channelChatId)) {
4001
+ this.architectResetChats.add(channelChatId);
4002
+ this.architect.resetConversation();
4003
+ audit('personality.reset', { sessionId: session.id, chatId: channelChatId });
4004
+ }
4005
+ if (this.enrichmentPipeline) {
4006
+ const enrichCtx = {
4007
+ basePrompt: this.systemPrompt,
4008
+ userMessage: messageContent,
4009
+ history: contextMessages,
4010
+ channelType: inbound.channelType,
4011
+ chatId: channelChatId,
4012
+ sessionId: session.id,
4013
+ userId: inbound.senderId ?? 'anonymous',
4014
+ toolsUsed: this.lastToolsUsed.get(session.id) ?? [],
4015
+ config: this.config,
4016
+ senderName: inbound.senderName,
4017
+ groupContext: inbound.groupContext,
4018
+ };
4019
+ const result = await this.enrichmentPipeline.run(enrichCtx);
4020
+ enrichedPrompt = result.prompt;
4021
+ channelArchitectResult = { prompt: enrichedPrompt, architectMeta: result.metadata.architect };
4022
+ }
4023
+ // Use executeWithTools for channels — collect final text for channel reply
4024
+ const provider = this.providers.getPrimaryProvider();
4025
+ // Inject model identity so the AI knows what it's running on
4026
+ enrichedPrompt += this.buildModelIdentityFragment(provider);
4027
+ this.agentStart(channelAgentId, 'channel', `Processing message on ${inbound.channelType}`, inbound.channelType);
4028
+ // Draft streaming: edit message in place if adapter supports it
4029
+ const adapter = this.channels?.getAdapter(inbound.channelType);
4030
+ const supportsDraft = !!adapter?.editMessage;
4031
+ let accumulatedText = '';
4032
+ if (supportsDraft && this.channels) {
4033
+ const channels = this.channels;
4034
+ draftLoop = new DraftStreamLoop(async (text) => {
4035
+ try {
4036
+ if (!draftMessageId) {
4037
+ const result = await channels.send(inbound.channelType, inbound.channelId, {
4038
+ content: text,
4039
+ replyToId: inbound.id,
4040
+ });
4041
+ if (result.success && result.messageId) {
4042
+ draftMessageId = result.messageId;
4043
+ }
4044
+ return result.success;
4045
+ }
4046
+ else {
4047
+ const result = await channels.editMessage(inbound.channelType, inbound.channelId, draftMessageId, { content: text });
4048
+ return result.success;
3823
4049
  }
3824
- return result.success;
3825
4050
  }
3826
- else {
3827
- const result = await channels.editMessage(inbound.channelType, inbound.channelId, draftMessageId, { content: text });
3828
- return result.success;
4051
+ catch {
4052
+ return false;
3829
4053
  }
4054
+ }, 1000);
4055
+ }
4056
+ const fallbackCandidates = this.providers.resolveFallbackCandidates();
4057
+ const channelToolsUsed = [];
4058
+ // Snapshot message count so we can rollback orphaned messages on timeout/error
4059
+ channelMessageSnapshot = this.sessions.getMessageCount(session.id);
4060
+ const { response: channelResponse, usage: channelUsage } = await Promise.race([
4061
+ this.executeWithTools(session.id, chatMessages, enrichedPrompt, provider, (type, data) => {
4062
+ if (type === 'text' && data && draftLoop) {
4063
+ accumulatedText += data;
4064
+ draftLoop.update(accumulatedText);
4065
+ }
4066
+ else if (type === 'tool_use') {
4067
+ channelToolsUsed.push({ name: data?.name ?? 'unknown', success: true });
4068
+ }
4069
+ else if (type === 'tool_result') {
4070
+ if (channelToolsUsed.length > 0 && data?.error) {
4071
+ channelToolsUsed[channelToolsUsed.length - 1].success = false;
4072
+ }
4073
+ }
4074
+ }, { tools, fallbackCandidates }),
4075
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Response timed out — the task did not complete within 30 minutes. Try breaking it into smaller steps.')), CHANNEL_RESPONSE_TIMEOUT_MS)),
4076
+ ]);
4077
+ // Feed tool usage to awareness collector
4078
+ if (this.architectAwarenessCollector && channelToolsUsed.length > 0) {
4079
+ this.architectAwarenessCollector.updateToolContext(channelToolsUsed);
4080
+ }
4081
+ this.lastToolsUsed.set(session.id, channelToolsUsed);
4082
+ // Flush final draft text
4083
+ if (draftLoop) {
4084
+ if (channelResponse && channelResponse !== accumulatedText) {
4085
+ draftLoop.update(channelResponse);
3830
4086
  }
3831
- catch {
3832
- return false;
3833
- }
3834
- }, 1000);
3835
- }
3836
- const fallbackCandidates = this.providers.resolveFallbackCandidates();
3837
- const channelToolsUsed = [];
3838
- const { response: channelResponse, usage: channelUsage } = await Promise.race([
3839
- this.executeWithTools(session.id, chatMessages, enrichedPrompt, provider, (type, data) => {
3840
- if (type === 'text' && data && draftLoop) {
3841
- accumulatedText += data;
3842
- draftLoop.update(accumulatedText);
3843
- }
3844
- else if (type === 'tool_use') {
3845
- channelToolsUsed.push({ name: data?.name ?? 'unknown', success: true });
4087
+ await draftLoop.flush();
4088
+ draftLoop.stop();
4089
+ }
4090
+ // ── Guardrail output scan ─────────────────────────────────────
4091
+ const channelOutputScan = this.checkOutputGuardrails(channelResponse);
4092
+ const finalChannelResponse = channelOutputScan.response;
4093
+ if (channelOutputScan.wasModified) {
4094
+ audit('guardrail.triggered', {
4095
+ action: channelOutputScan.action,
4096
+ direction: 'output',
4097
+ channelType: inbound.channelType,
4098
+ sessionId: session.id,
4099
+ });
4100
+ // If draft streaming already sent partial text, do a final edit with clean version
4101
+ if (draftMessageId && adapter?.editMessage) {
4102
+ await adapter.editMessage(inbound.channelId, draftMessageId, { content: finalChannelResponse });
3846
4103
  }
3847
- else if (type === 'tool_result') {
3848
- if (channelToolsUsed.length > 0 && data?.error) {
3849
- channelToolsUsed[channelToolsUsed.length - 1].success = false;
3850
- }
4104
+ }
4105
+ // Save assistant message
4106
+ await this.sessions.addMessage(session.id, 'assistant', finalChannelResponse, {
4107
+ input: channelUsage.inputTokens,
4108
+ output: channelUsage.outputTokens,
4109
+ }, channelArchitectResult.architectMeta ? { architectDomain: channelArchitectResult.architectMeta.detectedContext.domain } : undefined);
4110
+ // Extract memories and learn from conversation (if auto-extract enabled)
4111
+ if (this.config.memory?.autoExtract !== false && this.memoryStore && finalChannelResponse && messageContent.length > 20) {
4112
+ void this.extractAndLearn(messageContent, finalChannelResponse, session.id);
4113
+ }
4114
+ // Send final response. The draft stream loop edits a single message,
4115
+ // but Discord silently truncates edits at 2000 chars. For long responses,
4116
+ // replace the draft with a chunked send so nothing is lost.
4117
+ const DRAFT_SAFE_LENGTH = 1900; // leave margin below Discord's 2000 char limit
4118
+ if (draftMessageId && this.channels && finalChannelResponse.length > DRAFT_SAFE_LENGTH) {
4119
+ // Draft only showed partial content — replace it with a pointer and send full chunked response
4120
+ if (adapter?.editMessage) {
4121
+ await adapter.editMessage(inbound.channelId, draftMessageId, {
4122
+ content: '*\u2026 (full response below)*',
4123
+ });
3851
4124
  }
3852
- }, { tools, fallbackCandidates }),
3853
- new Promise((_, reject) => setTimeout(() => reject(new Error('Response timed out — the AI provider did not respond within 4 minutes. Please try again.')), CHANNEL_RESPONSE_TIMEOUT_MS)),
3854
- ]);
3855
- // Feed tool usage to awareness collector
3856
- if (this.architectAwarenessCollector && channelToolsUsed.length > 0) {
3857
- this.architectAwarenessCollector.updateToolContext(channelToolsUsed);
3858
- }
3859
- this.lastToolsUsed.set(session.id, channelToolsUsed);
3860
- // Flush final draft text
3861
- if (draftLoop) {
3862
- if (channelResponse && channelResponse !== accumulatedText) {
3863
- draftLoop.update(channelResponse);
4125
+ await this.channels.send(inbound.channelType, inbound.channelId, {
4126
+ content: finalChannelResponse,
4127
+ });
3864
4128
  }
3865
- await draftLoop.flush();
3866
- draftLoop.stop();
3867
- }
3868
- // ── Guardrail output scan ─────────────────────────────────────
3869
- const channelOutputScan = this.checkOutputGuardrails(channelResponse);
3870
- const finalChannelResponse = channelOutputScan.response;
3871
- if (channelOutputScan.wasModified) {
3872
- audit('guardrail.triggered', {
3873
- action: channelOutputScan.action,
3874
- direction: 'output',
4129
+ else if (!draftMessageId && this.channels) {
4130
+ await this.channels.send(inbound.channelType, inbound.channelId, {
4131
+ content: finalChannelResponse,
4132
+ replyToId: inbound.id,
4133
+ });
4134
+ }
4135
+ audit('message.sent', {
3875
4136
  channelType: inbound.channelType,
3876
4137
  sessionId: session.id,
4138
+ inputTokens: channelUsage.inputTokens,
4139
+ outputTokens: channelUsage.outputTokens,
3877
4140
  });
3878
- // If draft streaming already sent partial text, do a final edit with clean version
3879
- if (draftMessageId && adapter?.editMessage) {
3880
- await adapter.editMessage(inbound.channelId, draftMessageId, { content: finalChannelResponse });
3881
- }
3882
- }
3883
- // Save assistant message
3884
- await this.sessions.addMessage(session.id, 'assistant', finalChannelResponse, {
3885
- input: channelUsage.inputTokens,
3886
- output: channelUsage.outputTokens,
3887
- }, channelArchitectResult.architectMeta ? { architectDomain: channelArchitectResult.architectMeta.detectedContext.domain } : undefined);
3888
- // Extract memories and learn from conversation (if auto-extract enabled)
3889
- if (this.config.memory?.autoExtract !== false && this.memoryStore && finalChannelResponse && messageContent.length > 20) {
3890
- void this.extractAndLearn(messageContent, finalChannelResponse, session.id);
4141
+ this.agentEnd(channelAgentId, true);
3891
4142
  }
3892
- // Send final response. The draft stream loop edits a single message,
3893
- // but Discord silently truncates edits at 2000 chars. For long responses,
3894
- // replace the draft with a chunked send so nothing is lost.
3895
- const DRAFT_SAFE_LENGTH = 1900; // leave margin below Discord's 2000 char limit
3896
- if (draftMessageId && this.channels && finalChannelResponse.length > DRAFT_SAFE_LENGTH) {
3897
- // Draft only showed partial content — replace it with a pointer and send full chunked response
3898
- if (adapter?.editMessage) {
3899
- await adapter.editMessage(inbound.channelId, draftMessageId, {
3900
- content: '*\u2026 (full response below)*',
3901
- });
4143
+ catch (error) {
4144
+ if (draftLoop)
4145
+ draftLoop.stop();
4146
+ this.agentEnd(channelAgentId, false);
4147
+ const errorMessage = error instanceof Error ? error.message : 'Unknown error';
4148
+ audit('channel.error', { sessionId: session.id, error: errorMessage });
4149
+ // Rollback orphaned messages from interrupted agentic tool loops.
4150
+ // This is critical for channel messages where timeouts are common (30-min limit).
4151
+ if (typeof channelMessageSnapshot === 'number') {
4152
+ const rolled = this.sessions.rollbackMessages(session.id, channelMessageSnapshot);
4153
+ if (rolled > 0) {
4154
+ this.logger.info('Rolled back orphaned channel messages from interrupted tool loop', {
4155
+ sessionId: session.id,
4156
+ channelType: inbound.channelType,
4157
+ rolledBack: rolled,
4158
+ });
4159
+ }
3902
4160
  }
3903
- await this.channels.send(inbound.channelType, inbound.channelId, {
3904
- content: finalChannelResponse,
3905
- });
3906
- }
3907
- else if (!draftMessageId && this.channels) {
3908
- await this.channels.send(inbound.channelType, inbound.channelId, {
3909
- content: finalChannelResponse,
3910
- replyToId: inbound.id,
3911
- });
3912
- }
3913
- audit('message.sent', {
3914
- channelType: inbound.channelType,
3915
- sessionId: session.id,
3916
- inputTokens: channelUsage.inputTokens,
3917
- outputTokens: channelUsage.outputTokens,
3918
- });
3919
- this.agentEnd(channelAgentId, true);
3920
- }
3921
- catch (error) {
3922
- if (draftLoop)
3923
- draftLoop.stop();
3924
- this.agentEnd(channelAgentId, false);
3925
- const errorMessage = error instanceof Error ? error.message : 'Unknown error';
3926
- audit('channel.error', { sessionId: session.id, error: errorMessage });
3927
- if (this.channels) {
3928
- const errorContent = `Error: ${errorMessage}`;
3929
- // If a draft message exists, edit it with the error instead of sending a new one
3930
- if (draftMessageId) {
3931
- try {
3932
- await this.channels.editMessage(inbound.channelType, inbound.channelId, draftMessageId, { content: errorContent });
4161
+ if (this.channels) {
4162
+ const errorContent = `Error: ${errorMessage}`;
4163
+ // If a draft message exists, edit it with the error instead of sending a new one
4164
+ if (draftMessageId) {
4165
+ try {
4166
+ await this.channels.editMessage(inbound.channelType, inbound.channelId, draftMessageId, { content: errorContent });
4167
+ }
4168
+ catch {
4169
+ // Edit failed — fall back to new message
4170
+ await this.channels.send(inbound.channelType, inbound.channelId, {
4171
+ content: errorContent,
4172
+ replyToId: inbound.id,
4173
+ });
4174
+ }
3933
4175
  }
3934
- catch {
3935
- // Edit failed — fall back to new message
4176
+ else {
3936
4177
  await this.channels.send(inbound.channelType, inbound.channelId, {
3937
4178
  content: errorContent,
3938
4179
  replyToId: inbound.id,
3939
4180
  });
3940
4181
  }
3941
4182
  }
3942
- else {
3943
- await this.channels.send(inbound.channelType, inbound.channelId, {
3944
- content: errorContent,
3945
- replyToId: inbound.id,
3946
- });
3947
- }
3948
4183
  }
3949
4184
  }
4185
+ finally {
4186
+ stopTyping();
4187
+ }
3950
4188
  }
3951
4189
  finally {
3952
- stopTyping();
4190
+ try {
4191
+ await this.drainSessionQueue(session.id);
4192
+ }
4193
+ finally {
4194
+ this.releaseSessionRun(session.id);
4195
+ }
3953
4196
  }
3954
4197
  }); // end runWithRequestId
3955
4198
  }
@@ -4257,6 +4500,7 @@ export class Auxiora {
4257
4500
  catch { /* best-effort — don't block shutdown */ }
4258
4501
  }
4259
4502
  this.consciousness?.shutdown();
4503
+ this.sessionRunStates.clear();
4260
4504
  this.sessions.destroy();
4261
4505
  this.vault.lock();
4262
4506
  this.running = false;