@vfarcic/dot-ai 0.108.0 → 0.110.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/ai-provider.interface.d.ts +11 -16
- package/dist/core/ai-provider.interface.d.ts.map +1 -1
- package/dist/core/kubectl-tools.d.ts +66 -0
- package/dist/core/kubectl-tools.d.ts.map +1 -0
- package/dist/core/kubectl-tools.js +473 -0
- package/dist/core/kubernetes-utils.d.ts +1 -0
- package/dist/core/kubernetes-utils.d.ts.map +1 -1
- package/dist/core/kubernetes-utils.js +30 -0
- package/dist/core/providers/anthropic-provider.d.ts +5 -4
- package/dist/core/providers/anthropic-provider.d.ts.map +1 -1
- package/dist/core/providers/anthropic-provider.js +152 -109
- package/dist/core/providers/provider-debug-utils.d.ts +47 -4
- package/dist/core/providers/provider-debug-utils.d.ts.map +1 -1
- package/dist/core/providers/provider-debug-utils.js +67 -7
- package/dist/core/providers/vercel-provider.d.ts +11 -21
- package/dist/core/providers/vercel-provider.d.ts.map +1 -1
- package/dist/core/providers/vercel-provider.js +285 -25
- package/dist/tools/remediate.d.ts +0 -40
- package/dist/tools/remediate.d.ts.map +1 -1
- package/dist/tools/remediate.js +133 -493
- package/package.json +1 -1
- package/prompts/remediate-system.md +166 -0
- package/prompts/remediate-final-analysis.md +0 -243
- package/prompts/remediate-investigation.md +0 -194
package/dist/tools/remediate.js
CHANGED
|
@@ -36,34 +36,21 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
36
36
|
};
|
|
37
37
|
})();
|
|
38
38
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
-
exports.REMEDIATE_TOOL_INPUT_SCHEMA = exports.SAFE_OPERATIONS = exports.REMEDIATE_TOOL_DESCRIPTION = exports.REMEDIATE_TOOL_NAME = void 0;
|
|
39
|
+
exports.REMEDIATE_TOOL_INPUT_SCHEMA = exports.REMEDIATE_TOOL_DESCRIPTION = exports.REMEDIATE_TOOL_NAME = void 0;
|
|
40
40
|
exports.parseAIFinalAnalysis = parseAIFinalAnalysis;
|
|
41
|
-
exports.parseAIResponse = parseAIResponse;
|
|
42
41
|
exports.handleRemediateTool = handleRemediateTool;
|
|
43
42
|
const zod_1 = require("zod");
|
|
44
43
|
const error_handling_1 = require("../core/error-handling");
|
|
45
44
|
const ai_provider_factory_1 = require("../core/ai-provider-factory");
|
|
46
45
|
const session_utils_1 = require("../core/session-utils");
|
|
47
|
-
const kubernetes_utils_1 = require("../core/kubernetes-utils");
|
|
46
|
+
const kubectl_tools_1 = require("../core/kubectl-tools");
|
|
48
47
|
const fs = __importStar(require("fs"));
|
|
49
48
|
const path = __importStar(require("path"));
|
|
50
49
|
const crypto = __importStar(require("crypto"));
|
|
50
|
+
// PRD #143 Milestone 1: Hybrid approach - AI can use kubectl_api_resources tool OR continue with JSON dataRequests
|
|
51
51
|
// Tool metadata for direct MCP registration
|
|
52
52
|
exports.REMEDIATE_TOOL_NAME = 'remediate';
|
|
53
53
|
exports.REMEDIATE_TOOL_DESCRIPTION = 'AI-powered Kubernetes issue analysis that provides root cause identification and actionable remediation steps. Unlike basic kubectl commands, this tool performs multi-step investigation, correlates cluster data, and generates intelligent solutions. Use when users want to understand WHY something is broken, not just see raw status. Ideal for: troubleshooting failures, diagnosing performance issues, analyzing pod problems, investigating networking/storage issues, or any "what\'s wrong" questions.';
|
|
54
|
-
// Safety: Whitelist of allowed read-only operations
|
|
55
|
-
exports.SAFE_OPERATIONS = ['get', 'describe', 'logs', 'events', 'top', 'explain'];
|
|
56
|
-
/**
|
|
57
|
-
* Check if command arguments contain dry-run flag (making any operation safe)
|
|
58
|
-
*/
|
|
59
|
-
function hasDryRunFlag(args) {
|
|
60
|
-
if (!args)
|
|
61
|
-
return false;
|
|
62
|
-
return args.some(arg => arg === '--dry-run=client' ||
|
|
63
|
-
arg === '--dry-run=server' ||
|
|
64
|
-
arg === '--dry-run' ||
|
|
65
|
-
arg.startsWith('--dry-run='));
|
|
66
|
-
}
|
|
67
54
|
// Zod schema for MCP registration
|
|
68
55
|
exports.REMEDIATE_TOOL_INPUT_SCHEMA = {
|
|
69
56
|
issue: zod_1.z.string().min(1).max(2000).describe('Issue description that needs to be analyzed and remediated').optional(),
|
|
@@ -114,181 +101,111 @@ function updateSessionFile(sessionDir, sessionId, updates) {
|
|
|
114
101
|
writeSessionFile(sessionDir, sessionId, updatedSession);
|
|
115
102
|
}
|
|
116
103
|
/**
|
|
117
|
-
* AI-driven investigation
|
|
104
|
+
* AI-driven investigation - uses toolLoop for single-phase investigation and analysis
|
|
118
105
|
*/
|
|
119
106
|
async function conductInvestigation(session, sessionDir, aiProvider, logger, requestId) {
|
|
120
|
-
const maxIterations = 20;
|
|
121
|
-
|
|
122
|
-
logger.info('Starting AI investigation loop', {
|
|
123
|
-
requestId,
|
|
124
|
-
sessionId: session.sessionId,
|
|
125
|
-
currentIterations: currentIteration
|
|
126
|
-
});
|
|
127
|
-
while (currentIteration < maxIterations) {
|
|
128
|
-
logger.debug(`Starting investigation iteration ${currentIteration + 1}`, { requestId, sessionId: session.sessionId });
|
|
129
|
-
try {
|
|
130
|
-
// Get AI analysis with investigation prompts
|
|
131
|
-
const aiAnalysis = await analyzeCurrentState(session, aiProvider, logger, requestId);
|
|
132
|
-
// Parse AI response for data requests and completion status
|
|
133
|
-
const { dataRequests, isComplete, needsMoreSpecificInfo, parsedResponse } = parseAIResponse(aiAnalysis);
|
|
134
|
-
// Handle early termination when issue description is too vague
|
|
135
|
-
if (needsMoreSpecificInfo) {
|
|
136
|
-
logger.info('Investigation terminated: needs more specific information', {
|
|
137
|
-
requestId,
|
|
138
|
-
sessionId: session.sessionId,
|
|
139
|
-
iteration: currentIteration + 1
|
|
140
|
-
});
|
|
141
|
-
throw error_handling_1.ErrorHandler.createError(error_handling_1.ErrorCategory.VALIDATION, error_handling_1.ErrorSeverity.MEDIUM, 'Unable to find relevant resources for the reported issue. Please be more specific about which resource type or component is having problems (e.g., "my sqls.devopstoolkit.live resource named test-db" instead of "my database").', {
|
|
142
|
-
operation: 'investigation_early_termination',
|
|
143
|
-
component: 'RemediateTool',
|
|
144
|
-
input: { sessionId: session.sessionId, issue: session.issue }
|
|
145
|
-
});
|
|
146
|
-
}
|
|
147
|
-
// Gather safe data from Kubernetes using kubectl
|
|
148
|
-
const gatheredData = await gatherSafeData(dataRequests, logger, requestId);
|
|
149
|
-
// Create iteration record
|
|
150
|
-
const iteration = {
|
|
151
|
-
step: currentIteration + 1,
|
|
152
|
-
aiAnalysis,
|
|
153
|
-
dataRequests,
|
|
154
|
-
gatheredData,
|
|
155
|
-
complete: isComplete,
|
|
156
|
-
timestamp: new Date()
|
|
157
|
-
};
|
|
158
|
-
// Store parsed response data if available
|
|
159
|
-
if (parsedResponse) {
|
|
160
|
-
logger.debug('AI investigation analysis', {
|
|
161
|
-
requestId,
|
|
162
|
-
sessionId: session.sessionId,
|
|
163
|
-
confidence: parsedResponse.confidence,
|
|
164
|
-
reasoning: parsedResponse.reasoning,
|
|
165
|
-
dataRequestCount: parsedResponse.dataRequests.length
|
|
166
|
-
});
|
|
167
|
-
}
|
|
168
|
-
// Update session with new iteration
|
|
169
|
-
session.iterations.push(iteration);
|
|
170
|
-
updateSessionFile(sessionDir, session.sessionId, { iterations: session.iterations });
|
|
171
|
-
logger.debug('Investigation iteration completed', {
|
|
172
|
-
requestId,
|
|
173
|
-
sessionId: session.sessionId,
|
|
174
|
-
step: iteration.step,
|
|
175
|
-
dataRequestCount: dataRequests.length,
|
|
176
|
-
complete: iteration.complete
|
|
177
|
-
});
|
|
178
|
-
// Check if analysis is complete
|
|
179
|
-
if (iteration.complete) {
|
|
180
|
-
logger.info('Investigation completed by AI decision', {
|
|
181
|
-
requestId,
|
|
182
|
-
sessionId: session.sessionId,
|
|
183
|
-
totalIterations: iteration.step,
|
|
184
|
-
confidence: parsedResponse?.confidence,
|
|
185
|
-
reasoning: parsedResponse?.reasoning
|
|
186
|
-
});
|
|
187
|
-
break;
|
|
188
|
-
}
|
|
189
|
-
currentIteration++;
|
|
190
|
-
}
|
|
191
|
-
catch (error) {
|
|
192
|
-
logger.error('Investigation iteration failed', error, {
|
|
193
|
-
requestId,
|
|
194
|
-
sessionId: session.sessionId,
|
|
195
|
-
iteration: currentIteration + 1
|
|
196
|
-
});
|
|
197
|
-
// Mark session as failed
|
|
198
|
-
updateSessionFile(sessionDir, session.sessionId, { status: 'failed' });
|
|
199
|
-
throw error_handling_1.ErrorHandler.createError(error_handling_1.ErrorCategory.AI_SERVICE, error_handling_1.ErrorSeverity.HIGH, `Investigation failed at iteration ${currentIteration + 1}: ${error instanceof Error ? error.message : 'Unknown error'}`, {
|
|
200
|
-
operation: 'investigation_loop',
|
|
201
|
-
component: 'RemediateTool',
|
|
202
|
-
input: { sessionId: session.sessionId, iteration: currentIteration + 1 }
|
|
203
|
-
});
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
// Generate final analysis
|
|
207
|
-
const finalAnalysis = await generateFinalAnalysis(session, logger, requestId);
|
|
208
|
-
// Update session with final analysis
|
|
209
|
-
updateSessionFile(sessionDir, session.sessionId, {
|
|
210
|
-
finalAnalysis,
|
|
211
|
-
status: 'analysis_complete'
|
|
212
|
-
});
|
|
213
|
-
logger.info('Investigation and analysis completed', {
|
|
107
|
+
const maxIterations = 20;
|
|
108
|
+
logger.info('Starting AI investigation with toolLoop', {
|
|
214
109
|
requestId,
|
|
215
110
|
sessionId: session.sessionId,
|
|
216
|
-
|
|
217
|
-
recommendedActions: finalAnalysis.remediation.actions.length
|
|
111
|
+
issue: session.issue
|
|
218
112
|
});
|
|
219
|
-
return finalAnalysis;
|
|
220
|
-
}
|
|
221
|
-
/**
|
|
222
|
-
* Analyze current state using AI with investigation prompts
|
|
223
|
-
*/
|
|
224
|
-
async function analyzeCurrentState(session, aiProvider, logger, requestId) {
|
|
225
|
-
logger.debug('Analyzing current state with AI', { requestId, sessionId: session.sessionId });
|
|
226
113
|
try {
|
|
227
|
-
// Load investigation prompt
|
|
228
|
-
const promptPath = path.join(__dirname, '..', '..', 'prompts', 'remediate-investigation.md');
|
|
229
|
-
const promptTemplate = fs.readFileSync(promptPath, 'utf8');
|
|
230
|
-
|
|
231
|
-
let clusterApiResources = '';
|
|
232
|
-
try {
|
|
233
|
-
// Use kubectl api-resources directly - simple and reliable
|
|
234
|
-
clusterApiResources = await (0, kubernetes_utils_1.executeKubectl)(['api-resources']);
|
|
235
|
-
logger.debug('Discovered cluster API resources', {
|
|
236
|
-
requestId,
|
|
237
|
-
sessionId: session.sessionId,
|
|
238
|
-
outputLength: clusterApiResources.length
|
|
239
|
-
});
|
|
240
|
-
}
|
|
241
|
-
catch (error) {
|
|
242
|
-
const errorMessage = `Failed to discover cluster API resources: ${error instanceof Error ? error.message : String(error)}. Complete API visibility is required for quality remediation recommendations.`;
|
|
243
|
-
logger.error('API discovery failed - aborting remediation', error, {
|
|
244
|
-
requestId,
|
|
245
|
-
sessionId: session.sessionId
|
|
246
|
-
});
|
|
247
|
-
throw new Error(errorMessage);
|
|
248
|
-
}
|
|
249
|
-
// Prepare template variables
|
|
250
|
-
const currentIteration = session.iterations.length + 1;
|
|
251
|
-
const maxIterations = 20;
|
|
252
|
-
const previousIterationsJson = JSON.stringify(session.iterations.map(iter => ({
|
|
253
|
-
step: iter.step,
|
|
254
|
-
analysis: iter.aiAnalysis,
|
|
255
|
-
dataRequests: iter.dataRequests,
|
|
256
|
-
gatheredData: iter.gatheredData
|
|
257
|
-
})), null, 2);
|
|
258
|
-
// Replace template variables
|
|
259
|
-
const investigationPrompt = promptTemplate
|
|
260
|
-
.replace('{issue}', session.issue)
|
|
261
|
-
.replace('{currentIteration}', currentIteration.toString())
|
|
262
|
-
.replace('{maxIterations}', maxIterations.toString())
|
|
263
|
-
.replace('{previousIterations}', previousIterationsJson)
|
|
264
|
-
.replace('{clusterApiResources}', clusterApiResources);
|
|
265
|
-
logger.debug('Sending investigation prompt to AI', {
|
|
114
|
+
// Load investigation system prompt (static, cacheable)
|
|
115
|
+
const promptPath = path.join(__dirname, '..', '..', 'prompts', 'remediate-system.md');
|
|
116
|
+
const systemPrompt = fs.readFileSync(promptPath, 'utf8');
|
|
117
|
+
logger.debug('Starting toolLoop with kubectl investigation tools', {
|
|
266
118
|
requestId,
|
|
267
119
|
sessionId: session.sessionId,
|
|
268
|
-
|
|
269
|
-
iteration: currentIteration
|
|
120
|
+
toolCount: kubectl_tools_1.KUBECTL_INVESTIGATION_TOOLS.length
|
|
270
121
|
});
|
|
271
|
-
//
|
|
272
|
-
|
|
273
|
-
|
|
122
|
+
// Use toolLoop for AI-driven investigation with kubectl tools
|
|
123
|
+
// System prompt is static (cached), issue description is dynamic (userMessage)
|
|
124
|
+
const result = await aiProvider.toolLoop({
|
|
125
|
+
systemPrompt: systemPrompt,
|
|
126
|
+
userMessage: `Investigate this Kubernetes issue: ${session.issue}`,
|
|
127
|
+
tools: kubectl_tools_1.KUBECTL_INVESTIGATION_TOOLS,
|
|
128
|
+
toolExecutor: kubectl_tools_1.executeKubectlTools,
|
|
129
|
+
maxIterations: maxIterations,
|
|
130
|
+
operation: 'remediate-investigation'
|
|
131
|
+
});
|
|
132
|
+
logger.info('Investigation completed by toolLoop', {
|
|
274
133
|
requestId,
|
|
275
134
|
sessionId: session.sessionId,
|
|
276
|
-
|
|
135
|
+
iterations: result.iterations,
|
|
136
|
+
toolCallsExecuted: result.toolCallsExecuted.length,
|
|
137
|
+
responseLength: result.finalMessage.length
|
|
277
138
|
});
|
|
278
|
-
|
|
139
|
+
// Parse final response as JSON (AI returns final analysis in JSON format)
|
|
140
|
+
const finalAnalysis = parseAIFinalAnalysis(result.finalMessage);
|
|
141
|
+
// Build RemediateOutput from parsed analysis
|
|
142
|
+
const output = {
|
|
143
|
+
status: finalAnalysis.issueStatus === 'active' ? 'awaiting_user_approval' : 'success',
|
|
144
|
+
sessionId: session.sessionId,
|
|
145
|
+
investigation: {
|
|
146
|
+
iterations: result.iterations,
|
|
147
|
+
dataGathered: result.toolCallsExecuted.map((tc, i) => `${tc.tool} (call ${i + 1})`)
|
|
148
|
+
},
|
|
149
|
+
analysis: {
|
|
150
|
+
rootCause: finalAnalysis.rootCause,
|
|
151
|
+
confidence: finalAnalysis.confidence,
|
|
152
|
+
factors: finalAnalysis.factors
|
|
153
|
+
},
|
|
154
|
+
remediation: finalAnalysis.remediation,
|
|
155
|
+
validationIntent: finalAnalysis.validationIntent,
|
|
156
|
+
executed: false,
|
|
157
|
+
mode: session.mode
|
|
158
|
+
};
|
|
159
|
+
// Add guidance based on issue status
|
|
160
|
+
if (finalAnalysis.issueStatus === 'resolved' || finalAnalysis.issueStatus === 'non_existent') {
|
|
161
|
+
const statusMessage = finalAnalysis.issueStatus === 'resolved'
|
|
162
|
+
? 'Issue has been successfully resolved'
|
|
163
|
+
: 'No issues found - system is healthy';
|
|
164
|
+
output.guidance = `✅ ${statusMessage.toUpperCase()}: ${finalAnalysis.remediation.summary}`;
|
|
165
|
+
output.agentInstructions = `1. Show user that the ${finalAnalysis.issueStatus === 'resolved' ? 'issue has been resolved' : 'no issues were found'}\n2. Display the analysis and confidence level\n3. Explain the current healthy state\n4. No further action required`;
|
|
166
|
+
output.message = `${statusMessage} with ${Math.round(finalAnalysis.confidence * 100)}% confidence.`;
|
|
167
|
+
}
|
|
168
|
+
else {
|
|
169
|
+
// Active issue - generate execution options
|
|
170
|
+
const commandsSummary = finalAnalysis.remediation.actions.length === 1
|
|
171
|
+
? `The following kubectl command will be executed:\n${finalAnalysis.remediation.actions[0].command}`
|
|
172
|
+
: `The following ${finalAnalysis.remediation.actions.length} kubectl commands will be executed:\n${finalAnalysis.remediation.actions.map((action, i) => `${i + 1}. ${action.command}`).join('\n')}`;
|
|
173
|
+
const highRiskActions = finalAnalysis.remediation.actions.filter(a => a.risk === 'high');
|
|
174
|
+
const mediumRiskActions = finalAnalysis.remediation.actions.filter(a => a.risk === 'medium');
|
|
175
|
+
const riskSummary = [
|
|
176
|
+
...(highRiskActions.length > 0 ? [`${highRiskActions.length} HIGH RISK actions require careful review`] : []),
|
|
177
|
+
...(mediumRiskActions.length > 0 ? [`${mediumRiskActions.length} MEDIUM RISK actions should be executed with monitoring`] : []),
|
|
178
|
+
"All actions are designed to be safe kubectl operations (no destructive commands)"
|
|
179
|
+
].join('. ');
|
|
180
|
+
output.guidance = `🔴 CRITICAL: Present the kubectl commands to the user and ask them to choose execution method. DO NOT execute commands without user approval.\n\n${commandsSummary}\n\nRisk Assessment: ${riskSummary}`;
|
|
181
|
+
output.agentInstructions = `1. Show the user the root cause analysis and confidence level\n2. Display the kubectl commands that will be executed\n3. Explain the risk assessment\n4. Present the two execution choices and wait for user selection\n5. When user selects option 1 or 2, call the remediate tool again with: executeChoice: [1 or 2], sessionId: "${session.sessionId}", mode: "${session.mode}"\n6. DO NOT automatically execute any commands until user makes their choice`;
|
|
182
|
+
output.nextAction = 'remediate';
|
|
183
|
+
output.message = `AI analysis identified the root cause with ${Math.round(finalAnalysis.confidence * 100)}% confidence. ${finalAnalysis.remediation.actions.length} remediation actions are recommended.`;
|
|
184
|
+
}
|
|
185
|
+
// Update session with final analysis
|
|
186
|
+
updateSessionFile(sessionDir, session.sessionId, {
|
|
187
|
+
finalAnalysis: output,
|
|
188
|
+
status: 'analysis_complete'
|
|
189
|
+
});
|
|
190
|
+
logger.info('Investigation and analysis completed', {
|
|
191
|
+
requestId,
|
|
192
|
+
sessionId: session.sessionId,
|
|
193
|
+
rootCause: output.analysis.rootCause,
|
|
194
|
+
recommendedActions: output.remediation.actions.length
|
|
195
|
+
});
|
|
196
|
+
return output;
|
|
279
197
|
}
|
|
280
198
|
catch (error) {
|
|
281
|
-
logger.error('
|
|
282
|
-
throw error_handling_1.ErrorHandler.createError(error_handling_1.ErrorCategory.AI_SERVICE, error_handling_1.ErrorSeverity.HIGH, `AI analysis failed: ${error instanceof Error ? error.message : 'Unknown error'}`, {
|
|
283
|
-
operation: 'ai_analysis',
|
|
284
|
-
component: 'RemediateTool',
|
|
199
|
+
logger.error('Investigation failed', error, {
|
|
285
200
|
requestId,
|
|
286
|
-
sessionId: session.sessionId
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
201
|
+
sessionId: session.sessionId
|
|
202
|
+
});
|
|
203
|
+
// Mark session as failed
|
|
204
|
+
updateSessionFile(sessionDir, session.sessionId, { status: 'failed' });
|
|
205
|
+
throw error_handling_1.ErrorHandler.createError(error_handling_1.ErrorCategory.AI_SERVICE, error_handling_1.ErrorSeverity.HIGH, `Investigation failed: ${error instanceof Error ? error.message : 'Unknown error'}`, {
|
|
206
|
+
operation: 'investigation_loop',
|
|
207
|
+
component: 'RemediateTool',
|
|
208
|
+
input: { sessionId: session.sessionId }
|
|
292
209
|
});
|
|
293
210
|
}
|
|
294
211
|
}
|
|
@@ -298,13 +215,49 @@ async function analyzeCurrentState(session, aiProvider, logger, requestId) {
|
|
|
298
215
|
function parseAIFinalAnalysis(aiResponse) {
|
|
299
216
|
try {
|
|
300
217
|
// Try to extract JSON from the response
|
|
301
|
-
|
|
302
|
-
|
|
218
|
+
// Use non-greedy match and try to parse incrementally to handle extra text after JSON
|
|
219
|
+
const firstBraceIndex = aiResponse.indexOf('{');
|
|
220
|
+
if (firstBraceIndex === -1) {
|
|
303
221
|
throw new Error('No JSON found in AI final analysis response');
|
|
304
222
|
}
|
|
305
|
-
|
|
223
|
+
// Try to find the end of the JSON object by tracking brace depth
|
|
224
|
+
let braceCount = 0;
|
|
225
|
+
let inString = false;
|
|
226
|
+
let escapeNext = false;
|
|
227
|
+
let jsonEndIndex = -1;
|
|
228
|
+
for (let i = firstBraceIndex; i < aiResponse.length; i++) {
|
|
229
|
+
const char = aiResponse[i];
|
|
230
|
+
if (escapeNext) {
|
|
231
|
+
escapeNext = false;
|
|
232
|
+
continue;
|
|
233
|
+
}
|
|
234
|
+
if (char === '\\') {
|
|
235
|
+
escapeNext = true;
|
|
236
|
+
continue;
|
|
237
|
+
}
|
|
238
|
+
if (char === '"') {
|
|
239
|
+
inString = !inString;
|
|
240
|
+
continue;
|
|
241
|
+
}
|
|
242
|
+
if (inString)
|
|
243
|
+
continue;
|
|
244
|
+
if (char === '{')
|
|
245
|
+
braceCount++;
|
|
246
|
+
if (char === '}') {
|
|
247
|
+
braceCount--;
|
|
248
|
+
if (braceCount === 0) {
|
|
249
|
+
jsonEndIndex = i + 1;
|
|
250
|
+
break;
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
if (jsonEndIndex === -1) {
|
|
255
|
+
throw new Error('Could not find complete JSON object in AI response');
|
|
256
|
+
}
|
|
257
|
+
const jsonString = aiResponse.substring(firstBraceIndex, jsonEndIndex);
|
|
258
|
+
const parsed = JSON.parse(jsonString);
|
|
306
259
|
// Validate required fields
|
|
307
|
-
if (!parsed.issueStatus || !parsed.rootCause ||
|
|
260
|
+
if (!parsed.issueStatus || !parsed.rootCause || parsed.confidence === undefined || !Array.isArray(parsed.factors) || !parsed.remediation) {
|
|
308
261
|
throw new Error('Invalid AI final analysis response structure');
|
|
309
262
|
}
|
|
310
263
|
// Validate issueStatus field
|
|
@@ -337,318 +290,6 @@ function parseAIFinalAnalysis(aiResponse) {
|
|
|
337
290
|
throw new Error(`Failed to parse AI final analysis response: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
338
291
|
}
|
|
339
292
|
}
|
|
340
|
-
/**
|
|
341
|
-
* Parse AI response for data requests and investigation status
|
|
342
|
-
*/
|
|
343
|
-
function parseAIResponse(aiResponse) {
|
|
344
|
-
try {
|
|
345
|
-
// Try to extract JSON from the response
|
|
346
|
-
const jsonMatch = aiResponse.match(/\{[\s\S]*\}/);
|
|
347
|
-
if (!jsonMatch) {
|
|
348
|
-
throw new Error('No JSON found in AI response');
|
|
349
|
-
}
|
|
350
|
-
const parsed = JSON.parse(jsonMatch[0]);
|
|
351
|
-
// Validate required fields
|
|
352
|
-
if (typeof parsed.investigationComplete !== 'boolean') {
|
|
353
|
-
throw new Error('Missing or invalid investigationComplete field');
|
|
354
|
-
}
|
|
355
|
-
if (!Array.isArray(parsed.dataRequests)) {
|
|
356
|
-
throw new Error('Missing or invalid dataRequests field');
|
|
357
|
-
}
|
|
358
|
-
// Validate data requests format
|
|
359
|
-
for (const request of parsed.dataRequests) {
|
|
360
|
-
// Check if operation is safe (read-only) or has dry-run flag
|
|
361
|
-
const isDryRun = hasDryRunFlag(request.args);
|
|
362
|
-
const isSafeOperation = exports.SAFE_OPERATIONS.includes(request.type);
|
|
363
|
-
if (!isSafeOperation && !isDryRun) {
|
|
364
|
-
throw new Error(`Invalid data request type: ${request.type}. Allowed: ${exports.SAFE_OPERATIONS.join(', ')} or any operation with --dry-run flag`);
|
|
365
|
-
}
|
|
366
|
-
if (!request.resource || !request.rationale) {
|
|
367
|
-
throw new Error('Data request missing required fields: resource, rationale');
|
|
368
|
-
}
|
|
369
|
-
}
|
|
370
|
-
return {
|
|
371
|
-
dataRequests: parsed.dataRequests,
|
|
372
|
-
isComplete: parsed.investigationComplete,
|
|
373
|
-
needsMoreSpecificInfo: parsed.needsMoreSpecificInfo,
|
|
374
|
-
parsedResponse: parsed
|
|
375
|
-
};
|
|
376
|
-
}
|
|
377
|
-
catch (error) {
|
|
378
|
-
// Fallback: try to extract data requests from text patterns
|
|
379
|
-
console.warn('Failed to parse AI JSON response, using fallback parsing:', error instanceof Error ? error.message : 'Unknown error');
|
|
380
|
-
// Simple fallback - assume investigation needs to continue and no data requests
|
|
381
|
-
return {
|
|
382
|
-
dataRequests: [],
|
|
383
|
-
isComplete: false
|
|
384
|
-
};
|
|
385
|
-
}
|
|
386
|
-
}
|
|
387
|
-
/**
|
|
388
|
-
* Gather safe data from Kubernetes using kubectl
|
|
389
|
-
* Implements resilient error handling - failed requests don't kill the investigation
|
|
390
|
-
*/
|
|
391
|
-
async function gatherSafeData(dataRequests, logger, requestId) {
|
|
392
|
-
logger.debug('Gathering safe data from Kubernetes', { requestId, requestCount: dataRequests.length });
|
|
393
|
-
const result = {
|
|
394
|
-
successful: {},
|
|
395
|
-
failed: {},
|
|
396
|
-
summary: {
|
|
397
|
-
total: dataRequests.length,
|
|
398
|
-
successful: 0,
|
|
399
|
-
failed: 0
|
|
400
|
-
}
|
|
401
|
-
};
|
|
402
|
-
// Process each data request independently
|
|
403
|
-
for (let i = 0; i < dataRequests.length; i++) {
|
|
404
|
-
const request = dataRequests[i];
|
|
405
|
-
const dataRequestId = `${requestId}-req-${i}`;
|
|
406
|
-
try {
|
|
407
|
-
// Safety validation - allow read-only operations OR operations with dry-run flag
|
|
408
|
-
const isDryRun = hasDryRunFlag(request.args);
|
|
409
|
-
const isReadOnlyOperation = exports.SAFE_OPERATIONS.includes(request.type);
|
|
410
|
-
if (!isReadOnlyOperation && !isDryRun) {
|
|
411
|
-
const error = `Unsafe operation '${request.type}' - only allowed: ${exports.SAFE_OPERATIONS.join(', ')} or any operation with --dry-run flag`;
|
|
412
|
-
result.failed[dataRequestId] = {
|
|
413
|
-
error,
|
|
414
|
-
command: `kubectl ${request.type} ${request.resource}${request.args ? ' ' + request.args.join(' ') : ''}`,
|
|
415
|
-
suggestion: 'Use read-only operations (get, describe, logs, events, top) or add --dry-run=client to validate commands safely'
|
|
416
|
-
};
|
|
417
|
-
result.summary.failed++;
|
|
418
|
-
logger.warn('Rejected unsafe kubectl operation', { requestId, dataRequestId, operation: request.type, isDryRun });
|
|
419
|
-
continue;
|
|
420
|
-
}
|
|
421
|
-
// Build kubectl command
|
|
422
|
-
const args = [request.type, request.resource];
|
|
423
|
-
if (request.namespace) {
|
|
424
|
-
args.push('-n', request.namespace);
|
|
425
|
-
}
|
|
426
|
-
// Add any additional arguments (like --dry-run=client)
|
|
427
|
-
if (request.args && request.args.length > 0) {
|
|
428
|
-
args.push(...request.args);
|
|
429
|
-
}
|
|
430
|
-
// Add output format for structured data (only for read-only commands that support it)
|
|
431
|
-
if ((request.type === 'get' || request.type === 'events' || request.type === 'top') && !isDryRun) {
|
|
432
|
-
args.push('-o', 'yaml');
|
|
433
|
-
}
|
|
434
|
-
logger.debug('Executing kubectl command', {
|
|
435
|
-
requestId,
|
|
436
|
-
dataRequestId,
|
|
437
|
-
command: `kubectl ${args.join(' ')}`,
|
|
438
|
-
rationale: request.rationale
|
|
439
|
-
});
|
|
440
|
-
// Execute kubectl command
|
|
441
|
-
const output = await (0, kubernetes_utils_1.executeKubectl)(args, { timeout: 30000 });
|
|
442
|
-
// Store successful result
|
|
443
|
-
result.successful[dataRequestId] = {
|
|
444
|
-
request,
|
|
445
|
-
output,
|
|
446
|
-
command: `kubectl ${args.join(' ')}`,
|
|
447
|
-
timestamp: new Date().toISOString()
|
|
448
|
-
};
|
|
449
|
-
result.summary.successful++;
|
|
450
|
-
logger.debug('kubectl command successful', {
|
|
451
|
-
requestId,
|
|
452
|
-
dataRequestId,
|
|
453
|
-
outputLength: output.length
|
|
454
|
-
});
|
|
455
|
-
}
|
|
456
|
-
catch (error) {
|
|
457
|
-
// Store failed result with error details
|
|
458
|
-
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
|
459
|
-
const command = `kubectl ${request.type} ${request.resource}${request.namespace ? ` -n ${request.namespace}` : ''}`;
|
|
460
|
-
result.failed[dataRequestId] = {
|
|
461
|
-
error: errorMessage,
|
|
462
|
-
command,
|
|
463
|
-
suggestion: generateErrorSuggestion(errorMessage)
|
|
464
|
-
};
|
|
465
|
-
result.summary.failed++;
|
|
466
|
-
logger.warn('kubectl command failed', {
|
|
467
|
-
requestId,
|
|
468
|
-
dataRequestId,
|
|
469
|
-
command,
|
|
470
|
-
error: errorMessage,
|
|
471
|
-
rationale: request.rationale
|
|
472
|
-
});
|
|
473
|
-
}
|
|
474
|
-
}
|
|
475
|
-
logger.info('Data gathering completed', {
|
|
476
|
-
requestId,
|
|
477
|
-
successful: result.summary.successful,
|
|
478
|
-
failed: result.summary.failed,
|
|
479
|
-
total: result.summary.total
|
|
480
|
-
});
|
|
481
|
-
return result;
|
|
482
|
-
}
|
|
483
|
-
/**
|
|
484
|
-
* Generate helpful suggestions based on kubectl error messages
|
|
485
|
-
*/
|
|
486
|
-
function generateErrorSuggestion(errorMessage) {
|
|
487
|
-
const lowerError = errorMessage.toLowerCase();
|
|
488
|
-
if (lowerError.includes('not found')) {
|
|
489
|
-
return 'Resource may not exist or may be in a different namespace. Try listing available resources first.';
|
|
490
|
-
}
|
|
491
|
-
if (lowerError.includes('forbidden')) {
|
|
492
|
-
return 'Insufficient permissions. Check RBAC configuration for read access to this resource.';
|
|
493
|
-
}
|
|
494
|
-
if (lowerError.includes('namespace') && lowerError.includes('not found')) {
|
|
495
|
-
return 'Namespace does not exist. Try listing available namespaces first.';
|
|
496
|
-
}
|
|
497
|
-
if (lowerError.includes('connection refused') || lowerError.includes('timeout')) {
|
|
498
|
-
return 'Cannot connect to Kubernetes cluster. Verify cluster connectivity and kubectl configuration.';
|
|
499
|
-
}
|
|
500
|
-
return undefined;
|
|
501
|
-
}
|
|
502
|
-
/**
|
|
503
|
-
* Generate final analysis and remediation recommendations using AI
|
|
504
|
-
*/
|
|
505
|
-
async function generateFinalAnalysis(session, logger, requestId) {
|
|
506
|
-
logger.debug('Generating final analysis with AI', { requestId, sessionId: session.sessionId });
|
|
507
|
-
try {
|
|
508
|
-
// Initialize AI provider (will validate API key automatically)
|
|
509
|
-
const aiProvider = (0, ai_provider_factory_1.createAIProvider)();
|
|
510
|
-
// Load final analysis prompt template
|
|
511
|
-
const promptPath = path.join(__dirname, '..', '..', 'prompts', 'remediate-final-analysis.md');
|
|
512
|
-
const promptTemplate = fs.readFileSync(promptPath, 'utf8');
|
|
513
|
-
// Prepare template variables - extract actual data source identifiers
|
|
514
|
-
const dataSources = session.iterations.flatMap(iter => {
|
|
515
|
-
if (iter.gatheredData && iter.gatheredData.successful) {
|
|
516
|
-
return Object.keys(iter.gatheredData.successful);
|
|
517
|
-
}
|
|
518
|
-
return [];
|
|
519
|
-
});
|
|
520
|
-
// Compile complete investigation data for AI analysis
|
|
521
|
-
const completeInvestigationData = session.iterations.map(iter => ({
|
|
522
|
-
iteration: iter.step,
|
|
523
|
-
analysis: iter.aiAnalysis,
|
|
524
|
-
dataGathered: Object.entries(iter.gatheredData).map(([key, value]) => ({
|
|
525
|
-
source: key,
|
|
526
|
-
data: typeof value === 'string' ? value.substring(0, 1000) : JSON.stringify(value).substring(0, 1000)
|
|
527
|
-
}))
|
|
528
|
-
}));
|
|
529
|
-
// Replace template variables
|
|
530
|
-
const finalAnalysisPrompt = promptTemplate
|
|
531
|
-
.replace('{issue}', session.issue)
|
|
532
|
-
.replace('{iterations}', session.iterations.length.toString())
|
|
533
|
-
.replace('{dataSources}', dataSources.join(', '))
|
|
534
|
-
.replace('{completeInvestigationData}', JSON.stringify(completeInvestigationData, null, 2));
|
|
535
|
-
logger.debug('Sending final analysis request to AI provider', {
|
|
536
|
-
requestId,
|
|
537
|
-
sessionId: session.sessionId,
|
|
538
|
-
promptLength: finalAnalysisPrompt.length
|
|
539
|
-
});
|
|
540
|
-
// Send to AI provider
|
|
541
|
-
const aiResponse = await aiProvider.sendMessage(finalAnalysisPrompt);
|
|
542
|
-
logger.debug('Received AI final analysis response', {
|
|
543
|
-
requestId,
|
|
544
|
-
sessionId: session.sessionId,
|
|
545
|
-
responseLength: aiResponse.content.length
|
|
546
|
-
});
|
|
547
|
-
// Parse AI response
|
|
548
|
-
const finalAnalysis = parseAIFinalAnalysis(aiResponse.content);
|
|
549
|
-
logger.info('Final analysis generated successfully', {
|
|
550
|
-
requestId,
|
|
551
|
-
sessionId: session.sessionId,
|
|
552
|
-
confidence: finalAnalysis.confidence,
|
|
553
|
-
actionCount: finalAnalysis.remediation.actions.length,
|
|
554
|
-
overallRisk: finalAnalysis.remediation.risk
|
|
555
|
-
});
|
|
556
|
-
// Convert data sources to human-readable format
|
|
557
|
-
const humanReadableDataSources = dataSources.length > 0
|
|
558
|
-
? [`Analyzed ${dataSources.length} data sources from ${session.iterations.length} investigation iterations`]
|
|
559
|
-
: ['cluster-resources', 'pod-status', 'node-capacity'];
|
|
560
|
-
// Handle different issue statuses
|
|
561
|
-
if (finalAnalysis.issueStatus === 'resolved' || finalAnalysis.issueStatus === 'non_existent') {
|
|
562
|
-
// Issue is resolved or doesn't exist - return success status
|
|
563
|
-
const statusMessage = finalAnalysis.issueStatus === 'resolved'
|
|
564
|
-
? 'Issue has been successfully resolved'
|
|
565
|
-
: 'No issues found - system is healthy';
|
|
566
|
-
return {
|
|
567
|
-
status: 'success',
|
|
568
|
-
analysis: {
|
|
569
|
-
rootCause: finalAnalysis.rootCause,
|
|
570
|
-
confidence: finalAnalysis.confidence,
|
|
571
|
-
factors: finalAnalysis.factors
|
|
572
|
-
},
|
|
573
|
-
remediation: {
|
|
574
|
-
summary: finalAnalysis.remediation.summary,
|
|
575
|
-
actions: finalAnalysis.remediation.actions,
|
|
576
|
-
risk: finalAnalysis.remediation.risk
|
|
577
|
-
},
|
|
578
|
-
validationIntent: finalAnalysis.validationIntent,
|
|
579
|
-
sessionId: session.sessionId,
|
|
580
|
-
investigation: {
|
|
581
|
-
iterations: session.iterations.length,
|
|
582
|
-
dataGathered: humanReadableDataSources
|
|
583
|
-
},
|
|
584
|
-
executed: false,
|
|
585
|
-
mode: session.mode,
|
|
586
|
-
// Success state guidance
|
|
587
|
-
guidance: `✅ ${statusMessage.toUpperCase()}: ${finalAnalysis.remediation.summary}`,
|
|
588
|
-
agentInstructions: `1. Show user that the ${finalAnalysis.issueStatus === 'resolved' ? 'issue has been resolved' : 'no issues were found'}\n2. Display the analysis and confidence level\n3. Explain the current healthy state\n4. No further action required`,
|
|
589
|
-
nextAction: undefined,
|
|
590
|
-
message: `${statusMessage} with ${Math.round(finalAnalysis.confidence * 100)}% confidence.`
|
|
591
|
-
};
|
|
592
|
-
}
|
|
593
|
-
// Issue is active - generate execution options
|
|
594
|
-
const commandsSummary = finalAnalysis.remediation.actions.length === 1
|
|
595
|
-
? `The following kubectl command will be executed:\n${finalAnalysis.remediation.actions[0].command}`
|
|
596
|
-
: `The following ${finalAnalysis.remediation.actions.length} kubectl commands will be executed:\n${finalAnalysis.remediation.actions.map((action, i) => `${i + 1}. ${action.command}`).join('\n')}`;
|
|
597
|
-
// Generate risk summary
|
|
598
|
-
const highRiskActions = finalAnalysis.remediation.actions.filter(a => a.risk === 'high');
|
|
599
|
-
const mediumRiskActions = finalAnalysis.remediation.actions.filter(a => a.risk === 'medium');
|
|
600
|
-
const riskSummary = [
|
|
601
|
-
...(highRiskActions.length > 0 ? [`${highRiskActions.length} HIGH RISK actions require careful review`] : []),
|
|
602
|
-
...(mediumRiskActions.length > 0 ? [`${mediumRiskActions.length} MEDIUM RISK actions should be executed with monitoring`] : []),
|
|
603
|
-
"All actions are designed to be safe kubectl operations (no destructive commands)"
|
|
604
|
-
].join('. ');
|
|
605
|
-
// Return active issue response with execution choices
|
|
606
|
-
return {
|
|
607
|
-
status: 'awaiting_user_approval',
|
|
608
|
-
analysis: {
|
|
609
|
-
rootCause: finalAnalysis.rootCause,
|
|
610
|
-
confidence: finalAnalysis.confidence,
|
|
611
|
-
factors: finalAnalysis.factors
|
|
612
|
-
},
|
|
613
|
-
remediation: {
|
|
614
|
-
summary: finalAnalysis.remediation.summary,
|
|
615
|
-
actions: finalAnalysis.remediation.actions,
|
|
616
|
-
risk: finalAnalysis.remediation.risk
|
|
617
|
-
},
|
|
618
|
-
validationIntent: finalAnalysis.validationIntent,
|
|
619
|
-
sessionId: session.sessionId,
|
|
620
|
-
investigation: {
|
|
621
|
-
iterations: session.iterations.length,
|
|
622
|
-
dataGathered: humanReadableDataSources
|
|
623
|
-
},
|
|
624
|
-
executed: false,
|
|
625
|
-
mode: session.mode,
|
|
626
|
-
// Active issue guidance
|
|
627
|
-
guidance: `🔴 CRITICAL: Present the kubectl commands to the user and ask them to choose execution method. DO NOT execute commands without user approval.\n\n${commandsSummary}\n\nRisk Assessment: ${riskSummary}`,
|
|
628
|
-
agentInstructions: `1. Show the user the root cause analysis and confidence level\n2. Display the kubectl commands that will be executed\n3. Explain the risk assessment\n4. Present the two execution choices and wait for user selection\n5. When user selects option 1 or 2, call the remediate tool again with: executeChoice: [1 or 2], sessionId: "${session.sessionId}", mode: "${session.mode}"\n6. Do NOT automatically execute any commands until user makes their choice`,
|
|
629
|
-
nextAction: 'remediate',
|
|
630
|
-
message: `AI analysis identified the root cause with ${Math.round(finalAnalysis.confidence * 100)}% confidence. ${finalAnalysis.remediation.actions.length} remediation actions are recommended.`
|
|
631
|
-
};
|
|
632
|
-
}
|
|
633
|
-
catch (error) {
|
|
634
|
-
logger.error('Failed to generate final analysis', error, {
|
|
635
|
-
requestId,
|
|
636
|
-
sessionId: session.sessionId
|
|
637
|
-
});
|
|
638
|
-
throw error_handling_1.ErrorHandler.createError(error_handling_1.ErrorCategory.AI_SERVICE, error_handling_1.ErrorSeverity.HIGH, `Final analysis generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`, {
|
|
639
|
-
operation: 'generateFinalAnalysis',
|
|
640
|
-
component: 'RemediateTool',
|
|
641
|
-
requestId,
|
|
642
|
-
sessionId: session.sessionId,
|
|
643
|
-
suggestedActions: [
|
|
644
|
-
'Check AI provider API key is set correctly',
|
|
645
|
-
'Verify prompts/remediate-final-analysis.md exists',
|
|
646
|
-
'Check network connectivity to AI provider',
|
|
647
|
-
'Review AI response format for parsing issues'
|
|
648
|
-
]
|
|
649
|
-
});
|
|
650
|
-
}
|
|
651
|
-
}
|
|
652
293
|
/**
|
|
653
294
|
* Execute user choice from previous session
|
|
654
295
|
*/
|
|
@@ -955,7 +596,6 @@ async function handleRemediateTool(args) {
|
|
|
955
596
|
sessionId,
|
|
956
597
|
issue: validatedInput.issue,
|
|
957
598
|
mode: validatedInput.mode || 'manual',
|
|
958
|
-
iterations: [],
|
|
959
599
|
created: new Date(),
|
|
960
600
|
updated: new Date(),
|
|
961
601
|
status: 'investigating'
|