@cogitator-ai/self-modifying 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -50
- package/dist/architecture-evolution/capability-analyzer.d.ts.map +1 -1
- package/dist/architecture-evolution/capability-analyzer.js +105 -19
- package/dist/architecture-evolution/capability-analyzer.js.map +1 -1
- package/dist/architecture-evolution/evolution-strategy.d.ts.map +1 -1
- package/dist/architecture-evolution/evolution-strategy.js +2 -6
- package/dist/architecture-evolution/evolution-strategy.js.map +1 -1
- package/dist/architecture-evolution/index.d.ts +1 -1
- package/dist/architecture-evolution/index.d.ts.map +1 -1
- package/dist/architecture-evolution/index.js +1 -1
- package/dist/architecture-evolution/index.js.map +1 -1
- package/dist/architecture-evolution/parameter-optimizer.d.ts.map +1 -1
- package/dist/architecture-evolution/parameter-optimizer.js.map +1 -1
- package/dist/architecture-evolution/prompts.d.ts.map +1 -1
- package/dist/architecture-evolution/prompts.js.map +1 -1
- package/dist/constraints/index.d.ts +1 -1
- package/dist/constraints/index.d.ts.map +1 -1
- package/dist/constraints/index.js +1 -1
- package/dist/constraints/index.js.map +1 -1
- package/dist/constraints/modification-validator.d.ts.map +1 -1
- package/dist/constraints/modification-validator.js +1 -2
- package/dist/constraints/modification-validator.js.map +1 -1
- package/dist/constraints/rollback-manager.d.ts.map +1 -1
- package/dist/constraints/rollback-manager.js.map +1 -1
- package/dist/constraints/safety-constraints.d.ts.map +1 -1
- package/dist/constraints/safety-constraints.js +1 -3
- package/dist/constraints/safety-constraints.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/meta-reasoning/meta-reasoner.d.ts.map +1 -1
- package/dist/meta-reasoning/meta-reasoner.js +5 -10
- package/dist/meta-reasoning/meta-reasoner.js.map +1 -1
- package/dist/meta-reasoning/observation-collector.d.ts.map +1 -1
- package/dist/meta-reasoning/observation-collector.js +1 -3
- package/dist/meta-reasoning/observation-collector.js.map +1 -1
- package/dist/meta-reasoning/prompts.d.ts.map +1 -1
- package/dist/meta-reasoning/prompts.js +4 -2
- package/dist/meta-reasoning/prompts.js.map +1 -1
- package/dist/meta-reasoning/strategy-selector.d.ts.map +1 -1
- package/dist/meta-reasoning/strategy-selector.js +3 -1
- package/dist/meta-reasoning/strategy-selector.js.map +1 -1
- package/dist/self-modifying-agent.d.ts.map +1 -1
- package/dist/self-modifying-agent.js +14 -6
- package/dist/self-modifying-agent.js.map +1 -1
- package/dist/tool-generation/gap-analyzer.d.ts.map +1 -1
- package/dist/tool-generation/gap-analyzer.js +4 -1
- package/dist/tool-generation/gap-analyzer.js.map +1 -1
- package/dist/tool-generation/generated-tool-store.d.ts.map +1 -1
- package/dist/tool-generation/generated-tool-store.js.map +1 -1
- package/dist/tool-generation/prompts.d.ts.map +1 -1
- package/dist/tool-generation/prompts.js +5 -15
- package/dist/tool-generation/prompts.js.map +1 -1
- package/dist/tool-generation/tool-generator.d.ts.map +1 -1
- package/dist/tool-generation/tool-generator.js.map +1 -1
- package/dist/tool-generation/tool-sandbox.d.ts.map +1 -1
- package/dist/tool-generation/tool-sandbox.js +1 -3
- package/dist/tool-generation/tool-sandbox.js.map +1 -1
- package/dist/tool-generation/tool-validator.d.ts.map +1 -1
- package/dist/tool-generation/tool-validator.js.map +1 -1
- package/package.json +4 -4
- package/src/__tests__/architecture-evolution.test.ts +131 -30
- package/src/__tests__/constraints.test.ts +61 -46
- package/src/__tests__/index.test.ts +4 -17
- package/src/__tests__/meta-reasoning.test.ts +246 -155
- package/src/__tests__/tool-generation.test.ts +26 -7
- package/src/architecture-evolution/capability-analyzer.ts +113 -31
- package/src/architecture-evolution/evolution-strategy.ts +20 -13
- package/src/architecture-evolution/index.ts +1 -4
- package/src/architecture-evolution/parameter-optimizer.ts +42 -23
- package/src/architecture-evolution/prompts.ts +14 -15
- package/src/constraints/index.ts +1 -4
- package/src/constraints/modification-validator.ts +5 -18
- package/src/constraints/rollback-manager.ts +1 -3
- package/src/constraints/safety-constraints.ts +1 -3
- package/src/index.ts +6 -5
- package/src/meta-reasoning/meta-reasoner.ts +9 -16
- package/src/meta-reasoning/observation-collector.ts +3 -12
- package/src/meta-reasoning/prompts.ts +9 -9
- package/src/meta-reasoning/strategy-selector.ts +5 -1
- package/src/self-modifying-agent.ts +25 -28
- package/src/tool-generation/gap-analyzer.ts +18 -14
- package/src/tool-generation/generated-tool-store.ts +5 -8
- package/src/tool-generation/prompts.ts +5 -15
- package/src/tool-generation/tool-generator.ts +15 -11
- package/src/tool-generation/tool-sandbox.ts +4 -15
- package/src/tool-generation/tool-validator.ts +17 -13
|
@@ -1,8 +1,4 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
MetaObservation,
|
|
3
|
-
ReasoningMode,
|
|
4
|
-
ReasoningModeConfig,
|
|
5
|
-
} from '@cogitator-ai/types';
|
|
1
|
+
import type { MetaObservation, ReasoningMode, ReasoningModeConfig } from '@cogitator-ai/types';
|
|
6
2
|
|
|
7
3
|
export function buildMetaAssessmentPrompt(
|
|
8
4
|
observation: MetaObservation,
|
|
@@ -44,10 +40,14 @@ Iterations remaining: ${observation.iterationsRemaining}
|
|
|
44
40
|
${(observation.recentActions ?? []).map((a) => `- ${a.type}: ${a.toolName ?? 'N/A'} ${a.error ? '(ERROR: ' + a.error + ')' : ''}`).join('\n') || 'None'}
|
|
45
41
|
|
|
46
42
|
## Recent Insights
|
|
47
|
-
${
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
43
|
+
${
|
|
44
|
+
(observation.recentInsights ?? ([] as Array<{ type?: string; content?: string }>))
|
|
45
|
+
.map((i) => {
|
|
46
|
+
const insight = i as { type?: string; content?: string };
|
|
47
|
+
return `- [${insight.type ?? 'insight'}] ${insight.content ?? ''}`;
|
|
48
|
+
})
|
|
49
|
+
.join('\n') || 'None'
|
|
50
|
+
}
|
|
51
51
|
|
|
52
52
|
## Available Modes for Switching
|
|
53
53
|
${context.allowedModes.map((m) => `- ${m}`).join('\n')}
|
|
@@ -78,7 +78,11 @@ export class StrategySelector {
|
|
|
78
78
|
score += 0.2;
|
|
79
79
|
reasons.push('Thorough for complex tasks');
|
|
80
80
|
}
|
|
81
|
-
if (
|
|
81
|
+
if (
|
|
82
|
+
!profile.timeConstraint ||
|
|
83
|
+
profile.timeConstraint === 'none' ||
|
|
84
|
+
profile.timeConstraint === 'relaxed'
|
|
85
|
+
) {
|
|
82
86
|
score += 0.1;
|
|
83
87
|
reasons.push('Has time for systematic approach');
|
|
84
88
|
}
|
|
@@ -262,11 +262,7 @@ export class SelfModifyingAgent {
|
|
|
262
262
|
return null;
|
|
263
263
|
}
|
|
264
264
|
|
|
265
|
-
async recordToolUsage(
|
|
266
|
-
toolId: string,
|
|
267
|
-
success: boolean,
|
|
268
|
-
executionTime: number
|
|
269
|
-
): Promise<void> {
|
|
265
|
+
async recordToolUsage(toolId: string, success: boolean, executionTime: number): Promise<void> {
|
|
270
266
|
await this.toolStore.recordUsage({
|
|
271
267
|
toolId,
|
|
272
268
|
timestamp: new Date(),
|
|
@@ -336,7 +332,6 @@ export class SelfModifyingAgent {
|
|
|
336
332
|
const activeToos = await this.toolStore.list({ status: 'active' });
|
|
337
333
|
for (const tool of activeToos) {
|
|
338
334
|
if (tool.validationScore && tool.validationScore > 0.8) {
|
|
339
|
-
|
|
340
335
|
}
|
|
341
336
|
}
|
|
342
337
|
|
|
@@ -364,7 +359,14 @@ export class SelfModifyingAgent {
|
|
|
364
359
|
metaReasoning: {
|
|
365
360
|
enabled: true,
|
|
366
361
|
defaultMode: 'analytical',
|
|
367
|
-
allowedModes: [
|
|
362
|
+
allowedModes: [
|
|
363
|
+
'analytical',
|
|
364
|
+
'creative',
|
|
365
|
+
'systematic',
|
|
366
|
+
'intuitive',
|
|
367
|
+
'reflective',
|
|
368
|
+
'exploratory',
|
|
369
|
+
],
|
|
368
370
|
modeProfiles: {
|
|
369
371
|
analytical: { mode: 'analytical', temperature: 0.3, depth: 3 },
|
|
370
372
|
creative: { mode: 'creative', temperature: 0.9, depth: 2 },
|
|
@@ -413,7 +415,10 @@ export class SelfModifyingAgent {
|
|
|
413
415
|
enabled: partial.enabled ?? defaults.enabled,
|
|
414
416
|
toolGeneration: { ...defaults.toolGeneration, ...partial.toolGeneration },
|
|
415
417
|
metaReasoning: { ...defaults.metaReasoning, ...partial.metaReasoning },
|
|
416
|
-
architectureEvolution: {
|
|
418
|
+
architectureEvolution: {
|
|
419
|
+
...defaults.architectureEvolution,
|
|
420
|
+
...partial.architectureEvolution,
|
|
421
|
+
},
|
|
417
422
|
constraints: { ...defaults.constraints, ...partial.constraints },
|
|
418
423
|
};
|
|
419
424
|
}
|
|
@@ -451,19 +456,14 @@ export class SelfModifyingAgent {
|
|
|
451
456
|
});
|
|
452
457
|
}
|
|
453
458
|
}
|
|
454
|
-
} catch {
|
|
455
|
-
|
|
456
|
-
}
|
|
459
|
+
} catch {}
|
|
457
460
|
}
|
|
458
461
|
|
|
459
462
|
private async analyzeAndGenerateTools(input: string): Promise<void> {
|
|
460
463
|
if (!this.currentContext) return;
|
|
461
464
|
|
|
462
465
|
try {
|
|
463
|
-
const analysis = await this.gapAnalyzer.analyze(
|
|
464
|
-
input,
|
|
465
|
-
this.currentContext.tools
|
|
466
|
-
);
|
|
466
|
+
const analysis = await this.gapAnalyzer.analyze(input, this.currentContext.tools);
|
|
467
467
|
|
|
468
468
|
for (const gap of analysis.gaps) {
|
|
469
469
|
if (gap.confidence >= this.config.toolGeneration.minConfidenceForGeneration) {
|
|
@@ -483,15 +483,10 @@ export class SelfModifyingAgent {
|
|
|
483
483
|
}
|
|
484
484
|
}
|
|
485
485
|
}
|
|
486
|
-
} catch {
|
|
487
|
-
|
|
488
|
-
}
|
|
486
|
+
} catch {}
|
|
489
487
|
}
|
|
490
488
|
|
|
491
|
-
private async executeWithMetaReasoning(
|
|
492
|
-
input: string,
|
|
493
|
-
runId: string
|
|
494
|
-
): Promise<string> {
|
|
489
|
+
private async executeWithMetaReasoning(input: string, runId: string): Promise<string> {
|
|
495
490
|
if (!this.currentContext) {
|
|
496
491
|
throw new Error('No active run context');
|
|
497
492
|
}
|
|
@@ -520,12 +515,14 @@ export class SelfModifyingAgent {
|
|
|
520
515
|
|
|
521
516
|
this.currentContext.observations.push(observation);
|
|
522
517
|
|
|
523
|
-
if (
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
518
|
+
if (
|
|
519
|
+
this.metaReasoner.shouldTrigger(runId, 'periodic', {
|
|
520
|
+
iteration,
|
|
521
|
+
confidence: this.estimateConfidence(output),
|
|
522
|
+
progressDelta: output.length > 0 ? 0.1 : 0,
|
|
523
|
+
stagnationCount: 0,
|
|
524
|
+
})
|
|
525
|
+
) {
|
|
529
526
|
const assessment = await this.metaReasoner.assess(observation);
|
|
530
527
|
|
|
531
528
|
this.emitter.emit({
|
|
@@ -41,22 +41,21 @@ export class GapAnalyzer {
|
|
|
41
41
|
description: t.description,
|
|
42
42
|
}));
|
|
43
43
|
|
|
44
|
-
const prompt = buildGapAnalysisPrompt(
|
|
45
|
-
userIntent,
|
|
46
|
-
toolSummaries,
|
|
47
|
-
context?.failedAttempts
|
|
48
|
-
);
|
|
44
|
+
const prompt = buildGapAnalysisPrompt(userIntent, toolSummaries, context?.failedAttempts);
|
|
49
45
|
|
|
50
|
-
const response = await this.callLLM(
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
46
|
+
const response = await this.callLLM(
|
|
47
|
+
[
|
|
48
|
+
{
|
|
49
|
+
role: 'system',
|
|
50
|
+
content: `You are a capability analyzer for AI agents.
|
|
54
51
|
Identify gaps between user intent and available tools.
|
|
55
52
|
Be conservative - only report gaps when truly necessary.
|
|
56
53
|
Consider tool composition before suggesting new tools.`,
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
54
|
+
},
|
|
55
|
+
{ role: 'user', content: prompt },
|
|
56
|
+
],
|
|
57
|
+
0.3
|
|
58
|
+
);
|
|
60
59
|
|
|
61
60
|
const parsed = parseGapAnalysisResponse(response.content);
|
|
62
61
|
const filteredGaps = this.filterAndPrioritizeGaps(
|
|
@@ -186,7 +185,9 @@ Consider tool composition before suggesting new tools.`,
|
|
|
186
185
|
for (const [action, related] of Object.entries(keywords)) {
|
|
187
186
|
if (intentLower.includes(action)) {
|
|
188
187
|
const relatedTools = availableTools.filter((t) =>
|
|
189
|
-
related.some(
|
|
188
|
+
related.some(
|
|
189
|
+
(r) => t.name.toLowerCase().includes(r) || t.description.toLowerCase().includes(r)
|
|
190
|
+
)
|
|
190
191
|
);
|
|
191
192
|
|
|
192
193
|
if (relatedTools.length >= 2) {
|
|
@@ -214,7 +215,10 @@ Consider tool composition before suggesting new tools.`,
|
|
|
214
215
|
}
|
|
215
216
|
|
|
216
217
|
private buildCacheKey(userIntent: string, tools: Tool[]): string {
|
|
217
|
-
const toolSignature = tools
|
|
218
|
+
const toolSignature = tools
|
|
219
|
+
.map((t) => t.name)
|
|
220
|
+
.sort()
|
|
221
|
+
.join(',');
|
|
218
222
|
return `${userIntent.slice(0, 100)}|${toolSignature}`;
|
|
219
223
|
}
|
|
220
224
|
|
|
@@ -73,10 +73,7 @@ export class InMemoryGeneratedToolStore implements IGeneratedToolStore {
|
|
|
73
73
|
return deleted;
|
|
74
74
|
}
|
|
75
75
|
|
|
76
|
-
async updateStatus(
|
|
77
|
-
id: string,
|
|
78
|
-
status: GeneratedTool['status']
|
|
79
|
-
): Promise<boolean> {
|
|
76
|
+
async updateStatus(id: string, status: GeneratedTool['status']): Promise<boolean> {
|
|
80
77
|
const tool = this.tools.get(id);
|
|
81
78
|
if (!tool) return false;
|
|
82
79
|
|
|
@@ -121,7 +118,9 @@ export class InMemoryGeneratedToolStore implements IGeneratedToolStore {
|
|
|
121
118
|
};
|
|
122
119
|
}
|
|
123
120
|
|
|
124
|
-
async getTopTools(
|
|
121
|
+
async getTopTools(
|
|
122
|
+
limit: number = 10
|
|
123
|
+
): Promise<Array<{ tool: GeneratedTool; metrics: ToolMetrics }>> {
|
|
125
124
|
const result: Array<{ tool: GeneratedTool; metrics: ToolMetrics }> = [];
|
|
126
125
|
|
|
127
126
|
for (const tool of this.tools.values()) {
|
|
@@ -149,9 +148,7 @@ export class InMemoryGeneratedToolStore implements IGeneratedToolStore {
|
|
|
149
148
|
for (const tool of this.tools.values()) {
|
|
150
149
|
if (tool.status === 'deprecated') continue;
|
|
151
150
|
|
|
152
|
-
const toolWords = new Set(
|
|
153
|
-
`${tool.name} ${tool.description}`.toLowerCase().split(/\s+/)
|
|
154
|
-
);
|
|
151
|
+
const toolWords = new Set(`${tool.name} ${tool.description}`.toLowerCase().split(/\s+/));
|
|
155
152
|
|
|
156
153
|
let matchCount = 0;
|
|
157
154
|
for (const word of descWords) {
|
|
@@ -26,9 +26,7 @@ export function buildGapAnalysisPrompt(
|
|
|
26
26
|
availableTools: Array<{ name: string; description: string }>,
|
|
27
27
|
failedAttempts?: string[]
|
|
28
28
|
): string {
|
|
29
|
-
const toolList = availableTools
|
|
30
|
-
.map((t) => `- ${t.name}: ${t.description}`)
|
|
31
|
-
.join('\n');
|
|
29
|
+
const toolList = availableTools.map((t) => `- ${t.name}: ${t.description}`).join('\n');
|
|
32
30
|
|
|
33
31
|
const failureContext = failedAttempts?.length
|
|
34
32
|
? `\n\nPrevious failed attempts:\n${failedAttempts.map((f) => `- ${f}`).join('\n')}`
|
|
@@ -280,18 +278,10 @@ export function parseValidationResponse(response: string): ToolValidationResult
|
|
|
280
278
|
|
|
281
279
|
return {
|
|
282
280
|
isValid: Boolean(parsed.isValid),
|
|
283
|
-
securityIssues: Array.isArray(parsed.securityIssues)
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
? parsed.logicIssues.map(String)
|
|
288
|
-
: [],
|
|
289
|
-
edgeCases: Array.isArray(parsed.edgeCases)
|
|
290
|
-
? parsed.edgeCases.map(String)
|
|
291
|
-
: [],
|
|
292
|
-
suggestions: Array.isArray(parsed.suggestions)
|
|
293
|
-
? parsed.suggestions.map(String)
|
|
294
|
-
: [],
|
|
281
|
+
securityIssues: Array.isArray(parsed.securityIssues) ? parsed.securityIssues.map(String) : [],
|
|
282
|
+
logicIssues: Array.isArray(parsed.logicIssues) ? parsed.logicIssues.map(String) : [],
|
|
283
|
+
edgeCases: Array.isArray(parsed.edgeCases) ? parsed.edgeCases.map(String) : [],
|
|
284
|
+
suggestions: Array.isArray(parsed.suggestions) ? parsed.suggestions.map(String) : [],
|
|
295
285
|
testResults: Array.isArray(parsed.testResults)
|
|
296
286
|
? parsed.testResults.map((tr: Record<string, unknown>) => ({
|
|
297
287
|
input: tr.input,
|
|
@@ -150,10 +150,13 @@ export class ToolGenerator {
|
|
|
150
150
|
allowedModules: this.config.sandboxConfig?.allowedModules,
|
|
151
151
|
});
|
|
152
152
|
|
|
153
|
-
const response = await this.callLLM(
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
153
|
+
const response = await this.callLLM(
|
|
154
|
+
[
|
|
155
|
+
{ role: 'system', content: TOOL_GENERATION_SYSTEM_PROMPT },
|
|
156
|
+
{ role: 'user', content: prompt },
|
|
157
|
+
],
|
|
158
|
+
0.4
|
|
159
|
+
);
|
|
157
160
|
|
|
158
161
|
const tool = parseToolGenerationResponse(response.content);
|
|
159
162
|
|
|
@@ -175,10 +178,13 @@ export class ToolGenerator {
|
|
|
175
178
|
): Promise<GeneratedTool | null> {
|
|
176
179
|
const prompt = buildToolImprovementPrompt(tool, validationResult, iteration);
|
|
177
180
|
|
|
178
|
-
const response = await this.callLLM(
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
181
|
+
const response = await this.callLLM(
|
|
182
|
+
[
|
|
183
|
+
{ role: 'system', content: TOOL_GENERATION_SYSTEM_PROMPT },
|
|
184
|
+
{ role: 'user', content: prompt },
|
|
185
|
+
],
|
|
186
|
+
0.3
|
|
187
|
+
);
|
|
182
188
|
|
|
183
189
|
const improved = parseToolGenerationResponse(response.content);
|
|
184
190
|
|
|
@@ -216,9 +222,7 @@ export class ToolGenerator {
|
|
|
216
222
|
};
|
|
217
223
|
}
|
|
218
224
|
|
|
219
|
-
private compileImplementation(
|
|
220
|
-
implementation: string
|
|
221
|
-
): (params: unknown) => Promise<unknown> {
|
|
225
|
+
private compileImplementation(implementation: string): (params: unknown) => Promise<unknown> {
|
|
222
226
|
return async (params: unknown): Promise<unknown> => {
|
|
223
227
|
const factory = new Function(`
|
|
224
228
|
"use strict";
|
|
@@ -1,8 +1,4 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
ToolSandboxConfig,
|
|
3
|
-
ToolSandboxResult,
|
|
4
|
-
GeneratedTool,
|
|
5
|
-
} from '@cogitator-ai/types';
|
|
1
|
+
import type { ToolSandboxConfig, ToolSandboxResult, GeneratedTool } from '@cogitator-ai/types';
|
|
6
2
|
|
|
7
3
|
export const DEFAULT_SANDBOX_CONFIG: ToolSandboxConfig = {
|
|
8
4
|
enabled: true,
|
|
@@ -43,10 +39,7 @@ export class ToolSandbox {
|
|
|
43
39
|
this.config = { ...DEFAULT_SANDBOX_CONFIG, ...config };
|
|
44
40
|
}
|
|
45
41
|
|
|
46
|
-
async execute(
|
|
47
|
-
tool: GeneratedTool,
|
|
48
|
-
params: unknown
|
|
49
|
-
): Promise<ToolSandboxResult> {
|
|
42
|
+
async execute(tool: GeneratedTool, params: unknown): Promise<ToolSandboxResult> {
|
|
50
43
|
const startTime = Date.now();
|
|
51
44
|
this.logs.length = 0;
|
|
52
45
|
|
|
@@ -114,9 +107,7 @@ export class ToolSandbox {
|
|
|
114
107
|
if (testCase.shouldThrow) {
|
|
115
108
|
passed = !execResult.success;
|
|
116
109
|
} else if (testCase.expectedOutput !== undefined) {
|
|
117
|
-
passed =
|
|
118
|
-
execResult.success &&
|
|
119
|
-
this.deepEqual(execResult.result, testCase.expectedOutput);
|
|
110
|
+
passed = execResult.success && this.deepEqual(execResult.result, testCase.expectedOutput);
|
|
120
111
|
} else {
|
|
121
112
|
passed = execResult.success;
|
|
122
113
|
}
|
|
@@ -174,9 +165,7 @@ export class ToolSandbox {
|
|
|
174
165
|
|
|
175
166
|
for (const pattern of forbidden) {
|
|
176
167
|
if (pattern.test(code)) {
|
|
177
|
-
throw new Error(
|
|
178
|
-
`Security violation: forbidden pattern detected - ${pattern.source}`
|
|
179
|
-
);
|
|
168
|
+
throw new Error(`Security violation: forbidden pattern detected - ${pattern.source}`);
|
|
180
169
|
}
|
|
181
170
|
}
|
|
182
171
|
|
|
@@ -188,7 +188,10 @@ export class ToolValidator {
|
|
|
188
188
|
if (sandboxResult.failed > 0) {
|
|
189
189
|
const failedTests = sandboxResult.results.filter((r) => !r.passed);
|
|
190
190
|
logicIssues.push(
|
|
191
|
-
...failedTests.map(
|
|
191
|
+
...failedTests.map(
|
|
192
|
+
(t) =>
|
|
193
|
+
`Test failed for input ${JSON.stringify(t.input)}: ${t.error || 'unexpected output'}`
|
|
194
|
+
)
|
|
192
195
|
);
|
|
193
196
|
}
|
|
194
197
|
|
|
@@ -266,15 +269,18 @@ export class ToolValidator {
|
|
|
266
269
|
}))
|
|
267
270
|
);
|
|
268
271
|
|
|
269
|
-
const response = await this.callLLM(
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
272
|
+
const response = await this.callLLM(
|
|
273
|
+
[
|
|
274
|
+
{
|
|
275
|
+
role: 'system',
|
|
276
|
+
content: `You are a security auditor and code reviewer.
|
|
273
277
|
Analyze code for security vulnerabilities, logic errors, and edge cases.
|
|
274
278
|
Be thorough but practical - focus on real issues.`,
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
279
|
+
},
|
|
280
|
+
{ role: 'user', content: prompt },
|
|
281
|
+
],
|
|
282
|
+
0.2
|
|
283
|
+
);
|
|
278
284
|
|
|
279
285
|
return response ? parseValidationResponse(response.content) : null;
|
|
280
286
|
} catch {
|
|
@@ -285,15 +291,13 @@ Be thorough but practical - focus on real issues.`,
|
|
|
285
291
|
private generateBasicTestCases(
|
|
286
292
|
tool: GeneratedTool
|
|
287
293
|
): Array<{ input: unknown; expectedOutput?: unknown; shouldThrow?: boolean }> {
|
|
288
|
-
const testCases: Array<{ input: unknown; expectedOutput?: unknown; shouldThrow?: boolean }> =
|
|
294
|
+
const testCases: Array<{ input: unknown; expectedOutput?: unknown; shouldThrow?: boolean }> =
|
|
295
|
+
[];
|
|
289
296
|
const params = tool.parameters;
|
|
290
297
|
|
|
291
298
|
if (params.type === 'object' && params.properties) {
|
|
292
299
|
const validInput: Record<string, unknown> = {};
|
|
293
|
-
const properties = params.properties as Record<
|
|
294
|
-
string,
|
|
295
|
-
{ type?: string; default?: unknown }
|
|
296
|
-
>;
|
|
300
|
+
const properties = params.properties as Record<string, { type?: string; default?: unknown }>;
|
|
297
301
|
|
|
298
302
|
for (const [key, schema] of Object.entries(properties)) {
|
|
299
303
|
validInput[key] = this.generateSampleValue(schema.type, schema.default);
|