@cogitator-ai/self-modifying 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +714 -0
  3. package/dist/architecture-evolution/capability-analyzer.d.ts +32 -0
  4. package/dist/architecture-evolution/capability-analyzer.d.ts.map +1 -0
  5. package/dist/architecture-evolution/capability-analyzer.js +264 -0
  6. package/dist/architecture-evolution/capability-analyzer.js.map +1 -0
  7. package/dist/architecture-evolution/evolution-strategy.d.ts +29 -0
  8. package/dist/architecture-evolution/evolution-strategy.d.ts.map +1 -0
  9. package/dist/architecture-evolution/evolution-strategy.js +176 -0
  10. package/dist/architecture-evolution/evolution-strategy.js.map +1 -0
  11. package/dist/architecture-evolution/index.d.ts +5 -0
  12. package/dist/architecture-evolution/index.d.ts.map +1 -0
  13. package/dist/architecture-evolution/index.js +5 -0
  14. package/dist/architecture-evolution/index.js.map +1 -0
  15. package/dist/architecture-evolution/parameter-optimizer.d.ts +67 -0
  16. package/dist/architecture-evolution/parameter-optimizer.d.ts.map +1 -0
  17. package/dist/architecture-evolution/parameter-optimizer.js +341 -0
  18. package/dist/architecture-evolution/parameter-optimizer.js.map +1 -0
  19. package/dist/architecture-evolution/prompts.d.ts +33 -0
  20. package/dist/architecture-evolution/prompts.d.ts.map +1 -0
  21. package/dist/architecture-evolution/prompts.js +169 -0
  22. package/dist/architecture-evolution/prompts.js.map +1 -0
  23. package/dist/constraints/index.d.ts +4 -0
  24. package/dist/constraints/index.d.ts.map +1 -0
  25. package/dist/constraints/index.js +4 -0
  26. package/dist/constraints/index.js.map +1 -0
  27. package/dist/constraints/modification-validator.d.ts +26 -0
  28. package/dist/constraints/modification-validator.d.ts.map +1 -0
  29. package/dist/constraints/modification-validator.js +313 -0
  30. package/dist/constraints/modification-validator.js.map +1 -0
  31. package/dist/constraints/rollback-manager.d.ts +52 -0
  32. package/dist/constraints/rollback-manager.d.ts.map +1 -0
  33. package/dist/constraints/rollback-manager.js +113 -0
  34. package/dist/constraints/rollback-manager.js.map +1 -0
  35. package/dist/constraints/safety-constraints.d.ts +11 -0
  36. package/dist/constraints/safety-constraints.d.ts.map +1 -0
  37. package/dist/constraints/safety-constraints.js +78 -0
  38. package/dist/constraints/safety-constraints.js.map +1 -0
  39. package/dist/events/event-emitter.d.ts +12 -0
  40. package/dist/events/event-emitter.d.ts.map +1 -0
  41. package/dist/events/event-emitter.js +43 -0
  42. package/dist/events/event-emitter.js.map +1 -0
  43. package/dist/events/index.d.ts +2 -0
  44. package/dist/events/index.d.ts.map +1 -0
  45. package/dist/events/index.js +2 -0
  46. package/dist/events/index.js.map +1 -0
  47. package/dist/index.d.ts +8 -0
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +7 -0
  50. package/dist/index.js.map +1 -0
  51. package/dist/meta-reasoning/index.d.ts +5 -0
  52. package/dist/meta-reasoning/index.d.ts.map +1 -0
  53. package/dist/meta-reasoning/index.js +5 -0
  54. package/dist/meta-reasoning/index.js.map +1 -0
  55. package/dist/meta-reasoning/meta-reasoner.d.ts +53 -0
  56. package/dist/meta-reasoning/meta-reasoner.d.ts.map +1 -0
  57. package/dist/meta-reasoning/meta-reasoner.js +261 -0
  58. package/dist/meta-reasoning/meta-reasoner.js.map +1 -0
  59. package/dist/meta-reasoning/observation-collector.d.ts +37 -0
  60. package/dist/meta-reasoning/observation-collector.d.ts.map +1 -0
  61. package/dist/meta-reasoning/observation-collector.js +123 -0
  62. package/dist/meta-reasoning/observation-collector.js.map +1 -0
  63. package/dist/meta-reasoning/prompts.d.ts +31 -0
  64. package/dist/meta-reasoning/prompts.d.ts.map +1 -0
  65. package/dist/meta-reasoning/prompts.js +96 -0
  66. package/dist/meta-reasoning/prompts.js.map +1 -0
  67. package/dist/meta-reasoning/strategy-selector.d.ts +27 -0
  68. package/dist/meta-reasoning/strategy-selector.d.ts.map +1 -0
  69. package/dist/meta-reasoning/strategy-selector.js +138 -0
  70. package/dist/meta-reasoning/strategy-selector.js.map +1 -0
  71. package/dist/self-modifying-agent.d.ts +61 -0
  72. package/dist/self-modifying-agent.d.ts.map +1 -0
  73. package/dist/self-modifying-agent.js +449 -0
  74. package/dist/self-modifying-agent.js.map +1 -0
  75. package/dist/tool-generation/gap-analyzer.d.ts +25 -0
  76. package/dist/tool-generation/gap-analyzer.d.ts.map +1 -0
  77. package/dist/tool-generation/gap-analyzer.js +153 -0
  78. package/dist/tool-generation/gap-analyzer.js.map +1 -0
  79. package/dist/tool-generation/generated-tool-store.d.ts +51 -0
  80. package/dist/tool-generation/generated-tool-store.d.ts.map +1 -0
  81. package/dist/tool-generation/generated-tool-store.js +195 -0
  82. package/dist/tool-generation/generated-tool-store.js.map +1 -0
  83. package/dist/tool-generation/index.d.ts +7 -0
  84. package/dist/tool-generation/index.d.ts.map +1 -0
  85. package/dist/tool-generation/index.js +7 -0
  86. package/dist/tool-generation/index.js.map +1 -0
  87. package/dist/tool-generation/prompts.d.ts +28 -0
  88. package/dist/tool-generation/prompts.d.ts.map +1 -0
  89. package/dist/tool-generation/prompts.js +269 -0
  90. package/dist/tool-generation/prompts.js.map +1 -0
  91. package/dist/tool-generation/tool-generator.d.ts +29 -0
  92. package/dist/tool-generation/tool-generator.d.ts.map +1 -0
  93. package/dist/tool-generation/tool-generator.js +169 -0
  94. package/dist/tool-generation/tool-generator.js.map +1 -0
  95. package/dist/tool-generation/tool-sandbox.d.ts +31 -0
  96. package/dist/tool-generation/tool-sandbox.d.ts.map +1 -0
  97. package/dist/tool-generation/tool-sandbox.js +240 -0
  98. package/dist/tool-generation/tool-sandbox.js.map +1 -0
  99. package/dist/tool-generation/tool-validator.d.ts +32 -0
  100. package/dist/tool-generation/tool-validator.d.ts.map +1 -0
  101. package/dist/tool-generation/tool-validator.js +304 -0
  102. package/dist/tool-generation/tool-validator.js.map +1 -0
  103. package/dist/utils/index.d.ts +2 -0
  104. package/dist/utils/index.d.ts.map +1 -0
  105. package/dist/utils/index.js +2 -0
  106. package/dist/utils/index.js.map +1 -0
  107. package/dist/utils/llm-helper.d.ts +6 -0
  108. package/dist/utils/llm-helper.d.ts.map +1 -0
  109. package/dist/utils/llm-helper.js +18 -0
  110. package/dist/utils/llm-helper.js.map +1 -0
  111. package/package.json +61 -0
  112. package/src/__tests__/architecture-evolution.test.ts +368 -0
  113. package/src/__tests__/constraints.test.ts +266 -0
  114. package/src/__tests__/index.test.ts +99 -0
  115. package/src/__tests__/meta-reasoning.test.ts +343 -0
  116. package/src/__tests__/tool-generation.test.ts +455 -0
  117. package/src/architecture-evolution/capability-analyzer.ts +337 -0
  118. package/src/architecture-evolution/evolution-strategy.ts +224 -0
  119. package/src/architecture-evolution/index.ts +26 -0
  120. package/src/architecture-evolution/parameter-optimizer.ts +489 -0
  121. package/src/architecture-evolution/prompts.ts +216 -0
  122. package/src/constraints/index.ts +23 -0
  123. package/src/constraints/modification-validator.ts +402 -0
  124. package/src/constraints/rollback-manager.ts +173 -0
  125. package/src/constraints/safety-constraints.ts +103 -0
  126. package/src/events/event-emitter.ts +62 -0
  127. package/src/events/index.ts +1 -0
  128. package/src/index.ts +112 -0
  129. package/src/meta-reasoning/index.ts +24 -0
  130. package/src/meta-reasoning/meta-reasoner.ts +381 -0
  131. package/src/meta-reasoning/observation-collector.ts +161 -0
  132. package/src/meta-reasoning/prompts.ts +131 -0
  133. package/src/meta-reasoning/strategy-selector.ts +179 -0
  134. package/src/self-modifying-agent.ts +585 -0
  135. package/src/tool-generation/gap-analyzer.ts +234 -0
  136. package/src/tool-generation/generated-tool-store.ts +268 -0
  137. package/src/tool-generation/index.ts +19 -0
  138. package/src/tool-generation/prompts.ts +308 -0
  139. package/src/tool-generation/tool-generator.ts +243 -0
  140. package/src/tool-generation/tool-sandbox.ts +332 -0
  141. package/src/tool-generation/tool-validator.ts +365 -0
  142. package/src/utils/index.ts +1 -0
  143. package/src/utils/llm-helper.ts +24 -0
@@ -0,0 +1,332 @@
1
+ import type {
2
+ ToolSandboxConfig,
3
+ ToolSandboxResult,
4
+ GeneratedTool,
5
+ } from '@cogitator-ai/types';
6
+
7
/**
 * Baseline sandbox settings. `ToolSandbox` spreads caller-supplied overrides
 * on top of this object, so every field here acts as the fallback value.
 */
export const DEFAULT_SANDBOX_CONFIG: ToolSandboxConfig = {
  // Sandboxing is on unless a caller explicitly disables it.
  enabled: true,
  // Milliseconds handed to the execution timer.
  maxExecutionTime: 5_000,
  // 50 MiB expressed in bytes.
  maxMemory: 50 * 1024 ** 2,
  // No modules are allow-listed by default.
  allowedModules: [],
  isolationLevel: 'strict',
};
14
+
15
/**
 * The set of names handed to a sandboxed implementation. The wrapper code
 * destructures exactly these properties into local bindings before the
 * generated tool body runs.
 */
interface SandboxContext {
  /** Capturing console: each method receives the raw argument list. */
  console: Record<'log' | 'warn' | 'error', (...args: unknown[]) => void>;

  // Utility namespaces and value constructors.
  Math: typeof Math;
  JSON: typeof JSON;
  Date: typeof Date;
  Array: typeof Array;
  Object: typeof Object;
  String: typeof String;
  Number: typeof Number;
  Boolean: typeof Boolean;
  RegExp: typeof RegExp;

  // Collections and async primitive.
  Map: typeof Map;
  Set: typeof Set;
  Promise: typeof Promise;

  // Error constructors.
  Error: typeof Error;
  TypeError: typeof TypeError;
  RangeError: typeof RangeError;
}
37
+
38
/**
 * Executes generated tool implementations behind a regex-based source filter,
 * a restricted set of injected globals and a wall-clock timeout.
 *
 * SECURITY NOTE(review): generated code is evaluated with `new Function` in
 * the host realm. The pattern filter and name shadowing below reduce the
 * accident surface but are NOT a hard isolation boundary (for example,
 * constructors remain reachable through prototype chains). Treat this as
 * defense in depth only; a real boundary needs a separate realm/process.
 *
 * `config.maxMemory` and `config.allowedModules` are not consulted anywhere
 * in this class; `memoryUsed` in results is only a size estimate of the
 * returned value.
 */
export class ToolSandbox {
  private readonly config: ToolSandboxConfig;
  // Shared log buffer; reset at the start of every `execute` call.
  private readonly logs: string[] = [];

  /**
   * @param config Overrides merged on top of `DEFAULT_SANDBOX_CONFIG`.
   */
  constructor(config: Partial<ToolSandboxConfig> = {}) {
    this.config = { ...DEFAULT_SANDBOX_CONFIG, ...config };
  }

  /**
   * Runs `tool.implementation` with `params` and reports the outcome.
   * Never rejects: validation failures, thrown errors and timeouts are all
   * returned as `{ success: false, error }`.
   *
   * @param tool Generated tool whose `implementation` source defines `execute`.
   * @param params Value passed as the single argument to `execute`.
   * @returns Result with timing, captured console output and a memory estimate.
   */
  async execute(
    tool: GeneratedTool,
    params: unknown
  ): Promise<ToolSandboxResult> {
    const startTime = Date.now();
    this.logs.length = 0;

    if (!this.config.enabled) {
      return this.executeUnsandboxed(tool, params, startTime);
    }

    try {
      this.validateImplementation(tool.implementation);

      const context = this.createContext();
      const wrappedCode = this.wrapImplementation(tool.implementation);

      const result = await this.executeWithTimeout(
        wrappedCode,
        context,
        params,
        this.config.maxExecutionTime
      );

      return {
        success: true,
        result,
        executionTime: Date.now() - startTime,
        memoryUsed: this.estimateMemoryUsage(result),
        logs: [...this.logs],
      };
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : String(error),
        executionTime: Date.now() - startTime,
        memoryUsed: 0,
        logs: [...this.logs],
      };
    }
  }

  /**
   * Executes the tool once per test case, sequentially, and grades each run.
   *
   * Grading: `shouldThrow` passes when execution fails; otherwise a defined
   * `expectedOutput` must deep-equal the result; otherwise success suffices.
   *
   * @param tool Tool under test.
   * @param testCases Inputs with optional expectations.
   * @returns Pass/fail counts plus the per-case details.
   */
  async testWithCases(
    tool: GeneratedTool,
    testCases: Array<{ input: unknown; expectedOutput?: unknown; shouldThrow?: boolean }>
  ): Promise<{
    passed: number;
    failed: number;
    results: Array<{
      input: unknown;
      output?: unknown;
      error?: string;
      passed: boolean;
      executionTime: number;
    }>;
  }> {
    const results: Array<{
      input: unknown;
      output?: unknown;
      error?: string;
      passed: boolean;
      executionTime: number;
    }> = [];

    // Sequential on purpose: `execute` resets the shared log buffer per call.
    for (const testCase of testCases) {
      const execResult = await this.execute(tool, testCase.input);

      let passed = false;
      if (testCase.shouldThrow) {
        passed = !execResult.success;
      } else if (testCase.expectedOutput !== undefined) {
        passed =
          execResult.success &&
          this.deepEqual(execResult.result, testCase.expectedOutput);
      } else {
        passed = execResult.success;
      }

      results.push({
        input: testCase.input,
        output: execResult.result,
        error: execResult.error,
        passed,
        executionTime: execResult.executionTime,
      });
    }

    const passedCount = results.filter((r) => r.passed).length;
    return {
      passed: passedCount,
      failed: results.length - passedCount,
      results,
    };
  }

  /**
   * Rejects source text that matches a forbidden pattern or exceeds 200 lines.
   * This is a textual heuristic: it only catches the literal spellings listed.
   *
   * @throws Error naming the offending pattern, or the line count when too long.
   */
  private validateImplementation(code: string): void {
    const forbidden = [
      /\beval\s*\(/,
      /\bFunction\s*\(/,
      /\bnew\s+Function\s*\(/,
      /\bimport\s*\(/,
      /\brequire\s*\(/,
      /\bprocess\./,
      /\bglobal\./,
      /\bglobalThis\./,
      /\bwindow\./,
      /\bdocument\./,
      /\bchild_process/,
      /\bfs\./,
      /\bhttp\./,
      /\bhttps\./,
      /\bnet\./,
      /\bdns\./,
      /\bos\./,
      /\bexec\s*\(/,
      /\bspawn\s*\(/,
      /__proto__/,
      /\bconstructor\s*\[/,
    ];

    // Strict isolation additionally bans network and timer entry points.
    if (this.config.isolationLevel === 'strict') {
      forbidden.push(
        /\bfetch\s*\(/,
        /\bXMLHttpRequest/,
        /\bWebSocket/,
        /\bsetTimeout\s*\(/,
        /\bsetInterval\s*\(/
      );
    }

    for (const pattern of forbidden) {
      if (pattern.test(code)) {
        throw new Error(
          `Security violation: forbidden pattern detected - ${pattern.source}`
        );
      }
    }

    const lines = code.split('\n').length;
    if (lines > 200) {
      throw new Error(`Implementation too large: ${lines} lines (max 200)`);
    }
  }

  /**
   * Builds the object whose properties become the sandboxed code's locals.
   * Console methods append a level-tagged, space-joined line to `this.logs`.
   */
  private createContext(): SandboxContext {
    const record =
      (level: string) =>
      (...args: unknown[]): void => {
        this.logs.push(`[${level}] ${args.map(String).join(' ')}`);
      };

    return {
      console: {
        log: record('LOG'),
        warn: record('WARN'),
        error: record('ERROR'),
      },
      Math,
      JSON,
      Date,
      Array,
      Object,
      String,
      Number,
      Boolean,
      RegExp,
      Map,
      Set,
      Promise,
      Error,
      TypeError,
      RangeError,
    };
  }

  /**
   * Produces the function body passed to `new Function`. Evaluating it yields
   * an async `(params, context)` function that runs the implementation and
   * then calls its `execute`.
   *
   * The outer function exists only to shadow host globals: it is invoked with
   * no arguments, so `globalThis`, `global`, `process`, `require` and `module`
   * are `undefined` inside the implementation. Without it, spellings the regex
   * filter misses (e.g. `globalThis['process']`) reach the real objects.
   * Shadowing via an enclosing scope, not parameters of the inner function,
   * keeps implementation-level `const`/`let` declarations of those names legal.
   */
  private wrapImplementation(code: string): string {
    return `
      "use strict";
      return (function (globalThis, global, process, require, module) {
        return (async function sandboxedExecution(params, context) {
          const { console, Math, JSON, Date, Array, Object, String, Number, Boolean, RegExp, Map, Set, Promise, Error, TypeError, RangeError } = context;

          ${code}

          if (typeof execute === 'function') {
            return await execute(params);
          }
          throw new Error('Implementation must define an execute function');
        });
      })();
    `;
  }

  /**
   * Compiles and runs the wrapped code, racing it against a timer.
   *
   * The timer can only fire while the event loop is free: it bounds awaiting
   * code, but a synchronous busy loop in the implementation blocks the thread
   * and is not interrupted. On timeout the promise rejects, yet the underlying
   * execution is not cancelled.
   *
   * @throws Error `Execution timeout: exceeded <n>ms`, or whatever the
   *   implementation (or its compilation) throws.
   */
  private async executeWithTimeout(
    wrappedCode: string,
    context: SandboxContext,
    params: unknown,
    timeout: number
  ): Promise<unknown> {
    return new Promise((resolve, reject) => {
      const timer = setTimeout(() => {
        reject(new Error(`Execution timeout: exceeded ${timeout}ms`));
      }, timeout);

      try {
        // SECURITY NOTE(review): dynamic evaluation of generated source.
        const factory = new Function(wrappedCode);
        const executor = factory();

        Promise.resolve(executor(params, context))
          .then((result) => {
            clearTimeout(timer);
            resolve(result);
          })
          .catch((error) => {
            clearTimeout(timer);
            reject(error);
          });
      } catch (error) {
        // Syntax errors from compilation and synchronous throws land here.
        clearTimeout(timer);
        reject(error);
      }
    });
  }

  /**
   * Path taken when `config.enabled` is false: no pattern filter, no injected
   * context, no timeout, and no log capture (`logs` is always empty).
   *
   * SECURITY NOTE(review): the implementation runs with full host access.
   */
  private async executeUnsandboxed(
    tool: GeneratedTool,
    params: unknown,
    startTime: number
  ): Promise<ToolSandboxResult> {
    try {
      const factory = new Function(`
        "use strict";
        ${tool.implementation}
        return execute;
      `);
      const execute = factory();
      const result = await execute(params);

      return {
        success: true,
        result,
        executionTime: Date.now() - startTime,
        memoryUsed: this.estimateMemoryUsage(result),
        logs: [],
      };
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : String(error),
        executionTime: Date.now() - startTime,
        memoryUsed: 0,
        logs: [],
      };
    }
  }

  /**
   * Rough size of a value: two bytes per character of its JSON form.
   * Returns 0 when the value has no JSON form (`undefined`, functions) or
   * cannot be serialized (circular structures, BigInt).
   */
  private estimateMemoryUsage(value: unknown): number {
    try {
      const serialized: string | undefined = JSON.stringify(value);
      return typeof serialized === 'string' ? serialized.length * 2 : 0;
    } catch {
      return 0;
    }
  }

  /**
   * Structural equality used to grade test output against `expectedOutput`.
   *
   * - Primitives compare with `===`, except that `NaN` equals `NaN`.
   * - An array never equals a non-array object.
   * - Dates compare by timestamp.
   * - Plain objects must have the same own enumerable keys (a key that is
   *   present with value `undefined` does not match a missing key).
   */
  private deepEqual(a: unknown, b: unknown): boolean {
    if (a === b) return true;

    if (typeof a === 'number' && typeof b === 'number') {
      // Only remaining way two different-by-=== numbers can match.
      return Number.isNaN(a) && Number.isNaN(b);
    }

    if (typeof a !== 'object' || typeof b !== 'object') return false;
    if (a === null || b === null) return false;

    if (Array.isArray(a) !== Array.isArray(b)) return false;
    if (Array.isArray(a) && Array.isArray(b)) {
      if (a.length !== b.length) return false;
      return a.every((val, i) => this.deepEqual(val, b[i]));
    }

    if (a instanceof Date || b instanceof Date) {
      return (
        a instanceof Date && b instanceof Date && a.getTime() === b.getTime()
      );
    }

    const aObj = a as Record<string, unknown>;
    const bObj = b as Record<string, unknown>;
    const keysA = Object.keys(aObj);
    const keysB = Object.keys(bObj);

    if (keysA.length !== keysB.length) return false;
    return keysA.every(
      (key) =>
        Object.prototype.hasOwnProperty.call(bObj, key) &&
        this.deepEqual(aObj[key], bObj[key])
    );
  }
}
@@ -0,0 +1,365 @@
1
+ import type {
2
+ GeneratedTool,
3
+ ToolValidationResult,
4
+ LLMBackend,
5
+ ToolSelfGenerationConfig,
6
+ } from '@cogitator-ai/types';
7
+ import { ToolSandbox } from './tool-sandbox';
8
+ import { buildToolValidationPrompt, parseValidationResponse } from './prompts';
9
+
10
/** Construction options accepted by `ToolValidator`. */
export interface ToolValidatorOptions {
  /** Optional backend; LLM review only runs when this is provided. */
  llm?: LLMBackend;
  /** Tool self-generation settings, including the sandbox configuration. */
  config: ToolSelfGenerationConfig;
}
14
+
15
/** A single static check applied to a generated tool's source code. */
interface ValidationRule {
  /** Identifier used as the `[id]` prefix on reported findings. */
  id: string;
  /** Human-readable rule title. */
  name: string;
  /** How a finding from this rule is classified. */
  severity: 'error' | 'warning' | 'info';
  /**
   * Inspects the implementation source.
   * Returns a finding message, or `null` when the rule is satisfied.
   */
  check: (code: string, tool: GeneratedTool) => string | null;
}
21
+
22
/**
 * Built-in source-text checks run before a generated tool is executed.
 * Each `check` returns a finding message or `null`. These are regex
 * heuristics over the raw source (comments and strings included), so they
 * can both miss obfuscated code and flag harmless text.
 */
const STATIC_VALIDATION_RULES: ValidationRule[] = [
  {
    id: 'no_eval',
    name: 'No eval() or Function constructor',
    severity: 'error',
    check: (code) => {
      if (/\beval\s*\(/.test(code)) return 'Uses eval()';
      // `Function(...)` compiles code with or without `new`; match both.
      if (/\bFunction\s*\(/.test(code)) return 'Uses Function constructor';
      return null;
    },
  },
  {
    id: 'no_dynamic_import',
    name: 'No dynamic imports',
    severity: 'error',
    check: (code) => {
      if (/\bimport\s*\(/.test(code)) return 'Uses dynamic import()';
      if (/\brequire\s*\(/.test(code)) return 'Uses require()';
      return null;
    },
  },
  {
    id: 'no_global_access',
    name: 'No global object access',
    severity: 'error',
    check: (code) => {
      // Member access in any spelling: `x.y`, `x?.y`, `x[...]`.
      if (/\bprocess\s*(?:\?\.|\.|\[)/.test(code)) return 'Accesses process object';
      if (/\bglobal\s*(?:\?\.|\.|\[)/.test(code)) return 'Accesses global object';
      if (/\bglobalThis\s*(?:\?\.|\.|\[)/.test(code)) return 'Accesses globalThis';
      return null;
    },
  },
  {
    id: 'no_prototype_pollution',
    name: 'No prototype pollution',
    severity: 'error',
    check: (code) => {
      if (/__proto__/.test(code)) return 'Uses __proto__';
      if (/\.prototype\s*=/.test(code)) return 'Modifies prototype';
      if (/Object\.setPrototypeOf/.test(code)) return 'Uses setPrototypeOf';
      return null;
    },
  },
  {
    id: 'no_infinite_loops',
    name: 'No obvious infinite loops',
    severity: 'warning',
    check: (code) => {
      // Coarse: any `break` anywhere in the source silences the finding.
      if (/while\s*\(\s*true\s*\)/.test(code) && !/break/.test(code)) {
        return 'Contains while(true) without break';
      }
      if (/for\s*\(\s*;\s*;\s*\)/.test(code) && !/break/.test(code)) {
        return 'Contains for(;;) without break';
      }
      return null;
    },
  },
  {
    id: 'has_execute_function',
    name: 'Has execute function',
    severity: 'error',
    check: (code) => {
      const patterns = [
        // function execute(...) / async function execute(...)
        /(?:async\s+)?function\s+execute\s*\(/,
        // const|let|var execute = (..) => / function / async ... / x => ...
        /\b(?:const|let|var)\s+execute\s*=\s*(?:async\b\s*)?(?:\(|function\b|[A-Za-z_$][\w$]*\s*=>)/,
        // bare assignment: execute = (...) => ...
        /\bexecute\s*=\s*(?:async\s*)?\(/,
      ];
      const hasExecute = patterns.some((p) => p.test(code));
      return hasExecute ? null : 'Missing execute function';
    },
  },
  {
    id: 'reasonable_length',
    name: 'Reasonable code length',
    severity: 'warning',
    check: (code) => {
      const lines = code.split('\n').length;
      if (lines > 150) return `Too long: ${lines} lines (recommended < 150)`;
      return null;
    },
  },
  {
    id: 'no_shell_commands',
    name: 'No shell command execution',
    severity: 'error',
    check: (code) => {
      if (/child_process/.test(code)) return 'Uses child_process';
      if (/\bexec\s*\(/.test(code)) return 'Uses exec()';
      if (/\bspawn\s*\(/.test(code)) return 'Uses spawn()';
      if (/\bexecSync\s*\(/.test(code)) return 'Uses execSync()';
      return null;
    },
  },
  {
    id: 'no_file_system',
    name: 'No file system access',
    severity: 'error',
    check: (code) => {
      if (/\bfs\./.test(code)) return 'Uses fs module';
      // Sync variants are tested first so they get the more specific message.
      if (/readFileSync|writeFileSync/.test(code)) return 'Uses file system sync methods';
      if (/readFile|writeFile/.test(code)) return 'Uses file system methods';
      return null;
    },
  },
  {
    id: 'has_error_handling',
    name: 'Has error handling',
    severity: 'info',
    check: (code) => {
      if (!/try\s*\{/.test(code) && !/\.catch\s*\(/.test(code)) {
        return 'No try-catch or .catch() error handling';
      }
      return null;
    },
  },
];
140
+
141
/**
 * Validates a generated tool in up to three stages: static source rules,
 * sandboxed test execution, and (optionally) an LLM review. The stages feed
 * a single `ToolValidationResult` with an overall score in [0, 1].
 */
export class ToolValidator {
  private readonly llm?: LLMBackend;
  private readonly config: ToolSelfGenerationConfig;
  private readonly sandbox: ToolSandbox;
  // Rules registered through `addRule`; run after the built-in rules.
  private readonly customRules: ValidationRule[] = [];

  /**
   * @param options Optional LLM backend plus the generation config, whose
   *   `sandboxConfig` is passed to the internal `ToolSandbox`.
   */
  constructor(options: ToolValidatorOptions) {
    this.llm = options.llm;
    this.config = options.config;
    this.sandbox = new ToolSandbox(options.config.sandboxConfig);
  }

  /**
   * Runs the full validation pipeline for one tool.
   *
   * Any static `error` finding short-circuits: the tool is neither executed
   * nor sent to the LLM, and the result has score 0 and no test results.
   *
   * @param tool Tool to validate.
   * @param testCases Cases to run; when omitted, basic cases are derived from
   *   the tool's parameter schema.
   * @returns Findings, per-test outcomes and the overall score. `isValid`
   *   requires no security or logic issues and a score of at least 0.7.
   */
  async validate(
    tool: GeneratedTool,
    testCases?: Array<{ input: unknown; expectedOutput?: unknown; shouldThrow?: boolean }>
  ): Promise<ToolValidationResult> {
    const securityIssues: string[] = [];
    const logicIssues: string[] = [];
    const edgeCases: string[] = [];
    const suggestions: string[] = [];

    const staticResult = this.runStaticAnalysis(tool);
    securityIssues.push(...staticResult.errors);
    suggestions.push(...staticResult.warnings);

    if (securityIssues.length > 0) {
      return {
        isValid: false,
        securityIssues,
        logicIssues,
        edgeCases,
        suggestions,
        testResults: [],
        overallScore: 0,
      };
    }

    const effectiveTestCases = testCases ?? this.generateBasicTestCases(tool);
    const sandboxResult = await this.sandbox.testWithCases(tool, effectiveTestCases);

    const testResults = sandboxResult.results.map((r) => ({
      input: r.input,
      output: r.output,
      passed: r.passed,
      error: r.error,
    }));

    if (sandboxResult.failed > 0) {
      const failedTests = sandboxResult.results.filter((r) => !r.passed);
      logicIssues.push(
        ...failedTests.map(
          (t) =>
            `Test failed for input ${this.formatValue(t.input)}: ${t.error || 'unexpected output'}`
        )
      );
    }

    if (this.llm && this.config.requireLLMValidation) {
      const llmResult = await this.runLLMValidation(tool, effectiveTestCases);
      if (llmResult) {
        securityIssues.push(...llmResult.securityIssues);
        logicIssues.push(...llmResult.logicIssues);
        edgeCases.push(...llmResult.edgeCases);
        suggestions.push(...llmResult.suggestions);
      }
    }

    // Counts are taken before de-duplication, so repeated findings weigh more.
    const score = this.calculateScore(
      securityIssues.length,
      logicIssues.length,
      edgeCases.length,
      sandboxResult.passed,
      sandboxResult.failed
    );

    return {
      isValid: securityIssues.length === 0 && logicIssues.length === 0 && score >= 0.7,
      securityIssues: [...new Set(securityIssues)],
      logicIssues: [...new Set(logicIssues)],
      edgeCases: [...new Set(edgeCases)],
      suggestions: [...new Set(suggestions)],
      testResults,
      overallScore: score,
    };
  }

  /** Registers an additional static rule for subsequent `validate` calls. */
  addRule(rule: ValidationRule): void {
    this.customRules.push(rule);
  }

  /**
   * Applies built-in and custom rules to the tool source.
   * `error` findings and `warning` findings are collected separately, each
   * prefixed with `[ruleId]`; `info` findings are not reported.
   */
  private runStaticAnalysis(tool: GeneratedTool): {
    errors: string[];
    warnings: string[];
  } {
    const errors: string[] = [];
    const warnings: string[] = [];
    const allRules = [...STATIC_VALIDATION_RULES, ...this.customRules];

    for (const rule of allRules) {
      const result = rule.check(tool.implementation, tool);
      if (result) {
        if (rule.severity === 'error') {
          errors.push(`[${rule.id}] ${result}`);
        } else if (rule.severity === 'warning') {
          warnings.push(`[${rule.id}] ${result}`);
        }
      }
    }

    return { errors, warnings };
  }

  /**
   * Asks the LLM to review the tool against the given test cases.
   *
   * Best effort: returns `null` when no backend is configured, when the call
   * yields no response, or when anything in the exchange throws. The caller
   * then proceeds without LLM findings.
   */
  private async runLLMValidation(
    tool: GeneratedTool,
    testCases: Array<{ input: unknown; expectedOutput?: unknown; shouldThrow?: boolean }>
  ): Promise<ToolValidationResult | null> {
    if (!this.llm) return null;

    try {
      const prompt = buildToolValidationPrompt(
        tool,
        testCases.map((tc) => ({
          input: tc.input,
          expectedBehavior: this.describeExpectedBehavior(tc),
        }))
      );

      const response = await this.callLLM([
        {
          role: 'system',
          content: `You are a security auditor and code reviewer.
Analyze code for security vulnerabilities, logic errors, and edge cases.
Be thorough but practical - focus on real issues.`,
        },
        { role: 'user', content: prompt },
      ], 0.2);

      return response ? parseValidationResponse(response.content) : null;
    } catch {
      // Deliberately swallowed: LLM review failure must not fail validation.
      return null;
    }
  }

  /**
   * Renders one test case's expectation as a sentence for the LLM prompt.
   *
   * Uses `!== undefined` rather than truthiness so that falsy expected
   * outputs (`0`, `false`, `''`, `null`) are still described as return
   * values, matching how the sandbox decides whether to compare output.
   */
  private describeExpectedBehavior(testCase: {
    input: unknown;
    expectedOutput?: unknown;
    shouldThrow?: boolean;
  }): string {
    if (testCase.expectedOutput !== undefined) {
      return `Should return ${this.formatValue(testCase.expectedOutput)}`;
    }
    return testCase.shouldThrow
      ? 'Should throw an error'
      : 'Should execute successfully';
  }

  /**
   * JSON-formats a value for messages without ever throwing; falls back to
   * `String(value)` for inputs JSON cannot serialize (BigInt, cycles).
   */
  private formatValue(value: unknown): string {
    try {
      return String(JSON.stringify(value));
    } catch {
      return String(value);
    }
  }

  /**
   * Derives smoke tests from an object-typed parameter schema:
   * one input with a sample value per property, one empty object (expected
   * to throw only when the schema lists required properties), and `null`
   * (always expected to throw). Other schema shapes yield no cases.
   */
  private generateBasicTestCases(
    tool: GeneratedTool
  ): Array<{ input: unknown; expectedOutput?: unknown; shouldThrow?: boolean }> {
    const testCases: Array<{ input: unknown; expectedOutput?: unknown; shouldThrow?: boolean }> = [];
    const params = tool.parameters;

    if (params.type === 'object' && params.properties) {
      const validInput: Record<string, unknown> = {};
      const properties = params.properties as Record<
        string,
        { type?: string; default?: unknown }
      >;

      for (const [key, schema] of Object.entries(properties)) {
        validInput[key] = this.generateSampleValue(schema.type, schema.default);
      }
      testCases.push({ input: validInput });

      const required = params.required as string[] | undefined;
      testCases.push({ input: {}, shouldThrow: (required?.length ?? 0) > 0 });

      testCases.push({ input: null, shouldThrow: true });
    }

    return testCases;
  }

  /**
   * Picks a placeholder for a schema property: the declared default when one
   * exists, otherwise a fixed sample per JSON-schema type (`null` if unknown).
   */
  private generateSampleValue(type?: string, defaultValue?: unknown): unknown {
    if (defaultValue !== undefined) return defaultValue;

    switch (type) {
      case 'string':
        return 'test';
      case 'number':
      case 'integer':
        return 42;
      case 'boolean':
        return true;
      case 'array':
        return [];
      case 'object':
        return {};
      default:
        return null;
    }
  }

  /**
   * Combines finding counts and test outcomes into a score in [0, 1].
   *
   * Any security finding forces 0. Otherwise start at 1, subtract 0.2 per
   * logic issue and 0.05 per edge case, then (if any tests ran) blend 60%
   * of that with 40% of the test pass ratio, and clamp.
   */
  private calculateScore(
    securityCount: number,
    logicCount: number,
    edgeCaseCount: number,
    testsPassed: number,
    testsFailed: number
  ): number {
    if (securityCount > 0) return 0;

    let score = 1.0;

    score -= logicCount * 0.2;
    score -= edgeCaseCount * 0.05;

    const totalTests = testsPassed + testsFailed;
    if (totalTests > 0) {
      const testScore = testsPassed / totalTests;
      score = score * 0.6 + testScore * 0.4;
    }

    return Math.max(0, Math.min(1, score));
  }

  /**
   * Sends messages to the backend, preferring `complete` when the backend
   * provides it and otherwise calling `chat` with model `'default'`.
   * Returns `null` when no backend is configured.
   */
  private async callLLM(
    messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>,
    temperature: number
  ) {
    if (!this.llm) return null;
    if (this.llm.complete) {
      return this.llm.complete({ messages, temperature });
    }
    return this.llm.chat({ model: 'default', messages, temperature });
  }
}
@@ -0,0 +1 @@
1
+ export { llmChat } from './llm-helper';
@@ -0,0 +1,24 @@
1
+ import type { LLMBackend, Message } from '@cogitator-ai/types';
2
+
3
/**
 * Sends `messages` to the backend and resolves with the reply text.
 *
 * When the backend exposes `complete`, that method is used; otherwise the
 * request goes through `chat` with the model name `'default'`. In both cases
 * `temperature` and `maxTokens` are forwarded as given (possibly `undefined`).
 *
 * @param llm Backend to call.
 * @param messages Conversation to send.
 * @param options Optional sampling temperature and token limit.
 * @returns The `content` field of the backend response.
 */
export async function llmChat(
  llm: LLMBackend,
  messages: Message[],
  options?: { temperature?: number; maxTokens?: number }
): Promise<string> {
  // Fields shared by both request shapes.
  const request = {
    messages,
    temperature: options?.temperature,
    maxTokens: options?.maxTokens,
  };

  const reply = llm.complete
    ? await llm.complete(request)
    : await llm.chat({ model: 'default', ...request });

  return reply.content;
}