closed-loop-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of closed-loop-cli might be problematic. Click here for more details.

Files changed (86) hide show
  1. package/dist/dashboard/server.js +237 -0
  2. package/dist/index.js +272 -0
  3. package/dist/orchestrator/agent-prompts.js +42 -0
  4. package/dist/orchestrator/autogenesis.js +973 -0
  5. package/dist/orchestrator/dgm-archive.js +223 -0
  6. package/dist/orchestrator/event-stream.js +103 -0
  7. package/dist/orchestrator/fitness-evaluator.js +99 -0
  8. package/dist/orchestrator/meta-agent.js +421 -0
  9. package/dist/orchestrator/microagent-registry.js +134 -0
  10. package/dist/orchestrator/mutation-strategies.js +174 -0
  11. package/dist/orchestrator/prompt-benchmark.js +102 -0
  12. package/dist/orchestrator/prompt-optimizer.js +169 -0
  13. package/dist/orchestrator/refactor-scanner.js +222 -0
  14. package/dist/orchestrator/research-manager.js +104 -0
  15. package/dist/orchestrator/rulez.js +135 -0
  16. package/dist/orchestrator/sahoo-gateway.js +261 -0
  17. package/dist/orchestrator/state-manager.js +121 -0
  18. package/dist/orchestrator/task-agent.js +444 -0
  19. package/dist/orchestrator/telegram-bot.js +374 -0
  20. package/dist/orchestrator/types.js +2 -0
  21. package/dist/tests/dynamic/dependencies.test.js +37 -0
  22. package/dist/tests/dynamic/dummy.test.js +7 -0
  23. package/dist/tests/dynamic/fuzzy-patch.test.js +68 -0
  24. package/dist/tests/dynamic/indexer.test.js +60 -0
  25. package/dist/tests/dynamic/openhands.test.js +83 -0
  26. package/dist/tests/dynamic/skills.test.js +88 -0
  27. package/dist/tests/run-tests.js +294 -0
  28. package/dist/tools/diff-tools.js +24 -0
  29. package/dist/tools/file-tools.js +191 -0
  30. package/dist/tools/indexer.js +301 -0
  31. package/dist/tools/math-helper.js +6 -0
  32. package/dist/tools/repo-map.js +122 -0
  33. package/dist/tools/search-tools.js +271 -0
  34. package/dist/tools/shell-tools.js +75 -0
  35. package/dist/tools/skills.js +122 -0
  36. package/dist/tools/tui-tools.js +82 -0
  37. package/docs/AI_Arch_Opt_Anti_Gaming.md +227 -0
  38. package/docs/AI_Self_Improvement_Safety.md +457 -0
  39. package/docs/Anthropic AI Agents_ Capabilities and Concerns.md +134 -0
  40. package/docs/Auto_ClosedLoop_AI_Agent.md +415 -0
  41. package/docs/Autonomous AI Agents_ Closing the Loop.docx +0 -0
  42. package/docs/Secure_AI_Sandbox_Framework.md +358 -0
  43. package/docs/skills/add-file-existence-check-utility.json +9 -0
  44. package/docs/skills/add-utility-function-for-file-existence-check.json +9 -0
  45. package/docs/skills/add-utility-function-to-module.json +9 -0
  46. package/docs/skills/extract-command-runner-utility.json +9 -0
  47. package/docs/skills/file-existence-check-utility.json +9 -0
  48. package/package.json +36 -0
  49. package/src/dashboard/public/index.css +1334 -0
  50. package/src/dashboard/public/index.html +385 -0
  51. package/src/dashboard/public/index.js +1059 -0
  52. package/src/dashboard/server.ts +209 -0
  53. package/src/index.ts +256 -0
  54. package/src/orchestrator/agent-prompts.ts +43 -0
  55. package/src/orchestrator/autogenesis.ts +1078 -0
  56. package/src/orchestrator/dgm-archive.ts +257 -0
  57. package/src/orchestrator/event-stream.ts +90 -0
  58. package/src/orchestrator/fitness-evaluator.ts +154 -0
  59. package/src/orchestrator/meta-agent.ts +434 -0
  60. package/src/orchestrator/microagent-registry.ts +115 -0
  61. package/src/orchestrator/microagents/git-helper.md +11 -0
  62. package/src/orchestrator/microagents/test-fixer.md +10 -0
  63. package/src/orchestrator/microagents/typescript-expert.md +11 -0
  64. package/src/orchestrator/mutation-strategies.ts +214 -0
  65. package/src/orchestrator/research-manager.ts +88 -0
  66. package/src/orchestrator/rulez.ts +118 -0
  67. package/src/orchestrator/sahoo-gateway.ts +300 -0
  68. package/src/orchestrator/state-manager.ts +161 -0
  69. package/src/orchestrator/system-prompt.txt +1 -0
  70. package/src/orchestrator/task-agent.ts +461 -0
  71. package/src/orchestrator/telegram-bot.ts +358 -0
  72. package/src/tests/dynamic/dependencies.test.ts +48 -0
  73. package/src/tests/dynamic/dummy.test.ts +4 -0
  74. package/src/tests/dynamic/fuzzy-patch.test.ts +42 -0
  75. package/src/tests/dynamic/indexer.test.ts +31 -0
  76. package/src/tests/dynamic/openhands.test.ts +59 -0
  77. package/src/tests/dynamic/skills.test.ts +63 -0
  78. package/src/tests/run-tests.ts +296 -0
  79. package/src/tools/diff-tools.ts +27 -0
  80. package/src/tools/file-tools.ts +187 -0
  81. package/src/tools/indexer.ts +325 -0
  82. package/src/tools/repo-map.ts +96 -0
  83. package/src/tools/search-tools.ts +258 -0
  84. package/src/tools/shell-tools.ts +90 -0
  85. package/src/tools/skills.ts +101 -0
  86. package/src/tools/tui-tools.ts +87 -0
@@ -0,0 +1,300 @@
1
+ import * as fs from 'fs';
2
+ import * as path from 'path';
3
+
4
/**
 * Metric bundle describing one evaluated code mutation, as populated by
 * `evaluateSahoo`.
 */
export interface SahooMetrics {
  /** Trigram cosine distance between the old and new text (see `calculateSemanticDrift`). */
  semanticDrift: number;
  /** Normalized Levenshtein edit distance (see `calculateLexicalDrift`). */
  lexicalDrift: number;
  /** Averaged relative change in line count and character count (see `calculateStructuralDrift`). */
  structuralDrift: number;
  /** Relative change in JavaScript/TypeScript keyword counts (see `calculateDistributionalDrift`). */
  distributionalDrift: number;
  /** Weighted sum of the four drift values; `evaluateSahoo` blocks a mutation above 0.440. */
  goalDriftIndex: number;
  /** Fraction of safety constraints satisfied; `evaluateSahoo` blocks a mutation below 1.0. */
  constraintPreservationScore: number;
  /** Logistic regression-risk estimate; `evaluateSahoo` blocks a mutation above 0.80. */
  regressionRisk: number;
}
13
+
14
/**
 * Counts every overlapping three-character window of a string.
 *
 * @param str - Text to scan.
 * @returns Map from trigram to its number of occurrences; empty when `str`
 *          has fewer than three characters.
 */
function getTrigramFreq(str: string): Map<string, number> {
  const counts = new Map<string, number>();
  const lastStart = str.length - 3; // negative for short input, so the loop is skipped
  for (let start = 0; start <= lastStart; start += 1) {
    const gram = str.slice(start, start + 3);
    counts.set(gram, (counts.get(gram) ?? 0) + 1);
  }
  return counts;
}
26
+
27
/**
 * Calculates Semantic Drift as the cosine distance (1 - cosine similarity)
 * between the character-trigram frequency vectors of two strings.
 *
 * Inputs shorter than three characters have no trigrams, so cosine similarity
 * is undefined for them. In that case the result is 0 only when the strings
 * are identical and 1.0 otherwise, so that a complete rewrite of a very short
 * text is never reported as "no drift".
 *
 * @param before - Text prior to the change.
 * @param after - Text following the change.
 * @returns A value in [0, 1]; 0 means identical trigram profiles.
 */
export function calculateSemanticDrift(before: string, after: string): number {
  const tBefore = getTrigramFreq(before);
  const tAfter = getTrigramFreq(after);

  // Neither side has a trigram profile: fall back to plain equality.
  if (tBefore.size === 0 && tAfter.size === 0) {
    return before === after ? 0 : 1.0;
  }
  // Exactly one side is a zero vector: treat as maximal drift.
  if (tBefore.size === 0 || tAfter.size === 0) return 1.0;

  // Dot product and squared magnitude of `before` in a single pass.
  let dotProduct = 0;
  let squaredBefore = 0;
  for (const [trigram, count] of tBefore) {
    dotProduct += count * (tAfter.get(trigram) ?? 0);
    squaredBefore += count * count;
  }

  let squaredAfter = 0;
  for (const count of tAfter.values()) {
    squaredAfter += count * count;
  }

  const magnitudeProduct = Math.sqrt(squaredBefore) * Math.sqrt(squaredAfter);
  // Clamp to guard against floating-point overshoot outside [-1, 1].
  const similarity = Math.min(Math.max(dotProduct / magnitudeProduct, -1.0), 1.0);
  const distance = 1.0 - similarity;
  // Snap rounding noise to an exact zero.
  return Math.abs(distance) < 1e-12 ? 0 : distance;
}
59
+
60
/**
 * Calculates Lexical Drift using normalized Levenshtein edit distance.
 *
 * The common prefix and suffix of the two strings are removed first, which
 * does not change the edit distance. The remaining "changed region" is then
 * compared with a two-row dynamic-programming table. To bound the cost on
 * large inputs, at most `maxLen` characters of each changed region are
 * compared; when that cap is hit, the edit rate measured inside the window is
 * extrapolated to the whole changed region.
 *
 * Characters are compared as UTF-16 code units.
 *
 * @param before - Text prior to the change.
 * @param after - Text following the change.
 * @returns A value in [0, 1]: 0 for identical strings, 1.0 when either string
 *          is empty and the other is not.
 */
export function calculateLexicalDrift(before: string, after: string): number {
  if (before === after) return 0;
  if (before.length === 0 || after.length === 0) return 1.0;

  // Upper bound on the compared window (keeps time and memory bounded).
  const maxLen = 1000;

  // Trim the shared prefix and suffix so that an edit located anywhere in a
  // long file is actually measured instead of being cut off by the cap.
  const shortest = Math.min(before.length, after.length);
  let prefix = 0;
  while (prefix < shortest && before.charCodeAt(prefix) === after.charCodeAt(prefix)) {
    prefix++;
  }
  let suffix = 0;
  while (
    suffix < shortest - prefix &&
    before.charCodeAt(before.length - 1 - suffix) === after.charCodeAt(after.length - 1 - suffix)
  ) {
    suffix++;
  }

  const changedBefore = before.length - prefix - suffix;
  const changedAfter = after.length - prefix - suffix;

  const sBefore = before.substring(prefix, prefix + Math.min(changedBefore, maxLen));
  const sAfter = after.substring(prefix, prefix + Math.min(changedAfter, maxLen));
  const n = sBefore.length;
  const m = sAfter.length;

  // Two-row Levenshtein: `prev` is row i-1, `curr` is row i.
  let prev: number[] = Array.from({ length: m + 1 }, (_, j) => j);
  let curr: number[] = new Array<number>(m + 1).fill(0);
  for (let i = 1; i <= n; i++) {
    curr[0] = i;
    const code = sBefore.charCodeAt(i - 1);
    for (let j = 1; j <= m; j++) {
      curr[j] = code === sAfter.charCodeAt(j - 1)
        ? prev[j - 1]
        : Math.min(
            prev[j - 1], // substitution
            curr[j - 1], // insertion
            prev[j]      // deletion
          ) + 1;
    }
    [prev, curr] = [curr, prev];
  }
  const distance = prev[m];

  const totalSpan = Math.max(before.length, after.length);
  if (changedBefore <= maxLen && changedAfter <= maxLen) {
    // Exact distance over the full strings.
    return distance / totalSpan;
  }

  // Window was capped: scale the in-window edit rate by the share of the
  // text that the changed region occupies. At least one of n, m is > 0 here.
  const windowRate = distance / Math.max(n, m);
  const changedSpan = Math.max(changedBefore, changedAfter);
  return Math.min(windowRate * (changedSpan / totalSpan), 1.0);
}
102
+
103
/**
 * Calculates Structural Drift: the mean of the relative change in line count
 * and the relative change in character count, both measured against the
 * `before` text and capped at 1.0 overall.
 *
 * @param before - Text prior to the change.
 * @param after - Text following the change.
 * @returns A value in [0, 1].
 */
export function calculateStructuralDrift(before: string, after: string): number {
  // Relative change against the old value; a floor of 1 avoids dividing by zero.
  const relativeChange = (was: number, now: number): number =>
    Math.abs(now - was) / Math.max(was, 1);

  const lineDelta = relativeChange(before.split('\n').length, after.split('\n').length);
  const sizeDelta = relativeChange(before.length, after.length);

  const average = (lineDelta + sizeDelta) / 2;
  return average > 1.0 ? 1.0 : average;
}
117
+
118
/**
 * Calculates Distributional Drift from the change in JavaScript/TypeScript
 * keyword frequencies between two texts.
 *
 * The result is the summed absolute per-keyword count difference divided by
 * the total number of keyword occurrences in both texts, capped at 1.0.
 *
 * @param before - Text prior to the change.
 * @param after - Text following the change.
 * @returns A value in [0, 1]; 0 when neither text contains a tracked keyword.
 */
export function calculateDistributionalDrift(before: string, after: string): number {
  const tracked = new Set<string>([
    'const', 'let', 'var', 'if', 'else', 'for', 'while',
    'function', 'class', 'import', 'export', 'return', 'await', 'async'
  ]);

  // Tally tracked keywords among the word tokens of a text.
  const tally = (text: string): Map<string, number> => {
    const counts = new Map<string, number>();
    for (const word of text.split(/\W+/)) {
      if (!tracked.has(word)) continue;
      counts.set(word, (counts.get(word) ?? 0) + 1);
    }
    return counts;
  };

  const oldCounts = tally(before);
  const newCounts = tally(after);

  let changed = 0;
  let occurrences = 0;
  for (const keyword of tracked) {
    const was = oldCounts.get(keyword) ?? 0;
    const now = newCounts.get(keyword) ?? 0;
    changed += Math.abs(now - was);
    occurrences += was + now;
  }

  return occurrences === 0 ? 0 : Math.min(changed / occurrences, 1.0);
}
151
+
152
/**
 * Safety Constraints Check for CPS (Constraint Preservation Score).
 *
 * Evaluates five constraints and reports the fraction satisfied:
 *  1. the code compiled,
 *  2. the tests passed,
 *  3. `content` contains no `eval(` / `new Function(` call,
 *  4. `content` contains no token that looks like a hard-coded secret,
 *  5. `filePath` contains no `..` path segment.
 *
 * @param filePath - Path of the file being changed; `/` and `\` are both
 *                   treated as separators.
 * @param content - New file content to scan.
 * @param compileSuccess - Whether compilation succeeded.
 * @param testsSuccess - Whether the test run succeeded.
 * @returns `score` in [0, 1] (satisfied / total) and `failedList` holding the
 *          violation names in constraint order.
 */
export function checkSafetyConstraints(filePath: string, content: string, compileSuccess: boolean, testsSuccess: boolean): { score: number, failedList: string[] } {
  // Constraint 3: dynamic eval / Function construction anywhere in the text.
  const hasEval = /\beval\s*\(/i.test(content) || /\bnew\s+Function\s*\(/i.test(content);

  // Constraint 4: hard-coded secrets.
  const secretPatterns = [
    /tp-[a-zA-Z0-9]{32,}/, // MiMo Token pattern
    /sk-[a-zA-Z0-9]{20,}/  // standard key patterns
  ];
  const hasSecrets = secretPatterns.some(pattern => pattern.test(content));

  // Constraint 5: reject any parent-directory segment. Checking whole
  // segments (rather than the substring "../") also catches a bare ".." and
  // a trailing "/..", and does not misfire on names that merely end in "..".
  const hasParentSegment = filePath
    .replace(/\\/g, '/')
    .split('/')
    .some(segment => segment === '..');

  const constraints: Array<{ satisfied: boolean; violation: string }> = [
    { satisfied: compileSuccess, violation: 'CompilationFailure' },
    { satisfied: testsSuccess, violation: 'TestExecutionFailure' },
    { satisfied: !hasEval, violation: 'DynamicCodeExecutionViolation' },
    { satisfied: !hasSecrets, violation: 'SecretLeakViolation' },
    { satisfied: !hasParentSegment, violation: 'PathSafetyViolation' }
  ];

  const failedList = constraints
    .filter(constraint => !constraint.satisfied)
    .map(constraint => constraint.violation);

  return {
    score: (constraints.length - failedList.length) / constraints.length,
    failedList
  };
}
208
+
209
/**
 * Evaluates a codebase mutation against the SAHOO Safeguarding Policy.
 *
 * Computes the four drift metrics, the Goal Drift Index (GDI), the Constraint
 * Preservation Score (CPS) and a regression-risk estimate, then applies three
 * halting rules in order: GDI > 0.440, CPS < 1.0, regression risk > 0.80.
 * The first rule that fires determines `reason`.
 *
 * @param filePath - Path of the mutated file (used for the path-safety constraint).
 * @param beforeContent - File content before the mutation.
 * @param afterContent - File content after the mutation.
 * @param compileSuccess - Whether the mutated code compiled.
 * @param testsSuccess - Whether the tests passed on the mutated code.
 * @param historicalScores - Previous quality scores. Non-finite entries are
 *        ignored; an empty (or entirely non-finite) list is treated as `[1.0]`.
 * @returns `passed`, the full metric set, and a `reason` when blocked.
 */
export function evaluateSahoo(
  filePath: string,
  beforeContent: string,
  afterContent: string,
  compileSuccess: boolean,
  testsSuccess: boolean,
  historicalScores: number[] = [1.0] // Default history to prevent division by zero
): { passed: boolean; metrics: SahooMetrics; reason?: string } {

  // 1. Calculate individual drifts
  const semantic = calculateSemanticDrift(beforeContent, afterContent);
  const lexical = calculateLexicalDrift(beforeContent, afterContent);
  const structural = calculateStructuralDrift(beforeContent, afterContent);
  const distributional = calculateDistributionalDrift(beforeContent, afterContent);

  // 2. Compute Goal Drift Index (GDI)
  // GDI = 0.38*semantic + 0.12*lexical + 0.21*structural + 0.29*distributional
  const gdi = (0.38 * semantic) + (0.12 * lexical) + (0.21 * structural) + (0.29 * distributional);

  // 3. Compute Constraint Preservation Score (CPS)
  const { score: cps, failedList } = checkSafetyConstraints(filePath, afterContent, compileSuccess, testsSuccess);

  // 4. Compute Regression Risk (R_c)
  // Q_c is modelled as the current CPS; Q_max is the best historical score,
  // never lower than 1.0.
  //
  // An explicitly passed empty list (or NaN entries) would otherwise turn the
  // mean, variance and risk into NaN, and `NaN > 0.80` is false, which would
  // silently disable the regression gate.
  const finiteScores = historicalScores.filter(score => Number.isFinite(score));
  const history = finiteScores.length > 0 ? finiteScores : [1.0];

  const qMax = history.reduce((best, score) => Math.max(best, score), 1.0);
  const currentQ = cps;

  // Population variance of the historical scores
  const mean = history.reduce((a, b) => a + b, 0) / history.length;
  const variance = history.reduce((a, b) => a + Math.pow(b - mean, 2), 0) / history.length;
  const stdDev = Math.max(Math.sqrt(variance), 0.05); // Min standard dev of 5%

  // Logistic proxy for the Gaussian CDF: R_c = 1 / (1 + e^(1.6 z)).
  // This single expression equals 1 - 1 / (1 + e^(-1.6 z)) for every z, so no
  // sign-dependent branch is needed; z = 0 yields 0.5.
  const z = (currentQ - qMax) / stdDev;
  const regressionRisk = 1.0 / (1.0 + Math.exp(1.6 * z));

  const metrics: SahooMetrics = {
    semanticDrift: semantic,
    lexicalDrift: lexical,
    structuralDrift: structural,
    distributionalDrift: distributional,
    goalDriftIndex: gdi,
    constraintPreservationScore: cps,
    regressionRisk: regressionRisk
  };

  // Halting policies:
  // 1. GDI Threshold: Halts execution if GDI > 0.440
  if (gdi > 0.440) {
    return {
      passed: false,
      metrics,
      reason: `Blocked by SAHOO: Goal Drift Index exceeded threshold (${gdi.toFixed(3)} > 0.440)`
    };
  }

  // 2. CPS Threshold: Halts execution if CPS < 1.0 (Zero-tolerance for safety violations)
  if (cps < 1.0) {
    return {
      passed: false,
      metrics,
      reason: `Blocked by SAHOO: Constraint Preservation Score fell below 1.0 (Violations: ${failedList.join(', ')})`
    };
  }

  // 3. Regression Risk threshold: Halt if risk > 0.8
  if (regressionRisk > 0.80) {
    return {
      passed: false,
      metrics,
      reason: `Blocked by SAHOO: Regression Risk too high (${(regressionRisk * 100).toFixed(1)}% > 80%)`
    };
  }

  return {
    passed: true,
    metrics
  };
}
@@ -0,0 +1,161 @@
1
+ import * as fs from 'fs';
2
+ import * as path from 'path';
3
+ import { SahooMetrics } from './sahoo-gateway';
4
+ export type { SahooMetrics };
5
+
6
/**
 * Shape of the evolution state that this module reads from and writes to
 * `evolution-state.json`.
 */
export interface EvolutionState {
  /** Current phase of the system; changes are published to the EventStream by `updateEvolutionState`. */
  status: 'idle' | 'reflecting' | 'scanning' | 'refactoring' | 'evaluating' | 'committing' | 'rolling_back' | 'optimizing_prompt' | 'cooldown' | 'error';
  currentCycle: number;
  maxCycles: number;
  tokenBudget: number;
  /** Token counters; `total` is recomputed as `input + output` on every update. */
  tokensUsed: {
    input: number;
    output: number;
    total: number;
  };
  /** ISO-8601 timestamp set by `updateEvolutionState`. */
  lastUpdated: string;
  currentTask: string | null;
  lastRefactorProposal: {
    targetFile: string;
    refactorGoal: string;
    time: string;
  } | null;
  lastRefactorResult: {
    success: boolean;
    targetFile: string;
    message: string;
    time: string;
    metrics?: SahooMetrics;
    reason?: string;
  } | null;
  lastPromptOptimization: {
    success: boolean;
    baselineScore: number;
    candidateScore: number;
    approved: boolean;
    time: string;
  } | null;
  /** Event log; `appendHistory` drops the oldest entry once it exceeds 50 items. */
  history: Array<{
    cycle: number;
    time: string;
    type: 'refactor' | 'prompt_opt' | 'dgm_evolution';
    description: string;
    success: boolean;
    details?: string;
  }>;
  /** Commit archive; `appendArchive` drops the oldest entry once it exceeds 10 items. */
  archive: Array<{
    commitHash: string;
    task: string;
    cycle: number;
    timestamp: string;
    metrics?: SahooMetrics;
  }>;
  /** DGM-specific: current fitness score (test pass rate 0–1) */
  currentFitness: number;
  /** DGM-specific: highest fitness score achieved so far */
  bestFitness: number;
  /** DGM-specific: current number of entries in dgm-archive.json */
  dgmPopulationSize: number;
  /** DGM-specific: ID of the best archive entry (the one with the highest fitness) */
  dgmBestEntryId: string | null;
}
62
+
63
/** Location of the persisted state file, resolved against the working directory at module load. */
const statePath = path.join(process.cwd(), 'evolution-state.json');

/**
 * Baseline state used when the state file is missing or unreadable.
 * `lastUpdated` is captured once, when this module is loaded.
 */
const defaultState: EvolutionState = {
  status: 'idle',
  currentCycle: 0,
  maxCycles: 0,
  tokenBudget: 500000,
  tokensUsed: {
    input: 0,
    output: 0,
    total: 0
  },
  lastUpdated: new Date().toISOString(),
  currentTask: null,
  lastRefactorProposal: null,
  lastRefactorResult: null,
  lastPromptOptimization: null,
  history: [],
  archive: [],
  currentFitness: 0,
  bestFitness: 0,
  dgmPopulationSize: 0,
  dgmBestEntryId: null
};
83
+
84
/**
 * Loads the evolution state from `statePath`.
 *
 * The stored object is merged over a fresh copy of the defaults, so a state
 * file that lacks some fields still yields a complete `EvolutionState`. When
 * the file is missing, unreadable, or does not contain a JSON object, a fresh
 * copy of the defaults is returned. The module-level default object itself is
 * never handed out, so callers cannot mutate it.
 *
 * @returns The current state; never throws.
 */
export function getEvolutionState(): EvolutionState {
  // defaultState holds only JSON-safe values, so a JSON round trip is a deep copy.
  const fallback: EvolutionState = JSON.parse(JSON.stringify(defaultState));

  if (!fs.existsSync(statePath)) {
    return fallback;
  }

  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(statePath, 'utf-8'));
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      console.error('Evolution state file does not contain a JSON object; using defaults.');
      return fallback;
    }

    // Runtime shape was checked above; individual fields fall back to defaults.
    const stored = parsed as Partial<EvolutionState>;
    return {
      ...fallback,
      ...stored,
      tokensUsed: { ...fallback.tokensUsed, ...stored.tokensUsed },
      history: Array.isArray(stored.history) ? stored.history : fallback.history,
      archive: Array.isArray(stored.archive) ? stored.archive : fallback.archive
    };
  } catch (err) {
    // Best effort: an unreadable or corrupt file must not crash the caller.
    console.error('Failed to read evolution state file, using defaults:', err);
    return fallback;
  }
}
95
+
96
+ import { EventStream } from './event-stream';
97
+
98
/**
 * Merges `update` into the current state, persists the result to `statePath`
 * and returns it.
 *
 * - `tokensUsed` is merged field by field and `total` is always recomputed as
 *   `input + output`.
 * - `lastUpdated` is set to the current time.
 * - A change of `status` is published to the EventStream.
 * - A failed write is logged and does not throw; the merged state is still
 *   returned.
 *
 * @param update - Fields to overwrite.
 * @returns The merged state.
 */
export function updateEvolutionState(update: Partial<EvolutionState>): EvolutionState {
  const currentState = getEvolutionState();

  // Always build a new counters object: reusing currentState.tokensUsed and
  // then assigning `total` would mutate an object that the caller (or the
  // module-level defaults) may still reference. The zero seeds also cover a
  // stored state that has no tokensUsed at all.
  const tokensUsed = {
    input: 0,
    output: 0,
    total: 0,
    ...currentState.tokensUsed,
    ...update.tokensUsed
  };
  tokensUsed.total = tokensUsed.input + tokensUsed.output;

  const merged: EvolutionState = {
    ...currentState,
    ...update,
    tokensUsed,
    lastUpdated: new Date().toISOString()
  };

  // Log status transitions to EventStream
  if (update.status && update.status !== currentState.status) {
    EventStream.getInstance().publish(
      'system',
      'log',
      'StateTransition',
      `System status changed from '${currentState.status}' to '${update.status}'`
    );
  }

  try {
    fs.writeFileSync(statePath, JSON.stringify(merged, null, 2), 'utf-8');
  } catch (err) {
    console.error('Failed to write evolution state file:', err);
  }

  return merged;
}
132
+
133
/**
 * Appends an entry to the state's history, stamping it with the current time,
 * and persists the result through `updateEvolutionState`.
 *
 * When the list grows beyond 50 entries the oldest one is dropped.
 *
 * @param item - History entry without its `time` field.
 */
export function appendHistory(item: Omit<EvolutionState['history'][0], 'time'>) {
  const state = getEvolutionState();
  const historyItem = {
    ...item,
    time: new Date().toISOString()
  };

  // A state file without a usable `history` array must not crash the append.
  const previous = Array.isArray(state.history) ? state.history : [];
  const updatedHistory = [...previous, historyItem];

  if (updatedHistory.length > 50) {
    updatedHistory.shift();
  }

  updateEvolutionState({ history: updatedHistory });
}
147
+
148
/**
 * Appends an entry to the state's archive, stamping it with the current time,
 * and persists the result through `updateEvolutionState`.
 *
 * When the list grows beyond 10 entries the oldest one is dropped.
 *
 * @param item - Archive entry without its `timestamp` field.
 */
export function appendArchive(item: Omit<EvolutionState['archive'][0], 'timestamp'>) {
  const { archive } = getEvolutionState();
  const stamped = { ...item, timestamp: new Date().toISOString() };

  const grown = [...(archive || []), stamped];
  // Over the cap: discard the single oldest entry.
  const trimmed = grown.length > 10 ? grown.slice(1) : grown;

  updateEvolutionState({ archive: trimmed });
}
@@ -0,0 +1 @@
1
+ You are an autonomous coding agent in a codebase. Fulfill tasks accurately using file read/write/edit tools and terminal access. Always use editFile for partial edits to existing files; use writeFile only for new files. After running commands, verify compilation and tests; if tests fail, fix issues (self-healing). Be concise: plan before changes to minimize token waste and thinking overhead.