erosolar-cli 2.1.191 → 2.1.193
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/runtime/agentSession.d.ts.map +1 -1
- package/dist/runtime/agentSession.js +1 -2
- package/dist/runtime/agentSession.js.map +1 -1
- package/dist/runtime/flowOrchestrator.d.ts +8 -34
- package/dist/runtime/flowOrchestrator.d.ts.map +1 -1
- package/dist/runtime/flowOrchestrator.js +25 -431
- package/dist/runtime/flowOrchestrator.js.map +1 -1
- package/dist/shell/interactiveShell.d.ts.map +1 -1
- package/dist/shell/interactiveShell.js +9 -46
- package/dist/shell/interactiveShell.js.map +1 -1
- package/dist/shell/taskCompletionDetector.d.ts +3 -57
- package/dist/shell/taskCompletionDetector.d.ts.map +1 -1
- package/dist/shell/taskCompletionDetector.js +13 -284
- package/dist/shell/taskCompletionDetector.js.map +1 -1
- package/package.json +1 -1
- package/dist/core/alphaZeroOrchestrator.d.ts +0 -140
- package/dist/core/alphaZeroOrchestrator.d.ts.map +0 -1
- package/dist/core/alphaZeroOrchestrator.js +0 -418
- package/dist/core/alphaZeroOrchestrator.js.map +0 -1
|
@@ -1,14 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Simple Task Completion Detector
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* Key features:
|
|
8
|
-
* - Multi-signal analysis (tool usage, response content, state changes)
|
|
9
|
-
* - AI verification round before final completion
|
|
10
|
-
* - Confidence scoring
|
|
11
|
-
* - Work-in-progress detection
|
|
4
|
+
* Tracks tool usage and responses for the flow orchestrator.
|
|
5
|
+
* Simplified to avoid false positives that caused infinite loops.
|
|
12
6
|
*
|
|
13
7
|
* @license MIT
|
|
14
8
|
* @author Bo Shang
|
|
@@ -20,11 +14,6 @@ export interface ToolActivity {
|
|
|
20
14
|
hasOutput: boolean;
|
|
21
15
|
}
|
|
22
16
|
export interface CompletionSignals {
|
|
23
|
-
hasExplicitCompletionStatement: boolean;
|
|
24
|
-
hasIncompleteWorkIndicators: boolean;
|
|
25
|
-
hasPendingActionIndicators: boolean;
|
|
26
|
-
hasErrorIndicators: boolean;
|
|
27
|
-
hasFollowUpQuestions: boolean;
|
|
28
17
|
toolsUsedInLastResponse: number;
|
|
29
18
|
lastToolWasReadOnly: boolean;
|
|
30
19
|
consecutiveResponsesWithoutTools: number;
|
|
@@ -32,7 +21,6 @@ export interface CompletionSignals {
|
|
|
32
21
|
hasRecentCommits: boolean;
|
|
33
22
|
todoItemsPending: number;
|
|
34
23
|
todoItemsCompleted: number;
|
|
35
|
-
mentionsFutureWork: boolean;
|
|
36
24
|
completionConfidence: number;
|
|
37
25
|
}
|
|
38
26
|
export interface CompletionAnalysis {
|
|
@@ -51,55 +39,13 @@ export declare class TaskCompletionDetector {
|
|
|
51
39
|
private consecutiveNoTools;
|
|
52
40
|
private todoStats;
|
|
53
41
|
constructor();
|
|
54
|
-
/**
|
|
55
|
-
* Reset the detector state for a new task
|
|
56
|
-
*/
|
|
57
42
|
reset(): void;
|
|
58
|
-
/**
|
|
59
|
-
* Record a tool call
|
|
60
|
-
*/
|
|
61
43
|
recordToolCall(toolName: string, success: boolean, hasOutput: boolean): void;
|
|
62
|
-
/**
|
|
63
|
-
* Record a response (call after each AI response)
|
|
64
|
-
*/
|
|
65
44
|
recordResponse(response: string, toolsUsed: string[]): void;
|
|
66
|
-
/**
|
|
67
|
-
* Update todo statistics
|
|
68
|
-
*/
|
|
69
45
|
updateTodoStats(pending: number, completed: number): void;
|
|
70
|
-
/**
|
|
71
|
-
* Analyze the current state and determine if the task is complete
|
|
72
|
-
*/
|
|
73
46
|
analyzeCompletion(currentResponse: string, toolsUsedThisRound: string[]): CompletionAnalysis;
|
|
74
|
-
/**
|
|
75
|
-
* Gather all completion signals from the current state
|
|
76
|
-
*/
|
|
77
47
|
private gatherSignals;
|
|
78
|
-
/**
|
|
79
|
-
* Calculate confidence score for task completion
|
|
80
|
-
*/
|
|
81
|
-
private calculateConfidence;
|
|
82
|
-
/**
|
|
83
|
-
* Generate a verification prompt to ask the AI if the task is truly complete
|
|
84
|
-
*/
|
|
85
|
-
private generateVerificationPrompt;
|
|
86
|
-
/**
|
|
87
|
-
* Generate a verification prompt for stagnation cases
|
|
88
|
-
*/
|
|
89
|
-
private generateStagnationVerificationPrompt;
|
|
90
|
-
/**
|
|
91
|
-
* Get a human-readable reason for low confidence
|
|
92
|
-
*/
|
|
93
|
-
private getLowConfidenceReason;
|
|
94
|
-
/**
|
|
95
|
-
* Check if a verification response confirms completion
|
|
96
|
-
*/
|
|
97
48
|
isVerificationConfirmed(verificationResponse: string): boolean;
|
|
98
|
-
/**
|
|
99
|
-
* Check if a response contradicts itself by saying "complete" but also indicating incomplete work.
|
|
100
|
-
* This comprehensive list catches many ways AI might admit work isn't done while claiming completion.
|
|
101
|
-
*/
|
|
102
|
-
private responseContainsIncompleteIndicators;
|
|
103
49
|
}
|
|
104
50
|
export declare function getTaskCompletionDetector(): TaskCompletionDetector;
|
|
105
51
|
export declare function resetTaskCompletionDetector(): void;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"taskCompletionDetector.d.ts","sourceRoot":"","sources":["../../src/shell/taskCompletionDetector.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"taskCompletionDetector.d.ts","sourceRoot":"","sources":["../../src/shell/taskCompletionDetector.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC;IACjB,SAAS,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,WAAW,iBAAiB;IAChC,uBAAuB,EAAE,MAAM,CAAC;IAChC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,gCAAgC,EAAE,MAAM,CAAC;IACzC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,gBAAgB,EAAE,OAAO,CAAC;IAC1B,gBAAgB,EAAE,MAAM,CAAC;IACzB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,OAAO,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,iBAAiB,CAAC;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,OAAO,CAAC;IACtB,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAmBD,eAAO,MAAM,WAAW,aAStB,CAAC;AAEH,qBAAa,sBAAsB;IACjC,OAAO,CAAC,WAAW,CAAsB;IACzC,OAAO,CAAC,eAAe,CAAgB;IACvC,OAAO,CAAC,aAAa,CAAgB;IACrC,OAAO,CAAC,kBAAkB,CAAK;IAC/B,OAAO,CAAC,SAAS,CAAgC;;IAMjD,KAAK,IAAI,IAAI;IAQb,cAAc,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,GAAG,IAAI;IAiB5E,cAAc,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,IAAI;IAe3D,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,IAAI;IAIzD,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,kBAAkB,EAAE,MAAM,EAAE,GAAG,kBAAkB;IAqB5F,OAAO,CAAC,aAAa;IAwBrB,uBAAuB,CAAC,oBAAoB,EAAE,MAAM,GAAG,OAAO;CAG/D;AAID,wBAAgB,yBAAyB,IAAI,sBAAsB,CAKlE;AAED,wBAAgB,2BAA2B,IAAI,IAAI,CAIlD"}
|
|
@@ -1,61 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Simple Task Completion Detector
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* Key features:
|
|
8
|
-
* - Multi-signal analysis (tool usage, response content, state changes)
|
|
9
|
-
* - AI verification round before final completion
|
|
10
|
-
* - Confidence scoring
|
|
11
|
-
* - Work-in-progress detection
|
|
4
|
+
* Tracks tool usage and responses for the flow orchestrator.
|
|
5
|
+
* Simplified to avoid false positives that caused infinite loops.
|
|
12
6
|
*
|
|
13
7
|
* @license MIT
|
|
14
8
|
* @author Bo Shang
|
|
15
9
|
*/
|
|
16
|
-
// Keywords that strongly indicate task completion
|
|
17
|
-
const STRONG_COMPLETION_PATTERNS = [
|
|
18
|
-
/^(all\s+)?tasks?\s+(are\s+)?(now\s+)?(complete|done|finished)/im,
|
|
19
|
-
/^(i('ve|'m|\s+have|\s+am)\s+)?(successfully\s+)?(completed?|finished|done)\s+(all|the|with|everything)/im,
|
|
20
|
-
/^everything\s+(is\s+)?(now\s+)?(complete|done|finished)/im,
|
|
21
|
-
/^the\s+requested?\s+(task|work|changes?)\s+(is|are|has been)\s+(complete|done|finished)/im,
|
|
22
|
-
/^i\s+have\s+(now\s+)?(successfully\s+)?(completed?|finished|done)\s+(all|the|everything)/im,
|
|
23
|
-
/no\s+(more|further)\s+(tasks?|work|actions?|changes?)\s+(are\s+)?(needed|required|necessary)/im,
|
|
24
|
-
];
|
|
25
|
-
// Keywords that indicate work is still in progress
|
|
26
|
-
const INCOMPLETE_WORK_PATTERNS = [
|
|
27
|
-
/\b(next|then|now\s+I('ll|\s+will)|let\s+me|I('ll|\s+will)|going\s+to|about\s+to)\b/i,
|
|
28
|
-
/\b(continue|continuing|proceed|proceeding|working\s+on)\b/i,
|
|
29
|
-
/\b(TODO|FIXME|WIP|in\s+progress)\b/i,
|
|
30
|
-
/\b(still\s+need|remaining|left\s+to\s+do|more\s+to\s+do)\b/i,
|
|
31
|
-
/\b(step\s+\d+|phase\s+\d+|iteration\s+\d+)\b/i,
|
|
32
|
-
/\b(haven'?t\s+(yet|finished)|not\s+yet\s+(done|complete|finished))\b/i,
|
|
33
|
-
];
|
|
34
|
-
// Keywords that indicate pending actions
|
|
35
|
-
const PENDING_ACTION_PATTERNS = [
|
|
36
|
-
/\b(need\s+to|should|must|have\s+to|requires?)\b/i,
|
|
37
|
-
/\b(waiting|pending|queued)\b/i,
|
|
38
|
-
/\b(before\s+I\s+can|after\s+that|once\s+that)\b/i,
|
|
39
|
-
/\b(running|executing|processing)\b/i,
|
|
40
|
-
];
|
|
41
|
-
// Keywords that indicate errors or issues
|
|
42
|
-
const ERROR_PATTERNS = [
|
|
43
|
-
/\b(error|failed|failure|exception|issue|problem|bug)\b/i,
|
|
44
|
-
/\b(can'?t|cannot|couldn'?t|unable\s+to)\b/i,
|
|
45
|
-
/\b(fix|fixing|resolve|resolving|debug|debugging)\b/i,
|
|
46
|
-
];
|
|
47
|
-
// Keywords that indicate follow-up questions
|
|
48
|
-
const FOLLOWUP_QUESTION_PATTERNS = [
|
|
49
|
-
/\b(would\s+you\s+like|do\s+you\s+want|shall\s+I|should\s+I)\b/i,
|
|
50
|
-
/\b(let\s+me\s+know|please\s+(confirm|tell|let))\b/i,
|
|
51
|
-
/\?$/m,
|
|
52
|
-
];
|
|
53
|
-
// Keywords that indicate future work
|
|
54
|
-
const FUTURE_WORK_PATTERNS = [
|
|
55
|
-
/\b(could\s+also|might\s+want\s+to|consider|recommend)\b/i,
|
|
56
|
-
/\b(future|later|eventually|when\s+you\s+have\s+time)\b/i,
|
|
57
|
-
/\b(improvement|enhancement|optimization)\b/i,
|
|
58
|
-
];
|
|
59
10
|
// Read-only tool names
|
|
60
11
|
const READ_ONLY_TOOLS = new Set([
|
|
61
12
|
'read_file',
|
|
@@ -91,9 +42,6 @@ export class TaskCompletionDetector {
|
|
|
91
42
|
constructor() {
|
|
92
43
|
this.reset();
|
|
93
44
|
}
|
|
94
|
-
/**
|
|
95
|
-
* Reset the detector state for a new task
|
|
96
|
-
*/
|
|
97
45
|
reset() {
|
|
98
46
|
this.toolHistory = [];
|
|
99
47
|
this.responseHistory = [];
|
|
@@ -101,9 +49,6 @@ export class TaskCompletionDetector {
|
|
|
101
49
|
this.consecutiveNoTools = 0;
|
|
102
50
|
this.todoStats = { pending: 0, completed: 0 };
|
|
103
51
|
}
|
|
104
|
-
/**
|
|
105
|
-
* Record a tool call
|
|
106
|
-
*/
|
|
107
52
|
recordToolCall(toolName, success, hasOutput) {
|
|
108
53
|
this.toolHistory.push({
|
|
109
54
|
toolName,
|
|
@@ -112,7 +57,6 @@ export class TaskCompletionDetector {
|
|
|
112
57
|
hasOutput,
|
|
113
58
|
});
|
|
114
59
|
this.lastToolNames.push(toolName);
|
|
115
|
-
// Keep only recent history
|
|
116
60
|
if (this.toolHistory.length > 100) {
|
|
117
61
|
this.toolHistory = this.toolHistory.slice(-100);
|
|
118
62
|
}
|
|
@@ -120,9 +64,6 @@ export class TaskCompletionDetector {
|
|
|
120
64
|
this.lastToolNames = this.lastToolNames.slice(-20);
|
|
121
65
|
}
|
|
122
66
|
}
|
|
123
|
-
/**
|
|
124
|
-
* Record a response (call after each AI response)
|
|
125
|
-
*/
|
|
126
67
|
recordResponse(response, toolsUsed) {
|
|
127
68
|
this.responseHistory.push(response);
|
|
128
69
|
if (toolsUsed.length === 0) {
|
|
@@ -132,86 +73,35 @@ export class TaskCompletionDetector {
|
|
|
132
73
|
this.consecutiveNoTools = 0;
|
|
133
74
|
this.lastToolNames = toolsUsed;
|
|
134
75
|
}
|
|
135
|
-
// Keep only recent history
|
|
136
76
|
if (this.responseHistory.length > 20) {
|
|
137
77
|
this.responseHistory = this.responseHistory.slice(-20);
|
|
138
78
|
}
|
|
139
79
|
}
|
|
140
|
-
/**
|
|
141
|
-
* Update todo statistics
|
|
142
|
-
*/
|
|
143
80
|
updateTodoStats(pending, completed) {
|
|
144
81
|
this.todoStats = { pending, completed };
|
|
145
82
|
}
|
|
146
|
-
/**
|
|
147
|
-
* Analyze the current state and determine if the task is complete
|
|
148
|
-
*/
|
|
149
83
|
analyzeCompletion(currentResponse, toolsUsedThisRound) {
|
|
150
84
|
this.recordResponse(currentResponse, toolsUsedThisRound);
|
|
151
|
-
const signals = this.gatherSignals(currentResponse, toolsUsedThisRound);
|
|
152
|
-
|
|
85
|
+
const signals = this.gatherSignals(toolsUsedThisRound);
|
|
86
|
+
// Simple: check for explicit completion marker
|
|
87
|
+
const isComplete = currentResponse.includes('TASK_FULLY_COMPLETE');
|
|
88
|
+
const confidence = isComplete ? 1.0 : 0.5;
|
|
153
89
|
signals.completionConfidence = confidence;
|
|
154
|
-
// Determine completion status
|
|
155
|
-
let isComplete = false;
|
|
156
|
-
let reason = '';
|
|
157
|
-
let shouldVerify = false;
|
|
158
|
-
let verificationPrompt;
|
|
159
|
-
// High confidence completion
|
|
160
|
-
if (confidence >= 0.85 && signals.hasExplicitCompletionStatement && !signals.hasIncompleteWorkIndicators) {
|
|
161
|
-
isComplete = true;
|
|
162
|
-
reason = 'High confidence explicit completion statement with no incomplete work indicators';
|
|
163
|
-
}
|
|
164
|
-
// Medium confidence - needs verification
|
|
165
|
-
else if (confidence >= 0.6 && signals.hasExplicitCompletionStatement) {
|
|
166
|
-
shouldVerify = true;
|
|
167
|
-
reason = 'Medium confidence completion - AI verification recommended';
|
|
168
|
-
verificationPrompt = this.generateVerificationPrompt(signals);
|
|
169
|
-
}
|
|
170
|
-
// Low confidence - likely not complete
|
|
171
|
-
else if (confidence < 0.4) {
|
|
172
|
-
isComplete = false;
|
|
173
|
-
reason = this.getLowConfidenceReason(signals);
|
|
174
|
-
}
|
|
175
|
-
// Ambiguous case - check for stagnation
|
|
176
|
-
else if (this.consecutiveNoTools >= 3 && !signals.hasIncompleteWorkIndicators) {
|
|
177
|
-
shouldVerify = true;
|
|
178
|
-
reason = 'No tool activity for multiple rounds - verification needed';
|
|
179
|
-
verificationPrompt = this.generateStagnationVerificationPrompt();
|
|
180
|
-
}
|
|
181
|
-
// Default: not complete
|
|
182
|
-
else {
|
|
183
|
-
isComplete = false;
|
|
184
|
-
reason = 'Active work indicators detected or low completion confidence';
|
|
185
|
-
}
|
|
186
90
|
return {
|
|
187
91
|
isComplete,
|
|
188
92
|
confidence,
|
|
189
93
|
signals,
|
|
190
|
-
reason,
|
|
191
|
-
shouldVerify,
|
|
192
|
-
verificationPrompt,
|
|
94
|
+
reason: isComplete ? 'Explicit completion marker' : 'Task in progress',
|
|
95
|
+
shouldVerify: false,
|
|
96
|
+
verificationPrompt: undefined,
|
|
193
97
|
};
|
|
194
98
|
}
|
|
195
|
-
|
|
196
|
-
* Gather all completion signals from the current state
|
|
197
|
-
*/
|
|
198
|
-
gatherSignals(response, toolsUsed) {
|
|
199
|
-
const hasExplicitCompletionStatement = STRONG_COMPLETION_PATTERNS.some((p) => p.test(response));
|
|
200
|
-
const hasIncompleteWorkIndicators = INCOMPLETE_WORK_PATTERNS.some((p) => p.test(response));
|
|
201
|
-
const hasPendingActionIndicators = PENDING_ACTION_PATTERNS.some((p) => p.test(response));
|
|
202
|
-
const hasErrorIndicators = ERROR_PATTERNS.some((p) => p.test(response));
|
|
203
|
-
const hasFollowUpQuestions = FOLLOWUP_QUESTION_PATTERNS.some((p) => p.test(response));
|
|
204
|
-
const mentionsFutureWork = FUTURE_WORK_PATTERNS.some((p) => p.test(response));
|
|
99
|
+
gatherSignals(toolsUsed) {
|
|
205
100
|
const lastToolWasReadOnly = toolsUsed.length > 0 && toolsUsed.every((t) => READ_ONLY_TOOLS.has(t));
|
|
206
101
|
const recentTools = this.toolHistory.filter((t) => Date.now() - t.timestamp < 60000);
|
|
207
102
|
const hasRecentFileWrites = recentTools.some((t) => t.toolName === 'edit_file' || t.toolName === 'Edit');
|
|
208
103
|
const hasRecentCommits = recentTools.some((t) => t.toolName === 'bash' || t.toolName === 'Bash') && this.responseHistory.some((r) => r.includes('git commit') || r.includes('committed'));
|
|
209
104
|
return {
|
|
210
|
-
hasExplicitCompletionStatement,
|
|
211
|
-
hasIncompleteWorkIndicators,
|
|
212
|
-
hasPendingActionIndicators,
|
|
213
|
-
hasErrorIndicators,
|
|
214
|
-
hasFollowUpQuestions,
|
|
215
105
|
toolsUsedInLastResponse: toolsUsed.length,
|
|
216
106
|
lastToolWasReadOnly,
|
|
217
107
|
consecutiveResponsesWithoutTools: this.consecutiveNoTools,
|
|
@@ -219,174 +109,13 @@ export class TaskCompletionDetector {
|
|
|
219
109
|
hasRecentCommits,
|
|
220
110
|
todoItemsPending: this.todoStats.pending,
|
|
221
111
|
todoItemsCompleted: this.todoStats.completed,
|
|
222
|
-
|
|
223
|
-
completionConfidence: 0, // Will be calculated
|
|
112
|
+
completionConfidence: 0,
|
|
224
113
|
};
|
|
225
114
|
}
|
|
226
|
-
/**
|
|
227
|
-
* Calculate confidence score for task completion
|
|
228
|
-
*/
|
|
229
|
-
calculateConfidence(signals) {
|
|
230
|
-
let score = 0.5; // Start at neutral
|
|
231
|
-
// Strong positive signals
|
|
232
|
-
if (signals.hasExplicitCompletionStatement)
|
|
233
|
-
score += 0.25;
|
|
234
|
-
if (signals.hasRecentCommits)
|
|
235
|
-
score += 0.1;
|
|
236
|
-
if (signals.todoItemsPending === 0 && signals.todoItemsCompleted > 0)
|
|
237
|
-
score += 0.15;
|
|
238
|
-
// Strong negative signals
|
|
239
|
-
if (signals.hasIncompleteWorkIndicators)
|
|
240
|
-
score -= 0.3;
|
|
241
|
-
if (signals.hasPendingActionIndicators)
|
|
242
|
-
score -= 0.2;
|
|
243
|
-
if (signals.hasErrorIndicators)
|
|
244
|
-
score -= 0.25;
|
|
245
|
-
if (signals.todoItemsPending > 0)
|
|
246
|
-
score -= 0.15;
|
|
247
|
-
// Moderate signals
|
|
248
|
-
if (signals.toolsUsedInLastResponse > 0 && !signals.lastToolWasReadOnly)
|
|
249
|
-
score -= 0.1;
|
|
250
|
-
if (signals.consecutiveResponsesWithoutTools >= 2)
|
|
251
|
-
score += 0.1;
|
|
252
|
-
if (signals.hasFollowUpQuestions)
|
|
253
|
-
score -= 0.1;
|
|
254
|
-
if (signals.mentionsFutureWork && signals.hasExplicitCompletionStatement)
|
|
255
|
-
score += 0.05;
|
|
256
|
-
// Clamp to 0-1 range
|
|
257
|
-
return Math.max(0, Math.min(1, score));
|
|
258
|
-
}
|
|
259
|
-
/**
|
|
260
|
-
* Generate a verification prompt to ask the AI if the task is truly complete
|
|
261
|
-
*/
|
|
262
|
-
generateVerificationPrompt(signals) {
|
|
263
|
-
const concerns = [];
|
|
264
|
-
if (signals.todoItemsPending > 0) {
|
|
265
|
-
concerns.push(`there are ${signals.todoItemsPending} todo items still pending`);
|
|
266
|
-
}
|
|
267
|
-
if (signals.hasFollowUpQuestions) {
|
|
268
|
-
concerns.push('you asked follow-up questions');
|
|
269
|
-
}
|
|
270
|
-
if (signals.mentionsFutureWork) {
|
|
271
|
-
concerns.push('you mentioned potential future improvements');
|
|
272
|
-
}
|
|
273
|
-
const concernsText = concerns.length > 0 ? `However, ${concerns.join(' and ')}. ` : '';
|
|
274
|
-
return `You indicated the task might be complete. ${concernsText}Please confirm:
|
|
275
|
-
|
|
276
|
-
1. Have ALL the originally requested changes been implemented?
|
|
277
|
-
2. Are there any remaining errors or issues that need to be fixed?
|
|
278
|
-
3. Is there anything else you need to do to fully complete this task?
|
|
279
|
-
|
|
280
|
-
If everything is truly done, respond with exactly: "TASK_FULLY_COMPLETE"
|
|
281
|
-
If there's more work to do, describe what remains.`;
|
|
282
|
-
}
|
|
283
|
-
/**
|
|
284
|
-
* Generate a verification prompt for stagnation cases
|
|
285
|
-
*/
|
|
286
|
-
generateStagnationVerificationPrompt() {
|
|
287
|
-
return `I notice you haven't used any tools for several responses. Let me check:
|
|
288
|
-
|
|
289
|
-
1. Is the task complete? If so, summarize what was accomplished.
|
|
290
|
-
2. Are you blocked on something? If so, what do you need?
|
|
291
|
-
3. Is there more work to do? If so, please continue.
|
|
292
|
-
|
|
293
|
-
If everything is done, respond with exactly: "TASK_FULLY_COMPLETE"
|
|
294
|
-
Otherwise, please continue with the next action.`;
|
|
295
|
-
}
|
|
296
|
-
/**
|
|
297
|
-
* Get a human-readable reason for low confidence
|
|
298
|
-
*/
|
|
299
|
-
getLowConfidenceReason(signals) {
|
|
300
|
-
const reasons = [];
|
|
301
|
-
if (signals.hasIncompleteWorkIndicators) {
|
|
302
|
-
reasons.push('incomplete work indicators detected');
|
|
303
|
-
}
|
|
304
|
-
if (signals.hasPendingActionIndicators) {
|
|
305
|
-
reasons.push('pending action indicators found');
|
|
306
|
-
}
|
|
307
|
-
if (signals.hasErrorIndicators) {
|
|
308
|
-
reasons.push('error indicators present');
|
|
309
|
-
}
|
|
310
|
-
if (signals.toolsUsedInLastResponse > 0 && !signals.lastToolWasReadOnly) {
|
|
311
|
-
reasons.push('write operations performed');
|
|
312
|
-
}
|
|
313
|
-
if (signals.todoItemsPending > 0) {
|
|
314
|
-
reasons.push(`${signals.todoItemsPending} todo items still pending`);
|
|
315
|
-
}
|
|
316
|
-
return reasons.length > 0 ? reasons.join(', ') : 'no clear completion signals';
|
|
317
|
-
}
|
|
318
|
-
/**
|
|
319
|
-
* Check if a verification response confirms completion
|
|
320
|
-
*/
|
|
321
115
|
isVerificationConfirmed(verificationResponse) {
|
|
322
|
-
|
|
323
|
-
/^(yes|confirmed?|all\s+done|everything\s+(is\s+)?complete)/im.test(verificationResponse.trim()));
|
|
324
|
-
// Even if completion marker is present, check for contradictions
|
|
325
|
-
if (hasCompletionMarker && this.responseContainsIncompleteIndicators(verificationResponse)) {
|
|
326
|
-
return false;
|
|
327
|
-
}
|
|
328
|
-
return hasCompletionMarker;
|
|
329
|
-
}
|
|
330
|
-
/**
|
|
331
|
-
* Check if a response contradicts itself by saying "complete" but also indicating incomplete work.
|
|
332
|
-
* This comprehensive list catches many ways AI might admit work isn't done while claiming completion.
|
|
333
|
-
*/
|
|
334
|
-
responseContainsIncompleteIndicators(response) {
|
|
335
|
-
const incompletePatterns = [
|
|
336
|
-
// === INTEGRATION/DEPLOYMENT STATE ===
|
|
337
|
-
/hasn'?t\s+been\s+(integrated|implemented|connected|deployed|added|completed|tested|verified)\s*(yet|still)?/i,
|
|
338
|
-
/not\s+(yet\s+)?(integrated|implemented|connected|deployed|functional|working|complete|tested|verified)/i,
|
|
339
|
-
/ready\s+(for|to\s+be)\s+(integration|integrated|connected|deployed|testing|review)/i,
|
|
340
|
-
/needs?\s+to\s+be\s+(integrated|connected|deployed|added|hooked|wired|tested|reviewed|merged)/i,
|
|
341
|
-
/was\s+not\s+(performed|completed|implemented|deployed|integrated|tested)/i,
|
|
342
|
-
/the\s+\w+\s+(service|module|component|feature)\s+hasn'?t\s+been/i,
|
|
343
|
-
// === PARTIAL/INCOMPLETE STATE ===
|
|
344
|
-
/still\s+(stores?|uses?|has|contains?|needs?|requires?|missing|lacks?|broken)/i,
|
|
345
|
-
/\b(partially|mostly|almost|nearly|not\s+fully)\s+(complete|done|finished|implemented|working)/i,
|
|
346
|
-
/\b(only\s+)?(part|some|half|portion)\s+of\s+(the\s+)?(task|work|feature|implementation)/i,
|
|
347
|
-
// === QUALIFIER WORDS (uncertain completion) ===
|
|
348
|
-
/\b(should|might|may|could|appears?\s+to)\s+be\s+(complete|done|working|functional)/i,
|
|
349
|
-
/\btheoretically\s+(complete|done|working|functional)/i,
|
|
350
|
-
/\b(assuming|provided|if)\s+(everything|it|this|that)\s+(works?|is\s+correct)/i,
|
|
351
|
-
// === SELF-CONTRADICTION PHRASES ===
|
|
352
|
-
/\b(done|complete|finished)\s+(but|except|however|although|though)/i,
|
|
353
|
-
/however[,\s].{0,50}?(hasn'?t|not\s+yet|still\s+needs?|pending|remains?|missing|broken|failing)/i,
|
|
354
|
-
/\bbut\s+.{0,30}?(not|hasn'?t|won'?t|can'?t|doesn'?t|isn'?t|wasn'?t)/i,
|
|
355
|
-
// === FUTURE TENSE / DEFERRED WORK ===
|
|
356
|
-
/will\s+(need\s+to|require|have\s+to)\s+(integrate|connect|deploy|complete|implement|test|fix)/i,
|
|
357
|
-
/\b(left\s+as|deferred|postponed|out\s+of\s+scope|for\s+later|in\s+a\s+future)/i,
|
|
358
|
-
/\b(after\s+(restart|reboot|redeploy)|takes?\s+effect\s+after|once\s+you)/i,
|
|
359
|
-
// === REMAINING WORK INDICATORS ===
|
|
360
|
-
/\b(remaining|outstanding|pending|leftover)\s+(tasks?|items?|work|issues?|steps?)/i,
|
|
361
|
-
/\b(more\s+to\s+do|still\s+have\s+to|yet\s+to\s+be\s+done)/i,
|
|
362
|
-
/\b(blocker|blocked\s+by|waiting\s+(for|on)|depends?\s+on)/i,
|
|
363
|
-
// === ERROR/FAILURE STATE ===
|
|
364
|
-
/\b(failing|broken|erroring)\s+(tests?|builds?|checks?|validations?)/i,
|
|
365
|
-
/\btests?\s+(are\s+)?(still\s+)?failing/i,
|
|
366
|
-
/\b(errors?|warnings?|issues?)\s+to\s+(address|fix|resolve)/i,
|
|
367
|
-
/\b(doesn'?t|isn'?t|not)\s+(work|working|functional|functioning)/i,
|
|
368
|
-
// === MANUAL STEPS REQUIRED ===
|
|
369
|
-
/\b(you('ll|\s+will)\s+need\s+to|manually\s+(run|configure|set|update)|requires?\s+user)/i,
|
|
370
|
-
/\b(run\s+this|execute\s+the\s+following|apply\s+the\s+migration)/i,
|
|
371
|
-
// === TODO/FIXME IN PROSE ===
|
|
372
|
-
/\b(todo|fixme|hack|xxx):\s/i,
|
|
373
|
-
/\b(need\s+to|should|must)\s+(add|implement|create|write|build|fix)\b/i,
|
|
374
|
-
// === SCOPE LIMITATIONS ===
|
|
375
|
-
/\b(didn'?t|did\s+not)\s+have\s+(time|chance|opportunity)/i,
|
|
376
|
-
/\b(beyond|outside)\s+(the\s+)?scope/i,
|
|
377
|
-
/\b(for\s+now|at\s+this\s+point|currently)\s*.{0,20}?(not|without|lacks?|missing)/i,
|
|
378
|
-
];
|
|
379
|
-
for (const pattern of incompletePatterns) {
|
|
380
|
-
if (pattern.test(response)) {
|
|
381
|
-
return true;
|
|
382
|
-
}
|
|
383
|
-
}
|
|
384
|
-
return false;
|
|
116
|
+
return verificationResponse.includes('TASK_FULLY_COMPLETE');
|
|
385
117
|
}
|
|
386
118
|
}
|
|
387
|
-
/**
|
|
388
|
-
* Create a singleton instance for the shell to use
|
|
389
|
-
*/
|
|
390
119
|
let detectorInstance = null;
|
|
391
120
|
export function getTaskCompletionDetector() {
|
|
392
121
|
if (!detectorInstance) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"taskCompletionDetector.js","sourceRoot":"","sources":["../../src/shell/taskCompletionDetector.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"taskCompletionDetector.js","sourceRoot":"","sources":["../../src/shell/taskCompletionDetector.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AA6BH,uBAAuB;AACvB,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC;IAC9B,WAAW;IACX,MAAM;IACN,UAAU;IACV,YAAY;IACZ,aAAa;IACb,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,YAAY;IACZ,SAAS;IACT,UAAU;CACX,CAAC,CAAC;AAEH,qEAAqE;AACrE,MAAM,CAAC,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC;IACjC,WAAW;IACX,MAAM;IACN,MAAM;IACN,MAAM;IACN,iBAAiB;IACjB,YAAY;IACZ,UAAU;IACV,cAAc;CACf,CAAC,CAAC;AAEH,MAAM,OAAO,sBAAsB;IACzB,WAAW,GAAmB,EAAE,CAAC;IACjC,eAAe,GAAa,EAAE,CAAC;IAC/B,aAAa,GAAa,EAAE,CAAC;IAC7B,kBAAkB,GAAG,CAAC,CAAC;IACvB,SAAS,GAAG,EAAE,OAAO,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;IAEjD;QACE,IAAI,CAAC,KAAK,EAAE,CAAC;IACf,CAAC;IAED,KAAK;QACH,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC;QACtB,IAAI,CAAC,eAAe,GAAG,EAAE,CAAC;QAC1B,IAAI,CAAC,aAAa,GAAG,EAAE,CAAC;QACxB,IAAI,CAAC,kBAAkB,GAAG,CAAC,CAAC;QAC5B,IAAI,CAAC,SAAS,GAAG,EAAE,OAAO,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;IAChD,CAAC;IAED,cAAc,CAAC,QAAgB,EAAE,OAAgB,EAAE,SAAkB;QACnE,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC;YACpB,QAAQ;YACR,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,OAAO;YACP,SAAS;SACV,CAAC,CAAC;QACH,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAElC,IAAI,IAAI,CAAC,WAAW,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YAClC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC;QAClD,CAAC;QACD,IAAI,IAAI,CAAC,aAAa,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YACnC,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;IAED,cAAc,CAAC,QAAgB,EAAE,SAAmB;QAClD,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAEpC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,IAAI,CAAC,kBAAkB,EAAE,CAAC;QAC5B,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,kBAAkB,GAAG,CAAC,CAAC;YAC5B,IAAI,CAAC,aAAa,GAAG,SAAS,CAAC;QACjC,CAAC;QAED,IAAI,IAAI,CAAC,eAAe,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YACrC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;QACzD,CAAC;IACH,CAAC;IAED,eAAe,CAAC,OAAe,EAAE,SAAiB;QAChD,IAAI,CAAC,SAAS,GAAG,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;IAC
1C,CAAC;IAED,iBAAiB,CAAC,eAAuB,EAAE,kBAA4B;QACrE,IAAI,CAAC,cAAc,CAAC,eAAe,EAAE,kBAAkB,CAAC,CAAC;QAEzD,MAAM,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,kBAAkB,CAAC,CAAC;QAEvD,+CAA+C;QAC/C,MAAM,UAAU,GAAG,eAAe,CAAC,QAAQ,CAAC,qBAAqB,CAAC,CAAC;QACnE,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;QAE1C,OAAO,CAAC,oBAAoB,GAAG,UAAU,CAAC;QAE1C,OAAO;YACL,UAAU;YACV,UAAU;YACV,OAAO;YACP,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,4BAA4B,CAAC,CAAC,CAAC,kBAAkB;YACtE,YAAY,EAAE,KAAK;YACnB,kBAAkB,EAAE,SAAS;SAC9B,CAAC;IACJ,CAAC;IAEO,aAAa,CAAC,SAAmB;QACvC,MAAM,mBAAmB,GACvB,SAAS,CAAC,MAAM,GAAG,CAAC,IAAI,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAEzE,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,SAAS,GAAG,KAAK,CAAC,CAAC;QACrF,MAAM,mBAAmB,GAAG,WAAW,CAAC,IAAI,CAC1C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,WAAW,IAAI,CAAC,CAAC,QAAQ,KAAK,MAAM,CAC3D,CAAC;QACF,MAAM,gBAAgB,GAAG,WAAW,CAAC,IAAI,CACvC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,IAAI,CAAC,CAAC,QAAQ,KAAK,MAAM,CACtD,IAAI,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC;QAE3F,OAAO;YACL,uBAAuB,EAAE,SAAS,CAAC,MAAM;YACzC,mBAAmB;YACnB,gCAAgC,EAAE,IAAI,CAAC,kBAAkB;YACzD,mBAAmB;YACnB,gBAAgB;YAChB,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO;YACxC,kBAAkB,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS;YAC5C,oBAAoB,EAAE,CAAC;SACxB,CAAC;IACJ,CAAC;IAED,uBAAuB,CAAC,oBAA4B;QAClD,OAAO,oBAAoB,CAAC,QAAQ,CAAC,qBAAqB,CAAC,CAAC;IAC9D,CAAC;CACF;AAED,IAAI,gBAAgB,GAAkC,IAAI,CAAC;AAE3D,MAAM,UAAU,yBAAyB;IACvC,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,gBAAgB,GAAG,IAAI,sBAAsB,EAAE,CAAC;IAClD,CAAC;IACD,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED,MAAM,UAAU,2BAA2B;IACzC,IAAI,gBAAgB,EAAE,CAAC;QACrB,gBAAgB,CAAC,KAAK,EAAE,CAAC;IAC3B,CAAC;AACH,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "erosolar-cli",
|
|
3
|
-
"version": "2.1.191",
|
|
3
|
+
"version": "2.1.193",
|
|
4
4
|
"description": "Unified AI agent framework for the command line - Multi-provider support with schema-driven tools, code intelligence, and transparent reasoning",
|
|
5
5
|
"main": "dist/bin/erosolar.js",
|
|
6
6
|
"type": "module",
|
|
@@ -1,140 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* AlphaZero Orchestrator - Complete Self-Improvement System
|
|
3
|
-
*
|
|
4
|
-
* This module provides the "best" AlphaZero-style self-improvement for erosolar-cli:
|
|
5
|
-
*
|
|
6
|
-
* 1. DUAL-RESPONSE GENERATION
|
|
7
|
-
* - Generate 2 responses with different temperatures
|
|
8
|
-
* - Use LLM to evaluate and pick the winner
|
|
9
|
-
* - Learn which approach works better for task types
|
|
10
|
-
*
|
|
11
|
-
* 2. SELF-CRITIQUE LOOP
|
|
12
|
-
* - Critique responses for issues
|
|
13
|
-
* - Iteratively improve until quality threshold met
|
|
14
|
-
* - Learn from improvements for future responses
|
|
15
|
-
*
|
|
16
|
-
* 3. PATTERN LEARNING
|
|
17
|
-
* - Track tool sequences that work well
|
|
18
|
-
* - Record failure patterns to avoid
|
|
19
|
-
* - Store successful prompts for reuse
|
|
20
|
-
*
|
|
21
|
-
* 4. SOURCE IMPROVEMENT
|
|
22
|
-
* - Detect patterns in erosolar-cli itself
|
|
23
|
-
* - Apply validated fixes with git safety
|
|
24
|
-
* - Rollback on any failure
|
|
25
|
-
*
|
|
26
|
-
* Principal Investigator: Bo Shang
|
|
27
|
-
*/
|
|
28
|
-
import type { LLMProvider, ConversationMessage } from './types.js';
|
|
29
|
-
import { type ToolCallRecord, type CritiqueIssue, type ResponseEvaluation } from './alphaZeroEngine.js';
|
|
30
|
-
import { getLearningSummary } from './learningPersistence.js';
|
|
31
|
-
export interface AlphaZeroConfig {
|
|
32
|
-
dualResponseEnabled: boolean;
|
|
33
|
-
temperatureConservative: number;
|
|
34
|
-
temperatureCreative: number;
|
|
35
|
-
selfCritiqueEnabled: boolean;
|
|
36
|
-
maxCritiqueIterations: number;
|
|
37
|
-
minQualityThreshold: number;
|
|
38
|
-
patternLearningEnabled: boolean;
|
|
39
|
-
autoApplyLearning: boolean;
|
|
40
|
-
sourceImprovementEnabled: boolean;
|
|
41
|
-
minValidationsForSourceFix: number;
|
|
42
|
-
}
|
|
43
|
-
export declare const DEFAULT_ALPHAZERO_CONFIG: AlphaZeroConfig;
|
|
44
|
-
export interface AlphaZeroSession {
|
|
45
|
-
sessionId: string;
|
|
46
|
-
startTime: string;
|
|
47
|
-
taskType: string;
|
|
48
|
-
userQuery: string;
|
|
49
|
-
dualResponseUsed: boolean;
|
|
50
|
-
critiqueIterations: number;
|
|
51
|
-
finalQuality: number;
|
|
52
|
-
toolsUsed: string[];
|
|
53
|
-
success: boolean;
|
|
54
|
-
improvementsLearned: number;
|
|
55
|
-
}
|
|
56
|
-
export declare class AlphaZeroOrchestrator {
|
|
57
|
-
private config;
|
|
58
|
-
private provider;
|
|
59
|
-
private state;
|
|
60
|
-
constructor(config?: Partial<AlphaZeroConfig>);
|
|
61
|
-
/**
|
|
62
|
-
* Set the LLM provider for evaluations
|
|
63
|
-
*/
|
|
64
|
-
setProvider(provider: LLMProvider): void;
|
|
65
|
-
/**
|
|
66
|
-
* Start tracking a new task
|
|
67
|
-
*/
|
|
68
|
-
startTask(userQuery: string): string;
|
|
69
|
-
/**
|
|
70
|
-
* Record a tool call in current session
|
|
71
|
-
*/
|
|
72
|
-
recordToolCall(toolName: string, success: boolean): void;
|
|
73
|
-
/**
|
|
74
|
-
* Complete the current task and learn from it
|
|
75
|
-
*/
|
|
76
|
-
completeTask(response: string, toolCalls: ToolCallRecord[], success: boolean): Promise<{
|
|
77
|
-
qualityScore: number;
|
|
78
|
-
learned: string[];
|
|
79
|
-
suggestions: string[];
|
|
80
|
-
}>;
|
|
81
|
-
/**
|
|
82
|
-
* Generate two responses and pick the better one
|
|
83
|
-
*/
|
|
84
|
-
generateDualResponse(messages: ConversationMessage[], userQuery: string): Promise<{
|
|
85
|
-
winningResponse: string;
|
|
86
|
-
evaluation: ResponseEvaluation | null;
|
|
87
|
-
used: boolean;
|
|
88
|
-
}>;
|
|
89
|
-
private responsesAreSimilar;
|
|
90
|
-
private evaluateResponses;
|
|
91
|
-
/**
|
|
92
|
-
* Critique and iteratively improve a response
|
|
93
|
-
*/
|
|
94
|
-
selfCritiqueLoop(response: string, userQuery: string, toolCalls: ToolCallRecord[]): Promise<{
|
|
95
|
-
improvedResponse: string;
|
|
96
|
-
iterations: number;
|
|
97
|
-
issues: CritiqueIssue[];
|
|
98
|
-
qualityImprovement: number;
|
|
99
|
-
}>;
|
|
100
|
-
private critiqueResponse;
|
|
101
|
-
private improveResponse;
|
|
102
|
-
/**
|
|
103
|
-
* Get recommended tools for a task type
|
|
104
|
-
*/
|
|
105
|
-
getRecommendedTools(taskType: string): string[] | null;
|
|
106
|
-
/**
|
|
107
|
-
* Get patterns to avoid for a task type
|
|
108
|
-
*/
|
|
109
|
-
getPatternsToAvoid(taskType: string): string[];
|
|
110
|
-
/**
|
|
111
|
-
* Get comprehensive learning summary
|
|
112
|
-
*/
|
|
113
|
-
getSummary(): {
|
|
114
|
-
sessions: number;
|
|
115
|
-
improvements: number;
|
|
116
|
-
avgQuality: number;
|
|
117
|
-
learningData: ReturnType<typeof getLearningSummary>;
|
|
118
|
-
};
|
|
119
|
-
/**
|
|
120
|
-
* Get current configuration
|
|
121
|
-
*/
|
|
122
|
-
getConfig(): AlphaZeroConfig;
|
|
123
|
-
/**
|
|
124
|
-
* Update configuration
|
|
125
|
-
*/
|
|
126
|
-
updateConfig(updates: Partial<AlphaZeroConfig>): void;
|
|
127
|
-
/**
|
|
128
|
-
* Get formatted status for display
|
|
129
|
-
*/
|
|
130
|
-
getStatus(): string;
|
|
131
|
-
}
|
|
132
|
-
/**
|
|
133
|
-
* Get the global AlphaZero orchestrator instance
|
|
134
|
-
*/
|
|
135
|
-
export declare function getAlphaZeroOrchestrator(): AlphaZeroOrchestrator;
|
|
136
|
-
/**
|
|
137
|
-
* Initialize with a provider
|
|
138
|
-
*/
|
|
139
|
-
export declare function initializeAlphaZero(provider: LLMProvider): AlphaZeroOrchestrator;
|
|
140
|
-
//# sourceMappingURL=alphaZeroOrchestrator.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"alphaZeroOrchestrator.d.ts","sourceRoot":"","sources":["../../src/core/alphaZeroOrchestrator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAMH,OAAO,KAAK,EAAE,WAAW,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AACnE,OAAO,EAQL,KAAK,cAAc,EACnB,KAAK,aAAa,EAClB,KAAK,kBAAkB,EAGxB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAML,kBAAkB,EACnB,MAAM,0BAA0B,CAAC;AAalC,MAAM,WAAW,eAAe;IAE9B,mBAAmB,EAAE,OAAO,CAAC;IAC7B,uBAAuB,EAAE,MAAM,CAAC;IAChC,mBAAmB,EAAE,MAAM,CAAC;IAG5B,mBAAmB,EAAE,OAAO,CAAC;IAC7B,qBAAqB,EAAE,MAAM,CAAC;IAC9B,mBAAmB,EAAE,MAAM,CAAC;IAG5B,sBAAsB,EAAE,OAAO,CAAC;IAChC,iBAAiB,EAAE,OAAO,CAAC;IAG3B,wBAAwB,EAAE,OAAO,CAAC;IAClC,0BAA0B,EAAE,MAAM,CAAC;CACpC;AAED,eAAO,MAAM,wBAAwB,EAAE,eActC,CAAC;AAMF,MAAM,WAAW,gBAAgB;IAC/B,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,OAAO,CAAC;IAC1B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,OAAO,EAAE,OAAO,CAAC;IACjB,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AA0CD,qBAAa,qBAAqB;IAChC,OAAO,CAAC,MAAM,CAAkB;IAChC,OAAO,CAAC,QAAQ,CAA4B;IAC5C,OAAO,CAAC,KAAK,CAAiB;gBAElB,MAAM,GAAE,OAAO,CAAC,eAAe,CAAM;IAKjD;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,WAAW,GAAG,IAAI;IAIxC;;OAEG;IACH,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM;IAoBpC;;OAEG;IACH,cAAc,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,GAAG,IAAI;IAMxD;;OAEG;IACG,YAAY,CAChB,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,cAAc,EAAE,EAC3B,OAAO,EAAE,OAAO,GACf,OAAO,CAAC;QACT,YAAY,EAAE,MAAM,CAAC;QACrB,OAAO,EAAE,MAAM,EAAE,CAAC;QAClB,WAAW,EAAE,MAAM,EAAE,CAAC;KACvB,CAAC;IAkFF;;OAEG;IACG,oBAAoB,CACxB,QAAQ,EAAE,mBAAmB,EAAE,EAC/B,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC;QACT,eAAe,EAAE,MAAM,CAAC;QACxB,UAAU,EAAE,kBAAkB,GAAG,IAAI,CAAC;QACtC,IAAI,EAAE,OAAO,CAAC;KACf,CAAC;IAyCF,OAAO,CAAC,mBAAmB;YASb,iBAAiB;IAuB/B;;OAEG;IACG,gBAAgB,CACpB,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,cAAc,EAAE,GAC1B,OAAO,CAAC;QACT,gBAAgB,EAAE,MAAM,CAAC;QACzB,UAAU,EAAE,MAAM,CAAC;QACnB,MAAM,EAAE,aAAa,EAAE,CAAC;QACxB,kBAAkB,EAAE,MAAM,CAAC;KAC5B,CAAC;YAuEY,gBAAgB;YAmBhB,
eAAe;IAuB7B;;OAEG;IACH,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,IAAI;IAKtD;;OAEG;IACH,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,EAAE;IAK9C;;OAEG;IACH,UAAU,IAAI;QACZ,QAAQ,EAAE,MAAM,CAAC;QACjB,YAAY,EAAE,MAAM,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,YAAY,EAAE,UAAU,CAAC,OAAO,kBAAkB,CAAC,CAAC;KACrD;IAaD;;OAEG;IACH,SAAS,IAAI,eAAe;IAI5B;;OAEG;IACH,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,eAAe,CAAC,GAAG,IAAI;IAIrD;;OAEG;IACH,SAAS,IAAI,MAAM;CA4BpB;AAQD;;GAEG;AACH,wBAAgB,wBAAwB,IAAI,qBAAqB,CAKhE;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,WAAW,GAAG,qBAAqB,CAIhF"}
|