sentinel-agentos 0.3.6 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/dashboard.html +175 -0
- package/package.json +3 -2
- package/LICENSE +0 -21
- package/README.md +0 -1797
- package/dist/adapters/evaluation-bridge.d.ts +0 -78
- package/dist/adapters/evaluation-bridge.d.ts.map +0 -1
- package/dist/adapters/evaluation-bridge.js +0 -273
- package/dist/adapters/evaluation-bridge.js.map +0 -1
- package/dist/adapters/memory-bridge.d.ts +0 -110
- package/dist/adapters/memory-bridge.d.ts.map +0 -1
- package/dist/adapters/memory-bridge.js +0 -316
- package/dist/adapters/memory-bridge.js.map +0 -1
- package/dist/adapters/migrate.d.ts +0 -2
- package/dist/adapters/migrate.d.ts.map +0 -1
- package/dist/adapters/migrate.js +0 -63
- package/dist/adapters/migrate.js.map +0 -1
- package/dist/api.d.ts +0 -151
- package/dist/api.d.ts.map +0 -1
- package/dist/api.js +0 -179
- package/dist/api.js.map +0 -1
- package/dist/cli.d.ts +0 -16
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js +0 -350
- package/dist/cli.js.map +0 -1
- package/dist/core.d.ts +0 -151
- package/dist/core.d.ts.map +0 -1
- package/dist/core.js +0 -341
- package/dist/core.js.map +0 -1
- package/dist/evaluator/exec-evaluator.d.ts +0 -102
- package/dist/evaluator/exec-evaluator.d.ts.map +0 -1
- package/dist/evaluator/exec-evaluator.js +0 -266
- package/dist/evaluator/exec-evaluator.js.map +0 -1
- package/dist/evaluator/feedback.d.ts +0 -57
- package/dist/evaluator/feedback.d.ts.map +0 -1
- package/dist/evaluator/feedback.js +0 -225
- package/dist/evaluator/feedback.js.map +0 -1
- package/dist/evaluator/profiler.d.ts +0 -55
- package/dist/evaluator/profiler.d.ts.map +0 -1
- package/dist/evaluator/profiler.js +0 -117
- package/dist/evaluator/profiler.js.map +0 -1
- package/dist/guard/audit-log.d.ts +0 -47
- package/dist/guard/audit-log.d.ts.map +0 -1
- package/dist/guard/audit-log.js +0 -199
- package/dist/guard/audit-log.js.map +0 -1
- package/dist/guard/container-sandbox.d.ts +0 -25
- package/dist/guard/container-sandbox.d.ts.map +0 -1
- package/dist/guard/container-sandbox.js +0 -145
- package/dist/guard/container-sandbox.js.map +0 -1
- package/dist/guard/risk-gate.d.ts +0 -101
- package/dist/guard/risk-gate.d.ts.map +0 -1
- package/dist/guard/risk-gate.js +0 -200
- package/dist/guard/risk-gate.js.map +0 -1
- package/dist/guard/sandbox.d.ts +0 -112
- package/dist/guard/sandbox.d.ts.map +0 -1
- package/dist/guard/sandbox.js +0 -379
- package/dist/guard/sandbox.js.map +0 -1
- package/dist/guard/schema-gate.d.ts +0 -90
- package/dist/guard/schema-gate.d.ts.map +0 -1
- package/dist/guard/schema-gate.js +0 -452
- package/dist/guard/schema-gate.js.map +0 -1
- package/dist/guard/snapshot-verify.d.ts +0 -111
- package/dist/guard/snapshot-verify.d.ts.map +0 -1
- package/dist/guard/snapshot-verify.js +0 -571
- package/dist/guard/snapshot-verify.js.map +0 -1
- package/dist/index.d.ts +0 -28
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -59
- package/dist/index.js.map +0 -1
- package/dist/memory/episodic.d.ts +0 -76
- package/dist/memory/episodic.d.ts.map +0 -1
- package/dist/memory/episodic.js +0 -289
- package/dist/memory/episodic.js.map +0 -1
- package/dist/memory/semantic.d.ts +0 -68
- package/dist/memory/semantic.d.ts.map +0 -1
- package/dist/memory/semantic.js +0 -299
- package/dist/memory/semantic.js.map +0 -1
- package/dist/memory/working.d.ts +0 -53
- package/dist/memory/working.d.ts.map +0 -1
- package/dist/memory/working.js +0 -166
- package/dist/memory/working.js.map +0 -1
- package/dist/middleware/openclaw.d.ts +0 -45
- package/dist/middleware/openclaw.d.ts.map +0 -1
- package/dist/middleware/openclaw.js +0 -95
- package/dist/middleware/openclaw.js.map +0 -1
- package/dist/middleware/wrapper.d.ts +0 -54
- package/dist/middleware/wrapper.d.ts.map +0 -1
- package/dist/middleware/wrapper.js +0 -155
- package/dist/middleware/wrapper.js.map +0 -1
- package/dist/server.d.ts +0 -45
- package/dist/server.d.ts.map +0 -1
- package/dist/server.js +0 -246
- package/dist/server.js.map +0 -1
- package/dist/types/index.d.ts +0 -228
- package/dist/types/index.d.ts.map +0 -1
- package/dist/types/index.js +0 -23
- package/dist/types/index.js.map +0 -1
- package/scripts/sentinel-light.js +0 -234
|
@@ -1,266 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.PostExecEvaluator = exports.RuntimeEvaluator = exports.PreExecEvaluator = void 0;
|
|
4
|
-
/**
 * PreExecEvaluator — scores a tool call before it is executed.
 *
 * Combines the schema gate's check, the risk gate's evaluation, and two
 * heuristics derived from working memory (parameter quality and context
 * utilization) into a single snapshot object.
 */
class PreExecEvaluator {
    schemaGate;
    riskGate;
    workingMemory;
    /**
     * @param schemaGate    Object exposing `check(toolName, parameters)`.
     * @param riskGate      Object exposing `evaluate(toolName, parameters)`.
     * @param workingMemory Object exposing `openFiles` (array of strings),
     *                      `recentMessages` (array of `{ content }`) and
     *                      `recentToolResults` (anything with a `size`).
     */
    constructor(schemaGate, riskGate, workingMemory) {
        this.schemaGate = schemaGate;
        this.riskGate = riskGate;
        this.workingMemory = workingMemory;
    }
    /**
     * Evaluate a tool call before execution.
     *
     * @param toolName   Name of the tool about to run.
     * @param parameters Parameter object the tool will receive.
     * @returns `{ timestamp, toolName, schemaCheck, riskScore, paramQuality, contextUtilization }`.
     */
    evaluate(toolName, parameters) {
        // 1. Schema check
        const schemaCheck = this.schemaGate.check(toolName, parameters);
        // 2. Risk assessment
        const riskScore = this.riskGate.evaluate(toolName, parameters);
        // 3. Parameter quality: does the agent use context-aware params?
        const paramQuality = this.evaluateParamQuality(toolName, parameters);
        // 4. Context utilization: is the agent leveraging WorkingMemory?
        const contextUtilization = this.evaluateContextUtilization(toolName, parameters);
        return {
            timestamp: Date.now(),
            toolName,
            schemaCheck,
            riskScore,
            paramQuality,
            contextUtilization,
        };
    }
    /**
     * Score parameter quality based on contextual awareness.
     *
     * Starts at a neutral 0.5 and adjusts:
     *   +0.3 when `path` contains one of the open files,
     *   +0.1 when `content` is longer than 20 characters,
     *   -0.2 when `content` is the empty string,
     *   +0.1 when more than one string parameter looks like a file reference.
     *
     * @returns `{ score, observations }` with `score` clamped to [0, 1] and
     *          rounded to two decimals.
     */
    evaluateParamQuality(_toolName, parameters) {
        // Tolerate a missing parameter object instead of throwing in Object.values().
        const params = parameters ?? {};
        let score = 0.5; // neutral start
        const observations = [];
        // Check if path references an open file
        const path = params['path'];
        if (typeof path === 'string') {
            if (this.workingMemory.openFiles.some((f) => path.includes(f))) {
                score += 0.3;
                observations.push('Path references an open file');
            }
            // POSIX root, or a Windows drive prefix. Drive letters are
            // case-insensitive and either separator is accepted.
            if (path.startsWith('/') || /^[A-Za-z]:[\\/]/.test(path)) {
                observations.push('Absolute path used');
            }
        }
        // Check if content parameter is meaningful
        const content = params['content'];
        if (typeof content === 'string') {
            if (content.length > 20) {
                score = Math.min(1.0, score + 0.1);
            }
            if (content.length === 0) {
                score -= 0.2;
                observations.push('Empty content — possible error');
            }
        }
        // Check for file paths in multiple parameters
        // ('.json' is already covered by '.js'; kept for readability).
        const fileHints = ['.ts', '.js', '.json'];
        const filePaths = Object.values(params).filter((v) => typeof v === 'string' && fileHints.some((hint) => v.includes(hint)));
        if (filePaths.length > 1) {
            score = Math.min(1.0, score + 0.1);
            observations.push('Multiple file references — coordinated operation');
        }
        return {
            score: Math.round(Math.max(0, Math.min(1, score)) * 100) / 100,
            observations,
        };
    }
    /**
     * Score how well the agent uses stored context.
     *
     * Starts at 0.4 and adds 0.1 when recent messages exist, 0.1 when cached
     * tool results exist, and 0.1 for each distinct word (longer than three
     * characters, first five per message, last three messages) that appears
     * in the parameter values.
     *
     * @returns `{ score, patterns }` with `score` capped at 1 and rounded to
     *          two decimals.
     */
    evaluateContextUtilization(_toolName, parameters) {
        const params = parameters ?? {};
        let score = 0.4;
        const patterns = [];
        const recentMessages = this.workingMemory.recentMessages;
        // Check if agent references recent messages
        if (recentMessages.length > 0) {
            score += 0.1;
            patterns.push(`${recentMessages.length} recent messages available`);
        }
        // Check if agent uses cached tool results
        const cachedCount = this.workingMemory.recentToolResults.size;
        if (cachedCount > 0) {
            score += 0.1;
            patterns.push(`${cachedCount} cached results available`);
        }
        // Render nested objects as JSON: String(obj) would yield
        // "[object Object]", hiding the real content and falsely matching
        // context words such as "[object".
        const toText = (value) => {
            if (typeof value === 'string')
                return value;
            if (value !== null && typeof value === 'object') {
                try {
                    return JSON.stringify(value) ?? '';
                }
                catch {
                    // Cyclic or otherwise unserialisable value.
                    return String(value);
                }
            }
            return String(value);
        };
        // Check parameter values for context patterns
        const allValues = Object.values(params).map(toText).join(' ');
        // Each distinct word is credited once, even if several messages repeat it.
        const seenWords = new Set();
        for (const msg of recentMessages.slice(-3)) {
            if (typeof msg?.content !== 'string')
                continue;
            const words = msg.content.split(/\s+/).filter((w) => w.length > 3);
            for (const word of words.slice(0, 5)) {
                if (seenWords.has(word))
                    continue;
                seenWords.add(word);
                if (allValues.includes(word)) {
                    score += 0.1;
                    patterns.push(`Parameter references recent context: "${word}"`);
                }
            }
        }
        return {
            score: Math.round(Math.min(1.0, score) * 100) / 100,
            patterns,
        };
    }
}
|
|
117
|
-
exports.PreExecEvaluator = PreExecEvaluator;
|
|
118
|
-
/**
 * RuntimeEvaluator — captures metrics for a completed tool execution.
 *
 * Tracks retries, self-corrections and timeouts, and keeps a per-tool
 * call/success history that is used to judge tool selection when no
 * expected tool is supplied.
 */
class RuntimeEvaluator {
    /** Per-tool history: toolName -> `{ calls, successes }`. */
    toolHistory = new Map();
    /**
     * Evaluate a completed tool execution and record it in the history.
     *
     * @param options `{ toolName, expectedTool?, startTime, endTime, toolResult,
     *                hadTimeout, retryCount, wasSelfCorrected }`.
     * @returns `{ retryCount, selfCorrected, hadTimeout, toolSuccess,
     *          toolSelectionMatch, adaptiveScore, durationMs }`.
     *          `toolSelectionMatch` is `undefined` when it cannot be judged.
     */
    evaluate(options) {
        const durationMs = options.endTime - options.startTime;
        // A call succeeded when it did not time out and produced a result.
        const toolSuccess = !options.hadTimeout && options.toolResult !== undefined;
        // Tool selection accuracy: compare against historical patterns
        let toolSelectionMatch;
        if (options.expectedTool) {
            // Direct comparison if expectedTool is provided
            toolSelectionMatch = options.toolName === options.expectedTool;
        }
        else {
            // Auto-detect from the history gathered BEFORE this call is recorded.
            const history = this.toolHistory.get(options.toolName);
            if (history) {
                const historicalSuccessRate = history.calls > 0
                    ? history.successes / history.calls
                    : 0;
                // >70% historical success counts as a "good" selection; otherwise unknown.
                toolSelectionMatch = historicalSuccessRate > 0.7 ? true : undefined;
            }
        }
        // Record this call in history
        this.recordToolCall(options.toolName, toolSuccess);
        // A missing or non-finite retry count would turn the score into NaN,
        // which Math.max/Math.min do not clamp away; treat it as zero retries.
        const retryCount = Number.isFinite(options.retryCount)
            ? Math.max(0, options.retryCount)
            : 0;
        // Adaptive score: composite of retry rate, timeout, correction
        let adaptiveScore = 1.0;
        adaptiveScore -= retryCount * 0.15; // Each retry costs 0.15
        if (options.hadTimeout)
            adaptiveScore -= 0.5;
        if (options.wasSelfCorrected)
            adaptiveScore += 0.2; // Self-correction is good!
        adaptiveScore = Math.max(0, Math.min(1, adaptiveScore));
        return {
            retryCount,
            selfCorrected: options.wasSelfCorrected,
            hadTimeout: options.hadTimeout,
            toolSuccess,
            toolSelectionMatch,
            adaptiveScore: Math.round(adaptiveScore * 100) / 100,
            durationMs,
        };
    }
    /** Record one call (and whether it succeeded) in the history tracker. */
    recordToolCall(toolName, success) {
        const existing = this.toolHistory.get(toolName);
        if (existing) {
            existing.calls++;
            if (success)
                existing.successes++;
        }
        else {
            this.toolHistory.set(toolName, { calls: 1, successes: success ? 1 : 0 });
        }
    }
    /**
     * Get per-tool statistics.
     *
     * @returns A plain object mapping each tool name to
     *          `{ calls, successRate }` (rate rounded to two decimals).
     */
    getToolAccuracy() {
        // Object.fromEntries defines own properties, so a tool literally named
        // "__proto__" becomes a key instead of replacing the result's prototype.
        return Object.fromEntries(Array.from(this.toolHistory, ([tool, history]) => [
            tool,
            {
                calls: history.calls,
                successRate: history.calls > 0
                    ? Math.round((history.successes / history.calls) * 100) / 100
                    : 0,
            },
        ]));
    }
}
|
|
196
|
-
exports.RuntimeEvaluator = RuntimeEvaluator;
|
|
197
|
-
/**
 * PostExecEvaluator — captures metrics after execution.
 *
 * Scores verification results and user acceptance, and keeps a tracker
 * of tool results so callers can mark which ones were referenced later.
 */
class PostExecEvaluator {
    /** operationId -> `{ result, referenced }`, used for utilization scoring. */
    resultReferenceTracker = new Map();
    /**
     * Evaluate post-execution outcomes.
     *
     * @param options `{ verifyPassed, verifyChecks, verifyFailures, userAccepted,
     *                userProvidedEdit, resultWasUsed, diffLinesChanged }`.
     * @returns `{ verifyPassed, verifyScore, userAccepted, userEditRate,
     *          resultUtilized, outcomeScore, healthy, diffLinesChanged }`;
     *          both scores are within [0, 1] and rounded to two decimals.
     */
    evaluate(options) {
        // Missing or non-finite counters are treated as zero so they cannot
        // leak NaN into the scores.
        const checks = Number.isFinite(options.verifyChecks) ? options.verifyChecks : 0;
        const failures = Number.isFinite(options.verifyFailures) ? options.verifyFailures : 0;
        // Verify score: share of checks that did not fail. Clamped because an
        // inconsistent report (failures > checks) would otherwise go negative
        // and drag the outcome score below zero.
        const rawVerifyScore = checks > 0 ? 1 - failures / checks : 1;
        const verifyScore = Math.max(0, Math.min(1, rawVerifyScore));
        // User acceptance: accepted = 1.0, edited = 0.3, no signal = 0.7
        const acceptance = options.userAccepted ? 1.0 : options.userProvidedEdit ? 0.3 : 0.7;
        // Composite outcome score (weights 0.3 / 0.4 / 0.3)
        const outcomeScore = (verifyScore * 0.3 +
            acceptance * 0.4 +
            (options.resultWasUsed ? 0.3 : 0));
        // Overall health flag
        const healthy = verifyScore > 0.8 && acceptance > 0.5;
        return {
            verifyPassed: options.verifyPassed,
            verifyScore: Math.round(verifyScore * 100) / 100,
            userAccepted: options.userAccepted,
            userEditRate: options.userProvidedEdit ? 1 : 0,
            resultUtilized: options.resultWasUsed,
            outcomeScore: Math.round(outcomeScore * 100) / 100,
            healthy,
            diffLinesChanged: options.diffLinesChanged,
        };
    }
    /**
     * Track a tool result for later utilization detection.
     * Re-tracking an existing operationId resets its `referenced` flag.
     */
    trackResult(operationId, result) {
        this.resultReferenceTracker.set(operationId, { result, referenced: false });
    }
    /**
     * Mark a previously-tracked result as referenced (used by the agent later).
     * Unknown operation ids are ignored.
     */
    markResultReferenced(operationId) {
        const entry = this.resultReferenceTracker.get(operationId);
        if (entry)
            entry.referenced = true;
    }
    /**
     * Check if a result has been utilized by the agent.
     * @returns `false` for ids that were never tracked.
     */
    isResultReferenced(operationId) {
        return this.resultReferenceTracker.get(operationId)?.referenced ?? false;
    }
    /**
     * Get overall result utilization rate.
     * @returns Referenced / tracked, rounded to two decimals; 0 when nothing is tracked.
     */
    getUtilizationRate() {
        const total = this.resultReferenceTracker.size;
        if (total === 0)
            return 0;
        let referenced = 0;
        for (const entry of this.resultReferenceTracker.values()) {
            if (entry.referenced)
                referenced++;
        }
        return Math.round((referenced / total) * 100) / 100;
    }
}
|
|
265
|
-
exports.PostExecEvaluator = PostExecEvaluator;
|
|
266
|
-
//# sourceMappingURL=exec-evaluator.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"exec-evaluator.js","sourceRoot":"","sources":["../../src/evaluator/exec-evaluator.ts"],"names":[],"mappings":";;;AAWA;;;;;GAKG;AACH,MAAa,gBAAgB;IACnB,UAAU,CAAa;IACvB,QAAQ,CAAW;IACnB,aAAa,CAAgB;IAErC,YACE,UAAsB,EACtB,QAAkB,EAClB,aAA4B;QAE5B,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,QAAQ,CACN,QAAgB,EAChB,UAAmC;QAEnC,kBAAkB;QAClB,MAAM,WAAW,GAAgB,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAE7E,qBAAqB;QACrB,MAAM,SAAS,GAAc,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAE1E,iEAAiE;QACjE,MAAM,YAAY,GAAG,IAAI,CAAC,oBAAoB,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAErE,iEAAiE;QACjE,MAAM,kBAAkB,GAAG,IAAI,CAAC,0BAA0B,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAEjF,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,QAAQ;YACR,WAAW;YACX,SAAS;YACT,YAAY;YACZ,kBAAkB;SACnB,CAAC;IACJ,CAAC;IAED;;;;;;OAMG;IACK,oBAAoB,CAC1B,SAAiB,EACjB,UAAmC;QAEnC,IAAI,KAAK,GAAG,GAAG,CAAC,CAAC,gBAAgB;QACjC,MAAM,YAAY,GAAa,EAAE,CAAC;QAElC,wCAAwC;QACxC,IAAI,OAAO,UAAU,CAAC,MAAM,CAAC,KAAK,QAAQ,EAAE,CAAC;YAC3C,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAW,CAAC;YAC1C,IAAI,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC/D,KAAK,IAAI,GAAG,CAAC;gBACb,YAAY,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;YACpD,CAAC;YACD,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE,CAAC;gBACpD,YAAY,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;YAC1C,CAAC;QACH,CAAC;QAED,2CAA2C;QAC3C,IAAI,OAAO,UAAU,CAAC,SAAS,CAAC,KAAK,QAAQ,EAAE,CAAC;YAC9C,MAAM,OAAO,GAAG,UAAU,CAAC,SAAS,CAAW,CAAC;YAChD,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;gBACxB,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC;YACrC,CAAC;YACD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACzB,KAAK,IAAI,GAAG,CAAC;gBACb,YAAY,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;YACtD,CAAC;QACH,CAAC;QAED,8CAA8C;QAC9C,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAChD,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,CAAC,QAAQ
,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAChG,CAAC;QAEF,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC;YACnC,YAAY,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;QACxE,CAAC;QAED,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;YAC9D,YAAY;SACb,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,0BAA0B,CAChC,SAAiB,EACjB,UAAmC;QAEnC,IAAI,KAAK,GAAG,GAAG,CAAC;QAChB,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,4CAA4C;QAC5C,IAAI,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjD,KAAK,IAAI,GAAG,CAAC;YACb,QAAQ,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,MAAM,4BAA4B,CAAC,CAAC;QACzF,CAAC;QAED,0CAA0C;QAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,iBAAiB,CAAC,IAAI,CAAC;QAC9D,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;YACpB,KAAK,IAAI,GAAG,CAAC;YACb,QAAQ,CAAC,IAAI,CAAC,GAAG,WAAW,2BAA2B,CAAC,CAAC;QAC3D,CAAC;QAED,8CAA8C;QAC9C,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClE,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9D,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YACnE,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBACrC,IAAI,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC7B,KAAK,IAAI,GAAG,CAAC;oBACb,QAAQ,CAAC,IAAI,CAAC,yCAAyC,IAAI,GAAG,CAAC,CAAC;gBAClE,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;YACnD,QAAQ;SACT,CAAC;IACJ,CAAC;CACF;AA1ID,4CA0IC;AAED;;;;;GAKG;AACH,MAAa,gBAAgB;IAC3B,iFAAiF;IACzE,WAAW,GAAsD,IAAI,GAAG,EAAE,CAAC;IAEnF;;OAEG;IACH,QAAQ,CAAC,OASR;QACC,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,GAAG,OAAO,CAAC,SAAS,CAAC;QACvD,MAAM,WAAW,GAAG,CAAC,OAAO,CAAC,UAAU,IAAI,OAAO,CAAC,UAAU,KAAK,SAAS,CAAC;QAE5E,+DAA+D;QAC/D,IAAI,kBAAuC,CAAC;QAC5C,IAAI,OAAO,
CAAC,YAAY,EAAE,CAAC;YACzB,gDAAgD;YAChD,kBAAkB,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,YAAY,CAAC;QACjE,CAAC;aAAM,CAAC;YACN,wEAAwE;YACxE,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YACvD,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,qBAAqB,GAAG,OAAO,CAAC,KAAK,GAAG,CAAC;oBAC7C,CAAC,CAAC,OAAO,CAAC,SAAS,GAAG,OAAO,CAAC,KAAK;oBACnC,CAAC,CAAC,CAAC,CAAC;gBACN,2EAA2E;gBAC3E,kBAAkB,GAAG,qBAAqB,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;YACtE,CAAC;QACH,CAAC;QAED,8BAA8B;QAC9B,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;QAEnD,+DAA+D;QAC/D,IAAI,aAAa,GAAG,GAAG,CAAC;QACxB,aAAa,IAAI,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC,wBAAwB;QACpE,IAAI,OAAO,CAAC,UAAU;YAAE,aAAa,IAAI,GAAG,CAAC;QAC7C,IAAI,OAAO,CAAC,gBAAgB;YAAE,aAAa,IAAI,GAAG,CAAC,CAAC,2BAA2B;QAC/E,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC,CAAC;QAExD,OAAO;YACL,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,aAAa,EAAE,OAAO,CAAC,gBAAgB;YACvC,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,WAAW;YACX,kBAAkB;YAClB,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,GAAG,CAAC,GAAG,GAAG;YACpD,UAAU;SACX,CAAC;IACJ,CAAC;IAED,gDAAgD;IACxC,cAAc,CAAC,QAAgB,EAAE,OAAgB;QACvD,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAChD,IAAI,QAAQ,EAAE,CAAC;YACb,QAAQ,CAAC,KAAK,EAAE,CAAC;YACjB,IAAI,OAAO;gBAAE,QAAQ,CAAC,SAAS,EAAE,CAAC;QACpC,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAC3E,CAAC;IACH,CAAC;IAED,6CAA6C;IAC7C,eAAe;QACb,MAAM,MAAM,GAA2D,EAAE,CAAC;QAC1E,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YAC/C,MAAM,CAAC,IAAI,CAAC,GAAG;gBACb,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,WAAW,EAAE,OAAO,CAAC,KAAK,GAAG,CAAC;oBAC5B,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;oBAC7D,CAAC,CAAC,CAAC;aACN,CAAC;QACJ,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AAlFD,4CAkFC;AAED;;;;;GAKG;AACH,MAAa,iBAAiB;IAC5B,sDAAsD;IAC9C,sBAAsB,GAA0D,IAAI,GAAG,EAAE,CAAC;IAElG;;OAEG;IA
CH,QAAQ,CAAC,OAQR;QACC,eAAe;QACf,MAAM,WAAW,GAAG,OAAO,CAAC,YAAY,GAAG,CAAC;YAC1C,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,GAAG,OAAO,CAAC,YAAY,CAAC;YACrD,CAAC,CAAC,CAAC,CAAC;QAEN,kBAAkB;QAClB,MAAM,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;QAErF,0BAA0B;QAC1B,MAAM,YAAY,GAAG,CACnB,WAAW,GAAG,GAAG;YACjB,UAAU,GAAG,GAAG;YAChB,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAClC,CAAC;QAEF,sBAAsB;QACtB,MAAM,OAAO,GAAG,WAAW,GAAG,GAAG,IAAI,UAAU,GAAG,GAAG,CAAC;QAEtD,OAAO;YACL,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,GAAG,GAAG,CAAC,GAAG,GAAG;YAChD,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,YAAY,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,cAAc,EAAE,OAAO,CAAC,aAAa;YACrC,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,GAAG,CAAC,GAAG,GAAG;YAClD,OAAO;YACP,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;SAC3C,CAAC;IACJ,CAAC;IAED;;;OAGG;IACH,WAAW,CAAC,WAAmB,EAAE,MAAe;QAC9C,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,WAAW,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC;IAC9E,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,WAAmB;QACtC,MAAM,KAAK,GAAG,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QAC3D,IAAI,KAAK;YAAE,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,kBAAkB,CAAC,WAAmB;QACpC,OAAO,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,UAAU,IAAI,KAAK,CAAC;IAC3E,CAAC;IAED;;OAEG;IACH,kBAAkB;QAChB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,sBAAsB,CAAC,MAAM,EAAE,CAAC,CAAC;QACjE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QACnC,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC;QAC9D,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;IAC/D,CAAC;CACF;AA9ED,8CA8EC"}
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
import { ImplicitFeedback, SignalType } from '../types';
|
|
2
|
-
import { AuditEntry } from '../types';
|
|
3
|
-
/**
 * ImplicitFeedbackEngine — collects implicit user-satisfaction signals.
 *
 * Signals enter the engine in one of two ways:
 *   - manually, through `record()`;
 *   - automatically, through `autoDetect()`, which derives signals from
 *     audit-log entries.
 */
export declare class ImplicitFeedbackEngine {
    private feedbackLog;

    /**
     * Store one feedback signal and return the stored entry.
     *
     * @param signal      Kind of signal observed.
     * @param sessionId   Session the signal belongs to.
     * @param operationId Optional operation the signal refers to.
     * @param confidence  Optional confidence attached to the signal.
     * @param source      Optional label describing where the signal came from.
     */
    record(
        signal: SignalType,
        sessionId: string,
        operationId?: string,
        confidence?: number,
        source?: string
    ): ImplicitFeedback;

    /**
     * Scan audit entries and record the signals they imply.
     *
     * Documented detection rules (deliberately low confidence):
     *   - verify FAIL or WARN → user_provided_correction
     *   - same tool + params called within 60s → user_repeated_instruction
     *   - an operation that was retried and eventually passed → agent_self_corrected
     *
     * Auto-detected signals are supplementary data, not primary quality
     * indicators.
     *
     * @param entries   Recent audit entries to analyze.
     * @param sessionId Session to attribute signals to.
     * @returns Number of signals auto-detected.
     */
    autoDetect(entries: AuditEntry[], sessionId: string): number;

    private getSignalStrength;

    /**
     * Satisfaction score, optionally restricted to a session and a recent
     * time window. (Scoring formula is not visible in this declaration.)
     */
    getSatisfactionScore(sessionId?: string, recentHours?: number): number;

    /** Return logged feedback entries, optionally narrowed by the given filter. */
    query(filter?: {
        signal?: SignalType;
        sessionId?: string;
        minStrength?: number;
        maxStrength?: number;
        since?: number;
        limit?: number;
    }): ImplicitFeedback[];

    /** Aggregate counters over the logged feedback. */
    stats(): {
        totalSignals: number;
        positiveSignals: number;
        negativeSignals: number;
        averageStrength: number;
        mostCommonSignal: SignalType | null;
    };
}
|
|
57
|
-
//# sourceMappingURL=feedback.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/evaluator/feedback.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACxD,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAUtC;;;;;;;;;;;;;GAaG;AACH,qBAAa,sBAAsB;IACjC,OAAO,CAAC,WAAW,CAA0B;IAE7C;;OAEG;IACH,MAAM,CACJ,MAAM,EAAE,UAAU,EAClB,SAAS,EAAE,MAAM,EACjB,WAAW,CAAC,EAAE,MAAM,EACpB,UAAU,SAAM,EAChB,MAAM,SAAkB,GACvB,gBAAgB;IAsBnB;;;;;;;;;;;;;;OAcG;IACH,UAAU,CAAC,OAAO,EAAE,UAAU,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM;IAqE5D,OAAO,CAAC,iBAAiB;IAkBzB,oBAAoB,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,WAAW,SAAK,GAAG,MAAM;IA8BlE,KAAK,CAAC,MAAM,GAAE;QACZ,MAAM,CAAC,EAAE,UAAU,CAAC;QACpB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,KAAK,CAAC,EAAE,MAAM,CAAC;KACX,GAAG,gBAAgB,EAAE;IAa3B,KAAK,IAAI;QACP,YAAY,EAAE,MAAM,CAAC;QACrB,eAAe,EAAE,MAAM,CAAC;QACxB,eAAe,EAAE,MAAM,CAAC;QACxB,eAAe,EAAE,MAAM,CAAC;QACxB,gBAAgB,EAAE,UAAU,GAAG,IAAI,CAAC;KACrC;CAuBF"}
|
|
@@ -1,225 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.ImplicitFeedbackEngine = void 0;
|
|
37
|
-
const crypto = __importStar(require("crypto"));
|
|
38
|
-
/**
|
|
39
|
-
* Generate a unique feedback ID.
|
|
40
|
-
*/
|
|
41
|
-
function generateFeedbackId() {
|
|
42
|
-
return `fb_${Date.now()}_${crypto.randomBytes(4).toString('hex')}`;
|
|
43
|
-
}
|
|
44
|
-
/**
|
|
45
|
-
* ImplicitFeedbackEngine — captures and interprets implicit user signals.
|
|
46
|
-
*
|
|
47
|
-
* Instead of relying on explicit "thumbs up/down", this engine
|
|
48
|
-
* detects subtle signals from user behavior to infer satisfaction.
|
|
49
|
-
*
|
|
50
|
-
* Two modes:
|
|
51
|
-
* - Manual: caller provides explicit signals via record()
|
|
52
|
-
* - Auto-detect: scans audit log to infer signals (results unused,
|
|
53
|
-
* results modified later, repeated same tool, verify failures)
|
|
54
|
-
*
|
|
55
|
-
* This is the key differentiator of AgentOS: it learns from
|
|
56
|
-
* what users DO, not just what they SAY.
|
|
57
|
-
*/
|
|
58
|
-
class ImplicitFeedbackEngine {
|
|
59
|
-
feedbackLog = [];
|
|
60
|
-
/**
|
|
61
|
-
* Record an implicit feedback signal.
|
|
62
|
-
*/
|
|
63
|
-
record(signal, sessionId, operationId, confidence = 0.8, source = 'auto-detected') {
|
|
64
|
-
const strength = this.getSignalStrength(signal);
|
|
65
|
-
const feedback = {
|
|
66
|
-
id: generateFeedbackId(),
|
|
67
|
-
timestamp: Date.now(),
|
|
68
|
-
signal,
|
|
69
|
-
strength,
|
|
70
|
-
confidence,
|
|
71
|
-
sessionId,
|
|
72
|
-
operationId,
|
|
73
|
-
source,
|
|
74
|
-
};
|
|
75
|
-
this.feedbackLog.push(feedback);
|
|
76
|
-
return feedback;
|
|
77
|
-
}
|
|
78
|
-
// ══════════════════════════════════
|
|
79
|
-
// Auto-detect feedback from audit log
|
|
80
|
-
// ══════════════════════════════════
|
|
81
|
-
/**
|
|
82
|
-
* Scan the audit log and auto-detect implicit feedback signals.
|
|
83
|
-
*
|
|
84
|
-
* Detection rules (conservative — low confidence to avoid false positives):
|
|
85
|
-
* - verify FAIL or WARN → user_provided_correction (agent made mistakes)
|
|
86
|
-
* - same tool+params called within 60s → user_repeated_instruction (low confidence, noisy)
|
|
87
|
-
* - high risk operations that were retried and eventually passed → agent_self_corrected
|
|
88
|
-
*
|
|
89
|
-
* Note: auto-detected signals carry lower confidence than explicit user feedback.
|
|
90
|
-
* They serve as supplementary data, not primary quality indicators.
|
|
91
|
-
*
|
|
92
|
-
* @param entries Recent audit entries to analyze
|
|
93
|
-
* @param sessionId Session to attribute signals to
|
|
94
|
-
* @returns Number of signals auto-detected
|
|
95
|
-
*/
|
|
96
|
-
autoDetect(entries, sessionId) {
|
|
97
|
-
let detected = 0;
|
|
98
|
-
// Rule 1: Verify failures → agent made errors (confidence 0.7)
|
|
99
|
-
for (const entry of entries) {
|
|
100
|
-
if (entry.verifyGate.status !== 'PASS') {
|
|
101
|
-
this.record('user_provided_correction', sessionId, entry.id, 0.7, 'auto-audit-verify');
|
|
102
|
-
detected++;
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
|
-
// Rule 2: Repeated same tool call within 60s → user had to repeat
|
|
106
|
-
// Low confidence (0.3) because some tools (read, exec) are legitimately called multiple times
|
|
107
|
-
for (let i = 0; i < entries.length; i++) {
|
|
108
|
-
for (let j = i + 1; j < entries.length; j++) {
|
|
109
|
-
const ei = entries[i];
|
|
110
|
-
const ej = entries[j];
|
|
111
|
-
if (!ei || !ej)
|
|
112
|
-
continue;
|
|
113
|
-
if (ei.toolName === ej.toolName &&
|
|
114
|
-
JSON.stringify(ei.toolParameters) === JSON.stringify(ej.toolParameters) &&
|
|
115
|
-
Math.abs(ei.completedAt - ej.startedAt) < 60_000) {
|
|
116
|
-
this.record('user_repeated_instruction', sessionId, ej.id, 0.3, 'auto-audit-repeat');
|
|
117
|
-
detected++;
|
|
118
|
-
break;
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
// Rule 3: Agent self-corrected — only when retry eventually succeeded
|
|
123
|
-
// (NOT "high risk passed" — that's coincidence, not demonstrated skill)
|
|
124
|
-
// Detected by: same tool+params failed once, then passed later in the same session
|
|
125
|
-
const failures = new Set();
|
|
126
|
-
for (const entry of entries) {
|
|
127
|
-
if (entry.verifyGate.status !== 'PASS') {
|
|
128
|
-
failures.add(JSON.stringify({ t: entry.toolName, p: entry.toolParameters }));
|
|
129
|
-
}
|
|
130
|
-
}
|
|
131
|
-
for (const entry of entries) {
|
|
132
|
-
const key = JSON.stringify({ t: entry.toolName, p: entry.toolParameters });
|
|
133
|
-
if (entry.verifyGate.status === 'PASS' && failures.has(key)) {
|
|
134
|
-
this.record('agent_self_corrected', sessionId, entry.id, 0.5, 'auto-audit-self-corrected');
|
|
135
|
-
detected++;
|
|
136
|
-
failures.delete(key); // one signal per correction
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
return detected;
|
|
140
|
-
}
|
|
141
|
-
// ══════════════════════════════════
|
|
142
|
-
getSignalStrength(signal) {
|
|
143
|
-
switch (signal) {
|
|
144
|
-
case 'user_deleted_code': return -0.8;
|
|
145
|
-
case 'user_interrupted': return -0.6;
|
|
146
|
-
case 'user_provided_correction': return -0.7;
|
|
147
|
-
case 'user_modified_output': return -0.5;
|
|
148
|
-
case 'user_repeated_instruction': return -0.15;
|
|
149
|
-
case 'user_ignored_result': return -0.4;
|
|
150
|
-
case 'user_silence_then_praise': return 0.2;
|
|
151
|
-
case 'user_immediate_continue': return 0.3;
|
|
152
|
-
case 'agent_self_corrected': return 0.3;
|
|
153
|
-
case 'user_explicit_approval': return 0.6;
|
|
154
|
-
case 'user_used_result': return 0.7;
|
|
155
|
-
case 'user_shared_output': return 0.8;
|
|
156
|
-
default: return 0;
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
getSatisfactionScore(sessionId, recentHours = 24) {
|
|
160
|
-
let relevant = this.feedbackLog;
|
|
161
|
-
if (sessionId) {
|
|
162
|
-
relevant = relevant.filter((f) => f.sessionId === sessionId);
|
|
163
|
-
}
|
|
164
|
-
const cutoff = Date.now() - recentHours * 60 * 60 * 1000;
|
|
165
|
-
relevant = relevant.filter((f) => f.timestamp >= cutoff);
|
|
166
|
-
if (relevant.length === 0)
|
|
167
|
-
return 0;
|
|
168
|
-
let weightedSum = 0;
|
|
169
|
-
let totalWeight = 0;
|
|
170
|
-
for (const fb of relevant) {
|
|
171
|
-
const ageHours = (Date.now() - fb.timestamp) / (60 * 60 * 1000);
|
|
172
|
-
const recencyWeight = Math.max(0.1, 1 - ageHours / recentHours);
|
|
173
|
-
// Auto-detected signals get 0.5x weight discount to avoid polluting stats
|
|
174
|
-
const sourceWeight = fb.source.startsWith('auto-') ? 0.5 : 1.0;
|
|
175
|
-
const weight = fb.confidence * recencyWeight * sourceWeight;
|
|
176
|
-
weightedSum += fb.strength * weight;
|
|
177
|
-
totalWeight += weight;
|
|
178
|
-
}
|
|
179
|
-
return totalWeight > 0
|
|
180
|
-
? Math.round((weightedSum / totalWeight) * 100) / 100
|
|
181
|
-
: 0;
|
|
182
|
-
}
|
|
183
|
-
query(filter = {}) {
|
|
184
|
-
let results = this.feedbackLog;
|
|
185
|
-
if (filter.signal)
|
|
186
|
-
results = results.filter((f) => f.signal === filter.signal);
|
|
187
|
-
if (filter.sessionId)
|
|
188
|
-
results = results.filter((f) => f.sessionId === filter.sessionId);
|
|
189
|
-
if (filter.minStrength !== undefined)
|
|
190
|
-
results = results.filter((f) => f.strength >= filter.minStrength);
|
|
191
|
-
if (filter.maxStrength !== undefined)
|
|
192
|
-
results = results.filter((f) => f.strength <= filter.maxStrength);
|
|
193
|
-
if (filter.since !== undefined)
|
|
194
|
-
results = results.filter((f) => f.timestamp >= filter.since);
|
|
195
|
-
results.sort((a, b) => b.timestamp - a.timestamp);
|
|
196
|
-
return results.slice(0, filter.limit ?? 50);
|
|
197
|
-
}
|
|
198
|
-
stats() {
|
|
199
|
-
const positive = this.feedbackLog.filter((f) => f.strength > 0);
|
|
200
|
-
const negative = this.feedbackLog.filter((f) => f.strength < 0);
|
|
201
|
-
const avgStrength = this.feedbackLog.length > 0
|
|
202
|
-
? this.feedbackLog.reduce((s, f) => s + f.strength, 0) / this.feedbackLog.length
|
|
203
|
-
: 0;
|
|
204
|
-
const counts = new Map();
|
|
205
|
-
for (const fb of this.feedbackLog)
|
|
206
|
-
counts.set(fb.signal, (counts.get(fb.signal) || 0) + 1);
|
|
207
|
-
let mostCommon = null;
|
|
208
|
-
let maxCount = 0;
|
|
209
|
-
for (const [sig, count] of counts) {
|
|
210
|
-
if (count > maxCount) {
|
|
211
|
-
maxCount = count;
|
|
212
|
-
mostCommon = sig;
|
|
213
|
-
}
|
|
214
|
-
}
|
|
215
|
-
return {
|
|
216
|
-
totalSignals: this.feedbackLog.length,
|
|
217
|
-
positiveSignals: positive.length,
|
|
218
|
-
negativeSignals: negative.length,
|
|
219
|
-
averageStrength: Math.round(avgStrength * 100) / 100,
|
|
220
|
-
mostCommonSignal: mostCommon,
|
|
221
|
-
};
|
|
222
|
-
}
|
|
223
|
-
}
|
|
224
|
-
exports.ImplicitFeedbackEngine = ImplicitFeedbackEngine;
|
|
225
|
-
//# sourceMappingURL=feedback.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"feedback.js","sourceRoot":"","sources":["../../src/evaluator/feedback.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEA,+CAAiC;AAEjC;;GAEG;AACH,SAAS,kBAAkB;IACzB,OAAO,MAAM,IAAI,CAAC,GAAG,EAAE,IAAI,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;AACrE,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAa,sBAAsB;IACzB,WAAW,GAAuB,EAAE,CAAC;IAE7C;;OAEG;IACH,MAAM,CACJ,MAAkB,EAClB,SAAiB,EACjB,WAAoB,EACpB,UAAU,GAAG,GAAG,EAChB,MAAM,GAAG,eAAe;QAExB,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;QAEhD,MAAM,QAAQ,GAAqB;YACjC,EAAE,EAAE,kBAAkB,EAAE;YACxB,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,MAAM;YACN,QAAQ;YACR,UAAU;YACV,SAAS;YACT,WAAW;YACX,MAAM;SACP,CAAC;QAEF,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChC,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,qCAAqC;IACrC,uCAAuC;IACvC,qCAAqC;IAErC;;;;;;;;;;;;;;OAcG;IACH,UAAU,CAAC,OAAqB,EAAE,SAAiB;QACjD,IAAI,QAAQ,GAAG,CAAC,CAAC;QAEjB,+DAA+D;QAC/D,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;gBACvC,IAAI,CAAC,MAAM,CACT,0BAA0B,EAC1B,SAAS,EACT,KAAK,CAAC,EAAE,EACR,GAAG,EACH,mBAAmB,CACpB,CAAC;gBACF,QAAQ,EAAE,CAAC;YACb,CAAC;QACH,CAAC;QAED,kEAAkE;QAClE,8FAA8F;QAC9F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC5C,MAAM,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;gBACtB,MAAM,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;gBACtB,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE;oBAAE,SAAS;gBACzB,IAAI,EAAE,CAAC,QAAQ,KAAK,EAAE,CAAC,QAAQ;oBAC3B,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,cAAc,CAAC,KAAK,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,cAAc,CAAC;oBACvE,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,WAAW,GAAG,EAAE,CAAC,SAAS,CAAC,GAAG,MAAM,EAAE,CAAC;oBACrD,IAAI,CAAC,MAAM,CACT,2BAA2B,EAC3B,SAAS,EACT,EAAE,CAAC,EAAE,EACL,GAAG,EACH,mBAAmB,CACpB,CAAC;oBACF,QAAQ,EAAE,CAAC;oBACX,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC;QAED,sEAAsE;QACtE,wEAAwE;QACxE,mFAAmF;QACnF,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;QACnC,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,
CAAC;YAC5B,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;gBACvC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC,EAAE,KAAK,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC;YAC/E,CAAC;QACH,CAAC;QACD,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC,EAAE,KAAK,CAAC,cAAc,EAAE,CAAC,CAAC;YAC3E,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,KAAK,MAAM,IAAI,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC5D,IAAI,CAAC,MAAM,CACT,sBAAsB,EACtB,SAAS,EACT,KAAK,CAAC,EAAE,EACR,GAAG,EACH,2BAA2B,CAC5B,CAAC;gBACF,QAAQ,EAAE,CAAC;gBACX,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,4BAA4B;YACpD,CAAC;QACH,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,qCAAqC;IAE7B,iBAAiB,CAAC,MAAkB;QAC1C,QAAQ,MAAM,EAAE,CAAC;YACf,KAAK,mBAAmB,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;YACtC,KAAK,kBAAkB,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;YACrC,KAAK,0BAA0B,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;YAC7C,KAAK,sBAAsB,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;YACzC,KAAK,2BAA2B,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;YAC/C,KAAK,qBAAqB,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;YACxC,KAAK,0BAA0B,CAAC,CAAC,OAAO,GAAG,CAAC;YAC5C,KAAK,yBAAyB,CAAC,CAAC,OAAO,GAAG,CAAC;YAC3C,KAAK,sBAAsB,CAAC,CAAC,OAAO,GAAG,CAAC;YACxC,KAAK,wBAAwB,CAAC,CAAC,OAAO,GAAG,CAAC;YAC1C,KAAK,kBAAkB,CAAC,CAAC,OAAO,GAAG,CAAC;YACpC,KAAK,oBAAoB,CAAC,CAAC,OAAO,GAAG,CAAC;YACtC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;IAED,oBAAoB,CAAC,SAAkB,EAAE,WAAW,GAAG,EAAE;QACvD,IAAI,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC;QAEhC,IAAI,SAAS,EAAE,CAAC;YACd,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC;QAC/D,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,WAAW,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;QACzD,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,MAAM,CAAC,CAAC;QAEzD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAEpC,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,IAAI,WAAW,GAAG,CAAC,CAAC;QAEpB,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC1B,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,GAAG
,IAAI,CAAC,CAAC;YAChE,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,QAAQ,GAAG,WAAW,CAAC,CAAC;YAChE,0EAA0E;YAC1E,MAAM,YAAY,GAAG,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;YAC/D,MAAM,MAAM,GAAG,EAAE,CAAC,UAAU,GAAG,aAAa,GAAG,YAAY,CAAC;YAC5D,WAAW,IAAI,EAAE,CAAC,QAAQ,GAAG,MAAM,CAAC;YACpC,WAAW,IAAI,MAAM,CAAC;QACxB,CAAC;QAED,OAAO,WAAW,GAAG,CAAC;YACpB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,WAAW,GAAG,WAAW,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;YACrD,CAAC,CAAC,CAAC,CAAC;IACR,CAAC;IAED,KAAK,CAAC,SAOF,EAAE;QACJ,IAAI,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC;QAE/B,IAAI,MAAM,CAAC,MAAM;YAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,CAAC;QAC/E,IAAI,MAAM,CAAC,SAAS;YAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,MAAM,CAAC,SAAS,CAAC,CAAC;QACxF,IAAI,MAAM,CAAC,WAAW,KAAK,SAAS;YAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,MAAM,CAAC,WAAY,CAAC,CAAC;QACzG,IAAI,MAAM,CAAC,WAAW,KAAK,SAAS;YAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,MAAM,CAAC,WAAY,CAAC,CAAC;QACzG,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS;YAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,MAAM,CAAC,KAAM,CAAC,CAAC;QAE9F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;QAClD,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,KAAK;QAOH,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC;QAChE,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC;QAChE,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC;YAC7C,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM;YAChF,CAAC,CAAC,CAAC,CAAC;QAEN,MAAM,MAAM,GAAG,IAAI,GAAG,EAAsB,CAAC;QAC7C,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,WAAW
;YAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC3F,IAAI,UAAU,GAAsB,IAAI,CAAC;QACzC,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;YAClC,IAAI,KAAK,GAAG,QAAQ,EAAE,CAAC;gBAAC,QAAQ,GAAG,KAAK,CAAC;gBAAC,UAAU,GAAG,GAAG,CAAC;YAAC,CAAC;QAC/D,CAAC;QAED,OAAO;YACL,YAAY,EAAE,IAAI,CAAC,WAAW,CAAC,MAAM;YACrC,eAAe,EAAE,QAAQ,CAAC,MAAM;YAChC,eAAe,EAAE,QAAQ,CAAC,MAAM;YAChC,eAAe,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,GAAG,GAAG,CAAC,GAAG,GAAG;YACpD,gBAAgB,EAAE,UAAU;SAC7B,CAAC;IACJ,CAAC;CACF;AAvND,wDAuNC"}
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
import { PreExecMetrics, RuntimeMetrics, PostExecMetrics } from '../types';
|
|
2
|
-
import { ImplicitFeedbackEngine } from './feedback';
|
|
3
|
-
/**
 * Quality profile of an agent, accumulated across all evaluations.
 */
export interface AgentProfile {
    /** Overall quality score on a 0-100 scale. */
    overallScore: number;
    /** How many operations have been evaluated. */
    totalOps: number;
    /**
     * Score per metric category.
     *
     * NOTE(review): the three execution-phase scores may be `null` — presumably
     * when no metrics of that kind have been recorded yet; confirm against the
     * profiler implementation.
     */
    breakdown: {
        preExec: number | null;
        runtime: number | null;
        postExec: number | null;
        userSatisfaction: number;
    };
    /** Trend information for recent operations. */
    trends: {
        improving: boolean;
        recentOps: number;
        recentScore: number;
    };
    /** Areas that need attention. */
    warnings: string[];
    /** Patterns the agent handles well. */
    strengths: string[];
}
|
|
29
|
-
/**
 * AgentProfiler — builds and maintains the agent's quality profile.
 *
 * Combines PreExec, Runtime and PostExec metrics with ImplicitFeedback into a
 * composite quality score that is intended to improve over time through
 * self-correction.
 */
export declare class AgentProfiler {
    private feedbackEngine;
    private preMetrics;
    private runMetrics;
    private postMetrics;
    private sessionScores;
    /**
     * @param feedbackEngine Implicit-feedback engine used by this profiler
     */
    constructor(feedbackEngine: ImplicitFeedbackEngine);
    /**
     * Record one complete evaluation cycle for a single tool call.
     *
     * @param sessionId Session the tool call belongs to
     * @param pre       Metrics gathered before execution
     * @param run       Metrics gathered during execution
     * @param post      Metrics gathered after execution
     */
    recordCycle(sessionId: string, pre: PreExecMetrics, run: RuntimeMetrics, post: PostExecMetrics): void;
    /**
     * Drop the scores kept for a session so they do not leak memory.
     *
     * @param sessionId Session whose scores should be removed
     */
    clearSession(sessionId: string): void;
    /**
     * Build the current agent profile.
     *
     * @param sessionId Optional session id — presumably narrows the profile to
     *                  that session; confirm against the implementation
     * @returns The assembled profile
     */
    getProfile(sessionId?: string): AgentProfile;
    private average;
}
|
|
55
|
-
//# sourceMappingURL=profiler.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"profiler.d.ts","sourceRoot":"","sources":["../../src/evaluator/profiler.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,cAAc,EACd,eAAe,EAChB,MAAM,UAAU,CAAC;AAClB,OAAO,EAAE,sBAAsB,EAAE,MAAM,YAAY,CAAC;AAEpD;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,oCAAoC;IACpC,YAAY,EAAE,MAAM,CAAC;IACrB,qCAAqC;IACrC,QAAQ,EAAE,MAAM,CAAC;IACjB,yCAAyC;IACzC,SAAS,EAAE;QACT,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;QACvB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;QACvB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;QACxB,gBAAgB,EAAE,MAAM,CAAC;KAC1B,CAAC;IACF,iBAAiB;IACjB,MAAM,EAAE;QACN,SAAS,EAAE,OAAO,CAAC;QACnB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;KACrB,CAAC;IACF,8BAA8B;IAC9B,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,8BAA8B;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED;;;;;;GAMG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,cAAc,CAAyB;IAE/C,OAAO,CAAC,UAAU,CAAwB;IAC1C,OAAO,CAAC,UAAU,CAAwB;IAC1C,OAAO,CAAC,WAAW,CAAyB;IAC5C,OAAO,CAAC,aAAa,CAAoC;gBAE7C,cAAc,EAAE,sBAAsB;IAIlD;;OAEG;IACH,WAAW,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,EAAE,cAAc,EAAE,GAAG,EAAE,cAAc,EAAE,IAAI,EAAE,eAAe,GAAG,IAAI;IAcrG,qDAAqD;IACrD,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI;IAIrC;;OAEG;IACH,UAAU,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,YAAY;IA0F5C,OAAO,CAAC,OAAO;CAKhB"}
|