sentinel-agentos 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +636 -0
- package/dist/api.d.ts +151 -0
- package/dist/api.d.ts.map +1 -0
- package/dist/api.js +179 -0
- package/dist/api.js.map +1 -0
- package/dist/cli.d.ts +14 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +182 -0
- package/dist/cli.js.map +1 -0
- package/dist/core.d.ts +139 -0
- package/dist/core.d.ts.map +1 -0
- package/dist/core.js +247 -0
- package/dist/core.js.map +1 -0
- package/dist/evaluator/exec-evaluator.d.ts +102 -0
- package/dist/evaluator/exec-evaluator.d.ts.map +1 -0
- package/dist/evaluator/exec-evaluator.js +266 -0
- package/dist/evaluator/exec-evaluator.js.map +1 -0
- package/dist/evaluator/feedback.d.ts +66 -0
- package/dist/evaluator/feedback.d.ts.map +1 -0
- package/dist/evaluator/feedback.js +195 -0
- package/dist/evaluator/feedback.js.map +1 -0
- package/dist/evaluator/profiler.d.ts +53 -0
- package/dist/evaluator/profiler.d.ts.map +1 -0
- package/dist/evaluator/profiler.js +108 -0
- package/dist/evaluator/profiler.js.map +1 -0
- package/dist/guard/audit-log.d.ts +75 -0
- package/dist/guard/audit-log.d.ts.map +1 -0
- package/dist/guard/audit-log.js +207 -0
- package/dist/guard/audit-log.js.map +1 -0
- package/dist/guard/risk-gate.d.ts +97 -0
- package/dist/guard/risk-gate.d.ts.map +1 -0
- package/dist/guard/risk-gate.js +160 -0
- package/dist/guard/risk-gate.js.map +1 -0
- package/dist/guard/sandbox.d.ts +112 -0
- package/dist/guard/sandbox.d.ts.map +1 -0
- package/dist/guard/sandbox.js +379 -0
- package/dist/guard/sandbox.js.map +1 -0
- package/dist/guard/schema-gate.d.ts +90 -0
- package/dist/guard/schema-gate.d.ts.map +1 -0
- package/dist/guard/schema-gate.js +452 -0
- package/dist/guard/schema-gate.js.map +1 -0
- package/dist/guard/snapshot-verify.d.ts +111 -0
- package/dist/guard/snapshot-verify.d.ts.map +1 -0
- package/dist/guard/snapshot-verify.js +578 -0
- package/dist/guard/snapshot-verify.js.map +1 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +59 -0
- package/dist/index.js.map +1 -0
- package/dist/memory/episodic.d.ts +76 -0
- package/dist/memory/episodic.d.ts.map +1 -0
- package/dist/memory/episodic.js +289 -0
- package/dist/memory/episodic.js.map +1 -0
- package/dist/memory/semantic.d.ts +69 -0
- package/dist/memory/semantic.d.ts.map +1 -0
- package/dist/memory/semantic.js +243 -0
- package/dist/memory/semantic.js.map +1 -0
- package/dist/memory/working.d.ts +53 -0
- package/dist/memory/working.d.ts.map +1 -0
- package/dist/memory/working.js +150 -0
- package/dist/memory/working.js.map +1 -0
- package/dist/middleware/openclaw.d.ts +45 -0
- package/dist/middleware/openclaw.d.ts.map +1 -0
- package/dist/middleware/openclaw.js +95 -0
- package/dist/middleware/openclaw.js.map +1 -0
- package/dist/middleware/wrapper.d.ts +54 -0
- package/dist/middleware/wrapper.d.ts.map +1 -0
- package/dist/middleware/wrapper.js +155 -0
- package/dist/middleware/wrapper.js.map +1 -0
- package/dist/server.d.ts +45 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +229 -0
- package/dist/server.js.map +1 -0
- package/dist/types/index.d.ts +201 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +4 -0
- package/dist/types/index.js.map +1 -0
- package/package.json +64 -0
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.PostExecEvaluator = exports.RuntimeEvaluator = exports.PreExecEvaluator = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* PreExecEvaluator — captures metrics before tool execution.
|
|
6
|
+
*
|
|
7
|
+
* Watches the Guard layer output and WorkingMemory context
|
|
8
|
+
* to score parameter quality, context utilization, and risk.
|
|
9
|
+
*/
|
|
10
|
+
class PreExecEvaluator {
    schemaGate;
    riskGate;
    workingMemory;
    /**
     * @param schemaGate - Guard component; only `check(toolName, parameters)` is called on it.
     * @param riskGate - Guard component; only `evaluate(toolName, parameters)` is called on it.
     * @param workingMemory - Session context. This class reads `openFiles`,
     *   `recentMessages` (entries with a `content` field) and the `.size` of
     *   `recentToolResults`.
     */
    constructor(schemaGate, riskGate, workingMemory) {
        this.schemaGate = schemaGate;
        this.riskGate = riskGate;
        this.workingMemory = workingMemory;
    }
    /**
     * Evaluate a tool call before execution.
     *
     * @param toolName - Name of the tool about to run.
     * @param parameters - Key/value parameters of the call.
     * @returns `{ timestamp, toolName, schemaCheck, riskScore, paramQuality, contextUtilization }`
     *   where `schemaCheck` / `riskScore` are whatever the two gates return and
     *   the last two are the heuristic scores computed below.
     */
    evaluate(toolName, parameters) {
        const schemaCheck = this.schemaGate.check(toolName, parameters);
        const riskScore = this.riskGate.evaluate(toolName, parameters);
        const paramQuality = this.evaluateParamQuality(toolName, parameters);
        const contextUtilization = this.evaluateContextUtilization(toolName, parameters);
        return {
            timestamp: Date.now(),
            toolName,
            schemaCheck,
            riskScore,
            paramQuality,
            contextUtilization,
        };
    }
    /**
     * Score parameter quality based on contextual awareness.
     *
     * Starts at a neutral 0.5 and adjusts:
     *   +0.3 when `path` contains one of the open files,
     *   +0.1 when `content` is longer than 20 characters,
     *   -0.2 when `content` is an empty string,
     *   +0.1 when more than one string parameter mentions .ts/.js/.json.
     *
     * @returns `{ score, observations }` with `score` clamped to [0, 1] and
     *   rounded to two decimals.
     */
    evaluateParamQuality(_toolName, parameters) {
        let score = 0.5; // neutral start
        const observations = [];
        const path = parameters['path'];
        if (typeof path === 'string') {
            // Skip empty / non-string entries: `path.includes('')` is always
            // true and would award the bonus to every path.
            const referencesOpenFile = this.workingMemory.openFiles.some((f) => typeof f === 'string' && f.length > 0 && path.includes(f));
            if (referencesOpenFile) {
                score += 0.3;
                observations.push('Path references an open file');
            }
            // POSIX root, or a Windows drive prefix in either letter case and
            // with either separator (C:\ or c:/).
            if (path.startsWith('/') || /^[A-Za-z]:[\\/]/.test(path)) {
                observations.push('Absolute path used');
            }
        }
        const content = parameters['content'];
        if (typeof content === 'string') {
            if (content.length > 20) {
                score = Math.min(1.0, score + 0.1);
            }
            if (content.length === 0) {
                score -= 0.2;
                observations.push('Empty content — possible error');
            }
        }
        const fileMarkers = ['.ts', '.js', '.json'];
        const filePaths = Object.values(parameters).filter((v) => typeof v === 'string' && fileMarkers.some((ext) => v.includes(ext)));
        if (filePaths.length > 1) {
            score = Math.min(1.0, score + 0.1);
            observations.push('Multiple file references — coordinated operation');
        }
        return {
            score: Math.round(Math.max(0, Math.min(1, score)) * 100) / 100,
            observations,
        };
    }
    /**
     * Score how well the agent uses stored context.
     *
     * Starts at 0.4; +0.1 when any recent messages exist, +0.1 when any cached
     * tool results exist, and +0.1 for each distinct word (longer than three
     * characters, first five per message, last three messages) that also
     * appears in the parameter values.
     *
     * @returns `{ score, patterns }` with `score` capped at 1 and rounded to
     *   two decimals.
     */
    evaluateContextUtilization(_toolName, parameters) {
        let score = 0.4;
        const patterns = [];
        const recentMessages = this.workingMemory.recentMessages;
        if (recentMessages.length > 0) {
            score += 0.1;
            patterns.push(`${recentMessages.length} recent messages available`);
        }
        const cachedCount = this.workingMemory.recentToolResults.size;
        if (cachedCount > 0) {
            score += 0.1;
            patterns.push(`${cachedCount} cached results available`);
        }
        // Flatten parameter values into searchable text. Objects/arrays are
        // serialized as JSON: String(obj) yields "[object Object]", which would
        // spuriously match the word "object" in a message.
        const toSearchText = (value) => {
            if (value !== null && typeof value === 'object') {
                try {
                    return JSON.stringify(value) ?? '';
                }
                catch {
                    // Circular structure or BigInt inside: nothing searchable.
                    return '';
                }
            }
            return String(value);
        };
        const haystack = Object.values(parameters).map(toSearchText).join(' ');
        // Each distinct word is credited once, even if several messages repeat it.
        const seenWords = new Set();
        for (const msg of recentMessages.slice(-3)) {
            if (typeof msg?.content !== 'string') {
                continue;
            }
            const candidates = msg.content
                .split(/\s+/)
                .filter((w) => w.length > 3)
                .slice(0, 5);
            for (const word of candidates) {
                if (seenWords.has(word)) {
                    continue;
                }
                seenWords.add(word);
                if (haystack.includes(word)) {
                    score += 0.1;
                    patterns.push(`Parameter references recent context: "${word}"`);
                }
            }
        }
        return {
            score: Math.round(Math.min(1.0, score) * 100) / 100,
            patterns,
        };
    }
}
|
|
117
|
+
exports.PreExecEvaluator = PreExecEvaluator;
|
|
118
|
+
/**
|
|
119
|
+
* RuntimeEvaluator — captures metrics during execution.
|
|
120
|
+
*
|
|
121
|
+
* Tracks retries, self-corrections, timeouts, and
|
|
122
|
+
* whether the agent selected the right tool for the job.
|
|
123
|
+
*/
|
|
124
|
+
class RuntimeEvaluator {
    /** Per-tool tally: toolName -> `{ calls, successes }`. */
    toolHistory = new Map();
    /**
     * Evaluate a completed tool execution.
     *
     * @param options - Reads `toolName`, `startTime`, `endTime`, `retryCount`,
     *   `wasSelfCorrected`, `hadTimeout`, `toolResult` and optional `expectedTool`.
     * @returns `{ retryCount, selfCorrected, hadTimeout, toolSuccess,
     *   toolSelectionMatch, adaptiveScore, durationMs }`.
     *   `toolSelectionMatch` is a boolean when `expectedTool` is given; otherwise
     *   `true` when this tool's success rate over earlier calls exceeds 0.7, and
     *   `undefined` in every other case.
     */
    evaluate(options) {
        const { toolName, expectedTool, hadTimeout, wasSelfCorrected } = options;
        const durationMs = options.endTime - options.startTime;
        // A call succeeded when it did not time out and produced some result.
        const toolSuccess = !hadTimeout && options.toolResult !== undefined;
        let toolSelectionMatch;
        if (expectedTool) {
            toolSelectionMatch = toolName === expectedTool;
        }
        else {
            // Judged on calls made BEFORE this one; the current call is recorded below.
            const history = this.toolHistory.get(toolName);
            if (history && history.calls > 0 && history.successes / history.calls > 0.7) {
                toolSelectionMatch = true;
            }
        }
        this.recordToolCall(toolName, toolSuccess);
        // A missing, non-numeric or negative retry count is treated as zero so
        // the score never becomes NaN and is never inflated.
        const rawRetries = Number(options.retryCount);
        const retryCount = Number.isFinite(rawRetries) && rawRetries > 0 ? rawRetries : 0;
        // Composite: each retry costs 0.15, a timeout costs 0.5, a
        // self-correction earns 0.2; the total is clamped to [0, 1].
        let adaptiveScore = 1.0 - retryCount * 0.15;
        if (hadTimeout) {
            adaptiveScore -= 0.5;
        }
        if (wasSelfCorrected) {
            adaptiveScore += 0.2;
        }
        adaptiveScore = Math.max(0, Math.min(1, adaptiveScore));
        return {
            retryCount,
            selfCorrected: wasSelfCorrected,
            hadTimeout,
            toolSuccess,
            toolSelectionMatch,
            adaptiveScore: Math.round(adaptiveScore * 100) / 100,
            durationMs,
        };
    }
    /** Record a tool call (and whether it succeeded) in the history tracker. */
    recordToolCall(toolName, success) {
        const tally = this.toolHistory.get(toolName);
        if (!tally) {
            this.toolHistory.set(toolName, { calls: 1, successes: success ? 1 : 0 });
            return;
        }
        tally.calls++;
        if (success) {
            tally.successes++;
        }
    }
    /**
     * Get per-tool statistics.
     *
     * @returns An object keyed by tool name with `{ calls, successRate }`,
     *   `successRate` rounded to two decimals. Built with `Object.fromEntries`
     *   so that every key, including `__proto__`, becomes an own property.
     */
    getToolAccuracy() {
        return Object.fromEntries(Array.from(this.toolHistory, ([tool, { calls, successes }]) => [
            tool,
            {
                calls,
                successRate: calls > 0 ? Math.round((successes / calls) * 100) / 100 : 0,
            },
        ]));
    }
}
|
|
196
|
+
exports.RuntimeEvaluator = RuntimeEvaluator;
|
|
197
|
+
/**
|
|
198
|
+
* PostExecEvaluator — captures metrics after execution.
|
|
199
|
+
*
|
|
200
|
+
* Scores verify results, user acceptance patterns,
|
|
201
|
+
* and checks if the agent actually used its own result later.
|
|
202
|
+
*/
|
|
203
|
+
class PostExecEvaluator {
    /** operationId -> `{ result, referenced }`, used for utilization scoring. */
    resultReferenceTracker = new Map();
    /**
     * Evaluate post-execution outcomes.
     *
     * @param options - Reads `verifyPassed`, `verifyChecks`, `verifyFailures`,
     *   `userAccepted`, `userProvidedEdit`, `resultWasUsed`, `diffLinesChanged`.
     * @returns `{ verifyPassed, verifyScore, userAccepted, userEditRate,
     *   resultUtilized, outcomeScore, healthy, diffLinesChanged }`.
     *   Scores are rounded to two decimals.
     */
    evaluate(options) {
        // Share of verification checks that passed; 1 when nothing was checked.
        // Clamped so that inconsistent counts (more failures than checks, or a
        // negative failure count) cannot push the score outside [0, 1].
        const passRatio = options.verifyChecks > 0
            ? 1 - options.verifyFailures / options.verifyChecks
            : 1;
        const verifyScore = Math.max(0, Math.min(1, passRatio));
        // Accepted → 1.0; edited by the user → 0.3; neither signal → 0.7.
        let acceptance = 0.7;
        if (options.userAccepted) {
            acceptance = 1.0;
        }
        else if (options.userProvidedEdit) {
            acceptance = 0.3;
        }
        // Weighted composite: 30% verification, 40% acceptance, 30% result reuse.
        const outcomeScore = (verifyScore * 0.3 +
            acceptance * 0.4 +
            (options.resultWasUsed ? 0.3 : 0));
        const healthy = verifyScore > 0.8 && acceptance > 0.5;
        return {
            verifyPassed: options.verifyPassed,
            verifyScore: Math.round(verifyScore * 100) / 100,
            userAccepted: options.userAccepted,
            userEditRate: options.userProvidedEdit ? 1 : 0,
            resultUtilized: options.resultWasUsed,
            outcomeScore: Math.round(outcomeScore * 100) / 100,
            healthy,
            diffLinesChanged: options.diffLinesChanged,
        };
    }
    /**
     * Start tracking a tool result for later utilization detection.
     * Tracking an existing operationId again resets its `referenced` flag.
     */
    trackResult(operationId, result) {
        this.resultReferenceTracker.set(operationId, { result, referenced: false });
    }
    /**
     * Mark a previously-tracked result as referenced. Unknown ids are ignored.
     */
    markResultReferenced(operationId) {
        const tracked = this.resultReferenceTracker.get(operationId);
        if (tracked) {
            tracked.referenced = true;
        }
    }
    /**
     * @returns `true` only when the id is tracked and has been marked referenced.
     */
    isResultReferenced(operationId) {
        return this.resultReferenceTracker.get(operationId)?.referenced ?? false;
    }
    /**
     * @returns Fraction of tracked results that were referenced, rounded to
     *   two decimals; 0 when nothing is tracked.
     */
    getUtilizationRate() {
        const total = this.resultReferenceTracker.size;
        if (total === 0) {
            return 0;
        }
        let referenced = 0;
        for (const tracked of this.resultReferenceTracker.values()) {
            if (tracked.referenced) {
                referenced++;
            }
        }
        return Math.round((referenced / total) * 100) / 100;
    }
}
|
|
265
|
+
exports.PostExecEvaluator = PostExecEvaluator;
|
|
266
|
+
//# sourceMappingURL=exec-evaluator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"exec-evaluator.js","sourceRoot":"","sources":["../../src/evaluator/exec-evaluator.ts"],"names":[],"mappings":";;;AAWA;;;;;GAKG;AACH,MAAa,gBAAgB;IACnB,UAAU,CAAa;IACvB,QAAQ,CAAW;IACnB,aAAa,CAAgB;IAErC,YACE,UAAsB,EACtB,QAAkB,EAClB,aAA4B;QAE5B,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,QAAQ,CACN,QAAgB,EAChB,UAAmC;QAEnC,kBAAkB;QAClB,MAAM,WAAW,GAAgB,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAE7E,qBAAqB;QACrB,MAAM,SAAS,GAAc,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAE1E,iEAAiE;QACjE,MAAM,YAAY,GAAG,IAAI,CAAC,oBAAoB,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAErE,iEAAiE;QACjE,MAAM,kBAAkB,GAAG,IAAI,CAAC,0BAA0B,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAEjF,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,QAAQ;YACR,WAAW;YACX,SAAS;YACT,YAAY;YACZ,kBAAkB;SACnB,CAAC;IACJ,CAAC;IAED;;;;;;OAMG;IACK,oBAAoB,CAC1B,SAAiB,EACjB,UAAmC;QAEnC,IAAI,KAAK,GAAG,GAAG,CAAC,CAAC,gBAAgB;QACjC,MAAM,YAAY,GAAa,EAAE,CAAC;QAElC,wCAAwC;QACxC,IAAI,OAAO,UAAU,CAAC,MAAM,CAAC,KAAK,QAAQ,EAAE,CAAC;YAC3C,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAW,CAAC;YAC1C,IAAI,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC/D,KAAK,IAAI,GAAG,CAAC;gBACb,YAAY,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;YACpD,CAAC;YACD,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE,CAAC;gBACpD,YAAY,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;YAC1C,CAAC;QACH,CAAC;QAED,2CAA2C;QAC3C,IAAI,OAAO,UAAU,CAAC,SAAS,CAAC,KAAK,QAAQ,EAAE,CAAC;YAC9C,MAAM,OAAO,GAAG,UAAU,CAAC,SAAS,CAAW,CAAC;YAChD,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;gBACxB,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC;YACrC,CAAC;YACD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACzB,KAAK,IAAI,GAAG,CAAC;gBACb,YAAY,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;YACtD,CAAC;QACH,CAAC;QAED,8CAA8C;QAC9C,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAChD,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,CAAC,QAAQ
,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAChG,CAAC;QAEF,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC;YACnC,YAAY,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;QACxE,CAAC;QAED,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;YAC9D,YAAY;SACb,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,0BAA0B,CAChC,SAAiB,EACjB,UAAmC;QAEnC,IAAI,KAAK,GAAG,GAAG,CAAC;QAChB,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,4CAA4C;QAC5C,IAAI,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjD,KAAK,IAAI,GAAG,CAAC;YACb,QAAQ,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,MAAM,4BAA4B,CAAC,CAAC;QACzF,CAAC;QAED,0CAA0C;QAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,iBAAiB,CAAC,IAAI,CAAC;QAC9D,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;YACpB,KAAK,IAAI,GAAG,CAAC;YACb,QAAQ,CAAC,IAAI,CAAC,GAAG,WAAW,2BAA2B,CAAC,CAAC;QAC3D,CAAC;QAED,8CAA8C;QAC9C,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClE,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9D,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YACnE,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBACrC,IAAI,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC7B,KAAK,IAAI,GAAG,CAAC;oBACb,QAAQ,CAAC,IAAI,CAAC,yCAAyC,IAAI,GAAG,CAAC,CAAC;gBAClE,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;YACnD,QAAQ;SACT,CAAC;IACJ,CAAC;CACF;AA1ID,4CA0IC;AAED;;;;;GAKG;AACH,MAAa,gBAAgB;IAC3B,iFAAiF;IACzE,WAAW,GAAsD,IAAI,GAAG,EAAE,CAAC;IAEnF;;OAEG;IACH,QAAQ,CAAC,OASR;QACC,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,GAAG,OAAO,CAAC,SAAS,CAAC;QACvD,MAAM,WAAW,GAAG,CAAC,OAAO,CAAC,UAAU,IAAI,OAAO,CAAC,UAAU,KAAK,SAAS,CAAC;QAE5E,+DAA+D;QAC/D,IAAI,kBAAuC,CAAC;QAC5C,IAAI,OAAO,
CAAC,YAAY,EAAE,CAAC;YACzB,gDAAgD;YAChD,kBAAkB,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,YAAY,CAAC;QACjE,CAAC;aAAM,CAAC;YACN,wEAAwE;YACxE,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YACvD,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,qBAAqB,GAAG,OAAO,CAAC,KAAK,GAAG,CAAC;oBAC7C,CAAC,CAAC,OAAO,CAAC,SAAS,GAAG,OAAO,CAAC,KAAK;oBACnC,CAAC,CAAC,CAAC,CAAC;gBACN,2EAA2E;gBAC3E,kBAAkB,GAAG,qBAAqB,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;YACtE,CAAC;QACH,CAAC;QAED,8BAA8B;QAC9B,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;QAEnD,+DAA+D;QAC/D,IAAI,aAAa,GAAG,GAAG,CAAC;QACxB,aAAa,IAAI,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC,wBAAwB;QACpE,IAAI,OAAO,CAAC,UAAU;YAAE,aAAa,IAAI,GAAG,CAAC;QAC7C,IAAI,OAAO,CAAC,gBAAgB;YAAE,aAAa,IAAI,GAAG,CAAC,CAAC,2BAA2B;QAC/E,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC,CAAC;QAExD,OAAO;YACL,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,aAAa,EAAE,OAAO,CAAC,gBAAgB;YACvC,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,WAAW;YACX,kBAAkB;YAClB,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,GAAG,CAAC,GAAG,GAAG;YACpD,UAAU;SACX,CAAC;IACJ,CAAC;IAED,gDAAgD;IACxC,cAAc,CAAC,QAAgB,EAAE,OAAgB;QACvD,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAChD,IAAI,QAAQ,EAAE,CAAC;YACb,QAAQ,CAAC,KAAK,EAAE,CAAC;YACjB,IAAI,OAAO;gBAAE,QAAQ,CAAC,SAAS,EAAE,CAAC;QACpC,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAC3E,CAAC;IACH,CAAC;IAED,6CAA6C;IAC7C,eAAe;QACb,MAAM,MAAM,GAA2D,EAAE,CAAC;QAC1E,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YAC/C,MAAM,CAAC,IAAI,CAAC,GAAG;gBACb,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,WAAW,EAAE,OAAO,CAAC,KAAK,GAAG,CAAC;oBAC5B,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;oBAC7D,CAAC,CAAC,CAAC;aACN,CAAC;QACJ,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AAlFD,4CAkFC;AAED;;;;;GAKG;AACH,MAAa,iBAAiB;IAC5B,sDAAsD;IAC9C,sBAAsB,GAA0D,IAAI,GAAG,EAAE,CAAC;IAElG;;OAEG;IA
CH,QAAQ,CAAC,OAQR;QACC,eAAe;QACf,MAAM,WAAW,GAAG,OAAO,CAAC,YAAY,GAAG,CAAC;YAC1C,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,GAAG,OAAO,CAAC,YAAY,CAAC;YACrD,CAAC,CAAC,CAAC,CAAC;QAEN,kBAAkB;QAClB,MAAM,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;QAErF,0BAA0B;QAC1B,MAAM,YAAY,GAAG,CACnB,WAAW,GAAG,GAAG;YACjB,UAAU,GAAG,GAAG;YAChB,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAClC,CAAC;QAEF,sBAAsB;QACtB,MAAM,OAAO,GAAG,WAAW,GAAG,GAAG,IAAI,UAAU,GAAG,GAAG,CAAC;QAEtD,OAAO;YACL,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,GAAG,GAAG,CAAC,GAAG,GAAG;YAChD,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,YAAY,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,cAAc,EAAE,OAAO,CAAC,aAAa;YACrC,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,GAAG,CAAC,GAAG,GAAG;YAClD,OAAO;YACP,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;SAC3C,CAAC;IACJ,CAAC;IAED;;;OAGG;IACH,WAAW,CAAC,WAAmB,EAAE,MAAe;QAC9C,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,WAAW,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC;IAC9E,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,WAAmB;QACtC,MAAM,KAAK,GAAG,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QAC3D,IAAI,KAAK;YAAE,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,kBAAkB,CAAC,WAAmB;QACpC,OAAO,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,UAAU,IAAI,KAAK,CAAC;IAC3E,CAAC;IAED;;OAEG;IACH,kBAAkB;QAChB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,sBAAsB,CAAC,MAAM,EAAE,CAAC,CAAC;QACjE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QACnC,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC;QAC9D,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;IAC/D,CAAC;CACF;AA9ED,8CA8EC"}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { ImplicitFeedback, SignalType } from '../types';
|
|
2
|
+
/**
|
|
3
|
+
* ImplicitFeedbackEngine — captures and interprets implicit user signals.
|
|
4
|
+
*
|
|
5
|
+
* Instead of relying on explicit "thumbs up/down", this engine
|
|
6
|
+
* detects subtle signals from user behavior to infer satisfaction.
|
|
7
|
+
*
|
|
8
|
+
* This is the key differentiator of AgentOS: it learns from
|
|
9
|
+
* what users DO, not just what they SAY.
|
|
10
|
+
*
|
|
11
|
+
* Signal rules (based on DESIGN.md §6.3):
|
|
12
|
+
* - user_deleted_code: User deleted what agent wrote → strong negative (-0.8)
|
|
13
|
+
* - user_modified_output: User modified agent's output → moderate negative (-0.5)
|
|
14
|
+
* - user_repeated_instruction: User repeated same command → mild negative (-0.3)
|
|
15
|
+
* - user_immediate_continue: User immediately continued without edit → positive (+0.3)
|
|
16
|
+
* - user_used_result: User referenced agent's output later → strong positive (+0.7)
|
|
17
|
+
* - user_silence_then_praise: Gap then "谢谢" ("thank you") → mild positive (+0.2)
|
|
18
|
+
* - user_interrupted: User stopped agent mid-execution → negative (-0.6)
|
|
19
|
+
* - agent_self_corrected: Agent caught its own mistake → mild positive for agent (+0.3)
|
|
20
|
+
*/
|
|
21
|
+
export declare class ImplicitFeedbackEngine {
|
|
22
|
+
private feedbackLog;
|
|
23
|
+
/**
|
|
24
|
+
* Record an implicit feedback signal and append it to the in-memory log.
|
|
25
|
+
*
|
|
26
|
+
* @param signal - Type of implicit signal detected; it also determines the stored strength
|
|
27
|
+
* @param sessionId - Session where signal was detected
|
|
28
|
+
* @param operationId - Related tool call or request ID
|
|
29
|
+
* @param confidence - How confident we are about this interpretation (0-1); defaults to 0.8
|
|
30
|
+
* @param source - Where the signal was detected (audit_log, message_pattern, diff); defaults to 'auto-detected'
|
|
31
|
+
*/
|
|
32
|
+
record(signal: SignalType, sessionId: string, operationId?: string, confidence?: number, source?: string): ImplicitFeedback;
|
|
33
|
+
/**
|
|
34
|
+
* Get the default strength for a signal type (0 for an unrecognized signal).
|
|
35
|
+
*/
|
|
36
|
+
private getSignalStrength;
|
|
37
|
+
/**
|
|
38
|
+
* Compute the aggregate satisfaction score from all feedback, optionally limited to one session.
|
|
39
|
+
*
|
|
40
|
+
* Weighted by confidence and recency (newer signals matter more); only signals from the last recentHours (default 24) count.
|
|
41
|
+
* Returns a score from -1.0 (very unhappy) to +1.0 (very happy), or 0 when no signal falls inside the window.
|
|
42
|
+
*/
|
|
43
|
+
getSatisfactionScore(sessionId?: string, recentHours?: number): number;
|
|
44
|
+
/**
|
|
45
|
+
* Get feedback events, optionally filtered; newest first, capped at filter.limit (default 50).
|
|
46
|
+
*/
|
|
47
|
+
query(filter?: {
|
|
48
|
+
signal?: SignalType;
|
|
49
|
+
sessionId?: string;
|
|
50
|
+
minStrength?: number;
|
|
51
|
+
maxStrength?: number;
|
|
52
|
+
since?: number;
|
|
53
|
+
limit?: number;
|
|
54
|
+
}): ImplicitFeedback[];
|
|
55
|
+
/**
|
|
56
|
+
* Get feedback summary statistics.
|
|
57
|
+
*/
|
|
58
|
+
stats(): {
|
|
59
|
+
totalSignals: number;
|
|
60
|
+
positiveSignals: number;
|
|
61
|
+
negativeSignals: number;
|
|
62
|
+
averageStrength: number;
|
|
63
|
+
mostCommonSignal: SignalType | null;
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
//# sourceMappingURL=feedback.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/evaluator/feedback.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAUxD;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,sBAAsB;IACjC,OAAO,CAAC,WAAW,CAA0B;IAE7C;;;;;;;;OAQG;IACH,MAAM,CACJ,MAAM,EAAE,UAAU,EAClB,SAAS,EAAE,MAAM,EACjB,WAAW,CAAC,EAAE,MAAM,EACpB,UAAU,SAAM,EAChB,MAAM,SAAkB,GACvB,gBAAgB;IAkBnB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAkBzB;;;;;OAKG;IACH,oBAAoB,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,WAAW,SAAK,GAAG,MAAM;IA+BlE;;OAEG;IACH,KAAK,CAAC,MAAM,GAAE;QACZ,MAAM,CAAC,EAAE,UAAU,CAAC;QACpB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,KAAK,CAAC,EAAE,MAAM,CAAC;KACX,GAAG,gBAAgB,EAAE;IAyB3B;;OAEG;IACH,KAAK,IAAI;QACP,YAAY,EAAE,MAAM,CAAC;QACrB,eAAe,EAAE,MAAM,CAAC;QACxB,eAAe,EAAE,MAAM,CAAC;QACxB,eAAe,EAAE,MAAM,CAAC;QACxB,gBAAgB,EAAE,UAAU,GAAG,IAAI,CAAC;KACrC;CA6BF"}
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.ImplicitFeedbackEngine = void 0;
|
|
37
|
+
const crypto = __importStar(require("crypto"));
|
|
38
|
+
/**
|
|
39
|
+
* Generate a unique feedback ID.
|
|
40
|
+
*/
|
|
41
|
+
function generateFeedbackId() {
    // Shape: fb_<epoch milliseconds>_<8 hex characters from 4 random bytes>.
    const stamp = Date.now();
    const randomSuffix = crypto.randomBytes(4).toString('hex');
    return ['fb', stamp, randomSuffix].join('_');
}
|
|
44
|
+
/**
|
|
45
|
+
* ImplicitFeedbackEngine — captures and interprets implicit user signals.
|
|
46
|
+
*
|
|
47
|
+
* Instead of relying on explicit "thumbs up/down", this engine
|
|
48
|
+
* detects subtle signals from user behavior to infer satisfaction.
|
|
49
|
+
*
|
|
50
|
+
* This is the key differentiator of AgentOS: it learns from
|
|
51
|
+
* what users DO, not just what they SAY.
|
|
52
|
+
*
|
|
53
|
+
* Signal rules (based on DESIGN.md §6.3):
|
|
54
|
+
* - user_deleted_code: User deleted what agent wrote → strong negative (-0.8)
|
|
55
|
+
* - user_modified_output: User modified agent's output → moderate negative (-0.5)
|
|
56
|
+
* - user_repeated_instruction: User repeated same command → mild negative (-0.3)
|
|
57
|
+
* - user_immediate_continue: User immediately continued without edit → positive (+0.3)
|
|
58
|
+
* - user_used_result: User referenced agent's output later → strong positive (+0.7)
|
|
59
|
+
* - user_silence_then_praise: Gap then "谢谢" ("thank you") → mild positive (+0.2)
|
|
60
|
+
* - user_interrupted: User stopped agent mid-execution → negative (-0.6)
|
|
61
|
+
* - agent_self_corrected: Agent caught its own mistake → mild positive for agent (+0.3)
|
|
62
|
+
*/
|
|
63
|
+
class ImplicitFeedbackEngine {
|
|
64
|
+
feedbackLog = [];
|
|
65
|
+
/**
|
|
66
|
+
* Record an implicit feedback signal.
|
|
67
|
+
*
|
|
68
|
+
* @param signal - Type of implicit signal detected
|
|
69
|
+
* @param sessionId - Session where signal was detected
|
|
70
|
+
* @param operationId - Related tool call or request ID
|
|
71
|
+
* @param confidence - How confident we are about this interpretation (0-1)
|
|
72
|
+
* @param source - Where the signal was detected (audit_log, message_pattern, diff)
|
|
73
|
+
*/
|
|
74
|
+
record(signal, sessionId, operationId, confidence = 0.8, source = 'auto-detected') {
|
|
75
|
+
const strength = this.getSignalStrength(signal);
|
|
76
|
+
const feedback = {
|
|
77
|
+
id: generateFeedbackId(),
|
|
78
|
+
timestamp: Date.now(),
|
|
79
|
+
signal,
|
|
80
|
+
strength,
|
|
81
|
+
confidence,
|
|
82
|
+
sessionId,
|
|
83
|
+
operationId,
|
|
84
|
+
source,
|
|
85
|
+
};
|
|
86
|
+
this.feedbackLog.push(feedback);
|
|
87
|
+
return feedback;
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Get the default strength for a signal type.
|
|
91
|
+
*/
|
|
92
|
+
getSignalStrength(signal) {
|
|
93
|
+
switch (signal) {
|
|
94
|
+
case 'user_deleted_code': return -0.8;
|
|
95
|
+
case 'user_interrupted': return -0.6;
|
|
96
|
+
case 'user_provided_correction': return -0.7;
|
|
97
|
+
case 'user_modified_output': return -0.5;
|
|
98
|
+
case 'user_repeated_instruction': return -0.3;
|
|
99
|
+
case 'user_ignored_result': return -0.4;
|
|
100
|
+
case 'user_silence_then_praise': return 0.2;
|
|
101
|
+
case 'user_immediate_continue': return 0.3;
|
|
102
|
+
case 'agent_self_corrected': return 0.3;
|
|
103
|
+
case 'user_explicit_approval': return 0.6;
|
|
104
|
+
case 'user_used_result': return 0.7;
|
|
105
|
+
case 'user_shared_output': return 0.8; // User shared agent output → strong positive
|
|
106
|
+
default: return 0;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
/**
|
|
110
|
+
* Compute the aggregate satisfaction score from all feedback.
|
|
111
|
+
*
|
|
112
|
+
* Weighted by confidence and recency (newer signals matter more).
|
|
113
|
+
* Returns a score from -1.0 (very unhappy) to +1.0 (very happy).
|
|
114
|
+
*/
|
|
115
|
+
getSatisfactionScore(sessionId, recentHours = 24) {
|
|
116
|
+
let relevant = this.feedbackLog;
|
|
117
|
+
if (sessionId) {
|
|
118
|
+
relevant = relevant.filter((f) => f.sessionId === sessionId);
|
|
119
|
+
}
|
|
120
|
+
// Only consider recent signals
|
|
121
|
+
const cutoff = Date.now() - recentHours * 60 * 60 * 1000;
|
|
122
|
+
relevant = relevant.filter((f) => f.timestamp >= cutoff);
|
|
123
|
+
if (relevant.length === 0)
|
|
124
|
+
return 0;
|
|
125
|
+
let weightedSum = 0;
|
|
126
|
+
let totalWeight = 0;
|
|
127
|
+
for (const fb of relevant) {
|
|
128
|
+
// Recency weight: newer = more important
|
|
129
|
+
const ageHours = (Date.now() - fb.timestamp) / (60 * 60 * 1000);
|
|
130
|
+
const recencyWeight = Math.max(0.1, 1 - ageHours / recentHours);
|
|
131
|
+
const weight = fb.confidence * recencyWeight;
|
|
132
|
+
weightedSum += fb.strength * weight;
|
|
133
|
+
totalWeight += weight;
|
|
134
|
+
}
|
|
135
|
+
return totalWeight > 0
|
|
136
|
+
? Math.round((weightedSum / totalWeight) * 100) / 100
|
|
137
|
+
: 0;
|
|
138
|
+
}
|
|
139
|
+
/**
|
|
140
|
+
* Get feedback events, optionally filtered.
|
|
141
|
+
*/
|
|
142
|
+
query(filter = {}) {
|
|
143
|
+
let results = this.feedbackLog;
|
|
144
|
+
if (filter.signal) {
|
|
145
|
+
results = results.filter((f) => f.signal === filter.signal);
|
|
146
|
+
}
|
|
147
|
+
if (filter.sessionId) {
|
|
148
|
+
results = results.filter((f) => f.sessionId === filter.sessionId);
|
|
149
|
+
}
|
|
150
|
+
if (filter.minStrength !== undefined) {
|
|
151
|
+
results = results.filter((f) => f.strength >= filter.minStrength);
|
|
152
|
+
}
|
|
153
|
+
if (filter.maxStrength !== undefined) {
|
|
154
|
+
results = results.filter((f) => f.strength <= filter.maxStrength);
|
|
155
|
+
}
|
|
156
|
+
if (filter.since !== undefined) {
|
|
157
|
+
results = results.filter((f) => f.timestamp >= filter.since);
|
|
158
|
+
}
|
|
159
|
+
results.sort((a, b) => b.timestamp - a.timestamp);
|
|
160
|
+
const limit = filter.limit ?? 50;
|
|
161
|
+
return results.slice(0, limit);
|
|
162
|
+
}
|
|
163
|
+
/**
|
|
164
|
+
* Get feedback summary statistics.
|
|
165
|
+
*/
|
|
166
|
+
stats() {
|
|
167
|
+
const positive = this.feedbackLog.filter((f) => f.strength > 0);
|
|
168
|
+
const negative = this.feedbackLog.filter((f) => f.strength < 0);
|
|
169
|
+
const avgStrength = this.feedbackLog.length > 0
|
|
170
|
+
? this.feedbackLog.reduce((s, f) => s + f.strength, 0) / this.feedbackLog.length
|
|
171
|
+
: 0;
|
|
172
|
+
// Most common signal
|
|
173
|
+
const counts = new Map();
|
|
174
|
+
for (const fb of this.feedbackLog) {
|
|
175
|
+
counts.set(fb.signal, (counts.get(fb.signal) || 0) + 1);
|
|
176
|
+
}
|
|
177
|
+
let mostCommon = null;
|
|
178
|
+
let maxCount = 0;
|
|
179
|
+
for (const [sig, count] of counts) {
|
|
180
|
+
if (count > maxCount) {
|
|
181
|
+
maxCount = count;
|
|
182
|
+
mostCommon = sig;
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
return {
|
|
186
|
+
totalSignals: this.feedbackLog.length,
|
|
187
|
+
positiveSignals: positive.length,
|
|
188
|
+
negativeSignals: negative.length,
|
|
189
|
+
averageStrength: Math.round(avgStrength * 100) / 100,
|
|
190
|
+
mostCommonSignal: mostCommon,
|
|
191
|
+
};
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
exports.ImplicitFeedbackEngine = ImplicitFeedbackEngine;
|
|
195
|
+
//# sourceMappingURL=feedback.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"feedback.js","sourceRoot":"","sources":["../../src/evaluator/feedback.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AACA,+CAAiC;AAEjC;;GAEG;AACH,SAAS,kBAAkB;IACzB,OAAO,MAAM,IAAI,CAAC,GAAG,EAAE,IAAI,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;AACrE,CAAC;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAa,sBAAsB;IACzB,WAAW,GAAuB,EAAE,CAAC;IAE7C;;;;;;;;OAQG;IACH,MAAM,CACJ,MAAkB,EAClB,SAAiB,EACjB,WAAoB,EACpB,UAAU,GAAG,GAAG,EAChB,MAAM,GAAG,eAAe;QAExB,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;QAEhD,MAAM,QAAQ,GAAqB;YACjC,EAAE,EAAE,kBAAkB,EAAE;YACxB,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,MAAM;YACN,QAAQ;YACR,UAAU;YACV,SAAS;YACT,WAAW;YACX,MAAM;SACP,CAAC;QAEF,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChC,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACK,iBAAiB,CAAC,MAAkB;QAC1C,QAAQ,MAAM,EAAE,CAAC;YACf,KAAK,mBAAmB,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;YACtC,KAAK,kBAAkB,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;YACrC,KAAK,0BAA0B,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;YAC7C,KAAK,sBAAsB,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;YACzC,KAAK,2BAA2B,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;YAC9C,KAAK,qBAAqB,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;YACxC,KAAK,0BAA0B,CAAC,CAAC,OAAO,GAAG,CAAC;YAC5C,KAAK,yBAAyB,CAAC,CAAC,OAAO,GAAG,CAAC;YAC3C,KAAK,sBAAsB,CAAC,CAAC,OAAO,GAAG,CAAC;YACxC,KAAK,wBAAwB,CAAC,CAAC,OAAO,GAAG,CAAC;YAC1C,KAAK,kBAAkB,CAAC,CAAC,OAAO,GAAG,CAAC;YACpC,KAAK,oBAAoB,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,6CAA6C;YACpF,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACH,oBAAoB,CAAC,SAAkB,EAAE,WAAW,GAAG,EAAE;QACvD,IAAI,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC;QAEhC,IAAI,SAAS,EAAE,CAAC;YACd,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC;QAC/D,CAAC;QAED,+BAA+B;QAC/B,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,WAAW,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;QACzD,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,MAAM,CAAC,CAAC;QAEzD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAEpC,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,IAAI,WAAW,GAAG,CAAC,CA
AC;QAEpB,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC1B,yCAAyC;YACzC,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC;YAChE,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,QAAQ,GAAG,WAAW,CAAC,CAAC;YAEhE,MAAM,MAAM,GAAG,EAAE,CAAC,UAAU,GAAG,aAAa,CAAC;YAC7C,WAAW,IAAI,EAAE,CAAC,QAAQ,GAAG,MAAM,CAAC;YACpC,WAAW,IAAI,MAAM,CAAC;QACxB,CAAC;QAED,OAAO,WAAW,GAAG,CAAC;YACpB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,WAAW,GAAG,WAAW,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;YACrD,CAAC,CAAC,CAAC,CAAC;IACR,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAOF,EAAE;QACJ,IAAI,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC;QAE/B,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAClB,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,CAAC;QAC9D,CAAC;QACD,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;YACrB,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,MAAM,CAAC,SAAS,CAAC,CAAC;QACpE,CAAC;QACD,IAAI,MAAM,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;YACrC,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,MAAM,CAAC,WAAY,CAAC,CAAC;QACrE,CAAC;QACD,IAAI,MAAM,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;YACrC,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,MAAM,CAAC,WAAY,CAAC,CAAC;QACrE,CAAC;QACD,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;YAC/B,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,MAAM,CAAC,KAAM,CAAC,CAAC;QAChE,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;QAElD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC;QACjC,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IACjC,CAAC;IAED;;OAEG;IACH,KAAK;QAOH,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC;QAChE,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC;QAChE,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC;YAC7C,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,E
AAE,CAAC,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM;YAChF,CAAC,CAAC,CAAC,CAAC;QAEN,qBAAqB;QACrB,MAAM,MAAM,GAAG,IAAI,GAAG,EAAsB,CAAC;QAC7C,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YAClC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC1D,CAAC;QACD,IAAI,UAAU,GAAsB,IAAI,CAAC;QACzC,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;YAClC,IAAI,KAAK,GAAG,QAAQ,EAAE,CAAC;gBACrB,QAAQ,GAAG,KAAK,CAAC;gBACjB,UAAU,GAAG,GAAG,CAAC;YACnB,CAAC;QACH,CAAC;QAED,OAAO;YACL,YAAY,EAAE,IAAI,CAAC,WAAW,CAAC,MAAM;YACrC,eAAe,EAAE,QAAQ,CAAC,MAAM;YAChC,eAAe,EAAE,QAAQ,CAAC,MAAM;YAChC,eAAe,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,GAAG,GAAG,CAAC,GAAG,GAAG;YACpD,gBAAgB,EAAE,UAAU;SAC7B,CAAC;IACJ,CAAC;CACF;AAvKD,wDAuKC"}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { PreExecMetrics, RuntimeMetrics, PostExecMetrics } from '../types';
|
|
2
|
+
import { ImplicitFeedbackEngine } from './feedback';
|
|
3
|
+
/**
 * Snapshot of an agent's quality, accumulated across all evaluations.
 */
export interface AgentProfile {
    /** Composite quality score on a 0-100 scale. */
    overallScore: number;
    /** How many operations have been evaluated. */
    totalOps: number;
    /**
     * Per-category scores that make up the profile.
     * NOTE(review): the scale of each entry is not visible in this declaration — confirm in the implementation.
     */
    breakdown: { preExec: number; runtime: number; postExec: number; userSatisfaction: number };
    /** Trend information: whether quality is improving, plus the op count and score for the recent period. */
    trends: { improving: boolean; recentOps: number; recentScore: number };
    /** Human-readable notes on areas needing attention. */
    warnings: string[];
    /** Human-readable notes on patterns the agent handles well. */
    strengths: string[];
}
|
|
29
|
+
/**
 * AgentProfiler — builds and maintains the agent's quality profile.
 *
 * Combines the three per-call metric groups (PreExec, Runtime, PostExec)
 * with ImplicitFeedback into one composite quality score, intended to
 * improve over time through self-correction.
 */
export declare class AgentProfiler {
    private feedbackEngine;
    private preMetrics;
    private runMetrics;
    private postMetrics;
    private sessionScores;
    /**
     * @param feedbackEngine - Engine that supplies the implicit-feedback part of the profile.
     */
    constructor(feedbackEngine: ImplicitFeedbackEngine);
    /**
     * Record one complete evaluation cycle (pre-execution, runtime and
     * post-execution metrics) for a single tool call.
     *
     * @param sessionId - Session the tool call belongs to.
     * @param pre - Metrics captured before execution.
     * @param run - Metrics captured during execution.
     * @param post - Metrics captured after execution.
     */
    recordCycle(sessionId: string, pre: PreExecMetrics, run: RuntimeMetrics, post: PostExecMetrics): void;
    /**
     * Build the current agent profile.
     *
     * @param sessionId - Optional session identifier.
     *   NOTE(review): presumably narrows the profile to one session and covers
     *   everything when omitted — confirm in the implementation.
     * @returns The profile as it stands at the time of the call.
     */
    getProfile(sessionId?: string): AgentProfile;
    private average;
}
|
|
53
|
+
//# sourceMappingURL=profiler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"profiler.d.ts","sourceRoot":"","sources":["../../src/evaluator/profiler.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,cAAc,EACd,eAAe,EAChB,MAAM,UAAU,CAAC;AAClB,OAAO,EAAE,sBAAsB,EAAE,MAAM,YAAY,CAAC;AAEpD;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,oCAAoC;IACpC,YAAY,EAAE,MAAM,CAAC;IACrB,qCAAqC;IACrC,QAAQ,EAAE,MAAM,CAAC;IACjB,yCAAyC;IACzC,SAAS,EAAE;QACT,OAAO,EAAE,MAAM,CAAC;QAChB,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE,MAAM,CAAC;QACjB,gBAAgB,EAAE,MAAM,CAAC;KAC1B,CAAC;IACF,iBAAiB;IACjB,MAAM,EAAE;QACN,SAAS,EAAE,OAAO,CAAC;QACnB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;KACrB,CAAC;IACF,8BAA8B;IAC9B,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,8BAA8B;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED;;;;;;GAMG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,cAAc,CAAyB;IAE/C,OAAO,CAAC,UAAU,CAAwB;IAC1C,OAAO,CAAC,UAAU,CAAwB;IAC1C,OAAO,CAAC,WAAW,CAAyB;IAC5C,OAAO,CAAC,aAAa,CAAoC;gBAE7C,cAAc,EAAE,sBAAsB;IAIlD;;OAEG;IACH,WAAW,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,EAAE,cAAc,EAAE,GAAG,EAAE,cAAc,EAAE,IAAI,EAAE,eAAe,GAAG,IAAI;IAWrG;;OAEG;IACH,UAAU,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,YAAY;IA0F5C,OAAO,CAAC,OAAO;CAKhB"}
|