sentinel-agentos 0.3.8 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +1797 -0
- package/dist/adapters/evaluation-bridge.d.ts +78 -0
- package/dist/adapters/evaluation-bridge.d.ts.map +1 -0
- package/dist/adapters/evaluation-bridge.js +273 -0
- package/dist/adapters/evaluation-bridge.js.map +1 -0
- package/dist/adapters/memory-bridge.d.ts +110 -0
- package/dist/adapters/memory-bridge.d.ts.map +1 -0
- package/dist/adapters/memory-bridge.js +316 -0
- package/dist/adapters/memory-bridge.js.map +1 -0
- package/dist/adapters/migrate.d.ts +2 -0
- package/dist/adapters/migrate.d.ts.map +1 -0
- package/dist/adapters/migrate.js +63 -0
- package/dist/adapters/migrate.js.map +1 -0
- package/dist/api.d.ts +151 -0
- package/dist/api.d.ts.map +1 -0
- package/dist/api.js +179 -0
- package/dist/api.js.map +1 -0
- package/dist/cli.d.ts +16 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +350 -0
- package/dist/cli.js.map +1 -0
- package/dist/core.d.ts +156 -0
- package/dist/core.d.ts.map +1 -0
- package/dist/core.js +400 -0
- package/dist/core.js.map +1 -0
- package/dist/evaluator/exec-evaluator.d.ts +102 -0
- package/dist/evaluator/exec-evaluator.d.ts.map +1 -0
- package/dist/evaluator/exec-evaluator.js +266 -0
- package/dist/evaluator/exec-evaluator.js.map +1 -0
- package/dist/evaluator/feedback.d.ts +102 -0
- package/dist/evaluator/feedback.d.ts.map +1 -0
- package/dist/evaluator/feedback.js +478 -0
- package/dist/evaluator/feedback.js.map +1 -0
- package/dist/evaluator/profiler.d.ts +56 -0
- package/dist/evaluator/profiler.d.ts.map +1 -0
- package/dist/evaluator/profiler.js +140 -0
- package/dist/evaluator/profiler.js.map +1 -0
- package/dist/guard/audit-log.d.ts +48 -0
- package/dist/guard/audit-log.d.ts.map +1 -0
- package/dist/guard/audit-log.js +213 -0
- package/dist/guard/audit-log.js.map +1 -0
- package/dist/guard/container-sandbox.d.ts +25 -0
- package/dist/guard/container-sandbox.d.ts.map +1 -0
- package/dist/guard/container-sandbox.js +145 -0
- package/dist/guard/container-sandbox.js.map +1 -0
- package/dist/guard/risk-gate.d.ts +101 -0
- package/dist/guard/risk-gate.d.ts.map +1 -0
- package/dist/guard/risk-gate.js +200 -0
- package/dist/guard/risk-gate.js.map +1 -0
- package/dist/guard/sandbox.d.ts +112 -0
- package/dist/guard/sandbox.d.ts.map +1 -0
- package/dist/guard/sandbox.js +379 -0
- package/dist/guard/sandbox.js.map +1 -0
- package/dist/guard/schema-gate.d.ts +90 -0
- package/dist/guard/schema-gate.d.ts.map +1 -0
- package/dist/guard/schema-gate.js +452 -0
- package/dist/guard/schema-gate.js.map +1 -0
- package/dist/guard/snapshot-verify.d.ts +111 -0
- package/dist/guard/snapshot-verify.d.ts.map +1 -0
- package/dist/guard/snapshot-verify.js +571 -0
- package/dist/guard/snapshot-verify.js.map +1 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +59 -0
- package/dist/index.js.map +1 -0
- package/dist/memory/episodic.d.ts +80 -0
- package/dist/memory/episodic.d.ts.map +1 -0
- package/dist/memory/episodic.js +305 -0
- package/dist/memory/episodic.js.map +1 -0
- package/dist/memory/semantic.d.ts +68 -0
- package/dist/memory/semantic.d.ts.map +1 -0
- package/dist/memory/semantic.js +299 -0
- package/dist/memory/semantic.js.map +1 -0
- package/dist/memory/working.d.ts +53 -0
- package/dist/memory/working.d.ts.map +1 -0
- package/dist/memory/working.js +166 -0
- package/dist/memory/working.js.map +1 -0
- package/dist/middleware/openclaw.d.ts +45 -0
- package/dist/middleware/openclaw.d.ts.map +1 -0
- package/dist/middleware/openclaw.js +95 -0
- package/dist/middleware/openclaw.js.map +1 -0
- package/dist/middleware/wrapper.d.ts +54 -0
- package/dist/middleware/wrapper.d.ts.map +1 -0
- package/dist/middleware/wrapper.js +155 -0
- package/dist/middleware/wrapper.js.map +1 -0
- package/dist/server.d.ts +45 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +256 -0
- package/dist/server.js.map +1 -0
- package/dist/src/dashboard.html +9 -7
- package/dist/types/index.d.ts +228 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +23 -0
- package/dist/types/index.js.map +1 -0
- package/package.json +1 -1
- package/scripts/sentinel-light.js +234 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { PreExecMetrics, RuntimeMetrics, PostExecMetrics } from '../types';
|
|
2
|
+
import { SchemaGate } from '../guard/schema-gate';
|
|
3
|
+
import { RiskGate } from '../guard/risk-gate';
|
|
4
|
+
import { WorkingMemory } from '../memory/working';
|
|
5
|
+
/**
 * PreExecEvaluator — collects metrics for a tool call before it runs.
 *
 * Combines the Guard layer (schema gate and risk gate) with the
 * WorkingMemory context to score parameter quality, context
 * utilization, and risk.
 */
export declare class PreExecEvaluator {
    private schemaGate;
    private riskGate;
    private workingMemory;
    constructor(schemaGate: SchemaGate, riskGate: RiskGate, workingMemory: WorkingMemory);
    /**
     * Evaluate a single tool call before it is executed.
     *
     * @param toolName Name of the tool about to be invoked.
     * @param parameters Arguments the tool is about to be invoked with.
     * @returns The pre-execution metrics captured for this call.
     */
    evaluate(toolName: string, parameters: Record<string, unknown>): PreExecMetrics;
    /**
     * Rate how context-aware the parameters are.
     *
     * Scores higher when a path points at a session-relevant project
     * location or the content refers to open files; scores lower for
     * bare strings, random-looking paths, or missing files.
     */
    private evaluateParamQuality;
    /**
     * Rate how well the agent draws on stored context.
     */
    private evaluateContextUtilization;
}
|
|
33
|
+
/**
 * RuntimeEvaluator — collects metrics about a tool execution.
 *
 * Covers retries, self-corrections, timeouts, and whether the agent
 * picked the right tool for the job.
 */
export declare class RuntimeEvaluator {
    /** Per-tool history used to judge tool selection. */
    private toolHistory;
    /**
     * Evaluate one completed tool execution.
     *
     * @param options Facts about the execution: which tool ran, its start and
     *   end timestamps, retry count, self-correction and timeout flags, the
     *   optionally expected tool, and the raw tool result.
     * @returns The runtime metrics derived from those facts.
     */
    evaluate(options: {
        toolName: string;
        startTime: number;
        endTime: number;
        retryCount: number;
        wasSelfCorrected: boolean;
        hadTimeout: boolean;
        expectedTool?: string;
        toolResult: unknown;
    }): RuntimeMetrics;
    /** Adds one tool call to the history tracker. */
    private recordToolCall;
    /**
     * Report per-tool accuracy statistics.
     *
     * @returns A record keyed by tool name with the call count and success rate.
     */
    getToolAccuracy(): Record<string, {
        calls: number;
        successRate: number;
    }>;
}
|
|
63
|
+
/**
 * PostExecEvaluator — collects metrics once a tool execution has finished.
 *
 * Scores verification results and user acceptance patterns, and checks
 * whether the agent later made use of its own result.
 */
export declare class PostExecEvaluator {
    /** Bookkeeping of tracked results, used for utilization scoring. */
    private resultReferenceTracker;
    /**
     * Evaluate the outcome of an execution.
     *
     * @param options Verification outcome and counters, user acceptance and
     *   edit flags, whether the result was used, and optionally the number of
     *   diff lines changed.
     * @returns The post-execution metrics derived from those inputs.
     */
    evaluate(options: {
        verifyPassed: boolean;
        verifyChecks: number;
        verifyFailures: number;
        userAccepted: boolean;
        userProvidedEdit: boolean;
        resultWasUsed: boolean;
        diffLinesChanged?: number;
    }): PostExecMetrics;
    /**
     * Register a tool result so later use of it can be detected.
     * Intended to be called after each tool execution.
     *
     * @param operationId Identifier of the operation that produced the result.
     * @param result The result to track.
     */
    trackResult(operationId: string, result: unknown): void;
    /**
     * Flag a previously tracked result as referenced, i.e. used by the agent later.
     *
     * @param operationId Identifier the result was tracked under.
     */
    markResultReferenced(operationId: string): void;
    /**
     * Report whether a tracked result has been used by the agent.
     *
     * @param operationId Identifier the result was tracked under.
     */
    isResultReferenced(operationId: string): boolean;
    /**
     * Report the overall result utilization rate.
     */
    getUtilizationRate(): number;
}
|
|
102
|
+
//# sourceMappingURL=exec-evaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"exec-evaluator.d.ts","sourceRoot":"","sources":["../../src/evaluator/exec-evaluator.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,cAAc,EACd,eAAe,EAGhB,MAAM,UAAU,CAAC;AAClB,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAClD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAElD;;;;;GAKG;AACH,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,QAAQ,CAAW;IAC3B,OAAO,CAAC,aAAa,CAAgB;gBAGnC,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,QAAQ,EAClB,aAAa,EAAE,aAAa;IAO9B;;OAEG;IACH,QAAQ,CACN,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAClC,cAAc;IAuBjB;;;;;;OAMG;IACH,OAAO,CAAC,oBAAoB;IA+C5B;;OAEG;IACH,OAAO,CAAC,0BAA0B;CAqCnC;AAED;;;;;GAKG;AACH,qBAAa,gBAAgB;IAC3B,iFAAiF;IACjF,OAAO,CAAC,WAAW,CAAgE;IAEnF;;OAEG;IACH,QAAQ,CAAC,OAAO,EAAE;QAChB,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,UAAU,EAAE,MAAM,CAAC;QACnB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,UAAU,EAAE,OAAO,CAAC;QACpB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,UAAU,EAAE,OAAO,CAAC;KACrB,GAAG,cAAc;IA0ClB,gDAAgD;IAChD,OAAO,CAAC,cAAc;IAUtB,6CAA6C;IAC7C,eAAe,IAAI,MAAM,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC;CAY1E;AAED;;;;;GAKG;AACH,qBAAa,iBAAiB;IAC5B,sDAAsD;IACtD,OAAO,CAAC,sBAAsB,CAAoE;IAElG;;OAEG;IACH,QAAQ,CAAC,OAAO,EAAE;QAChB,YAAY,EAAE,OAAO,CAAC;QACtB,YAAY,EAAE,MAAM,CAAC;QACrB,cAAc,EAAE,MAAM,CAAC;QACvB,YAAY,EAAE,OAAO,CAAC;QACtB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,aAAa,EAAE,OAAO,CAAC;QACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;KAC3B,GAAG,eAAe;IA+BnB;;;OAGG;IACH,WAAW,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,GAAG,IAAI;IAIvD;;OAEG;IACH,oBAAoB,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI;IAK/C;;OAEG;IACH,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO;IAIhD;;OAEG;IACH,kBAAkB,IAAI,MAAM;CAM7B"}
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.PostExecEvaluator = exports.RuntimeEvaluator = exports.PreExecEvaluator = void 0;
|
|
4
|
+
/**
 * PreExecEvaluator — captures metrics before tool execution.
 *
 * Watches the Guard layer output and WorkingMemory context
 * to score parameter quality, context utilization, and risk.
 */
class PreExecEvaluator {
    schemaGate;
    riskGate;
    workingMemory;
    /**
     * @param schemaGate Gate whose `check(toolName, parameters)` result is reported as `schemaCheck`.
     * @param riskGate Gate whose `evaluate(toolName, parameters)` result is reported as `riskScore`.
     * @param workingMemory Context store; `openFiles`, `recentMessages` and
     *   `recentToolResults` are read from it.
     */
    constructor(schemaGate, riskGate, workingMemory) {
        this.schemaGate = schemaGate;
        this.riskGate = riskGate;
        this.workingMemory = workingMemory;
    }
    /**
     * Evaluate a tool call before execution.
     *
     * @param toolName Name of the tool about to run.
     * @param parameters Parameters the tool will be called with.
     * @returns `{ timestamp, toolName, schemaCheck, riskScore, paramQuality, contextUtilization }`.
     */
    evaluate(toolName, parameters) {
        // 1. Schema check
        const schemaCheck = this.schemaGate.check(toolName, parameters);
        // 2. Risk assessment
        const riskScore = this.riskGate.evaluate(toolName, parameters);
        // 3. Parameter quality: does the agent use context-aware params?
        const paramQuality = this.evaluateParamQuality(toolName, parameters);
        // 4. Context utilization: is the agent leveraging WorkingMemory?
        const contextUtilization = this.evaluateContextUtilization(toolName, parameters);
        return {
            timestamp: Date.now(),
            toolName,
            schemaCheck,
            riskScore,
            paramQuality,
            contextUtilization,
        };
    }
    /**
     * Score parameter quality based on contextual awareness.
     *
     * Starts at a neutral 0.5 and adjusts:
     *   +0.3 when `path` contains one of the open files,
     *   +0.1 when `content` is longer than 20 characters,
     *   -0.2 when `content` is the empty string,
     *   +0.1 when more than one parameter looks like a .ts/.js/.json reference.
     * An absolute `path` is only recorded as an observation.
     *
     * @returns `{ score, observations }` with `score` clamped to [0, 1] and
     *   rounded to two decimals.
     */
    evaluateParamQuality(_toolName, parameters) {
        let score = 0.5; // neutral start
        const observations = [];
        const path = parameters['path'];
        if (typeof path === 'string') {
            // Skip empty / non-string entries: `path.includes('')` is always true
            // and would award the open-file bonus to every path.
            const referencesOpenFile = this.workingMemory.openFiles.some((f) => typeof f === 'string' && f.length > 0 && path.includes(f));
            if (referencesOpenFile) {
                score += 0.3;
                observations.push('Path references an open file');
            }
            // POSIX root, or a Windows drive prefix (either letter case, either slash).
            if (path.startsWith('/') || /^[A-Za-z]:[\\/]/.test(path)) {
                observations.push('Absolute path used');
            }
        }
        const content = parameters['content'];
        if (typeof content === 'string') {
            if (content.length > 20) {
                score += 0.1;
            }
            else if (content.length === 0) {
                score -= 0.2;
                observations.push('Empty content — possible error');
            }
        }
        // Check for file paths in multiple parameters
        const filePaths = Object.values(parameters).filter((v) => typeof v === 'string' && (v.includes('.ts') || v.includes('.js') || v.includes('.json')));
        if (filePaths.length > 1) {
            score += 0.1;
            observations.push('Multiple file references — coordinated operation');
        }
        return {
            // Single clamp at the end; rounding removes floating-point noise.
            score: Math.round(Math.max(0, Math.min(1, score)) * 100) / 100,
            observations,
        };
    }
    /**
     * Score how well the agent uses stored context.
     *
     * Starts at 0.4, adds 0.1 when recent messages exist, 0.1 when cached tool
     * results exist, and 0.1 for each distinct word (longer than 3 characters,
     * first five per message, last three messages) that also appears in the
     * parameter values.
     *
     * @returns `{ score, patterns }` with `score` capped at 1 and rounded to
     *   two decimals.
     */
    evaluateContextUtilization(_toolName, parameters) {
        let score = 0.4;
        const patterns = [];
        const recentMessages = this.workingMemory.recentMessages;
        if (recentMessages.length > 0) {
            score += 0.1;
            patterns.push(`${recentMessages.length} recent messages available`);
        }
        const cachedCount = this.workingMemory.recentToolResults.size;
        if (cachedCount > 0) {
            score += 0.1;
            patterns.push(`${cachedCount} cached results available`);
        }
        // Render each parameter value as searchable text. Objects and arrays are
        // serialized as JSON because String() would turn them into
        // "[object Object]", hiding nested values and matching the word "object".
        const render = (value) => {
            if (typeof value === 'string')
                return value;
            if (value !== null && typeof value === 'object') {
                try {
                    const json = JSON.stringify(value);
                    if (typeof json === 'string')
                        return json;
                }
                catch {
                    // Circular structure or BigInt: fall back to String() below.
                }
            }
            return String(value);
        };
        const allValues = Object.values(parameters).map((v) => render(v)).join(' ');
        // Each distinct word is rewarded once, so a repeated word cannot inflate the score.
        const matched = new Set();
        for (const msg of recentMessages.slice(-3)) {
            if (typeof msg?.content !== 'string')
                continue;
            const words = msg.content.split(/\s+/).filter((w) => w.length > 3).slice(0, 5);
            for (const word of words) {
                if (matched.has(word) || !allValues.includes(word))
                    continue;
                matched.add(word);
                score += 0.1;
                patterns.push(`Parameter references recent context: "${word}"`);
            }
        }
        return {
            score: Math.round(Math.min(1.0, score) * 100) / 100,
            patterns,
        };
    }
}
|
|
117
|
+
exports.PreExecEvaluator = PreExecEvaluator;
|
|
118
|
+
/**
 * RuntimeEvaluator — captures metrics during execution.
 *
 * Tracks retries, self-corrections, timeouts, and
 * whether the agent selected the right tool for the job.
 */
class RuntimeEvaluator {
    /** Per-tool call history: toolName -> { calls, successes }. */
    toolHistory = new Map();
    /**
     * Evaluate a completed tool execution.
     *
     * @param options Execution facts: `toolName`, `startTime`, `endTime`,
     *   `retryCount`, `wasSelfCorrected`, `hadTimeout`, optional
     *   `expectedTool`, and `toolResult`.
     * @returns `{ retryCount, selfCorrected, hadTimeout, toolSuccess,
     *   toolSelectionMatch, adaptiveScore, durationMs }`. `toolSelectionMatch`
     *   is `undefined` when there is neither an expected tool nor a
     *   sufficiently successful history for this tool.
     */
    evaluate(options) {
        // A reversed or non-finite time range is reported as 0 rather than as a
        // negative / NaN duration.
        const elapsed = options.endTime - options.startTime;
        const durationMs = Number.isFinite(elapsed) && elapsed > 0 ? elapsed : 0;
        const toolSuccess = !options.hadTimeout && options.toolResult !== undefined;
        // Tool selection accuracy: compare against historical patterns
        let toolSelectionMatch;
        if (options.expectedTool) {
            // Direct comparison if expectedTool is provided
            toolSelectionMatch = options.toolName === options.expectedTool;
        }
        else {
            // Auto-detect from the history recorded *before* this call.
            const history = this.toolHistory.get(options.toolName);
            if (history) {
                const historicalSuccessRate = history.calls > 0
                    ? history.successes / history.calls
                    : 0;
                // If this tool has >70% historical success, consider it a "good" selection
                toolSelectionMatch = historicalSuccessRate > 0.7 ? true : undefined;
            }
        }
        // Record this call in history
        this.recordToolCall(options.toolName, toolSuccess);
        // Adaptive score: composite of retry rate, timeout, correction.
        // Only a finite, positive retry count is penalized; NaN would otherwise
        // poison the score and a negative count would inflate it.
        const retries = Number.isFinite(options.retryCount) && options.retryCount > 0
            ? options.retryCount
            : 0;
        let adaptiveScore = 1.0 - retries * 0.15; // Each retry costs 0.15
        if (options.hadTimeout)
            adaptiveScore -= 0.5;
        if (options.wasSelfCorrected)
            adaptiveScore += 0.2; // Self-correction is good!
        adaptiveScore = Math.max(0, Math.min(1, adaptiveScore));
        return {
            retryCount: options.retryCount,
            selfCorrected: options.wasSelfCorrected,
            hadTimeout: options.hadTimeout,
            toolSuccess,
            toolSelectionMatch,
            adaptiveScore: Math.round(adaptiveScore * 100) / 100,
            durationMs,
        };
    }
    /**
     * Record a tool call in the history tracker.
     *
     * @param toolName Tool that was called.
     * @param success Whether the call counted as successful.
     */
    recordToolCall(toolName, success) {
        const existing = this.toolHistory.get(toolName);
        if (existing) {
            existing.calls++;
            if (success)
                existing.successes++;
        }
        else {
            this.toolHistory.set(toolName, { calls: 1, successes: success ? 1 : 0 });
        }
    }
    /**
     * Get tool selection accuracy statistics.
     *
     * @returns A record keyed by tool name with `calls` and a `successRate`
     *   rounded to two decimals.
     */
    getToolAccuracy() {
        // Object.fromEntries defines own data properties, so a tool literally
        // named "__proto__" is reported instead of replacing the prototype.
        return Object.fromEntries(Array.from(this.toolHistory, ([tool, history]) => [
            tool,
            {
                calls: history.calls,
                successRate: history.calls > 0
                    ? Math.round((history.successes / history.calls) * 100) / 100
                    : 0,
            },
        ]));
    }
}
|
|
196
|
+
exports.RuntimeEvaluator = RuntimeEvaluator;
|
|
197
|
+
/**
 * PostExecEvaluator — captures metrics after execution.
 *
 * Scores verify results, user acceptance patterns,
 * and checks if the agent actually used its own result later.
 */
class PostExecEvaluator {
    /** Tracked results: operationId -> { result, referenced }. */
    resultReferenceTracker = new Map();
    /**
     * Evaluate post-execution outcomes.
     *
     * @param options `verifyPassed`, `verifyChecks`, `verifyFailures`,
     *   `userAccepted`, `userProvidedEdit`, `resultWasUsed`, and optional
     *   `diffLinesChanged`.
     * @returns `{ verifyPassed, verifyScore, userAccepted, userEditRate,
     *   resultUtilized, outcomeScore, healthy, diffLinesChanged }`; the two
     *   scores are rounded to two decimals.
     */
    evaluate(options) {
        // Verify score: share of checks that did not fail, or 1 when no checks
        // were counted. Clamped to [0, 1] so inconsistent counters (more
        // failures than checks, negative failures) cannot push the composite
        // score out of range.
        // NOTE(review): `verifyPassed` does not influence the score, so a failed
        // verification with zero counted checks still scores 1 — confirm intended.
        const verifyScore = options.verifyChecks > 0
            ? Math.min(1, Math.max(0, 1 - (options.verifyFailures / options.verifyChecks)))
            : 1;
        // User acceptance: accepted -> 1.0, edited without accepting -> 0.3,
        // neither -> 0.7.
        let acceptance = 0.7;
        if (options.userAccepted) {
            acceptance = 1.0;
        }
        else if (options.userProvidedEdit) {
            acceptance = 0.3;
        }
        // Composite outcome score: 30% verify, 40% acceptance, 30% utilization.
        const utilization = options.resultWasUsed ? 0.3 : 0;
        const outcomeScore = verifyScore * 0.3 + acceptance * 0.4 + utilization;
        // Overall health flag
        const healthy = verifyScore > 0.8 && acceptance > 0.5;
        return {
            verifyPassed: options.verifyPassed,
            verifyScore: Math.round(verifyScore * 100) / 100,
            userAccepted: options.userAccepted,
            userEditRate: options.userProvidedEdit ? 1 : 0,
            resultUtilized: options.resultWasUsed,
            outcomeScore: Math.round(outcomeScore * 100) / 100,
            healthy,
            diffLinesChanged: options.diffLinesChanged,
        };
    }
    /**
     * Track a tool result for later utilization detection.
     * Call this after each tool execution. Tracking an id again replaces the
     * entry and resets its referenced flag.
     *
     * @param operationId Identifier to track the result under.
     * @param result The tool result.
     */
    trackResult(operationId, result) {
        this.resultReferenceTracker.set(operationId, { result, referenced: false });
    }
    /**
     * Mark a previously-tracked result as referenced (used by the agent later).
     * Unknown ids are ignored.
     *
     * @param operationId Identifier the result was tracked under.
     */
    markResultReferenced(operationId) {
        const entry = this.resultReferenceTracker.get(operationId);
        if (entry)
            entry.referenced = true;
    }
    /**
     * Check if a result has been utilized by the agent.
     *
     * @param operationId Identifier the result was tracked under.
     * @returns `true` only for a tracked result that was marked referenced.
     */
    isResultReferenced(operationId) {
        return this.resultReferenceTracker.get(operationId)?.referenced ?? false;
    }
    /**
     * Get overall result utilization rate.
     *
     * @returns Referenced results divided by tracked results, rounded to two
     *   decimals; 0 when nothing is tracked.
     */
    getUtilizationRate() {
        const total = this.resultReferenceTracker.size;
        if (total === 0)
            return 0;
        let referenced = 0;
        for (const entry of this.resultReferenceTracker.values()) {
            if (entry.referenced)
                referenced++;
        }
        return Math.round((referenced / total) * 100) / 100;
    }
}
|
|
265
|
+
exports.PostExecEvaluator = PostExecEvaluator;
|
|
266
|
+
//# sourceMappingURL=exec-evaluator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"exec-evaluator.js","sourceRoot":"","sources":["../../src/evaluator/exec-evaluator.ts"],"names":[],"mappings":";;;AAWA;;;;;GAKG;AACH,MAAa,gBAAgB;IACnB,UAAU,CAAa;IACvB,QAAQ,CAAW;IACnB,aAAa,CAAgB;IAErC,YACE,UAAsB,EACtB,QAAkB,EAClB,aAA4B;QAE5B,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,QAAQ,CACN,QAAgB,EAChB,UAAmC;QAEnC,kBAAkB;QAClB,MAAM,WAAW,GAAgB,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAE7E,qBAAqB;QACrB,MAAM,SAAS,GAAc,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAE1E,iEAAiE;QACjE,MAAM,YAAY,GAAG,IAAI,CAAC,oBAAoB,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAErE,iEAAiE;QACjE,MAAM,kBAAkB,GAAG,IAAI,CAAC,0BAA0B,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAEjF,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,QAAQ;YACR,WAAW;YACX,SAAS;YACT,YAAY;YACZ,kBAAkB;SACnB,CAAC;IACJ,CAAC;IAED;;;;;;OAMG;IACK,oBAAoB,CAC1B,SAAiB,EACjB,UAAmC;QAEnC,IAAI,KAAK,GAAG,GAAG,CAAC,CAAC,gBAAgB;QACjC,MAAM,YAAY,GAAa,EAAE,CAAC;QAElC,wCAAwC;QACxC,IAAI,OAAO,UAAU,CAAC,MAAM,CAAC,KAAK,QAAQ,EAAE,CAAC;YAC3C,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAW,CAAC;YAC1C,IAAI,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC/D,KAAK,IAAI,GAAG,CAAC;gBACb,YAAY,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;YACpD,CAAC;YACD,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE,CAAC;gBACpD,YAAY,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;YAC1C,CAAC;QACH,CAAC;QAED,2CAA2C;QAC3C,IAAI,OAAO,UAAU,CAAC,SAAS,CAAC,KAAK,QAAQ,EAAE,CAAC;YAC9C,MAAM,OAAO,GAAG,UAAU,CAAC,SAAS,CAAW,CAAC;YAChD,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;gBACxB,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC;YACrC,CAAC;YACD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACzB,KAAK,IAAI,GAAG,CAAC;gBACb,YAAY,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;YACtD,CAAC;QACH,CAAC;QAED,8CAA8C;QAC9C,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAChD,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,CAAC,QAAQ
,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAChG,CAAC;QAEF,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC;YACnC,YAAY,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;QACxE,CAAC;QAED,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;YAC9D,YAAY;SACb,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,0BAA0B,CAChC,SAAiB,EACjB,UAAmC;QAEnC,IAAI,KAAK,GAAG,GAAG,CAAC;QAChB,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,4CAA4C;QAC5C,IAAI,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjD,KAAK,IAAI,GAAG,CAAC;YACb,QAAQ,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,MAAM,4BAA4B,CAAC,CAAC;QACzF,CAAC;QAED,0CAA0C;QAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,iBAAiB,CAAC,IAAI,CAAC;QAC9D,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;YACpB,KAAK,IAAI,GAAG,CAAC;YACb,QAAQ,CAAC,IAAI,CAAC,GAAG,WAAW,2BAA2B,CAAC,CAAC;QAC3D,CAAC;QAED,8CAA8C;QAC9C,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClE,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9D,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YACnE,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBACrC,IAAI,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC7B,KAAK,IAAI,GAAG,CAAC;oBACb,QAAQ,CAAC,IAAI,CAAC,yCAAyC,IAAI,GAAG,CAAC,CAAC;gBAClE,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;YACnD,QAAQ;SACT,CAAC;IACJ,CAAC;CACF;AA1ID,4CA0IC;AAED;;;;;GAKG;AACH,MAAa,gBAAgB;IAC3B,iFAAiF;IACzE,WAAW,GAAsD,IAAI,GAAG,EAAE,CAAC;IAEnF;;OAEG;IACH,QAAQ,CAAC,OASR;QACC,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,GAAG,OAAO,CAAC,SAAS,CAAC;QACvD,MAAM,WAAW,GAAG,CAAC,OAAO,CAAC,UAAU,IAAI,OAAO,CAAC,UAAU,KAAK,SAAS,CAAC;QAE5E,+DAA+D;QAC/D,IAAI,kBAAuC,CAAC;QAC5C,IAAI,OAAO,
CAAC,YAAY,EAAE,CAAC;YACzB,gDAAgD;YAChD,kBAAkB,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,YAAY,CAAC;QACjE,CAAC;aAAM,CAAC;YACN,wEAAwE;YACxE,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YACvD,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,qBAAqB,GAAG,OAAO,CAAC,KAAK,GAAG,CAAC;oBAC7C,CAAC,CAAC,OAAO,CAAC,SAAS,GAAG,OAAO,CAAC,KAAK;oBACnC,CAAC,CAAC,CAAC,CAAC;gBACN,2EAA2E;gBAC3E,kBAAkB,GAAG,qBAAqB,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;YACtE,CAAC;QACH,CAAC;QAED,8BAA8B;QAC9B,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;QAEnD,+DAA+D;QAC/D,IAAI,aAAa,GAAG,GAAG,CAAC;QACxB,aAAa,IAAI,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC,wBAAwB;QACpE,IAAI,OAAO,CAAC,UAAU;YAAE,aAAa,IAAI,GAAG,CAAC;QAC7C,IAAI,OAAO,CAAC,gBAAgB;YAAE,aAAa,IAAI,GAAG,CAAC,CAAC,2BAA2B;QAC/E,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC,CAAC;QAExD,OAAO;YACL,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,aAAa,EAAE,OAAO,CAAC,gBAAgB;YACvC,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,WAAW;YACX,kBAAkB;YAClB,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,GAAG,CAAC,GAAG,GAAG;YACpD,UAAU;SACX,CAAC;IACJ,CAAC;IAED,gDAAgD;IACxC,cAAc,CAAC,QAAgB,EAAE,OAAgB;QACvD,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAChD,IAAI,QAAQ,EAAE,CAAC;YACb,QAAQ,CAAC,KAAK,EAAE,CAAC;YACjB,IAAI,OAAO;gBAAE,QAAQ,CAAC,SAAS,EAAE,CAAC;QACpC,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAC3E,CAAC;IACH,CAAC;IAED,6CAA6C;IAC7C,eAAe;QACb,MAAM,MAAM,GAA2D,EAAE,CAAC;QAC1E,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YAC/C,MAAM,CAAC,IAAI,CAAC,GAAG;gBACb,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,WAAW,EAAE,OAAO,CAAC,KAAK,GAAG,CAAC;oBAC5B,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;oBAC7D,CAAC,CAAC,CAAC;aACN,CAAC;QACJ,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AAlFD,4CAkFC;AAED;;;;;GAKG;AACH,MAAa,iBAAiB;IAC5B,sDAAsD;IAC9C,sBAAsB,GAA0D,IAAI,GAAG,EAAE,CAAC;IAElG;;OAEG;IA
CH,QAAQ,CAAC,OAQR;QACC,eAAe;QACf,MAAM,WAAW,GAAG,OAAO,CAAC,YAAY,GAAG,CAAC;YAC1C,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,GAAG,OAAO,CAAC,YAAY,CAAC;YACrD,CAAC,CAAC,CAAC,CAAC;QAEN,kBAAkB;QAClB,MAAM,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;QAErF,0BAA0B;QAC1B,MAAM,YAAY,GAAG,CACnB,WAAW,GAAG,GAAG;YACjB,UAAU,GAAG,GAAG;YAChB,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAClC,CAAC;QAEF,sBAAsB;QACtB,MAAM,OAAO,GAAG,WAAW,GAAG,GAAG,IAAI,UAAU,GAAG,GAAG,CAAC;QAEtD,OAAO;YACL,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,GAAG,GAAG,CAAC,GAAG,GAAG;YAChD,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,YAAY,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,cAAc,EAAE,OAAO,CAAC,aAAa;YACrC,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,GAAG,CAAC,GAAG,GAAG;YAClD,OAAO;YACP,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;SAC3C,CAAC;IACJ,CAAC;IAED;;;OAGG;IACH,WAAW,CAAC,WAAmB,EAAE,MAAe;QAC9C,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,WAAW,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC;IAC9E,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,WAAmB;QACtC,MAAM,KAAK,GAAG,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QAC3D,IAAI,KAAK;YAAE,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,kBAAkB,CAAC,WAAmB;QACpC,OAAO,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,UAAU,IAAI,KAAK,CAAC;IAC3E,CAAC;IAED;;OAEG;IACH,kBAAkB;QAChB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,sBAAsB,CAAC,MAAM,EAAE,CAAC,CAAC;QACjE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QACnC,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC;QAC9D,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;IAC/D,CAAC;CACF;AA9ED,8CA8EC"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { ImplicitFeedback, SignalType } from '../types';
|
|
2
|
+
import { AuditEntry } from '../types';
|
|
3
|
+
/**
 * ImplicitFeedbackEngine — captures and interprets implicit user signals.
 *
 * Instead of relying on explicit "thumbs up/down", this engine
 * detects subtle signals from user behavior to infer satisfaction.
 *
 * Two modes:
 * - Manual: caller provides explicit signals via record()
 * - Auto-detect: scans audit log to infer signals (results unused,
 *   results modified later, repeated same tool, verify failures)
 *
 * This is the key differentiator of AgentOS: it learns from
 * what users DO, not just what they SAY.
 */
export declare class ImplicitFeedbackEngine {
    private feedbackLog;
    private persistPath;
    private globalAuditPath;
    private detectedKeys;
    private static readonly MAX_FEEDBACK;
    private static readonly MAX_DETECTED_KEYS;
    private persistLinePath;
    /**
     * Record an implicit feedback signal.
     *
     * @param signal Kind of signal being recorded.
     * @param sessionId Session the signal belongs to.
     * @param operationId Optional operation the signal refers to.
     * @param confidence Optional confidence attached to the signal.
     * @param source Optional label for where the signal came from.
     * @returns The recorded feedback entry.
     */
    record(signal: SignalType, sessionId: string, operationId?: string, confidence?: number, source?: string): ImplicitFeedback;
    /**
     * Enable persistence for feedbackLog and auto-detected signal keys.
     *
     * @param workspaceRoot Workspace root directory to persist under.
     */
    enablePersistence(workspaceRoot: string): void;
    /** Write counter. NOTE(review): presumably drives periodic compaction — confirm in feedback.js. */
    private _writeCount;
    /** Persist: append one line to feedback-lines.jsonl. */
    private persist;
    /** Compact feedback-lines.jsonl: keep last 200 lines only. */
    private compactFeedbackFile;
    /** Load persisted feedback log from disk. */
    private load;
    /**
     * Cross-session auto-detect: scan the global audit.jsonl for signals
     * from ALL sessions, not just the current one.
     *
     * @returns Number of signals auto-detected.
     */
    autoDetectGlobal(): number;
    /**
     * Scan the audit log and auto-detect implicit feedback signals.
     *
     * Detection rules (conservative — low confidence to avoid false positives):
     * - verify FAIL or WARN → user_provided_correction (agent made mistakes)
     * - same tool+params called within 60s → user_repeated_instruction (low confidence, noisy)
     * - high risk operations that were retried and eventually passed → agent_self_corrected
     *
     * Note: auto-detected signals carry lower confidence than explicit user feedback.
     * They serve as supplementary data, not primary quality indicators.
     *
     * @param entries Recent audit entries to analyze
     * @param sessionId Session to attribute signals to
     * @returns Number of signals auto-detected
     */
    autoDetect(entries: AuditEntry[], sessionId: string): number;
    /**
     * Analyze user messages to detect implicit correction/feedback signals.
     *
     * Chinese corrective patterns (high precision, low recall — only matches clear signals):
     * - "不对"/"错了"/"不是这样" (not right / wrong / not like this) → user_provided_correction (confidence 0.8)
     * - "漏了"/"缺了"/"没包括"/"遗漏" (missed / lacking / not included / omitted) → user_provided_correction (confidence 0.75)
     * - "你没"/"你怎么"/"你咋" (you didn't / how come you) + negative action
     *   (忘记/漏/没/不: forgot / missed / didn't / not) → user_provided_correction (confidence 0.7)
     * - "失忆"/"忘了"/"不记得" (amnesia / forgot / don't remember) → user_provided_correction (confidence 0.85)
     * - "重新"/"再查"/"再搜"/"再看看" (redo / check again / search again / look again) → user_repeated_instruction (confidence 0.6)
     * - "不对吧"/"没音信" (that can't be right / no response) → user_interrupted (confidence 0.5)
     *
     * English patterns:
     * - "wrong"/"incorrect"/"not right" → user_provided_correction (confidence 0.7)
     * - "missed"/"missing"/"forgot"/"incomplete" → user_provided_correction (confidence 0.7)
     * - "redo"/"again"/"retry"/"try again" → user_repeated_instruction (confidence 0.5)
     *
     * @param messages Messages to scan; each has a role, content, and optional timestamp.
     * @param sessionId Session to attribute signals to.
     * @returns Number of signals detected.
     */
    detectFromUserMessages(messages: Array<{
        role: string;
        content: string;
        ts?: number;
    }>, sessionId: string): number;
    private getSignalStrength;
    /**
     * Compute a satisfaction score.
     *
     * @param sessionId Optional session identifier.
     * @param recentHours Optional number of hours.
     *   NOTE(review): presumably these restrict the score to one session and a
     *   recent time window — confirm in feedback.js.
     */
    getSatisfactionScore(sessionId?: string, recentHours?: number): number;
    /**
     * Query recorded feedback.
     *
     * @param filter Optional criteria: signal type, session, strength bounds,
     *   `since`, and a result `limit`.
     * @returns The matching feedback entries.
     */
    query(filter?: {
        signal?: SignalType;
        sessionId?: string;
        minStrength?: number;
        maxStrength?: number;
        since?: number;
        limit?: number;
    }): ImplicitFeedback[];
    /**
     * Summary statistics; see the return type for the reported fields.
     * `mostCommonSignal` may be `null`.
     */
    stats(): {
        totalSignals: number;
        positiveSignals: number;
        negativeSignals: number;
        averageStrength: number;
        mostCommonSignal: SignalType | null;
    };
}
|
|
102
|
+
//# sourceMappingURL=feedback.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/evaluator/feedback.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACxD,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAYtC;;;;;;;;;;;;;GAaG;AACH,qBAAa,sBAAsB;IACjC,OAAO,CAAC,WAAW,CAA0B;IAC7C,OAAO,CAAC,WAAW,CAAuB;IAE1C,OAAO,CAAC,eAAe,CAAuB;IAE9C,OAAO,CAAC,YAAY,CAA0B;IAE9C,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,YAAY,CAAO;IAC3C,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAiB,CAAO;IAEhD,OAAO,CAAC,eAAe,CAAuB;IAE9C;;OAEG;IACH,MAAM,CACJ,MAAM,EAAE,UAAU,EAClB,SAAS,EAAE,MAAM,EACjB,WAAW,CAAC,EAAE,MAAM,EACpB,UAAU,SAAM,EAChB,MAAM,SAAkB,GACvB,gBAAgB;IA8BnB;;;;;;;;;;;;;;OAcG;IACH;;OAEG;IACH,iBAAiB,CAAC,aAAa,EAAE,MAAM,GAAG,IAAI;IAW9C,wDAAwD;IACxD,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,OAAO;IAgBf,8DAA8D;IAC9D,OAAO,CAAC,mBAAmB;IAa3B,6CAA6C;IAC7C,OAAO,CAAC,IAAI;IAkCZ;;;OAGG;IACH,gBAAgB,IAAI,MAAM;IAa1B,UAAU,CAAC,OAAO,EAAE,UAAU,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM;IAmF5D;;;;;;;;;;;;;;;;;OAiBG;IACH,sBAAsB,CAAC,QAAQ,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,EAAE,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM;IA2FlH,OAAO,CAAC,iBAAiB;IAkBzB,oBAAoB,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,WAAW,SAAK,GAAG,MAAM;IAoClE,KAAK,CAAC,MAAM,GAAE;QACZ,MAAM,CAAC,EAAE,UAAU,CAAC;QACpB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,KAAK,CAAC,EAAE,MAAM,CAAC;KACX,GAAG,gBAAgB,EAAE;IAa3B,KAAK,IAAI;QACP,YAAY,EAAE,MAAM,CAAC;QACrB,eAAe,EAAE,MAAM,CAAC;QACxB,eAAe,EAAE,MAAM,CAAC;QACxB,eAAe,EAAE,MAAM,CAAC;QACxB,gBAAgB,EAAE,UAAU,GAAG,IAAI,CAAC;KACrC;CAuBF"}
|