sentinel-agentos 0.3.7 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/LICENSE +0 -21
- package/README.md +0 -1797
- package/dist/adapters/evaluation-bridge.d.ts +0 -78
- package/dist/adapters/evaluation-bridge.d.ts.map +0 -1
- package/dist/adapters/evaluation-bridge.js +0 -273
- package/dist/adapters/evaluation-bridge.js.map +0 -1
- package/dist/adapters/memory-bridge.d.ts +0 -110
- package/dist/adapters/memory-bridge.d.ts.map +0 -1
- package/dist/adapters/memory-bridge.js +0 -316
- package/dist/adapters/memory-bridge.js.map +0 -1
- package/dist/adapters/migrate.d.ts +0 -2
- package/dist/adapters/migrate.d.ts.map +0 -1
- package/dist/adapters/migrate.js +0 -63
- package/dist/adapters/migrate.js.map +0 -1
- package/dist/api.d.ts +0 -151
- package/dist/api.d.ts.map +0 -1
- package/dist/api.js +0 -179
- package/dist/api.js.map +0 -1
- package/dist/cli.d.ts +0 -16
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js +0 -350
- package/dist/cli.js.map +0 -1
- package/dist/core.d.ts +0 -156
- package/dist/core.d.ts.map +0 -1
- package/dist/core.js +0 -400
- package/dist/core.js.map +0 -1
- package/dist/dashboard.html +0 -175
- package/dist/evaluator/exec-evaluator.d.ts +0 -102
- package/dist/evaluator/exec-evaluator.d.ts.map +0 -1
- package/dist/evaluator/exec-evaluator.js +0 -266
- package/dist/evaluator/exec-evaluator.js.map +0 -1
- package/dist/evaluator/feedback.d.ts +0 -96
- package/dist/evaluator/feedback.d.ts.map +0 -1
- package/dist/evaluator/feedback.js +0 -419
- package/dist/evaluator/feedback.js.map +0 -1
- package/dist/evaluator/profiler.d.ts +0 -55
- package/dist/evaluator/profiler.d.ts.map +0 -1
- package/dist/evaluator/profiler.js +0 -130
- package/dist/evaluator/profiler.js.map +0 -1
- package/dist/guard/audit-log.d.ts +0 -47
- package/dist/guard/audit-log.d.ts.map +0 -1
- package/dist/guard/audit-log.js +0 -199
- package/dist/guard/audit-log.js.map +0 -1
- package/dist/guard/container-sandbox.d.ts +0 -25
- package/dist/guard/container-sandbox.d.ts.map +0 -1
- package/dist/guard/container-sandbox.js +0 -145
- package/dist/guard/container-sandbox.js.map +0 -1
- package/dist/guard/risk-gate.d.ts +0 -101
- package/dist/guard/risk-gate.d.ts.map +0 -1
- package/dist/guard/risk-gate.js +0 -200
- package/dist/guard/risk-gate.js.map +0 -1
- package/dist/guard/sandbox.d.ts +0 -112
- package/dist/guard/sandbox.d.ts.map +0 -1
- package/dist/guard/sandbox.js +0 -379
- package/dist/guard/sandbox.js.map +0 -1
- package/dist/guard/schema-gate.d.ts +0 -90
- package/dist/guard/schema-gate.d.ts.map +0 -1
- package/dist/guard/schema-gate.js +0 -452
- package/dist/guard/schema-gate.js.map +0 -1
- package/dist/guard/snapshot-verify.d.ts +0 -111
- package/dist/guard/snapshot-verify.d.ts.map +0 -1
- package/dist/guard/snapshot-verify.js +0 -571
- package/dist/guard/snapshot-verify.js.map +0 -1
- package/dist/index.d.ts +0 -28
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -59
- package/dist/index.js.map +0 -1
- package/dist/memory/episodic.d.ts +0 -76
- package/dist/memory/episodic.d.ts.map +0 -1
- package/dist/memory/episodic.js +0 -289
- package/dist/memory/episodic.js.map +0 -1
- package/dist/memory/semantic.d.ts +0 -68
- package/dist/memory/semantic.d.ts.map +0 -1
- package/dist/memory/semantic.js +0 -299
- package/dist/memory/semantic.js.map +0 -1
- package/dist/memory/working.d.ts +0 -53
- package/dist/memory/working.d.ts.map +0 -1
- package/dist/memory/working.js +0 -166
- package/dist/memory/working.js.map +0 -1
- package/dist/middleware/openclaw.d.ts +0 -45
- package/dist/middleware/openclaw.d.ts.map +0 -1
- package/dist/middleware/openclaw.js +0 -95
- package/dist/middleware/openclaw.js.map +0 -1
- package/dist/middleware/wrapper.d.ts +0 -54
- package/dist/middleware/wrapper.d.ts.map +0 -1
- package/dist/middleware/wrapper.js +0 -155
- package/dist/middleware/wrapper.js.map +0 -1
- package/dist/server.d.ts +0 -45
- package/dist/server.d.ts.map +0 -1
- package/dist/server.js +0 -256
- package/dist/server.js.map +0 -1
- package/dist/types/index.d.ts +0 -228
- package/dist/types/index.d.ts.map +0 -1
- package/dist/types/index.js +0 -23
- package/dist/types/index.js.map +0 -1
- package/scripts/sentinel-light.js +0 -234
package/dist/dashboard.html
DELETED
|
@@ -1,175 +0,0 @@
|
|
|
1
|
-
<!DOCTYPE html>
|
|
2
|
-
<html lang="zh-CN">
|
|
3
|
-
<head>
|
|
4
|
-
<meta charset="UTF-8">
|
|
5
|
-
<meta name="viewport" content="width=device-width,initial-scale=1.0">
|
|
6
|
-
<title>Sentinel AgentOS</title>
|
|
7
|
-
<style>
|
|
8
|
-
:root{--bg:#060912;--card:#0c111f;--border:#182035;--text:#e2e7f2;--dim:#4e5c79;--blue:#3b82f6;--green:#10b981;--red:#ef4444;--amber:#f59e0b}
|
|
9
|
-
*{margin:0;padding:0;box-sizing:border-box}
|
|
10
|
-
html,body{height:100%;overflow:hidden}
|
|
11
|
-
body{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;background:var(--bg);color:var(--text);font-size:17px;display:flex}
|
|
12
|
-
|
|
13
|
-
.sb{width:260px;flex-shrink:0;background:var(--card);border-right:1px solid var(--border);display:flex;flex-direction:column;padding:22px;gap:20px;overflow-y:auto}
|
|
14
|
-
.sb-logo{display:flex;align-items:center;gap:10px;font-size:17px;font-weight:700}
|
|
15
|
-
.sb-logo i{width:30px;height:30px;border-radius:6px;background:linear-gradient(135deg,#3b82f6,#6366f1);display:grid;place-items:center;font-size:14px;font-style:normal}
|
|
16
|
-
.sb-logo em{font-weight:400;color:var(--dim);font-size:12px;font-style:normal}
|
|
17
|
-
.sb-sect h3{font-size:10px;font-weight:700;text-transform:uppercase;letter-spacing:1px;color:var(--dim);margin-bottom:8px}
|
|
18
|
-
.sb-stat{display:flex;justify-content:space-between;align-items:baseline;padding:8px 0;border-bottom:1px solid rgba(24,32,53,.5)}
|
|
19
|
-
.sb-stat .k{font-size:14px;color:var(--dim)}
|
|
20
|
-
.sb-stat .v{font-size:24px;font-weight:700;letter-spacing:-.4px}
|
|
21
|
-
|
|
22
|
-
.bar-wrap{display:flex;align-items:center;gap:12px;padding:8px 0}
|
|
23
|
-
.bar-track{flex:1;height:10px;border-radius:5px;background:rgba(24,32,53,.6);overflow:hidden;display:flex}
|
|
24
|
-
.bar-track .pass{background:var(--green);border-radius:5px 0 0 5px}
|
|
25
|
-
.bar-track .fail{background:var(--red);border-radius:0 5px 5px 0}
|
|
26
|
-
.bar-wrap .pct{font-size:18px;font-weight:700;letter-spacing:-.3px;min-width:46px;text-align:right}
|
|
27
|
-
.bar-wrap+.sub{font-size:11px;color:var(--dim)}
|
|
28
|
-
|
|
29
|
-
.g{color:var(--green)}.r{color:var(--red)}.a{color:var(--amber)}.p{color:#8b5cf6}
|
|
30
|
-
|
|
31
|
-
.main{flex:1;display:flex;flex-direction:column;overflow:hidden;min-width:0}
|
|
32
|
-
.topline{flex-shrink:0;display:flex;align-items:center;justify-content:space-between;padding:14px 28px;border-bottom:1px solid var(--border)}
|
|
33
|
-
.topline h2{font-size:14px;font-weight:700;text-transform:uppercase;letter-spacing:.8px;color:var(--dim)}
|
|
34
|
-
.topline .r{display:flex;align-items:center;gap:14px;font-size:13px;color:var(--dim)}
|
|
35
|
-
.topline .r b{width:7px;height:7px;border-radius:50%;display:inline-block;background:var(--green);animation:p 2s infinite}
|
|
36
|
-
@keyframes p{50%{opacity:.2}}
|
|
37
|
-
|
|
38
|
-
.tbl-wrap{flex:1;overflow-y:auto;padding:0 28px}
|
|
39
|
-
.tbl-wrap::-webkit-scrollbar{width:5px}
|
|
40
|
-
.tbl-wrap::-webkit-scrollbar-track{background:transparent}
|
|
41
|
-
.tbl-wrap::-webkit-scrollbar-thumb{background:var(--border);border-radius:2px}
|
|
42
|
-
|
|
43
|
-
table{width:100%;border-collapse:collapse;table-layout:fixed}
|
|
44
|
-
thead{position:sticky;top:0;z-index:2}
|
|
45
|
-
th{padding:13px 16px;text-align:left;font-size:11px;font-weight:700;text-transform:uppercase;letter-spacing:.6px;color:var(--dim);background:var(--bg);border-bottom:2px solid var(--border)}
|
|
46
|
-
th:first-child{width:46px;text-align:center;padding:13px 0}
|
|
47
|
-
th:nth-child(2){width:86px}
|
|
48
|
-
th:nth-child(4){width:100px}
|
|
49
|
-
th:nth-child(5){width:90px}
|
|
50
|
-
th:nth-child(6){width:90px}
|
|
51
|
-
|
|
52
|
-
td{padding:13px 16px;border-bottom:1px solid rgba(24,32,53,.35);font-size:16px;white-space:nowrap}
|
|
53
|
-
td:first-child{text-align:center;padding:13px 0;font-size:17px}
|
|
54
|
-
tr:hover{background:rgba(59,130,246,.02)}
|
|
55
|
-
tr.fail{background:rgba(239,68,68,.005)}tr.fail:hover{background:rgba(239,68,68,.015)}
|
|
56
|
-
|
|
57
|
-
.tag{display:inline-block;font-family:'SF Mono',monospace;font-size:13px;font-weight:600;padding:3px 9px;border-radius:4px}
|
|
58
|
-
.tag.e{color:#60a5fa;background:rgba(59,130,246,.06)}
|
|
59
|
-
.tag.w{color:#a78bfa;background:rgba(139,92,246,.06)}
|
|
60
|
-
.tag.d{color:#22d3ee;background:rgba(6,182,212,.06)}
|
|
61
|
-
.tag.g{color:#34d399;background:rgba(16,185,129,.06)}
|
|
62
|
-
tr.fail .tag.e,tr.fail .tag.w,tr.fail .tag.d,tr.fail .tag.g{color:#f87171;background:rgba(239,68,68,.06)}
|
|
63
|
-
|
|
64
|
-
.pm{font-family:'SF Mono',monospace;font-size:14px;color:var(--dim);display:block;overflow:hidden;text-overflow:ellipsis;opacity:.85}
|
|
65
|
-
.tm{font-family:'SF Mono',monospace;font-size:13px;color:var(--dim)}
|
|
66
|
-
|
|
67
|
-
.pill{display:inline-flex;align-items:center;gap:3px;font-size:12px;font-weight:600;padding:4px 10px;border-radius:10px}
|
|
68
|
-
.pill.ok{color:var(--green);background:rgba(16,185,129,.05)}
|
|
69
|
-
.pill.ko{color:var(--red);background:rgba(239,68,68,.05)}
|
|
70
|
-
|
|
71
|
-
.rw{display:flex;align-items:center;gap:8px}
|
|
72
|
-
.rw .bar{width:44px;height:3px;border-radius:2px;background:var(--border);overflow:hidden}
|
|
73
|
-
.rw .fll{height:100%;border-radius:2px}
|
|
74
|
-
.rw .n{font-family:'SF Mono',monospace;font-size:13px;font-weight:500}
|
|
75
|
-
|
|
76
|
-
.pgr{flex-shrink:0;padding:14px 28px;border-top:1px solid var(--border);display:flex;align-items:center;gap:8px;font-size:14px}
|
|
77
|
-
.pgr .btn{display:inline-flex;align-items:center;justify-content:center;width:34px;height:30px;background:var(--card);border:1px solid var(--border);color:var(--text);border-radius:5px;cursor:pointer;font-size:13px;transition:all .1s}
|
|
78
|
-
.pgr .btn:hover:not(:disabled){border-color:var(--blue);color:var(--blue)}
|
|
79
|
-
.pgr .btn:disabled{opacity:.08;cursor:default}
|
|
80
|
-
.pgr .pn{display:inline-flex;align-items:center;justify-content:center;width:30px;height:30px;border-radius:5px;font-size:13px;font-weight:600;cursor:pointer;color:var(--dim);border:1px solid transparent}
|
|
81
|
-
.pgr .pn:hover{border-color:var(--border);color:var(--text)}
|
|
82
|
-
.pgr .pn.on{background:var(--blue);color:#fff;border-color:var(--blue)}
|
|
83
|
-
|
|
84
|
-
.emp{text-align:center;padding:50px;color:var(--dim);font-size:16px}
|
|
85
|
-
</style>
|
|
86
|
-
</head>
|
|
87
|
-
<body>
|
|
88
|
-
|
|
89
|
-
<div class="sb">
|
|
90
|
-
<div class="sb-logo"><i>🛡️</i>Sentinel AgentOS<em> v0.3.7</em></div>
|
|
91
|
-
<div class="sb-sect"><h3>概览</h3>
|
|
92
|
-
<div class="sb-stat"><span class="k">总操作</span><span class="v" id="t1">—</span></div>
|
|
93
|
-
<div class="sb-stat"><span class="k">通过率</span><span class="v g" id="t2">—</span></div>
|
|
94
|
-
<div class="sb-stat"><span class="k">高风险</span><span class="v a" id="t3">—</span></div>
|
|
95
|
-
<div class="sb-stat"><span class="k">质量分</span><span class="v p" id="t4">—</span></div>
|
|
96
|
-
<div class="sb-stat"><span class="k">会话</span><span class="v g" id="t5">—</span></div>
|
|
97
|
-
</div>
|
|
98
|
-
<div class="sb-sect"><h3>审计通过率</h3>
|
|
99
|
-
<div class="bar-wrap"><div class="bar-track"><div class="pass" id="bp" style="width:100%"></div><div class="fail" id="bf" style="width:0%"></div></div><span class="pct g" id="c1">—</span></div>
|
|
100
|
-
<div class="sub" id="c1s">—</div></div>
|
|
101
|
-
<div class="sb-sect"><h3>质量评分</h3>
|
|
102
|
-
<div class="v p" style="font-size:38px;font-weight:700;letter-spacing:-.8px" id="c2">—</div>
|
|
103
|
-
<div style="font-size:11px;color:var(--dim)" id="c2s">Agent Profile</div>
|
|
104
|
-
</div>
|
|
105
|
-
<div style="font-size:11px;color:var(--dim);text-align:center;margin-top:auto" id="rt">—</div>
|
|
106
|
-
</div>
|
|
107
|
-
|
|
108
|
-
<div class="main">
|
|
109
|
-
<div class="topline">
|
|
110
|
-
<h2>审计记录</h2>
|
|
111
|
-
<div class="r"><b></b><span id="st">—</span> · <span id="up">—</span> · <span id="rc">—</span></div>
|
|
112
|
-
</div>
|
|
113
|
-
|
|
114
|
-
<div class="tbl-wrap">
|
|
115
|
-
<table>
|
|
116
|
-
<thead><tr><th>#</th><th>工具</th><th>参数</th><th>时间</th><th>状态</th><th>风险分</th></tr></thead>
|
|
117
|
-
<tbody id="tb"><tr><td colspan="6"><div class="emp">加载中…</div></td></tr></tbody>
|
|
118
|
-
</table>
|
|
119
|
-
</div>
|
|
120
|
-
|
|
121
|
-
<div class="pgr" id="pg"></div>
|
|
122
|
-
</div>
|
|
123
|
-
|
|
124
|
-
<script>
|
|
125
|
-
let items=[],P=20,cur=0;
|
|
126
|
-
|
|
127
|
-
/**
 * Poll `/pipeline/report` and refresh the sidebar statistics, the pass-rate
 * bar, the status line and the audit table.
 *
 * Reads/writes the page-level `items`, `P` and `cur` variables and re-renders
 * the table through `G`. Any failure (network error, non-2xx response,
 * malformed JSON, unexpected payload) switches the status indicator to the
 * "disconnected" state instead of throwing.
 */
async function R(){
  const byId=(id)=>document.getElementById(id);
  const dot=document.querySelector('.r b');
  try{
    const res=await fetch('/pipeline/report');
    // fetch() only rejects on network failure; an HTTP error must not be shown as "connected".
    if(!res.ok)throw new Error('HTTP '+res.status);
    const d=await res.json();
    byId('st').textContent='已连接';
    dot.style.background='var(--green)';

    // Coerce and clamp the counters so the pass rate always stays within 0–100.
    const t=Math.max(0,Number(d.audit?.totalOperations)||0);
    const f=Math.min(t,Math.max(0,Number(d.audit?.verifyFailures)||0));
    const p=t-f;
    const r=t?Math.round(p/t*100):100;
    // `??` keeps a legitimate score of 0; only a missing score falls back to 50.
    const q=d.quality?.overallScore??50;

    byId('t1').textContent=t;
    byId('t2').textContent=r+'%';
    byId('t2').className='v '+(r>=90?'g':r>=70?'a':'r');
    byId('t3').textContent=d.audit?.highRiskOps||0;
    byId('t4').textContent=q;
    byId('t5').textContent=d.audit?.sessionsTracked||1;
    byId('up').textContent='运行 '+(d.uptime||'—');
    byId('rt').textContent='刷新 '+new Date().toLocaleTimeString();
    byId('c1').textContent=r+'%';
    byId('c1s').textContent=p+' 通过 · '+f+' 失败';
    byId('c2').textContent=q+'分';
    byId('bp').style.width=r+'%';
    byId('bf').style.width=(100-r)+'%';

    items=Array.isArray(d.timeline)?d.timeline:[];
    byId('rc').textContent='共 '+items.length+' 条';
    // Stay on the page the user is viewing (clamped to the new page count)
    // instead of jumping back to the first page on every refresh.
    const last=Math.max(0,Math.ceil(items.length/P)-1);
    G(Math.min(cur,last));
  }catch{
    byId('st').textContent='断开';
    dot.style.background='var(--red)';
  }
}
|
|
149
|
-
|
|
150
|
-
/**
 * Render page `p` (zero-based) of the audit timeline into `#tb` and rebuild
 * the pager in `#pg`.
 *
 * Reads the page-level `items` and `P` variables and stores the page actually
 * shown in `cur`. Out-of-range page numbers are clamped, and every value taken
 * from the report is HTML-escaped before being written through `innerHTML`.
 */
function G(p){
  // Escape text for safe interpolation into HTML content and quoted attributes.
  const esc=(v)=>String(v).replace(/[&<>"']/g,(c)=>({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[c]));
  const own=(o,k)=>Object.prototype.hasOwnProperty.call(o,k);

  const pages=Math.ceil(items.length/P);
  // Clamp so a stale or out-of-range request (e.g. "previous" on the first page) cannot blank the table.
  p=Math.min(Math.max(Number(p)||0,0),Math.max(pages-1,0));
  cur=p;

  const start=p*P,rows=items.slice(start,start+P);
  const b=document.getElementById('tb'),pg=document.getElementById('pg');
  if(!rows.length){
    b.innerHTML='<tr><td colspan="6"><div class="emp">暂无记录</div></td></tr>';
    pg.innerHTML='';
    return;
  }

  const icons={exec:'⚙️',write:'✏️',edit:'🔧',read:'📖'},cls={exec:'e',write:'w',edit:'d',read:'g'};
  b.innerHTML=rows.map(entry=>{
    const e=entry||{};
    const t=String(e.tool||'—'),ok=e.verify!=='FAIL',s=Number(e.score)||0;
    // Bar width is a percentage, so keep it within 0–100 even for odd scores.
    const rp=Math.max(0,Math.min(s*10,100));
    const rc=s>3?'var(--red)':s>1?'var(--amber)':'var(--green)';
    // Timestamps and params are only sliced when they are strings; other shapes are serialised first.
    const ts=(typeof e.ts==='string'?e.ts.slice(11,19):'')||'—';
    const raw=typeof e.params==='string'?e.params:e.params==null?'':JSON.stringify(e.params);
    const pr=(raw||'').slice(0,140)||'—';
    const icon=own(icons,t)?icons[t]:'🔹',tagCls=own(cls,t)?cls[t]:'';
    return `<tr class="${ok?'':'fail'}">
<td>${icon}</td><td><span class="tag ${tagCls}">${esc(t)}</span></td>
<td><span class="pm" title="${esc(pr)}">${esc(pr)}</span></td><td><span class="tm">${esc(ts)}</span></td>
<td><span class="pill ${ok?'ok':'ko'}">${ok?'● 通过':'● 拦截'}</span></td>
<td><div class="rw"><div class="bar"><div class="fll" style="width:${rp}%;background:${rc}"></div></div><span class="n" style="color:${rc}">${s.toFixed(1)}</span></div></td>
</tr>`;
  }).join('');

  if(pages<=1){pg.innerHTML='';return;}
  // Show the first, last and neighbouring page numbers; collapse the rest into an ellipsis.
  let ns='';
  for(let i=0;i<pages;i++){
    const near=i===0||i===pages-1||Math.abs(i-p)<=1;
    if(near)ns+=`<span class="pn${i===p?' on':''}" onclick="G(${i})">${i+1}</span>`;
    else if((i===1&&p>2)||(i===pages-2&&p<pages-3))ns+='<span style="color:var(--dim);padding:0 2px">…</span>';
  }
  // Real <button> elements so `disabled` blocks the click and the `.btn:disabled` style applies.
  pg.innerHTML=`<button type="button" class="btn" onclick="G(${p-1})" ${p===0?'disabled':''}>◀</button>${ns}<button type="button" class="btn" onclick="G(${p+1})" ${p>=pages-1?'disabled':''}>▶</button>`;
}
|
|
171
|
-
|
|
172
|
-
R();setInterval(R,5000);
|
|
173
|
-
</script>
|
|
174
|
-
</body>
|
|
175
|
-
</html>
|
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
import { PreExecMetrics, RuntimeMetrics, PostExecMetrics } from '../types';
|
|
2
|
-
import { SchemaGate } from '../guard/schema-gate';
|
|
3
|
-
import { RiskGate } from '../guard/risk-gate';
|
|
4
|
-
import { WorkingMemory } from '../memory/working';
|
|
5
|
-
/**
|
|
6
|
-
* PreExecEvaluator — captures metrics before tool execution.
|
|
7
|
-
*
|
|
8
|
-
* Watches the Guard layer output and WorkingMemory context
|
|
9
|
-
* to score parameter quality, context utilization, and risk.
|
|
10
|
-
*/
|
|
11
|
-
export declare class PreExecEvaluator {
|
|
12
|
-
private schemaGate;
|
|
13
|
-
private riskGate;
|
|
14
|
-
private workingMemory;
|
|
15
|
-
constructor(schemaGate: SchemaGate, riskGate: RiskGate, workingMemory: WorkingMemory);
|
|
16
|
-
/**
|
|
17
|
-
* Evaluate a tool call before execution and return its PreExecMetrics.
|
|
18
|
-
*/
|
|
19
|
-
evaluate(toolName: string, parameters: Record<string, unknown>): PreExecMetrics;
|
|
20
|
-
/**
|
|
21
|
-
* Score parameter quality based on contextual awareness.
|
|
22
|
-
*
|
|
23
|
-
* Raises the score when `path` contains an open file, when `content`
|
|
24
|
-
* is longer than 20 characters, or when several values look like file paths.
|
|
25
|
-
* Lowers the score when `content` is an empty string.
|
|
26
|
-
*/
|
|
27
|
-
private evaluateParamQuality;
|
|
28
|
-
/**
|
|
29
|
-
* Score how well the agent uses stored context (recent messages and cached tool results).
|
|
30
|
-
*/
|
|
31
|
-
private evaluateContextUtilization;
|
|
32
|
-
}
|
|
33
|
-
/**
|
|
34
|
-
* RuntimeEvaluator — captures metrics during execution.
|
|
35
|
-
*
|
|
36
|
-
* Tracks retries, self-corrections, timeouts, and
|
|
37
|
-
* whether the agent selected the right tool for the job.
|
|
38
|
-
*/
|
|
39
|
-
export declare class RuntimeEvaluator {
|
|
40
|
-
/** Per-tool call history — toolName -> { calls, successes } counters */
|
|
41
|
-
private toolHistory;
|
|
42
|
-
/**
|
|
43
|
-
* Evaluate a completed tool execution and record it in the per-tool history.
|
|
44
|
-
*/
|
|
45
|
-
evaluate(options: {
|
|
46
|
-
toolName: string;
|
|
47
|
-
startTime: number;
|
|
48
|
-
endTime: number;
|
|
49
|
-
retryCount: number;
|
|
50
|
-
wasSelfCorrected: boolean;
|
|
51
|
-
hadTimeout: boolean;
|
|
52
|
-
expectedTool?: string;
|
|
53
|
-
toolResult: unknown;
|
|
54
|
-
}): RuntimeMetrics;
|
|
55
|
-
/** Record one call (and whether it succeeded) in the history tracker */
|
|
56
|
-
private recordToolCall;
|
|
57
|
-
/** Get per-tool call counts and success rates (rounded to two decimals) */
|
|
58
|
-
getToolAccuracy(): Record<string, {
|
|
59
|
-
calls: number;
|
|
60
|
-
successRate: number;
|
|
61
|
-
}>;
|
|
62
|
-
}
|
|
63
|
-
/**
|
|
64
|
-
* PostExecEvaluator — captures metrics after execution.
|
|
65
|
-
*
|
|
66
|
-
* Scores verify results, user acceptance patterns,
|
|
67
|
-
* and checks if the agent actually used its own result later.
|
|
68
|
-
*/
|
|
69
|
-
export declare class PostExecEvaluator {
|
|
70
|
-
/** Tracked results keyed by operation id, each with a `referenced` flag */
|
|
71
|
-
private resultReferenceTracker;
|
|
72
|
-
/**
|
|
73
|
-
* Evaluate post-execution outcomes: verify score, user acceptance, and result utilization.
|
|
74
|
-
*/
|
|
75
|
-
evaluate(options: {
|
|
76
|
-
verifyPassed: boolean;
|
|
77
|
-
verifyChecks: number;
|
|
78
|
-
verifyFailures: number;
|
|
79
|
-
userAccepted: boolean;
|
|
80
|
-
userProvidedEdit: boolean;
|
|
81
|
-
resultWasUsed: boolean;
|
|
82
|
-
diffLinesChanged?: number;
|
|
83
|
-
}): PostExecMetrics;
|
|
84
|
-
/**
|
|
85
|
-
* Track a tool result for later utilization detection (starts as not referenced).
|
|
86
|
-
* Call this after each tool execution.
|
|
87
|
-
*/
|
|
88
|
-
trackResult(operationId: string, result: unknown): void;
|
|
89
|
-
/**
|
|
90
|
-
* Mark a previously-tracked result as referenced; does nothing for unknown operation ids.
|
|
91
|
-
*/
|
|
92
|
-
markResultReferenced(operationId: string): void;
|
|
93
|
-
/**
|
|
94
|
-
* Check if a result has been marked as referenced; false for unknown operation ids.
|
|
95
|
-
*/
|
|
96
|
-
isResultReferenced(operationId: string): boolean;
|
|
97
|
-
/**
|
|
98
|
-
* Get the fraction of tracked results that were referenced (0 when nothing is tracked).
|
|
99
|
-
*/
|
|
100
|
-
getUtilizationRate(): number;
|
|
101
|
-
}
|
|
102
|
-
//# sourceMappingURL=exec-evaluator.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"exec-evaluator.d.ts","sourceRoot":"","sources":["../../src/evaluator/exec-evaluator.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,cAAc,EACd,eAAe,EAGhB,MAAM,UAAU,CAAC;AAClB,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAClD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAElD;;;;;GAKG;AACH,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,QAAQ,CAAW;IAC3B,OAAO,CAAC,aAAa,CAAgB;gBAGnC,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,QAAQ,EAClB,aAAa,EAAE,aAAa;IAO9B;;OAEG;IACH,QAAQ,CACN,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAClC,cAAc;IAuBjB;;;;;;OAMG;IACH,OAAO,CAAC,oBAAoB;IA+C5B;;OAEG;IACH,OAAO,CAAC,0BAA0B;CAqCnC;AAED;;;;;GAKG;AACH,qBAAa,gBAAgB;IAC3B,iFAAiF;IACjF,OAAO,CAAC,WAAW,CAAgE;IAEnF;;OAEG;IACH,QAAQ,CAAC,OAAO,EAAE;QAChB,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,UAAU,EAAE,MAAM,CAAC;QACnB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,UAAU,EAAE,OAAO,CAAC;QACpB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,UAAU,EAAE,OAAO,CAAC;KACrB,GAAG,cAAc;IA0ClB,gDAAgD;IAChD,OAAO,CAAC,cAAc;IAUtB,6CAA6C;IAC7C,eAAe,IAAI,MAAM,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC;CAY1E;AAED;;;;;GAKG;AACH,qBAAa,iBAAiB;IAC5B,sDAAsD;IACtD,OAAO,CAAC,sBAAsB,CAAoE;IAElG;;OAEG;IACH,QAAQ,CAAC,OAAO,EAAE;QAChB,YAAY,EAAE,OAAO,CAAC;QACtB,YAAY,EAAE,MAAM,CAAC;QACrB,cAAc,EAAE,MAAM,CAAC;QACvB,YAAY,EAAE,OAAO,CAAC;QACtB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,aAAa,EAAE,OAAO,CAAC;QACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;KAC3B,GAAG,eAAe;IA+BnB;;;OAGG;IACH,WAAW,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,GAAG,IAAI;IAIvD;;OAEG;IACH,oBAAoB,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI;IAK/C;;OAEG;IACH,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO;IAIhD;;OAEG;IACH,kBAAkB,IAAI,MAAM;CAM7B"}
|
|
@@ -1,266 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.PostExecEvaluator = exports.RuntimeEvaluator = exports.PreExecEvaluator = void 0;
|
|
4
|
-
/**
|
|
5
|
-
* PreExecEvaluator — captures metrics before tool execution.
|
|
6
|
-
*
|
|
7
|
-
* Watches the Guard layer output and WorkingMemory context
|
|
8
|
-
* to score parameter quality, context utilization, and risk.
|
|
9
|
-
*/
|
|
10
|
-
class PreExecEvaluator {
    schemaGate;
    riskGate;
    workingMemory;
    /**
     * @param schemaGate object whose `check(toolName, parameters)` result is reported as `schemaCheck`
     * @param riskGate object whose `evaluate(toolName, parameters)` result is reported as `riskScore`
     * @param workingMemory context source; `openFiles`, `recentMessages` and `recentToolResults` are read
     */
    constructor(schemaGate, riskGate, workingMemory) {
        this.schemaGate = schemaGate;
        this.riskGate = riskGate;
        this.workingMemory = workingMemory;
    }
    /**
     * Evaluate a tool call before execution.
     *
     * @param toolName name of the tool about to run
     * @param parameters the tool-call parameters
     * @returns timestamp, tool name, schema check, risk score, parameter
     *          quality and context utilization for this call
     */
    evaluate(toolName, parameters) {
        // 1. Schema check
        const schemaCheck = this.schemaGate.check(toolName, parameters);
        // 2. Risk assessment
        const riskScore = this.riskGate.evaluate(toolName, parameters);
        // 3. Parameter quality: does the agent use context-aware params?
        const paramQuality = this.evaluateParamQuality(toolName, parameters);
        // 4. Context utilization: is the agent leveraging WorkingMemory?
        const contextUtilization = this.evaluateContextUtilization(toolName, parameters);
        return {
            timestamp: Date.now(),
            toolName,
            schemaCheck,
            riskScore,
            paramQuality,
            contextUtilization,
        };
    }
    /**
     * Score parameter quality based on contextual awareness.
     *
     * Starts at 0.5 and adds 0.3 when `path` contains an open file, 0.1 when
     * `content` is longer than 20 characters, and 0.1 when more than one
     * string value mentions `.ts`, `.js` or `.json`; subtracts 0.2 when
     * `content` is an empty string.
     *
     * @returns `{ score, observations }` with `score` clamped to [0, 1] and
     *          rounded to two decimals
     */
    evaluateParamQuality(_toolName, parameters) {
        let score = 0.5; // neutral start
        const observations = [];
        // Check if path references an open file
        const path = parameters['path'];
        if (typeof path === 'string') {
            // Skip empty / non-string entries: '' is a substring of every path.
            const matchesOpenFile = this.workingMemory.openFiles.some((f) => typeof f === 'string' && f.length > 0 && path.includes(f));
            if (matchesOpenFile) {
                score += 0.3;
                observations.push('Path references an open file');
            }
            // POSIX root, or a Windows drive letter in either case with either separator.
            if (path.startsWith('/') || /^[A-Za-z]:[\\/]/.test(path)) {
                observations.push('Absolute path used');
            }
        }
        // Check if content parameter is meaningful
        const content = parameters['content'];
        if (typeof content === 'string') {
            if (content.length > 20) {
                score = Math.min(1.0, score + 0.1);
            }
            if (content.length === 0) {
                score -= 0.2;
                observations.push('Empty content — possible error');
            }
        }
        // Check for file paths in multiple parameters
        const filePaths = Object.values(parameters).filter((v) => typeof v === 'string' && (v.includes('.ts') || v.includes('.js') || v.includes('.json')));
        if (filePaths.length > 1) {
            score = Math.min(1.0, score + 0.1);
            observations.push('Multiple file references — coordinated operation');
        }
        return {
            score: Math.round(Math.max(0, Math.min(1, score)) * 100) / 100,
            observations,
        };
    }
    /**
     * Score how well the agent uses stored context.
     *
     * Starts at 0.4, adds 0.1 when recent messages exist, 0.1 when cached
     * tool results exist, and 0.1 for each distinct word (longer than three
     * characters, first five per message, last three messages) that also
     * appears in the parameter values.
     *
     * @returns `{ score, patterns }` with `score` capped at 1 and rounded to
     *          two decimals
     */
    evaluateContextUtilization(_toolName, parameters) {
        let score = 0.4;
        const patterns = [];
        const messages = this.workingMemory.recentMessages;
        // Check if agent references recent messages
        if (messages.length > 0) {
            score += 0.1;
            patterns.push(`${messages.length} recent messages available`);
        }
        // Check if agent uses cached tool results
        const cachedCount = this.workingMemory.recentToolResults.size;
        if (cachedCount > 0) {
            score += 0.1;
            patterns.push(`${cachedCount} cached results available`);
        }
        // Serialise structured values as JSON: String({}) yields "[object Object]",
        // which hides nested values and falsely matches the word "object".
        const toText = (value) => {
            if (typeof value === 'string')
                return value;
            try {
                const json = JSON.stringify(value);
                return typeof json === 'string' ? json : String(value);
            }
            catch {
                return String(value);
            }
        };
        // Check parameter values for context patterns
        const allValues = Object.values(parameters).map(toText).join(' ');
        const credited = new Set(); // each distinct word earns the bonus once
        for (const msg of messages.slice(-3)) {
            // Messages without plain-text content cannot be tokenised.
            if (!msg || typeof msg.content !== 'string')
                continue;
            const words = msg.content.split(/\s+/).filter((w) => w.length > 3);
            for (const word of words.slice(0, 5)) {
                if (credited.has(word))
                    continue;
                credited.add(word);
                if (allValues.includes(word)) {
                    score += 0.1;
                    patterns.push(`Parameter references recent context: "${word}"`);
                }
            }
        }
        return {
            score: Math.round(Math.min(1.0, score) * 100) / 100,
            patterns,
        };
    }
}
|
|
117
|
-
exports.PreExecEvaluator = PreExecEvaluator;
|
|
118
|
-
/**
|
|
119
|
-
* RuntimeEvaluator — captures metrics during execution.
|
|
120
|
-
*
|
|
121
|
-
* Tracks retries, self-corrections, timeouts, and
|
|
122
|
-
* whether the agent selected the right tool for the job.
|
|
123
|
-
*/
|
|
124
|
-
class RuntimeEvaluator {
    /** Per-tool call history — toolName -> { calls, successes } */
    toolHistory = new Map();
    /**
     * Evaluate a completed tool execution and record it in the history.
     *
     * @param options `toolName`, `startTime`/`endTime` (same time unit),
     *        `retryCount`, `wasSelfCorrected`, `hadTimeout`, optional
     *        `expectedTool`, and `toolResult`
     * @returns retry/timeout/correction flags, `toolSuccess`,
     *          `toolSelectionMatch` (undefined when it cannot be judged),
     *          `adaptiveScore` in [0, 1] and a non-negative `durationMs`
     */
    evaluate(options) {
        // Wall-clock timestamps can run backwards; never report a negative or NaN duration.
        const elapsed = options.endTime - options.startTime;
        const durationMs = Number.isFinite(elapsed) ? Math.max(0, elapsed) : 0;
        // A call succeeds when it did not time out and produced some result.
        const toolSuccess = !options.hadTimeout && options.toolResult !== undefined;
        // Tool selection accuracy: compare against historical patterns
        let toolSelectionMatch;
        if (options.expectedTool) {
            // Direct comparison if expectedTool is provided
            toolSelectionMatch = options.toolName === options.expectedTool;
        }
        else {
            // Auto-detect from the history gathered BEFORE this call is recorded:
            // more than 70% past success counts as a "good" selection; anything
            // else stays undefined (unknown) rather than false.
            const history = this.toolHistory.get(options.toolName);
            if (history && history.calls > 0 && history.successes / history.calls > 0.7) {
                toolSelectionMatch = true;
            }
        }
        // Record this call in history
        this.recordToolCall(options.toolName, toolSuccess);
        // Adaptive score: composite of retry rate, timeout, correction.
        // Ignore negative / non-numeric retry counts so the score cannot become NaN or inflated.
        const retries = Number.isFinite(options.retryCount) ? Math.max(0, options.retryCount) : 0;
        let adaptiveScore = 1.0 - retries * 0.15; // Each retry costs 0.15
        if (options.hadTimeout)
            adaptiveScore -= 0.5;
        if (options.wasSelfCorrected)
            adaptiveScore += 0.2; // Self-correction is good!
        adaptiveScore = Math.max(0, Math.min(1, adaptiveScore));
        return {
            retryCount: options.retryCount,
            selfCorrected: options.wasSelfCorrected,
            hadTimeout: options.hadTimeout,
            toolSuccess,
            toolSelectionMatch,
            adaptiveScore: Math.round(adaptiveScore * 100) / 100,
            durationMs,
        };
    }
    /** Record a tool call (and whether it succeeded) in the history tracker */
    recordToolCall(toolName, success) {
        const entry = this.toolHistory.get(toolName);
        if (entry) {
            entry.calls++;
            if (success)
                entry.successes++;
            return;
        }
        this.toolHistory.set(toolName, { calls: 1, successes: success ? 1 : 0 });
    }
    /**
     * Get per-tool statistics.
     *
     * @returns an object keyed by tool name with `calls` and `successRate`
     *          (rounded to two decimals, 0 when there are no calls)
     */
    getToolAccuracy() {
        // Object.fromEntries defines own properties, so a tool named
        // "__proto__" becomes a regular key instead of replacing the prototype.
        return Object.fromEntries(Array.from(this.toolHistory, ([tool, history]) => [
            tool,
            {
                calls: history.calls,
                successRate: history.calls > 0
                    ? Math.round((history.successes / history.calls) * 100) / 100
                    : 0,
            },
        ]));
    }
}
|
|
196
|
-
exports.RuntimeEvaluator = RuntimeEvaluator;
|
|
197
|
-
/**
 * PostExecEvaluator — captures metrics after execution.
 *
 * Scores verify results and user acceptance, and keeps a small registry of
 * tool results so callers can record whether a result was referenced later.
 */
class PostExecEvaluator {
    /**
     * operationId -> { result, referenced } registry used for utilization
     * scoring. Entries are added by trackResult() and flipped by
     * markResultReferenced(); nothing in this class removes them.
     */
    resultReferenceTracker = new Map();

    /**
     * Evaluate post-execution outcomes.
     *
     * Fields read from `options`: `verifyPassed`, `verifyChecks`,
     * `verifyFailures`, `userAccepted`, `userProvidedEdit`, `resultWasUsed`
     * and `diffLinesChanged`. The utilization flag comes from
     * `options.resultWasUsed`; the internal tracker is not consulted here.
     *
     * @param options post-execution observations (see fields above)
     * @returns a metrics object with the rounded `verifyScore` and
     *   `outcomeScore`, the `healthy` flag, and the pass-through fields
     */
    evaluate(options) {
        // Share of verify checks that passed; with no checks the score is 1.
        const rawVerifyScore = options.verifyChecks > 0
            ? 1 - (options.verifyFailures / options.verifyChecks)
            : 1;
        // Clamp to [0, 1]: a failure count above the check count (or a
        // negative one) would otherwise push the score — and the composite
        // outcome score built from it — outside the documented range.
        const verifyScore = Math.max(0, Math.min(1, rawVerifyScore));

        // User acceptance: accepted = 1.0, edited = 0.3, neither = 0.7.
        let acceptance = 0.7;
        if (options.userAccepted) {
            acceptance = 1.0;
        }
        else if (options.userProvidedEdit) {
            acceptance = 0.3;
        }

        // Composite outcome: 30% verify, 40% acceptance, 30% utilization.
        const utilization = options.resultWasUsed ? 0.3 : 0;
        const outcomeScore = verifyScore * 0.3 + acceptance * 0.4 + utilization;

        // Overall health flag: both thresholds are strict comparisons.
        const healthy = verifyScore > 0.8 && acceptance > 0.5;

        return {
            verifyPassed: options.verifyPassed,
            verifyScore: Math.round(verifyScore * 100) / 100,
            userAccepted: options.userAccepted,
            userEditRate: options.userProvidedEdit ? 1 : 0,
            resultUtilized: options.resultWasUsed,
            outcomeScore: Math.round(outcomeScore * 100) / 100,
            healthy,
            diffLinesChanged: options.diffLinesChanged,
        };
    }

    /**
     * Track a tool result for later utilization detection.
     * Call this after each tool execution. Tracking the same `operationId`
     * again replaces the entry and resets its referenced flag.
     *
     * @param operationId key identifying the operation
     * @param result value stored alongside the flag (not read by this class)
     */
    trackResult(operationId, result) {
        this.resultReferenceTracker.set(operationId, { result, referenced: false });
    }

    /**
     * Mark a previously-tracked result as referenced (used by the agent later).
     * Unknown operation ids are ignored.
     */
    markResultReferenced(operationId) {
        const entry = this.resultReferenceTracker.get(operationId);
        if (entry)
            entry.referenced = true;
    }

    /**
     * Check if a result has been utilized by the agent.
     *
     * @returns the tracked entry's `referenced` flag, or false when the id is
     *   not tracked
     */
    isResultReferenced(operationId) {
        return this.resultReferenceTracker.get(operationId)?.referenced ?? false;
    }

    /**
     * Get overall result utilization rate.
     *
     * @returns referenced / tracked, rounded to two decimals; 0 when nothing
     *   has been tracked
     */
    getUtilizationRate() {
        const entries = Array.from(this.resultReferenceTracker.values());
        if (entries.length === 0)
            return 0;
        const referenced = entries.filter((e) => e.referenced).length;
        return Math.round((referenced / entries.length) * 100) / 100;
    }
}
|
|
265
|
-
exports.PostExecEvaluator = PostExecEvaluator;
|
|
266
|
-
//# sourceMappingURL=exec-evaluator.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"exec-evaluator.js","sourceRoot":"","sources":["../../src/evaluator/exec-evaluator.ts"],"names":[],"mappings":";;;AAWA;;;;;GAKG;AACH,MAAa,gBAAgB;IACnB,UAAU,CAAa;IACvB,QAAQ,CAAW;IACnB,aAAa,CAAgB;IAErC,YACE,UAAsB,EACtB,QAAkB,EAClB,aAA4B;QAE5B,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,QAAQ,CACN,QAAgB,EAChB,UAAmC;QAEnC,kBAAkB;QAClB,MAAM,WAAW,GAAgB,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAE7E,qBAAqB;QACrB,MAAM,SAAS,GAAc,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAE1E,iEAAiE;QACjE,MAAM,YAAY,GAAG,IAAI,CAAC,oBAAoB,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAErE,iEAAiE;QACjE,MAAM,kBAAkB,GAAG,IAAI,CAAC,0BAA0B,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAEjF,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,QAAQ;YACR,WAAW;YACX,SAAS;YACT,YAAY;YACZ,kBAAkB;SACnB,CAAC;IACJ,CAAC;IAED;;;;;;OAMG;IACK,oBAAoB,CAC1B,SAAiB,EACjB,UAAmC;QAEnC,IAAI,KAAK,GAAG,GAAG,CAAC,CAAC,gBAAgB;QACjC,MAAM,YAAY,GAAa,EAAE,CAAC;QAElC,wCAAwC;QACxC,IAAI,OAAO,UAAU,CAAC,MAAM,CAAC,KAAK,QAAQ,EAAE,CAAC;YAC3C,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAW,CAAC;YAC1C,IAAI,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC/D,KAAK,IAAI,GAAG,CAAC;gBACb,YAAY,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;YACpD,CAAC;YACD,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE,CAAC;gBACpD,YAAY,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;YAC1C,CAAC;QACH,CAAC;QAED,2CAA2C;QAC3C,IAAI,OAAO,UAAU,CAAC,SAAS,CAAC,KAAK,QAAQ,EAAE,CAAC;YAC9C,MAAM,OAAO,GAAG,UAAU,CAAC,SAAS,CAAW,CAAC;YAChD,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;gBACxB,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC;YACrC,CAAC;YACD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACzB,KAAK,IAAI,GAAG,CAAC;gBACb,YAAY,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;YACtD,CAAC;QACH,CAAC;QAED,8CAA8C;QAC9C,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAChD,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,CAAC,QAAQ
,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAChG,CAAC;QAEF,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC;YACnC,YAAY,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;QACxE,CAAC;QAED,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;YAC9D,YAAY;SACb,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,0BAA0B,CAChC,SAAiB,EACjB,UAAmC;QAEnC,IAAI,KAAK,GAAG,GAAG,CAAC;QAChB,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,4CAA4C;QAC5C,IAAI,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjD,KAAK,IAAI,GAAG,CAAC;YACb,QAAQ,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,MAAM,4BAA4B,CAAC,CAAC;QACzF,CAAC;QAED,0CAA0C;QAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,iBAAiB,CAAC,IAAI,CAAC;QAC9D,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;YACpB,KAAK,IAAI,GAAG,CAAC;YACb,QAAQ,CAAC,IAAI,CAAC,GAAG,WAAW,2BAA2B,CAAC,CAAC;QAC3D,CAAC;QAED,8CAA8C;QAC9C,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClE,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9D,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YACnE,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBACrC,IAAI,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC7B,KAAK,IAAI,GAAG,CAAC;oBACb,QAAQ,CAAC,IAAI,CAAC,yCAAyC,IAAI,GAAG,CAAC,CAAC;gBAClE,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;YACnD,QAAQ;SACT,CAAC;IACJ,CAAC;CACF;AA1ID,4CA0IC;AAED;;;;;GAKG;AACH,MAAa,gBAAgB;IAC3B,iFAAiF;IACzE,WAAW,GAAsD,IAAI,GAAG,EAAE,CAAC;IAEnF;;OAEG;IACH,QAAQ,CAAC,OASR;QACC,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,GAAG,OAAO,CAAC,SAAS,CAAC;QACvD,MAAM,WAAW,GAAG,CAAC,OAAO,CAAC,UAAU,IAAI,OAAO,CAAC,UAAU,KAAK,SAAS,CAAC;QAE5E,+DAA+D;QAC/D,IAAI,kBAAuC,CAAC;QAC5C,IAAI,OAAO,
CAAC,YAAY,EAAE,CAAC;YACzB,gDAAgD;YAChD,kBAAkB,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,YAAY,CAAC;QACjE,CAAC;aAAM,CAAC;YACN,wEAAwE;YACxE,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YACvD,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,qBAAqB,GAAG,OAAO,CAAC,KAAK,GAAG,CAAC;oBAC7C,CAAC,CAAC,OAAO,CAAC,SAAS,GAAG,OAAO,CAAC,KAAK;oBACnC,CAAC,CAAC,CAAC,CAAC;gBACN,2EAA2E;gBAC3E,kBAAkB,GAAG,qBAAqB,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;YACtE,CAAC;QACH,CAAC;QAED,8BAA8B;QAC9B,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;QAEnD,+DAA+D;QAC/D,IAAI,aAAa,GAAG,GAAG,CAAC;QACxB,aAAa,IAAI,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC,wBAAwB;QACpE,IAAI,OAAO,CAAC,UAAU;YAAE,aAAa,IAAI,GAAG,CAAC;QAC7C,IAAI,OAAO,CAAC,gBAAgB;YAAE,aAAa,IAAI,GAAG,CAAC,CAAC,2BAA2B;QAC/E,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC,CAAC;QAExD,OAAO;YACL,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,aAAa,EAAE,OAAO,CAAC,gBAAgB;YACvC,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,WAAW;YACX,kBAAkB;YAClB,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,GAAG,CAAC,GAAG,GAAG;YACpD,UAAU;SACX,CAAC;IACJ,CAAC;IAED,gDAAgD;IACxC,cAAc,CAAC,QAAgB,EAAE,OAAgB;QACvD,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAChD,IAAI,QAAQ,EAAE,CAAC;YACb,QAAQ,CAAC,KAAK,EAAE,CAAC;YACjB,IAAI,OAAO;gBAAE,QAAQ,CAAC,SAAS,EAAE,CAAC;QACpC,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAC3E,CAAC;IACH,CAAC;IAED,6CAA6C;IAC7C,eAAe;QACb,MAAM,MAAM,GAA2D,EAAE,CAAC;QAC1E,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YAC/C,MAAM,CAAC,IAAI,CAAC,GAAG;gBACb,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,WAAW,EAAE,OAAO,CAAC,KAAK,GAAG,CAAC;oBAC5B,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;oBAC7D,CAAC,CAAC,CAAC;aACN,CAAC;QACJ,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AAlFD,4CAkFC;AAED;;;;;GAKG;AACH,MAAa,iBAAiB;IAC5B,sDAAsD;IAC9C,sBAAsB,GAA0D,IAAI,GAAG,EAAE,CAAC;IAElG;;OAEG;IA
CH,QAAQ,CAAC,OAQR;QACC,eAAe;QACf,MAAM,WAAW,GAAG,OAAO,CAAC,YAAY,GAAG,CAAC;YAC1C,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,GAAG,OAAO,CAAC,YAAY,CAAC;YACrD,CAAC,CAAC,CAAC,CAAC;QAEN,kBAAkB;QAClB,MAAM,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;QAErF,0BAA0B;QAC1B,MAAM,YAAY,GAAG,CACnB,WAAW,GAAG,GAAG;YACjB,UAAU,GAAG,GAAG;YAChB,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAClC,CAAC;QAEF,sBAAsB;QACtB,MAAM,OAAO,GAAG,WAAW,GAAG,GAAG,IAAI,UAAU,GAAG,GAAG,CAAC;QAEtD,OAAO;YACL,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,GAAG,GAAG,CAAC,GAAG,GAAG;YAChD,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,YAAY,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,cAAc,EAAE,OAAO,CAAC,aAAa;YACrC,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,GAAG,CAAC,GAAG,GAAG;YAClD,OAAO;YACP,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;SAC3C,CAAC;IACJ,CAAC;IAED;;;OAGG;IACH,WAAW,CAAC,WAAmB,EAAE,MAAe;QAC9C,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,WAAW,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC;IAC9E,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,WAAmB;QACtC,MAAM,KAAK,GAAG,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QAC3D,IAAI,KAAK;YAAE,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,kBAAkB,CAAC,WAAmB;QACpC,OAAO,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,UAAU,IAAI,KAAK,CAAC;IAC3E,CAAC;IAED;;OAEG;IACH,kBAAkB;QAChB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,sBAAsB,CAAC,MAAM,EAAE,CAAC,CAAC;QACjE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QACnC,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC;QAC9D,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;IAC/D,CAAC;CACF;AA9ED,8CA8EC"}
|
|
@@ -1,96 +0,0 @@
|
|
|
1
|
-
import { ImplicitFeedback, SignalType } from '../types';
|
|
2
|
-
import { AuditEntry } from '../types';
|
|
3
|
-
/**
 * ImplicitFeedbackEngine — captures and interprets implicit user signals.
 *
 * Instead of relying on explicit "thumbs up/down", this engine
 * detects subtle signals from user behavior to infer satisfaction.
 *
 * Two modes:
 * - Manual: caller provides explicit signals via record()
 * - Auto-detect: scans audit log to infer signals (results unused,
 *   results modified later, repeated same tool, verify failures)
 *
 * This is the key differentiator of AgentOS: it learns from
 * what users DO, not just what they SAY.
 */
export declare class ImplicitFeedbackEngine {
    private feedbackLog;
    private persistPath;
    private globalAuditPath;
    private detectedKeys;
    /**
     * Record an implicit feedback signal.
     *
     * @param signal Kind of signal being recorded
     * @param sessionId Session the signal belongs to
     * @param operationId Optional operation the signal refers to
     * @param confidence Optional confidence for the signal
     * @param source Optional label for where the signal came from
     * @returns The recorded feedback entry
     */
    record(signal: SignalType, sessionId: string, operationId?: string, confidence?: number, source?: string): ImplicitFeedback;
    /**
     * Enable persistence for feedbackLog and auto-detected signal keys.
     *
     * @param workspaceRoot Workspace root directory
     *   (NOTE(review): the on-disk layout is not visible in this declaration — confirm against the implementation)
     */
    enablePersistence(workspaceRoot: string): void;
    /** Persist the current feedbackLog and detectedKeys to disk. */
    private persist;
    /** Load persisted feedback log from disk. */
    private load;
    /**
     * Cross-session auto-detect: scan the global audit.jsonl for signals
     * from ALL sessions, not just the current one.
     *
     * @returns A count (presumably the number of signals detected, mirroring autoDetect — confirm)
     */
    autoDetectGlobal(): number;
    /**
     * Scan the audit log and auto-detect implicit feedback signals.
     *
     * Detection rules (conservative — low confidence to avoid false positives):
     * - verify FAIL or WARN → user_provided_correction (agent made mistakes)
     * - same tool+params called within 60s → user_repeated_instruction (low confidence, noisy)
     * - high risk operations that were retried and eventually passed → agent_self_corrected
     *
     * Note: auto-detected signals carry lower confidence than explicit user feedback.
     * They serve as supplementary data, not primary quality indicators.
     *
     * @param entries Recent audit entries to analyze
     * @param sessionId Session to attribute signals to
     * @returns Number of signals auto-detected
     */
    autoDetect(entries: AuditEntry[], sessionId: string): number;
    /**
     * Analyze user messages to detect implicit correction/feedback signals.
     *
     * Chinese corrective patterns (high precision, low recall — only matches clear signals):
     * - "不对"/"错了"/"不是这样" ("not right" / "wrong" / "not like this") → user_provided_correction (confidence 0.8)
     * - "漏了"/"缺了"/"没包括"/"遗漏" ("left out" / "missing" / "not included" / "omitted") → user_provided_correction (confidence 0.75)
     * - "你没"/"你怎么"/"你咋" ("you didn't" / "why did you") followed by a negative action word (忘记/漏/没/不: forget / miss / didn't / not) → user_provided_correction (confidence 0.7)
     * - "失忆"/"忘了"/"不记得" ("amnesia" / "forgot" / "don't remember") → user_provided_correction (confidence 0.85)
     * - "重新"/"再查"/"再搜"/"再看看" ("redo" / "check again" / "search again" / "look again") → user_repeated_instruction (confidence 0.6)
     * - "不对吧"/"没音信" ("that can't be right" / "no word back") → user_interrupted (confidence 0.5)
     *
     * English patterns:
     * - "wrong"/"incorrect"/"not right" → user_provided_correction (confidence 0.7)
     * - "missed"/"missing"/"forgot"/"incomplete" → user_provided_correction (confidence 0.7)
     * - "redo"/"again"/"retry"/"try again" → user_repeated_instruction (confidence 0.5)
     *
     * @param messages Conversation messages; each carries a role, its text content and an optional `ts`
     * @param sessionId Session to attribute signals to
     * @returns Number of signals detected
     */
    detectFromUserMessages(messages: Array<{
        role: string;
        content: string;
        ts?: number;
    }>, sessionId: string): number;
    private getSignalStrength;
    /**
     * Compute a numeric satisfaction score from recorded feedback.
     *
     * NOTE(review): the score's scale and the default for `recentHours` are not
     * visible in this declaration — confirm against the implementation.
     *
     * @param sessionId Optional session filter
     * @param recentHours Optional look-back window, in hours
     */
    getSatisfactionScore(sessionId?: string, recentHours?: number): number;
    /**
     * Query recorded feedback entries.
     *
     * @param filter Optional criteria; every field is optional
     *   (signal kind, session, strength bounds, `since`, `limit`)
     * @returns Matching feedback entries
     */
    query(filter?: {
        signal?: SignalType;
        sessionId?: string;
        minStrength?: number;
        maxStrength?: number;
        since?: number;
        limit?: number;
    }): ImplicitFeedback[];
    /**
     * Aggregate statistics over the recorded feedback.
     *
     * `mostCommonSignal` may be null
     * (presumably when nothing has been recorded — confirm).
     */
    stats(): {
        totalSignals: number;
        positiveSignals: number;
        negativeSignals: number;
        averageStrength: number;
        mostCommonSignal: SignalType | null;
    };
}
|
|
96
|
-
//# sourceMappingURL=feedback.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/evaluator/feedback.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACxD,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAYtC;;;;;;;;;;;;;GAaG;AACH,qBAAa,sBAAsB;IACjC,OAAO,CAAC,WAAW,CAA0B;IAC7C,OAAO,CAAC,WAAW,CAAuB;IAE1C,OAAO,CAAC,eAAe,CAAuB;IAE9C,OAAO,CAAC,YAAY,CAA0B;IAE9C;;OAEG;IACH,MAAM,CACJ,MAAM,EAAE,UAAU,EAClB,SAAS,EAAE,MAAM,EACjB,WAAW,CAAC,EAAE,MAAM,EACpB,UAAU,SAAM,EAChB,MAAM,SAAkB,GACvB,gBAAgB;IAuBnB;;;;;;;;;;;;;;OAcG;IACH;;OAEG;IACH,iBAAiB,CAAC,aAAa,EAAE,MAAM,GAAG,IAAI;IAU9C,gEAAgE;IAChE,OAAO,CAAC,OAAO;IAcf,6CAA6C;IAC7C,OAAO,CAAC,IAAI;IAqBZ;;;OAGG;IACH,gBAAgB,IAAI,MAAM;IAa1B,UAAU,CAAC,OAAO,EAAE,UAAU,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM;IAmF5D;;;;;;;;;;;;;;;;;OAiBG;IACH,sBAAsB,CAAC,QAAQ,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,EAAE,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM;IA2FlH,OAAO,CAAC,iBAAiB;IAkBzB,oBAAoB,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,WAAW,SAAK,GAAG,MAAM;IAoClE,KAAK,CAAC,MAAM,GAAE;QACZ,MAAM,CAAC,EAAE,UAAU,CAAC;QACpB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,KAAK,CAAC,EAAE,MAAM,CAAC;KACX,GAAG,gBAAgB,EAAE;IAa3B,KAAK,IAAI;QACP,YAAY,EAAE,MAAM,CAAC;QACrB,eAAe,EAAE,MAAM,CAAC;QACxB,eAAe,EAAE,MAAM,CAAC;QACxB,eAAe,EAAE,MAAM,CAAC;QACxB,gBAAgB,EAAE,UAAU,GAAG,IAAI,CAAC;KACrC;CAuBF"}
|