sentinel-agentos 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +636 -0
- package/dist/api.d.ts +151 -0
- package/dist/api.d.ts.map +1 -0
- package/dist/api.js +179 -0
- package/dist/api.js.map +1 -0
- package/dist/cli.d.ts +14 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +182 -0
- package/dist/cli.js.map +1 -0
- package/dist/core.d.ts +139 -0
- package/dist/core.d.ts.map +1 -0
- package/dist/core.js +247 -0
- package/dist/core.js.map +1 -0
- package/dist/evaluator/exec-evaluator.d.ts +102 -0
- package/dist/evaluator/exec-evaluator.d.ts.map +1 -0
- package/dist/evaluator/exec-evaluator.js +266 -0
- package/dist/evaluator/exec-evaluator.js.map +1 -0
- package/dist/evaluator/feedback.d.ts +66 -0
- package/dist/evaluator/feedback.d.ts.map +1 -0
- package/dist/evaluator/feedback.js +195 -0
- package/dist/evaluator/feedback.js.map +1 -0
- package/dist/evaluator/profiler.d.ts +53 -0
- package/dist/evaluator/profiler.d.ts.map +1 -0
- package/dist/evaluator/profiler.js +108 -0
- package/dist/evaluator/profiler.js.map +1 -0
- package/dist/guard/audit-log.d.ts +75 -0
- package/dist/guard/audit-log.d.ts.map +1 -0
- package/dist/guard/audit-log.js +207 -0
- package/dist/guard/audit-log.js.map +1 -0
- package/dist/guard/risk-gate.d.ts +97 -0
- package/dist/guard/risk-gate.d.ts.map +1 -0
- package/dist/guard/risk-gate.js +160 -0
- package/dist/guard/risk-gate.js.map +1 -0
- package/dist/guard/sandbox.d.ts +112 -0
- package/dist/guard/sandbox.d.ts.map +1 -0
- package/dist/guard/sandbox.js +379 -0
- package/dist/guard/sandbox.js.map +1 -0
- package/dist/guard/schema-gate.d.ts +90 -0
- package/dist/guard/schema-gate.d.ts.map +1 -0
- package/dist/guard/schema-gate.js +452 -0
- package/dist/guard/schema-gate.js.map +1 -0
- package/dist/guard/snapshot-verify.d.ts +111 -0
- package/dist/guard/snapshot-verify.d.ts.map +1 -0
- package/dist/guard/snapshot-verify.js +578 -0
- package/dist/guard/snapshot-verify.js.map +1 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +59 -0
- package/dist/index.js.map +1 -0
- package/dist/memory/episodic.d.ts +76 -0
- package/dist/memory/episodic.d.ts.map +1 -0
- package/dist/memory/episodic.js +289 -0
- package/dist/memory/episodic.js.map +1 -0
- package/dist/memory/semantic.d.ts +69 -0
- package/dist/memory/semantic.d.ts.map +1 -0
- package/dist/memory/semantic.js +243 -0
- package/dist/memory/semantic.js.map +1 -0
- package/dist/memory/working.d.ts +53 -0
- package/dist/memory/working.d.ts.map +1 -0
- package/dist/memory/working.js +150 -0
- package/dist/memory/working.js.map +1 -0
- package/dist/middleware/openclaw.d.ts +45 -0
- package/dist/middleware/openclaw.d.ts.map +1 -0
- package/dist/middleware/openclaw.js +95 -0
- package/dist/middleware/openclaw.js.map +1 -0
- package/dist/middleware/wrapper.d.ts +54 -0
- package/dist/middleware/wrapper.d.ts.map +1 -0
- package/dist/middleware/wrapper.js +155 -0
- package/dist/middleware/wrapper.js.map +1 -0
- package/dist/server.d.ts +45 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +229 -0
- package/dist/server.js.map +1 -0
- package/dist/types/index.d.ts +201 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +4 -0
- package/dist/types/index.js.map +1 -0
- package/package.json +64 -0
package/dist/core.d.ts
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import { AgentOSConfig, GuardConfig, Snapshot, AuditEntry, PreExecMetrics, RuntimeMetrics, PostExecMetrics } from './types';
|
|
2
|
+
import { SchemaGate } from './guard/schema-gate';
|
|
3
|
+
import { RiskGate } from './guard/risk-gate';
|
|
4
|
+
import { SnapshotGate, VerifyGate } from './guard/snapshot-verify';
|
|
5
|
+
import { AuditLog } from './guard/audit-log';
|
|
6
|
+
import { WorkingMemory } from './memory/working';
|
|
7
|
+
import { EpisodicMemory } from './memory/episodic';
|
|
8
|
+
import { SemanticMemoryStore } from './memory/semantic';
|
|
9
|
+
import { PreExecEvaluator, RuntimeEvaluator, PostExecEvaluator } from './evaluator/exec-evaluator';
|
|
10
|
+
import { ImplicitFeedbackEngine } from './evaluator/feedback';
|
|
11
|
+
import { AgentProfiler } from './evaluator/profiler';
|
|
12
|
+
import type { AgentProfile } from './evaluator/profiler';
|
|
13
|
+
/**
 * AgentOS — the complete AI Agent Operating System.
 *
 * Architecture:
 * ```
 *   User Request
 *        ↓
 * ┌──────────────┐
 * │ Memory Layer │ ← Semantic + Episodic + Working memory
 * ├──────────────┤
 * │ Guard Layer  │ ← Schema → Risk → Snapshot
 * ├──────────────┤        ↓
 * │ Execute      │ ← Tool call execution
 * ├──────────────┤        ↓
 * │ Verify Layer │ ← Snapshot diff → Verify checks
 * ├──────────────┤        ↓
 * │ Audit Log    │ ← Immutable operation record
 * ├──────────────┤
 * │ Evaluator    │ ← Three-phase metrics + feedback
 * └──────────────┘
 * ```
 */
export declare class AgentOS {
    private config;
    /** Memory layer: the working, episodic and semantic stores. */
    readonly memory: {
        working: WorkingMemory;
        episodic: EpisodicMemory;
        semantic: SemanticMemoryStore;
    };
    /** Guard layer: schema/risk gates, snapshot + verify gates, and the audit log. */
    readonly guard: {
        schema: SchemaGate;
        risk: RiskGate;
        snapshot: SnapshotGate;
        verify: VerifyGate;
        audit: AuditLog;
    };
    /** Evaluator layer: the three per-phase evaluators, the feedback engine and the profiler. */
    readonly evaluator: {
        preExec: PreExecEvaluator;
        runtime: RuntimeEvaluator;
        postExec: PostExecEvaluator;
        feedback: ImplicitFeedbackEngine;
        profiler: AgentProfiler;
    };
    /**
     * @param config Optional overrides; any option left out falls back to a built-in default.
     */
    constructor(config?: Partial<AgentOSConfig>);
    /**
     * Get the current AgentOS configuration.
     *
     * @returns The configuration in effect, typed as read-only.
     */
    getConfig(): Readonly<AgentOSConfig>;
    /**
     * Pre-execution half of the pipeline for a single tool call.
     *
     * This is the main AgentOS orchestration entry point; the agent runtime is
     * expected to call it before a tool call and {@link AgentOS.completeExecution}
     * afterwards.
     *
     * @param options Session/agent identifiers, the tool name and its parameters,
     *   plus the optional list of files the call is expected to affect.
     * @returns The pre-exec metrics, the snapshot taken before execution (or `null`)
     *   and the agent profile for the session. The optional `runtime`, `postExec`
     *   and `auditEntry` members are part of the declared shape.
     */
    executePipeline(options: {
        sessionId: string;
        agentId: string;
        toolName: string;
        parameters: Record<string, unknown>;
        affectedFiles?: string[];
        guardConfig?: GuardConfig;
    }): {
        preExec: PreExecMetrics;
        runtime?: RuntimeMetrics;
        postExec?: PostExecMetrics;
        snapshot: Snapshot | null;
        auditEntry?: AuditEntry;
        profile: AgentProfile;
    };
    /**
     * Post-execution half of the pipeline.
     *
     * Called by the runtime once the tool call has completed
     * (or failed, or timed out).
     *
     * @param options The tool call's identity, parameters and result, the snapshot
     *   returned by {@link AgentOS.executePipeline}, timing and retry information,
     *   and the user-acceptance signals for the call.
     * @returns Runtime and post-exec metrics, the audit entry that was recorded,
     *   and the session's agent profile.
     */
    completeExecution(options: {
        sessionId: string;
        agentId: string;
        toolName: string;
        toolParameters: Record<string, unknown>;
        toolResult: unknown;
        snapshot: Snapshot | null;
        startTime: number;
        endTime: number;
        retryCount: number;
        wasSelfCorrected: boolean;
        hadTimeout: boolean;
        userAccepted: boolean;
        userProvidedEdit: boolean;
        resultWasUsed: boolean;
    }): {
        runtime: RuntimeMetrics;
        postExec: PostExecMetrics;
        auditEntry: AuditEntry;
        profile: AgentProfile;
    };
    /**
     * Record implicit user feedback.
     *
     * The arguments mirror those of `ImplicitFeedbackEngine.record`.
     */
    recordFeedback(
        signal: Parameters<ImplicitFeedbackEngine['record']>[0],
        sessionId: string,
        operationId?: string,
        confidence?: number,
        source?: string,
    ): void;
    /**
     * Inject memory context at session startup.
     *
     * Call this at the beginning of every session to load
     * semantic + episodic context into the session prompt.
     *
     * @returns The combined context text.
     */
    injectContext(): string;
    /**
     * End the current session — promote important events to episodic memory,
     * clear working memory, and save state.
     */
    endSession(sessionId: string): void;
    /**
     * Get audit statistics.
     *
     * @returns Whatever `AuditLog.stats()` returns.
     */
    getAuditStats(): ReturnType<AuditLog['stats']>;
    /**
     * Get the current agent quality profile.
     *
     * @param sessionId Optional session to get the profile for.
     */
    getProfile(sessionId?: string): AgentProfile;
    /**
     * Get a summarized, human-readable status report.
     */
    statusReport(): string;
}
|
|
139
|
+
//# sourceMappingURL=core.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"core.d.ts","sourceRoot":"","sources":["../src/core.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,aAAa,EACb,WAAW,EACX,QAAQ,EACR,UAAU,EACV,cAAc,EACd,cAAc,EACd,eAAe,EAChB,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AACjD,OAAO,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAC7C,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AACnE,OAAO,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AACnG,OAAO,EAAE,sBAAsB,EAAE,MAAM,sBAAsB,CAAC;AAC9D,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEzD;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,qBAAa,OAAO;IAClB,OAAO,CAAC,MAAM,CAAgB;IAG9B,QAAQ,CAAC,MAAM,EAAE;QACf,OAAO,EAAE,aAAa,CAAC;QACvB,QAAQ,EAAE,cAAc,CAAC;QACzB,QAAQ,EAAE,mBAAmB,CAAC;KAC/B,CAAC;IAGF,QAAQ,CAAC,KAAK,EAAE;QACd,MAAM,EAAE,UAAU,CAAC;QACnB,IAAI,EAAE,QAAQ,CAAC;QACf,QAAQ,EAAE,YAAY,CAAC;QACvB,MAAM,EAAE,UAAU,CAAC;QACnB,KAAK,EAAE,QAAQ,CAAC;KACjB,CAAC;IAGF,QAAQ,CAAC,SAAS,EAAE;QAClB,OAAO,EAAE,gBAAgB,CAAC;QAC1B,OAAO,EAAE,gBAAgB,CAAC;QAC1B,QAAQ,EAAE,iBAAiB,CAAC;QAC5B,QAAQ,EAAE,sBAAsB,CAAC;QACjC,QAAQ,EAAE,aAAa,CAAC;KACzB,CAAC;gBAEU,MAAM,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC;IAqD3C;;OAEG;IACH,SAAS,IAAI,QAAQ,CAAC,aAAa,CAAC;IAIpC;;;;;;OAMG;IACH,eAAe,CAAC,OAAO,EAAE;QACvB,SAAS,EAAE,MAAM,CAAC;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE,MAAM,CAAC;QACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACpC,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;QACzB,WAAW,CAAC,EAAE,WAAW,CAAC;KAC3B,GAAG;QACF,OAAO,EAAE,cAAc,CAAC;QACxB,OAAO,CAAC,EAAE,cAAc,CAAC;QACzB,QAAQ,CAAC,EAAE,eAAe,CAAC;QAC3B,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAAC;QAC1B,UAAU,CAAC,EAAE,UAAU,CAAC;QACxB,OAAO,EAAE,YAAY,CAAC;KACvB;IAyBD;;;;;OAKG;IACH,iBAAiB,CAAC,OAAO,EAAE;QACzB,SAAS,EAAE,MAAM,CAAC;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE,MAAM,CAAC;QACjB,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACxC,UAAU,EAAE,OAAO,CAAC;QACpB,QAAQ,EAAE,QAAQ,GAAG,IAAI,C
AAC;QAC1B,SAAS,EAAE,MAAM,CAAC;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,UAAU,EAAE,MAAM,CAAC;QACnB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,UAAU,EAAE,OAAO,CAAC;QACpB,YAAY,EAAE,OAAO,CAAC;QACtB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,aAAa,EAAE,OAAO,CAAC;KACxB,GAAG;QACF,OAAO,EAAE,cAAc,CAAC;QACxB,QAAQ,EAAE,eAAe,CAAC;QAC1B,UAAU,EAAE,UAAU,CAAC;QACvB,OAAO,EAAE,YAAY,CAAC;KACvB;IAmED;;OAEG;IACH,cAAc,CACZ,MAAM,EAAE,UAAU,CAAC,sBAAsB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EACvD,SAAS,EAAE,MAAM,EACjB,WAAW,CAAC,EAAE,MAAM,EACpB,UAAU,CAAC,EAAE,MAAM,EACnB,MAAM,CAAC,EAAE,MAAM,GACd,IAAI;IAIP;;;;;OAKG;IACH,aAAa,IAAI,MAAM;IAYvB;;;OAGG;IACH,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI;IA0BnC;;OAEG;IACH,aAAa,IAAI,UAAU,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IAI9C;;OAEG;IACH,UAAU,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,YAAY;IAI5C;;OAEG;IACH,YAAY,IAAI,MAAM;CAgCvB"}
|
package/dist/core.js
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.AgentOS = void 0;
|
|
4
|
+
const schema_gate_1 = require("./guard/schema-gate");
|
|
5
|
+
const risk_gate_1 = require("./guard/risk-gate");
|
|
6
|
+
const snapshot_verify_1 = require("./guard/snapshot-verify");
|
|
7
|
+
const audit_log_1 = require("./guard/audit-log");
|
|
8
|
+
const working_1 = require("./memory/working");
|
|
9
|
+
const episodic_1 = require("./memory/episodic");
|
|
10
|
+
const semantic_1 = require("./memory/semantic");
|
|
11
|
+
const exec_evaluator_1 = require("./evaluator/exec-evaluator");
|
|
12
|
+
const feedback_1 = require("./evaluator/feedback");
|
|
13
|
+
const profiler_1 = require("./evaluator/profiler");
|
|
14
|
+
/**
 * Build the effective AgentOS configuration from caller-supplied overrides.
 *
 * Options whose value is `undefined` are dropped before merging, so an
 * explicitly-undefined option (e.g. `{ workspaceRoot: undefined }`) keeps the
 * default instead of replacing it.
 *
 * @param config Optional partial configuration (may be `undefined`).
 * @returns A new object holding the defaults overlaid with every defined override.
 */
function resolveAgentOSConfig(config) {
    const definedOverrides = Object.entries(config ?? {}).filter(([, value]) => value !== undefined);
    return {
        workspaceRoot: process.cwd(),
        maxWorkingTokens: 50000,
        maxEpisodicSizeKb: 500,
        guardConfig: {},
        ...Object.fromEntries(definedOverrides),
    };
}
/**
 * AgentOS — the complete AI Agent Operating System.
 *
 * Architecture:
 * ```
 *   User Request
 *        ↓
 * ┌──────────────┐
 * │ Memory Layer │ ← Semantic + Episodic + Working memory
 * ├──────────────┤
 * │ Guard Layer  │ ← Schema → Risk → Snapshot
 * ├──────────────┤        ↓
 * │ Execute      │ ← Tool call execution
 * ├──────────────┤        ↓
 * │ Verify Layer │ ← Snapshot diff → Verify checks
 * ├──────────────┤        ↓
 * │ Audit Log    │ ← Immutable operation record
 * ├──────────────┤
 * │ Evaluator    │ ← Three-phase metrics + feedback
 * └──────────────┘
 * ```
 */
class AgentOS {
    config;
    // Memory Layer
    memory;
    // Guard Layer
    guard;
    // Evaluator Layer
    evaluator;
    /**
     * Wire up the memory, guard and evaluator layers.
     *
     * @param config Optional overrides for `workspaceRoot`, `maxWorkingTokens`,
     *   `maxEpisodicSizeKb` and `guardConfig`; missing or undefined options
     *   fall back to the defaults in `resolveAgentOSConfig`.
     */
    constructor(config) {
        this.config = resolveAgentOSConfig(config);
        const { workspaceRoot, maxWorkingTokens, maxEpisodicSizeKb } = this.config;
        // --- Memory Layer Init ---
        const semantic = new semantic_1.SemanticMemoryStore();
        semantic.enablePersistence(workspaceRoot);
        const episodic = new episodic_1.EpisodicMemory(maxEpisodicSizeKb);
        episodic.enablePersistence(workspaceRoot);
        const working = new working_1.WorkingMemory(maxWorkingTokens);
        this.memory = { working, episodic, semantic };
        // --- Guard Layer Init ---
        const schema = new schema_gate_1.SchemaGate();
        const risk = new risk_gate_1.RiskGate();
        const snapshot = new snapshot_verify_1.SnapshotGate(workspaceRoot);
        const verify = new snapshot_verify_1.VerifyGate(workspaceRoot);
        // The audit log is handed the same schema/risk gate instances as the guard layer.
        const audit = new audit_log_1.AuditLog(workspaceRoot, schema, risk);
        this.guard = { schema, risk, snapshot, verify, audit };
        // --- Evaluator Layer Init ---
        const feedback = new feedback_1.ImplicitFeedbackEngine();
        this.evaluator = {
            preExec: new exec_evaluator_1.PreExecEvaluator(schema, risk, working),
            runtime: new exec_evaluator_1.RuntimeEvaluator(),
            postExec: new exec_evaluator_1.PostExecEvaluator(),
            feedback,
            profiler: new profiler_1.AgentProfiler(feedback),
        };
    }
    /**
     * Get the current AgentOS configuration.
     *
     * @returns The internal configuration object itself (not a copy).
     */
    getConfig() {
        return this.config;
    }
    /**
     * Pre-execution half of the pipeline for one tool call.
     *
     * This is the main AgentOS orchestration method. In production it would be
     * called by the agent runtime before every tool call, with
     * `completeExecution` called afterwards.
     *
     * Only `sessionId`, `toolName`, `parameters` and `affectedFiles` are read
     * from `options` here.
     *
     * @param options Tool-call description supplied by the runtime.
     * @returns `{ preExec, snapshot, profile }` — the state the runtime needs
     *   to carry into `completeExecution`.
     */
    executePipeline(options) {
        const { sessionId, toolName, parameters, affectedFiles } = options;
        // --- Phase 1: Pre-exec evaluation ---
        const preExec = this.evaluator.preExec.evaluate(toolName, parameters);
        // The evaluation runs before the call is added to working memory.
        this.memory.working.addMessage('tool', JSON.stringify(parameters));
        // --- Phase 2: Snapshot before execution ---
        // NOTE(review): the id is millisecond-based, so two calls within the same
        // millisecond get the same id — confirm whether SnapshotGate needs unique ids.
        const snapshotId = `call_${Date.now()}`;
        const snapshot = this.guard.snapshot.takeSnapshot(snapshotId, toolName, affectedFiles ?? [], 'file');
        // --- Phase 3: Execute (delegated to the runtime) ---
        // Nothing is executed here; the pre-exec state is returned so the runtime can complete.
        const profile = this.evaluator.profiler.getProfile(sessionId);
        return { preExec, snapshot, profile };
    }
    /**
     * Complete the pipeline after tool execution.
     *
     * Called by the runtime after the tool call completes
     * (or fails, or times out). Runs runtime evaluation, verification,
     * post-exec evaluation, audit logging and profiler recording, in that order.
     *
     * @param options Outcome of the tool call plus the snapshot from `executePipeline`.
     * @returns `{ runtime, postExec, auditEntry, profile }`.
     */
    completeExecution(options) {
        const { sessionId, agentId, toolName, toolParameters, toolResult, snapshot, startTime, endTime, retryCount, wasSelfCorrected, hadTimeout, userAccepted, userProvidedEdit, resultWasUsed, } = options;
        // --- Phase 1: Runtime evaluation ---
        const runtime = this.evaluator.runtime.evaluate({
            toolName,
            startTime,
            endTime,
            retryCount,
            wasSelfCorrected,
            hadTimeout,
            toolResult,
        });
        // --- Phase 2: Post-exec verification ---
        // Only a truthy `path` parameter is forwarded as the file list to verify.
        const verifyFiles = toolParameters['path'] ? [String(toolParameters['path'])] : undefined;
        // NOTE(review): `snapshot` is declared as `Snapshot | null`; whether
        // VerifyGate.verify tolerates null is not visible from this file.
        const verifyResult = this.guard.verify.verify(toolName, snapshot, { files: verifyFiles });
        // --- Phase 3: Post-exec evaluation ---
        const failedChecks = verifyResult.checks.filter((c) => c.status === 'FAIL');
        const postExec = this.evaluator.postExec.evaluate({
            verifyPassed: verifyResult.status === 'PASS',
            verifyChecks: verifyResult.checks.length,
            verifyFailures: failedChecks.length,
            userAccepted,
            userProvidedEdit,
            resultWasUsed,
        });
        // --- Phase 4: Audit log ---
        const auditEntry = this.guard.audit.record({
            sessionId,
            agentId,
            startedAt: startTime,
            completedAt: endTime,
            toolName,
            toolParameters,
            toolResult,
            snapshot,
            verifyStatus: verifyResult.status,
            verifyChecks: verifyResult.checks,
        });
        // --- Phase 5: Record in profiler ---
        // Pre-exec metrics are re-evaluated for the profiler rather than carried over
        // from executePipeline.
        // NOTE(review): if PreExecEvaluator.evaluate has side effects they occur twice per call.
        const preExecForProfile = this.evaluator.preExec.evaluate(toolName, toolParameters);
        this.evaluator.profiler.recordCycle(sessionId, preExecForProfile, runtime, postExec);
        return {
            runtime,
            postExec,
            auditEntry,
            profile: this.evaluator.profiler.getProfile(sessionId),
        };
    }
    /**
     * Record implicit user feedback.
     *
     * All arguments are forwarded unchanged to the feedback engine's `record`.
     */
    recordFeedback(signal, sessionId, operationId, confidence, source) {
        this.evaluator.feedback.record(signal, sessionId, operationId, confidence, source);
    }
    /**
     * Inject memory context at session startup.
     *
     * Call this at the beginning of every session to load
     * semantic + episodic context into the session prompt.
     *
     * @returns The non-empty summaries (semantic first, then episodic) joined by
     *   a `---` separator line; an empty string when both summaries are empty.
     */
    injectContext() {
        const summaries = [
            this.memory.semantic.generateContextSummary(2000),
            this.memory.episodic.generateContextSummary(1500),
        ];
        return summaries.filter((summary) => summary).join('\n\n---\n\n');
    }
    /**
     * End current session — promote important events to episodic memory
     * and clear working memory.
     *
     * @param sessionId Session id attached to the task milestone record.
     */
    endSession(sessionId) {
        const { working, episodic, semantic } = this.memory;
        // Promote the current task (if any) to an episodic milestone.
        if (working.currentTask) {
            episodic.record('milestone', `Task completed: ${working.currentTask.description}`, ['task', 'session-end'], [sessionId]);
        }
        // Log the first three rules returned by getRules(0.6) as episodic notes.
        for (const rule of semantic.getRules(0.6).slice(0, 3)) {
            episodic.record('note', `Rule: ${rule.rule} (confidence: ${Math.round(rule.confidence * 100)}%)`, ['rule', 'semantic'], []);
        }
        // Clear working memory for the next session.
        working.clear();
    }
    /**
     * Get audit statistics.
     *
     * @returns The result of the audit log's `stats()`.
     */
    getAuditStats() {
        return this.guard.audit.stats();
    }
    /**
     * Get the current agent quality profile.
     *
     * @param sessionId Optional session id, passed straight to the profiler.
     */
    getProfile(sessionId) {
        return this.evaluator.profiler.getProfile(sessionId);
    }
    /**
     * Get a summarized status report.
     *
     * @returns A newline-joined text report: quality score, operation counts,
     *   per-phase breakdown and audit totals, followed by warnings and strengths
     *   sections when the profile has any.
     */
    statusReport() {
        const profile = this.getProfile();
        const audit = this.getAuditStats();
        const lines = [
            '=== AgentOS Status Report ===',
            '',
            `Quality Score: ${Math.round(profile.overallScore)}/100 ${profile.trends.improving ? '📈' : '📉'}`,
            `Total Operations: ${profile.totalOps} (${profile.trends.recentOps} in last 24h)`,
            '',
            '--- Breakdown ---',
            `Pre-Exec: ${profile.breakdown.preExec}/100`,
            `Runtime: ${profile.breakdown.runtime}/100`,
            `Post-Exec: ${profile.breakdown.postExec}/100`,
            `Satisfaction: ${profile.breakdown.userSatisfaction}/100`,
            '',
            '--- Audit ---',
            `Total: ${audit.totalOperations} | Failures: ${audit.verifyFailures} | High-Risk: ${audit.highRiskOps}`,
        ];
        if (profile.warnings.length > 0) {
            lines.push('', '--- ⚠️ Warnings ---');
            for (const w of profile.warnings)
                lines.push(`- ${w}`);
        }
        if (profile.strengths.length > 0) {
            lines.push('', '--- ✅ Strengths ---');
            for (const s of profile.strengths)
                lines.push(`- ${s}`);
        }
        return lines.join('\n');
    }
}
|
|
246
|
+
exports.AgentOS = AgentOS;
|
|
247
|
+
//# sourceMappingURL=core.js.map
|
package/dist/core.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"core.js","sourceRoot":"","sources":["../src/core.ts"],"names":[],"mappings":";;;AASA,qDAAiD;AACjD,iDAA6C;AAC7C,6DAAmE;AACnE,iDAA6C;AAC7C,8CAAiD;AACjD,gDAAmD;AACnD,gDAAwD;AACxD,+DAAmG;AACnG,mDAA8D;AAC9D,mDAAqD;AAGrD;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,MAAa,OAAO;IACV,MAAM,CAAgB;IAE9B,eAAe;IACN,MAAM,CAIb;IAEF,cAAc;IACL,KAAK,CAMZ;IAEF,kBAAkB;IACT,SAAS,CAMhB;IAEF,YAAY,MAA+B;QACzC,IAAI,CAAC,MAAM,GAAG;YACZ,aAAa,EAAE,OAAO,CAAC,GAAG,EAAE;YAC5B,gBAAgB,EAAE,KAAK;YACvB,iBAAiB,EAAE,GAAG;YACtB,WAAW,EAAE,EAAE;YACf,GAAG,MAAM;SACV,CAAC;QAEF,4BAA4B;QAC5B,MAAM,QAAQ,GAAG,IAAI,8BAAmB,EAAE,CAAC;QAC3C,QAAQ,CAAC,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,aAAc,CAAC,CAAC;QAEvD,MAAM,QAAQ,GAAG,IAAI,yBAAc,CAAC,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC;QACnE,QAAQ,CAAC,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,aAAc,CAAC,CAAC;QAEvD,MAAM,OAAO,GAAG,IAAI,uBAAa,CAAC,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;QAEhE,IAAI,CAAC,MAAM,GAAG,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC;QAE9C,2BAA2B;QAC3B,MAAM,MAAM,GAAG,IAAI,wBAAU,EAAE,CAAC;QAEhC,MAAM,IAAI,GAAG,IAAI,oBAAQ,EAAE,CAAC;QAE5B,MAAM,QAAQ,GAAG,IAAI,8BAAY,CAAC,IAAI,CAAC,MAAM,CAAC,aAAc,CAAC,CAAC;QAE9D,MAAM,MAAM,GAAG,IAAI,4BAAU,CAAC,IAAI,CAAC,MAAM,CAAC,aAAc,CAAC,CAAC;QAE1D,MAAM,KAAK,GAAG,IAAI,oBAAQ,CACxB,IAAI,CAAC,MAAM,CAAC,aAAc,EAC1B,MAAM,EACN,IAAI,CACL,CAAC;QAEF,IAAI,CAAC,KAAK,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;QAEvD,+BAA+B;QAC/B,MAAM,WAAW,GAAG,IAAI,iCAAgB,CAAC,MAAM,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QAChE,MAAM,WAAW,GAAG,IAAI,iCAAgB,EAAE,CAAC;QAC3C,MAAM,YAAY,GAAG,IAAI,kCAAiB,EAAE,CAAC;QAC7C,MAAM,cAAc,GAAG,IAAI,iCAAsB,EAAE,CAAC;QACpD,MAAM,QAAQ,GAAG,IAAI,wBAAa,CAAC,cAAc,CAAC,CAAC;QAEnD,IAAI,CAAC,SAAS,GAAG;YACf,OAAO,EAAE,WAAW;YACpB,OAAO,EAAE,WAAW;YACpB,QAAQ,EAAE,YAAY;YACtB,QAAQ,EAAE,cAAc;YACxB,QAAQ;SACT,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,SAAS;QACP,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED;;;;;;OAMG;IACH,eAAe,CAAC,OAOf;QAQC,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,UAAU,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC;QAEnE,uCAAuC;QACvC,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,QAAQ,C
AAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QACtE,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC;QAEnE,6CAA6C;QAC7C,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,YAAY,CAC/C,QAAQ,IAAI,CAAC,GAAG,EAAE,EAAE,EACpB,QAAQ,EACR,aAAa,IAAI,EAAE,EACnB,MAAM,CACP,CAAC;QAEF,kFAAkF;QAElF,wDAAwD;QACxD,OAAO;YACL,OAAO;YACP,QAAQ;YACR,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC;SACvD,CAAC;IACJ,CAAC;IAED;;;;;OAKG;IACH,iBAAiB,CAAC,OAejB;QAMC,MAAM,EACJ,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,UAAU,EACxD,QAAQ,EAAE,SAAS,EAAE,OAAO,EAC5B,UAAU,EAAE,gBAAgB,EAAE,UAAU,EACxC,YAAY,EAAE,gBAAgB,EAAE,aAAa,GAC9C,GAAG,OAAO,CAAC;QAEZ,sCAAsC;QACtC,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC;YAC9C,QAAQ;YACR,SAAS;YACT,OAAO;YACP,UAAU;YACV,gBAAgB;YAChB,UAAU;YACV,UAAU;SACX,CAAC,CAAC;QAEH,0CAA0C;QAC1C,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAC3C,QAAQ,EACR,QAAS,EACT,EAAE,KAAK,EAAE,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,CACjF,CAAC;QAEF,wCAAwC;QACxC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC;YAChD,YAAY,EAAE,YAAY,CAAC,MAAM,KAAK,MAAM;YAC5C,YAAY,EAAE,YAAY,CAAC,MAAM,CAAC,MAAM;YACxC,cAAc,EAAE,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM;YAC7E,YAAY;YACZ,gBAAgB;YAChB,aAAa;SACd,CAAC,CAAC;QAEH,6BAA6B;QAC7B,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC;YACzC,SAAS;YACT,OAAO;YACP,SAAS,EAAE,SAAS;YACpB,WAAW,EAAE,OAAO;YACpB,QAAQ;YACR,cAAc;YACd,UAAU;YACV,QAAQ;YACR,YAAY,EAAE,YAAY,CAAC,MAAM;YACjC,YAAY,EAAE,YAAY,CAAC,MAAM;SAClC,CAAC,CAAC;QAEH,sCAAsC;QACtC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,WAAW,CACjC,SAAS;QACT,oCAAoC;QACpC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,EAAE,cAAc,CAAC,EACzD,OAAO,EACP,QAAQ,CACT,CAAC;QAEF,OAAO;YACL,OAAO;YACP,QAAQ;YACR,UAAU;YACV,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC;SACvD,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,cAAc,CACZ,MAAuD,EACvD,SAAiB,EACjB,WAAoB,EACpB,UAAmB,EACnB,MAAe;QAEf,IAAI,CAAC,SAAS,CAAC,QAA
Q,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,CAAC,CAAC;IACrF,CAAC;IAED;;;;;OAKG;IACH,aAAa;QACX,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAC;QAC1E,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAC;QAE1E,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,IAAI,eAAe;YAAE,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QACjD,IAAI,eAAe;YAAE,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAEjD,OAAO,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IACnC,CAAC;IAED;;;OAGG;IACH,UAAU,CAAC,SAAiB;QAC1B,qDAAqD;QACrD,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;YACpC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CACzB,WAAW,EACX,mBAAmB,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,WAAW,EAAE,EAChE,CAAC,MAAM,EAAE,aAAa,CAAC,EACvB,CAAC,SAAS,CAAC,CACZ,CAAC;QACJ,CAAC;QAED,sCAAsC;QACtC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QACjD,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;YACrC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CACzB,MAAM,EACN,SAAS,IAAI,CAAC,IAAI,iBAAiB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,IAAI,EACxE,CAAC,MAAM,EAAE,UAAU,CAAC,EACpB,EAAE,CACH,CAAC;QACJ,CAAC;QAED,wCAAwC;QACxC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;IAC9B,CAAC;IAED;;OAEG;IACH,aAAa;QACX,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IAClC,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,SAAkB;QAC3B,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;IACvD,CAAC;IAED;;OAEG;IACH,YAAY;QACV,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;QAEnC,MAAM,KAAK,GAAG;YACZ,+BAA+B;YAC/B,EAAE;YACF,kBAAkB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,YAAY,CAAC,QAAQ,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE;YAClG,qBAAqB,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,MAAM,CAAC,SAAS,eAAe;YACjF,EAAE;YACF,mBAAmB;YACnB,eAAe,OAAO,CAAC,SAAS,CAAC,OAAO,MAAM;YAC9C,eAAe,OAAO,CAAC,SAAS,CAAC,OAAO,MAAM;YAC9C,eAAe,OAAO,CAAC,SAAS,CAAC,QAAQ,MAAM;YAC/C,iBAAiB,OAAO,CAAC,SAAS,CAAC,gBAAgB,MAAM;YACzD,EAAE;YACF,eAAe;YACf,UAAU,KAAK,CAAC,eAAe,gBAAgB,KAAK,CAAC,cAAc,iBAAiB,KAA
K,CAAC,WAAW,EAAE;SACxG,CAAC;QAEF,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChC,KAAK,CAAC,IAAI,CAAC,EAAE,EAAE,qBAAqB,CAAC,CAAC;YACtC,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ;gBAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACzD,CAAC;QAED,IAAI,OAAO,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjC,KAAK,CAAC,IAAI,CAAC,EAAE,EAAE,qBAAqB,CAAC,CAAC;YACtC,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,SAAS;gBAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAC1D,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;CACF;AAjVD,0BAiVC"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { PreExecMetrics, RuntimeMetrics, PostExecMetrics } from '../types';
|
|
2
|
+
import { SchemaGate } from '../guard/schema-gate';
|
|
3
|
+
import { RiskGate } from '../guard/risk-gate';
|
|
4
|
+
import { WorkingMemory } from '../memory/working';
|
|
5
|
+
/**
 * PreExecEvaluator — collects metrics before a tool call is executed.
 *
 * It observes the Guard layer output and the WorkingMemory context in order
 * to score parameter quality, context utilization, and risk.
 */
export declare class PreExecEvaluator {
    private schemaGate;
    private riskGate;
    private workingMemory;
    /**
     * @param schemaGate Schema gate from the Guard layer.
     * @param riskGate Risk gate from the Guard layer.
     * @param workingMemory Working memory supplying session context.
     */
    constructor(schemaGate: SchemaGate, riskGate: RiskGate, workingMemory: WorkingMemory);
    /**
     * Evaluate a tool call before execution.
     *
     * @param toolName Name of the tool about to be called.
     * @param parameters Parameters the tool will be called with.
     * @returns The pre-execution metrics for the call.
     */
    evaluate(toolName: string, parameters: Record<string, unknown>): PreExecMetrics;
    /**
     * Score parameter quality based on contextual awareness.
     *
     * High quality: the path contains session-relevant project paths,
     * the content references open files, etc.
     * Low quality: bare strings, random-looking paths, missing files.
     */
    private evaluateParamQuality;
    /**
     * Score how well the agent uses stored context.
     */
    private evaluateContextUtilization;
}
|
|
33
|
+
/**
 * RuntimeEvaluator — collects metrics during execution.
 *
 * It tracks retries, self-corrections, timeouts, and whether the agent
 * selected the right tool for the job.
 */
export declare class RuntimeEvaluator {
    /** Historical tool selection patterns — toolName -> successful scenario count */
    private toolHistory;
    /**
     * Evaluate a completed tool execution.
     *
     * @param options Tool name, start/end times, retry count, self-correction
     *   and timeout flags, the tool result, and optionally the tool that was expected.
     * @returns The runtime metrics for the execution.
     */
    evaluate(options: {
        toolName: string;
        startTime: number;
        endTime: number;
        retryCount: number;
        wasSelfCorrected: boolean;
        hadTimeout: boolean;
        expectedTool?: string;
        toolResult: unknown;
    }): RuntimeMetrics;
    /** Record a tool call in the history tracker. */
    private recordToolCall;
    /**
     * Get tool selection accuracy statistics.
     *
     * @returns A record keyed by string, each entry holding `calls` and `successRate`.
     */
    getToolAccuracy(): Record<string, {
        calls: number;
        successRate: number;
    }>;
}
|
|
63
|
+
/**
 * PostExecEvaluator — collects metrics after execution.
 *
 * It scores verify results and user acceptance patterns, and checks whether
 * the agent actually used its own result later.
 */
export declare class PostExecEvaluator {
    /** Tracks result references for utilization scoring. */
    private resultReferenceTracker;
    /**
     * Evaluate post-execution outcomes.
     *
     * @param options Verification outcome (pass flag, check and failure counts),
     *   user-acceptance signals, whether the result was used, and optionally
     *   the number of diff lines changed.
     * @returns The post-execution metrics.
     */
    evaluate(options: {
        verifyPassed: boolean;
        verifyChecks: number;
        verifyFailures: number;
        userAccepted: boolean;
        userProvidedEdit: boolean;
        resultWasUsed: boolean;
        diffLinesChanged?: number;
    }): PostExecMetrics;
    /**
     * Track a tool result for later utilization detection.
     * Call this after each tool execution.
     *
     * @param operationId Identifier of the operation that produced the result.
     * @param result The tool result to track.
     */
    trackResult(operationId: string, result: unknown): void;
    /**
     * Mark a previously-tracked result as referenced (used by the agent later).
     */
    markResultReferenced(operationId: string): void;
    /**
     * Check if a result has been utilized by the agent.
     */
    isResultReferenced(operationId: string): boolean;
    /**
     * Get the overall result utilization rate.
     */
    getUtilizationRate(): number;
}
|
|
102
|
+
//# sourceMappingURL=exec-evaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"exec-evaluator.d.ts","sourceRoot":"","sources":["../../src/evaluator/exec-evaluator.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,cAAc,EACd,eAAe,EAGhB,MAAM,UAAU,CAAC;AAClB,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAClD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAElD;;;;;GAKG;AACH,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,QAAQ,CAAW;IAC3B,OAAO,CAAC,aAAa,CAAgB;gBAGnC,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,QAAQ,EAClB,aAAa,EAAE,aAAa;IAO9B;;OAEG;IACH,QAAQ,CACN,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAClC,cAAc;IAuBjB;;;;;;OAMG;IACH,OAAO,CAAC,oBAAoB;IA+C5B;;OAEG;IACH,OAAO,CAAC,0BAA0B;CAqCnC;AAED;;;;;GAKG;AACH,qBAAa,gBAAgB;IAC3B,iFAAiF;IACjF,OAAO,CAAC,WAAW,CAAgE;IAEnF;;OAEG;IACH,QAAQ,CAAC,OAAO,EAAE;QAChB,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,UAAU,EAAE,MAAM,CAAC;QACnB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,UAAU,EAAE,OAAO,CAAC;QACpB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,UAAU,EAAE,OAAO,CAAC;KACrB,GAAG,cAAc;IA0ClB,gDAAgD;IAChD,OAAO,CAAC,cAAc;IAUtB,6CAA6C;IAC7C,eAAe,IAAI,MAAM,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC;CAY1E;AAED;;;;;GAKG;AACH,qBAAa,iBAAiB;IAC5B,sDAAsD;IACtD,OAAO,CAAC,sBAAsB,CAAoE;IAElG;;OAEG;IACH,QAAQ,CAAC,OAAO,EAAE;QAChB,YAAY,EAAE,OAAO,CAAC;QACtB,YAAY,EAAE,MAAM,CAAC;QACrB,cAAc,EAAE,MAAM,CAAC;QACvB,YAAY,EAAE,OAAO,CAAC;QACtB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,aAAa,EAAE,OAAO,CAAC;QACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;KAC3B,GAAG,eAAe;IA+BnB;;;OAGG;IACH,WAAW,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,GAAG,IAAI;IAIvD;;OAEG;IACH,oBAAoB,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI;IAK/C;;OAEG;IACH,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO;IAIhD;;OAEG;IACH,kBAAkB,IAAI,MAAM;CAM7B"}
|