tryassay 0.1.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +37 -0
- package/dist/cli.js.map +1 -1
- package/dist/commands/runtime.d.ts +23 -0
- package/dist/commands/runtime.js +130 -0
- package/dist/commands/runtime.js.map +1 -0
- package/dist/runtime/agent-loop.d.ts +55 -0
- package/dist/runtime/agent-loop.js +502 -0
- package/dist/runtime/agent-loop.js.map +1 -0
- package/dist/runtime/audit-log.d.ts +35 -0
- package/dist/runtime/audit-log.js +115 -0
- package/dist/runtime/audit-log.js.map +1 -0
- package/dist/runtime/config-loader.d.ts +41 -0
- package/dist/runtime/config-loader.js +116 -0
- package/dist/runtime/config-loader.js.map +1 -0
- package/dist/runtime/control-server.d.ts +25 -0
- package/dist/runtime/control-server.js +83 -0
- package/dist/runtime/control-server.js.map +1 -0
- package/dist/runtime/executor.d.ts +37 -0
- package/dist/runtime/executor.js +518 -0
- package/dist/runtime/executor.js.map +1 -0
- package/dist/runtime/logger.d.ts +20 -0
- package/dist/runtime/logger.js +73 -0
- package/dist/runtime/logger.js.map +1 -0
- package/dist/runtime/observer.d.ts +48 -0
- package/dist/runtime/observer.js +294 -0
- package/dist/runtime/observer.js.map +1 -0
- package/dist/runtime/planner.d.ts +4 -0
- package/dist/runtime/planner.js +299 -0
- package/dist/runtime/planner.js.map +1 -0
- package/dist/runtime/reasoner.d.ts +4 -0
- package/dist/runtime/reasoner.js +238 -0
- package/dist/runtime/reasoner.js.map +1 -0
- package/dist/runtime/reflector.d.ts +67 -0
- package/dist/runtime/reflector.js +393 -0
- package/dist/runtime/reflector.js.map +1 -0
- package/dist/runtime/types.d.ts +321 -0
- package/dist/runtime/types.js +6 -0
- package/dist/runtime/types.js.map +1 -0
- package/dist/runtime/verifier.d.ts +46 -0
- package/dist/runtime/verifier.js +404 -0
- package/dist/runtime/verifier.js.map +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
/** Tags naming each kind of signal source; they match the `type` field of the signal config variants. */
export type SignalSourceType =
  | 'filesystem'
  | 'webhook'
  | 'schedule'
  | 'message'
  | 'api_poll';
|
|
2
|
+
/**
 * Signal configuration variant tagged `type: 'filesystem'`.
 *
 * `type` and `paths` are required; both pattern lists may be omitted.
 */
export interface FileSystemSignalConfig {
  /** Discriminant tag for this variant. */
  type: 'filesystem';
  /** Presumably the locations to watch — confirm against the observer. */
  paths: string[];
  /** Optional; presumably include filters (pattern syntax not visible here). */
  patterns?: string[];
  /** Optional; presumably exclude filters (pattern syntax not visible here). */
  ignorePatterns?: string[];
}
|
|
8
|
+
/**
 * Signal configuration variant tagged `type: 'webhook'`.
 *
 * `port` and `path` are required; `secret` may be omitted.
 */
export interface WebhookSignalConfig {
  /** Discriminant tag for this variant. */
  type: 'webhook';
  /** Numeric port; presumably the port the webhook listener binds to. */
  port: number;
  /** Presumably the request path the listener accepts — confirm. */
  path: string;
  /** Optional; how the secret is checked is not visible here. */
  secret?: string;
}
|
|
14
|
+
/** Signal configuration variant tagged `type: 'schedule'`; all fields are required. */
export interface ScheduleSignalConfig {
  /** Discriminant tag for this variant. */
  type: 'schedule';
  /** Interval between ticks; milliseconds, going by the `Ms` suffix. */
  intervalMs: number;
  /** Free-form label; `ScheduleTickPayload` carries a `label` field as well. */
  label: string;
}
|
|
19
|
+
/** Signal configuration variant tagged `type: 'message'`. */
export interface MessageSignalConfig {
  /** Discriminant tag for this variant. */
  type: 'message';
  /** Channel identifier; the accepted values are not constrained by this type. */
  channel: string;
}
|
|
23
|
+
/**
 * Signal configuration variant tagged `type: 'api_poll'`.
 *
 * `url` and `intervalMs` are required; `headers` may be omitted.
 */
export interface ApiPollSignalConfig {
  /** Discriminant tag for this variant. */
  type: 'api_poll';
  /** Presumably the endpoint that gets polled — confirm against the observer. */
  url: string;
  /** Polling interval; milliseconds, going by the `Ms` suffix. */
  intervalMs: number;
  /** Optional string-to-string map; presumably request headers. */
  headers?: Record<string, string>;
}
|
|
29
|
+
/** Union of every signal configuration variant; narrow on the `type` tag. */
export type SignalConfig =
  | FileSystemSignalConfig
  | WebhookSignalConfig
  | ScheduleSignalConfig
  | MessageSignalConfig
  | ApiPollSignalConfig;
|
|
30
|
+
/** Urgency labels that can be attached to an observation. */
export type ObservationUrgency =
  | 'critical'
  | 'high'
  | 'normal'
  | 'low';
|
|
31
|
+
/**
 * A single observation record: which source kind produced it, how urgent it
 * is, and the source-specific payload.
 */
export interface Observation {
  /** Identifier for this observation (format not visible here). */
  id: string;
  /** Kind of signal source that produced the observation. */
  source: SignalSourceType;
  /** Urgency label. */
  urgency: ObservationUrgency;
  /** Timestamp as a string; presumably ISO-8601 — confirm against the producer. */
  timestamp: string;
  /** Source-specific data; a union discriminated by its own `type` tag. */
  payload: ObservationPayload;
  /** Optional; presumably ids of `Experience` records judged relevant. */
  relatedExperienceIds?: string[];
}
|
|
39
|
+
/** Union of every observation payload variant; narrow on the `type` tag. */
export type ObservationPayload =
  | FileChangePayload
  | WebhookPayload
  | ScheduleTickPayload
  | MessagePayload
  | ApiPollPayload;
|
|
40
|
+
/**
 * Observation payload tagged `type: 'file_change'`.
 *
 * Note the tag differs from the `'filesystem'` source/config tag.
 */
export interface FileChangePayload {
  /** Discriminant tag for this variant. */
  type: 'file_change';
  /** Which kind of change occurred. */
  event: 'create' | 'modify' | 'delete';
  /** Path of the affected file (absolute vs. relative is not visible here). */
  path: string;
  /** Optional diff text; its format is not visible here. */
  diff?: string;
}
|
|
46
|
+
/** Observation payload tagged `type: 'webhook'`; all fields are required. */
export interface WebhookPayload {
  /** Discriminant tag for this variant. */
  type: 'webhook';
  /** HTTP method as a plain string (not restricted to a fixed set here). */
  method: string;
  /** Request path. */
  path: string;
  /** String-to-string header map. */
  headers: Record<string, string>;
  /** Request body, typed `unknown`: callers must narrow before use. */
  body: unknown;
}
|
|
53
|
+
/** Observation payload tagged `type: 'schedule_tick'`. */
export interface ScheduleTickPayload {
  /** Discriminant tag for this variant. */
  type: 'schedule_tick';
  /** Label; presumably copied from the originating `ScheduleSignalConfig` — confirm. */
  label: string;
  /** Tick counter (whether it starts at 0 or 1 is not visible here). */
  tickNumber: number;
}
|
|
58
|
+
/** Observation payload tagged `type: 'message'`; all fields are required strings. */
export interface MessagePayload {
  /** Discriminant tag for this variant. */
  type: 'message';
  /** Channel the message arrived on. */
  channel: string;
  /** Sender identifier (format not visible here). */
  sender: string;
  /** Message text. */
  content: string;
}
|
|
64
|
+
/**
 * Observation payload tagged `type: 'api_poll'`.
 *
 * `previousBody` is the only optional field.
 */
export interface ApiPollPayload {
  /** Discriminant tag for this variant. */
  type: 'api_poll';
  /** URL that was polled. */
  url: string;
  /** Numeric status code of the response. */
  statusCode: number;
  /** Response body, typed `unknown`: callers must narrow before use. */
  body: unknown;
  /** Optional; presumably the body from the prior poll, for change detection. */
  previousBody?: unknown;
}
|
|
71
|
+
/** The three outcomes a decision can select. */
export type DecisionAction =
  | 'act'
  | 'ignore'
  | 'defer';
|
|
72
|
+
/**
 * A decision record tied to one observation: the chosen action, the stated
 * reasoning, proposed follow-up actions, and model token usage.
 */
export interface Decision {
  /** Identifier for this decision. */
  id: string;
  /** Id of the observation this decision responds to. */
  observationId: string;
  /** Chosen outcome. */
  action: DecisionAction;
  /** Free-text rationale. */
  reasoning: string;
  /** Numeric confidence; the scale is not visible here (presumably 0–1 — confirm). */
  confidence: number;
  /** Actions proposed as a result of the decision. */
  proposedActions: ProposedAction[];
  /** Free-text risk notes. */
  risks: string[];
  /** Token counts reported for the model call behind this decision. */
  modelUsage: {
    inputTokens: number;
    outputTokens: number;
  };
  /** Timestamp as a string; presumably ISO-8601 — confirm. */
  timestamp: string;
}
|
|
86
|
+
/** One action proposed by a decision, described in free text plus an operation kind. */
export interface ProposedAction {
  /** Human-readable summary. */
  description: string;
  /** Which operation kind the action would use. */
  operationType: OperationType;
  /** What the action applies to; interpretation presumably depends on `operationType`. */
  target: string;
  /** Additional free-text detail. */
  details: string;
}
|
|
92
|
+
/** Tags naming each operation kind; they match the `type` field of the `Operation` variants. */
export type OperationType =
  | 'code_write'
  | 'code_run'
  | 'api_call'
  | 'git'
  | 'message';
|
|
93
|
+
/** Approval tiers; also reused as a risk label by `ActionPlan.overallRisk` and `ApprovalRequest.riskLevel`. */
export type ApprovalLevel =
  | 'auto'
  | 'single'
  | 'escalate';
|
|
94
|
+
/** A plan derived from one decision: an ordered list of steps plus summary fields. */
export interface ActionPlan {
  /** Identifier for this plan. */
  id: string;
  /** Id of the decision the plan was built from. */
  decisionId: string;
  /** The plan's steps. */
  steps: ActionStep[];
  /** Step count; presumably equal to `steps.length` — not enforced by the type. */
  totalSteps: number;
  /** Estimated duration; milliseconds, going by the `Ms` suffix. */
  estimatedDurationMs: number;
  /** Plan-level risk, expressed with the approval tiers. */
  overallRisk: ApprovalLevel;
  /** Timestamp as a string; presumably ISO-8601 — confirm. */
  timestamp: string;
}
|
|
103
|
+
/** One step of an `ActionPlan`: the operation to run, its conditions, and its approval tier. */
export interface ActionStep {
  /** Identifier for this step. */
  id: string;
  /** Id of the plan that owns the step. */
  planId: string;
  /** Position within the plan (zero- vs. one-based is not visible here). */
  index: number;
  /** Human-readable summary. */
  description: string;
  /** The operation this step performs. */
  operation: Operation;
  /** Conditions associated with the state before the step runs. */
  preConditions: Condition[];
  /** Conditions associated with the state after the step runs. */
  postConditions: Condition[];
  /** Approval tier assigned to this step. */
  approvalLevel: ApprovalLevel;
  /** Presumably ids of steps that must complete first — confirm against the planner. */
  dependsOn: string[];
}
|
|
114
|
+
/** A pre- or post-condition attached to an `ActionStep`. */
export interface Condition {
  /** Human-readable statement of the condition. */
  description: string;
  /** The check as a string; how it is evaluated is not visible here. */
  check: string;
}
|
|
118
|
+
/** Union of every operation variant; narrow on the `type` tag. */
export type Operation =
  | CodeWriteOp
  | CodeRunOp
  | ApiCallOp
  | GitOp
  | MessageOp;
|
|
119
|
+
/**
 * Operation variant tagged `type: 'code_write'`.
 *
 * `editTarget` is the only optional field.
 */
export interface CodeWriteOp {
  /** Discriminant tag for this variant. */
  type: 'code_write';
  /** Path of the file to write. */
  filePath: string;
  /** Text to write. */
  content: string;
  /** How the content is applied to the file. */
  mode: 'create' | 'edit' | 'append';
  /** Optional; presumably the text to replace when `mode` is `'edit'` — confirm against the executor. */
  editTarget?: string;
}
|
|
126
|
+
/**
 * Operation variant tagged `type: 'code_run'`.
 *
 * `command` and `timeoutMs` are required; `cwd` and `env` may be omitted.
 */
export interface CodeRunOp {
  /** Discriminant tag for this variant. */
  type: 'code_run';
  /** Command to run, as a single string. */
  command: string;
  /** Optional working directory. */
  cwd?: string;
  /** Timeout; milliseconds, going by the `Ms` suffix. */
  timeoutMs: number;
  /** Optional string-to-string map; presumably environment variables. */
  env?: Record<string, string>;
}
|
|
133
|
+
/**
 * Operation variant tagged `type: 'api_call'`.
 *
 * `method` and `url` are required; the remaining fields may be omitted.
 */
export interface ApiCallOp {
  /** Discriminant tag for this variant. */
  type: 'api_call';
  /** HTTP method, restricted to these five verbs. */
  method: 'GET' | 'POST' | 'PUT' | 'DELETE' | 'PATCH';
  /** Target URL. */
  url: string;
  /** Optional string-to-string header map. */
  headers?: Record<string, string>;
  /** Optional request body, typed `unknown`. */
  body?: unknown;
  /** Optional; presumably the status code treated as success — confirm against the executor. */
  expectedStatus?: number;
}
|
|
141
|
+
/** Operation variant tagged `type: 'git'`. */
export interface GitOp {
  /** Discriminant tag for this variant. */
  type: 'git';
  /** Git subcommand, restricted to these five. */
  command: 'add' | 'commit' | 'push' | 'branch' | 'checkout';
  /** Arguments for the subcommand. */
  args: string[];
  /** Optional working directory. */
  cwd?: string;
}
|
|
147
|
+
/**
 * Operation variant tagged `type: 'message'`.
 *
 * `channel` and `content` are required; `recipient` and `subject` may be omitted.
 */
export interface MessageOp {
  /** Discriminant tag for this variant. */
  type: 'message';
  /** Delivery channel, restricted to these three. */
  channel: 'console' | 'slack' | 'email';
  /** Optional recipient (format presumably depends on `channel`). */
  recipient?: string;
  /** Optional subject line. */
  subject?: string;
  /** Message text. */
  content: string;
}
|
|
154
|
+
/** Step- and plan-level verification verdicts (lowercase; per-claim verdicts use a separate uppercase set). */
export type VerificationVerdict =
  | 'pass'
  | 'warn'
  | 'fail'
  | 'escalate';
|
|
155
|
+
/** Verification result for a single plan step. */
export interface StepVerification {
  /** Id of the step that was verified. */
  stepId: string;
  /** Verdict for the step as a whole. */
  verdict: VerificationVerdict;
  /** Individual claims checked for this step. */
  claims: VerificationClaim[];
  /** Free-text rationale for the verdict. */
  reasoning: string;
  /** A count; presumably the number of claims where a formal check overrode an LLM verdict — confirm. */
  formalOverrides: number;
  /** Timestamp as a string; presumably ISO-8601 — confirm. */
  timestamp: string;
}
|
|
163
|
+
/** One checked claim inside a `StepVerification`. */
export interface VerificationClaim {
  /** Identifier for this claim. */
  id: string;
  /** The claim, as text. */
  text: string;
  /** Per-claim verdict (uppercase set, distinct from `VerificationVerdict`). */
  verdict: 'PASS' | 'PARTIAL' | 'FAIL' | 'N/A';
  /** Numeric confidence; the scale is not visible here (presumably 0–1 — confirm). */
  confidence: number;
  /** Which kind of check produced the verdict. */
  method: 'formal' | 'llm';
}
|
|
170
|
+
/** Verification result for a whole plan: per-step results plus aggregate fields. */
export interface PlanVerification {
  /** Id of the plan that was verified. */
  planId: string;
  /** Aggregate verdict across all steps. */
  overallVerdict: VerificationVerdict;
  /** One entry per verified step. */
  stepVerifications: StepVerification[];
  /** Presumably ids of steps that verification blocked — confirm against the verifier. */
  blockedSteps: string[];
  /** Presumably ids of steps flagged for escalation — confirm against the verifier. */
  escalatedSteps: string[];
  /** Count of claims across the plan. */
  totalClaims: number;
  /** Count of claims that passed. */
  passedClaims: number;
  /** Count of claims that failed. */
  failedClaims: number;
  /** Timestamp as a string; presumably ISO-8601 — confirm. */
  timestamp: string;
}
|
|
181
|
+
/** Status values shared by step-level and plan-level execution records. */
export type ExecutionStatus =
  | 'success'
  | 'partial'
  | 'failure'
  | 'skipped';
|
|
182
|
+
/** Execution record for a single plan step. */
export interface StepExecution {
  /** Id of the step that was executed. */
  stepId: string;
  /** Outcome of the step. */
  status: ExecutionStatus;
  /** Output text captured from the step. */
  output: string;
  /** Optional error text. */
  error?: string;
  /** Duration; milliseconds, going by the `Ms` suffix. */
  durationMs: number;
  /** Timestamp as a string; presumably ISO-8601 — confirm. */
  timestamp: string;
}
|
|
190
|
+
/** Execution record for a whole plan: per-step records plus aggregate fields. */
export interface PlanExecution {
  /** Id of the plan that was executed. */
  planId: string;
  /** Aggregate outcome. */
  status: ExecutionStatus;
  /** One entry per executed step. */
  stepExecutions: StepExecution[];
  /** Count of steps that completed. */
  completedSteps: number;
  /** Count of steps in the plan. */
  totalSteps: number;
  /** Total duration; milliseconds, going by the `Ms` suffix. */
  totalDurationMs: number;
  /** Timestamp as a string; presumably ISO-8601 — confirm. */
  timestamp: string;
}
|
|
199
|
+
/** Outcome labels for a stored experience (a separate set from `ExecutionStatus`). */
export type ExperienceOutcome =
  | 'success'
  | 'partial_success'
  | 'failure'
  | 'unexpected';
|
|
200
|
+
/**
 * A full record of one pass through the loop: the observation, decision,
 * plan, verification and execution, plus outcome and free-text learnings.
 */
export interface Experience {
  /** Identifier for this experience. */
  id: string;
  /** The observation that started the pass. */
  observation: Observation;
  /** The decision made for that observation. */
  decision: Decision;
  /** The plan built from the decision. */
  plan: ActionPlan;
  /** The verification result for the plan. */
  verification: PlanVerification;
  /** The execution record for the plan. */
  execution: PlanExecution;
  /** Outcome label. */
  outcome: ExperienceOutcome;
  /** Free text; presumably the gap between expected and actual results — confirm against the reflector. */
  delta: string;
  /** Free-text lessons. */
  lessons: string[];
  /** Domain label (the vocabulary is not constrained by this type). */
  domain: string;
  /** Free-form tags. */
  tags: string[];
  /** Timestamp as a string; presumably ISO-8601 — confirm. */
  timestamp: string;
}
|
|
214
|
+
/** Per-domain summary statistics and recurring patterns. */
export interface SkillProfile {
  /** Domain label the profile describes. */
  domain: string;
  /** Count of experiences in this domain. */
  totalExperiences: number;
  /** Success rate; the scale is not visible here (presumably 0–1 — confirm). */
  successRate: number;
  /** First-pass rate; the scale is not visible here (presumably 0–1 — confirm). */
  firstPassRate: number;
  /** Strings identifying common patterns (ids vs. descriptions is not visible here). */
  commonPatterns: string[];
  /** Strings identifying common anti-patterns (ids vs. descriptions is not visible here). */
  commonAntiPatterns: string[];
  /** Last-update time as a string; presumably ISO-8601 — confirm. */
  lastUpdated: string;
}
|
|
223
|
+
/** A stored pattern with an example and verification bookkeeping. */
export interface PatternEntry {
  /** Identifier for this pattern. */
  id: string;
  /** Domain label the pattern belongs to. */
  domain: string;
  /** Human-readable description. */
  description: string;
  /** Example code illustrating the pattern. */
  codeExample: string;
  /** Language of `codeExample` (naming convention not visible here). */
  language: string;
  /** How many times the pattern has been verified. */
  verificationCount: number;
  /** Last verification time as a string; presumably ISO-8601 — confirm. */
  lastVerified: string;
  /** Whether the pattern has been formally verified. */
  formallyVerified: boolean;
}
|
|
233
|
+
/** A stored anti-pattern with a bad example, a fixed example, and occurrence bookkeeping. */
export interface AntiPatternEntry {
  /** Identifier for this anti-pattern. */
  id: string;
  /** Domain label the anti-pattern belongs to. */
  domain: string;
  /** Human-readable description. */
  description: string;
  /** Example code showing the problem. */
  badExample: string;
  /** Example code showing the correction. */
  fixedExample: string;
  /** Language of the examples (naming convention not visible here). */
  language: string;
  /** Category label; the set of categories is not visible here. */
  claimCategory: string;
  /** How many times the anti-pattern has been seen. */
  occurrenceCount: number;
  /** Last-seen time as a string; presumably ISO-8601 — confirm. */
  lastSeen: string;
}
|
|
244
|
+
/**
 * Allow/block lists bounding what the agent may touch.
 *
 * Matching rules (prefix, glob, regex, …) are not visible in this declaration.
 */
export interface AgentScope {
  /** Directories the agent is allowed to use. */
  allowedDirectories: string[];
  /** Commands the agent is allowed to run. */
  allowedCommands: string[];
  /** URLs the agent is allowed to call. */
  allowedUrls: string[];
  /** Patterns that are blocked; what they are matched against is not visible here. */
  blockedPatterns: string[];
  /** Maximum file size; the unit is not visible here (presumably bytes — confirm). */
  maxFileSize: number;
}
|
|
251
|
+
/**
 * Numeric caps on agent activity.
 *
 * `maxSessionCostCents` is the only optional field.
 */
export interface HardLimits {
  /** Cap on actions per hour. */
  maxActionsPerHour: number;
  /** Cap on tokens per hour. */
  maxTokensPerHour: number;
  /** Cap on steps running concurrently. */
  maxConcurrentSteps: number;
  /** Cap on steps in a single plan. */
  maxPlanSteps: number;
  /** Command timeout; milliseconds, going by the `Ms` suffix. */
  commandTimeoutMs: number;
  /** Optional cap on session cost; cents, going by the name (currency not visible here). */
  maxSessionCostCents?: number;
}
|
|
259
|
+
/**
 * Top-level agent configuration.
 *
 * `controlPort` and `jsonLogs` are optional; everything else is required.
 */
export interface AgentConfig {
  /** Agent name. */
  name: string;
  /** Human-readable description. */
  description: string;
  /** Allow/block lists bounding what the agent may touch. */
  scope: AgentScope;
  /** Numeric caps on agent activity. */
  limits: HardLimits;
  /** Signal sources to configure. */
  signals: SignalConfig[];
  /** Approval tier per operation kind; `Record` requires an entry for every `OperationType`. */
  approvalDefaults: Record<OperationType, ApprovalLevel>;
  /** Model identifier (format not visible here). */
  modelId: string;
  /** Path of the experience store. */
  experienceStorePath: string;
  /** Optional; presumably the port used by the control server — confirm. */
  controlPort?: number;
  /** Optional; presumably switches log output to JSON — confirm against the logger. */
  jsonLogs?: boolean;
}
|
|
271
|
+
/** Phase labels for the agent; used by both `AgentState` and `AgentError`. */
export type AgentPhase =
  | 'idle'
  | 'observing'
  | 'reasoning'
  | 'planning'
  | 'verifying'
  | 'awaiting_approval'
  | 'executing'
  | 'reflecting'
  | 'paused'
  | 'stopped';
|
|
272
|
+
/** Snapshot of the agent's runtime state. */
export interface AgentState {
  /** Current phase. */
  phase: AgentPhase;
  /** Observation in progress, or `null` when there is none. */
  currentObservation: Observation | null;
  /** Decision in progress, or `null` when there is none. */
  currentDecision: Decision | null;
  /** Plan in progress, or `null` when there is none. */
  currentPlan: ActionPlan | null;
  /** Number of queued items (what is queued is not visible here). */
  queueDepth: number;
  /** Running action count for the current hour. */
  actionsThisHour: number;
  /** Running token count for the current hour. */
  tokensThisHour: number;
  /** Count of stored experiences. */
  totalExperiences: number;
  /** Uptime; the unit is not visible here — confirm. */
  uptime: number;
  /** Time of last activity as a string; presumably ISO-8601 — confirm. */
  lastActivityTimestamp: string;
  /** Recorded errors. */
  errors: AgentError[];
}
|
|
285
|
+
/** An error recorded in `AgentState.errors`. */
export interface AgentError {
  /** Phase associated with the error. */
  phase: AgentPhase;
  /** Error message text. */
  message: string;
  /** Timestamp as a string; presumably ISO-8601 — confirm. */
  timestamp: string;
  /** Whether the error is considered recoverable. */
  recoverable: boolean;
}
|
|
291
|
+
/** Lifecycle states of an approval request. */
export type ApprovalStatus =
  | 'pending'
  | 'approved'
  | 'rejected'
  | 'expired';
|
|
292
|
+
/**
 * A request for approval of a plan (or, when `stepId` is present, presumably a
 * single step of it), together with its resolution fields.
 */
export interface ApprovalRequest {
  /** Identifier for this request. */
  id: string;
  /** Id of the plan the request concerns. */
  planId: string;
  /** Optional id of a specific step. */
  stepId?: string;
  /** Text describing the action awaiting approval. */
  action: string;
  /** Free-text rationale. */
  reasoning: string;
  /** Risk label, expressed with the approval tiers. */
  riskLevel: ApprovalLevel;
  /** Verification result attached to the request. */
  verification: PlanVerification;
  /** Current lifecycle state. */
  status: ApprovalStatus;
  /** Creation time as a string; presumably ISO-8601 — confirm. */
  createdAt: string;
  /** Optional resolution time. */
  resolvedAt?: string;
  /** Optional identifier of whoever resolved the request. */
  resolvedBy?: string;
  /** Optional free-text reason; presumably set only on rejection. */
  rejectionReason?: string;
}
|
|
306
|
+
/** Event kinds that can appear in the audit log. */
export type AuditEventType =
  | 'observation_received'
  | 'decision_made'
  | 'plan_created'
  | 'verification_completed'
  | 'approval_requested'
  | 'approval_resolved'
  | 'step_executed'
  | 'experience_stored'
  | 'agent_started'
  | 'agent_stopped'
  | 'agent_error';
|
|
307
|
+
/** One audit log entry: the event kind, free-form details, and ids of related records. */
export interface AuditEntry {
  /** Identifier for this entry. */
  id: string;
  /** Kind of event recorded. */
  eventType: AuditEventType;
  /** Name of the agent that produced the entry. */
  agentName: string;
  /** Timestamp as a string; presumably ISO-8601 — confirm. */
  timestamp: string;
  /** Free-form event data; values are `unknown` and must be narrowed before use. */
  details: Record<string, unknown>;
  /** Ids of records the event relates to; every key is optional. */
  relatedIds: {
    observationId?: string;
    decisionId?: string;
    planId?: string;
    stepId?: string;
    experienceId?: string;
  };
  /** Optional; presumably the hash of the preceding entry, for chaining — confirm against the audit log. */
  prevHash?: string;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/runtime/types.ts"],"names":[],"mappings":"AAAA,+DAA+D;AAC/D,4CAA4C;AAC5C,gCAAgC;AAChC,+DAA+D"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import type { ActionPlan, ActionStep, AgentConfig, PlanExecution, PlanVerification, StepVerification } from './types.js';
|
|
2
|
+
/**
 * Verifier for action plans and their steps.
 *
 * Declaration only: the member list below is all that is visible here, so the
 * notes on private members restate the shipped doc comments and nothing more.
 */
export declare class RuntimeVerifier {
    /** Agent configuration; presumably the value passed to the constructor. */
    private config;
    /** Cache slot; presumably holds the codebase index cleared by `invalidateIndex()`. */
    private cachedIndex;
    /**
     * @param config - Agent configuration for this verifier.
     */
    constructor(config: AgentConfig);
    /**
     * Checks a whole action plan before it is executed.
     *
     * @param plan - The plan to verify.
     * @returns A `PlanVerification` carrying per-step results and an aggregate verdict.
     */
    verifyPlan(plan: ActionPlan): Promise<PlanVerification>;
    /**
     * Checks one action step, dispatching to a verification strategy chosen
     * by the step's operation type.
     *
     * @param step - The step to verify.
     * @returns The verification result for that step.
     */
    verifyStep(step: ActionStep): Promise<StepVerification>;
    /**
     * Post-execution check: compares actual outcomes against the planned
     * postConditions.
     *
     * @param plan - The plan that was executed.
     * @param execution - The execution record for that plan.
     * @returns A `PlanVerification` reflecting what actually happened.
     */
    verifyPostExecution(plan: ActionPlan, execution: PlanExecution): Promise<PlanVerification>;
    /**
     * Strategy for `code_write` steps: runs the code through Assay's claim
     * extraction + verification pipeline.
     */
    private verifyCodeWrite;
    /**
     * Strategy for `code_run` steps: checks the command and cwd against scope.
     */
    private verifyCodeRun;
    /**
     * Strategy for `api_call` steps: checks the URL against scope.
     */
    private verifyApiCall;
    /**
     * Strategy for `git` steps: always passes, on the stated grounds that git
     * operations are safe and reversible.
     *
     * NOTE(review): `GitOp.command` permits `'push'`, which is not trivially
     * reversible — confirm that an unconditional pass is intended.
     */
    private verifyGit;
    /**
     * Strategy for `message` steps: always passes, since messages are informational.
     */
    private verifyMessage;
    /** Undocumented upstream; presumably builds or returns the cached codebase index — confirm. */
    private getCodebaseIndex;
    /** Invalidates the cached codebase index; call this after code changes. */
    invalidateIndex(): void;
    /** Undocumented upstream; presumably a helper that assembles a `StepVerification` — confirm. */
    private makeStepVerification;
}
|