@soleri/core 2.1.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brain/brain.d.ts +3 -1
- package/dist/brain/brain.d.ts.map +1 -1
- package/dist/brain/brain.js +60 -4
- package/dist/brain/brain.js.map +1 -1
- package/dist/brain/intelligence.d.ts +36 -1
- package/dist/brain/intelligence.d.ts.map +1 -1
- package/dist/brain/intelligence.js +119 -14
- package/dist/brain/intelligence.js.map +1 -1
- package/dist/brain/types.d.ts +32 -0
- package/dist/brain/types.d.ts.map +1 -1
- package/dist/control/identity-manager.d.ts +22 -0
- package/dist/control/identity-manager.d.ts.map +1 -0
- package/dist/control/identity-manager.js +233 -0
- package/dist/control/identity-manager.js.map +1 -0
- package/dist/control/intent-router.d.ts +32 -0
- package/dist/control/intent-router.d.ts.map +1 -0
- package/dist/control/intent-router.js +242 -0
- package/dist/control/intent-router.js.map +1 -0
- package/dist/control/types.d.ts +68 -0
- package/dist/control/types.d.ts.map +1 -0
- package/dist/control/types.js +9 -0
- package/dist/control/types.js.map +1 -0
- package/dist/curator/curator.d.ts +29 -0
- package/dist/curator/curator.d.ts.map +1 -1
- package/dist/curator/curator.js +135 -0
- package/dist/curator/curator.js.map +1 -1
- package/dist/facades/types.d.ts +1 -1
- package/dist/governance/governance.d.ts +42 -0
- package/dist/governance/governance.d.ts.map +1 -0
- package/dist/governance/governance.js +488 -0
- package/dist/governance/governance.js.map +1 -0
- package/dist/governance/index.d.ts +3 -0
- package/dist/governance/index.d.ts.map +1 -0
- package/dist/governance/index.js +2 -0
- package/dist/governance/index.js.map +1 -0
- package/dist/governance/types.d.ts +102 -0
- package/dist/governance/types.d.ts.map +1 -0
- package/dist/governance/types.js +3 -0
- package/dist/governance/types.js.map +1 -0
- package/dist/index.d.ts +32 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +29 -1
- package/dist/index.js.map +1 -1
- package/dist/logging/logger.d.ts +37 -0
- package/dist/logging/logger.d.ts.map +1 -0
- package/dist/logging/logger.js +145 -0
- package/dist/logging/logger.js.map +1 -0
- package/dist/logging/types.d.ts +19 -0
- package/dist/logging/types.d.ts.map +1 -0
- package/dist/logging/types.js +2 -0
- package/dist/logging/types.js.map +1 -0
- package/dist/loop/loop-manager.d.ts +49 -0
- package/dist/loop/loop-manager.d.ts.map +1 -0
- package/dist/loop/loop-manager.js +105 -0
- package/dist/loop/loop-manager.js.map +1 -0
- package/dist/loop/types.d.ts +35 -0
- package/dist/loop/types.d.ts.map +1 -0
- package/dist/loop/types.js +8 -0
- package/dist/loop/types.js.map +1 -0
- package/dist/planning/gap-analysis.d.ts +29 -0
- package/dist/planning/gap-analysis.d.ts.map +1 -0
- package/dist/planning/gap-analysis.js +265 -0
- package/dist/planning/gap-analysis.js.map +1 -0
- package/dist/planning/gap-types.d.ts +29 -0
- package/dist/planning/gap-types.d.ts.map +1 -0
- package/dist/planning/gap-types.js +28 -0
- package/dist/planning/gap-types.js.map +1 -0
- package/dist/planning/planner.d.ts +150 -1
- package/dist/planning/planner.d.ts.map +1 -1
- package/dist/planning/planner.js +365 -2
- package/dist/planning/planner.js.map +1 -1
- package/dist/project/project-registry.d.ts +79 -0
- package/dist/project/project-registry.d.ts.map +1 -0
- package/dist/project/project-registry.js +276 -0
- package/dist/project/project-registry.js.map +1 -0
- package/dist/project/types.d.ts +28 -0
- package/dist/project/types.d.ts.map +1 -0
- package/dist/project/types.js +5 -0
- package/dist/project/types.js.map +1 -0
- package/dist/runtime/admin-extra-ops.d.ts +13 -0
- package/dist/runtime/admin-extra-ops.d.ts.map +1 -0
- package/dist/runtime/admin-extra-ops.js +284 -0
- package/dist/runtime/admin-extra-ops.js.map +1 -0
- package/dist/runtime/admin-ops.d.ts +15 -0
- package/dist/runtime/admin-ops.d.ts.map +1 -0
- package/dist/runtime/admin-ops.js +322 -0
- package/dist/runtime/admin-ops.js.map +1 -0
- package/dist/runtime/capture-ops.d.ts +15 -0
- package/dist/runtime/capture-ops.d.ts.map +1 -0
- package/dist/runtime/capture-ops.js +345 -0
- package/dist/runtime/capture-ops.js.map +1 -0
- package/dist/runtime/core-ops.d.ts +7 -3
- package/dist/runtime/core-ops.d.ts.map +1 -1
- package/dist/runtime/core-ops.js +474 -8
- package/dist/runtime/core-ops.js.map +1 -1
- package/dist/runtime/curator-extra-ops.d.ts +9 -0
- package/dist/runtime/curator-extra-ops.d.ts.map +1 -0
- package/dist/runtime/curator-extra-ops.js +59 -0
- package/dist/runtime/curator-extra-ops.js.map +1 -0
- package/dist/runtime/domain-ops.d.ts.map +1 -1
- package/dist/runtime/domain-ops.js +59 -13
- package/dist/runtime/domain-ops.js.map +1 -1
- package/dist/runtime/grading-ops.d.ts +14 -0
- package/dist/runtime/grading-ops.d.ts.map +1 -0
- package/dist/runtime/grading-ops.js +105 -0
- package/dist/runtime/grading-ops.js.map +1 -0
- package/dist/runtime/loop-ops.d.ts +13 -0
- package/dist/runtime/loop-ops.d.ts.map +1 -0
- package/dist/runtime/loop-ops.js +179 -0
- package/dist/runtime/loop-ops.js.map +1 -0
- package/dist/runtime/memory-cross-project-ops.d.ts +12 -0
- package/dist/runtime/memory-cross-project-ops.d.ts.map +1 -0
- package/dist/runtime/memory-cross-project-ops.js +165 -0
- package/dist/runtime/memory-cross-project-ops.js.map +1 -0
- package/dist/runtime/memory-extra-ops.d.ts +13 -0
- package/dist/runtime/memory-extra-ops.d.ts.map +1 -0
- package/dist/runtime/memory-extra-ops.js +173 -0
- package/dist/runtime/memory-extra-ops.js.map +1 -0
- package/dist/runtime/orchestrate-ops.d.ts +17 -0
- package/dist/runtime/orchestrate-ops.d.ts.map +1 -0
- package/dist/runtime/orchestrate-ops.js +240 -0
- package/dist/runtime/orchestrate-ops.js.map +1 -0
- package/dist/runtime/planning-extra-ops.d.ts +17 -0
- package/dist/runtime/planning-extra-ops.d.ts.map +1 -0
- package/dist/runtime/planning-extra-ops.js +300 -0
- package/dist/runtime/planning-extra-ops.js.map +1 -0
- package/dist/runtime/project-ops.d.ts +15 -0
- package/dist/runtime/project-ops.d.ts.map +1 -0
- package/dist/runtime/project-ops.js +181 -0
- package/dist/runtime/project-ops.js.map +1 -0
- package/dist/runtime/runtime.d.ts.map +1 -1
- package/dist/runtime/runtime.js +44 -1
- package/dist/runtime/runtime.js.map +1 -1
- package/dist/runtime/types.d.ts +21 -0
- package/dist/runtime/types.d.ts.map +1 -1
- package/dist/runtime/vault-extra-ops.d.ts +9 -0
- package/dist/runtime/vault-extra-ops.d.ts.map +1 -0
- package/dist/runtime/vault-extra-ops.js +195 -0
- package/dist/runtime/vault-extra-ops.js.map +1 -0
- package/dist/telemetry/telemetry.d.ts +48 -0
- package/dist/telemetry/telemetry.d.ts.map +1 -0
- package/dist/telemetry/telemetry.js +87 -0
- package/dist/telemetry/telemetry.js.map +1 -0
- package/dist/vault/vault.d.ts +94 -0
- package/dist/vault/vault.d.ts.map +1 -1
- package/dist/vault/vault.js +340 -1
- package/dist/vault/vault.js.map +1 -1
- package/package.json +1 -1
- package/src/__tests__/admin-extra-ops.test.ts +420 -0
- package/src/__tests__/admin-ops.test.ts +271 -0
- package/src/__tests__/brain-intelligence.test.ts +205 -0
- package/src/__tests__/brain.test.ts +131 -0
- package/src/__tests__/capture-ops.test.ts +509 -0
- package/src/__tests__/core-ops.test.ts +266 -2
- package/src/__tests__/curator-extra-ops.test.ts +359 -0
- package/src/__tests__/domain-ops.test.ts +66 -0
- package/src/__tests__/governance.test.ts +522 -0
- package/src/__tests__/grading-ops.test.ts +340 -0
- package/src/__tests__/identity-manager.test.ts +243 -0
- package/src/__tests__/intent-router.test.ts +222 -0
- package/src/__tests__/logger.test.ts +200 -0
- package/src/__tests__/loop-ops.test.ts +398 -0
- package/src/__tests__/memory-cross-project-ops.test.ts +246 -0
- package/src/__tests__/memory-extra-ops.test.ts +352 -0
- package/src/__tests__/orchestrate-ops.test.ts +284 -0
- package/src/__tests__/planner.test.ts +331 -0
- package/src/__tests__/planning-extra-ops.test.ts +548 -0
- package/src/__tests__/project-ops.test.ts +367 -0
- package/src/__tests__/vault-extra-ops.test.ts +407 -0
- package/src/brain/brain.ts +114 -7
- package/src/brain/intelligence.ts +179 -10
- package/src/brain/types.ts +38 -0
- package/src/control/identity-manager.ts +354 -0
- package/src/control/intent-router.ts +326 -0
- package/src/control/types.ts +102 -0
- package/src/curator/curator.ts +213 -0
- package/src/governance/governance.ts +698 -0
- package/src/governance/index.ts +18 -0
- package/src/governance/types.ts +111 -0
- package/src/index.ts +102 -2
- package/src/logging/logger.ts +154 -0
- package/src/logging/types.ts +21 -0
- package/src/loop/loop-manager.ts +130 -0
- package/src/loop/types.ts +44 -0
- package/src/planning/gap-analysis.ts +506 -0
- package/src/planning/gap-types.ts +58 -0
- package/src/planning/planner.ts +478 -2
- package/src/project/project-registry.ts +358 -0
- package/src/project/types.ts +31 -0
- package/src/runtime/admin-extra-ops.ts +307 -0
- package/src/runtime/admin-ops.ts +329 -0
- package/src/runtime/capture-ops.ts +385 -0
- package/src/runtime/core-ops.ts +535 -7
- package/src/runtime/curator-extra-ops.ts +71 -0
- package/src/runtime/domain-ops.ts +65 -13
- package/src/runtime/grading-ops.ts +121 -0
- package/src/runtime/loop-ops.ts +194 -0
- package/src/runtime/memory-cross-project-ops.ts +192 -0
- package/src/runtime/memory-extra-ops.ts +186 -0
- package/src/runtime/orchestrate-ops.ts +272 -0
- package/src/runtime/planning-extra-ops.ts +327 -0
- package/src/runtime/project-ops.ts +196 -0
- package/src/runtime/runtime.ts +49 -1
- package/src/runtime/types.ts +21 -0
- package/src/runtime/vault-extra-ops.ts +225 -0
- package/src/telemetry/telemetry.ts +118 -0
- package/src/vault/vault.ts +412 -1
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
2
|
+
import { mkdirSync, rmSync } from 'node:fs';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
5
|
+
import { createAgentRuntime } from '../runtime/runtime.js';
|
|
6
|
+
import { createCoreOps } from '../runtime/core-ops.js';
|
|
7
|
+
import type { AgentRuntime } from '../runtime/types.js';
|
|
8
|
+
import type { OpDefinition } from '../facades/types.js';
|
|
9
|
+
|
|
10
|
+
describe('Grading Ops', () => {
|
|
11
|
+
let runtime: AgentRuntime;
|
|
12
|
+
let ops: OpDefinition[];
|
|
13
|
+
let plannerDir: string;
|
|
14
|
+
|
|
15
|
+
// Per-test setup: create a scratch directory under the OS temp dir for the
// plans file, then build a runtime and the core op list the tests look ops up in.
beforeEach(() => {
  plannerDir = join(tmpdir(), `grading-ops-test-${Date.now()}`);
  mkdirSync(plannerDir, { recursive: true });

  const plansPath = join(plannerDir, 'plans.json');
  runtime = createAgentRuntime({
    agentId: 'test-grading',
    vaultPath: ':memory:', // presumably selects a non-persistent vault — confirm in runtime
    plansPath,
  });

  ops = createCoreOps(runtime);
});
|
|
25
|
+
|
|
26
|
+
// Per-test teardown: close the runtime and delete the scratch directory.
// The directory removal lives in `finally` so that a throwing `runtime.close()`
// cannot leave the temp directory behind on disk.
afterEach(() => {
  try {
    runtime.close();
  } finally {
    rmSync(plannerDir, { recursive: true, force: true });
  }
});
|
|
30
|
+
|
|
31
|
+
/**
 * Looks up an op definition by name in the current `ops` list.
 *
 * @param name - Exact op name to search for.
 * @returns The first op whose `name` matches.
 * @throws Error when no op with that name exists.
 */
function findOp(name: string): OpDefinition {
  for (const candidate of ops) {
    if (candidate.name === name) {
      return candidate;
    }
  }
  throw new Error(`Op "${name}" not found`);
}
|
|
36
|
+
|
|
37
|
+
// ─── Helper to create plans ─────────────────────────────────────
|
|
38
|
+
/**
 * Creates a plan through the `create_plan` op and returns the new plan's id.
 *
 * `objective` and `scope` fall back to placeholder text when they are
 * null/undefined; `decisions` and `tasks` are forwarded unchanged.
 *
 * @param opts - Optional plan fields to pass to the op.
 * @returns The `plan.id` reported by the op.
 */
async function createPlan(opts: {
  objective?: string;
  scope?: string;
  decisions?: string[];
  tasks?: Array<{ title: string; description: string }>;
}): Promise<string> {
  const createOp = findOp('create_plan');
  const { objective, scope, decisions, tasks } = opts;

  const created = (await createOp.handler({
    objective: objective ?? 'Test plan objective',
    scope: scope ?? 'Test scope description',
    decisions,
    tasks,
  })) as { plan: { id: string } };

  return created.plan.id;
}
|
|
52
|
+
|
|
53
|
+
// Tests for the `plan_grade` op: scoring of empty and well-formed plans, the
// individual gap detectors, and the per-plan iteration counter.
describe('plan_grade', () => {
  // Scope text shared by the gap-detector tests below.
  const TEST_ONLY_SCOPE = 'Testing only, does not include production changes';

  // Runs `plan_grade` for a plan. The op is resolved at call time because
  // `ops` is only assigned inside `beforeEach`.
  const gradePlan = (planId: string) => findOp('plan_grade').handler({ planId });

  it('should grade an empty plan with very low score', async () => {
    type Gap = {
      severity: string;
      category: string;
      description: string;
      recommendation: string;
    };

    const planId = await createPlan({ objective: '', scope: '' });
    const check = (await gradePlan(planId)) as {
      score: number;
      grade: string;
      gaps: Gap[];
      iteration: number;
    };
    const { score, grade, gaps, iteration } = check;

    // 3 critical gaps: no objective, no scope, no tasks = -90
    expect(score).toBeLessThanOrEqual(10);
    expect(grade).toBe('F');
    expect(gaps.length).toBeGreaterThan(0);
    expect(iteration).toBe(1);
    // Gaps should have the new format
    expect(gaps[0].recommendation).toBeDefined();
  });

  it('should grade a well-formed plan highly on first iteration', async () => {
    const decisions = [
      'Use Redis because it provides sub-millisecond latency and supports TTL natively',
      'Set TTL to 5 minutes since average data freshness requirement is 10 minutes',
    ];
    const tasks = [
      { title: 'Setup Redis client', description: 'Install and configure Redis connection pool' },
      { title: 'Add middleware', description: 'Express transparent caching middleware layer' },
      { title: 'Add invalidation', description: 'Cache invalidation on writes for consistency' },
      { title: 'Add tests', description: 'Integration tests for cache hit/miss scenarios' },
      { title: 'Add metrics', description: 'Track and verify cache hit rate monitoring' },
    ];
    const planId = await createPlan({
      objective: 'Build a Redis caching layer for API to reduce database load by 50%',
      scope: 'API backend services only. Does not include frontend or CDN caching.',
      decisions,
      tasks,
    });

    const { score, grade } = (await gradePlan(planId)) as { score: number; grade: string };

    // On iteration 1, minor gaps are free
    expect(score).toBeGreaterThanOrEqual(95);
    expect(grade).toMatch(/^A/);
  });

  it('should penalize duplicate task titles', async () => {
    const planId = await createPlan({
      objective: 'Test duplicate title detection in the grading engine',
      scope: TEST_ONLY_SCOPE,
      decisions: ['Use assertions because they provide clear error messages due to descriptive output'],
      tasks: [
        { title: 'Same name', description: 'First task implementation' },
        { title: 'Same name', description: 'Second task implementation' },
        { title: 'Unique name', description: 'Third task implementation' },
      ],
    });

    const { gaps } = (await gradePlan(planId)) as {
      gaps: Array<{ description: string; category: string }>;
    };
    const duplicateGap = gaps.find((gap) => gap.description.includes('Duplicate'));

    expect(duplicateGap).toBeDefined();
    expect(duplicateGap!.category).toBe('semantic-quality');
  });

  it('should penalize tasks with short descriptions', async () => {
    const planId = await createPlan({
      objective: 'Test short description detection in the grading engine',
      scope: TEST_ONLY_SCOPE,
      decisions: ['Use short tasks because they test the clarity analysis pass'],
      tasks: [
        { title: 'Task good', description: 'Has a proper description here' },
        { title: 'Task bad', description: '' },
        { title: 'Task ok', description: 'Also has a description' },
      ],
    });

    const { gaps } = (await gradePlan(planId)) as {
      gaps: Array<{ description: string; category: string }>;
    };
    const shortDescriptionGap = gaps.find((gap) => gap.description.includes('short descriptions'));

    expect(shortDescriptionGap).toBeDefined();
    expect(shortDescriptionGap!.category).toBe('clarity');
  });

  it('should penalize plan without decisions using semantic-quality category', async () => {
    // No `decisions` key at all: createPlan forwards it as undefined.
    const planId = await createPlan({
      objective: 'Test missing decisions detection in the plan grading system',
      scope: TEST_ONLY_SCOPE,
      tasks: [
        { title: 'T1', description: 'First task with details' },
        { title: 'T2', description: 'Second task with details' },
        { title: 'T3', description: 'Third task with details' },
      ],
    });

    const { gaps } = (await gradePlan(planId)) as {
      gaps: Array<{ category: string; description: string }>;
    };
    const missingDecisionsGap = gaps.find((gap) => {
      if (gap.category !== 'semantic-quality') {
        return false;
      }
      return gap.description.includes('no decisions');
    });

    expect(missingDecisionsGap).toBeDefined();
  });

  it('should penalize too few tasks', async () => {
    const planId = await createPlan({
      objective: 'Test task granularity detection in grading',
      scope: TEST_ONLY_SCOPE,
      decisions: ['Use single task because it tests granularity check'],
      tasks: [{ title: 'Only task', description: 'This is the only task' }],
    });

    const { gaps } = (await gradePlan(planId)) as {
      gaps: Array<{ description: string }>;
    };
    const granularityGap = gaps.find((gap) => gap.description.includes('lack'));

    expect(granularityGap).toBeDefined();
  });

  it('should include iteration number in check', async () => {
    const planId = await createPlan({});

    // Grade twice, strictly in sequence, so the second run sees the first.
    const first = (await gradePlan(planId)) as { iteration: number };
    const second = (await gradePlan(planId)) as { iteration: number };

    expect(first.iteration).toBe(1);
    expect(second.iteration).toBe(2);
  });
});
|
|
175
|
+
|
|
176
|
+
// Tests for the `plan_check_history` op: empty history for an ungraded plan and
// one recorded check per `plan_grade` run.
describe('plan_check_history', () => {
  it('should return empty checks for new plan', async () => {
    const planId = await createPlan({});

    const history = (await findOp('plan_check_history').handler({ planId })) as {
      count: number;
      checks: unknown[];
    };

    expect(history.count).toBe(0);
    expect(history.checks).toEqual([]);
  });

  it('should accumulate checks with increasing iteration', async () => {
    const planId = await createPlan({});

    // Three grading runs, awaited one after another.
    for (let run = 0; run < 3; run += 1) {
      await findOp('plan_grade').handler({ planId });
    }

    const history = (await findOp('plan_check_history').handler({ planId })) as {
      count: number;
      checks: Array<{ checkId: string; iteration: number }>;
    };
    expect(history.count).toBe(3);

    // Every recorded check must carry its own id.
    const distinctIds = new Set<string>();
    for (const entry of history.checks) {
      distinctIds.add(entry.checkId);
    }
    expect(distinctIds.size).toBe(3);

    expect(history.checks[0].iteration).toBe(1);
    expect(history.checks[2].iteration).toBe(3);
  });
});
|
|
203
|
+
|
|
204
|
+
// Tests for the `plan_latest_check` op: a message for ungraded plans and the
// most recent check id once the plan has been graded.
describe('plan_latest_check', () => {
  it('should return null-like response for ungraded plan', async () => {
    const planId = await createPlan({});

    const response = (await findOp('plan_latest_check').handler({ planId })) as {
      check?: null;
      message?: string;
    };

    expect(response.message).toBeDefined();
  });

  it('should return latest check after grading', async () => {
    const planId = await createPlan({});

    const graded = (await findOp('plan_grade').handler({ planId })) as { checkId: string };
    const fetched = (await findOp('plan_latest_check').handler({ planId })) as { checkId: string };

    expect(fetched.checkId).toBe(graded.checkId);
  });
});
|
|
225
|
+
|
|
226
|
+
// Tests for the `plan_meets_grade` op: a detailed plan against target 'B' and
// an empty plan against target 'A+'.
describe('plan_meets_grade', () => {
  type MeetsGradeResult = { meets: boolean; check: { score: number } };

  it('should return meets=true for plan meeting target', async () => {
    const tasks = [
      { title: 'Write unit tests', description: 'Cover all edge cases in the module' },
      { title: 'Write integration tests', description: 'End-to-end API tests for the flow' },
      { title: 'Add CI pipeline', description: 'Run tests automatically on every PR' },
      { title: 'Add coverage report', description: 'Track and verify code coverage metrics' },
    ];
    const planId = await createPlan({
      objective: 'Build a comprehensive feature for the testing module',
      scope: 'Testing module only, does not include deployment or infrastructure',
      decisions: ['Use vitest because it integrates natively with TypeScript due to built-in support'],
      tasks,
    });

    const meetsGradeOp = findOp('plan_meets_grade');
    const outcome = (await meetsGradeOp.handler({ planId, targetGrade: 'B' })) as MeetsGradeResult;

    expect(outcome.meets).toBe(true);
    expect(outcome.check.score).toBeGreaterThanOrEqual(80);
  });

  it('should return meets=false for plan not meeting target', async () => {
    const planId = await createPlan({ objective: '', scope: '' });

    const meetsGradeOp = findOp('plan_meets_grade');
    const outcome = (await meetsGradeOp.handler({ planId, targetGrade: 'A+' })) as MeetsGradeResult;

    expect(outcome.meets).toBe(false);
  });
});
|
|
256
|
+
|
|
257
|
+
// Tests for the `plan_auto_improve` op: gap grouping plus the suggested next
// action for a failing plan and for a high-scoring plan.
describe('plan_auto_improve', () => {
  it('should return gaps grouped by severity with next action', async () => {
    type GroupedGap = { category: string; description: string; recommendation: string };

    const planId = await createPlan({ objective: '', scope: '' });
    const report = (await findOp('plan_auto_improve').handler({ planId })) as {
      grade: string;
      score: number;
      iteration: number;
      totalGaps: number;
      gapsBySeverity: Record<string, GroupedGap[]>;
      nextAction: string;
    };

    expect(report.score).toBeLessThan(100);
    expect(report.totalGaps).toBeGreaterThan(0);
    expect(report.gapsBySeverity.critical).toBeDefined();
    expect(report.gapsBySeverity.critical.length).toBeGreaterThan(0);
    expect(report.nextAction).toBe('iterate');
    // Each gap should have recommendation field
    expect(report.gapsBySeverity.critical[0].recommendation).toBeDefined();
  });

  it('should return approve next action for high-scoring plan', async () => {
    const decisions = [
      'Use Redis because it provides sub-millisecond latency and supports TTL natively',
      'Set TTL to 5 minutes since average data freshness requirement is 10 minutes',
    ];
    const tasks = [
      { title: 'Setup Redis', description: 'Install and configure Redis connection pool' },
      { title: 'Add middleware', description: 'Express transparent caching middleware layer' },
      { title: 'Add invalidation', description: 'Cache invalidation on writes for consistency' },
      { title: 'Add tests', description: 'Integration tests for cache hit/miss scenarios' },
      { title: 'Add metrics', description: 'Track and verify cache hit rate monitoring' },
    ];
    const planId = await createPlan({
      objective: 'Build a Redis caching layer for API to reduce database load by 50%',
      scope: 'API backend services only. Does not include frontend or CDN caching.',
      decisions,
      tasks,
    });

    const report = (await findOp('plan_auto_improve').handler({ planId })) as {
      score: number;
      nextAction: string;
    };

    expect(report.score).toBeGreaterThanOrEqual(90);
    expect(report.nextAction).toBe('approve');
  });
});
|
|
301
|
+
|
|
302
|
+
// Tests pinning the score → letter-grade boundaries exercised here:
// A+ at 95 and above, A from 90 up to (not including) 95, F below 60.
describe('grade thresholds', () => {
  it('A+ should require >= 95', async () => {
    const planId = await createPlan({
      objective: 'Implement a Redis caching layer for the API to reduce DB load by 50%',
      scope: 'Backend API services only. Does not include frontend or CDN.',
      decisions: [
        'Use Redis because it provides sub-millisecond latency and supports TTL natively',
        'Set TTL to 5 minutes since average data freshness requirement is 10 minutes',
      ],
      tasks: [
        { title: 'Setup Redis', description: 'Install and configure Redis connection pool' },
        { title: 'Add middleware', description: 'Express transparent caching middleware' },
        { title: 'Add invalidation', description: 'Cache invalidation on write operations' },
        { title: 'Add tests', description: 'Integration tests for cache hit/miss scenarios' },
        { title: 'Add metrics', description: 'Track and verify cache hit rate monitoring' },
      ],
    });
    const check = (await findOp('plan_grade').handler({ planId })) as {
      score: number;
      grade: string;
    };

    // Guard against a missing/NaN score, which would otherwise skip every
    // comparison below and let the test pass without checking anything.
    expect(Number.isFinite(check.score)).toBe(true);

    if (check.score >= 95) {
      expect(check.grade).toBe('A+');
    } else {
      // The threshold under test: anything below 95 must NOT be graded A+.
      expect(check.grade).not.toBe('A+');
      if (check.score >= 90) {
        expect(check.grade).toBe('A');
      }
    }
  });

  it('F should be for score < 60', async () => {
    const planId = await createPlan({ objective: '', scope: '' });
    const check = (await findOp('plan_grade').handler({ planId })) as {
      score: number;
      grade: string;
    };
    expect(check.score).toBeLessThan(60);
    expect(check.grade).toBe('F');
  });
});
|
|
340
|
+
});
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
2
|
+
import { mkdirSync, rmSync } from 'node:fs';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
5
|
+
import { createAgentRuntime } from '../runtime/runtime.js';
|
|
6
|
+
import { IdentityManager } from '../control/identity-manager.js';
|
|
7
|
+
import type { AgentRuntime } from '../runtime/types.js';
|
|
8
|
+
|
|
9
|
+
describe('IdentityManager', () => {
|
|
10
|
+
let runtime: AgentRuntime;
|
|
11
|
+
let manager: IdentityManager;
|
|
12
|
+
let plannerDir: string;
|
|
13
|
+
const AGENT = 'test-agent';
|
|
14
|
+
|
|
15
|
+
// Per-test setup: create a scratch directory under the OS temp dir for the
// plans file, build a runtime, and wrap its vault in a fresh IdentityManager.
beforeEach(() => {
  plannerDir = join(tmpdir(), `identity-test-${Date.now()}`);
  mkdirSync(plannerDir, { recursive: true });

  const plansPath = join(plannerDir, 'plans.json');
  runtime = createAgentRuntime({
    agentId: 'identity-test',
    vaultPath: ':memory:', // presumably selects a non-persistent vault — confirm in runtime
    plansPath,
  });

  manager = new IdentityManager(runtime.vault);
});
|
|
25
|
+
|
|
26
|
+
// Per-test teardown: close the runtime and delete the scratch directory.
// The directory removal lives in `finally` so that a throwing `runtime.close()`
// cannot leave the temp directory behind on disk.
afterEach(() => {
  try {
    runtime.close();
  } finally {
    rmSync(plannerDir, { recursive: true, force: true });
  }
});
|
|
30
|
+
|
|
31
|
+
// ─── Identity CRUD ──────────────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
// Unknown agents have no identity record.
it('should return null for unknown agent', () => {
  const missing = manager.getIdentity('nonexistent');
  expect(missing).toBeNull();
});

// Only `name` is supplied; every other field should come back as its default.
it('should create identity with defaults', () => {
  const created = manager.setIdentity(AGENT, { name: 'TestBot' });
  const { agentId, name, role, description, personality, version, guidelines } = created;

  expect(agentId).toBe(AGENT);
  expect(name).toBe('TestBot');
  expect(role).toBe('');
  expect(description).toBe('');
  expect(personality).toEqual([]);
  expect(version).toBe(1);
  expect(guidelines).toEqual([]);
});

// All settable fields are supplied and should be echoed back.
it('should create identity with all fields', () => {
  const created = manager.setIdentity(AGENT, {
    name: 'TestBot',
    role: 'Test Runner',
    description: 'A test agent',
    personality: ['precise', 'thorough'],
  });
  const { name, role, description, personality } = created;

  expect(name).toBe('TestBot');
  expect(role).toBe('Test Runner');
  expect(description).toBe('A test agent');
  expect(personality).toEqual(['precise', 'thorough']);
});

it('should use agentId as name if name not provided', () => {
  const created = manager.setIdentity(AGENT, {});
  expect(created.name).toBe(AGENT);
});

// Write then read back through the manager.
it('should retrieve identity after creation', () => {
  manager.setIdentity(AGENT, { name: 'TestBot', role: 'Runner' });

  const stored = manager.getIdentity(AGENT);

  expect(stored).not.toBeNull();
  expect(stored!.name).toBe('TestBot');
  expect(stored!.role).toBe('Runner');
});

// A second call with only `role` must keep the earlier name/description.
it('should update identity with partial fields', () => {
  manager.setIdentity(AGENT, { name: 'V1', role: 'Role1', description: 'Desc1' });

  const afterUpdate = manager.setIdentity(AGENT, { role: 'Role2' });

  expect(afterUpdate.name).toBe('V1');
  expect(afterUpdate.role).toBe('Role2');
  expect(afterUpdate.description).toBe('Desc1');
  expect(afterUpdate.version).toBe(2);
});

it('should increment version on each update', () => {
  // Three writes in order: the initial create plus two updates.
  for (const name of ['V1', 'V2', 'V3']) {
    manager.setIdentity(AGENT, { name });
  }

  const stored = manager.getIdentity(AGENT);
  expect(stored!.version).toBe(3);
});
|
|
90
|
+
|
|
91
|
+
// ─── Guidelines ─────────────────────────────────────────────────────
|
|
92
|
+
|
|
93
|
+
it('should add a guideline', () => {
|
|
94
|
+
const guideline = manager.addGuideline(AGENT, {
|
|
95
|
+
category: 'behavior',
|
|
96
|
+
text: 'Always be helpful',
|
|
97
|
+
});
|
|
98
|
+
expect(guideline.id).toBeDefined();
|
|
99
|
+
expect(guideline.category).toBe('behavior');
|
|
100
|
+
expect(guideline.text).toBe('Always be helpful');
|
|
101
|
+
expect(guideline.priority).toBe(0);
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
it('should add guideline with priority', () => {
|
|
105
|
+
const guideline = manager.addGuideline(AGENT, {
|
|
106
|
+
category: 'restriction',
|
|
107
|
+
text: 'Never share secrets',
|
|
108
|
+
priority: 10,
|
|
109
|
+
});
|
|
110
|
+
expect(guideline.priority).toBe(10);
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
it('should list guidelines by category', () => {
  // Two 'behavior' entries with one 'restriction' entry added between them.
  manager.addGuideline(AGENT, { category: 'behavior', text: 'Be helpful' });
  manager.addGuideline(AGENT, { category: 'restriction', text: 'No secrets' });
  manager.addGuideline(AGENT, { category: 'behavior', text: 'Be concise' });

  const behaviorOnly = manager.getGuidelines(AGENT, 'behavior');
  expect(behaviorOnly).toHaveLength(2);
  // Every returned entry must belong to the requested category.
  const allBehavior = behaviorOnly.every((entry) => entry.category === 'behavior');
  expect(allBehavior).toBe(true);

  const restrictionOnly = manager.getGuidelines(AGENT, 'restriction');
  expect(restrictionOnly).toHaveLength(1);
});
|
|
125
|
+
|
|
126
|
+
it('should list all guidelines', () => {
  manager.addGuideline(AGENT, { category: 'behavior', text: 'A' });
  manager.addGuideline(AGENT, { category: 'style', text: 'B' });

  // With no category argument, both entries are expected back.
  const everything = manager.getGuidelines(AGENT);
  expect(everything).toHaveLength(2);
});
|
|
132
|
+
|
|
133
|
+
it('should remove a guideline', () => {
  const { id } = manager.addGuideline(AGENT, { category: 'behavior', text: 'Temp' });

  // Removal reports success and leaves the agent with no guidelines.
  const removed = manager.removeGuideline(id);
  expect(removed).toBe(true);
  expect(manager.getGuidelines(AGENT)).toHaveLength(0);
});
|
|
138
|
+
|
|
139
|
+
it('should return false removing nonexistent guideline', () => {
  // Removing an id that was never added is expected to report false.
  const removed = manager.removeGuideline('nonexistent-id');
  expect(removed).toBe(false);
});
|
|
142
|
+
|
|
143
|
+
it('should include guidelines in getIdentity', () => {
  manager.setIdentity(AGENT, { name: 'Bot' });
  manager.addGuideline(AGENT, { category: 'behavior', text: 'Be helpful' });
  manager.addGuideline(AGENT, { category: 'restriction', text: 'No secrets' });

  // The identity lookup is expected to carry both guidelines added above.
  const loaded = manager.getIdentity(AGENT);
  expect(loaded!.guidelines).toHaveLength(2);
});
|
|
151
|
+
|
|
152
|
+
it('should order guidelines by priority descending', () => {
  // Inserted out of order on purpose: 1, 10, 5.
  manager.addGuideline(AGENT, { category: 'behavior', text: 'Low', priority: 1 });
  manager.addGuideline(AGENT, { category: 'behavior', text: 'High', priority: 10 });
  manager.addGuideline(AGENT, { category: 'behavior', text: 'Mid', priority: 5 });

  const ordered = manager.getGuidelines(AGENT, 'behavior');

  // Highest priority is expected first.
  ['High', 'Mid', 'Low'].forEach((expectedText, position) => {
    expect(ordered[position].text).toBe(expectedText);
  });
});
|
|
162
|
+
|
|
163
|
+
// ─── Versioning ─────────────────────────────────────────────────────
|
|
164
|
+
|
|
165
|
+
it('should track version history', () => {
  manager.setIdentity(AGENT, { name: 'V1', changedBy: 'user', changeReason: 'Initial' });
  manager.setIdentity(AGENT, { name: 'V2', changedBy: 'user', changeReason: 'Update' });

  const entries = manager.getVersionHistory(AGENT);
  expect(entries).toHaveLength(1);

  // The single entry is version 1, and the test expects it to carry the
  // changedBy/changeReason passed to the second setIdentity call.
  const snapshot = entries[0];
  expect(snapshot.version).toBe(1);
  expect(snapshot.changedBy).toBe('user');
  expect(snapshot.changeReason).toBe('Update');
});
|
|
175
|
+
|
|
176
|
+
it('should rollback to a previous version', () => {
  manager.setIdentity(AGENT, { name: 'Original', role: 'V1 Role' });
  manager.setIdentity(AGENT, { name: 'Updated', role: 'V2 Role' });

  // Sanity check: the second write is the current identity before rolling back.
  const beforeRollback = manager.getIdentity(AGENT);
  expect(beforeRollback!.name).toBe('Updated');
  expect(beforeRollback!.version).toBe(2);

  // Rolling back to version 1 restores its fields, but the version is expected
  // to become 3 rather than return to 1.
  const { name, role, version } = manager.rollback(AGENT, 1);
  expect(name).toBe('Original');
  expect(role).toBe('V1 Role');
  expect(version).toBe(3);
});
|
|
189
|
+
|
|
190
|
+
it('should throw on rollback to nonexistent version', () => {
  manager.setIdentity(AGENT, { name: 'V1' });

  // Version 99 was never recorded, so the rollback attempt must throw.
  const attemptRollback = () => manager.rollback(AGENT, 99);
  expect(attemptRollback).toThrow('Version 99 not found');
});
|
|
194
|
+
|
|
195
|
+
it('should preserve version history after rollback', () => {
  manager.setIdentity(AGENT, { name: 'V1' });
  manager.setIdentity(AGENT, { name: 'V2' });
  manager.rollback(AGENT, 1);

  const entries = manager.getVersionHistory(AGENT);
  expect(entries).toHaveLength(2);

  // History is expected newest-first: version 2, then version 1.
  [2, 1].forEach((expectedVersion, position) => {
    expect(entries[position].version).toBe(expectedVersion);
  });
});
|
|
205
|
+
|
|
206
|
+
it('should limit version history', () => {
  // Four writes in a row, then ask for at most two history entries.
  for (const name of ['V1', 'V2', 'V3', 'V4']) {
    manager.setIdentity(AGENT, { name });
  }

  const capped = manager.getVersionHistory(AGENT, 2);
  expect(capped).toHaveLength(2);

  // The two entries returned are expected to be versions 3 and 2, in that order.
  [3, 2].forEach((expectedVersion, position) => {
    expect(capped[position].version).toBe(expectedVersion);
  });
});
|
|
217
|
+
|
|
218
|
+
// ─── Rendering ──────────────────────────────────────────────────────
|
|
219
|
+
|
|
220
|
+
it('should render unknown agent markdown', () => {
  // No identity was stored for 'unknown'; the output is expected to say so.
  const rendered = manager.renderIdentityMarkdown('unknown');
  expect(rendered).toContain('Unknown Agent');
});
|
|
224
|
+
|
|
225
|
+
it('should render identity as markdown', () => {
  manager.setIdentity(AGENT, {
    name: 'TestBot',
    role: 'Runner',
    description: 'A helpful agent.',
    personality: ['precise', 'kind'],
  });
  manager.addGuideline(AGENT, { category: 'behavior', text: 'Be helpful' });
  manager.addGuideline(AGENT, { category: 'restriction', text: 'No secrets' });

  const rendered = manager.renderIdentityMarkdown(AGENT);

  // Fragments the rendered markdown is expected to contain, checked in this order.
  const expectedFragments = [
    '# TestBot',
    '**Role:** Runner',
    'A helpful agent.',
    '- precise',
    '- Be helpful',
    '- No secrets',
  ];
  for (const fragment of expectedFragments) {
    expect(rendered).toContain(fragment);
  }
});
|
|
243
|
+
});
|