@soleri/core 9.7.2 → 9.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/dist/enforcement/adapters/index.d.ts +15 -0
  2. package/dist/enforcement/adapters/index.d.ts.map +1 -1
  3. package/dist/enforcement/adapters/index.js +38 -0
  4. package/dist/enforcement/adapters/index.js.map +1 -1
  5. package/dist/enforcement/adapters/opencode.d.ts +21 -0
  6. package/dist/enforcement/adapters/opencode.d.ts.map +1 -0
  7. package/dist/enforcement/adapters/opencode.js +115 -0
  8. package/dist/enforcement/adapters/opencode.js.map +1 -0
  9. package/dist/planning/evidence-collector.d.ts +2 -0
  10. package/dist/planning/evidence-collector.d.ts.map +1 -1
  11. package/dist/planning/evidence-collector.js +7 -2
  12. package/dist/planning/evidence-collector.js.map +1 -1
  13. package/dist/planning/plan-lifecycle.d.ts.map +1 -1
  14. package/dist/planning/plan-lifecycle.js +5 -0
  15. package/dist/planning/plan-lifecycle.js.map +1 -1
  16. package/dist/planning/planner-types.d.ts +2 -0
  17. package/dist/planning/planner-types.d.ts.map +1 -1
  18. package/dist/runtime/orchestrate-ops.d.ts.map +1 -1
  19. package/dist/runtime/orchestrate-ops.js +65 -1
  20. package/dist/runtime/orchestrate-ops.js.map +1 -1
  21. package/dist/runtime/quality-signals.d.ts +42 -0
  22. package/dist/runtime/quality-signals.d.ts.map +1 -0
  23. package/dist/runtime/quality-signals.js +124 -0
  24. package/dist/runtime/quality-signals.js.map +1 -0
  25. package/dist/skills/trust-classifier.js +1 -1
  26. package/dist/skills/trust-classifier.js.map +1 -1
  27. package/package.json +1 -1
  28. package/src/enforcement/adapters/index.ts +45 -0
  29. package/src/enforcement/adapters/opencode.test.ts +404 -0
  30. package/src/enforcement/adapters/opencode.ts +153 -0
  31. package/src/planning/evidence-collector.test.ts +95 -0
  32. package/src/planning/evidence-collector.ts +11 -0
  33. package/src/planning/plan-lifecycle.test.ts +49 -0
  34. package/src/planning/plan-lifecycle.ts +5 -0
  35. package/src/planning/planner-types.ts +2 -0
  36. package/src/runtime/orchestrate-ops.test.ts +78 -1
  37. package/src/runtime/orchestrate-ops.ts +91 -1
  38. package/src/runtime/orchestrate-status-readiness.test.ts +162 -0
  39. package/src/runtime/quality-signals.test.ts +312 -0
  40. package/src/runtime/quality-signals.ts +169 -0
  41. package/src/skills/trust-classifier.ts +1 -1
@@ -342,6 +342,55 @@ describe('plan-lifecycle', () => {
342
342
  expect(task.completedAt).toBeGreaterThan(0);
343
343
  expect(task.status).toBe('failed');
344
344
  });
345
+
346
+ it('increments fixIterations on completed → in_progress rework', () => {
347
+ const task = makeTask();
348
+ applyTaskStatusUpdate(task, 'in_progress');
349
+ applyTaskStatusUpdate(task, 'completed');
350
+ expect(task.fixIterations).toBeUndefined();
351
+ // Rework: send back from completed to in_progress
352
+ applyTaskStatusUpdate(task, 'in_progress');
353
+ expect(task.fixIterations).toBe(1);
354
+ expect(task.completedAt).toBeUndefined();
355
+ });
356
+
357
+ it('increments fixIterations on failed → in_progress rework', () => {
358
+ const task = makeTask();
359
+ applyTaskStatusUpdate(task, 'in_progress');
360
+ applyTaskStatusUpdate(task, 'failed');
361
+ // Rework from failed
362
+ applyTaskStatusUpdate(task, 'in_progress');
363
+ expect(task.fixIterations).toBe(1);
364
+ expect(task.completedAt).toBeUndefined();
365
+ });
366
+
367
+ it('accumulates fixIterations across multiple rework cycles', () => {
368
+ const task = makeTask();
369
+ applyTaskStatusUpdate(task, 'in_progress');
370
+ applyTaskStatusUpdate(task, 'completed');
371
+ applyTaskStatusUpdate(task, 'in_progress'); // rework 1
372
+ applyTaskStatusUpdate(task, 'completed');
373
+ applyTaskStatusUpdate(task, 'in_progress'); // rework 2
374
+ expect(task.fixIterations).toBe(2);
375
+ });
376
+
377
+ it('does not increment fixIterations on pending → in_progress', () => {
378
+ const task = makeTask();
379
+ applyTaskStatusUpdate(task, 'in_progress');
380
+ expect(task.fixIterations).toBeUndefined();
381
+ });
382
+
383
+ it('resets completedAt on rework but preserves startedAt', () => {
384
+ const task = makeTask();
385
+ applyTaskStatusUpdate(task, 'in_progress');
386
+ const originalStartedAt = task.startedAt;
387
+ applyTaskStatusUpdate(task, 'completed');
388
+ expect(task.completedAt).toBeGreaterThan(0);
389
+ // Rework
390
+ applyTaskStatusUpdate(task, 'in_progress');
391
+ expect(task.completedAt).toBeUndefined();
392
+ expect(task.startedAt).toBe(originalStartedAt);
393
+ });
345
394
  });
346
395
 
347
396
  describe('createPlanObject', () => {
@@ -338,6 +338,11 @@ export function applyIteration(plan: Plan, changes: IterateChanges): number {
338
338
  */
339
339
  export function applyTaskStatusUpdate(task: PlanTask, status: TaskStatus): void {
340
340
  const now = Date.now();
341
+ // Rework detection: completed/failed → in_progress means a fix iteration
342
+ if (status === 'in_progress' && (task.status === 'completed' || task.status === 'failed')) {
343
+ task.fixIterations = (task.fixIterations ?? 0) + 1;
344
+ task.completedAt = undefined;
345
+ }
341
346
  if (status === 'in_progress' && !task.startedAt) task.startedAt = now;
342
347
  if (status === 'completed' || status === 'skipped' || status === 'failed') {
343
348
  task.completedAt = now;
@@ -88,6 +88,8 @@ export interface PlanTask {
88
88
  deliverables?: TaskDeliverable[];
89
89
  /** Verification findings for tasks that modify existing code. Advisory only. */
90
90
  verification?: TaskVerification;
91
+ /** Number of times this task was sent back for rework (completed or failed → in_progress). */
92
+ fixIterations?: number;
91
93
  updatedAt: number;
92
94
  }
93
95
 
@@ -75,6 +75,20 @@ vi.mock('../planning/impact-analyzer.js', () => ({
75
75
  })),
76
76
  }));
77
77
 
78
+ vi.mock('../planning/evidence-collector.js', () => ({
79
+ collectGitEvidence: vi.fn().mockReturnValue({
80
+ planId: 'plan-1',
81
+ planObjective: 'test',
82
+ accuracy: 85,
83
+ evidenceSources: ['git'],
84
+ taskEvidence: [],
85
+ unplannedChanges: [],
86
+ missingWork: [],
87
+ verificationGaps: [],
88
+ summary: '0/0 tasks verified by git evidence',
89
+ }),
90
+ }));
91
+
78
92
  // ---------------------------------------------------------------------------
79
93
  // Mock runtime
80
94
  // ---------------------------------------------------------------------------
@@ -88,7 +102,9 @@ function mockRuntime(): AgentRuntime {
88
102
  stats: vi.fn().mockReturnValue({ totalEntries: 10, byDomain: {}, byType: {} }),
89
103
  captureMemory: vi.fn(),
90
104
  },
91
- brain: {},
105
+ brain: {
106
+ recordFeedback: vi.fn(),
107
+ },
92
108
  brainIntelligence: {
93
109
  recommend: vi.fn().mockReturnValue([]),
94
110
  lifecycle: vi.fn().mockReturnValue({ id: 'session-1' }),
@@ -318,6 +334,67 @@ describe('createOrchestrateOps', () => {
318
334
  );
319
335
  expect(result.session).toBeDefined();
320
336
  });
337
+
338
+ it('includes evidenceReport when completing a plan', async () => {
339
+ const op = findOp(ops, 'orchestrate_complete');
340
+ const result = (await op.handler({
341
+ planId: 'plan-1',
342
+ sessionId: 'session-1',
343
+ outcome: 'completed',
344
+ projectPath: '.',
345
+ })) as Record<string, unknown>;
346
+
347
+ expect(result).toHaveProperty('evidenceReport');
348
+ const report = result.evidenceReport as Record<string, unknown>;
349
+ expect(report.accuracy).toBe(85);
350
+ expect(report.evidenceSources).toEqual(['git']);
351
+ });
352
+
353
+ it('succeeds without blocking when git is unavailable', async () => {
354
+ const { collectGitEvidence } = await import('../planning/evidence-collector.js');
355
+ vi.mocked(collectGitEvidence).mockImplementationOnce(() => {
356
+ throw new Error('git not found');
357
+ });
358
+
359
+ const op = findOp(ops, 'orchestrate_complete');
360
+ const result = (await op.handler({
361
+ planId: 'plan-1',
362
+ sessionId: 'session-1',
363
+ outcome: 'completed',
364
+ })) as Record<string, unknown>;
365
+
366
+ // Should complete successfully without evidenceReport
367
+ expect(result).toHaveProperty('plan');
368
+ expect(result).toHaveProperty('session');
369
+ expect(result).not.toHaveProperty('evidenceReport');
370
+ });
371
+
372
+ it('adds warning when evidence accuracy is below 50%', async () => {
373
+ const { collectGitEvidence } = await import('../planning/evidence-collector.js');
374
+ vi.mocked(collectGitEvidence).mockReturnValueOnce({
375
+ planId: 'plan-1',
376
+ planObjective: 'test',
377
+ accuracy: 30,
378
+ evidenceSources: ['git'],
379
+ taskEvidence: [],
380
+ unplannedChanges: [],
381
+ missingWork: [],
382
+ verificationGaps: [],
383
+ summary: '0/2 tasks verified by git evidence',
384
+ });
385
+
386
+ const op = findOp(ops, 'orchestrate_complete');
387
+ const result = (await op.handler({
388
+ planId: 'plan-1',
389
+ sessionId: 'session-1',
390
+ outcome: 'completed',
391
+ })) as Record<string, unknown>;
392
+
393
+ expect(result).toHaveProperty('evidenceReport');
394
+ expect(result).toHaveProperty('warnings');
395
+ const warnings = result.warnings as string[];
396
+ expect(warnings.some((w) => w.includes('Low evidence accuracy (30%)'))).toBe(true);
397
+ });
321
398
  });
322
399
 
323
400
  // ─── orchestrate_status ───────────────────────────────────────
@@ -38,7 +38,10 @@ import {
38
38
  import { detectRationalizations } from '../planning/rationalization-detector.js';
39
39
  import { ImpactAnalyzer } from '../planning/impact-analyzer.js';
40
40
  import type { ImpactReport } from '../planning/impact-analyzer.js';
41
+ import { collectGitEvidence } from '../planning/evidence-collector.js';
42
+ import type { EvidenceReport } from '../planning/evidence-collector.js';
41
43
  import { recordPlanFeedback } from './plan-feedback-helper.js';
44
+ import { analyzeQualitySignals, captureQualitySignals } from './quality-signals.js';
42
45
 
43
46
  // ---------------------------------------------------------------------------
44
47
  // Intent detection — keyword-based mapping from prompt to intent
@@ -741,10 +744,30 @@ export function createOrchestrateOps(
741
744
  }
742
745
  }
743
746
 
747
+ const warnings: string[] = [];
748
+
749
+ // Evidence-based reconciliation: cross-reference plan tasks against git diff
750
+ let evidenceReport: EvidenceReport | null = null;
751
+ if (planObj && outcome === 'completed') {
752
+ try {
753
+ evidenceReport = collectGitEvidence(
754
+ planObj,
755
+ (params.projectPath as string) ?? '.',
756
+ 'main',
757
+ );
758
+ if (evidenceReport.accuracy < 50) {
759
+ warnings.push(
760
+ `Low evidence accuracy (${evidenceReport.accuracy}%) — plan tasks may not match git changes.`,
761
+ );
762
+ }
763
+ } catch {
764
+ // Evidence collection is best-effort — never blocks
765
+ }
766
+ }
767
+
744
768
  // Complete the planner plan (legacy lifecycle) — best-effort
745
769
  // The epilogue (brain session, knowledge extraction, flow epilogue) MUST run
746
770
  // even if plan transition fails (e.g. already completed, missing, invalid state).
747
- const warnings: string[] = [];
748
771
  let completedPlan;
749
772
  if (planObj && planId) {
750
773
  try {
@@ -788,6 +811,33 @@ export function createOrchestrateOps(
788
811
  }
789
812
  }
790
813
 
814
+ // Feed evidence accuracy into brain feedback — low accuracy signals poor pattern match
815
+ if (evidenceReport && planObj) {
816
+ try {
817
+ const evidenceAction = evidenceReport.accuracy < 50 ? 'dismissed' : 'accepted';
818
+ brain.recordFeedback(`plan-evidence:${planObj.objective}`, planObj.id, evidenceAction);
819
+ } catch {
820
+ // Evidence brain feedback is best-effort
821
+ }
822
+ }
823
+
824
+ // Quality signals: capture rework anti-patterns and clean-task feedback
825
+ if (evidenceReport) {
826
+ try {
827
+ const qualityAnalysis = analyzeQualitySignals(evidenceReport, planObj);
828
+ if (qualityAnalysis.antiPatterns.length > 0 || qualityAnalysis.cleanTasks.length > 0) {
829
+ captureQualitySignals(
830
+ qualityAnalysis,
831
+ vault,
832
+ brain,
833
+ planId ?? `direct-${Date.now()}`,
834
+ );
835
+ }
836
+ } catch {
837
+ // Quality signal capture is best-effort — never blocks completion
838
+ }
839
+ }
840
+
791
841
  // Extract knowledge — runs regardless of plan existence
792
842
  let extraction = null;
793
843
  try {
@@ -840,6 +890,7 @@ export function createOrchestrateOps(
840
890
  extraction,
841
891
  epilogue: epilogueResult,
842
892
  ...(impactReport ? { impactAnalysis: impactReport } : {}),
893
+ ...(evidenceReport ? { evidenceReport } : {}),
843
894
  ...(warnings.length > 0 ? { warnings } : {}),
844
895
  };
845
896
  },
@@ -890,6 +941,44 @@ export function createOrchestrateOps(
890
941
  createdAt: e.createdAt,
891
942
  }));
892
943
 
944
+ // Compute readiness for the first plan in activePlans whose status is 'executing'
945
+ const TERMINAL_TASK_STATES = new Set(['completed', 'skipped', 'failed']);
946
+ let readiness: {
947
+ allTasksTerminal: boolean;
948
+ terminalCount: number;
949
+ totalCount: number;
950
+ idleSince: number | null;
951
+ } | null = null;
952
+
953
+ const executingPlans = activePlans.filter(
954
+ (p: { status: string }) => p.status === 'executing',
955
+ );
956
+ if (executingPlans.length > 0) {
957
+ const plan = executingPlans[0] as {
958
+ tasks?: Array<{ status: string; completedAt?: number; startedAt?: number }>;
959
+ updatedAt?: number;
960
+ };
961
+ const tasks = plan.tasks ?? [];
962
+ const totalCount = tasks.length;
963
+ const terminalCount = tasks.filter((t) => TERMINAL_TASK_STATES.has(t.status)).length;
964
+ const allTasksTerminal = totalCount > 0 && terminalCount === totalCount;
965
+
966
+ // idleSince: set only while some tasks are still non-terminal — the most recent completedAt among terminal tasks, else plan updatedAt
967
+ let idleSince: number | null = null;
968
+ if (totalCount > 0 && !allTasksTerminal) {
969
+ const terminalTimestamps = tasks
970
+ .filter((t) => TERMINAL_TASK_STATES.has(t.status) && t.completedAt)
971
+ .map((t) => t.completedAt as number);
972
+ if (terminalTimestamps.length > 0) {
973
+ idleSince = Math.max(...terminalTimestamps);
974
+ } else if (plan.updatedAt) {
975
+ idleSince = plan.updatedAt;
976
+ }
977
+ }
978
+
979
+ readiness = { allTasksTerminal, terminalCount, totalCount, idleSince };
980
+ }
981
+
893
982
  return {
894
983
  activePlans,
895
984
  sessionContext,
@@ -897,6 +986,7 @@ export function createOrchestrateOps(
897
986
  recommendations,
898
987
  brainStats,
899
988
  flowPlans,
989
+ ...(readiness ? { readiness } : {}),
900
990
  };
901
991
  },
902
992
  },
@@ -0,0 +1,162 @@
1
+ /**
2
+ * Tests for orchestrate_status readiness field.
3
+ *
4
+ * Validates that orchestrate_status computes readiness
5
+ * based on the active plan's task states.
6
+ */
7
+
8
+ import { describe, it, expect, beforeEach, afterEach } from 'vitest';
9
+ import { mkdirSync, rmSync } from 'node:fs';
10
+ import { join } from 'node:path';
11
+ import { tmpdir } from 'node:os';
12
+ import { createOrchestrateOps } from './orchestrate-ops.js';
13
+ import { captureOps } from '../engine/test-helpers.js';
14
+ import { createAgentRuntime } from './runtime.js';
15
+ import type { AgentRuntime } from './types.js';
16
+
17
+ let runtime: AgentRuntime;
18
+ let tempDir: string;
19
+
20
+ beforeEach(() => {
21
+ tempDir = join(tmpdir(), `readiness-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
22
+ mkdirSync(tempDir, { recursive: true });
23
+ runtime = createAgentRuntime({
24
+ agentId: 'test-readiness',
25
+ vaultPath: ':memory:',
26
+ plansPath: join(tempDir, 'plans.json'),
27
+ });
28
+ });
29
+
30
+ afterEach(() => {
31
+ runtime.close();
32
+ rmSync(tempDir, { recursive: true, force: true });
33
+ });
34
+
35
+ /** Helper: call the orchestrate_status handler directly. */
36
+ async function callStatus(rt: AgentRuntime): Promise<Record<string, unknown>> {
37
+ const ops = captureOps(createOrchestrateOps(rt));
38
+ const op = ops.get('orchestrate_status')!;
39
+ return (await op.handler({})) as Record<string, unknown>;
40
+ }
41
+
42
+ /** Helper: create an executing plan with N tasks, return the plan ID and its task objects. */
43
+ function createExecutingPlan(
44
+ rt: AgentRuntime,
45
+ tasks: Array<{ title: string; description: string }>,
46
+ ) {
47
+ const plan = rt.planner.create({
48
+ objective: 'Test plan',
49
+ scope: 'test',
50
+ decisions: [],
51
+ tasks: [],
52
+ });
53
+ rt.planner.approve(plan.id);
54
+ rt.planner.splitTasks(plan.id, tasks);
55
+ rt.planner.startExecution(plan.id);
56
+ const executing = rt.planner.get(plan.id)!;
57
+ return { planId: plan.id, tasks: executing.tasks };
58
+ }
59
+
60
+ describe('orchestrate_status readiness', () => {
61
+ it('returns no readiness when there are no executing plans', async () => {
62
+ const data = await callStatus(runtime);
63
+ expect(data.readiness).toBeUndefined();
64
+ });
65
+
66
+ it('returns readiness with allTasksTerminal=true when all tasks are done', async () => {
67
+ const { planId, tasks } = createExecutingPlan(runtime, [
68
+ { title: 'Task A', description: 'Do A' },
69
+ { title: 'Task B', description: 'Do B' },
70
+ ]);
71
+
72
+ for (const task of tasks) {
73
+ runtime.planner.updateTask(planId, task.id, 'completed');
74
+ }
75
+
76
+ const data = await callStatus(runtime);
77
+ const readiness = data.readiness as {
78
+ allTasksTerminal: boolean;
79
+ terminalCount: number;
80
+ totalCount: number;
81
+ idleSince: number | null;
82
+ };
83
+
84
+ expect(readiness).toBeDefined();
85
+ expect(readiness.allTasksTerminal).toBe(true);
86
+ expect(readiness.terminalCount).toBe(2);
87
+ expect(readiness.totalCount).toBe(2);
88
+ expect(readiness.idleSince).toBeNull();
89
+ });
90
+
91
+ it('returns readiness with mixed task states', async () => {
92
+ const { planId, tasks } = createExecutingPlan(runtime, [
93
+ { title: 'Task X', description: 'Do X' },
94
+ { title: 'Task Y', description: 'Do Y' },
95
+ { title: 'Task Z', description: 'Do Z' },
96
+ ]);
97
+
98
+ runtime.planner.updateTask(planId, tasks[0].id, 'completed');
99
+ runtime.planner.updateTask(planId, tasks[1].id, 'skipped');
100
+ // tasks[2] remains pending
101
+
102
+ const data = await callStatus(runtime);
103
+ const readiness = data.readiness as {
104
+ allTasksTerminal: boolean;
105
+ terminalCount: number;
106
+ totalCount: number;
107
+ idleSince: number | null;
108
+ };
109
+
110
+ expect(readiness).toBeDefined();
111
+ expect(readiness.allTasksTerminal).toBe(false);
112
+ expect(readiness.terminalCount).toBe(2);
113
+ expect(readiness.totalCount).toBe(3);
114
+ });
115
+
116
+ it('includes failed tasks in terminal count', async () => {
117
+ const { planId, tasks } = createExecutingPlan(runtime, [
118
+ { title: 'Task F', description: 'Fail' },
119
+ ]);
120
+
121
+ runtime.planner.updateTask(planId, tasks[0].id, 'failed');
122
+
123
+ const data = await callStatus(runtime);
124
+ const readiness = data.readiness as {
125
+ allTasksTerminal: boolean;
126
+ terminalCount: number;
127
+ totalCount: number;
128
+ idleSince: number | null;
129
+ };
130
+
131
+ expect(readiness).toBeDefined();
132
+ expect(readiness.allTasksTerminal).toBe(true);
133
+ expect(readiness.terminalCount).toBe(1);
134
+ expect(readiness.totalCount).toBe(1);
135
+ });
136
+
137
+ it('computes idleSince from last terminal task timestamp', async () => {
138
+ const { planId, tasks } = createExecutingPlan(runtime, [
139
+ { title: 'Done', description: 'Already done' },
140
+ { title: 'Pending', description: 'Still pending' },
141
+ ]);
142
+
143
+ runtime.planner.updateTask(planId, tasks[0].id, 'completed');
144
+ // tasks[1] remains pending
145
+
146
+ const data = await callStatus(runtime);
147
+ const readiness = data.readiness as {
148
+ allTasksTerminal: boolean;
149
+ terminalCount: number;
150
+ totalCount: number;
151
+ idleSince: number | null;
152
+ };
153
+
154
+ expect(readiness).toBeDefined();
155
+ expect(readiness.allTasksTerminal).toBe(false);
156
+ expect(readiness.terminalCount).toBe(1);
157
+ expect(readiness.totalCount).toBe(2);
158
+ // idleSince should be set (either from completedAt or updatedAt)
159
+ expect(readiness.idleSince).not.toBeNull();
160
+ expect(typeof readiness.idleSince).toBe('number');
161
+ });
162
+ });