@soleri/core 9.3.0 → 9.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/engine/module-manifest.d.ts +2 -0
- package/dist/engine/module-manifest.d.ts.map +1 -1
- package/dist/engine/module-manifest.js +115 -0
- package/dist/engine/module-manifest.js.map +1 -1
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/planning/task-complexity-assessor.d.ts +42 -0
- package/dist/planning/task-complexity-assessor.d.ts.map +1 -0
- package/dist/planning/task-complexity-assessor.js +132 -0
- package/dist/planning/task-complexity-assessor.js.map +1 -0
- package/dist/runtime/admin-ops.d.ts.map +1 -1
- package/dist/runtime/admin-ops.js +18 -0
- package/dist/runtime/admin-ops.js.map +1 -1
- package/dist/runtime/orchestrate-ops.d.ts.map +1 -1
- package/dist/runtime/orchestrate-ops.js +43 -32
- package/dist/runtime/orchestrate-ops.js.map +1 -1
- package/package.json +1 -1
- package/src/engine/module-manifest.test.ts +43 -0
- package/src/engine/module-manifest.ts +117 -0
- package/src/index.ts +8 -0
- package/src/planning/task-complexity-assessor.test.ts +298 -0
- package/src/planning/task-complexity-assessor.ts +183 -0
- package/src/runtime/admin-ops.test.ts +23 -0
- package/src/runtime/admin-ops.ts +19 -0
- package/src/runtime/orchestrate-ops.test.ts +204 -0
- package/src/runtime/orchestrate-ops.ts +49 -38
- package/src/vault/vault-scaling.test.ts +5 -5
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
2
2
|
import { createOrchestrateOps } from './orchestrate-ops.js';
|
|
3
|
+
import { assessTaskComplexity } from '../planning/task-complexity-assessor.js';
|
|
3
4
|
import type { AgentRuntime } from './types.js';
|
|
4
5
|
|
|
5
6
|
// ---------------------------------------------------------------------------
|
|
@@ -241,6 +242,73 @@ describe('createOrchestrateOps', () => {
|
|
|
241
242
|
await op.handler({ planId: 'plan-1', sessionId: 'session-1' });
|
|
242
243
|
expect(rt.brainIntelligence.extractKnowledge).toHaveBeenCalledWith('session-1');
|
|
243
244
|
});
|
|
245
|
+
|
|
246
|
+
it('works without a preceding plan', async () => {
|
|
247
|
+
const op = findOp(ops, 'orchestrate_complete');
|
|
248
|
+
const result = (await op.handler({
|
|
249
|
+
sessionId: 'session-1',
|
|
250
|
+
outcome: 'completed',
|
|
251
|
+
summary: 'Fixed a typo in the README',
|
|
252
|
+
})) as Record<string, unknown>;
|
|
253
|
+
|
|
254
|
+
// Should not call planner.complete
|
|
255
|
+
expect(rt.planner.complete).not.toHaveBeenCalled();
|
|
256
|
+
|
|
257
|
+
// Should return a lightweight completion record
|
|
258
|
+
const plan = result.plan as Record<string, unknown>;
|
|
259
|
+
expect(plan.status).toBe('completed');
|
|
260
|
+
expect(plan.objective).toBe('Fixed a typo in the README');
|
|
261
|
+
});
|
|
262
|
+
|
|
263
|
+
it('captures knowledge even without plan', async () => {
|
|
264
|
+
const op = findOp(ops, 'orchestrate_complete');
|
|
265
|
+
await op.handler({
|
|
266
|
+
sessionId: 'session-1',
|
|
267
|
+
summary: 'Refactored utility function',
|
|
268
|
+
});
|
|
269
|
+
|
|
270
|
+
// Brain session end and knowledge extraction still run
|
|
271
|
+
expect(rt.brainIntelligence.lifecycle).toHaveBeenCalledWith(
|
|
272
|
+
expect.objectContaining({ action: 'end', sessionId: 'session-1' }),
|
|
273
|
+
);
|
|
274
|
+
expect(rt.brainIntelligence.extractKnowledge).toHaveBeenCalledWith('session-1');
|
|
275
|
+
});
|
|
276
|
+
|
|
277
|
+
it('skips anti-rationalization gate when no criteria', async () => {
|
|
278
|
+
const { detectRationalizations } = await import('../planning/rationalization-detector.js');
|
|
279
|
+
const op = findOp(ops, 'orchestrate_complete');
|
|
280
|
+
|
|
281
|
+
await op.handler({
|
|
282
|
+
sessionId: 'session-1',
|
|
283
|
+
outcome: 'completed',
|
|
284
|
+
summary: 'This was basically done already',
|
|
285
|
+
});
|
|
286
|
+
|
|
287
|
+
// detectRationalizations should never be called since there are no criteria
|
|
288
|
+
expect(detectRationalizations).not.toHaveBeenCalled();
|
|
289
|
+
// Should still complete successfully
|
|
290
|
+
expect(rt.brainIntelligence.lifecycle).toHaveBeenCalled();
|
|
291
|
+
});
|
|
292
|
+
|
|
293
|
+
it('still runs brain session end without plan', async () => {
|
|
294
|
+
const op = findOp(ops, 'orchestrate_complete');
|
|
295
|
+
const result = (await op.handler({
|
|
296
|
+
sessionId: 'session-1',
|
|
297
|
+
outcome: 'completed',
|
|
298
|
+
toolsUsed: ['grep', 'edit'],
|
|
299
|
+
filesModified: [],
|
|
300
|
+
})) as Record<string, unknown>;
|
|
301
|
+
|
|
302
|
+
expect(rt.brainIntelligence.lifecycle).toHaveBeenCalledWith(
|
|
303
|
+
expect.objectContaining({
|
|
304
|
+
action: 'end',
|
|
305
|
+
sessionId: 'session-1',
|
|
306
|
+
planOutcome: 'completed',
|
|
307
|
+
toolsUsed: ['grep', 'edit'],
|
|
308
|
+
}),
|
|
309
|
+
);
|
|
310
|
+
expect(result.session).toBeDefined();
|
|
311
|
+
});
|
|
244
312
|
});
|
|
245
313
|
|
|
246
314
|
// ─── orchestrate_status ───────────────────────────────────────
|
|
@@ -299,4 +367,140 @@ describe('createOrchestrateOps', () => {
|
|
|
299
367
|
await expect(op.handler({ planId: 'missing' })).rejects.toThrow('not found');
|
|
300
368
|
});
|
|
301
369
|
});
|
|
370
|
+
|
|
371
|
+
// ─── task auto-assessment routing ────────────────────────────
|
|
372
|
+
//
|
|
373
|
+
// Integration-style tests that verify the full assess → route → complete flow:
|
|
374
|
+
// 1. Use TaskComplexityAssessor to classify the task
|
|
375
|
+
// 2. Route to direct execution (simple) or planning (complex)
|
|
376
|
+
// 3. Complete via orchestrate_complete in both paths
|
|
377
|
+
|
|
378
|
+
describe('task auto-assessment routing', () => {
|
|
379
|
+
it('simple task routes to direct execution + complete', async () => {
|
|
380
|
+
// Step 1: Assess — "fix typo in README" should be simple
|
|
381
|
+
const assessment = assessTaskComplexity({ prompt: 'fix typo in README' });
|
|
382
|
+
expect(assessment.classification).toBe('simple');
|
|
383
|
+
|
|
384
|
+
// Step 2: Skip planning, go straight to complete without a planId
|
|
385
|
+
const completeOp = findOp(ops, 'orchestrate_complete');
|
|
386
|
+
const result = (await completeOp.handler({
|
|
387
|
+
sessionId: 'session-simple',
|
|
388
|
+
outcome: 'completed',
|
|
389
|
+
summary: 'Fixed typo in README',
|
|
390
|
+
})) as Record<string, unknown>;
|
|
391
|
+
|
|
392
|
+
// Should not touch the planner at all
|
|
393
|
+
expect(rt.planner.complete).not.toHaveBeenCalled();
|
|
394
|
+
|
|
395
|
+
// Should still produce a valid completion record
|
|
396
|
+
const plan = result.plan as Record<string, unknown>;
|
|
397
|
+
expect(plan.status).toBe('completed');
|
|
398
|
+
expect(plan.objective).toBe('Fixed typo in README');
|
|
399
|
+
|
|
400
|
+
// Knowledge should still be captured
|
|
401
|
+
expect(rt.brainIntelligence.extractKnowledge).toHaveBeenCalledWith('session-simple');
|
|
402
|
+
});
|
|
403
|
+
|
|
404
|
+
it('complex task routes through planning + complete', async () => {
|
|
405
|
+
// Step 1: Assess — cross-cutting auth task should be complex
|
|
406
|
+
const assessment = assessTaskComplexity({
|
|
407
|
+
prompt: 'add authentication across all API routes',
|
|
408
|
+
filesEstimated: 8,
|
|
409
|
+
});
|
|
410
|
+
expect(assessment.classification).toBe('complex');
|
|
411
|
+
|
|
412
|
+
// Step 2: Create a plan via orchestrate_plan
|
|
413
|
+
const planOp = findOp(ops, 'orchestrate_plan');
|
|
414
|
+
const planResult = (await planOp.handler({
|
|
415
|
+
prompt: 'add authentication across all API routes',
|
|
416
|
+
})) as Record<string, unknown>;
|
|
417
|
+
expect(planResult).toHaveProperty('plan');
|
|
418
|
+
expect(planResult).toHaveProperty('flow');
|
|
419
|
+
|
|
420
|
+
// Step 3: Complete with the planId
|
|
421
|
+
const completeOp = findOp(ops, 'orchestrate_complete');
|
|
422
|
+
const result = (await completeOp.handler({
|
|
423
|
+
planId: 'plan-1',
|
|
424
|
+
sessionId: 'session-complex',
|
|
425
|
+
outcome: 'completed',
|
|
426
|
+
summary: 'Added authentication middleware to all API routes',
|
|
427
|
+
})) as Record<string, unknown>;
|
|
428
|
+
|
|
429
|
+
// Should complete via the planner lifecycle
|
|
430
|
+
expect(rt.planner.complete).toHaveBeenCalledWith('plan-1');
|
|
431
|
+
|
|
432
|
+
// Knowledge should be captured
|
|
433
|
+
expect(rt.brainIntelligence.lifecycle).toHaveBeenCalledWith(
|
|
434
|
+
expect.objectContaining({ action: 'end', sessionId: 'session-complex' }),
|
|
435
|
+
);
|
|
436
|
+
expect(rt.brainIntelligence.extractKnowledge).toHaveBeenCalledWith('session-complex');
|
|
437
|
+
|
|
438
|
+
// Plan should be marked completed
|
|
439
|
+
const completedPlan = result.plan as Record<string, unknown>;
|
|
440
|
+
expect(completedPlan.status).toBe('completed');
|
|
441
|
+
});
|
|
442
|
+
|
|
443
|
+
it('orchestrate_complete captures knowledge in both paths', async () => {
|
|
444
|
+
const completeOp = findOp(ops, 'orchestrate_complete');
|
|
445
|
+
|
|
446
|
+
// ── Simple path (no planId) ──
|
|
447
|
+
vi.clearAllMocks();
|
|
448
|
+
rt = mockRuntime();
|
|
449
|
+
ops = createOrchestrateOps(rt);
|
|
450
|
+
|
|
451
|
+
await findOp(ops, 'orchestrate_complete').handler({
|
|
452
|
+
sessionId: 'session-simple',
|
|
453
|
+
outcome: 'completed',
|
|
454
|
+
summary: 'Renamed a variable',
|
|
455
|
+
});
|
|
456
|
+
|
|
457
|
+
// Brain session end called
|
|
458
|
+
expect(rt.brainIntelligence.lifecycle).toHaveBeenCalledWith(
|
|
459
|
+
expect.objectContaining({ action: 'end', sessionId: 'session-simple' }),
|
|
460
|
+
);
|
|
461
|
+
// Knowledge extraction called
|
|
462
|
+
expect(rt.brainIntelligence.extractKnowledge).toHaveBeenCalledWith('session-simple');
|
|
463
|
+
// Planner.complete NOT called (no plan)
|
|
464
|
+
expect(rt.planner.complete).not.toHaveBeenCalled();
|
|
465
|
+
|
|
466
|
+
// ── Complex path (with planId) ──
|
|
467
|
+
vi.clearAllMocks();
|
|
468
|
+
rt = mockRuntime();
|
|
469
|
+
ops = createOrchestrateOps(rt);
|
|
470
|
+
|
|
471
|
+
await findOp(ops, 'orchestrate_complete').handler({
|
|
472
|
+
planId: 'plan-1',
|
|
473
|
+
sessionId: 'session-complex',
|
|
474
|
+
outcome: 'completed',
|
|
475
|
+
summary: 'Implemented full auth layer',
|
|
476
|
+
});
|
|
477
|
+
|
|
478
|
+
// Brain session end called
|
|
479
|
+
expect(rt.brainIntelligence.lifecycle).toHaveBeenCalledWith(
|
|
480
|
+
expect.objectContaining({ action: 'end', sessionId: 'session-complex' }),
|
|
481
|
+
);
|
|
482
|
+
// Knowledge extraction called
|
|
483
|
+
expect(rt.brainIntelligence.extractKnowledge).toHaveBeenCalledWith('session-complex');
|
|
484
|
+
// Planner.complete IS called (has plan)
|
|
485
|
+
expect(rt.planner.complete).toHaveBeenCalledWith('plan-1');
|
|
486
|
+
});
|
|
487
|
+
|
|
488
|
+
it('assessment result includes non-empty reasoning for simple tasks', () => {
|
|
489
|
+
const result = assessTaskComplexity({ prompt: 'fix typo in README' });
|
|
490
|
+
expect(result.classification).toBe('simple');
|
|
491
|
+
expect(typeof result.reasoning).toBe('string');
|
|
492
|
+
expect(result.reasoning.length).toBeGreaterThan(0);
|
|
493
|
+
});
|
|
494
|
+
|
|
495
|
+
it('assessment result includes non-empty reasoning for complex tasks', () => {
|
|
496
|
+
const result = assessTaskComplexity({
|
|
497
|
+
prompt: 'add authentication across all API routes',
|
|
498
|
+
filesEstimated: 8,
|
|
499
|
+
domains: ['auth', 'api', 'middleware'],
|
|
500
|
+
});
|
|
501
|
+
expect(result.classification).toBe('complex');
|
|
502
|
+
expect(typeof result.reasoning).toBe('string');
|
|
503
|
+
expect(result.reasoning.length).toBeGreaterThan(0);
|
|
504
|
+
});
|
|
505
|
+
});
|
|
302
506
|
});
|
|
@@ -472,7 +472,7 @@ export function createOrchestrateOps(
|
|
|
472
472
|
'end brain session, and clean up.',
|
|
473
473
|
auth: 'write',
|
|
474
474
|
schema: z.object({
|
|
475
|
-
planId: z.string().describe('ID of the executing plan to complete'),
|
|
475
|
+
planId: z.string().optional().describe('ID of the executing plan to complete (optional for direct tasks)'),
|
|
476
476
|
sessionId: z.string().describe('ID of the brain session to end'),
|
|
477
477
|
outcome: z
|
|
478
478
|
.enum(['completed', 'abandoned', 'partial'])
|
|
@@ -497,7 +497,7 @@ export function createOrchestrateOps(
|
|
|
497
497
|
.describe('Set true to bypass rationalization gate and impact warnings after review'),
|
|
498
498
|
}),
|
|
499
499
|
handler: async (params) => {
|
|
500
|
-
const planId = params.planId as string;
|
|
500
|
+
const planId = params.planId as string | undefined;
|
|
501
501
|
const sessionId = params.sessionId as string;
|
|
502
502
|
const outcome = (params.outcome as string) ?? 'completed';
|
|
503
503
|
const completionSummary = (params.summary as string) ?? '';
|
|
@@ -505,20 +505,21 @@ export function createOrchestrateOps(
|
|
|
505
505
|
const filesModified = (params.filesModified as string[]) ?? [];
|
|
506
506
|
const overrideRationalization = (params.overrideRationalization as boolean) ?? false;
|
|
507
507
|
|
|
508
|
-
//
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
508
|
+
// Look up plan — optional for direct tasks that skipped planning
|
|
509
|
+
const planObj = planId ? planner.get(planId) : null;
|
|
510
|
+
|
|
511
|
+
// Anti-rationalization gate: only if we have acceptance criteria from a plan
|
|
512
|
+
const criteria = planObj && planId ? collectAcceptanceCriteria(planner, planId) : [];
|
|
513
|
+
if (outcome === 'completed' && criteria.length > 0 && completionSummary && !overrideRationalization) {
|
|
514
|
+
const report = detectRationalizations(criteria, completionSummary);
|
|
515
|
+
if (report.detected) {
|
|
516
|
+
captureRationalizationAntiPattern(vault, report);
|
|
517
|
+
return {
|
|
518
|
+
blocked: true,
|
|
519
|
+
reason: 'Rationalization language detected in completion summary',
|
|
520
|
+
rationalization: report,
|
|
521
|
+
hint: 'Address the unmet criteria, or set overrideRationalization: true to bypass this gate.',
|
|
522
|
+
};
|
|
522
523
|
}
|
|
523
524
|
}
|
|
524
525
|
|
|
@@ -527,7 +528,6 @@ export function createOrchestrateOps(
|
|
|
527
528
|
if (filesModified.length > 0) {
|
|
528
529
|
try {
|
|
529
530
|
const analyzer = new ImpactAnalyzer();
|
|
530
|
-
const planObj = planner.get(planId);
|
|
531
531
|
const scopeHints = planObj?.scope ? [planObj.scope] : undefined;
|
|
532
532
|
impactReport = analyzer.analyzeImpact(
|
|
533
533
|
filesModified,
|
|
@@ -549,10 +549,19 @@ export function createOrchestrateOps(
|
|
|
549
549
|
}
|
|
550
550
|
}
|
|
551
551
|
|
|
552
|
-
// Complete the planner plan (legacy lifecycle)
|
|
553
|
-
|
|
552
|
+
// Complete the planner plan (legacy lifecycle) — only if plan exists
|
|
553
|
+
let completedPlan;
|
|
554
|
+
if (planObj && planId) {
|
|
555
|
+
completedPlan = planner.complete(planId);
|
|
556
|
+
} else {
|
|
557
|
+
completedPlan = {
|
|
558
|
+
id: planId ?? `direct-${Date.now()}`,
|
|
559
|
+
status: 'completed',
|
|
560
|
+
objective: completionSummary || 'Direct execution',
|
|
561
|
+
};
|
|
562
|
+
}
|
|
554
563
|
|
|
555
|
-
// End brain session
|
|
564
|
+
// End brain session — runs regardless of plan existence
|
|
556
565
|
const session = brainIntelligence.lifecycle({
|
|
557
566
|
action: 'end',
|
|
558
567
|
sessionId,
|
|
@@ -562,7 +571,7 @@ export function createOrchestrateOps(
|
|
|
562
571
|
filesModified,
|
|
563
572
|
});
|
|
564
573
|
|
|
565
|
-
// Extract knowledge
|
|
574
|
+
// Extract knowledge — runs regardless of plan existence
|
|
566
575
|
let extraction = null;
|
|
567
576
|
try {
|
|
568
577
|
extraction = brainIntelligence.extractKnowledge(sessionId);
|
|
@@ -572,27 +581,29 @@ export function createOrchestrateOps(
|
|
|
572
581
|
|
|
573
582
|
// Run flow-engine epilogue if we have a flow plan
|
|
574
583
|
let epilogueResult = null;
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
584
|
+
if (planId) {
|
|
585
|
+
const entry = planStore.get(planId);
|
|
586
|
+
if (entry) {
|
|
587
|
+
try {
|
|
588
|
+
const dispatch = buildDispatch(agentId, runtime, facades);
|
|
589
|
+
const summary = `${outcome}: ${entry.plan.summary}. Tools: ${toolsUsed.join(', ') || 'none'}. Files: ${filesModified.join(', ') || 'none'}.`;
|
|
590
|
+
epilogueResult = await runEpilogue(
|
|
591
|
+
dispatch,
|
|
592
|
+
entry.plan.context.probes,
|
|
593
|
+
entry.plan.context.projectPath,
|
|
594
|
+
summary,
|
|
595
|
+
);
|
|
596
|
+
} catch {
|
|
597
|
+
// Epilogue is best-effort
|
|
598
|
+
}
|
|
589
599
|
|
|
590
|
-
|
|
591
|
-
|
|
600
|
+
// Clean up plan store
|
|
601
|
+
planStore.delete(planId);
|
|
602
|
+
}
|
|
592
603
|
}
|
|
593
604
|
|
|
594
605
|
return {
|
|
595
|
-
plan,
|
|
606
|
+
plan: completedPlan,
|
|
596
607
|
session,
|
|
597
608
|
extraction,
|
|
598
609
|
epilogue: epilogueResult,
|
|
@@ -92,7 +92,7 @@ describe('Vault Scaling — 10K entries', () => {
|
|
|
92
92
|
expect(elapsed).toBeLessThan(50);
|
|
93
93
|
});
|
|
94
94
|
|
|
95
|
-
test('list with filters under
|
|
95
|
+
test('list with filters under 200ms at 10K', () => {
|
|
96
96
|
vault = new Vault(':memory:');
|
|
97
97
|
vault.seed(generateEntries(10_000));
|
|
98
98
|
|
|
@@ -101,7 +101,7 @@ describe('Vault Scaling — 10K entries', () => {
|
|
|
101
101
|
const elapsed = performance.now() - start;
|
|
102
102
|
|
|
103
103
|
expect(entries.length).toBeGreaterThan(0);
|
|
104
|
-
expect(elapsed).toBeLessThan(
|
|
104
|
+
expect(elapsed).toBeLessThan(200);
|
|
105
105
|
});
|
|
106
106
|
|
|
107
107
|
// ─── Stats performance ───────────────────────────────
|
|
@@ -223,7 +223,7 @@ describe('Vault Scaling — 10K entries', () => {
|
|
|
223
223
|
|
|
224
224
|
// ─── Tags and domains at scale ────────────────────────
|
|
225
225
|
|
|
226
|
-
test('getTags under
|
|
226
|
+
test('getTags under 500ms at 10K', () => {
|
|
227
227
|
vault = new Vault(':memory:');
|
|
228
228
|
vault.seed(generateEntries(10_000));
|
|
229
229
|
|
|
@@ -232,7 +232,7 @@ describe('Vault Scaling — 10K entries', () => {
|
|
|
232
232
|
const elapsed = performance.now() - start;
|
|
233
233
|
|
|
234
234
|
expect(tags.length).toBeGreaterThan(0);
|
|
235
|
-
expect(elapsed).toBeLessThan(
|
|
235
|
+
expect(elapsed).toBeLessThan(500);
|
|
236
236
|
});
|
|
237
237
|
|
|
238
238
|
test('getDomains under 10ms at 10K', () => {
|
|
@@ -244,7 +244,7 @@ describe('Vault Scaling — 10K entries', () => {
|
|
|
244
244
|
const elapsed = performance.now() - start;
|
|
245
245
|
|
|
246
246
|
expect(domains.length).toBe(DOMAINS.length);
|
|
247
|
-
expect(elapsed).toBeLessThan(
|
|
247
|
+
expect(elapsed).toBeLessThan(200);
|
|
248
248
|
});
|
|
249
249
|
|
|
250
250
|
// ─── FTS rebuild at scale ─────────────────────────────
|