cognitive-core 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +363 -2
- package/SKILL.md +193 -0
- package/dist/agents/index.d.ts +3 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +5 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/mock-provider.d.ts +23 -0
- package/dist/agents/mock-provider.d.ts.map +1 -0
- package/dist/agents/mock-provider.js +71 -0
- package/dist/agents/mock-provider.js.map +1 -0
- package/dist/agents/types.d.ts +98 -0
- package/dist/agents/types.d.ts.map +1 -0
- package/dist/agents/types.js +44 -0
- package/dist/agents/types.js.map +1 -0
- package/dist/atlas.d.ts +196 -0
- package/dist/atlas.d.ts.map +1 -0
- package/dist/atlas.js +373 -0
- package/dist/atlas.js.map +1 -0
- package/dist/bin/cognitive-core.d.ts +18 -0
- package/dist/bin/cognitive-core.d.ts.map +1 -0
- package/dist/bin/cognitive-core.js +419 -0
- package/dist/bin/cognitive-core.js.map +1 -0
- package/dist/embeddings/bm25.d.ts +104 -0
- package/dist/embeddings/bm25.d.ts.map +1 -0
- package/dist/embeddings/bm25.js +264 -0
- package/dist/embeddings/bm25.js.map +1 -0
- package/dist/embeddings/index.d.ts +12 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +16 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/manager.d.ts +112 -0
- package/dist/embeddings/manager.d.ts.map +1 -0
- package/dist/embeddings/manager.js +215 -0
- package/dist/embeddings/manager.js.map +1 -0
- package/dist/embeddings/provider.d.ts +101 -0
- package/dist/embeddings/provider.d.ts.map +1 -0
- package/dist/embeddings/provider.js +232 -0
- package/dist/embeddings/provider.js.map +1 -0
- package/dist/embeddings/vector-store.d.ts +101 -0
- package/dist/embeddings/vector-store.d.ts.map +1 -0
- package/dist/embeddings/vector-store.js +256 -0
- package/dist/embeddings/vector-store.js.map +1 -0
- package/dist/factory.d.ts +193 -0
- package/dist/factory.d.ts.map +1 -0
- package/dist/factory.js +109 -0
- package/dist/factory.js.map +1 -0
- package/dist/index.d.ts +43 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +84 -0
- package/dist/index.js.map +1 -0
- package/dist/learning/analyzer.d.ts +110 -0
- package/dist/learning/analyzer.d.ts.map +1 -0
- package/dist/learning/analyzer.js +213 -0
- package/dist/learning/analyzer.js.map +1 -0
- package/dist/learning/effectiveness.d.ts +158 -0
- package/dist/learning/effectiveness.d.ts.map +1 -0
- package/dist/learning/effectiveness.js +251 -0
- package/dist/learning/effectiveness.js.map +1 -0
- package/dist/learning/index.d.ts +8 -0
- package/dist/learning/index.d.ts.map +1 -0
- package/dist/learning/index.js +11 -0
- package/dist/learning/index.js.map +1 -0
- package/dist/learning/llm-extractor.d.ts +88 -0
- package/dist/learning/llm-extractor.d.ts.map +1 -0
- package/dist/learning/llm-extractor.js +372 -0
- package/dist/learning/llm-extractor.js.map +1 -0
- package/dist/learning/meta-learner.d.ts +80 -0
- package/dist/learning/meta-learner.d.ts.map +1 -0
- package/dist/learning/meta-learner.js +355 -0
- package/dist/learning/meta-learner.js.map +1 -0
- package/dist/learning/pipeline.d.ts +65 -0
- package/dist/learning/pipeline.d.ts.map +1 -0
- package/dist/learning/pipeline.js +170 -0
- package/dist/learning/pipeline.js.map +1 -0
- package/dist/learning/playbook-extractor.d.ts +113 -0
- package/dist/learning/playbook-extractor.d.ts.map +1 -0
- package/dist/learning/playbook-extractor.js +523 -0
- package/dist/learning/playbook-extractor.js.map +1 -0
- package/dist/learning/usage-inference.d.ts +82 -0
- package/dist/learning/usage-inference.d.ts.map +1 -0
- package/dist/learning/usage-inference.js +261 -0
- package/dist/learning/usage-inference.js.map +1 -0
- package/dist/mcp/index.d.ts +6 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +6 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/mcp/playbook-server.d.ts +120 -0
- package/dist/mcp/playbook-server.d.ts.map +1 -0
- package/dist/mcp/playbook-server.js +427 -0
- package/dist/mcp/playbook-server.js.map +1 -0
- package/dist/memory/curated-loader.d.ts +62 -0
- package/dist/memory/curated-loader.d.ts.map +1 -0
- package/dist/memory/curated-loader.js +106 -0
- package/dist/memory/curated-loader.js.map +1 -0
- package/dist/memory/experience.d.ts +122 -0
- package/dist/memory/experience.d.ts.map +1 -0
- package/dist/memory/experience.js +392 -0
- package/dist/memory/experience.js.map +1 -0
- package/dist/memory/index.d.ts +6 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +9 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/memory/meta.d.ts +90 -0
- package/dist/memory/meta.d.ts.map +1 -0
- package/dist/memory/meta.js +362 -0
- package/dist/memory/meta.js.map +1 -0
- package/dist/memory/playbook.d.ts +133 -0
- package/dist/memory/playbook.d.ts.map +1 -0
- package/dist/memory/playbook.js +357 -0
- package/dist/memory/playbook.js.map +1 -0
- package/dist/memory/system.d.ts +167 -0
- package/dist/memory/system.d.ts.map +1 -0
- package/dist/memory/system.js +383 -0
- package/dist/memory/system.js.map +1 -0
- package/dist/runtime/backends/acp.d.ts +67 -0
- package/dist/runtime/backends/acp.d.ts.map +1 -0
- package/dist/runtime/backends/acp.js +290 -0
- package/dist/runtime/backends/acp.js.map +1 -0
- package/dist/runtime/backends/index.d.ts +5 -0
- package/dist/runtime/backends/index.d.ts.map +1 -0
- package/dist/runtime/backends/index.js +6 -0
- package/dist/runtime/backends/index.js.map +1 -0
- package/dist/runtime/backends/mock.d.ts +67 -0
- package/dist/runtime/backends/mock.d.ts.map +1 -0
- package/dist/runtime/backends/mock.js +153 -0
- package/dist/runtime/backends/mock.js.map +1 -0
- package/dist/runtime/backends/subprocess.d.ts +56 -0
- package/dist/runtime/backends/subprocess.d.ts.map +1 -0
- package/dist/runtime/backends/subprocess.js +260 -0
- package/dist/runtime/backends/subprocess.js.map +1 -0
- package/dist/runtime/flows/learning.d.ts +73 -0
- package/dist/runtime/flows/learning.d.ts.map +1 -0
- package/dist/runtime/flows/learning.js +116 -0
- package/dist/runtime/flows/learning.js.map +1 -0
- package/dist/runtime/flows/validation.d.ts +122 -0
- package/dist/runtime/flows/validation.d.ts.map +1 -0
- package/dist/runtime/flows/validation.js +223 -0
- package/dist/runtime/flows/validation.js.map +1 -0
- package/dist/runtime/index.d.ts +6 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +8 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/runtime/manager.d.ts +116 -0
- package/dist/runtime/manager.d.ts.map +1 -0
- package/dist/runtime/manager.js +416 -0
- package/dist/runtime/manager.js.map +1 -0
- package/dist/runtime/types.d.ts +138 -0
- package/dist/runtime/types.d.ts.map +1 -0
- package/dist/runtime/types.js +2 -0
- package/dist/runtime/types.js.map +1 -0
- package/dist/search/evaluator.d.ts +102 -0
- package/dist/search/evaluator.d.ts.map +1 -0
- package/dist/search/evaluator.js +352 -0
- package/dist/search/evaluator.js.map +1 -0
- package/dist/search/index.d.ts +7 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +11 -0
- package/dist/search/index.js.map +1 -0
- package/dist/search/refinement-loop.d.ts +73 -0
- package/dist/search/refinement-loop.d.ts.map +1 -0
- package/dist/search/refinement-loop.js +245 -0
- package/dist/search/refinement-loop.js.map +1 -0
- package/dist/search/refinement-types.d.ts +154 -0
- package/dist/search/refinement-types.d.ts.map +1 -0
- package/dist/search/refinement-types.js +99 -0
- package/dist/search/refinement-types.js.map +1 -0
- package/dist/search/router.d.ts +61 -0
- package/dist/search/router.d.ts.map +1 -0
- package/dist/search/router.js +197 -0
- package/dist/search/router.js.map +1 -0
- package/dist/search/solver.d.ts +75 -0
- package/dist/search/solver.d.ts.map +1 -0
- package/dist/search/solver.js +216 -0
- package/dist/search/solver.js.map +1 -0
- package/dist/search/verification-runner.d.ts +125 -0
- package/dist/search/verification-runner.d.ts.map +1 -0
- package/dist/search/verification-runner.js +440 -0
- package/dist/search/verification-runner.js.map +1 -0
- package/dist/surfacing/index.d.ts +2 -0
- package/dist/surfacing/index.d.ts.map +1 -0
- package/dist/surfacing/index.js +2 -0
- package/dist/surfacing/index.js.map +1 -0
- package/dist/surfacing/skill-library.d.ts +158 -0
- package/dist/surfacing/skill-library.d.ts.map +1 -0
- package/dist/surfacing/skill-library.js +429 -0
- package/dist/surfacing/skill-library.js.map +1 -0
- package/dist/types/config.d.ts +1113 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +274 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/index.d.ts +9 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +14 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/memory.d.ts +339 -0
- package/dist/types/memory.d.ts.map +1 -0
- package/dist/types/memory.js +207 -0
- package/dist/types/memory.js.map +1 -0
- package/dist/types/meta.d.ts +146 -0
- package/dist/types/meta.d.ts.map +1 -0
- package/dist/types/meta.js +51 -0
- package/dist/types/meta.js.map +1 -0
- package/dist/types/outcome.d.ts +42 -0
- package/dist/types/outcome.d.ts.map +1 -0
- package/dist/types/outcome.js +50 -0
- package/dist/types/outcome.js.map +1 -0
- package/dist/types/playbook.d.ts +119 -0
- package/dist/types/playbook.d.ts.map +1 -0
- package/dist/types/playbook.js +71 -0
- package/dist/types/playbook.js.map +1 -0
- package/dist/types/step.d.ts +44 -0
- package/dist/types/step.d.ts.map +1 -0
- package/dist/types/step.js +32 -0
- package/dist/types/step.js.map +1 -0
- package/dist/types/task.d.ts +91 -0
- package/dist/types/task.d.ts.map +1 -0
- package/dist/types/task.js +39 -0
- package/dist/types/task.js.map +1 -0
- package/dist/types/trajectory.d.ts +221 -0
- package/dist/types/trajectory.d.ts.map +1 -0
- package/dist/types/trajectory.js +60 -0
- package/dist/types/trajectory.js.map +1 -0
- package/dist/utils/index.d.ts +4 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +4 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/similarity.d.ts +31 -0
- package/dist/utils/similarity.d.ts.map +1 -0
- package/dist/utils/similarity.js +107 -0
- package/dist/utils/similarity.js.map +1 -0
- package/dist/utils/storage.d.ts +106 -0
- package/dist/utils/storage.d.ts.map +1 -0
- package/dist/utils/storage.js +203 -0
- package/dist/utils/storage.js.map +1 -0
- package/dist/utils/validation.d.ts +129 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +171 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +61 -9
- package/scripts/migrate-to-playbooks.ts +307 -0
- package/src/agents/index.ts +14 -0
- package/src/agents/mock-provider.ts +93 -0
- package/src/agents/types.ts +137 -0
- package/src/atlas.ts +560 -0
- package/src/bin/cognitive-core.ts +470 -0
- package/src/embeddings/bm25.ts +337 -0
- package/src/embeddings/index.ts +39 -0
- package/src/embeddings/manager.ts +288 -0
- package/src/embeddings/provider.ts +311 -0
- package/src/embeddings/vector-store.ts +353 -0
- package/src/factory.ts +263 -0
- package/src/index.ts +246 -0
- package/src/learning/analyzer.ts +335 -0
- package/src/learning/effectiveness.ts +428 -0
- package/src/learning/index.ts +58 -0
- package/src/learning/llm-extractor.ts +542 -0
- package/src/learning/meta-learner.ts +516 -0
- package/src/learning/pipeline.ts +244 -0
- package/src/learning/playbook-extractor.ts +702 -0
- package/src/learning/usage-inference.ts +372 -0
- package/src/mcp/index.ts +12 -0
- package/src/mcp/playbook-server.ts +565 -0
- package/src/memory/curated-loader.ts +160 -0
- package/src/memory/experience.ts +515 -0
- package/src/memory/index.ts +27 -0
- package/src/memory/meta.ts +506 -0
- package/src/memory/playbook.ts +493 -0
- package/src/memory/system.ts +551 -0
- package/src/runtime/backends/acp.ts +378 -0
- package/src/runtime/backends/index.ts +24 -0
- package/src/runtime/backends/mock.ts +218 -0
- package/src/runtime/backends/subprocess.ts +356 -0
- package/src/runtime/flows/learning.ts +183 -0
- package/src/runtime/flows/validation.ts +381 -0
- package/src/runtime/index.ts +53 -0
- package/src/runtime/manager.ts +541 -0
- package/src/runtime/types.ts +157 -0
- package/src/search/evaluator.ts +474 -0
- package/src/search/index.ts +59 -0
- package/src/search/refinement-loop.ts +363 -0
- package/src/search/refinement-types.ts +159 -0
- package/src/search/router.ts +261 -0
- package/src/search/solver.ts +303 -0
- package/src/search/verification-runner.ts +570 -0
- package/src/surfacing/index.ts +6 -0
- package/src/surfacing/skill-library.ts +594 -0
- package/src/types/config.ts +333 -0
- package/src/types/index.ts +130 -0
- package/src/types/memory.ts +270 -0
- package/src/types/meta.ts +218 -0
- package/src/types/outcome.ts +66 -0
- package/src/types/playbook.ts +196 -0
- package/src/types/step.ts +40 -0
- package/src/types/task.ts +52 -0
- package/src/types/trajectory.ts +80 -0
- package/src/utils/index.ts +38 -0
- package/src/utils/similarity.ts +139 -0
- package/src/utils/storage.ts +249 -0
- package/src/utils/validation.ts +286 -0
- package/tests/embeddings/bm25.test.ts +130 -0
- package/tests/embeddings/manager.test.ts +205 -0
- package/tests/integration/atlas.test.ts +266 -0
- package/tests/integration/e2e.test.ts +929 -0
- package/tests/learning/analyzer.test.ts +426 -0
- package/tests/learning/effectiveness.test.ts +542 -0
- package/tests/learning/pipeline.test.ts +176 -0
- package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
- package/tests/learning/usage-inference.test.ts +254 -0
- package/tests/mcp/playbook-server.test.ts +252 -0
- package/tests/memory/experience.test.ts +198 -0
- package/tests/memory/playbook.test.ts +338 -0
- package/tests/memory/provenance.test.ts +639 -0
- package/tests/memory/system.test.ts +325 -0
- package/tests/runtime/agent-manager.test.ts +512 -0
- package/tests/runtime/mock-backend.test.ts +248 -0
- package/tests/search/refinement-loop.test.ts +468 -0
- package/tests/search/refinement.test.ts +267 -0
- package/tests/search/router.test.ts +427 -0
- package/tests/surfacing/skill-library.test.ts +292 -0
- package/tests/types/outcome.test.ts +147 -0
- package/tests/types/step.test.ts +133 -0
- package/tests/types/task.test.ts +158 -0
- package/tests/types/trajectory.test.ts +253 -0
- package/tests/utils/similarity.test.ts +188 -0
- package/tests/utils/validation.test.ts +252 -0
- package/tsconfig.json +25 -0
- package/vitest.config.ts +22 -0
- package/index.d.ts +0 -4
- package/index.js +0 -4
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Refinement Loop
|
|
3
|
+
*
|
|
4
|
+
* Implements ReMem-style iterative refinement that improves solutions
|
|
5
|
+
* using memory-augmented feedback. The loop:
|
|
6
|
+
* 1. Evaluates solution quality
|
|
7
|
+
* 2. If not acceptable, injects failure context and retries
|
|
8
|
+
* 3. Continues until acceptable or max iterations reached
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { Task } from '../types/task.js';
|
|
12
|
+
import type { Trajectory } from '../types/trajectory.js';
|
|
13
|
+
import type { AgentManager } from '../runtime/manager.js';
|
|
14
|
+
import type { MemorySystem, MemoryQueryResultV2 } from '../memory/system.js';
|
|
15
|
+
import type { AgentSpawnConfig } from '../runtime/types.js';
|
|
16
|
+
import {
|
|
17
|
+
type RefinementResult,
|
|
18
|
+
type EvaluationResult,
|
|
19
|
+
type FailureContext,
|
|
20
|
+
} from './refinement-types.js';
|
|
21
|
+
import { SolutionEvaluator, type EvaluatorConfig } from './evaluator.js';
|
|
22
|
+
|
|
23
|
+
/**
 * Options accepted by the refinement loop.
 *
 * Every field is optional; the default noted on each field is the value
 * declared in `DEFAULT_CONFIG`.
 */
export interface RefinementLoopConfig {
  /** Upper bound on refinement iterations (default: 3). */
  maxIterations?: number;

  /** Lowest quality score treated as acceptable (default: 0.7). */
  minAcceptableScore?: number;

  /** Include feedback from the previous failed attempt in the prompt (default: true). */
  injectFailureContext?: boolean;

  /** Look up similar failures in memory before retrying (default: true). */
  queryFailureMemory?: boolean;

  /** Settings forwarded to the solution evaluator (default: `{}`). */
  evaluatorConfig?: EvaluatorConfig;

  /** Task strategies for which refinement is triggered (default: `['explore', 'fallback']`). */
  triggerOnStrategies?: ('direct' | 'adapt' | 'explore' | 'fallback')[];

  /** Confidence at or above which refinement is skipped (default: 0.85). */
  confidenceThreshold?: number;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Baseline refinement settings; caller-supplied config is layered on top of
 * these values.
 */
const DEFAULT_CONFIG: Required<RefinementLoopConfig> = {
  // Loop limits and acceptance bar.
  maxIterations: 3,
  minAcceptableScore: 0.7,

  // Prompt enrichment switches.
  injectFailureContext: true,
  queryFailureMemory: true,

  // Evaluator runs with its own defaults unless overridden.
  evaluatorConfig: {},

  // Trigger policy used by shouldTriggerRefinement.
  triggerOnStrategies: ['explore', 'fallback'],
  confidenceThreshold: 0.85,
};
|
|
55
|
+
|
|
56
|
+
/**
 * RefinementLoop - iteratively improves a solution using evaluator feedback.
 *
 * `refine` scores a trajectory with a `SolutionEvaluator`. While the result is
 * not acceptable it re-spawns the agent with a prompt built from the latest
 * evaluation's issues and (optionally) related memory hits, stopping at the
 * first acceptable attempt or after `maxIterations` retries.
 */
export class RefinementLoop {
  private agentManager: AgentManager;
  private memory: MemorySystem;
  private evaluator: SolutionEvaluator;
  private config: Required<RefinementLoopConfig>;

  /**
   * @param agentManager - Spawns refinement attempts; also handed to the evaluator.
   * @param memory - Queried for related experiences/playbooks when `queryFailureMemory` is on.
   * @param config - Partial overrides. Fields that are missing or explicitly
   *   `undefined` keep their default value.
   */
  constructor(
    agentManager: AgentManager,
    memory: MemorySystem,
    config: RefinementLoopConfig = {}
  ) {
    this.agentManager = agentManager;
    this.memory = memory;
    // A plain object spread would let `{ maxIterations: undefined }` erase the
    // default and silently disable the loop, so undefined overrides are ignored.
    this.config = RefinementLoop.mergeDefined(DEFAULT_CONFIG, config);
    this.evaluator = new SolutionEvaluator(
      agentManager,
      this.config.evaluatorConfig
    );
  }

  /**
   * Shallow-merge `overrides` onto a copy of `defaults`, skipping any override
   * whose value is `undefined`. Neither argument is mutated.
   */
  private static mergeDefined<T extends object>(
    defaults: T,
    overrides: Partial<T>
  ): T {
    const merged = { ...defaults };
    for (const key of Object.keys(overrides) as Array<keyof T>) {
      const value = overrides[key];
      if (value !== undefined) {
        merged[key] = value as T[keyof T];
      }
    }
    return merged;
  }

  /**
   * Return `text` unchanged when it fits in `maxLength` characters; otherwise
   * cut it to `maxLength` characters and append `...` as a truncation marker.
   */
  private static truncate(text: string, maxLength: number): string {
    return text.length > maxLength ? `${text.slice(0, maxLength)}...` : text;
  }

  /**
   * Get the evaluator for registering domain verifiers
   */
  getEvaluator(): SolutionEvaluator {
    return this.evaluator;
  }

  /**
   * Check if refinement should be triggered based on strategy and confidence.
   *
   * @param strategy - Strategy used for the task; must be listed in `triggerOnStrategies`.
   * @param confidence - Optional confidence; a value at or above
   *   `confidenceThreshold` suppresses refinement.
   * @returns `true` when the strategy is a trigger strategy and confidence is
   *   absent or below the threshold.
   */
  shouldTriggerRefinement(
    strategy: 'direct' | 'adapt' | 'explore' | 'fallback',
    confidence?: number
  ): boolean {
    if (!this.config.triggerOnStrategies.includes(strategy)) {
      return false;
    }

    const confidentEnough =
      confidence !== undefined &&
      confidence >= this.config.confidenceThreshold;

    return !confidentEnough;
  }

  /**
   * Run the refinement loop on an initial trajectory.
   *
   * @param initialTrajectory - Trajectory produced by the first attempt.
   * @param spawnConfig - Spawn configuration reused for every retry; the
   *   refinement prompt is appended to its `systemPromptAdditions`.
   * @returns The first acceptable trajectory (with `success: true`), or, once
   *   `maxIterations` retries are exhausted, the highest-scoring trajectory seen.
   */
  async refine(
    initialTrajectory: Trajectory,
    spawnConfig: AgentSpawnConfig
  ): Promise<RefinementResult> {
    let currentTrajectory = initialTrajectory;
    let bestTrajectory = initialTrajectory;
    let improved = false;

    // Evaluate the initial trajectory. `lastEvaluation` always holds the most
    // recent result, so the loop never has to index into `evaluations`.
    let lastEvaluation: EvaluationResult = await this.evaluator.evaluate(
      currentTrajectory,
      spawnConfig.task
    );
    const evaluations: EvaluationResult[] = [lastEvaluation];
    let bestScore = lastEvaluation.score;

    // If the initial attempt is acceptable, no refinement is needed.
    if (this.meetsBar(lastEvaluation)) {
      return {
        trajectory: currentTrajectory,
        success: true,
        iterations: 0,
        evaluations,
        improved: false,
        bestScore,
      };
    }

    for (let iteration = 0; iteration < this.config.maxIterations; iteration++) {
      // Describe the attempt that just failed.
      const failureContext: FailureContext = {
        attemptedSolution: currentTrajectory.outcome.solution,
        evaluation: lastEvaluation,
        stepsAttempted: currentTrajectory.steps.length,
        previousAttempts: iteration + 1,
      };

      // Query memory for similar failures if enabled.
      let failureMemory: MemoryQueryResultV2 | undefined;
      if (this.config.queryFailureMemory) {
        failureMemory = await this.queryFailurePatterns(
          spawnConfig.task,
          failureContext
        );
      }

      const refinementAdditions = this.buildRefinementPrompt(
        failureContext,
        failureMemory
      );

      // Re-spawn with the caller's additions (if any) followed by the feedback.
      const refinedConfig: AgentSpawnConfig = {
        ...spawnConfig,
        systemPromptAdditions: [
          spawnConfig.systemPromptAdditions,
          refinementAdditions,
        ]
          .filter(Boolean)
          .join('\n\n'),
      };

      const result = await this.agentManager.spawn(refinedConfig);
      currentTrajectory = result.trajectory;

      lastEvaluation = await this.evaluator.evaluate(
        currentTrajectory,
        spawnConfig.task
      );
      evaluations.push(lastEvaluation);

      // Track the best-scoring attempt (strictly better only).
      if (lastEvaluation.score > bestScore) {
        bestScore = lastEvaluation.score;
        bestTrajectory = currentTrajectory;
        improved = true;
      }

      if (this.meetsBar(lastEvaluation)) {
        return {
          trajectory: currentTrajectory,
          success: true,
          iterations: iteration + 1,
          evaluations,
          improved,
          bestScore,
        };
      }
    }

    // Retries exhausted: fall back to the best trajectory seen.
    // NOTE(review): success here is decided by score alone, whereas the early
    // returns also require `evaluation.acceptable` - confirm this is intended.
    return {
      trajectory: bestTrajectory,
      success: bestScore >= this.config.minAcceptableScore,
      iterations: this.config.maxIterations,
      evaluations,
      improved,
      bestScore,
    };
  }

  /**
   * True when the evaluator flagged the result acceptable and its score
   * reaches `minAcceptableScore`.
   */
  private meetsBar(evaluation: EvaluationResult): boolean {
    return (
      evaluation.acceptable &&
      evaluation.score >= this.config.minAcceptableScore
    );
  }

  /**
   * Query memory for similar failure patterns.
   *
   * The query text combines the task description with the issue descriptions
   * of the failed evaluation. Successful experiences that look like they hit a
   * similar failure are preferred; when none qualify, the unfiltered
   * experience list is returned instead.
   */
  private async queryFailurePatterns(
    task: Task,
    failureContext: FailureContext
  ): Promise<MemoryQueryResultV2> {
    const failureDescription = failureContext.evaluation.issues
      .map((issue) => issue.description)
      .join('; ');

    const query = `${task.description} - failed with: ${failureDescription}`;

    const results = await this.memory.queryV2(query, {
      domains: task.domain ? [task.domain] : undefined,
      includeExperiences: true,
      includePlaybooks: true,
    });

    // Experiences that succeeded despite a similar failure double as
    // recovery strategies.
    const recoveryExperiences = results.experiences.filter(
      (exp) =>
        exp.experience.success &&
        this.hasSimilarInitialFailure(exp.experience, failureContext)
    );

    return {
      ...results,
      experiences:
        recoveryExperiences.length > 0 ? recoveryExperiences : results.experiences,
    };
  }

  /**
   * Check if an experience had a similar failure pattern.
   *
   * Keywords are the space-separated, lower-cased words longer than four
   * characters taken from the issue descriptions. The experience matches when
   * at least `min(2, keywords / 2)` of them occur in its step observations or
   * solution output. With no keywords the threshold is 0, so every experience
   * matches.
   */
  private hasSimilarInitialFailure(
    experience: { steps?: Array<{ observation?: string }>; solutionOutput: string },
    failureContext: FailureContext
  ): boolean {
    const failureKeywords = failureContext.evaluation.issues
      .flatMap((issue) => issue.description.toLowerCase().split(' '))
      .filter((word) => word.length > 4);

    const observations = (experience.steps ?? [])
      .map((s) => s.observation ?? '')
      .join(' ');
    const experienceText = `${observations} ${experience.solutionOutput}`.toLowerCase();

    const matchCount = failureKeywords.filter((kw) =>
      experienceText.includes(kw)
    ).length;

    return matchCount >= Math.min(2, failureKeywords.length / 2);
  }

  /**
   * Build the refinement prompt from failure context and memory.
   *
   * Sections, in order: feedback on the previous attempt (when
   * `injectFailureContext` is on), up to three playbooks and up to two
   * experiences from memory (when `failureMemory` is given and
   * `queryFailureMemory` is on), then fixed instructions for the next attempt.
   */
  private buildRefinementPrompt(
    failureContext: FailureContext,
    failureMemory?: MemoryQueryResultV2
  ): string {
    const parts: string[] = [];

    if (this.config.injectFailureContext) {
      parts.push('## Previous Attempt Feedback');
      parts.push('');
      parts.push(
        `Your previous attempt (iteration ${failureContext.previousAttempts}) had issues:`
      );
      parts.push('');

      for (const issue of failureContext.evaluation.issues) {
        const severity = issue.severity.toUpperCase();
        parts.push(`- **[${severity}]** ${issue.type}: ${issue.description}`);
        if (issue.suggestion) {
          parts.push(` *Suggestion:* ${issue.suggestion}`);
        }
      }

      parts.push('');
      parts.push(`Quality score: ${(failureContext.evaluation.score * 100).toFixed(0)}%`);
      parts.push(`Assessment: ${failureContext.evaluation.quality}`);
      parts.push('');
    }

    if (failureMemory && this.config.queryFailureMemory) {
      // Recovery playbooks from memory (top three).
      if (failureMemory.playbooks.length > 0) {
        parts.push('## Recovery Playbooks from Memory');
        parts.push('');
        for (const { playbook, score } of failureMemory.playbooks.slice(0, 3)) {
          parts.push(
            `- **${playbook.name}** (${(score * 100).toFixed(0)}% relevant)`
          );
          parts.push(` Strategy: ${playbook.guidance.strategy}`);
          if (playbook.guidance.steps && playbook.guidance.steps.length > 0) {
            parts.push(` Steps: ${playbook.guidance.steps.slice(0, 3).join(' → ')}`);
          }
        }
        parts.push('');
      }

      // Related experiences (top two); the ellipsis is added only when the
      // text was actually shortened.
      if (failureMemory.experiences.length > 0) {
        parts.push('## Similar Problems Successfully Solved');
        parts.push('');
        for (const { experience } of failureMemory.experiences.slice(0, 2)) {
          parts.push(`- Task: ${RefinementLoop.truncate(experience.taskInput, 150)}`);
          parts.push(
            ` Solution approach: ${RefinementLoop.truncate(experience.solutionOutput, 200)}`
          );
        }
        parts.push('');
      }
    }

    parts.push('## Instructions for This Attempt');
    parts.push('');
    parts.push('Please address the issues identified above and try a different approach.');
    parts.push('Focus on:');
    parts.push('1. Avoiding the specific errors from the previous attempt');
    parts.push('2. Using the recovery playbooks if applicable');
    parts.push('3. Taking a more thorough or alternative approach');

    return parts.join('\n');
  }
}
|
|
353
|
+
|
|
354
|
+
/**
 * Factory for a refinement loop.
 *
 * Forwards its arguments unchanged to the `RefinementLoop` constructor.
 */
export function createRefinementLoop(
  agentManager: AgentManager,
  memory: MemorySystem,
  config?: RefinementLoopConfig
): RefinementLoop {
  const loop = new RefinementLoop(agentManager, memory, config);
  return loop;
}
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Refinement Types
|
|
3
|
+
*
|
|
4
|
+
* Types for the ReMem-style refinement loop that iteratively
|
|
5
|
+
* improves solutions using memory-augmented feedback.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { z } from 'zod';
|
|
9
|
+
|
|
10
|
+
/**
 * Category of a problem reported by an evaluation.
 */
export const IssueTypeSchema = z.enum(['incomplete', 'incorrect', 'inefficient', 'off_topic', 'error']);

/** Union of the literal values accepted by `IssueTypeSchema`. */
export type IssueType = z.infer<typeof IssueTypeSchema>;
|
|
22
|
+
|
|
23
|
+
/**
 * How serious a reported issue is.
 */
export const IssueSeveritySchema = z.enum([
  'critical',
  'major',
  'minor',
]);

/** Union of the literal values accepted by `IssueSeveritySchema`. */
export type IssueSeverity = z.infer<typeof IssueSeveritySchema>;
|
|
29
|
+
|
|
30
|
+
/**
 * A single issue found while evaluating a solution.
 */
export const EvaluationIssueSchema = z.object({
  /** Category of the issue. */
  type: IssueTypeSchema,
  /** Free-text description of what is wrong. */
  description: z.string(),
  /** Severity of the issue. */
  severity: IssueSeveritySchema,
  /** Optional free-text suggestion attached to the issue. */
  suggestion: z.string().optional(),
});

/** Object shape validated by `EvaluationIssueSchema`. */
export type EvaluationIssue = z.infer<typeof EvaluationIssueSchema>;
|
|
41
|
+
|
|
42
|
+
/**
 * Coarse quality rating assigned to a solution.
 */
export const QualityLevelSchema = z.enum(['excellent', 'good', 'needs_work', 'poor']);

/** Union of the literal values accepted by `QualityLevelSchema`. */
export type QualityLevel = z.infer<typeof QualityLevelSchema>;
|
|
53
|
+
|
|
54
|
+
/**
 * Mechanism that produced an evaluation.
 */
export const EvaluationMethodSchema = z.enum(['verification', 'agent', 'heuristic']);

/** Union of the literal values accepted by `EvaluationMethodSchema`. */
export type EvaluationMethod = z.infer<typeof EvaluationMethodSchema>;
|
|
64
|
+
|
|
65
|
+
/**
 * Outcome of evaluating a solution's quality.
 */
export const EvaluationResultSchema = z.object({
  /** Overall quality rating. */
  quality: QualityLevelSchema,

  /** Numeric score, constrained to the range 0..1 inclusive. */
  score: z.number().min(0).max(1),

  /** Whether the solution is considered acceptable. */
  acceptable: z.boolean(),

  /** Issues found during evaluation. */
  issues: z.array(EvaluationIssueSchema),

  /** Mechanism that produced this evaluation. */
  method: EvaluationMethodSchema,

  /** Raw evaluator response, kept for debugging. */
  rawResponse: z.string().optional(),
});

/** Object shape validated by `EvaluationResultSchema`. */
export type EvaluationResult = z.infer<typeof EvaluationResultSchema>;
|
|
84
|
+
|
|
85
|
+
/**
 * Describes a solution attempt that was judged not good enough.
 */
export interface FailureContext {
  /** The solution produced by the attempt. */
  attemptedSolution: unknown;

  /** Evaluation of that attempt. */
  evaluation: EvaluationResult;

  /** How many steps the attempt took. */
  stepsAttempted: number;

  /** Count of refinement attempts made before this one. */
  previousAttempts: number;
}
|
|
98
|
+
|
|
99
|
+
/**
 * What the refinement loop hands back to its caller.
 */
export interface RefinementResult {
  /** Trajectory selected as the final result. */
  trajectory: import('../types/trajectory.js').Trajectory;

  /** Whether the final solution is acceptable. */
  success: boolean;

  /** How many refinement iterations were performed. */
  iterations: number;

  /** Evaluation results, one per iteration. */
  evaluations: EvaluationResult[];

  /** Whether refinement improved the solution. */
  improved: boolean;

  /** Highest score reached during refinement. */
  bestScore: number;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Build an evaluation result, validated through `EvaluationResultSchema.parse`.
 *
 * @param params - Field values for the result; `issues` falls back to an
 *   empty array when absent.
 * @returns The value returned by `EvaluationResultSchema.parse`.
 */
export function createEvaluationResult(params: {
  quality: QualityLevel;
  score: number;
  acceptable: boolean;
  issues?: EvaluationIssue[];
  method: EvaluationMethod;
  rawResponse?: string;
}): EvaluationResult {
  const { quality, score, acceptable, issues, method, rawResponse } = params;

  const candidate = {
    quality,
    score,
    acceptable,
    issues: issues ?? [],
    method,
    rawResponse,
  };

  return EvaluationResultSchema.parse(candidate);
}
|
|
137
|
+
|
|
138
|
+
/**
 * Map a numeric score onto a quality level.
 *
 * Bands (lower bound inclusive): 0.85 and up is `excellent`, 0.7 and up is
 * `good`, 0.4 and up is `needs_work`, anything else (including NaN) is `poor`.
 */
export function scoreToQuality(score: number): QualityLevel {
  // Ordered from the highest floor down; the first floor the score reaches wins.
  const bands: Array<[number, QualityLevel]> = [
    [0.85, 'excellent'],
    [0.7, 'good'],
    [0.4, 'needs_work'],
  ];

  for (const [floor, level] of bands) {
    if (score >= floor) {
      return level;
    }
  }

  return 'poor';
}
|
|
147
|
+
|
|
148
|
+
/**
 * Decide whether a quality level meets a minimum level.
 *
 * Levels are ranked poor < needs_work < good < excellent.
 *
 * @param quality - Level being tested.
 * @param minAcceptable - Lowest level that counts as acceptable (default `'good'`).
 * @returns `true` when `quality` ranks at or above `minAcceptable`.
 */
export function isQualityAcceptable(
  quality: QualityLevel,
  minAcceptable: QualityLevel = 'good'
): boolean {
  const ascending: QualityLevel[] = ['poor', 'needs_work', 'good', 'excellent'];
  const rankOf = (level: QualityLevel): number => ascending.indexOf(level);

  return !(rankOf(quality) < rankOf(minAcceptable));
}
|