recursive-llm-ts 4.8.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,309 @@
1
+ package rlm
2
+
3
import (
	"fmt"
	"strings"
	"sync"
)
7
+
8
+ // ─── LCM Context Control Loop ───────────────────────────────────────────────
9
+ // Implements the dual-threshold context management from the LCM paper:
10
+ // - Below τ_soft: no overhead (zero-cost continuity)
11
+ // - τ_soft ≤ tokens < τ_hard: async compaction between turns
12
+ // - tokens ≥ τ_hard: blocking compaction before next LLM call
13
+
14
// LCMConfig holds the tuning knobs for the Lossless Context Management engine.
type LCMConfig struct {
	// Enabled activates LCM context management (default: false for backward compat).
	Enabled bool `json:"enabled"`

	// SoftThreshold is τ_soft: token count above which async compaction begins.
	// Default: 70% of model limit.
	SoftThreshold int `json:"soft_threshold,omitempty"`

	// HardThreshold is τ_hard: token count above which blocking compaction occurs.
	// Default: 90% of model limit.
	HardThreshold int `json:"hard_threshold,omitempty"`

	// CompactionBlockSize is how many messages to compact at once.
	// Default: 10 messages.
	CompactionBlockSize int `json:"compaction_block_size,omitempty"`

	// SummaryTargetTokens is the target size for each summary node.
	// Default: 500 tokens.
	SummaryTargetTokens int `json:"summary_target_tokens,omitempty"`
}

// DefaultLCMConfig returns an LCMConfig populated with the package defaults.
// LCM is disabled by default; the soft/hard thresholds are left at zero here
// because they are derived from the model limit in NewLCMEngine.
func DefaultLCMConfig() LCMConfig {
	var cfg LCMConfig
	cfg.CompactionBlockSize = 10
	cfg.SummaryTargetTokens = 500
	return cfg
}
44
+
45
// LCMEngine is the main LCM context management engine.
// It wraps the store, summarizer, and context control loop.
type LCMEngine struct {
	config     LCMConfig
	store      *LCMStore      // active context plus the summary DAG
	summarizer *LCMSummarizer // produces summary text for message blocks
	observer   *Observer      // debug/event sink
	modelLimit int            // model context window; used to derive default thresholds

	// Async compaction state. compactMu guards compacting and compactResult;
	// the background goroutine delivers its outcome on compactResult, which is
	// buffered with capacity 1 so the sender never blocks.
	compactMu     sync.Mutex
	compacting    bool
	compactResult chan *compactionResult
}

// compactionResult carries the outcome of one performCompaction run.
// A nil summary with a nil err means there was nothing left to compact.
type compactionResult struct {
	summary *SummaryNode
	err     error
}
64
+
65
+ // NewLCMEngine creates a new LCM engine with the given configuration.
66
+ func NewLCMEngine(config LCMConfig, store *LCMStore, summarizer *LCMSummarizer, observer *Observer, modelLimit int) *LCMEngine {
67
+ // Apply defaults based on model limit
68
+ if config.SoftThreshold == 0 && modelLimit > 0 {
69
+ config.SoftThreshold = int(float64(modelLimit) * 0.70)
70
+ }
71
+ if config.HardThreshold == 0 && modelLimit > 0 {
72
+ config.HardThreshold = int(float64(modelLimit) * 0.90)
73
+ }
74
+ if config.CompactionBlockSize == 0 {
75
+ config.CompactionBlockSize = 10
76
+ }
77
+ if config.SummaryTargetTokens == 0 {
78
+ config.SummaryTargetTokens = 500
79
+ }
80
+
81
+ return &LCMEngine{
82
+ config: config,
83
+ store: store,
84
+ summarizer: summarizer,
85
+ observer: observer,
86
+ modelLimit: modelLimit,
87
+ }
88
+ }
89
+
90
+ // ─── Context Control Loop (Algorithm 2 from paper) ──────────────────────────
91
+
92
+ // OnNewItem is called after each new message is added to the store.
93
+ // It implements the context control loop from Figure 2 of the LCM paper.
94
+ // Returns nil if no compaction was needed or if async compaction was triggered.
95
+ func (e *LCMEngine) OnNewItem() error {
96
+ if !e.config.Enabled {
97
+ return nil
98
+ }
99
+
100
+ // Check if async compaction has completed
101
+ e.applyPendingCompaction()
102
+
103
+ tokens := e.store.ActiveContextTokens()
104
+
105
+ // Below soft threshold: zero-cost continuity
106
+ if tokens <= e.config.SoftThreshold {
107
+ return nil
108
+ }
109
+
110
+ // Soft threshold exceeded: trigger async compaction (non-blocking)
111
+ if tokens < e.config.HardThreshold {
112
+ e.observer.Debug("lcm.control", "Soft threshold exceeded (%d > %d), triggering async compaction",
113
+ tokens, e.config.SoftThreshold)
114
+ e.triggerAsyncCompaction()
115
+ return nil
116
+ }
117
+
118
+ // Hard threshold exceeded: blocking compaction
119
+ e.observer.Debug("lcm.control", "Hard threshold exceeded (%d >= %d), blocking compaction",
120
+ tokens, e.config.HardThreshold)
121
+ return e.blockingCompaction()
122
+ }
123
+
124
+ // ─── Async Compaction ───────────────────────────────────────────────────────
125
+
126
+ func (e *LCMEngine) triggerAsyncCompaction() {
127
+ e.compactMu.Lock()
128
+ if e.compacting {
129
+ e.compactMu.Unlock()
130
+ return // Already compacting
131
+ }
132
+ e.compacting = true
133
+ e.compactResult = make(chan *compactionResult, 1)
134
+ e.compactMu.Unlock()
135
+
136
+ go func() {
137
+ result := e.performCompaction()
138
+ e.compactResult <- result
139
+ }()
140
+ }
141
+
142
// applyPendingCompaction performs a non-blocking check for a finished async
// compaction and, if one has completed, splices its summary into the store.
// Called from OnNewItem before measuring the active context so that completed
// background work is reflected in the token count. If no compaction is in
// flight, or the in-flight one has not finished yet, it returns immediately.
func (e *LCMEngine) applyPendingCompaction() {
	e.compactMu.Lock()
	if !e.compacting || e.compactResult == nil {
		e.compactMu.Unlock()
		return
	}

	// Non-blocking check
	select {
	case result := <-e.compactResult:
		// Mark the cycle finished while still holding the lock, then release
		// it before touching the store or the observer.
		e.compacting = false
		e.compactMu.Unlock()

		if result.err != nil {
			// Async failures are logged, not propagated: if pressure keeps
			// rising, the blocking path in OnNewItem retries compaction.
			e.observer.Error("lcm.control", "Async compaction failed: %v", result.err)
			return
		}
		if result.summary != nil {
			removed := e.store.CompactOldestBlock(result.summary)
			e.observer.Debug("lcm.control", "Async compaction applied: replaced %d messages with summary %s",
				len(removed), result.summary.ID)
			e.observer.Event("lcm.compaction", map[string]string{
				"type":               "async",
				"summary_id":         result.summary.ID,
				"messages_compacted": fmt.Sprintf("%d", len(removed)),
				"summary_tokens":     fmt.Sprintf("%d", result.summary.Tokens),
				"level":              fmt.Sprintf("%d", result.summary.Level),
			})
		}
	default:
		e.compactMu.Unlock()
		// Not done yet, continue
	}
}
176
+
177
+ // ─── Blocking Compaction ────────────────────────────────────────────────────
178
+
179
+ func (e *LCMEngine) blockingCompaction() error {
180
+ // Keep compacting until under hard threshold
181
+ for e.store.ActiveContextTokens() >= e.config.HardThreshold {
182
+ result := e.performCompaction()
183
+ if result.err != nil {
184
+ return fmt.Errorf("blocking compaction failed: %w", result.err)
185
+ }
186
+ if result.summary == nil {
187
+ break // Nothing more to compact
188
+ }
189
+
190
+ removed := e.store.CompactOldestBlock(result.summary)
191
+ e.observer.Debug("lcm.control", "Blocking compaction: replaced %d messages with summary %s (%d tokens)",
192
+ len(removed), result.summary.ID, result.summary.Tokens)
193
+ e.observer.Event("lcm.compaction", map[string]string{
194
+ "type": "blocking",
195
+ "summary_id": result.summary.ID,
196
+ "messages_compacted": fmt.Sprintf("%d", len(removed)),
197
+ "summary_tokens": fmt.Sprintf("%d", result.summary.Tokens),
198
+ "level": fmt.Sprintf("%d", result.summary.Level),
199
+ })
200
+ }
201
+ return nil
202
+ }
203
+
204
+ // ─── Core Compaction ────────────────────────────────────────────────────────
205
+
206
+ func (e *LCMEngine) performCompaction() *compactionResult {
207
+ active := e.store.GetActiveContext()
208
+
209
+ // Find the oldest block of raw messages to compact (skip system prompt)
210
+ var block []*StoreMessage
211
+ for _, item := range active {
212
+ if item.IsMessage() {
213
+ if item.Message.Role == RoleSystem {
214
+ continue // Never compact system prompt
215
+ }
216
+ block = append(block, item.Message)
217
+ if len(block) >= e.config.CompactionBlockSize {
218
+ break
219
+ }
220
+ }
221
+ }
222
+
223
+ if len(block) == 0 {
224
+ return &compactionResult{summary: nil, err: nil}
225
+ }
226
+
227
+ // Apply three-level escalation
228
+ result, err := e.summarizer.SummarizeMessages(block, e.config.SummaryTargetTokens)
229
+ if err != nil {
230
+ return &compactionResult{err: err}
231
+ }
232
+
233
+ // Create summary node in the DAG
234
+ var msgIDs []string
235
+ for _, msg := range block {
236
+ msgIDs = append(msgIDs, msg.ID)
237
+ }
238
+
239
+ summary := e.store.CreateLeafSummary(msgIDs, result.Content, result.Level)
240
+
241
+ return &compactionResult{summary: summary}
242
+ }
243
+
244
+ // ─── Condensed Summaries (DAG depth > 1) ────────────────────────────────────
245
+
246
+ // CondenseOldSummaries finds summary nodes in the active context and merges them
247
+ // into a higher-order condensed summary. This creates DAG depth > 1.
248
+ func (e *LCMEngine) CondenseOldSummaries() error {
249
+ active := e.store.GetActiveContext()
250
+
251
+ // Collect summary items
252
+ var summaryItems []*ActiveContextItem
253
+ for _, item := range active {
254
+ if !item.IsMessage() && item.Summary != nil {
255
+ summaryItems = append(summaryItems, item)
256
+ }
257
+ }
258
+
259
+ // Need at least 2 summaries to condense
260
+ if len(summaryItems) < 2 {
261
+ return nil
262
+ }
263
+
264
+ // Condense the oldest summaries
265
+ condenseCount := len(summaryItems)
266
+ if condenseCount > e.config.CompactionBlockSize {
267
+ condenseCount = e.config.CompactionBlockSize
268
+ }
269
+ toCondense := summaryItems[:condenseCount]
270
+
271
+ // Build combined content for re-summarization
272
+ var combined string
273
+ var childIDs []string
274
+ for _, item := range toCondense {
275
+ combined += item.Summary.Content + "\n\n"
276
+ childIDs = append(childIDs, item.Summary.ID)
277
+ }
278
+
279
+ // Summarize the combined summaries
280
+ result, err := e.summarizer.Summarize(combined, e.config.SummaryTargetTokens)
281
+ if err != nil {
282
+ return fmt.Errorf("condensation failed: %w", err)
283
+ }
284
+
285
+ // Create condensed summary node
286
+ condensed := e.store.CreateCondensedSummary(childIDs, result.Content, result.Level)
287
+
288
+ e.observer.Debug("lcm.control", "Condensed %d summaries into %s (%d tokens)",
289
+ len(childIDs), condensed.ID, condensed.Tokens)
290
+
291
+ return nil
292
+ }
293
+
294
+ // ─── Query Helpers ──────────────────────────────────────────────────────────
295
+
296
// GetStore returns the underlying LCM store.
func (e *LCMEngine) GetStore() *LCMStore {
	return e.store
}

// GetConfig returns the LCM configuration, including any defaults that were
// filled in by NewLCMEngine.
func (e *LCMEngine) GetConfig() LCMConfig {
	return e.config
}

// IsEnabled returns whether LCM is active.
func (e *LCMEngine) IsEnabled() bool {
	return e.config.Enabled
}
@@ -0,0 +1,257 @@
1
+ package rlm
2
+
3
+ import (
4
+ "fmt"
5
+ "strings"
6
+ )
7
+
8
+ // ─── Infinite Delegation Guard ──────────────────────────────────────────────
9
+ // Implements the scope-reduction invariant from the LCM paper (Section 3.2).
10
+ //
11
+ // When a sub-agent spawns a further sub-agent, it must declare:
12
+ // - delegated_scope: the specific slice of work being handed off
13
+ // - kept_work: the work the caller will still perform itself
14
+ //
15
+ // If the caller cannot articulate what it's retaining (i.e., it would delegate
16
+ // its entire responsibility), the call is rejected. This forces each level of
17
+ // delegation to represent a strict reduction in responsibility.
18
+ //
19
+ // Exemptions:
20
+ // - Root agent (depth 0): no parent to recurse with
21
+ // - Read-only agents: cannot spawn further sub-agents
22
+ // - Parallel decomposition (sibling tasks): not nested delegation
23
+
24
// DelegationRequest represents a request to delegate work to a sub-agent.
// DelegatedScope and KeptWork together express the scope-reduction invariant:
// a non-root, non-exempt caller must name both what it hands off and what it
// keeps, or the guard rejects the delegation.
type DelegationRequest struct {
	// Prompt is the task description for the sub-agent.
	Prompt string `json:"prompt"`

	// DelegatedScope describes the specific slice of work being handed off.
	// Required for non-root agents.
	DelegatedScope string `json:"delegated_scope"`

	// KeptWork describes the work the caller retains for itself.
	// Required for non-root agents. Must be non-empty and distinct from DelegatedScope.
	KeptWork string `json:"kept_work"`

	// ReadOnly indicates this is a read-only exploration agent (exempt from guard).
	ReadOnly bool `json:"read_only"`

	// Parallel indicates this is parallel decomposition (exempt from guard).
	// DelegateTasks sets this on every request it validates.
	Parallel bool `json:"parallel"`
}
43
+
44
// DelegationGuard enforces the scope-reduction invariant described in the
// package comment above: every nested delegation must strictly reduce the
// caller's responsibility, or it is rejected.
type DelegationGuard struct {
	observer *Observer // debug sink for allow/reject decisions
}

// NewDelegationGuard creates a new delegation guard that reports its
// decisions to the given observer.
func NewDelegationGuard(observer *Observer) *DelegationGuard {
	return &DelegationGuard{observer: observer}
}
53
+
54
// DelegationError is returned when a delegation request violates the
// scope-reduction invariant. Reason explains the rejection; Suggestion tells
// the agent how to proceed instead.
type DelegationError struct {
	Reason     string `json:"reason"`
	Suggestion string `json:"suggestion"`
}

// Error implements the error interface, joining the reason and suggestion
// into a single human-readable message.
func (e *DelegationError) Error() string {
	return fmt.Sprint("delegation rejected: ", e.Reason, ". ", e.Suggestion)
}
63
+
64
+ // ValidateDelegation checks if a delegation request is allowed at the given depth.
65
+ // Returns nil if allowed, or a DelegationError explaining why it was rejected.
66
+ func (g *DelegationGuard) ValidateDelegation(depth int, req DelegationRequest) error {
67
+ // Root agent (depth 0) is always allowed to delegate
68
+ if depth == 0 {
69
+ g.observer.Debug("lcm.delegation", "Root agent delegation allowed (depth 0)")
70
+ return nil
71
+ }
72
+
73
+ // Read-only agents are exempt (they can't spawn further sub-agents)
74
+ if req.ReadOnly {
75
+ g.observer.Debug("lcm.delegation", "Read-only agent delegation allowed")
76
+ return nil
77
+ }
78
+
79
+ // Parallel decomposition is exempt (sibling, not nested)
80
+ if req.Parallel {
81
+ g.observer.Debug("lcm.delegation", "Parallel decomposition delegation allowed")
82
+ return nil
83
+ }
84
+
85
+ // Non-root agents must declare scope reduction
86
+ if strings.TrimSpace(req.DelegatedScope) == "" {
87
+ g.observer.Debug("lcm.delegation", "Delegation rejected: no delegated_scope at depth %d", depth)
88
+ return &DelegationError{
89
+ Reason: "sub-agent must declare delegated_scope",
90
+ Suggestion: "Describe the specific slice of work being handed off, or perform the work directly.",
91
+ }
92
+ }
93
+
94
+ if strings.TrimSpace(req.KeptWork) == "" {
95
+ g.observer.Debug("lcm.delegation", "Delegation rejected: no kept_work at depth %d", depth)
96
+ return &DelegationError{
97
+ Reason: "sub-agent must declare kept_work (what the caller retains)",
98
+ Suggestion: "If you cannot articulate what you're retaining, perform the work directly instead of delegating.",
99
+ }
100
+ }
101
+
102
+ // Check for full delegation (delegated_scope ≈ entire task)
103
+ if isTotalDelegation(req.DelegatedScope, req.KeptWork) {
104
+ g.observer.Debug("lcm.delegation", "Delegation rejected: total delegation detected at depth %d", depth)
105
+ return &DelegationError{
106
+ Reason: "delegated_scope appears to encompass the entire task; kept_work is trivial",
107
+ Suggestion: "Break the task into meaningful subtasks where you retain substantial work, or perform it directly.",
108
+ }
109
+ }
110
+
111
+ g.observer.Debug("lcm.delegation", "Delegation allowed at depth %d: scope=%q, kept=%q",
112
+ depth, truncateStr(req.DelegatedScope, 80), truncateStr(req.KeptWork, 80))
113
+ return nil
114
+ }
115
+
116
// isTotalDelegation detects when an agent is trying to delegate its entire
// responsibility. This is a heuristic check — it catches obvious cases of
// trivial kept_work, either by matching known throwaway phrases or by
// noticing that kept_work is vanishingly small next to delegated_scope.
func isTotalDelegation(delegatedScope, keptWork string) bool {
	kept := strings.TrimSpace(strings.ToLower(keptWork))

	// Trivial kept_work phrases that indicate full delegation. A switch over
	// the fixed set is cheaper and clearer than scanning a slice.
	switch kept {
	case "", "none", "nothing", "n/a", "na",
		"will wait", "waiting", "just wait",
		"aggregate", "collect results", "return results",
		"pass through", "forward":
		return true
	}

	// Heuristic: kept_work is suspiciously short compared to delegated_scope
	// (under 20 chars and less than 10% of the scope's length). The scope is
	// trimmed so that stray surrounding whitespace — already ignored for
	// kept_work above — does not skew the ratio.
	scope := strings.TrimSpace(delegatedScope)
	return len(kept) < 20 && len(kept) < len(scope)/10
}
152
+
153
+ // ─── Integration with RLM Engine ────────────────────────────────────────────
154
+
155
+ // DelegateTask validates and executes a delegation request through the RLM engine.
156
+ // This is the main entry point for task delegation with the infinite recursion guard.
157
+ func (r *RLM) DelegateTask(req DelegationRequest) (string, RLMStats, error) {
158
+ // Create or use existing delegation guard
159
+ guard := NewDelegationGuard(r.observer)
160
+
161
+ // Validate the delegation
162
+ if err := guard.ValidateDelegation(r.currentDepth, req); err != nil {
163
+ return "", RLMStats{}, err
164
+ }
165
+
166
+ // Create sub-agent
167
+ subConfig := Config{
168
+ RecursiveModel: r.recursiveModel,
169
+ APIBase: r.apiBase,
170
+ APIKey: r.apiKey,
171
+ MaxDepth: r.maxDepth,
172
+ MaxIterations: r.maxIterations,
173
+ TimeoutSeconds: r.timeoutSeconds,
174
+ UseMetacognitive: r.useMetacognitive,
175
+ ExtraParams: r.extraParams,
176
+ }
177
+
178
+ subRLM := New(r.recursiveModel, subConfig)
179
+ subRLM.currentDepth = r.currentDepth + 1
180
+ subRLM.observer = r.observer
181
+ defer subRLM.Shutdown()
182
+
183
+ r.observer.Debug("lcm.delegation", "Spawning sub-agent at depth %d for: %s",
184
+ r.currentDepth+1, truncateStr(req.Prompt, 100))
185
+
186
+ result, stats, err := subRLM.Completion(req.Prompt, "")
187
+ return result, stats, err
188
+ }
189
+
190
+ // DelegateTasks validates and executes multiple parallel delegation requests.
191
+ // This implements the Tasks() tool from the LCM paper (Appendix C.3).
192
+ // Parallel decomposition is exempt from the recursion guard.
193
+ func (r *RLM) DelegateTasks(tasks []DelegationRequest) ([]string, []RLMStats, error) {
194
+ if len(tasks) < 2 {
195
+ return nil, nil, fmt.Errorf("DelegateTasks requires at least 2 tasks for parallel decomposition")
196
+ }
197
+
198
+ guard := NewDelegationGuard(r.observer)
199
+
200
+ // Mark all as parallel (exempt from guard) but still validate basic structure
201
+ for i := range tasks {
202
+ tasks[i].Parallel = true
203
+ if err := guard.ValidateDelegation(r.currentDepth, tasks[i]); err != nil {
204
+ return nil, nil, fmt.Errorf("task %d validation failed: %w", i, err)
205
+ }
206
+ }
207
+
208
+ r.observer.Debug("lcm.delegation", "Spawning %d parallel sub-agents at depth %d",
209
+ len(tasks), r.currentDepth+1)
210
+
211
+ type taskResult struct {
212
+ index int
213
+ result string
214
+ stats RLMStats
215
+ err error
216
+ }
217
+
218
+ results := make(chan taskResult, len(tasks))
219
+
220
+ for i, task := range tasks {
221
+ go func(idx int, t DelegationRequest) {
222
+ subConfig := Config{
223
+ RecursiveModel: r.recursiveModel,
224
+ APIBase: r.apiBase,
225
+ APIKey: r.apiKey,
226
+ MaxDepth: r.maxDepth,
227
+ MaxIterations: r.maxIterations,
228
+ TimeoutSeconds: r.timeoutSeconds,
229
+ UseMetacognitive: r.useMetacognitive,
230
+ ExtraParams: r.extraParams,
231
+ }
232
+
233
+ subRLM := New(r.recursiveModel, subConfig)
234
+ subRLM.currentDepth = r.currentDepth + 1
235
+ subRLM.observer = r.observer
236
+ defer subRLM.Shutdown()
237
+
238
+ result, stats, err := subRLM.Completion(t.Prompt, "")
239
+ results <- taskResult{index: idx, result: result, stats: stats, err: err}
240
+ }(i, task)
241
+ }
242
+
243
+ // Collect results in order
244
+ resultSlice := make([]string, len(tasks))
245
+ statsSlice := make([]RLMStats, len(tasks))
246
+
247
+ for range tasks {
248
+ tr := <-results
249
+ if tr.err != nil {
250
+ return nil, nil, fmt.Errorf("parallel task %d failed: %w", tr.index, tr.err)
251
+ }
252
+ resultSlice[tr.index] = tr.result
253
+ statsSlice[tr.index] = tr.stats
254
+ }
255
+
256
+ return resultSlice, statsSlice, nil
257
+ }