recursive-llm-ts 4.9.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/bin/rlm-go +0 -0
- package/dist/bridge-interface.d.ts +149 -0
- package/go/cmd/rlm/main.go +39 -6
- package/go/go.mod +13 -3
- package/go/go.sum +53 -2
- package/go/rlm/compression.go +59 -0
- package/go/rlm/context_overflow.go +21 -36
- package/go/rlm/context_savings_test.go +387 -0
- package/go/rlm/json_extraction.go +140 -0
- package/go/rlm/lcm_agentic_map.go +317 -0
- package/go/rlm/lcm_context_loop.go +309 -0
- package/go/rlm/lcm_delegation.go +257 -0
- package/go/rlm/lcm_episodes.go +313 -0
- package/go/rlm/lcm_episodes_test.go +384 -0
- package/go/rlm/lcm_files.go +424 -0
- package/go/rlm/lcm_map.go +348 -0
- package/go/rlm/lcm_store.go +615 -0
- package/go/rlm/lcm_summarizer.go +239 -0
- package/go/rlm/lcm_test.go +1407 -0
- package/go/rlm/rlm.go +124 -1
- package/go/rlm/store_backend.go +121 -0
- package/go/rlm/store_backend_test.go +428 -0
- package/go/rlm/store_sqlite.go +575 -0
- package/go/rlm/structured.go +6 -83
- package/go/rlm/token_tracking_test.go +25 -11
- package/go/rlm/tokenizer.go +216 -0
- package/go/rlm/tokenizer_test.go +305 -0
- package/go/rlm/types.go +23 -1
- package/go/rlm.test +0 -0
- package/package.json +1 -1
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
package rlm
|
|
2
|
+
|
|
3
|
+
import (
|
|
4
|
+
"bufio"
|
|
5
|
+
"encoding/json"
|
|
6
|
+
"fmt"
|
|
7
|
+
"os"
|
|
8
|
+
"strings"
|
|
9
|
+
"sync"
|
|
10
|
+
"sync/atomic"
|
|
11
|
+
"time"
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
// ─── Agentic-Map Operator ───────────────────────────────────────────────────
|
|
15
|
+
// Implements Operator-Level Recursion from the LCM paper (Section 3.1).
|
|
16
|
+
// Similar to LLM-Map, but spawns a full sub-agent session for each item
|
|
17
|
+
// with access to tools (file I/O, code execution, multi-step reasoning).
|
|
18
|
+
|
|
19
|
+
// AgenticMapConfig configures an Agentic-Map operation.
//
// Only InputPath, OutputPath, and Prompt need to be set by callers:
// Execute replaces non-positive Concurrency/MaxRetries/MaxDepth/MaxIter
// and an empty Model with its documented defaults.
//
// NOTE(review): ReadOnly is currently only logged by Execute; it is not
// forwarded into the sub-agent Config in processAgenticItem — confirm
// whether enforcement happens elsewhere.
type AgenticMapConfig struct {
	InputPath    string      `json:"input_path"`     // Path to JSONL input file
	OutputPath   string      `json:"output_path"`    // Path to JSONL output file
	Prompt       string      `json:"prompt"`         // Prompt template ({{item}} is replaced)
	OutputSchema *JSONSchema `json:"output_schema"`  // Schema for output validation
	Concurrency  int         `json:"concurrency"`    // Worker pool size (default: 8, lower than LLM-Map)
	MaxRetries   int         `json:"max_retries"`    // Per-item retry limit (default: 2)
	Model        string      `json:"model"`          // Model for sub-agents (default: engine model)
	ReadOnly     bool        `json:"read_only"`      // If true, sub-agents cannot modify filesystem
	MaxDepth     int         `json:"max_depth"`      // Max recursion depth for sub-agents (default: 3)
	MaxIter      int         `json:"max_iterations"` // Max iterations per sub-agent (default: 15)
}
|
|
32
|
+
|
|
33
|
+
// AgenticMapResult contains results of an Agentic-Map operation.
//
// Completed + Failed may be less than TotalItems only if an item status is
// neither MapItemCompleted nor MapItemFailed; Execute counts just those two.
type AgenticMapResult struct {
	TotalItems  int                 `json:"total_items"`            // Number of items read from the input file
	Completed   int                 `json:"completed"`              // Items whose status is MapItemCompleted
	Failed      int                 `json:"failed"`                 // Items whose status is MapItemFailed
	OutputPath  string              `json:"output_path"`            // Echo of the configured output path
	Duration    time.Duration       `json:"duration"`               // Wall-clock time for the whole operation
	TokensUsed  int                 `json:"tokens_used"`            // Sum of per-item token usage
	ItemResults []AgenticItemResult `json:"item_results,omitempty"` // Per-item detail, index-aligned with the input
}
|
|
43
|
+
|
|
44
|
+
// AgenticItemResult tracks the status of a single agentic-map item.
|
|
45
|
+
type AgenticItemResult struct {
|
|
46
|
+
Index int `json:"index"`
|
|
47
|
+
Status MapItemStatus `json:"status"`
|
|
48
|
+
Output json.RawMessage `json:"output,omitempty"`
|
|
49
|
+
Error string `json:"error,omitempty"`
|
|
50
|
+
Retries int `json:"retries"`
|
|
51
|
+
LLMCalls int `json:"llm_calls"`
|
|
52
|
+
Iterations int `json:"iterations"`
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// AgenticMapper executes Agentic-Map operations using full sub-agent sessions.
// It carries the connection/model settings that each spawned sub-agent inherits.
type AgenticMapper struct {
	model       string                 // Default model when AgenticMapConfig.Model is empty
	apiBase     string                 // API base URL passed to sub-agent Configs
	apiKey      string                 // API key passed to sub-agent Configs
	timeout     int                    // Per-sub-agent timeout in seconds
	extraParams map[string]interface{} // Extra LLM parameters forwarded verbatim
	observer    *Observer              // Shared observer for debug/event reporting
}
|
|
64
|
+
|
|
65
|
+
// NewAgenticMapper creates a new Agentic-Map executor.
|
|
66
|
+
func NewAgenticMapper(model, apiBase, apiKey string, timeout int, extraParams map[string]interface{}, observer *Observer) *AgenticMapper {
|
|
67
|
+
return &AgenticMapper{
|
|
68
|
+
model: model,
|
|
69
|
+
apiBase: apiBase,
|
|
70
|
+
apiKey: apiKey,
|
|
71
|
+
timeout: timeout,
|
|
72
|
+
extraParams: extraParams,
|
|
73
|
+
observer: observer,
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// Execute runs an Agentic-Map operation: parallel sub-agent sessions over JSONL input.
//
// Pipeline: read all items from config.InputPath (one JSON value per line),
// fan them out to a bounded worker pool where each item is processed by a
// fresh sub-agent (processAgenticItem), then write one JSONL line per item
// to config.OutputPath (if set) and return aggregate counters.
//
// Non-positive Concurrency/MaxRetries/MaxDepth/MaxIter are replaced with
// defaults (8 / 2 / 3 / 15); an empty Model falls back to am.model.
//
// NOTE(review): config.ReadOnly is only logged here; it is not forwarded to
// the sub-agent Config in processAgenticItem — confirm this is intended.
func (am *AgenticMapper) Execute(config AgenticMapConfig) (*AgenticMapResult, error) {
	start := time.Now()

	// Apply defaults
	if config.Concurrency <= 0 {
		config.Concurrency = 8 // Lower default than LLM-Map due to heavier per-item cost
	}
	if config.MaxRetries <= 0 {
		config.MaxRetries = 2
	}
	if config.MaxDepth <= 0 {
		config.MaxDepth = 3
	}
	if config.MaxIter <= 0 {
		config.MaxIter = 15
	}
	model := config.Model
	if model == "" {
		model = am.model
	}

	am.observer.Debug("lcm.agentic_map", "Starting Agentic-Map: input=%s, concurrency=%d, model=%s, read_only=%v",
		config.InputPath, config.Concurrency, model, config.ReadOnly)

	// Read input items
	items, err := readJSONLFile(config.InputPath)
	if err != nil {
		return nil, fmt.Errorf("failed to read input: %w", err)
	}

	am.observer.Debug("lcm.agentic_map", "Read %d items from %s", len(items), config.InputPath)

	// Initialize results so every slot carries its index and a pending status
	// even if a worker never reaches it.
	results := make([]AgenticItemResult, len(items))
	for i := range results {
		results[i] = AgenticItemResult{
			Index:  i,
			Status: MapItemPending,
		}
	}

	// Worker pool: pre-fill a buffered channel with all indices, then close it
	// so workers drain it and exit naturally.
	var wg sync.WaitGroup
	itemChan := make(chan int, len(items))
	var totalTokens int64 // accumulated atomically across workers

	for i := range items {
		itemChan <- i
	}
	close(itemChan)

	// mu serializes writes into the shared results slice.
	var mu sync.Mutex
	// Never start more workers than there are items.
	for w := 0; w < config.Concurrency && w < len(items); w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for idx := range itemChan {
				result := am.processAgenticItem(items[idx], config, model)
				atomic.AddInt64(&totalTokens, int64(result.tokensUsed))

				mu.Lock()
				results[idx] = AgenticItemResult{
					Index:      idx,
					Status:     result.status,
					Output:     result.output,
					Error:      result.errMsg,
					Retries:    result.retries,
					LLMCalls:   result.llmCalls,
					Iterations: result.iterations,
				}
				mu.Unlock()
			}
		}()
	}

	wg.Wait()

	// Write output file (optional: skipped when no path was configured).
	if config.OutputPath != "" {
		if err := writeAgenticOutput(config.OutputPath, results); err != nil {
			return nil, fmt.Errorf("failed to write output: %w", err)
		}
	}

	// Tally terminal statuses; other statuses are counted in neither bucket.
	completed, failed := 0, 0
	for _, r := range results {
		switch r.Status {
		case MapItemCompleted:
			completed++
		case MapItemFailed:
			failed++
		}
	}

	duration := time.Since(start)
	am.observer.Debug("lcm.agentic_map", "Agentic-Map complete: %d/%d succeeded, %d failed in %s",
		completed, len(items), failed, duration)
	am.observer.Event("lcm.agentic_map_complete", map[string]string{
		"total_items": fmt.Sprintf("%d", len(items)),
		"completed":   fmt.Sprintf("%d", completed),
		"failed":      fmt.Sprintf("%d", failed),
		"duration_ms": fmt.Sprintf("%d", duration.Milliseconds()),
		"tokens_used": fmt.Sprintf("%d", totalTokens),
	})

	return &AgenticMapResult{
		TotalItems:  len(items),
		Completed:   completed,
		Failed:      failed,
		OutputPath:  config.OutputPath,
		Duration:    duration,
		TokensUsed:  int(totalTokens),
		ItemResults: results,
	}, nil
}
|
|
193
|
+
|
|
194
|
+
// ─── Per-Item Sub-Agent Processing ──────────────────────────────────────────
|
|
195
|
+
|
|
196
|
+
// agenticItemResult is the internal (unexported) outcome of processing one
// item; Execute copies it into the exported AgenticItemResult.
type agenticItemResult struct {
	status     MapItemStatus   // terminal status of the item
	output     json.RawMessage // extracted JSON output (success only)
	errMsg     string          // last error message (failure only)
	retries    int             // retries consumed
	tokensUsed int             // tokens spent on this item
	llmCalls   int             // LLM calls made by the sub-agent
	iterations int             // sub-agent reasoning iterations
}
|
|
205
|
+
|
|
206
|
+
func (am *AgenticMapper) processAgenticItem(item json.RawMessage, config AgenticMapConfig, model string) agenticItemResult {
|
|
207
|
+
prompt := strings.ReplaceAll(config.Prompt, "{{item}}", string(item))
|
|
208
|
+
|
|
209
|
+
var lastErr string
|
|
210
|
+
for attempt := 0; attempt <= config.MaxRetries; attempt++ {
|
|
211
|
+
currentPrompt := prompt
|
|
212
|
+
if attempt > 0 && lastErr != "" {
|
|
213
|
+
currentPrompt = fmt.Sprintf("%s\n\nPrevious attempt failed: %s\nPlease fix the output.", prompt, lastErr)
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
// Spawn a full sub-agent (RLM instance) for this item
|
|
217
|
+
subConfig := Config{
|
|
218
|
+
RecursiveModel: model,
|
|
219
|
+
APIBase: am.apiBase,
|
|
220
|
+
APIKey: am.apiKey,
|
|
221
|
+
MaxDepth: config.MaxDepth,
|
|
222
|
+
MaxIterations: config.MaxIter,
|
|
223
|
+
TimeoutSeconds: am.timeout,
|
|
224
|
+
ExtraParams: am.extraParams,
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
subRLM := New(model, subConfig)
|
|
228
|
+
subRLM.currentDepth = 1 // Sub-agents start at depth 1
|
|
229
|
+
subRLM.observer = am.observer
|
|
230
|
+
|
|
231
|
+
// Build context with schema instructions if provided
|
|
232
|
+
context := ""
|
|
233
|
+
if config.OutputSchema != nil {
|
|
234
|
+
schemaJSON, _ := json.MarshalIndent(config.OutputSchema, "", " ")
|
|
235
|
+
context = fmt.Sprintf("You must output valid JSON matching this schema:\n%s\n\nRespond with ONLY the JSON output.", string(schemaJSON))
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
result, stats, err := subRLM.Completion(currentPrompt, context)
|
|
239
|
+
subRLM.Shutdown()
|
|
240
|
+
|
|
241
|
+
tokensUsed := stats.TotalTokens
|
|
242
|
+
|
|
243
|
+
if err != nil {
|
|
244
|
+
lastErr = err.Error()
|
|
245
|
+
continue
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
// Extract JSON from the sub-agent's output
|
|
249
|
+
output := extractJSON(result)
|
|
250
|
+
if output == nil {
|
|
251
|
+
// Try wrapping the raw result as a string value
|
|
252
|
+
wrapped, _ := json.Marshal(result)
|
|
253
|
+
output = wrapped
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
// Validate against schema if provided
|
|
257
|
+
if config.OutputSchema != nil && output != nil {
|
|
258
|
+
if validationErr := validateMapOutput(output, config.OutputSchema); validationErr != "" {
|
|
259
|
+
lastErr = validationErr
|
|
260
|
+
continue
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
return agenticItemResult{
|
|
265
|
+
status: MapItemCompleted,
|
|
266
|
+
output: output,
|
|
267
|
+
retries: attempt,
|
|
268
|
+
tokensUsed: tokensUsed,
|
|
269
|
+
llmCalls: stats.LlmCalls,
|
|
270
|
+
iterations: stats.Iterations,
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
return agenticItemResult{
|
|
275
|
+
status: MapItemFailed,
|
|
276
|
+
errMsg: lastErr,
|
|
277
|
+
retries: config.MaxRetries,
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// ─── Output Writing ─────────────────────────────────────────────────────────
|
|
282
|
+
|
|
283
|
+
func writeAgenticOutput(path string, results []AgenticItemResult) error {
|
|
284
|
+
f, err := os.Create(path)
|
|
285
|
+
if err != nil {
|
|
286
|
+
return err
|
|
287
|
+
}
|
|
288
|
+
defer func() { _ = f.Close() }()
|
|
289
|
+
|
|
290
|
+
w := bufio.NewWriter(f)
|
|
291
|
+
for _, r := range results {
|
|
292
|
+
if r.Status == MapItemCompleted && r.Output != nil {
|
|
293
|
+
if _, writeErr := w.Write(r.Output); writeErr != nil {
|
|
294
|
+
return writeErr
|
|
295
|
+
}
|
|
296
|
+
if _, writeErr := w.WriteString("\n"); writeErr != nil {
|
|
297
|
+
return writeErr
|
|
298
|
+
}
|
|
299
|
+
} else {
|
|
300
|
+
errRecord := map[string]interface{}{
|
|
301
|
+
"_error": r.Error,
|
|
302
|
+
"_index": r.Index,
|
|
303
|
+
"_llm_calls": r.LLMCalls,
|
|
304
|
+
"_iterations": r.Iterations,
|
|
305
|
+
}
|
|
306
|
+
data, _ := json.Marshal(errRecord)
|
|
307
|
+
if _, writeErr := w.Write(data); writeErr != nil {
|
|
308
|
+
return writeErr
|
|
309
|
+
}
|
|
310
|
+
if _, writeErr := w.WriteString("\n"); writeErr != nil {
|
|
311
|
+
return writeErr
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
return w.Flush()
|
|
317
|
+
}
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
package rlm
|
|
2
|
+
|
|
3
|
+
import (
|
|
4
|
+
"fmt"
|
|
5
|
+
"sync"
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
// ─── LCM Context Control Loop ───────────────────────────────────────────────
|
|
9
|
+
// Implements the dual-threshold context management from the LCM paper:
|
|
10
|
+
// - Below τ_soft: no overhead (zero-cost continuity)
|
|
11
|
+
// - τ_soft ≤ tokens < τ_hard: async compaction between turns
|
|
12
|
+
// - tokens ≥ τ_hard: blocking compaction before next LLM call
|
|
13
|
+
|
|
14
|
+
// LCMConfig configures the Lossless Context Management engine.
//
// Zero values for the threshold/size fields are replaced with defaults in
// NewLCMEngine (70% / 90% of the model limit, block size 10, target 500).
type LCMConfig struct {
	// Enabled activates LCM context management (default: false for backward compat)
	Enabled bool `json:"enabled"`

	// SoftThreshold is τ_soft: token count above which async compaction begins.
	// Default: 70% of model limit.
	SoftThreshold int `json:"soft_threshold,omitempty"`

	// HardThreshold is τ_hard: token count above which blocking compaction occurs.
	// Default: 90% of model limit.
	HardThreshold int `json:"hard_threshold,omitempty"`

	// CompactionBlockSize is how many messages to compact at once.
	// Default: 10 messages.
	CompactionBlockSize int `json:"compaction_block_size,omitempty"`

	// SummaryTargetTokens is the target size for each summary node.
	// Default: 500 tokens.
	SummaryTargetTokens int `json:"summary_target_tokens,omitempty"`
}
|
|
35
|
+
|
|
36
|
+
// DefaultLCMConfig returns default LCM configuration.
|
|
37
|
+
func DefaultLCMConfig() LCMConfig {
|
|
38
|
+
return LCMConfig{
|
|
39
|
+
Enabled: false,
|
|
40
|
+
CompactionBlockSize: 10,
|
|
41
|
+
SummaryTargetTokens: 500,
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// LCMEngine is the main LCM context management engine.
// It wraps the store, summarizer, and context control loop.
type LCMEngine struct {
	config     LCMConfig      // effective configuration (defaults applied in NewLCMEngine)
	store      *LCMStore      // message/summary DAG backing the active context
	summarizer *LCMSummarizer // produces summaries for compaction
	observer   *Observer      // debug/event reporting
	modelLimit int            // model context-window size used to derive thresholds

	// Async compaction state: compactMu guards compacting and compactResult.
	// While compacting is true, a single background goroutine is producing a
	// result on compactResult (buffered, capacity 1).
	compactMu     sync.Mutex
	compacting    bool
	compactResult chan *compactionResult
}
|
|
59
|
+
|
|
60
|
+
// compactionResult is the outcome of one compaction pass: either a new
// summary node (nil when nothing was compactable), or an error.
type compactionResult struct {
	summary *SummaryNode
	err     error
}
|
|
64
|
+
|
|
65
|
+
// NewLCMEngine creates a new LCM engine with the given configuration.
|
|
66
|
+
func NewLCMEngine(config LCMConfig, store *LCMStore, summarizer *LCMSummarizer, observer *Observer, modelLimit int) *LCMEngine {
|
|
67
|
+
// Apply defaults based on model limit
|
|
68
|
+
if config.SoftThreshold == 0 && modelLimit > 0 {
|
|
69
|
+
config.SoftThreshold = int(float64(modelLimit) * 0.70)
|
|
70
|
+
}
|
|
71
|
+
if config.HardThreshold == 0 && modelLimit > 0 {
|
|
72
|
+
config.HardThreshold = int(float64(modelLimit) * 0.90)
|
|
73
|
+
}
|
|
74
|
+
if config.CompactionBlockSize == 0 {
|
|
75
|
+
config.CompactionBlockSize = 10
|
|
76
|
+
}
|
|
77
|
+
if config.SummaryTargetTokens == 0 {
|
|
78
|
+
config.SummaryTargetTokens = 500
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
return &LCMEngine{
|
|
82
|
+
config: config,
|
|
83
|
+
store: store,
|
|
84
|
+
summarizer: summarizer,
|
|
85
|
+
observer: observer,
|
|
86
|
+
modelLimit: modelLimit,
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
// ─── Context Control Loop (Algorithm 2 from paper) ──────────────────────────
|
|
91
|
+
|
|
92
|
+
// OnNewItem is called after each new message is added to the store.
|
|
93
|
+
// It implements the context control loop from Figure 2 of the LCM paper.
|
|
94
|
+
// Returns nil if no compaction was needed or if async compaction was triggered.
|
|
95
|
+
func (e *LCMEngine) OnNewItem() error {
|
|
96
|
+
if !e.config.Enabled {
|
|
97
|
+
return nil
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// Check if async compaction has completed
|
|
101
|
+
e.applyPendingCompaction()
|
|
102
|
+
|
|
103
|
+
tokens := e.store.ActiveContextTokens()
|
|
104
|
+
|
|
105
|
+
// Below soft threshold: zero-cost continuity
|
|
106
|
+
if tokens <= e.config.SoftThreshold {
|
|
107
|
+
return nil
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Soft threshold exceeded: trigger async compaction (non-blocking)
|
|
111
|
+
if tokens < e.config.HardThreshold {
|
|
112
|
+
e.observer.Debug("lcm.control", "Soft threshold exceeded (%d > %d), triggering async compaction",
|
|
113
|
+
tokens, e.config.SoftThreshold)
|
|
114
|
+
e.triggerAsyncCompaction()
|
|
115
|
+
return nil
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// Hard threshold exceeded: blocking compaction
|
|
119
|
+
e.observer.Debug("lcm.control", "Hard threshold exceeded (%d >= %d), blocking compaction",
|
|
120
|
+
tokens, e.config.HardThreshold)
|
|
121
|
+
return e.blockingCompaction()
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
// ─── Async Compaction ───────────────────────────────────────────────────────
|
|
125
|
+
|
|
126
|
+
func (e *LCMEngine) triggerAsyncCompaction() {
|
|
127
|
+
e.compactMu.Lock()
|
|
128
|
+
if e.compacting {
|
|
129
|
+
e.compactMu.Unlock()
|
|
130
|
+
return // Already compacting
|
|
131
|
+
}
|
|
132
|
+
e.compacting = true
|
|
133
|
+
e.compactResult = make(chan *compactionResult, 1)
|
|
134
|
+
e.compactMu.Unlock()
|
|
135
|
+
|
|
136
|
+
go func() {
|
|
137
|
+
result := e.performCompaction()
|
|
138
|
+
e.compactResult <- result
|
|
139
|
+
}()
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// applyPendingCompaction does a non-blocking check for a finished async
// compaction. If one has completed, its summary replaces the oldest block of
// messages in the store; if it is still running (or none was started), this
// returns immediately.
//
// Locking protocol: compactMu is held only while inspecting/clearing the
// compacting flag; it is released BEFORE the store mutation and observer
// calls so the engine never holds its own lock across store operations.
func (e *LCMEngine) applyPendingCompaction() {
	e.compactMu.Lock()
	if !e.compacting || e.compactResult == nil {
		e.compactMu.Unlock()
		return
	}

	// Non-blocking check
	select {
	case result := <-e.compactResult:
		// Result is in hand: mark the cycle finished before releasing the lock.
		e.compacting = false
		e.compactMu.Unlock()

		if result.err != nil {
			e.observer.Error("lcm.control", "Async compaction failed: %v", result.err)
			return
		}
		if result.summary != nil {
			// Swap the oldest raw-message block for the freshly built summary.
			removed := e.store.CompactOldestBlock(result.summary)
			e.observer.Debug("lcm.control", "Async compaction applied: replaced %d messages with summary %s",
				len(removed), result.summary.ID)
			e.observer.Event("lcm.compaction", map[string]string{
				"type":               "async",
				"summary_id":         result.summary.ID,
				"messages_compacted": fmt.Sprintf("%d", len(removed)),
				"summary_tokens":     fmt.Sprintf("%d", result.summary.Tokens),
				"level":              fmt.Sprintf("%d", result.summary.Level),
			})
		}
	default:
		e.compactMu.Unlock()
		// Not done yet, continue
	}
}
|
|
176
|
+
|
|
177
|
+
// ─── Blocking Compaction ────────────────────────────────────────────────────
|
|
178
|
+
|
|
179
|
+
func (e *LCMEngine) blockingCompaction() error {
|
|
180
|
+
// Keep compacting until under hard threshold
|
|
181
|
+
for e.store.ActiveContextTokens() >= e.config.HardThreshold {
|
|
182
|
+
result := e.performCompaction()
|
|
183
|
+
if result.err != nil {
|
|
184
|
+
return fmt.Errorf("blocking compaction failed: %w", result.err)
|
|
185
|
+
}
|
|
186
|
+
if result.summary == nil {
|
|
187
|
+
break // Nothing more to compact
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
removed := e.store.CompactOldestBlock(result.summary)
|
|
191
|
+
e.observer.Debug("lcm.control", "Blocking compaction: replaced %d messages with summary %s (%d tokens)",
|
|
192
|
+
len(removed), result.summary.ID, result.summary.Tokens)
|
|
193
|
+
e.observer.Event("lcm.compaction", map[string]string{
|
|
194
|
+
"type": "blocking",
|
|
195
|
+
"summary_id": result.summary.ID,
|
|
196
|
+
"messages_compacted": fmt.Sprintf("%d", len(removed)),
|
|
197
|
+
"summary_tokens": fmt.Sprintf("%d", result.summary.Tokens),
|
|
198
|
+
"level": fmt.Sprintf("%d", result.summary.Level),
|
|
199
|
+
})
|
|
200
|
+
}
|
|
201
|
+
return nil
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
// ─── Core Compaction ────────────────────────────────────────────────────────
|
|
205
|
+
|
|
206
|
+
func (e *LCMEngine) performCompaction() *compactionResult {
|
|
207
|
+
active := e.store.GetActiveContext()
|
|
208
|
+
|
|
209
|
+
// Find the oldest block of raw messages to compact (skip system prompt)
|
|
210
|
+
var block []*StoreMessage
|
|
211
|
+
for _, item := range active {
|
|
212
|
+
if item.IsMessage() {
|
|
213
|
+
if item.Message.Role == RoleSystem {
|
|
214
|
+
continue // Never compact system prompt
|
|
215
|
+
}
|
|
216
|
+
block = append(block, item.Message)
|
|
217
|
+
if len(block) >= e.config.CompactionBlockSize {
|
|
218
|
+
break
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
if len(block) == 0 {
|
|
224
|
+
return &compactionResult{summary: nil, err: nil}
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
// Apply three-level escalation
|
|
228
|
+
result, err := e.summarizer.SummarizeMessages(block, e.config.SummaryTargetTokens)
|
|
229
|
+
if err != nil {
|
|
230
|
+
return &compactionResult{err: err}
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
// Create summary node in the DAG
|
|
234
|
+
var msgIDs []string
|
|
235
|
+
for _, msg := range block {
|
|
236
|
+
msgIDs = append(msgIDs, msg.ID)
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
summary := e.store.CreateLeafSummary(msgIDs, result.Content, result.Level)
|
|
240
|
+
|
|
241
|
+
return &compactionResult{summary: summary}
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
// ─── Condensed Summaries (DAG depth > 1) ────────────────────────────────────
|
|
245
|
+
|
|
246
|
+
// CondenseOldSummaries finds summary nodes in the active context and merges them
|
|
247
|
+
// into a higher-order condensed summary. This creates DAG depth > 1.
|
|
248
|
+
func (e *LCMEngine) CondenseOldSummaries() error {
|
|
249
|
+
active := e.store.GetActiveContext()
|
|
250
|
+
|
|
251
|
+
// Collect summary items
|
|
252
|
+
var summaryItems []*ActiveContextItem
|
|
253
|
+
for _, item := range active {
|
|
254
|
+
if !item.IsMessage() && item.Summary != nil {
|
|
255
|
+
summaryItems = append(summaryItems, item)
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
// Need at least 2 summaries to condense
|
|
260
|
+
if len(summaryItems) < 2 {
|
|
261
|
+
return nil
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
// Condense the oldest summaries
|
|
265
|
+
condenseCount := len(summaryItems)
|
|
266
|
+
if condenseCount > e.config.CompactionBlockSize {
|
|
267
|
+
condenseCount = e.config.CompactionBlockSize
|
|
268
|
+
}
|
|
269
|
+
toCondense := summaryItems[:condenseCount]
|
|
270
|
+
|
|
271
|
+
// Build combined content for re-summarization
|
|
272
|
+
var combined string
|
|
273
|
+
var childIDs []string
|
|
274
|
+
for _, item := range toCondense {
|
|
275
|
+
combined += item.Summary.Content + "\n\n"
|
|
276
|
+
childIDs = append(childIDs, item.Summary.ID)
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
// Summarize the combined summaries
|
|
280
|
+
result, err := e.summarizer.Summarize(combined, e.config.SummaryTargetTokens)
|
|
281
|
+
if err != nil {
|
|
282
|
+
return fmt.Errorf("condensation failed: %w", err)
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
// Create condensed summary node
|
|
286
|
+
condensed := e.store.CreateCondensedSummary(childIDs, result.Content, result.Level)
|
|
287
|
+
|
|
288
|
+
e.observer.Debug("lcm.control", "Condensed %d summaries into %s (%d tokens)",
|
|
289
|
+
len(childIDs), condensed.ID, condensed.Tokens)
|
|
290
|
+
|
|
291
|
+
return nil
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
// ─── Query Helpers ──────────────────────────────────────────────────────────
|
|
295
|
+
|
|
296
|
+
// GetStore returns the underlying LCM store (the live instance, not a copy).
func (e *LCMEngine) GetStore() *LCMStore {
	return e.store
}
|
|
300
|
+
|
|
301
|
+
// GetConfig returns the LCM configuration with defaults applied
// (a copy; mutating it does not affect the engine).
func (e *LCMEngine) GetConfig() LCMConfig {
	return e.config
}
|
|
305
|
+
|
|
306
|
+
// IsEnabled returns whether LCM context management is active.
func (e *LCMEngine) IsEnabled() bool {
	return e.config.Enabled
}
|