npm - recursive-llm-ts - Versions diffs - 4.7.0 → 4.9.0 - Mend

recursive-llm-ts 4.7.0 → 4.9.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (18)

package/README.md +2 -2
package/bin/rlm-go +0 -0
package/dist/bridge-interface.d.ts +3 -0
package/dist/rlm.js +10 -0
package/go/README.md +2 -2
package/go/cmd/rlm/main.go +1 -1
package/go/go.mod +1 -1
package/go/rlm/context_overflow.go +181 -29
package/go/rlm/context_overflow_test.go +373 -3
package/go/rlm/doc.go +2 -2
package/go/rlm/meta_agent.go +18 -2
package/go/rlm/observability.go +6 -0
package/go/rlm/openai.go +27 -10
package/go/rlm/rlm.go +86 -3
package/go/rlm/structured.go +23 -0
package/go/rlm/token_tracking_test.go +845 -0
package/go/rlm/types.go +7 -4
package/package.json +4 -4

package/go/rlm/structured.go CHANGED

@@ -46,6 +46,20 @@ func (r *RLM) StructuredCompletion(query string, context string, config *Structu
 	subTasks := decomposeSchema(config.Schema)
 	r.observer.Debug("structured", "Schema decomposed into %d subtasks", len(subTasks))
+	// Pre-emptive overflow check: reduce context BEFORE building the prompt.
+	// Structured completion embeds the full context in the user message, so this is
+	// critical to prevent overflow on the first LLM call (following the RLM paper's
+	// principle: "the context window of the root LM is rarely clogged").
+	schemaJSON, _ := json.Marshal(config.Schema)
+	schemaOverhead := EstimateTokens(string(schemaJSON)) + structuredPromptOverhead
+	reducedCtx, wasReduced, reduceErr := r.PreemptiveReduceContext(query, context, schemaOverhead)
+	if reduceErr != nil {
+		r.observer.Error("structured", "Pre-emptive reduction failed: %v (proceeding with original context)", reduceErr)
+	} else if wasReduced {
+		r.observer.Debug("structured", "Pre-emptive reduction applied: %d -> %d chars", len(context), len(reducedCtx))
+		context = reducedCtx
+	}
 	// If simple schema or parallel disabled, use direct method
 	if len(subTasks) <= 2 || !config.ParallelExecution {
 		r.observer.Debug("structured", "Using direct completion method")
@@ -182,9 +196,15 @@ func (r *RLM) structuredCompletionDirect(query string, context string, config *S
 		}
 		stats.ParsingRetries = attempt
+		stats.TotalTokens = r.stats.TotalTokens
+		stats.PromptTokens = r.stats.PromptTokens
+		stats.CompletionTokens = r.stats.CompletionTokens
 		return parsed, stats, nil
 	}
+	stats.TotalTokens = r.stats.TotalTokens
+	stats.PromptTokens = r.stats.PromptTokens
+	stats.CompletionTokens = r.stats.CompletionTokens
 	return nil, stats, fmt.Errorf("failed to get valid structured output after %d attempts: %v", config.MaxRetries, lastErr)
 }
@@ -251,6 +271,9 @@ func (r *RLM) structuredCompletionParallel(query string, context string, config
 				totalStats.Depth = stats.Depth
 			}
 			totalStats.ParsingRetries += stats.ParsingRetries
+			totalStats.TotalTokens += stats.TotalTokens
+			totalStats.PromptTokens += stats.PromptTokens
+			totalStats.CompletionTokens += stats.CompletionTokens
 			statsMutex.Unlock()
 		}(i, task)
 	}