npm - recursive-llm-ts - Versions diffs - 4.7.0 → 4.9.0 - Mend

recursive-llm-ts 4.7.0 → 4.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/README.md +2 -2
package/bin/rlm-go +0 -0
package/dist/bridge-interface.d.ts +3 -0
package/dist/rlm.js +10 -0
package/go/README.md +2 -2
package/go/cmd/rlm/main.go +1 -1
package/go/go.mod +1 -1
package/go/rlm/context_overflow.go +181 -29
package/go/rlm/context_overflow_test.go +373 -3
package/go/rlm/doc.go +2 -2
package/go/rlm/meta_agent.go +18 -2
package/go/rlm/observability.go +6 -0
package/go/rlm/openai.go +27 -10
package/go/rlm/rlm.go +86 -3
package/go/rlm/structured.go +23 -0
package/go/rlm/token_tracking_test.go +845 -0
package/go/rlm/types.go +7 -4
package/package.json +4 -4

package/README.md CHANGED Viewed

@@ -1120,7 +1120,7 @@ The Go implementation can be used as a standalone library in Go projects.
 ### Installation
 ```bash
-go get github.com/jbeck018/recursive-llm-ts/go
+go get github.com/howlerops/recursive-llm-ts/go
 ```
 ### Usage
@@ -1132,7 +1132,7 @@ import (
     "fmt"
     "os"
-    "github.com/jbeck018/recursive-llm-ts/go/rlm"
+    "github.com/howlerops/recursive-llm-ts/go/rlm"
 )
 func main() {

package/bin/rlm-go CHANGED Viewed

Binary file

package/dist/bridge-interface.d.ts CHANGED Viewed

@@ -3,6 +3,9 @@ export interface RLMStats {
     iterations: number;
     depth: number;
     parsing_retries?: number;
+    total_tokens?: number;
+    prompt_tokens?: number;
+    completion_tokens?: number;
 }
 export interface RLMResult {
     result: string;

package/dist/rlm.js CHANGED Viewed

@@ -44,6 +44,7 @@ class RLMResultFormatter {
     }
     /** Format stats as a concise one-liner */
     prettyStats() {
+        var _a, _b;
         const parts = [
             `LLM Calls: ${this.stats.llm_calls}`,
             `Iterations: ${this.stats.iterations}`,
@@ -52,6 +53,9 @@ class RLMResultFormatter {
         if (this.stats.parsing_retries) {
             parts.push(`Retries: ${this.stats.parsing_retries}`);
         }
+        if (this.stats.total_tokens) {
+            parts.push(`Tokens: ${this.stats.total_tokens} (prompt: ${(_a = this.stats.prompt_tokens) !== null && _a !== void 0 ? _a : 0}, completion: ${(_b = this.stats.completion_tokens) !== null && _b !== void 0 ? _b : 0})`);
+        }
         if (this.cached) {
             parts.push('(cached)');
         }
@@ -69,6 +73,7 @@ class RLMResultFormatter {
     }
     /** Format as Markdown */
     toMarkdown() {
+        var _a, _b;
         const lines = [
             '## Result',
             '',
@@ -85,6 +90,11 @@ class RLMResultFormatter {
         if (this.stats.parsing_retries) {
             lines.push(`| Parsing Retries | ${this.stats.parsing_retries} |`);
         }
+        if (this.stats.total_tokens) {
+            lines.push(`| Total Tokens | ${this.stats.total_tokens} |`);
+            lines.push(`| Prompt Tokens | ${(_a = this.stats.prompt_tokens) !== null && _a !== void 0 ? _a : 0} |`);
+            lines.push(`| Completion Tokens | ${(_b = this.stats.completion_tokens) !== null && _b !== void 0 ? _b : 0} |`);
+        }
         lines.push(`| Cached | ${this.cached} |`);
         lines.push(`| Model | ${this.model} |`);
         return lines.join('\n');

package/go/README.md CHANGED Viewed

@@ -13,7 +13,7 @@ This is both a standalone Go library and CLI binary that implements the RLM algo
 ### As a Go Library
 ```bash
-go get github.com/jbeck018/recursive-llm-ts/go
+go get github.com/howlerops/recursive-llm-ts/go
 ```
 ### Usage as Library
@@ -25,7 +25,7 @@ import (
     "fmt"
     "os"
-    "github.com/jbeck018/recursive-llm-ts/go/rlm"
+    "github.com/howlerops/recursive-llm-ts/go/rlm"
 )
 func main() {

package/go/cmd/rlm/main.go CHANGED Viewed

@@ -6,7 +6,7 @@ import (
 	"io"
 	"os"
-	"github.com/jbeck018/recursive-llm-ts/go/rlm"
+	"github.com/howlerops/recursive-llm-ts/go/rlm"
 )
 type requestPayload struct {

package/go/go.mod CHANGED Viewed

@@ -1,4 +1,4 @@
-module github.com/jbeck018/recursive-llm-ts/go
+module github.com/howlerops/recursive-llm-ts/go
 go 1.25.0

package/go/rlm/context_overflow.go CHANGED Viewed

@@ -30,6 +30,155 @@ func DefaultContextOverflowConfig() ContextOverflowConfig {
 	}
 }
+// ─── Model Token Limits ──────────────────────────────────────────────────────
+// modelTokenLimits maps lowercase model-name patterns to their maximum context window sizes (token counts).
+// LookupModelTokenLimit consults it (exact key first, then longest matching prefix) so overflow can be detected pre-emptively instead of waiting for API errors.
+var modelTokenLimits = map[string]int{
+	// OpenAI
+	"gpt-4o":            128000,
+	"gpt-4o-mini":       128000,
+	"gpt-4-turbo":       128000,
+	"gpt-4":             8192,
+	"gpt-4-32k":         32768,
+	"gpt-3.5-turbo":     16385,
+	"gpt-3.5-turbo-16k": 16385,
+	"o1":                200000,
+	"o1-mini":           128000,
+	"o1-preview":        128000,
+	"o3-mini":           200000,
+	// Anthropic (via LiteLLM/proxy). NOTE(review): Anthropic's own API IDs appear to spell 3.5 as "claude-3-5-…", which the dotted keys below would not prefix-match — confirm.
+	"claude-3-opus":       200000,
+	"claude-3-sonnet":     200000,
+	"claude-3-haiku":      200000,
+	"claude-3.5-sonnet":   200000,
+	"claude-3.5-haiku":    200000,
+	"claude-sonnet-4":     200000,
+	"claude-opus-4":       200000,
+	// Llama (common vLLM deployments)
+	"llama-3":     8192,
+	"llama-3.1":   128000,
+	"llama-3.2":   128000,
+	"llama-3.3":   128000,
+	// Mistral
+	"mistral-7b":    32768,
+	"mixtral-8x7b":  32768,
+	"mistral-large": 128000,
+	"mistral-small": 128000,
+	// Qwen
+	"qwen-2":   32768,
+	"qwen-2.5": 128000,
+}
+// LookupModelTokenLimit reports the context-window size recorded for model, or 0 when
+// the model is not recognized. The comparison is case-insensitive: an exact table entry
+// wins outright; otherwise the longest table key that is a prefix of the name is used,
+// so "gpt-4o-mini-2024-07-18" resolves to the "gpt-4o-mini" entry rather than "gpt-4o".
+func LookupModelTokenLimit(model string) int {
+	name := strings.ToLower(model)
+	// On a miss, limit stays 0, which doubles as the "unknown model" result below.
+	limit, exact := modelTokenLimits[name]
+	if exact {
+		return limit
+	}
+	// Track only the length of the best prefix seen so far; a candidate must be strictly longer to win.
+	longest := 0
+	for prefix, tokens := range modelTokenLimits {
+		if len(prefix) <= longest {
+			continue
+		}
+		if !strings.HasPrefix(name, prefix) {
+			continue
+		}
+		longest, limit = len(prefix), tokens
+	}
+	return limit
+}
+// getModelTokenLimit resolves the token limit used by pre-emptive overflow checks.
+// A positive MaxModelTokens in the overflow config takes precedence; otherwise the
+// limit is looked up from the model name, yielding 0 (check disabled) when unknown.
+func (r *RLM) getModelTokenLimit() int {
+	if cfg := r.contextOverflow; cfg != nil {
+		if override := cfg.MaxModelTokens; override > 0 {
+			return override
+		}
+	}
+	return LookupModelTokenLimit(r.model)
+}
+// ─── Pre-emptive Overflow Check ──────────────────────────────────────────────
+// structuredPromptOverhead is the approximate number of tokens a structured-completion prompt adds
+// (instructions, schema constraints, JSON formatting directives). NOTE(review): not referenced in this chunk; presumably passed as extraOverhead to PreemptiveReduceContext — confirm against callers.
+const structuredPromptOverhead = 350
+// PreemptiveReduceContext estimates whether query plus context would exceed the model's
+// token limit and, if so, shrinks the context BEFORE the prompt is built.
+//
+// Unlike post-hoc overflow recovery, which only triggers after an API error, this runs
+// ahead of the first LLM call, following the RLM paper's principle that
+// "the context window of the root LM is rarely clogged."
+//
+// extraOverhead is a caller-supplied token allowance that is added to the estimate.
+//
+// It returns the context to use, a flag that is true only when the context was actually
+// reduced, and an error. The check is skipped (original context, false, nil) when no
+// token limit is known for the model or when overflow handling is nil or disabled. If
+// the reducer fails, the original context is returned together with a wrapped error.
+func (r *RLM) PreemptiveReduceContext(query string, context string, extraOverhead int) (string, bool, error) {
+	modelLimit := r.getModelTokenLimit()
+	if modelLimit == 0 {
+		// No known limit; skip pre-emptive check (will rely on post-hoc recovery)
+		return context, false, nil
+	}
+	if r.contextOverflow == nil || !r.contextOverflow.Enabled {
+		return context, false, nil
+	}
+	// Work on a copy of the config so the default safety margin applies both to the
+	// estimate below and to the reducer, without mutating the shared configuration.
+	cfg := *r.contextOverflow
+	if cfg.SafetyMargin == 0 {
+		cfg.SafetyMargin = 0.15
+	}
+	// Estimate total token budget needed
+	contextTokens := EstimateTokens(context)
+	queryTokens := EstimateTokens(query)
+	responseTokens := r.getResponseTokenBudget()
+	safetyTokens := int(float64(modelLimit) * cfg.SafetyMargin)
+	totalEstimate := contextTokens + queryTokens + extraOverhead + responseTokens + safetyTokens
+	r.observer.Debug("overflow", "Pre-emptive check: context=%d query=%d overhead=%d response=%d safety=%d total=%d limit=%d",
+		contextTokens, queryTokens, extraOverhead, responseTokens,
+		safetyTokens, totalEstimate, modelLimit)
+	if totalEstimate <= modelLimit {
+		return context, false, nil
+	}
+	// Context would overflow — reduce it proactively
+	r.observer.Debug("overflow", "Pre-emptive reduction needed: estimated %d tokens > limit %d", totalEstimate, modelLimit)
+	reducer := newContextReducer(r, cfg, r.observer)
+	reduced, err := reducer.ReduceForCompletion(query, context, modelLimit)
+	if err != nil {
+		return context, false, fmt.Errorf("pre-emptive context reduction failed: %w", err)
+	}
+	r.observer.Debug("overflow", "Pre-emptive reduction: %d -> %d chars", len(context), len(reduced))
+	return reduced, true, nil
+}
+// getResponseTokenBudget extracts max_tokens or max_completion_tokens from ExtraParams.
+func (r *RLM) getResponseTokenBudget() int {
+	if r.extraParams == nil {
+		return 0
+	}
+	for _, key := range []string{"max_completion_tokens", "max_tokens"} {
+		if v, ok := r.extraParams[key]; ok {
+			switch n := v.(type) {
+			case float64:
+				return int(n)
+			case int:
+				return n
+			case int64:
+				return int(n)
+			}
+		}
+	}
+	return 0
+}
 // ─── Token Estimation ────────────────────────────────────────────────────────
 // EstimateTokens provides a fast approximation of token count for a string.
@@ -168,27 +317,9 @@ func newContextReducer(rlm *RLM, config ContextOverflowConfig, obs *Observer) *c
 	return &contextReducer{rlm: rlm, config: config, obs: obs}
 }
-// getResponseTokenBudget extracts max_tokens or max_completion_tokens from ExtraParams.
-// This represents how many tokens the API will reserve for the response, which must be
-// subtracted from the model's total capacity when sizing input chunks.
-func (cr *contextReducer) getResponseTokenBudget(modelLimit int) int {
-	if cr.rlm.extraParams == nil {
-		return 0
-	}
-	// Check max_completion_tokens first (newer API parameter), then max_tokens
-	for _, key := range []string{"max_completion_tokens", "max_tokens"} {
-		if v, ok := cr.rlm.extraParams[key]; ok {
-			switch n := v.(type) {
-			case float64:
-				return int(n)
-			case int:
-				return n
-			case int64:
-				return int(n)
-			}
-		}
-	}
-	return 0
+// getResponseTokenBudget delegates to the RLM engine's method.
+func (cr *contextReducer) getResponseTokenBudget() int {
+	return cr.rlm.getResponseTokenBudget()
 }
 // makeMapPhaseParams creates ExtraParams suitable for map-phase LLM calls (summarization).
@@ -222,7 +353,7 @@ func (cr *contextReducer) ReduceForCompletion(query string, context string, mode
 	// Calculate safe token budget per chunk
 	// Reserve tokens for: system prompt (~500), query, overhead, safety margin, response budget
 	queryTokens := EstimateTokens(query)
-	responseTokens := cr.getResponseTokenBudget(modelLimit)
+	responseTokens := cr.getResponseTokenBudget()
 	overhead := 500 + queryTokens + int(float64(modelLimit)*cr.config.SafetyMargin) + responseTokens
 	safeTokensPerChunk := modelLimit - overhead
@@ -300,8 +431,13 @@ func (cr *contextReducer) reduceByMapReduce(query string, chunks []string, model
 			}
 			cr.rlm.stats.LlmCalls++
-			summaries[idx] = result
-			cr.obs.Debug("overflow", "Chunk %d/%d summarized: %d -> %d chars", idx+1, len(chunks), len(chunkText), len(result))
+			if result.Usage != nil {
+				cr.rlm.stats.PromptTokens += result.Usage.PromptTokens
+				cr.rlm.stats.CompletionTokens += result.Usage.CompletionTokens
+				cr.rlm.stats.TotalTokens += result.Usage.TotalTokens
+			}
+			summaries[idx] = result.Content
+			cr.obs.Debug("overflow", "Chunk %d/%d summarized: %d -> %d chars", idx+1, len(chunks), len(chunkText), len(result.Content))
 		}(i, chunk)
 	}
@@ -405,8 +541,13 @@ func (cr *contextReducer) reduceByChunkedExtraction(query string, chunks []strin
 			}
 			cr.rlm.stats.LlmCalls++
-			if strings.TrimSpace(result) != "NO_RELEVANT_CONTENT" {
-				results[idx] = result
+			if result.Usage != nil {
+				cr.rlm.stats.PromptTokens += result.Usage.PromptTokens
+				cr.rlm.stats.CompletionTokens += result.Usage.CompletionTokens
+				cr.rlm.stats.TotalTokens += result.Usage.TotalTokens
+			}
+			if strings.TrimSpace(result.Content) != "NO_RELEVANT_CONTENT" {
+				results[idx] = result.Content
 			}
 		}(i, chunk)
 	}
@@ -475,7 +616,7 @@ func (cr *contextReducer) reduceByRefine(query string, chunks []string, modelLim
 		{Role: "user", Content: initialPrompt},
 	}
-	currentAnswer, err := CallChatCompletion(ChatRequest{
+	initialResult, err := CallChatCompletion(ChatRequest{
 		Model:       cr.rlm.model,
 		Messages:    messages,
 		APIBase:     cr.rlm.apiBase,
@@ -487,6 +628,12 @@ func (cr *contextReducer) reduceByRefine(query string, chunks []string, modelLim
 		return "", fmt.Errorf("refine initial chunk: %w", err)
 	}
 	cr.rlm.stats.LlmCalls++
+	if initialResult.Usage != nil {
+		cr.rlm.stats.PromptTokens += initialResult.Usage.PromptTokens
+		cr.rlm.stats.CompletionTokens += initialResult.Usage.CompletionTokens
+		cr.rlm.stats.TotalTokens += initialResult.Usage.TotalTokens
+	}
+	currentAnswer := initialResult.Content
 	cr.obs.Debug("overflow", "Refine: initial answer from chunk 1/%d (%d chars)", len(chunks), len(currentAnswer))
 	// Phase 2: Refine the answer with each subsequent chunk
@@ -507,7 +654,7 @@ func (cr *contextReducer) reduceByRefine(query string, chunks []string, modelLim
 			{Role: "user", Content: refinePrompt},
 		}
-		refined, err := CallChatCompletion(ChatRequest{
+		refineResult, err := CallChatCompletion(ChatRequest{
 			Model:       cr.rlm.model,
 			Messages:    messages,
 			APIBase:     cr.rlm.apiBase,
@@ -521,7 +668,12 @@ func (cr *contextReducer) reduceByRefine(query string, chunks []string, modelLim
 			continue
 		}
 		cr.rlm.stats.LlmCalls++
-		currentAnswer = refined
+		if refineResult.Usage != nil {
+			cr.rlm.stats.PromptTokens += refineResult.Usage.PromptTokens
+			cr.rlm.stats.CompletionTokens += refineResult.Usage.CompletionTokens
+			cr.rlm.stats.TotalTokens += refineResult.Usage.TotalTokens
+		}
+		currentAnswer = refineResult.Content
 		cr.obs.Debug("overflow", "Refine: incorporated chunk %d/%d (%d chars)", i+1, len(chunks), len(currentAnswer))
 	}