npm - recursive-llm-ts - Versions diffs - 4.5.0 → 4.7.0 - Mend

recursive-llm-ts 4.5.0 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

package/README.md +74 -4
package/bin/rlm-go +0 -0
package/dist/bridge-interface.d.ts +14 -0
package/dist/errors.d.ts +10 -0
package/dist/errors.js +25 -1
package/dist/index.d.ts +2 -2
package/dist/index.js +2 -1
package/dist/rlm.d.ts +3 -1
package/dist/rlm.js +5 -0
package/go/README.md +9 -1
package/go/rlm/context_overflow.go +572 -0
package/go/rlm/context_overflow_test.go +901 -0
package/go/rlm/errors.go +185 -1
package/go/rlm/rlm.go +10 -0
package/go/rlm/structured.go +60 -7
package/go/rlm/textrank.go +273 -0
package/go/rlm/textrank_test.go +335 -0
package/go/rlm/tfidf.go +225 -0
package/go/rlm/tfidf_test.go +272 -0
package/go/rlm/types.go +25 -2
package/package.json +1 -1

package/go/rlm/errors.go CHANGED Viewed

@@ -1,6 +1,10 @@
 package rlm
-import "fmt"
+import (
+	"errors"
+	"fmt"
+	"strings"
+)
 // RLMError is the base error type for all RLM errors
 type RLMError struct {
@@ -81,3 +85,183 @@ func NewAPIError(statusCode int, response string) *APIError {
 		},
 	}
 }
+// ContextOverflowError is returned when the request exceeds the model's context window.
+// It records the model's token limit and the size of the offending request, and embeds
+// *APIError so the status code, raw response and message travel with it.
+type ContextOverflowError struct {
+	ModelLimit    int // Maximum tokens the model supports
+	RequestTokens int // Number of tokens in the request
+	// Embedded API error; NewContextOverflowError populates its StatusCode,
+	// Response and message.
+	*APIError
+}
+// NewContextOverflowError builds a ContextOverflowError from the HTTP status code,
+// the raw API response, and the token counts parsed out of that response.
+// The generated message reports the limit, the request size, and the difference
+// between the two (requestTokens - modelLimit).
+func NewContextOverflowError(statusCode int, response string, modelLimit, requestTokens int) *ContextOverflowError {
+	overflow := requestTokens - modelLimit
+	base := &RLMError{
+		Message: fmt.Sprintf("context overflow: model limit is %d tokens but request has %d tokens (overflow by %d)",
+			modelLimit, requestTokens, overflow),
+	}
+	apiErr := &APIError{
+		StatusCode: statusCode,
+		Response:   response,
+		RLMError:   base,
+	}
+	return &ContextOverflowError{
+		ModelLimit:    modelLimit,
+		RequestTokens: requestTokens,
+		APIError:      apiErr,
+	}
+}
+// Unwrap returns the embedded APIError so errors.Is / errors.As can continue
+// down the chain.
+//
+// It returns an untyped nil when there is no embedded APIError. Returning the
+// nil *APIError directly would wrap it in a non-nil error interface, making the
+// chain appear to contain an APIError that cannot be used.
+func (e *ContextOverflowError) Unwrap() error {
+	if e == nil || e.APIError == nil {
+		return nil
+	}
+	return e.APIError
+}
+// OverflowRatio reports RequestTokens divided by ModelLimit (e.g., 1.23 means
+// the request is 23% over the limit). A zero ModelLimit yields 0 rather than
+// dividing by zero.
+func (e *ContextOverflowError) OverflowRatio() float64 {
+	limit := float64(e.ModelLimit)
+	if limit == 0 {
+		return 0
+	}
+	return float64(e.RequestTokens) / limit
+}
+// IsContextOverflow reports whether err represents a context overflow and, if so,
+// returns the matching *ContextOverflowError.
+//
+// Detection order:
+//  1. a *ContextOverflowError anywhere in err's chain is returned as-is;
+//  2. for an *APIError in the chain, its Response and then its Error() text are parsed;
+//  3. finally err.Error() itself is parsed; the synthesized error then carries
+//     status code 0 and the error text as its response.
+//
+// A nil err is never an overflow and yields (nil, false).
+func IsContextOverflow(err error) (*ContextOverflowError, bool) {
+	// Guard first: the err.Error() fallback at the end would panic on a nil error.
+	if err == nil {
+		return nil, false
+	}
+	// Direct type check
+	var coe *ContextOverflowError
+	if errors.As(err, &coe) {
+		return coe, true
+	}
+	// Parse from APIError: the raw response first, then the formatted message.
+	var apiErr *APIError
+	if errors.As(err, &apiErr) {
+		if limit, request, ok := parseContextOverflowMessage(apiErr.Response); ok {
+			return NewContextOverflowError(apiErr.StatusCode, apiErr.Response, limit, request), true
+		}
+		if limit, request, ok := parseContextOverflowMessage(apiErr.Error()); ok {
+			return NewContextOverflowError(apiErr.StatusCode, apiErr.Response, limit, request), true
+		}
+	}
+	// Parse from generic error message
+	if limit, request, ok := parseContextOverflowMessage(err.Error()); ok {
+		return NewContextOverflowError(0, err.Error(), limit, request), true
+	}
+	return nil, false
+}
+// parseContextOverflowMessage extracts token limits from common API error message patterns.
+// Supports OpenAI, Azure, vLLM, and other OpenAI-compatible API error formats.
+//
+// It returns the model's token limit, the request's token count, and ok=true when one
+// of the patterns below matches and its numbers can be extracted; otherwise it returns
+// (0, 0, false). Keyword detection is case-insensitive (it runs on a lowercased copy of
+// msg). Patterns are tried in order, and a pattern whose keyword matches but whose
+// numbers cannot be extracted falls through to the next one.
+func parseContextOverflowMessage(msg string) (modelLimit int, requestTokens int, ok bool) {
+	// Common patterns:
+	// OpenAI: "This model's maximum context length is 32768 tokens. However, your request has 40354 input tokens."
+	// Azure: "This model's maximum context length is 32768 tokens, however you requested 40354 tokens"
+	// vLLM: "This model's maximum context length is 32768 tokens. However, your request has 40354 input tokens."
+	// Anthropic: "max_tokens: ... exceeds the maximum"
+	// NOTE(review): no branch below keys on "exceeds the maximum"; the Anthropic form
+	// appears to be recognized only if the message also contains one of the keywords
+	// checked by patterns 1-4 - confirm.
+	lowerMsg := strings.ToLower(msg)
+	// Pattern 1: "maximum context length is X tokens"
+	if strings.Contains(lowerMsg, "maximum context length") {
+		limit := extractNumber(msg, "maximum context length is ", " tokens")
+		if limit > 0 {
+			// Try the request-size phrasings from most to least specific; the first
+			// one that yields a non-zero number wins.
+			request := extractNumber(msg, "your request has ", " input tokens")
+			if request == 0 {
+				request = extractNumber(msg, "your request has ", " tokens")
+			}
+			if request == 0 {
+				request = extractNumber(msg, "you requested ", " tokens")
+			}
+			if request == 0 {
+				request = extractNumber(msg, "requested ", " tokens")
+			}
+			// Only an actual overflow counts here; a request within the limit falls
+			// through so pattern 3 (input + max_tokens) can still handle it.
+			if request > 0 && request > limit {
+				return limit, request, true
+			}
+		}
+	}
+	// Pattern 2: "context_length_exceeded" error code
+	// Unlike pattern 1, this does not require request > limit.
+	if strings.Contains(lowerMsg, "context_length_exceeded") {
+		limit := extractNumber(msg, "maximum context length is ", " tokens")
+		request := extractNumber(msg, "resulted in ", " tokens")
+		if limit > 0 && request > 0 {
+			return limit, request, true
+		}
+	}
+	// Pattern 3: "max_tokens is too large" - response budget exceeds remaining capacity
+	// vLLM/OpenAI: "max_tokens' or 'max_completion_tokens' is too large: 10000.
+	//   This model's maximum context length is 32768 tokens and your request has 30168 input tokens"
+	// In this case, input tokens < model limit, but input + max_tokens > model limit.
+	// We report the effective total (input + max_tokens) as requestTokens.
+	if strings.Contains(lowerMsg, "max_tokens") && strings.Contains(lowerMsg, "too large") {
+		limit := extractNumber(msg, "maximum context length is ", " tokens")
+		inputTokens := extractNumber(msg, "your request has ", " input tokens")
+		if inputTokens == 0 {
+			inputTokens = extractNumber(msg, "your request has ", " tokens")
+		}
+		// The requested completion budget follows "too large: ", terminated by a
+		// period or, failing that, by the next space.
+		maxTokens := extractNumber(msg, "too large: ", ".")
+		if maxTokens == 0 {
+			maxTokens = extractNumber(msg, "too large: ", " ")
+		}
+		if limit > 0 && inputTokens > 0 && maxTokens > 0 {
+			return limit, inputTokens + maxTokens, true
+		}
+		// Fallback: if we got limit and input tokens, treat input as the overflow
+		// (the returned request count may then be below the limit).
+		if limit > 0 && inputTokens > 0 {
+			return limit, inputTokens, true
+		}
+	}
+	// Pattern 4: "input too long" / "too many tokens" generic patterns
+	if strings.Contains(lowerMsg, "input too long") || strings.Contains(lowerMsg, "too many tokens") || strings.Contains(lowerMsg, "too many input tokens") {
+		limit := extractNumber(msg, "limit is ", " tokens")
+		if limit == 0 {
+			limit = extractNumber(msg, "maximum of ", " tokens")
+		}
+		// extractNumber only inspects the first occurrence of its prefix, so the
+		// first "has " in the message decides whether this lookup succeeds.
+		request := extractNumber(msg, "has ", " tokens")
+		if request == 0 {
+			request = extractNumber(msg, "requested ", " tokens")
+		}
+		if limit > 0 && request > 0 {
+			return limit, request, true
+		}
+	}
+	return 0, 0, false
+}
+// extractNumber returns the integer found between the first occurrence of prefix
+// and the next occurrence of suffix in s. It returns 0 when either marker is
+// missing or the text between them does not start with an integer.
+//
+// Matching is case-insensitive, surrounding whitespace is trimmed, and thousands
+// separators ("32,768") are removed before parsing.
+func extractNumber(s string, prefix string, suffix string) int {
+	// Search and slice the SAME lowered string. strings.ToLower can change the
+	// byte length of non-ASCII text, so offsets found in the lowered copy are not
+	// valid offsets into s: they can select the wrong span or run past the end
+	// of s. Digits and commas are unaffected by lowering.
+	lowerS := strings.ToLower(s)
+	lowerPrefix := strings.ToLower(prefix)
+	idx := strings.Index(lowerS, lowerPrefix)
+	if idx < 0 {
+		return 0
+	}
+	remaining := lowerS[idx+len(lowerPrefix):]
+	// Find the suffix
+	endIdx := strings.Index(remaining, strings.ToLower(suffix))
+	if endIdx < 0 {
+		return 0
+	}
+	numStr := strings.TrimSpace(remaining[:endIdx])
+	// Remove commas from numbers like "32,768"
+	numStr = strings.ReplaceAll(numStr, ",", "")
+	// Sscanf reads the leading integer and ignores trailing text, so a span such
+	// as "40354 input" still yields 40354.
+	var n int
+	if _, err := fmt.Sscanf(numStr, "%d", &n); err != nil {
+		return 0
+	}
+	return n
+}

package/go/rlm/rlm.go CHANGED Viewed

@@ -20,6 +20,7 @@ type RLM struct {
 	stats            RLMStats
 	observer         *Observer
 	metaAgent        *MetaAgent
+	contextOverflow  *ContextOverflowConfig
 }
 func New(model string, config Config) *RLM {
@@ -57,6 +58,15 @@ func New(model string, config Config) *RLM {
 		r.metaAgent = NewMetaAgent(r, *config.MetaAgent, obs)
 	}
+	// Setup context overflow handling
+	if config.ContextOverflow != nil {
+		r.contextOverflow = config.ContextOverflow
+	} else {
+		// Enable by default with sensible defaults
+		defaultConfig := DefaultContextOverflowConfig()
+		r.contextOverflow = &defaultConfig
+	}
 	return r
 }

package/go/rlm/structured.go CHANGED Viewed

@@ -102,12 +102,65 @@ func (r *RLM) structuredCompletionDirect(query string, context string, config *S
 		{Role: "user", Content: prompt},
 	}
+	// Track whether we've already reduced context for overflow recovery
+	contextReduced := false
 	for attempt := 0; attempt < config.MaxRetries; attempt++ {
 		result, err := r.callLLM(messages)
 		stats.LlmCalls++
 		stats.Iterations++
 		if err != nil {
+			// Check for context overflow and attempt automatic recovery
+			if coe, isOverflow := IsContextOverflow(err); isOverflow && !contextReduced && r.contextOverflow != nil && r.contextOverflow.Enabled {
+				r.observer.Debug("structured", "Context overflow detected on attempt %d: model limit %d, request %d tokens",
+					attempt+1, coe.ModelLimit, coe.RequestTokens)
+				modelLimit := coe.ModelLimit
+				if r.contextOverflow.MaxModelTokens > 0 {
+					modelLimit = r.contextOverflow.MaxModelTokens
+				}
+				reducer := newContextReducer(r, *r.contextOverflow, r.observer)
+				reducedContext, reduceErr := reducer.ReduceForCompletion(query, context, modelLimit)
+				if reduceErr != nil {
+					r.observer.Error("structured", "Context reduction failed: %v", reduceErr)
+					lastErr = err
+					continue
+				}
+				r.observer.Debug("structured", "Context reduced: %d -> %d chars, rebuilding prompt", len(context), len(reducedContext))
+				context = reducedContext
+				contextReduced = true
+				// Rebuild the prompt with reduced context
+				prompt = fmt.Sprintf(
+					"You are a data extraction assistant. Extract information from the context and return it as JSON.\n\n"+
+						"Context:\n%s\n\n"+
+						"Task: %s\n\n"+
+						"Required JSON Schema:\n%s%s\n\n"+
+						"%s"+
+						"CRITICAL INSTRUCTIONS:\n"+
+						"1. Return ONLY valid JSON - no explanations, no markdown, no code blocks\n"+
+						"2. The JSON must match the schema EXACTLY\n"+
+						"3. Include ALL required fields (see list above)\n"+
+						"4. Use correct data types (strings in quotes, numbers without quotes, arrays in [], objects in {})\n"+
+						"5. For arrays, return actual JSON arrays [] not objects\n"+
+						"6. For enum fields, use ONLY the EXACT values listed - do not paraphrase or substitute\n"+
+						"7. For nested objects, ensure ALL required fields within those objects are included\n"+
+						"8. Start your response directly with { or [ depending on the schema\n\n"+
+						"JSON Response:",
+					reducedContext, query, string(schemaJSON), requiredFieldsHint, constraints,
+				)
+				messages = []Message{
+					{Role: "system", Content: "You are a data extraction assistant. Respond only with valid JSON objects."},
+					{Role: "user", Content: prompt},
+				}
+				// Don't count this as a "used" attempt since it was an overflow, not a validation failure
+				attempt--
+				continue
+			}
 			lastErr = err
 			continue
 		}
@@ -790,7 +843,7 @@ func buildValidationFeedback(validationErr error, schema *JSONSchema, previousRe
 	var feedback strings.Builder
 	feedback.WriteString("VALIDATION ERROR - Your previous response was invalid.\n\n")
-	feedback.WriteString(fmt.Sprintf("ERROR: %s\n\n", errMsg))
+	fmt.Fprintf(&feedback, "ERROR: %s\n\n", errMsg)
 	// Extract what field caused the issue
 	if strings.Contains(errMsg, "missing required field:") {
@@ -799,17 +852,17 @@ func buildValidationFeedback(validationErr error, schema *JSONSchema, previousRe
 		fieldName = strings.TrimSpace(fieldName)
 		feedback.WriteString("SPECIFIC ISSUE:\n")
-		feedback.WriteString(fmt.Sprintf("The field '%s' is REQUIRED but was not provided.\n\n", fieldName))
+		fmt.Fprintf(&feedback, "The field '%s' is REQUIRED but was not provided.\n\n", fieldName)
 		// Find the schema for this field and provide details
 		if schema.Type == "object" && schema.Properties != nil {
 			if fieldSchema, exists := schema.Properties[fieldName]; exists {
 				feedback.WriteString("FIELD REQUIREMENTS:\n")
-				feedback.WriteString(fmt.Sprintf("- Field name: '%s'\n", fieldName))
-				feedback.WriteString(fmt.Sprintf("- Type: %s\n", fieldSchema.Type))
+				fmt.Fprintf(&feedback, "- Field name: '%s'\n", fieldName)
+				fmt.Fprintf(&feedback, "- Type: %s\n", fieldSchema.Type)
 				if fieldSchema.Type == "object" && len(fieldSchema.Required) > 0 {
-					feedback.WriteString(fmt.Sprintf("- This is an object with required fields: %s\n", strings.Join(fieldSchema.Required, ", ")))
+					fmt.Fprintf(&feedback, "- This is an object with required fields: %s\n", strings.Join(fieldSchema.Required, ", "))
 					if fieldSchema.Properties != nil {
 						feedback.WriteString("\nNESTED FIELD DETAILS:\n")
@@ -819,13 +872,13 @@ func buildValidationFeedback(validationErr error, schema *JSONSchema, previousRe
 							if isRequired {
 								requiredMark = " [REQUIRED]"
 							}
-							feedback.WriteString(fmt.Sprintf("  - %s: %s%s\n", nestedField, nestedSchema.Type, requiredMark))
+							fmt.Fprintf(&feedback, "  - %s: %s%s\n", nestedField, nestedSchema.Type, requiredMark)
 						}
 					}
 				}
 				if fieldSchema.Type == "array" && fieldSchema.Items != nil {
-					feedback.WriteString(fmt.Sprintf("- This is an array of: %s\n", fieldSchema.Items.Type))
+					fmt.Fprintf(&feedback, "- This is an array of: %s\n", fieldSchema.Items.Type)
 				}
 			}
 		}

package/go/rlm/textrank.go ADDED Viewed

@@ -0,0 +1,273 @@
+package rlm
+import (
+	"math"
+	"sort"
+	"strings"
+)
+// ─── TextRank Graph-Based Sentence Ranking ──────────────────────────────────
+//
+// Pure Go, zero external dependencies, zero API calls.
+// Implements the TextRank algorithm (Mihalcea & Tarau, 2004):
+// 1. Build TF-IDF vectors for each sentence
+// 2. Compute cosine similarity between all sentence pairs
+// 3. Run PageRank iteration on the similarity graph
+// 4. Select top-ranked sentences that fit within token budget
+// 5. Preserve original document order
+// TextRankConfig controls the TextRank algorithm parameters.
+// The defaults quoted below are the values returned by DefaultTextRankConfig;
+// PageRank and BuildSimilarityGraph read these fields as given and do not
+// substitute defaults for zero values.
+type TextRankConfig struct {
+	// DampingFactor is the PageRank damping factor (default: 0.85)
+	DampingFactor float64
+	// MaxIterations for PageRank convergence (default: 100)
+	MaxIterations int
+	// ConvergenceThreshold for PageRank (default: 0.0001); iteration stops once
+	// the largest per-node score change in a pass drops below it.
+	ConvergenceThreshold float64
+	// MinSimilarity threshold to create an edge (default: 0.1); sentence pairs
+	// whose cosine similarity is below it get no edge.
+	MinSimilarity float64
+}
+// DefaultTextRankConfig returns the standard TextRank parameters: damping 0.85,
+// at most 100 iterations, a convergence threshold of 0.0001, and a minimum edge
+// similarity of 0.1.
+func DefaultTextRankConfig() TextRankConfig {
+	var cfg TextRankConfig
+	cfg.DampingFactor = 0.85
+	cfg.MaxIterations = 100
+	cfg.ConvergenceThreshold = 0.0001
+	cfg.MinSimilarity = 0.1
+	return cfg
+}
+// tfidfVector represents a sparse TF-IDF vector for a sentence.
+// Only terms that occur in the sentence are stored.
+type tfidfVector struct {
+	// terms maps each term to its TF-IDF weight (term count * log(N/df)),
+	// as filled in by buildTFIDFVectors.
+	terms map[string]float64
+	norm  float64 // precomputed L2 norm
+}
+// buildTFIDFVectors turns every sentence into a sparse TF-IDF vector.
+// Each sentence is tokenized and stop-word filtered; a term's weight is its
+// count in the sentence times log(N/df), where N is the number of sentences
+// and df the number of sentences containing the term. The L2 norm of each
+// vector is stored alongside it. Returns nil for an empty input.
+func buildTFIDFVectors(sentences []string) []tfidfVector {
+	total := len(sentences)
+	if total == 0 {
+		return nil
+	}
+	// Per-sentence term counts, plus the number of sentences each term occurs in.
+	counts := make([]map[string]int, total)
+	docFreq := make(map[string]int)
+	for i, sentence := range sentences {
+		tf := make(map[string]int)
+		for _, word := range FilterStopWords(TokenizeWords(sentence)) {
+			tf[word]++
+		}
+		counts[i] = tf
+		// Every distinct term of this sentence contributes exactly once.
+		for word := range tf {
+			docFreq[word]++
+		}
+	}
+	sentenceCount := float64(total)
+	vectors := make([]tfidfVector, total)
+	for i, tf := range counts {
+		weights := make(map[string]float64)
+		sumSquares := 0.0
+		for word, freq := range tf {
+			weight := float64(freq) * math.Log(sentenceCount/float64(docFreq[word]))
+			weights[word] = weight
+			sumSquares += weight * weight
+		}
+		vectors[i] = tfidfVector{terms: weights, norm: math.Sqrt(sumSquares)}
+	}
+	return vectors
+}
+// cosineSimilarity returns the cosine of the angle between two TF-IDF vectors,
+// using their precomputed norms. A vector with zero norm is similar to nothing,
+// so the result is 0 in that case.
+func cosineSimilarity(a, b tfidfVector) float64 {
+	if a.norm == 0 || b.norm == 0 {
+		return 0
+	}
+	// Walk the smaller term map and probe the larger one.
+	shorter, longer := a.terms, b.terms
+	if len(shorter) > len(longer) {
+		shorter, longer = longer, shorter
+	}
+	var dot float64
+	for term, weight := range shorter {
+		other, shared := longer[term]
+		if !shared {
+			continue
+		}
+		dot += weight * other
+	}
+	return dot / (a.norm * b.norm)
+}
+// BuildSimilarityGraph returns a symmetric n×n adjacency matrix whose entry
+// [i][j] is the cosine similarity of sentences i and j. Pairs scoring below
+// config.MinSimilarity, and the diagonal, are left at 0.
+func BuildSimilarityGraph(sentences []string, config TextRankConfig) [][]float64 {
+	vectors := buildTFIDFVectors(sentences)
+	size := len(sentences)
+	graph := make([][]float64, size)
+	for row := range graph {
+		graph[row] = make([]float64, size)
+	}
+	// Visit each unordered pair once and mirror the weight.
+	for i := range graph {
+		for j := i + 1; j < size; j++ {
+			weight := cosineSimilarity(vectors[i], vectors[j])
+			if weight >= config.MinSimilarity {
+				graph[i][j], graph[j][i] = weight, weight
+			}
+		}
+	}
+	return graph
+}
+// PageRank runs weighted PageRank over an adjacency matrix and returns one
+// score per node (sentence). Scores start uniform at 1/n; each pass sets a
+// node's score to (1-d)/n plus d times the weight-normalized scores of the
+// nodes pointing at it. Iteration stops after config.MaxIterations passes or
+// once the largest change in a pass falls below config.ConvergenceThreshold.
+// Returns nil for an empty graph.
+func PageRank(graph [][]float64, config TextRankConfig) []float64 {
+	nodes := len(graph)
+	if nodes == 0 {
+		return nil
+	}
+	damping := config.DampingFactor
+	current := make([]float64, nodes)
+	next := make([]float64, nodes)
+	uniform := 1.0 / float64(nodes)
+	for i := range current {
+		current[i] = uniform
+	}
+	// Total outgoing edge weight per node, used to normalize contributions.
+	outgoing := make([]float64, nodes)
+	for src := 0; src < nodes; src++ {
+		for dst := 0; dst < nodes; dst++ {
+			outgoing[src] += graph[src][dst]
+		}
+	}
+	for pass := 0; pass < config.MaxIterations; pass++ {
+		largestChange := 0.0
+		for dst := 0; dst < nodes; dst++ {
+			inflow := 0.0
+			for src := 0; src < nodes; src++ {
+				edge := graph[src][dst]
+				if edge > 0 && outgoing[src] > 0 {
+					inflow += edge / outgoing[src] * current[src]
+				}
+			}
+			next[dst] = (1-damping)/float64(nodes) + damping*inflow
+			if change := math.Abs(next[dst] - current[dst]); change > largestChange {
+				largestChange = change
+			}
+		}
+		// The freshly computed scores become current; the old buffer is reused.
+		current, next = next, current
+		if largestChange < config.ConvergenceThreshold {
+			break
+		}
+	}
+	return current
+}
+// CompressContextTextRank shrinks text to fit targetTokens by keeping the
+// sentences ranked highest by TextRank, using DefaultTextRankConfig.
+// Kept sentences appear in their original order.
+func CompressContextTextRank(text string, targetTokens int) string {
+	defaults := DefaultTextRankConfig()
+	return CompressContextTextRankWithConfig(text, targetTokens, defaults)
+}
+// CompressContextTextRankWithConfig is like CompressContextTextRank but with custom TextRank parameters.
+//
+// Text already within targetTokens (per EstimateTokens), or text that yields no
+// sentences, is returned unchanged. Otherwise sentences are ranked with PageRank
+// over their similarity graph and added greedily, best first, skipping any
+// sentence that would exceed the budget. The survivors are joined with single
+// spaces in document order. If no whole sentence fits, the top-ranked sentence
+// is truncated instead; a non-positive budget then yields "".
+func CompressContextTextRankWithConfig(text string, targetTokens int, config TextRankConfig) string {
+	if EstimateTokens(text) <= targetTokens {
+		return text
+	}
+	sentences := SplitSentences(text)
+	if len(sentences) == 0 {
+		return text
+	}
+	// Build similarity graph and run PageRank
+	graph := BuildSimilarityGraph(sentences, config)
+	scores := PageRank(graph, config)
+	// Create scored sentences with PageRank scores
+	ranked := make([]ScoredSentence, len(sentences))
+	for i, s := range sentences {
+		ranked[i] = ScoredSentence{
+			Text:  s,
+			Score: scores[i],
+			Index: i,
+		}
+	}
+	// Sort by score descending. The sort is stable so sentences with equal
+	// scores keep document order, which makes the selection deterministic.
+	sort.SliceStable(ranked, func(i, j int) bool {
+		return ranked[i].Score > ranked[j].Score
+	})
+	// Greedily select top sentences until budget is reached
+	selected := make([]ScoredSentence, 0, len(ranked))
+	currentTokens := 0
+	for _, s := range ranked {
+		sentTokens := EstimateTokens(s.Text)
+		if currentTokens+sentTokens > targetTokens {
+			continue
+		}
+		selected = append(selected, s)
+		currentTokens += sentTokens
+	}
+	if len(selected) == 0 {
+		// Budget too small for any whole sentence - truncate the top sentence.
+		// ranked is non-empty here because sentences is non-empty.
+		top := ranked[0].Text
+		// NOTE(review): 3 presumably mirrors the chars-per-token ratio assumed by
+		// EstimateTokens - confirm.
+		maxChars := targetTokens * 3
+		if maxChars <= 0 {
+			// A negative bound would panic when slicing.
+			return ""
+		}
+		if maxChars >= len(top) {
+			return top
+		}
+		// Back up to a rune boundary so a multi-byte UTF-8 character is never split
+		// (continuation bytes have the bit pattern 10xxxxxx).
+		for maxChars > 0 && top[maxChars]&0xC0 == 0x80 {
+			maxChars--
+		}
+		return top[:maxChars]
+	}
+	// Re-sort by original index to preserve document order
+	sort.Slice(selected, func(i, j int) bool {
+		return selected[i].Index < selected[j].Index
+	})
+	parts := make([]string, len(selected))
+	for i, s := range selected {
+		parts[i] = s.Text
+	}
+	return strings.Join(parts, " ")
+}