npm - recursive-llm-ts - Versions diffs - 4.9.0 → 5.0.1 - Mend

recursive-llm-ts 4.9.0 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/README.md +3 -1
package/bin/rlm-go +0 -0
package/dist/bridge-interface.d.ts +149 -0
package/go/cmd/rlm/main.go +39 -6
package/go/go.mod +13 -3
package/go/go.sum +53 -2
package/go/rlm/compression.go +59 -0
package/go/rlm/context_overflow.go +21 -36
package/go/rlm/context_savings_test.go +387 -0
package/go/rlm/json_extraction.go +140 -0
package/go/rlm/lcm_agentic_map.go +317 -0
package/go/rlm/lcm_context_loop.go +309 -0
package/go/rlm/lcm_delegation.go +257 -0
package/go/rlm/lcm_episodes.go +313 -0
package/go/rlm/lcm_episodes_test.go +384 -0
package/go/rlm/lcm_files.go +424 -0
package/go/rlm/lcm_map.go +348 -0
package/go/rlm/lcm_store.go +615 -0
package/go/rlm/lcm_summarizer.go +239 -0
package/go/rlm/lcm_test.go +1407 -0
package/go/rlm/rlm.go +124 -1
package/go/rlm/store_backend.go +121 -0
package/go/rlm/store_backend_test.go +428 -0
package/go/rlm/store_sqlite.go +575 -0
package/go/rlm/structured.go +6 -83
package/go/rlm/token_tracking_test.go +25 -11
package/go/rlm/tokenizer.go +216 -0
package/go/rlm/tokenizer_test.go +305 -0
package/go/rlm/types.go +23 -1
package/go/rlm.test +0 -0
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -15,6 +15,8 @@ TypeScript/JavaScript package for [Recursive Language Models (RLM)](https://gith
 **Performance & Resilience**
 - **Pure Go Backend** - 50x faster startup, 3x less memory vs Python
 - **Context Overflow Recovery** - Automatic detection and 6 reduction strategies (mapreduce, truncate, chunked, tfidf, textrank, refine)
+- **Lossless Context Management** - Episodic memory, 5-level summarization escalation, SQLite persistence with FTS5 search
+- **BPE Token Counting** - Accurate model-specific tokenization via tiktoken (o200k, cl100k) with cached counting
 - **Caching** - Exact-match caching with in-memory and file-based backends
 - **Retry & Fallback** - Exponential backoff, jitter, and multi-provider fallback chains
 - **AbortController** - Cancel any operation mid-flight
@@ -32,7 +34,7 @@ TypeScript/JavaScript package for [Recursive Language Models (RLM)](https://gith
 - **Meta-Agent Mode** - Automatically optimize queries for better results
 - **Observability** - OpenTelemetry tracing, Langfuse integration, and debug logging
 - **File Storage** - Process local directories or S3/MinIO/LocalStack buckets as LLM context
-- **150+ Tests** - Comprehensive Vitest + Go test suites
+- **200+ Tests** - Comprehensive Vitest + Go test suites
 ## Installation

package/bin/rlm-go CHANGED Viewed

Binary file

package/dist/bridge-interface.d.ts CHANGED Viewed

@@ -51,6 +51,154 @@ export interface ContextOverflowConfig {
     /** Maximum reduction attempts before giving up (default: 3) */
     max_reduction_attempts?: number;
 }
+export interface LCMConfig {
+    /** Enable LCM context management (default: false for backward compat) */
+    enabled?: boolean;
+    /** Soft token threshold — async compaction begins above this (default: 70% of model limit) */
+    soft_threshold?: number;
+    /** Hard token threshold — blocking compaction above this (default: 90% of model limit) */
+    hard_threshold?: number;
+    /** Number of messages to compact at once (default: 10) */
+    compaction_block_size?: number;
+    /** Target tokens per summary node (default: 500) */
+    summary_target_tokens?: number;
+    /** Large file handling configuration */
+    file_handling?: LCMFileConfig;
+    /** Episode-based context grouping configuration */
+    episodes?: EpisodeConfig;
+    /** Persistence backend configuration (default: in-memory) */
+    store_backend?: StoreBackendConfig;
+}
+export interface LCMFileConfig {
+    /** Token count above which files are stored externally with exploration summaries (default: 25000) */
+    token_threshold?: number;
+}
+export interface EpisodeConfig {
+    /** Max tokens before auto-closing an episode (default: 2000) */
+    max_episode_tokens?: number;
+    /** Max messages before auto-closing an episode (default: 20) */
+    max_episode_messages?: number;
+    /** Topic change sensitivity 0-1 (reserved for future semantic detection) */
+    topic_change_threshold?: number;
+    /** Auto-generate summary when episode closes (default: true) */
+    auto_compact_after_close?: boolean;
+}
+export interface Episode {
+    id: string;
+    title: string;
+    message_ids: string[];
+    start_time: string;
+    end_time: string;
+    tokens: number;
+    summary?: string;
+    summary_tokens?: number;
+    status: 'active' | 'compacted' | 'archived';
+    tags?: string[];
+    parent_episode_id?: string;
+}
+export interface StoreBackendConfig {
+    /** Backend type: 'memory' (default) or 'sqlite' */
+    type?: 'memory' | 'sqlite';
+    /** Path for SQLite database file (required when type is 'sqlite', use ':memory:' for in-memory SQLite) */
+    path?: string;
+}
+export interface LLMMapConfig {
+    /** Path to JSONL input file */
+    input_path: string;
+    /** Path to JSONL output file */
+    output_path: string;
+    /** Prompt template — use {{item}} as placeholder for each item */
+    prompt: string;
+    /** JSON Schema for output validation */
+    output_schema?: Record<string, any>;
+    /** Worker pool concurrency (default: 16) */
+    concurrency?: number;
+    /** Per-item retry limit (default: 3) */
+    max_retries?: number;
+    /** Model to use (defaults to engine model) */
+    model?: string;
+}
+export interface LLMMapResult {
+    total_items: number;
+    completed: number;
+    failed: number;
+    output_path: string;
+    duration_ms: number;
+    tokens_used: number;
+}
+export interface AgenticMapConfig {
+    /** Path to JSONL input file */
+    input_path: string;
+    /** Path to JSONL output file */
+    output_path: string;
+    /** Prompt template — use {{item}} as placeholder for each item */
+    prompt: string;
+    /** JSON Schema for output validation */
+    output_schema?: Record<string, any>;
+    /** Worker pool concurrency (default: 8) */
+    concurrency?: number;
+    /** Per-item retry limit (default: 2) */
+    max_retries?: number;
+    /** Model for sub-agents (defaults to engine model) */
+    model?: string;
+    /** If true, sub-agents cannot modify filesystem */
+    read_only?: boolean;
+    /** Max recursion depth for sub-agents (default: 3) */
+    max_depth?: number;
+    /** Max iterations per sub-agent (default: 15) */
+    max_iterations?: number;
+}
+export interface AgenticMapResult {
+    total_items: number;
+    completed: number;
+    failed: number;
+    output_path: string;
+    duration_ms: number;
+    tokens_used: number;
+}
+export interface DelegationRequest {
+    /** Task description for the sub-agent */
+    prompt: string;
+    /** Specific slice of work being handed off (required for non-root) */
+    delegated_scope?: string;
+    /** Work the caller retains (required for non-root) */
+    kept_work?: string;
+    /** Read-only exploration agent (exempt from guard) */
+    read_only?: boolean;
+    /** Parallel decomposition (exempt from guard) */
+    parallel?: boolean;
+}
+export interface LCMStoreStats {
+    total_messages: number;
+    total_summaries: number;
+    active_context_items: number;
+    active_context_tokens: number;
+    immutable_store_tokens: number;
+    compression_ratio: number;
+}
+export interface LCMGrepResult {
+    message_id: string;
+    role: string;
+    content: string;
+    summary_id?: string;
+    match_line: string;
+}
+export interface LCMDescribeResult {
+    type: 'message' | 'summary';
+    id: string;
+    tokens: number;
+    role?: string;
+    kind?: 'leaf' | 'condensed';
+    level?: number;
+    covered_ids?: string[];
+    file_ids?: string[];
+    content?: string;
+}
+export interface EpisodeListResult {
+    episodes: Episode[];
+    active_episode_id?: string;
+    total_episodes: number;
+}
 export interface RLMConfig {
     recursive_model?: string;
     api_base?: string;
@@ -62,6 +210,7 @@ export interface RLMConfig {
     meta_agent?: MetaAgentConfig;
     observability?: ObservabilityConfig;
     context_overflow?: ContextOverflowConfig;
+    lcm?: LCMConfig;
     debug?: boolean;
     api_version?: string;
     timeout?: number;

package/go/cmd/rlm/main.go CHANGED Viewed

@@ -14,7 +14,9 @@ type requestPayload struct {
 	Query      string                 `json:"query"`
 	Context    string                 `json:"context"`
 	Config     map[string]interface{} `json:"config"`
-	Structured *structuredRequest     `json:"structured,omitempty"`
+	Structured  *structuredRequest      `json:"structured,omitempty"`
+	LLMMap      *rlm.LLMMapConfig      `json:"llm_map,omitempty"`      // LCM LLM-Map operation
+	AgenticMap  *rlm.AgenticMapConfig  `json:"agentic_map,omitempty"`  // LCM Agentic-Map operation
 }
 type structuredRequest struct {
@@ -24,10 +26,13 @@ type structuredRequest struct {
 }
 type responsePayload struct {
-	Result           interface{}  `json:"result"`
-	Stats            rlm.RLMStats `json:"stats"`
-	StructuredResult bool         `json:"structured_result,omitempty"`
-	TraceEvents      interface{}  `json:"trace_events,omitempty"`
+	Result           interface{}        `json:"result"`
+	Stats            rlm.RLMStats       `json:"stats"`
+	StructuredResult bool               `json:"structured_result,omitempty"`
+	TraceEvents      interface{}        `json:"trace_events,omitempty"`
+	LCMStats          *rlm.LCMStoreStats    `json:"lcm_stats,omitempty"`
+	LLMMapResult      *rlm.LLMMapResult     `json:"llm_map_result,omitempty"`
+	AgenticMapResult  *rlm.AgenticMapResult  `json:"agentic_map_result,omitempty"`
 }
 func main() {
@@ -54,8 +59,30 @@ func main() {
 	var resp responsePayload
+	// Handle LLM-Map operation if requested
+	if req.LLMMap != nil {
+		mapResult, err := engine.LLMMap(*req.LLMMap)
+		if err != nil {
+			fmt.Fprintln(os.Stderr, err)
+			os.Exit(1)
+		}
+		resp = responsePayload{
+			Result:       "llm_map_complete",
+			LLMMapResult: mapResult,
+		}
+	} else if req.AgenticMap != nil {
+		// Handle Agentic-Map operation
+		agenticResult, err := engine.AgenticMap(*req.AgenticMap)
+		if err != nil {
+			fmt.Fprintln(os.Stderr, err)
+			os.Exit(1)
+		}
+		resp = responsePayload{
+			Result:           "agentic_map_complete",
+			AgenticMapResult: agenticResult,
+		}
+	} else if req.Structured != nil {
 	// Handle structured completion if requested
-	if req.Structured != nil {
 		structuredConfig := &rlm.StructuredConfig{
 			Schema:            req.Structured.Schema,
 			ParallelExecution: req.Structured.ParallelExecution,
@@ -96,6 +123,12 @@ func main() {
 		}
 	}
+	// Include LCM stats if enabled
+	if lcmEngine := engine.GetLCMEngine(); lcmEngine != nil {
+		stats := lcmEngine.GetStore().Stats()
+		resp.LCMStats = &stats
+	}
 	payload, err := json.Marshal(resp)
 	if err != nil {
 		fmt.Fprintln(os.Stderr, "Failed to encode response JSON:", err)

package/go/go.mod CHANGED Viewed

@@ -5,24 +5,34 @@ go 1.25.0
 toolchain go1.25.1
 require (
+	github.com/cespare/xxhash/v2 v2.3.0
 	github.com/dop251/goja v0.0.0-20231027120936-b396bb4c349d
 	github.com/google/jsonschema-go v0.4.2
+	github.com/pkoukk/tiktoken-go v0.1.8
 	go.opentelemetry.io/otel v1.40.0
 	go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.40.0
 	go.opentelemetry.io/otel/sdk v1.40.0
 	go.opentelemetry.io/otel/trace v1.40.0
+	modernc.org/sqlite v1.46.1
 )
 require (
-	github.com/cespare/xxhash/v2 v2.3.0 // indirect
-	github.com/dlclark/regexp2 v1.7.0 // indirect
+	github.com/dlclark/regexp2 v1.10.0 // indirect
+	github.com/dustin/go-humanize v1.0.1 // indirect
 	github.com/go-logr/logr v1.4.3 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect
-	github.com/google/pprof v0.0.0-20230207041349-798e818bf904 // indirect
+	github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect
 	github.com/google/uuid v1.6.0 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/ncruces/go-strftime v1.0.0 // indirect
+	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
 	go.opentelemetry.io/auto/sdk v1.2.1 // indirect
 	go.opentelemetry.io/otel/metric v1.40.0 // indirect
+	golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect
 	golang.org/x/sys v0.40.0 // indirect
 	golang.org/x/text v0.3.8 // indirect
+	modernc.org/libc v1.67.6 // indirect
+	modernc.org/mathutil v1.7.1 // indirect
+	modernc.org/memory v1.11.0 // indirect
 )

package/go/go.sum CHANGED Viewed

@@ -7,13 +7,16 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dlclark/regexp2 v1.4.1-0.20201116162257-a2a8dda75c91/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
-github.com/dlclark/regexp2 v1.7.0 h1:7lJfhqlPssTb1WQx4yvTHN0uElPEv52sbaECrAQxjAo=
 github.com/dlclark/regexp2 v1.7.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
+github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/dop251/goja v0.0.0-20211022113120-dc8c55024d06/go.mod h1:R9ET47fwRVRPZnOGvHxxhuZcbrMCuiqOz3Rlrh4KSnk=
 github.com/dop251/goja v0.0.0-20231027120936-b396bb4c349d h1:wi6jN5LVt/ljaBG4ue79Ekzb12QfJ52L9Q98tl8SWhw=
 github.com/dop251/goja v0.0.0-20231027120936-b396bb4c349d/go.mod h1:QMWlm50DNe14hD7t24KEqZuUdC9sOTy8W6XbCU1mlw4=
 github.com/dop251/goja_nodejs v0.0.0-20210225215109-d91c329300e7/go.mod h1:hn7BA7c8pLvoGndExHudxTDKZ84Pyvv+90pbBjbTz0Y=
 github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d/go.mod h1:DngW8aVqWbuLRMHItjPUyqdj+HWPvnQe8V8y1nDpIbM=
+github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
+github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
 github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
@@ -25,10 +28,13 @@ github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
 github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8=
 github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
-github.com/google/pprof v0.0.0-20230207041349-798e818bf904 h1:4/hN5RUoecvl+RmJRE2YxKWtnnQls6rQjjW5oV7qg2U=
 github.com/google/pprof v0.0.0-20230207041349-798e818bf904/go.mod h1:uglQLonpP8qtYCYyzA+8c/9qtqgA3qsXGYqCPKARAFg=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
+github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
 github.com/ianlancetaylor/demangle v0.0.0-20220319035150-800ac71e25c2/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w=
 github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
 github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
@@ -36,8 +42,16 @@ github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NB
 github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
+github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
+github.com/pkoukk/tiktoken-go v0.1.8 h1:85ENo+3FpWgAACBaEUVp+lctuTcYUO7BtmfhlN/QTRo=
+github.com/pkoukk/tiktoken-go v0.1.8/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
 github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
 github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
 github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
@@ -60,18 +74,25 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
 go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY=
+golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70=
 golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
+golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
+golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
 golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
@@ -85,6 +106,8 @@ golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
+golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
@@ -94,3 +117,31 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
 gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis=
+modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
+modernc.org/ccgo/v4 v4.30.1 h1:4r4U1J6Fhj98NKfSjnPUN7Ze2c6MnAdL0hWw6+LrJpc=
+modernc.org/ccgo/v4 v4.30.1/go.mod h1:bIOeI1JL54Utlxn+LwrFyjCx2n2RDiYEaJVSrgdrRfM=
+modernc.org/fileutil v1.3.40 h1:ZGMswMNc9JOCrcrakF1HrvmergNLAmxOPjizirpfqBA=
+modernc.org/fileutil v1.3.40/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc=
+modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
+modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
+modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE=
+modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
+modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
+modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
+modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI=
+modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE=
+modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
+modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
+modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
+modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
+modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
+modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
+modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
+modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
+modernc.org/sqlite v1.46.1 h1:eFJ2ShBLIEnUWlLy12raN0Z1plqmFX9Qe3rjQTKt6sU=
+modernc.org/sqlite v1.46.1/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA=
+modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
+modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
+modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
+modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=

package/go/rlm/compression.go ADDED Viewed

@@ -0,0 +1,59 @@
+package rlm
+// ─── Shared Text Compression Utilities ──────────────────────────────────────
+// Consolidated from context_overflow.go and lcm_summarizer.go to eliminate
+// duplication of the "keep start + end, truncate middle" strategy.
+// TruncateTextParams configures deterministic text truncation.
+type TruncateTextParams struct {
+	// MaxTokens is the target token count.
+	MaxTokens int
+	// MarkerText is inserted at the truncation point.
+	// Default: "\n[... content truncated ...]\n"
+	MarkerText string
+	// StartFraction is the fraction of budget kept from the start (default: 2/3).
+	StartFraction float64
+}
+// TruncateText performs deterministic truncation using the "keep start + end"
+// strategy, which addresses the "lost in the middle" problem by preserving
+// both the beginning and end of the text.
+//
+// This is used as the guaranteed-convergence fallback in both:
+// - LCM's three-level escalation (Level 3)
+// - Context overflow's truncation strategy
+func TruncateText(text string, params TruncateTextParams) string {
+	// Apply defaults
+	if params.StartFraction <= 0 || params.StartFraction >= 1 {
+		params.StartFraction = 2.0 / 3.0
+	}
+	if params.MarkerText == "" {
+		params.MarkerText = "\n[... content truncated ...]\n"
+	}
+	// Conservative chars-to-tokens ratio: 3 chars per token
+	maxChars := params.MaxTokens * 3
+	if len(text) <= maxChars {
+		return text
+	}
+	startChars := int(float64(maxChars) * params.StartFraction)
+	endChars := maxChars - startChars
+	// Guard against edge cases
+	if startChars > len(text) {
+		startChars = len(text)
+	}
+	if endChars > len(text)-startChars {
+		endChars = len(text) - startChars
+	}
+	if endChars < 0 {
+		endChars = 0
+	}
+	if endChars == 0 {
+		return text[:startChars] + params.MarkerText
+	}
+	return text[:startChars] + params.MarkerText + text[len(text)-endChars:]
+}

package/go/rlm/context_overflow.go CHANGED Viewed

@@ -48,18 +48,18 @@ var modelTokenLimits = map[string]int{
 	"o1-preview":        128000,
 	"o3-mini":           200000,
 	// Anthropic (via LiteLLM/proxy)
-	"claude-3-opus":       200000,
-	"claude-3-sonnet":     200000,
-	"claude-3-haiku":      200000,
-	"claude-3.5-sonnet":   200000,
-	"claude-3.5-haiku":    200000,
-	"claude-sonnet-4":     200000,
-	"claude-opus-4":       200000,
+	"claude-3-opus":     200000,
+	"claude-3-sonnet":   200000,
+	"claude-3-haiku":    200000,
+	"claude-3.5-sonnet": 200000,
+	"claude-3.5-haiku":  200000,
+	"claude-sonnet-4":   200000,
+	"claude-opus-4":     200000,
 	// Llama (common vLLM deployments)
-	"llama-3":     8192,
-	"llama-3.1":   128000,
-	"llama-3.2":   128000,
-	"llama-3.3":   128000,
+	"llama-3":   8192,
+	"llama-3.1": 128000,
+	"llama-3.2": 128000,
+	"llama-3.3": 128000,
 	// Mistral
 	"mistral-7b":    32768,
 	"mixtral-8x7b":  32768,
@@ -181,21 +181,11 @@ func (r *RLM) getResponseTokenBudget() int {
 // ─── Token Estimation ────────────────────────────────────────────────────────
-// EstimateTokens provides a fast approximation of token count for a string.
-// Uses a character-to-token ratio heuristic. This is intentionally conservative
-// (over-estimates slightly) to avoid overflow.
-//
-// Approximate ratios for common encodings:
-//   - English text: ~4 chars/token (cl100k_base)
-//   - JSON/code:    ~3.5 chars/token
-//   - CJK text:     ~1.5 chars/token
-//   - Mixed:        ~3.5 chars/token (safe default)
+// EstimateTokens returns the token count for a string using the global tokenizer.
+// When SetDefaultTokenizer has been called with a model name, this uses accurate
+// BPE tokenization via tiktoken. Otherwise falls back to a ~3.5 chars/token heuristic.
 func EstimateTokens(text string) int {
-	if len(text) == 0 {
-		return 0
-	}
-	// Use 3.5 chars/token as conservative estimate
-	return (len(text)*10 + 34) / 35 // equivalent to ceil(len/3.5)
+	return GetTokenizer().CountTokens(text)
 }
 // EstimateMessagesTokens estimates the total tokens for a set of chat messages.
@@ -477,24 +467,20 @@ func (cr *contextReducer) reduceByMapReduce(query string, chunks []string, model
 }
 // reduceByTruncation simply truncates context to fit within the limit.
+// Uses the shared TruncateText utility (compression.go).
 func (cr *contextReducer) reduceByTruncation(context string, modelLimit int, overhead int) (string, error) {
 	cr.obs.Debug("overflow", "Using truncation strategy")
 	availableTokens := modelLimit - overhead
-	maxChars := availableTokens * 3 // Conservative chars-to-tokens
+	truncated := TruncateText(context, TruncateTextParams{
+		MaxTokens:  availableTokens,
+		MarkerText: "\n\n[... context truncated due to token limit ...]\n\n",
+	})
-	if maxChars >= len(context) {
+	if truncated == context {
 		return context, nil
 	}
-	// Keep beginning and end, truncate middle (addresses "lost in the middle" problem)
-	keepFromStart := maxChars * 2 / 3
-	keepFromEnd := maxChars / 3
-	truncated := context[:keepFromStart] +
-		"\n\n[... context truncated due to token limit ...]\n\n" +
-		context[len(context)-keepFromEnd:]
 	cr.obs.Debug("overflow", "Truncated context: %d -> %d chars", len(context), len(truncated))
 	return truncated, nil
 }
@@ -721,4 +707,3 @@ func (cr *contextReducer) reduceByTextRank(context string, modelLimit int, overh
 		len(context), len(result), EstimateTokens(context), EstimateTokens(result))
 	return result, nil
 }