lollms-client 1.6.6__py3-none-any.whl → 1.6.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lollms-client might be problematic.

@@ -91,21 +91,6 @@ class LollmsClient():
  stt_binding_config (Optional[Dict]): Additional config for the STT binding.
  ttv_binding_config (Optional[Dict]): Additional config for the TTV binding.
  ttm_binding_config (Optional[Dict]): Additional config for the TTM binding.
- service_key (Optional[str]): Shared authentication key or client_id.
- verify_ssl_certificate (bool): Whether to verify SSL certificates.
- ctx_size (Optional[int]): Default context size for LLM.
- n_predict (Optional[int]): Default max tokens for LLM.
- stream (bool): Default streaming mode for LLM.
- temperature (float): Default temperature for LLM.
- top_k (int): Default top_k for LLM.
- top_p (float): Default top_p for LLM.
- repeat_penalty (float): Default repeat penalty for LLM.
- repeat_last_n (int): Default repeat last n for LLM.
- seed (Optional[int]): Default seed for LLM.
- n_threads (int): Default threads for LLM.
- streaming_callback (Optional[Callable]): Default streaming callback for LLM.
- user_name (str): Default user name for prompts.
- ai_name (str): Default AI name for prompts.

  Raises:
  ValueError: If the primary LLM binding cannot be created.
@@ -160,93 +145,119 @@ class LollmsClient():
  except Exception as e:
  trace_exception(e)
  ASCIIColors.warning(f"Exception occurred while creating TTS binding: {str(e)}")
+ self.tts = None

  if tti_binding_name:
- if tti_binding_config:
- self.tti = self.tti_binding_manager.create_binding(
- binding_name=tti_binding_name,
- **{
- k: v
- for k, v in (tti_binding_config or {}).items()
- if k != "binding_name"
- }
- )
- else:
- self.tti = self.tti_binding_manager.create_binding(
- binding_name=tti_binding_name
- )
- if self.tti is None:
- ASCIIColors.warning(f"Failed to create TTI binding: {tti_binding_name}. Available: {self.tti_binding_manager.get_available_bindings()}")
-
+ try:
+ if tti_binding_config:
+ self.tti = self.tti_binding_manager.create_binding(
+ binding_name=tti_binding_name,
+ **{
+ k: v
+ for k, v in (tti_binding_config or {}).items()
+ if k != "binding_name"
+ }
+ )
+ else:
+ self.tti = self.tti_binding_manager.create_binding(
+ binding_name=tti_binding_name
+ )
+ if self.tti is None:
+ ASCIIColors.warning(f"Failed to create TTI binding: {tti_binding_name}. Available: {self.tti_binding_manager.get_available_bindings()}")
+ except Exception as e:
+ trace_exception(e)
+ ASCIIColors.warning(f"Exception occurred while creating TTI binding: {str(e)}")
+ self.tti = None
+

  if stt_binding_name:
- if stt_binding_config:
- self.stt = self.stt_binding_manager.create_binding(
- binding_name=stt_binding_name,
- **{
- k: v
- for k, v in (stt_binding_config or {}).items()
- if k != "binding_name"
- }
- )
+ try:
+ if stt_binding_config:
+ self.stt = self.stt_binding_manager.create_binding(
+ binding_name=stt_binding_name,
+ **{
+ k: v
+ for k, v in (stt_binding_config or {}).items()
+ if k != "binding_name"
+ }
+ )

- else:
- self.stt = self.stt_binding_manager.create_binding(
- binding_name=stt_binding_name,
- )
- if self.stt is None:
- ASCIIColors.warning(f"Failed to create STT binding: {stt_binding_name}. Available: {self.stt_binding_manager.get_available_bindings()}")
+ else:
+ self.stt = self.stt_binding_manager.create_binding(
+ binding_name=stt_binding_name,
+ )
+ if self.stt is None:
+ ASCIIColors.warning(f"Failed to create STT binding: {stt_binding_name}. Available: {self.stt_binding_manager.get_available_bindings()}")
+ except Exception as e:
+ trace_exception(e)
+ ASCIIColors.warning(f"Exception occurred while creating STT binding: {str(e)}")
+ self.stt = None
+

  if ttv_binding_name:
- if ttv_binding_config:
- self.ttv = self.ttv_binding_manager.create_binding(
- binding_name=ttv_binding_name,
- **{
- k: v
- for k, v in ttv_binding_config.items()
- if k != "binding_name"
- }
- )
+ try:
+ if ttv_binding_config:
+ self.ttv = self.ttv_binding_manager.create_binding(
+ binding_name=ttv_binding_name,
+ **{
+ k: v
+ for k, v in ttv_binding_config.items()
+ if k != "binding_name"
+ }
+ )

- else:
- self.ttv = self.ttv_binding_manager.create_binding(
- binding_name=ttv_binding_name
- )
- if self.ttv is None:
- ASCIIColors.warning(f"Failed to create TTV binding: {ttv_binding_name}. Available: {self.ttv_binding_manager.get_available_bindings()}")
+ else:
+ self.ttv = self.ttv_binding_manager.create_binding(
+ binding_name=ttv_binding_name
+ )
+ if self.ttv is None:
+ ASCIIColors.warning(f"Failed to create TTV binding: {ttv_binding_name}. Available: {self.ttv_binding_manager.get_available_bindings()}")
+ except Exception as e:
+ trace_exception(e)
+ ASCIIColors.warning(f"Exception occurred while creating TTV binding: {str(e)}")
+ self.ttv = None

  if ttm_binding_name:
- if ttm_binding_config:
- self.ttm = self.ttm_binding_manager.create_binding(
- binding_name=ttm_binding_name,
- **{
- k: v
- for k, v in (ttm_binding_config or {}).items()
- if k != "binding_name"
- }
- )
- else:
- self.ttm = self.ttm_binding_manager.create_binding(
- binding_name=ttm_binding_name
- )
- if self.ttm is None:
- ASCIIColors.warning(f"Failed to create TTM binding: {ttm_binding_name}. Available: {self.ttm_binding_manager.get_available_bindings()}")
+ try:
+ if ttm_binding_config:
+ self.ttm = self.ttm_binding_manager.create_binding(
+ binding_name=ttm_binding_name,
+ **{
+ k: v
+ for k, v in (ttm_binding_config or {}).items()
+ if k != "binding_name"
+ }
+ )
+ else:
+ self.ttm = self.ttm_binding_manager.create_binding(
+ binding_name=ttm_binding_name
+ )
+ if self.ttm is None:
+ ASCIIColors.warning(f"Failed to create TTM binding: {ttm_binding_name}. Available: {self.ttm_binding_manager.get_available_bindings()}")
+ except Exception as e:
+ trace_exception(e)
+ ASCIIColors.warning(f"Exception occurred while creating TTM binding: {str(e)}")
+ self.ttm = None

  if mcp_binding_name:
- if mcp_binding_config:
- self.mcp = self.mcp_binding_manager.create_binding(
- binding_name=mcp_binding_name,
- **{
- k: v
- for k, v in (mcp_binding_config or {}).items()
- if k != "binding_name"
- }
- )
- else:
- self.mcp = self.mcp_binding_manager.create_binding(
- mcp_binding_name
- )
- if self.mcp is None:
- ASCIIColors.warning(f"Failed to create MCP binding: {mcp_binding_name}. Available: {self.mcp_binding_manager.get_available_bindings()}")
-
+ try:
+ if mcp_binding_config:
+ self.mcp = self.mcp_binding_manager.create_binding(
+ binding_name=mcp_binding_name,
+ **{
+ k: v
+ for k, v in (mcp_binding_config or {}).items()
+ if k != "binding_name"
+ }
+ )
+ else:
+ self.mcp = self.mcp_binding_manager.create_binding(
+ mcp_binding_name
+ )
+ if self.mcp is None:
+ ASCIIColors.warning(f"Failed to create MCP binding: {mcp_binding_name}. Available: {self.mcp_binding_manager.get_available_bindings()}")
+ except Exception as e:
+ trace_exception(e)
+ ASCIIColors.warning(f"Exception occurred while creating MCP binding: {str(e)}")
+ self.mcp = None
  # --- Store Default Generation Parameters ---

  # --- Prompt Formatting Attributes ---
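This hunk applies the same defensive pattern to every optional binding (TTS, TTI, STT, TTV, TTM, MCP): creation failures are logged and the attribute falls back to None instead of aborting client construction. A minimal sketch of that pattern follows; `safe_create_binding` is a hypothetical helper, not part of the library, while the `create_binding`/`get_available_bindings` manager interface is taken from the diff.

```python
from typing import Any, Dict, Optional

def safe_create_binding(manager: Any,
                        binding_name: str,
                        binding_config: Optional[Dict] = None) -> Optional[Any]:
    """Create a binding through its manager; log and return None on any failure."""
    try:
        # Drop a stray "binding_name" key so it is not passed twice.
        kwargs = {k: v for k, v in (binding_config or {}).items() if k != "binding_name"}
        binding = manager.create_binding(binding_name=binding_name, **kwargs)
        if binding is None:
            print(f"Failed to create binding: {binding_name}. "
                  f"Available: {manager.get_available_bindings()}")
        return binding
    except Exception as exc:
        print(f"Exception occurred while creating binding '{binding_name}': {exc}")
        return None
```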
@@ -1465,7 +1476,7 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
  return "🎨 Creating an image based on your request"

  # Handle RAG (data store) tools by their pattern
- elif "research::" in tool_name:
+ elif "rag::" in tool_name:
  # Extract the friendly name of the data source
  source_name = tool_name.split("::")[-1].replace("_", " ").title()
  return f"🔍 Searching {source_name} for relevant information"
@@ -1516,7 +1527,8 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
  final_answer_temperature=0.7
  if rag_top_k is None:
  rag_top_k=5
-
+
+ tools_infos = []
  def log_event(desc, event_type=MSG_TYPE.MSG_TYPE_CHUNK, meta=None, event_id=None) -> Optional[str]:
  if not streaming_callback: return None
  is_start = event_type == MSG_TYPE.MSG_TYPE_STEP_START
@@ -1543,38 +1555,44 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
  mcp_tools = self.mcp.discover_tools(force_refresh=True)
  if isinstance(use_mcps, list):
  filtered_tools = [t for t in mcp_tools if t["name"] in use_mcps]
+ tools_infos+=[f" 🛠️{f['name']}" for f in filtered_tools]
  all_discovered_tools.extend(filtered_tools)
  log_event(f" ✅ Loaded {len(filtered_tools)} specific MCP tools: {', '.join(use_mcps)}", MSG_TYPE.MSG_TYPE_INFO)
  elif use_mcps is True:
+ tools_infos+=[f" 🛠️{f['name']}" for f in mcp_tools]
  all_discovered_tools.extend(mcp_tools)
  log_event(f" ✅ Loaded {len(mcp_tools)} MCP tools", MSG_TYPE.MSG_TYPE_INFO)

  if use_data_store:
  log_event(f" 📚 Setting up {len(use_data_store)} knowledge bases...", MSG_TYPE.MSG_TYPE_INFO)
  for name, info in use_data_store.items():
- tool_name, description, call_fn = f"research::{name}", f"Queries the '{name}' knowledge base.", None
+ ASCIIColors.info(f"use_data_store item:\n{name}\n{info}")
+ tool_name, description, call_fn = f"rag::{name}", f"Queries the '{name}' knowledge base.", None
  if callable(info): call_fn = info
  elif isinstance(info, dict):
  if "callable" in info and callable(info["callable"]): call_fn = info["callable"]
- description = info.get("description", description)
+ description = info.get("description", "This is a datastore with the following description: \n" + description)
  if call_fn:
  visible_tools.append({"name": tool_name, "description": description, "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}})
  rag_registry[tool_name] = call_fn
  rag_tool_specs[tool_name] = {"default_top_k": rag_top_k, "default_min_sim": rag_min_similarity_percent}
- log_event(f" 📖 Ready: {name}", MSG_TYPE.MSG_TYPE_INFO)
-
+ tools_infos.append(f" 📖 {name}")
  visible_tools.extend(all_discovered_tools)
  built_in_tools = [
  {"name": "local_tools::final_answer", "description": "Provide the final answer directly to the user.", "input_schema": {}},
  {"name": "local_tools::request_clarification", "description": "Ask the user for more specific information when the request is ambiguous.", "input_schema": {"type": "object", "properties": {"question": {"type": "string"}}, "required": ["question"]}},
  {"name": "local_tools::revise_plan", "description": "Update the execution plan based on new discoveries or changing requirements.", "input_schema": {"type": "object", "properties": {"reason": {"type": "string"}, "new_plan": {"type": "array"}}, "required": ["reason", "new_plan"]}}
  ]
+ tools_infos+=[f" 🔨 final_answer"," 🔨 request_clarification"," 🔨 revise_plan"]
+
+
  if getattr(self, "tti", None):
  built_in_tools.append({"name": "local_tools::generate_image", "description": "Generate an image from a text description.", "input_schema": {"type": "object", "properties": {"prompt": {"type": "string"}}, "required": ["prompt"]}})

  all_visible_tools = visible_tools + built_in_tools
  tool_summary = "\n".join([f"- **{t['name']}**: {t['description']}" for t in all_visible_tools[:20]])

+ log_event("\n".join(tools_infos), MSG_TYPE.MSG_TYPE_INFO)
  log_event(f"✅ Ready with {len(all_visible_tools)} total capabilities", MSG_TYPE.MSG_TYPE_STEP_END, event_id=discovery_step_id, meta={"tool_count": len(all_visible_tools), "mcp_tools": len(all_discovered_tools), "rag_tools": len(rag_registry)})

  # Enhanced triage with better prompting
@@ -1594,7 +1612,7 @@ AVAILABLE CAPABILITIES:
  Based on the request complexity and available tools, choose the optimal strategy:

  1. **DIRECT_ANSWER**: For simple greetings, basic questions, or requests that don't require any tools
- - Use when: The request can be fully answered with your existing knowledge
+ - Use when: The request can be fully answered with your existing knowledge with confidence, and no tool seems to add any significant value to the answer
  - Example: "Hello", "What is Python?", "Explain quantum physics"

  2. **REQUEST_CLARIFICATION**: When the request is too vague or ambiguous
@@ -1612,16 +1630,14 @@ Based on the request complexity and available strategy
  Provide your analysis in JSON format:
  {{"thought": "Detailed reasoning about the request complexity and requirements", "strategy": "ONE_OF_THE_FOUR_OPTIONS", "confidence": 0.8, "text_output": "Direct answer or clarification question if applicable", "required_tool_name": "specific tool name if SINGLE_TOOL strategy", "estimated_steps": 3}}"""

- log_prompt("Triage Prompt", triage_prompt)
-
  triage_schema = {
  "thought": "string", "strategy": "string", "confidence": "number",
  "text_output": "string", "required_tool_name": "string", "estimated_steps": "number"
  }
- strategy_data = self.generate_structured_content(prompt=triage_prompt, schema=triage_schema, temperature=0.1, **llm_generation_kwargs)
+ strategy_data = self.generate_structured_content(prompt=triage_prompt, schema=triage_schema, temperature=0.1, system_prompt=system_prompt, **llm_generation_kwargs)
  strategy = strategy_data.get("strategy") if strategy_data else "COMPLEX_PLAN"

- log_event(f"Strategy analysis complete", MSG_TYPE.MSG_TYPE_INFO, meta={
+ log_event(f"Strategy analysis complete.\n**confidence**: {strategy_data.get('confidence', 0.5)}\n**reasoning**: {strategy_data.get('thought', 'None')}", MSG_TYPE.MSG_TYPE_INFO, meta={
  "strategy": strategy,
  "confidence": strategy_data.get("confidence", 0.5),
  "estimated_steps": strategy_data.get("estimated_steps", 1),
@@ -1760,7 +1776,7 @@ RESPONSE:"""
  }
  if tool_name in descriptions:
  return descriptions[tool_name]
- if "research::" in tool_name:
+ if "rag::" in tool_name:
  return f"🔍 Searching {tool_name.split('::')[-1]} knowledge base"
  if requires_code:
  return "💻 Processing code"
@@ -1829,7 +1845,7 @@ RESPONSE:"""

  # Enhanced planning phase
  planning_step_id = log_event_fn("📋 Creating adaptive execution plan...", MSG_TYPE.MSG_TYPE_STEP_START)
- execution_plan = planner.decompose_task(original_user_prompt, context or "")
+ execution_plan = planner.decompose_task(original_user_prompt, context or "", "\n".join([f"{tool['name']}:{tool['description']}" for tool in all_visible_tools]))
  current_plan_version = 1

  log_event_fn(f"Initial plan created with {len(execution_plan.tasks)} tasks", MSG_TYPE.MSG_TYPE_INFO, meta={
@@ -4307,28 +4323,31 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  contextual_prompt: Optional[str] = None,
  system_prompt: str | None = None,
  context_fill_percentage: float = 0.75,
- overlap_tokens: int = 150, # Added a default for better context continuity
+ overlap_tokens: int = 150,
  expected_generation_tokens: int = 1500,
+ max_scratchpad_tokens: int = 4000,
+ scratchpad_compression_threshold: int = 3000,
  streaming_callback: Optional[Callable] = None,
  return_scratchpad_only: bool = False,
  debug: bool = True,
+ ctx_size=None,
  **kwargs
  ) -> str:
  """
- Processes long text by breaking it down into chunks, analyzing each one incrementally,
- and synthesizing the results into a comprehensive final response based on a user-defined objective.
+ Processes long text with FIXED chunk sizing and managed scratchpad growth.
+ Now uses dynamic token calculation based on actual model tokenizer.
  """

  if debug:
  print(f"\n🔧 DEBUG: Starting processing with {len(text_to_process):,} characters")

  # Validate context fill percentage
- if not (0.1 <= context_fill_percentage <= 0.9):
- raise ValueError(f"context_fill_percentage must be between 0.1 and 0.9, got {context_fill_percentage}")
+ if not (0.1 <= context_fill_percentage <= 1.0):
+ raise ValueError(f"context_fill_percentage must be between 0.1 and 1.0, got {context_fill_percentage}")

  # Get context size
  try:
- context_size = self.llm.get_context_size() or 8192 # Using a more modern default
+ context_size = ctx_size or self.llm.default_ctx_size or self.llm.get_context_size() or 8192
  except:
  context_size = 8192

@@ -4339,65 +4358,121 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  if not text_to_process:
  return ""

- # Use a simple word-based split for token estimation
+ # Use word-based split for token estimation
  tokens = text_to_process.split()
  if debug:
  print(f"🔧 DEBUG: Tokenized into {len(tokens):,} word tokens")

- # Dynamic token budget calculation
- def calculate_token_budgets(scratchpad_content: str = "", step_num: int = 0) -> dict:
- # Generic prompt templates are more concise
- base_system_tokens = 150
- user_template_tokens = 250
- scratchpad_tokens = len(scratchpad_content.split()) * 1.3 if scratchpad_content else 0
-
- used_tokens = base_system_tokens + user_template_tokens + scratchpad_tokens + expected_generation_tokens
- total_budget = int(context_size * context_fill_percentage)
- available_for_chunk = max(500, int(total_budget - used_tokens)) # Ensure a reasonable minimum chunk size
-
- budget_info = {
- "total_budget": total_budget,
- "chunk_budget": available_for_chunk,
- "efficiency_ratio": available_for_chunk / total_budget if total_budget > 0 else 0,
- "scratchpad_tokens": int(scratchpad_tokens),
- "used_tokens": int(used_tokens)
- }
+ # ========================================
+ # ENHANCED: Dynamically calculate token sizes using actual tokenizer
+ # ========================================
+
+ # Create template system prompt to measure its token size
+ template_system_prompt = (
+ f"You are a component in a multi-step text processing pipeline analyzing step 1 of 100.\n\n"
+ f"**Your Task:** Analyze the 'New Text Chunk' and extract key information relevant to the 'Global Objective'. "
+ f"Review the 'Existing Scratchpad' to avoid repetition. Add ONLY new insights.\n\n"
+ f"**CRITICAL:** Do NOT repeat information already in the scratchpad. "
+ f"If no new relevant information exists, respond with '[No new information found in this chunk.]'"
+ )
+ base_system_tokens = len(self.tokenize(template_system_prompt))
+
+ # Create MINIMAL template user prompt (structure only, without content placeholders)
+ summarization_objective = contextual_prompt or "Create a comprehensive summary by extracting all key facts, concepts, and conclusions."
+
+ # Measure only the structural overhead (headers, formatting, instructions)
+ template_structure = (
+ f"--- Global Objective ---\n{summarization_objective}\n\n"
+ f"--- Progress ---\nStep 100/100 | 10 sections completed, 4000 tokens\n\n" # Worst-case progress text
+ f"--- Existing Scratchpad (for context) ---\n"
+ f"--- New Text Chunk ---\n"
+ f"--- Instructions ---\n"
+ f"Extract NEW key information from this chunk that aligns with the objective. "
+ f"Be concise. Avoid repeating scratchpad content."
+ )
+ user_template_overhead = len(self.tokenize(template_structure))

- if debug:
- print(f"🔧 DEBUG Step {step_num}: Budget = {available_for_chunk}/{total_budget} tokens, "
- f"Scratchpad = {int(scratchpad_tokens)} tokens")
+ if debug:
+ print(f"🔧 DEBUG: Computed system prompt tokens: {base_system_tokens}")
+ print(f"🔧 DEBUG: Computed user template overhead: {user_template_overhead}")
+ print(f"🔧 DEBUG: (Note: Scratchpad and chunk content allocated separately)")

- return budget_info
+ # Reserve space for maximum expected scratchpad size
+ reserved_scratchpad_tokens = max_scratchpad_tokens

- # Initial budget calculation
- initial_budget = calculate_token_budgets()
- chunk_size_tokens = initial_budget["chunk_budget"]
+ total_budget = int(context_size * context_fill_percentage)
+ # Only count overhead, not the actual chunk/scratchpad content (that's reserved separately)
+ used_tokens = base_system_tokens + user_template_overhead + reserved_scratchpad_tokens + expected_generation_tokens

+ # FIXED chunk size - never changes during processing
+ FIXED_CHUNK_SIZE = max(1024, int(total_budget - used_tokens))
+
+
  if debug:
- print(f"🔧 DEBUG: Initial chunk size: {chunk_size_tokens} word tokens")
+ print(f"\n🔧 DEBUG: Token budget breakdown:")
+ print(f" - Context size: {context_size} tokens")
+ print(f" - Fill percentage: {context_fill_percentage} ({int(context_fill_percentage*100)}%)")
+ print(f" - Total budget: {total_budget} tokens")
+ print(f" - System prompt: {base_system_tokens} tokens")
+ print(f" - User template overhead: {user_template_overhead} tokens")
+ print(f" - Reserved scratchpad: {reserved_scratchpad_tokens} tokens")
+ print(f" - Expected generation: {expected_generation_tokens} tokens")
+ print(f" - Total overhead: {used_tokens} tokens")
+ print(f" - Remaining for chunks: {total_budget - used_tokens} tokens")
+ print(f"🔧 DEBUG: FIXED chunk size: {FIXED_CHUNK_SIZE} tokens")
+
+ # Safety check
+ if FIXED_CHUNK_SIZE == 1024:
+ print(f"⚠️ WARNING: Chunk size is at minimum (1024)!")
+ print(f"⚠️ Budget exhausted: {used_tokens} used / {total_budget} available")
+ print(f"⚠️ Consider reducing max_scratchpad_tokens or expected_generation_tokens")

  if streaming_callback:
  streaming_callback(
- f"Context Budget: {initial_budget['chunk_budget']:,}/{initial_budget['total_budget']:,} tokens "
- f"({initial_budget['efficiency_ratio']:.1%} efficiency)",
+ "\n".join([
+ f"\n🔧 DEBUG: Token budget breakdown:",
+ f" - Context size: {context_size} tokens",
+ f" - Fill percentage: {context_fill_percentage} ({int(context_fill_percentage*100)}%)",
+ f" - Total budget: {total_budget} tokens",
+ f" - System prompt: {base_system_tokens} tokens",
+ f" - User template overhead: {user_template_overhead} tokens",
+ f" - Reserved scratchpad: {reserved_scratchpad_tokens} tokens",
+ f" - Expected generation: {expected_generation_tokens} tokens",
+ f" - Total overhead: {used_tokens} tokens",
+ f" - Remaining for chunks: {total_budget - used_tokens} tokens",
+ f"🔧 DEBUG: FIXED chunk size: {FIXED_CHUNK_SIZE} tokens"
+ ]
+ ),
+ MSG_TYPE.MSG_TYPE_STEP
+ )
+ if FIXED_CHUNK_SIZE == 1024:
+ streaming_callback(
+ "\n".join([
+ f"⚠️ WARNING: Chunk size is at minimum (1024)!",
+ f"⚠️ Budget exhausted: {used_tokens} used / {total_budget} available",
+ f"⚠️ Consider reducing max_scratchpad_tokens or expected_generation_tokens"
+ ]
+ ),
+ MSG_TYPE.MSG_TYPE_STEP
+ )
+ streaming_callback(
+ f"Context Budget: {FIXED_CHUNK_SIZE:,}/{total_budget:,} tokens per chunk (fixed)",
  MSG_TYPE.MSG_TYPE_STEP,
- {"budget_info": initial_budget}
+ {"fixed_chunk_size": FIXED_CHUNK_SIZE, "total_budget": total_budget}
  )

  # Single pass for short content
- if len(tokens) <= chunk_size_tokens:
+ if len(tokens) <= FIXED_CHUNK_SIZE:
  if debug:
- print("🔧 DEBUG: Content is short enough for single-pass processing")
+ print("🔧 DEBUG: Content fits in single pass")

  if streaming_callback:
  streaming_callback("Content fits in a single pass", MSG_TYPE.MSG_TYPE_STEP, {})

- # Generic single-pass system prompt
  system_prompt = (
  "You are an expert AI assistant for text analysis and summarization. "
  "Your task is to carefully analyze the provided text and generate a comprehensive, "
- "accurate, and well-structured response that directly addresses the user's objective. "
- "Focus on extracting key information, identifying main themes, and synthesizing the content effectively."
+ "accurate, and well-structured response that directly addresses the user's objective."
  )

  prompt_objective = contextual_prompt or "Provide a comprehensive summary and analysis of the provided text."
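The multi-chunk path now derives one fixed chunk size from the context budget up front instead of recomputing it per step. A minimal sketch of the arithmetic; the 120/90-token overhead figures are assumed values for illustration, whereas the real code measures them with `self.tokenize`.

```python
def compute_fixed_chunk_size(context_size: int,
                             context_fill_percentage: float,
                             base_system_tokens: int,
                             user_template_overhead: int,
                             max_scratchpad_tokens: int,
                             expected_generation_tokens: int) -> int:
    """Return the per-chunk token budget; computed once, never shrinks later."""
    total_budget = int(context_size * context_fill_percentage)
    used_tokens = (base_system_tokens + user_template_overhead
                   + max_scratchpad_tokens + expected_generation_tokens)
    return max(1024, total_budget - used_tokens)

# With an 8192-token context, 0.75 fill and the default reservations
# (4000 scratchpad + 1500 generation, plus ~210 tokens of assumed prompt
# overhead), the budget collapses to the 1024 floor -- exactly the case
# the new warning reports. A larger context leaves real headroom:
print(compute_fixed_chunk_size(8192, 0.75, 120, 90, 4000, 1500))   # 1024
print(compute_fixed_chunk_size(32768, 0.75, 120, 90, 4000, 1500))  # 18866
```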
@@ -4413,120 +4488,173 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  print(f"🔧 DEBUG: Single-pass processing failed: {e}")
  return f"Error in single-pass processing: {e}"

- # Multi-chunk processing for long content
+ # ========================================
+ # FIXED: Multi-chunk processing with static sizing
+ # ========================================
  if debug:
- print("🔧 DEBUG: Using multi-chunk processing for long content")
+ print("🔧 DEBUG: Using multi-chunk processing with FIXED chunk size")

  chunk_summaries = []
  current_position = 0
  step_number = 1
+
+ # Pre-calculate total steps (won't change since chunk size is fixed)
+ total_steps = -(-len(tokens) // (FIXED_CHUNK_SIZE - overlap_tokens)) # Ceiling division
+
+ if debug:
+ print(f"🔧 DEBUG: Total estimated steps: {total_steps}")
+
+ # ========================================
+ # NEW: Scratchpad compression helper with dynamic token counting
+ # ========================================
+ def compress_scratchpad(scratchpad_sections: list) -> list:
+ """Compress scratchpad when it gets too large"""
+ if len(scratchpad_sections) <= 2:
+ return scratchpad_sections
+
+ combined = "\n\n---\n\n".join(scratchpad_sections)
+ # ENHANCED: Use actual tokenizer to count
+ current_size = len(self.tokenize(combined))
+
+ if current_size <= scratchpad_compression_threshold:
+ return scratchpad_sections
+
+ if debug:
+ print(f"🔧 DEBUG: Compressing scratchpad from {current_size} tokens")
+
+ compression_prompt = (
+ f"Consolidate the following analysis sections into a more concise summary. "
+ f"Retain all key facts, data points, and conclusions, but eliminate redundancy:\n\n"
+ f"{combined}"
+ )
+
+ try:
+ compressed = self.remove_thinking_blocks(
+ self.llm.generate_text(
+ compression_prompt,
+ system_prompt="You are a text consolidation expert. Create concise summaries that preserve all important information.",
+ **kwargs
+ )
+ )
+
+ if debug:
+ # ENHANCED: Use actual tokenizer
+ compressed_size = len(self.tokenize(compressed))
+ print(f"🔧 DEBUG: Compressed to {compressed_size} tokens (reduction: {100*(1-compressed_size/current_size):.1f}%)")
+
+ return [compressed]
+ except Exception as e:
+ if debug:
+ print(f"🔧 DEBUG: Compression failed: {e}, keeping last 3 sections")
+ # Fallback: keep only recent sections
+ return scratchpad_sections[-3:]

+ # Main processing loop with FIXED chunk size
  while current_position < len(tokens):
- # Recalculate budget for each step for dynamic adaptation
- current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
- current_budget = calculate_token_budgets(current_scratchpad, step_number)
- adaptive_chunk_size = max(500, current_budget["chunk_budget"])
-
- # Extract the next chunk of text
- chunk_end = min(current_position + adaptive_chunk_size, len(tokens))
+ # Extract chunk using FIXED size
+ chunk_end = min(current_position + FIXED_CHUNK_SIZE, len(tokens))
  chunk_tokens = tokens[current_position:chunk_end]
  chunk_text = " ".join(chunk_tokens)

  if debug:
- print(f"\n🔧 DEBUG Step {step_number}: Processing chunk from {current_position} to {chunk_end} "
- f"({len(chunk_tokens)} tokens)")
+ print(f"\n🔧 DEBUG Step {step_number}/{total_steps}: Processing chunk from {current_position} to {chunk_end} "
+ f"({len(chunk_tokens)} tokens)")

- # Progress calculation
- remaining_tokens = len(tokens) - current_position
- estimated_remaining_steps = max(1, -(-remaining_tokens // adaptive_chunk_size)) # Ceiling division
- total_estimated_steps = step_number + estimated_remaining_steps -1
- progress = (current_position / len(tokens)) * 90 if len(tokens) > 0 else 0
+ # Progress calculation (based on fixed steps)
+ progress = (step_number / total_steps) * 90

  if streaming_callback:
  streaming_callback(
- f"Processing chunk {step_number}/{total_estimated_steps} - "
- f"Budget: {adaptive_chunk_size:,} tokens",
+ f"Processing chunk {step_number}/{total_steps} - Fixed size: {FIXED_CHUNK_SIZE:,} tokens",
  MSG_TYPE.MSG_TYPE_STEP_START,
- {"step": step_number, "progress": progress}
+ {"step": step_number, "total_steps": total_steps, "progress": progress}
  )

+ # ENHANCED: Check and compress scratchpad with actual token counting
+ current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
+ scratchpad_size = len(self.tokenize(current_scratchpad)) if current_scratchpad else 0
+
+ if scratchpad_size > scratchpad_compression_threshold:
+ if debug:
+ print(f"🔧 DEBUG: Scratchpad size ({scratchpad_size}) exceeds threshold, compressing...")
+ chunk_summaries = compress_scratchpad(chunk_summaries)
+ current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
+ scratchpad_size = len(self.tokenize(current_scratchpad)) if current_scratchpad else 0
+
  try:
- # Generic, state-aware system prompt
  system_prompt = (
- f"You are a component in a multi-step text processing pipeline. Your role is to analyze a chunk of text and extract key information relevant to a global objective.\n\n"
- f"**Current Status:** You are on step {step_number} of approximately {total_estimated_steps} steps. Progress is at {progress:.1f}%.\n\n"
- f"**Your Task:**\n"
- f"Analyze the 'New Text Chunk' provided below. Extract and summarize any information, data points, or key ideas that are relevant to the 'Global Objective'.\n"
- f"Review the 'Existing Scratchpad Content' to understand what has already been found. Your goal is to add *new* insights that are not already captured.\n\n"
- f"**CRITICAL:** Do NOT repeat information already present in the scratchpad. Focus only on new, relevant details from the current chunk. If the chunk contains no new relevant information, respond with '[No new information found in this chunk.]'."
+ f"You are a component in a multi-step text processing pipeline analyzing step {step_number} of {total_steps}.\n\n"
+ f"**Your Task:** Analyze the 'New Text Chunk' and extract key information relevant to the 'Global Objective'. "
+ f"Review the 'Existing Scratchpad' to avoid repetition. Add ONLY new insights.\n\n"
+ f"**CRITICAL:** Do NOT repeat information already in the scratchpad. "
+ f"If no new relevant information exists, respond with '[No new information found in this chunk.]'"
  )

- # Generic, context-aware user prompt
- summarization_objective = contextual_prompt or "Create a comprehensive summary by extracting all key facts, concepts, and conclusions from the text."
- scratchpad_status = "The analysis is just beginning; this is the first chunk." if not chunk_summaries else f"Building on existing analysis with {len(chunk_summaries)} sections already completed."
+ summarization_objective = contextual_prompt or "Create a comprehensive summary by extracting all key facts, concepts, and conclusions."
+ scratchpad_status = "First chunk analysis" if not chunk_summaries else f"{len(chunk_summaries)} sections completed, {scratchpad_size} tokens"

  user_prompt = (
  f"--- Global Objective ---\n{summarization_objective}\n\n"
- f"--- Current Progress ---\n"
- f"{scratchpad_status} (Step {step_number}/{total_estimated_steps})\n\n"
- f"--- Existing Scratchpad Content (for context) ---\n{current_scratchpad}\n\n"
- f"--- New Text Chunk to Analyze ---\n{chunk_text}\n\n"
- f"--- Your Instructions ---\n"
- f"Extract key information from the 'New Text Chunk' that aligns with the 'Global Objective'. "
- f"Provide a concise summary of the new findings. Do not repeat what is already in the scratchpad. "
- f"If no new relevant information is found, state that clearly."
+ f"--- Progress ---\nStep {step_number}/{total_steps} | {scratchpad_status}\n\n"
+ f"--- Existing Scratchpad (for context) ---\n{current_scratchpad}\n\n"
+ f"--- New Text Chunk ---\n{chunk_text}\n\n"
+ f"--- Instructions ---\n"
+ f"Extract NEW key information from this chunk that aligns with the objective. "
+ f"Be concise. Avoid repeating scratchpad content."
  )

+ # ENHANCED: Compute actual prompt size
+ actual_prompt_tokens = len(self.tokenize(user_prompt))
+ actual_system_tokens = len(self.tokenize(system_prompt))
+
  if debug:
- print(f"🔧 DEBUG: Sending {len(user_prompt)} char prompt to LLM")
+ print(f"🔧 DEBUG: Actual prompt tokens: {actual_prompt_tokens}")
+ print(f"🔧 DEBUG: Actual system tokens: {actual_system_tokens}")
+ print(f"🔧 DEBUG: Total input tokens: {actual_prompt_tokens + actual_system_tokens}")
+ print(f"🔧 DEBUG: Scratchpad: {scratchpad_size} tokens")

  chunk_summary = self.remove_thinking_blocks(self.llm.generate_text(user_prompt, system_prompt=system_prompt, **kwargs))

  if debug:
- print(f"🔧 DEBUG: Received {len(chunk_summary)} char response preview: {chunk_summary[:200]}...")
+ print(f"🔧 DEBUG: Received {len(chunk_summary)} char response")

- # Generic content filtering
+ # Filter logic
  filter_out = False
  filter_reason = "content accepted"

- # Check for explicit rejection signals
  if (chunk_summary.strip().lower().startswith('[no new') or
  chunk_summary.strip().lower().startswith('no new information')):
  filter_out = True
  filter_reason = "explicit rejection signal"
- # Check for overly short or generic refusal responses
  elif len(chunk_summary.strip()) < 25:
  filter_out = True
- filter_reason = "response too short to be useful"
- # Check for common error phrases
- elif any(error_phrase in chunk_summary.lower()[:150] for error_phrase in [
- 'error', 'failed', 'cannot provide', 'unable to analyze', 'not possible', 'insufficient information']):
+ filter_reason = "response too short"
+ elif any(error in chunk_summary.lower()[:150] for error in [
+ 'error', 'failed', 'cannot provide', 'unable to analyze']):
  filter_out = True
- filter_reason = "error or refusal response detected"
+ filter_reason = "error response"

  if not filter_out:
  chunk_summaries.append(chunk_summary.strip())
  content_added = True
  if debug:
- print(f"🔧 DEBUG: ✅ Content added to scratchpad (total sections: {len(chunk_summaries)})")
+ print(f"🔧 DEBUG: ✅ Content added (total sections: {len(chunk_summaries)})")
  else:
  content_added = False
  if debug:
- print(f"🔧 DEBUG: ❌ Content filtered out - {filter_reason}: {chunk_summary[:100]}...")
+ print(f"🔧 DEBUG: ❌ Filtered: {filter_reason}")

- # Update progress via callback
  if streaming_callback:
  updated_scratchpad = "\n\n---\n\n".join(chunk_summaries)
  streaming_callback(
  updated_scratchpad,
  MSG_TYPE.MSG_TYPE_SCRATCHPAD,
- {"step": step_number, "sections": len(chunk_summaries), "content_added": content_added, "filter_reason": filter_reason}
+ {"step": step_number, "sections": len(chunk_summaries), "content_added": content_added}
  )
- progress_after = ((current_position + len(chunk_tokens)) / len(tokens)) * 90 if len(tokens) > 0 else 90
  streaming_callback(
  f"Step {step_number} completed - {'Content added' if content_added else f'Filtered: {filter_reason}'}",
  MSG_TYPE.MSG_TYPE_STEP_END,
- {"progress": progress_after}
+ {"progress": progress}
  )

  except Exception as e:
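Two loop invariants are introduced in this hunk: the total step count is fixed up front by ceiling division, and the scratchpad is compressed whenever its token count crosses `scratchpad_compression_threshold`. A small self-contained sketch; the word-count tokenizer and the trivial `compress` stand in for `self.tokenize` and the LLM-backed `compress_scratchpad` from the diff.

```python
FIXED_CHUNK_SIZE, overlap_tokens = 3000, 150
tokens = ["w"] * 10_000
total_steps = -(-len(tokens) // (FIXED_CHUNK_SIZE - overlap_tokens))  # ceil(10000/2850) == 4

scratchpad_compression_threshold = 3000

def count_tokens(text: str) -> int:
    return len(text.split())          # stand-in for self.tokenize(text)

def compress(sections: list) -> list:
    return sections[-3:]              # stand-in for the LLM consolidation step

def maybe_compress(chunk_summaries: list) -> list:
    """Compress the scratchpad once it exceeds the configured threshold."""
    joined = "\n\n---\n\n".join(chunk_summaries)
    if count_tokens(joined) > scratchpad_compression_threshold:
        return compress(chunk_summaries)
    return chunk_summaries
```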
@@ -4536,82 +4664,106 @@ Provide the final aggregated answer in {output_format} format, directly addressi
  self.trace_exception(e)
  if streaming_callback:
  streaming_callback(error_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
- chunk_summaries.append(f"[Error processing chunk at step {step_number}: {str(e)[:150]}]")
+ chunk_summaries.append(f"[Error at step {step_number}: {str(e)[:150]}]")

- # Move to the next chunk, allowing for overlap
- current_position += max(1, adaptive_chunk_size - overlap_tokens)
+ # Move to next chunk with FIXED size
+ current_position += max(1, FIXED_CHUNK_SIZE - overlap_tokens)
  step_number += 1

- # Safety break for excessively long documents
+ # Safety break
  if step_number > 200:
- if debug: print(f"🔧 DEBUG: Safety break after {step_number-1} steps.")
- chunk_summaries.append("[Processing halted due to exceeding maximum step limit.]")
+ if debug:
+ print(f"🔧 DEBUG: Safety break at step {step_number}")
+ chunk_summaries.append("[Processing halted: exceeded maximum steps]")
  break

  if debug:
- print(f"\n🔧 DEBUG: Chunk processing complete. Total sections gathered: {len(chunk_summaries)}")
+ print(f"\n🔧 DEBUG: Processing complete. Sections: {len(chunk_summaries)}")

- # Return only the scratchpad content if requested
+ # Return scratchpad only if requested
  if return_scratchpad_only:
  final_scratchpad = "\n\n---\n\n".join(chunk_summaries)
  if streaming_callback:
- streaming_callback("Returning scratchpad content as final output.", MSG_TYPE.MSG_TYPE_STEP, {})
+ streaming_callback("Returning scratchpad content", MSG_TYPE.MSG_TYPE_STEP, {})
  return final_scratchpad.strip()

- # Final Synthesis Step
+ # Final synthesis with STRONG objective reinforcement
  if streaming_callback:
- streaming_callback("Synthesizing final comprehensive response...", MSG_TYPE.MSG_TYPE_STEP_START, {"progress": 95})
+ streaming_callback("Synthesizing final response...", MSG_TYPE.MSG_TYPE_STEP_START, {"progress": 95})

  if not chunk_summaries:
- error_msg = "No content was successfully processed or extracted from the document. The input might be empty or an issue occurred during processing."
+ error_msg = "No content was successfully processed."
  if debug:
  print(f"🔧 DEBUG: ❌ {error_msg}")
  return error_msg

  combined_scratchpad = "\n\n---\n\n".join(chunk_summaries)
- synthesis_objective = contextual_prompt or "Provide a comprehensive, well-structured summary and analysis of the provided text."
+ synthesis_objective = contextual_prompt or "Provide a comprehensive, well-structured summary and analysis."

  if debug:
- print(f"🔧 DEBUG: Synthesizing from {len(combined_scratchpad):,} char scratchpad with {len(chunk_summaries)} sections.")
+ final_scratchpad_tokens = len(self.tokenize(combined_scratchpad))
+ print(f"🔧 DEBUG: Synthesizing from {len(combined_scratchpad):,} chars, {final_scratchpad_tokens} tokens, {len(chunk_summaries)} sections")

- # Generic synthesis prompts
+ # ENHANCED: Strong objective-focused synthesis
  synthesis_system_prompt = (
- "You are an expert AI assistant specializing in synthesizing information. "
- "Your task is to consolidate a series of text analysis sections from a scratchpad into a single, coherent, and well-structured final response. "
- "Eliminate redundancy, organize the content logically, and ensure the final output directly and comprehensively addresses the user's primary objective. "
- "Use markdown for clear formatting (e.g., headers, lists, bold text)."
+ f"You are completing a multi-step text processing task. "
+ f"Your role is to take analysis sections and produce the FINAL OUTPUT that directly fulfills the user's original objective.\n\n"
+ f"**CRITICAL:** Your output must DIRECTLY ADDRESS the user's objective, NOT just summarize the sections. "
+ f"The sections are intermediate work - transform them into the final deliverable the user requested."
  )

+ # ENHANCED: Explicit task reinforcement with examples of what NOT to do
+ task_type_hint = ""
+ if contextual_prompt:
+ lower_prompt = contextual_prompt.lower()
+ if any(word in lower_prompt for word in ['extract', 'list', 'identify', 'find']):
+ task_type_hint = "\n**Task Type:** This is an EXTRACTION/IDENTIFICATION task. Provide a structured list or catalog of items found, NOT a narrative summary."
+ elif any(word in lower_prompt for word in ['analyze', 'evaluate', 'assess', 'examine']):
+ task_type_hint = "\n**Task Type:** This is an ANALYSIS task. Provide insights, patterns, and evaluations, NOT just a description of content."
+ elif any(word in lower_prompt for word in ['compare', 'contrast', 'difference']):
+ task_type_hint = "\n**Task Type:** This is a COMPARISON task. Highlight similarities and differences, NOT separate summaries."
+ elif any(word in lower_prompt for word in ['answer', 'question', 'explain why', 'how does']):
+ task_type_hint = "\n**Task Type:** This is a QUESTION-ANSWERING task. Provide a direct answer, NOT a general overview."
+
  synthesis_user_prompt = (
- f"--- Final Objective ---\n{synthesis_objective}\n\n"
- f"--- Collected Analysis Sections (Scratchpad) ---\n{combined_scratchpad}\n\n"
- f"--- Your Final Task ---\n"
- f"Synthesize all the information from the 'Collected Analysis Sections' into a single, high-quality, and comprehensive response. "
- f"Your response must directly address the 'Final Objective'. "
- f"Organize your answer logically with clear sections using markdown headers. "
- f"Ensure all key information is included, remove any repetitive statements, and produce a polished, final document."
+ f"=== ORIGINAL USER OBJECTIVE (MOST IMPORTANT) ===\n{synthesis_objective}\n"
+ f"{task_type_hint}\n\n"
+ f"=== ANALYSIS SECTIONS (Raw Working Material) ===\n{combined_scratchpad}\n\n"
+ f"=== YOUR TASK ===\n"
+ f"Transform the analysis sections above into a final output that DIRECTLY FULFILLS the original objective.\n\n"
+ f"**DO:**\n"
+ f"- Focus exclusively on satisfying the user's original objective stated above\n"
+ f"- Organize information in whatever format best serves that objective\n"
+ f"- Remove redundancy and consolidate related points\n"
+ f"- Use markdown formatting for clarity\n\n"
+ f"**DO NOT:**\n"
+ f"- Provide a generic summary of the sections\n"
+ f"- Describe what the sections contain\n"
+ f"- Create an overview of the analysis process\n"
+ f"- Change the task into something different\n\n"
+ f"Remember: The user asked for '{synthesis_objective}' - deliver exactly that."
  )

  try:
  final_answer = self.remove_thinking_blocks(self.llm.generate_text(synthesis_user_prompt, system_prompt=synthesis_system_prompt, **kwargs))
  if debug:
- print(f"🔧 DEBUG: Final synthesis generated: {len(final_answer):,} characters")
+ print(f"🔧 DEBUG: Final synthesis: {len(final_answer):,} characters")
  if streaming_callback:
- streaming_callback("Final synthesis complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"progress": 100})
+ streaming_callback("Final synthesis complete", MSG_TYPE.MSG_TYPE_STEP_END, {"progress": 100})
  return final_answer.strip()

  except Exception as e:
- error_msg = f"The final synthesis step failed: {str(e)}. Returning the organized scratchpad content as a fallback."
- if debug: print(f"🔧 DEBUG: ❌ {error_msg}")
+ error_msg = f"Synthesis failed: {str(e)}. Returning scratchpad."
+ if debug:
+ print(f"🔧 DEBUG: ❌ {error_msg}")

- # Fallback to returning the organized scratchpad
  organized_scratchpad = (
  f"# Analysis Summary\n\n"
- f"*Note: The final synthesis process encountered an error. The raw, organized analysis sections are provided below.*\n\n"
- f"## Collected Sections\n\n"
- f"{combined_scratchpad}"
+ f"*Note: Final synthesis failed. Raw analysis sections below.*\n\n"
+ f"## Collected Sections\n\n{combined_scratchpad}"
  )
  return organized_scratchpad
+


  def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators=True):
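The synthesis step above now classifies the user's objective with a simple keyword heuristic so the final prompt can remind the model what kind of deliverable is expected. A standalone sketch of that heuristic; the keywords are copied from the hunk, while the function form and return strings are illustrative rather than library API.

```python
def task_type_hint(contextual_prompt: str) -> str:
    """Return a reminder of the expected deliverable type, or '' if nothing matches."""
    lower_prompt = contextual_prompt.lower()
    if any(w in lower_prompt for w in ['extract', 'list', 'identify', 'find']):
        return "EXTRACTION/IDENTIFICATION: provide a structured list, not a narrative summary."
    if any(w in lower_prompt for w in ['analyze', 'evaluate', 'assess', 'examine']):
        return "ANALYSIS: provide insights and evaluations, not a description of content."
    if any(w in lower_prompt for w in ['compare', 'contrast', 'difference']):
        return "COMPARISON: highlight similarities and differences, not separate summaries."
    if any(w in lower_prompt for w in ['answer', 'question', 'explain why', 'how does']):
        return "QUESTION-ANSWERING: give a direct answer, not a general overview."
    return ""

# task_type_hint("Extract every deadline mentioned in the contract")
# -> "EXTRACTION/IDENTIFICATION: provide a structured list, not a narrative summary."
```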