lollms-client 1.6.0__py3-none-any.whl → 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lollms-client might be problematic.

lollms_client/__init__.py CHANGED
@@ -8,7 +8,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utiliti
 from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
 from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
 
-__version__ = "1.6.0" # Updated version
+__version__ = "1.6.2" # Updated version
 
 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
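The only code change in this hunk is the version bump from 1.6.0 to 1.6.2. Since `__version__` is defined in the package's `__init__.py`, the installed release can be checked directly (a minimal sketch):

```python
# Confirm which lollms-client release is installed.
import lollms_client

print(lollms_client.__version__)  # "1.6.2" after upgrading
```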
@@ -143,16 +143,21 @@ class LollmsClient():
             ASCIIColors.warning(f"Failed to create LLM binding: {llm_binding_name}. Available: {available}")
 
         if tts_binding_name:
-            self.tts = self.tts_binding_manager.create_binding(
-                binding_name=tts_binding_name,
-                **{
-                    k: v
-                    for k, v in (tts_binding_config or {}).items()
-                    if k != "binding_name"
-                }
-            )
-            if self.tts is None:
-                ASCIIColors.warning(f"Failed to create TTS binding: {tts_binding_name}. Available: {self.tts_binding_manager.get_available_bindings()}")
+            try:
+                params = {
+                    k: v
+                    for k, v in (tts_binding_config or {}).items()
+                    if k != "binding_name"
+                }
+                self.tts = self.tts_binding_manager.create_binding(
+                    binding_name=tts_binding_name,
+                    **params
+                )
+                if self.tts is None:
+                    ASCIIColors.warning(f"Failed to create TTS binding: {tts_binding_name}. Available: {self.tts_binding_manager.get_available_bindings()}")
+            except Exception as e:
+                trace_exception(e)
+                ASCIIColors.warning(f"Exception occurred while creating TTS binding: {str(e)}")
 
         if tti_binding_name:
             if tti_binding_config:
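This hunk changes failure handling during client construction. In 1.6.0, an exception raised by `create_binding` would propagate out of `LollmsClient.__init__`; in 1.6.2, the filtered config (any `binding_name` key is dropped) is built first, the binding is created inside a try/except, and failures are traced and downgraded to a warning. A hedged usage sketch follows; the constructor keyword names `tts_binding_name` and `tts_binding_config` are inferred from the variables in this hunk, and the binding name is illustrative:

```python
from lollms_client import LollmsClient

# As of 1.6.2, a bad TTS config no longer aborts construction:
# the exception is traced and reported as a warning instead.
client = LollmsClient(
    tts_binding_name="piper",                 # hypothetical binding name
    tts_binding_config={"voice": "default"},  # any "binding_name" key is filtered out
)

# If binding creation raised, self.tts may never have been assigned,
# so getattr is safer than touching client.tts directly.
if getattr(client, "tts", None) is None:
    print("TTS binding unavailable; continuing without speech output.")
```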
@@ -5732,317 +5737,317 @@ Provide the final aggregated answer in {output_format} format, directly addressi
             callback("Deep analysis complete.", MSG_TYPE.MSG_TYPE_STEP_END)
         return final_output
 
-    def long_context_processing(
-        self,
-        text_to_process: str,
-        contextual_prompt: Optional[str] = None,
-        system_prompt: str | None = None,
-        context_fill_percentage: float = 0.75,
-        overlap_tokens: int = 150, # Added a default for better context continuity
-        expected_generation_tokens: int = 1500,
-        streaming_callback: Optional[Callable] = None,
-        return_scratchpad_only: bool = False,
-        debug: bool = True,
-        **kwargs
-    ) -> str:
-        """
-        Processes long text by breaking it down into chunks, analyzing each one incrementally,
-        and synthesizing the results into a comprehensive final response based on a user-defined objective.
-        """
-
-        if debug:
-            print(f"\n🔧 DEBUG: Starting processing with {len(text_to_process):,} characters")
-
-        # Validate context fill percentage
-        if not (0.1 <= context_fill_percentage <= 0.9):
-            raise ValueError(f"context_fill_percentage must be between 0.1 and 0.9, got {context_fill_percentage}")
-
-        # Get context size
-        try:
-            context_size = self.llm.get_context_size() or 8192 # Using a more modern default
-        except:
-            context_size = 8192
-
-        if debug:
-            print(f"🔧 DEBUG: Context size: {context_size}, Fill %: {context_fill_percentage}")
-
-        # Handle empty input
-        if not text_to_process:
-            return ""
-
-        # Use a simple word-based split for token estimation
-        tokens = text_to_process.split()
-        if debug:
-            print(f"🔧 DEBUG: Tokenized into {len(tokens):,} word tokens")
-
-        # Dynamic token budget calculation
-        def calculate_token_budgets(scratchpad_content: str = "", step_num: int = 0) -> dict:
-            # Generic prompt templates are more concise
-            base_system_tokens = 150
-            user_template_tokens = 250
-            scratchpad_tokens = len(scratchpad_content.split()) * 1.3 if scratchpad_content else 0
-
-            used_tokens = base_system_tokens + user_template_tokens + scratchpad_tokens + expected_generation_tokens
-            total_budget = int(context_size * context_fill_percentage)
-            available_for_chunk = max(500, int(total_budget - used_tokens)) # Ensure a reasonable minimum chunk size
-
-            budget_info = {
-                "total_budget": total_budget,
-                "chunk_budget": available_for_chunk,
-                "efficiency_ratio": available_for_chunk / total_budget if total_budget > 0 else 0,
-                "scratchpad_tokens": int(scratchpad_tokens),
-                "used_tokens": int(used_tokens)
-            }
+    def long_context_processing(
+        self,
+        text_to_process: str,
+        contextual_prompt: Optional[str] = None,
+        system_prompt: str | None = None,
+        context_fill_percentage: float = 0.75,
+        overlap_tokens: int = 150, # Added a default for better context continuity
+        expected_generation_tokens: int = 1500,
+        streaming_callback: Optional[Callable] = None,
+        return_scratchpad_only: bool = False,
+        debug: bool = True,
+        **kwargs
+    ) -> str:
+        """
+        Processes long text by breaking it down into chunks, analyzing each one incrementally,
+        and synthesizing the results into a comprehensive final response based on a user-defined objective.
+        """
 
         if debug:
-            print(f"🔧 DEBUG Step {step_num}: Budget = {available_for_chunk}/{total_budget} tokens, "
-                  f"Scratchpad = {int(scratchpad_tokens)} tokens")
+            print(f"\n🔧 DEBUG: Starting processing with {len(text_to_process):,} characters")
 
-            return budget_info
+        # Validate context fill percentage
+        if not (0.1 <= context_fill_percentage <= 0.9):
+            raise ValueError(f"context_fill_percentage must be between 0.1 and 0.9, got {context_fill_percentage}")
 
-        # Initial budget calculation
-        initial_budget = calculate_token_budgets()
-        chunk_size_tokens = initial_budget["chunk_budget"]
-
-        if debug:
-            print(f"🔧 DEBUG: Initial chunk size: {chunk_size_tokens} word tokens")
-
-        if streaming_callback:
-            streaming_callback(
-                f"Context Budget: {initial_budget['chunk_budget']:,}/{initial_budget['total_budget']:,} tokens "
-                f"({initial_budget['efficiency_ratio']:.1%} efficiency)",
-                MSG_TYPE.MSG_TYPE_STEP,
-                {"budget_info": initial_budget}
-            )
+        # Get context size
+        try:
+            context_size = self.llm.get_context_size() or 8192 # Using a more modern default
+        except:
+            context_size = 8192
 
-        # Single pass for short content
-        if len(tokens) <= chunk_size_tokens:
         if debug:
-            print("🔧 DEBUG: Content is short enough for single-pass processing")
+            print(f"🔧 DEBUG: Context size: {context_size}, Fill %: {context_fill_percentage}")
 
-            if streaming_callback:
-                streaming_callback("Content fits in a single pass", MSG_TYPE.MSG_TYPE_STEP, {})
-
-            # Generic single-pass system prompt
-            system_prompt = (
-                "You are an expert AI assistant for text analysis and summarization. "
-                "Your task is to carefully analyze the provided text and generate a comprehensive, "
-                "accurate, and well-structured response that directly addresses the user's objective. "
-                "Focus on extracting key information, identifying main themes, and synthesizing the content effectively."
-            )
+        # Handle empty input
+        if not text_to_process:
+            return ""
 
-            prompt_objective = contextual_prompt or "Provide a comprehensive summary and analysis of the provided text."
-            final_prompt = f"Objective: {prompt_objective}\n\n--- Full Text Content ---\n{text_to_process}"
+        # Use a simple word-based split for token estimation
+        tokens = text_to_process.split()
+        if debug:
+            print(f"🔧 DEBUG: Tokenized into {len(tokens):,} word tokens")
+
+        # Dynamic token budget calculation
+        def calculate_token_budgets(scratchpad_content: str = "", step_num: int = 0) -> dict:
+            # Generic prompt templates are more concise
+            base_system_tokens = 150
+            user_template_tokens = 250
+            scratchpad_tokens = len(scratchpad_content.split()) * 1.3 if scratchpad_content else 0
+
+            used_tokens = base_system_tokens + user_template_tokens + scratchpad_tokens + expected_generation_tokens
+            total_budget = int(context_size * context_fill_percentage)
+            available_for_chunk = max(500, int(total_budget - used_tokens)) # Ensure a reasonable minimum chunk size
+
+            budget_info = {
+                "total_budget": total_budget,
+                "chunk_budget": available_for_chunk,
+                "efficiency_ratio": available_for_chunk / total_budget if total_budget > 0 else 0,
+                "scratchpad_tokens": int(scratchpad_tokens),
+                "used_tokens": int(used_tokens)
+            }
 
-        try:
-            result = self.remove_thinking_blocks(self.llm.generate_text(final_prompt, system_prompt=system_prompt, **kwargs))
             if debug:
-                print(f"🔧 DEBUG: Single-pass result: {len(result):,} characters")
-                return result
-        except Exception as e:
-            if debug:
-                print(f"🔧 DEBUG: Single-pass processing failed: {e}")
-            return f"Error in single-pass processing: {e}"
-
-        # Multi-chunk processing for long content
-        if debug:
-            print("🔧 DEBUG: Using multi-chunk processing for long content")
-
-        chunk_summaries = []
-        current_position = 0
-        step_number = 1
+                print(f"🔧 DEBUG Step {step_num}: Budget = {available_for_chunk}/{total_budget} tokens, "
+                      f"Scratchpad = {int(scratchpad_tokens)} tokens")
 
-        while current_position < len(tokens):
-            # Recalculate budget for each step for dynamic adaptation
-            current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
-            current_budget = calculate_token_budgets(current_scratchpad, step_number)
-            adaptive_chunk_size = max(500, current_budget["chunk_budget"])
+            return budget_info
 
-            # Extract the next chunk of text
-            chunk_end = min(current_position + adaptive_chunk_size, len(tokens))
-            chunk_tokens = tokens[current_position:chunk_end]
-            chunk_text = " ".join(chunk_tokens)
+        # Initial budget calculation
+        initial_budget = calculate_token_budgets()
+        chunk_size_tokens = initial_budget["chunk_budget"]
 
         if debug:
-            print(f"\n🔧 DEBUG Step {step_number}: Processing chunk from {current_position} to {chunk_end} "
-                  f"({len(chunk_tokens)} tokens)")
-
-            # Progress calculation
-            remaining_tokens = len(tokens) - current_position
-            estimated_remaining_steps = max(1, -(-remaining_tokens // adaptive_chunk_size)) # Ceiling division
-            total_estimated_steps = step_number + estimated_remaining_steps -1
-            progress = (current_position / len(tokens)) * 90 if len(tokens) > 0 else 0
+            print(f"🔧 DEBUG: Initial chunk size: {chunk_size_tokens} word tokens")
 
         if streaming_callback:
             streaming_callback(
-                f"Processing chunk {step_number}/{total_estimated_steps} - "
-                f"Budget: {adaptive_chunk_size:,} tokens",
-                MSG_TYPE.MSG_TYPE_STEP_START,
-                {"step": step_number, "progress": progress}
+                f"Context Budget: {initial_budget['chunk_budget']:,}/{initial_budget['total_budget']:,} tokens "
+                f"({initial_budget['efficiency_ratio']:.1%} efficiency)",
+                MSG_TYPE.MSG_TYPE_STEP,
+                {"budget_info": initial_budget}
             )
 
-            try:
-                # Generic, state-aware system prompt
+        # Single pass for short content
+        if len(tokens) <= chunk_size_tokens:
+            if debug:
+                print("🔧 DEBUG: Content is short enough for single-pass processing")
+
+            if streaming_callback:
+                streaming_callback("Content fits in a single pass", MSG_TYPE.MSG_TYPE_STEP, {})
+
+            # Generic single-pass system prompt
             system_prompt = (
-                f"You are a component in a multi-step text processing pipeline. Your role is to analyze a chunk of text and extract key information relevant to a global objective.\n\n"
-                f"**Current Status:** You are on step {step_number} of approximately {total_estimated_steps} steps. Progress is at {progress:.1f}%.\n\n"
-                f"**Your Task:**\n"
-                f"Analyze the 'New Text Chunk' provided below. Extract and summarize any information, data points, or key ideas that are relevant to the 'Global Objective'.\n"
-                f"Review the 'Existing Scratchpad Content' to understand what has already been found. Your goal is to add *new* insights that are not already captured.\n\n"
-                f"**CRITICAL:** Do NOT repeat information already present in the scratchpad. Focus only on new, relevant details from the current chunk. If the chunk contains no new relevant information, respond with '[No new information found in this chunk.]'."
+                "You are an expert AI assistant for text analysis and summarization. "
+                "Your task is to carefully analyze the provided text and generate a comprehensive, "
+                "accurate, and well-structured response that directly addresses the user's objective. "
+                "Focus on extracting key information, identifying main themes, and synthesizing the content effectively."
            )
 
-            # Generic, context-aware user prompt
-            summarization_objective = contextual_prompt or "Create a comprehensive summary by extracting all key facts, concepts, and conclusions from the text."
-            scratchpad_status = "The analysis is just beginning; this is the first chunk." if not chunk_summaries else f"Building on existing analysis with {len(chunk_summaries)} sections already completed."
-
-            user_prompt = (
-                f"--- Global Objective ---\n{summarization_objective}\n\n"
-                f"--- Current Progress ---\n"
-                f"{scratchpad_status} (Step {step_number}/{total_estimated_steps})\n\n"
-                f"--- Existing Scratchpad Content (for context) ---\n{current_scratchpad}\n\n"
-                f"--- New Text Chunk to Analyze ---\n{chunk_text}\n\n"
-                f"--- Your Instructions ---\n"
-                f"Extract key information from the 'New Text Chunk' that aligns with the 'Global Objective'. "
-                f"Provide a concise summary of the new findings. Do not repeat what is already in the scratchpad. "
-                f"If no new relevant information is found, state that clearly."
-            )
+            prompt_objective = contextual_prompt or "Provide a comprehensive summary and analysis of the provided text."
+            final_prompt = f"Objective: {prompt_objective}\n\n--- Full Text Content ---\n{text_to_process}"
 
-            if debug:
-                print(f"🔧 DEBUG: Sending {len(user_prompt)} char prompt to LLM")
+            try:
+                result = self.remove_thinking_blocks(self.llm.generate_text(final_prompt, system_prompt=system_prompt, **kwargs))
+                if debug:
+                    print(f"🔧 DEBUG: Single-pass result: {len(result):,} characters")
+                return result
+            except Exception as e:
+                if debug:
+                    print(f"🔧 DEBUG: Single-pass processing failed: {e}")
+                return f"Error in single-pass processing: {e}"
+
+        # Multi-chunk processing for long content
+        if debug:
+            print("🔧 DEBUG: Using multi-chunk processing for long content")
+
+        chunk_summaries = []
+        current_position = 0
+        step_number = 1
 
-            chunk_summary = self.remove_thinking_blocks(self.llm.generate_text(user_prompt, system_prompt=system_prompt, **kwargs))
+        while current_position < len(tokens):
+            # Recalculate budget for each step for dynamic adaptation
+            current_scratchpad = "\n\n---\n\n".join(chunk_summaries)
+            current_budget = calculate_token_budgets(current_scratchpad, step_number)
+            adaptive_chunk_size = max(500, current_budget["chunk_budget"])
+
+            # Extract the next chunk of text
+            chunk_end = min(current_position + adaptive_chunk_size, len(tokens))
+            chunk_tokens = tokens[current_position:chunk_end]
+            chunk_text = " ".join(chunk_tokens)
 
             if debug:
-                print(f"🔧 DEBUG: Received {len(chunk_summary)} char response preview: {chunk_summary[:200]}...")
-
-            # Generic content filtering
-            filter_out = False
-            filter_reason = "content accepted"
-
-            # Check for explicit rejection signals
-            if (chunk_summary.strip().lower().startswith('[no new') or
-                chunk_summary.strip().lower().startswith('no new information')):
-                filter_out = True
-                filter_reason = "explicit rejection signal"
-            # Check for overly short or generic refusal responses
-            elif len(chunk_summary.strip()) < 25:
-                filter_out = True
-                filter_reason = "response too short to be useful"
-            # Check for common error phrases
-            elif any(error_phrase in chunk_summary.lower()[:150] for error_phrase in [
-                'error', 'failed', 'cannot provide', 'unable to analyze', 'not possible', 'insufficient information']):
-                filter_out = True
-                filter_reason = "error or refusal response detected"
-
-            if not filter_out:
-                chunk_summaries.append(chunk_summary.strip())
-                content_added = True
-                if debug:
-                    print(f"🔧 DEBUG: ✅ Content added to scratchpad (total sections: {len(chunk_summaries)})")
-            else:
-                content_added = False
-                if debug:
-                    print(f"🔧 DEBUG: ❌ Content filtered out - {filter_reason}: {chunk_summary[:100]}...")
+                print(f"\n🔧 DEBUG Step {step_number}: Processing chunk from {current_position} to {chunk_end} "
+                      f"({len(chunk_tokens)} tokens)")
+
+            # Progress calculation
+            remaining_tokens = len(tokens) - current_position
+            estimated_remaining_steps = max(1, -(-remaining_tokens // adaptive_chunk_size)) # Ceiling division
+            total_estimated_steps = step_number + estimated_remaining_steps -1
+            progress = (current_position / len(tokens)) * 90 if len(tokens) > 0 else 0
 
-            # Update progress via callback
             if streaming_callback:
-                updated_scratchpad = "\n\n---\n\n".join(chunk_summaries)
                 streaming_callback(
-                    updated_scratchpad,
-                    MSG_TYPE.MSG_TYPE_SCRATCHPAD,
-                    {"step": step_number, "sections": len(chunk_summaries), "content_added": content_added, "filter_reason": filter_reason}
+                    f"Processing chunk {step_number}/{total_estimated_steps} - "
+                    f"Budget: {adaptive_chunk_size:,} tokens",
+                    MSG_TYPE.MSG_TYPE_STEP_START,
+                    {"step": step_number, "progress": progress}
                 )
-                progress_after = ((current_position + len(chunk_tokens)) / len(tokens)) * 90 if len(tokens) > 0 else 90
-                streaming_callback(
-                    f"Step {step_number} completed - {'Content added' if content_added else f'Filtered: {filter_reason}'}",
-                    MSG_TYPE.MSG_TYPE_STEP_END,
-                    {"progress": progress_after}
+
+            try:
+                # Generic, state-aware system prompt
+                system_prompt = (
+                    f"You are a component in a multi-step text processing pipeline. Your role is to analyze a chunk of text and extract key information relevant to a global objective.\n\n"
+                    f"**Current Status:** You are on step {step_number} of approximately {total_estimated_steps} steps. Progress is at {progress:.1f}%.\n\n"
+                    f"**Your Task:**\n"
+                    f"Analyze the 'New Text Chunk' provided below. Extract and summarize any information, data points, or key ideas that are relevant to the 'Global Objective'.\n"
+                    f"Review the 'Existing Scratchpad Content' to understand what has already been found. Your goal is to add *new* insights that are not already captured.\n\n"
+                    f"**CRITICAL:** Do NOT repeat information already present in the scratchpad. Focus only on new, relevant details from the current chunk. If the chunk contains no new relevant information, respond with '[No new information found in this chunk.]'."
                 )
 
-            except Exception as e:
-                error_msg = f"Step {step_number} failed: {str(e)}"
-                if debug:
-                    print(f"🔧 DEBUG: ❌ {error_msg}")
-                self.trace_exception(e)
+                # Generic, context-aware user prompt
+                summarization_objective = contextual_prompt or "Create a comprehensive summary by extracting all key facts, concepts, and conclusions from the text."
+                scratchpad_status = "The analysis is just beginning; this is the first chunk." if not chunk_summaries else f"Building on existing analysis with {len(chunk_summaries)} sections already completed."
+
+                user_prompt = (
+                    f"--- Global Objective ---\n{summarization_objective}\n\n"
+                    f"--- Current Progress ---\n"
+                    f"{scratchpad_status} (Step {step_number}/{total_estimated_steps})\n\n"
+                    f"--- Existing Scratchpad Content (for context) ---\n{current_scratchpad}\n\n"
+                    f"--- New Text Chunk to Analyze ---\n{chunk_text}\n\n"
+                    f"--- Your Instructions ---\n"
+                    f"Extract key information from the 'New Text Chunk' that aligns with the 'Global Objective'. "
+                    f"Provide a concise summary of the new findings. Do not repeat what is already in the scratchpad. "
+                    f"If no new relevant information is found, state that clearly."
+                )
+
+                if debug:
+                    print(f"🔧 DEBUG: Sending {len(user_prompt)} char prompt to LLM")
+
+                chunk_summary = self.remove_thinking_blocks(self.llm.generate_text(user_prompt, system_prompt=system_prompt, **kwargs))
+
+                if debug:
+                    print(f"🔧 DEBUG: Received {len(chunk_summary)} char response preview: {chunk_summary[:200]}...")
+
+                # Generic content filtering
+                filter_out = False
+                filter_reason = "content accepted"
+
+                # Check for explicit rejection signals
+                if (chunk_summary.strip().lower().startswith('[no new') or
+                    chunk_summary.strip().lower().startswith('no new information')):
+                    filter_out = True
+                    filter_reason = "explicit rejection signal"
+                # Check for overly short or generic refusal responses
+                elif len(chunk_summary.strip()) < 25:
+                    filter_out = True
+                    filter_reason = "response too short to be useful"
+                # Check for common error phrases
+                elif any(error_phrase in chunk_summary.lower()[:150] for error_phrase in [
+                    'error', 'failed', 'cannot provide', 'unable to analyze', 'not possible', 'insufficient information']):
+                    filter_out = True
+                    filter_reason = "error or refusal response detected"
+
+                if not filter_out:
+                    chunk_summaries.append(chunk_summary.strip())
+                    content_added = True
+                    if debug:
+                        print(f"🔧 DEBUG: ✅ Content added to scratchpad (total sections: {len(chunk_summaries)})")
+                else:
+                    content_added = False
+                    if debug:
+                        print(f"🔧 DEBUG: ❌ Content filtered out - {filter_reason}: {chunk_summary[:100]}...")
+
+                # Update progress via callback
+                if streaming_callback:
+                    updated_scratchpad = "\n\n---\n\n".join(chunk_summaries)
+                    streaming_callback(
+                        updated_scratchpad,
+                        MSG_TYPE.MSG_TYPE_SCRATCHPAD,
+                        {"step": step_number, "sections": len(chunk_summaries), "content_added": content_added, "filter_reason": filter_reason}
+                    )
+                    progress_after = ((current_position + len(chunk_tokens)) / len(tokens)) * 90 if len(tokens) > 0 else 90
+                    streaming_callback(
+                        f"Step {step_number} completed - {'Content added' if content_added else f'Filtered: {filter_reason}'}",
+                        MSG_TYPE.MSG_TYPE_STEP_END,
+                        {"progress": progress_after}
+                    )
+
+            except Exception as e:
+                error_msg = f"Step {step_number} failed: {str(e)}"
+                if debug:
+                    print(f"🔧 DEBUG: ❌ {error_msg}")
+                self.trace_exception(e)
+                if streaming_callback:
+                    streaming_callback(error_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
+                chunk_summaries.append(f"[Error processing chunk at step {step_number}: {str(e)[:150]}]")
+
+            # Move to the next chunk, allowing for overlap
+            current_position += max(1, adaptive_chunk_size - overlap_tokens)
+            step_number += 1
+
+            # Safety break for excessively long documents
+            if step_number > 200:
+                if debug: print(f"🔧 DEBUG: Safety break after {step_number-1} steps.")
+                chunk_summaries.append("[Processing halted due to exceeding maximum step limit.]")
+                break
+
+        if debug:
+            print(f"\n🔧 DEBUG: Chunk processing complete. Total sections gathered: {len(chunk_summaries)}")
+
+        # Return only the scratchpad content if requested
+        if return_scratchpad_only:
+            final_scratchpad = "\n\n---\n\n".join(chunk_summaries)
             if streaming_callback:
-                streaming_callback(error_msg, MSG_TYPE.MSG_TYPE_EXCEPTION)
-            chunk_summaries.append(f"[Error processing chunk at step {step_number}: {str(e)[:150]}]")
+                streaming_callback("Returning scratchpad content as final output.", MSG_TYPE.MSG_TYPE_STEP, {})
+            return final_scratchpad.strip()
 
-            # Move to the next chunk, allowing for overlap
-            current_position += max(1, adaptive_chunk_size - overlap_tokens)
-            step_number += 1
-
-            # Safety break for excessively long documents
-            if step_number > 200:
-                if debug: print(f"🔧 DEBUG: Safety break after {step_number-1} steps.")
-                chunk_summaries.append("[Processing halted due to exceeding maximum step limit.]")
-                break
-
-        if debug:
-            print(f"\n🔧 DEBUG: Chunk processing complete. Total sections gathered: {len(chunk_summaries)}")
-
-        # Return only the scratchpad content if requested
-        if return_scratchpad_only:
-            final_scratchpad = "\n\n---\n\n".join(chunk_summaries)
+        # Final Synthesis Step
         if streaming_callback:
-            streaming_callback("Returning scratchpad content as final output.", MSG_TYPE.MSG_TYPE_STEP, {})
-            return final_scratchpad.strip()
+            streaming_callback("Synthesizing final comprehensive response...", MSG_TYPE.MSG_TYPE_STEP_START, {"progress": 95})
 
-        # Final Synthesis Step
-        if streaming_callback:
-            streaming_callback("Synthesizing final comprehensive response...", MSG_TYPE.MSG_TYPE_STEP_START, {"progress": 95})
+        if not chunk_summaries:
+            error_msg = "No content was successfully processed or extracted from the document. The input might be empty or an issue occurred during processing."
+            if debug:
+                print(f"🔧 DEBUG: ❌ {error_msg}")
+            return error_msg
+
+        combined_scratchpad = "\n\n---\n\n".join(chunk_summaries)
+        synthesis_objective = contextual_prompt or "Provide a comprehensive, well-structured summary and analysis of the provided text."
 
-        if not chunk_summaries:
-            error_msg = "No content was successfully processed or extracted from the document. The input might be empty or an issue occurred during processing."
-            if debug:
-                print(f"🔧 DEBUG: ❌ {error_msg}")
-            return error_msg
-
-        combined_scratchpad = "\n\n---\n\n".join(chunk_summaries)
-        synthesis_objective = contextual_prompt or "Provide a comprehensive, well-structured summary and analysis of the provided text."
-
-        if debug:
-            print(f"🔧 DEBUG: Synthesizing from {len(combined_scratchpad):,} char scratchpad with {len(chunk_summaries)} sections.")
-
-        # Generic synthesis prompts
-        synthesis_system_prompt = (
-            "You are an expert AI assistant specializing in synthesizing information. "
-            "Your task is to consolidate a series of text analysis sections from a scratchpad into a single, coherent, and well-structured final response. "
-            "Eliminate redundancy, organize the content logically, and ensure the final output directly and comprehensively addresses the user's primary objective. "
-            "Use markdown for clear formatting (e.g., headers, lists, bold text)."
-        )
-
-        synthesis_user_prompt = (
-            f"--- Final Objective ---\n{synthesis_objective}\n\n"
-            f"--- Collected Analysis Sections (Scratchpad) ---\n{combined_scratchpad}\n\n"
-            f"--- Your Final Task ---\n"
-            f"Synthesize all the information from the 'Collected Analysis Sections' into a single, high-quality, and comprehensive response. "
-            f"Your response must directly address the 'Final Objective'. "
-            f"Organize your answer logically with clear sections using markdown headers. "
-            f"Ensure all key information is included, remove any repetitive statements, and produce a polished, final document."
-        )
-
-        try:
-            final_answer = self.remove_thinking_blocks(self.llm.generate_text(synthesis_user_prompt, system_prompt=synthesis_system_prompt, **kwargs))
         if debug:
-            print(f"🔧 DEBUG: Final synthesis generated: {len(final_answer):,} characters")
-            if streaming_callback:
-                streaming_callback("Final synthesis complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"progress": 100})
-            return final_answer.strip()
+            print(f"🔧 DEBUG: Synthesizing from {len(combined_scratchpad):,} char scratchpad with {len(chunk_summaries)} sections.")
+
+        # Generic synthesis prompts
+        synthesis_system_prompt = (
+            "You are an expert AI assistant specializing in synthesizing information. "
+            "Your task is to consolidate a series of text analysis sections from a scratchpad into a single, coherent, and well-structured final response. "
+            "Eliminate redundancy, organize the content logically, and ensure the final output directly and comprehensively addresses the user's primary objective. "
+            "Use markdown for clear formatting (e.g., headers, lists, bold text)."
+        )
 
-        except Exception as e:
-            error_msg = f"The final synthesis step failed: {str(e)}. Returning the organized scratchpad content as a fallback."
-            if debug: print(f"🔧 DEBUG: {error_msg}")
-
-            # Fallback to returning the organized scratchpad
-            organized_scratchpad = (
-                f"# Analysis Summary\n\n"
-                f"*Note: The final synthesis process encountered an error. The raw, organized analysis sections are provided below.*\n\n"
-                f"## Collected Sections\n\n"
-                f"{combined_scratchpad}"
+        synthesis_user_prompt = (
+            f"--- Final Objective ---\n{synthesis_objective}\n\n"
+            f"--- Collected Analysis Sections (Scratchpad) ---\n{combined_scratchpad}\n\n"
+            f"--- Your Final Task ---\n"
+            f"Synthesize all the information from the 'Collected Analysis Sections' into a single, high-quality, and comprehensive response. "
+            f"Your response must directly address the 'Final Objective'. "
+            f"Organize your answer logically with clear sections using markdown headers. "
+            f"Ensure all key information is included, remove any repetitive statements, and produce a polished, final document."
         )
-            return organized_scratchpad
+
+        try:
+            final_answer = self.remove_thinking_blocks(self.llm.generate_text(synthesis_user_prompt, system_prompt=synthesis_system_prompt, **kwargs))
+            if debug:
+                print(f"🔧 DEBUG: Final synthesis generated: {len(final_answer):,} characters")
+            if streaming_callback:
+                streaming_callback("Final synthesis complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"progress": 100})
+            return final_answer.strip()
+
+        except Exception as e:
+            error_msg = f"The final synthesis step failed: {str(e)}. Returning the organized scratchpad content as a fallback."
+            if debug: print(f"🔧 DEBUG: ❌ {error_msg}")
+
+            # Fallback to returning the organized scratchpad
+            organized_scratchpad = (
+                f"# Analysis Summary\n\n"
+                f"*Note: The final synthesis process encountered an error. The raw, organized analysis sections are provided below.*\n\n"
+                f"## Collected Sections\n\n"
+                f"{combined_scratchpad}"
+            )
+            return organized_scratchpad
 
 
     def chunk_text(text, tokenizer, detokenizer, chunk_size, overlap, use_separators=True):
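Reading the new version of `long_context_processing` as a whole: it derives a per-chunk budget from the model's context size (for example, with the defaults `context_fill_percentage=0.75` and `expected_generation_tokens=1500` on an 8192-token context, the first budget is `int(8192 * 0.75) - (150 + 250 + 0 + 1500) = 4244` word tokens), walks the text in overlapping chunks, appends non-redundant findings to a scratchpad, and finally synthesizes one answer from that scratchpad, or returns the scratchpad itself. A minimal calling sketch, assuming an already configured `LollmsClient` instance named `client` and an illustrative file name, using only parameters from the signature above:

```python
def on_progress(message, msg_type, metadata=None):
    # Receives MSG_TYPE_STEP / STEP_START / STEP_END / SCRATCHPAD / EXCEPTION updates;
    # metadata (when present) carries budget info, step numbers, and progress percentages.
    print(f"[{msg_type}] {message}")

with open("big_report.txt", encoding="utf-8") as f:
    text = f.read()

summary = client.long_context_processing(
    text_to_process=text,
    contextual_prompt="List every risk mentioned in the report.",
    context_fill_percentage=0.75,   # validated to lie between 0.1 and 0.9
    overlap_tokens=150,             # word tokens carried over between chunks
    streaming_callback=on_progress,
    return_scratchpad_only=False,   # True returns the raw scratchpad, skipping synthesis
    debug=False,
)
print(summary)
```

Note that the "tokens" here are whitespace-split words (`text_to_process.split()`), so the budgets are rough estimates rather than true model-token counts, and `debug=True` (the default) prints diagnostics to stdout.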