lollms-client 0.19.6__tar.gz → 0.19.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lollms-client might be problematic.

Files changed (81)
  1. {lollms_client-0.19.6 → lollms_client-0.19.7}/PKG-INFO +1 -1
  2. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/__init__.py +1 -1
  3. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_core.py +145 -147
  4. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client.egg-info/PKG-INFO +1 -1
  5. {lollms_client-0.19.6 → lollms_client-0.19.7}/LICENSE +0 -0
  6. {lollms_client-0.19.6 → lollms_client-0.19.7}/README.md +0 -0
  7. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/article_summary/article_summary.py +0 -0
  8. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/deep_analyze/deep_analyse.py +0 -0
  9. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/deep_analyze/deep_analyze_multiple_files.py +0 -0
  10. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/function_calling_with_local_custom_mcp.py +0 -0
  11. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/generate_and_speak/generate_and_speak.py +0 -0
  12. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/generate_game_sfx/generate_game_fx.py +0 -0
  13. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/generate_text_with_multihop_rag_example.py +0 -0
  14. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/internet_search_with_rag.py +0 -0
  15. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/local_mcp.py +0 -0
  16. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/personality_test/chat_test.py +0 -0
  17. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/personality_test/chat_with_aristotle.py +0 -0
  18. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/personality_test/tesks_test.py +0 -0
  19. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/simple_text_gen_test.py +0 -0
  20. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/simple_text_gen_with_image_test.py +0 -0
  21. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/test_local_models/local_chat.py +0 -0
  22. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/text_2_audio.py +0 -0
  23. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/text_2_image.py +0 -0
  24. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/text_2_image_diffusers.py +0 -0
  25. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/text_and_image_2_audio.py +0 -0
  26. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/text_gen.py +0 -0
  27. {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/text_gen_system_prompt.py +0 -0
  28. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/__init__.py +0 -0
  29. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/llamacpp/__init__.py +0 -0
  30. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/lollms/__init__.py +0 -0
  31. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/ollama/__init__.py +0 -0
  32. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/openai/__init__.py +0 -0
  33. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/openllm/__init__.py +0 -0
  34. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/pythonllamacpp/__init__.py +0 -0
  35. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/tensor_rt/__init__.py +0 -0
  36. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/transformers/__init__.py +0 -0
  37. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/vllm/__init__.py +0 -0
  38. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_config.py +0 -0
  39. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_discussion.py +0 -0
  40. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_js_analyzer.py +0 -0
  41. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_llm_binding.py +0 -0
  42. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_mcp_binding.py +0 -0
  43. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_python_analyzer.py +0 -0
  44. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_stt_binding.py +0 -0
  45. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_tti_binding.py +0 -0
  46. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_ttm_binding.py +0 -0
  47. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_tts_binding.py +0 -0
  48. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_ttv_binding.py +0 -0
  49. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_types.py +0 -0
  50. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_utilities.py +0 -0
  51. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/mcp_bindings/local_mcp/__init__.py +0 -0
  52. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/mcp_bindings/local_mcp/default_tools/file_writer/file_writer.py +0 -0
  53. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/mcp_bindings/local_mcp/default_tools/generate_image_from_prompt/generate_image_from_prompt.py +0 -0
  54. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/mcp_bindings/local_mcp/default_tools/internet_search/internet_search.py +0 -0
  55. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/mcp_bindings/local_mcp/default_tools/python_interpreter/python_interpreter.py +0 -0
  56. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/stt_bindings/__init__.py +0 -0
  57. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/stt_bindings/lollms/__init__.py +0 -0
  58. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/stt_bindings/whisper/__init__.py +0 -0
  59. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/stt_bindings/whispercpp/__init__.py +0 -0
  60. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tti_bindings/__init__.py +0 -0
  61. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tti_bindings/dalle/__init__.py +0 -0
  62. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tti_bindings/diffusers/__init__.py +0 -0
  63. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tti_bindings/gemini/__init__.py +0 -0
  64. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tti_bindings/lollms/__init__.py +0 -0
  65. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/ttm_bindings/__init__.py +0 -0
  66. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/ttm_bindings/audiocraft/__init__.py +0 -0
  67. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/ttm_bindings/bark/__init__.py +0 -0
  68. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/ttm_bindings/lollms/__init__.py +0 -0
  69. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tts_bindings/__init__.py +0 -0
  70. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tts_bindings/bark/__init__.py +0 -0
  71. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tts_bindings/lollms/__init__.py +0 -0
  72. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tts_bindings/piper_tts/__init__.py +0 -0
  73. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tts_bindings/xtts/__init__.py +0 -0
  74. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/ttv_bindings/__init__.py +0 -0
  75. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/ttv_bindings/lollms/__init__.py +0 -0
  76. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client.egg-info/SOURCES.txt +0 -0
  77. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client.egg-info/dependency_links.txt +0 -0
  78. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client.egg-info/requires.txt +0 -0
  79. {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client.egg-info/top_level.txt +0 -0
  80. {lollms_client-0.19.6 → lollms_client-0.19.7}/pyproject.toml +0 -0
  81. {lollms_client-0.19.6 → lollms_client-0.19.7}/setup.cfg +0 -0
{lollms_client-0.19.6 → lollms_client-0.19.7}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lollms_client
- Version: 0.19.6
+ Version: 0.19.7
  Summary: A client library for LoLLMs generate endpoint
  Author-email: ParisNeo <parisneoai@gmail.com>
  License: Apache Software License
{lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/__init__.py
@@ -7,7 +7,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utiliti
  from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager


- __version__ = "0.19.6" # Updated version
+ __version__ = "0.19.7" # Updated version

  # Optionally, you could define __all__ if you want to be explicit about exports
  __all__ = [
{lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_core.py
@@ -853,8 +853,6 @@ Respond with a JSON object containing ONE of the following structures:
  turn_history.append({"type":"final_answer_generated", "content":final_answer_text})
  return {"final_answer": final_answer_text, "tool_calls": tool_calls_made_this_turn, "error": None}

- # --- RAG ---
-
  def generate_text_with_rag(
  self,
  prompt: str,
@@ -878,16 +876,17 @@ Respond with a JSON object containing ONE of the following structures:
  ctx_size: int | None = None,
  streaming_callback: Optional[Callable[[str, MSG_TYPE, Optional[Dict], Optional[List]], bool]] = None,
  rag_hop_query_generation_temperature: float = 0.2,
- rag_hop_summary_temperature: float = 0.3,
+ # rag_hop_summary_temperature is no longer needed
+ max_rag_context_characters: int = 32000,
  **llm_generation_kwargs
  ) -> Dict[str, Any]:
  if not self.binding:
  return {"final_answer": "", "rag_hops_history": [], "all_retrieved_sources": [], "error": "LLM binding not initialized."}

  turn_rag_history_for_callback: List[Dict[str, Any]] = []
- accumulated_rag_context_str = ""
  rag_hops_details_list: List[Dict[str, Any]] = []
- all_unique_retrieved_chunks_map: Dict[str, Dict[str, Any]] = {} # To store unique chunks by content hash or path+text
+ # Stores all unique chunks with their full details, keyed by a unique identifier (e.g., path + content hash snippet)
+ all_unique_retrieved_chunks_map: Dict[str, Dict[str, Any]] = {}
  current_query_for_rag = rag_query_text
  original_user_prompt = prompt

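This hunk changes the public signature of generate_text_with_rag: the rag_hop_summary_temperature parameter is removed and max_rag_context_characters (default 32000) is added. A minimal caller-side sketch under those assumptions follows; the client instance lc, the stub retriever, and the literal values are illustrative and not part of the diff.

    # Hypothetical caller update for 0.19.7; lc is assumed to be an already configured LollmsClient.
    def my_rag_query(query, vectorizer_name, top_k, min_similarity_percent):
        # A real retriever would search a vector store; this stub returns a single chunk
        # using the keys the diff reads: 'file_path', 'chunk_text', 'similarity_percent'.
        return [{"file_path": "docs/readme.md",
                 "chunk_text": "lollms_client is a client library for the LoLLMs generate endpoint.",
                 "similarity_percent": 88.0}]

    result = lc.generate_text_with_rag(
        prompt="Summarize what lollms_client does.",
        rag_query_function=my_rag_query,
        max_rag_hops=2,
        max_rag_context_characters=32000,  # new in 0.19.7; rag_hop_summary_temperature no longer exists
    )
    print(result["final_answer"])
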
@@ -896,205 +895,204 @@ Respond with a JSON object containing ONE of the following structures:
  streaming_callback(f"Starting RAG Hop {hop_count + 1}", MSG_TYPE.MSG_TYPE_STEP, {"type": "rag_hop_start", "hop": hop_count + 1}, turn_rag_history_for_callback)

  # 1. Determine/Generate RAG Query Text
- if hop_count > 0 or (current_query_for_rag is None and max_rag_hops > 0):
+ if hop_count > 0: # Query generation for multi-hop (hop 2 onwards)
  if streaming_callback:
  streaming_callback("LLM generating refined RAG query...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_query_generation", "hop": hop_count + 1}, turn_rag_history_for_callback)

+ system_prompt_q_gen = "You are an expert research assistant. Your task is to formulate the best possible *new* search query to find additional information relevant to the user's original request, considering previous search attempts."
  query_gen_prompt_parts = [
- f"{self.system_full_header}You are an expert research assistant. Your task is to formulate the best possible search query to find information relevant to the user's original request, considering the information already gathered.",
- f"{self.user_full_header}Original user request: '{original_user_prompt}'"
+ f"Original user request:\n'{original_user_prompt}'"
  ]
- if accumulated_rag_context_str:
- query_gen_prompt_parts.append(f"Information gathered so far (summaries):\n{accumulated_rag_context_str}")
  if rag_hops_details_list:
- query_gen_prompt_parts.append("Previous search attempts and their summarized findings:")
- for prev_hop in rag_hops_details_list:
- query_gen_prompt_parts.append(f" - Queried for: '{prev_hop['query']}', Summary: '{prev_hop.get('new_information_summary', 'N/A')}'")
+ query_gen_prompt_parts.append("\nPrevious search queries and number of chunks found:")
+ for i, prev_hop in enumerate(rag_hops_details_list):
+ num_chunks_found_in_hop = len(prev_hop.get("retrieved_chunks_details", []))
+ query_gen_prompt_parts.append(f" - Query {i+1}: '{prev_hop['query']}' (Found {num_chunks_found_in_hop} chunks)")

- query_gen_prompt_parts.append("Based on this, what is the most effective and specific search query to perform next to get closer to answering the user's request? Output only the search query text, nothing else.")
+ query_gen_prompt_parts.append("\nBased on the original request and the queries already attempted, what is the most effective and specific *new* search query to perform next to get closer to answering the user's request? The query should aim to find information not likely covered by previous queries. Output only the search query text, nothing else.")
  query_gen_prompt_parts.append(self.ai_full_header)

- new_query_text_raw = self.remove_thinking_blocks(self.generate_text(prompt="".join(query_gen_prompt_parts), temperature=rag_hop_query_generation_temperature, n_predict=100, stream=False))
+ new_query_text_raw = self.generate_text(
+ prompt="".join(query_gen_prompt_parts),
+ system_prompt=system_prompt_q_gen,
+ temperature=rag_hop_query_generation_temperature,
+ n_predict=100,
+ stream=False
+ )
+
  if isinstance(new_query_text_raw, dict) and "error" in new_query_text_raw:
- return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Failed to generate RAG query: {new_query_text_raw['error']}"}
+ return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Failed to generate RAG query for hop {hop_count + 1}: {new_query_text_raw['error']}"}

- current_query_for_rag = new_query_text_raw.strip().replace("Search query:", "").replace("Query:", "").strip("\"'")
+ current_query_for_rag = self.remove_thinking_blocks(new_query_text_raw).strip().replace("Search query:", "").replace("Query:", "").strip("\"'")

  if streaming_callback:
- streaming_callback(f"Generated RAG query: {current_query_for_rag}", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "rag_query_generation", "hop": hop_count + 1, "query": current_query_for_rag}, turn_rag_history_for_callback)
-
- elif current_query_for_rag is None and max_rag_hops == 0:
+ streaming_callback(f"Generated RAG query for hop {hop_count + 1}: {current_query_for_rag}", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "rag_query_generation", "hop": hop_count + 1, "query": current_query_for_rag}, turn_rag_history_for_callback)
+
+ elif current_query_for_rag is None: # First hop, and no rag_query_text provided
  current_query_for_rag = original_user_prompt
+
+ # If current_query_for_rag was provided as an argument, it's used for the first hop.

  if not current_query_for_rag:
- if max_rag_hops > 0 and hop_count < max_rag_hops:
- ASCIIColors.warning(f"RAG Hop {hop_count + 1}: Generated query was empty. Skipping hop.")
- rag_hops_details_list.append({"query": "EMPTY_QUERY_SKIPPED", "retrieved_chunks_details": [], "new_information_summary": "Skipped due to empty query.", "llm_decision_json": {"need_more_data": True if hop_count < max_rag_hops -1 else False}})
- turn_rag_history_for_callback.append({"type":"rag_hop_info", "hop": hop_count + 1, "query": "EMPTY_QUERY_SKIPPED", "summary":"Skipped."})
- continue
- else:
- ASCIIColors.warning("RAG query is empty. Proceeding without RAG context.")
- break
+ ASCIIColors.warning(f"RAG Hop {hop_count + 1}: Query is empty. Stopping RAG process.")
+ # Add a detail for this aborted hop
+ rag_hops_details_list.append({
+ "query": "EMPTY_QUERY_STOPPED_HOPS",
+ "retrieved_chunks_details": [],
+ "status": "Query became empty, RAG stopped."
+ })
+ turn_rag_history_for_callback.append({"type":"rag_hop_info", "hop": hop_count + 1, "query": "EMPTY_QUERY_STOPPED_HOPS", "status":"Stopped."})
+ break # Stop if query is empty

  # 2. Perform RAG Query
  if streaming_callback:
- streaming_callback(f"Querying knowledge base for: '{current_query_for_rag}'...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_retrieval", "hop": hop_count + 1, "query": current_query_for_rag}, turn_rag_history_for_callback)
+ streaming_callback(f"Querying knowledge base for (Hop {hop_count + 1}): '{current_query_for_rag}'...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_retrieval", "hop": hop_count + 1, "query": current_query_for_rag}, turn_rag_history_for_callback)

  try:
- retrieved_chunks_raw = rag_query_function(current_query_for_rag, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
+ retrieved_chunks_raw_this_hop = rag_query_function(current_query_for_rag, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
  except Exception as e_rag_query:
  trace_exception(e_rag_query)
- return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"RAG query function failed: {e_rag_query}"}
+ return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"RAG query function failed on hop {hop_count + 1}: {e_rag_query}"}

  if streaming_callback:
- streaming_callback(f"Retrieved {len(retrieved_chunks_raw)} chunks.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "rag_retrieval", "hop": hop_count + 1, "num_chunks": len(retrieved_chunks_raw)}, turn_rag_history_for_callback)
+ streaming_callback(f"Retrieved {len(retrieved_chunks_raw_this_hop)} chunks for hop {hop_count + 1}.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "rag_retrieval", "hop": hop_count + 1, "num_chunks": len(retrieved_chunks_raw_this_hop)}, turn_rag_history_for_callback)

- current_hop_details = {"query": current_query_for_rag, "retrieved_chunks_details": []}
-
- formatted_new_chunks_for_llm_summary = ""
- if retrieved_chunks_raw:
- for i, chunk in enumerate(retrieved_chunks_raw):
+ current_hop_chunk_details_for_history = []
+ new_chunks_added_this_hop = 0
+ if retrieved_chunks_raw_this_hop:
+ for chunk in retrieved_chunks_raw_this_hop:
  doc_path = chunk.get('file_path', 'Unknown Document')
- similarity = chunk.get('similarity_percent', 'N/A')
  content = chunk.get('chunk_text', '')
+ similarity = chunk.get('similarity_percent', 0.0) # Default to 0.0 if not present
+
+ # Ensure content is string and similarity is float for sorting later
+ if not isinstance(content, str): content = str(content)
+ try:
+ similarity = float(similarity)
+ except (ValueError, TypeError):
+ similarity = 0.0 # Default if conversion fails
+
+ chunk_detail_for_map_and_history = {
+ "document": doc_path,
+ "similarity": similarity,
+ "content": content,
+ "retrieved_in_hop": hop_count + 1,
+ "query_used": current_query_for_rag
+ }
+ current_hop_chunk_details_for_history.append(chunk_detail_for_map_and_history)

- chunk_detail_for_history = {"document": doc_path, "similarity": similarity, "content": content}
- current_hop_details["retrieved_chunks_details"].append(chunk_detail_for_history)
-
- # Add to unique list for final output
- # Use a combination of path and content to uniquely identify a chunk to avoid duplicates if same content appears from different queries.
- # A more robust unique key might involve hashing content if it's very large.
- unique_key = f"{doc_path}::{content[:100]}" # Simple key
+ unique_key = f"{doc_path}::{content[:100]}" # Simple key for uniqueness
  if unique_key not in all_unique_retrieved_chunks_map:
- all_unique_retrieved_chunks_map[unique_key] = chunk_detail_for_history
-
- # Format for LLM processing (summary or direct use)
- formatted_new_chunks_for_llm_summary += f"Document: {doc_path} (Similarity: {similarity}%)\nContent:\n{content}\n---\n"
+ all_unique_retrieved_chunks_map[unique_key] = chunk_detail_for_map_and_history
+ new_chunks_added_this_hop +=1

- if not retrieved_chunks_raw:
- current_hop_details["new_information_summary"] = "No relevant information found for this query."
- current_hop_details["llm_decision_json"] = {"need_more_data": True if max_rag_hops > 0 and hop_count < max_rag_hops -1 else False, "reasoning_for_decision":"No new information retrieved."}
- rag_hops_details_list.append(current_hop_details)
- turn_rag_history_for_callback.append({"type":"rag_hop_info", **current_hop_details})
- if max_rag_hops == 0 or hop_count >= max_rag_hops -1 :
- break
- else:
- accumulated_rag_context_str += f"\n\n---\nAttempted query: '{current_query_for_rag}' - No new information found.\n---"
- continue
-
- if max_rag_hops == 0: # Classic RAG
- accumulated_rag_context_str += formatted_new_chunks_for_llm_summary
- current_hop_details["new_information_summary"] = "Directly used in context (classic RAG)."
- current_hop_details["llm_decision_json"] = {"need_more_data": False}
- rag_hops_details_list.append(current_hop_details)
- turn_rag_history_for_callback.append({"type":"rag_hop_info", **current_hop_details})
- break
-
- # Multi-hop: LLM summarizes and decides
- if streaming_callback:
- streaming_callback("LLM processing retrieved data and deciding next step...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_llm_decision", "hop": hop_count + 1}, turn_rag_history_for_callback)
-
- decision_prompt_llm_parts = [
- f"{self.system_full_header}You are an AI research assistant. Analyze newly retrieved information against the user's request and prior knowledge, then decide if more searching is needed.",
- f"{self.user_full_header}Original user request: '{original_user_prompt}'",
- ]
- if accumulated_rag_context_str:
- decision_prompt_llm_parts.append(f"Current accumulated knowledge summary:\n{accumulated_rag_context_str}")
- decision_prompt_llm_parts.append(f"You just searched for: '{current_query_for_rag}'")
- decision_prompt_llm_parts.append(f"And found this new information:\n--- New Information Start ---\n{formatted_new_chunks_for_llm_summary}--- New Information End ---")
- decision_prompt_llm_parts.append(
- "Task: Provide a concise summary of ONLY the new information relevant to the original request. "
- "Then, assess if you now have sufficient information to comprehensively answer the user's original request or if another, more targeted search is necessary. "
- "Respond STRICTLY in the following JSON format, with no other text before or after the JSON block:"
- )
- json_template_for_decision = """
- {
- "new_information_summary": "<Your concise summary of ONLY the new_information relevant to the original_user_request. Focus on what's new and useful. If nothing new is relevant, state that.>",
- "need_more_data": <true_or_false>,
- "reasoning_for_decision": "<Briefly explain why you need more data or why you have enough. If needing more, suggest what kind of information is still missing.>"
- }
- """
- decision_prompt_llm_parts.append(f"```json\n{json_template_for_decision}\n```")
- decision_prompt_llm_parts.append(self.ai_full_header)
-
- llm_decision_json_str = self.generate_code(prompt="".join(decision_prompt_llm_parts), language="json", template=json_template_for_decision, temperature=rag_hop_summary_temperature, max_size=1024)
-
- if isinstance(llm_decision_json_str, dict) and "error" in llm_decision_json_str:
- return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"LLM failed to make RAG decision: {llm_decision_json_str['error']}"}
- if not llm_decision_json_str:
- return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": "LLM provided empty decision for RAG hop."}
-
- try:
- llm_decision = json.loads(llm_decision_json_str)
- except json.JSONDecodeError:
- try:
- match = re.search(r"```json\s*(\{.*?\})\s*```", llm_decision_json_str, re.DOTALL)
- if match: llm_decision = json.loads(match.group(1))
- else: llm_decision = json.loads(self.extract_code_blocks(llm_decision_json_str, format="markdown")[0]["content"])
- except Exception as e_json_parse:
- trace_exception(e_json_parse)
- return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Failed to parse LLM RAG decision JSON: {llm_decision_json_str}. Error: {e_json_parse}"}
-
- new_summary = llm_decision.get("new_information_summary", "Summary not provided by LLM.")
- need_more_data = llm_decision.get("need_more_data", True)
-
- current_hop_details["new_information_summary"] = new_summary
- current_hop_details["llm_decision_json"] = llm_decision
+ hop_status = "Completed"
+ if not retrieved_chunks_raw_this_hop:
+ hop_status = "No chunks retrieved for this query."
+ elif new_chunks_added_this_hop == 0 and hop_count > 0: # Only consider "no new unique chunks" for subsequent hops
+ hop_status = "No *new* unique chunks retrieved."
+ # Optionally, could break here if no new unique chunks are found in a multi-hop scenario
+ # ASCIIColors.warning(f"RAG Hop {hop_count + 1}: No new unique chunks found. Consider stopping if this persists.")
+
+
+ current_hop_details = {
+ "query": current_query_for_rag,
+ "retrieved_chunks_details": current_hop_chunk_details_for_history, # Chunks from THIS hop
+ "status": hop_status
+ }
  rag_hops_details_list.append(current_hop_details)
  turn_rag_history_for_callback.append({"type":"rag_hop_info", **current_hop_details})

+ # Reset for next potential query generation if it's not the last planned hop
+ if hop_count < max_rag_hops:
+ current_query_for_rag = None
+ else: # This was the last hop
+ break
+
+
+ # 3. Prepare Final Context from All Unique Retrieved Chunks
+ accumulated_rag_context_str = ""
+ if all_unique_retrieved_chunks_map:
  if streaming_callback:
- streaming_callback(f"LLM decision: Summary='{new_summary[:100]}...', NeedMoreData={need_more_data}", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "rag_llm_decision", "hop": hop_count + 1, "decision": llm_decision}, turn_rag_history_for_callback)
+ streaming_callback("Preparing final RAG context from all retrieved chunks...", MSG_TYPE.MSG_TYPE_STEP, {"type": "context_preparation"}, turn_rag_history_for_callback)

- accumulated_rag_context_str += f"\n\n--- Summary of findings from query '{current_query_for_rag}' (Hop {hop_count + 1}) ---\n{new_summary}\n---"
+ # Sort all unique chunks by similarity (highest first)
+ sorted_unique_chunks = sorted(
+ list(all_unique_retrieved_chunks_map.values()),
+ key=lambda c: c.get('similarity', 0.0),
+ reverse=True
+ )

- if not need_more_data or hop_count >= max_rag_hops -1 : # Subtract 1 because current hop is finishing
- break
+ current_context_chars = 0
+ chunks_used_in_final_context = 0
+ context_lines = []
+ for chunk in sorted_unique_chunks:
+ chunk_text_to_add = f"Source: {chunk['document']} (Similarity: {chunk['similarity']:.2f}%, Hop: {chunk['retrieved_in_hop']}, Query: '{chunk['query_used']}')\nContent:\n{chunk['content']}\n---\n"
+ if current_context_chars + len(chunk_text_to_add) <= max_rag_context_characters:
+ context_lines.append(chunk_text_to_add)
+ current_context_chars += len(chunk_text_to_add)
+ chunks_used_in_final_context +=1
+ else:
+ ASCIIColors.warning(f"Reached max RAG context character limit ({max_rag_context_characters}). Used {chunks_used_in_final_context} of {len(sorted_unique_chunks)} unique chunks.")
+ break
+ accumulated_rag_context_str = "".join(context_lines)
+
+ if streaming_callback:
+ streaming_callback(f"Final RAG context prepared using {chunks_used_in_final_context} chunks ({current_context_chars} chars).", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "context_preparation", "num_chunks_in_context": chunks_used_in_final_context, "chars_in_context": current_context_chars}, turn_rag_history_for_callback)
+

  # 4. Final Answer Generation
  if streaming_callback:
- streaming_callback("LLM generating final answer using all gathered information...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "final_answer_generation"}, turn_rag_history_for_callback)
+ streaming_callback("LLM generating final answer...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "final_answer_generation"}, turn_rag_history_for_callback)

- final_answer_prompt_parts = []
- if system_prompt:
- final_answer_prompt_parts.append(f"{self.system_full_header}{system_prompt}")
-
- final_answer_prompt_parts.append(f"{self.user_full_header}Original request: {original_user_prompt}")
+ final_answer_prompt_parts = [f"Original request: {original_user_prompt}"]
  if accumulated_rag_context_str:
- final_answer_prompt_parts.append(f"\nBased on the information I have gathered:\n--- Gathered Context Start ---\n{accumulated_rag_context_str.strip()}\n--- Gathered Context End ---")
+ final_answer_prompt_parts.append(f"\nBased on the following information I have gathered from a knowledge base:\n--- Gathered Context Start ---\n{accumulated_rag_context_str.strip()}\n--- Gathered Context End ---")
  else:
  final_answer_prompt_parts.append("\n(No specific information was retrieved from the knowledge base for this request.)")

- final_answer_prompt_parts.append("\nPlease provide a comprehensive answer to the original request using ONLY the provided gathered context. If the context is insufficient, clearly state that.")
+ final_answer_prompt_parts.append("\nPlease provide a comprehensive answer to the original request using ONLY the provided gathered context. If the context is insufficient, clearly state that. If the context contains code examples, ensure they are accurately reproduced.")
  final_answer_prompt_parts.append(self.ai_full_header)

  final_answer_llm_prompt = "\n".join(final_answer_prompt_parts)

- final_answer_streaming_callback = None
- if streaming_callback:
- def final_answer_cb_adapter(chunk, msg_type):
- return streaming_callback(chunk, msg_type, {"type": "final_answer_chunk"}, turn_rag_history_for_callback)
- final_answer_streaming_callback = final_answer_cb_adapter
+ final_answer_streaming_callback_adapted = None
+ if streaming_callback and stream:
+ def final_answer_cb_adapter(chunk_text, msg_type_llm):
+ return streaming_callback(chunk_text, msg_type_llm, {"type": "final_answer_chunk"}, turn_rag_history_for_callback)
+ final_answer_streaming_callback_adapted = final_answer_cb_adapter
+
+ actual_streaming_cb_for_generate = final_answer_streaming_callback_adapted if stream else None

- final_answer_text = self.remove_thinking_blocks(self.generate_text(
- prompt=final_answer_llm_prompt, images=images,
+ final_answer_raw = self.generate_text(
+ prompt=final_answer_llm_prompt, images=images, system_prompt=system_prompt,
  n_predict=n_predict, stream=stream, temperature=temperature, top_k=top_k, top_p=top_p,
  repeat_penalty=repeat_penalty, repeat_last_n=repeat_last_n, seed=seed, n_threads=n_threads,
- ctx_size=ctx_size, streaming_callback=final_answer_streaming_callback, **llm_generation_kwargs
- ))
-
- if streaming_callback:
- streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "final_answer_generation"}, turn_rag_history_for_callback)
+ ctx_size=ctx_size, streaming_callback=actual_streaming_cb_for_generate, **llm_generation_kwargs
+ )

- if isinstance(final_answer_text, dict) and "error" in final_answer_text:
- return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Final answer generation failed: {final_answer_text['error']}"}
+ if isinstance(final_answer_raw, dict) and "error" in final_answer_raw:
+ return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Final answer generation failed: {final_answer_raw['error']}"}

- return {"final_answer": final_answer_text, "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": None}
+ final_answer_text = self.remove_thinking_blocks(final_answer_raw)

+ if streaming_callback:
+ streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "final_answer_generation"}, turn_rag_history_for_callback)
+ if not stream and final_answer_text:
+ streaming_callback(final_answer_text, MSG_TYPE.MSG_TYPE_CHUNK, {"type": "final_answer_full"}, turn_rag_history_for_callback)
+
+ return {
+ "final_answer": final_answer_text,
+ "rag_hops_history": rag_hops_details_list,
+ "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), # All unique chunks found
+ "error": None
+ }
+
  def generate_code(
  self,
  prompt,
  images=[],
+ system_prompt=None,
  template=None,
  language="json",
  code_tag_format="markdown", # or "html"
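
Taken together, this hunk drops the old per-hop summarize-and-decide step: every unique retrieved chunk is now kept, sorted by similarity, and concatenated into the final context until max_rag_context_characters is reached. The self-contained sketch below distills that assembly step with made-up data; it mirrors the logic added in the hunk but is not code from the package.

    # Standalone illustration of the new context-assembly behavior (illustrative data).
    chunks = [
        {"document": "a.md", "similarity": 91.0, "retrieved_in_hop": 1, "query_used": "q1", "content": "alpha " * 60},
        {"document": "b.md", "similarity": 72.5, "retrieved_in_hop": 2, "query_used": "q2", "content": "beta " * 60},
        {"document": "c.md", "similarity": 40.0, "retrieved_in_hop": 2, "query_used": "q2", "content": "gamma " * 60},
    ]
    max_rag_context_characters = 800  # small budget so the cut-off is visible

    context_parts, used_chars = [], 0
    for chunk in sorted(chunks, key=lambda c: c.get("similarity", 0.0), reverse=True):
        block = (f"Source: {chunk['document']} (Similarity: {chunk['similarity']:.2f}%, "
                 f"Hop: {chunk['retrieved_in_hop']}, Query: '{chunk['query_used']}')\n"
                 f"Content:\n{chunk['content']}\n---\n")
        if used_chars + len(block) > max_rag_context_characters:
            break  # as in the diff, assembly stops at the first chunk that does not fit
        context_parts.append(block)
        used_chars += len(block)

    print(f"kept {len(context_parts)} of {len(chunks)} chunks, {used_chars} characters")

Lower-similarity chunks are therefore the first to be dropped when the character budget runs out, regardless of which hop retrieved them.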
@@ -1111,8 +1109,8 @@ Respond with a JSON object containing ONE of the following structures:
  Uses the underlying LLM binding via `generate_text`.
  Handles potential continuation if the code block is incomplete.
  """
-
- system_prompt = f"""Act as a code generation assistant that generates code from user prompt."""
+ if not system_prompt:
+ system_prompt = f"""Act as a code generation assistant that generates code from user prompt."""

  if template:
  system_prompt += "Here is a template of the answer:\n"
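
With this change, generate_code accepts an optional system_prompt and only falls back to its built-in "code generation assistant" instruction when none is given. A hypothetical call sketch follows; lc is again assumed to be an already configured LollmsClient and the prompt text is illustrative.

    code = lc.generate_code(
        prompt="Write a function that reverses a string.",
        language="python",
        system_prompt="You are a senior Python developer. Return only the code.",  # optional override added in 0.19.7
    )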
{lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lollms_client
- Version: 0.19.6
+ Version: 0.19.7
  Summary: A client library for LoLLMs generate endpoint
  Author-email: ParisNeo <parisneoai@gmail.com>
  License: Apache Software License