lollms-client 0.19.6__tar.gz → 0.19.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lollms-client has been flagged as possibly problematic by the registry.
- {lollms_client-0.19.6 → lollms_client-0.19.7}/PKG-INFO +1 -1
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/__init__.py +1 -1
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_core.py +145 -147
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client.egg-info/PKG-INFO +1 -1
- {lollms_client-0.19.6 → lollms_client-0.19.7}/LICENSE +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/README.md +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/article_summary/article_summary.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/deep_analyze/deep_analyse.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/deep_analyze/deep_analyze_multiple_files.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/function_calling_with_local_custom_mcp.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/generate_and_speak/generate_and_speak.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/generate_game_sfx/generate_game_fx.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/generate_text_with_multihop_rag_example.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/internet_search_with_rag.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/local_mcp.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/personality_test/chat_test.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/personality_test/chat_with_aristotle.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/personality_test/tesks_test.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/simple_text_gen_test.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/simple_text_gen_with_image_test.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/test_local_models/local_chat.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/text_2_audio.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/text_2_image.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/text_2_image_diffusers.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/text_and_image_2_audio.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/text_gen.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/examples/text_gen_system_prompt.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/llamacpp/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/ollama/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/openai/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/openllm/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/pythonllamacpp/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/tensor_rt/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/transformers/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/vllm/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_config.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_discussion.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_js_analyzer.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_llm_binding.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_mcp_binding.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_python_analyzer.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_stt_binding.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_tti_binding.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_ttm_binding.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_tts_binding.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_ttv_binding.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_types.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/lollms_utilities.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/mcp_bindings/local_mcp/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/mcp_bindings/local_mcp/default_tools/file_writer/file_writer.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/mcp_bindings/local_mcp/default_tools/generate_image_from_prompt/generate_image_from_prompt.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/mcp_bindings/local_mcp/default_tools/internet_search/internet_search.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/mcp_bindings/local_mcp/default_tools/python_interpreter/python_interpreter.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/stt_bindings/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/stt_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/stt_bindings/whisper/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/stt_bindings/whispercpp/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tti_bindings/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tti_bindings/dalle/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tti_bindings/diffusers/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tti_bindings/gemini/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tti_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/ttm_bindings/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/ttm_bindings/audiocraft/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/ttm_bindings/bark/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/ttm_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tts_bindings/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tts_bindings/bark/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tts_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tts_bindings/piper_tts/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tts_bindings/xtts/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/ttv_bindings/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/ttv_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client.egg-info/SOURCES.txt +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client.egg-info/dependency_links.txt +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client.egg-info/requires.txt +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client.egg-info/top_level.txt +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/pyproject.toml +0 -0
- {lollms_client-0.19.6 → lollms_client-0.19.7}/setup.cfg +0 -0
lollms_client/__init__.py
@@ -7,7 +7,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utilities
 from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
 
 
-__version__ = "0.19.6"
+__version__ = "0.19.7" # Updated version
 
 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
lollms_client/lollms_core.py
@@ -853,8 +853,6 @@ Respond with a JSON object containing ONE of the following structures:
             turn_history.append({"type":"final_answer_generated", "content":final_answer_text})
             return {"final_answer": final_answer_text, "tool_calls": tool_calls_made_this_turn, "error": None}
 
-    # --- RAG ---
-
     def generate_text_with_rag(
         self,
         prompt: str,
@@ -878,16 +876,17 @@ Respond with a JSON object containing ONE of the following structures:
         ctx_size: int | None = None,
         streaming_callback: Optional[Callable[[str, MSG_TYPE, Optional[Dict], Optional[List]], bool]] = None,
         rag_hop_query_generation_temperature: float = 0.2,
-        rag_hop_summary_temperature
+        # rag_hop_summary_temperature is no longer needed
+        max_rag_context_characters: int = 32000,
         **llm_generation_kwargs
         ) -> Dict[str, Any]:
         if not self.binding:
             return {"final_answer": "", "rag_hops_history": [], "all_retrieved_sources": [], "error": "LLM binding not initialized."}
 
         turn_rag_history_for_callback: List[Dict[str, Any]] = []
-        accumulated_rag_context_str = ""
         rag_hops_details_list: List[Dict[str, Any]] = []
-
+        # Stores all unique chunks with their full details, keyed by a unique identifier (e.g., path + content hash snippet)
+        all_unique_retrieved_chunks_map: Dict[str, Dict[str, Any]] = {}
         current_query_for_rag = rag_query_text
         original_user_prompt = prompt
 
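To make the new signature concrete, here is a hedged usage sketch against 0.19.7. The keyword arguments (`rag_query_function`, `max_rag_hops`, `max_rag_context_characters`, `streaming_callback`) and the chunk dictionary keys (`file_path`, `chunk_text`, `similarity_percent`) are taken from this diff; the `LollmsClient()` construction, the in-memory knowledge base, and the callback body are hypothetical.

```python
# Hedged usage sketch for generate_text_with_rag in lollms-client 0.19.7.
# The retriever contract and chunk keys mirror what this diff shows; the
# client construction and sample data are illustrative only.
from lollms_client import LollmsClient

lc = LollmsClient()  # hypothetical default construction; real usage may need host/binding settings

KNOWLEDGE_BASE = [
    {"file_path": "docs/install.md", "chunk_text": "Install with `pip install lollms-client`.", "similarity_percent": 91.2},
    {"file_path": "docs/rag.md", "chunk_text": "Multi-hop RAG accumulates unique chunks across hops.", "similarity_percent": 84.7},
]

def my_rag_query(query_text, vectorizer_name, top_k, min_similarity_percent):
    """Toy retriever honoring the contract used in the diff: returns a list of
    dicts with file_path, chunk_text and similarity_percent."""
    threshold = min_similarity_percent or 0
    hits = [c for c in KNOWLEDGE_BASE if c["similarity_percent"] >= threshold]
    return hits[:top_k]

def my_callback(text, msg_type, metadata=None, history=None) -> bool:
    # Matches the documented Callable[[str, MSG_TYPE, Optional[Dict], Optional[List]], bool] shape.
    print(f"[{msg_type}] {text}")
    return True  # returning False would signal the client to stop

result = lc.generate_text_with_rag(
    prompt="How do I install the client?",
    rag_query_function=my_rag_query,
    max_rag_hops=2,
    max_rag_context_characters=32000,   # new in 0.19.7
    streaming_callback=my_callback,     # rag_hop_summary_temperature no longer exists
)
print(result["final_answer"])
print(result["all_retrieved_sources"])
```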
@@ -896,205 +895,204 @@ Respond with a JSON object containing ONE of the following structures:
                 streaming_callback(f"Starting RAG Hop {hop_count + 1}", MSG_TYPE.MSG_TYPE_STEP, {"type": "rag_hop_start", "hop": hop_count + 1}, turn_rag_history_for_callback)
 
             # 1. Determine/Generate RAG Query Text
-            if hop_count > 0
+            if hop_count > 0: # Query generation for multi-hop (hop 2 onwards)
                 if streaming_callback:
                     streaming_callback("LLM generating refined RAG query...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_query_generation", "hop": hop_count + 1}, turn_rag_history_for_callback)
 
+                system_prompt_q_gen = "You are an expert research assistant. Your task is to formulate the best possible *new* search query to find additional information relevant to the user's original request, considering previous search attempts."
                 query_gen_prompt_parts = [
-                    f"
-                    f"{self.user_full_header}Original user request: '{original_user_prompt}'"
+                    f"Original user request:\n'{original_user_prompt}'"
                 ]
-                if accumulated_rag_context_str:
-                    query_gen_prompt_parts.append(f"Information gathered so far (summaries):\n{accumulated_rag_context_str}")
                 if rag_hops_details_list:
-                    query_gen_prompt_parts.append("
-                    for prev_hop in rag_hops_details_list:
-
+                    query_gen_prompt_parts.append("\nPrevious search queries and number of chunks found:")
+                    for i, prev_hop in enumerate(rag_hops_details_list):
+                        num_chunks_found_in_hop = len(prev_hop.get("retrieved_chunks_details", []))
+                        query_gen_prompt_parts.append(f" - Query {i+1}: '{prev_hop['query']}' (Found {num_chunks_found_in_hop} chunks)")
 
-                query_gen_prompt_parts.append("
+                query_gen_prompt_parts.append("\nBased on the original request and the queries already attempted, what is the most effective and specific *new* search query to perform next to get closer to answering the user's request? The query should aim to find information not likely covered by previous queries. Output only the search query text, nothing else.")
                 query_gen_prompt_parts.append(self.ai_full_header)
 
-                new_query_text_raw = self.
+                new_query_text_raw = self.generate_text(
+                    prompt="".join(query_gen_prompt_parts),
+                    system_prompt=system_prompt_q_gen,
+                    temperature=rag_hop_query_generation_temperature,
+                    n_predict=100,
+                    stream=False
+                )
+
                 if isinstance(new_query_text_raw, dict) and "error" in new_query_text_raw:
-                    return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Failed to generate RAG query: {new_query_text_raw['error']}"}
+                    return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Failed to generate RAG query for hop {hop_count + 1}: {new_query_text_raw['error']}"}
 
-                current_query_for_rag = new_query_text_raw.strip().replace("Search query:", "").replace("Query:", "").strip("\"'")
+                current_query_for_rag = self.remove_thinking_blocks(new_query_text_raw).strip().replace("Search query:", "").replace("Query:", "").strip("\"'")
 
                 if streaming_callback:
-                    streaming_callback(f"Generated RAG query: {current_query_for_rag}", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "rag_query_generation", "hop": hop_count + 1, "query": current_query_for_rag}, turn_rag_history_for_callback)
-
-            elif current_query_for_rag is None and
+                    streaming_callback(f"Generated RAG query for hop {hop_count + 1}: {current_query_for_rag}", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "rag_query_generation", "hop": hop_count + 1, "query": current_query_for_rag}, turn_rag_history_for_callback)
+
+            elif current_query_for_rag is None: # First hop, and no rag_query_text provided
                 current_query_for_rag = original_user_prompt
+
+            # If current_query_for_rag was provided as an argument, it's used for the first hop.
 
             if not current_query_for_rag:
-
-
-
-
-
-
-
-
+                ASCIIColors.warning(f"RAG Hop {hop_count + 1}: Query is empty. Stopping RAG process.")
+                # Add a detail for this aborted hop
+                rag_hops_details_list.append({
+                    "query": "EMPTY_QUERY_STOPPED_HOPS",
+                    "retrieved_chunks_details": [],
+                    "status": "Query became empty, RAG stopped."
+                })
+                turn_rag_history_for_callback.append({"type":"rag_hop_info", "hop": hop_count + 1, "query": "EMPTY_QUERY_STOPPED_HOPS", "status":"Stopped."})
+                break # Stop if query is empty
 
             # 2. Perform RAG Query
             if streaming_callback:
-                streaming_callback(f"Querying knowledge base for: '{current_query_for_rag}'...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_retrieval", "hop": hop_count + 1, "query": current_query_for_rag}, turn_rag_history_for_callback)
+                streaming_callback(f"Querying knowledge base for (Hop {hop_count + 1}): '{current_query_for_rag}'...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_retrieval", "hop": hop_count + 1, "query": current_query_for_rag}, turn_rag_history_for_callback)
 
             try:
-
+                retrieved_chunks_raw_this_hop = rag_query_function(current_query_for_rag, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
             except Exception as e_rag_query:
                 trace_exception(e_rag_query)
-                return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"RAG query function failed: {e_rag_query}"}
+                return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"RAG query function failed on hop {hop_count + 1}: {e_rag_query}"}
 
             if streaming_callback:
-                streaming_callback(f"Retrieved {len(
+                streaming_callback(f"Retrieved {len(retrieved_chunks_raw_this_hop)} chunks for hop {hop_count + 1}.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "rag_retrieval", "hop": hop_count + 1, "num_chunks": len(retrieved_chunks_raw_this_hop)}, turn_rag_history_for_callback)
 
-
-
-
-
-            for i, chunk in enumerate(retrieved_chunks_raw):
+            current_hop_chunk_details_for_history = []
+            new_chunks_added_this_hop = 0
+            if retrieved_chunks_raw_this_hop:
+                for chunk in retrieved_chunks_raw_this_hop:
                     doc_path = chunk.get('file_path', 'Unknown Document')
-                similarity = chunk.get('similarity_percent', 'N/A')
                     content = chunk.get('chunk_text', '')
+                    similarity = chunk.get('similarity_percent', 0.0) # Default to 0.0 if not present
+
+                    # Ensure content is string and similarity is float for sorting later
+                    if not isinstance(content, str): content = str(content)
+                    try:
+                        similarity = float(similarity)
+                    except (ValueError, TypeError):
+                        similarity = 0.0 # Default if conversion fails
+
+                    chunk_detail_for_map_and_history = {
+                        "document": doc_path,
+                        "similarity": similarity,
+                        "content": content,
+                        "retrieved_in_hop": hop_count + 1,
+                        "query_used": current_query_for_rag
+                    }
+                    current_hop_chunk_details_for_history.append(chunk_detail_for_map_and_history)
 
-
-                current_hop_details["retrieved_chunks_details"].append(chunk_detail_for_history)
-
-                # Add to unique list for final output
-                # Use a combination of path and content to uniquely identify a chunk to avoid duplicates if same content appears from different queries.
-                # A more robust unique key might involve hashing content if it's very large.
-                unique_key = f"{doc_path}::{content[:100]}" # Simple key
+                    unique_key = f"{doc_path}::{content[:100]}" # Simple key for uniqueness
                     if unique_key not in all_unique_retrieved_chunks_map:
-                    all_unique_retrieved_chunks_map[unique_key] =
-
-                # Format for LLM processing (summary or direct use)
-                formatted_new_chunks_for_llm_summary += f"Document: {doc_path} (Similarity: {similarity}%)\nContent:\n{content}\n---\n"
+                        all_unique_retrieved_chunks_map[unique_key] = chunk_detail_for_map_and_history
+                        new_chunks_added_this_hop +=1
 
-
-
-
-
-
-            if
-
-
-
-
-
-
-
-
-            current_hop_details["llm_decision_json"] = {"need_more_data": False}
-            rag_hops_details_list.append(current_hop_details)
-            turn_rag_history_for_callback.append({"type":"rag_hop_info", **current_hop_details})
-            break
-
-            # Multi-hop: LLM summarizes and decides
-            if streaming_callback:
-                streaming_callback("LLM processing retrieved data and deciding next step...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_llm_decision", "hop": hop_count + 1}, turn_rag_history_for_callback)
-
-            decision_prompt_llm_parts = [
-                f"{self.system_full_header}You are an AI research assistant. Analyze newly retrieved information against the user's request and prior knowledge, then decide if more searching is needed.",
-                f"{self.user_full_header}Original user request: '{original_user_prompt}'",
-            ]
-            if accumulated_rag_context_str:
-                decision_prompt_llm_parts.append(f"Current accumulated knowledge summary:\n{accumulated_rag_context_str}")
-            decision_prompt_llm_parts.append(f"You just searched for: '{current_query_for_rag}'")
-            decision_prompt_llm_parts.append(f"And found this new information:\n--- New Information Start ---\n{formatted_new_chunks_for_llm_summary}--- New Information End ---")
-            decision_prompt_llm_parts.append(
-                "Task: Provide a concise summary of ONLY the new information relevant to the original request. "
-                "Then, assess if you now have sufficient information to comprehensively answer the user's original request or if another, more targeted search is necessary. "
-                "Respond STRICTLY in the following JSON format, with no other text before or after the JSON block:"
-            )
-            json_template_for_decision = """
-            {
-                "new_information_summary": "<Your concise summary of ONLY the new_information relevant to the original_user_request. Focus on what's new and useful. If nothing new is relevant, state that.>",
-                "need_more_data": <true_or_false>,
-                "reasoning_for_decision": "<Briefly explain why you need more data or why you have enough. If needing more, suggest what kind of information is still missing.>"
-            }
-            """
-            decision_prompt_llm_parts.append(f"```json\n{json_template_for_decision}\n```")
-            decision_prompt_llm_parts.append(self.ai_full_header)
-
-            llm_decision_json_str = self.generate_code(prompt="".join(decision_prompt_llm_parts), language="json", template=json_template_for_decision, temperature=rag_hop_summary_temperature, max_size=1024)
-
-            if isinstance(llm_decision_json_str, dict) and "error" in llm_decision_json_str:
-                return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"LLM failed to make RAG decision: {llm_decision_json_str['error']}"}
-            if not llm_decision_json_str:
-                return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": "LLM provided empty decision for RAG hop."}
-
-            try:
-                llm_decision = json.loads(llm_decision_json_str)
-            except json.JSONDecodeError:
-                try:
-                    match = re.search(r"```json\s*(\{.*?\})\s*```", llm_decision_json_str, re.DOTALL)
-                    if match: llm_decision = json.loads(match.group(1))
-                    else: llm_decision = json.loads(self.extract_code_blocks(llm_decision_json_str, format="markdown")[0]["content"])
-                except Exception as e_json_parse:
-                    trace_exception(e_json_parse)
-                    return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Failed to parse LLM RAG decision JSON: {llm_decision_json_str}. Error: {e_json_parse}"}
-
-            new_summary = llm_decision.get("new_information_summary", "Summary not provided by LLM.")
-            need_more_data = llm_decision.get("need_more_data", True)
-
-            current_hop_details["new_information_summary"] = new_summary
-            current_hop_details["llm_decision_json"] = llm_decision
+            hop_status = "Completed"
+            if not retrieved_chunks_raw_this_hop:
+                hop_status = "No chunks retrieved for this query."
+            elif new_chunks_added_this_hop == 0 and hop_count > 0: # Only consider "no new unique chunks" for subsequent hops
+                hop_status = "No *new* unique chunks retrieved."
+                # Optionally, could break here if no new unique chunks are found in a multi-hop scenario
+                # ASCIIColors.warning(f"RAG Hop {hop_count + 1}: No new unique chunks found. Consider stopping if this persists.")
+
+
+            current_hop_details = {
+                "query": current_query_for_rag,
+                "retrieved_chunks_details": current_hop_chunk_details_for_history, # Chunks from THIS hop
+                "status": hop_status
+            }
             rag_hops_details_list.append(current_hop_details)
             turn_rag_history_for_callback.append({"type":"rag_hop_info", **current_hop_details})
 
+            # Reset for next potential query generation if it's not the last planned hop
+            if hop_count < max_rag_hops:
+                current_query_for_rag = None
+            else: # This was the last hop
+                break
+
+
+        # 3. Prepare Final Context from All Unique Retrieved Chunks
+        accumulated_rag_context_str = ""
+        if all_unique_retrieved_chunks_map:
             if streaming_callback:
-            streaming_callback(
+                streaming_callback("Preparing final RAG context from all retrieved chunks...", MSG_TYPE.MSG_TYPE_STEP, {"type": "context_preparation"}, turn_rag_history_for_callback)
 
-
+            # Sort all unique chunks by similarity (highest first)
+            sorted_unique_chunks = sorted(
+                list(all_unique_retrieved_chunks_map.values()),
+                key=lambda c: c.get('similarity', 0.0),
+                reverse=True
+            )
 
-
-
+            current_context_chars = 0
+            chunks_used_in_final_context = 0
+            context_lines = []
+            for chunk in sorted_unique_chunks:
+                chunk_text_to_add = f"Source: {chunk['document']} (Similarity: {chunk['similarity']:.2f}%, Hop: {chunk['retrieved_in_hop']}, Query: '{chunk['query_used']}')\nContent:\n{chunk['content']}\n---\n"
+                if current_context_chars + len(chunk_text_to_add) <= max_rag_context_characters:
+                    context_lines.append(chunk_text_to_add)
+                    current_context_chars += len(chunk_text_to_add)
+                    chunks_used_in_final_context +=1
+                else:
+                    ASCIIColors.warning(f"Reached max RAG context character limit ({max_rag_context_characters}). Used {chunks_used_in_final_context} of {len(sorted_unique_chunks)} unique chunks.")
+                    break
+            accumulated_rag_context_str = "".join(context_lines)
+
+            if streaming_callback:
+                streaming_callback(f"Final RAG context prepared using {chunks_used_in_final_context} chunks ({current_context_chars} chars).", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "context_preparation", "num_chunks_in_context": chunks_used_in_final_context, "chars_in_context": current_context_chars}, turn_rag_history_for_callback)
+
 
         # 4. Final Answer Generation
         if streaming_callback:
-            streaming_callback("LLM generating final answer
+            streaming_callback("LLM generating final answer...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "final_answer_generation"}, turn_rag_history_for_callback)
 
-        final_answer_prompt_parts = []
-        if system_prompt:
-            final_answer_prompt_parts.append(f"{self.system_full_header}{system_prompt}")
-
-        final_answer_prompt_parts.append(f"{self.user_full_header}Original request: {original_user_prompt}")
+        final_answer_prompt_parts = [f"Original request: {original_user_prompt}"]
         if accumulated_rag_context_str:
-            final_answer_prompt_parts.append(f"\nBased on the information I have gathered:\n--- Gathered Context Start ---\n{accumulated_rag_context_str.strip()}\n--- Gathered Context End ---")
+            final_answer_prompt_parts.append(f"\nBased on the following information I have gathered from a knowledge base:\n--- Gathered Context Start ---\n{accumulated_rag_context_str.strip()}\n--- Gathered Context End ---")
         else:
             final_answer_prompt_parts.append("\n(No specific information was retrieved from the knowledge base for this request.)")
 
-        final_answer_prompt_parts.append("\nPlease provide a comprehensive answer to the original request using ONLY the provided gathered context. If the context is insufficient, clearly state that.")
+        final_answer_prompt_parts.append("\nPlease provide a comprehensive answer to the original request using ONLY the provided gathered context. If the context is insufficient, clearly state that. If the context contains code examples, ensure they are accurately reproduced.")
        final_answer_prompt_parts.append(self.ai_full_header)
 
         final_answer_llm_prompt = "\n".join(final_answer_prompt_parts)
 
-
-        if streaming_callback:
-            def final_answer_cb_adapter(
-                return streaming_callback(
-
+        final_answer_streaming_callback_adapted = None
+        if streaming_callback and stream:
+            def final_answer_cb_adapter(chunk_text, msg_type_llm):
+                return streaming_callback(chunk_text, msg_type_llm, {"type": "final_answer_chunk"}, turn_rag_history_for_callback)
+            final_answer_streaming_callback_adapted = final_answer_cb_adapter
+
+        actual_streaming_cb_for_generate = final_answer_streaming_callback_adapted if stream else None
 
-
-            prompt=final_answer_llm_prompt, images=images,
+        final_answer_raw = self.generate_text(
+            prompt=final_answer_llm_prompt, images=images, system_prompt=system_prompt,
             n_predict=n_predict, stream=stream, temperature=temperature, top_k=top_k, top_p=top_p,
             repeat_penalty=repeat_penalty, repeat_last_n=repeat_last_n, seed=seed, n_threads=n_threads,
-            ctx_size=ctx_size, streaming_callback=
-        )
-
-        if streaming_callback:
-            streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "final_answer_generation"}, turn_rag_history_for_callback)
+            ctx_size=ctx_size, streaming_callback=actual_streaming_cb_for_generate, **llm_generation_kwargs
+        )
 
-        if isinstance(
-            return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Final answer generation failed: {
+        if isinstance(final_answer_raw, dict) and "error" in final_answer_raw:
+            return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Final answer generation failed: {final_answer_raw['error']}"}
 
-
+        final_answer_text = self.remove_thinking_blocks(final_answer_raw)
 
+        if streaming_callback:
+            streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "final_answer_generation"}, turn_rag_history_for_callback)
+            if not stream and final_answer_text:
+                streaming_callback(final_answer_text, MSG_TYPE.MSG_TYPE_CHUNK, {"type": "final_answer_full"}, turn_rag_history_for_callback)
+
+        return {
+            "final_answer": final_answer_text,
+            "rag_hops_history": rag_hops_details_list,
+            "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), # All unique chunks found
+            "error": None
+        }
+
     def generate_code(
         self,
         prompt,
         images=[],
+        system_prompt=None,
         template=None,
         language="json",
         code_tag_format="markdown", # or "html"
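The behavioral core of this hunk: the per-hop LLM summarization step (and with it `rag_hop_summary_temperature`) is removed; instead, chunks from all hops are deduplicated with a `document::content-prefix` key, sorted by similarity, and packed into the final context until `max_rag_context_characters` is reached. Below is a minimal standalone sketch of that assembly strategy with made-up chunk data; it mirrors the logic shown in the diff rather than calling the library.

```python
# Standalone sketch of the 0.19.7 context-assembly strategy: deduplicate
# chunks across hops, sort by similarity, pack into a character budget.
# Chunk dicts use the same keys the diff introduces; the data is invented.
from typing import Dict, List

def build_rag_context(chunks: List[Dict], max_chars: int = 32000) -> str:
    unique: Dict[str, Dict] = {}
    for c in chunks:
        key = f"{c['document']}::{c['content'][:100]}"  # same simple uniqueness key as the diff
        unique.setdefault(key, c)

    context_parts, used = [], 0
    for c in sorted(unique.values(), key=lambda c: c.get("similarity", 0.0), reverse=True):
        part = (f"Source: {c['document']} (Similarity: {c['similarity']:.2f}%, "
                f"Hop: {c['retrieved_in_hop']}, Query: '{c['query_used']}')\n"
                f"Content:\n{c['content']}\n---\n")
        if used + len(part) > max_chars:
            break  # budget reached; remaining lower-similarity chunks are dropped
        context_parts.append(part)
        used += len(part)
    return "".join(context_parts)

print(build_rag_context([
    {"document": "a.md", "similarity": 92.0, "content": "alpha", "retrieved_in_hop": 1, "query_used": "q1"},
    {"document": "a.md", "similarity": 92.0, "content": "alpha", "retrieved_in_hop": 2, "query_used": "q2"},  # duplicate, dropped
    {"document": "b.md", "similarity": 70.5, "content": "beta", "retrieved_in_hop": 2, "query_used": "q2"},
]))
```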
@@ -1111,8 +1109,8 @@ Respond with a JSON object containing ONE of the following structures:
         Uses the underlying LLM binding via `generate_text`.
         Handles potential continuation if the code block is incomplete.
         """
-
-
+        if not system_prompt:
+            system_prompt = f"""Act as a code generation assistant that generates code from user prompt."""
 
         if template:
             system_prompt += "Here is a template of the answer:\n"
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/examples/deep_analyze/deep_analyze_multiple_files.py
RENAMED
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/examples/function_calling_with_local_custom_mcp.py
RENAMED
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/examples/generate_and_speak/generate_and_speak.py
RENAMED
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/examples/generate_game_sfx/generate_game_fx.py
RENAMED
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/examples/generate_text_with_multihop_rag_example.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/examples/personality_test/chat_with_aristotle.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/llamacpp/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/openllm/__init__.py
RENAMED
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/pythonllamacpp/__init__.py
RENAMED
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/tensor_rt/__init__.py
RENAMED
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/llm_bindings/transformers/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/mcp_bindings/local_mcp/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/stt_bindings/whisper/__init__.py
RENAMED
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/stt_bindings/whispercpp/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tti_bindings/diffusers/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/ttm_bindings/audiocraft/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lollms_client-0.19.6 → lollms_client-0.19.7}/lollms_client/tts_bindings/piper_tts/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|