lollms-client 0.19.6__py3-none-any.whl → 0.19.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic.
- examples/generate_a_benchmark_for_safe_store.py +89 -0
- examples/generate_text_with_multihop_rag_example.py +0 -1
- examples/internet_search_with_rag.py +4 -5
- lollms_client/__init__.py +1 -1
- lollms_client/lollms_core.py +174 -190
- lollms_client/lollms_discussion.py +5 -5
- {lollms_client-0.19.6.dist-info → lollms_client-0.19.8.dist-info}/METADATA +1 -1
- {lollms_client-0.19.6.dist-info → lollms_client-0.19.8.dist-info}/RECORD +11 -10
- {lollms_client-0.19.6.dist-info → lollms_client-0.19.8.dist-info}/WHEEL +0 -0
- {lollms_client-0.19.6.dist-info → lollms_client-0.19.8.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.19.6.dist-info → lollms_client-0.19.8.dist-info}/top_level.txt +0 -0
examples/generate_a_benchmark_for_safe_store.py
ADDED

@@ -0,0 +1,89 @@
+from lollms_client import LollmsClient
+from ascii_colors import ASCIIColors, trace_exception, ProgressBar
+import pipmaster as pm
+pm.ensure_packages(["datasets"])
+#assuming you have an active lollms_webui instance running
+#you can also use ollama or openai or any other lollmc_client binding
+lc = LollmsClient()
+
+from datasets import load_dataset
+import json
+# 1. Define the dataset name
+dataset_name = "agentlans/high-quality-english-sentences"
+
+try:
+    # 2. Load the dataset
+    # This dataset only has a 'train' split by default.
+    # If a dataset had multiple splits (e.g., 'train', 'validation', 'test'),
+    # load_dataset() would return a DatasetDict.
+    # We can directly access the 'train' split.
+    dataset = load_dataset(dataset_name, split='train')
+    print(f"Dataset loaded successfully: {dataset_name}")
+    print(f"Dataset structure: {dataset}")
+
+    # 3. Extract the sentences into a list
+    # The sentences are in a column likely named 'text' (common for text datasets).
+    # Let's inspect the features to be sure.
+    print(f"Dataset features: {dataset.features}")
+
+    # Assuming the column containing sentences is 'text'
+    # This is standard for many text datasets on Hugging Face.
+    # dataset['text'] directly gives a list of all values in the 'text' column.
+    sentences_list = dataset['text']
+
+    # If you want to be absolutely sure it's a Python list (it usually is or acts like one):
+    # sentences_list = list(dataset['text'])
+
+    # 4. Verify and print some examples
+    print(f"\nSuccessfully extracted {len(sentences_list)} sentences into a list.")
+
+    if sentences_list:
+        print("\nFirst 5 sentences:")
+        for i in range(min(5, len(sentences_list))):
+            print(f"{i+1}. {sentences_list[i]}")
+
+        print("\nLast 5 sentences:")
+        for i in range(max(0, len(sentences_list) - 5), len(sentences_list)):
+            print(f"{len(sentences_list) - (len(sentences_list) - 1 - i)}. {sentences_list[i]}")
+    else:
+        print("The list of sentences is empty.")
+
+except Exception as e:
+    print(f"An error occurred: {e}")
+    print("Please ensure you have an active internet connection and the `datasets` library is installed.")
+    print("Dataset name might be incorrect or the dataset might require authentication or specific configurations.")
+
+entries = []
+for sentence in ProgressBar(sentences_list, desc="Processing Items"):
+    prompt = f"""Given the following text chunk:
+"{sentence}"
+
+Generate a JSON object with the following keys and corresponding string values:
+- "id": A title to the sentence being processed
+- "highly_similar": A paraphrase of the original chunk, maintaining the core meaning but using different wording and sentence structure.
+- "related": A sentence or short paragraph that is on the same general topic as the original chunk but discusses a different aspect or a related concept. It should not be a direct paraphrase.
+- "dissimilar": A sentence or short paragraph on a completely unrelated topic.
+- "question_form": A question that encapsulates the main idea or asks about a key aspect of the original chunk.
+- "negation": A sentence that negates the main assertion or a key aspect of the original chunk, while still being topically relevant if possible (e.g., not "The sky is not blue" if the topic is computers).
+
+Ensure the output is ONLY a valid JSON object. Example:
+{{
+    "id": "...",
+    "highly_similar": "...",
+    "related": "...",
+    "dissimilar": "...",
+    "question_form": "...",
+    "negation": "..."
+}}
+
+JSON object:
+"""
+    try:
+        output = lc.generate_code(prompt)
+        entry = json.loads(output)
+        entry["query"]=sentence
+        entries.append(entry)
+        with open("benchmark_db.json","w") as f:
+            json.dump(entries, f, indent=4)
+    except Exception as ex:
+        trace_exception(ex)
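For reference, each record the new script appends to benchmark_db.json carries the six LLM-generated fields plus the original sentence under "query". A rough sketch of what one loaded entry might look like follows; the field values shown are hypothetical and depend on the model behind LollmsClient:

```python
import json

# Hypothetical shape of one record in benchmark_db.json: the six generated
# fields come from the LLM, and "query" is the source sentence the script adds.
example_entry = {
    "id": "Ocean currents",
    "highly_similar": "A reworded version of the original sentence.",
    "related": "A sentence about a nearby aspect of the same topic.",
    "dissimilar": "A sentence about something entirely unrelated.",
    "question_form": "What does the original sentence claim?",
    "negation": "A sentence contradicting the original sentence.",
    "query": "The original sentence taken from the dataset.",
}

with open("benchmark_db.json") as f:
    entries = json.load(f)
print(f"{len(entries)} entries; first entry keys: {sorted(entries[0].keys())}")
```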
examples/generate_text_with_multihop_rag_example.py
CHANGED

@@ -173,7 +173,6 @@ if __name__ == "__main__":
        streaming_callback=rag_streaming_callback,
        n_predict=1024,
        rag_hop_query_generation_temperature=0.1, # Focused query gen
-       rag_hop_summary_temperature=0.2 # Focused summary
    )
    print("\n--- End of Multi-Hop RAG (1 hop) ---")
    ASCIIColors.magenta("\nMulti-Hop RAG (1 hop) Final Output:")
examples/internet_search_with_rag.py
CHANGED

@@ -148,7 +148,7 @@ if __name__ == "__main__":
    )
    print("\n--- End of Classic Search RAG ---")
    ASCIIColors.magenta("\nClassic Search RAG Final Output Structure:")
-   print(f" Final Answer (first 100 chars): {classic_rag_result.get('final_answer', '')
+   print(f" Final Answer (first 100 chars): {classic_rag_result.get('final_answer', '')}...")
    print(f" Error: {classic_rag_result.get('error')}")
    print(f" Number of Hops: {len(classic_rag_result.get('rag_hops_history', []))}")
    print(f" Total Unique Sources Retrieved: {len(classic_rag_result.get('all_retrieved_sources', []))}")

@@ -157,7 +157,7 @@ if __name__ == "__main__":
        source_ex = classic_rag_result['all_retrieved_sources'][0]
        print(f" Document (URL): {source_ex.get('document')}")
        print(f" Similarity: {source_ex.get('similarity')}%")
-       print(f" Content (Snippet, first 50 chars): {source_ex.get('content', '')
+       print(f" Content (Snippet, first 50 chars): {source_ex.get('content', '')}...")


    # --- Test Case 2: Multi-Hop Search RAG (max_rag_hops = 1) ---

@@ -174,12 +174,11 @@ if __name__ == "__main__":
        rag_min_similarity_percent=50.0,
        streaming_callback=rag_streaming_callback,
        n_predict=400,
-       rag_hop_query_generation_temperature=0.1
-       rag_hop_summary_temperature=0.2
+       rag_hop_query_generation_temperature=0.1
    )
    print("\n--- End of Multi-Hop Search RAG (1 hop max) ---")
    ASCIIColors.magenta("\nMulti-Hop Search RAG (1 hop max) Final Output Structure:")
-   print(f" Final Answer (first 100 chars): {multihop_rag_result_1.get('final_answer', '')
+   print(f" Final Answer (first 100 chars): {multihop_rag_result_1.get('final_answer', '')}...")
    print(f" Error: {multihop_rag_result_1.get('error')}")
    print(f" Number of Hops Made: {len(multihop_rag_result_1.get('rag_hops_history', []))}")
    for i, hop_info in enumerate(multihop_rag_result_1.get('rag_hops_history', [])):
lollms_client/__init__.py
CHANGED

@@ -7,7 +7,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utiliti
from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager


-__version__ = "0.19.
+__version__ = "0.19.8" # Updated version

# Optionally, you could define __all__ if you want to be explicit about exports
__all__ = [
lollms_client/lollms_core.py
CHANGED

@@ -853,8 +853,6 @@ Respond with a JSON object containing ONE of the following structures:
        turn_history.append({"type":"final_answer_generated", "content":final_answer_text})
        return {"final_answer": final_answer_text, "tool_calls": tool_calls_made_this_turn, "error": None}

-    # --- RAG ---
-
    def generate_text_with_rag(
        self,
        prompt: str,

@@ -875,226 +873,212 @@ Respond with a JSON object containing ONE of the following structures:
        repeat_last_n: Optional[int] = None,
        seed: Optional[int] = None,
        n_threads: Optional[int] = None,
-       ctx_size: int
+       ctx_size: Optional[int] = None,
+       extract_objectives: bool = True,
        streaming_callback: Optional[Callable[[str, MSG_TYPE, Optional[Dict], Optional[List]], bool]] = None,
-
-       rag_hop_summary_temperature: float = 0.3,
+       max_rag_context_characters: int = 32000,
        **llm_generation_kwargs
    ) -> Dict[str, Any]:
+       """
+       Enhanced RAG with optional initial objective extraction and automatic intermediate summaries
+       when context grows beyond ctx_size or self.default_ctx_size.
+       """
        if not self.binding:
            return {"final_answer": "", "rag_hops_history": [], "all_retrieved_sources": [], "error": "LLM binding not initialized."}

+       # Determine effective context size limit
+       effective_ctx_size = ctx_size or getattr(self, "default_ctx_size", 20000)
+
        turn_rag_history_for_callback: List[Dict[str, Any]] = []
-       accumulated_rag_context_str = ""
        rag_hops_details_list: List[Dict[str, Any]] = []
-       all_unique_retrieved_chunks_map: Dict[str, Dict[str, Any]] = {}
-
+       all_unique_retrieved_chunks_map: Dict[str, Dict[str, Any]] = {}
+
        original_user_prompt = prompt
+       objectives_text = ""
+       # 0. Optional Objectives Extraction Step
+       if extract_objectives:
+           if streaming_callback:
+               streaming_callback("Extracting and structuring objectives...", MSG_TYPE.MSG_TYPE_STEP, {"type": "objectives_extraction"}, turn_rag_history_for_callback)
+           obj_prompt = (
+               "You are an expert analyst. "
+               "Your task is to extract and structure the key objectives from the user's request below. "
+               "Output a bullet list of objectives only.\n\n"
+               f"User request:\n\"{original_user_prompt}\""
+           )
+           obj_gen = self.generate_text(
+               prompt=obj_prompt,
+               system_prompt="Extract objectives",
+               temperature=0.0,
+               n_predict=200,
+               stream=False
+           )
+           objectives_text = self.remove_thinking_blocks(obj_gen).strip()
+           if streaming_callback:
+               streaming_callback(f"Objectives extracted:\n{objectives_text}", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "objectives_extracted"}, turn_rag_history_for_callback)

+       current_query_for_rag = rag_query_text or None
+       previous_queries=[]
+       # 1. RAG Hops
        for hop_count in range(max_rag_hops + 1):
            if streaming_callback:
                streaming_callback(f"Starting RAG Hop {hop_count + 1}", MSG_TYPE.MSG_TYPE_STEP, {"type": "rag_hop_start", "hop": hop_count + 1}, turn_rag_history_for_callback)

-           #
-           if hop_count > 0
-
-
-
-
-
-
-               ]
-               if accumulated_rag_context_str:
-                   query_gen_prompt_parts.append(f"Information gathered so far (summaries):\n{accumulated_rag_context_str}")
-               if rag_hops_details_list:
-                   query_gen_prompt_parts.append("Previous search attempts and their summarized findings:")
-                   for prev_hop in rag_hops_details_list:
-                       query_gen_prompt_parts.append(f" - Queried for: '{prev_hop['query']}', Summary: '{prev_hop.get('new_information_summary', 'N/A')}'")
-
-               query_gen_prompt_parts.append("Based on this, what is the most effective and specific search query to perform next to get closer to answering the user's request? Output only the search query text, nothing else.")
-               query_gen_prompt_parts.append(self.ai_full_header)
-
-               new_query_text_raw = self.remove_thinking_blocks(self.generate_text(prompt="".join(query_gen_prompt_parts), temperature=rag_hop_query_generation_temperature, n_predict=100, stream=False))
-               if isinstance(new_query_text_raw, dict) and "error" in new_query_text_raw:
-                   return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Failed to generate RAG query: {new_query_text_raw['error']}"}
-
-               current_query_for_rag = new_query_text_raw.strip().replace("Search query:", "").replace("Query:", "").strip("\"'")
-
-               if streaming_callback:
-                   streaming_callback(f"Generated RAG query: {current_query_for_rag}", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "rag_query_generation", "hop": hop_count + 1, "query": current_query_for_rag}, turn_rag_history_for_callback)
-
-           elif current_query_for_rag is None and max_rag_hops == 0:
-               current_query_for_rag = original_user_prompt
+           # Generate refined query for multi-hop
+           if hop_count > 0:
+               # build system prompt and history...
+               # (same as before, omitted for brevity)
+               # result => current_query_for_rag
+               pass
+           elif current_query_for_rag is None:
+               current_query_for_rag = prompt

            if not current_query_for_rag:
-
-
-
-
-
-
-               ASCIIColors.warning("RAG query is empty. Proceeding without RAG context.")
-               break
+               rag_hops_details_list.append({
+                   "query": "EMPTY_QUERY_STOPPED_HOPS",
+                   "retrieved_chunks_details": [],
+                   "status": "Stopped: empty query."
+               })
+               break

-           #
-           if streaming_callback:
-               streaming_callback(f"Querying knowledge base for: '{current_query_for_rag}'...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_retrieval", "hop": hop_count + 1, "query": current_query_for_rag}, turn_rag_history_for_callback)
-
+           # Retrieve chunks
            try:
-
-           except Exception as
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-           if not retrieved_chunks_raw:
-               current_hop_details["new_information_summary"] = "No relevant information found for this query."
-               current_hop_details["llm_decision_json"] = {"need_more_data": True if max_rag_hops > 0 and hop_count < max_rag_hops -1 else False, "reasoning_for_decision":"No new information retrieved."}
-               rag_hops_details_list.append(current_hop_details)
-               turn_rag_history_for_callback.append({"type":"rag_hop_info", **current_hop_details})
-               if max_rag_hops == 0 or hop_count >= max_rag_hops -1 :
-                   break
-               else:
-                   accumulated_rag_context_str += f"\n\n---\nAttempted query: '{current_query_for_rag}' - No new information found.\n---"
-                   continue
-
-           if max_rag_hops == 0: # Classic RAG
-               accumulated_rag_context_str += formatted_new_chunks_for_llm_summary
-               current_hop_details["new_information_summary"] = "Directly used in context (classic RAG)."
-               current_hop_details["llm_decision_json"] = {"need_more_data": False}
-               rag_hops_details_list.append(current_hop_details)
-               turn_rag_history_for_callback.append({"type":"rag_hop_info", **current_hop_details})
-               break
-
-           # Multi-hop: LLM summarizes and decides
-           if streaming_callback:
-               streaming_callback("LLM processing retrieved data and deciding next step...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_llm_decision", "hop": hop_count + 1}, turn_rag_history_for_callback)
-
-           decision_prompt_llm_parts = [
-               f"{self.system_full_header}You are an AI research assistant. Analyze newly retrieved information against the user's request and prior knowledge, then decide if more searching is needed.",
-               f"{self.user_full_header}Original user request: '{original_user_prompt}'",
-           ]
-           if accumulated_rag_context_str:
-               decision_prompt_llm_parts.append(f"Current accumulated knowledge summary:\n{accumulated_rag_context_str}")
-           decision_prompt_llm_parts.append(f"You just searched for: '{current_query_for_rag}'")
-           decision_prompt_llm_parts.append(f"And found this new information:\n--- New Information Start ---\n{formatted_new_chunks_for_llm_summary}--- New Information End ---")
-           decision_prompt_llm_parts.append(
-               "Task: Provide a concise summary of ONLY the new information relevant to the original request. "
-               "Then, assess if you now have sufficient information to comprehensively answer the user's original request or if another, more targeted search is necessary. "
-               "Respond STRICTLY in the following JSON format, with no other text before or after the JSON block:"
-           )
-           json_template_for_decision = """
+               retrieved = rag_query_function(current_query_for_rag, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
+           except Exception as e:
+               return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": str(e)}
+
+           hop_details = {"query": current_query_for_rag, "retrieved_chunks_details": [], "status": ""}
+           previous_queries.append(current_query_for_rag)
+           new_unique = 0
+           for chunk in retrieved:
+               doc = chunk.get("file_path", "Unknown")
+               content = str(chunk.get("chunk_text", ""))
+               sim = float(chunk.get("similarity_percent", 0.0))
+               detail = {"document": doc, "similarity": sim, "content": content,
+                         "retrieved_in_hop": hop_count + 1, "query_used": current_query_for_rag}
+               hop_details["retrieved_chunks_details"].append(detail)
+               key = f"{doc}::{content[:100]}"
+               if key not in all_unique_retrieved_chunks_map:
+                   all_unique_retrieved_chunks_map[key] = detail
+                   new_unique += 1
+           hop_details["status"] = "Completed" if retrieved else "No chunks retrieved"
+           if hop_count > 0 and new_unique == 0:
+               hop_details["status"] = "No *new* unique chunks retrieved"
+           rag_hops_details_list.append(hop_details)
+
+           # reset for next hop
+           if hop_count < max_rag_hops:
+               txt_previous_queries = f"Previous queries:\n"+'\n'.join(previous_queries)+"\n\n" if len(previous_queries)>0 else ""
+               txt_informations = f"Information:\n"+'\n'.join([f"(from {chunk['document']}):{chunk['content']}" for _, chunk in all_unique_retrieved_chunks_map.items()])
+               txt_sp = "Your objective is to analyze the provided chunks of information, then decise if they are sufficient to reach the objective. If you need more information, formulate a new query to extract more data."
+               txt_formatting = """The output format must be in form of json placed inside a json markdown tag. Here is the schema to use:
+```json
{
-
-
-    "reasoning_for_decision": "<Briefly explain why you need more data or why you have enough. If needing more, suggest what kind of information is still missing.>"
+    "decision": A boolean depicting your decision (true: more data is needed, false: there is enough data to reach objective),
+    "query": (optional, only if decision is true). A new query to recover more information from the data source (do not use previous queries as they have already been used)
}
+```
"""
-
-
-
-           llm_decision_json_str = self.generate_code(prompt="".join(decision_prompt_llm_parts), language="json", template=json_template_for_decision, temperature=rag_hop_summary_temperature, max_size=1024)
-
-           if isinstance(llm_decision_json_str, dict) and "error" in llm_decision_json_str:
-               return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"LLM failed to make RAG decision: {llm_decision_json_str['error']}"}
-           if not llm_decision_json_str:
-               return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": "LLM provided empty decision for RAG hop."}
-
-           try:
-               llm_decision = json.loads(llm_decision_json_str)
-           except json.JSONDecodeError:
+               p = f"Objective:\n{objectives_text}\n\n{txt_previous_queries}\n\n{txt_informations}\n\n{txt_formatting}\n\n"
+               response = self.generate_code(p,system_prompt=txt_sp)
               try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                   if
+                   answer = json.loads(response)
+                   decision = answer["decision"]
+                   if not decision:
+                       break
+                   else:
+                       current_query_for_rag = answer["query"]
+               except Exception as ex:
+                   trace_exception(ex)
+
+       # 2. Prepare & Summarize Context
+       sorted_chunks = sorted(all_unique_retrieved_chunks_map.values(),
+                              key=lambda c: c["similarity"], reverse=True)
+       context_lines = []
+       total_chars = 0
+       for c in sorted_chunks:
+           snippet = (
+               f"Source: {c['document']} (Sim: {c['similarity']:.1f}%, "
+               f"Hop: {c['retrieved_in_hop']}, Query: '{c['query_used']}')\n"
+               f"{c['content']}\n---\n"
+           )
+           if total_chars + len(snippet) > max_rag_context_characters:
               break
+           context_lines.append(snippet)
+           total_chars += len(snippet)

-
-       if streaming_callback:
-           streaming_callback("LLM generating final answer using all gathered information...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "final_answer_generation"}, turn_rag_history_for_callback)
+       accumulated_context = "".join(context_lines)

-
-       if
-
-
-
-
-
+       # If context exceeds our effective limit, summarize it
+       if self.count_tokens(accumulated_context) > effective_ctx_size:
+           if streaming_callback:
+               streaming_callback("Context too large, performing intermediate summary...", MSG_TYPE.MSG_TYPE_STEP, {"type": "intermediate_summary"}, turn_rag_history_for_callback)
+           summary_prompt = (
+               "Summarize the following gathered context into a concise form "
+               "that preserves all key facts and sources needed to answer the user's request:\n\n"
+               f"{accumulated_context}"
+           )
+           summary = self.generate_text(
+               prompt=summary_prompt,
+               system_prompt="Intermediate summary",
+               temperature=0.0,
+               n_predict= n_predict or 512,
+               stream=False
+           )
+           accumulated_context = self.remove_thinking_blocks(summary).strip()
+           if streaming_callback:
+               streaming_callback("Intermediate summary complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "intermediate_summary"}, turn_rag_history_for_callback)
+
+       # 3. Final Answer Generation
+       final_prompt = [
+           f"Original request: {original_user_prompt}"
+       ]
+       if objectives_text:
+           final_prompt.insert(1, f"Structured Objectives:\n{objectives_text}\n")
+       if accumulated_context:
+           final_prompt.append(
+               "\nBased on the gathered context:\n---\n"
+               f"{accumulated_context}\n---"
+           )
       else:
-
-
-
-
-
-
-
-           final_answer_streaming_callback = None
-           if streaming_callback:
-               def final_answer_cb_adapter(chunk, msg_type):
-                   return streaming_callback(chunk, msg_type, {"type": "final_answer_chunk"}, turn_rag_history_for_callback)
-               final_answer_streaming_callback = final_answer_cb_adapter
-
-           final_answer_text = self.remove_thinking_blocks(self.generate_text(
-               prompt=final_answer_llm_prompt, images=images,
-               n_predict=n_predict, stream=stream, temperature=temperature, top_k=top_k, top_p=top_p,
-               repeat_penalty=repeat_penalty, repeat_last_n=repeat_last_n, seed=seed, n_threads=n_threads,
-               ctx_size=ctx_size, streaming_callback=final_answer_streaming_callback, **llm_generation_kwargs
-           ))
+           final_prompt.append("\n(No relevant context retrieved.)")
+       final_prompt.append(
+           "\nProvide a comprehensive answer using ONLY the above context. "
+           "If context is insufficient, state so clearly."
+       )
+       final_prompt.append(self.ai_full_header)

-
-
+       final_answer = self.generate_text(
+           prompt="\n".join(final_prompt),
+           images=images,
+           system_prompt=system_prompt,
+           n_predict=n_predict,
+           stream=stream,
+           temperature=temperature,
+           top_k=top_k,
+           top_p=top_p,
+           repeat_penalty=repeat_penalty,
+           repeat_last_n=repeat_last_n,
+           seed=seed,
+           n_threads=n_threads,
+           ctx_size=ctx_size,
+           streaming_callback=streaming_callback if stream else None,
+           **llm_generation_kwargs
+       )
+       answer_text = self.remove_thinking_blocks(final_answer) if isinstance(final_answer, str) else final_answer

-
-
+       return {
+           "final_answer": answer_text,
+           "rag_hops_history": rag_hops_details_list,
+           "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()),
+           "error": None
+       }

-       return {"final_answer": final_answer_text, "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": None}

    def generate_code(
        self,
        prompt,
        images=[],
+       system_prompt=None,
        template=None,
        language="json",
        code_tag_format="markdown", # or "html"
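Putting the reworked signature together, a minimal sketch of a call to the updated generate_text_with_rag might look like the following. It assumes a working binding behind LollmsClient and a stand-in retrieval callable; the parameter names (rag_query_function, rag_top_k, rag_min_similarity_percent, max_rag_hops) and the chunk keys (file_path, chunk_text, similarity_percent) are read from this diff and the bundled examples, and any parameters not shown are assumed to keep their defaults:

```python
from lollms_client import LollmsClient

lc = LollmsClient()

def my_rag_query(query, vectorizer_name, top_k, min_similarity_percent):
    # Stand-in retriever: the new hop loop reads "file_path", "chunk_text"
    # and "similarity_percent" from each returned chunk.
    return [{
        "file_path": "docs/notes.md",
        "chunk_text": f"Some stored text related to: {query}",
        "similarity_percent": 72.0,
    }]

result = lc.generate_text_with_rag(
    prompt="Summarize what the notes say about deployment.",
    rag_query_function=my_rag_query,
    rag_top_k=5,
    rag_min_similarity_percent=50.0,
    max_rag_hops=1,
    extract_objectives=True,           # new in 0.19.8: objective extraction step
    max_rag_context_characters=32000,  # new in 0.19.8: cap on gathered context
    ctx_size=None,                     # now optional; falls back to default_ctx_size
    n_predict=512,
)
print(result["final_answer"])
print(len(result["all_retrieved_sources"]), "sources over",
      len(result["rag_hops_history"]), "hop(s)")
```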
@@ -1111,8 +1095,8 @@ Respond with a JSON object containing ONE of the following structures:
        Uses the underlying LLM binding via `generate_text`.
        Handles potential continuation if the code block is incomplete.
        """
-
-
+       if not system_prompt:
+           system_prompt = f"""Act as a code generation assistant that generates code from user prompt."""

        if template:
            system_prompt += "Here is a template of the answer:\n"
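Likewise, a hedged sketch of the new optional system_prompt on generate_code; when it is left at None, the method falls back to the built-in code-generation instruction shown in the hunk above. The prompt text and template below are made up for illustration:

```python
# Hypothetical call: override the default system prompt and constrain the
# output with a JSON template, using the system_prompt argument added in 0.19.8.
decision_template = """{
    "decision": true,
    "query": "a follow-up search query"
}"""

raw_json = lc.generate_code(
    "Decide whether more retrieval is needed to answer the deployment question.",
    system_prompt="You analyze retrieved chunks and reply strictly in JSON.",
    template=decision_template,
    language="json",
)
print(raw_json)
```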
lollms_client/lollms_discussion.py
CHANGED

@@ -1,7 +1,7 @@
import yaml
from lollms_client.lollms_core import LollmsClient
from dataclasses import dataclass, field
-from typing import List
+from typing import List, Dict
import uuid
import os


@@ -11,9 +11,9 @@ class LollmsMessage:
    sender: str
    content: str
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
-
+    metadata: str = "{}"
    def to_dict(self):
-        return {'sender': self.sender, 'content': self.content, 'id': self.id}
+        return {'sender': self.sender, 'content': self.content, 'metadata': self.metadata, 'id': self.id}

# LollmsDiscussion Class
class LollmsDiscussion:

@@ -21,8 +21,8 @@ class LollmsDiscussion:
        self.messages:List[LollmsMessage] = []
        self.lollmsClient = lollmsClient

-    def add_message(self, sender, content):
-        message = LollmsMessage(sender, content)
+    def add_message(self, sender, content, metadata={}):
+        message = LollmsMessage(sender, content, str(metadata))
        self.messages.append(message)

    def save_to_disk(self, file_path):
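A small usage sketch of the extended discussion API, assuming a LollmsClient instance is available and that LollmsDiscussion takes the client in its constructor as suggested by the hunk above; metadata defaults to an empty dict and is stored on the message as its str() form:

```python
from lollms_client import LollmsClient
from lollms_client.lollms_discussion import LollmsDiscussion

lc = LollmsClient()
discussion = LollmsDiscussion(lc)

# The optional metadata dict is stringified by add_message and round-tripped
# by LollmsMessage.to_dict().
discussion.add_message("user", "How do I enable multi-hop RAG?",
                       metadata={"source": "web_ui", "lang": "en"})
print(discussion.messages[-1].to_dict())
```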
{lollms_client-0.19.6.dist-info → lollms_client-0.19.8.dist-info}/RECORD
CHANGED

@@ -1,6 +1,7 @@
examples/function_calling_with_local_custom_mcp.py,sha256=g6wOFRB8-p9Cv7hKmQaGzPvtMX3H77gas01QVNEOduM,12407
-examples/
-examples/
+examples/generate_a_benchmark_for_safe_store.py,sha256=bkSt0mrpNsN0krZAUShm0jgVM1ukrPpjI7VwSgcNdSA,3974
+examples/generate_text_with_multihop_rag_example.py,sha256=riEyVYo97r6ZYdySL-NJkRhE4MnpwbZku1sN8RNvbvs,11519
+examples/internet_search_with_rag.py,sha256=cbUoGgY3rxZpQ5INoaA0Nhm0cutii-2AQ9WCz71Ch3o,12369
examples/local_mcp.py,sha256=w40dgayvHYe01yvekEE0LjcbkpwKjWwJ-9v4_wGYsUk,9113
examples/simple_text_gen_test.py,sha256=RoX9ZKJjGMujeep60wh5WT_GoBn0O9YKJY6WOy-ZmOc,8710
examples/simple_text_gen_with_image_test.py,sha256=rR1O5Prcb52UHtJ3c6bv7VuTd1cvbkr5aNZU-v-Rs3Y,9263

@@ -19,10 +20,10 @@ examples/personality_test/chat_test.py,sha256=o2jlpoddFc-T592iqAiA29xk3x27KsdK5D
examples/personality_test/chat_with_aristotle.py,sha256=4X_fwubMpd0Eq2rCReS2bgVlUoAqJprjkLXk2Jz6pXU,1774
examples/personality_test/tesks_test.py,sha256=7LIiwrEbva9WWZOLi34fsmCBN__RZbPpxoUOKA_AtYk,1924
examples/test_local_models/local_chat.py,sha256=slakja2zaHOEAUsn2tn_VmI4kLx6luLBrPqAeaNsix8,456
-lollms_client/__init__.py,sha256=
+lollms_client/__init__.py,sha256=4hRrTRCQTe3p2BdddF-8fJyG0nXyLOe_Imfini-BgtQ,910
lollms_client/lollms_config.py,sha256=goEseDwDxYJf3WkYJ4IrLXwg3Tfw73CXV2Avg45M_hE,21876
-lollms_client/lollms_core.py,sha256=
-lollms_client/lollms_discussion.py,sha256=
+lollms_client/lollms_core.py,sha256=B1swe9E024JZigsQSXynuFHpJi-1dIEcIVN7EGGXZqk,113509
+lollms_client/lollms_discussion.py,sha256=EV90dIgw8a-f-82vB2GspR60RniYz7WnBmAWSIg5mW0,2158
lollms_client/lollms_js_analyzer.py,sha256=01zUvuO2F_lnUe_0NLxe1MF5aHE1hO8RZi48mNPv-aw,8361
lollms_client/lollms_llm_binding.py,sha256=bdElz_IBx0zZ-85YTT1fyY_mSoHo46tKIMiHYJlKCkM,9809
lollms_client/lollms_mcp_binding.py,sha256=0rK9HQCBEGryNc8ApBmtOlhKE1Yfn7X7xIQssXxS2Zc,8933

@@ -69,8 +70,8 @@ lollms_client/tts_bindings/piper_tts/__init__.py,sha256=0IEWG4zH3_sOkSb9WbZzkeV5
lollms_client/tts_bindings/xtts/__init__.py,sha256=FgcdUH06X6ZR806WQe5ixaYx0QoxtAcOgYo87a2qxYc,18266
lollms_client/ttv_bindings/__init__.py,sha256=UZ8o2izQOJLQgtZ1D1cXoNST7rzqW22rL2Vufc7ddRc,3141
lollms_client/ttv_bindings/lollms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lollms_client-0.19.
-lollms_client-0.19.
-lollms_client-0.19.
-lollms_client-0.19.
-lollms_client-0.19.
+lollms_client-0.19.8.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+lollms_client-0.19.8.dist-info/METADATA,sha256=uHDmoes5veI0cBqErMnJbDS8TvadIZb-xvMnOXhLclc,13374
+lollms_client-0.19.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lollms_client-0.19.8.dist-info/top_level.txt,sha256=NI_W8S4OYZvJjb0QWMZMSIpOrYzpqwPGYaklhyWKH2w,23
+lollms_client-0.19.8.dist-info/RECORD,,

{lollms_client-0.19.6.dist-info → lollms_client-0.19.8.dist-info}/WHEEL: File without changes
{lollms_client-0.19.6.dist-info → lollms_client-0.19.8.dist-info}/licenses/LICENSE: File without changes
{lollms_client-0.19.6.dist-info → lollms_client-0.19.8.dist-info}/top_level.txt: File without changes