lollms-client 0.19.7__py3-none-any.whl → 0.19.8__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. The information is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.


@@ -0,0 +1,89 @@
1
+ from lollms_client import LollmsClient
2
+ from ascii_colors import ASCIIColors, trace_exception, ProgressBar
3
+ import pipmaster as pm
4
+ pm.ensure_packages(["datasets"])
5
+ #assuming you have an active lollms_webui instance running
6
+ #you can also use ollama or openai or any other lollms_client binding
7
+ lc = LollmsClient()
8
+
9
+ from datasets import load_dataset
10
+ import json
11
+ # 1. Define the dataset name
12
+ dataset_name = "agentlans/high-quality-english-sentences"
13
+
14
+ try:
15
+ # 2. Load the dataset
16
+ # This dataset only has a 'train' split by default.
17
+ # If a dataset had multiple splits (e.g., 'train', 'validation', 'test'),
18
+ # load_dataset() would return a DatasetDict.
19
+ # We can directly access the 'train' split.
20
+ dataset = load_dataset(dataset_name, split='train')
21
+ print(f"Dataset loaded successfully: {dataset_name}")
22
+ print(f"Dataset structure: {dataset}")
23
+
24
+ # 3. Extract the sentences into a list
25
+ # The sentences are in a column likely named 'text' (common for text datasets).
26
+ # Let's inspect the features to be sure.
27
+ print(f"Dataset features: {dataset.features}")
28
+
29
+ # Assuming the column containing sentences is 'text'
30
+ # This is standard for many text datasets on Hugging Face.
31
+ # dataset['text'] directly gives a list of all values in the 'text' column.
32
+ sentences_list = dataset['text']
33
+
34
+ # If you want to be absolutely sure it's a Python list (it usually is or acts like one):
35
+ # sentences_list = list(dataset['text'])
36
+
37
+ # 4. Verify and print some examples
38
+ print(f"\nSuccessfully extracted {len(sentences_list)} sentences into a list.")
39
+
40
+ if sentences_list:
41
+ print("\nFirst 5 sentences:")
42
+ for i in range(min(5, len(sentences_list))):
43
+ print(f"{i+1}. {sentences_list[i]}")
44
+
45
+ print("\nLast 5 sentences:")
46
+ for i in range(max(0, len(sentences_list) - 5), len(sentences_list)):
47
+ print(f"{len(sentences_list) - (len(sentences_list) - 1 - i)}. {sentences_list[i]}")
48
+ else:
49
+ print("The list of sentences is empty.")
50
+
51
+ except Exception as e:
52
+ print(f"An error occurred: {e}")
53
+ print("Please ensure you have an active internet connection and the `datasets` library is installed.")
54
+ print("Dataset name might be incorrect or the dataset might require authentication or specific configurations.")
55
+
56
+ entries = []
57
+ for sentence in ProgressBar(sentences_list, desc="Processing Items"):
58
+ prompt = f"""Given the following text chunk:
59
+ "{sentence}"
60
+
61
+ Generate a JSON object with the following keys and corresponding string values:
62
+ - "id": A title to the sentence being processed
63
+ - "highly_similar": A paraphrase of the original chunk, maintaining the core meaning but using different wording and sentence structure.
64
+ - "related": A sentence or short paragraph that is on the same general topic as the original chunk but discusses a different aspect or a related concept. It should not be a direct paraphrase.
65
+ - "dissimilar": A sentence or short paragraph on a completely unrelated topic.
66
+ - "question_form": A question that encapsulates the main idea or asks about a key aspect of the original chunk.
67
+ - "negation": A sentence that negates the main assertion or a key aspect of the original chunk, while still being topically relevant if possible (e.g., not "The sky is not blue" if the topic is computers).
68
+
69
+ Ensure the output is ONLY a valid JSON object. Example:
70
+ {{
71
+ "id": "...",
72
+ "highly_similar": "...",
73
+ "related": "...",
74
+ "dissimilar": "...",
75
+ "question_form": "...",
76
+ "negation": "..."
77
+ }}
78
+
79
+ JSON object:
80
+ """
81
+ try:
82
+ output = lc.generate_code(prompt)
83
+ entry = json.loads(output)
84
+ entry["query"]=sentence
85
+ entries.append(entry)
86
+ with open("benchmark_db.json","w") as f:
87
+ json.dump(entries, f, indent=4)
88
+ except Exception as ex:
89
+ trace_exception(ex)
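The new example above asks the model for one JSON object per sentence and rewrites benchmark_db.json after every entry, so a partial run still leaves a usable file. A minimal sketch of reading that file back for later use; the file name and the keys ("query", "highly_similar", "related", "dissimilar") come from the script above, while the triple-building logic is only an illustrative assumption:

    import json

    # Load the entries written by the benchmark generator above.
    with open("benchmark_db.json", "r") as f:
        entries = json.load(f)

    # Build (query, candidate, label) triples, e.g. for a retrieval or similarity benchmark.
    triples = []
    for entry in entries:
        query = entry.get("query", "")
        for label in ("highly_similar", "related", "dissimilar"):
            if label in entry:
                triples.append((query, entry[label], label))

    print(f"Loaded {len(entries)} entries and built {len(triples)} labeled pairs.")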
@@ -173,7 +173,6 @@ if __name__ == "__main__":
173
173
  streaming_callback=rag_streaming_callback,
174
174
  n_predict=1024,
175
175
  rag_hop_query_generation_temperature=0.1, # Focused query gen
176
- rag_hop_summary_temperature=0.2 # Focused summary
177
176
  )
178
177
  print("\n--- End of Multi-Hop RAG (1 hop) ---")
179
178
  ASCIIColors.magenta("\nMulti-Hop RAG (1 hop) Final Output:")
@@ -148,7 +148,7 @@ if __name__ == "__main__":
148
148
  )
149
149
  print("\n--- End of Classic Search RAG ---")
150
150
  ASCIIColors.magenta("\nClassic Search RAG Final Output Structure:")
151
- print(f" Final Answer (first 100 chars): {classic_rag_result.get('final_answer', '')[:100]}...")
151
+ print(f" Final Answer (first 100 chars): {classic_rag_result.get('final_answer', '')}...")
152
152
  print(f" Error: {classic_rag_result.get('error')}")
153
153
  print(f" Number of Hops: {len(classic_rag_result.get('rag_hops_history', []))}")
154
154
  print(f" Total Unique Sources Retrieved: {len(classic_rag_result.get('all_retrieved_sources', []))}")
@@ -157,7 +157,7 @@ if __name__ == "__main__":
157
157
  source_ex = classic_rag_result['all_retrieved_sources'][0]
158
158
  print(f" Document (URL): {source_ex.get('document')}")
159
159
  print(f" Similarity: {source_ex.get('similarity')}%")
160
- print(f" Content (Snippet, first 50 chars): {source_ex.get('content', '')[:50]}...")
160
+ print(f" Content (Snippet, first 50 chars): {source_ex.get('content', '')}...")
161
161
 
162
162
 
163
163
  # --- Test Case 2: Multi-Hop Search RAG (max_rag_hops = 1) ---
@@ -174,12 +174,11 @@ if __name__ == "__main__":
174
174
  rag_min_similarity_percent=50.0,
175
175
  streaming_callback=rag_streaming_callback,
176
176
  n_predict=400,
177
- rag_hop_query_generation_temperature=0.1,
178
- rag_hop_summary_temperature=0.2
177
+ rag_hop_query_generation_temperature=0.1
179
178
  )
180
179
  print("\n--- End of Multi-Hop Search RAG (1 hop max) ---")
181
180
  ASCIIColors.magenta("\nMulti-Hop Search RAG (1 hop max) Final Output Structure:")
182
- print(f" Final Answer (first 100 chars): {multihop_rag_result_1.get('final_answer', '')[:100]}...")
181
+ print(f" Final Answer (first 100 chars): {multihop_rag_result_1.get('final_answer', '')}...")
183
182
  print(f" Error: {multihop_rag_result_1.get('error')}")
184
183
  print(f" Number of Hops Made: {len(multihop_rag_result_1.get('rag_hops_history', []))}")
185
184
  for i, hop_info in enumerate(multihop_rag_result_1.get('rag_hops_history', [])):
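The example hunks above keep the "(first 100 chars)" and "(first 50 chars)" labels while dropping the slicing. If truncated display is still wanted, a short sketch of the pattern those labels describe; preview is a hypothetical helper, not part of the package:

    def preview(text: str, limit: int = 100) -> str:
        # Truncate long strings for console display, as the "(first N chars)" labels suggest.
        return text[:limit] + ("..." if len(text) > limit else "")

    # Usage with either result dictionary from the examples above, e.g.:
    # print(f"  Final Answer (first 100 chars): {preview(multihop_rag_result_1.get('final_answer', ''))}")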
lollms_client/__init__.py CHANGED
@@ -7,7 +7,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utiliti
7
7
  from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
8
8
 
9
9
 
10
- __version__ = "0.19.7" # Updated version
10
+ __version__ = "0.19.8" # Updated version
11
11
 
12
12
  # Optionally, you could define __all__ if you want to be explicit about exports
13
13
  __all__ = [
@@ -873,221 +873,207 @@ Respond with a JSON object containing ONE of the following structures:
873
873
  repeat_last_n: Optional[int] = None,
874
874
  seed: Optional[int] = None,
875
875
  n_threads: Optional[int] = None,
876
- ctx_size: int | None = None,
876
+ ctx_size: Optional[int] = None,
877
+ extract_objectives: bool = True,
877
878
  streaming_callback: Optional[Callable[[str, MSG_TYPE, Optional[Dict], Optional[List]], bool]] = None,
878
- rag_hop_query_generation_temperature: float = 0.2,
879
- # rag_hop_summary_temperature is no longer needed
880
879
  max_rag_context_characters: int = 32000,
881
880
  **llm_generation_kwargs
882
881
  ) -> Dict[str, Any]:
882
+ """
883
+ Enhanced RAG with optional initial objective extraction and automatic intermediate summaries
884
+ when context grows beyond ctx_size or self.default_ctx_size.
885
+ """
883
886
  if not self.binding:
884
887
  return {"final_answer": "", "rag_hops_history": [], "all_retrieved_sources": [], "error": "LLM binding not initialized."}
885
888
 
889
+ # Determine effective context size limit
890
+ effective_ctx_size = ctx_size or getattr(self, "default_ctx_size", 20000)
891
+
886
892
  turn_rag_history_for_callback: List[Dict[str, Any]] = []
887
893
  rag_hops_details_list: List[Dict[str, Any]] = []
888
- # Stores all unique chunks with their full details, keyed by a unique identifier (e.g., path + content hash snippet)
889
894
  all_unique_retrieved_chunks_map: Dict[str, Dict[str, Any]] = {}
890
- current_query_for_rag = rag_query_text
895
+
891
896
  original_user_prompt = prompt
897
+ objectives_text = ""
898
+ # 0. Optional Objectives Extraction Step
899
+ if extract_objectives:
900
+ if streaming_callback:
901
+ streaming_callback("Extracting and structuring objectives...", MSG_TYPE.MSG_TYPE_STEP, {"type": "objectives_extraction"}, turn_rag_history_for_callback)
902
+ obj_prompt = (
903
+ "You are an expert analyst. "
904
+ "Your task is to extract and structure the key objectives from the user's request below. "
905
+ "Output a bullet list of objectives only.\n\n"
906
+ f"User request:\n\"{original_user_prompt}\""
907
+ )
908
+ obj_gen = self.generate_text(
909
+ prompt=obj_prompt,
910
+ system_prompt="Extract objectives",
911
+ temperature=0.0,
912
+ n_predict=200,
913
+ stream=False
914
+ )
915
+ objectives_text = self.remove_thinking_blocks(obj_gen).strip()
916
+ if streaming_callback:
917
+ streaming_callback(f"Objectives extracted:\n{objectives_text}", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "objectives_extracted"}, turn_rag_history_for_callback)
892
918
 
919
+ current_query_for_rag = rag_query_text or None
920
+ previous_queries=[]
921
+ # 1. RAG Hops
893
922
  for hop_count in range(max_rag_hops + 1):
894
923
  if streaming_callback:
895
924
  streaming_callback(f"Starting RAG Hop {hop_count + 1}", MSG_TYPE.MSG_TYPE_STEP, {"type": "rag_hop_start", "hop": hop_count + 1}, turn_rag_history_for_callback)
896
925
 
897
- # 1. Determine/Generate RAG Query Text
898
- if hop_count > 0: # Query generation for multi-hop (hop 2 onwards)
899
- if streaming_callback:
900
- streaming_callback("LLM generating refined RAG query...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_query_generation", "hop": hop_count + 1}, turn_rag_history_for_callback)
901
-
902
- system_prompt_q_gen = "You are an expert research assistant. Your task is to formulate the best possible *new* search query to find additional information relevant to the user's original request, considering previous search attempts."
903
- query_gen_prompt_parts = [
904
- f"Original user request:\n'{original_user_prompt}'"
905
- ]
906
- if rag_hops_details_list:
907
- query_gen_prompt_parts.append("\nPrevious search queries and number of chunks found:")
908
- for i, prev_hop in enumerate(rag_hops_details_list):
909
- num_chunks_found_in_hop = len(prev_hop.get("retrieved_chunks_details", []))
910
- query_gen_prompt_parts.append(f" - Query {i+1}: '{prev_hop['query']}' (Found {num_chunks_found_in_hop} chunks)")
911
-
912
- query_gen_prompt_parts.append("\nBased on the original request and the queries already attempted, what is the most effective and specific *new* search query to perform next to get closer to answering the user's request? The query should aim to find information not likely covered by previous queries. Output only the search query text, nothing else.")
913
- query_gen_prompt_parts.append(self.ai_full_header)
914
-
915
- new_query_text_raw = self.generate_text(
916
- prompt="".join(query_gen_prompt_parts),
917
- system_prompt=system_prompt_q_gen,
918
- temperature=rag_hop_query_generation_temperature,
919
- n_predict=100,
920
- stream=False
921
- )
922
-
923
- if isinstance(new_query_text_raw, dict) and "error" in new_query_text_raw:
924
- return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Failed to generate RAG query for hop {hop_count + 1}: {new_query_text_raw['error']}"}
925
-
926
- current_query_for_rag = self.remove_thinking_blocks(new_query_text_raw).strip().replace("Search query:", "").replace("Query:", "").strip("\"'")
927
-
928
- if streaming_callback:
929
- streaming_callback(f"Generated RAG query for hop {hop_count + 1}: {current_query_for_rag}", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "rag_query_generation", "hop": hop_count + 1, "query": current_query_for_rag}, turn_rag_history_for_callback)
930
-
931
- elif current_query_for_rag is None: # First hop, and no rag_query_text provided
932
- current_query_for_rag = original_user_prompt
933
-
934
- # If current_query_for_rag was provided as an argument, it's used for the first hop.
926
+ # Generate refined query for multi-hop
927
+ if hop_count > 0:
928
+ # build system prompt and history...
929
+ # (same as before, omitted for brevity)
930
+ # result => current_query_for_rag
931
+ pass
932
+ elif current_query_for_rag is None:
933
+ current_query_for_rag = prompt
935
934
 
936
935
  if not current_query_for_rag:
937
- ASCIIColors.warning(f"RAG Hop {hop_count + 1}: Query is empty. Stopping RAG process.")
938
- # Add a detail for this aborted hop
939
936
  rag_hops_details_list.append({
940
- "query": "EMPTY_QUERY_STOPPED_HOPS",
941
- "retrieved_chunks_details": [],
942
- "status": "Query became empty, RAG stopped."
937
+ "query": "EMPTY_QUERY_STOPPED_HOPS",
938
+ "retrieved_chunks_details": [],
939
+ "status": "Stopped: empty query."
943
940
  })
944
- turn_rag_history_for_callback.append({"type":"rag_hop_info", "hop": hop_count + 1, "query": "EMPTY_QUERY_STOPPED_HOPS", "status":"Stopped."})
945
- break # Stop if query is empty
941
+ break
946
942
 
947
- # 2. Perform RAG Query
948
- if streaming_callback:
949
- streaming_callback(f"Querying knowledge base for (Hop {hop_count + 1}): '{current_query_for_rag}'...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_retrieval", "hop": hop_count + 1, "query": current_query_for_rag}, turn_rag_history_for_callback)
950
-
943
+ # Retrieve chunks
951
944
  try:
952
- retrieved_chunks_raw_this_hop = rag_query_function(current_query_for_rag, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
953
- except Exception as e_rag_query:
954
- trace_exception(e_rag_query)
955
- return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"RAG query function failed on hop {hop_count + 1}: {e_rag_query}"}
956
-
957
- if streaming_callback:
958
- streaming_callback(f"Retrieved {len(retrieved_chunks_raw_this_hop)} chunks for hop {hop_count + 1}.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "rag_retrieval", "hop": hop_count + 1, "num_chunks": len(retrieved_chunks_raw_this_hop)}, turn_rag_history_for_callback)
959
-
960
- current_hop_chunk_details_for_history = []
961
- new_chunks_added_this_hop = 0
962
- if retrieved_chunks_raw_this_hop:
963
- for chunk in retrieved_chunks_raw_this_hop:
964
- doc_path = chunk.get('file_path', 'Unknown Document')
965
- content = chunk.get('chunk_text', '')
966
- similarity = chunk.get('similarity_percent', 0.0) # Default to 0.0 if not present
967
-
968
- # Ensure content is string and similarity is float for sorting later
969
- if not isinstance(content, str): content = str(content)
970
- try:
971
- similarity = float(similarity)
972
- except (ValueError, TypeError):
973
- similarity = 0.0 # Default if conversion fails
974
-
975
- chunk_detail_for_map_and_history = {
976
- "document": doc_path,
977
- "similarity": similarity,
978
- "content": content,
979
- "retrieved_in_hop": hop_count + 1,
980
- "query_used": current_query_for_rag
981
- }
982
- current_hop_chunk_details_for_history.append(chunk_detail_for_map_and_history)
983
-
984
- unique_key = f"{doc_path}::{content[:100]}" # Simple key for uniqueness
985
- if unique_key not in all_unique_retrieved_chunks_map:
986
- all_unique_retrieved_chunks_map[unique_key] = chunk_detail_for_map_and_history
987
- new_chunks_added_this_hop +=1
988
-
989
- hop_status = "Completed"
990
- if not retrieved_chunks_raw_this_hop:
991
- hop_status = "No chunks retrieved for this query."
992
- elif new_chunks_added_this_hop == 0 and hop_count > 0: # Only consider "no new unique chunks" for subsequent hops
993
- hop_status = "No *new* unique chunks retrieved."
994
- # Optionally, could break here if no new unique chunks are found in a multi-hop scenario
995
- # ASCIIColors.warning(f"RAG Hop {hop_count + 1}: No new unique chunks found. Consider stopping if this persists.")
996
-
997
-
998
- current_hop_details = {
999
- "query": current_query_for_rag,
1000
- "retrieved_chunks_details": current_hop_chunk_details_for_history, # Chunks from THIS hop
1001
- "status": hop_status
1002
- }
1003
- rag_hops_details_list.append(current_hop_details)
1004
- turn_rag_history_for_callback.append({"type":"rag_hop_info", **current_hop_details})
1005
-
1006
- # Reset for next potential query generation if it's not the last planned hop
945
+ retrieved = rag_query_function(current_query_for_rag, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
946
+ except Exception as e:
947
+ return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": str(e)}
948
+
949
+ hop_details = {"query": current_query_for_rag, "retrieved_chunks_details": [], "status": ""}
950
+ previous_queries.append(current_query_for_rag)
951
+ new_unique = 0
952
+ for chunk in retrieved:
953
+ doc = chunk.get("file_path", "Unknown")
954
+ content = str(chunk.get("chunk_text", ""))
955
+ sim = float(chunk.get("similarity_percent", 0.0))
956
+ detail = {"document": doc, "similarity": sim, "content": content,
957
+ "retrieved_in_hop": hop_count + 1, "query_used": current_query_for_rag}
958
+ hop_details["retrieved_chunks_details"].append(detail)
959
+ key = f"{doc}::{content[:100]}"
960
+ if key not in all_unique_retrieved_chunks_map:
961
+ all_unique_retrieved_chunks_map[key] = detail
962
+ new_unique += 1
963
+ hop_details["status"] = "Completed" if retrieved else "No chunks retrieved"
964
+ if hop_count > 0 and new_unique == 0:
965
+ hop_details["status"] = "No *new* unique chunks retrieved"
966
+ rag_hops_details_list.append(hop_details)
967
+
968
+ # reset for next hop
1007
969
  if hop_count < max_rag_hops:
1008
- current_query_for_rag = None
1009
- else: # This was the last hop
970
+ txt_previous_queries = f"Previous queries:\n"+'\n'.join(previous_queries)+"\n\n" if len(previous_queries)>0 else ""
971
+ txt_informations = f"Information:\n"+'\n'.join([f"(from {chunk['document']}):{chunk['content']}" for _, chunk in all_unique_retrieved_chunks_map.items()])
972
+ txt_sp = "Your objective is to analyze the provided chunks of information, then decide if they are sufficient to reach the objective. If you need more information, formulate a new query to extract more data."
973
+ txt_formatting = """The output format must be in form of json placed inside a json markdown tag. Here is the schema to use:
974
+ ```json
975
+ {
976
+ "decision": A boolean depicting your decision (true: more data is needed, false: there is enough data to reach objective),
977
+ "query": (optional, only if decision is true). A new query to recover more information from the data source (do not use previous queries as they have already been used)
978
+ }
979
+ ```
980
+ """
981
+ p = f"Objective:\n{objectives_text}\n\n{txt_previous_queries}\n\n{txt_informations}\n\n{txt_formatting}\n\n"
982
+ response = self.generate_code(p,system_prompt=txt_sp)
983
+ try:
984
+ answer = json.loads(response)
985
+ decision = answer["decision"]
986
+ if not decision:
987
+ break
988
+ else:
989
+ current_query_for_rag = answer["query"]
990
+ except Exception as ex:
991
+ trace_exception(ex)
992
+
993
+ # 2. Prepare & Summarize Context
994
+ sorted_chunks = sorted(all_unique_retrieved_chunks_map.values(),
995
+ key=lambda c: c["similarity"], reverse=True)
996
+ context_lines = []
997
+ total_chars = 0
998
+ for c in sorted_chunks:
999
+ snippet = (
1000
+ f"Source: {c['document']} (Sim: {c['similarity']:.1f}%, "
1001
+ f"Hop: {c['retrieved_in_hop']}, Query: '{c['query_used']}')\n"
1002
+ f"{c['content']}\n---\n"
1003
+ )
1004
+ if total_chars + len(snippet) > max_rag_context_characters:
1010
1005
  break
1006
+ context_lines.append(snippet)
1007
+ total_chars += len(snippet)
1011
1008
 
1009
+ accumulated_context = "".join(context_lines)
1012
1010
 
1013
- # 3. Prepare Final Context from All Unique Retrieved Chunks
1014
- accumulated_rag_context_str = ""
1015
- if all_unique_retrieved_chunks_map:
1011
+ # If context exceeds our effective limit, summarize it
1012
+ if self.count_tokens(accumulated_context) > effective_ctx_size:
1016
1013
  if streaming_callback:
1017
- streaming_callback("Preparing final RAG context from all retrieved chunks...", MSG_TYPE.MSG_TYPE_STEP, {"type": "context_preparation"}, turn_rag_history_for_callback)
1018
-
1019
- # Sort all unique chunks by similarity (highest first)
1020
- sorted_unique_chunks = sorted(
1021
- list(all_unique_retrieved_chunks_map.values()),
1022
- key=lambda c: c.get('similarity', 0.0),
1023
- reverse=True
1014
+ streaming_callback("Context too large, performing intermediate summary...", MSG_TYPE.MSG_TYPE_STEP, {"type": "intermediate_summary"}, turn_rag_history_for_callback)
1015
+ summary_prompt = (
1016
+ "Summarize the following gathered context into a concise form "
1017
+ "that preserves all key facts and sources needed to answer the user's request:\n\n"
1018
+ f"{accumulated_context}"
1024
1019
  )
1025
-
1026
- current_context_chars = 0
1027
- chunks_used_in_final_context = 0
1028
- context_lines = []
1029
- for chunk in sorted_unique_chunks:
1030
- chunk_text_to_add = f"Source: {chunk['document']} (Similarity: {chunk['similarity']:.2f}%, Hop: {chunk['retrieved_in_hop']}, Query: '{chunk['query_used']}')\nContent:\n{chunk['content']}\n---\n"
1031
- if current_context_chars + len(chunk_text_to_add) <= max_rag_context_characters:
1032
- context_lines.append(chunk_text_to_add)
1033
- current_context_chars += len(chunk_text_to_add)
1034
- chunks_used_in_final_context +=1
1035
- else:
1036
- ASCIIColors.warning(f"Reached max RAG context character limit ({max_rag_context_characters}). Used {chunks_used_in_final_context} of {len(sorted_unique_chunks)} unique chunks.")
1037
- break
1038
- accumulated_rag_context_str = "".join(context_lines)
1039
-
1020
+ summary = self.generate_text(
1021
+ prompt=summary_prompt,
1022
+ system_prompt="Intermediate summary",
1023
+ temperature=0.0,
1024
+ n_predict= n_predict or 512,
1025
+ stream=False
1026
+ )
1027
+ accumulated_context = self.remove_thinking_blocks(summary).strip()
1040
1028
  if streaming_callback:
1041
- streaming_callback(f"Final RAG context prepared using {chunks_used_in_final_context} chunks ({current_context_chars} chars).", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "context_preparation", "num_chunks_in_context": chunks_used_in_final_context, "chars_in_context": current_context_chars}, turn_rag_history_for_callback)
1042
-
1043
-
1044
- # 4. Final Answer Generation
1045
- if streaming_callback:
1046
- streaming_callback("LLM generating final answer...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "final_answer_generation"}, turn_rag_history_for_callback)
1047
-
1048
- final_answer_prompt_parts = [f"Original request: {original_user_prompt}"]
1049
- if accumulated_rag_context_str:
1050
- final_answer_prompt_parts.append(f"\nBased on the following information I have gathered from a knowledge base:\n--- Gathered Context Start ---\n{accumulated_rag_context_str.strip()}\n--- Gathered Context End ---")
1029
+ streaming_callback("Intermediate summary complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "intermediate_summary"}, turn_rag_history_for_callback)
1030
+
1031
+ # 3. Final Answer Generation
1032
+ final_prompt = [
1033
+ f"Original request: {original_user_prompt}"
1034
+ ]
1035
+ if objectives_text:
1036
+ final_prompt.insert(1, f"Structured Objectives:\n{objectives_text}\n")
1037
+ if accumulated_context:
1038
+ final_prompt.append(
1039
+ "\nBased on the gathered context:\n---\n"
1040
+ f"{accumulated_context}\n---"
1041
+ )
1051
1042
  else:
1052
- final_answer_prompt_parts.append("\n(No specific information was retrieved from the knowledge base for this request.)")
1053
-
1054
- final_answer_prompt_parts.append("\nPlease provide a comprehensive answer to the original request using ONLY the provided gathered context. If the context is insufficient, clearly state that. If the context contains code examples, ensure they are accurately reproduced.")
1055
- final_answer_prompt_parts.append(self.ai_full_header)
1056
-
1057
- final_answer_llm_prompt = "\n".join(final_answer_prompt_parts)
1058
-
1059
- final_answer_streaming_callback_adapted = None
1060
- if streaming_callback and stream:
1061
- def final_answer_cb_adapter(chunk_text, msg_type_llm):
1062
- return streaming_callback(chunk_text, msg_type_llm, {"type": "final_answer_chunk"}, turn_rag_history_for_callback)
1063
- final_answer_streaming_callback_adapted = final_answer_cb_adapter
1064
-
1065
- actual_streaming_cb_for_generate = final_answer_streaming_callback_adapted if stream else None
1066
-
1067
- final_answer_raw = self.generate_text(
1068
- prompt=final_answer_llm_prompt, images=images, system_prompt=system_prompt,
1069
- n_predict=n_predict, stream=stream, temperature=temperature, top_k=top_k, top_p=top_p,
1070
- repeat_penalty=repeat_penalty, repeat_last_n=repeat_last_n, seed=seed, n_threads=n_threads,
1071
- ctx_size=ctx_size, streaming_callback=actual_streaming_cb_for_generate, **llm_generation_kwargs
1043
+ final_prompt.append("\n(No relevant context retrieved.)")
1044
+ final_prompt.append(
1045
+ "\nProvide a comprehensive answer using ONLY the above context. "
1046
+ "If context is insufficient, state so clearly."
1072
1047
  )
1048
+ final_prompt.append(self.ai_full_header)
1073
1049
 
1074
- if isinstance(final_answer_raw, dict) and "error" in final_answer_raw:
1075
- return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Final answer generation failed: {final_answer_raw['error']}"}
1076
-
1077
- final_answer_text = self.remove_thinking_blocks(final_answer_raw)
1078
-
1079
- if streaming_callback:
1080
- streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "final_answer_generation"}, turn_rag_history_for_callback)
1081
- if not stream and final_answer_text:
1082
- streaming_callback(final_answer_text, MSG_TYPE.MSG_TYPE_CHUNK, {"type": "final_answer_full"}, turn_rag_history_for_callback)
1050
+ final_answer = self.generate_text(
1051
+ prompt="\n".join(final_prompt),
1052
+ images=images,
1053
+ system_prompt=system_prompt,
1054
+ n_predict=n_predict,
1055
+ stream=stream,
1056
+ temperature=temperature,
1057
+ top_k=top_k,
1058
+ top_p=top_p,
1059
+ repeat_penalty=repeat_penalty,
1060
+ repeat_last_n=repeat_last_n,
1061
+ seed=seed,
1062
+ n_threads=n_threads,
1063
+ ctx_size=ctx_size,
1064
+ streaming_callback=streaming_callback if stream else None,
1065
+ **llm_generation_kwargs
1066
+ )
1067
+ answer_text = self.remove_thinking_blocks(final_answer) if isinstance(final_answer, str) else final_answer
1083
1068
 
1084
1069
  return {
1085
- "final_answer": final_answer_text,
1086
- "rag_hops_history": rag_hops_details_list,
1087
- "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), # All unique chunks found
1070
+ "final_answer": answer_text,
1071
+ "rag_hops_history": rag_hops_details_list,
1072
+ "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()),
1088
1073
  "error": None
1089
1074
  }
1090
-
1075
+
1076
+
1091
1077
  def generate_code(
1092
1078
  self,
1093
1079
  prompt,
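In the reworked method above, retrieval is delegated to the caller: rag_query_function is invoked as rag_query_function(query, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent), and each returned chunk is read with chunk.get("file_path"), chunk.get("chunk_text") and chunk.get("similarity_percent"). A minimal sketch of a stub retriever with that shape, useful for wiring up tests; the corpus and the fixed score are invented placeholders, not part of the library:

    from typing import Any, Dict, List

    def dummy_rag_query_function(query: str,
                                 vectorizer_name: str,
                                 top_k: int,
                                 min_similarity_percent: float) -> List[Dict[str, Any]]:
        # Placeholder corpus; a real implementation would query a vector store.
        corpus = [
            ("docs/intro.txt", "LoLLMs clients can generate text through several bindings."),
            ("docs/rag.txt", "Retrieved chunks are ranked by similarity before being used as context."),
        ]
        results = [
            {
                "file_path": path,            # read back as chunk.get("file_path")
                "chunk_text": text,           # read back as chunk.get("chunk_text")
                "similarity_percent": 75.0,   # read back as chunk.get("similarity_percent")
            }
            for path, text in corpus[:top_k]
        ]
        return [r for r in results if r["similarity_percent"] >= min_similarity_percent]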
@@ -1,7 +1,7 @@
1
1
  import yaml
2
2
  from lollms_client.lollms_core import LollmsClient
3
3
  from dataclasses import dataclass, field
4
- from typing import List
4
+ from typing import List, Dict
5
5
  import uuid
6
6
  import os
7
7
 
@@ -11,9 +11,9 @@ class LollmsMessage:
11
11
  sender: str
12
12
  content: str
13
13
  id: str = field(default_factory=lambda: str(uuid.uuid4()))
14
-
14
+ metadata: str = "{}"
15
15
  def to_dict(self):
16
- return {'sender': self.sender, 'content': self.content, 'id': self.id}
16
+ return {'sender': self.sender, 'content': self.content, 'metadata': self.metadata, 'id': self.id}
17
17
 
18
18
  # LollmsDiscussion Class
19
19
  class LollmsDiscussion:
@@ -21,8 +21,8 @@ class LollmsDiscussion:
21
21
  self.messages:List[LollmsMessage] = []
22
22
  self.lollmsClient = lollmsClient
23
23
 
24
- def add_message(self, sender, content):
25
- message = LollmsMessage(sender, content)
24
+ def add_message(self, sender, content, metadata={}):
25
+ message = LollmsMessage(sender, content, str(metadata))
26
26
  self.messages.append(message)
27
27
 
28
28
  def save_to_disk(self, file_path):
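The lollms_discussion.py changes above add an optional metadata argument to add_message, a metadata field on LollmsMessage (kept as a string), and include it in to_dict. A small usage sketch, assuming LollmsDiscussion is importable from lollms_client.lollms_discussion (the class and method signatures are taken from the hunks above):

    from lollms_client import LollmsClient
    from lollms_client.lollms_discussion import LollmsDiscussion  # assumed import path

    lc = LollmsClient()
    discussion = LollmsDiscussion(lc)

    # add_message forwards str(metadata) to the LollmsMessage constructor
    discussion.add_message("user", "Summarize the last report.", {"lang": "en", "source": "cli"})

    print(discussion.messages[-1].to_dict())
    # The printed dict now carries a 'metadata' key alongside 'sender', 'content' and 'id'.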
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lollms_client
3
- Version: 0.19.7
3
+ Version: 0.19.8
4
4
  Summary: A client library for LoLLMs generate endpoint
5
5
  Author-email: ParisNeo <parisneoai@gmail.com>
6
6
  License: Apache Software License
@@ -1,6 +1,7 @@
1
1
  examples/function_calling_with_local_custom_mcp.py,sha256=g6wOFRB8-p9Cv7hKmQaGzPvtMX3H77gas01QVNEOduM,12407
2
- examples/generate_text_with_multihop_rag_example.py,sha256=Z6TbVqThOCHNK6kzIqRnEi76JDxIFlg1-IIMWehZars,11582
3
- examples/internet_search_with_rag.py,sha256=WBoYFBEjGIFKyKTzezm7lI0bGPNuHFMyoq_8iY32qLY,12434
2
+ examples/generate_a_benchmark_for_safe_store.py,sha256=bkSt0mrpNsN0krZAUShm0jgVM1ukrPpjI7VwSgcNdSA,3974
3
+ examples/generate_text_with_multihop_rag_example.py,sha256=riEyVYo97r6ZYdySL-NJkRhE4MnpwbZku1sN8RNvbvs,11519
4
+ examples/internet_search_with_rag.py,sha256=cbUoGgY3rxZpQ5INoaA0Nhm0cutii-2AQ9WCz71Ch3o,12369
4
5
  examples/local_mcp.py,sha256=w40dgayvHYe01yvekEE0LjcbkpwKjWwJ-9v4_wGYsUk,9113
5
6
  examples/simple_text_gen_test.py,sha256=RoX9ZKJjGMujeep60wh5WT_GoBn0O9YKJY6WOy-ZmOc,8710
6
7
  examples/simple_text_gen_with_image_test.py,sha256=rR1O5Prcb52UHtJ3c6bv7VuTd1cvbkr5aNZU-v-Rs3Y,9263
@@ -19,10 +20,10 @@ examples/personality_test/chat_test.py,sha256=o2jlpoddFc-T592iqAiA29xk3x27KsdK5D
19
20
  examples/personality_test/chat_with_aristotle.py,sha256=4X_fwubMpd0Eq2rCReS2bgVlUoAqJprjkLXk2Jz6pXU,1774
20
21
  examples/personality_test/tesks_test.py,sha256=7LIiwrEbva9WWZOLi34fsmCBN__RZbPpxoUOKA_AtYk,1924
21
22
  examples/test_local_models/local_chat.py,sha256=slakja2zaHOEAUsn2tn_VmI4kLx6luLBrPqAeaNsix8,456
22
- lollms_client/__init__.py,sha256=86XdQomPuN11zPz9F0RAO1ikkwIwxOuay4D48Lr0imE,910
23
+ lollms_client/__init__.py,sha256=4hRrTRCQTe3p2BdddF-8fJyG0nXyLOe_Imfini-BgtQ,910
23
24
  lollms_client/lollms_config.py,sha256=goEseDwDxYJf3WkYJ4IrLXwg3Tfw73CXV2Avg45M_hE,21876
24
- lollms_client/lollms_core.py,sha256=CRHOihPB3Euzu2lMUyvXWcd27GT6NHDbJvdlLO6cB1M,117743
25
- lollms_client/lollms_discussion.py,sha256=9b83m0D894jwpgssWYTQHbVxp1gJoI-J947Ui_dRXII,2073
25
+ lollms_client/lollms_core.py,sha256=B1swe9E024JZigsQSXynuFHpJi-1dIEcIVN7EGGXZqk,113509
26
+ lollms_client/lollms_discussion.py,sha256=EV90dIgw8a-f-82vB2GspR60RniYz7WnBmAWSIg5mW0,2158
26
27
  lollms_client/lollms_js_analyzer.py,sha256=01zUvuO2F_lnUe_0NLxe1MF5aHE1hO8RZi48mNPv-aw,8361
27
28
  lollms_client/lollms_llm_binding.py,sha256=bdElz_IBx0zZ-85YTT1fyY_mSoHo46tKIMiHYJlKCkM,9809
28
29
  lollms_client/lollms_mcp_binding.py,sha256=0rK9HQCBEGryNc8ApBmtOlhKE1Yfn7X7xIQssXxS2Zc,8933
@@ -69,8 +70,8 @@ lollms_client/tts_bindings/piper_tts/__init__.py,sha256=0IEWG4zH3_sOkSb9WbZzkeV5
69
70
  lollms_client/tts_bindings/xtts/__init__.py,sha256=FgcdUH06X6ZR806WQe5ixaYx0QoxtAcOgYo87a2qxYc,18266
70
71
  lollms_client/ttv_bindings/__init__.py,sha256=UZ8o2izQOJLQgtZ1D1cXoNST7rzqW22rL2Vufc7ddRc,3141
71
72
  lollms_client/ttv_bindings/lollms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
- lollms_client-0.19.7.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
73
- lollms_client-0.19.7.dist-info/METADATA,sha256=mBO2JtWYs-IAKhUX2GMIvBHzIXw-MiQ2sMZkbazoqos,13374
74
- lollms_client-0.19.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
75
- lollms_client-0.19.7.dist-info/top_level.txt,sha256=NI_W8S4OYZvJjb0QWMZMSIpOrYzpqwPGYaklhyWKH2w,23
76
- lollms_client-0.19.7.dist-info/RECORD,,
73
+ lollms_client-0.19.8.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
74
+ lollms_client-0.19.8.dist-info/METADATA,sha256=uHDmoes5veI0cBqErMnJbDS8TvadIZb-xvMnOXhLclc,13374
75
+ lollms_client-0.19.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
76
+ lollms_client-0.19.8.dist-info/top_level.txt,sha256=NI_W8S4OYZvJjb0QWMZMSIpOrYzpqwPGYaklhyWKH2w,23
77
+ lollms_client-0.19.8.dist-info/RECORD,,