lollms-client 0.19.0__py3-none-any.whl → 0.19.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lollms-client has been flagged as potentially problematic by the registry.
- examples/generate_text_with_multihop_rag_example.py +211 -0
- examples/internet_search_with_rag.py +189 -0
- lollms_client/__init__.py +1 -1
- lollms_client/lollms_core.py +238 -1
- {lollms_client-0.19.0.dist-info → lollms_client-0.19.5.dist-info}/METADATA +71 -16
- {lollms_client-0.19.0.dist-info → lollms_client-0.19.5.dist-info}/RECORD +9 -7
- {lollms_client-0.19.0.dist-info → lollms_client-0.19.5.dist-info}/WHEEL +0 -0
- {lollms_client-0.19.0.dist-info → lollms_client-0.19.5.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.19.0.dist-info → lollms_client-0.19.5.dist-info}/top_level.txt +0 -0
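
The headline change in 0.19.5 is a new `LollmsClient.generate_text_with_rag()` method (added in `lollms_client/lollms_core.py` below), together with two example scripts that exercise it. The following minimal sketch condenses those examples into a single call; the binding/model names are placeholders and `my_rag_query` is a stand-in for whatever retriever you supply.

```python
from typing import Any, Dict, List, Optional
from lollms_client import LollmsClient

def my_rag_query(query_text: str, vectorizer_name: Optional[str] = None,
                 top_k: int = 5, min_similarity_percent: float = 0.0) -> List[Dict[str, Any]]:
    # Stand-in retriever; return chunks shaped like the mock example below.
    return [{"file_path": "notes.md", "chunk_text": "Some relevant passage.", "similarity_percent": 90.0}]

lc = LollmsClient(binding_name="ollama", model_name="qwen3:4b")  # placeholder binding/model
result = lc.generate_text_with_rag(
    prompt="What are the key features of Python?",
    rag_query_function=my_rag_query,
    max_rag_hops=0,              # 0 = classic single-shot RAG; >0 lets the LLM refine queries per hop
    rag_top_k=2,
    rag_min_similarity_percent=50.0,
)
print(result["final_answer"])    # also returned: rag_hops_history, all_retrieved_sources, error
```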
examples/generate_text_with_multihop_rag_example.py
ADDED
@@ -0,0 +1,211 @@
+from lollms_client import LollmsClient, MSG_TYPE
+from ascii_colors import ASCIIColors, trace_exception
+from typing import List, Dict, Any, Optional, Callable
+import json
+from pathlib import Path
+
+# --- Mock RAG Implementation ---
+# In a real application, this would interact with your vector database (Pinecone, ChromaDB, FAISS, etc.)
+# and use a real sentence transformer for vectorization.
+
+MOCK_KNOWLEDGE_BASE = {
+    "python_basics.md": [
+        {"chunk_id": 1, "text": "Python is a high-level, interpreted programming language known for its readability and versatility. It was created by Guido van Rossum and first released in 1991."},
+        {"chunk_id": 2, "text": "Key features of Python include dynamic typing, automatic memory management (garbage collection), and a large standard library. It supports multiple programming paradigms, such as procedural, object-oriented, and functional programming."},
+        {"chunk_id": 3, "text": "Common applications of Python include web development (e.g., Django, Flask), data science (e.g., Pandas, NumPy, Scikit-learn), machine learning, artificial intelligence, automation, and scripting."},
+    ],
+    "javascript_info.js": [
+        {"chunk_id": 1, "text": "JavaScript is a scripting language primarily used for front-end web development to create interactive effects within web browsers. It is also used in back-end development (Node.js), mobile app development, and game development."},
+        {"chunk_id": 2, "text": "JavaScript is dynamically typed, prototype-based, and multi-paradigm. Along with HTML and CSS, it is one of the core technologies of the World Wide Web."},
+        {"chunk_id": 3, "text": "Popular JavaScript frameworks and libraries include React, Angular, Vue.js for front-end, and Express.js for Node.js back-end applications."},
+    ],
+    "ai_concepts.txt": [
+        {"chunk_id": 1, "text": "Artificial Intelligence (AI) refers to the simulation of human intelligence in machines that are programmed to think like humans and mimic their actions. The term may also be applied to any machine that exhibits traits associated with a human mind such as learning and problem-solving."},
+        {"chunk_id": 2, "text": "Machine Learning (ML) is a subset of AI that provides systems the ability to automatically learn and improve from experience without being explicitly programmed. Deep Learning (DL) is a further subset of ML based on artificial neural networks with representation learning."},
+        {"chunk_id": 3, "text": "Retrieval Augmented Generation (RAG) is an AI framework for improving the quality of LLM-generated responses by grounding the model on external sources of knowledge to supplement the LLM’s internal representation of information."},
+    ]
+}
+
+def mock_rag_query_function(
+    query_text: str,
+    vectorizer_name: Optional[str] = None, # Ignored in mock
+    top_k: int = 3,
+    min_similarity_percent: float = 0.0 # Ignored in mock, simple keyword match
+) -> List[Dict[str, Any]]:
+    """
+    A mock RAG query function.
+    Performs a simple keyword search in the MOCK_KNOWLEDGE_BASE.
+    """
+    ASCIIColors.magenta(f" [MOCK RAG] Querying with: '{query_text}', top_k={top_k}")
+    results = []
+    query_lower = query_text.lower()
+
+    all_chunks = []
+    for file_path, chunks_in_file in MOCK_KNOWLEDGE_BASE.items():
+        for chunk_data in chunks_in_file:
+            all_chunks.append({"file_path": file_path, **chunk_data})
+
+    # Simple keyword matching and scoring (very basic)
+    scored_chunks = []
+    for chunk_info in all_chunks:
+        score = 0
+        for keyword in query_lower.split():
+            if keyword in chunk_info["text"].lower() and len(keyword)>2: # Basic relevance
+                score += 1
+        if "python" in query_lower and "python" in chunk_info["file_path"].lower(): score+=5
+        if "javascript" in query_lower and "javascript" in chunk_info["file_path"].lower(): score+=5
+        if "ai" in query_lower and "ai" in chunk_info["file_path"].lower(): score+=3
+
+
+        if score > 0 : # Only include if some keywords match
+            # Simulate similarity percentage (higher score = higher similarity)
+            similarity = min(100.0, score * 20.0 + 40.0) # Arbitrary scaling
+            if similarity >= min_similarity_percent:
+                scored_chunks.append({
+                    "file_path": chunk_info["file_path"],
+                    "chunk_text": chunk_info["text"],
+                    "similarity_percent": similarity,
+                    "_score_for_ranking": score # Internal score for sorting
+                })
+
+    # Sort by internal score (descending) and take top_k
+    scored_chunks.sort(key=lambda x: x["_score_for_ranking"], reverse=True)
+    results = [
+        {"file_path": c["file_path"], "chunk_text": c["chunk_text"], "similarity_percent": c["similarity_percent"]}
+        for c in scored_chunks[:top_k]
+    ]
+    ASCIIColors.magenta(f" [MOCK RAG] Found {len(results)} relevant chunks.")
+    return results
+
+# --- Streaming Callback for RAG and LLM ---
+def rag_streaming_callback(
+    chunk: str,
+    msg_type: MSG_TYPE,
+    metadata: Optional[Dict] = None,
+    turn_history: Optional[List] = None # history of this specific RAG turn
+) -> bool:
+    """
+    Handles various stages of RAG and final LLM generation.
+    """
+    metadata = metadata or {}
+    turn_history = turn_history or [] # Should be populated by LollmsClient
+
+    if msg_type == MSG_TYPE.MSG_TYPE_CHUNK: # Final answer chunks
+        ASCIIColors.success(chunk, end="", flush=True)
+    elif msg_type == MSG_TYPE.MSG_TYPE_STEP_START:
+        step_type = metadata.get("type", "step")
+        hop = metadata.get("hop", "")
+        info = metadata.get("query", chunk) if step_type == "rag_query_generation" or step_type == "rag_retrieval" else chunk
+        ASCIIColors.yellow(f"\n>> RAG Step Start (Hop {hop}): {step_type} - Info: {str(info)[:100]}...", flush=True)
+    elif msg_type == MSG_TYPE.MSG_TYPE_STEP_END:
+        step_type = metadata.get("type", "step")
+        hop = metadata.get("hop", "")
+        num_chunks = metadata.get("num_chunks", "")
+        query = metadata.get("query", "")
+        decision = metadata.get("decision", "")
+
+        info_str = ""
+        if step_type == "rag_query_generation" and query: info_str = f"Generated Query: {query}"
+        elif step_type == "rag_retrieval": info_str = f"Retrieved {num_chunks} chunks"
+        elif step_type == "rag_llm_decision": info_str = f"LLM Decision: {json.dumps(decision)}"
+        elif step_type == "final_answer_generation": info_str = "Final answer generation complete."
+        else: info_str = chunk
+
+        ASCIIColors.green(f"\n<< RAG Step End (Hop {hop}): {step_type} - {info_str}", flush=True)
+    elif msg_type == MSG_TYPE.MSG_TYPE_EXCEPTION:
+        ASCIIColors.error(f"\nError in RAG stream: {chunk}", flush=True)
+
+    # You can inspect turn_history here if needed:
+    # ASCIIColors.debug(f"Current RAG Turn History: {turn_history}")
+    return True
+
+# --- Main Example ---
+if __name__ == "__main__":
+    ASCIIColors.red("--- Multi-Hop RAG Example with LollmsClient ---")
+
+    # LLM Configuration (use a model good at instruction following and JSON)
+    # Ensure your Ollama server is running and has this model pulled.
+    LLM_BINDING_NAME = "ollama"
+    LLM_MODEL_NAME = "qwen3:4b" # or llama3, phi3 etc.
+    # LLM_MODEL_NAME = "qwen2:1.5b" # Smaller model for quicker tests, but might struggle with complex JSON
+
+    try:
+        lc = LollmsClient(
+            binding_name=LLM_BINDING_NAME,
+            model_name=LLM_MODEL_NAME,
+            temperature=0.1, # Default temp for final answer if not overridden
+            # Other LollmsClient params as needed
+        )
+        ASCIIColors.green(f"LollmsClient initialized with LLM: {LLM_BINDING_NAME}/{LLM_MODEL_NAME}")
+
+        # --- Test Case 1: Classic RAG (max_rag_hops = 0) ---
+        ASCIIColors.cyan("\n\n--- Test Case 1: Classic RAG (max_rag_hops = 0) ---")
+        classic_rag_prompt = "What are the key features of Python?"
+        ASCIIColors.blue(f"User Prompt: {classic_rag_prompt}")
+
+        classic_rag_result = lc.generate_text_with_rag(
+            prompt=classic_rag_prompt,
+            rag_query_function=mock_rag_query_function,
+            # rag_query_text=None, # Will use `prompt` for query
+            max_rag_hops=0,
+            rag_top_k=2, # Get 2 best chunks
+            rag_min_similarity_percent=50.0,
+            streaming_callback=rag_streaming_callback,
+            n_predict=1024 # Max tokens for final answer
+        )
+        print("\n--- End of Classic RAG ---")
+        ASCIIColors.magenta("\nClassic RAG Final Output:")
+        print(json.dumps(classic_rag_result, indent=2))
+
+
+        # --- Test Case 2: Multi-Hop RAG (max_rag_hops = 1) ---
+        ASCIIColors.cyan("\n\n--- Test Case 2: Multi-Hop RAG (max_rag_hops = 1) ---")
+        multihop_prompt_1 = "Compare Python and JavaScript for web development based on their common applications and core technologies."
+        ASCIIColors.blue(f"User Prompt: {multihop_prompt_1}")
+
+        multihop_rag_result_1 = lc.generate_text_with_rag(
+            prompt=multihop_prompt_1,
+            rag_query_function=mock_rag_query_function,
+            # rag_query_text="Python web development applications", # Optional: provide an initial query
+            max_rag_hops=1, # Allow one hop for LLM to refine search or decide
+            rag_top_k=2,
+            rag_min_similarity_percent=60.0,
+            streaming_callback=rag_streaming_callback,
+            n_predict=1024,
+            rag_hop_query_generation_temperature=0.1, # Focused query gen
+            rag_hop_summary_temperature=0.2 # Focused summary
+        )
+        print("\n--- End of Multi-Hop RAG (1 hop) ---")
+        ASCIIColors.magenta("\nMulti-Hop RAG (1 hop) Final Output:")
+        print(json.dumps(multihop_rag_result_1, indent=2))
+
+
+        # --- Test Case 3: Multi-Hop RAG (max_rag_hops = 2) - LLM might decide it has enough earlier ---
+        ASCIIColors.cyan("\n\n--- Test Case 3: Multi-Hop RAG (max_rag_hops = 2) ---")
+        multihop_prompt_2 = "Explain Retrieval Augmented Generation (RAG) and its relation to Machine Learning."
+        ASCIIColors.blue(f"User Prompt: {multihop_prompt_2}")
+
+        multihop_rag_result_2 = lc.generate_text_with_rag(
+            prompt=multihop_prompt_2,
+            rag_query_function=mock_rag_query_function,
+            max_rag_hops=2, # Allow up to two refinement hops
+            rag_top_k=1, # Get only the best chunk per hop to force more specific queries
+            rag_min_similarity_percent=50.0,
+            streaming_callback=rag_streaming_callback,
+            n_predict=300
+        )
+        print("\n--- End of Multi-Hop RAG (up to 2 hops) ---")
+        ASCIIColors.magenta("\nMulti-Hop RAG (up to 2 hops) Final Output:")
+        print(json.dumps(multihop_rag_result_2, indent=2))
+
+
+    except ValueError as ve:
+        ASCIIColors.error(f"Initialization or RAG parameter error: {ve}")
+        trace_exception(ve)
+    except ConnectionRefusedError:
+        ASCIIColors.error(f"Connection refused. Is the Ollama server ({LLM_BINDING_NAME}) running?")
+    except Exception as e:
+        ASCIIColors.error(f"An unexpected error occurred: {e}")
+        trace_exception(e)
+
+    ASCIIColors.red("\n--- Multi-Hop RAG Example Finished ---")
examples/internet_search_with_rag.py
ADDED
@@ -0,0 +1,189 @@
+from lollms_client import LollmsClient, MSG_TYPE
+from ascii_colors import ASCIIColors, trace_exception
+from typing import List, Dict, Any, Optional, Callable
+import json
+from pathlib import Path
+
+# --- Dependency Management for the Search Tool ---
+# Ensure the duckduckgo_search library is installed for our RAG query function.
+try:
+    import pipmaster as pm
+    pm.ensure_packages(["duckduckgo_search"])
+    from duckduckgo_search import DDGS
+    _ddgs_installed = True
+except Exception as e_dep:
+    _ddgs_installed = False
+    ASCIIColors.error(f"Could not ensure/import duckduckgo_search: {e_dep}")
+    ASCIIColors.warning("The RAG function in this example will not work.")
+    DDGS = None
+# --- End Dependency Management ---
+
+
+def internet_rag_query_function(
+    query_text: str,
+    vectorizer_name: Optional[str] = None, # Not used for this keyword-based search
+    top_k: int = 5,
+    min_similarity_percent: float = 0.0 # Not used for this keyword-based search
+) -> List[Dict[str, Any]]:
+    """
+    A RAG-compatible query function that performs a live internet search using DuckDuckGo.
+
+    Args:
+        query_text: The search query.
+        vectorizer_name: Ignored by this function.
+        top_k: The maximum number of search results to return.
+        min_similarity_percent: Ignored by this function.
+
+    Returns:
+        A list of dictionaries, each formatted for RAG with 'document', 'content', and 'similarity'.
+    """
+    if not _ddgs_installed:
+        ASCIIColors.error("duckduckgo_search library is not available. Cannot perform internet search.")
+        return []
+
+    ASCIIColors.magenta(f" [INTERNET RAG] Searching web for: '{query_text}', max_results={top_k}")
+    formatted_results = []
+    try:
+        with DDGS() as ddgs:
+            # Fetch search results from DuckDuckGo
+            search_results = ddgs.text(keywords=query_text, max_results=top_k)
+
+            if not search_results:
+                ASCIIColors.yellow(" [INTERNET RAG] DuckDuckGo returned no results for this query.")
+                return []
+
+            for i, result in enumerate(search_results):
+                # Format the search result into the structure expected by generate_text_with_rag
+                # 'document' will be the URL.
+                # 'content' will be a combination of title and snippet.
+                # 'similarity' is emulated based on rank, as DDG doesn't provide a score.
+                formatted_results.append({
+                    "document": result.get("href", "#"),
+                    "similarity": round(100.0 - (i * (10.0 / top_k)), 2), # Create a descending score
+                    "content": f"Title: {result.get('title', 'N/A')}\nSnippet: {result.get('body', 'N/A')}"
+                })
+
+        ASCIIColors.magenta(f" [INTERNET RAG] Found {len(formatted_results)} results.")
+        return formatted_results
+
+    except Exception as e:
+        trace_exception(e)
+        ASCIIColors.error(f" [INTERNET RAG] An error occurred during search: {e}")
+        return []
+
+# --- Streaming Callback for RAG and LLM ---
+# (This is the same useful callback from the previous example)
+def rag_streaming_callback(
+    chunk: str,
+    msg_type: MSG_TYPE,
+    metadata: Optional[Dict] = None,
+    turn_history: Optional[List] = None
+) -> bool:
+    metadata = metadata or {}
+    hop = metadata.get("hop", "")
+    type_info = metadata.get("type", "N/A")
+
+    if msg_type == MSG_TYPE.MSG_TYPE_CHUNK:
+        ASCIIColors.success(chunk, end="", flush=True)
+    elif msg_type == MSG_TYPE.MSG_TYPE_STEP_START:
+        info = metadata.get("query", chunk) if type_info in ["rag_query_generation", "rag_retrieval"] else chunk
+        ASCIIColors.yellow(f"\n>> RAG Hop {hop} | START | {type_info.upper()} | Info: {str(info)[:100]}...", flush=True)
+    elif msg_type == MSG_TYPE.MSG_TYPE_STEP_END:
+        num_chunks = metadata.get("num_chunks")
+        query = metadata.get("query")
+        decision = metadata.get("decision")
+
+        end_info = []
+        if query: end_info.append(f"Query: '{str(query)[:50]}...'")
+        if num_chunks is not None: end_info.append(f"Retrieved: {num_chunks} sources")
+        if decision: end_info.append(f"LLM Decision: NeedMore={decision.get('need_more_data')}, Summary: '{str(decision.get('new_information_summary'))[:40]}...'")
+
+        ASCIIColors.green(f"\n<< RAG Hop {hop} | END | {type_info.upper()} | {' | '.join(end_info) if end_info else chunk}", flush=True)
+    elif msg_type == MSG_TYPE.MSG_TYPE_EXCEPTION:
+        ASCIIColors.error(f"\nError in RAG stream: {chunk}", flush=True)
+
+    return True
+
+# --- Main Example ---
+if __name__ == "__main__":
+    ASCIIColors.red("--- Internet Search with Multi-Hop RAG Example ---")
+
+    LLM_BINDING_NAME = "ollama"
+    LLM_MODEL_NAME = "mistral-nemo:latest" # Nemo is good with JSON and reasoning
+
+    if not _ddgs_installed:
+        ASCIIColors.error("Cannot run this example because the 'duckduckgo-search' library is not installed.")
+        exit(1)
+
+    try:
+        lc = LollmsClient(
+            binding_name=LLM_BINDING_NAME,
+            model_name=LLM_MODEL_NAME,
+            temperature=0.1,
+            ctx_size=4096
+        )
+        ASCIIColors.green(f"LollmsClient initialized with LLM: {LLM_BINDING_NAME}/{LLM_MODEL_NAME}")
+
+        # --- Test Case 1: Classic RAG with Internet Search ---
+        ASCIIColors.cyan("\n\n--- Test Case 1: Classic RAG (max_rag_hops = 0) using Internet Search ---")
+        classic_rag_prompt = "What is the James Webb Space Telescope and what was its launch date?"
+        ASCIIColors.blue(f"User Prompt: {classic_rag_prompt}")
+
+        classic_rag_result = lc.generate_text_with_rag(
+            prompt=classic_rag_prompt,
+            rag_query_function=internet_rag_query_function,
+            max_rag_hops=0,
+            rag_top_k=3,
+            streaming_callback=rag_streaming_callback,
+            n_predict=300
+        )
+        print("\n--- End of Classic RAG ---")
+        ASCIIColors.magenta("\nClassic RAG Final Output Details:")
+        print(f" Final Answer (first 150 chars): {classic_rag_result.get('final_answer', '')[:150]}...")
+        print(f" Error: {classic_rag_result.get('error')}")
+        print(f" Total Unique Sources Retrieved: {len(classic_rag_result.get('all_retrieved_sources', []))}")
+        if classic_rag_result.get('all_retrieved_sources'):
+            print(" Retrieved Sources (URLs):")
+            for source in classic_rag_result['all_retrieved_sources']:
+                print(f" - {source.get('document')}")
+
+        # --- Test Case 2: Multi-Hop RAG with Internet Search ---
+        ASCIIColors.cyan("\n\n--- Test Case 2: Multi-Hop RAG (max_rag_hops = 2) using Internet Search ---")
+        multihop_prompt = "First, find out what the TRAPPIST-1 system is. Then, search for recent news about its planets from the James Webb Space Telescope."
+        ASCIIColors.blue(f"User Prompt: {multihop_prompt}")
+
+        multihop_rag_result = lc.generate_text_with_rag(
+            prompt=multihop_prompt,
+            rag_query_function=internet_rag_query_function,
+            rag_query_text=None, # Let the LLM generate the first query
+            max_rag_hops=2, # Allow up to two separate search queries
+            rag_top_k=2,
+            streaming_callback=rag_streaming_callback,
+            n_predict=400,
+        )
+        print("\n--- End of Multi-Hop RAG ---")
+        ASCIIColors.magenta("\nMulti-Hop RAG Final Output Details:")
+        print(f" Final Answer (first 150 chars): {multihop_rag_result.get('final_answer', '')[:150]}...")
+        print(f" Error: {multihop_rag_result.get('error')}")
+        print(f" Number of Hops Made: {len(multihop_rag_result.get('rag_hops_history', []))}")
+        for i, hop_info in enumerate(multihop_rag_result.get('rag_hops_history', [])):
+            print(f" Hop {i+1} Query: '{hop_info.get('query')}'")
+            print(f" Hop {i+1} Retrieved Count: {len(hop_info.get('retrieved_chunks_details',[]))}")
+            print(f" Hop {i+1} LLM Decision: NeedMoreData={hop_info.get('llm_decision_json',{}).get('need_more_data')}")
+        print(f" Total Unique Sources Retrieved: {len(multihop_rag_result.get('all_retrieved_sources', []))}")
+        if multihop_rag_result.get('all_retrieved_sources'):
+            print(" All Retrieved Sources (URLs):")
+            for source in multihop_rag_result['all_retrieved_sources']:
+                print(f" - {source.get('document')}")
+
+
+    except ValueError as ve:
+        ASCIIColors.error(f"Initialization or RAG parameter error: {ve}")
+        trace_exception(ve)
+    except ConnectionRefusedError:
+        ASCIIColors.error(f"Connection refused. Is the Ollama server ({LLM_BINDING_NAME}) running?")
+    except Exception as e:
+        ASCIIColors.error(f"An unexpected error occurred: {e}")
+        trace_exception(e)
+
+    ASCIIColors.red("\n--- Internet Search RAG Example Finished ---")
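
Both example scripts above plug a custom retriever into the same contract: `rag_query_function(query_text, vectorizer_name, top_k, min_similarity_percent) -> List[Dict]`. A minimal skeleton of that callable, using the chunk keys that `generate_text_with_rag` (added in `lollms_core.py` below) reads from each result:

```python
from typing import Any, Dict, List, Optional

def my_retriever(query_text: str,
                 vectorizer_name: Optional[str] = None,
                 top_k: int = 5,
                 min_similarity_percent: float = 70.0) -> List[Dict[str, Any]]:
    """Skeleton retriever; replace the body with a vector-store or web-search lookup."""
    chunks = [
        {
            "file_path": "docs/example.md",          # source label reported in all_retrieved_sources
            "chunk_text": "A relevant passage...",   # text handed to the LLM as context
            "similarity_percent": 88.0,               # score surfaced alongside the source
        }
    ]
    return chunks[:top_k]
```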
lollms_client/__init__.py
CHANGED
@@ -7,7 +7,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utilities
 from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
 
 
-__version__ = "0.19.0"
+__version__ = "0.19.5" # Updated version
 
 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
lollms_client/lollms_core.py
CHANGED
@@ -12,7 +12,7 @@ from lollms_client.lollms_ttv_binding import LollmsTTVBinding, LollmsTTVBindingManager
 from lollms_client.lollms_ttm_binding import LollmsTTMBinding, LollmsTTMBindingManager
 from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
 
-import json
+import json, re
 from enum import Enum
 import base64
 import requests
@@ -853,6 +853,243 @@ Respond with a JSON object containing ONE of the following structures:
         turn_history.append({"type":"final_answer_generated", "content":final_answer_text})
         return {"final_answer": final_answer_text, "tool_calls": tool_calls_made_this_turn, "error": None}
 
+    # --- RAG ---
+
+    def generate_text_with_rag(
+        self,
+        prompt: str,
+        rag_query_function: Callable[[str, Optional[str], int, float], List[Dict[str, Any]]],
+        rag_query_text: Optional[str] = None,
+        rag_vectorizer_name: Optional[str] = None,
+        rag_top_k: int = 5,
+        rag_min_similarity_percent: float = 70.0,
+        max_rag_hops: int = 0,
+        images: Optional[List[str]] = None,
+        system_prompt: str = "",
+        n_predict: Optional[int] = None,
+        stream: Optional[bool] = None,
+        temperature: Optional[float] = None,
+        top_k: Optional[int] = None,
+        top_p: Optional[float] = None,
+        repeat_penalty: Optional[float] = None,
+        repeat_last_n: Optional[int] = None,
+        seed: Optional[int] = None,
+        n_threads: Optional[int] = None,
+        ctx_size: int | None = None,
+        streaming_callback: Optional[Callable[[str, MSG_TYPE, Optional[Dict], Optional[List]], bool]] = None,
+        rag_hop_query_generation_temperature: float = 0.2,
+        rag_hop_summary_temperature: float = 0.3,
+        **llm_generation_kwargs
+    ) -> Dict[str, Any]:
+        if not self.binding:
+            return {"final_answer": "", "rag_hops_history": [], "all_retrieved_sources": [], "error": "LLM binding not initialized."}
+
+        turn_rag_history_for_callback: List[Dict[str, Any]] = []
+        accumulated_rag_context_str = ""
+        rag_hops_details_list: List[Dict[str, Any]] = []
+        all_unique_retrieved_chunks_map: Dict[str, Dict[str, Any]] = {} # To store unique chunks by content hash or path+text
+        current_query_for_rag = rag_query_text
+        original_user_prompt = prompt
+
+        for hop_count in range(max_rag_hops + 1):
+            if streaming_callback:
+                streaming_callback(f"Starting RAG Hop {hop_count + 1}", MSG_TYPE.MSG_TYPE_STEP, {"type": "rag_hop_start", "hop": hop_count + 1}, turn_rag_history_for_callback)
+
+            # 1. Determine/Generate RAG Query Text
+            if hop_count > 0 or (current_query_for_rag is None and max_rag_hops > 0):
+                if streaming_callback:
+                    streaming_callback("LLM generating refined RAG query...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_query_generation", "hop": hop_count + 1}, turn_rag_history_for_callback)
+
+                query_gen_prompt_parts = [
+                    f"{self.system_full_header}You are an expert research assistant. Your task is to formulate the best possible search query to find information relevant to the user's original request, considering the information already gathered.",
+                    f"{self.user_full_header}Original user request: '{original_user_prompt}'"
+                ]
+                if accumulated_rag_context_str:
+                    query_gen_prompt_parts.append(f"Information gathered so far (summaries):\n{accumulated_rag_context_str}")
+                if rag_hops_details_list:
+                    query_gen_prompt_parts.append("Previous search attempts and their summarized findings:")
+                    for prev_hop in rag_hops_details_list:
+                        query_gen_prompt_parts.append(f"  - Queried for: '{prev_hop['query']}', Summary: '{prev_hop.get('new_information_summary', 'N/A')}'")
+
+                query_gen_prompt_parts.append("Based on this, what is the most effective and specific search query to perform next to get closer to answering the user's request? Output only the search query text, nothing else.")
+                query_gen_prompt_parts.append(self.ai_full_header)
+
+                new_query_text_raw = self.remove_thinking_blocks(self.generate_text(prompt="".join(query_gen_prompt_parts), temperature=rag_hop_query_generation_temperature, n_predict=100, stream=False))
+                if isinstance(new_query_text_raw, dict) and "error" in new_query_text_raw:
+                    return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Failed to generate RAG query: {new_query_text_raw['error']}"}
+
+                current_query_for_rag = new_query_text_raw.strip().replace("Search query:", "").replace("Query:", "").strip("\"'")
+
+                if streaming_callback:
+                    streaming_callback(f"Generated RAG query: {current_query_for_rag}", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "rag_query_generation", "hop": hop_count + 1, "query": current_query_for_rag}, turn_rag_history_for_callback)
+
+            elif current_query_for_rag is None and max_rag_hops == 0:
+                current_query_for_rag = original_user_prompt
+
+            if not current_query_for_rag:
+                if max_rag_hops > 0 and hop_count < max_rag_hops:
+                    ASCIIColors.warning(f"RAG Hop {hop_count + 1}: Generated query was empty. Skipping hop.")
+                    rag_hops_details_list.append({"query": "EMPTY_QUERY_SKIPPED", "retrieved_chunks_details": [], "new_information_summary": "Skipped due to empty query.", "llm_decision_json": {"need_more_data": True if hop_count < max_rag_hops -1 else False}})
+                    turn_rag_history_for_callback.append({"type":"rag_hop_info", "hop": hop_count + 1, "query": "EMPTY_QUERY_SKIPPED", "summary":"Skipped."})
+                    continue
+                else:
+                    ASCIIColors.warning("RAG query is empty. Proceeding without RAG context.")
+                    break
+
+            # 2. Perform RAG Query
+            if streaming_callback:
+                streaming_callback(f"Querying knowledge base for: '{current_query_for_rag}'...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_retrieval", "hop": hop_count + 1, "query": current_query_for_rag}, turn_rag_history_for_callback)
+
+            try:
+                retrieved_chunks_raw = rag_query_function(current_query_for_rag, rag_vectorizer_name, rag_top_k, rag_min_similarity_percent)
+            except Exception as e_rag_query:
+                trace_exception(e_rag_query)
+                return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"RAG query function failed: {e_rag_query}"}
+
+            if streaming_callback:
+                streaming_callback(f"Retrieved {len(retrieved_chunks_raw)} chunks.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "rag_retrieval", "hop": hop_count + 1, "num_chunks": len(retrieved_chunks_raw)}, turn_rag_history_for_callback)
+
+            current_hop_details = {"query": current_query_for_rag, "retrieved_chunks_details": []}
+
+            formatted_new_chunks_for_llm_summary = ""
+            if retrieved_chunks_raw:
+                for i, chunk in enumerate(retrieved_chunks_raw):
+                    doc_path = chunk.get('file_path', 'Unknown Document')
+                    similarity = chunk.get('similarity_percent', 'N/A')
+                    content = chunk.get('chunk_text', '')
+
+                    chunk_detail_for_history = {"document": doc_path, "similarity": similarity, "content": content}
+                    current_hop_details["retrieved_chunks_details"].append(chunk_detail_for_history)
+
+                    # Add to unique list for final output
+                    # Use a combination of path and content to uniquely identify a chunk to avoid duplicates if same content appears from different queries.
+                    # A more robust unique key might involve hashing content if it's very large.
+                    unique_key = f"{doc_path}::{content[:100]}" # Simple key
+                    if unique_key not in all_unique_retrieved_chunks_map:
+                        all_unique_retrieved_chunks_map[unique_key] = chunk_detail_for_history
+
+                    # Format for LLM processing (summary or direct use)
+                    formatted_new_chunks_for_llm_summary += f"Document: {doc_path} (Similarity: {similarity}%)\nContent:\n{content}\n---\n"
+
+            if not retrieved_chunks_raw:
+                current_hop_details["new_information_summary"] = "No relevant information found for this query."
+                current_hop_details["llm_decision_json"] = {"need_more_data": True if max_rag_hops > 0 and hop_count < max_rag_hops -1 else False, "reasoning_for_decision":"No new information retrieved."}
+                rag_hops_details_list.append(current_hop_details)
+                turn_rag_history_for_callback.append({"type":"rag_hop_info", **current_hop_details})
+                if max_rag_hops == 0 or hop_count >= max_rag_hops -1 :
+                    break
+                else:
+                    accumulated_rag_context_str += f"\n\n---\nAttempted query: '{current_query_for_rag}' - No new information found.\n---"
+                    continue
+
+            if max_rag_hops == 0: # Classic RAG
+                accumulated_rag_context_str += formatted_new_chunks_for_llm_summary
+                current_hop_details["new_information_summary"] = "Directly used in context (classic RAG)."
+                current_hop_details["llm_decision_json"] = {"need_more_data": False}
+                rag_hops_details_list.append(current_hop_details)
+                turn_rag_history_for_callback.append({"type":"rag_hop_info", **current_hop_details})
+                break
+
+            # Multi-hop: LLM summarizes and decides
+            if streaming_callback:
+                streaming_callback("LLM processing retrieved data and deciding next step...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "rag_llm_decision", "hop": hop_count + 1}, turn_rag_history_for_callback)
+
+            decision_prompt_llm_parts = [
+                f"{self.system_full_header}You are an AI research assistant. Analyze newly retrieved information against the user's request and prior knowledge, then decide if more searching is needed.",
+                f"{self.user_full_header}Original user request: '{original_user_prompt}'",
+            ]
+            if accumulated_rag_context_str:
+                decision_prompt_llm_parts.append(f"Current accumulated knowledge summary:\n{accumulated_rag_context_str}")
+            decision_prompt_llm_parts.append(f"You just searched for: '{current_query_for_rag}'")
+            decision_prompt_llm_parts.append(f"And found this new information:\n--- New Information Start ---\n{formatted_new_chunks_for_llm_summary}--- New Information End ---")
+            decision_prompt_llm_parts.append(
+                "Task: Provide a concise summary of ONLY the new information relevant to the original request. "
+                "Then, assess if you now have sufficient information to comprehensively answer the user's original request or if another, more targeted search is necessary. "
+                "Respond STRICTLY in the following JSON format, with no other text before or after the JSON block:"
+            )
+            json_template_for_decision = """
+{
+    "new_information_summary": "<Your concise summary of ONLY the new_information relevant to the original_user_request. Focus on what's new and useful. If nothing new is relevant, state that.>",
+    "need_more_data": <true_or_false>,
+    "reasoning_for_decision": "<Briefly explain why you need more data or why you have enough. If needing more, suggest what kind of information is still missing.>"
+}
+"""
+            decision_prompt_llm_parts.append(f"```json\n{json_template_for_decision}\n```")
+            decision_prompt_llm_parts.append(self.ai_full_header)
+
+            llm_decision_json_str = self.generate_code(prompt="".join(decision_prompt_llm_parts), language="json", template=json_template_for_decision, temperature=rag_hop_summary_temperature, max_size=1024)
+
+            if isinstance(llm_decision_json_str, dict) and "error" in llm_decision_json_str:
+                return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"LLM failed to make RAG decision: {llm_decision_json_str['error']}"}
+            if not llm_decision_json_str:
+                return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": "LLM provided empty decision for RAG hop."}
+
+            try:
+                llm_decision = json.loads(llm_decision_json_str)
+            except json.JSONDecodeError:
+                try:
+                    match = re.search(r"```json\s*(\{.*?\})\s*```", llm_decision_json_str, re.DOTALL)
+                    if match: llm_decision = json.loads(match.group(1))
+                    else: llm_decision = json.loads(self.extract_code_blocks(llm_decision_json_str, format="markdown")[0]["content"])
+                except Exception as e_json_parse:
+                    trace_exception(e_json_parse)
+                    return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Failed to parse LLM RAG decision JSON: {llm_decision_json_str}. Error: {e_json_parse}"}
+
+            new_summary = llm_decision.get("new_information_summary", "Summary not provided by LLM.")
+            need_more_data = llm_decision.get("need_more_data", True)
+
+            current_hop_details["new_information_summary"] = new_summary
+            current_hop_details["llm_decision_json"] = llm_decision
+            rag_hops_details_list.append(current_hop_details)
+            turn_rag_history_for_callback.append({"type":"rag_hop_info", **current_hop_details})
+
+            if streaming_callback:
+                streaming_callback(f"LLM decision: Summary='{new_summary[:100]}...', NeedMoreData={need_more_data}", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "rag_llm_decision", "hop": hop_count + 1, "decision": llm_decision}, turn_rag_history_for_callback)
+
+            accumulated_rag_context_str += f"\n\n--- Summary of findings from query '{current_query_for_rag}' (Hop {hop_count + 1}) ---\n{new_summary}\n---"
+
+            if not need_more_data or hop_count >= max_rag_hops -1 : # Subtract 1 because current hop is finishing
+                break
+
+        # 4. Final Answer Generation
+        if streaming_callback:
+            streaming_callback("LLM generating final answer using all gathered information...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "final_answer_generation"}, turn_rag_history_for_callback)
+
+        final_answer_prompt_parts = []
+        if system_prompt:
+            final_answer_prompt_parts.append(f"{self.system_full_header}{system_prompt}")
+
+        final_answer_prompt_parts.append(f"{self.user_full_header}Original request: {original_user_prompt}")
+        if accumulated_rag_context_str:
+            final_answer_prompt_parts.append(f"\nBased on the information I have gathered:\n--- Gathered Context Start ---\n{accumulated_rag_context_str.strip()}\n--- Gathered Context End ---")
+        else:
+            final_answer_prompt_parts.append("\n(No specific information was retrieved from the knowledge base for this request.)")
+
+        final_answer_prompt_parts.append("\nPlease provide a comprehensive answer to the original request using ONLY the provided gathered context. If the context is insufficient, clearly state that.")
+        final_answer_prompt_parts.append(self.ai_full_header)
+
+        final_answer_llm_prompt = "\n".join(final_answer_prompt_parts)
+
+        final_answer_streaming_callback = None
+        if streaming_callback:
+            def final_answer_cb_adapter(chunk, msg_type):
+                return streaming_callback(chunk, msg_type, {"type": "final_answer_chunk"}, turn_rag_history_for_callback)
+            final_answer_streaming_callback = final_answer_cb_adapter
+
+        final_answer_text = self.remove_thinking_blocks(self.generate_text(
+            prompt=final_answer_llm_prompt, images=images,
+            n_predict=n_predict, stream=stream, temperature=temperature, top_k=top_k, top_p=top_p,
+            repeat_penalty=repeat_penalty, repeat_last_n=repeat_last_n, seed=seed, n_threads=n_threads,
+            ctx_size=ctx_size, streaming_callback=final_answer_streaming_callback, **llm_generation_kwargs
+        ))
+
+        if streaming_callback:
+            streaming_callback("Final answer generation complete.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "final_answer_generation"}, turn_rag_history_for_callback)
+
+        if isinstance(final_answer_text, dict) and "error" in final_answer_text:
+            return {"final_answer": "", "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": f"Final answer generation failed: {final_answer_text['error']}"}
+
+        return {"final_answer": final_answer_text, "rag_hops_history": rag_hops_details_list, "all_retrieved_sources": list(all_unique_retrieved_chunks_map.values()), "error": None}
 
     def generate_code(
         self,
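
In the multi-hop path above (`max_rag_hops > 0`), each hop ends with the LLM returning a strict-JSON decision built from `json_template_for_decision`. An illustrative payload with the three expected fields (the values here are made up):

```python
import json

# Illustrative per-hop decision; field names come from json_template_for_decision above.
sample_decision = json.loads("""
{
    "new_information_summary": "The retrieved chunks describe what the TRAPPIST-1 system is.",
    "need_more_data": true,
    "reasoning_for_decision": "Recent JWST findings about its planets are still missing."
}
""")

# The hop loop keeps searching while need_more_data is true and hops remain,
# then feeds the accumulated summaries into the final answer prompt.
print(sample_decision["need_more_data"])
```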
{lollms_client-0.19.0.dist-info → lollms_client-0.19.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lollms_client
-Version: 0.19.0
+Version: 0.19.5
 Summary: A client library for LoLLMs generate endpoint
 Author-email: ParisNeo <parisneoai@gmail.com>
 License: Apache Software License
@@ -39,7 +39,7 @@ Dynamic: license-file
 [](https://github.com/ParisNeo/lollms_client/stargazers/)
 [](https://github.com/ParisNeo/lollms_client/issues)
 
-**`lollms_client`** is a powerful and flexible Python library designed to simplify interactions with the **LoLLMs (Lord of Large Language Models)** ecosystem and various other Large Language Model (LLM) backends. It provides a unified API for text generation, multimodal operations (text-to-image, text-to-speech, etc.), function calling
+**`lollms_client`** is a powerful and flexible Python library designed to simplify interactions with the **LoLLMs (Lord of Large Language Models)** ecosystem and various other Large Language Model (LLM) backends. It provides a unified API for text generation, multimodal operations (text-to-image, text-to-speech, etc.), and robust function calling through the Model Context Protocol (MCP).
 
 Whether you're connecting to a remote LoLLMs server, an Ollama instance, the OpenAI API, or running models locally using GGUF (via `llama-cpp-python` or a managed `llama.cpp` server), Hugging Face Transformers, or vLLM, `lollms-client` offers a consistent and developer-friendly experience.
 
@@ -47,12 +47,12 @@ Whether you're connecting to a remote LoLLMs server, an Ollama instance, the OpenAI API,
 
 * 🔌 **Versatile Binding System:** Seamlessly switch between different LLM backends (LoLLMs, Ollama, OpenAI, Llama.cpp, Transformers, vLLM, OpenLLM) without major code changes.
 * 🗣️ **Multimodal Support:** Interact with models capable of processing images and generate various outputs like speech (TTS) and images (TTI).
-*
-*
-* 📞 **Function Calling:** Enable LLMs to invoke your custom Python functions, bridging the gap between language models and external tools or data sources.
+* 🤖 **Function Calling with MCP:** Empowers LLMs to use external tools and functions through the Model Context Protocol (MCP), with built-in support for local Python tool execution via `local_mcp` binding and its default tools (file I/O, internet search, Python interpreter, image generation).
+* 🚀 **Streaming & Callbacks:** Efficiently handle real-time text generation with customizable callback functions, including during MCP interactions.
 * 💬 **Discussion Management:** Utilities to easily manage and format conversation histories for chat applications.
 * ⚙️ **Configuration Management:** Flexible ways to configure bindings and generation parameters.
-* 🧩 **Extensible:** Designed to easily incorporate new LLM backends and modality services.
+* 🧩 **Extensible:** Designed to easily incorporate new LLM backends and modality services, including custom MCP toolsets.
+* 📝 **High-Level Operations:** Includes convenience methods for complex tasks like sequential summarization and deep text analysis directly within `LollmsClient`.
 
 ## Installation
 
@@ -119,12 +119,61 @@ except Exception as e:
 
 ```
 
+### Function Calling with MCP
+
+`lollms-client` supports robust function calling via the Model Context Protocol (MCP), allowing LLMs to interact with your custom Python tools or pre-defined utilities.
+
+```python
+from lollms_client import LollmsClient, MSG_TYPE
+from ascii_colors import ASCIIColors
+import json # For pretty printing results
+
+# Example callback for MCP streaming
+def mcp_stream_callback(chunk: str, msg_type: MSG_TYPE, metadata: dict = None, turn_history: list = None) -> bool:
+    if msg_type == MSG_TYPE.MSG_TYPE_CHUNK: ASCIIColors.success(chunk, end="", flush=True) # LLM's final answer or thought process
+    elif msg_type == MSG_TYPE.MSG_TYPE_STEP_START: ASCIIColors.info(f"\n>> MCP Step Start: {metadata.get('tool_name', chunk)}", flush=True)
+    elif msg_type == MSG_TYPE.MSG_TYPE_STEP_END: ASCIIColors.success(f"\n<< MCP Step End: {metadata.get('tool_name', chunk)} -> Result: {json.dumps(metadata.get('result', ''))}", flush=True)
+    elif msg_type == MSG_TYPE.MSG_TYPE_INFO and metadata and metadata.get("type") == "tool_call_request": ASCIIColors.info(f"\nAI requests: {metadata.get('name')}({metadata.get('params')})", flush=True)
+    return True
+
+try:
+    # Initialize LollmsClient with an LLM binding and the local_mcp binding
+    lc = LollmsClient(
+        binding_name="ollama", model_name="mistral", # Example LLM
+        mcp_binding_name="local_mcp" # Enables default tools (file_writer, internet_search, etc.)
+                                     # or custom tools if mcp_binding_config.tools_folder_path is set.
+    )
+
+    user_query = "What were the main AI headlines last week and write a summary to 'ai_news.txt'?"
+    ASCIIColors.blue(f"User Query: {user_query}")
+    ASCIIColors.yellow("AI Processing with MCP (streaming):")
+
+    mcp_result = lc.generate_with_mcp(
+        prompt=user_query,
+        streaming_callback=mcp_stream_callback
+    )
+    print("\n--- End of MCP Interaction ---")
+
+    if mcp_result.get("error"):
+        ASCIIColors.error(f"MCP Error: {mcp_result['error']}")
+    else:
+        ASCIIColors.cyan(f"\nFinal Answer from AI: {mcp_result.get('final_answer', 'N/A')}")
+        ASCIIColors.magenta("\nTool Calls Made:")
+        for tc in mcp_result.get("tool_calls", []):
+            print(f" - Tool: {tc.get('name')}, Params: {tc.get('params')}, Result (first 50 chars): {str(tc.get('result'))[:50]}...")
+
+except Exception as e:
+    ASCIIColors.error(f"An error occurred in MCP example: {e}")
+    trace_exception(e) # Assuming you have trace_exception utility
+```
+For a comprehensive guide on function calling and setting up tools, please refer to the [Usage Guide (DOC_USE.md)](DOC_USE.md).
+
 ## Documentation
 
 For more in-depth information, please refer to:
 
-* **[Usage Guide (DOC_USE.md)](DOC_USE.md):** Learn how to use `LollmsClient`, different bindings, modality features,
-* **[Developer Guide (DOC_DEV.md)](DOC_DEV.md):** Understand the architecture, how to create new bindings, and contribute to the library.
+* **[Usage Guide (DOC_USE.md)](DOC_USE.md):** Learn how to use `LollmsClient`, different bindings, modality features, function calling with MCP, and high-level operations.
+* **[Developer Guide (DOC_DEV.md)](DOC_DEV.md):** Understand the architecture, how to create new bindings (LLM, modality, MCP), and contribute to the library.
 
 ## Core Concepts
 
@@ -134,8 +183,9 @@ graph LR
 
     subgraph LollmsClient_Core
         LC -- Manages --> LLB[LLM Binding];
-        LC --
-        LC --
+        LC -- Manages --> MCPB[MCP Binding];
+        LC -- Orchestrates --> MCP_Interaction[generate_with_mcp];
+        LC -- Provides --> HighLevelOps[High-Level Ops<br>(summarize, deep_analyze etc.)];
        LC -- Provides Access To --> DM[DiscussionManager];
        LC -- Provides Access To --> ModalityBindings[TTS, TTI, STT etc.];
     end
@@ -148,14 +198,19 @@ graph LR
        LLB --> LocalHF[Local HuggingFace<br>(transformers / vLLM)];
     end
 
-
+    MCP_Interaction --> MCPB;
+    MCPB --> LocalTools[Local Python Tools<br>(via local_mcp)];
+    MCPB --> RemoteTools[Remote MCP Tool Servers<br>(Future Potential)];
+
+
+    ModalityBindings --> ModalityServices[Modality Services<br>(e.g., LoLLMs Server TTS/TTI, local Bark/XTTS)];
 ```
 
-* **`LollmsClient`**: The central class for all interactions. It holds the currently active LLM binding and provides access to modality bindings and
+* **`LollmsClient`**: The central class for all interactions. It holds the currently active LLM binding, an optional MCP binding, and provides access to modality bindings and high-level operations.
 * **LLM Bindings**: These are plugins that allow `LollmsClient` to communicate with different LLM backends. You choose a binding (e.g., `"ollama"`, `"lollms"`, `"pythonllamacpp"`) when you initialize `LollmsClient`.
+* **🔧 MCP Bindings**: Enable tool use and function calling. `lollms-client` includes `local_mcp` for executing Python tools. It discovers tools from a specified folder (or uses its default set), each defined by a `.py` script and a `.mcp.json` metadata file.
 * **Modality Bindings**: Similar to LLM bindings, but for services like Text-to-Speech (`tts`), Text-to-Image (`tti`), etc.
-*
-* **`FunctionCalling_Library`**: Enables you to define Python functions that the LLM can request to execute, allowing for tool usage.
+* **High-Level Operations**: Methods directly on `LollmsClient` (e.g., `sequential_summarize`, `deep_analyze`, `generate_code`, `yes_no`) for performing complex, multi-step AI tasks.
 * **`LollmsDiscussion`**: Helps manage and format conversation histories for chat applications.
 
 ## Examples
@@ -164,8 +219,8 @@ The `examples/` directory in this repository contains a rich set of scripts demonstrating:
 * Basic text generation with different bindings.
 * Streaming and non-streaming examples.
 * Multimodal generation (text with images).
-* Using
-* Implementing and using function calls.
+* Using built-in methods for summarization and Q&A.
+* Implementing and using function calls with **`generate_with_mcp`** and the `local_mcp` binding (see `examples/function_calling_with_local_custom_mcp.py` and `examples/local_mcp.py`).
 * Text-to-Speech and Text-to-Image generation.
 
 Explore these examples to see `lollms-client` in action!
{lollms_client-0.19.0.dist-info → lollms_client-0.19.5.dist-info}/RECORD
CHANGED
@@ -1,4 +1,6 @@
 examples/function_calling_with_local_custom_mcp.py,sha256=g6wOFRB8-p9Cv7hKmQaGzPvtMX3H77gas01QVNEOduM,12407
+examples/generate_text_with_multihop_rag_example.py,sha256=Z6TbVqThOCHNK6kzIqRnEi76JDxIFlg1-IIMWehZars,11582
+examples/internet_search_with_rag.py,sha256=sw0B4Nea6D5TeGtVdh17imm2E3IassGhUTTL4LkqfuY,9150
 examples/local_mcp.py,sha256=w40dgayvHYe01yvekEE0LjcbkpwKjWwJ-9v4_wGYsUk,9113
 examples/simple_text_gen_test.py,sha256=RoX9ZKJjGMujeep60wh5WT_GoBn0O9YKJY6WOy-ZmOc,8710
 examples/simple_text_gen_with_image_test.py,sha256=rR1O5Prcb52UHtJ3c6bv7VuTd1cvbkr5aNZU-v-Rs3Y,9263
@@ -17,9 +19,9 @@ examples/personality_test/chat_test.py,sha256=o2jlpoddFc-T592iqAiA29xk3x27KsdK5D
 examples/personality_test/chat_with_aristotle.py,sha256=4X_fwubMpd0Eq2rCReS2bgVlUoAqJprjkLXk2Jz6pXU,1774
 examples/personality_test/tesks_test.py,sha256=7LIiwrEbva9WWZOLi34fsmCBN__RZbPpxoUOKA_AtYk,1924
 examples/test_local_models/local_chat.py,sha256=slakja2zaHOEAUsn2tn_VmI4kLx6luLBrPqAeaNsix8,456
-lollms_client/__init__.py,sha256=
+lollms_client/__init__.py,sha256=pR9LDMi5tPNW-WpvXcAiKcOdKjfuNvqSrKApzIFAn8o,910
 lollms_client/lollms_config.py,sha256=goEseDwDxYJf3WkYJ4IrLXwg3Tfw73CXV2Avg45M_hE,21876
-lollms_client/lollms_core.py,sha256=
+lollms_client/lollms_core.py,sha256=56wntZAimRfx0qBR_96_1h9_ZKuvY1Uq_kMVs9xg-dE,119768
 lollms_client/lollms_discussion.py,sha256=9b83m0D894jwpgssWYTQHbVxp1gJoI-J947Ui_dRXII,2073
 lollms_client/lollms_js_analyzer.py,sha256=01zUvuO2F_lnUe_0NLxe1MF5aHE1hO8RZi48mNPv-aw,8361
 lollms_client/lollms_llm_binding.py,sha256=bdElz_IBx0zZ-85YTT1fyY_mSoHo46tKIMiHYJlKCkM,9809
@@ -67,8 +69,8 @@ lollms_client/tts_bindings/piper_tts/__init__.py,sha256=0IEWG4zH3_sOkSb9WbZzkeV5
 lollms_client/tts_bindings/xtts/__init__.py,sha256=FgcdUH06X6ZR806WQe5ixaYx0QoxtAcOgYo87a2qxYc,18266
 lollms_client/ttv_bindings/__init__.py,sha256=UZ8o2izQOJLQgtZ1D1cXoNST7rzqW22rL2Vufc7ddRc,3141
 lollms_client/ttv_bindings/lollms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lollms_client-0.19.
-lollms_client-0.19.
-lollms_client-0.19.
-lollms_client-0.19.
-lollms_client-0.19.
+lollms_client-0.19.5.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+lollms_client-0.19.5.dist-info/METADATA,sha256=c7UkzPm39_qA9TpFQSTqoZSfGC0SomSnIoKiHJWbSdc,13374
+lollms_client-0.19.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lollms_client-0.19.5.dist-info/top_level.txt,sha256=NI_W8S4OYZvJjb0QWMZMSIpOrYzpqwPGYaklhyWKH2w,23
+lollms_client-0.19.5.dist-info/RECORD,,

{lollms_client-0.19.0.dist-info → lollms_client-0.19.5.dist-info}/WHEEL
File without changes

{lollms_client-0.19.0.dist-info → lollms_client-0.19.5.dist-info}/licenses/LICENSE
File without changes

{lollms_client-0.19.0.dist-info → lollms_client-0.19.5.dist-info}/top_level.txt
File without changes