lollms-client 0.20.3__py3-none-any.whl → 0.20.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic.
- examples/gradio_chat_app.py +228 -0
- examples/internet_search_with_rag.py +1 -2
- examples/run_remote_mcp_example copy.py +226 -0
- lollms_client/__init__.py +2 -2
- lollms_client/llm_bindings/llamacpp/__init__.py +104 -0
- lollms_client/llm_bindings/lollms/__init__.py +102 -1
- lollms_client/llm_bindings/ollama/__init__.py +99 -0
- lollms_client/llm_bindings/openai/__init__.py +109 -0
- lollms_client/lollms_core.py +60 -0
- lollms_client/lollms_discussion.py +478 -33
- lollms_client/lollms_llm_binding.py +43 -0
- lollms_client/mcp_bindings/remote_mcp/__init__.py +233 -132
- {lollms_client-0.20.3.dist-info → lollms_client-0.20.6.dist-info}/METADATA +1 -1
- {lollms_client-0.20.3.dist-info → lollms_client-0.20.6.dist-info}/RECORD +17 -15
- {lollms_client-0.20.3.dist-info → lollms_client-0.20.6.dist-info}/WHEEL +0 -0
- {lollms_client-0.20.3.dist-info → lollms_client-0.20.6.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.20.3.dist-info → lollms_client-0.20.6.dist-info}/top_level.txt +0 -0
examples/gradio_chat_app.py
ADDED
@@ -0,0 +1,228 @@
+# final_working_chat_app.py
+
+import sys
+import os
+import json
+import gradio as gr
+import requests
+from typing import List, Dict, Optional, Tuple
+
+# --- Dependency Installation ---
+try:
+    import pipmaster as pm
+    print("Pipmaster found. Ensuring dependencies are installed...")
+    pm.ensure_packages(["gradio", "requests", "ascii_colors"])
+except ImportError:
+    pass
+
+# --- Import Core Components ---
+try:
+    from lollms_client import LollmsClient
+    from lollms_client.lollms_discussion import LollmsDiscussion
+    from ascii_colors import ASCIIColors
+except ImportError as e:
+    print(f"\nFATAL: A required library is missing.\nPlease ensure lollms-client and ascii_colors are installed.")
+    print(f"Error: {e}"); sys.exit(1)
+
+# --- Standalone Helper Functions for LollmsDiscussion ---
+def export_for_chatbot(discussion: Optional[LollmsDiscussion]) -> List[Dict[str, str]]:
+    if not discussion: return []
+    branch = discussion.get_branch(discussion.active_branch_id)
+    return [{"role": discussion.participants.get(msg.sender, "user"), "content": msg.content} for msg in branch]
+
+def render_discussion_tree(discussion: Optional[LollmsDiscussion]) -> str:
+    if not discussion or not discussion.messages: return "No messages yet."
+    tree_markdown = "### Discussion Tree\n\n"; root_ids = [msg.id for msg in discussion.messages if msg.parent_id is None]
+    def _render_node(node_id: str, depth: int) -> str:
+        node = discussion.message_index.get(node_id)
+        if not node: return ""
+        is_active = " <span class='activ'>[ACTIVE]</span>" if node.id == discussion.active_branch_id else ""
+        line = f"{' ' * depth}- **{node.sender}**: _{node.content.replace(chr(10), ' ').strip()[:60]}..._{is_active}\n"
+        for child_id in discussion.children_index.get(node.id, []): line += _render_node(child_id, depth + 1)
+        return line
+    for root_id in root_ids: tree_markdown += _render_node(root_id, 0)
+    return tree_markdown
+
+def get_message_choices(discussion: Optional[LollmsDiscussion]) -> List[tuple]:
+    if not discussion: return []
+    return [(f"{msg.sender}: {msg.content[:40]}...", msg.id) for msg in discussion.messages]
+
+# --- Configuration & File Management ---
+CONFIG_FILE = "config.json"; DISCUSSIONS_DIR = "discussions"; os.makedirs(DISCUSSIONS_DIR, exist_ok=True)
+DEFAULT_CONFIG = {"binding_name": "ollama", "model_name": "mistral:latest", "host_address": "http://localhost:11434", "openai_api_key": "", "openai_model_name": "gpt-4o"}
+def load_config() -> Dict:
+    if os.path.exists(CONFIG_FILE):
+        try:
+            with open(CONFIG_FILE, 'r') as f: ASCIIColors.info(f"Loaded config from {CONFIG_FILE}"); return json.load(f)
+        except: ASCIIColors.warning(f"Could not load {CONFIG_FILE}, using defaults."); return DEFAULT_CONFIG
+    return DEFAULT_CONFIG
+def save_config(config: Dict):
+    with open(CONFIG_FILE, 'w') as f: json.dump(config, f, indent=2); ASCIIColors.green(f"Saved config to {CONFIG_FILE}")
+
+# --- LollmsClient & Discussion Management ---
+def create_lollms_client(config: Dict) -> Optional[LollmsClient]:
+    try:
+        if config["binding_name"] == "ollama": client = LollmsClient(binding_name="ollama", host_address=config["host_address"], model_name=config["model_name"])
+        elif config["binding_name"] == "openai":
+            if not config.get("openai_api_key"): gr.Warning("OpenAI API key missing."); return None
+            client = LollmsClient(binding_name="openai", model_name=config["openai_model_name"], service_key=config["openai_api_key"])
+        else: gr.Warning(f"Unsupported binding: {config['binding_name']}"); return None
+        ASCIIColors.green("LollmsClient created successfully."); return client
+    except Exception as e: gr.Error(f"Failed to create LollmsClient: {e}"); return None
+def get_discussions_list() -> List[str]: return sorted([f for f in os.listdir(DISCUSSIONS_DIR) if f.endswith(".yaml")])
+def load_discussion(filename: str, client: LollmsClient) -> Optional[LollmsDiscussion]:
+    if not client: ASCIIColors.warning("Cannot load discussion: client is not initialized."); return None
+    try:
+        discussion = LollmsDiscussion(client); discussion.load_from_disk(os.path.join(DISCUSSIONS_DIR, filename))
+        ASCIIColors.info(f"Loaded discussion: {filename}"); return discussion
+    except Exception as e: gr.Error(f"Failed to load discussion {filename}: {e}"); return None
+def list_ollama_models(host: str) -> List[str]:
+    try:
+        r = requests.get(f"{host}/api/tags"); r.raise_for_status(); return [m["name"] for m in r.json().get("models", [])]
+    except: gr.Warning(f"Could not fetch models from {host}."); return []
+
+# --- Gradio UI & Logic ---
+with gr.Blocks(theme=gr.themes.Soft(), css=".activ { font-weight: bold; color: #FF4B4B; }") as demo:
+    client_state = gr.State()
+    discussion_state = gr.State()
+
+    gr.Markdown("# 🌿 Multi-Branch Discussion App")
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### 📝 Session & Branch Management")
+            discussion_selector = gr.Dropdown(label="Load Discussion", interactive=True)
+            new_discussion_name = gr.Textbox(label="New Discussion Name", placeholder="Enter name and press Enter...")
+            delete_discussion_button = gr.Button("Delete Current Discussion", variant="stop")
+            branch_selector = gr.Dropdown(label="Select Message to Branch From", interactive=True)
+            discussion_tree_display = gr.Markdown("No discussion loaded.")
+        with gr.Column(scale=2):
+            with gr.Accordion("⚙️ Settings & System Prompt", open=False):
+                system_prompt_input = gr.Textbox(label="System Prompt", lines=3, interactive=True)
+                with gr.Row():
+                    binding_selector = gr.Radio(["ollama", "openai"], label="AI Binding")
+                    save_settings_button = gr.Button("Save Settings & Re-initialize", variant="primary")
+                with gr.Group(visible=True) as ollama_settings_group:
+                    ollama_host_input = gr.Textbox(label="Ollama Host Address"); ollama_model_selector = gr.Dropdown(label="Ollama Model", interactive=True); refresh_ollama_button = gr.Button("Refresh Ollama Models")
+                with gr.Group(visible=False) as openai_settings_group:
+                    openai_api_key_input = gr.Textbox(label="OpenAI API Key", type="password"); openai_model_selector = gr.Dropdown(choices=["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"], label="OpenAI Model", interactive=True)
+            chatbot = gr.Chatbot(label="Conversation", height=600, type="messages")
+            user_input = gr.Textbox(show_label=False, placeholder="Type your message here...", lines=3)
+            send_button = gr.Button("Send", variant="primary")
+
+    # --- Event Handler Functions ---
+    def on_load():
+        config = load_config(); client = create_lollms_client(config)
+        discussions_list = get_discussions_list(); discussion = load_discussion(discussions_list[0], client) if discussions_list else (LollmsDiscussion(client) if client else None)
+        active_discussion_file = discussions_list[0] if discussions_list else None
+
+        history = export_for_chatbot(discussion) if discussion else [{"role": "assistant", "content": "Welcome! Configure client in Settings and create a new chat."}]
+        tree = render_discussion_tree(discussion); branch_choices = get_message_choices(discussion)
+        sys_prompt = discussion.system_prompt if discussion else ""
+        active_branch_id = discussion.active_branch_id if discussion else None
+        is_ollama = config['binding_name'] == 'ollama'; ollama_models = list_ollama_models(config['host_address']) if is_ollama and client else []
+
+        return (client, discussion, gr.update(choices=discussions_list, value=active_discussion_file), config['binding_name'],
+                gr.update(visible=is_ollama), gr.update(visible=not is_ollama), config['host_address'],
+                gr.update(choices=ollama_models, value=config.get('model_name')), config['openai_api_key'],
+                config.get('openai_model_name'), sys_prompt, history, tree, gr.update(choices=branch_choices, value=active_branch_id))
+
+    def handle_save_settings(binding, host, ollama_model, openai_key, openai_model):
+        config = {"binding_name": binding, "host_address": host, "model_name": ollama_model, "openai_api_key": openai_key, "openai_model_name": openai_model}
+        save_config(config); gr.Info("Settings saved! Reloading application..."); return on_load()
+
+    def handle_new_discussion(client, name):
+        if not client: gr.Error("Client not initialized."); return (gr.skip(),) * 5
+        if not name.strip(): gr.Warning("Provide a name."); return (gr.skip(),) * 5
+        filename = f"{name.strip().replace(' ', '_')}.yaml"
+        if os.path.exists(os.path.join(DISCUSSIONS_DIR, filename)): gr.Warning(f"Discussion '{name}' already exists."); return (gr.skip(),) * 5
+        discussion = LollmsDiscussion(client); discussion.set_participants({"user": "user", "assistant": "assistant"})
+        discussion.add_message("assistant", f"This is the beginning of '{name}'."); discussion.save_to_disk(os.path.join(DISCUSSIONS_DIR, filename))
+        return discussion, gr.update(choices=get_discussions_list(), value=filename), export_for_chatbot(discussion), render_discussion_tree(discussion), gr.update(choices=get_message_choices(discussion), value=discussion.active_branch_id)
+
+    def handle_load_discussion(client, filename):
+        if not client: gr.Error("Client not initialized."); return (gr.skip(),) * 5
+        if not filename: return (gr.skip(),) * 5
+        discussion = load_discussion(filename, client)
+        if not discussion: return (gr.skip(),) * 5
+        return discussion, discussion.system_prompt or "", export_for_chatbot(discussion), render_discussion_tree(discussion), gr.update(choices=get_message_choices(discussion), value=discussion.active_branch_id)
+
+    def handle_delete_discussion(filename):
+        if not filename: gr.Warning("No discussion selected to delete."); return (gr.skip(),) * 14
+        try:
+            os.remove(os.path.join(DISCUSSIONS_DIR, filename)); ASCIIColors.red(f"Deleted discussion: {filename}"); gr.Info(f"Deleted {filename}.")
+            return on_load()
+        except Exception as e:
+            gr.Error(f"Failed to delete file: {e}"); return (gr.skip(),) * 14
+
+    def handle_chat_submit(client, discussion, user_text, history, filename):
+        if not client: gr.Error("Client not initialized."); return
+        if not discussion: gr.Error("No discussion loaded."); return
+        if not user_text.strip(): return
+        if not filename: gr.Error("No active discussion file. Cannot save."); return
+
+        parent_id = discussion.active_branch_id
+        discussion.add_message(sender="user", content=user_text, parent_id=parent_id)
+        history.append({"role": "user", "content": user_text}); history.append({"role": "assistant", "content": ""})
+        yield history
+
+        full_response = ""
+        try:
+            # The callback must return True to continue the stream.
+            for chunk in client.chat(discussion, stream=True, streaming_callback=lambda c,t: True):
+                full_response += chunk; history[-1]["content"] = full_response; yield history
+            discussion.add_message(sender="assistant", content=full_response); discussion.save_to_disk(os.path.join(DISCUSSIONS_DIR, filename))
+        except Exception as e:
+            full_response = f"An error occurred: {e}"; gr.Error(full_response); history[-1]["content"] = full_response
+            discussion.add_message(sender="assistant", content=f"ERROR: {full_response}")
+
+    def on_chat_finish(discussion):
+        # This function updates non-streaming components after the chat is done
+        if not discussion: return gr.skip(), gr.skip()
+        return render_discussion_tree(discussion), gr.update(choices=get_message_choices(discussion), value=discussion.active_branch_id)
+
+    def handle_branch_change(discussion, selected_id):
+        if not discussion or not selected_id: return gr.skip(), gr.skip()
+        discussion.set_active_branch(selected_id)
+        return discussion, export_for_chatbot(discussion)
+
+    # --- Wire up Components ---
+    outputs_on_load = [client_state, discussion_state, discussion_selector, binding_selector, ollama_settings_group, openai_settings_group, ollama_host_input, ollama_model_selector, openai_api_key_input, openai_model_selector, system_prompt_input, chatbot, discussion_tree_display, branch_selector]
+    demo.load(on_load, outputs=outputs_on_load)
+    save_settings_button.click(handle_save_settings, [binding_selector, ollama_host_input, ollama_model_selector, openai_api_key_input, openai_model_selector], outputs_on_load)
+    binding_selector.change(lambda x: (gr.update(visible=x=='ollama'), gr.update(visible=x=='openai')), binding_selector, [ollama_settings_group, openai_settings_group])
+    refresh_ollama_button.click(list_ollama_models, ollama_host_input, ollama_model_selector)
+    system_prompt_input.blur(lambda d,t,f: d.set_system_prompt(t) and d.save_to_disk(os.path.join(DISCUSSIONS_DIR,f)) if d and f else None, [discussion_state, system_prompt_input, discussion_selector], [])
+
+    new_discussion_name.submit(handle_new_discussion, [client_state, new_discussion_name], [discussion_state, discussion_selector, chatbot, discussion_tree_display, branch_selector]).then(lambda: "", outputs=[new_discussion_name])
+    discussion_selector.change(handle_load_discussion, [client_state, discussion_selector], [discussion_state, system_prompt_input, chatbot, discussion_tree_display, branch_selector])
+    delete_discussion_button.click(handle_delete_discussion, [discussion_selector], outputs_on_load)
+
+    # --- CORRECTED WIRING FOR CHAT ---
+    chat_stream_event = user_input.submit(
+        fn=handle_chat_submit,
+        inputs=[client_state, discussion_state, user_input, chatbot, discussion_selector],
+        outputs=[chatbot],
+    )
+    # After the stream from handle_chat_submit is done, its input (discussion_state) will be updated.
+    # We can then pass that state to on_chat_finish.
+    chat_stream_event.then(
+        fn=on_chat_finish,
+        inputs=[discussion_state], # The input is the state object that was modified by the previous function
+        outputs=[discussion_tree_display, branch_selector]
+    ).then(lambda: "", outputs=[user_input])
+
+    send_button_stream_event = send_button.click(
+        fn=handle_chat_submit,
+        inputs=[client_state, discussion_state, user_input, chatbot, discussion_selector],
+        outputs=[chatbot]
+    )
+    send_button_stream_event.then(
+        fn=on_chat_finish,
+        inputs=[discussion_state],
+        outputs=[discussion_tree_display, branch_selector]
+    ).then(lambda: "", outputs=[user_input])
+
+    branch_selector.change(handle_branch_change, [discussion_state, branch_selector], [discussion_state, chatbot])
+
+if __name__ == "__main__":
+    demo.launch()
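The chat handler above boils down to a simple loop when driven from a terminal instead of Gradio. The following is a minimal sketch assembled only from calls that appear in this example (the binding, host, model name, and save path are placeholders for your own setup), not an official snippet shipped with the package:

```python
# Minimal console version of the streaming loop used in handle_chat_submit above.
# Assumes an Ollama server at the default address; adjust binding/model as needed.
import os
from lollms_client import LollmsClient
from lollms_client.lollms_discussion import LollmsDiscussion

client = LollmsClient(binding_name="ollama",
                      host_address="http://localhost:11434",
                      model_name="mistral:latest")

discussion = LollmsDiscussion(client)
discussion.set_participants({"user": "user", "assistant": "assistant"})
discussion.add_message(sender="user", content="Hello, who are you?")

# stream=True yields text chunks; the callback must return True to keep streaming.
full_response = ""
for chunk in client.chat(discussion, stream=True, streaming_callback=lambda c, t: True):
    full_response += chunk
    print(chunk, end="", flush=True)
print()

# As in the app, the assistant reply is appended and persisted after streaming ends.
discussion.add_message(sender="assistant", content=full_response)
os.makedirs("discussions", exist_ok=True)
discussion.save_to_disk("discussions/console_demo.yaml")
```

As in `handle_chat_submit`, the reply is written back to the discussion and saved only after streaming completes, so an interrupted generation does not leave a half-written message on disk.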
@@ -173,8 +173,7 @@ if __name__ == "__main__":
|
|
|
173
173
|
rag_top_k=2, # Get 2 search results per query
|
|
174
174
|
rag_min_similarity_percent=50.0,
|
|
175
175
|
streaming_callback=rag_streaming_callback,
|
|
176
|
-
n_predict=400
|
|
177
|
-
rag_hop_query_generation_temperature=0.1
|
|
176
|
+
n_predict=400
|
|
178
177
|
)
|
|
179
178
|
print("\n--- End of Multi-Hop Search RAG (1 hop max) ---")
|
|
180
179
|
ASCIIColors.magenta("\nMulti-Hop Search RAG (1 hop max) Final Output Structure:")
|
|
examples/run_remote_mcp_example copy.py
ADDED
@@ -0,0 +1,226 @@
+# File: run_lollms_client_with_mcp_example.py
+
+import sys
+import os
+import shutil
+from pathlib import Path
+import json
+from lollms_client import LollmsClient
+import subprocess
+# --- Dynamically adjust Python path to find lollms_client ---
+# This assumes the example script is in a directory, and 'lollms_client' is
+# in a sibling directory or a known relative path. Adjust as needed.
+# For example, if script is in 'lollms_client/examples/' and lollms_client code is in 'lollms_client/'
+# then the parent of the script's parent is the project root.
+
+# Get the directory of the current script
+current_script_dir = Path(__file__).resolve().parent
+
+# Option 1: If lollms_client is in the parent directory of this script's directory
+# (e.g. script is in 'project_root/examples' and lollms_client is in 'project_root/lollms_client')
+# project_root = current_script_dir.parent
+# lollms_client_path = project_root / "lollms_client" # Assuming this is where lollms_client.py and bindings are
+
+# Option 2: If lollms_client package is directly one level up
+# (e.g. script is in 'lollms_client/examples' and lollms_client package is 'lollms_client')
+project_root_for_lollms_client = current_script_dir.parent
+if str(project_root_for_lollms_client) not in sys.path:
+    sys.path.insert(0, str(project_root_for_lollms_client))
+    print(f"Added to sys.path: {project_root_for_lollms_client}")
+
+
+# --- Ensure pipmaster is available (core LoLLMs dependency) ---
+try:
+    import pipmaster as pm
+except ImportError:
+    print("ERROR: pipmaster is not installed or not in PYTHONPATH.")
+    sys.exit(1)
+
+# --- Import LollmsClient and supporting components ---
+try:
+
+    from lollms_client.lollms_llm_binding import LollmsLLMBinding # Base for LLM
+    from ascii_colors import ASCIIColors, trace_exception
+    from lollms_client.lollms_types import MSG_TYPE # Assuming MSG_TYPE is here
+except ImportError as e:
+    print(f"ERROR: Could not import LollmsClient components: {e}")
+    print("Ensure 'lollms_client' package structure is correct and accessible via PYTHONPATH.")
+    print(f"Current sys.path: {sys.path}")
+    trace_exception(e)
+    sys.exit(1)
+
+
+# --- Dummy Server Scripts using FastMCP (as per previous successful iteration) ---
+TIME_SERVER_PY = """
+import asyncio
+from datetime import datetime
+from mcp.server.fastmcp import FastMCP
+
+mcp_server = FastMCP("TimeMCP", description="A server that provides the current time.", host="localhost",
+                     port=9624,
+                     log_level="DEBUG")
+
+@mcp_server.tool(description="Returns the current server time and echoes received parameters.")
+def get_current_time(user_id: str = "unknown_user") -> dict:
+    return {"time": datetime.now().isoformat(), "params_received": {"user_id": user_id}, "server_name": "TimeServer"}
+
+if __name__ == "__main__":
+    mcp_server.run(transport="streamable-http")
+"""
+
+CALCULATOR_SERVER_PY = """
+import asyncio
+from typing import List, Union
+from mcp.server.fastmcp import FastMCP
+
+mcp_server = FastMCP("CalculatorMCP", description="A server that performs addition.", host="localhost",
+                     port=9625,
+                     log_level="DEBUG")
+
+@mcp_server.tool(description="Adds a list of numbers provided in the 'numbers' parameter.")
+def add_numbers(numbers: List[Union[int, float]]) -> dict:
+    if not isinstance(numbers, list) or not all(isinstance(x, (int, float)) for x in numbers):
+        return {"error": "'numbers' must be a list of numbers."}
+    return {"sum": sum(numbers), "server_name": "CalculatorServer"}
+
+if __name__ == "__main__":
+    mcp_server.run(transport="streamable-http")
+"""
+
+
+def main():
+    ASCIIColors.red("--- Example: Using LollmsClient with StandardMCPBinding ---")
+
+    # --- 1. Setup Temporary Directory for Dummy MCP Servers ---
+    example_base_dir = Path(__file__).parent / "temp_mcp_example_servers"
+    if example_base_dir.exists():
+        shutil.rmtree(example_base_dir)
+    example_base_dir.mkdir(exist_ok=True)
+
+    time_server_script_path = example_base_dir / "time_server.py"
+    with open(time_server_script_path, "w") as f: f.write(TIME_SERVER_PY)
+
+    calculator_server_script_path = example_base_dir / "calculator_server.py"
+    with open(calculator_server_script_path, "w") as f: f.write(CALCULATOR_SERVER_PY)
+
+    subprocess.Popen(
+        [sys.executable, str(time_server_script_path.resolve())],
+        stdin=subprocess.DEVNULL,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+        start_new_session=True
+    )
+
+    subprocess.Popen(
+        [sys.executable, str(calculator_server_script_path.resolve())],
+        stdin=subprocess.DEVNULL,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+        start_new_session=True
+    )
+    # MCP Binding Configuration (for RemoteMCPBinding with multiple servers)
+    mcp_config = {
+        "servers_infos":{
+            "time_machine":{
+                "server_url": "http://localhost:9624/mcp",
+            },
+
+            "calc_unit":{
+                "server_url": "http://localhost:9625/mcp",
+            },
+        }
+    }
+    ASCIIColors.magenta("\n1. Initializing LollmsClient...")
+    try:
+        client = LollmsClient(
+            binding_name="ollama", # Use the dummy LLM binding
+            model_name="mistral-nemo:latest",
+            mcp_binding_name="remote_mcp",
+            mcp_binding_config=mcp_config,
+
+        )
+    except Exception as e:
+        ASCIIColors.error(f"Failed to initialize LollmsClient: {e}")
+        trace_exception(e)
+        shutil.rmtree(example_base_dir)
+        sys.exit(1)
+
+    if not client.binding:
+        ASCIIColors.error("LollmsClient's LLM binding (dummy_llm) failed to load.")
+        shutil.rmtree(example_base_dir)
+        sys.exit(1)
+    if not client.mcp:
+        ASCIIColors.error("LollmsClient's MCP binding (standard_mcp) failed to load.")
+        client.close() # Close LLM binding if it loaded
+        shutil.rmtree(example_base_dir)
+        sys.exit(1)
+
+    ASCIIColors.green("LollmsClient initialized successfully with DummyLLM and StandardMCP bindings.")
+
+    # --- 3. Define a streaming callback for generate_with_mcp ---
+    def mcp_streaming_callback(chunk: str, msg_type: MSG_TYPE, metadata: dict = None, history: list = None) -> bool:
+        if metadata:
+            type_info = metadata.get('type', 'unknown_type')
+            if msg_type == MSG_TYPE.MSG_TYPE_STEP_START:
+                ASCIIColors.cyan(f"MCP Step Start ({type_info}): {chunk}")
+            elif msg_type == MSG_TYPE.MSG_TYPE_STEP_END:
+                ASCIIColors.cyan(f"MCP Step End ({type_info}): {chunk}")
+            elif msg_type == MSG_TYPE.MSG_TYPE_INFO:
+                ASCIIColors.yellow(f"MCP Info ({type_info}): {chunk}")
+            elif msg_type == MSG_TYPE.MSG_TYPE_CHUNK: # Part of final answer typically
+                ASCIIColors.green(chunk, end="") # type: ignore
+            else: # FULL, default, etc.
+                ASCIIColors.green(f"MCP Output ({str(msg_type)}, {type_info}): {chunk}")
+        else:
+            if msg_type == MSG_TYPE.MSG_TYPE_CHUNK:
+                ASCIIColors.green(chunk, end="") # type: ignore
+            else:
+                ASCIIColors.green(f"MCP Output ({str(msg_type)}): {chunk}")
+        sys.stdout.flush()
+        return True # Continue streaming
+
+    # --- 4. Use generate_with_mcp ---
+    ASCIIColors.magenta("\n2. Calling generate_with_mcp to get current time...")
+    time_prompt = "Hey assistant, what time is it right now?"
+    time_response = client.generate_with_mcp(
+        prompt=time_prompt,
+        streaming_callback=mcp_streaming_callback,
+        interactive_tool_execution=False # Set to True to test interactive mode
+    )
+    print() # Newline after streaming
+    ASCIIColors.blue(f"Final response for time prompt: {json.dumps(time_response, indent=2)}")
+
+    assert time_response.get("error") is None, f"Time prompt resulted in an error: {time_response.get('error')}"
+    assert time_response.get("final_answer"), "Time prompt did not produce a final answer."
+    assert len(time_response.get("tool_calls", [])) > 0, "Time prompt should have called a tool."
+    assert time_response["tool_calls"][0]["name"] == "time_machine::get_current_time", "Incorrect tool called for time."
+    assert "time" in time_response["tool_calls"][0].get("result", {}).get("output", {}), "Time tool result missing time."
+
+
+    ASCIIColors.magenta("\n3. Calling generate_with_mcp for calculation...")
+    calc_prompt = "Can you please calculate the sum of 50, 25, and 7.5 for me?"
+    calc_response = client.generate_with_mcp(
+        prompt=calc_prompt,
+        streaming_callback=mcp_streaming_callback
+    )
+    print() # Newline
+    ASCIIColors.blue(f"Final response for calc prompt: {json.dumps(calc_response, indent=2)}")
+
+    assert calc_response.get("error") is None, f"Calc prompt resulted in an error: {calc_response.get('error')}"
+    assert calc_response.get("final_answer"), "Calc prompt did not produce a final answer."
+    assert len(calc_response.get("tool_calls", [])) > 0, "Calc prompt should have called a tool."
+    assert calc_response["tool_calls"][0]["name"] == "calc_unit::add_numbers", "Incorrect tool called for calculation."
+    # The dummy LLM uses hardcoded params [1,2,3] for calc, so result will be 6.
+    # A real LLM would extract 50, 25, 7.5.
+    # For this dummy test, we check against the dummy's behavior.
+    assert calc_response["tool_calls"][0].get("result", {}).get("output", {}).get("sum") == 82.5, "Calculator tool result mismatch for dummy params."
+
+
+    # --- 5. Cleanup ---
+    ASCIIColors.info("Cleaning up temporary server scripts and dummy binding directory...")
+    shutil.rmtree(example_base_dir, ignore_errors=True)
+
+    ASCIIColors.red("\n--- LollmsClient with StandardMCPBinding Example Finished Successfully! ---")
+
+if __name__ == "__main__":
+    main()
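Stripped of the throwaway FastMCP servers and the assertions, the remote MCP wiring in this example reduces to the configuration below. This is a sketch for orientation only; the server names and URLs are the placeholders used above and should point at your own MCP endpoints:

```python
# Sketch of the remote_mcp binding configuration used in the example above.
from lollms_client import LollmsClient

mcp_config = {
    "servers_infos": {
        "time_machine": {"server_url": "http://localhost:9624/mcp"},
        "calc_unit": {"server_url": "http://localhost:9625/mcp"},
    }
}

client = LollmsClient(
    binding_name="ollama",             # LLM binding used by the example
    model_name="mistral-nemo:latest",
    mcp_binding_name="remote_mcp",
    mcp_binding_config=mcp_config,
)

def quiet_callback(chunk, msg_type, metadata=None, history=None) -> bool:
    return True  # see mcp_streaming_callback above for a verbose version

# Tools are namespaced as "<server_name>::<tool_name>",
# e.g. "time_machine::get_current_time" in the assertions above.
response = client.generate_with_mcp(
    prompt="What time is it right now?",
    streaming_callback=quiet_callback,
)
print(response.get("final_answer"))
```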
lollms_client/__init__.py
CHANGED
@@ -7,7 +7,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utilities
 from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
 
 
-__version__ = "0.20.3"
+__version__ = "0.20.6" # Updated version
 
 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
@@ -19,4 +19,4 @@ __all__ = [
     "PromptReshaper",
     "LollmsMCPBinding", # Export LollmsMCPBinding ABC
     "LollmsMCPBindingManager", # Export LollmsMCPBindingManager
-]
+]
lollms_client/llm_bindings/llamacpp/__init__.py
CHANGED
@@ -635,6 +635,110 @@ class LlamaCppServerBinding(LollmsLLMBinding):
             error_message = f"Llama.cpp generation error: {str(ex)}"; trace_exception(ex)
             return {"status": False, "error": error_message}
 
+    def chat(self,
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = None,
+             stream: Optional[bool] = None,
+             temperature: float = 0.7,
+             top_k: int = 40,
+             top_p: float = 0.9,
+             repeat_penalty: float = 1.1,
+             repeat_last_n: int = 64,
+             seed: Optional[int] = None,
+             n_threads: Optional[int] = None,
+             ctx_size: Optional[int] = None,
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+             **generation_kwargs
+             ) -> Union[str, dict]:
+        """
+        Conduct a chat session with the llama.cpp server using a LollmsDiscussion object.
+
+        Args:
+            discussion (LollmsDiscussion): The discussion object containing the conversation history.
+            branch_tip_id (Optional[str]): The ID of the message to use as the tip of the conversation branch. Defaults to the active branch.
+            n_predict (Optional[int]): Maximum number of tokens to generate.
+            stream (Optional[bool]): Whether to stream the output.
+            temperature (float): Sampling temperature.
+            top_k (int): Top-k sampling parameter.
+            top_p (float): Top-p sampling parameter.
+            repeat_penalty (float): Penalty for repeated tokens.
+            repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
+            seed (Optional[int]): Random seed for generation.
+            streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+
+        Returns:
+            Union[str, dict]: The generated text or an error dictionary.
+        """
+        if not self.server_process or not self.server_process.is_healthy:
+            return {"status": "error", "message": "Llama.cpp server is not running or not healthy."}
+
+        # 1. Export the discussion to the OpenAI chat format, which llama.cpp server understands.
+        #    This handles system prompts, user/assistant roles, and multi-modal content.
+        messages = discussion.export("openai_chat", branch_tip_id)
+
+        # 2. Build the generation payload for the server
+        payload = {
+            "messages": messages,
+            "max_tokens": n_predict,
+            "temperature": temperature,
+            "top_k": top_k,
+            "top_p": top_p,
+            "repeat_penalty": repeat_penalty,
+            "seed": seed,
+            "stream": stream,
+            **generation_kwargs # Pass any extra parameters
+        }
+        # Remove None values, as the API expects them to be absent
+        payload = {k: v for k, v in payload.items() if v is not None}
+
+        endpoint = "/v1/chat/completions"
+        request_url = self._get_request_url(endpoint)
+        full_response_text = ""
+
+        try:
+            # 3. Make the request to the server
+            response = self.server_process.session.post(request_url, json=payload, stream=stream, timeout=self.server_args.get("generation_timeout", 300))
+            response.raise_for_status()
+
+            if stream:
+                for line in response.iter_lines():
+                    if not line: continue
+                    line_str = line.decode('utf-8').strip()
+                    if line_str.startswith('data: '): line_str = line_str[6:]
+                    if line_str == '[DONE]': break
+                    try:
+                        chunk_data = json.loads(line_str)
+                        chunk_content = chunk_data.get('choices', [{}])[0].get('delta', {}).get('content', '')
+                        if chunk_content:
+                            full_response_text += chunk_content
+                            if streaming_callback and not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
+                                ASCIIColors.info("Streaming callback requested stop.")
+                                response.close()
+                                break
+                    except json.JSONDecodeError:
+                        ASCIIColors.warning(f"Failed to decode JSON stream chunk: {line_str}")
+                        continue
+                return full_response_text
+            else: # Not streaming
+                response_data = response.json()
+                return response_data.get('choices', [{}])[0].get('message', {}).get('content', '')
+
+        except requests.exceptions.RequestException as e:
+            error_message = f"Llama.cpp server request error: {e}"
+            if e.response is not None:
+                try:
+                    error_details = e.response.json()
+                    error_message += f" - Details: {error_details.get('error', e.response.text)}"
+                except json.JSONDecodeError:
+                    error_message += f" - Response: {e.response.text[:200]}"
+            ASCIIColors.error(error_message)
+            return {"status": "error", "message": error_message}
+        except Exception as ex:
+            error_message = f"Llama.cpp generation error: {str(ex)}"
+            trace_exception(ex)
+            return {"status": "error", "message": error_message}
+
     def tokenize(self, text: str) -> List[int]:
         if not self.server_process or not self.server_process.is_healthy: raise ConnectionError("Server not running.")
         try: