lollms-client 0.20.3__py3-none-any.whl → 0.20.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lollms-client might be problematic; see the registry page for more details.

@@ -0,0 +1,228 @@
+ # final_working_chat_app.py
+
+ import sys
+ import os
+ import json
+ import gradio as gr
+ import requests
+ from typing import List, Dict, Optional, Tuple
+
+ # --- Dependency Installation ---
+ try:
+     import pipmaster as pm
+     print("Pipmaster found. Ensuring dependencies are installed...")
+     pm.ensure_packages(["gradio", "requests", "ascii_colors"])
+ except ImportError:
+     pass
+
+ # --- Import Core Components ---
+ try:
+     from lollms_client import LollmsClient
+     from lollms_client.lollms_discussion import LollmsDiscussion
+     from ascii_colors import ASCIIColors
+ except ImportError as e:
+     print(f"\nFATAL: A required library is missing.\nPlease ensure lollms-client and ascii_colors are installed.")
+     print(f"Error: {e}"); sys.exit(1)
+
+ # --- Standalone Helper Functions for LollmsDiscussion ---
+ def export_for_chatbot(discussion: Optional[LollmsDiscussion]) -> List[Dict[str, str]]:
+     if not discussion: return []
+     branch = discussion.get_branch(discussion.active_branch_id)
+     return [{"role": discussion.participants.get(msg.sender, "user"), "content": msg.content} for msg in branch]
+
+ def render_discussion_tree(discussion: Optional[LollmsDiscussion]) -> str:
+     if not discussion or not discussion.messages: return "No messages yet."
+     tree_markdown = "### Discussion Tree\n\n"; root_ids = [msg.id for msg in discussion.messages if msg.parent_id is None]
+     def _render_node(node_id: str, depth: int) -> str:
+         node = discussion.message_index.get(node_id)
+         if not node: return ""
+         is_active = " <span class='activ'>[ACTIVE]</span>" if node.id == discussion.active_branch_id else ""
+         line = f"{' ' * depth}- **{node.sender}**: _{node.content.replace(chr(10), ' ').strip()[:60]}..._{is_active}\n"
+         for child_id in discussion.children_index.get(node.id, []): line += _render_node(child_id, depth + 1)
+         return line
+     for root_id in root_ids: tree_markdown += _render_node(root_id, 0)
+     return tree_markdown
+
+ def get_message_choices(discussion: Optional[LollmsDiscussion]) -> List[tuple]:
+     if not discussion: return []
+     return [(f"{msg.sender}: {msg.content[:40]}...", msg.id) for msg in discussion.messages]
+
+ # --- Configuration & File Management ---
+ CONFIG_FILE = "config.json"; DISCUSSIONS_DIR = "discussions"; os.makedirs(DISCUSSIONS_DIR, exist_ok=True)
+ DEFAULT_CONFIG = {"binding_name": "ollama", "model_name": "mistral:latest", "host_address": "http://localhost:11434", "openai_api_key": "", "openai_model_name": "gpt-4o"}
+ def load_config() -> Dict:
+     if os.path.exists(CONFIG_FILE):
+         try:
+             with open(CONFIG_FILE, 'r') as f: ASCIIColors.info(f"Loaded config from {CONFIG_FILE}"); return json.load(f)
+         except: ASCIIColors.warning(f"Could not load {CONFIG_FILE}, using defaults."); return DEFAULT_CONFIG
+     return DEFAULT_CONFIG
+ def save_config(config: Dict):
+     with open(CONFIG_FILE, 'w') as f: json.dump(config, f, indent=2); ASCIIColors.green(f"Saved config to {CONFIG_FILE}")
+
+ # --- LollmsClient & Discussion Management ---
+ def create_lollms_client(config: Dict) -> Optional[LollmsClient]:
+     try:
+         if config["binding_name"] == "ollama": client = LollmsClient(binding_name="ollama", host_address=config["host_address"], model_name=config["model_name"])
+         elif config["binding_name"] == "openai":
+             if not config.get("openai_api_key"): gr.Warning("OpenAI API key missing."); return None
+             client = LollmsClient(binding_name="openai", model_name=config["openai_model_name"], service_key=config["openai_api_key"])
+         else: gr.Warning(f"Unsupported binding: {config['binding_name']}"); return None
+         ASCIIColors.green("LollmsClient created successfully."); return client
+     except Exception as e: gr.Error(f"Failed to create LollmsClient: {e}"); return None
+ def get_discussions_list() -> List[str]: return sorted([f for f in os.listdir(DISCUSSIONS_DIR) if f.endswith(".yaml")])
+ def load_discussion(filename: str, client: LollmsClient) -> Optional[LollmsDiscussion]:
+     if not client: ASCIIColors.warning("Cannot load discussion: client is not initialized."); return None
+     try:
+         discussion = LollmsDiscussion(client); discussion.load_from_disk(os.path.join(DISCUSSIONS_DIR, filename))
+         ASCIIColors.info(f"Loaded discussion: {filename}"); return discussion
+     except Exception as e: gr.Error(f"Failed to load discussion {filename}: {e}"); return None
+ def list_ollama_models(host: str) -> List[str]:
+     try:
+         r = requests.get(f"{host}/api/tags"); r.raise_for_status(); return [m["name"] for m in r.json().get("models", [])]
+     except: gr.Warning(f"Could not fetch models from {host}."); return []
+
+ # --- Gradio UI & Logic ---
+ with gr.Blocks(theme=gr.themes.Soft(), css=".activ { font-weight: bold; color: #FF4B4B; }") as demo:
+     client_state = gr.State()
+     discussion_state = gr.State()
+
+     gr.Markdown("# 🌿 Multi-Branch Discussion App")
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.Markdown("### 📝 Session & Branch Management")
+             discussion_selector = gr.Dropdown(label="Load Discussion", interactive=True)
+             new_discussion_name = gr.Textbox(label="New Discussion Name", placeholder="Enter name and press Enter...")
+             delete_discussion_button = gr.Button("Delete Current Discussion", variant="stop")
+             branch_selector = gr.Dropdown(label="Select Message to Branch From", interactive=True)
+             discussion_tree_display = gr.Markdown("No discussion loaded.")
+         with gr.Column(scale=2):
+             with gr.Accordion("⚙️ Settings & System Prompt", open=False):
+                 system_prompt_input = gr.Textbox(label="System Prompt", lines=3, interactive=True)
+                 with gr.Row():
+                     binding_selector = gr.Radio(["ollama", "openai"], label="AI Binding")
+                     save_settings_button = gr.Button("Save Settings & Re-initialize", variant="primary")
+                 with gr.Group(visible=True) as ollama_settings_group:
+                     ollama_host_input = gr.Textbox(label="Ollama Host Address"); ollama_model_selector = gr.Dropdown(label="Ollama Model", interactive=True); refresh_ollama_button = gr.Button("Refresh Ollama Models")
+                 with gr.Group(visible=False) as openai_settings_group:
+                     openai_api_key_input = gr.Textbox(label="OpenAI API Key", type="password"); openai_model_selector = gr.Dropdown(choices=["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"], label="OpenAI Model", interactive=True)
+             chatbot = gr.Chatbot(label="Conversation", height=600, type="messages")
+             user_input = gr.Textbox(show_label=False, placeholder="Type your message here...", lines=3)
+             send_button = gr.Button("Send", variant="primary")
+
+     # --- Event Handler Functions ---
+     def on_load():
+         config = load_config(); client = create_lollms_client(config)
+         discussions_list = get_discussions_list(); discussion = load_discussion(discussions_list[0], client) if discussions_list else (LollmsDiscussion(client) if client else None)
+         active_discussion_file = discussions_list[0] if discussions_list else None
+
+         history = export_for_chatbot(discussion) if discussion else [{"role": "assistant", "content": "Welcome! Configure client in Settings and create a new chat."}]
+         tree = render_discussion_tree(discussion); branch_choices = get_message_choices(discussion)
+         sys_prompt = discussion.system_prompt if discussion else ""
+         active_branch_id = discussion.active_branch_id if discussion else None
+         is_ollama = config['binding_name'] == 'ollama'; ollama_models = list_ollama_models(config['host_address']) if is_ollama and client else []
+
+         return (client, discussion, gr.update(choices=discussions_list, value=active_discussion_file), config['binding_name'],
+                 gr.update(visible=is_ollama), gr.update(visible=not is_ollama), config['host_address'],
+                 gr.update(choices=ollama_models, value=config.get('model_name')), config['openai_api_key'],
+                 config.get('openai_model_name'), sys_prompt, history, tree, gr.update(choices=branch_choices, value=active_branch_id))
+
+     def handle_save_settings(binding, host, ollama_model, openai_key, openai_model):
+         config = {"binding_name": binding, "host_address": host, "model_name": ollama_model, "openai_api_key": openai_key, "openai_model_name": openai_model}
+         save_config(config); gr.Info("Settings saved! Reloading application..."); return on_load()
+
+     def handle_new_discussion(client, name):
+         if not client: gr.Error("Client not initialized."); return (gr.skip(),) * 5
+         if not name.strip(): gr.Warning("Provide a name."); return (gr.skip(),) * 5
+         filename = f"{name.strip().replace(' ', '_')}.yaml"
+         if os.path.exists(os.path.join(DISCUSSIONS_DIR, filename)): gr.Warning(f"Discussion '{name}' already exists."); return (gr.skip(),) * 5
+         discussion = LollmsDiscussion(client); discussion.set_participants({"user": "user", "assistant": "assistant"})
+         discussion.add_message("assistant", f"This is the beginning of '{name}'."); discussion.save_to_disk(os.path.join(DISCUSSIONS_DIR, filename))
+         return discussion, gr.update(choices=get_discussions_list(), value=filename), export_for_chatbot(discussion), render_discussion_tree(discussion), gr.update(choices=get_message_choices(discussion), value=discussion.active_branch_id)
+
+     def handle_load_discussion(client, filename):
+         if not client: gr.Error("Client not initialized."); return (gr.skip(),) * 5
+         if not filename: return (gr.skip(),) * 5
+         discussion = load_discussion(filename, client)
+         if not discussion: return (gr.skip(),) * 5
+         return discussion, discussion.system_prompt or "", export_for_chatbot(discussion), render_discussion_tree(discussion), gr.update(choices=get_message_choices(discussion), value=discussion.active_branch_id)
+
+     def handle_delete_discussion(filename):
+         if not filename: gr.Warning("No discussion selected to delete."); return (gr.skip(),) * 14
+         try:
+             os.remove(os.path.join(DISCUSSIONS_DIR, filename)); ASCIIColors.red(f"Deleted discussion: {filename}"); gr.Info(f"Deleted {filename}.")
+             return on_load()
+         except Exception as e:
+             gr.Error(f"Failed to delete file: {e}"); return (gr.skip(),) * 14
+
+     def handle_chat_submit(client, discussion, user_text, history, filename):
+         if not client: gr.Error("Client not initialized."); return
+         if not discussion: gr.Error("No discussion loaded."); return
+         if not user_text.strip(): return
+         if not filename: gr.Error("No active discussion file. Cannot save."); return
+
+         parent_id = discussion.active_branch_id
+         discussion.add_message(sender="user", content=user_text, parent_id=parent_id)
+         history.append({"role": "user", "content": user_text}); history.append({"role": "assistant", "content": ""})
+         yield history
+
+         full_response = ""
+         try:
+             # The callback must return True to continue the stream.
+             for chunk in client.chat(discussion, stream=True, streaming_callback=lambda c,t: True):
+                 full_response += chunk; history[-1]["content"] = full_response; yield history
+             discussion.add_message(sender="assistant", content=full_response); discussion.save_to_disk(os.path.join(DISCUSSIONS_DIR, filename))
+         except Exception as e:
+             full_response = f"An error occurred: {e}"; gr.Error(full_response); history[-1]["content"] = full_response
+             discussion.add_message(sender="assistant", content=f"ERROR: {full_response}")
+
+     def on_chat_finish(discussion):
+         # This function updates non-streaming components after the chat is done
+         if not discussion: return gr.skip(), gr.skip()
+         return render_discussion_tree(discussion), gr.update(choices=get_message_choices(discussion), value=discussion.active_branch_id)
+
+     def handle_branch_change(discussion, selected_id):
+         if not discussion or not selected_id: return gr.skip(), gr.skip()
+         discussion.set_active_branch(selected_id)
+         return discussion, export_for_chatbot(discussion)
+
+     # --- Wire up Components ---
+     outputs_on_load = [client_state, discussion_state, discussion_selector, binding_selector, ollama_settings_group, openai_settings_group, ollama_host_input, ollama_model_selector, openai_api_key_input, openai_model_selector, system_prompt_input, chatbot, discussion_tree_display, branch_selector]
+     demo.load(on_load, outputs=outputs_on_load)
+     save_settings_button.click(handle_save_settings, [binding_selector, ollama_host_input, ollama_model_selector, openai_api_key_input, openai_model_selector], outputs_on_load)
+     binding_selector.change(lambda x: (gr.update(visible=x=='ollama'), gr.update(visible=x=='openai')), binding_selector, [ollama_settings_group, openai_settings_group])
+     refresh_ollama_button.click(list_ollama_models, ollama_host_input, ollama_model_selector)
+     system_prompt_input.blur(lambda d,t,f: d.set_system_prompt(t) and d.save_to_disk(os.path.join(DISCUSSIONS_DIR,f)) if d and f else None, [discussion_state, system_prompt_input, discussion_selector], [])
+
+     new_discussion_name.submit(handle_new_discussion, [client_state, new_discussion_name], [discussion_state, discussion_selector, chatbot, discussion_tree_display, branch_selector]).then(lambda: "", outputs=[new_discussion_name])
+     discussion_selector.change(handle_load_discussion, [client_state, discussion_selector], [discussion_state, system_prompt_input, chatbot, discussion_tree_display, branch_selector])
+     delete_discussion_button.click(handle_delete_discussion, [discussion_selector], outputs_on_load)
+
+     # --- CORRECTED WIRING FOR CHAT ---
+     chat_stream_event = user_input.submit(
+         fn=handle_chat_submit,
+         inputs=[client_state, discussion_state, user_input, chatbot, discussion_selector],
+         outputs=[chatbot],
+     )
+     # After the stream from handle_chat_submit is done, its input (discussion_state) will be updated.
+     # We can then pass that state to on_chat_finish.
+     chat_stream_event.then(
+         fn=on_chat_finish,
+         inputs=[discussion_state], # The input is the state object that was modified by the previous function
+         outputs=[discussion_tree_display, branch_selector]
+     ).then(lambda: "", outputs=[user_input])
+
+     send_button_stream_event = send_button.click(
+         fn=handle_chat_submit,
+         inputs=[client_state, discussion_state, user_input, chatbot, discussion_selector],
+         outputs=[chatbot]
+     )
+     send_button_stream_event.then(
+         fn=on_chat_finish,
+         inputs=[discussion_state],
+         outputs=[discussion_tree_display, branch_selector]
+     ).then(lambda: "", outputs=[user_input])
+
+     branch_selector.change(handle_branch_change, [discussion_state, branch_selector], [discussion_state, chatbot])
+
+ if __name__ == "__main__":
+     demo.launch()
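
For readers who want to try the new chat() flow outside Gradio, here is a minimal console sketch of the same client.chat() streaming pattern the example app uses. The Ollama host and model name are assumptions (they mirror the app's defaults), and the loop reproduces the `lambda c, t: True` callback contract from handle_chat_submit.

```python
# Minimal sketch, assuming a local Ollama server at http://localhost:11434
# that serves mistral:latest (same defaults as the example app above).
import os
from lollms_client import LollmsClient
from lollms_client.lollms_discussion import LollmsDiscussion

client = LollmsClient(binding_name="ollama",
                      host_address="http://localhost:11434",
                      model_name="mistral:latest")

discussion = LollmsDiscussion(client)
discussion.set_participants({"user": "user", "assistant": "assistant"})
discussion.add_message(sender="user", content="Say hello in one sentence.")

# Mirror the app: iterate the streamed chunks; the callback returns True to keep streaming.
full_response = ""
for chunk in client.chat(discussion, stream=True, streaming_callback=lambda c, t: True):
    full_response += chunk
    print(chunk, end="", flush=True)

# Persist the branch the same way the app does.
os.makedirs("discussions", exist_ok=True)
discussion.add_message(sender="assistant", content=full_response)
discussion.save_to_disk("discussions/demo.yaml")
```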
@@ -173,8 +173,7 @@ if __name__ == "__main__":
          rag_top_k=2, # Get 2 search results per query
          rag_min_similarity_percent=50.0,
          streaming_callback=rag_streaming_callback,
-         n_predict=400,
-         rag_hop_query_generation_temperature=0.1
+         n_predict=400
      )
      print("\n--- End of Multi-Hop Search RAG (1 hop max) ---")
      ASCIIColors.magenta("\nMulti-Hop Search RAG (1 hop max) Final Output Structure:")
lollms_client/__init__.py CHANGED
@@ -7,7 +7,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utiliti
  from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager


- __version__ = "0.20.3" # Updated version
+ __version__ = "0.20.4" # Updated version

  # Optionally, you could define __all__ if you want to be explicit about exports
  __all__ = [
@@ -635,6 +635,110 @@ class LlamaCppServerBinding(LollmsLLMBinding):
              error_message = f"Llama.cpp generation error: {str(ex)}"; trace_exception(ex)
              return {"status": False, "error": error_message}

+     def chat(self,
+              discussion: LollmsDiscussion,
+              branch_tip_id: Optional[str] = None,
+              n_predict: Optional[int] = None,
+              stream: Optional[bool] = None,
+              temperature: float = 0.7,
+              top_k: int = 40,
+              top_p: float = 0.9,
+              repeat_penalty: float = 1.1,
+              repeat_last_n: int = 64,
+              seed: Optional[int] = None,
+              n_threads: Optional[int] = None,
+              ctx_size: Optional[int] = None,
+              streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
+              **generation_kwargs
+              ) -> Union[str, dict]:
+         """
+         Conduct a chat session with the llama.cpp server using a LollmsDiscussion object.
+
+         Args:
+             discussion (LollmsDiscussion): The discussion object containing the conversation history.
+             branch_tip_id (Optional[str]): The ID of the message to use as the tip of the conversation branch. Defaults to the active branch.
+             n_predict (Optional[int]): Maximum number of tokens to generate.
+             stream (Optional[bool]): Whether to stream the output.
+             temperature (float): Sampling temperature.
+             top_k (int): Top-k sampling parameter.
+             top_p (float): Top-p sampling parameter.
+             repeat_penalty (float): Penalty for repeated tokens.
+             repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
+             seed (Optional[int]): Random seed for generation.
+             streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+
+         Returns:
+             Union[str, dict]: The generated text or an error dictionary.
+         """
+         if not self.server_process or not self.server_process.is_healthy:
+             return {"status": "error", "message": "Llama.cpp server is not running or not healthy."}
+
+         # 1. Export the discussion to the OpenAI chat format, which llama.cpp server understands.
+         # This handles system prompts, user/assistant roles, and multi-modal content.
+         messages = discussion.export("openai_chat", branch_tip_id)
+
+         # 2. Build the generation payload for the server
+         payload = {
+             "messages": messages,
+             "max_tokens": n_predict,
+             "temperature": temperature,
+             "top_k": top_k,
+             "top_p": top_p,
+             "repeat_penalty": repeat_penalty,
+             "seed": seed,
+             "stream": stream,
+             **generation_kwargs # Pass any extra parameters
+         }
+         # Remove None values, as the API expects them to be absent
+         payload = {k: v for k, v in payload.items() if v is not None}
+
+         endpoint = "/v1/chat/completions"
+         request_url = self._get_request_url(endpoint)
+         full_response_text = ""
+
+         try:
+             # 3. Make the request to the server
+             response = self.server_process.session.post(request_url, json=payload, stream=stream, timeout=self.server_args.get("generation_timeout", 300))
+             response.raise_for_status()
+
+             if stream:
+                 for line in response.iter_lines():
+                     if not line: continue
+                     line_str = line.decode('utf-8').strip()
+                     if line_str.startswith('data: '): line_str = line_str[6:]
+                     if line_str == '[DONE]': break
+                     try:
+                         chunk_data = json.loads(line_str)
+                         chunk_content = chunk_data.get('choices', [{}])[0].get('delta', {}).get('content', '')
+                         if chunk_content:
+                             full_response_text += chunk_content
+                             if streaming_callback and not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
+                                 ASCIIColors.info("Streaming callback requested stop.")
+                                 response.close()
+                                 break
+                     except json.JSONDecodeError:
+                         ASCIIColors.warning(f"Failed to decode JSON stream chunk: {line_str}")
+                         continue
+                 return full_response_text
+             else: # Not streaming
+                 response_data = response.json()
+                 return response_data.get('choices', [{}])[0].get('message', {}).get('content', '')
+
+         except requests.exceptions.RequestException as e:
+             error_message = f"Llama.cpp server request error: {e}"
+             if e.response is not None:
+                 try:
+                     error_details = e.response.json()
+                     error_message += f" - Details: {error_details.get('error', e.response.text)}"
+                 except json.JSONDecodeError:
+                     error_message += f" - Response: {e.response.text[:200]}"
+             ASCIIColors.error(error_message)
+             return {"status": "error", "message": error_message}
+         except Exception as ex:
+             error_message = f"Llama.cpp generation error: {str(ex)}"
+             trace_exception(ex)
+             return {"status": "error", "message": error_message}
+
      def tokenize(self, text: str) -> List[int]:
          if not self.server_process or not self.server_process.is_healthy: raise ConnectionError("Server not running.")
          try:
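
The new LlamaCppServerBinding.chat() treats the streaming callback's return value as a continue/stop signal: a falsy return closes the HTTP stream and the method returns whatever text was collected. A minimal sketch of that contract, assuming `binding` is an already-initialized LlamaCppServerBinding and `discussion` an existing LollmsDiscussion:

```python
# Sketch only: `binding` and `discussion` are assumed to exist already.
from lollms_client.lollms_types import MSG_TYPE

received = []

def stop_after_200_chars(chunk: str, msg_type: MSG_TYPE) -> bool:
    # Returning False asks chat() to close the stream and return the partial text.
    received.append(chunk)
    return sum(len(c) for c in received) < 200

partial_text = binding.chat(
    discussion,
    stream=True,
    n_predict=512,
    temperature=0.2,
    streaming_callback=stop_after_200_chars,
)
print(partial_text)
```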
@@ -168,7 +168,108 @@ class LollmsLLMBinding(LollmsLLMBinding):
                  return {"status": False, "error": str(ex)}
          else:
              return {"status": False, "error": response.text}
-
+     def chat(self,
+              discussion: LollmsDiscussion,
+              branch_tip_id: Optional[str] = None,
+              n_predict: Optional[int] = None,
+              stream: Optional[bool] = None,
+              temperature: Optional[float] = None,
+              top_k: Optional[int] = None,
+              top_p: Optional[float] = None,
+              repeat_penalty: Optional[float] = None,
+              repeat_last_n: Optional[int] = None,
+              seed: Optional[int] = None,
+              n_threads: Optional[int] = None,
+              ctx_size: int | None = None,
+              streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+              ) -> Union[str, dict]:
+         """
+         Conduct a chat session with a lollms-webui server using a LollmsDiscussion object.
+
+         Args:
+             discussion (LollmsDiscussion): The discussion object containing the conversation history.
+             branch_tip_id (Optional[str]): The ID of the message to use as the tip of the conversation branch. Defaults to the active branch.
+             ... (other parameters) ...
+
+         Returns:
+             Union[str, dict]: The generated text or an error dictionary.
+         """
+         # 1. Export the discussion to the lollms-native text format
+         prompt_text = discussion.export("lollms_text", branch_tip_id)
+
+         # 2. Extract images from the LAST message of the branch
+         # lollms-webui's endpoint associates images with the final prompt
+         active_branch_id = branch_tip_id or discussion.active_branch_id
+         branch = discussion.get_branch(active_branch_id)
+         last_message = branch[-1] if branch else None
+
+         image_data = []
+         if last_message and last_message.images:
+             # The endpoint expects a list of base64 strings.
+             # We will only process images of type 'base64'. URL types are not supported by this endpoint.
+             for img in last_message.images:
+                 if img['type'] == 'base64':
+                     image_data.append(img['data'])
+                 # Note: 'url' type images are ignored for this binding.
+
+         # 3. Determine endpoint and build payload
+         endpoint = "/lollms_generate_with_images" if image_data else "/lollms_generate"
+         url = f"{self.host_address}{endpoint}"
+
+         headers = {'Content-Type': 'application/json'}
+         if self.service_key:
+             headers['Authorization'] = f'Bearer {self.service_key}'
+
+         data = {
+             "prompt": prompt_text,
+             "model_name": self.model_name,
+             "personality": self.personality,
+             "n_predict": n_predict,
+             "stream": stream,
+             "temperature": temperature,
+             "top_k": top_k,
+             "top_p": top_p,
+             "repeat_penalty": repeat_penalty,
+             "repeat_last_n": repeat_last_n,
+             "seed": seed,
+             "n_threads": n_threads
+         }
+         if image_data:
+             data["images"] = image_data
+
+         # 4. Make the request (logic copied and adapted from generate_text)
+         try:
+             response = requests.post(
+                 url,
+                 json=data,
+                 headers=headers,
+                 stream=stream,
+                 verify=self.verify_ssl_certificate
+             )
+             response.raise_for_status() # Raise an exception for bad status codes
+
+             if not stream:
+                 return response.text.strip()
+             else:
+                 full_response_text = ""
+                 for line in response.iter_lines():
+                     if line:
+                         chunk = line.decode("utf-8")
+                         full_response_text += chunk
+                         if streaming_callback:
+                             if not streaming_callback(chunk, MSG_TYPE.MSG_TYPE_CHUNK):
+                                 break
+                 # Clean up potential quotes from some streaming formats
+                 if full_response_text.startswith('"') and full_response_text.endswith('"'):
+                     full_response_text = full_response_text[1:-1]
+                 return full_response_text.rstrip('!')
+
+         except requests.exceptions.RequestException as e:
+             error_message = f"lollms-webui request error: {e}"
+             return {"status": "error", "message": error_message}
+         except Exception as ex:
+             error_message = f"lollms-webui generation error: {str(ex)}"
+             return {"status": "error", "message": error_message}
      def tokenize(self, text: str) -> list:
          """
          Tokenize the input text into a list of tokens using the /lollms_tokenize endpoint.
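
This binding routes the request to /lollms_generate_with_images only when the branch's last message carries 'base64' image entries; 'url' entries are ignored. A hedged sketch of the expected image-entry shape follows; the helper below is hypothetical and only mirrors the filtering done inside chat(), it is not part of lollms-client:

```python
# Hypothetical helper illustrating the image entry shape this binding consumes:
# each element of message.images is a dict with 'type' and 'data'; only 'base64'
# entries are forwarded (as raw base64 strings) to /lollms_generate_with_images.
import base64
from pathlib import Path

def make_base64_image_entry(image_path: str) -> dict:
    encoded = base64.b64encode(Path(image_path).read_bytes()).decode("utf-8")
    return {"type": "base64", "data": encoded}

# A last message carrying [make_base64_image_entry("photo.png")] would make chat()
# POST to /lollms_generate_with_images with images=["<base64 payload>"];
# without any base64 entries it falls back to /lollms_generate.
```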
@@ -6,6 +6,7 @@ from lollms_client.lollms_types import MSG_TYPE
  # encode_image is not strictly needed if ollama-python handles paths, but kept for consistency if ever needed.
  # from lollms_client.lollms_utilities import encode_image
  from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
+ from lollms_client.lollms_discussion import LollmsDiscussion
  from typing import Optional, Callable, List, Union, Dict

  from ascii_colors import ASCIIColors, trace_exception
@@ -258,6 +259,104 @@ class OllamaBinding(LollmsLLMBinding):
              trace_exception(ex)
              return {"status": False, "error": error_message}

+     def chat(self,
+              discussion: LollmsDiscussion,
+              branch_tip_id: Optional[str] = None,
+              n_predict: Optional[int] = None,
+              stream: Optional[bool] = None,
+              temperature: float = 0.7,
+              top_k: int = 40,
+              top_p: float = 0.9,
+              repeat_penalty: float = 1.1,
+              repeat_last_n: int = 64,
+              seed: Optional[int] = None,
+              n_threads: Optional[int] = None,
+              ctx_size: Optional[int] = None,
+              streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+              ) -> Union[str, dict]:
+         """
+         Conduct a chat session with the Ollama model using a LollmsDiscussion object.
+
+         Args:
+             discussion (LollmsDiscussion): The discussion object containing the conversation history.
+             branch_tip_id (Optional[str]): The ID of the message to use as the tip of the conversation branch. Defaults to the active branch.
+             n_predict (Optional[int]): Maximum number of tokens to generate.
+             stream (Optional[bool]): Whether to stream the output.
+             temperature (float): Sampling temperature.
+             top_k (int): Top-k sampling parameter.
+             top_p (float): Top-p sampling parameter.
+             repeat_penalty (float): Penalty for repeated tokens.
+             repeat_last_n (int): Number of previous tokens to consider for repeat penalty.
+             seed (Optional[int]): Random seed for generation.
+             n_threads (Optional[int]): Number of threads to use.
+             ctx_size (Optional[int]): Context size override for this generation.
+             streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+
+         Returns:
+             Union[str, dict]: The generated text or an error dictionary.
+         """
+         if not self.ollama_client:
+             return {"status": "error", "message": "Ollama client not initialized."}
+
+         # 1. Export the discussion to the Ollama chat format
+         # This handles system prompts, user/assistant roles, and base64-encoded images.
+         messages = discussion.export("ollama_chat", branch_tip_id)
+
+         # 2. Build the generation options dictionary
+         options = {
+             'num_predict': n_predict,
+             'temperature': float(temperature),
+             'top_k': top_k,
+             'top_p': top_p,
+             'repeat_penalty': repeat_penalty,
+             'repeat_last_n': repeat_last_n,
+             'seed': seed,
+             'num_thread': n_threads,
+             'num_ctx': ctx_size,
+         }
+         # Remove None values, as ollama-python expects them to be absent
+         options = {k: v for k, v in options.items() if v is not None}
+
+         full_response_text = ""
+
+         try:
+             # 3. Call the Ollama API
+             if stream:
+                 response_stream = self.ollama_client.chat(
+                     model=self.model_name,
+                     messages=messages,
+                     stream=True,
+                     options=options if options else None
+                 )
+                 for chunk in response_stream:
+                     chunk_content = chunk.get('message', {}).get('content', '')
+                     if chunk_content:
+                         full_response_text += chunk_content
+                         if streaming_callback:
+                             if not streaming_callback(chunk_content, MSG_TYPE.MSG_TYPE_CHUNK):
+                                 break
+                 return full_response_text
+             else: # Not streaming
+                 response_dict = self.ollama_client.chat(
+                     model=self.model_name,
+                     messages=messages,
+                     stream=False,
+                     options=options if options else None
+                 )
+                 return response_dict.get('message', {}).get('content', '')
+
+         except ollama.ResponseError as e:
+             error_message = f"Ollama API ResponseError: {e.error or 'Unknown error'} (status code: {e.status_code})"
+             ASCIIColors.error(error_message)
+             return {"status": "error", "message": error_message}
+         except ollama.RequestError as e:
+             error_message = f"Ollama API RequestError: {str(e)}"
+             ASCIIColors.error(error_message)
+             return {"status": "error", "message": error_message}
+         except Exception as ex:
+             error_message = f"An unexpected error occurred: {str(ex)}"
+             trace_exception(ex)
+             return {"status": "error", "message": error_message}
      def tokenize(self, text: str) -> list:
          """
          Tokenize the input text into a list of characters.
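
As a usage sketch of the new OllamaBinding.chat() (assuming `binding` is an initialized OllamaBinding and `discussion` an existing LollmsDiscussion): keyword arguments are translated into Ollama options (n_predict → num_predict, ctx_size → num_ctx, n_threads → num_thread), None values are dropped, and failures come back as an error dictionary rather than an exception.

```python
# Sketch only: `binding` and `discussion` are assumed to exist already.
result = binding.chat(
    discussion,
    stream=False,
    temperature=0.7,
    top_k=40,
    top_p=0.9,
    n_predict=256,
    ctx_size=8192,
    seed=42,
)
if isinstance(result, dict):  # error dicts look like {"status": "error", "message": "..."}
    print("Generation failed:", result["message"])
else:
    print(result)
```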