lollms-client 0.20.2__py3-none-any.whl → 0.20.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lollms-client might be problematic.

@@ -5,6 +5,7 @@ from lollms_client.lollms_llm_binding import LollmsLLMBinding
 from lollms_client.lollms_types import MSG_TYPE
 from lollms_client.lollms_utilities import encode_image
 from lollms_client.lollms_types import ELF_COMPLETION_FORMAT
+from lollms_client.lollms_discussion import LollmsDiscussion
 from typing import Optional, Callable, List, Union
 from ascii_colors import ASCIIColors, trace_exception
 from typing import List, Dict
@@ -207,6 +208,114 @@ class OpenAIBinding(LollmsLLMBinding):
 
         return output
 
+    def chat(self,
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = None,
+             stream: Optional[bool] = None,
+             temperature: float = 0.7,
+             top_k: int = 40,
+             top_p: float = 0.9,
+             repeat_penalty: float = 1.1,
+             repeat_last_n: int = 64,
+             seed: Optional[int] = None,
+             n_threads: Optional[int] = None,
+             ctx_size: Optional[int] = None,
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+             ) -> Union[str, dict]:
+        """
+        Conduct a chat session with the OpenAI model using a LollmsDiscussion object.
+
+        Args:
+            discussion (LollmsDiscussion): The discussion object containing the conversation history.
+            branch_tip_id (Optional[str]): The ID of the message to use as the tip of the conversation branch. Defaults to the active branch.
+            n_predict (Optional[int]): Maximum number of tokens to generate.
+            stream (Optional[bool]): Whether to stream the output.
+            temperature (float): Sampling temperature.
+            top_k (int): Top-k sampling parameter (Note: not all OpenAI models use this).
+            top_p (float): Top-p sampling parameter.
+            repeat_penalty (float): Frequency penalty for repeated tokens.
+            seed (Optional[int]): Random seed for generation.
+            streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+
+        Returns:
+            Union[str, dict]: The generated text or an error dictionary.
+        """
+        # 1. Export the discussion to the OpenAI chat format
+        # This handles system prompts, user/assistant roles, and multi-modal content automatically.
+        messages = discussion.export("openai_chat", branch_tip_id)
+
+        # Build the request parameters
+        params = {
+            "model": self.model_name,
+            "messages": messages,
+            "max_tokens": n_predict,
+            "n": 1,
+            "temperature": temperature,
+            "top_p": top_p,
+            "frequency_penalty": repeat_penalty,
+            "stream": stream
+        }
+        # Add seed if available, as it's supported by newer OpenAI models
+        if seed is not None:
+            params["seed"] = seed
+
+        # Remove None values, as the API expects them to be absent
+        params = {k: v for k, v in params.items() if v is not None}
+
+        output = ""
+        # 2. Call the API
+        try:
+            # Check if we should use the chat completions or legacy completions endpoint
+            if self.completion_format == ELF_COMPLETION_FORMAT.Chat:
+                completion = self.client.chat.completions.create(**params)
+
+                if stream:
+                    for chunk in completion:
+                        # The streaming response for chat has a different structure
+                        delta = chunk.choices[0].delta
+                        if delta.content:
+                            word = delta.content
+                            if streaming_callback is not None:
+                                if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
+                                    break
+                            output += word
+                else:
+                    output = completion.choices[0].message.content
+
+            else:  # Fallback to legacy completion format (not recommended for chat)
+                # We need to format the messages list into a single string prompt
+                legacy_prompt = discussion.export("openai_completion", branch_tip_id)
+                legacy_params = {
+                    "model": self.model_name,
+                    "prompt": legacy_prompt,
+                    "max_tokens": n_predict,
+                    "n": 1,
+                    "temperature": temperature,
+                    "top_p": top_p,
+                    "frequency_penalty": repeat_penalty,
+                    "stream": stream
+                }
+                completion = self.client.completions.create(**legacy_params)
+
+                if stream:
+                    for chunk in completion:
+                        word = chunk.choices[0].text
+                        if streaming_callback is not None:
+                            if not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
+                                break
+                        output += word
+                else:
+                    output = completion.choices[0].text
+
+        except Exception as e:
+            # Handle API errors gracefully
+            error_message = f"An error occurred with the OpenAI API: {e}"
+            if streaming_callback:
+                streaming_callback(error_message, MSG_TYPE.MSG_TYPE_EXCEPTION)
+            return {"status": "error", "message": error_message}
+
+        return output
+
     def tokenize(self, text: str) -> list:
         """
         Tokenize the input text into a list of characters.
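
Note that both streaming loops in the added chat method stop as soon as the callback returns a falsy value, even though the declared callback type hints a None return. A minimal sketch of a compatible callback, using only MSG_TYPE from lollms_client.lollms_types (imported in the hunk above):

    from lollms_client.lollms_types import MSG_TYPE

    def print_chunks(chunk: str, msg_type: MSG_TYPE) -> bool:
        # Print each streamed chunk as it arrives.
        if msg_type == MSG_TYPE.MSG_TYPE_CHUNK:
            print(chunk, end="", flush=True)
        # Returning True keeps the stream going; returning a falsy value makes
        # the binding break out of its streaming loop.
        return True
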
@@ -12,6 +12,7 @@ from lollms_client.lollms_ttv_binding import LollmsTTVBinding, LollmsTTVBindingManager
 from lollms_client.lollms_ttm_binding import LollmsTTMBinding, LollmsTTMBindingManager
 from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
 
+from lollms_client.lollms_discussion import LollmsDiscussion
 import json, re
 from enum import Enum
 import base64
@@ -386,6 +387,7 @@ class LollmsClient():
                      split:Optional[bool]=False, # put to true if the prompt is a discussion
                      user_keyword:Optional[str]="!@>user:",
                      ai_keyword:Optional[str]="!@>assistant:",
+                     **kwargs
                      ) -> Union[str, dict]:
        """
        Generate text using the active LLM binding, using instance defaults if parameters are not provided.
@@ -434,6 +436,64 @@
            raise RuntimeError("LLM binding not initialized.")
 
 
+    def chat(self,
+             discussion: LollmsDiscussion,
+             branch_tip_id: Optional[str] = None,
+             n_predict: Optional[int] = None,
+             stream: Optional[bool] = None,
+             temperature: Optional[float] = None,
+             top_k: Optional[int] = None,
+             top_p: Optional[float] = None,
+             repeat_penalty: Optional[float] = None,
+             repeat_last_n: Optional[int] = None,
+             seed: Optional[int] = None,
+             n_threads: Optional[int] = None,
+             ctx_size: Optional[int] = None,
+             streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
+             ) -> Union[str, dict]:
+        """
+        High-level method to perform a chat generation using a LollmsDiscussion object.
+
+        This is the recommended method for conversational interactions. It uses the
+        discussion object to correctly format the context for the model, including
+        system prompts, roles, and multi-modal content.
+
+        Args:
+            discussion (LollmsDiscussion): The discussion object to use for context.
+            branch_tip_id (Optional[str]): The ID of the message to use as the end of the conversation branch. If None, the active branch is used.
+            n_predict (Optional[int]): Maximum number of tokens to generate. Uses instance default if None.
+            stream (Optional[bool]): Whether to stream the output. Uses instance default if None.
+            temperature (Optional[float]): Sampling temperature. Uses instance default if None.
+            top_k (Optional[int]): Top-k sampling parameter. Uses instance default if None.
+            top_p (Optional[float]): Top-p sampling parameter. Uses instance default if None.
+            repeat_penalty (Optional[float]): Penalty for repeated tokens. Uses instance default if None.
+            repeat_last_n (Optional[int]): Number of previous tokens to consider for repeat penalty. Uses instance default if None.
+            seed (Optional[int]): Random seed for generation. Uses instance default if None.
+            n_threads (Optional[int]): Number of threads to use. Uses instance default if None.
+            ctx_size (Optional[int]): Context size override for this generation.
+            streaming_callback (Optional[Callable[[str, MSG_TYPE], None]]): Callback for streaming output.
+
+        Returns:
+            Union[str, dict]: Generated text or an error dictionary if failed.
+        """
+        if self.binding:
+            return self.binding.chat(
+                discussion=discussion,
+                branch_tip_id=branch_tip_id,
+                n_predict=n_predict if n_predict is not None else self.default_n_predict,
+                stream=stream if stream is not None else self.default_stream,
+                temperature=temperature if temperature is not None else self.default_temperature,
+                top_k=top_k if top_k is not None else self.default_top_k,
+                top_p=top_p if top_p is not None else self.default_top_p,
+                repeat_penalty=repeat_penalty if repeat_penalty is not None else self.default_repeat_penalty,
+                repeat_last_n=repeat_last_n if repeat_last_n is not None else self.default_repeat_last_n,
+                seed=seed if seed is not None else self.default_seed,
+                n_threads=n_threads if n_threads is not None else self.default_n_threads,
+                ctx_size=ctx_size if ctx_size is not None else self.default_ctx_size,
+                streaming_callback=streaming_callback if streaming_callback is not None else self.default_streaming_callback
+            )
+        raise RuntimeError("LLM binding not initialized.")
+
     def embed(self, text, **kwargs):
         """
         Generate embeddings for the input text using the active LLM binding.
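
The new LollmsClient.chat wrapper fills in instance defaults and delegates to the active binding's chat. A hedged usage sketch follows; how the LollmsClient and the LollmsDiscussion instances are constructed is not shown in this diff, so both objects are assumed to already exist:

    from lollms_client.lollms_types import MSG_TYPE

    def on_chunk(chunk: str, msg_type: MSG_TYPE) -> bool:
        if msg_type == MSG_TYPE.MSG_TYPE_CHUNK:
            print(chunk, end="", flush=True)
        return True  # keep streaming

    # `client` is an initialized LollmsClient and `discussion` a LollmsDiscussion
    # holding the conversation so far (their construction is outside this diff).
    answer = client.chat(
        discussion,
        stream=True,
        streaming_callback=on_chunk,
    )
    if isinstance(answer, dict):
        # Bindings return {"status": "error", "message": ...} on failure.
        print(answer["message"])
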
@@ -666,7 +726,7 @@ Respond with a JSON object containing ONE of the following structures:
         """ # No {self.ai_full_header} here, generate_code will get raw JSON
 
         if streaming_callback:
-            streaming_callback(f"LLM deciding next step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "decision_making"}, turn_history)
+            streaming_callback(f"LLM deciding next step (iteration {llm_iterations})...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "decision_making"}, turn_history)
 
         # Use generate_code to get structured JSON output from LLM
         # Note: generate_code itself uses generate_text. We are asking for JSON here.
@@ -679,7 +739,7 @@ Respond with a JSON object containing ONE of the following structures:
             # streaming_callback=None, # Decisions are usually not streamed chunk by chunk
         )
         if streaming_callback:
-            streaming_callback(f"LLM decision received.", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "decision_making"}, turn_history)
+            streaming_callback(f"LLM decision received.", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "decision_making"}, turn_history)
 
 
         if not raw_llm_decision_json:
@@ -733,10 +793,11 @@ Respond with a JSON object containing ONE of the following structures:
                 current_conversation.append({"role":"assistant", "content":"(I decided to use a tool, but I'm unsure which one. Could you clarify?)"})
                 break # Or ask LLM to try again without this faulty decision in history
 
-            tool_call_info = {"type": "tool_call_request", "name": tool_name, "params": tool_params}
+            tool_call_info = {"id": "tool_call_request", "name": tool_name, "params": tool_params}
             turn_history.append(tool_call_info)
             if streaming_callback:
                 streaming_callback(f"LLM requests to call tool: {tool_name} with params: {tool_params}", MSG_TYPE.MSG_TYPE_INFO, tool_call_info, turn_history)
+                streaming_callback("", MSG_TYPE.MSG_TYPE_TOOL_CALL, tool_call_info, turn_history)
 
             # Interactive execution if enabled
             if interactive_tool_execution:
@@ -760,15 +821,17 @@ Respond with a JSON object containing ONE of the following structures:
 
 
             if streaming_callback:
-                streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"type": "tool_execution", "tool_name": tool_name}, turn_history)
+                streaming_callback(f"Executing tool: {tool_name}...", MSG_TYPE.MSG_TYPE_STEP_START, {"id": "tool_execution", "tool_name": tool_name}, turn_history)
 
             tool_result = self.mcp.execute_tool(tool_name, tool_params, lollms_client_instance=self)
 
             tool_call_info["result"] = tool_result # Add result to this call's info
             tool_calls_made_this_turn.append(tool_call_info) # Log the completed call
+            if streaming_callback:
+                streaming_callback(f"", MSG_TYPE.MSG_TYPE_TOOL_OUTPUT, tool_result, turn_history)
 
             if streaming_callback:
-                streaming_callback(f"Tool {tool_name} execution finished. Result: {json.dumps(tool_result)}", MSG_TYPE.MSG_TYPE_STEP_END, {"type": "tool_execution", "tool_name": tool_name, "result": tool_result}, turn_history)
+                streaming_callback(f"Tool {tool_name} execution finished. Result: {json.dumps(tool_result)}", MSG_TYPE.MSG_TYPE_STEP_END, {"id": "tool_execution", "tool_name": tool_name, "result": tool_result}, turn_history)
 
             # Add tool execution result to conversation for the LLM
             # The format of this message can influence how the LLM uses the tool output.
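
These hunks rename the step-metadata key from "type" to "id" and add two callback events around tool execution: MSG_TYPE_TOOL_CALL, whose payload is the tool_call_info dict, and MSG_TYPE_TOOL_OUTPUT, whose payload is the raw tool result. A sketch of a four-argument callback that consumes them, assuming both enum members exist on MSG_TYPE as the calls above imply:

    from lollms_client.lollms_types import MSG_TYPE

    def mcp_callback(message, msg_type, metadata=None, history=None) -> bool:
        # Step boundaries now identify themselves through metadata["id"].
        if msg_type in (MSG_TYPE.MSG_TYPE_STEP_START, MSG_TYPE.MSG_TYPE_STEP_END):
            print(f"[{metadata.get('id')}] {message}")
        elif msg_type == MSG_TYPE.MSG_TYPE_TOOL_CALL:
            # metadata is the tool_call_info dict: {"id", "name", "params"}.
            print(f"tool requested: {metadata['name']} with {metadata['params']}")
        elif msg_type == MSG_TYPE.MSG_TYPE_TOOL_OUTPUT:
            # metadata is the raw result returned by self.mcp.execute_tool.
            print(f"tool result: {metadata}")
        return True
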
@@ -972,12 +1035,14 @@ Respond with a JSON object containing ONE of the following structures:
         hop_details = {"query": current_query_for_rag, "retrieved_chunks_details": [], "status": ""}
         previous_queries.append(current_query_for_rag)
         new_unique = 0
+        documents = []
         for chunk in retrieved:
             doc = chunk.get("file_path", "Unknown")
             content = str(chunk.get("chunk_text", ""))
             sim = float(chunk.get("similarity_percent", 0.0))
             detail = {"document": doc, "similarity": sim, "content": content,
                       "retrieved_in_hop": hop_count + 1, "query_used": current_query_for_rag}
+            documents.append(doc)
             hop_details["retrieved_chunks_details"].append(detail)
             key = f"{doc}::{content[:100]}"
             if key not in all_unique_retrieved_chunks_map:
@@ -987,6 +1052,8 @@ Respond with a JSON object containing ONE of the following structures:
         if hop_count > 0 and new_unique == 0:
             hop_details["status"] = "No *new* unique chunks retrieved"
         rag_hops_details_list.append(hop_details)
+        if streaming_callback:
+            streaming_callback(f"Retrieved {len(retrieved)} data chunks from {set(documents)}", MSG_TYPE.MSG_TYPE_STEP, {"id": f"retrieval {hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)
 
         if streaming_callback:
             streaming_callback(f"RAG Hop {hop_count + 1} done", MSG_TYPE.MSG_TYPE_STEP_END, {"id": f"rag_hop_{hop_count + 1}", "hop": hop_count + 1}, turn_rag_history_for_callback)