lollms-client 0.25.1__py3-none-any.whl → 0.25.6__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.

Potentially problematic release: this version of lollms-client has been flagged as potentially problematic.

lollms_client/__init__.py CHANGED
@@ -8,7 +8,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utiliti
8
8
  from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
9
9
 
10
10
 
11
- __version__ = "0.25.1" # Updated version
11
+ __version__ = "0.25.6" # Updated version
12
12
 
13
13
  # Optionally, you could define __all__ if you want to be explicit about exports
14
14
  __all__ = [
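lollms_client/llm_bindings/gemini/__init__.py ADDED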
@@ -0,0 +1,501 @@
1
+ # bindings/gemini/binding.py
2
+ import base64
3
+ import os
4
+ from io import BytesIO
5
+ from pathlib import Path
6
+ from typing import Optional, Callable, List, Union, Dict
7
+
8
+ from lollms_client.lollms_discussion import LollmsDiscussion, LollmsMessage
9
+ from lollms_client.lollms_llm_binding import LollmsLLMBinding
10
+ from lollms_client.lollms_types import MSG_TYPE
11
+ from ascii_colors import ASCIIColors, trace_exception
12
+
13
+ import pipmaster as pm
14
+
15
+ # Ensure the required packages are installed
16
+ pm.ensure_packages(["google-generativeai", "pillow", "tiktoken", "protobuf"])
17
+
18
+ import google.generativeai as genai
19
+ from PIL import Image, ImageDraw # ImageDraw is used in the test script below
20
+ import tiktoken
21
+
22
+ BindingName = "GeminiBinding"
23
+
24
+ # Helper to check if a string is a valid path to an image
25
+ def is_image_path(path_str: str) -> bool:
26
+ try:
27
+ p = Path(path_str)
28
+ return p.is_file() and p.suffix.lower() in ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp']
29
+ except Exception:
30
+ return False
31
+
32
+ class GeminiBinding(LollmsLLMBinding):
33
+ """Google Gemini-specific binding implementation."""
34
+
35
+ def __init__(self,
36
+ host_address: str = None, # Ignored, for compatibility
37
+ model_name: str = "gemini-1.5-pro-latest",
38
+ service_key: str = None,
39
+ verify_ssl_certificate: bool = True, # Ignored, for compatibility
40
+ **kwargs
41
+ ):
42
+ """
43
+ Initialize the Gemini binding.
44
+
45
+ Args:
46
+ model_name (str): Name of the Gemini model to use.
47
+ service_key (str): Google AI Studio API key.
48
+ """
49
+ super().__init__(binding_name=BindingName)
50
+ self.model_name = model_name
51
+ self.service_key = service_key
52
+
53
+ if not self.service_key:
54
+ self.service_key = os.getenv("GOOGLE_API_KEY")
55
+
56
+ if not self.service_key:
57
+ raise ValueError("Google API key is required. Please set it via the 'service_key' parameter or the GOOGLE_API_KEY environment variable.")
58
+
59
+ try:
60
+ genai.configure(api_key=self.service_key)
61
+ self.client = genai # Alias for consistency
62
+ except Exception as e:
63
+ ASCIIColors.error(f"Failed to configure Gemini client: {e}")
64
+ self.client = None
65
+ raise ConnectionError(f"Could not configure Gemini client: {e}") from e
66
+
67
+ def get_generation_config(self,
68
+ temperature: float,
69
+ top_p: float,
70
+ top_k: int,
71
+ n_predict: int) -> genai.types.GenerationConfig:
72
+ """Builds a GenerationConfig object from parameters."""
73
+ config = {}
74
+ if temperature is not None: config['temperature'] = float(temperature)
75
+ if top_p is not None: config['top_p'] = top_p
76
+ if top_k is not None: config['top_k'] = top_k
77
+ if n_predict is not None: config['max_output_tokens'] = n_predict
78
+ return genai.types.GenerationConfig(**config)
79
+
80
+ def generate_text(self,
81
+ prompt: str,
82
+ images: Optional[List[str]] = None,
83
+ system_prompt: str = "",
84
+ n_predict: Optional[int] = 2048,
85
+ stream: Optional[bool] = False,
86
+ temperature: float = 0.7,
87
+ top_k: int = 40,
88
+ top_p: float = 0.9,
89
+ repeat_penalty: float = 1.1, # Not directly supported by Gemini API
90
+ repeat_last_n: int = 64, # Not directly supported
91
+ seed: Optional[int] = None, # Not directly supported
92
+ n_threads: Optional[int] = None, # Not applicable
93
+ ctx_size: int | None = None, # Determined by model, not settable per-call
94
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None,
95
+ split:Optional[bool]=False,
96
+ user_keyword:Optional[str]="!@>user:",
97
+ ai_keyword:Optional[str]="!@>assistant:",
98
+ ) -> Union[str, dict]:
99
+ """
100
+ Generate text using the Gemini model.
101
+
102
+ Args:
103
+ prompt (str): The input prompt for text generation.
104
+ images (Optional[List[str]]): List of image file paths or base64 strings.
105
+ system_prompt (str): The system prompt to guide the model.
106
+ ... other LollmsLLMBinding parameters ...
107
+
108
+ Returns:
109
+ Union[str, dict]: Generated text or error dictionary.
110
+ """
111
+ if not self.client:
112
+ return {"status": False, "error": "Gemini client not initialized."}
113
+
114
+ # Gemini uses 'system_instruction' for GenerativeModel, not part of the regular message list.
115
+ model = self.client.GenerativeModel(
116
+ model_name=self.model_name,
117
+ system_instruction=system_prompt if system_prompt else None
118
+ )
119
+
120
+ generation_config = self.get_generation_config(temperature, top_p, top_k, n_predict)
121
+
122
+ # Prepare content for the API call
123
+ content_parts = []
124
+ if split:
125
+ # Note: The 'split' logic for Gemini should ideally build a multi-turn history,
126
+ # but for `generate_text`, we'll treat the last user part as the main prompt.
127
+ discussion_messages = self.split_discussion(prompt, user_keyword, ai_keyword)
128
+ if discussion_messages:
129
+ last_message = discussion_messages[-1]['content']
130
+ content_parts.append(last_message)
131
+ else:
132
+ content_parts.append(prompt)
133
+ else:
134
+ content_parts.append(prompt)
135
+
136
+ if images:
137
+ for image_data in images:
138
+ try:
139
+ if is_image_path(image_data):
140
+ img = Image.open(image_data)
141
+ else: # Assume base64
142
+ img = Image.open(BytesIO(base64.b64decode(image_data)))
143
+ content_parts.append(img)
144
+ except Exception as e:
145
+ error_msg = f"Failed to process image: {e}"
146
+ ASCIIColors.error(error_msg)
147
+ return {"status": False, "error": error_msg}
148
+
149
+ full_response_text = ""
150
+ try:
151
+ response = model.generate_content(
152
+ contents=content_parts,
153
+ generation_config=generation_config,
154
+ stream=stream
155
+ )
156
+
157
+ if stream:
158
+ for chunk in response:
159
+ try:
160
+ chunk_text = chunk.text
161
+ except ValueError:
162
+ # Handle potential empty parts in the stream
163
+ chunk_text = ""
164
+
165
+ if chunk_text:
166
+ full_response_text += chunk_text
167
+ if streaming_callback:
168
+ if not streaming_callback(chunk_text, MSG_TYPE.MSG_TYPE_CHUNK):
169
+ break # Callback requested stop
170
+ return full_response_text
171
+ else:
172
+ # Check for safety blocks
173
+ if response.prompt_feedback.block_reason:
174
+ error_msg = f"Content blocked due to: {response.prompt_feedback.block_reason.name}"
175
+ ASCIIColors.warning(error_msg)
176
+ return {"status": False, "error": error_msg}
177
+ return response.text
178
+
179
+ except Exception as ex:
180
+ error_message = f"An unexpected error occurred with Gemini API: {str(ex)}"
181
+ trace_exception(ex)
182
+ return {"status": False, "error": error_message}
183
+
184
+ def chat(self,
185
+ discussion: LollmsDiscussion,
186
+ branch_tip_id: Optional[str] = None,
187
+ n_predict: Optional[int] = 2048,
188
+ stream: Optional[bool] = False,
189
+ temperature: float = 0.7,
190
+ top_k: int = 40,
191
+ top_p: float = 0.9,
192
+ repeat_penalty: float = 1.1,
193
+ repeat_last_n: int = 64,
194
+ seed: Optional[int] = None,
195
+ n_threads: Optional[int] = None,
196
+ ctx_size: Optional[int] = None,
197
+ streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None
198
+ ) -> Union[str, dict]:
199
+ """
200
+ Conduct a chat session with the Gemini model using a LollmsDiscussion object.
201
+ """
202
+ if not self.client:
203
+ return {"status": "error", "message": "Gemini client not initialized."}
204
+
205
+ # 1. Manually export discussion to Gemini's format.
206
+ # Gemini uses 'user' and 'model' roles.
207
+ # The system prompt is handled separately at model initialization.
208
+ system_prompt = discussion.system_prompt
209
+ messages = discussion.get_messages(branch_tip_id)
210
+
211
+ history = []
212
+ for msg in messages:
213
+ role = 'user' if msg.sender_type == "user" else 'model'  # Gemini's chat history uses 'model', not 'assistant'
214
+
215
+ # Handle multimodal content in the message
216
+ content_parts = []
217
+ if msg.content:
218
+ content_parts.append(msg.content)
219
+
220
+ # Check for images associated with this message
221
+ if msg.images:
222
+ for file_path in msg.images:
223
+ if is_image_path(file_path):
224
+ try:
225
+ content_parts.append(Image.open(file_path))
226
+ except Exception as e:
227
+ ASCIIColors.warning(f"Could not load image {file_path}: {e}")
228
+
229
+ if content_parts:
230
+ history.append({'role': role, 'parts': content_parts})
231
+
232
+ model = self.client.GenerativeModel(
233
+ model_name=self.model_name,
234
+ system_instruction=system_prompt
235
+ )
236
+
237
+ # History must not be empty and should not contain consecutive roles of the same type.
238
+ # We also need to separate the final prompt from the history.
239
+ if not history:
240
+ return {"status": "error", "message": "Cannot start chat with an empty discussion."}
241
+
242
+ chat_history = history[:-1] if len(history) > 1 else []
243
+ last_prompt_parts = history[-1]['parts']
244
+
245
+ # Ensure history is valid (no consecutive same roles)
246
+ valid_history = []
247
+ if chat_history:
248
+ valid_history.append(chat_history[0])
249
+ for i in range(1, len(chat_history)):
250
+ if chat_history[i]['role'] != chat_history[i-1]['role']:
251
+ valid_history.append(chat_history[i])
252
+
253
+ chat_session = model.start_chat(history=valid_history)
254
+
255
+ generation_config = self.get_generation_config(temperature, top_p, top_k, n_predict)
256
+
257
+ full_response_text = ""
258
+ try:
259
+ response = chat_session.send_message(
260
+ content=last_prompt_parts,
261
+ generation_config=generation_config,
262
+ stream=stream
263
+ )
264
+
265
+ if stream:
266
+ for chunk in response:
267
+ try:
268
+ chunk_text = chunk.text
269
+ except ValueError:
270
+ chunk_text = ""
271
+
272
+ if chunk_text:
273
+ full_response_text += chunk_text
274
+ if streaming_callback:
275
+ if not streaming_callback(chunk_text, MSG_TYPE.MSG_TYPE_CHUNK):
276
+ break
277
+ return full_response_text
278
+ else:
279
+ if response.prompt_feedback.block_reason:
280
+ error_msg = f"Content blocked due to: {response.prompt_feedback.block_reason.name}"
281
+ ASCIIColors.warning(error_msg)
282
+ return {"status": "error", "message": error_msg}
283
+ return response.text
284
+
285
+ except Exception as ex:
286
+ error_message = f"An unexpected error occurred with Gemini API: {str(ex)}"
287
+ trace_exception(ex)
288
+ return {"status": "error", "message": error_message}
289
+
290
+ def tokenize(self, text: str) -> list:
291
+ """
292
+ Tokenize the input text.
293
+ Note: Gemini doesn't expose a public tokenizer API.
294
+ Using tiktoken for a rough estimate, NOT accurate for Gemini.
295
+ """
296
+ try:
297
+ encoding = tiktoken.get_encoding("cl100k_base")
298
+ return encoding.encode(text)
299
+ except Exception:
300
+ return list(text.encode('utf-8'))
301
+
302
+ def detokenize(self, tokens: list) -> str:
303
+ """
304
+ Detokenize a list of tokens.
305
+ Note: Based on the placeholder tokenizer.
306
+ """
307
+ try:
308
+ encoding = tiktoken.get_encoding("cl100k_base")
309
+ return encoding.decode(tokens)
310
+ except Exception:
311
+ return bytes(tokens).decode('utf-8', errors='ignore')
312
+
313
+ def count_tokens(self, text: str) -> int:
314
+ """
315
+ Count tokens from a text using the Gemini API.
316
+ """
317
+ if not self.client or not self.model_name:
318
+ ASCIIColors.warning("Cannot count tokens, Gemini client or model_name not set.")
319
+ return -1
320
+ try:
321
+ model = self.client.GenerativeModel(self.model_name)
322
+ return model.count_tokens(text).total_tokens
323
+ except Exception as e:
324
+ ASCIIColors.error(f"Failed to count tokens with Gemini API: {e}")
325
+ # Fallback to tiktoken for a rough estimate
326
+ return len(self.tokenize(text))
327
+
328
+ def embed(self, text: str, **kwargs) -> List[float]:
329
+ """
330
+ Get embeddings for the input text using Gemini API.
331
+ """
332
+ if not self.client:
333
+ raise Exception("Gemini client not initialized.")
334
+
335
+ # Default to a known Gemini embedding model
336
+ model_to_use = kwargs.get("model", "models/embedding-001")
337
+
338
+ try:
339
+ response = self.client.embed_content(
340
+ model=model_to_use,
341
+ content=text,
342
+ task_type="retrieval_document" # or "semantic_similarity", etc.
343
+ )
344
+ return response['embedding']
345
+ except Exception as ex:
346
+ trace_exception(ex)
347
+ raise Exception(f"Gemini embedding failed: {str(ex)}") from ex
348
+
349
+ def get_model_info(self) -> dict:
350
+ """Return information about the current Gemini model setup."""
351
+ return {
352
+ "name": self.binding_name,
353
+ "version": genai.__version__,
354
+ "host_address": "https://generativelanguage.googleapis.com",
355
+ "model_name": self.model_name,
356
+ "supports_structured_output": False,
357
+ "supports_vision": "vision" in self.model_name or "gemini-1.5" in self.model_name,
358
+ }
359
+
360
+ def listModels(self) -> List[Dict[str, str]]:
361
+ """Lists available generative models from the Gemini service."""
362
+ if not self.client:
363
+ ASCIIColors.error("Gemini client not initialized. Cannot list models.")
364
+ return []
365
+ try:
366
+ ASCIIColors.debug("Listing Gemini models...")
367
+ model_info_list = []
368
+ for m in self.client.list_models():
369
+ # We are interested in models that can generate content.
370
+ if 'generateContent' in m.supported_generation_methods:
371
+ model_info_list.append({
372
+ 'model_name': m.name,
373
+ 'display_name': m.display_name,
374
+ 'description': m.description,
375
+ 'owned_by': 'Google'
376
+ })
377
+ return model_info_list
378
+ except Exception as ex:
379
+ trace_exception(ex)
380
+ return []
381
+
382
+ def load_model(self, model_name: str) -> bool:
383
+ """Set the model name for subsequent operations."""
384
+ self.model_name = model_name
385
+ ASCIIColors.info(f"Gemini model set to: {model_name}. It will be used on the next API call.")
386
+ return True
387
+
388
+ if __name__ == '__main__':
389
+ # Example Usage (requires GOOGLE_API_KEY environment variable)
390
+ if 'GOOGLE_API_KEY' not in os.environ:
391
+ ASCIIColors.red("Error: GOOGLE_API_KEY environment variable not set.")
392
+ print("Please get your key from Google AI Studio and set it.")
393
+ exit(1)
394
+
395
+ ASCIIColors.yellow("--- Testing GeminiBinding ---")
396
+
397
+ # --- Configuration ---
398
+ test_model_name = "gemini-1.5-pro-latest"
399
+ test_vision_model_name = "gemini-1.5-pro-latest" # or gemini-pro-vision
400
+ test_embedding_model = "models/embedding-001"
401
+
402
+ # This variable is global to the script's execution
403
+ full_streamed_text = ""
404
+
405
+ try:
406
+ # --- Initialization ---
407
+ ASCIIColors.cyan("\n--- Initializing Binding ---")
408
+ binding = GeminiBinding(model_name=test_model_name)
409
+ ASCIIColors.green("Binding initialized successfully.")
410
+ ASCIIColors.info(f"Using google-generativeai version: {genai.__version__}")
411
+
412
+ # --- List Models ---
413
+ ASCIIColors.cyan("\n--- Listing Models ---")
414
+ models = binding.listModels()
415
+ if models:
416
+ ASCIIColors.green(f"Found {len(models)} generative models. First 5:")
417
+ for m in models[:5]:
418
+ print(m['model_name'])
419
+ else:
420
+ ASCIIColors.warning("No models found or failed to list models.")
421
+
422
+ # --- Count Tokens ---
423
+ ASCIIColors.cyan("\n--- Counting Tokens ---")
424
+ sample_text = "Hello, world! This is a test."
425
+ token_count = binding.count_tokens(sample_text)
426
+ ASCIIColors.green(f"Token count for '{sample_text}': {token_count}")
427
+
428
+ # --- Text Generation (Non-Streaming) ---
429
+ ASCIIColors.cyan("\n--- Text Generation (Non-Streaming) ---")
430
+ prompt_text = "Explain the importance of bees in one paragraph."
431
+ ASCIIColors.info(f"Prompt: {prompt_text}")
432
+ generated_text = binding.generate_text(prompt_text, n_predict=100, stream=False)
433
+ if isinstance(generated_text, str):
434
+ ASCIIColors.green(f"Generated text:\n{generated_text}")
435
+ else:
436
+ ASCIIColors.error(f"Generation failed: {generated_text}")
437
+
438
+ # --- Text Generation (Streaming) ---
439
+ ASCIIColors.cyan("\n--- Text Generation (Streaming) ---")
440
+
441
+ def stream_callback(chunk: str, msg_type: int):
442
+ # FIX: Use 'global' to modify the variable in the module's scope
443
+ global full_streamed_text
444
+ ASCIIColors.green(chunk, end="", flush=True)
445
+ full_streamed_text += chunk
446
+ return True
447
+
448
+ # Reset for this test
449
+ full_streamed_text = ""
450
+ ASCIIColors.info(f"Prompt: {prompt_text}")
451
+ result = binding.generate_text(prompt_text, n_predict=150, stream=True, streaming_callback=stream_callback)
452
+ print("\n--- End of Stream ---")
453
+ # 'result' is the full text after streaming, which should match our captured text.
454
+ ASCIIColors.green(f"Full streamed text (for verification): {result}")
455
+
456
+ # --- Embeddings ---
457
+ ASCIIColors.cyan("\n--- Embeddings ---")
458
+ try:
459
+ embedding_text = "Lollms is a cool project."
460
+ embedding_vector = binding.embed(embedding_text, model=test_embedding_model)
461
+ ASCIIColors.green(f"Embedding for '{embedding_text}' (first 5 dims): {embedding_vector[:5]}...")
462
+ ASCIIColors.info(f"Embedding vector dimension: {len(embedding_vector)}")
463
+ except Exception as e:
464
+ ASCIIColors.warning(f"Could not get embedding: {e}")
465
+
466
+ # --- Vision Model Test ---
467
+ dummy_image_path = "gemini_dummy_test_image.png"
468
+ try:
469
+ img = Image.new('RGB', (200, 50), color='blue')
470
+ d = ImageDraw.Draw(img)
471
+ d.text((10,10), "Test Image", fill='yellow')
472
+ img.save(dummy_image_path)
473
+ ASCIIColors.info(f"Created dummy image: {dummy_image_path}")
474
+
475
+ ASCIIColors.cyan(f"\n--- Vision Generation (using {test_vision_model_name}) ---")
476
+ binding.load_model(test_vision_model_name)
477
+ vision_prompt = "What color is the text and what does it say?"
478
+ ASCIIColors.info(f"Vision Prompt: {vision_prompt} with image {dummy_image_path}")
479
+
480
+ vision_response = binding.generate_text(
481
+ prompt=vision_prompt,
482
+ images=[dummy_image_path],
483
+ n_predict=50,
484
+ stream=False
485
+ )
486
+ if isinstance(vision_response, str):
487
+ ASCIIColors.green(f"Vision model response: {vision_response}")
488
+ else:
489
+ ASCIIColors.error(f"Vision generation failed: {vision_response}")
490
+ except Exception as e:
491
+ ASCIIColors.error(f"Error during vision test: {e}")
492
+ trace_exception(e)
493
+ finally:
494
+ if os.path.exists(dummy_image_path):
495
+ os.remove(dummy_image_path)
496
+
497
+ except Exception as e:
498
+ ASCIIColors.error(f"An error occurred during testing: {e}")
499
+ trace_exception(e)
500
+
501
+ ASCIIColors.yellow("\nGeminiBinding test finished.")
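The `__main__` block above exercises the binding directly. The sketch below shows the same binding driven through `LollmsClient` instead; it is a minimal, hedged example, not part of the release. The binding key `"gemini"` is inferred from the new `llm_bindings/gemini` entry in RECORD, and the client is assumed to forward `generate_text` to the active binding as in the README examples.

```python
# Minimal sketch (not part of the release): use the new Gemini binding through LollmsClient.
# Assumptions: the binding is selectable as binding_name="gemini" (inferred from the
# llm_bindings/gemini RECORD entry) and GOOGLE_API_KEY is set, as GeminiBinding requires.
import os
from lollms_client import LollmsClient

if "GOOGLE_API_KEY" not in os.environ:
    raise RuntimeError("GeminiBinding reads the API key from GOOGLE_API_KEY")

lc = LollmsClient(
    binding_name="gemini",               # assumed binding key
    model_name="gemini-1.5-pro-latest",  # model name used in the binding's own test script
)

# generate_text is forwarded to the binding's generate_text shown above.
print(lc.generate_text("Explain the importance of bees in one paragraph.", n_predict=100))
```

lollms_client/llm_bindings/litellm/__init__.py ADDED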
@@ -0,0 +1,201 @@
1
+ # bindings/LiteLLM/binding.py
2
+ import requests
3
+ import json
4
+ from lollms_client.lollms_llm_binding import LollmsLLMBinding
5
+ from lollms_client.lollms_types import MSG_TYPE
6
+ from lollms_client.lollms_discussion import LollmsDiscussion
7
+ from lollms_client.lollms_utilities import encode_image
8
+ from typing import Optional, Callable, List, Union, Dict
9
+ from ascii_colors import ASCIIColors, trace_exception
10
+
11
+ # Use pipmaster to ensure required packages are installed
12
+ try:
13
+ import pipmaster as pm
14
+ except ImportError:
15
+ print("Pipmaster not found. Please install it using 'pip install pipmaster'")
16
+ raise
17
+
18
+ # Ensure requests and tiktoken are installed
19
+ pm.ensure_packages(["requests", "tiktoken"])
20
+
21
+ import tiktoken
22
+
23
+ BindingName = "LiteLLMBinding"
24
+
25
+ def get_icon_path(model_name: str) -> str:
26
+ model_name = model_name.lower()
27
+ if 'gpt' in model_name: return '/bindings/openai/logo.png'
28
+ if 'mistral' in model_name or 'mixtral' in model_name: return '/bindings/mistral/logo.png'
29
+ if 'claude' in model_name: return '/bindings/anthropic/logo.png'
30
+ return '/bindings/litellm/logo.png'
31
+
32
+ class LiteLLMBinding(LollmsLLMBinding):
33
+ """
34
+ A binding for the LiteLLM proxy using direct HTTP requests.
35
+ This version includes detailed logging, a fallback for listing models,
36
+ and correct payload formatting for both streaming and non-streaming modes.
37
+ """
38
+
39
+ def __init__(self, host_address: str, model_name: str, service_key: str = "anything", verify_ssl_certificate: bool = True, **kwargs):
40
+ super().__init__(binding_name="litellm")
41
+ self.host_address = host_address.rstrip('/')
42
+ self.model_name = model_name
43
+ self.service_key = service_key
44
+ self.verify_ssl_certificate = verify_ssl_certificate
45
+
46
+ def _perform_generation(self, messages: List[Dict], n_predict: Optional[int], stream: bool, temperature: float, top_p: float, repeat_penalty: float, seed: Optional[int], streaming_callback: Optional[Callable[[str, MSG_TYPE], None]]) -> Union[str, dict]:
47
+ url = f'{self.host_address}/v1/chat/completions'
48
+ headers = {'Content-Type': 'application/json', 'Authorization': f'Bearer {self.service_key}'}
49
+ payload = {
50
+ "model": self.model_name, "messages": messages, "max_tokens": n_predict,
51
+ "temperature": temperature, "top_p": top_p, "frequency_penalty": repeat_penalty,
52
+ "stream": stream
53
+ }
54
+ if seed is not None: payload["seed"] = seed
55
+
56
+ payload = {k: v for k, v in payload.items() if v is not None}
57
+ output = ""
58
+ try:
59
+ response = requests.post(url, headers=headers, data=json.dumps(payload), stream=stream, verify=self.verify_ssl_certificate)
60
+ response.raise_for_status()
61
+
62
+ if stream:
63
+ for line in response.iter_lines():
64
+ if line:
65
+ decoded_line = line.decode('utf-8')
66
+ if decoded_line.startswith('data: '):
67
+ if '[DONE]' in decoded_line: break
68
+ json_data_string = decoded_line[6:]
69
+ try:
70
+ chunk_data = json.loads(json_data_string)
71
+ delta = chunk_data.get('choices', [{}])[0].get('delta', {})
72
+ if 'content' in delta and delta['content'] is not None:
73
+ word = delta['content']
74
+ if streaming_callback and not streaming_callback(word, MSG_TYPE.MSG_TYPE_CHUNK):
75
+ return output
76
+ output += word
77
+ except json.JSONDecodeError: continue
78
+ else:
79
+ full_response = response.json()
80
+ output = full_response['choices'][0]['message']['content']
81
+ if streaming_callback:
82
+ streaming_callback(output, MSG_TYPE.MSG_TYPE_CHUNK)
83
+ except Exception as e:
84
+ error_message = f"An error occurred: {e}\nResponse: {response.text if 'response' in locals() else 'No response'}"
85
+ trace_exception(e)
86
+ if streaming_callback: streaming_callback(error_message, MSG_TYPE.MSG_TYPE_EXCEPTION)
87
+ return {"status": "error", "message": error_message}
88
+ return output
89
+
90
+ def generate_text(self, prompt: str, images: Optional[List[str]] = None, system_prompt: str = "", n_predict: Optional[int] = None, stream: Optional[bool] = None, temperature: float = 0.7, top_p: float = 0.9, repeat_penalty: float = 1.1, seed: Optional[int] = None, streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None, **kwargs) -> Union[str, dict]:
91
+ """Generates text from a prompt, correctly formatting for text-only and multi-modal cases."""
92
+ is_streaming = stream if stream is not None else (streaming_callback is not None)
93
+
94
+ messages = []
95
+ if system_prompt:
96
+ messages.append({"role": "system", "content": system_prompt})
97
+
98
+ # --- THIS IS THE CRITICAL FIX ---
99
+ if images:
100
+ # If images are present, use the multi-modal list format for content
101
+ user_content = [{"type": "text", "text": prompt}]
102
+ for image_path in images:
103
+ base64_image = encode_image(image_path)
104
+ user_content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}})
105
+ messages.append({"role": "user", "content": user_content})
106
+ else:
107
+ # If no images, use a simple string for content to avoid the API error
108
+ messages.append({"role": "user", "content": prompt})
109
+ # --- END OF FIX ---
110
+
111
+ return self._perform_generation(messages, n_predict, is_streaming, temperature, top_p, repeat_penalty, seed, streaming_callback)
112
+
113
+ def chat(self, discussion: LollmsDiscussion, branch_tip_id: Optional[str] = None, n_predict: Optional[int] = None, stream: Optional[bool] = None, temperature: float = 0.7, top_p: float = 0.9, repeat_penalty: float = 1.1, seed: Optional[int] = None, streaming_callback: Optional[Callable[[str, MSG_TYPE], None]] = None, **kwargs) -> Union[str, dict]:
114
+ is_streaming = stream if stream is not None else (streaming_callback is not None)
115
+ messages = discussion.export("openai_chat", branch_tip_id)
116
+ return self._perform_generation(messages, n_predict, is_streaming, temperature, top_p, repeat_penalty, seed, streaming_callback)
117
+
118
+ def embed(self, text: str, **kwargs) -> List[float]:
119
+ url = f'{self.host_address}/v1/embeddings'
120
+ headers = {'Content-Type': 'application/json', 'Authorization': f'Bearer {self.service_key}'}
121
+ payload = {"model": self.model_name, "input": text}
122
+ try:
123
+ response = requests.post(url, headers=headers, data=json.dumps(payload), verify=self.verify_ssl_certificate)
124
+ response.raise_for_status()
125
+ return response.json()['data'][0]['embedding']
126
+ except Exception as e:
127
+ trace_exception(e)
128
+ return []
129
+
130
+ def tokenize(self, text: str) -> list:
131
+ return tiktoken.model.encoding_for_model("gpt-3.5-turbo").encode(text)
132
+
133
+ def detokenize(self, tokens: list) -> str:
134
+ return tiktoken.model.encoding_for_model("gpt-3.5-turbo").decode(tokens)
135
+
136
+ def count_tokens(self, text: str) -> int:
137
+ return len(self.tokenize(text))
138
+
139
+ def _list_models_openai_fallback(self) -> List[Dict]:
140
+ ASCIIColors.warning("--- [LiteLLM Binding] Falling back to /v1/models endpoint. Rich metadata will be unavailable.")
141
+ url = f'{self.host_address}/v1/models'
142
+ headers = {'Authorization': f'Bearer {self.service_key}'}
143
+ entries = []
144
+ try:
145
+ response = requests.get(url, headers=headers, verify=self.verify_ssl_certificate)
146
+ response.raise_for_status()
147
+ models_data = response.json().get('data', [])
148
+ for model in models_data:
149
+ model_name = model.get('id')
150
+ entries.append({
151
+ "category": "api", "datasets": "unknown", "icon": get_icon_path(model_name),
152
+ "license": "unknown", "model_creator": model.get('owned_by', 'unknown'),
153
+ "name": model_name, "provider": "litellm", "rank": "1.0", "type": "api",
154
+ "variants": [{"name": model_name, "size": -1}]
155
+ })
156
+ except Exception as e:
157
+ ASCIIColors.error(f"--- [LiteLLM Binding] Fallback method failed: {e}")
158
+ return entries
159
+
160
+ def listModels(self) -> List[Dict]:
161
+ url = f'{self.host_address}/model/info'
162
+ headers = {'Authorization': f'Bearer {self.service_key}'}
163
+ entries = []
164
+ ASCIIColors.yellow(f"--- [LiteLLM Binding] Attempting to list models from: {url}")
165
+ try:
166
+ response = requests.get(url, headers=headers, verify=self.verify_ssl_certificate)
167
+ if response.status_code == 404:
168
+ ASCIIColors.warning("--- [LiteLLM Binding] /model/info endpoint not found (404).")
169
+ return self._list_models_openai_fallback()
170
+ response.raise_for_status()
171
+ models_data = response.json().get('data', [])
172
+ ASCIIColors.info(f"--- [LiteLLM Binding] Successfully parsed {len(models_data)} models from primary endpoint.")
173
+ for model in models_data:
174
+ model_name = model.get('model_name')
175
+ if not model_name: continue
176
+ model_info = model.get('model_info', {})
177
+ context_size = model_info.get('max_tokens', model_info.get('max_input_tokens', 4096))
178
+ entries.append({
179
+ "category": "api", "datasets": "unknown", "icon": get_icon_path(model_name),
180
+ "license": "unknown", "model_creator": model_info.get('owned_by', 'unknown'),
181
+ "name": model_name, "provider": "litellm", "rank": "1.0", "type": "api",
182
+ "variants": [{
183
+ "name": model_name, "size": context_size,
184
+ "input_cost_per_token": model_info.get('input_cost_per_token', 0),
185
+ "output_cost_per_token": model_info.get('output_cost_per_token', 0),
186
+ "max_output_tokens": model_info.get('max_output_tokens', 0),
187
+ }]
188
+ })
189
+ except requests.exceptions.RequestException as e:
190
+ ASCIIColors.error(f"--- [LiteLLM Binding] Network error when trying to list models: {e}")
191
+ if "404" in str(e): return self._list_models_openai_fallback()
192
+ except Exception as e:
193
+ ASCIIColors.error(f"--- [LiteLLM Binding] An unexpected error occurred while listing models: {e}")
194
+ return entries
195
+
196
+ def get_model_info(self) -> dict:
197
+ return {"name": "LiteLLM", "host_address": self.host_address, "model_name": self.model_name}
198
+
199
+ def load_model(self, model_name: str) -> bool:
200
+ self.model_name = model_name
201
+ return True
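Unlike the Gemini binding, this file ships without a test script, so the following is a minimal, hedged usage sketch. The proxy URL and model route are placeholders rather than values from this release; only the constructor and methods defined above are used, and the import path is inferred from the RECORD entry.

```python
# Minimal sketch (not part of the release): direct use of the new LiteLLMBinding.
# Assumptions: a LiteLLM proxy is reachable at the placeholder URL below and exposes a
# model route named "gpt-3.5-turbo"; the import path is inferred from RECORD.
from lollms_client.llm_bindings.litellm import LiteLLMBinding

binding = LiteLLMBinding(
    host_address="http://localhost:4000",  # placeholder proxy address
    model_name="gpt-3.5-turbo",            # placeholder model route configured on the proxy
    service_key="anything",                # binding default when the proxy requires no key
)

print(binding.listModels())  # tries /model/info first, falls back to /v1/models on 404

def on_chunk(chunk, msg_type):
    print(chunk, end="", flush=True)
    return True  # returning False tells _perform_generation to stop streaming

binding.generate_text(
    "Write a haiku about proxies.",
    stream=True,
    streaming_callback=on_chunk,
)
```

lollms_client/lollms_core.py CHANGED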
@@ -1562,7 +1562,7 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
1562
1562
  else:
1563
1563
  _substitute_code_uuids_recursive(item, code_store)
1564
1564
 
1565
- discovery_step_id = log_event("Discovering tools",MSG_TYPE.MSG_TYPE_STEP_START)
1565
+ discovery_step_id = log_event("**Discovering tools**",MSG_TYPE.MSG_TYPE_STEP_START)
1566
1566
  # --- 1. Discover Available Tools ---
1567
1567
  available_tools = []
1568
1568
  if use_mcps and self.mcp:
@@ -1595,12 +1595,12 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
1595
1595
  formatted_tools_list += "\n**request_clarification**:\nUse if the user's request is ambiguous and you cannot infer a clear idea of their intent. This tool has no parameters."
1596
1596
  formatted_tools_list += "\n**final_answer**:\nUse when you are ready to respond to the user. This tool has no parameters."
1597
1597
 
1598
- if discovery_step_id: log_event("Discovering tools",MSG_TYPE.MSG_TYPE_STEP_END, event_id=discovery_step_id)
1598
+ if discovery_step_id: log_event("**Discovering tools**",MSG_TYPE.MSG_TYPE_STEP_END, event_id=discovery_step_id)
1599
1599
 
1600
1600
  # --- 2. Dynamic Reasoning Loop ---
1601
1601
  for i in range(max_reasoning_steps):
1602
1602
  try:
1603
- reasoning_step_id = log_event(f"Reasoning Step {i+1}/{max_reasoning_steps}", MSG_TYPE.MSG_TYPE_STEP_START)
1603
+ reasoning_step_id = log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_START)
1604
1604
  user_context = f'Original User Request: "{original_user_prompt}"'
1605
1605
  if images: user_context += f'\n(Note: {len(images)} image(s) were provided with this request.)'
1606
1606
 
@@ -1636,6 +1636,9 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
1636
1636
  system_prompt=reasoning_system_prompt, temperature=decision_temperature,
1637
1637
  images=images if i == 0 else None
1638
1638
  )
1639
+ if structured_action_response is None:
1640
+ log_event("**Error generating thought.** Retrying..", MSG_TYPE.MSG_TYPE_EXCEPTION)
1641
+ continue
1639
1642
  if debug: log_prompt(structured_action_response, f"RAW REASONING RESPONSE (Step {i+1})")
1640
1643
 
1641
1644
  try:
@@ -1651,11 +1654,11 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
1651
1654
  except (json.JSONDecodeError, TypeError) as e:
1652
1655
  current_scratchpad += f"\n\n### Step {i+1} Failure\n- **Error:** Failed to generate a valid JSON action: {e}"
1653
1656
  log_event(f"Step Failure: Invalid JSON action.", MSG_TYPE.MSG_TYPE_EXCEPTION, metadata={"details": str(e)})
1654
- if reasoning_step_id: log_event(f"Reasoning Step {i+1}/{max_reasoning_steps}", MSG_TYPE.MSG_TYPE_STEP_END, metadata={"error": str(e)}, event_id=reasoning_step_id)
1657
+ if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, metadata={"error": str(e)}, event_id=reasoning_step_id)
1655
1658
 
1656
1659
 
1657
1660
  current_scratchpad += f"\n\n### Step {i+1}: Thought\n{thought}"
1658
- log_event(f"Thought: {thought}", MSG_TYPE.MSG_TYPE_THOUGHT_CONTENT)
1661
+ log_event(f"**Thought**: {thought}", MSG_TYPE.MSG_TYPE_THOUGHT_CONTENT)
1659
1662
 
1660
1663
  if not tool_name:
1661
1664
  # Handle error...
@@ -1668,8 +1671,8 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
1668
1671
 
1669
1672
  if tool_name == "final_answer":
1670
1673
  current_scratchpad += f"\n\n### Step {i+1}: Action\n- **Action:** Decided to formulate the final answer."
1671
- log_event("Action: Formulate final answer.", MSG_TYPE.MSG_TYPE_THOUGHT_CHUNK)
1672
- if reasoning_step_id: log_event(f"Reasoning Step {i+1}/{max_reasoning_steps}",MSG_TYPE.MSG_TYPE_STEP_END, event_id=reasoning_step_id)
1674
+ log_event("**Action**: Formulate final answer.", MSG_TYPE.MSG_TYPE_THOUGHT_CHUNK)
1675
+ if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**",MSG_TYPE.MSG_TYPE_STEP_END, event_id=reasoning_step_id)
1673
1676
  break
1674
1677
 
1675
1678
  # --- Handle the `put_code_in_buffer` tool specifically ---
@@ -1688,9 +1691,9 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
1688
1691
  tool_calls_this_turn.append({"name": "put_code_in_buffer", "params": tool_params, "result": tool_result})
1689
1692
  observation_text = f"```json\n{json.dumps(tool_result, indent=2)}\n```"
1690
1693
  current_scratchpad += f"\n\n### Step {i+1}: Observation\n- **Action:** Called `{tool_name}`\n- **Result:**\n{observation_text}"
1691
- log_event(f"Observation: Code generated with ID: {code_uuid}", MSG_TYPE.MSG_TYPE_OBSERVATION)
1694
+ log_event(f"**Observation**:Code generated with ID: {code_uuid}", MSG_TYPE.MSG_TYPE_OBSERVATION)
1692
1695
  if code_gen_id: log_event(f"Generating code...", MSG_TYPE.MSG_TYPE_TOOL_CALL, metadata={"id": code_gen_id, "result": tool_result})
1693
- if reasoning_step_id: log_event(f"Reasoning Step {i+1}/{max_reasoning_steps}", MSG_TYPE.MSG_TYPE_STEP_END, event_id= reasoning_step_id)
1696
+ if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id= reasoning_step_id)
1694
1697
  continue # Go to the next reasoning step immediately
1695
1698
  if tool_name == 'refactor_scratchpad':
1696
1699
  scratchpad_cleaning_prompt = f"""Enhance this scratchpad content to be more organized and comprehensive. Keep relevant experience information and remove any useless redundancies. Try to log learned things from the context so that you won't make the same mistakes again. Do not remove the main objective information or any crucial information that may be useful for the next iterations. Answer directly with the new scratchpad content without any comments.
@@ -1698,13 +1701,13 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
1698
1701
  {current_scratchpad}
1699
1702
  --- END OF SCRATCHPAD ---"""
1700
1703
  current_scratchpad = self.generate_text(scratchpad_cleaning_prompt)
1701
- log_event(f"New scratchpad:\n{current_scratchpad}")
1704
+ log_event(f"**New scratchpad**:\n{current_scratchpad}", MSG_TYPE.MSG_TYPE_SCRATCHPAD)
1702
1705
 
1703
1706
  # --- Substitute UUIDs and Execute Standard Tools ---
1704
- log_event(f"Calling tool: `{tool_name}` with params:\n{dict_to_markdown(tool_params)}", MSG_TYPE.MSG_TYPE_STEP)
1707
+ log_event(f"**Calling tool**: `{tool_name}` with params:\n{dict_to_markdown(tool_params)}", MSG_TYPE.MSG_TYPE_TOOL_CALL)
1705
1708
  _substitute_code_uuids_recursive(tool_params, generated_code_store)
1706
1709
 
1707
- tool_call_id = log_event(f"Executing tool: {tool_name}",MSG_TYPE.MSG_TYPE_STEP_START, metadata={"name": tool_name, "parameters": tool_params, "id":"executing tool"})
1710
+ tool_call_id = log_event(f"**Executing tool**: {tool_name}",MSG_TYPE.MSG_TYPE_STEP_START, metadata={"name": tool_name, "parameters": tool_params, "id":"executing tool"})
1708
1711
  tool_result = None
1709
1712
  try:
1710
1713
  if tool_name.startswith("research::") and use_data_store:
@@ -1726,7 +1729,7 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
1726
1729
  trace_exception(e)
1727
1730
  tool_result = {"status": "failure", "error": f"Exception executing tool: {str(e)}"}
1728
1731
 
1729
- if tool_call_id: log_event(f"Executing tool: {tool_name}", MSG_TYPE.MSG_TYPE_STEP_END, metadata={"result": tool_result}, event_id= tool_call_id)
1732
+ if tool_call_id: log_event(f"**Executing tool**: {tool_name}", MSG_TYPE.MSG_TYPE_STEP_END, metadata={"result": tool_result}, event_id= tool_call_id)
1730
1733
 
1731
1734
  observation_text = ""
1732
1735
  sanitized_result = {}
@@ -1752,13 +1755,13 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
1752
1755
 
1753
1756
  tool_calls_this_turn.append({"name": tool_name, "params": tool_params, "result": tool_result})
1754
1757
  current_scratchpad += f"\n\n### Step {i+1}: Observation\n- **Action:** Called `{tool_name}`\n- **Result:**\n{observation_text}"
1755
- log_event(f"Observation: Result from `{tool_name}`:\n{dict_to_markdown(sanitized_result)}", MSG_TYPE.MSG_TYPE_OBSERVATION)
1758
+ log_event(f"**Observation**: Result from `{tool_name}`:\n{dict_to_markdown(sanitized_result)}", MSG_TYPE.MSG_TYPE_OBSERVATION)
1756
1759
 
1757
- if reasoning_step_id: log_event(f"Reasoning Step {i+1}/{max_reasoning_steps}", MSG_TYPE.MSG_TYPE_STEP_END, event_id = reasoning_step_id)
1760
+ if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id = reasoning_step_id)
1758
1761
  except Exception as ex:
1759
1762
  trace_exception(ex)
1760
1763
  current_scratchpad += f"\n\n### Error : {ex}"
1761
- if reasoning_step_id: log_event(f"Reasoning Step {i+1}/{max_reasoning_steps}", MSG_TYPE.MSG_TYPE_STEP_END, event_id = reasoning_step_id)
1764
+ if reasoning_step_id: log_event(f"**Reasoning Step {i+1}/{max_reasoning_steps}**", MSG_TYPE.MSG_TYPE_STEP_END, event_id = reasoning_step_id)
1762
1765
 
1763
1766
  # --- Final Answer Synthesis ---
1764
1767
  synthesis_id = log_event("Synthesizing final answer...", MSG_TYPE.MSG_TYPE_STEP_START)
@@ -1777,6 +1780,17 @@ Provide your response as a single JSON object inside a JSON markdown tag. Use th
1777
1780
 
1778
1781
 
1779
1782
  final_answer_text = self.generate_text(prompt=final_answer_prompt, system_prompt=system_prompt, images=images, stream=streaming_callback is not None, streaming_callback=streaming_callback, temperature=final_answer_temperature, **llm_generation_kwargs)
1783
+ if isinstance(final_answer_text, dict):
1784
+ if streaming_callback:
1785
+ streaming_callback(final_answer_text["error"], MSG_TYPE.MSG_TYPE_EXCEPTION)
1786
+ return {
1787
+ "final_answer": "",
1788
+ "final_scratchpad": current_scratchpad,
1789
+ "tool_calls": tool_calls_this_turn,
1790
+ "sources": sources_this_turn,
1791
+ "clarification_required": False,
1792
+ "error": final_answer_text["error"]
1793
+ }
1780
1794
  final_answer = self.remove_thinking_blocks(final_answer_text)
1781
1795
  if debug: log_prompt(final_answer_text, "FINAL ANSWER RESPONSE")
1782
1796
 
@@ -423,9 +423,34 @@ class LollmsDiscussion:
423
423
  else:
424
424
  return cls(lollmsClient=lollms_client, discussion_id=kwargs.get('id'), **init_args)
425
425
 
426
- def get_messages(self):
427
- """Returns the list of messages"""
428
- return self._db_discussion.messages
426
+ def get_messages(self, branch_id: Optional[str] = None) -> Union[List[LollmsMessage], Optional[LollmsMessage]]:
427
+ """
428
+ Returns messages from the discussion with branch-aware logic.
429
+
430
+ - If no branch_id is provided, it returns a list of all messages
431
+ in the currently active branch, ordered from root to leaf.
432
+ - If a branch_id is provided, it returns the single message object
433
+ (the "leaf") corresponding to that ID.
434
+
435
+ Args:
436
+ branch_id: The ID of the leaf message. If provided, only this
437
+ message is returned. If None, the full active branch is returned.
438
+
439
+ Returns:
440
+ A list of LollmsMessage objects for the active branch, or a single
441
+ LollmsMessage if a branch_id is specified, or None if the ID is not found.
442
+ """
443
+ if branch_id is None:
444
+ # Case 1: No ID, return the current active branch as a list of messages
445
+ leaf_id = self.active_branch_id
446
+ return self.get_branch(leaf_id)
447
+ else:
448
+ # Case 2: ID provided, return just the single leaf message
449
+ if branch_id in self._message_index:
450
+ return LollmsMessage(self, self._message_index[branch_id])
451
+ else:
452
+ return None
453
+
429
454
 
430
455
  def __getattr__(self, name: str) -> Any:
431
456
  """Proxies attribute getting to the underlying discussion object."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lollms_client
3
- Version: 0.25.1
3
+ Version: 0.25.6
4
4
  Summary: A client library for LoLLMs generate endpoint
5
5
  Author-email: ParisNeo <parisneoai@gmail.com>
6
6
  License: Apache Software License
@@ -169,6 +169,107 @@ except Exception as e:
169
169
  ```
170
170
  For a comprehensive guide on function calling and setting up tools, please refer to the [Usage Guide (DOC_USE.md)](DOC_USE.md).
171
171
 
172
+ ### 🤖 Advanced Agentic Generation with RAG: `generate_with_mcp_rag`
173
+
174
+ For more complex tasks, `generate_with_mcp_rag` provides a powerful, built-in agent that uses a ReAct-style (Reason, Act) loop. This agent can reason about a user's request, use tools (MCP), retrieve information from knowledge bases (RAG), and adapt its plan based on the results of its actions.
175
+
176
+ **Key Agent Capabilities:**
177
+
178
+ * **Observe-Think-Act Loop:** The agent iteratively reviews its progress, thinks about the next logical step, and takes an action (like calling a tool).
179
+ * **Tool Integration (MCP):** Can use any available MCP tools, such as searching the web or executing code.
180
+ * **Retrieval-Augmented Generation (RAG):** You can provide one or more "data stores" (knowledge bases). The agent gains a `research::{store_name}` tool to query these stores for relevant information.
181
+ * **In-Memory Code Generation:** The agent has a special `generate_code` tool. This allows it to first write a piece of code (e.g., a complex Python script) and then pass that code to another tool (e.g., `python_code_interpreter`) in a subsequent step.
182
+ * **Stateful Progress Tracking:** Designed for rich UI experiences, it emits `step_start` and `step_end` events with unique IDs via the streaming callback. This allows an application to track the agent's individual thoughts and long-running tool calls in real-time.
183
+ * **Self-Correction:** Includes a `refactor_scratchpad` tool for the agent to clean up its own thought process if it becomes cluttered.
184
+
185
+ Here is an example of using the agent to answer a question by first performing RAG on a custom knowledge base and then using the retrieved information to generate and execute code.
186
+
187
+ ```python
188
+ import json
189
+ from lollms_client import LollmsClient, MSG_TYPE
190
+ from ascii_colors import ASCIIColors
191
+
192
+ # 1. Define a mock RAG data store and retrieval function
193
+ project_notes = {
194
+ "project_phoenix_details": "Project Phoenix has a current budget of $500,000 and an expected quarterly growth rate of 15%."
195
+ }
196
+
197
+ def retrieve_from_notes(query: str, top_k: int = 1, min_similarity: float = 0.5):
198
+ """A simple keyword-based retriever for our mock data store."""
199
+ results = []
200
+ for key, text in project_notes.items():
201
+ if query.lower() in text.lower():
202
+ results.append({"source": key, "content": text})
203
+ return results[:top_k]
204
+
205
+ # 2. Define a detailed streaming callback to visualize the agent's process
206
+ def agent_streaming_callback(chunk: str, msg_type: MSG_TYPE, params: dict = None, metadata: list = None) -> bool:
207
+ if not params: params = {}
208
+ msg_id = params.get("id", "")
209
+
210
+ if msg_type == MSG_TYPE.MSG_TYPE_STEP_START:
211
+ ASCIIColors.yellow(f"\n>> Agent Step Start [ID: {msg_id}]: {chunk}")
212
+ elif msg_type == MSG_TYPE.MSG_TYPE_STEP_END:
213
+ ASCIIColors.green(f"<< Agent Step End [ID: {msg_id}]: {chunk}")
214
+ if params.get('result'):
215
+ ASCIIColors.cyan(f" Result: {json.dumps(params['result'], indent=2)}")
216
+ elif msg_type == MSG_TYPE.MSG_TYPE_THOUGHT_CONTENT:
217
+ ASCIIColors.magenta(f"\n🤔 Agent Thought: {chunk}")
218
+ elif msg_type == MSG_TYPE.MSG_TYPE_TOOL_CALL:
219
+ ASCIIColors.blue(f"\n🛠️ Agent Action: {chunk}")
220
+ elif msg_type == MSG_TYPE.MSG_TYPE_OBSERVATION:
221
+ ASCIIColors.cyan(f"\n👀 Agent Observation: {chunk}")
222
+ elif msg_type == MSG_TYPE.MSG_TYPE_CHUNK:
223
+ print(chunk, end="", flush=True) # Final answer stream
224
+ return True
225
+
226
+ try:
227
+ # 3. Initialize LollmsClient with an LLM and local tools enabled
228
+ lc = LollmsClient(
229
+ binding_name="ollama", # Use Ollama
230
+ model_name="llama3", # Or any capable model like mistral, gemma, etc.
231
+ mcp_binding_name="local_mcp" # Enable local tools like python_code_interpreter
232
+ )
233
+
234
+ # 4. Define the user prompt and the RAG data store
235
+ prompt = "Based on my notes about Project Phoenix, write and run a Python script to calculate its projected budget after two quarters."
236
+
237
+ rag_data_store = {
238
+ "project_notes": {"callable": retrieve_from_notes}
239
+ }
240
+
241
+ ASCIIColors.yellow(f"User Prompt: {prompt}")
242
+ print("\n" + "="*50 + "\nAgent is now running...\n" + "="*50)
243
+
244
+ # 5. Run the agent
245
+ agent_output = lc.generate_with_mcp_rag(
246
+ prompt=prompt,
247
+ use_data_store=rag_data_store,
248
+ use_mcps=["python_code_interpreter"], # Make specific tools available
249
+ streaming_callback=agent_streaming_callback,
250
+ max_reasoning_steps=5
251
+ )
252
+
253
+ print("\n" + "="*50 + "\nAgent finished.\n" + "="*50)
254
+
255
+ # 6. Print the final results
256
+ if agent_output.get("error"):
257
+ ASCIIColors.error(f"\nAgent Error: {agent_output['error']}")
258
+ else:
259
+ ASCIIColors.green("\n--- Final Answer ---")
260
+ print(agent_output.get("final_answer"))
261
+
262
+ ASCIIColors.magenta("\n--- Tool Calls ---")
263
+ print(json.dumps(agent_output.get("tool_calls", []), indent=2))
264
+
265
+ ASCIIColors.cyan("\n--- RAG Sources ---")
266
+ print(json.dumps(agent_output.get("sources", []), indent=2))
267
+
268
+ except Exception as e:
269
+ ASCIIColors.red(f"\nAn unexpected error occurred: {e}")
270
+
271
+ ```
272
+
172
273
  ## Documentation
173
274
 
174
275
  For more in-depth information, please refer to:
@@ -186,7 +287,7 @@ graph LR
186
287
  LC -- Manages --> LLB[LLM Binding];
187
288
  LC -- Manages --> MCPB[MCP Binding];
188
289
  LC -- Orchestrates --> MCP_Interaction[generate_with_mcp];
189
- LC -- Provides --> HighLevelOps[High-Level Ops<br>(summarize, deep_analyze etc.)];
290
+ LC -- Provides --> HighLevelOps["High-Level Ops<br>(summarize, deep_analyze etc.)"];
190
291
  LC -- Provides Access To --> DM[DiscussionManager];
191
292
  LC -- Provides Access To --> ModalityBindings[TTS, TTI, STT etc.];
192
293
  end
@@ -195,16 +296,16 @@ graph LR
195
296
  LLB --> LollmsServer[LoLLMs Server];
196
297
  LLB --> OllamaServer[Ollama];
197
298
  LLB --> OpenAPIServer[OpenAI API];
198
- LLB --> LocalGGUF[Local GGUF<br>(pythonllamacpp / llamacpp server)];
199
- LLB --> LocalHF[Local HuggingFace<br>(transformers / vLLM)];
299
+ LLB --> LocalGGUF["Local GGUF<br>(pythonllamacpp / llamacpp server)"];
300
+ LLB --> LocalHF["Local HuggingFace<br>(transformers / vLLM)"];
200
301
  end
201
302
 
202
303
  MCP_Interaction --> MCPB;
203
- MCPB --> LocalTools[Local Python Tools<br>(via local_mcp)];
204
- MCPB --> RemoteTools[Remote MCP Tool Servers<br>(Future Potential)];
304
+ MCPB --> LocalTools["Local Python Tools<br>(via local_mcp)"];
305
+ MCPB --> RemoteTools["Remote MCP Tool Servers<br>(Future Potential)"];
205
306
 
206
307
 
207
- ModalityBindings --> ModalityServices[Modality Services<br>(e.g., LoLLMs Server TTS/TTI, local Bark/XTTS)];
308
+ ModalityBindings --> ModalityServices["Modality Services<br>(e.g., LoLLMs Server TTS/TTI, local Bark/XTTS)"];
208
309
  ```
209
310
 
210
311
  * **`LollmsClient`**: The central class for all interactions. It holds the currently active LLM binding, an optional MCP binding, and provides access to modality bindings and high-level operations.
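lollms_client-0.25.1.dist-info/RECORD → lollms_client-0.25.6.dist-info/RECORD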
@@ -26,10 +26,10 @@ examples/mcp_examples/openai_mcp.py,sha256=7IEnPGPXZgYZyiES_VaUbQ6viQjenpcUxGiHE
26
26
  examples/mcp_examples/run_remote_mcp_example_v2.py,sha256=bbNn93NO_lKcFzfIsdvJJijGx2ePFTYfknofqZxMuRM,14626
27
27
  examples/mcp_examples/run_standard_mcp_example.py,sha256=GSZpaACPf3mDPsjA8esBQVUsIi7owI39ca5avsmvCxA,9419
28
28
  examples/test_local_models/local_chat.py,sha256=slakja2zaHOEAUsn2tn_VmI4kLx6luLBrPqAeaNsix8,456
29
- lollms_client/__init__.py,sha256=Vt2zeJ4Ekn2UWxfSKbn_pjE-QGL7uwoTnbTFuFIOyUk,1047
29
+ lollms_client/__init__.py,sha256=pXsP6DSu8Afm4PZN5PmsBipV-ZOKCS81s7bngvYCcgU,1047
30
30
  lollms_client/lollms_config.py,sha256=goEseDwDxYJf3WkYJ4IrLXwg3Tfw73CXV2Avg45M_hE,21876
31
- lollms_client/lollms_core.py,sha256=m_qfzybasY61KgAPVa84tdkqJWIog9iuIZc88pQQ-vw,158842
32
- lollms_client/lollms_discussion.py,sha256=JqKx--a6YMzL6ec6N9OD0B9oRlmkSV_KDKXjqP8291Y,47636
31
+ lollms_client/lollms_core.py,sha256=TujAapwba9gDe6EEY4olVSP-lZrLftY4LOSex-D-IPs,159610
32
+ lollms_client/lollms_discussion.py,sha256=By_dN3GJ7AtInkOUdcrXuVhKliBirKd3ZxFkaRmt1yM,48843
33
33
  lollms_client/lollms_js_analyzer.py,sha256=01zUvuO2F_lnUe_0NLxe1MF5aHE1hO8RZi48mNPv-aw,8361
34
34
  lollms_client/lollms_llm_binding.py,sha256=Kpzhs5Jx8eAlaaUacYnKV7qIq2wbME5lOEtKSfJKbpg,12161
35
35
  lollms_client/lollms_mcp_binding.py,sha256=0rK9HQCBEGryNc8ApBmtOlhKE1Yfn7X7xIQssXxS2Zc,8933
@@ -43,6 +43,8 @@ lollms_client/lollms_ttv_binding.py,sha256=KkTaHLBhEEdt4sSVBlbwr5i_g_TlhcrwrT-7D
43
43
  lollms_client/lollms_types.py,sha256=0iSH1QHRRD-ddBqoL9EEKJ8wWCuwDUlN_FrfbCdg7Lw,3522
44
44
  lollms_client/lollms_utilities.py,sha256=zx1X4lAXQ2eCUM4jDpu_1QV5oMGdFkpaSEdTASmaiqE,13545
45
45
  lollms_client/llm_bindings/__init__.py,sha256=9sWGpmWSSj6KQ8H4lKGCjpLYwhnVdL_2N7gXCphPqh4,14
46
+ lollms_client/llm_bindings/gemini/__init__.py,sha256=ZflZVwAkAa-GfctuehOWIav977oTCdXUisQy253PFsk,21611
47
+ lollms_client/llm_bindings/litellm/__init__.py,sha256=xlTaKosxK1tKz1YJ6witK6wAJHIENTV6O7ZbfpUOdB4,11289
46
48
  lollms_client/llm_bindings/llamacpp/__init__.py,sha256=Qj5RvsgPeHGNfb5AEwZSzFwAp4BOWjyxmm9qBNtstrc,63716
47
49
  lollms_client/llm_bindings/lollms/__init__.py,sha256=jfiCGJqMensJ7RymeGDDJOsdokEdlORpw9ND_Q30GYc,17831
48
50
  lollms_client/llm_bindings/ollama/__init__.py,sha256=QufsYqak2VlA2XGbzks8u55yNJFeDH2V35NGeZABkm8,32554
@@ -79,8 +81,8 @@ lollms_client/tts_bindings/piper_tts/__init__.py,sha256=0IEWG4zH3_sOkSb9WbZzkeV5
79
81
  lollms_client/tts_bindings/xtts/__init__.py,sha256=FgcdUH06X6ZR806WQe5ixaYx0QoxtAcOgYo87a2qxYc,18266
80
82
  lollms_client/ttv_bindings/__init__.py,sha256=UZ8o2izQOJLQgtZ1D1cXoNST7rzqW22rL2Vufc7ddRc,3141
81
83
  lollms_client/ttv_bindings/lollms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
- lollms_client-0.25.1.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
83
- lollms_client-0.25.1.dist-info/METADATA,sha256=4yR9ohOc_JjNnJeDRTdbzfYbKkmMpl0wbw0Y9D2P0gc,13401
84
- lollms_client-0.25.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
85
- lollms_client-0.25.1.dist-info/top_level.txt,sha256=NI_W8S4OYZvJjb0QWMZMSIpOrYzpqwPGYaklhyWKH2w,23
86
- lollms_client-0.25.1.dist-info/RECORD,,
84
+ lollms_client-0.25.6.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
85
+ lollms_client-0.25.6.dist-info/METADATA,sha256=dqV9ITu1ABd8rtnvPb4N7K3qUTCD6stQJhys08xoUJs,18659
86
+ lollms_client-0.25.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
87
+ lollms_client-0.25.6.dist-info/top_level.txt,sha256=NI_W8S4OYZvJjb0QWMZMSIpOrYzpqwPGYaklhyWKH2w,23
88
+ lollms_client-0.25.6.dist-info/RECORD,,