lollms-client 0.29.1__tar.gz → 0.29.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lollms-client might be problematic.
- {lollms_client-0.29.1/lollms_client.egg-info → lollms_client-0.29.3}/PKG-INFO +86 -34
- {lollms_client-0.29.1 → lollms_client-0.29.3}/README.md +85 -33
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/__init__.py +1 -1
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/llamacpp/__init__.py +5 -2
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_core.py +35 -10
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_discussion.py +86 -65
- {lollms_client-0.29.1 → lollms_client-0.29.3/lollms_client.egg-info}/PKG-INFO +86 -34
- {lollms_client-0.29.1 → lollms_client-0.29.3}/LICENSE +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/article_summary/article_summary.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/console_discussion/console_app.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/console_discussion.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/deep_analyze/deep_analyse.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/deep_analyze/deep_analyze_multiple_files.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/function_calling_with_local_custom_mcp.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/generate_a_benchmark_for_safe_store.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/generate_and_speak/generate_and_speak.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/generate_game_sfx/generate_game_fx.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/generate_text_with_multihop_rag_example.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/gradio_chat_app.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/gradio_lollms_chat.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/internet_search_with_rag.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/lollms_chat/calculator.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/lollms_chat/derivative.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/lollms_chat/test_openai_compatible_with_lollms_chat.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/lollms_discussions_test.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/mcp_examples/external_mcp.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/mcp_examples/local_mcp.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/mcp_examples/openai_mcp.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/mcp_examples/run_remote_mcp_example_v2.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/mcp_examples/run_standard_mcp_example.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/simple_text_gen_test.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/simple_text_gen_with_image_test.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/test_local_models/local_chat.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/text_2_audio.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/text_2_image.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/text_2_image_diffusers.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/text_and_image_2_audio.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/text_gen.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/examples/text_gen_system_prompt.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/azure_openai/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/claude/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/gemini/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/grok/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/groq/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/litellm/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/lollms_webui/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/mistral/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/ollama/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/open_router/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/openai/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/openllm/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/pythonllamacpp/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/tensor_rt/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/transformers/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/vllm/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_config.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_js_analyzer.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_llm_binding.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_mcp_binding.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_personality.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_python_analyzer.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_stt_binding.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_tti_binding.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_ttm_binding.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_tts_binding.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_ttv_binding.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_types.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_utilities.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/mcp_bindings/local_mcp/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/mcp_bindings/local_mcp/default_tools/file_writer/file_writer.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/mcp_bindings/local_mcp/default_tools/generate_image_from_prompt/generate_image_from_prompt.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/mcp_bindings/local_mcp/default_tools/internet_search/internet_search.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/mcp_bindings/local_mcp/default_tools/python_interpreter/python_interpreter.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/mcp_bindings/remote_mcp/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/mcp_bindings/standard_mcp/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/stt_bindings/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/stt_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/stt_bindings/whisper/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/stt_bindings/whispercpp/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/tti_bindings/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/tti_bindings/dalle/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/tti_bindings/diffusers/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/tti_bindings/gemini/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/tti_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/ttm_bindings/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/ttm_bindings/audiocraft/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/ttm_bindings/bark/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/ttm_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/tts_bindings/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/tts_bindings/bark/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/tts_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/tts_bindings/piper_tts/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/tts_bindings/xtts/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/ttv_bindings/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/ttv_bindings/lollms/__init__.py +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client.egg-info/SOURCES.txt +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client.egg-info/dependency_links.txt +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client.egg-info/requires.txt +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client.egg-info/top_level.txt +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/pyproject.toml +0 -0
- {lollms_client-0.29.1 → lollms_client-0.29.3}/setup.cfg +0 -0
{lollms_client-0.29.1/lollms_client.egg-info → lollms_client-0.29.3}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lollms_client
-Version: 0.29.1
+Version: 0.29.3
 Summary: A client library for LoLLMs generate endpoint
 Author-email: ParisNeo <parisneoai@gmail.com>
 License: Apache Software License
@@ -296,9 +296,22 @@ This example showcases how `lollms-client` allows you to build powerful, knowled
 
 ### Building Stateful Agents with Memory and Data Zones
 
-The
+The `LollmsDiscussion` class provides a sophisticated system for creating stateful agents that can remember information across conversations. This is achieved through a layered system of "context zones" that are automatically combined into the AI's system prompt.
 
-
+#### Understanding the Context Zones
+
+The AI's context is more than just chat history. It's built from several distinct components, each with a specific purpose:
+
+* **`system_prompt`**: The foundational layer defining the AI's core identity, persona, and primary instructions.
+* **`memory`**: The AI's long-term, persistent memory. It stores key facts about the user or topics, built up over time using the `memorize()` method.
+* **`user_data_zone`**: Holds session-specific information about the user's current state or goals (e.g., "User is currently working on 'file.py'").
+* **`discussion_data_zone`**: Contains state or meta-information about the current conversational task (e.g., "Step 1 of the plan is complete").
+* **`personality_data_zone`**: A knowledge base or set of rules automatically injected from a `LollmsPersonality`'s `data_source`.
+* **`pruning_summary`**: An automatic, AI-generated summary of the oldest messages in a very long chat, used to conserve tokens without losing the gist of the early conversation.
+
+The `get_context_status()` method is your window into this system, showing you exactly how these zones are combined and how many tokens they consume.
+
+Let's see this in action with a "Personal Assistant" agent that learns about the user over time.
 
 ```python
 from lollms_client import LollmsClient, LollmsDataManager, LollmsDiscussion, MSG_TYPE
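In the example that follows, the zones above are set as plain string attributes on the discussion object. A minimal sketch of seeding them directly, assuming `discussion_data_zone` and `memory` are writable strings like `user_data_zone` (an assumption here; `memory` is normally populated by `memorize()`):

```python
# Illustrative only: seed the context zones described above by hand.
discussion.system_prompt = "You are a project assistant."
discussion.user_data_zone = "User's Name: Alex"
discussion.discussion_data_zone = "Current task: drafting the Q3 roadmap."  # assumed writable
discussion.memory = "Alex prefers concise, bullet-point answers."           # normally filled by memorize()
discussion.commit()  # persist the zones along with the messages
```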
@@ -320,7 +333,8 @@ if not discussion:
        id=discussion_id,
        autosave=True # Important for persistence
    )
-    # Let's preset some
+    # Let's preset some data in different zones
+    discussion.system_prompt = "You are a helpful Personal Assistant."
    discussion.user_data_zone = "User's Name: Alex\nUser's Goal: Learn about AI development."
    discussion.commit()
 else:
@@ -331,13 +345,24 @@ def run_chat_turn(prompt: str):
     """Helper function to run a single chat turn and print details."""
     ASCIIColors.cyan(f"\n> User: {prompt}")
 
-    # --- A. Check context status BEFORE the turn ---
+    # --- A. Check context status BEFORE the turn using get_context_status() ---
     ASCIIColors.magenta("\n--- Context Status (Before Generation) ---")
     status = discussion.get_context_status()
-    print(f"Max Tokens: {status.get('max_tokens')}, Current
-
-
-
+    print(f"Max Tokens: {status.get('max_tokens')}, Current Tokens: {status.get('current_tokens')}")
+
+    # Print the system context details
+    if 'system_context' in status['zones']:
+        sys_ctx = status['zones']['system_context']
+        print(f" - System Context Tokens: {sys_ctx['tokens']}")
+        # The 'breakdown' shows the individual zones that were combined
+        for name, content in sys_ctx.get('breakdown', {}).items():
+            print(f"   -> Contains '{name}': {content.split(chr(10))[0]}...")
+
+    # Print the message history details
+    if 'message_history' in status['zones']:
+        msg_hist = status['zones']['message_history']
+        print(f" - Message History Tokens: {msg_hist['tokens']} ({msg_hist['message_count']} messages)")
+
     print("------------------------------------------")
 
     # --- B. Run the chat ---
@@ -348,7 +373,7 @@ def run_chat_turn(prompt: str):
     )
     print() # Newline after stream
 
-    # --- C. Trigger memorization ---
+    # --- C. Trigger memorization to update the 'memory' zone ---
     ASCIIColors.yellow("\nTriggering memorization process...")
     discussion.memorize()
     discussion.commit() # Save the new memory to the DB
@@ -359,24 +384,30 @@ run_chat_turn("Hi there! Can you recommend a good Python library for building we
 run_chat_turn("That sounds great. By the way, my favorite programming language is Rust, I find its safety features amazing.")
 run_chat_turn("What was my favorite programming language again?")
 
-# --- Final Inspection ---
+# --- Final Inspection of Memory ---
 ASCIIColors.magenta("\n--- Final Context Status ---")
 status = discussion.get_context_status()
-print(f"Max Tokens: {status.get('max_tokens')}, Current
-
-
-print(f"
+print(f"Max Tokens: {status.get('max_tokens')}, Current Tokens: {status.get('current_tokens')}")
+if 'system_context' in status['zones']:
+    sys_ctx = status['zones']['system_context']
+    print(f" - System Context Tokens: {sys_ctx['tokens']}")
+    for name, content in sys_ctx.get('breakdown', {}).items():
+        # Print the full content of the memory zone to verify it was updated
+        if name == 'memory':
+            ASCIIColors.yellow(f"   -> Full '{name}' content:\n{content}")
+        else:
+            print(f"   -> Contains '{name}': {content.split(chr(10))[0]}...")
 print("------------------------------------------")
 
 ```
 
 #### How it Works:
 
-1. **Persistence:** The `LollmsDataManager` and
-2. **`
-3. **`
-4.
-
+1. **Persistence & Initialization:** The `LollmsDataManager` saves and loads the discussion. We initialize the `system_prompt` and `user_data_zone` to provide initial context.
+2. **`get_context_status()`:** Before each generation, we call this method. The output shows a `system_context` block with a token count for all combined zones and a `breakdown` field that lets us see the content of each individual zone that contributed to it.
+3. **`memorize()`:** After the user mentions their favorite language, `memorize()` is called. The LLM analyzes the last turn, identifies this new, important fact, and appends it to the `discussion.memory` zone.
+4. **Recall:** In the final turn, when asked to recall the favorite language, the AI has access to the updated `memory` content within its system context and can correctly answer "Rust". This demonstrates true long-term, stateful memory.
+
 
 ## Documentation
 
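To verify what `memorize()` actually stored, a small sketch that reads the memory zone back after a turn (this assumes `discussion.memory` is readable as a plain string, which step 3 above implies but the hunk does not show directly):

```python
# Illustrative check of the long-term memory zone after a chat turn.
discussion.memorize()            # ask the LLM to extract durable facts from the last turn
discussion.commit()              # persist the updated memory to the database
print(discussion.memory or "")   # assumed readable attribute; should now mention Rust in this example
```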
@@ -922,33 +953,54 @@ discussion.commit() # Save the updated memory to the database
 ```
 
 #### `get_context_status()`
-Provides a detailed, real-time breakdown of the current prompt context, showing exactly what will be sent to the model and how many tokens each part occupies.
 
-
-
+Provides a detailed, real-time breakdown of the current prompt context, showing exactly what will be sent to the model and how many tokens each major component occupies. This is crucial for debugging context issues and understanding token usage.
+
+The method accurately reflects the structure of the `lollms_text` format, where all system-level instructions (the main prompt, all data zones, and the pruning summary) are combined into a single system block.
+
+- **Return Value:** A dictionary containing:
+    - `max_tokens`: The configured maximum token limit for the discussion.
+    - `current_tokens`: The total, most accurate token count for the entire prompt, calculated using the same logic as the `chat()` method.
+    - `zones`: A dictionary with up to two keys:
+        - **`system_context`**: Present if there is any system-level content. It contains:
+            - `tokens`: The total token count for the **entire combined system block** (e.g., `!@>system:\n...\n`).
+            - `content`: The full string content of the system block, showing exactly how all zones are merged.
+            - `breakdown`: A sub-dictionary showing the raw text of each individual component (e.g., `system_prompt`, `memory`, `user_data_zone`) that was used to build the `content`.
+        - **`message_history`**: Present if there are messages in the branch. It contains:
+            - `tokens`: The total token count for the message history part of the prompt.
+            - `content`: The full string of the formatted message history.
+            - `message_count`: The number of messages included in the history.
+
+- **Use Case:** Essential for debugging context issues, visualizing how different data zones contribute to the final prompt, and monitoring token consumption.
 
 ```python
 import json
 
+# Assuming 'discussion' is an LollmsDiscussion object with some data
+discussion.system_prompt = "You are a helpful AI."
+discussion.user_data_zone = "User is named Bob."
+discussion.add_message(sender="user", content="Hello!")
+discussion.add_message(sender="assistant", content="Hi Bob!")
+
 status = discussion.get_context_status()
 print(json.dumps(status, indent=2))
 
 # Expected Output Structure:
 # {
-#   "max_tokens":
-#   "current_tokens":
+#   "max_tokens": null,
+#   "current_tokens": 46,
 #   "zones": {
-#     "
-#     "content": "You are a helpful
-#     "tokens":
-#
-#
-#
-#
+#     "system_context": {
+#       "content": "You are a helpful AI.\n\n-- User Data Zone --\nUser is named Bob.",
+#       "tokens": 25,
+#       "breakdown": {
+#         "system_prompt": "You are a helpful AI.",
+#         "user_data_zone": "User is named Bob."
+#       }
+#     }
 #   },
 #   "message_history": {
-#     "content": "!@>user:\
-#     "tokens":
+#     "content": "!@>user:\nHello!\n!@>assistant:\nHi Bob!\n",
+#     "tokens": 21,
 #     "message_count": 2
 #   }
 # }
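One practical use of the documented return shape is computing how much context budget remains. A minimal sketch using only the keys listed above (note that `max_tokens` can be `null`/`None`, as in the expected output; the 4096 fallback is an assumption, not a library default):

```python
status = discussion.get_context_status()

limit = status.get("max_tokens") or 4096          # assumed fallback when no limit is configured
used = status.get("current_tokens", 0)
print(f"{used}/{limit} tokens used, {limit - used} remaining")

# Flag prompts where the combined system block dominates the budget.
sys_ctx = status.get("zones", {}).get("system_context")
if sys_ctx and used and sys_ctx["tokens"] > used * 0.5:
    print("More than half of the prompt is system context; consider trimming the data zones.")
```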
{lollms_client-0.29.1 → lollms_client-0.29.3}/README.md
RENAMED
@@ -265,9 +265,22 @@ This example showcases how `lollms-client` allows you to build powerful, knowled
 
 ### Building Stateful Agents with Memory and Data Zones
 
-The
+The `LollmsDiscussion` class provides a sophisticated system for creating stateful agents that can remember information across conversations. This is achieved through a layered system of "context zones" that are automatically combined into the AI's system prompt.
 
-
+#### Understanding the Context Zones
+
+The AI's context is more than just chat history. It's built from several distinct components, each with a specific purpose:
+
+* **`system_prompt`**: The foundational layer defining the AI's core identity, persona, and primary instructions.
+* **`memory`**: The AI's long-term, persistent memory. It stores key facts about the user or topics, built up over time using the `memorize()` method.
+* **`user_data_zone`**: Holds session-specific information about the user's current state or goals (e.g., "User is currently working on 'file.py'").
+* **`discussion_data_zone`**: Contains state or meta-information about the current conversational task (e.g., "Step 1 of the plan is complete").
+* **`personality_data_zone`**: A knowledge base or set of rules automatically injected from a `LollmsPersonality`'s `data_source`.
+* **`pruning_summary`**: An automatic, AI-generated summary of the oldest messages in a very long chat, used to conserve tokens without losing the gist of the early conversation.
+
+The `get_context_status()` method is your window into this system, showing you exactly how these zones are combined and how many tokens they consume.
+
+Let's see this in action with a "Personal Assistant" agent that learns about the user over time.
 
 ```python
 from lollms_client import LollmsClient, LollmsDataManager, LollmsDiscussion, MSG_TYPE
@@ -289,7 +302,8 @@ if not discussion:
        id=discussion_id,
        autosave=True # Important for persistence
    )
-    # Let's preset some
+    # Let's preset some data in different zones
+    discussion.system_prompt = "You are a helpful Personal Assistant."
    discussion.user_data_zone = "User's Name: Alex\nUser's Goal: Learn about AI development."
    discussion.commit()
 else:
@@ -300,13 +314,24 @@ def run_chat_turn(prompt: str):
     """Helper function to run a single chat turn and print details."""
     ASCIIColors.cyan(f"\n> User: {prompt}")
 
-    # --- A. Check context status BEFORE the turn ---
+    # --- A. Check context status BEFORE the turn using get_context_status() ---
     ASCIIColors.magenta("\n--- Context Status (Before Generation) ---")
     status = discussion.get_context_status()
-    print(f"Max Tokens: {status.get('max_tokens')}, Current
-
-
-
+    print(f"Max Tokens: {status.get('max_tokens')}, Current Tokens: {status.get('current_tokens')}")
+
+    # Print the system context details
+    if 'system_context' in status['zones']:
+        sys_ctx = status['zones']['system_context']
+        print(f" - System Context Tokens: {sys_ctx['tokens']}")
+        # The 'breakdown' shows the individual zones that were combined
+        for name, content in sys_ctx.get('breakdown', {}).items():
+            print(f"   -> Contains '{name}': {content.split(chr(10))[0]}...")
+
+    # Print the message history details
+    if 'message_history' in status['zones']:
+        msg_hist = status['zones']['message_history']
+        print(f" - Message History Tokens: {msg_hist['tokens']} ({msg_hist['message_count']} messages)")
+
     print("------------------------------------------")
 
     # --- B. Run the chat ---
@@ -317,7 +342,7 @@ def run_chat_turn(prompt: str):
     )
     print() # Newline after stream
 
-    # --- C. Trigger memorization ---
+    # --- C. Trigger memorization to update the 'memory' zone ---
     ASCIIColors.yellow("\nTriggering memorization process...")
     discussion.memorize()
     discussion.commit() # Save the new memory to the DB
@@ -328,24 +353,30 @@ run_chat_turn("Hi there! Can you recommend a good Python library for building we
 run_chat_turn("That sounds great. By the way, my favorite programming language is Rust, I find its safety features amazing.")
 run_chat_turn("What was my favorite programming language again?")
 
-# --- Final Inspection ---
+# --- Final Inspection of Memory ---
 ASCIIColors.magenta("\n--- Final Context Status ---")
 status = discussion.get_context_status()
-print(f"Max Tokens: {status.get('max_tokens')}, Current
-
-
-print(f"
+print(f"Max Tokens: {status.get('max_tokens')}, Current Tokens: {status.get('current_tokens')}")
+if 'system_context' in status['zones']:
+    sys_ctx = status['zones']['system_context']
+    print(f" - System Context Tokens: {sys_ctx['tokens']}")
+    for name, content in sys_ctx.get('breakdown', {}).items():
+        # Print the full content of the memory zone to verify it was updated
+        if name == 'memory':
+            ASCIIColors.yellow(f"   -> Full '{name}' content:\n{content}")
+        else:
+            print(f"   -> Contains '{name}': {content.split(chr(10))[0]}...")
 print("------------------------------------------")
 
 ```
 
 #### How it Works:
 
-1. **Persistence:** The `LollmsDataManager` and
-2. **`
-3. **`
-4.
-
+1. **Persistence & Initialization:** The `LollmsDataManager` saves and loads the discussion. We initialize the `system_prompt` and `user_data_zone` to provide initial context.
+2. **`get_context_status()`:** Before each generation, we call this method. The output shows a `system_context` block with a token count for all combined zones and a `breakdown` field that lets us see the content of each individual zone that contributed to it.
+3. **`memorize()`:** After the user mentions their favorite language, `memorize()` is called. The LLM analyzes the last turn, identifies this new, important fact, and appends it to the `discussion.memory` zone.
+4. **Recall:** In the final turn, when asked to recall the favorite language, the AI has access to the updated `memory` content within its system context and can correctly answer "Rust". This demonstrates true long-term, stateful memory.
+
 
 ## Documentation
 
@@ -891,33 +922,54 @@ discussion.commit() # Save the updated memory to the database
 ```
 
 #### `get_context_status()`
-Provides a detailed, real-time breakdown of the current prompt context, showing exactly what will be sent to the model and how many tokens each part occupies.
 
-
-
+Provides a detailed, real-time breakdown of the current prompt context, showing exactly what will be sent to the model and how many tokens each major component occupies. This is crucial for debugging context issues and understanding token usage.
+
+The method accurately reflects the structure of the `lollms_text` format, where all system-level instructions (the main prompt, all data zones, and the pruning summary) are combined into a single system block.
+
+- **Return Value:** A dictionary containing:
+    - `max_tokens`: The configured maximum token limit for the discussion.
+    - `current_tokens`: The total, most accurate token count for the entire prompt, calculated using the same logic as the `chat()` method.
+    - `zones`: A dictionary with up to two keys:
+        - **`system_context`**: Present if there is any system-level content. It contains:
+            - `tokens`: The total token count for the **entire combined system block** (e.g., `!@>system:\n...\n`).
+            - `content`: The full string content of the system block, showing exactly how all zones are merged.
+            - `breakdown`: A sub-dictionary showing the raw text of each individual component (e.g., `system_prompt`, `memory`, `user_data_zone`) that was used to build the `content`.
+        - **`message_history`**: Present if there are messages in the branch. It contains:
+            - `tokens`: The total token count for the message history part of the prompt.
+            - `content`: The full string of the formatted message history.
+            - `message_count`: The number of messages included in the history.
+
+- **Use Case:** Essential for debugging context issues, visualizing how different data zones contribute to the final prompt, and monitoring token consumption.
 
 ```python
 import json
 
+# Assuming 'discussion' is an LollmsDiscussion object with some data
+discussion.system_prompt = "You are a helpful AI."
+discussion.user_data_zone = "User is named Bob."
+discussion.add_message(sender="user", content="Hello!")
+discussion.add_message(sender="assistant", content="Hi Bob!")
+
 status = discussion.get_context_status()
 print(json.dumps(status, indent=2))
 
 # Expected Output Structure:
 # {
-#   "max_tokens":
-#   "current_tokens":
+#   "max_tokens": null,
+#   "current_tokens": 46,
 #   "zones": {
-#     "
-#     "content": "You are a helpful
-#     "tokens":
-#
-#
-#
-#
+#     "system_context": {
+#       "content": "You are a helpful AI.\n\n-- User Data Zone --\nUser is named Bob.",
+#       "tokens": 25,
+#       "breakdown": {
+#         "system_prompt": "You are a helpful AI.",
+#         "user_data_zone": "User is named Bob."
+#       }
+#     }
 #   },
 #   "message_history": {
-#     "content": "!@>user:\
-#     "tokens":
+#     "content": "!@>user:\nHello!\n!@>assistant:\nHi Bob!\n",
+#     "tokens": 21,
 #     "message_count": 2
 #   }
 # }
{lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/__init__.py
RENAMED
@@ -8,7 +8,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utiliti
 from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
 from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
 
-__version__ = "0.29.1"
+__version__ = "0.29.3" # Updated version
 
 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
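A quick way to confirm which release is installed; the expected string follows directly from the version bump above:

```python
import lollms_client

print(lollms_client.__version__)  # expected: "0.29.3"
```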
{lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/llm_bindings/llamacpp/__init__.py
RENAMED
@@ -352,8 +352,11 @@ class LlamaCppServerBinding(LollmsLLMBinding):
 
 
     def load_model(self, model_name_or_path: str) -> bool:
-
-
+        try:
+            resolved_model_path = self._resolve_model_path(model_name_or_path)
+        except Exception as ex:
+            trace_exception(ex)
+            return False
         # Determine the clip_model_path for this server instance
         # Priority: 1. Explicit `clip_model_path` from init (if exists) 2. Auto-detection
         final_clip_model_path: Optional[Path] = None
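With this change, `load_model` reports a bad or unresolvable model path by returning `False` (the exception is logged via `trace_exception`) instead of letting it propagate, so callers should check the boolean. A hedged sketch of that calling pattern (the helper below is hypothetical, not part of the library):

```python
def ensure_model_loaded(binding, model_name_or_path: str) -> None:
    """Hypothetical helper: raise a clear error when load_model() reports failure."""
    if not binding.load_model(model_name_or_path):
        # As of 0.29.3 the resolution error is traced inside the binding;
        # the caller only sees the False return value.
        raise RuntimeError(f"Could not load model: {model_name_or_path}")
```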
{lollms_client-0.29.1 → lollms_client-0.29.3}/lollms_client/lollms_core.py
RENAMED
@@ -147,9 +147,6 @@ class LollmsClient():
            available = self.binding_manager.get_available_bindings()
            raise ValueError(f"Failed to create LLM binding: {binding_name}. Available: {available}")
 
-        # Determine the effective host address (use LLM binding's if initial was None)
-        effective_host_address = self.host_address
-
        # --- Modality Binding Setup ---
        self.tts_binding_manager = LollmsTTSBindingManager(tts_bindings_dir)
        self.tti_binding_manager = LollmsTTIBindingManager(tti_bindings_dir)
@@ -2961,7 +2958,6 @@ Provide the final aggregated answer in {output_format} format, directly addressi
            callback("Deep analysis complete.", MSG_TYPE.MSG_TYPE_STEP_END)
        return final_output
 
-
    def summarize(
        self,
        text_to_summarize: str,
@@ -2990,6 +2986,7 @@ Provide the final aggregated answer in {output_format} format, directly addressi
                is not lost at the boundaries. Defaults to 250.
            streaming_callback (Optional[Callable], optional): A callback function to receive real-time updates
                on the process (e.g., which chunk is being processed).
+                It receives a message, a message type, and optional metadata.
                Defaults to None.
            **kwargs: Additional keyword arguments to be passed to the generation method (e.g., temperature, top_p).
 
@@ -3004,12 +3001,17 @@ Provide the final aggregated answer in {output_format} format, directly addressi
 
        if len(tokens) <= chunk_size_tokens:
            if streaming_callback:
-                streaming_callback("Text is short enough for a single summary.", MSG_TYPE.MSG_TYPE_STEP)
+                streaming_callback("Text is short enough for a single summary.", MSG_TYPE.MSG_TYPE_STEP, {"progress": 0})
 
            prompt_objective = contextual_prompt or "Provide a comprehensive summary of the following text."
            final_prompt = f"{prompt_objective}\n\n--- Text to Summarize ---\n{text_to_summarize}"
 
-
+            summary = self.generate_text(final_prompt, **kwargs)
+
+            if streaming_callback:
+                streaming_callback("Summary generated.", MSG_TYPE.MSG_TYPE_STEP, {"progress": 100})
+
+            return summary
 
        # --- Stage 1: Chunking and Independent Summarization ---
        chunks = []
@@ -3021,13 +3023,21 @@ Provide the final aggregated answer in {output_format} format, directly addressi
 
        chunk_summaries = []
 
+        # Total steps include each chunk plus the final synthesis step
+        total_steps = len(chunks) + 1
+
        # Define the prompt for summarizing each chunk
        summarization_objective = contextual_prompt or "Summarize the key points of the following text excerpt."
        chunk_summary_prompt_template = f"{summarization_objective}\n\n--- Text Excerpt ---\n{{chunk_text}}"
 
        for i, chunk in enumerate(chunks):
+            progress_before = (i / total_steps) * 100
            if streaming_callback:
-                streaming_callback(
+                streaming_callback(
+                    f"Summarizing chunk {i + 1} of {len(chunks)}...",
+                    MSG_TYPE.MSG_TYPE_STEP_START,
+                    {"id": f"chunk_{i+1}", "progress": progress_before}
+                )
 
            prompt = chunk_summary_prompt_template.format(chunk_text=chunk)
 
@@ -3035,8 +3045,14 @@ Provide the final aggregated answer in {output_format} format, directly addressi
                # Generate summary for the current chunk
                chunk_summary = self.generate_text(prompt, **kwargs)
                chunk_summaries.append(chunk_summary)
+
+                progress_after = ((i + 1) / total_steps) * 100
                if streaming_callback:
-                    streaming_callback(
+                    streaming_callback(
+                        f"Chunk {i + 1} summarized. Progress: {progress_after:.0f}%",
+                        MSG_TYPE.MSG_TYPE_STEP_END,
+                        {"id": f"chunk_{i+1}", "summary_snippet": chunk_summary[:100], "progress": progress_after}
+                    )
            except Exception as e:
                trace_exception(e)
                if streaming_callback:
@@ -3045,8 +3061,13 @@ Provide the final aggregated answer in {output_format} format, directly addressi
                    chunk_summaries.append(f"[Error summarizing chunk {i+1}]")
 
        # --- Stage 2: Final Synthesis of All Chunk Summaries ---
+        progress_before_synthesis = (len(chunks) / total_steps) * 100
        if streaming_callback:
-            streaming_callback(
+            streaming_callback(
+                "Synthesizing all chunk summaries into a final version...",
+                MSG_TYPE.MSG_TYPE_STEP_START,
+                {"id": "final_synthesis", "progress": progress_before_synthesis}
+            )
 
        combined_summaries = "\n\n---\n\n".join(chunk_summaries)
 
@@ -3064,7 +3085,11 @@ Provide the final aggregated answer in {output_format} format, directly addressi
        final_summary = self.generate_text(final_synthesis_prompt, **kwargs)
 
        if streaming_callback:
-            streaming_callback(
+            streaming_callback(
+                "Final summary synthesized.",
+                MSG_TYPE.MSG_TYPE_STEP_END,
+                {"id": "final_synthesis", "progress": 100}
+            )
 
        return final_summary.strip()
 
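The hunks above show that `summarize()` now forwards a third metadata argument (with `progress`, `id`, and sometimes `summary_snippet` keys) alongside the message and its `MSG_TYPE`, matching the updated docstring. A minimal sketch of a callback that consumes this shape (only the keys visible in the diff are assumed):

```python
from lollms_client import MSG_TYPE

def summary_progress_callback(message, msg_type, metadata=None):
    """Illustrative progress printer for LollmsClient.summarize()."""
    metadata = metadata or {}
    progress = metadata.get("progress")
    step_id = metadata.get("id", "")
    suffix = f" ({progress:.0f}%)" if progress is not None else ""
    if msg_type == MSG_TYPE.MSG_TYPE_STEP_START:
        print(f"[start] {step_id}: {message}{suffix}")
    elif msg_type == MSG_TYPE.MSG_TYPE_STEP_END:
        print(f"[done ] {step_id}: {message}{suffix}")
    else:  # e.g. MSG_TYPE.MSG_TYPE_STEP for the single-chunk path
        print(f"[step ] {message}{suffix}")

# Usage sketch (assuming an initialized LollmsClient named lc):
# final_summary = lc.summarize(long_text, streaming_callback=summary_progress_callback)
```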