lollms-client 0.31.1__tar.gz → 0.32.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lollms-client might be problematic.

Files changed (107)
  1. {lollms_client-0.31.1 → lollms_client-0.32.1}/PKG-INFO +1 -1
  2. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/__init__.py +1 -1
  3. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/llamacpp/__init__.py +210 -35
  4. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/ollama/__init__.py +88 -0
  5. lollms_client-0.32.1/lollms_client/llm_bindings/openai/__init__.py +678 -0
  6. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/pythonllamacpp/__init__.py +153 -139
  7. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_core.py +17 -5
  8. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_discussion.py +2 -0
  9. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_llm_binding.py +1 -1
  10. lollms_client-0.32.1/lollms_client/lollms_mcp_security.py +84 -0
  11. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client.egg-info/PKG-INFO +1 -1
  12. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client.egg-info/SOURCES.txt +1 -0
  13. lollms_client-0.31.1/lollms_client/llm_bindings/openai/__init__.py +0 -600
  14. {lollms_client-0.31.1 → lollms_client-0.32.1}/LICENSE +0 -0
  15. {lollms_client-0.31.1 → lollms_client-0.32.1}/README.md +0 -0
  16. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/article_summary/article_summary.py +0 -0
  17. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/console_discussion/console_app.py +0 -0
  18. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/console_discussion.py +0 -0
  19. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/deep_analyze/deep_analyse.py +0 -0
  20. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/deep_analyze/deep_analyze_multiple_files.py +0 -0
  21. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/function_calling_with_local_custom_mcp.py +0 -0
  22. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/generate_a_benchmark_for_safe_store.py +0 -0
  23. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/generate_and_speak/generate_and_speak.py +0 -0
  24. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/generate_game_sfx/generate_game_fx.py +0 -0
  25. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/generate_text_with_multihop_rag_example.py +0 -0
  26. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/gradio_chat_app.py +0 -0
  27. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/gradio_lollms_chat.py +0 -0
  28. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/internet_search_with_rag.py +0 -0
  29. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/lollms_chat/calculator.py +0 -0
  30. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/lollms_chat/derivative.py +0 -0
  31. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/lollms_chat/test_openai_compatible_with_lollms_chat.py +0 -0
  32. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/lollms_discussions_test.py +0 -0
  33. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/mcp_examples/external_mcp.py +0 -0
  34. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/mcp_examples/local_mcp.py +0 -0
  35. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/mcp_examples/openai_mcp.py +0 -0
  36. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/mcp_examples/run_remote_mcp_example_v2.py +0 -0
  37. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/mcp_examples/run_standard_mcp_example.py +0 -0
  38. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/simple_text_gen_test.py +0 -0
  39. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/simple_text_gen_with_image_test.py +0 -0
  40. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/test_local_models/local_chat.py +0 -0
  41. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/text_2_audio.py +0 -0
  42. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/text_2_image.py +0 -0
  43. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/text_2_image_diffusers.py +0 -0
  44. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/text_and_image_2_audio.py +0 -0
  45. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/text_gen.py +0 -0
  46. {lollms_client-0.31.1 → lollms_client-0.32.1}/examples/text_gen_system_prompt.py +0 -0
  47. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/__init__.py +0 -0
  48. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/azure_openai/__init__.py +0 -0
  49. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/claude/__init__.py +0 -0
  50. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/gemini/__init__.py +0 -0
  51. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/grok/__init__.py +0 -0
  52. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/groq/__init__.py +0 -0
  53. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/hugging_face_inference_api/__init__.py +0 -0
  54. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/litellm/__init__.py +0 -0
  55. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/lollms/__init__.py +0 -0
  56. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/lollms_webui/__init__.py +0 -0
  57. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/mistral/__init__.py +0 -0
  58. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/open_router/__init__.py +0 -0
  59. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/openllm/__init__.py +0 -0
  60. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/tensor_rt/__init__.py +0 -0
  61. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/transformers/__init__.py +0 -0
  62. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/llm_bindings/vllm/__init__.py +0 -0
  63. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_config.py +0 -0
  64. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_js_analyzer.py +0 -0
  65. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_mcp_binding.py +0 -0
  66. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_personality.py +0 -0
  67. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_python_analyzer.py +0 -0
  68. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_stt_binding.py +0 -0
  69. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_tti_binding.py +0 -0
  70. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_ttm_binding.py +0 -0
  71. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_tts_binding.py +0 -0
  72. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_ttv_binding.py +0 -0
  73. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_types.py +0 -0
  74. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/lollms_utilities.py +0 -0
  75. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/mcp_bindings/local_mcp/__init__.py +0 -0
  76. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/mcp_bindings/local_mcp/default_tools/file_writer/file_writer.py +0 -0
  77. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/mcp_bindings/local_mcp/default_tools/generate_image_from_prompt/generate_image_from_prompt.py +0 -0
  78. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/mcp_bindings/local_mcp/default_tools/internet_search/internet_search.py +0 -0
  79. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/mcp_bindings/local_mcp/default_tools/python_interpreter/python_interpreter.py +0 -0
  80. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/mcp_bindings/remote_mcp/__init__.py +0 -0
  81. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/mcp_bindings/standard_mcp/__init__.py +0 -0
  82. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/stt_bindings/__init__.py +0 -0
  83. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/stt_bindings/lollms/__init__.py +0 -0
  84. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/stt_bindings/whisper/__init__.py +0 -0
  85. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/stt_bindings/whispercpp/__init__.py +0 -0
  86. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/tti_bindings/__init__.py +0 -0
  87. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/tti_bindings/dalle/__init__.py +0 -0
  88. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/tti_bindings/diffusers/__init__.py +0 -0
  89. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/tti_bindings/gemini/__init__.py +0 -0
  90. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/tti_bindings/lollms/__init__.py +0 -0
  91. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/ttm_bindings/__init__.py +0 -0
  92. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/ttm_bindings/audiocraft/__init__.py +0 -0
  93. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/ttm_bindings/bark/__init__.py +0 -0
  94. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/ttm_bindings/lollms/__init__.py +0 -0
  95. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/tts_bindings/__init__.py +0 -0
  96. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/tts_bindings/bark/__init__.py +0 -0
  97. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/tts_bindings/lollms/__init__.py +0 -0
  98. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/tts_bindings/piper_tts/__init__.py +0 -0
  99. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/tts_bindings/xtts/__init__.py +0 -0
  100. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/ttv_bindings/__init__.py +0 -0
  101. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client/ttv_bindings/lollms/__init__.py +0 -0
  102. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client.egg-info/dependency_links.txt +0 -0
  103. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client.egg-info/requires.txt +0 -0
  104. {lollms_client-0.31.1 → lollms_client-0.32.1}/lollms_client.egg-info/top_level.txt +0 -0
  105. {lollms_client-0.31.1 → lollms_client-0.32.1}/pyproject.toml +0 -0
  106. {lollms_client-0.31.1 → lollms_client-0.32.1}/setup.cfg +0 -0
  107. {lollms_client-0.31.1 → lollms_client-0.32.1}/test/test_lollms_discussion.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lollms_client
- Version: 0.31.1
+ Version: 0.32.1
  Summary: A client library for LoLLMs generate endpoint
  Author-email: ParisNeo <parisneoai@gmail.com>
  License: Apache Software License
@@ -8,7 +8,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utiliti
  from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
  from lollms_client.lollms_llm_binding import LollmsLLMBindingManager

- __version__ = "0.31.1" # Updated version
+ __version__ = "0.32.1" # Updated version

  # Optionally, you could define __all__ if you want to be explicit about exports
  __all__ = [
@@ -272,8 +272,22 @@ class LlamaCppServerBinding(LollmsLLMBinding):
  if llama_cpp_binaries is None: raise ImportError("llama-cpp-binaries package is required but not found.")

  self.models_path = Path(models_path)
- self.user_provided_model_name = model_name # Store the name/path user gave
-
+ self.user_provided_model_name = model_name # Store the name/path user gave
+ self._model_path_map: Dict[str, Path] = {} # Maps unique name to full Path
+
+ # Initial scan for available models
+ self._scan_models()
+
+ # Determine the model to load
+ effective_model_to_load = model_name
+ if not effective_model_to_load and self._model_path_map:
+ # If no model was specified and we have models, pick the first one
+ # Sorting ensures a deterministic choice
+ first_model_name = sorted(self._model_path_map.keys())[0]
+ effective_model_to_load = first_model_name
+ ASCIIColors.info(f"No model was specified. Automatically selecting the first available model: '{effective_model_to_load}'")
+ self.user_provided_model_name = effective_model_to_load # Update for get_model_info etc.
+
  # Initial hint for clip_model_path, resolved fully in load_model
  self.clip_model_path: Optional[Path] = None
  if clip_model_name:
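
Note: with this change the binding can be constructed without naming a model; after the initial scan, the first entry of the name-to-path map, sorted so the pick is deterministic, becomes the effective model. A minimal standalone sketch of that selection rule, with an illustrative map (the file names and paths here are hypothetical):

    from pathlib import Path

    # Hypothetical result of a models-directory scan: unique name -> file path.
    model_map = {
        "mistral-7b.Q4_K_M.gguf": Path("/models/mistral-7b.Q4_K_M.gguf"),
        "llama3-8b.Q4_K_M.gguf": Path("/models/llama3-8b.Q4_K_M.gguf"),
    }

    requested = None  # caller passed model_name=None
    effective = requested or (sorted(model_map.keys())[0] if model_map else None)
    print(effective)  # -> 'llama3-8b.Q4_K_M.gguf' (alphabetically first, hence deterministic)
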
@@ -294,8 +308,12 @@ class LlamaCppServerBinding(LollmsLLMBinding):
  self.port: Optional[int] = None
  self.server_key: Optional[tuple] = None

- if not self.load_model(self.user_provided_model_name):
- ASCIIColors.error(f"Initial model load for '{self.user_provided_model_name}' failed. Binding may not be functional.")
+ # Now, attempt to load the selected model
+ if effective_model_to_load:
+ if not self.load_model(effective_model_to_load):
+ ASCIIColors.error(f"Initial model load for '{effective_model_to_load}' failed. Binding may not be functional.")
+ else:
+ ASCIIColors.warning("No models found in the models path. The binding will be idle until a model is loaded.")

  def _get_server_binary_path(self) -> Path:
  custom_path_str = self.server_args.get("llama_server_binary_path")
@@ -313,16 +331,41 @@ class LlamaCppServerBinding(LollmsLLMBinding):
  raise FileNotFoundError("Llama.cpp server binary not found. Ensure 'llama-cpp-binaries' or 'llama-cpp-python[server]' is installed or provide 'llama_server_binary_path'.")

  def _resolve_model_path(self, model_name_or_path: str) -> Path:
+ """
+ Resolves a model name or path to a full Path object.
+ It prioritizes the internal map, then checks for absolute/relative paths,
+ and rescans the models directory as a fallback.
+ """
+ # 1. Check if the provided name is a key in our map
+ if model_name_or_path in self._model_path_map:
+ resolved_path = self._model_path_map[model_name_or_path]
+ ASCIIColors.info(f"Resolved model name '{model_name_or_path}' to path: {resolved_path}")
+ return resolved_path
+
+ # 2. If not in map, treat it as a potential path (absolute or relative to models_path)
  model_p = Path(model_name_or_path)
  if model_p.is_absolute():
- if model_p.exists(): return model_p
- else: raise FileNotFoundError(f"Absolute model path specified but not found: {model_p}")
-
+ if model_p.exists() and model_p.is_file():
+ return model_p
+
  path_in_models_dir = self.models_path / model_name_or_path
  if path_in_models_dir.exists() and path_in_models_dir.is_file():
- ASCIIColors.info(f"Found model at: {path_in_models_dir}"); return path_in_models_dir
-
- raise FileNotFoundError(f"Model '{model_name_or_path}' not found as absolute path or within '{self.models_path}'.")
+ ASCIIColors.info(f"Found model at relative path: {path_in_models_dir}")
+ return path_in_models_dir
+
+ # 3. As a fallback, rescan the models directory in case the file was just added
+ ASCIIColors.info("Model not found in cache, rescanning directory...")
+ self._scan_models()
+ if model_name_or_path in self._model_path_map:
+ resolved_path = self._model_path_map[model_name_or_path]
+ ASCIIColors.info(f"Found model '{model_name_or_path}' after rescan: {resolved_path}")
+ return resolved_path
+
+ # Final check for absolute path after rescan
+ if model_p.is_absolute() and model_p.exists() and model_p.is_file():
+ return model_p
+
+ raise FileNotFoundError(f"Model '{model_name_or_path}' not found in the map, as an absolute path, or within '{self.models_path}'.")

  def _find_available_port(self) -> int:
  with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
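
Note: the lookup order implemented above is the cached name map first, then an absolute path, then a path relative to models_path, and finally a rescan before giving up. A condensed standalone sketch of that order; resolve, model_map, and models_path are illustrative names, and the real method also logs via ASCIIColors:

    from pathlib import Path

    def resolve(name: str, model_map: dict, models_path: Path) -> Path:
        # 1. Unique name recorded by the last scan
        if name in model_map:
            return model_map[name]
        # 2. Absolute path to an existing file
        p = Path(name)
        if p.is_absolute() and p.is_file():
            return p
        # 3. Path relative to the configured models directory
        rel = models_path / name
        if rel.is_file():
            return rel
        # 4. The binding rescans the directory and retries before raising
        raise FileNotFoundError(name)
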
@@ -352,6 +395,7 @@ class LlamaCppServerBinding(LollmsLLMBinding):


  def load_model(self, model_name_or_path: str) -> bool:
+ self.user_provided_model_name = model_name_or_path # Keep track of the selected model name
  try:
  resolved_model_path = self._resolve_model_path(model_name_or_path)
  except Exception as ex:
@@ -805,23 +849,129 @@ class LlamaCppServerBinding(LollmsLLMBinding):
  info["supports_structured_output"] = self.server_args.get("grammar_string") is not None
  return info

- def listModels(self) -> List[Dict[str, str]]:
+ def _scan_models(self):
+ """
+ Scans the models_path for GGUF files and populates the model map.
+ Handles duplicate filenames by prefixing them with their parent directory path.
+ """
+ self._model_path_map = {}
+ if not self.models_path.exists() or not self.models_path.is_dir():
+ ASCIIColors.warning(f"Models path does not exist or is not a directory: {self.models_path}")
+ return
+
+ all_paths = list(self.models_path.rglob("*.gguf"))
+ filenames_count = {}
+ for path in all_paths:
+ if path.is_file():
+ filenames_count[path.name] = filenames_count.get(path.name, 0) + 1
+
+ for model_file in all_paths:
+ if model_file.is_file():
+ # On Windows, path separators can be tricky. Convert to generic format.
+ relative_path_str = str(model_file.relative_to(self.models_path).as_posix())
+ if filenames_count[model_file.name] > 1:
+ # Duplicate filename, use relative path as the unique name
+ unique_name = relative_path_str
+ else:
+ # Unique filename, use the name itself
+ unique_name = model_file.name
+
+ self._model_path_map[unique_name] = model_file
+
+ ASCIIColors.info(f"Scanned {len(self._model_path_map)} models from {self.models_path}.")
+
+ def listModels(self) -> List[Dict[str, Any]]:
+ """
+ Lists all available GGUF models, rescanning the directory first.
+ """
+ self._scan_models() # Always rescan when asked for the list
+
  models_found = []
- unique_models = set()
- if self.models_path.exists() and self.models_path.is_dir():
- for model_file in self.models_path.rglob("*.gguf"):
- if model_file.is_file() and model_file.name not in unique_models:
- models_found.append({
- 'model_name': model_file.name,
- 'path_hint': str(model_file.relative_to(self.models_path.parent) if model_file.is_relative_to(self.models_path.parent) else model_file),
- 'size_gb': f"{model_file.stat().st_size / (1024**3):.2f} GB"
- })
- unique_models.add(model_file.name)
- return models_found
+ for unique_name, model_path in self._model_path_map.items():
+ models_found.append({
+ 'name': unique_name, # The unique name for selection
+ 'model_name': model_path.name, # The original filename for display
+ 'path': str(model_path), # The full path
+ 'size': model_path.stat().st_size
+ })
+
+ # Sort the list alphabetically by the unique name for consistent ordering
+ return sorted(models_found, key=lambda x: x['name'])

  def __del__(self):
  self.unload_model()

+ def get_ctx_size(self, model_name: Optional[str] = None) -> Optional[int]:
+ """
+ Retrieves context size for a model from a hardcoded list.
+
+ This method checks if the model name contains a known base model identifier
+ (e.g., 'llama3.1', 'gemma2') to determine its context length. It's intended
+ as a failsafe when the context size cannot be retrieved directly from the
+ Ollama API.
+ """
+ if model_name is None:
+ model_name = self.model_name
+
+ # Hardcoded context sizes for popular models. More specific names (e.g., 'llama3.1')
+ # should appear, as they will be checked first due to the sorting logic below.
+ known_contexts = {
+ 'llama3.1': 131072, # Llama 3.1 extended context
+ 'llama3.2': 131072, # Llama 3.2 extended context
+ 'llama3.3': 131072, # Assuming similar to 3.1/3.2
+ 'llama3': 8192, # Llama 3 default
+ 'llama2': 4096, # Llama 2 default
+ 'mixtral8x22b': 65536, # Mixtral 8x22B default
+ 'mixtral': 32768, # Mixtral 8x7B default
+ 'mistral': 32768, # Mistral 7B v0.2+ default
+ 'gemma3': 131072, # Gemma 3 with 128K context
+ 'gemma2': 8192, # Gemma 2 default
+ 'gemma': 8192, # Gemma default
+ 'phi3': 131072, # Phi-3 variants often use 128K (mini/medium extended)
+ 'phi2': 2048, # Phi-2 default
+ 'phi': 2048, # Phi default (older)
+ 'qwen2.5': 131072, # Qwen2.5 with 128K
+ 'qwen2': 32768, # Qwen2 default for 7B
+ 'qwen': 8192, # Qwen default
+ 'codellama': 16384, # CodeLlama extended
+ 'codegemma': 8192, # CodeGemma default
+ 'deepseek-coder-v2': 131072, # DeepSeek-Coder V2 with 128K
+ 'deepseek-coder': 16384, # DeepSeek-Coder V1 default
+ 'deepseek-v2': 131072, # DeepSeek-V2 with 128K
+ 'deepseek-llm': 4096, # DeepSeek-LLM default
+ 'yi1.5': 32768, # Yi-1.5 with 32K
+ 'yi': 4096, # Yi base default
+ 'command-r': 131072, # Command-R with 128K
+ 'wizardlm2': 32768, # WizardLM2 (Mistral-based)
+ 'wizardlm': 16384, # WizardLM default
+ 'zephyr': 65536, # Zephyr beta (Mistral-based extended)
+ 'vicuna': 2048, # Vicuna default (up to 16K in some variants)
+ 'falcon': 2048, # Falcon default
+ 'starcoder': 8192, # StarCoder default
+ 'stablelm': 4096, # StableLM default
+ 'orca2': 4096, # Orca 2 default
+ 'orca': 4096, # Orca default
+ 'dolphin': 32768, # Dolphin (often Mistral-based)
+ 'openhermes': 8192, # OpenHermes default
+ }
+
+ normalized_model_name = model_name.lower().strip()
+
+ # Sort keys by length in descending order. This ensures that a more specific
+ # name like 'llama3.1' is checked before a less specific name like 'llama3'.
+ sorted_base_models = sorted(known_contexts.keys(), key=len, reverse=True)
+
+ for base_name in sorted_base_models:
+ if base_name in normalized_model_name:
+ context_size = known_contexts[base_name]
+ ASCIIColors.warning(
+ f"Using hardcoded context size for model '{model_name}' "
+ f"based on base name '{base_name}': {context_size}"
+ )
+ return context_size
+
+ ASCIIColors.warning(f"Context size not found for model '{model_name}' in the hardcoded list.")
+ return None

  if __name__ == '__main__':
  global full_streamed_text # Define for the callback
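
Note: the duplicate handling in _scan_models can be exercised in isolation: a filename that appears once keeps its bare name, while a filename that occurs in several subfolders is keyed by its path relative to the models directory, with POSIX-style separators. A small standalone sketch of that keying rule (build_map is an illustrative name, not part of the binding):

    from pathlib import Path
    from collections import Counter

    def build_map(models_path: Path) -> dict:
        files = [p for p in models_path.rglob("*.gguf") if p.is_file()]
        counts = Counter(p.name for p in files)
        mapping = {}
        for f in files:
            rel = f.relative_to(models_path).as_posix()
            # 'subdir/model.gguf' when the filename is duplicated, bare 'model.gguf' otherwise
            mapping[rel if counts[f.name] > 1 else f.name] = f
        return mapping
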
@@ -872,17 +1022,21 @@ if __name__ == '__main__':
  try:
  if primary_model_available:
  ASCIIColors.cyan("\n--- Initializing First LlamaCppServerBinding Instance ---")
+ # Test default model selection by passing model_name=None
+ ASCIIColors.info("Testing default model selection (model_name=None)")
  active_binding1 = LlamaCppServerBinding(
- model_name=model_name_str, models_path=str(models_path), config=binding_config
+ model_name=None, models_path=str(models_path), config=binding_config
  )
  if not active_binding1.server_process or not active_binding1.server_process.is_healthy:
  raise RuntimeError("Server for binding1 failed to start or become healthy.")
- ASCIIColors.green(f"Binding1 initialized. Server for '{active_binding1.current_model_path.name}' running on port {active_binding1.port}.")
+ ASCIIColors.green(f"Binding1 initialized with default model. Server for '{active_binding1.current_model_path.name}' running on port {active_binding1.port}.")
  ASCIIColors.info(f"Binding1 Model Info: {json.dumps(active_binding1.get_model_info(), indent=2)}")

- ASCIIColors.cyan("\n--- Initializing Second LlamaCppServerBinding Instance (Same Model) ---")
+ ASCIIColors.cyan("\n--- Initializing Second LlamaCppServerBinding Instance (Same Model, explicit name) ---")
+ # Load the same model explicitly now
+ model_to_load_explicitly = active_binding1.user_provided_model_name
  active_binding2 = LlamaCppServerBinding(
- model_name=model_name_str, models_path=str(models_path), config=binding_config # Same model and config
+ model_name=model_to_load_explicitly, models_path=str(models_path), config=binding_config
  )
  if not active_binding2.server_process or not active_binding2.server_process.is_healthy:
  raise RuntimeError("Server for binding2 failed to start or become healthy (should reuse).")
@@ -896,9 +1050,30 @@ if __name__ == '__main__':

  # --- List Models (scans configured directories) ---
  ASCIIColors.cyan("\n--- Listing Models (from search paths, using binding1) ---")
+ # Create a dummy duplicate model to test unique naming
+ duplicate_folder = models_path / "subdir"
+ duplicate_folder.mkdir(exist_ok=True)
+ duplicate_model_path = duplicate_folder / test_model_path.name
+ import shutil
+ shutil.copy(test_model_path, duplicate_model_path)
+ ASCIIColors.info(f"Created a duplicate model for testing: {duplicate_model_path}")
+
  listed_models = active_binding1.listModels()
- if listed_models: ASCIIColors.green(f"Found {len(listed_models)} GGUF files. First 5: {listed_models[:5]}")
+ if listed_models:
+ ASCIIColors.green(f"Found {len(listed_models)} GGUF files.")
+ pprint.pprint(listed_models)
+ # Check if the duplicate was handled
+ names = [m['name'] for m in listed_models]
+ if test_model_path.name in names and f"subdir/{test_model_path.name}" in names:
+ ASCIIColors.green("SUCCESS: Duplicate model names were correctly handled.")
+ else:
+ ASCIIColors.error("FAILURE: Duplicate model names were not handled correctly.")
  else: ASCIIColors.warning("No GGUF models found in search paths.")
+
+ # Clean up dummy duplicate
+ duplicate_model_path.unlink()
+ duplicate_folder.rmdir()
+

  # --- Tokenize/Detokenize ---
  ASCIIColors.cyan("\n--- Tokenize/Detokenize (using binding1) ---")
@@ -913,16 +1088,16 @@ if __name__ == '__main__':
  # --- Text Generation (Non-Streaming, Chat API, binding1) ---
  ASCIIColors.cyan("\n--- Text Generation (Non-Streaming, Chat API, binding1) ---")
  prompt_text = "What is the capital of Germany?"
- generated_text = active_binding1.generate_text(prompt_text, system_prompt="Concise expert.", n_predict=20, stream=False, use_chat_format_override=True)
+ generated_text = active_binding1.generate_text(prompt_text, system_prompt="Concise expert.", n_predict=20, stream=False)
  if isinstance(generated_text, str): ASCIIColors.green(f"Generated text (binding1): {generated_text}")
  else: ASCIIColors.error(f"Generation failed (binding1): {generated_text}")

  # --- Text Generation (Streaming, Completion API, binding2) ---
- ASCIIColors.cyan("\n--- Text Generation (Streaming, Completion API, binding2) ---")
+ ASCIIColors.cyan("\n--- Text Generation (Streaming, Chat API, binding2) ---")
  full_streamed_text = "" # Reset global
  def stream_callback(chunk: str, msg_type: int): global full_streamed_text; ASCIIColors.green(f"{chunk}", end="", flush=True); full_streamed_text += chunk; return True

- result_b2 = active_binding2.generate_text(prompt_text, system_prompt="Concise expert.", n_predict=30, stream=True, streaming_callback=stream_callback, use_chat_format_override=False)
+ result_b2 = active_binding2.generate_text(prompt_text, system_prompt="Concise expert.", n_predict=30, stream=True, streaming_callback=stream_callback)
  print("\n--- End of Stream (binding2) ---")
  if isinstance(result_b2, str): ASCIIColors.green(f"Full streamed text (binding2): {result_b2}")
  else: ASCIIColors.error(f"Streaming generation failed (binding2): {result_b2}")
@@ -957,9 +1132,9 @@ if __name__ == '__main__':
  # llava_binding_config["chat_template"] = "llava-1.5"

  active_binding_llava = LlamaCppServerBinding(
- model_name=str(llava_model_path), # Pass full path for clarity in test
+ model_name=str(llava_model_path.name), # Pass filename, let it resolve
  models_path=str(models_path),
- clip_model_name=str(llava_clip_path_actual), # Pass full path for clip
+ clip_model_name=str(llava_clip_path_actual.name), # Pass filename for clip
  config=llava_binding_config
  )
  if not active_binding_llava.server_process or not active_binding_llava.server_process.is_healthy:
@@ -970,7 +1145,7 @@ if __name__ == '__main__':

  llava_prompt = "Describe this image."
  llava_response = active_binding_llava.generate_text(
- prompt=llava_prompt, images=[str(dummy_image_path)], n_predict=40, stream=False, use_chat_format_override=True
+ prompt=llava_prompt, images=[str(dummy_image_path)], n_predict=40, stream=False
  )
  if isinstance(llava_response, str): ASCIIColors.green(f"LLaVA response: {llava_response}")
  else: ASCIIColors.error(f"LLaVA generation failed: {llava_response}")
@@ -986,7 +1161,7 @@ if __name__ == '__main__':
  # --- Test changing model (using binding1 to load a different or same model) ---
  ASCIIColors.cyan("\n--- Testing Model Change (binding1 reloads its model) ---")
  # For a real change, use a different model name if available. Here, we reload the same.
- reload_success = active_binding1.load_model(model_name_str) # Reload original model
+ reload_success = active_binding1.load_model(active_binding1.user_provided_model_name) # Reload original model
  if reload_success and active_binding1.server_process and active_binding1.server_process.is_healthy:
  ASCIIColors.green(f"Model reloaded/re-confirmed successfully by binding1. Server on port {active_binding1.port}.")
  reloaded_gen = active_binding1.generate_text("Ping", n_predict=5, stream=False)
@@ -1023,4 +1198,4 @@ if __name__ == '__main__':
  else:
  ASCIIColors.green("All servers shut down correctly.")

- ASCIIColors.yellow("\nLlamaCppServerBinding test finished.")
+ ASCIIColors.yellow("\nLlamaCppServerBinding test finished.")
@@ -598,6 +598,94 @@ class OllamaBinding(LollmsLLMBinding):
  ASCIIColors.info(f"Ollama model set to: {model_name}. It will be loaded by the server on first use.")
  return True

+ def get_ctx_size(self, model_name: Optional[str] = None) -> Optional[int]:
+ """
+ Retrieves the context size for an Ollama model.
+
+ The effective context size is the `num_ctx` parameter if overridden in the Modelfile,
+ otherwise it falls back to the model's default context length from its architecture details.
+ As a final failsafe, uses a hardcoded list of known popular models' context lengths.
+ """
+ if model_name is None:
+ model_name = self.model_name
+
+ try:
+ info = ollama.show(model_name)
+
+ # Parse num_ctx from the 'parameters' string (e.g., "PARAMETER num_ctx 4096")
+ parameters = info.get('parameters', '')
+ num_ctx = None
+ for param in parameters.split('\n'):
+ if param.strip().startswith('num_ctx'):
+ num_ctx = int(param.split()[1])
+ break
+
+ if num_ctx is not None:
+ return num_ctx
+
+ # Fall back to model_info context_length (e.g., 'llama.context_length')
+ model_info = info.get('model_info', {})
+ arch = model_info.get('general.architecture', '')
+ context_key = f'{arch}.context_length' if arch else 'general.context_length'
+ context_length = model_info.get(context_key)
+
+ if context_length is not None:
+ return int(context_length)
+
+ except Exception as e:
+ ASCIIColors.warning(f"Error fetching model info: {str(e)}")
+
+ # Failsafe: Hardcoded context sizes for popular Ollama models
+ known_contexts = {
+ 'llama2': 4096, # Llama 2 default
+ 'llama3': 8192, # Llama 3 default
+ 'llama3.1': 131072, # Llama 3.1 extended context
+ 'llama3.2': 131072, # Llama 3.2 extended context
+ 'llama3.3': 131072, # Assuming similar to 3.1/3.2
+ 'mistral': 32768, # Mistral 7B v0.2+ default
+ 'mixtral': 32768, # Mixtral 8x7B default
+ 'mixtral8x22b': 65536, # Mixtral 8x22B default
+ 'gemma': 8192, # Gemma default
+ 'gemma2': 8192, # Gemma 2 default
+ 'gemma3': 131072, # Gemma 3 with 128K context
+ 'phi': 2048, # Phi default (older)
+ 'phi2': 2048, # Phi-2 default
+ 'phi3': 131072, # Phi-3 variants often use 128K (mini/medium extended)
+ 'qwen': 8192, # Qwen default
+ 'qwen2': 32768, # Qwen2 default for 7B
+ 'qwen2.5': 131072, # Qwen2.5 with 128K
+ 'codellama': 16384, # CodeLlama extended
+ 'codegemma': 8192, # CodeGemma default
+ 'deepseek-coder': 16384, # DeepSeek-Coder V1 default
+ 'deepseek-coder-v2': 131072, # DeepSeek-Coder V2 with 128K
+ 'deepseek-llm': 4096, # DeepSeek-LLM default
+ 'deepseek-v2': 131072, # DeepSeek-V2 with 128K
+ 'yi': 4096, # Yi base default
+ 'yi1.5': 32768, # Yi-1.5 with 32K
+ 'command-r': 131072, # Command-R with 128K
+ 'vicuna': 2048, # Vicuna default (up to 16K in some variants)
+ 'wizardlm': 16384, # WizardLM default
+ 'wizardlm2': 32768, # WizardLM2 (Mistral-based)
+ 'zephyr': 65536, # Zephyr beta (Mistral-based extended)
+ 'falcon': 2048, # Falcon default
+ 'starcoder': 8192, # StarCoder default
+ 'stablelm': 4096, # StableLM default
+ 'orca': 4096, # Orca default
+ 'orca2': 4096, # Orca 2 default
+ 'dolphin': 32768, # Dolphin (often Mistral-based)
+ 'openhermes': 8192, # OpenHermes default
+ }
+
+ # Extract base model name (e.g., 'llama3' from 'llama3:8b-instruct')
+ base_name = model_name.split(':')[0].lower().strip()
+
+ if base_name in known_contexts:
+ ASCIIColors.warning(f"Using hardcoded context size for model '{model_name}': {known_contexts[base_name]}")
+ return known_contexts[base_name]
+
+ ASCIIColors.warning(f"Context size not found for model '{model_name}'")
+ return None
+

  if __name__ == '__main__':
  global full_streamed_text
  # Example Usage (requires an Ollama server running)
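
Note: the resolution order in the new Ollama get_ctx_size is an explicit num_ctx override from the Modelfile parameters, then the architecture's context_length from model_info, then the hardcoded table keyed on the part of the tag before ':' (e.g. 'llama3' from 'llama3:8b-instruct'). A standalone sketch of the first two steps against an illustrative ollama.show()-style payload; the payload values here are made up for the example:

    # Illustrative payload shaped like the fields get_ctx_size reads from ollama.show().
    info = {
        "parameters": "num_ctx 4096\ntemperature 0.7",
        "model_info": {"general.architecture": "llama", "llama.context_length": 8192},
    }

    # Step 1: an explicit num_ctx override wins.
    num_ctx = None
    for param in info.get("parameters", "").split("\n"):
        if param.strip().startswith("num_ctx"):
            num_ctx = int(param.split()[1])
            break

    # Step 2: otherwise use the architecture's declared context length.
    if num_ctx is None:
        arch = info.get("model_info", {}).get("general.architecture", "")
        key = f"{arch}.context_length" if arch else "general.context_length"
        num_ctx = info["model_info"].get(key)

    print(num_ctx)  # -> 4096 here, since the override is present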