lollms-client 0.31.1__py3-none-any.whl → 0.32.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lollms-client might be problematic.
- lollms_client/__init__.py +1 -1
- lollms_client/llm_bindings/llamacpp/__init__.py +210 -35
- lollms_client/llm_bindings/ollama/__init__.py +88 -0
- lollms_client/llm_bindings/openai/__init__.py +372 -294
- lollms_client/llm_bindings/pythonllamacpp/__init__.py +153 -139
- lollms_client/lollms_core.py +17 -5
- lollms_client/lollms_discussion.py +2 -0
- lollms_client/lollms_llm_binding.py +1 -1
- lollms_client/lollms_mcp_security.py +84 -0
- {lollms_client-0.31.1.dist-info → lollms_client-0.32.1.dist-info}/METADATA +1 -1
- {lollms_client-0.31.1.dist-info → lollms_client-0.32.1.dist-info}/RECORD +14 -13
- {lollms_client-0.31.1.dist-info → lollms_client-0.32.1.dist-info}/WHEEL +0 -0
- {lollms_client-0.31.1.dist-info → lollms_client-0.32.1.dist-info}/licenses/LICENSE +0 -0
- {lollms_client-0.31.1.dist-info → lollms_client-0.32.1.dist-info}/top_level.txt +0 -0
lollms_client/__init__.py
CHANGED
@@ -8,7 +8,7 @@ from lollms_client.lollms_utilities import PromptReshaper # Keep general utiliti
 from lollms_client.lollms_mcp_binding import LollmsMCPBinding, LollmsMCPBindingManager
 from lollms_client.lollms_llm_binding import LollmsLLMBindingManager
 
-__version__ = "0.31.1"
+__version__ = "0.32.1" # Updated version
 
 # Optionally, you could define __all__ if you want to be explicit about exports
 __all__ = [
lollms_client/llm_bindings/llamacpp/__init__.py
CHANGED

@@ -272,8 +272,22 @@ class LlamaCppServerBinding(LollmsLLMBinding):
         if llama_cpp_binaries is None: raise ImportError("llama-cpp-binaries package is required but not found.")
 
         self.models_path = Path(models_path)
-        self.user_provided_model_name = model_name
-
+        self.user_provided_model_name = model_name # Store the name/path user gave
+        self._model_path_map: Dict[str, Path] = {} # Maps unique name to full Path
+
+        # Initial scan for available models
+        self._scan_models()
+
+        # Determine the model to load
+        effective_model_to_load = model_name
+        if not effective_model_to_load and self._model_path_map:
+            # If no model was specified and we have models, pick the first one
+            # Sorting ensures a deterministic choice
+            first_model_name = sorted(self._model_path_map.keys())[0]
+            effective_model_to_load = first_model_name
+            ASCIIColors.info(f"No model was specified. Automatically selecting the first available model: '{effective_model_to_load}'")
+            self.user_provided_model_name = effective_model_to_load # Update for get_model_info etc.
+
         # Initial hint for clip_model_path, resolved fully in load_model
         self.clip_model_path: Optional[Path] = None
         if clip_model_name:
@@ -294,8 +308,12 @@ class LlamaCppServerBinding(LollmsLLMBinding):
         self.port: Optional[int] = None
         self.server_key: Optional[tuple] = None
 
-
-
+        # Now, attempt to load the selected model
+        if effective_model_to_load:
+            if not self.load_model(effective_model_to_load):
+                ASCIIColors.error(f"Initial model load for '{effective_model_to_load}' failed. Binding may not be functional.")
+        else:
+            ASCIIColors.warning("No models found in the models path. The binding will be idle until a model is loaded.")
 
     def _get_server_binary_path(self) -> Path:
         custom_path_str = self.server_args.get("llama_server_binary_path")
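The two constructor hunks above add automatic model selection: when no model name is given, the binding scans `models_path`, sorts the discovered names, and loads the first one. A minimal usage sketch, assuming the binding class is imported directly from its module; the keyword arguments mirror the `__main__` test further down and the paths are placeholders:

```python
# Minimal sketch of the new default-model selection (assumed direct import;
# constructor kwargs mirror the __main__ test shown later in this diff).
from lollms_client.llm_bindings.llamacpp import LlamaCppServerBinding

binding = LlamaCppServerBinding(
    model_name=None,                      # None -> first GGUF found (sorted, deterministic)
    models_path="/path/to/gguf/models",   # directory scanned by _scan_models()
    config={},                            # extra server arguments, if any
)
print(binding.user_provided_model_name)   # updated to the auto-selected model name
```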
@@ -313,16 +331,41 @@ class LlamaCppServerBinding(LollmsLLMBinding):
         raise FileNotFoundError("Llama.cpp server binary not found. Ensure 'llama-cpp-binaries' or 'llama-cpp-python[server]' is installed or provide 'llama_server_binary_path'.")
 
     def _resolve_model_path(self, model_name_or_path: str) -> Path:
+        """
+        Resolves a model name or path to a full Path object.
+        It prioritizes the internal map, then checks for absolute/relative paths,
+        and rescans the models directory as a fallback.
+        """
+        # 1. Check if the provided name is a key in our map
+        if model_name_or_path in self._model_path_map:
+            resolved_path = self._model_path_map[model_name_or_path]
+            ASCIIColors.info(f"Resolved model name '{model_name_or_path}' to path: {resolved_path}")
+            return resolved_path
+
+        # 2. If not in map, treat it as a potential path (absolute or relative to models_path)
         model_p = Path(model_name_or_path)
         if model_p.is_absolute():
-            if model_p.exists()
-
-
+            if model_p.exists() and model_p.is_file():
+                return model_p
+
         path_in_models_dir = self.models_path / model_name_or_path
         if path_in_models_dir.exists() and path_in_models_dir.is_file():
-            ASCIIColors.info(f"Found model at: {path_in_models_dir}")
-
-
+            ASCIIColors.info(f"Found model at relative path: {path_in_models_dir}")
+            return path_in_models_dir
+
+        # 3. As a fallback, rescan the models directory in case the file was just added
+        ASCIIColors.info("Model not found in cache, rescanning directory...")
+        self._scan_models()
+        if model_name_or_path in self._model_path_map:
+            resolved_path = self._model_path_map[model_name_or_path]
+            ASCIIColors.info(f"Found model '{model_name_or_path}' after rescan: {resolved_path}")
+            return resolved_path
+
+        # Final check for absolute path after rescan
+        if model_p.is_absolute() and model_p.exists() and model_p.is_file():
+            return model_p
+
+        raise FileNotFoundError(f"Model '{model_name_or_path}' not found in the map, as an absolute path, or within '{self.models_path}'.")
 
     def _find_available_port(self) -> int:
         with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
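The resolution order introduced here is: scanned-name map, absolute path, path relative to `models_path`, then a rescan before raising. A simplified standalone illustration of that order (not the binding's actual method, which also logs and rescans):

```python
# Simplified illustration of the lookup order used by _resolve_model_path:
# 1) unique name from the scan map, 2) absolute path, 3) path relative to the
# models directory, 4) give up (the real method rescans once before raising).
from pathlib import Path
from typing import Dict

def resolve_model(name: str, model_map: Dict[str, Path], models_path: Path) -> Path:
    if name in model_map:                                  # 1. unique name produced by the scan
        return model_map[name]
    candidate = Path(name)
    if candidate.is_absolute() and candidate.is_file():    # 2. absolute path
        return candidate
    relative = models_path / name
    if relative.is_file():                                 # 3. relative to models_path
        return relative
    raise FileNotFoundError(name)                          # 4. not found
```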
@@ -352,6 +395,7 @@ class LlamaCppServerBinding(LollmsLLMBinding):
 
 
     def load_model(self, model_name_or_path: str) -> bool:
+        self.user_provided_model_name = model_name_or_path # Keep track of the selected model name
         try:
             resolved_model_path = self._resolve_model_path(model_name_or_path)
         except Exception as ex:
@@ -805,23 +849,129 @@ class LlamaCppServerBinding(LollmsLLMBinding):
         info["supports_structured_output"] = self.server_args.get("grammar_string") is not None
         return info
 
-    def
+    def _scan_models(self):
+        """
+        Scans the models_path for GGUF files and populates the model map.
+        Handles duplicate filenames by prefixing them with their parent directory path.
+        """
+        self._model_path_map = {}
+        if not self.models_path.exists() or not self.models_path.is_dir():
+            ASCIIColors.warning(f"Models path does not exist or is not a directory: {self.models_path}")
+            return
+
+        all_paths = list(self.models_path.rglob("*.gguf"))
+        filenames_count = {}
+        for path in all_paths:
+            if path.is_file():
+                filenames_count[path.name] = filenames_count.get(path.name, 0) + 1
+
+        for model_file in all_paths:
+            if model_file.is_file():
+                # On Windows, path separators can be tricky. Convert to generic format.
+                relative_path_str = str(model_file.relative_to(self.models_path).as_posix())
+                if filenames_count[model_file.name] > 1:
+                    # Duplicate filename, use relative path as the unique name
+                    unique_name = relative_path_str
+                else:
+                    # Unique filename, use the name itself
+                    unique_name = model_file.name
+
+                self._model_path_map[unique_name] = model_file
+
+        ASCIIColors.info(f"Scanned {len(self._model_path_map)} models from {self.models_path}.")
+
+    def listModels(self) -> List[Dict[str, Any]]:
+        """
+        Lists all available GGUF models, rescanning the directory first.
+        """
+        self._scan_models() # Always rescan when asked for the list
+
         models_found = []
-
-
-
-
-
-
-
-
-
-
-        return models_found
+        for unique_name, model_path in self._model_path_map.items():
+            models_found.append({
+                'name': unique_name, # The unique name for selection
+                'model_name': model_path.name, # The original filename for display
+                'path': str(model_path), # The full path
+                'size': model_path.stat().st_size
+            })
+
+        # Sort the list alphabetically by the unique name for consistent ordering
+        return sorted(models_found, key=lambda x: x['name'])
 
     def __del__(self):
         self.unload_model()
 
+    def get_ctx_size(self, model_name: Optional[str] = None) -> Optional[int]:
+        """
+        Retrieves context size for a model from a hardcoded list.
+
+        This method checks if the model name contains a known base model identifier
+        (e.g., 'llama3.1', 'gemma2') to determine its context length. It's intended
+        as a failsafe when the context size cannot be retrieved directly from the
+        Ollama API.
+        """
+        if model_name is None:
+            model_name = self.model_name
+
+        # Hardcoded context sizes for popular models. More specific names (e.g., 'llama3.1')
+        # should appear, as they will be checked first due to the sorting logic below.
+        known_contexts = {
+            'llama3.1': 131072, # Llama 3.1 extended context
+            'llama3.2': 131072, # Llama 3.2 extended context
+            'llama3.3': 131072, # Assuming similar to 3.1/3.2
+            'llama3': 8192, # Llama 3 default
+            'llama2': 4096, # Llama 2 default
+            'mixtral8x22b': 65536, # Mixtral 8x22B default
+            'mixtral': 32768, # Mixtral 8x7B default
+            'mistral': 32768, # Mistral 7B v0.2+ default
+            'gemma3': 131072, # Gemma 3 with 128K context
+            'gemma2': 8192, # Gemma 2 default
+            'gemma': 8192, # Gemma default
+            'phi3': 131072, # Phi-3 variants often use 128K (mini/medium extended)
+            'phi2': 2048, # Phi-2 default
+            'phi': 2048, # Phi default (older)
+            'qwen2.5': 131072, # Qwen2.5 with 128K
+            'qwen2': 32768, # Qwen2 default for 7B
+            'qwen': 8192, # Qwen default
+            'codellama': 16384, # CodeLlama extended
+            'codegemma': 8192, # CodeGemma default
+            'deepseek-coder-v2': 131072, # DeepSeek-Coder V2 with 128K
+            'deepseek-coder': 16384, # DeepSeek-Coder V1 default
+            'deepseek-v2': 131072, # DeepSeek-V2 with 128K
+            'deepseek-llm': 4096, # DeepSeek-LLM default
+            'yi1.5': 32768, # Yi-1.5 with 32K
+            'yi': 4096, # Yi base default
+            'command-r': 131072, # Command-R with 128K
+            'wizardlm2': 32768, # WizardLM2 (Mistral-based)
+            'wizardlm': 16384, # WizardLM default
+            'zephyr': 65536, # Zephyr beta (Mistral-based extended)
+            'vicuna': 2048, # Vicuna default (up to 16K in some variants)
+            'falcon': 2048, # Falcon default
+            'starcoder': 8192, # StarCoder default
+            'stablelm': 4096, # StableLM default
+            'orca2': 4096, # Orca 2 default
+            'orca': 4096, # Orca default
+            'dolphin': 32768, # Dolphin (often Mistral-based)
+            'openhermes': 8192, # OpenHermes default
+        }
+
+        normalized_model_name = model_name.lower().strip()
+
+        # Sort keys by length in descending order. This ensures that a more specific
+        # name like 'llama3.1' is checked before a less specific name like 'llama3'.
+        sorted_base_models = sorted(known_contexts.keys(), key=len, reverse=True)
+
+        for base_name in sorted_base_models:
+            if base_name in normalized_model_name:
+                context_size = known_contexts[base_name]
+                ASCIIColors.warning(
+                    f"Using hardcoded context size for model '{model_name}' "
+                    f"based on base name '{base_name}': {context_size}"
+                )
+                return context_size
+
+        ASCIIColors.warning(f"Context size not found for model '{model_name}' in the hardcoded list.")
+        return None
 
 if __name__ == '__main__':
     global full_streamed_text # Define for the callback
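The key behaviour added in this hunk is duplicate handling: when two GGUF files share a filename, the path relative to the models directory (in posix form) becomes the unique selection name. A standalone sketch of that logic, useful for a quick sanity check outside the binding:

```python
# Standalone sketch of the duplicate-aware scan used by _scan_models():
# duplicated filenames get their models_path-relative posix path as the key,
# unique filenames keep the bare filename.
from pathlib import Path
from typing import Dict

def scan_gguf(models_path: Path) -> Dict[str, Path]:
    files = [p for p in models_path.rglob("*.gguf") if p.is_file()]
    counts: Dict[str, int] = {}
    for p in files:
        counts[p.name] = counts.get(p.name, 0) + 1
    mapping: Dict[str, Path] = {}
    for p in files:
        key = p.relative_to(models_path).as_posix() if counts[p.name] > 1 else p.name
        mapping[key] = p
    return mapping

# e.g. {'model.gguf': ..., 'subdir/model.gguf': ...} when the same filename exists twice
```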
@@ -872,17 +1022,21 @@ if __name__ == '__main__':
     try:
         if primary_model_available:
             ASCIIColors.cyan("\n--- Initializing First LlamaCppServerBinding Instance ---")
+            # Test default model selection by passing model_name=None
+            ASCIIColors.info("Testing default model selection (model_name=None)")
             active_binding1 = LlamaCppServerBinding(
-                model_name=
+                model_name=None, models_path=str(models_path), config=binding_config
             )
             if not active_binding1.server_process or not active_binding1.server_process.is_healthy:
                 raise RuntimeError("Server for binding1 failed to start or become healthy.")
-            ASCIIColors.green(f"Binding1 initialized. Server for '{active_binding1.current_model_path.name}' running on port {active_binding1.port}.")
+            ASCIIColors.green(f"Binding1 initialized with default model. Server for '{active_binding1.current_model_path.name}' running on port {active_binding1.port}.")
             ASCIIColors.info(f"Binding1 Model Info: {json.dumps(active_binding1.get_model_info(), indent=2)}")
 
-            ASCIIColors.cyan("\n--- Initializing Second LlamaCppServerBinding Instance (Same Model) ---")
+            ASCIIColors.cyan("\n--- Initializing Second LlamaCppServerBinding Instance (Same Model, explicit name) ---")
+            # Load the same model explicitly now
+            model_to_load_explicitly = active_binding1.user_provided_model_name
             active_binding2 = LlamaCppServerBinding(
-                model_name=
+                model_name=model_to_load_explicitly, models_path=str(models_path), config=binding_config
             )
             if not active_binding2.server_process or not active_binding2.server_process.is_healthy:
                 raise RuntimeError("Server for binding2 failed to start or become healthy (should reuse).")
@@ -896,9 +1050,30 @@ if __name__ == '__main__':
 
             # --- List Models (scans configured directories) ---
             ASCIIColors.cyan("\n--- Listing Models (from search paths, using binding1) ---")
+            # Create a dummy duplicate model to test unique naming
+            duplicate_folder = models_path / "subdir"
+            duplicate_folder.mkdir(exist_ok=True)
+            duplicate_model_path = duplicate_folder / test_model_path.name
+            import shutil
+            shutil.copy(test_model_path, duplicate_model_path)
+            ASCIIColors.info(f"Created a duplicate model for testing: {duplicate_model_path}")
+
             listed_models = active_binding1.listModels()
-            if listed_models:
+            if listed_models:
+                ASCIIColors.green(f"Found {len(listed_models)} GGUF files.")
+                pprint.pprint(listed_models)
+                # Check if the duplicate was handled
+                names = [m['name'] for m in listed_models]
+                if test_model_path.name in names and f"subdir/{test_model_path.name}" in names:
+                    ASCIIColors.green("SUCCESS: Duplicate model names were correctly handled.")
+                else:
+                    ASCIIColors.error("FAILURE: Duplicate model names were not handled correctly.")
             else: ASCIIColors.warning("No GGUF models found in search paths.")
+
+            # Clean up dummy duplicate
+            duplicate_model_path.unlink()
+            duplicate_folder.rmdir()
+
 
             # --- Tokenize/Detokenize ---
             ASCIIColors.cyan("\n--- Tokenize/Detokenize (using binding1) ---")
@@ -913,16 +1088,16 @@ if __name__ == '__main__':
             # --- Text Generation (Non-Streaming, Chat API, binding1) ---
             ASCIIColors.cyan("\n--- Text Generation (Non-Streaming, Chat API, binding1) ---")
             prompt_text = "What is the capital of Germany?"
-            generated_text = active_binding1.generate_text(prompt_text, system_prompt="Concise expert.", n_predict=20, stream=False
+            generated_text = active_binding1.generate_text(prompt_text, system_prompt="Concise expert.", n_predict=20, stream=False)
             if isinstance(generated_text, str): ASCIIColors.green(f"Generated text (binding1): {generated_text}")
             else: ASCIIColors.error(f"Generation failed (binding1): {generated_text}")
 
             # --- Text Generation (Streaming, Completion API, binding2) ---
-            ASCIIColors.cyan("\n--- Text Generation (Streaming,
+            ASCIIColors.cyan("\n--- Text Generation (Streaming, Chat API, binding2) ---")
             full_streamed_text = "" # Reset global
             def stream_callback(chunk: str, msg_type: int): global full_streamed_text; ASCIIColors.green(f"{chunk}", end="", flush=True); full_streamed_text += chunk; return True
 
-            result_b2 = active_binding2.generate_text(prompt_text, system_prompt="Concise expert.", n_predict=30, stream=True, streaming_callback=stream_callback
+            result_b2 = active_binding2.generate_text(prompt_text, system_prompt="Concise expert.", n_predict=30, stream=True, streaming_callback=stream_callback)
             print("\n--- End of Stream (binding2) ---")
             if isinstance(result_b2, str): ASCIIColors.green(f"Full streamed text (binding2): {result_b2}")
             else: ASCIIColors.error(f"Streaming generation failed (binding2): {result_b2}")
@@ -957,9 +1132,9 @@ if __name__ == '__main__':
             # llava_binding_config["chat_template"] = "llava-1.5"
 
             active_binding_llava = LlamaCppServerBinding(
-                model_name=str(llava_model_path), # Pass
+                model_name=str(llava_model_path.name), # Pass filename, let it resolve
                 models_path=str(models_path),
-                clip_model_name=str(llava_clip_path_actual), # Pass
+                clip_model_name=str(llava_clip_path_actual.name), # Pass filename for clip
                 config=llava_binding_config
             )
             if not active_binding_llava.server_process or not active_binding_llava.server_process.is_healthy:
@@ -970,7 +1145,7 @@ if __name__ == '__main__':
 
             llava_prompt = "Describe this image."
             llava_response = active_binding_llava.generate_text(
-                prompt=llava_prompt, images=[str(dummy_image_path)], n_predict=40, stream=False
+                prompt=llava_prompt, images=[str(dummy_image_path)], n_predict=40, stream=False
             )
             if isinstance(llava_response, str): ASCIIColors.green(f"LLaVA response: {llava_response}")
             else: ASCIIColors.error(f"LLaVA generation failed: {llava_response}")
@@ -986,7 +1161,7 @@ if __name__ == '__main__':
             # --- Test changing model (using binding1 to load a different or same model) ---
             ASCIIColors.cyan("\n--- Testing Model Change (binding1 reloads its model) ---")
             # For a real change, use a different model name if available. Here, we reload the same.
-            reload_success = active_binding1.load_model(
+            reload_success = active_binding1.load_model(active_binding1.user_provided_model_name) # Reload original model
             if reload_success and active_binding1.server_process and active_binding1.server_process.is_healthy:
                 ASCIIColors.green(f"Model reloaded/re-confirmed successfully by binding1. Server on port {active_binding1.port}.")
                 reloaded_gen = active_binding1.generate_text("Ping", n_predict=5, stream=False)
@@ -1023,4 +1198,4 @@ if __name__ == '__main__':
     else:
         ASCIIColors.green("All servers shut down correctly.")
 
-    ASCIIColors.yellow("\nLlamaCppServerBinding test finished.")
+    ASCIIColors.yellow("\nLlamaCppServerBinding test finished.")
lollms_client/llm_bindings/ollama/__init__.py
CHANGED

@@ -598,6 +598,94 @@ class OllamaBinding(LollmsLLMBinding):
         ASCIIColors.info(f"Ollama model set to: {model_name}. It will be loaded by the server on first use.")
         return True
 
+    def get_ctx_size(self, model_name: Optional[str] = None) -> Optional[int]:
+        """
+        Retrieves the context size for an Ollama model.
+
+        The effective context size is the `num_ctx` parameter if overridden in the Modelfile,
+        otherwise it falls back to the model's default context length from its architecture details.
+        As a final failsafe, uses a hardcoded list of known popular models' context lengths.
+        """
+        if model_name is None:
+            model_name = self.model_name
+
+        try:
+            info = ollama.show(model_name)
+
+            # Parse num_ctx from the 'parameters' string (e.g., "PARAMETER num_ctx 4096")
+            parameters = info.get('parameters', '')
+            num_ctx = None
+            for param in parameters.split('\n'):
+                if param.strip().startswith('num_ctx'):
+                    num_ctx = int(param.split()[1])
+                    break
+
+            if num_ctx is not None:
+                return num_ctx
+
+            # Fall back to model_info context_length (e.g., 'llama.context_length')
+            model_info = info.get('model_info', {})
+            arch = model_info.get('general.architecture', '')
+            context_key = f'{arch}.context_length' if arch else 'general.context_length'
+            context_length = model_info.get(context_key)
+
+            if context_length is not None:
+                return int(context_length)
+
+        except Exception as e:
+            ASCIIColors.warning(f"Error fetching model info: {str(e)}")
+
+        # Failsafe: Hardcoded context sizes for popular Ollama models
+        known_contexts = {
+            'llama2': 4096, # Llama 2 default
+            'llama3': 8192, # Llama 3 default
+            'llama3.1': 131072, # Llama 3.1 extended context
+            'llama3.2': 131072, # Llama 3.2 extended context
+            'llama3.3': 131072, # Assuming similar to 3.1/3.2
+            'mistral': 32768, # Mistral 7B v0.2+ default
+            'mixtral': 32768, # Mixtral 8x7B default
+            'mixtral8x22b': 65536, # Mixtral 8x22B default
+            'gemma': 8192, # Gemma default
+            'gemma2': 8192, # Gemma 2 default
+            'gemma3': 131072, # Gemma 3 with 128K context
+            'phi': 2048, # Phi default (older)
+            'phi2': 2048, # Phi-2 default
+            'phi3': 131072, # Phi-3 variants often use 128K (mini/medium extended)
+            'qwen': 8192, # Qwen default
+            'qwen2': 32768, # Qwen2 default for 7B
+            'qwen2.5': 131072, # Qwen2.5 with 128K
+            'codellama': 16384, # CodeLlama extended
+            'codegemma': 8192, # CodeGemma default
+            'deepseek-coder': 16384, # DeepSeek-Coder V1 default
+            'deepseek-coder-v2': 131072, # DeepSeek-Coder V2 with 128K
+            'deepseek-llm': 4096, # DeepSeek-LLM default
+            'deepseek-v2': 131072, # DeepSeek-V2 with 128K
+            'yi': 4096, # Yi base default
+            'yi1.5': 32768, # Yi-1.5 with 32K
+            'command-r': 131072, # Command-R with 128K
+            'vicuna': 2048, # Vicuna default (up to 16K in some variants)
+            'wizardlm': 16384, # WizardLM default
+            'wizardlm2': 32768, # WizardLM2 (Mistral-based)
+            'zephyr': 65536, # Zephyr beta (Mistral-based extended)
+            'falcon': 2048, # Falcon default
+            'starcoder': 8192, # StarCoder default
+            'stablelm': 4096, # StableLM default
+            'orca': 4096, # Orca default
+            'orca2': 4096, # Orca 2 default
+            'dolphin': 32768, # Dolphin (often Mistral-based)
+            'openhermes': 8192, # OpenHermes default
+        }
+
+        # Extract base model name (e.g., 'llama3' from 'llama3:8b-instruct')
+        base_name = model_name.split(':')[0].lower().strip()
+
+        if base_name in known_contexts:
+            ASCIIColors.warning(f"Using hardcoded context size for model '{model_name}': {known_contexts[base_name]}")
+            return known_contexts[base_name]
+
+        ASCIIColors.warning(f"Context size not found for model '{model_name}'")
+        return None
+
 if __name__ == '__main__':
     global full_streamed_text
     # Example Usage (requires an Ollama server running)
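The new Ollama method resolves the context window in three steps: an explicit `num_ctx` override parsed from the Modelfile parameters, the architecture's `context_length` from `model_info`, and finally the hardcoded table keyed by the base name before the `:` tag. A condensed sketch of the same chain, mirroring the calls used in the diff (the `known` argument stands in for the `known_contexts` dictionary shown above):

```python
# Condensed sketch of the fallback chain in get_ctx_size (mirrors the calls in
# the diff; `known` stands in for the hardcoded known_contexts table).
import ollama
from typing import Dict, Optional

def effective_ctx(model_name: str, known: Dict[str, int]) -> Optional[int]:
    try:
        info = ollama.show(model_name)
        for line in info.get('parameters', '').split('\n'):
            if line.strip().startswith('num_ctx'):
                return int(line.split()[1])                 # 1. Modelfile override
        model_info = info.get('model_info', {})
        arch = model_info.get('general.architecture', '')
        ctx = model_info.get(f'{arch}.context_length' if arch else 'general.context_length')
        if ctx is not None:
            return int(ctx)                                 # 2. architecture default
    except Exception:
        pass                                                # server unreachable, unknown model, ...
    return known.get(model_name.split(':')[0].lower().strip())  # 3. hardcoded table
```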